From b5667c61ecb35a55da78ad3794fc17f2e5eaff92 Mon Sep 17 00:00:00 2001 From: James Martin Date: Mon, 5 Sep 2022 23:48:56 -0700 Subject: [PATCH] Initial commit. --- .editorconfig | 12 +++ .gitignore | 13 +++ LICENSE.txt | 5 ++ Makefile | 24 ++++++ README.md | 6 ++ src/asm.c | 226 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/asm.h | 51 ++++++++++++ src/io.c | 157 +++++++++++++++++++++++++++++++++++ src/io.h | 22 +++++ src/ir.c | 219 ++++++++++++++++++++++++++++++++++++++++++++++++ src/ir.h | 47 +++++++++++ src/main.c | 83 ++++++++++++++++++ 12 files changed, 865 insertions(+) create mode 100644 .editorconfig create mode 100644 .gitignore create mode 100644 LICENSE.txt create mode 100644 Makefile create mode 100644 README.md create mode 100644 src/asm.c create mode 100644 src/asm.h create mode 100644 src/io.c create mode 100644 src/io.h create mode 100644 src/ir.c create mode 100644 src/ir.h create mode 100644 src/main.c diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..66d1d73 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,12 @@ +# https://EditorConfig.org/ +root = true + +[*] +indent_size = 4 +charset = utf-8 +indent_style = space +trim_trailing_whitespace = true +insert_final_newline = true + +[Makefile] +indent_style = tab diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..15f04b0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +* +!*/ + +# source code +!/src/**/*.c +!/src/**/*.h + +# top-level configuration +!/.editorconfig +!/.gitignore +!/LICENSE.txt +!/Makefile +!/README.md diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..4476f77 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,5 @@ +Copyright (C) 2022 by James Martin + +Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted. 
+ +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..b426fd3 --- /dev/null +++ b/Makefile @@ -0,0 +1,24 @@ +SHELL = /bin/sh + +.SUFFIXES: +.SUFFIXES: .c .o + +CFLAGS = -std=c99 -pedantic -Wextra -Os +LDFLAGS = -lc + +OBJECTS = main.o asm.o io.o ir.o + +.PHONY: passc +passc: .bin $(OBJECTS) + $(CC) -o bin/passc $(addprefix bin/obj/,$(OBJECTS)) $(LDFLAGS) + +.PHONY: .bin +.bin: + @mkdir -p bin/obj + +%.o : src/%.c + $(CC) -c $(CFLAGS) $< -o bin/obj/$@ + +.PHONY: clean +clean: + @-rm -rf bin diff --git a/README.md b/README.md new file mode 100644 index 0000000..f0e3a28 --- /dev/null +++ b/README.md @@ -0,0 +1,6 @@ +# Passlang +A one-pass, linear-time compile-and-go compiler. + +I'm imposing these restrictions on myself to avoid over-engineering and give myself a design challenge. +The goal is to make the absolute best language I can under these constraints, +and then incrementally begin to relax the restrictions only when absolutely necessary to make progress. 
diff --git a/src/asm.c b/src/asm.c
new file mode 100644
index 0000000..945848b
--- /dev/null
+++ b/src/asm.c
@@ -0,0 +1,226 @@
+// x86-64 instruction encoder: every inst_* function writes the machine code
+// for one instruction to the output file through the emit helpers in io.h.
+//
+// REFERENCES:
+// http://ref.x86asm.net/index.html (geek64-abc)
+// https://wiki.osdev.org/X86-64_Instruction_Encoding
+// https://defuse.ca/online-x86-assembler.htm
+
+#include "asm.h"
+#include "io.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Position that jump displacements are computed against.
+// NOTE(review): nothing in this file advances `here` when bytes are emitted;
+// confirm what keeps it in sync with the output before relying on jumps.
+ip here;
+
+// Macro parameters are parenthesized so that expression arguments expand safely.
+#define REX 0x40
+// REX prefix with 64-bit operands set
+#define REX_W 0x48
+#define REX_R 0x44
+#define REX_X 0x42
+#define REX_B 0x41
+// REX prefix with the R (resp. B) bit set only when `r` is one of r8-r15
+#define REXQ_R(r) (REX | (((r) >= R8) ? REX_R : 0))
+#define REXQ_B(r) (REX | (((r) >= R8) ? REX_B : 0))
+// REX prefix including upper bit of register
+#define REXQ_WR(r) (REX_W | REXQ_R(r))
+// `r` supplies REX.R (extends ModRM.reg), `b` supplies REX.B (extends ModRM.rm)
+#define REXQ_WRB(r, b) (REX_W | REXQ_R(r) | REXQ_B(b))
+// lower 3 bits of register (not including part encoded in REX)
+#define REG(r) ((r) & 7)
+
+// ModRM `mod` field: register-direct, register-indirect,
+// indirect with 8-bit displacement, indirect with 32-bit displacement
+#define MODRM_RR (3 << 6)
+#define MODRM_RM (0 << 6)
+#define MODRM_RD8 (1 << 6)
+#define MODRM_RD32 (2 << 6)
+// ModRM byte without the `mod` field: `reg` in bits 3-5, `rm` in bits 0-2
+#define MODRMQ(reg, rm) ((REG(reg) << 3) | REG(rm))
+#define MODRMQ_RR(reg, rm) (MODRM_RR | MODRMQ(reg, rm))
+#define MODRMQ_RM(reg, base) (MODRM_RM | MODRMQ(reg, base))
+#define MODRMQ_RD8(reg, base) (MODRM_RD8 | MODRMQ(reg, base))
+#define MODRMQ_RD32(reg, base) (MODRM_RD32 | MODRMQ(reg, base))
+
+// Signed distance from `from` to `to`.
+// Exits with an error if the distance does not fit in 32 bits.
+static int32_t rel_offs(ip from, ip to) {
+    // jumping through hoops to hopefully avoid UB
+    int64_t off = (int64_t) to - (int64_t) from;
+    if (off > INT32_MAX || off < INT32_MIN) {
+        fprintf(stderr, "displacement greater than 32 bits!\n");
+        fprintf(stderr, "if you ever encounter this error, let me know and I'll implement it.\n");
+        exit(1);
+    }
+    return (int32_t) off;
+}
+
+// Signed distance from `here` to `there`.
+static int32_t rip_rel(ip there) {
+    return rel_offs(here, there);
+}
+
+#define JUMP_DISP8_SIZE 2
+// Emit a short jump: opcode followed by an 8-bit displacement.
+static void inst_jump_disp8(uint8_t disp) {
+    uint8_t inst[JUMP_DISP8_SIZE] = {
+        0xeb, // jmp Jbs
+        disp
+    };
+    emit(inst, sizeof inst);
+}
+
+#define JUMP_DISP32_SIZE 5
+static void inst_jump_disp32(uint32_t disp) {
+    emit_u8(0xe9); // jmp Jvds
+    emit_u32(disp);
+}
+
+// Jump to a known address, using the 2-byte short form when the displacement
+// fits in a signed byte and the 5-byte near form otherwise.
+void inst_jump(ip there) {
+    // Displacements are measured from the end of the jump instruction itself.
+    int32_t disp8 = rel_offs(here + JUMP_DISP8_SIZE, there);
+    // Inclusive bounds: -128 and 127 are both encodable in the short form.
+    if (disp8 <= INT8_MAX && disp8 >= INT8_MIN) {
+        inst_jump_disp8((uint8_t) disp8);
+    } else {
+        inst_jump_disp32((uint32_t) rel_offs(here + JUMP_DISP32_SIZE, there));
+    }
+}
+
+// Emit a near jump with a zero placeholder displacement.
+// Returns `here` as read after emitting; hand it to inst_jump_resolve later.
+ip inst_jump_unresolved(void) {
+    inst_jump_disp32(0);
+    return here;
+}
+
+// Fill in the placeholder of a jump created by inst_jump_unresolved.
+// `disp` is the value that function returned; the 4 bytes before it are
+// overwritten with the distance from `disp` to `there`.
+void inst_jump_resolve(ip disp, ip there) {
+    patch_i32(disp - 4, rel_offs(disp, there));
+}
+
+static void inst_mov_imm32(reg reg, uint32_t imm) {
+    // mov Zvqp Ivqp
+    // The register is encoded in the low opcode bits, so r8-r15 are selected
+    // with REX.B; REX.R only extends the ModRM.reg field, which is absent here.
+    if (reg >= R8) {
+        emit_u8(REXQ_B(reg));
+    }
+    emit_u8(0xb8 + REG(reg));
+    emit_u32(imm);
+}
+
+static void inst_mov_imm64(reg reg, uint64_t imm) {
+    // mov Zvqp Ivqp
+    // Same opcode-embedded register as the 32-bit form, hence REX.B again.
+    uint8_t buf[10] = { REX_W | REXQ_B(reg), 0xb8 + REG(reg), 0, 0, 0, 0, 0, 0, 0, 0 };
+    memcpy(&buf[2], &imm, sizeof imm);
+    emit(buf, sizeof buf);
+}
+
+// Load an unsigned immediate, using the shorter 32-bit form when it fits.
+void inst_mov_imm(reg reg, uint64_t imm) {
+    // TODO: emit `mov ax`, `mov al`, xor, xor+inc, xor+neg
+    if (imm <= UINT32_MAX) {
+        inst_mov_imm32(reg, (uint32_t) imm);
+    } else {
+        inst_mov_imm64(reg, imm);
+    }
+}
+
+// Load a signed immediate; only non-negative values that fit in 32 bits use
+// the short form, everything else is emitted as a full 64-bit immediate.
+void inst_mov_imm_i64(reg reg, int64_t imm) {
+    // TODO: emit sign extensions
+    if (imm >= 0 && imm <= UINT32_MAX) {
+        inst_mov_imm32(reg, (uint32_t) imm);
+    } else {
+        inst_mov_imm64(reg, (uint64_t) imm);
+    }
+}
+
+// Reject base registers whose ModRM encoding means something else:
+// rm=100 (SP, R12) requests a SIB byte and rm=101 (BP, R13) has a special
+// meaning without displacement. Neither case is implemented.
+static void check_base(reg base) {
+    if (base == SP || base == BP || base == R12 || base == R13) {
+        fprintf(stderr, "indirect addressing not implemented for sp & co\n");
+        exit(1);
+    }
+}
+
+// dest = src (64-bit register to register).
+void inst_mov(reg dest, reg src) {
+    // mov Evqp Gvqp
+    // ModRM.reg holds `src` and ModRM.rm holds `dest`, so REX.R must follow
+    // `src` and REX.B must follow `dest`.
+    emit_u8(REXQ_WRB(src, dest));
+    emit_u8(0x89);
+    emit_u8(MODRMQ_RR(src, dest));
+}
+
+// dest = [base]
+void inst_mov_from(reg dest, reg base) {
+    check_base(base);
+    // mov Gvqp Evqp
+    // ModRM.reg holds `dest` (REX.R), ModRM.rm holds `base` (REX.B).
+    emit_u8(REXQ_WRB(dest, base));
+    emit_u8(0x8B);
+    emit_u8(MODRMQ_RM(dest, base));
+}
+
+// dest = [base + disp], 8-bit displacement
+static void inst_mov_from_disp8(reg dest, reg base, int8_t disp) {
+    check_base(base);
+    // mov Gvqp Evqp
+    emit_u8(REXQ_WRB(dest, base));
+    emit_u8(0x8B);
+    emit_u8(MODRMQ_RD8(dest, base));
+    emit_u8((uint8_t) disp);
+}
+
+// dest = [base + disp], 32-bit displacement
+static void inst_mov_from_disp32(reg dest, reg base, int32_t disp) {
+    check_base(base);
+    // mov Gvqp Evqp
+    // ModRM.reg holds `dest` (REX.R), ModRM.rm holds `base` (REX.B).
+    emit_u8(REXQ_WRB(dest, base));
+    emit_u8(0x8B);
+    emit_u8(MODRMQ_RD32(dest, base));
+    emit_u32((uint32_t) disp);
+}
+
+// dest = [base + disp], choosing the shortest encoding for `disp`.
+void inst_mov_from_disp(reg dest, reg base, int32_t disp) {
+    if (disp == 0) {
+        inst_mov_from(dest, base);
+    } else if (disp <= INT8_MAX && disp >= INT8_MIN) {
+        inst_mov_from_disp8(dest, base, (int8_t) disp);
+    } else {
+        inst_mov_from_disp32(dest, base, disp);
+    }
+}
+
+// [base] = src
+void inst_mov_to(reg base, reg src) {
+    check_base(base);
+    // mov Evqp Gvqp
+    // This is a store, so the ModRM mode must be register-indirect, with
+    // `src` in ModRM.reg (REX.R) and `base` in ModRM.rm (REX.B).
+    emit_u8(REXQ_WRB(src, base));
+    emit_u8(0x89);
+    emit_u8(MODRMQ_RM(src, base));
+}
+
+// [base + disp] = src, 8-bit displacement
+static void inst_mov_to_disp8(reg base, reg src, int8_t disp) {
+    check_base(base);
+    // mov Evqp Gvqp
+    emit_u8(REXQ_WRB(src, base));
+    emit_u8(0x89);
+    emit_u8(MODRMQ_RD8(src, base));
+    emit_u8((uint8_t) disp);
+}
+
+// [base + disp] = src, 32-bit displacement
+static void inst_mov_to_disp32(reg base, reg src, int32_t disp) {
+    check_base(base);
+    // mov Evqp Gvqp
+    emit_u8(REXQ_WRB(src, base));
+    emit_u8(0x89);
+    emit_u8(MODRMQ_RD32(src, base));
+    emit_u32((uint32_t) disp);
+}
+
+// [base + disp] = src, choosing the shortest encoding for `disp`.
+void inst_mov_to_disp(reg base, reg src, int32_t disp) {
+    if (disp == 0) {
+        inst_mov_to(base, src);
+    } else if (disp <= INT8_MAX && disp >= INT8_MIN) {
+        inst_mov_to_disp8(base, src, (int8_t) disp);
+    } else {
+        inst_mov_to_disp32(base, src, disp);
+    }
+}
+
+
+// Emit the two-byte `syscall` instruction.
+void inst_syscall(void) {
+    const uint8_t buf[2] = { 0x0f, 0x05 };
+    emit(buf, sizeof buf);
+}
+
+void inst_push(reg reg) {
+    // push Zvq
+    // register is encoded in the opcode; r8-r15 need REX.B
+    if (reg >= R8) {
+        emit_u8(REX_B);
+    }
+    emit_u8(0x50 + REG(reg));
+}
+
+void inst_pop(reg reg) {
+    // pop Zvq
+    // register is encoded in the opcode; r8-r15 need REX.B
+    if (reg >= R8) {
+        emit_u8(REX_B);
+    }
+    emit_u8(0x58 + REG(reg));
+}
diff --git a/src/asm.h b/src/asm.h
new file mode 100644
index 0000000..a45b424
--- /dev/null
+++ b/src/asm.h
@@ -0,0 +1,51 @@
+// Include guard avoids the reserved "underscore + uppercase" identifier form.
+#ifndef ASM_H
+#define ASM_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+// A code address, as used for jump targets and displacement patching.
+typedef size_t ip;
+extern ip here;
+
+// A general-purpose x86 register.
+// The specific register size (e.g. al/ax/eax/rax) depends on the instruction.
+// All registers are valid for all instructions; we will perform exchanges if necessary.
+// The enumerator values are the hardware register numbers used in encodings.
+typedef enum reg {
+    RA = 0,   // rax, eax, ax, al
+    RC = 1,   // rcx, ecx, cx, cl
+    RD = 2,   // rdx, edx, dx, dl
+    RB = 3,   // rbx, ebx, bx, bl
+    SP = 4,   // rsp, esp, sp, spl (we do not use ah)
+    BP = 5,   // rbp, ebp, bp, bpl (we do not use ch)
+    SI = 6,   // rsi, esi, si, sil (we do not use dh)
+    DI = 7,   // rdi, edi, di, dil (we do not use bh)
+    R8 = 8,   // r8, r8d, r8w, r8l
+    R9 = 9,   // r9, r9d, r9w, r9l
+    R10 = 10, // r10, r10d, r10w, r10l
+    R11 = 11, // r11, r11d, r11w, r11l
+    R12 = 12, // r12, r12d, r12w, r12l
+    R13 = 13, // r13, r13d, r13w, r13l
+    R14 = 14, // r14, r14d, r14w, r14l
+    R15 = 15, // r15, r15d, r15w, r15l
+} reg;
+
+/// Jump to a known address.
+void inst_jump(ip there);
+
+/// Jump to an unresolved address.
+/// The returned value identifies the jump; pass it to `inst_jump_resolve`
+/// once the destination is known.
+ip inst_jump_unresolved(void);
+void inst_jump_resolve(ip disp, ip there);
+
+/// Load an immediate value into a register.
+void inst_mov_imm(reg reg, uint64_t imm);
+void inst_mov_imm_i64(reg reg, int64_t imm);
+void inst_syscall(void);
+
+void inst_push(reg reg);
+void inst_pop(reg reg);
+
+/// Register-to-register and register-indirect moves with 64-bit operands.
+/// The `_disp` variants take a signed byte displacement added to `base`.
+void inst_mov(reg dest, reg src);
+void inst_mov_from(reg dest, reg base);
+void inst_mov_from_disp(reg dest, reg base, int32_t disp);
+void inst_mov_to(reg base, reg src);
+void inst_mov_to_disp(reg base, reg src, int32_t disp);
+#endif
diff --git a/src/io.c b/src/io.c
new file mode 100644
index 0000000..f4b47bd
--- /dev/null
+++ b/src/io.c
@@ -0,0 +1,157 @@
+// Must precede every #include so that the libc headers expose the extensions
+// used by the Unix implementation below.
+#ifdef __unix__
+#define _GNU_SOURCE
+#endif
+
+#include "io.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef __unix__
+// This program can be trivially converted to work with only the C standard library
+// at the cost of not being able to link the output file atomically.
+#include <fcntl.h>
+#include <libgen.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#endif
+
+// Final path of the output file, remembered for close_files().
+static const char* outfile_name;
+FILE* infile;
+FILE* outfile;
+
+#ifdef __unix__
+// Open the source file for reading and create an unnamed temporary file,
+// in the output file's directory, to receive the compiled program.
+// Any failure is reported on stderr and terminates the process.
+void open_files(const char* infile_name, const char* outfile_name_) {
+    outfile_name = outfile_name_;
+    // To avoid creating a corrupt or incomplete output file,
+    // we operate on a temporary file and atomically link it only once compilation has succeeded.
+    unlink(outfile_name);
+
+    int infile_fd = open(infile_name, O_RDONLY);
+    if (infile_fd == -1) {
+        fprintf(stderr, "failed to open source file: %s\n", strerror(errno));
+        exit(1);
+    }
+    off_t infile_len = lseek(infile_fd, 0, SEEK_END);
+    if (infile_len == (off_t) -1) {
+        fprintf(stderr, "failed to get length of source file: %s\n", strerror(errno));
+        exit(1);
+    }
+    // Measuring the length left the descriptor positioned at end-of-file;
+    // rewind so that reads through `infile` start at the first byte.
+    if (lseek(infile_fd, 0, SEEK_SET) == (off_t) -1) {
+        fprintf(stderr, "failed to rewind source file: %s\n", strerror(errno));
+        exit(1);
+    }
+    // There'll probably never be a source file large enough for this to make a difference,
+    // and I *certainly* haven't profiled, but... I've always wanted to use these syscalls. :)
+    // The advice is best-effort, so the results are deliberately ignored.
+    posix_fadvise(infile_fd, 0, infile_len, POSIX_FADV_SEQUENTIAL);
+    posix_fadvise(infile_fd, 0, infile_len, POSIX_FADV_NOREUSE);
+    infile = fdopen(infile_fd, "rb");
+    // fdopen reports failure through its return value, not through the descriptor
+    if (infile == NULL) {
+        fprintf(stderr, "failed to open source file fd as file handle: %s\n", strerror(errno));
+        exit(1);
+    }
+
+    // dirname() is allowed to modify its argument, so hand it a private copy
+    // rather than the caller's string, which close_files() still needs intact.
+    char* outfile_path = strdup(outfile_name);
+    if (outfile_path == NULL) {
+        fprintf(stderr, "failed to copy output file name: %s\n", strerror(errno));
+        exit(1);
+    }
+    int outfile_fd = open(dirname(outfile_path), O_WRONLY | O_TMPFILE, S_IRWXU | S_IRWXG | S_IRWXO);
+    if (outfile_fd == -1) {
+        fprintf(stderr, "failed to create temporary output file: %s\n", strerror(errno));
+        exit(1);
+    }
+    free(outfile_path);
+    outfile = fdopen(outfile_fd, "wb");
+    if (outfile == NULL) {
+        fprintf(stderr, "failed to open output file fd as file handle: %s\n", strerror(errno));
+        exit(1);
+    }
+}
+
+// Flush the temporary output file and give it its final name, then close both files.
+void close_files(void) {
+    if (fflush(outfile) != 0) {
+        fprintf(stderr, "failed to flush output file: %s\n", strerror(errno));
+        exit(1);
+    }
+
+    // Room for "/proc/self/fd/", the decimal digits of any int, and the terminator.
+    char outfile_tempname[32];
+    snprintf(outfile_tempname, sizeof outfile_tempname, "/proc/self/fd/%d", fileno(outfile));
+    if (linkat(AT_FDCWD, outfile_tempname, AT_FDCWD, outfile_name, AT_SYMLINK_FOLLOW) == -1) {
+        fprintf(stderr, "failed to link output file into file system: %s\n", strerror(errno));
+        exit(1);
+    }
+
+    fclose(outfile);
+    fclose(infile);
+}
+#else
+// Portable fallback: open the source file for reading and write the output
+// file in place. Any failure is reported on stderr and terminates the process.
+void open_files(const char* infile_name, const char* outfile_name_) {
+    // Remember the path: close_files() removes the file if closing it fails.
+    outfile_name = outfile_name_;
+
+    infile = fopen(infile_name, "rb");
+    if (infile == NULL) {
+        fprintf(stderr, "failed to open source file: %s\n", strerror(errno));
+        exit(1);
+    }
+
+    // There is no way for us to mark the file as executable.
+    // Then again, if it's not Unix, that probably doesn't matter.
+    outfile = fopen(outfile_name, "wb");
+    if (outfile == NULL) {
+        fprintf(stderr, "failed to open output file: %s\n", strerror(errno));
+        exit(1);
+    }
+}
+
+// Close both files; if the output cannot be closed cleanly, try to remove it.
+void close_files(void) {
+    if (fclose(outfile) != 0) {
+        fprintf(stderr, "failed to close output file: %s\n", strerror(errno));
+        // NOTE: ideally we'd do this on any dirty exit
+        // TODO: use portable tempfiles and then just copy the entire file at the end?
+        if (remove(outfile_name) != 0) {
+            fprintf(stderr, "failed to remove output file, if it exists, it is corrupt: %s\n", strerror(errno));
+        }
+        exit(1);
+    }
+    fclose(infile);
+}
+#endif
+
+// Append `count` bytes starting at `ptr` to the output file.
+// Exits with an error if the bytes cannot all be written.
+void emit(const void* restrict ptr, size_t count) {
+    if (fwrite(ptr, 1, count, outfile) != count || ferror(outfile)) {
+        fprintf(stderr, "failed to write to output file\n");
+        exit(1);
+    }
+}
+
+// The fixed-width helpers write the value's in-memory bytes unchanged,
+// i.e. in the byte order of the machine running the compiler.
+void emit_u8(uint8_t x) {
+    emit(&x, sizeof x);
+}
+
+void emit_u32(uint32_t x) {
+    emit(&x, sizeof x);
+}
+
+void emit_u64(uint64_t x) {
+    emit(&x, sizeof x);
+}
+
+// Overwrite `count` bytes at offset `off` of the output file,
+// then return to the position the file had before the call.
+void patch(size_t off, const void* ptr, size_t count) {
+    fpos_t save;
+    if (fgetpos(outfile, &save) != 0) {
+        fprintf(stderr, "failed to save file position before patch: %s\n", strerror(errno));
+        exit(1);
+    }
+    if (fseek(outfile, (long) off, SEEK_SET) != 0) {
+        fprintf(stderr, "failed to set file position for patch: %s\n", strerror(errno));
+        exit(1);
+    }
+    fwrite(ptr, 1, count, outfile);
+    if (ferror(outfile) != 0) {
+        fprintf(stderr, "failed to patch output file: %s\n", strerror(errno));
+        exit(1);
+    }
+    if (fsetpos(outfile, &save) != 0) {
+        fprintf(stderr, "failed to restore file position after patch: %s\n", strerror(errno));
+        exit(1);
+    }
+}
+
+void patch_u32(size_t off, uint32_t x) {
+    patch(off, &x, sizeof x);
+}
+
+void patch_i32(size_t off, int32_t x) {
+    patch_u32(off, (uint32_t) x);
+}
diff --git a/src/io.h b/src/io.h
new file mode 100644
index 0000000..2a9561f
--- /dev/null
+++ b/src/io.h
@@ -0,0 +1,22 @@
+// Include guard avoids the reserved "underscore + uppercase" identifier form.
+#ifndef IO_H
+#define IO_H
+
+#include <stdint.h>
+#include <stdio.h>
+
+extern FILE* outfile;
+extern FILE* infile;
+
+// Open the source and output files / finish and close them.
+// Both report failures on stderr and exit.
+void open_files(const char* infile_name, const char* outfile_name);
+void close_files(void);
+
+// Append bytes to the output file at its current position.
+void emit(const void* ptr, size_t count);
+void emit_u8(uint8_t x);
+void emit_u32(uint32_t x);
+void emit_u64(uint64_t x);
+
+// Overwrite bytes at offset `off` of the output file
+// without disturbing its current position.
+void patch(size_t off, const void* ptr, size_t count);
+void patch_u32(size_t off, uint32_t x);
+void patch_i32(size_t off, int32_t x);
+
+#endif
diff --git a/src/ir.c b/src/ir.c
new file mode 100644
index 0000000..9c22530
--- /dev/null
+++ b/src/ir.c
@@ -0,0 +1,219 @@
+#include "asm.h"
+#include "ir.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+// A jump emitted before its target was defined; `disp` is the value
+// returned by inst_jump_unresolved for that jump.
+struct fixups {
+    struct fixups* next;
+    ip disp;
+};
+
+// Everything known about one label. `definition` is (ip) -1 until the label
+// is defined; `fixups` lists the jumps still waiting for that address.
+struct label_info {
+    const struct frame* frame;
+    size_t argc;
+    ip definition;
+    struct fixups* fixups;
+};
+
+// Singly linked list of the labels declared in one frame.
+struct labels {
+    struct labels* prev;
+    struct label_info info;
+};
+
+// One stack frame: `depth` is the value of top_of_stack when it was entered.
+struct frame {
+    struct frame* prev;
+    size_t depth;
+    struct labels* labels;
+};
+static struct frame* current_frame;
+
+// Where a variable currently lives: in a register, or in memory at byte
+// offset `off` from the frame base register.
+struct storage {
+    enum { STORE_REG, STORE_STACK } type;
+    union { reg reg; size_t off; };
+};
+
+// Number of variables allocated so far; also the index of the next one.
+static size_t top_of_stack;
+
+// Enter a new stack frame.
+static void enter(void) {
+    struct frame* next = malloc(sizeof *next);
+    if (next == NULL) {
+        abort();
+    }
+    next->prev = current_frame;
+    next->depth = top_of_stack;
+    // leave() walks and frees this list, so it has to start out empty.
+    next->labels = NULL;
+    current_frame = next;
+}
+
+// Leave the current stack frame.
+static void leave(void) {
+    // Leaving without a matching enter() would dereference a null frame.
+    assert(current_frame != NULL);
+    struct frame* next = current_frame;
+    current_frame = next->prev;
+    top_of_stack = next->depth;
+    struct labels* labels = next->labels;
+    free(next);
+    // Release every label that was declared in the frame.
+    while (labels != NULL) {
+        struct labels* next_label = labels->prev;
+        free(labels);
+        labels = next_label;
+    }
+}
+
+// Allocate registers or stack space for the arguments.
+static void reserve(size_t argc) {
+    top_of_stack += argc;
+}
+
+// Allocate a fresh variable. The value currently held in rax is pushed
+// onto the machine stack first.
+static var new_var(void) {
+    var var = top_of_stack;
+    top_of_stack++;
+    inst_push(RA);
+    return var;
+}
+
+// Report where `var` lives: the most recently allocated variable is in rax,
+// every older one is in a stack slot addressed relative to the frame base.
+static struct storage storage(var var) {
+    struct storage storage;
+    if (var == top_of_stack - 1) {
+        storage.type = STORE_REG;
+        storage.reg = RA;
+    } else {
+        storage.type = STORE_STACK;
+        // Unsigned arithmetic: this wraps around to the two's complement
+        // representation of the negative byte offset -(var * 8 + 16).
+        storage.off = -var * 8 - 16;
+    }
+    return storage;
+}
+
+// Emit code copying the value of `src` into `dest`.
+static void move(var dest, var src) {
+    if (dest == src) return;
+    struct storage ds = storage(dest);
+    struct storage ss = storage(src);
+    if (ds.type == STORE_REG && ss.type == STORE_REG) {
+        inst_mov(ds.reg, ss.reg);
+    } else if (ds.type == STORE_STACK && ss.type == STORE_REG) {
+        inst_mov_to_disp(RB, ss.reg, ds.off);
+    } else if (ds.type == STORE_REG && ss.type == STORE_STACK) {
+        // Stack to register. The source must be tested here: testing `ds`
+        // twice can never be true, which sent this case to the fallback
+        // below where `ds.off` is not the active union member.
+        inst_mov_from_disp(ds.reg, RB, ss.off);
+    } else {
+        // Stack to stack goes through a scratch register.
+        // FIXME: DI is scratch register?
+        inst_mov_from_disp(DI, RB, ss.off);
+        inst_mov_to_disp(RB, DI, ds.off);
+    }
+}
+
+// Swap the values of two variables.
+static void exchange(var x, var y) {
+    if (x == y) return;
+    assert(0); // UNIMPLEMENTED
+}
+
+// Restore the stack and registers to a previous frame,
+// in preparation for a jump out of the current frame.
+//
+// This involves loading spilled variables into registers,
+// restoring the stack pointer,
+// spilling variables onto the stack to make space for arguments,
+// and relocating arguments to the correct registers.
+static void restore(const struct frame* frame, size_t argc, var* args) {
+    for (size_t i = 0; i < argc; i++) {
+        var arg = args[i];
+        // Argument i belongs in the i-th variable slot of the target frame.
+        size_t depth = frame->depth + i;
+        if (arg == depth) continue;
+        // Does a later argument currently occupy the slot we want to fill?
+        size_t conflict = (size_t) -1;
+        for (size_t j = i + 1; j < argc; j++) {
+            if (depth == args[j]) {
+                conflict = j;
+                break;
+            }
+        }
+        if (conflict == (size_t) -1) {
+            move(depth, args[i]);
+        } else {
+            // TODO: an algorithm which produces fewer exchanges
+            // After the swap, the conflicting argument lives where args[i] was.
+            exchange(args[conflict], args[i]);
+            args[conflict] = args[i];
+        }
+    }
+}
+
+// Create a label owned by the current frame. It starts out undefined
+// ((ip) -1) and with no jumps waiting on it.
+label declare_label(size_t argc) {
+    // A label is recorded in, and owned by, the frame that is current.
+    assert(current_frame != NULL);
+    struct labels* labels = malloc(sizeof *labels);
+    if (labels == NULL) {
+        abort();
+    }
+    labels->prev = current_frame->labels;
+    labels->info.frame = current_frame;
+    labels->info.argc = argc;
+    labels->info.definition = (ip) -1;
+    // define_label() and queue_fixup() traverse this list, so it must start empty.
+    labels->info.fixups = NULL;
+    current_frame->labels = labels;
+    return &labels->info;
+}
+
+// Bind `label` to the current position, resolve every jump that was queued
+// while it was undefined, and open a new frame holding its arguments.
+void define_label(label label, var* args) {
+    (void) args; // not used yet
+    assert(label->frame == current_frame);
+    label->definition = here;
+    struct fixups* fixups = label->fixups;
+    while (fixups != NULL) {
+        struct fixups* fixup = fixups;
+        inst_jump_resolve(fixup->disp, here);
+        fixups = fixup->next;
+        free(fixup);
+    }
+    // Every node was freed above; do not leave a dangling list behind.
+    label->fixups = NULL;
+    enter();
+    reserve(label->argc);
+}
+
+// Remember a jump whose displacement must be patched once `label` is defined.
+// Internal helper: it has no prototype in ir.h, hence the internal linkage.
+static void queue_fixup(label label, ip disp) {
+    struct fixups* fixup = malloc(sizeof *fixup);
+    if (fixup == NULL) {
+        abort();
+    }
+    fixup->next = label->fixups;
+    fixup->disp = disp;
+    label->fixups = fixup;
+}
+
+// Move the arguments into place, jump to `label` (queuing a fixup if it is
+// not defined yet), and leave the current frame.
+void jump(label label, var* args) {
+    restore(label->frame, label->argc, args);
+    if (label->definition != (ip) -1) {
+        inst_jump(label->definition);
+    } else {
+        ip disp = inst_jump_unresolved();
+        queue_fixup(label, disp);
+    }
+    leave();
+}
+
+void jump_table(size_t branches, label* labels, var index, var* args) {
+    (void) branches;
+    (void) labels;
+    (void) index;
+    (void) args;
+    assert(0); // UNIMPLEMENTED
+}
+
+void jump_if(label label, var cond, var* args) {
+    (void) label;
+    (void) cond;
+    (void) args;
+    assert(0); // UNIMPLEMENTED
+}
+
+
+// Placeholder: currently does nothing.
+static void save(var* vars) {
+
+}
+
+// Allocate a new variable holding the constant `x`.
+var lit(uint64_t x) {
+    var var = new_var();
+    struct storage stg = storage(var);
+    // A freshly allocated variable is always the register-resident one.
+    assert(stg.type == STORE_REG);
+    inst_mov_imm(stg.reg, x);
+    return var;
+}
+
+// Emit a system call. args[0] is the call number and is expected to already
+// be in rax; args[1..] are loaded from their stack slots into rdi, rsi, rdx.
+void syscall(size_t argc, var* args) {
+    // Registers receiving the call's parameters, in order.
+    static const reg param_regs[] = { DI, SI, RD };
+    assert(argc > 0);
+    // Parameters beyond the table would otherwise be dropped silently.
+    assert(argc - 1 <= sizeof param_regs / sizeof param_regs[0]);
+    // rax already populated by top of stack
+    // FIXME: this won't work forever
+    // FIXME: save args in case we don't want to sysexit
+    // FIXME: save other registers
+    for (size_t i = 1; i < argc; i++) {
+        // Byte offset of the variable's slot from rbx, computed in signed
+        // arithmetic instead of relying on unsigned wraparound and an
+        // implicit narrowing conversion.
+        int32_t disp = (int32_t) (-(int64_t) args[i] * 8 - 16);
+        inst_mov_from_disp(param_regs[i - 1], RB, disp);
+    }
+    inst_syscall();
+}
+
+// Emit the program prologue: copy the initial stack pointer into rbx, which
+// the rest of this file uses as the base for addressing spilled variables.
+void init(void) {
+    inst_mov(RB, SP);
+}
diff --git a/src/ir.h b/src/ir.h
new file mode 100644
index 0000000..71033fc
--- /dev/null
+++ b/src/ir.h
@@ -0,0 +1,47 @@
+// Include guard named after this file; the previous spelling used the
+// reserved "underscore + uppercase" identifier form.
+#ifndef IR_H
+#define IR_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/// Handle for a value produced by the program being compiled.
+typedef size_t var;
+typedef struct label_info* label;
+
+/// Declare a new label taking `argc` arguments.
+///
+/// A label is the destination of a jump,
+/// located with a fixed stack context and fixed argument types.
+label declare_label(size_t argc);
+
+/// Define a previously-declared label.
+///
+/// The label must be defined in the same stack context
+/// which it was declared in.
+///
+/// The definition of the label is... here, which is to say
+/// whatever code you proceed to generate after this.
+///
+/// A label is implicitly terminated by an unconditional jump or exit.
+/// However, it may exit at multiple locations via unconditional jumps.
+void define_label(label label, var* args);
+
+/// Jump to label, unconditionally. Terminates a block.
+///
+/// It is only possible to jump to a label in a parent or adjacent stack frame
+/// (you can't jump *deeper* into the stack).
+void jump(label label, var* args);
+
+/// Jump to label in table; never returns. Terminates a block.
+///
+/// All labels must be at the same depth and accept the same arguments.
+/// `index` must not be out of bounds.
+void jump_table(size_t branches, label* labels, var index, var* args);
+
+/// Jump to label if `cond` is not zero. Does not terminate a block.
+void jump_if(label label, var cond, var* args);
+
+/// Create a variable holding the constant `x`.
+var lit(uint64_t x);
+
+/// Emit a system call; `args[0]` is the call number, the rest its parameters.
+///
+/// NOTE(review): this shares its name with the C library's `syscall()`;
+/// a file including both this header and <unistd.h> would see conflicting
+/// declarations.
+void syscall(size_t argc, var* args);
+
+/// Emit the program prologue. Call once, before generating any other code.
+///
+/// Declared here because main.c calls it; without a prototype that call is
+/// an implicit declaration.
+void init(void);
+
+#endif
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..8067981
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,83 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "io.h"
+#include "ir.h"
+
+
+// Size of the ELF header plus the two program headers: 64 + 2 * 56 bytes.
+#define ELF_HEADER_SIZE 0xb0
+
+// Generate the program and return its entry point.
+// For now the program is hardcoded: it invokes system call 60 (exit on
+// Linux x86-64) with status 42. The code starts right after the headers.
+size_t compile(void) {
+    init();
+    var code = lit(42);
+    var call = lit(60);
+    var args[2] = { call, code };
+    syscall(2, args);
+    return ELF_HEADER_SIZE;
+}
+
+// Write the ELF and program headers at the start of the output file.
+// Must be called after all code has been emitted, since the current file
+// position is taken to be the total file length.
+static void write_elf(uint64_t entry_point) {
+    long end = ftell(outfile);
+    if (end < 0) {
+        perror("failed to get length of output file");
+        exit(1);
+    }
+    uint64_t file_len = (uint64_t) end;
+    if (fseek(outfile, 0, SEEK_SET) != 0) {
+        perror("failed to seek to start of output file");
+        exit(1);
+    }
+
+    // Hardcoded ELF header for statically-linked position-independent executable.
+    // Since we only support Linux amd64 static PIE, there's no need to abstract over this for now.
+    uint8_t elf_header[ELF_HEADER_SIZE] = {
+        // ELF header
+        0x7F, 'E', 'L', 'F', // ELF magic
+        2, 1, 1, 3, 0, // 64-bit little-endian Linux, ELF version 1
+        0, 0, 0, 0, 0, 0, 0, // padding
+        3, 0, 0x3E, 0, 1, 0, 0, 0, // dynamic executable, amd64, ELF version 1 again
+        0, 0, 0, 0, 0, 0, 0, 0, // PATCHME: entry point address
+        0x40, 0, 0, 0, 0, 0, 0, 0, // program header table offset (immediately after ELF)
+        0, 0, 0, 0, 0, 0, 0, 0, // section header table offset (none)
+        0, 0, 0, 0, 0x40, 0, 0x38, 0, // flags (none), header sizes
+        2, 0, 0, 0, 0, 0, 0, 0, // 2 segments, no sections
+
+        // program header segment
+        6, 0, 0, 0, 4, 0, 0, 0, // program header segment, readable
+        0x40, 0, 0, 0, 0, 0, 0, 0, // immediately after ELF header
+        0x40, 0, 0, 0, 0, 0, 0, 0, // virtual address
+        0, 0, 0, 0, 0, 0, 0, 0, // physical address
+        0x70, 0, 0, 0, 0, 0, 0, 0, // size in file (2 * size of program header)
+        0x70, 0, 0, 0, 0, 0, 0, 0, // size in memory
+        8, 0, 0, 0, 0, 0, 0, 0, // alignment
+
+        // executable segment
+        1, 0, 0, 0, 5, 0, 0, 0, // loadable segment, readable and executable
+        0, 0, 0, 0, 0, 0, 0, 0, // whole file
+        0, 0, 0, 0, 0, 0, 0, 0, // virtual address
+        0, 0, 0, 0, 0, 0, 0, 0, // physical address
+        0, 0, 0, 0, 0, 0, 0, 0, // PATCHME: size in file
+        0, 0, 0, 0, 0, 0, 0, 0, // PATCHME: size in memory
+        0, 0x10, 0, 0, 0, 0, 0, 0, // alignment (4K)
+    };
+    // Fill in the PATCHME fields. The values are copied in host byte order,
+    // which matches the little-endian header only on a little-endian host.
+    memcpy(&elf_header[0x18], &entry_point, sizeof entry_point);
+    // size in file and size in memory of the executable segment are adjacent
+    memcpy(&elf_header[0x98], &file_len, sizeof file_len);
+    memcpy(&elf_header[0x98 + sizeof file_len], &file_len, sizeof file_len);
+
+    emit(elf_header, ELF_HEADER_SIZE);
+}
+
+// Usage: passc <outfile> <infile>
+int main(int argc, char** argv) {
+    if (argc != 3) {
+        fprintf(stderr, "usage: %s <outfile> <infile>\n", argv[0]);
+        exit(1);
+    }
+    // Note the order: the output file comes first on the command line.
+    open_files(argv[2], argv[1]);
+
+    // Leave room for the headers, which are written last,
+    // once the length of the generated code is known.
+    if (fseek(outfile, ELF_HEADER_SIZE, SEEK_SET) != 0) {
+        perror("failed to reserve space for ELF header");
+        exit(1);
+    }
+    size_t entry_point = compile();
+    write_elf((uint64_t) entry_point);
+
+    close_files();
+    return 0;
+}