diff --git a/Makefile b/Makefile index 186cbcb..5c4098d 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ SHELL = /bin/sh CFLAGS = -std=c99 -pedantic -Wextra -Os LDFLAGS = -lc -OBJECTS = asm.o io.o ir.o lex.o lex/indent.o lang.o main.o parse.o x86encode.o +OBJECTS = asm.o format.o io.o ir.o lex.o lex/indent.o lang.o main.o parse.o x86encode.o .PHONY: passc passc: .bin $(OBJECTS) diff --git a/src/asm.c b/src/asm.c index 446b715..04e42b3 100644 --- a/src/asm.c +++ b/src/asm.c @@ -1,30 +1,23 @@ /// This file handles the contextual generation of machine code. /// It abstracts over quirks like the limitations of addressing modes, /// provides higher-level functionality, and can perform peephole optimization. -/// -/// Reserved registers: -/// -/// * rsp: the stack pointer -/// * rbp: the base of the stack frame -/// * rax: the top of the stack -/// * r13: a scratch register for compound instructions #include "asm.h" -#include "io.h" +#include "format.h" +#include "x86encode.h" #include #include -void inst_jump(ip there) { - x86_inst_jmp_disp((int32_t) (there - here)); +/// Jump to `sym`: a short 8-bit-displacement jump when the symbol is already +/// defined and within int8 range, otherwise a 32-bit jump resolved by relocate_pc32. +void inst_jump(symbol sym) { + int32_t disp = symbol_offset(sym, X86_JMP_DISP8_SIZE); + if (disp >= INT8_MIN && disp <= INT8_MAX) { + x86_inst_jmp_disp8(disp); + return; // the short jump is the complete instruction; do not also emit the 32-bit form + } + x86_inst_jmp_disp32_op(); + relocate_pc32(sym); // TODO: support 64-bit jumps? } - -ip inst_jump_unresolved(void) { - x86_inst_jmp_disp32(0); - return here; -} - -void inst_jump_resolve(ip disp, ip there) { - patch_i32(disp - 4, (int32_t) (there - here)); -} diff --git a/src/asm.h b/src/asm.h index 0ab924a..4e7e196 100644 --- a/src/asm.h +++ b/src/asm.h @@ -1,18 +1,9 @@ #ifndef _ASM_H #define _ASM_H -#include "x86encode.h" - -#include -#include - -typedef uint32_t ip; -extern ip here; +#include "format.h" /// Jump to a known address. -void inst_jump(ip there); +void inst_jump(symbol sym); -/// Jump to an unresolved address. 
-ip inst_jump_unresolved(void); -void inst_jump_resolve(ip disp, ip there); #endif diff --git a/src/format.c b/src/format.c new file mode 100644 index 0000000..d5f8e43 --- /dev/null +++ b/src/format.c @@ -0,0 +1,203 @@ +/// This file handles concerns of the executable file format. +/// This includes keeping track of the current virtual address, +/// performing relocations, and creating the executable file header. +/// +/// The set of features we actually use is very small, so hopefully +/// it will turn out to be possible to port this across executable formats +/// and architectures with relatively few modifications. +#include "format.h" +#include "io.h" + +#include +#include +#include +#include +#include + +struct symbol { + uint64_t vaddr; +}; + +#define MAX_SYMBOLS 65535 +static uint32_t symbol_count = 0; +static struct symbol symbols[MAX_SYMBOLS]; + +enum relocation_type { + REL_PC32 = 2, + REL_GOTPCREL = 9, + REL_SIZE32 = 32, + REL_SIZE64 = 33, +}; + +struct relocation { + enum relocation_type type; + symbol symbol; + uint64_t offset; +}; + +#define MAX_RELOCATIONS 65535 +static uint32_t relocation_count = 0; +static struct relocation relocations[MAX_RELOCATIONS]; + +/// Offset in the output file of the next byte to be appended. The loadable +/// segment maps the whole file at virtual address 0, so this is also the vaddr. +static uint64_t file_here = 0; + +/// Not the size of the ELF header per se, but rather the ELF header +/// plus the program headers and section headers we include. +/// We reserve this much space at the beginning of every file +/// to fill in once the executable is finished. +#define ELF_HEADER_SIZE 0xb0 + +void elf_executable(void) { + reserve(ELF_HEADER_SIZE); + file_here += ELF_HEADER_SIZE; +} + +void finish_executable(symbol entry_point) { + uint64_t file_len = file_here; + + // Hardcoded ELF header for statically-linked position-independent executable. + // Since we only support Linux amd64 static PIE, there's no need to abstract over this for now. 
+ uint8_t elf_header[ELF_HEADER_SIZE] = { + // ELF header + 0x7F, 'E', 'L', 'F', // ELF magic + 2, 1, 1, 3, 0, // 64-bit little-endian Linux, ELF version 1 + 0, 0, 0, 0, 0, 0, 0, // padding + 3, 0, 0x3E, 0, 1, 0, 0, 0, // dynamic executable, amd64, ELF version 1 again + 0, 0, 0, 0, 0, 0, 0, 0, // PATCHME: entry point address + 0x40, 0, 0, 0, 0, 0, 0, 0, // program header table offset (immediately after ELF) + 0, 0, 0, 0, 0, 0, 0, 0, // section header table offset (none) + 0, 0, 0, 0, 0x40, 0, 0x38, 0, // flags (none), header sizes + 2, 0, 0, 0, 0, 0, 0, 0, // 2 segments, no sections + + // program header segment + 6, 0, 0, 0, 4, 0, 0, 0, // program header segment, readable + 0x40, 0, 0, 0, 0, 0, 0, 0, // immediately after ELF header + 0x40, 0, 0, 0, 0, 0, 0, 0, // virtual address + 0, 0, 0, 0, 0, 0, 0, 0, // physical address + 0x70, 0, 0, 0, 0, 0, 0, 0, // size in file (2 * size of program header) + 0x70, 0, 0, 0, 0, 0, 0, 0, // size in memory + 8, 0, 0, 0, 0, 0, 0, 0, // alignment + + // executable segment + 1, 0, 0, 0, 5, 0, 0, 0, // loadable segment, readable and executable + 0, 0, 0, 0, 0, 0, 0, 0, // whole file + 0, 0, 0, 0, 0, 0, 0, 0, // virtual address + 0, 0, 0, 0, 0, 0, 0, 0, // physical address + 0, 0, 0, 0, 0, 0, 0, 0, // PATCHME: size in file + 0, 0, 0, 0, 0, 0, 0, 0, // PATCHME: size in memory + 0, 0x10, 0, 0, 0, 0, 0, 0, // alignment (4K) + }; + uint64_t ep = (uint64_t) symbols[entry_point].vaddr; + uint64_t fl = (uint64_t) file_len; + memcpy(&elf_header[0x18], &ep, sizeof(uint64_t)); + memcpy(&elf_header[0x98], &fl, sizeof(uint64_t)); + memcpy(&elf_header[0x98 + sizeof(uint64_t)], &fl, sizeof(uint64_t)); + + patch(0, elf_header, ELF_HEADER_SIZE); + + for (uint32_t i = 0; i < relocation_count; i++) { + struct relocation rel = relocations[i]; + assert(rel.type == REL_PC32); + uint64_t vaddr = symbols[rel.symbol].vaddr; + assert(vaddr != (uint64_t) -1); + // rel.offset is where the 32-bit field starts; the displacement is relative to the end of that field. + int64_t disp = (int64_t) vaddr - ((int64_t) rel.offset + 4); + assert(disp >= INT32_MIN && disp <= 
INT32_MAX); + patch_u32(rel.offset, (uint32_t) (int32_t) disp); + } +} + +symbol new_symbol(void) { + // Check capacity before touching the table: symbols[MAX_SYMBOLS] is out of bounds. + if (symbol_count == MAX_SYMBOLS) { + fprintf(stderr, "error: exceeded maximum number of symbols\n"); + exit(1); + } + struct symbol* sym = &symbols[symbol_count]; + sym->vaddr = (uint64_t) -1; + return symbol_count++; +} + +void define_executable_symbol(symbol s) { + struct symbol* sym = &symbols[s]; + sym->vaddr = file_here; +} + +void define_readonly_symbol(symbol sym) { + // TODO: + assert(0); +} + +void append_data(size_t size, const void* buf) { + file_here += size; + emit(buf, size); +} + +void append_u8(uint8_t x) { + file_here += sizeof(x); + emit_u8(x); +} + +void append_u32(uint32_t x) { + file_here += sizeof(x); + emit_u32(x); +} + +void append_u64(uint64_t x) { + file_here += sizeof(x); + emit_u64(x); +} + +static struct relocation* new_relocation(void) { + if (relocation_count == MAX_RELOCATIONS) { + fprintf(stderr, "error: exceeded maximum number of relocations\n"); + exit(1); + } + struct relocation* rel = &relocations[relocation_count]; + relocation_count++; + return rel; +} + +void relocate_pc32(symbol sym) { + int32_t offset = symbol_offset(sym, 4); + if (offset != INT32_MAX) { + append_u32((uint32_t) offset); + return; + } + // Record where the field starts before emitting the placeholder that finish_executable patches. + struct relocation* rel = new_relocation(); + rel->type = REL_PC32; + rel->offset = file_here; + rel->symbol = sym; + append_u32(0); +} + +int32_t symbol_offset(symbol sym, int8_t off) { + uint64_t vaddr = symbols[sym].vaddr; + if (vaddr == (uint64_t) -1) { + return INT32_MAX; + } + // Target minus the end of the instruction, which lies `off` bytes past the current position. + int64_t disp = (int64_t) vaddr - ((int64_t) file_here + off); + if (disp >= INT32_MAX || disp < INT32_MIN) { + return INT32_MAX; + } + return (int32_t) disp; +} + +void relocate_gotpcrel(symbol sym) { + // TODO + assert(0); +} + +void relocate_size32(symbol sym) { + // TODO + assert(0); +} + +void relocate_size64(symbol sym) { + // TODO + assert(0); +} diff --git a/src/format.h b/src/format.h new file mode 100644 index 0000000..a53e463 --- /dev/null +++ b/src/format.h @@ -0,0 +1,67 @@ +#ifndef FORMAT_H +#define FORMAT_H + +#include +#include 
+#include + +typedef uint32_t symbol; + +/// Begin a new ELF executable. +void elf_executable(void); +/// All definitions are complete. Finish processing the executable. +void finish_executable(symbol entry_point); + +/// Create a new symbol. You will later have to define this with +/// `define_executable_symbol`, `define_readonly_symbol`, +/// or import it from an external library. +symbol new_symbol(void); + +void define_executable_symbol(symbol sym); + +void define_readonly_symbol(symbol sym); + +void append_data(size_t size, const void* buf); +void append_u8(uint8_t x); +void append_u32(uint32_t x); +void append_u64(uint64_t x); + +/// Assuming the symbol is located in the same segment as this code, +/// insert a 32-bit offset for the symbol relative to the virtual address +/// of the current address (Program Counter (PC)). +/// +/// This is used for generating relative jumps. +/// +/// If the symbol is defined in this object, then it will be computed +/// at compile-time and not emitted as an actual relocation in the executable. +void relocate_pc32(symbol sym); + +/// Like pc32 for DIY relocations. This returns INT32_MAX if the symbol +/// has not been defined yet. This exists so that the assembler can manually +/// emit 8-bit short jumps when the distance is short enough. +/// +/// The offset exists to account for the offset being relative to the +/// *end* of an instruction, whereas the symbol offset is computed +/// relative to `here`. This makes bounds-checking slightly easier. +int32_t symbol_offset(symbol sym, int8_t offset); + +/// If the symbol is not in the same segment as this code, +/// for example when linking against a dynamic library or accessing read-only data, +/// then a 32-bit offset may not be sufficient to access a symbol with pc32. +/// This necessitates a Global Offset Table (GOT) which contains the 64-bit +/// absolute addresses of the symbols which *is* located in this segment. 
+/// Thus, this relocation inserts a 32-bit PC-relative address which points +/// into the location in the GOT which contains the 64-bit absolute address +/// of the symbol. +/// +/// This is used for generating jumps to dynamic library code +/// and for accessing data in a different segment (e.g. read-only symbols). +void relocate_gotpcrel(symbol sym); + +/// Insert the 32-bit truncated size of a symbol. +void relocate_size32(symbol sym); + +/// Insert the 64-bit size of a symbol. +void relocate_size64(symbol sym); + +#endif diff --git a/src/io.c b/src/io.c index db45be5..0b8820a 100644 --- a/src/io.c +++ b/src/io.c @@ -14,8 +14,6 @@ static const char* outfile_name; static FILE* infile; static FILE* outfile; -// HACK: "here" tracking should be handled by the assembler, not IO. -uint32_t here = 0; void open_files(const char* infile_name, const char* outfile_name_) { outfile_name = outfile_name_; @@ -53,7 +51,6 @@ void reserve(size_t len) { fprintf(stderr, "failed to reserve space in in output file: %s\n", strerror(errno)); exit(1); } - here += len; } void emit(const void* restrict ptr, size_t count) { @@ -62,7 +59,6 @@ void emit(const void* restrict ptr, size_t count) { fprintf(stderr, "failed to write to output file\n"); exit(1); } - here += count; } void emit_u8(uint8_t x) { diff --git a/src/io.h b/src/io.h index de359da..7dd4de3 100644 --- a/src/io.h +++ b/src/io.h @@ -4,8 +4,6 @@ #include #include -extern uint32_t here; - void open_files(const char* infile_name, const char* outfile_name); void close_files(void); diff --git a/src/ir.c b/src/ir.c index 512f101..2699010 100644 --- a/src/ir.c +++ b/src/ir.c @@ -4,7 +4,9 @@ /// and register allocation. 
#include "asm.h" +#include "format.h" #include "ir.h" +#include "x86encode.h" #include #include @@ -26,9 +28,7 @@ struct stack_frame { struct label { uint32_t frame; uint32_t argc; - ip definition; - uint32_t fixupc; - ip fixups[MAX_FIXUPS]; + symbol symbol; }; static uint32_t stack_depth = 0; @@ -66,7 +66,8 @@ void leave(var* args) { label declare(uint32_t argc) { assert(label_depth < MAX_LABELS); - struct label label = { stack_frame, argc, (ip) -1, 0, 0 }; + symbol sym = new_symbol(); + struct label label = { stack_frame, argc, sym }; labels[label_depth] = label; return label_depth++; } @@ -79,22 +80,12 @@ label declare_exit(uint32_t argc) { void define(label l, var* args) { struct label* label = &labels[l]; - label->definition = here; - while (label->fixupc > 0) { - label->fixupc--; - inst_jump_resolve(label->fixups[label->fixupc], here); - } + define_executable_symbol(label->symbol); } void jump(label l, var* args) { struct label* label = &labels[l]; - if (label->definition == (ip) -1) { - assert(label->fixupc < MAX_FIXUPS); - label->fixups[label->fixupc] = inst_jump_unresolved(); - label->fixupc++; - } else { - inst_jump(label->definition); - } + inst_jump(label->symbol); } void jump_table(size_t branches, label* labels, var index, var* args) { diff --git a/src/main.c b/src/main.c index 90b81d0..8d51d29 100644 --- a/src/main.c +++ b/src/main.c @@ -1,19 +1,18 @@ -#include -#include -#include #include #include #include #include -#include +#include "format.h" #include "io.h" #include "ir.h" #include "parse.h" #define ELF_HEADER_SIZE 0xb0 -size_t compile(void) { +symbol compile(void) { + symbol entry_point = new_symbol(); + define_executable_symbol(entry_point); var argc, argv, env; init(&argc, &argv, &env); var a = lit(52); @@ -22,51 +21,7 @@ size_t compile(void) { var sys_exit = lit(60); var args[2] = { sys_exit, exit_code }; syscall(2, args); - return ELF_HEADER_SIZE; -} - -static void write_elf(uint64_t entry_point) { - uint64_t file_len = here; - - // 
Hardcoded ELF header for statically-linked position-independent executable. - // Since we only support Linux amd64 static PIE, there's no need to abstract over this for now. - uint8_t elf_header[ELF_HEADER_SIZE] = { - // ELF header - 0x7F, 'E', 'L', 'F', // ELF magic - 2, 1, 1, 3, 0, // 64-bit little-endian Linux, ELF version 1 - 0, 0, 0, 0, 0, 0, 0, // padding - 3, 0, 0x3E, 0, 1, 0, 0, 0, // dynamic executable, amd64, ELF version 1 again - 0, 0, 0, 0, 0, 0, 0, 0, // PATCHME: entry point address - 0x40, 0, 0, 0, 0, 0, 0, 0, // program header table offset (immediately after ELF) - 0, 0, 0, 0, 0, 0, 0, 0, // section eader table offset (none) - 0, 0, 0, 0, 0x40, 0, 0x38, 0, // flags (none), header sizes - 2, 0, 0, 0, 0, 0, 0, 0, // 2 segments, no sections - - // program header segment - 6, 0, 0, 0, 4, 0, 0, 0, // program header segment, readable - 0x40, 0, 0, 0, 0, 0, 0, 0, // immediately after ELF header - 0x40, 0, 0, 0, 0, 0, 0, 0, // virtual address - 0, 0, 0, 0, 0, 0, 0, 0, // physical address - 0x70, 0, 0, 0, 0, 0, 0, 0, // size in file (2 * size of program header) - 0x70, 0, 0, 0, 0, 0, 0, 0, // size in memory - 8, 0, 0, 0, 0, 0, 0, 0, // alignment - - // executable segment - 1, 0, 0, 0, 5, 0, 0, 0, // loadable segment, readable and executable - 0, 0, 0, 0, 0, 0, 0, 0, // whole file - 0, 0, 0, 0, 0, 0, 0, 0, // virtual address - 0, 0, 0, 0, 0, 0, 0, 0, // physical address - 0, 0, 0, 0, 0, 0, 0, 0, // PATCHME: size in file - 0, 0, 0, 0, 0, 0, 0, 0, // PATCHME: size in memory - 0, 0x10, 0, 0, 0, 0, 0, 0, // alignment (4K) - }; - uint64_t ep = (uint64_t) entry_point; - uint64_t fl = (uint64_t) file_len; - memcpy(&elf_header[0x18], &entry_point, sizeof(uint64_t)); - memcpy(&elf_header[0x98], &file_len, sizeof(uint64_t)); - memcpy(&elf_header[0x98 + sizeof(uint64_t)], &file_len, sizeof(uint64_t)); - - patch(0, elf_header, ELF_HEADER_SIZE); + return entry_point; } int main(int argc, char** argv) { @@ -76,11 +31,11 @@ int main(int argc, char** argv) { } 
open_files(argv[2], argv[1]); - parse(); + //parse(); - reserve(ELF_HEADER_SIZE); - size_t entry_point = compile(); - write_elf((uint64_t) entry_point); + elf_executable(); + symbol entry_point = compile(); + finish_executable(entry_point); close_files(); return 0; diff --git a/src/x86encode.c b/src/x86encode.c index 6c42370..c67895c 100644 --- a/src/x86encode.c +++ b/src/x86encode.c @@ -6,7 +6,7 @@ // https://wiki.osdev.org/X86-64_Instruction_Encoding // https://defuse.ca/online-x86-assembler.htm -#include "io.h" +#include "format.h" #include "x86encode.h" #define REX 0x40 @@ -26,101 +26,101 @@ static void x86_opt_rexr(reg reg) { if (reg >= R8) { - emit_u8(REX | REX_R); + append_u8(REX | REX_R); } } static void x86_rexwr(reg reg) { uint8_t rex = REX | REX_W; if (reg >= R8) rex |= REX_R; - emit_u8(rex); + append_u8(rex); } static void x86_rexwb(reg b) { uint8_t rex = REX | REX_W; if (b >= R8) rex |= REX_B; - emit_u8(rex); + append_u8(rex); } static void x86_rexwrb(reg r, reg b) { uint8_t rex = REX | REX_W; if (r >= R8) rex |= REX_R; if (b >= R8) rex |= REX_B; - emit_u8(rex); + append_u8(rex); } static void x86_modrr(reg r, reg b) { - emit_u8(MODRM_RR | (REG(r) << 3) | REG(b)); + append_u8(MODRM_RR | (REG(r) << 3) | REG(b)); } static void x86_modrm(reg r, reg b) { - emit_u8(MODRM_RM | (REG(r) << 3) | REG(b)); + append_u8(MODRM_RM | (REG(r) << 3) | REG(b)); } static void x86_modrm8(reg r, reg b) { - emit_u8(MODRM_RM8 | (REG(r) << 3) | REG(b)); + append_u8(MODRM_RM8 | (REG(r) << 3) | REG(b)); } static void x86_modrm32(reg r, reg b) { - emit_u8(MODRM_RM32 | (REG(r) << 3) | REG(b)); + append_u8(MODRM_RM32 | (REG(r) << 3) | REG(b)); } static void x86_modxm(uint8_t ext, reg b) { - emit_u8(MODRM_RR | (ext << 3) | REG(b)); + append_u8(MODRM_RR | (ext << 3) | REG(b)); } static void x86_enc_opr(uint8_t op, reg reg) { x86_opt_rexr(reg); - emit_u8(op + REG(reg)); + append_u8(op + REG(reg)); } static void x86_enc_rexw_opr(uint8_t op, reg reg) { x86_rexwr(reg); - emit_u8(op + 
REG(reg)); + append_u8(op + REG(reg)); } static void x86_enc_opr_imm32(uint8_t op, reg reg, uint32_t imm) { x86_opt_rexr(reg); - emit_u8(op + REG(reg)); - emit_u32(imm); + append_u8(op + REG(reg)); + append_u32(imm); } static void x86_enc_rexw_opr_imm32(uint8_t op, reg reg, uint32_t imm) { x86_rexwr(reg); - emit_u8(op + REG(reg)); - emit_u32(imm); + append_u8(op + REG(reg)); + append_u32(imm); } static void x86_enc_rexw_opr_imm64(uint8_t op, reg reg, uint64_t imm) { x86_rexwr(reg); - emit_u8(op + REG(reg)); - emit_u64(imm); + append_u8(op + REG(reg)); + append_u64(imm); } static void x86_enc_rexw_modrr(uint8_t op, reg r, reg m) { x86_rexwrb(r, m); - emit_u8(op); + append_u8(op); x86_modrr(r, m); } static void x86_enc_rexw_modrm(uint8_t op, reg r, reg b) { x86_rexwrb(r, b); - emit_u8(op); + append_u8(op); x86_modrm(r, b); } static void x86_enc_rexw_modrm8(uint8_t op, reg r, reg b, int8_t disp) { x86_rexwrb(r, b); - emit_u8(op); + append_u8(op); x86_modrm8(r, b); - emit_u8(disp); + append_u8(disp); } static void x86_enc_rexw_modrm32(uint8_t op, reg r, reg b, int32_t disp) { x86_rexwrb(r, b); - emit_u8(op); + append_u8(op); x86_modrm32(r, b); - emit_u32(disp); + append_u32(disp); } static void x86_enc_rexw_modrmd(uint8_t op, reg r, reg b, int32_t disp) { @@ -135,16 +135,16 @@ static void x86_enc_rexw_modrmd(uint8_t op, reg r, reg b, int32_t disp) { static void x86_enc_rexw_modxm_imm8(uint8_t op, uint8_t ext, reg m, uint8_t imm) { x86_rexwb(m); - emit_u8(op); + append_u8(op); x86_modxm(ext, m); - emit_u8(imm); + append_u8(imm); } static void x86_enc_rexw_modxm_imm32(uint8_t op, uint8_t ext, reg m, uint32_t imm) { x86_rexwb(m); - emit_u8(op); + append_u8(op); x86_modxm(ext, m); - emit_u32(imm); + append_u32(imm); } static void x86_enc_rexw_modxm_imm(uint8_t op, uint8_t ext, reg m, uint32_t imm) { @@ -157,12 +157,12 @@ static void x86_enc_rexw_modxm_imm(uint8_t op, uint8_t ext, reg m, uint32_t imm) static void x86_enc_disp8(uint8_t op, int8_t disp) { uint8_t buf[2] = { 
op, (uint8_t) disp }; - emit(buf, 2); + append_data(2, buf); } static void x86_enc_disp32(uint8_t op, int32_t disp) { - emit_u8(op); - emit_u32((uint32_t) disp); + append_u8(op); + append_u32((uint32_t) disp); } void x86_inst_mov_r64_imm64(reg dest, uint64_t imm) { @@ -243,6 +243,10 @@ void x86_inst_jmp_disp(int32_t disp) { } } +void x86_inst_jmp_disp32_op(void) { + append_u8(0xe9); +} + // TODO: special instructions for AX void x86_inst_sub_r64_imm8(reg dest, int8_t imm) { x86_enc_rexw_modxm_imm8(0x83, 5, dest, (uint8_t) imm); @@ -267,5 +271,5 @@ void x86_inst_add_r64_imm8(reg dest, int8_t imm) { void x86_inst_syscall(void) { const uint8_t buf[2] = { 0x0f, 0x05 }; - emit(buf, 2); + append_data(2, buf); } diff --git a/src/x86encode.h b/src/x86encode.h index 41dbd60..de87ac4 100644 --- a/src/x86encode.h +++ b/src/x86encode.h @@ -47,6 +47,7 @@ void x86_inst_jmp_disp8(int8_t disp); #define X86_JMP_DISP32_SIZE 5 void x86_inst_jmp_disp32(int32_t disp); void x86_inst_jmp_disp(int32_t disp); +void x86_inst_jmp_disp32_op(void); void x86_inst_sub_r64_imm8(reg dest, int8_t imm); void x86_inst_sub_r64_imm32(reg dest, int32_t imm);