Factor out executable format handling into a new file.

2022-09-10 22:06:21 -07:00 · 2022-09-10 22:06:21 -07:00 · 8808c41250
parent 3fe367675a
commit 8808c41250
11 changed files with 326 additions and 139 deletions
--- a/2
+++ b/2
@ -6,7 +6,7 @@ SHELL = /bin/sh
 CFLAGS = -std=c99 -pedantic -Wextra -Os
 LDFLAGS = -lc

-OBJECTS = asm.o io.o ir.o lex.o lex/indent.o lang.o main.o parse.o x86encode.o
+OBJECTS = asm.o format.o io.o ir.o lex.o lex/indent.o lang.o main.o parse.o x86encode.o

 .PHONY: passc
 passc: .bin $(OBJECTS)
--- a/src/asm.c
+++ b/src/asm.c
@ -1,30 +1,20 @@
 /// This file handles the contextual generation of machine code.
 /// It abstracts over quirks like the limitations of addressing modes,
 /// provides higher-level functionality, and can perform peephole optimization.
-///
-/// Reserved registers:
-///
-///   * rsp: the stack pointer
-///   * rbp: the base of the stack frame
-///   * rax: the top of the stack
-///   * r13: a scratch register for compound instructions

 #include "asm.h"
-#include "io.h"
+#include "format.h"
+#include "x86encode.h"

 #include <stdlib.h>
 #include <string.h>

-void inst_jump(ip there) {
-    x86_inst_jmp_disp((int32_t) (there - here));
+void inst_jump(symbol sym) {
+    // Displacement measured from the end of a short (8-bit displacement) jump.
+    // symbol_offset returns INT32_MAX for a symbol that is not defined yet,
+    // which never fits in 8 bits, so that case takes the long form below.
+    int32_t disp = symbol_offset(sym, X86_JMP_DISP8_SIZE);
+    if (disp >= INT8_MIN && disp <= INT8_MAX) {
+        x86_inst_jmp_disp8((int8_t) disp);
+        // The short jump is the complete instruction; without this return
+        // the long form would be emitted right after it as well.
+        return;
+    }
+    // Long form: the opcode byte followed by a 32-bit displacement that is
+    // either computed immediately or recorded as a relocation.
+    x86_inst_jmp_disp32_op();
+    relocate_pc32(sym);
    // TODO: support 64-bit jumps?
 }
-
-ip inst_jump_unresolved(void) {
-    x86_inst_jmp_disp32(0);
-    return here;
-}
-
-void inst_jump_resolve(ip disp, ip there) {
-    patch_i32(disp - 4, (int32_t) (there - here));
-}
--- a/src/asm.h
+++ b/src/asm.h
@ -1,18 +1,9 @@
 #ifndef _ASM_H
 #define _ASM_H

-#include "x86encode.h"
-
-#include <stddef.h>
-#include <stdint.h>
-
-typedef uint32_t ip;
-extern ip here;
+#include "format.h"

 /// Jump to a known address.
-void inst_jump(ip there);
+void inst_jump(symbol sym);

-/// Jump to an unresolved address.
-ip inst_jump_unresolved(void);
-void inst_jump_resolve(ip disp, ip there);
 #endif
--- a/src/format.c
+++ b/src/format.c
@ -0,0 +1,194 @@
+/// This file handles concerns related to the executable file format.
+/// This includes keeping track of the current virtual address,
+/// performing relocations, and creating the executable file header.
+///
+/// The set of features we actually use is very small, so hopefully
+/// it will turn out to be possible to port this across executable formats
+/// and architectures with relatively few modifications.
+#include "format.h"
+#include "io.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+struct symbol {  // one entry of the symbol table, indexed by a `symbol` handle
+    uint64_t vaddr;  // position of the definition; (uint64_t) -1 while still undefined
+};
+
+#define MAX_SYMBOLS 65535
+static uint32_t symbol_count = 0;
+static struct symbol symbols[MAX_SYMBOLS];
+
+enum relocation_type {  // kinds of relocation; finish_executable currently accepts only REL_PC32
+    REL_PC32 = 2,  // 32-bit PC-relative offset to a symbol
+    REL_GOTPCREL = 9,  // 32-bit PC-relative offset into the GOT (not implemented yet)
+    REL_SIZE32 = 32,  // 32-bit truncated size of a symbol (not implemented yet)
+    REL_SIZE64 = 33,  // NOTE(review): values look like the ELF x86-64 R_X86_64_* numbers -- confirm
+};
+
+struct relocation {  // a pending fixup that finish_executable resolves
+    enum relocation_type type;  // how the field is computed
+    symbol symbol;  // symbol whose address the field refers to
+    uint64_t offset;  // output position handed to patch_u32 when resolving
+};
+
+#define MAX_RELOCATIONS 65535
+static uint32_t relocation_count = 0;
+static struct relocation relocations[MAX_RELOCATIONS];
+
+static uint64_t file_here = 0;
+
+/// Not the size of the ELF header per se, but rather the ELF header
+/// plus the program headers and section headers we include.
+/// We reserve this much space at the beginning of every file
+/// to fill in once the executable is finished.
+#define ELF_HEADER_SIZE 0xb0
+
+/// Start a new ELF executable by setting aside room for the headers,
+/// which `finish_executable` patches in at offset 0 later.
+void elf_executable(void) {
+    const size_t header_bytes = ELF_HEADER_SIZE;
+    reserve(header_bytes);
+    file_here += header_bytes;
+}
+
+/// Finalize the executable: fill in the header space reserved by
+/// `elf_executable` and resolve every pending relocation.
+///
+/// `entry_point` must already be defined; its address is written into the
+/// ELF header as the program entry point. Every symbol referenced by a
+/// relocation must be defined by now as well.
+void finish_executable(symbol entry_point) {
+    uint64_t file_len = file_here;
+    uint64_t entry_vaddr = symbols[entry_point].vaddr;
+    // An undefined entry point would otherwise write -1 into the header.
+    assert(entry_vaddr != (uint64_t) -1);
+
+    // Hardcoded ELF header for statically-linked position-independent executable.
+    // Since we only support Linux amd64 static PIE, there's no need to abstract over this for now.
+    uint8_t elf_header[ELF_HEADER_SIZE] = {
+        // ELF header
+        0x7F, 'E', 'L', 'F',          // ELF magic
+        2, 1, 1, 3, 0,                // 64-bit little-endian Linux, ELF version 1
+        0, 0, 0, 0, 0, 0, 0,          // padding
+        3, 0, 0x3E, 0, 1, 0, 0, 0,    // dynamic executable, amd64, ELF version 1 again
+        0, 0, 0, 0, 0, 0, 0, 0,       // PATCHME: entry point address
+        0x40, 0, 0, 0, 0, 0, 0, 0,    // program header table offset (immediately after ELF)
+        0, 0, 0, 0, 0, 0, 0, 0,       // section header table offset (none)
+        0, 0, 0, 0, 0x40, 0, 0x38, 0, // flags (none), header sizes
+        2, 0, 0, 0, 0, 0, 0, 0,       // 2 segments, no sections
+
+        // program header segment
+        6, 0, 0, 0, 4, 0, 0, 0,       // program header segment, readable
+        0x40, 0, 0, 0, 0, 0, 0, 0,    // immediately after ELF header
+        0x40, 0, 0, 0, 0, 0, 0, 0,    // virtual address
+        0, 0, 0, 0, 0, 0, 0, 0,       // physical address
+        0x70, 0, 0, 0, 0, 0, 0, 0,    // size in file (2 * size of program header)
+        0x70, 0, 0, 0, 0, 0, 0, 0,    // size in memory
+        8, 0, 0, 0, 0, 0, 0, 0,       // alignment
+
+        // executable segment
+        1, 0, 0, 0, 5, 0, 0, 0,       // loadable segment, readable and executable
+        0, 0, 0, 0, 0, 0, 0, 0,       // whole file
+        0, 0, 0, 0, 0, 0, 0, 0,       // virtual address
+        0, 0, 0, 0, 0, 0, 0, 0,       // physical address
+        0, 0, 0, 0, 0, 0, 0, 0,       // PATCHME: size in file
+        0, 0, 0, 0, 0, 0, 0, 0,       // PATCHME: size in memory
+        0, 0x10, 0, 0, 0, 0, 0, 0,    // alignment (4K)
+    };
+    // Fill in the PATCHME fields.
+    // NOTE(review): memcpy stores host byte order while the header declares
+    // little-endian -- confirm the compiler only runs on little-endian hosts.
+    memcpy(&elf_header[0x18], &entry_vaddr, sizeof(uint64_t));
+    memcpy(&elf_header[0x98], &file_len, sizeof(uint64_t));
+    memcpy(&elf_header[0x98 + sizeof(uint64_t)], &file_len, sizeof(uint64_t));
+
+    patch(0, elf_header, ELF_HEADER_SIZE);
+
+    for (uint32_t i = 0; i < relocation_count; i++) {
+        struct relocation rel = relocations[i];
+        assert(rel.type == REL_PC32);
+        uint64_t vaddr = symbols[rel.symbol].vaddr;
+        assert(vaddr != (uint64_t) -1);
+        // PC-relative: target address minus the address just past the
+        // 4-byte displacement field (i.e. the end of the instruction).
+        int64_t disp = (int64_t) vaddr - ((int64_t) rel.offset + 4);
+        assert(disp >= INT32_MIN && disp <= INT32_MAX);
+        patch_u32(rel.offset, (uint32_t) (int32_t) disp);
+    }
+}
+
+/// Allocate a fresh, not-yet-defined symbol and return its handle.
+/// Prints an error and exits if the symbol table is already full.
+symbol new_symbol(void) {
+    // Check capacity before touching the table: symbols[MAX_SYMBOLS] is
+    // one past the end of the array.
+    if (symbol_count == MAX_SYMBOLS) {
+        fprintf(stderr, "error: exceeded maximum number of symbols\n");
+        exit(1);
+    }
+    // (uint64_t) -1 marks the symbol as undefined until it gets an address.
+    symbols[symbol_count].vaddr = (uint64_t) -1;
+    return symbol_count++;
+}
+
+/// Bind symbol `s` to the current output position.
+void define_executable_symbol(symbol s) {
+    symbols[s].vaddr = file_here;
+}
+
+/// Not implemented yet: trips an assertion when called.
+void define_readonly_symbol(symbol sym) {
+    (void) sym; // unused until this is implemented
+    // TODO:
+    assert(0);
+}
+
+/// Append `size` bytes from `buf` to the output and advance the current position.
+void append_data(size_t size, const void* buf) {
+    file_here += size;
+    emit(buf, size);
+}
+
+/// Append one byte to the output and advance the current position.
+void append_u8(uint8_t x) {
+    // Every append has to move `file_here`: symbol addresses, relocation
+    // offsets and the final file length are all derived from it.
+    file_here += sizeof(x);
+    emit_u8(x);
+}
+
+/// Append a 32-bit value to the output and advance the current position.
+void append_u32(uint32_t x) {
+    file_here += sizeof(x);
+    emit_u32(x);
+}
+
+/// Append a 64-bit value to the output and advance the current position.
+void append_u64(uint64_t x) {
+    file_here += sizeof(x);
+    emit_u64(x);
+}
+
+/// Claim the next free relocation record, exiting if the table is full.
+/// The caller is responsible for filling in every field.
+static struct relocation* new_relocation(void) {
+    if (relocation_count == MAX_RELOCATIONS) {
+        fprintf(stderr, "error: exceeded maximum number of relocations\n");
+        exit(1);
+    }
+    return &relocations[relocation_count++];
+}
+
+/// Emit a 32-bit PC-relative displacement to `sym` at the current position.
+/// If the symbol is already defined and in range the value is written
+/// directly; otherwise a zero placeholder is written and a REL_PC32
+/// relocation is recorded for `finish_executable` to resolve.
+void relocate_pc32(symbol sym) {
+    // The displacement is relative to the end of the 4-byte field.
+    int32_t offset = symbol_offset(sym, 4);
+    if (offset != INT32_MAX) {
+        append_u32((uint32_t) offset);
+        return;
+    }
+    // Capture the position of the field *before* emitting the placeholder:
+    // finish_executable patches at `rel->offset`, so it must name the first
+    // byte of the field, not the byte after it.
+    struct relocation* rel = new_relocation();
+    rel->type = REL_PC32;
+    rel->offset = file_here;
+    rel->symbol = sym;
+    append_u32(0);
+}
+
+/// Compute the PC-relative displacement from the current position to `sym`.
+///
+/// `off` is the distance from the current position to the end of the
+/// instruction being assembled, since displacements are relative to the
+/// end of the instruction. Returns INT32_MAX when the symbol is not
+/// defined yet or the displacement cannot be represented.
+int32_t symbol_offset(symbol sym, int8_t off) {
+    uint64_t vaddr = symbols[sym].vaddr;
+    if (vaddr == (uint64_t) -1) {
+        return INT32_MAX;
+    }
+    // Target minus the address following the instruction: negative for a
+    // backward reference, positive for a forward one.
+    int64_t disp = (int64_t) vaddr - ((int64_t) file_here + off);
+    // INT32_MAX itself is reserved as the "unknown" sentinel.
+    if (disp < INT32_MIN || disp >= INT32_MAX) {
+        return INT32_MAX;
+    }
+    return (int32_t) disp;
+}
+
+/// Not implemented yet: trips an assertion when called.
+void relocate_gotpcrel(symbol sym) {
+    (void) sym; // unused until this is implemented
+    // TODO
+    assert(0);
+}
+
+/// Not implemented yet: trips an assertion when called.
+void relocate_size32(symbol sym) {
+    (void) sym; // unused until this is implemented
+    // TODO
+    assert(0);
+}
+
+/// Not implemented yet: trips an assertion when called.
+void relocate_size64(symbol sym) {
+    (void) sym; // unused until this is implemented
+    // TODO
+    assert(0);
+}
--- a/src/format.h
+++ b/src/format.h
@ -0,0 +1,67 @@
+#ifndef FORMAT_H
+#define FORMAT_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+typedef uint32_t symbol;
+
+/// Begin a new ELF executable.
+void elf_executable(void);
+/// All definitions are complete. Finish processing the executable.
+void finish_executable(symbol entry_point);
+
+/// Create a new symbol. You will later have to define this with
+/// `define_executable_symbol`, `define_readonly_symbol`,
+/// or import it from an external library.
+symbol new_symbol(void);
+
+void define_executable_symbol(symbol sym);
+
+void define_readonly_symbol(symbol sym);
+
+void append_data(size_t size, const void* buf);
+void append_u8(uint8_t x);
+void append_u32(uint32_t x);
+void append_u64(uint64_t x);
+
+/// Assuming the symbol is located in the same segment as this code,
+/// insert a 32-bit offset for the symbol relative to the virtual address
+/// of the current address (Program Counter (PC)).
+///
+/// This is used for generating relative jumps.
+///
+/// If the symbol is defined in this object, then it will be computed
+/// at compile-time and not emitted as an actual relocation in the executable.
+void relocate_pc32(symbol sym);
+
+/// Like pc32 for DIY relocations. This returns INT32_MAX if the symbol
+/// has not been defined yet. This exists so that the assembler can manually
+/// emit 8-bit short jumps when the distance is short enough.
+///
+/// The offset exists to account for the offset being relative to the
+/// *end* of an instruction, whereas the symbol offset is computed
+/// relative to `here`. This makes bounds-checking slightly easier.
+int32_t symbol_offset(symbol sym, int8_t offset);
+
+/// If the symbol is not in the same segment as this code,
+/// for example when linking against a dynamic library or accessing read-only data,
+/// then a 32-bit offset may not be sufficient to access a symbol with pc32.
+/// This necessitates a Global Offset Table (GOT), which *is* located in this
+/// segment and contains the 64-bit absolute addresses of the symbols.
+/// Thus, this relocation inserts a 32-bit PC-relative address which points
+/// into the location in the GOT which contains the 64-bit absolute address
+/// of the symbol.
+///
+/// This is used for generating jumps to dynamic library code
+/// and for accessing data in a different segment (e.g. read-only symbols).
+void relocate_gotpcrel(symbol sym);
+
+/// Insert the 32-bit truncated size of a symbol.
+void relocate_size32(symbol sym);
+
+/// Insert the 64-bit size of a symbol.
+void relocate_size64(symbol sym);
+
+#endif
--- a/src/io.c
+++ b/src/io.c
@ -14,8 +14,6 @@
 static const char* outfile_name;
 static FILE* infile;
 static FILE* outfile;
-// HACK: "here" tracking should be handled by the assembler, not IO.
-uint32_t here = 0;

 void open_files(const char* infile_name, const char* outfile_name_) {
    outfile_name = outfile_name_;
@ -53,7 +51,6 @@ void reserve(size_t len) {
        fprintf(stderr, "failed to reserve space in in output file: %s\n", strerror(errno));
        exit(1);
    }
-    here += len;
 }

 void emit(const void* restrict ptr, size_t count) {
@ -62,7 +59,6 @@ void emit(const void* restrict ptr, size_t count) {
        fprintf(stderr, "failed to write to output file\n");
        exit(1);
    }
-    here += count;
 }

 void emit_u8(uint8_t x) {
--- a/src/io.h
+++ b/src/io.h
@ -4,8 +4,6 @@
 #include <stddef.h>
 #include <stdint.h>

-extern uint32_t here;
-
 void open_files(const char* infile_name, const char* outfile_name);
 void close_files(void);

--- a/src/ir.c
+++ b/src/ir.c
@ -4,7 +4,9 @@
 /// and register allocation.

 #include "asm.h"
+#include "format.h"
 #include "ir.h"
+#include "x86encode.h"

 #include <assert.h>
 #include <stdbool.h>
@ -26,9 +28,7 @@ struct stack_frame {
 struct label {
    uint32_t frame;
    uint32_t argc;
-    ip definition;
-    uint32_t fixupc;
-    ip fixups[MAX_FIXUPS];
+    symbol symbol;
 };

 static uint32_t stack_depth = 0;
@ -66,7 +66,8 @@ void leave(var* args) {

 label declare(uint32_t argc) {
    assert(label_depth < MAX_LABELS);
-    struct label label = { stack_frame, argc, (ip) -1, 0, 0 };
+    symbol sym = new_symbol();
+    struct label label = { stack_frame, argc, sym };
    labels[label_depth] = label;
    return label_depth++;
 }
@ -79,22 +80,12 @@ label declare_exit(uint32_t argc) {

 void define(label l, var* args) {
    struct label* label = &labels[l];
-    label->definition = here;
-    while (label->fixupc > 0) {
-        label->fixupc--;
-        inst_jump_resolve(label->fixups[label->fixupc], here);
-    }
+    define_executable_symbol(label->symbol);
 }

 void jump(label l, var* args) {
    struct label* label = &labels[l];
-    if (label->definition == (ip) -1) {
-        assert(label->fixupc < MAX_FIXUPS);
-        label->fixups[label->fixupc] = inst_jump_unresolved();
-        label->fixupc++;
-    } else {
-        inst_jump(label->definition);
-    }
+    inst_jump(label->symbol);
 }

 void jump_table(size_t branches, label* labels, var index, var* args) {
--- a/src/main.c
+++ b/src/main.c
@ -1,19 +1,18 @@
-#include <assert.h>
-#include <errno.h>
-#include <stdbool.h>
 #include <stddef.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdint.h>
-#include <string.h>

+#include "format.h"
 #include "io.h"
 #include "ir.h"
 #include "parse.h"

 #define ELF_HEADER_SIZE 0xb0

-size_t compile(void) {
+symbol compile(void) {
+    symbol entry_point = new_symbol();
+    define_executable_symbol(entry_point);
    var argc, argv, env;
    init(&argc, &argv, &env);
    var a = lit(52);
@ -22,51 +21,7 @@ size_t compile(void) {
    var sys_exit = lit(60);
    var args[2] = { sys_exit, exit_code };
    syscall(2, args);
-    return ELF_HEADER_SIZE;
-}
-
-static void write_elf(uint64_t entry_point) {
-    uint64_t file_len = here;
-
-    // Hardcoded ELF header for statically-linked position-independent executable.
-    // Since we only support Linux amd64 static PIE, there's no need to abstract over this for now.
-    uint8_t elf_header[ELF_HEADER_SIZE] = {
-        // ELF header
-        0x7F, 'E', 'L', 'F',          // ELF magic
-        2, 1, 1, 3, 0,                // 64-bit little-endian Linux, ELF version 1
-        0, 0, 0, 0, 0, 0, 0,          // padding
-        3, 0, 0x3E, 0, 1, 0, 0, 0,    // dynamic executable, amd64, ELF version 1 again
-        0, 0, 0, 0, 0, 0, 0, 0,       // PATCHME: entry point address
-        0x40, 0, 0, 0, 0, 0, 0, 0,    // program header table offset (immediately after ELF)
-        0, 0, 0, 0, 0, 0, 0, 0,       // section eader table offset (none)
-        0, 0, 0, 0, 0x40, 0, 0x38, 0, // flags (none), header sizes
-        2, 0, 0, 0, 0, 0, 0, 0,       // 2 segments, no sections
-
-        // program header segment
-        6, 0, 0, 0, 4, 0, 0, 0,       // program header segment, readable
-        0x40, 0, 0, 0, 0, 0, 0, 0,    // immediately after ELF header
-        0x40, 0, 0, 0, 0, 0, 0, 0,    // virtual address
-        0, 0, 0, 0, 0, 0, 0, 0,       // physical address
-        0x70, 0, 0, 0, 0, 0, 0, 0,    // size in file (2 * size of program header)
-        0x70, 0, 0, 0, 0, 0, 0, 0,    // size in memory
-        8, 0, 0, 0, 0, 0, 0, 0,       // alignment
-
-        // executable segment
-        1, 0, 0, 0, 5, 0, 0, 0,       // loadable segment, readable and executable
-        0, 0, 0, 0, 0, 0, 0, 0,       // whole file
-        0, 0, 0, 0, 0, 0, 0, 0,       // virtual address
-        0, 0, 0, 0, 0, 0, 0, 0,       // physical address
-        0, 0, 0, 0, 0, 0, 0, 0,       // PATCHME: size in file
-        0, 0, 0, 0, 0, 0, 0, 0,       // PATCHME: size in memory
-        0, 0x10, 0, 0, 0, 0, 0, 0,    // alignment (4K)
-    };
-    uint64_t ep = (uint64_t) entry_point;
-    uint64_t fl = (uint64_t) file_len;
-    memcpy(&elf_header[0x18], &entry_point, sizeof(uint64_t));
-    memcpy(&elf_header[0x98], &file_len, sizeof(uint64_t));
-    memcpy(&elf_header[0x98 + sizeof(uint64_t)], &file_len, sizeof(uint64_t));
-
-    patch(0, elf_header, ELF_HEADER_SIZE);
+    return entry_point;
 }

 int main(int argc, char** argv) {
@ -76,11 +31,11 @@ int main(int argc, char** argv) {
    }
    open_files(argv[2], argv[1]);

-    parse();
+    //parse();

-    reserve(ELF_HEADER_SIZE);
-    size_t entry_point = compile();
-    write_elf((uint64_t) entry_point);
+    elf_executable();
+    symbol entry_point = compile();
+    finish_executable(entry_point);

    close_files();
    return 0;
--- a/src/x86encode.c
+++ b/src/x86encode.c
@ -6,7 +6,7 @@
 //   https://wiki.osdev.org/X86-64_Instruction_Encoding
 //   https://defuse.ca/online-x86-assembler.htm

-#include "io.h"
+#include "format.h"
 #include "x86encode.h"

 #define REX 0x40
@ -26,101 +26,101 @@

 static void x86_opt_rexr(reg reg) {
    if (reg >= R8) {
-        emit_u8(REX | REX_R);
+        append_u8(REX | REX_R);
    }
 }

 static void x86_rexwr(reg reg) {
    uint8_t rex = REX | REX_W;
    if (reg >= R8) rex |= REX_R;
-    emit_u8(rex);
+    append_u8(rex);
 }

 static void x86_rexwb(reg b) {
    uint8_t rex = REX | REX_W;
    if (b >= R8) rex |= REX_B;
-    emit_u8(rex);
+    append_u8(rex);
 }

 static void x86_rexwrb(reg r, reg b) {
    uint8_t rex = REX | REX_W;
    if (r >= R8) rex |= REX_R;
    if (b >= R8) rex |= REX_B;
-    emit_u8(rex);
+    append_u8(rex);
 }

 static void x86_modrr(reg r, reg b) {
-    emit_u8(MODRM_RR | (REG(r) << 3) | REG(b));
+    append_u8(MODRM_RR | (REG(r) << 3) | REG(b));
 }

 static void x86_modrm(reg r, reg b) {
-    emit_u8(MODRM_RM | (REG(r) << 3) | REG(b));
+    append_u8(MODRM_RM | (REG(r) << 3) | REG(b));
 }

 static void x86_modrm8(reg r, reg b) {
-    emit_u8(MODRM_RM8 | (REG(r) << 3) | REG(b));
+    append_u8(MODRM_RM8 | (REG(r) << 3) | REG(b));
 }

 static void x86_modrm32(reg r, reg b) {
-    emit_u8(MODRM_RM32 | (REG(r) << 3) | REG(b));
+    append_u8(MODRM_RM32 | (REG(r) << 3) | REG(b));
 }

 static void x86_modxm(uint8_t ext, reg b) {
-    emit_u8(MODRM_RR | (ext << 3) | REG(b));
+    append_u8(MODRM_RR | (ext << 3) | REG(b));
 }

 static void x86_enc_opr(uint8_t op, reg reg) {
    x86_opt_rexr(reg);
-    emit_u8(op + REG(reg));
+    append_u8(op + REG(reg));
 }

 static void x86_enc_rexw_opr(uint8_t op, reg reg) {
    x86_rexwr(reg);
-    emit_u8(op + REG(reg));
+    append_u8(op + REG(reg));
 }

 static void x86_enc_opr_imm32(uint8_t op, reg reg, uint32_t imm) {
    x86_opt_rexr(reg);
-    emit_u8(op + REG(reg));
-    emit_u32(imm);
+    append_u8(op + REG(reg));
+    append_u32(imm);
 }

 static void x86_enc_rexw_opr_imm32(uint8_t op, reg reg, uint32_t imm) {
    x86_rexwr(reg);
-    emit_u8(op + REG(reg));
-    emit_u32(imm);
+    append_u8(op + REG(reg));
+    append_u32(imm);
 }

 static void x86_enc_rexw_opr_imm64(uint8_t op, reg reg, uint64_t imm) {
    x86_rexwr(reg);
-    emit_u8(op + REG(reg));
-    emit_u64(imm);
+    append_u8(op + REG(reg));
+    append_u64(imm);
 }

 static void x86_enc_rexw_modrr(uint8_t op, reg r, reg m) {
    x86_rexwrb(r, m);
-    emit_u8(op);
+    append_u8(op);
    x86_modrr(r, m);
 }

 static void x86_enc_rexw_modrm(uint8_t op, reg r, reg b) {
    x86_rexwrb(r, b);
-    emit_u8(op);
+    append_u8(op);
    x86_modrm(r, b);
 }

 static void x86_enc_rexw_modrm8(uint8_t op, reg r, reg b, int8_t disp) {
    x86_rexwrb(r, b);
-    emit_u8(op);
+    append_u8(op);
    x86_modrm8(r, b);
-    emit_u8(disp);
+    append_u8(disp);
 }

 static void x86_enc_rexw_modrm32(uint8_t op, reg r, reg b, int32_t disp) {
    x86_rexwrb(r, b);
-    emit_u8(op);
+    append_u8(op);
    x86_modrm32(r, b);
-    emit_u32(disp);
+    append_u32(disp);
 }

 static void x86_enc_rexw_modrmd(uint8_t op, reg r, reg b, int32_t disp) {
@ -135,16 +135,16 @@ static void x86_enc_rexw_modrmd(uint8_t op, reg r, reg b, int32_t disp) {

 static void x86_enc_rexw_modxm_imm8(uint8_t op, uint8_t ext, reg m, uint8_t imm) {
    x86_rexwb(m);
-    emit_u8(op);
+    append_u8(op);
    x86_modxm(ext, m);
-    emit_u8(imm);
+    append_u8(imm);
 }

 static void x86_enc_rexw_modxm_imm32(uint8_t op, uint8_t ext, reg m, uint32_t imm) {
    x86_rexwb(m);
-    emit_u8(op);
+    append_u8(op);
    x86_modxm(ext, m);
-    emit_u32(imm);
+    append_u32(imm);
 }

 static void x86_enc_rexw_modxm_imm(uint8_t op, uint8_t ext, reg m, uint32_t imm) {
@ -157,12 +157,12 @@ static void x86_enc_rexw_modxm_imm(uint8_t op, uint8_t ext, reg m, uint32_t imm)

 static void x86_enc_disp8(uint8_t op, int8_t disp) {
    uint8_t buf[2] = { op, (uint8_t) disp };
-    emit(buf, 2);
+    append_data(2, buf);
 }

 static void x86_enc_disp32(uint8_t op, int32_t disp) {
-    emit_u8(op);
-    emit_u32((uint32_t) disp);
+    append_u8(op);
+    append_u32((uint32_t) disp);
 }

 void x86_inst_mov_r64_imm64(reg dest, uint64_t imm) {
@ -243,6 +243,10 @@ void x86_inst_jmp_disp(int32_t disp) {
    }
 }

+/// Emit only the opcode byte of a jump with a 32-bit displacement;
+/// the caller appends the displacement itself.
+void x86_inst_jmp_disp32_op(void) {
+    const uint8_t opcode = 0xe9;
+    append_u8(opcode);
+}
+
 // TODO: special instructions for AX
 void x86_inst_sub_r64_imm8(reg dest, int8_t imm) {
    x86_enc_rexw_modxm_imm8(0x83, 5, dest, (uint8_t) imm);
@ -267,5 +271,5 @@ void x86_inst_add_r64_imm8(reg dest, int8_t imm) {

 void x86_inst_syscall(void) {
    const uint8_t buf[2] = { 0x0f, 0x05 };
-    emit(buf, 2);
+    append_data(2, buf);
 }
--- a/src/x86encode.h
+++ b/src/x86encode.h
@ -47,6 +47,7 @@ void x86_inst_jmp_disp8(int8_t disp);
 #define X86_JMP_DISP32_SIZE 5
 void x86_inst_jmp_disp32(int32_t disp);
 void x86_inst_jmp_disp(int32_t disp);
+void x86_inst_jmp_disp32_op(void);

 void x86_inst_sub_r64_imm8(reg dest, int8_t imm);
 void x86_inst_sub_r64_imm32(reg dest, int32_t imm);