#include "bytecode.h"
#include "format.h"
#include "x86encode.h"
// NOTE(review): the system-header names were lost from this file; they are
// reconstructed here from usage (int32_t/INT8_MIN/INT8_MAX, fprintf/stderr,
// exit) -- confirm against the original.
#include <stdint.h> // TODO: avoid importing for constants
#include <stdio.h>
#include <stdlib.h>

// Register convention:
// ax: left side of cons
// dx: right side of cons
// bx: free cell list
// si: map context
// di: unit
// sp: C stack (for FFI)
// bp,cx,r9-r15
//
// Data type representation:
// A+B: ax=tag, bx=value
// A*B: ax=A, bx=B
// 1: ax=1, bx=1
//
// To remove a layer of indirection, we assume that every data type
// is represented by a cons cell, even those that don't need it (i.e. 1).

// This is an extremely naive interpretation of these instructions.
// Ideally, we'd decompile all of these structural rules (especially
// associativity and commutativity) back into variables, and then perform
// register allocation.

// comm: a * b -> b * a
void comm(void) {
    // swap the left and right side of the cons
    x86_inst_xchg_r64_rax(DX);
}

// assocl: a * (b * c) -> (a * b) * c
void assocl(void) {
    // State trace "left, right"; the cons cell the pointer register refers
    // to is rewritten in place.
    // a, b * c
    // b * c, a
    // a * c, b
    // a * b, c
    // xchg a, d
    // xchg d, [a]
    // xchg d, [a+8]
    x86_inst_xchg_r64_rax(DX);
    x86_inst_xchg_r64_m64(DX, AX);
    x86_inst_xchg_r64_m64_disp8(DX, AX, 8);
}

// assocr: (a * b) * c -> a * (b * c)
void assocr(void) {
    // a * b, c
    // c, a * b
    // b, a * c
    // a, b * c
    // xchg a, d
    // xchg a, [d+8]
    // xchg a, [d]
    x86_inst_xchg_r64_rax(DX);
    x86_inst_xchg_r64_m64_disp8(AX, DX, 8);
    x86_inst_xchg_r64_m64(AX, DX);
}

// distr: a * (b + c) -> a * b + a * c
void distr(void) {
    // a, b + c
    // a * b + a * c
    // a, (tag, bc)
    // tag, (a, bc)
    // xchg a, [d]
    x86_inst_xchg_r64_m64(AX, DX);
    // Awfully convenient how that works out, huh?
}

// distl: (a + b) * c -> a * c + b * c
void distl(void) {
    // The intermediate states here are ill-typed, but ultimately everything
    // gets shuffled around to the right locations.
    // a + b, c
    // c + c, a/b
    // a/b, c+c
    // a * c + b * c
    // (tag, ab), c
    // (tag, c), ab
    // ab, (tag, c)
    // tag, (ab, c)
    // xchg d, [a+8]
    // xchg d, [a]
    // xchg a, d
    x86_inst_xchg_r64_m64_disp8(DX, AX, 8);
    x86_inst_xchg_r64_m64(DX, AX);
    x86_inst_xchg_r64_rax(DX);
}

// factr: a * b + a * c -> a * (b + c)   (inverse of distr)
void factr(void) {
    // a * b + a * c:
    // a * (b + c)
    // tag, (a, bc)
    // a, (tag, bc)
    // xchg a, [d]
    x86_inst_xchg_r64_m64(AX, DX);
}

// factl: a * c + b * c -> (a + b) * c   (inverse of distl)
void factl(void) {
    // a * c + b * c
    // (a + b) * c
    // tag, (ab, c)
    // ab, (tag, c)
    // (tag, c), ab
    // (tag, ab), c
    // xchg a, [d]
    // xchg a, d
    // xchg [a+8], d
    x86_inst_xchg_r64_m64(AX, DX);
    x86_inst_xchg_r64_rax(DX);
    x86_inst_xchg_r64_m64_disp8(DX, AX, 8);
}

// Pop a cell off the free list (bx), fill it with (ax, dx), and leave a
// pointer to it in dx.  None of the emitted instructions touch the flags.
static void allocate_cons(void) {
    // a, b free=(_, next)
    // _, b free=(a, next)
    // _, next free=(a, b)
    // _, (a, b) free=next
    x86_inst_mov_m64_r64(BX, AX);
    x86_inst_xchg_r64_m64_disp8(DX, BX, 8);
    x86_inst_xchg_r64_r64(DX, BX);
    // a, b free=(_, next)
    // (_, next), b free=a
    // (_, b), next free=a
    // (_, b), a free=next
    // (a, b), _ free=next
}

// Unpack the cell pointed to by dx into (ax, dx) and return the cell to the
// free list (bx).  None of the emitted instructions touch the flags.
static void free_cons(void) {
    // _, (a, b) free=next
    // a, (_, b) free=next
    // a, (_, next) free=b
    // a, b free=(_, next)
    x86_inst_mov_r64_m64(AX, DX);
    x86_inst_xchg_r64_m64_disp8(BX, DX, 8);
    x86_inst_xchg_r64_r64(DX, BX);
}

// mapl: focus on the left component of a product; the right component is
// parked on the C stack until mapl_end.
void mapl_begin(void) {
    x86_inst_push_r64(DX);
    x86_inst_xchg_r64_rax(DX);
    free_cons();
}

void mapl_end(void) {
    allocate_cons();
    x86_inst_xchg_r64_rax(DX);
    x86_inst_pop_r64(DX);
}

// mapr: focus on the right component of a product; the left component is
// parked on the C stack until mapr_end.
void mapr_begin(void) {
    x86_inst_push_r64(AX);
    free_cons();
}

void mapr_end(void) {
    allocate_cons();
    x86_inst_pop_r64(AX);
}

// unitir: a -> a * 1
void unitir(void) {
    allocate_cons();
    x86_inst_xchg_r64_rax(DX);
    x86_inst_mov_r64_r64(DX, DI);
}

// unitil: a -> 1 * a
void unitil(void) {
    allocate_cons();
    x86_inst_mov_r64_r64(AX, DI);
}

// uniter: a * 1 -> a
void uniter(void) {
    x86_inst_xchg_r64_rax(DX);
    free_cons();
}

// unitel: 1 * a -> a
void unitel(void) {
    free_cons();
}

// comm_plus: a + b -> b + a (just flip the tag bit)
void comm_plus(void) {
    x86_inst_xor_al_imm8(1);
}

// Emit an unconditional jump to `sym`, using the short (disp8) encoding
// when the target is close enough.
static void inst_jump(symbol sym) {
    int32_t disp = symbol_offset(sym, X86_JMP_DISP8_SIZE);
    if (disp >= INT8_MIN && disp <= INT8_MAX) {
        x86_inst_jmp_disp8(disp);
    } else {
        x86_inst_jmp_disp32_op();
        relocate_pc32(sym);
    }
    // TODO: support 64-bit jumps?
}

// Emit `jz sym`, short encoding when possible.
static void inst_jump_if_zero(symbol sym) {
    int32_t disp = symbol_offset(sym, X86_JZ_DISP8_SIZE);
    if (disp >= INT8_MIN && disp <= INT8_MAX) {
        x86_inst_jz_disp8(disp);
    } else {
        x86_inst_jz_disp32_op();
        relocate_pc32(sym);
    }
}

// Emit `jnz sym`, short encoding when possible.
static void inst_jump_if_not_zero(symbol sym) {
    int32_t disp = symbol_offset(sym, X86_JNZ_DISP8_SIZE);
    if (disp >= INT8_MIN && disp <= INT8_MAX) {
        x86_inst_jnz_disp8(disp);
    } else {
        x86_inst_jnz_disp32_op();
        relocate_pc32(sym);
    }
}

// NOTE:
// This is a really stupid implementation of assoc.
// However, it might not be worth optimizing compared to
// eliminating assoc entirely when possible.

// assocl_plus: a + (b + c) -> (a + b) + c
void assocl_plus(void) {
    // a + (b + c)
    // (a + b) + c
    // tag, a/(tag, b/c)
    symbol when_bc = new_symbol();
    symbol end_if = new_symbol();
    x86_inst_test_r8_r8(AX, AX);
    inst_jump_if_not_zero(when_bc);

    ///// A
    // 0, a
    allocate_cons();
    // 0, (0, a)
    inst_jump(end_if);

    /// BC
    define_executable_symbol(when_bc);
    // 1, (tag, b/c)
    symbol when_c = new_symbol();
    x86_inst_test_m8_imm8(DX, 1);
    inst_jump_if_not_zero(when_c);

    /// B
    // 1, (0, b)
    x86_inst_xchg_r64_m64(AX, DX);
    // 0, (1, b)
    inst_jump(end_if);

    /// C
    define_executable_symbol(when_c);
    // 1, (1, c)
    free_cons();
    // 1, c

    define_executable_symbol(end_if);
    // tag, (tag, a/b)/c
}

// This is the same as assocl, but with `jump_if_not_zero`
// replaced with `jump_if_zero`.
void assocr_plus(void) { // (a + b) + c // a + (b + c) // tag, (tag, a/b)/c symbol when_ab = new_symbol(); symbol end_if = new_symbol(); x86_inst_test_r8_r8(AX, AX); inst_jump_if_zero(when_ab); /// C // 1, c allocate_cons(); // 1, (1, c) inst_jump(end_if); /// AB define_executable_symbol(when_ab); // 0, (tag, a/b) symbol when_a = new_symbol(); x86_inst_test_m8_imm8(AX, 1); inst_jump_if_zero(when_a); /// B // 0, (1, b) x86_inst_xchg_r64_m64(AX, DX); // 1, (0, b) inst_jump(end_if); /// A define_executable_symbol(when_a); // 1, (1, a) free_cons(); define_executable_symbol(end_if); // tag, a/(tag, b/c) } #define MAX_BRANCHES 64 static symbol branches[MAX_BRANCHES]; static size_t branchi = 0; static symbol new_branch() { if (branchi == MAX_BRANCHES) { fprintf(stderr, "exeeded maximum number of plus maps\n"); exit(1); } symbol* branch = &branches[branchi++]; *branch = new_symbol(); return *branch; } void mapl_plus_begin(void) { symbol end_branch = new_branch(); x86_inst_test_r8_r8(AX, AX); inst_jump_if_not_zero(end_branch); free_cons(); } void mapl_plus_end(void) { allocate_cons(); x86_zero(AX); define_executable_symbol(branches[--branchi]); } void mapr_plus_begin(void) { symbol end_branch = new_branch(); x86_inst_test_r8_r8(AX, AX); inst_jump_if_zero(end_branch); free_cons(); } void mapr_plus_end(void) { allocate_cons(); x86_inst_mov_r64_imm(AX, 1); define_executable_symbol(branches[--branchi]); } void inl(void) { allocate_cons(); x86_zero(AX); } void inr(void) { allocate_cons(); x86_inst_mov_r64_imm(AX, 1); } void out(void) { // a + a // a free_cons(); } void jump(symbol sym) { inst_jump(sym); } void jump_if(symbol a, symbol b) { x86_inst_test_r8_r8(AX, AX); out(); inst_jump_if_zero(a); inst_jump(b); } static void inst_load(reg dest, symbol sym) { x86_inst_lea_r64_rip_disp32_op(dest); relocate_pc32(sym); } static symbol one_symbol; static symbol loop_point; static symbol exit_point; void halt(void) { inst_jump(exit_point); } static void print_unary(void) { // System 
calls will mangle AX and DX. x86_inst_mov_r64_r64(R12, AX); x86_inst_mov_r64_r64(R14, DX); symbol loop_point = new_symbol(); symbol exit_point = new_symbol(); define_executable_symbol(loop_point); // Print `1` until we stop hitting rights. x86_inst_test_r8_r8(R12, R12); inst_jump_if_zero(exit_point); x86_inst_mov_r64_m64(R12, R14); x86_inst_mov_r64_m64_disp8(R14, R14, 8); x86_inst_mov_r64_imm(AX, 1); // sys_write x86_inst_mov_r64_imm(DI, 1); // stdout inst_load(SI, one_symbol); x86_inst_mov_r64_imm(DX, 1); x86_inst_syscall(); inst_jump(loop_point); define_executable_symbol(exit_point); } static void exit_syscall(void) { x86_inst_mov_r64_imm(AX, 60); // sys_exit x86_zero(DI); x86_inst_syscall(); } #define MEMORY_SIZE 0x100000 static void initialize_free_list(void) { // allocate 1 MiB with a syscall, because it's easier than // adding an ELF RW segment for the moment. x86_inst_mov_r64_imm(AX, 9); x86_inst_mov_r64_imm(DI, (uint64_t) -1); x86_inst_mov_r64_imm(SI, MEMORY_SIZE); x86_inst_mov_r64_imm(DX, 0x3 /* PROT_READ | PROT_WRITE */); x86_inst_mov_r64_imm(R10, 0x8022 /* MAP_PRIVATE | MAP_POPULATE | MAP_ANONYMOUS */); x86_inst_mov_r64_imm(R8, (uint64_t) -1); x86_zero(R9); x86_inst_syscall(); // The beginning of the free list. 
x86_inst_mov_r64_r64(BX, AX); x86_inst_mov_r64_imm(CX, MEMORY_SIZE / 2); x86_inst_lea_r64_m64_disp8(DX, AX, 8); symbol exit_point = new_symbol(); symbol loop_point = new_symbol(); define_executable_symbol(loop_point); x86_inst_test_r64_r64(CX, CX); inst_jump_if_zero(exit_point); x86_inst_mov_m64_r64_disp8(AX, DX, 8); x86_inst_mov_r64_r64(AX, DX); x86_inst_add_r64_imm8(DX, 16); x86_inst_sub_r64_imm8(CX, 16); inst_jump(loop_point); define_executable_symbol(exit_point); } symbol init_bytecode(void) { one_symbol = new_symbol(); define_executable_symbol(one_symbol); append_u8((uint8_t) '1'); exit_point = new_symbol(); define_executable_symbol(exit_point); print_unary(); exit_syscall(); symbol entry_point = new_symbol(); define_executable_symbol(entry_point); initialize_free_list(); // Self-referential unit value. //x86_inst_lea_r64_m64_disp8(DI, SP, -16); x86_inst_mov_r64_r64(DI, SP); x86_inst_sub_r64_imm8(DI, 16); x86_inst_xor_r32_r32(R14, R14); x86_inst_push_r64(R14); x86_inst_push_r64(R14); // Initial state is a unit. x86_inst_mov_r64_r64(AX, DI); x86_inst_mov_r64_r64(DX, DI); loop_point = new_symbol(); define_executable_symbol(loop_point); return entry_point; } void finish_bytecode(void) { inst_jump(loop_point); }