From 40f88918ef7db34570577d90b4a549517514c3b5 Mon Sep 17 00:00:00 2001 From: James Martin Date: Fri, 28 Jul 2023 14:09:23 -0700 Subject: [PATCH] WIP on new algebraic language, going to change directions again. --- Makefile | 2 +- docs/intermediate-representations.md | 111 ++++++ src/bytecode.c | 489 +++++++++++++++++++++++++++ src/bytecode.h | 57 ++++ src/ir/flat_register.c | 241 ------------- src/ir/flat_register.h | 43 --- src/main.c | 311 ++++++++++++++++- src/x86encode.c | 124 ++++++- src/x86encode.h | 19 +- 9 files changed, 1089 insertions(+), 308 deletions(-) create mode 100644 docs/intermediate-representations.md create mode 100644 src/bytecode.c create mode 100644 src/bytecode.h delete mode 100644 src/ir/flat_register.c delete mode 100644 src/ir/flat_register.h diff --git a/Makefile b/Makefile index 5c4098d..8e1fc0b 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ SHELL = /bin/sh CFLAGS = -std=c99 -pedantic -Wextra -Os LDFLAGS = -lc -OBJECTS = asm.o format.o io.o ir.o lex.o lex/indent.o lang.o main.o parse.o x86encode.o +OBJECTS = bytecode.o format.o io.o main.o x86encode.o .PHONY: passc passc: .bin $(OBJECTS) diff --git a/docs/intermediate-representations.md b/docs/intermediate-representations.md new file mode 100644 index 0000000..9a5df4b --- /dev/null +++ b/docs/intermediate-representations.md @@ -0,0 +1,111 @@ +# Intermediate Representations + +## Bytecode + +### Instructions +Instructions for times: + +* `comm : a * b <=> b * a` +* `assocl : a * (b * c) => (a * b) * c` +* `assocr : (a * b) * c => a * (b * c)` +* `mapl (f : a => b) : a * c => b * c` +* `mapr (f : b => c) : a * b => a * c` +* `unitil : a => a * 1` +* `unitir : a => 1 * a` +* `unitel : a * 1 => a` +* `uniter : 1 * a => a` + +Instructions for plus: + +* `comm : a + b <=> b + a` +* `assocl : a + (b + c) => (a + b) + c` +* `assocr : (a + b) + c => a + (b + c)` +* `mapl (f : a => b) : a + c => b + c` +* `mapr (f : b => c) : a + b => a + c` +* `inl (b : type) : a => a + b` +* `inr (b : 
type) : b => a + b` +* `out : a + a => a` + +Distributivity: + +* `distl : a * (b + c) => (a * b) + (a * c)` +* `distr : (a + b) * c => (a * c) + (b * c)` +* `factl : (a * b) + (a * c) => a * (b + c)` +* `factr : (a * c) + (b * c) => (a + b) * c` + +Recursion: + +* `project: rec r. f(r) -> f(rec r. f(r))` +* `embed: f(rec r. f(r)) -> rec r. f(r)` + +`project` and `embed` are no-ops which exist to make type-checking easier +(i.e. isorecursive over equirecursive types). + +#### Most instructions are redundant +Most of these instructions are redundant: + +* All of the l/r variants can be implemented in terms of each other + using commutativity. +* All of the plus instructions can be implemented in terms of `map`, `in`, and `out`. +* Alternatively, we could have replaced `map` and `out` with a single instruction, + `if (f : a => c) (g : b => c) : a + b => c`. + +So "morally", there are only about 10 instructions: `comm`, `assoc`, `map`, `uniti`, `unite`, +`inl`, `inr`, `if`, `dist`, and `fact`. + +#### Most instructions are reversible +Inverses of instructions: + +* `comm` / `comm` +* `assocl` / `assocr` +* `map f` / `map f*` +* `uniti` / `unite` +* `dist` / `fact` + +The only irreversible instructions are `in` and `out`. + +#### Instructions are algebraic laws +We have a symmetric monoidal category with coproducts where `*` distributes over `+`. +This isn't quite a distributive symmetric monoidal category, because `*` isn't a product. + +Likewise, we *almost* have a distributive lattice (characterized as a meet-semilattice +with binary joins), but `*` isn't guaranteed to be idempotent. + +The reversible fragment is a wide dagger symmetric monoidal subcategory. + +#### That's really all we need +We simply don't need functions, polymorphism, or `0`. + +`0` isn't very interesting when characterized as an initial object +or as the unit for `+`; I find it's only interesting in the context of +second-order polymorphism, as `forall a. a`. 
+ +## Finite-state 1-bit cons machine +Instructions: + +* `comm` +* `assoc` +* `factor` +* `dist` +* `map` +* `unite` +* `uniti` +* `inl` +* `inr` + +Redundant instructions: + +* `l`/`r` variants +* `out` + +There is a finite number of states, and a state transition table +which determines the next state based on the current state and +a single bit extracted using `dist`. + +## Finite-state random-access 1-bit register machine +Instructions: + +* `x <- enum(imm, y)` +* `w <- struct(x, y, z)` +* `free x` + diff --git a/src/bytecode.c b/src/bytecode.c new file mode 100644 index 0000000..dd2cf69 --- /dev/null +++ b/src/bytecode.c @@ -0,0 +1,489 @@ +#include "bytecode.h" +#include "format.h" +#include "x86encode.h" + +#include // TODO: avoid importing for constants +#include + +// Register convention: +// ax: left side of cons +// dx: right side of cons +// bx: free cell list +// si: map context +// di: unit +// sp: C stack (for FFI) +// bp,cx,r9-r15 +// +// Data type representation: +// A+B: ax=tag, bx=value +// A*B: ax=A, bx=B +// 1: ax=1, bx=1 +// +// To remove a layer of indirection, we assume that every data type +// is represented by a cons cell, even those that don't need it (i.e. 1). + +// This is an extremely naive interpretation of these instructions. +// Ideally, we'd decompile all of these structural rules (especially +// associativity and commutativity) back into variables, and then perform +// register allocation. 
+ +void comm(void) { + // swap the left and right side of the cons + x86_inst_xchg_r64_rax(DX); +} + +void assocl(void) { + // a, a * b + // b * c, a + // a * c, b + // a * b, c + + // xchg a, d + // xchg d, [a] + // xchg d, [a+8] + + x86_inst_xchg_r64_rax(DX); + x86_inst_xchg_r64_m64(DX, AX); + x86_inst_xchg_r64_m64_disp8(DX, AX, 8); +} + +void assocr(void) { + // a * b, c + // c, a * b + // b, a * c + // a, b * c + + // xchg a, d + // xchg a, [d+8] + // xchg a, [d] + x86_inst_xchg_r64_rax(DX); + x86_inst_xchg_r64_m64_disp8(AX, DX, 8); + x86_inst_xchg_r64_m64(AX, DX); +} + +void distl(void) { + // a, b + c + // a * b + a * c + + // a, (tag, bc) + // tag, (a, bc) + + // xchg a, [d] + x86_inst_xchg_r64_m64(AX, DX); + + // Awfully convenient how that works out, huh? +} + +void distr(void) { + // The intermediate states here are ill-typed, but ultimately everything + // gets shuffled around to the right locations. + + // a + b, c + // c + c, a/b + // a/b, c+c + // a * c + b * c + + // (tag, ab), c + // (tag, c), ab + // ab, (tag, c) + // tag, (ab, c) + + // xchg d, [a+8] + // xchg d, [a] + // xchg a, d + x86_inst_xchg_r64_m64_disp8(DX, AX, 8); + x86_inst_xchg_r64_m64(DX, AX); + x86_inst_xchg_r64_rax(DX); +} + +void factl(void) { + // a * b + a * c: + // a * (b + c) + + // tag, (a, bc) + // a, (tag, bc) + + // xchg a, [d] + x86_inst_xchg_r64_m64(AX, DX); +} + +void factr(void) { + // a * c + b * c + // (a + b) * c + + // tag, (ab, c) + // ab, (tag, c) + // (tag, c), ab + // (tag, ab), c + + // xchg a, [d] + // xchg a, d + // xchg [a+8], d + x86_inst_xchg_r64_m64(AX, DX); + x86_inst_xchg_r64_rax(DX); + x86_inst_xchg_r64_m64_disp8(DX, AX, 8); +} + +static void allocate_cons(void) { + // a, b free=(_, next) + // _, b free=(a, next) + // _, next free=(a, b) + // _, (a, b) free=next + x86_inst_mov_m64_r64(BX, AX); + x86_inst_xchg_r64_m64_disp8(DX, BX, 8); + x86_inst_xchg_r64_r64(DX, BX); + + // a, b free=(_, next) + // (_, next), b free=a + // (_, b), next free=a + // (_, 
b), a free=next + // (a, b), _ free=next +} + +static void free_cons(void) { + // _, (a, b) free=next + // a, (_, b) free=next + // a, (_, next) free=b + // a, b free=(_, next) + x86_inst_mov_r64_m64(AX, DX); + x86_inst_xchg_r64_m64_disp8(BX, DX, 8); + x86_inst_xchg_r64_r64(DX, BX); +} + +void mapl_begin(void) { + x86_inst_push_r64(DX); + x86_inst_xchg_r64_rax(DX); + free_cons(); +} + +void mapl_end(void) { + allocate_cons(); + x86_inst_xchg_r64_rax(DX); + x86_inst_pop_r64(DX); +} + +void mapr_begin(void) { + x86_inst_push_r64(AX); + free_cons(); +} + +void mapr_end(void) { + allocate_cons(); + x86_inst_pop_r64(AX); +} + +void unitil(void) { + allocate_cons(); + x86_inst_xchg_r64_rax(DX); + x86_inst_mov_r64_r64(DX, DI); +} + +void unitir(void) { + allocate_cons(); + x86_inst_mov_r64_r64(AX, DI); +} + +void unitel(void) { + x86_inst_xchg_r64_rax(DX); + free_cons(); +} + +void uniter(void) { + free_cons(); +} + +void comm_plus(void) { + x86_inst_xor_al_imm8(1); +} + +static void inst_jump(symbol sym) { + int32_t disp = symbol_offset(sym, X86_JMP_DISP8_SIZE); + if (disp >= INT8_MIN && disp <= INT8_MAX) { + x86_inst_jmp_disp8(disp); + } else { + x86_inst_jmp_disp32_op(); + relocate_pc32(sym); + } + // TODO: support 64-bit jumps? +} + +static void inst_jump_if_zero(symbol sym) { + int32_t disp = symbol_offset(sym, X86_JZ_DISP8_SIZE); + if (disp >= INT8_MIN && disp <= INT8_MAX) { + x86_inst_jz_disp8(disp); + } else { + x86_inst_jz_disp32_op(); + relocate_pc32(sym); + } +} + + +static void inst_jump_if_not_zero(symbol sym) { + int32_t disp = symbol_offset(sym, X86_JNZ_DISP8_SIZE); + if (disp >= INT8_MIN && disp <= INT8_MAX) { + x86_inst_jnz_disp8(disp); + } else { + x86_inst_jnz_disp32_op(); + relocate_pc32(sym); + } +} + +// NOTE: +// This is a really stupid implementation of assoc. +// However, it might not be worth optimizing compared to +// eliminating assoc entirely when possible. 
+ +void assocl_plus(void) { + // a + (b + c) + // (a + b) + c + + // tag, a/(tag, b/c) + symbol when_bc = new_symbol(); + symbol end_if = new_symbol(); + x86_inst_test_r8_r8(AX, AX); + inst_jump_if_not_zero(when_bc); + + ///// A + + // 0, a + allocate_cons(); + // 0, (0, a) + inst_jump(end_if); + + /// BC + + define_executable_symbol(when_bc); + // 1, (tag, b/c) + symbol when_c = new_symbol(); + x86_inst_test_m8_imm8(DX, 1); + inst_jump_if_not_zero(when_c); + + /// B + + // 1, (0, b) + x86_inst_xchg_r64_m64(AX, DX); + // 0, (1, b) + inst_jump(end_if); + + /// C + + define_executable_symbol(when_c); + // 1, (1, c) + free_cons(); + // 1, c + + define_executable_symbol(end_if); + // tag, (tag, a/b)/c +} + +// This is the same as assocl, but with `jump_if_not_zero` +// replaced with `jump_if_zero`. + +void assocr_plus(void) { + // (a + b) + c + // a + (b + c) + + // tag, (tag, a/b)/c + symbol when_ab = new_symbol(); + symbol end_if = new_symbol(); + x86_inst_test_r8_r8(AX, AX); + inst_jump_if_zero(when_ab); + + /// C + + // 1, c + allocate_cons(); + // 1, (1, c) + inst_jump(end_if); + + /// AB + + define_executable_symbol(when_ab); + // 0, (tag, a/b) + symbol when_a = new_symbol(); + x86_inst_test_m8_imm8(AX, 1); + inst_jump_if_zero(when_a); + + /// B + + // 0, (1, b) + x86_inst_xchg_r64_m64(AX, DX); + // 1, (0, b) + inst_jump(end_if); + + /// A + + define_executable_symbol(when_a); + // 1, (1, a) + free_cons(); + + define_executable_symbol(end_if); + // tag, a/(tag, b/c) +} + +#define MAX_BRANCHES 64 +static symbol branches[MAX_BRANCHES]; +static size_t branchi = 0; + +static symbol new_branch() { + if (branchi == MAX_BRANCHES) { + fprintf(stderr, "exeeded maximum number of plus maps\n"); + exit(1); + } + symbol* branch = &branches[branchi++]; + *branch = new_symbol(); + return *branch; +} + +void mapl_plus_begin(void) { + symbol end_branch = new_branch(); + x86_inst_test_r8_r8(AX, AX); + inst_jump_if_not_zero(end_branch); + free_cons(); +} + +void mapl_plus_end(void) 
{ + allocate_cons(); + x86_zero(AX); + define_executable_symbol(branches[--branchi]); +} + +void mapr_plus_begin(void) { + symbol end_branch = new_branch(); + x86_inst_test_r8_r8(AX, AX); + inst_jump_if_zero(end_branch); + free_cons(); +} + +void mapr_plus_end(void) { + allocate_cons(); + x86_inst_mov_r64_imm(AX, 1); + define_executable_symbol(branches[--branchi]); +} + +void inl(void) { + allocate_cons(); + x86_zero(AX); +} + +void inr(void) { + allocate_cons(); + x86_inst_mov_r64_imm(AX, 1); +} + +void out(void) { + // a + a + // a + free_cons(); +} + +static void inst_load(reg dest, symbol sym) { + x86_inst_lea_r64_rip_disp32_op(dest); + relocate_pc32(sym); +} + +static symbol one_symbol; +static symbol loop_point; +static symbol exit_point; + +void quit(void) { + inst_jump(exit_point); +} + +static void print_unary(void) { + // System calls will mangle AX and DX. + x86_inst_mov_r64_r64(R12, AX); + x86_inst_mov_r64_r64(R14, DX); + + symbol loop_point = new_symbol(); + symbol exit_point = new_symbol(); + define_executable_symbol(loop_point); + // Print `1` until we stop hitting rights. + x86_inst_test_r8_r8(R12, R12); + inst_jump_if_zero(exit_point); + x86_inst_mov_r64_m64(R12, R14); + x86_inst_mov_r64_m64_disp8(R14, R14, 8); + + x86_inst_mov_r64_imm(AX, 1); // sys_write + x86_inst_mov_r64_imm(DI, 1); // stdout + inst_load(SI, one_symbol); + x86_inst_mov_r64_imm(DX, 1); + x86_inst_syscall(); + inst_jump(loop_point); + define_executable_symbol(exit_point); +} + +static void exit_syscall(void) { + x86_inst_mov_r64_imm(AX, 60); // sys_exit + x86_zero(DI); + x86_inst_syscall(); +} + +#define MEMORY_SIZE 0x100000 + +static void initialize_free_list(void) { + // allocate 1 MiB with a syscall, because it's easier than + // adding an ELF RW segment for the moment. 
+ x86_inst_mov_r64_imm(AX, 9); + x86_inst_mov_r64_imm(DI, (uint64_t) -1); + x86_inst_mov_r64_imm(SI, MEMORY_SIZE); + x86_inst_mov_r64_imm(DX, 0x3 /* PROT_READ | PROT_WRITE */); + x86_inst_mov_r64_imm(R10, 0x8022 /* MAP_PRIVATE | MAP_POPULATE | MAP_ANONYMOUS */); + x86_inst_mov_r64_imm(R8, (uint64_t) -1); + x86_zero(R9); + x86_inst_syscall(); + + // The beginning of the free list. + x86_inst_mov_r64_r64(BX, AX); + + x86_inst_mov_r64_imm(CX, MEMORY_SIZE / 2); + x86_inst_lea_r64_m64_disp8(DX, AX, 8); + symbol exit_point = new_symbol(); + symbol loop_point = new_symbol(); + define_executable_symbol(loop_point); + x86_inst_test_r64_r64(CX, CX); + inst_jump_if_zero(exit_point); + x86_inst_mov_m64_r64_disp8(AX, DX, 8); + x86_inst_mov_r64_r64(AX, DX); + x86_inst_add_r64_imm8(DX, 16); + x86_inst_sub_r64_imm8(CX, 16); + inst_jump(loop_point); + define_executable_symbol(exit_point); +} + +symbol init_bytecode(void) { + one_symbol = new_symbol(); + define_executable_symbol(one_symbol); + append_u8((uint8_t) '1'); + + exit_point = new_symbol(); + define_executable_symbol(exit_point); + print_unary(); + exit_syscall(); + + symbol entry_point = new_symbol(); + define_executable_symbol(entry_point); + initialize_free_list(); + + // Self-referential unit value. + //x86_inst_lea_r64_m64_disp8(DI, SP, -16); + x86_inst_mov_r64_r64(DI, SP); + x86_inst_sub_r64_imm8(DI, 16); + x86_inst_push_r64(DI); + x86_inst_push_r64(DI); + + // Initial state is a unit in the left. + // (Right states will be loop states.) 
+ x86_inst_mov_r64_r64(AX, DI); + x86_inst_mov_r64_r64(DX, DI); + inl(); + + loop_point = new_symbol(); + define_executable_symbol(loop_point); + + return entry_point; +} + +void finish_bytecode(void) { + inst_jump(loop_point); +} diff --git a/src/bytecode.h b/src/bytecode.h new file mode 100644 index 0000000..efe9ba4 --- /dev/null +++ b/src/bytecode.h @@ -0,0 +1,57 @@ +#ifndef _BYTECODE_H +#define _BYTECODE_H + +#include "format.h" + +/// a * b <=> b * a +void comm(void); +/// a * (b * c) => (a * b) * c +void assocl(void); +/// (a * b) * c => a * (b * c) +void assocr(void); +/// a * (b + c) => (a * b) + (a * c) +void distl(void); +/// (a + b) * c => (a * c) + (b * c) +void distr(void); +/// (a * b) + (a * c) => a * (b + c) +void factl(void); +/// (a * c) + (b * c) => (a + b) * c +void factr(void); +/// (a => b) => (a * c => b * c) +void mapl_begin(void); +void mapl_end(void); +/// (b => c) => (a * b => a * c) +void mapr_begin(void); +void mapr_end(void); +/// a => a * 1 +void unitil(void); +/// a => 1 * a +void unitir(void); +/// a * 1 => a +void unitel(void); +/// 1 * a => a +void uniter(void); +/// a + b <=> b + a +void comm_plus(void); +/// a + (b + c) => (a + b) + c +void assocl_plus(void); +/// (a + b) + c => a + (b + c) +void assocr_plus(void); +/// (a => b) => (a + c => b + c) +void mapl_plus_begin(void); +void mapl_plus_end(void); +/// (b => c) => (a + b => a + c) +void mapr_plus_begin(void); +void mapr_plus_end(void); +/// a => a + b +void inl(void); +/// b => a + b +void inr(void); +/// a + a => a +void out(void); + +void quit(void); +symbol init_bytecode(void); +void finish_bytecode(void); + +#endif diff --git a/src/ir/flat_register.c b/src/ir/flat_register.c deleted file mode 100644 index 86d07a0..0000000 --- a/src/ir/flat_register.c +++ /dev/null @@ -1,241 +0,0 @@ -#include "flat_register.h" -#include "../format.h" -#include "../x86encode.h" - -#include -#include -#include -#include -#include - -typedef struct fr_label_info { - symbol symbol; - 
uint8_t argc; - fr_type types[MAX_ARGS]; -} fr_label_info; - -typedef struct fr_var_info { - fr_type type; - _Bool literal; - uint64_t value; -} fr_var_info; - -static size_t fr_labelc = 0; -static fr_label_info fr_labels[MAX_LABELS]; -static size_t fr_varc = 0; -static fr_var_info fr_vars[MAX_VARS]; -static uint32_t fr_var_offset = 0; - -static fr_var fr_push(fr_type type) { - fr_var_info* info = &fr_vars[fr_varc]; - info->type = type; - info->literal = false; - info->value = fr_var_offset; - fr_var_offset += type.size; - return fr_varc++; -} - -fr_label fr_declare(size_t typec, fr_type* types) { - fr_label_info* info = &fr_labels[fr_labelc]; - info->symbol = new_symbol(); - info->argc = typec; - memcpy(info->types, types, typec * sizeof(fr_type)); - return fr_labelc++; -} - - -void fr_define(fr_label l, fr_var* vars) { - fr_label_info label = fr_labels[l]; - fr_varc = 0; - fr_var_offset = 0; - for (uint8_t i = 0; i < label.argc; i++) { - vars[i] = fr_push(label.types[i]); - } -} - -fr_var fr_lit(fr_type type, uint64_t val) { - fr_var_info* info = &fr_vars[fr_varc]; - info->type = type; - info->literal = true; - info->value = val; - return fr_varc++; -} - -// TODO: register allocation instead of infinitely-growing stack -void fr_load_reg(reg reg, fr_var v) { - fr_var_info var = fr_vars[v]; - if (var.literal && var.type.size == 8) { - x86_inst_mov_r64_imm(reg, var.value); - } else if (var.type.size == 8 || var.type.tag == FR_BOX) { - x86_inst_mov_r64_m64_disp(reg, BP, var.value); - } else if (var.literal && var.type.size == 1) { - x86_inst_mov_r8_m8_disp(reg, BP, var.value); - } else if (var.type.size == 1) { - x86_inst_mov_r8_imm8(reg, var.value); - } else if (var.type.tag == FR_REF) { - x86_inst_lea_r64_m64_disp(reg, BP, var.value); - } else { - fprintf(stderr, "unsupported variable size, for now\n"); - exit(1); - } -} - -void fr_store_reg(reg reg, fr_var v) { - fr_var_info var = fr_vars[v]; - if (var.type.size == 8 || var.type.tag == FR_BOX) { - 
x86_inst_mov_m64_r64_disp(reg, BP, var.value); - } else if (var.type.size == 1) { - x86_inst_mov_m8_r8_disp(reg, BP, var.value); - } else { - fprintf(stderr, "unsupported variable size, for now\n"); - exit(1); - } -} - -fr_var fr_index(fr_var box, fr_var index) { - // TODO: optimized constant multiplies - // TODO: fuse into indirect addressing modes - fr_load_reg(AX, index); - x86_inst_mov_r64_imm32(DX, fr_vars[index].type.size); - x86_inst_mul_r64(DX); - fr_type type = { - .tag = FR_BOX, - .size = 8, - }; - fr_var var = fr_push(type); - fr_store_reg(AX, var); - return var; -} - -void fr_set(fr_var box, fr_var index, fr_var ref) { - // TODO: use fused addressing modes, optimize for small types - // TODO: use `lea`, optimize for constant index - fr_load_reg(AX, index); - fr_load_reg(DI, box); - fr_load_reg(SI, ref); - x86_inst_mov_r64_imm(CX, fr_vars[box].type.size); - x86_inst_mul_r64(CX); - x86_inst_add_r64_r64(DI, AX); - x86_inst_rep_movsb(); -} - -fr_var fr_load(fr_var box, fr_type type, uint32_t offset) { - // TODO: optimized copy for small (i.e. almost all) types - fr_var dest = fr_push(type); - fr_load_reg(SI, box); - x86_inst_mov_r64_imm(CX, type.size); - x86_inst_lea_r64_m64_disp(DI, BP, offset); - x86_inst_rep_movsb(); - return dest; -} - -fr_var fr_struct(uint32_t memberc, fr_var* members) { - uint32_t size = 0; - for (uint32_t i = 0; i < memberc; i++) { - size += fr_vars[members[i]].type.size; - } - fr_type type = { - .tag = FR_REF, - .size = size, - }; - fr_var var = fr_push(type); - uint32_t offset = fr_vars[var].value; - - // TODO: optimized copy for small (i.e. 
almost all) types - for (uint32_t i = 0; i < memberc; i++) { - fr_var_info member = fr_vars[members[i]]; - fr_load_reg(AX, members[i]); - if (member.type.size == 1) { - x86_inst_mov_m8_r8_disp(AX, BP, offset); - offset += 1; - } else if (member.type.size == 8 || member.type.tag == FR_BOX) { - x86_inst_mov_m64_r64_disp(AX, BP, offset); - offset += 8; - } else { - x86_inst_lea_r64_m64_disp(SI, BP, member.value); - x86_inst_lea_r64_m64_disp(DI, BP, offset); - x86_inst_mov_r64_imm(CX, member.type.size); - x86_inst_rep_movsb(); - } - } - - return var; -} - -static void fr_prepare_jump(size_t argc, fr_var* args) { - // TODO: avoid unnecessary copying - fr_var scratch = fr_struct(argc, args); - fr_load_reg(SI, scratch); - x86_inst_mov_r64_r64(DI, BP); - x86_inst_mov_r64_imm(CX, fr_vars[scratch].type.size); - x86_inst_rep_movsb(); -} - -void inst_jump(symbol sym) { - int32_t disp = symbol_offset(sym, X86_JMP_DISP8_SIZE); - if (disp >= INT8_MIN && disp <= INT8_MAX) { - x86_inst_jmp_disp8(disp); - } else { - x86_inst_jmp_disp32_op(); - relocate_pc32(sym); - } - // TODO: support 64-bit jumps? 
-} - -void inst_jump_if_zero(symbol sym) { - int32_t disp = symbol_offset(sym, X86_JZ_DISP8_SIZE); - if (disp >= INT8_MIN && disp <= INT8_MAX) { - x86_inst_jz_disp8(disp); - } else { - x86_inst_jz_disp32_op(); - relocate_pc32(sym); - } -} - - -void inst_jump_if_not_zero(symbol sym) { - int32_t disp = symbol_offset(sym, X86_JNZ_DISP8_SIZE); - if (disp >= INT8_MIN && disp <= INT8_MAX) { - x86_inst_jnz_disp8(disp); - } else { - x86_inst_jnz_disp32_op(); - relocate_pc32(sym); - } -} - -void fr_if(fr_cond cond, fr_var x, fr_var y, fr_label ifl, fr_label elsel, size_t argc, fr_var* args) { - fr_load_reg(BX, x); - fr_load_reg(DX, y); - fr_prepare_jump(argc, args); - x86_inst_cmp_r64_r64(BX, DX); - symbol ifs = fr_labels[ifl].symbol; - symbol elses = fr_labels[elsel].symbol; - switch (cond) { - case FR_EQ: - inst_jump_if_zero(ifs); - inst_jump(elses); - break; - case FR_NE: - inst_jump_if_not_zero(ifs); - inst_jump(elses); - break; - } -} - -void fr_switch(fr_var index, size_t labelc, fr_label* labels, size_t argc, fr_var* args) { - // TODO: - /* - symbol table = new_symbol(); - fr_load_reg(CX, index); - x86_inst_lea_r64_ip_disp32(AX, X86_JMP_M64_SIZE); - x86_inst_add_r64_r64(AX, CX); - x86_inst_jmp_m64( - */ - fprintf(stderr, "jump tables not yet implemented\n"); - exit(1); -} - -void fr_jump(fr_label label, size_t argc, fr_var* args) { - fr_prepare_jump(argc, args); - inst_jump(fr_labels[label].symbol); -} diff --git a/src/ir/flat_register.h b/src/ir/flat_register.h deleted file mode 100644 index e2fd2a9..0000000 --- a/src/ir/flat_register.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef FLAT_REGISTER_H -#define FLAT_REGISTER_H -/// An IR with flat (i.e. non-recursive) syntax and types. -/// Registers only contain bitvectors. Memory layout (size and offsets) are explicit. 
- -#include -#include - -typedef size_t fr_label; -typedef size_t fr_var; - -#define MAX_LABELS 4096 -#define MAX_VARS 4096 -#define MAX_ARGS 32 - -enum fr_type_tag { - FR_UINT, - FR_BOX, - FR_REF, -}; - -typedef struct fr_type { - enum fr_type_tag tag; - uint32_t size; -} fr_type; - -typedef enum fr_cond { - FR_EQ, - FR_NE, -} fr_cond; - -fr_label fr_declare(size_t typec, fr_type* types); -void fr_define(fr_label label, fr_var* vars); - -fr_var fr_lit(fr_type type, uint64_t val); -fr_var fr_index(fr_var box, fr_var index); -void fr_set(fr_var box, fr_var index, fr_var ref); -fr_var fr_load(fr_var box, fr_type type, uint32_t offset); -fr_var fr_struct(uint32_t memberc, fr_var* members); -void fr_if(fr_cond cond, fr_var x, fr_var y, fr_label ifl, fr_label elsel, size_t argc, fr_var* args); -void fr_switch(fr_var index, size_t labelc, fr_label* labels, size_t argc, fr_var* args); -void fr_jump(fr_label label, size_t argc, fr_var* args); -#endif diff --git a/src/main.c b/src/main.c index d426591..d8b3f84 100644 --- a/src/main.c +++ b/src/main.c @@ -3,25 +3,310 @@ #include #include +#include "bytecode.h" #include "format.h" #include "io.h" -#include "ir.h" -#include "parse.h" #define ELF_HEADER_SIZE 0xb0 + +// a + (b + (c + d)) +// (a + b) + (c + d) +// (b + a) + (c + d) +// b + (a + (c + d)) +// + +void transition_right(void) { + assocl_plus(); + mapl_plus_begin(); + out(); + inr(); + mapl_plus_end(); + assocr_plus(); +} + +void transition_left(void) { + out(); + inl(); +} + +void jump_from_to(size_t from, size_t to) { + if (from < to) { + mapl_plus_begin(); + inl(); + for(; from <= to; to--) { + inr(); + } + mapl_plus_end(); + mapr_plus_begin(); + inr(); + mapr_plus_end(); + out(); + } else if (to > from) { + for (size_t i = 0; i < from - to; i++) { + mapr_plus_end(); + mapl_plus_begin(); + inl(); + mapl_plus_end(); + } + } +} + + +void transition_into(void) { + assocl_plus(); + mapl_plus_begin(); + mapl_plus_begin(); + inl(); + mapl_plus_end(); + out(); + inr(); + 
mapl_plus_end(); + assocr_plus(); +} + +void transition_while(void) { + assocl_plus(); + mapl_plus_begin(); + mapr_plus_begin(); + inr(); + mapr_plus_end(); + out(); + mapl_plus_end(); + assocr_plus(); +} + +void inc(void) { + inr(); + factl(); +} + +void new_nat(void) { + // ctx + unitil(); // ctx * 1 + inl(); // ctx * 1 + ctx * 1 + factl(); // ctx * (1 + 1) +} + +void swap(void) { + assocr(); + mapr_begin(); + comm(); + mapr_end(); + assocl(); +} + +static void select_var(size_t var) { + // (... * a) * (b * (c * ...)) + for (size_t i = 0; i < var; i++) { + assocr(); + // ((... * a) * b) * (c * ...) + } + comm(); + // (c * ...) * ((... * a) * b) + assocl(); + // ((c * ...) * (... * a)) * b +} + +static void unselect_var(size_t var) { + assocr(); + comm(); + for (size_t i = 0; i < var; i++) { + assocl(); + } +} + +static void case_on(size_t var) { + select_var(var); + distr(); + mapl_plus_begin(); { + unselect_var(var); + } mapl_plus_end(); + mapr_plus_begin(); { + unselect_var(var); + } mapr_plus_end(); +} + +static void snipe(size_t var) { + select_var(var); + unitel(); + comm(); + for (size_t i = 0; i < var; i++) { + assocl(); + } +} + symbol compile(void) { - symbol entry_point = new_symbol(); - define_executable_symbol(entry_point); - var argc, argv, env; - init_ir(&argc, &argv, &env); - parse(); - var a = lit(52); - var b = lit(10); - var exit_code = sub(a, b); - var sys_exit = lit(60); - var args[2] = { sys_exit, exit_code }; - syscall(2, args); + symbol entry_point = init_bytecode(); + + // This is the program we're trying to execute: + // + // fib n = fib_acc n 0 1 + // fib_acc 0 a b = a + // fib_acc (S n) a b = fib_acc n b (a + b) + // + // Looks simple, right? Well, things are a bit more complicated than that. + // + // 1. In `fib_acc 0`, we implicitly drop the value of `b`. Because we do not have + // weakening, we will have to free `b` explicitly here. + // + // fib_acc 0 a 0 = a + // fib_acc 0 a (S b) = fib_acc 0 a b + // + // 2. 
In `fib_acc (S n)`, we use `b` twice. We do not have contraction, so we must + // explicitly duplicate it, or implicitly duplicate it when we consume `b`. + // + // 3. We do not have addition as a built-in; we will need to define it ourselves. + // Moreover, we do not have functions, so it must be fused into the definition + // of fib_acc. + // + // -- We will duplicate `b` into the first argument (the new `a`) + // -- while adding it to the second argument (`a`, which will become the new `b`). + // fib_acc (S n) a b = fib_acc_plus n 0 a b + // fib_acc_plus n a b' 0 = fib_acc n a b' + // fib_acc_plus n a b' (S b) = fib_acc_plus n (S a) (S b') b + // + // 4. We'll have to do a lot of tedious work shuffling variables around. + // We don't even have implicit associativity, much less commutativity! + // + // We have this hierarchy of states: + // + // 1. start(1) + // 2. fib(n) + // 3. fib_acc(n, a, b) + // 4. fib_acc(0, a, b) + // 5. fib_acc_0(a b) + // + + // States: + // * start(1) + // * fib(n) + // * fib_acc(n, a, b) + // * fib_acc_Z(1, (a, b)) + // * fib_acc_Z_free(a, b) + // * fib_acc_Z_done + // * fib_acc_S + // * fib_acc_S_copy + // * fib_acc_S_copy_done + // * fib_acc_S_copy_S + + // State 0: starting state + mapl_plus_begin(); + // Initialize with integer (5). 
+ inl(); + inr(); + inr(); + inr(); + inr(); + inr(); + mapl_plus_end(); + transition_right(); + + mapr_plus_begin(); + // State 1: fib(n); + mapl_plus_begin(); + // a = 0 + new_nat(); + // b = 1 + new_nat(); + inc(); + mapl_plus_end(); + transition_right(); + + mapr_plus_begin(); + // State 2: fib_acc(n, a, b) + mapl_plus_begin(); + // if n=1, we return the accumulated value + assocr(); + distl(); + mapl_plus_end(); + transition_right(); + + mapr_plus_begin(); + mapl_plus_begin(); + // State 3.1.1: fib_acc_Z(1, (a, b)) + mapl_plus_begin(); + uniter(); + // (a, b) + mapl_plus_end(); + transition_into(); + + mapr_plus_begin(); + // State 3.1.2.1: fib_acc_Z_free(a, b) + mapl_plus_begin(); + // n * (1 + n) + distr(); + mapl_plus_end(); + transition_while(); + + // State 3.1.2.2: fib_acc_Z_done + mapr_plus_begin(); + uniter(); + quit(); + mapr_plus_end(); + mapr_plus_end(); + mapl_plus_end(); + + mapr_plus_begin(); + // State 4: fib_acc_S + mapl_plus_begin(); + assocl(); + new_nat(); + swap(); + new_nat(); + swap(); + mapl_plus_end(); + transition_into(); + + mapr_plus_begin(); + mapl_plus_begin(); + // State 5.1: fib_acc_S_copy(n, a, b1, b2, b) + mapl_plus_begin(); + distl(); + mapl_plus_end(); + transition_into(); + mapr_plus_begin(); + mapl_plus_begin(); + // State 5.2.1: fib_acc_S_copy_done(n, a, b, b, 1) + uniter(); + // TODO: + mapl_plus_end(); + mapr_plus_begin(); + // State 5.2.2: fib_acc_S_copy_S(n, a, b1, b2, b) + mapr_plus_end(); + mapr_plus_end(); + mapl_plus_end(); + + mapr_plus_end(); + mapr_plus_end(); + mapr_plus_end(); + mapr_plus_end(); + mapr_plus_end(); + + + // State 1: fib(n) + assocl_plus(); + mapl_plus_begin(); + + // switch to state 2 + out(); + inr(); + mapl_plus_end(); + assocr_plus(); + + mapr_plus_begin(); + // State 2: fib_acc(n, a, b) + mapl_plus_begin(); + + + // State 2.1: transition to state 3 + mapl_plus_begin(); + + + mapr_plus_end(); + mapr_plus_end(); + + finish_bytecode(); return entry_point; } diff --git a/src/x86encode.c 
b/src/x86encode.c index f4c7dc2..5b205c9 100644 --- a/src/x86encode.c +++ b/src/x86encode.c @@ -30,6 +30,25 @@ static void x86_opt_rexr(reg reg) { } } +static void x86_opt_rexb(reg reg) { + if (reg >= R8) { + append_u8(REX | REX_B); + } +} + +static void x86_opt_rexrb(reg r, reg b) { + uint8_t rex = REX; + if (r >= R8) { + rex |= REX_R; + } + if (b >= R8) { + rex |= REX_B; + } + if (rex != REX) { + append_u8(rex); + } +} + static void x86_rexwr(reg reg) { uint8_t rex = REX | REX_W; if (reg >= R8) rex |= REX_R; @@ -70,29 +89,29 @@ static void x86_modxm(uint8_t ext, reg b) { } static void x86_enc_opr(uint8_t op, reg reg) { - x86_opt_rexr(reg); + x86_opt_rexb(reg); append_u8(op + REG(reg)); } static void x86_enc_rexw_opr(uint8_t op, reg reg) { - x86_rexwr(reg); + x86_rexwb(reg); append_u8(op + REG(reg)); } static void x86_enc_opr_imm32(uint8_t op, reg reg, uint32_t imm) { - x86_opt_rexr(reg); + x86_opt_rexb(reg); append_u8(op + REG(reg)); append_u32(imm); } static void x86_enc_rexw_opr_imm32(uint8_t op, reg reg, uint32_t imm) { - x86_rexwr(reg); + x86_rexwb(reg); append_u8(op + REG(reg)); append_u32(imm); } static void x86_enc_rexw_opr_imm64(uint8_t op, reg reg, uint64_t imm) { - x86_rexwr(reg); + x86_rexwb(reg); append_u8(op + REG(reg)); append_u64(imm); } @@ -155,6 +174,12 @@ static void x86_enc_rexw_modxm_imm(uint8_t op, uint8_t ext, reg m, uint32_t imm) } } +static void x86_enc_modrr(uint8_t op, reg r, reg b) { + x86_opt_rexrb(r, b); + append_u8(op); + x86_modrr(r, b); +} + static void x86_enc_disp8(uint8_t op, int8_t disp) { uint8_t buf[2] = { op, (uint8_t) disp }; append_data(2, buf); @@ -203,19 +228,57 @@ void x86_inst_mov_r64_m64_disp(reg dest, reg src, int32_t disp) { } void x86_inst_mov_m64_r64(reg dest, reg src) { - x86_enc_rexw_modrm(0x8a, src, dest); + x86_enc_rexw_modrm(0x89, src, dest); } void x86_inst_mov_m64_r64_disp8(reg dest, reg src, int8_t disp) { - x86_enc_rexw_modrm8(0x8a, src, dest, disp); + x86_enc_rexw_modrm8(0x89, src, dest, disp); } void 
x86_inst_mov_m64_r64_disp32(reg dest, reg src, int32_t disp) {
-	x86_enc_rexw_modrm32(0x8a, src, dest, disp);
+	x86_enc_rexw_modrm32(0x89, src, dest, disp);
 }
 
 void x86_inst_mov_m64_r64_disp(reg dest, reg src, int32_t disp) {
-	x86_enc_rexw_modrmd(0x8a, src, dest, disp);
+	x86_enc_rexw_modrmd(0x89, src, dest, disp);
+}
+
+void x86_inst_mov_r32_r32(reg dest, reg src) {
+	x86_enc_modrr(0x8b, dest, src);
+}
+
+void x86_inst_xchg_r64_rax(reg src) {
+	x86_enc_rexw_opr(0x90, src);
+}
+
+void x86_inst_xchg_r64_r64(reg dest, reg src) {
+	if (src == AX) {
+		x86_inst_xchg_r64_rax(dest);
+	} else if (dest == AX) {
+		x86_inst_xchg_r64_rax(src);
+	} else {
+		x86_enc_rexw_modrr(0x87, dest, src);
+	}
+}
+
+void x86_inst_xchg_r64_m64(reg dest, reg src) {
+	x86_enc_rexw_modrm(0x87, dest, src);
+}
+
+void x86_inst_xchg_r64_m64_disp8(reg dest, reg src, int8_t disp) {
+	x86_enc_rexw_modrm8(0x87, dest, src, disp);
+}
+
+void x86_inst_xchg_r64_m64_disp32(reg dest, reg src, int32_t disp) {
+	x86_enc_rexw_modrm32(0x87, dest, src, disp);
+}
+
+void x86_inst_xchg_r64_m64_disp(reg dest, reg src, int32_t disp) {
+	if (disp == 0) {
+		x86_inst_xchg_r64_m64(dest, src); /* FIX: exchange with memory [src]; the register-register form silently changed semantics */
+	} else {
+		x86_enc_rexw_modrmd(0x87, dest, src, disp);
+	}
 }
 
 void x86_inst_push_r64(reg reg) {
@@ -230,6 +293,16 @@ void x86_inst_test_r64_r64(reg r1, reg r2) {
 	x86_enc_rexw_modrr(0x85, r1, r2);
 }
 
+void x86_inst_test_r8_r8(reg r1, reg r2) {
+	x86_enc_modrr(0x84, r1, r2);
+}
+
+void x86_inst_test_m8_imm8(reg r, uint8_t imm) {
+	x86_opt_rexb(r); /* FIX: r is the ModRM base (r/m) -> REX.B, and emit at most one REX prefix (old code could emit two) */
+	append_u8(0xf6); x86_modrm(0, r); /* FIX: TEST r/m8, imm8 is F6 /0 with a ModRM byte; 0xf6+REG(r) encodes unrelated opcodes. NOTE(review): assumes x86_modrm(0, r) yields mod=00, ext /0 — confirm helper */
+	append_u8(imm);
+}
+
 void x86_inst_jmp_disp8(int8_t disp) {
 	x86_enc_disp8(0xeb, disp);
 }
@@ -297,6 +370,25 @@ void x86_inst_jz_disp32_op(void) {
 	append_u8(0x84);
 }
 
+void x86_inst_xor_r32_r32(reg dest, reg src) {
+	x86_enc_modrr(0x31, dest, src);
+}
+
+void x86_inst_xor_m8_imm8(reg dest, uint8_t imm) {
+	x86_opt_rexb(dest); /* FIX: base register needs REX.B, not REX.R, and only one REX prefix */
+	append_u8(0x80); x86_modrm(6, dest); /* FIX: XOR r/m8, imm8 is 80 /6 with a ModRM byte, not 0x80+reg */
+	append_u8(imm);
+}
+
+void x86_inst_xor_al_imm8(uint8_t imm) {
+	append_u8(0x34);
+	append_u8(imm);
+}
+
+void 
x86_zero(reg dest) {
+	x86_inst_xor_r32_r32(dest, dest);
+}
+
 // TODO: special instructions for AX
 void x86_inst_sub_r64_imm8(reg dest, int8_t imm) {
 	x86_enc_rexw_modxm_imm8(0x83, 5, dest, (uint8_t) imm);
@@ -323,6 +415,20 @@ void x86_inst_add_r64_r64(reg dest, reg src) {
 	x86_enc_rexw_modrr(0x01, src, dest);
 }
 
+void x86_inst_lea_r64_m64_disp8(reg dest, reg src, int8_t disp) {
+	x86_enc_rexw_modrm8(0x8d, dest, src, disp);
+}
+
+void x86_inst_lea_r64_rip_disp32_op(reg dest) {
+	x86_rexwr(dest);
+	append_u8(0x8d);
+	x86_modrm(dest, BP);
+}
+
+void x86_inst_lea_r64_rip_disp32(reg dest, int32_t disp) {
+	x86_inst_lea_r64_rip_disp32_op(dest); append_u32((uint32_t) disp); /* FIX: modrm32 emits mod=10 -> [rbp+disp32]; RIP-relative needs mod=00, rm=101 as the _op variant does */
+}
+
 void x86_inst_syscall(void) {
 	const uint8_t buf[2] = { 0x0f, 0x05 };
 	append_data(2, buf);
diff --git a/src/x86encode.h b/src/x86encode.h
index c702678..8f2fba5 100644
--- a/src/x86encode.h
+++ b/src/x86encode.h
@@ -48,11 +48,21 @@ void x86_inst_mov_r8_m8_disp8(reg dest, reg src, int8_t disp);
 void x86_inst_mov_r8_m8_disp32(reg dest, reg src, int32_t disp);
 void x86_inst_mov_r8_m8_disp(reg dest, reg src, int32_t disp);
 
+void x86_inst_mov_r32_r32(reg dest, reg src);
+
+void x86_inst_xchg_r64_rax(reg src);
+void x86_inst_xchg_r64_r64(reg dest, reg src);
+void x86_inst_xchg_r64_m64(reg dest, reg src);
+void x86_inst_xchg_r64_m64_disp8(reg dest, reg src, int8_t disp);
+void x86_inst_xchg_r64_m64_disp32(reg dest, reg src, int32_t disp);
+void x86_inst_xchg_r64_m64_disp(reg dest, reg src, int32_t disp);
+
 void x86_inst_push_r64(reg reg);
 void x86_inst_pop_r64(reg reg);
 
 void x86_inst_test_r8_r8(reg r1, reg r2);
 void x86_inst_test_r64_r64(reg r1, reg r2);
+void x86_inst_test_m8_imm8(reg r, uint8_t imm);
 
 void x86_inst_cmp_r64_r64(reg r1, reg r2);
 
@@ -77,6 +87,11 @@ void x86_inst_jz_disp32(int32_t disp);
 void x86_inst_jz_disp(int32_t disp);
 void x86_inst_jz_disp32_op(void);
 
+void x86_inst_xor_r32_r32(reg dest, reg src);
+void x86_inst_xor_al_imm8(uint8_t imm);
+void x86_inst_xor_m8_imm8(reg dest, uint8_t imm);
+void x86_zero(reg 
dest); + void x86_inst_sub_r64_imm8(reg dest, int8_t imm); void x86_inst_sub_r64_imm32(reg dest, int32_t imm); void x86_inst_sub_r64_imm(reg dest, int32_t imm); @@ -87,9 +102,11 @@ void x86_inst_add_r64_r64(reg dest, reg src); void x86_inst_mul_r64(reg src); -void x86_inst_lea_r64_m64_disp8(reg dest, reg src, int32_t disp); +void x86_inst_lea_r64_m64_disp8(reg dest, reg src, int8_t disp); void x86_inst_lea_r64_m64_disp32(reg dest, reg src, int32_t disp); void x86_inst_lea_r64_m64_disp(reg dest, reg src, int32_t disp); +void x86_inst_lea_r64_rip_disp32_op(reg dest); +void x86_inst_lea_r64_rip_disp32(reg dest, int32_t disp); void x86_inst_rep_movsb(void);