From 1863d420b63b30052415bd267ebf802c240ef04b Mon Sep 17 00:00:00 2001
From: James Martin
Date: Sat, 3 Dec 2022 16:49:30 -0800
Subject: [PATCH] WIP IR (broken)

---
 src/asm.c              |  43 ------
 src/asm.h              |  16 ---
 src/ir/flat_register.c | 304 +++++++++++++++++++++++++++++++++++++++++
 src/ir/flat_register.h |  43 ++++++
 src/x86encode.h        |  20 +++
 5 files changed, 367 insertions(+), 59 deletions(-)
 delete mode 100644 src/asm.c
 delete mode 100644 src/asm.h
 create mode 100644 src/ir/flat_register.c
 create mode 100644 src/ir/flat_register.h

diff --git a/src/asm.c b/src/asm.c
deleted file mode 100644
index 78917ae..0000000
--- a/src/asm.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/// This file handles the contextual generation of machine code.
-/// It abstracts over quirks like the limitations of addressing modes,
-/// provides higher-level functionality, and can perform peephole optimization.
-
-#include "asm.h"
-#include "format.h"
-#include "x86encode.h"
-
-#include
-#include
-
-void inst_jump(symbol sym) {
-    int32_t disp = symbol_offset(sym, X86_JMP_DISP8_SIZE);
-    if (disp >= INT8_MIN && disp <= INT8_MAX) {
-        x86_inst_jmp_disp8(disp);
-    } else {
-        x86_inst_jmp_disp32_op();
-        relocate_pc32(sym);
-    }
-    // TODO: support 64-bit jumps?
-}
-
-void inst_jump_if_zero(symbol sym, reg reg) {
-    x86_inst_test_r64_r64(reg, reg);
-    int32_t disp = symbol_offset(sym, X86_JZ_DISP8_SIZE);
-    if (disp >= INT8_MIN && disp <= INT8_MAX) {
-        x86_inst_jz_disp8(disp);
-    } else {
-        x86_inst_jz_disp32_op();
-        relocate_pc32(sym);
-    }
-}
-
-void inst_jump_if_not_zero(symbol sym, reg reg) {
-    x86_inst_test_r64_r64(reg, reg);
-    int32_t disp = symbol_offset(sym, X86_JNZ_DISP8_SIZE);
-    if (disp >= INT8_MIN && disp <= INT8_MAX) {
-        x86_inst_jnz_disp8(disp);
-    } else {
-        x86_inst_jnz_disp32_op();
-        relocate_pc32(sym);
-    }
-}
diff --git a/src/asm.h b/src/asm.h
deleted file mode 100644
index 235bccf..0000000
--- a/src/asm.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _ASM_H
-#define _ASM_H
-
-#include "format.h"
-#include "x86encode.h"
-
-/// Jump to a known address.
-void inst_jump(symbol sym);
-
-/// Jump to a known address if the argument is zero.
-void inst_jump_if_zero(symbol sym, reg reg);
-
-/// Jump to a known address if the argument is not zero.
-void inst_jump_if_not_zero(symbol sym, reg reg);
-
-#endif
diff --git a/src/ir/flat_register.c b/src/ir/flat_register.c
new file mode 100644
index 0000000..86d07a0
--- /dev/null
+++ b/src/ir/flat_register.c
@@ -0,0 +1,304 @@
+#include "flat_register.h"
+#include "../format.h"
+#include "../x86encode.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/// A declared label: its symbol and the types of the arguments it takes.
+typedef struct fr_label_info {
+    symbol symbol;
+    uint8_t argc;
+    fr_type types[MAX_ARGS];
+} fr_label_info;
+
+/// A variable of the label being defined. `value` is the literal itself when
+/// `literal` is set, otherwise the variable's displacement from BP.
+typedef struct fr_var_info {
+    fr_type type;
+    _Bool literal;
+    uint64_t value;
+} fr_var_info;
+
+static size_t fr_labelc = 0;
+static fr_label_info fr_labels[MAX_LABELS];
+static size_t fr_varc = 0;
+static fr_var_info fr_vars[MAX_VARS];
+static uint32_t fr_var_offset = 0;
+
+/// Fill in the next free entry of `fr_vars` and return its index.
+/// Exits instead of writing past the end of the table.
+static fr_var fr_new_var(fr_type type, _Bool literal, uint64_t value) {
+    if (fr_varc >= MAX_VARS) {
+        fprintf(stderr, "too many variables\n");
+        exit(1);
+    }
+    fr_var_info* info = &fr_vars[fr_varc];
+    info->type = type;
+    info->literal = literal;
+    info->value = value;
+    return fr_varc++;
+}
+
+/// Allocate a new non-literal variable of `type` at the current frame offset.
+static fr_var fr_push(fr_type type) {
+    fr_var var = fr_new_var(type, false, fr_var_offset);
+    fr_var_offset += type.size;
+    return var;
+}
+
+/// Declare a label taking `typec` arguments of the given `types`.
+/// Exits if the label table is full or `typec` exceeds MAX_ARGS.
+fr_label fr_declare(size_t typec, fr_type* types) {
+    if (fr_labelc >= MAX_LABELS) {
+        fprintf(stderr, "too many labels\n");
+        exit(1);
+    }
+    if (typec > MAX_ARGS) {
+        fprintf(stderr, "too many label arguments\n");
+        exit(1);
+    }
+    fr_label_info* info = &fr_labels[fr_labelc];
+    info->symbol = new_symbol();
+    info->argc = (uint8_t) typec;
+    if (typec > 0) {
+        memcpy(info->types, types, typec * sizeof(fr_type));
+    }
+    return fr_labelc++;
+}
+
+/// Start the body of label `l`: reset the variable table and allocate one
+/// variable per declared argument, writing their handles to `vars`.
+// NOTE(review): the label's symbol is not bound to the current position here;
+// presumably that is still to be written.
+void fr_define(fr_label l, fr_var* vars) {
+    fr_label_info* label = &fr_labels[l];
+    fr_varc = 0;
+    fr_var_offset = 0;
+    for (uint8_t i = 0; i < label->argc; i++) {
+        vars[i] = fr_push(label->types[i]);
+    }
+}
+
+/// Create a literal variable holding `val`. Literals take no frame space.
+fr_var fr_lit(fr_type type, uint64_t val) {
+    return fr_new_var(type, true, val);
+}
+
+/// Emit code loading variable `v` into `reg`: the value of 1- and 8-byte
+/// variables and boxes, or the frame address of any other FR_REF variable.
+// TODO: register allocation instead of infinitely-growing stack
+void fr_load_reg(reg reg, fr_var v) {
+    fr_var_info var = fr_vars[v];
+    if (var.literal && var.type.size == 8) {
+        x86_inst_mov_r64_imm(reg, var.value);
+    } else if (var.type.size == 8 || var.type.tag == FR_BOX) {
+        x86_inst_mov_r64_m64_disp(reg, BP, var.value);
+    } else if (var.literal && var.type.size == 1) {
+        // A literal has no frame slot: `value` is the immediate itself.
+        x86_inst_mov_r8_imm8(reg, var.value);
+    } else if (var.type.size == 1) {
+        x86_inst_mov_r8_m8_disp(reg, BP, var.value);
+    } else if (var.type.tag == FR_REF) {
+        x86_inst_lea_r64_m64_disp(reg, BP, var.value);
+    } else {
+        fprintf(stderr, "unsupported variable size, for now\n");
+        exit(1);
+    }
+}
+
+/// Emit code storing `reg` into the frame slot of variable `v`.
+void fr_store_reg(reg reg, fr_var v) {
+    fr_var_info var = fr_vars[v];
+    // The encoders take (dest, src, disp): the BP-relative slot is the
+    // destination and `reg` is the source.
+    if (var.type.size == 8 || var.type.tag == FR_BOX) {
+        x86_inst_mov_m64_r64_disp(BP, reg, var.value);
+    } else if (var.type.size == 1) {
+        x86_inst_mov_m8_r8_disp(BP, reg, var.value);
+    } else {
+        fprintf(stderr, "unsupported variable size, for now\n");
+        exit(1);
+    }
+}
+
+/// Emit code multiplying `index` by a size and storing the product in a new
+/// 8-byte FR_BOX variable, which is returned.
+// NOTE(review): `box` is unused and the factor is the size of `index`'s own
+// type; presumably the box's element size and base were intended -- confirm.
+fr_var fr_index(fr_var box, fr_var index) {
+    // TODO: optimized constant multiplies
+    // TODO: fuse into indirect addressing modes
+    fr_load_reg(AX, index);
+    x86_inst_mov_r64_imm32(DX, fr_vars[index].type.size);
+    x86_inst_mul_r64(DX);
+    fr_type type = {
+        .tag = FR_BOX,
+        .size = 8,
+    };
+    fr_var var = fr_push(type);
+    fr_store_reg(AX, var);
+    return var;
+}
+
+/// Emit code copying one element of `fr_vars[box].type.size` bytes from `ref`
+/// into `box` at element `index`.
+void fr_set(fr_var box, fr_var index, fr_var ref) {
+    // TODO: use fused addressing modes, optimize for small types
+    // TODO: use `lea`, optimize for constant index
+    fr_load_reg(AX, index);
+    fr_load_reg(DI, box);
+    fr_load_reg(SI, ref);
+    x86_inst_mov_r64_imm(CX, fr_vars[box].type.size);
+    x86_inst_mul_r64(CX);
+    x86_inst_add_r64_r64(DI, AX);
+    x86_inst_rep_movsb();
+}
+
+/// Allocate a new variable of `type` and emit a `type.size`-byte copy out of
+/// `box`. Returns the new variable.
+// NOTE(review): the copy is written to BP + `offset`, not to the slot of the
+// returned variable, and `offset` is not applied to `box` -- confirm intent.
+fr_var fr_load(fr_var box, fr_type type, uint32_t offset) {
+    // TODO: optimized copy for small (i.e. almost all) types
+    fr_var dest = fr_push(type);
+    fr_load_reg(SI, box);
+    x86_inst_mov_r64_imm(CX, type.size);
+    x86_inst_lea_r64_m64_disp(DI, BP, offset);
+    x86_inst_rep_movsb();
+    return dest;
+}
+
+/// Emit code packing `members` back to back into a new FR_REF variable whose
+/// size is the sum of the member sizes. Returns the new variable.
+fr_var fr_struct(uint32_t memberc, fr_var* members) {
+    uint32_t size = 0;
+    for (uint32_t i = 0; i < memberc; i++) {
+        size += fr_vars[members[i]].type.size;
+    }
+    fr_type type = {
+        .tag = FR_REF,
+        .size = size,
+    };
+    fr_var var = fr_push(type);
+    uint32_t offset = fr_vars[var].value;
+
+    // TODO: optimized copy for small (i.e. almost all) types
+    for (uint32_t i = 0; i < memberc; i++) {
+        fr_var_info member = fr_vars[members[i]];
+        fr_load_reg(AX, members[i]);
+        if (member.type.size == 1) {
+            x86_inst_mov_m8_r8_disp(BP, AX, offset);
+            offset += 1;
+        } else if (member.type.size == 8 || member.type.tag == FR_BOX) {
+            x86_inst_mov_m64_r64_disp(BP, AX, offset);
+            offset += 8;
+        } else {
+            x86_inst_lea_r64_m64_disp(SI, BP, member.value);
+            x86_inst_lea_r64_m64_disp(DI, BP, offset);
+            x86_inst_mov_r64_imm(CX, member.type.size);
+            x86_inst_rep_movsb();
+            // Step past the bytes just copied so the next member does not
+            // overwrite them.
+            offset += member.type.size;
+        }
+    }
+
+    return var;
+}
+
+/// Emit code gathering `args` into one block and copying that block to BP,
+/// the offset at which fr_define places a label's first argument.
+static void fr_prepare_jump(size_t argc, fr_var* args) {
+    // TODO: avoid unnecessary copying
+    fr_var scratch = fr_struct(argc, args);
+    // Take the block's address explicitly: fr_load_reg would load the
+    // contents of a 1- or 8-byte block instead of pointing at it.
+    x86_inst_lea_r64_m64_disp(SI, BP, fr_vars[scratch].value);
+    x86_inst_mov_r64_r64(DI, BP);
+    x86_inst_mov_r64_imm(CX, fr_vars[scratch].type.size);
+    x86_inst_rep_movsb();
+}
+
+/// Jump to a known address.
+void inst_jump(symbol sym) {
+    int32_t disp = symbol_offset(sym, X86_JMP_DISP8_SIZE);
+    if (disp >= INT8_MIN && disp <= INT8_MAX) {
+        x86_inst_jmp_disp8(disp);
+    } else {
+        x86_inst_jmp_disp32_op();
+        relocate_pc32(sym);
+    }
+    // TODO: support 64-bit jumps?
+}
+
+/// Jump to a known address if the zero flag is set.
+/// Unlike the old asm.c helper, this emits no `test`: the caller sets flags.
+void inst_jump_if_zero(symbol sym) {
+    int32_t disp = symbol_offset(sym, X86_JZ_DISP8_SIZE);
+    if (disp >= INT8_MIN && disp <= INT8_MAX) {
+        x86_inst_jz_disp8(disp);
+    } else {
+        x86_inst_jz_disp32_op();
+        relocate_pc32(sym);
+    }
+}
+
+/// Jump to a known address if the zero flag is clear.
+/// Unlike the old asm.c helper, this emits no `test`: the caller sets flags.
+void inst_jump_if_not_zero(symbol sym) {
+    int32_t disp = symbol_offset(sym, X86_JNZ_DISP8_SIZE);
+    if (disp >= INT8_MIN && disp <= INT8_MAX) {
+        x86_inst_jnz_disp8(disp);
+    } else {
+        x86_inst_jnz_disp32_op();
+        relocate_pc32(sym);
+    }
+}
+
+/// Emit code comparing `x` with `y`, then jumping to `ifl` when `cond` holds
+/// and to `elsel` otherwise. `args` are passed to whichever label is taken.
+void fr_if(fr_cond cond, fr_var x, fr_var y, fr_label ifl, fr_label elsel, size_t argc, fr_var* args) {
+    fr_load_reg(BX, x);
+    fr_load_reg(DX, y);
+    fr_prepare_jump(argc, args);
+    x86_inst_cmp_r64_r64(BX, DX);
+    symbol ifs = fr_labels[ifl].symbol;
+    symbol elses = fr_labels[elsel].symbol;
+    switch (cond) {
+    case FR_EQ:
+        inst_jump_if_zero(ifs);
+        break;
+    case FR_NE:
+        inst_jump_if_not_zero(ifs);
+        break;
+    default:
+        // An unknown condition must not silently emit no branch at all.
+        fprintf(stderr, "unsupported condition\n");
+        exit(1);
+    }
+    inst_jump(elses);
+}
+
+/// Jump-table dispatch on `index`. Not implemented yet: always exits.
+void fr_switch(fr_var index, size_t labelc, fr_label* labels, size_t argc, fr_var* args) {
+    // TODO:
+    /*
+    symbol table = new_symbol();
+    fr_load_reg(CX, index);
+    x86_inst_lea_r64_ip_disp32(AX, X86_JMP_M64_SIZE);
+    x86_inst_add_r64_r64(AX, CX);
+    x86_inst_jmp_m64(
+    */
+    fprintf(stderr, "jump tables not yet implemented\n");
+    exit(1);
+}
+
+/// Emit code passing `args` to `label` and jumping to it.
+void fr_jump(fr_label label, size_t argc, fr_var* args) {
+    fr_prepare_jump(argc, args);
+    inst_jump(fr_labels[label].symbol);
+}
diff --git a/src/ir/flat_register.h b/src/ir/flat_register.h
new file mode 100644
index 0000000..e2fd2a9
--- /dev/null
+++ b/src/ir/flat_register.h
@@ -0,0 +1,43 @@
+#ifndef FLAT_REGISTER_H
+#define FLAT_REGISTER_H
+/// An IR with flat (i.e. non-recursive) syntax and types.
+/// Registers only contain bitvectors. Memory layout (size and offsets) are explicit.
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef size_t fr_label;
+typedef size_t fr_var;
+
+#define MAX_LABELS 4096
+#define MAX_VARS 4096
+#define MAX_ARGS 32
+
+enum fr_type_tag {
+    FR_UINT,
+    FR_BOX,
+    FR_REF,
+};
+
+typedef struct fr_type {
+    enum fr_type_tag tag;
+    uint32_t size;
+} fr_type;
+
+typedef enum fr_cond {
+    FR_EQ,
+    FR_NE,
+} fr_cond;
+
+fr_label fr_declare(size_t typec, fr_type* types);
+void fr_define(fr_label label, fr_var* vars);
+
+fr_var fr_lit(fr_type type, uint64_t val);
+fr_var fr_index(fr_var box, fr_var index);
+void fr_set(fr_var box, fr_var index, fr_var ref);
+fr_var fr_load(fr_var box, fr_type type, uint32_t offset);
+fr_var fr_struct(uint32_t memberc, fr_var* members);
+void fr_if(fr_cond cond, fr_var x, fr_var y, fr_label ifl, fr_label elsel, size_t argc, fr_var* args);
+void fr_switch(fr_var index, size_t labelc, fr_label* labels, size_t argc, fr_var* args);
+void fr_jump(fr_label label, size_t argc, fr_var* args);
+#endif
diff --git a/src/x86encode.h b/src/x86encode.h
index 96f7091..c702678 100644
--- a/src/x86encode.h
+++ b/src/x86encode.h
@@ -38,12 +38,24 @@ void x86_inst_mov_m64_r64(reg dest, reg src);
 void x86_inst_mov_m64_r64_disp8(reg dest, reg src, int8_t disp);
 void x86_inst_mov_m64_r64_disp32(reg dest, reg src, int32_t disp);
 void x86_inst_mov_m64_r64_disp(reg dest, reg src, int32_t disp);
+void x86_inst_mov_r8_imm8(reg dest, uint32_t imm);
+void x86_inst_mov_m8_r8(reg dest, reg src);
+void x86_inst_mov_m8_r8_disp8(reg dest, reg src, int8_t disp);
+void x86_inst_mov_m8_r8_disp32(reg dest, reg src, int32_t disp);
+void x86_inst_mov_m8_r8_disp(reg dest, reg src, int32_t disp);
+void x86_inst_mov_r8_m8(reg dest, reg src);
+void x86_inst_mov_r8_m8_disp8(reg dest, reg src, int8_t disp);
+void x86_inst_mov_r8_m8_disp32(reg dest, reg src, int32_t disp);
+void x86_inst_mov_r8_m8_disp(reg dest, reg src, int32_t disp);
 
 void x86_inst_push_r64(reg reg);
 void x86_inst_pop_r64(reg reg);
 
+void x86_inst_test_r8_r8(reg r1, reg r2);
 void x86_inst_test_r64_r64(reg r1, reg r2);
 
+void x86_inst_cmp_r64_r64(reg r1, reg r2);
+
 #define X86_JMP_DISP8_SIZE 2
 void x86_inst_jmp_disp8(int8_t disp);
 #define X86_JMP_DISP32_SIZE 5
@@ -73,6 +85,14 @@ void x86_inst_sub_r64_r64(reg dest, reg src);
 void x86_inst_add_r64_imm8(reg dest, int8_t imm);
 void x86_inst_add_r64_r64(reg dest, reg src);
 
+void x86_inst_mul_r64(reg src);
+
+void x86_inst_lea_r64_m64_disp8(reg dest, reg src, int32_t disp);
+void x86_inst_lea_r64_m64_disp32(reg dest, reg src, int32_t disp);
+void x86_inst_lea_r64_m64_disp(reg dest, reg src, int32_t disp);
+
+void x86_inst_rep_movsb(void);
+
 void x86_inst_syscall(void);
 
 #endif