Initial commit.

master
James T. Martin 2022-09-05 23:48:56 -07:00
commit b5667c61ec
Signed by: james
GPG Key ID: D6FB2F9892F9B225
12 changed files with 865 additions and 0 deletions

12
.editorconfig Normal file
View File

@ -0,0 +1,12 @@
# https://EditorConfig.org/
root = true
[*]
indent_size = 4
charset = utf-8
indent_style = space
trim_trailing_whitespace = true
insert_final_newline = true
[Makefile]
indent_style = tab

13
.gitignore vendored Normal file
View File

@ -0,0 +1,13 @@
*
!*/
# source code
!/src/**/*.c
!/src/**/*.h
# top-level configuration
!/.editorconfig
!/.gitignore
!/LICENSE.txt
!/Makefile
!/README.md

5
LICENSE.txt Normal file
View File

@ -0,0 +1,5 @@
Copyright (C) 2022 by James Martin <james@jtmar.me>
Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

24
Makefile Normal file
View File

@ -0,0 +1,24 @@
# Run recipes with a POSIX shell.
SHELL = /bin/sh
# Clear the built-in suffix list, then register only the suffixes this project uses.
.SUFFIXES:
.SUFFIXES: .c .o
CFLAGS = -std=c99 -pedantic -Wextra -Os
LDFLAGS = -lc
OBJECTS = main.o asm.o io.o ir.o
# Link the compiler binary bin/passc from the objects stored under bin/obj/.
.PHONY: passc
passc: .bin $(OBJECTS)
$(CC) -o bin/passc $(addprefix bin/obj/,$(OBJECTS)) $(LDFLAGS)
# Create the output directories.
.PHONY: .bin
.bin:
@mkdir -p bin/obj
# Compile src/NAME.c. Note that the object is written to bin/obj/NAME.o, not to the target path NAME.o.
%.o : src/%.c
$(CC) -c $(CFLAGS) $< -o bin/obj/$@
# Remove all build output.
.PHONY: clean
clean:
@-rm -rf bin

6
README.md Normal file
View File

@ -0,0 +1,6 @@
# Passlang
A one-pass, linear-time compile-and-go compiler.
I'm imposing these restrictions on myself to avoid over-engineering and give myself a design challenge.
The goal is to make the absolute best language I can under these constraints,
and then incrementally begin to relax the restrictions only when absolutely necessary to make progress.

226
src/asm.c Normal file
View File

@ -0,0 +1,226 @@
// REFERENCES:
// http://ref.x86asm.net/index.html (geek64-abc)
// https://wiki.osdev.org/X86-64_Instruction_Encoding
// https://defuse.ca/online-x86-assembler.htm
#include "asm.h"
#include "io.h"
#include <stdlib.h>
#include <string.h>
// Current output position; used below as the origin for relative displacements
// and read by ir.c when defining labels (declared `extern` in asm.h).
// NOTE(review): no code visible in this file assigns to `here` — confirm where
// it is advanced as instructions are emitted.
ip here;
// Bare REX prefix (0100WRXB) with no extension bits set.
#define REX 0x40
// REX prefix with W set: 64-bit operand size.
#define REX_W 0x48
// REX prefix with R set: extends the ModRM.reg field.
#define REX_R 0x44
// REX prefix with X set: extends the SIB.index field.
#define REX_X 0x42
// REX prefix with B set: extends ModRM.rm, SIB.base, or an opcode-embedded register.
#define REX_B 0x41
// REX prefix carrying the upper bit of register `r` in the R (resp. B) position.
#define REXQ_R(r) (REX | (((r) >= R8) ? REX_R : 0))
#define REXQ_B(r) (REX | (((r) >= R8) ? REX_B : 0))
// 64-bit REX prefix including the upper bit of `r` (encoded in R)
// and, for the two-register form, of `b` (encoded in B).
#define REXQ_WR(r) (REX_W | REXQ_R(r))
#define REXQ_WRB(r, b) (REX_W | REXQ_R(r) | REXQ_B(b))
// lower 3 bits of register (not including part encoded in REX)
#define REG(r) ((r) & 7)
// ModRM "mod" field (top two bits): addressing mode of the r/m operand.
#define MODRM_RR (3 << 6)   // register direct
#define MODRM_RM (0 << 6)   // [rm]
#define MODRM_RD8 (1 << 6)  // [rm + disp8]
#define MODRM_RD32 (2 << 6) // [rm + disp32]
// ModRM byte without the mode bits: `reg` in bits 5..3, `rm` in bits 2..0.
#define MODRMQ(reg, rm) ((REG(reg) << 3) | REG(rm))
#define MODRMQ_RR(reg, rm) (MODRM_RR | MODRMQ((reg), (rm)))
#define MODRMQ_RM(reg, base) (MODRM_RM | MODRMQ((reg), (base)))
#define MODRMQ_RD8(reg, base) (MODRM_RD8 | MODRMQ((reg), (base)))
#define MODRMQ_RD32(reg, base) (MODRM_RD32 | MODRMQ((reg), (base)))
// Signed 32-bit displacement that leads from `from` to `to`.
// Prints a diagnostic and exits with status 1 if the distance does not fit.
static int32_t rel_offs(ip from, ip to) {
    // subtract in a signed 64-bit domain to hopefully avoid UB
    const int64_t delta = (int64_t) to - (int64_t) from;
    const int fits = (delta >= INT32_MIN) && (delta <= INT32_MAX);
    if (!fits) {
        fprintf(stderr, "displacement greater than 32 bits!\n");
        fprintf(stderr, "if you ever encounter this error, let me know and I'll implement it.\n");
        exit(1);
    }
    return (int32_t) delta;
}
// Displacement from the current output position (`here`) to `there`.
static int32_t rip_rel(ip there) {
    const ip origin = here;
    return rel_offs(origin, there);
}
// Encoded length of the short jump: opcode + 8-bit displacement.
#define JUMP_DISP8_SIZE 2
// Emit a short jump whose displacement byte is `disp`.
static void inst_jump_disp8(uint8_t disp) {
    emit_u8(0xeb); // jmp Jbs
    emit_u8(disp);
}
// Encoded length of the near jump: opcode + 32-bit displacement.
#define JUMP_DISP32_SIZE 5
// Emit a near jump whose displacement field is `disp` (host byte order).
static void inst_jump_disp32(uint32_t disp) {
    uint8_t inst[JUMP_DISP32_SIZE] = { 0xe9 }; // jmp Jvds
    memcpy(&inst[1], &disp, sizeof disp);
    emit(inst, JUMP_DISP32_SIZE);
}
// Emit a jump to the known address `there`, using the 2-byte short form when
// the displacement fits in a signed byte and the 5-byte near form otherwise.
void inst_jump(ip there) {
    // Displacements are measured from the end of the instruction,
    // so the origin depends on which encoding is chosen.
    int32_t disp8 = rel_offs(here + JUMP_DISP8_SIZE, there);
    // Inclusive bounds: both -128 and 127 are encodable in a disp8
    // (matches the range test used by inst_mov_from_disp / inst_mov_to_disp).
    if (disp8 >= INT8_MIN && disp8 <= INT8_MAX) {
        inst_jump_disp8((uint8_t) disp8);
    } else {
        inst_jump_disp32((uint32_t) rel_offs(here + JUMP_DISP32_SIZE, there));
    }
}
// Emit a near jump with a zero placeholder displacement and return the value
// of `here` afterwards; pass that value to inst_jump_resolve() to patch it.
ip inst_jump_unresolved(void) {
    const uint32_t placeholder = 0;
    inst_jump_disp32(placeholder);
    return here;
}
// Patch a jump produced by inst_jump_unresolved() so that it targets `there`.
// `disp` is the value that call returned; the 32-bit displacement field
// occupies the four bytes immediately before it.
void inst_jump_resolve(ip disp, ip there) {
    const int32_t delta = rel_offs(disp, there);
    const ip field = disp - 4;
    patch_i32(field, delta);
}
// Emit `mov r32, imm32` (which zero-extends into the full 64-bit register).
static void inst_mov_imm32(reg reg, uint32_t imm) {
    // mov Zvqp Ivqp
    // The register is embedded in the opcode byte, so its upper bit is carried
    // by REX.B (not REX.R, which only extends ModRM.reg).
    if (reg >= R8) {
        emit_u8(REX_B);
    }
    emit_u8(0xb8 + REG(reg));
    emit_u32(imm);
}
// Emit `mov r64, imm64`.
static void inst_mov_imm64(reg reg, uint64_t imm) {
    // mov Zvqp Ivqp
    // REX.W selects the 64-bit immediate form; the opcode-embedded register's
    // upper bit goes in REX.B (not REX.R).
    uint8_t buf[10] = { REX_W | REXQ_B(reg), 0xb8 + REG(reg), 0, 0, 0, 0, 0, 0, 0, 0 };
    // The immediate is copied in host byte order.
    memcpy(&buf[2], &imm, sizeof(uint64_t));
    emit(buf, 10);
}
// Load the unsigned immediate `imm` into `reg`, choosing the shorter 32-bit
// encoding whenever the value fits in 32 bits.
void inst_mov_imm(reg reg, uint64_t imm) {
    // TODO: emit `mov ax`, `mov al`, xor, xor+inc, xor+neg
    if (imm > UINT32_MAX) {
        inst_mov_imm64(reg, imm);
        return;
    }
    inst_mov_imm32(reg, (uint32_t) imm);
}
// Load the signed immediate `imm` into `reg`. Non-negative values that fit in
// 32 bits use the short encoding; everything else uses the 64-bit form.
void inst_mov_imm_i64(reg reg, int64_t imm) {
    // TODO: emit sign extensions
    const int needs_wide = (imm < 0) || (imm > UINT32_MAX);
    if (needs_wide) {
        inst_mov_imm64(reg, (uint64_t) imm);
        return;
    }
    inst_mov_imm32(reg, (uint32_t) imm);
}
// Reject base registers for which indirect addressing is not implemented
// (SP, BP, R12, R13): prints a diagnostic and exits with status 1.
static void check_base(reg base) {
    switch (base) {
    case SP:
    case BP:
    case R12:
    case R13:
        fprintf(stderr, "indirect addressing not implemented for sp & co\n");
        exit(1);
    default:
        break;
    }
}
// Emit `mov dest, src` between two 64-bit registers.
void inst_mov(reg dest, reg src) {
    // mov Evqp Gvqp
    // With opcode 0x89, ModRM.reg holds the source and ModRM.rm the destination,
    // so REX.R must extend `src` and REX.B must extend `dest`.
    emit_u8(REXQ_WRB(src, dest));
    emit_u8(0x89);
    emit_u8(MODRMQ_RR(src, dest));
}
// Emit `mov dest, [base]` (64-bit load).
// Exits via check_base() for base registers that are not supported.
void inst_mov_from(reg dest, reg base) {
    check_base(base);
    // mov Gvqp Evqp
    // ModRM.reg = dest (extended by REX.R), ModRM.rm = base (extended by REX.B).
    emit_u8(REXQ_WRB(dest, base));
    emit_u8(0x8B);
    emit_u8(MODRMQ_RM(dest, base));
}
// Emit `mov dest, [base + disp8]` (64-bit load).
static void inst_mov_from_disp8(reg dest, reg base, int8_t disp) {
    check_base(base);
    // mov Gvqp Evqp
    // ModRM.reg = dest (extended by REX.R), ModRM.rm = base (extended by REX.B).
    emit_u8(REXQ_WRB(dest, base));
    emit_u8(0x8B);
    emit_u8(MODRMQ_RD8(dest, base));
    emit_u8((uint8_t) disp);
}
// Emit `mov dest, [base + disp32]` (64-bit load).
static void inst_mov_from_disp32(reg dest, reg base, int32_t disp) {
    check_base(base);
    // mov Gvqp Evqp
    // ModRM.reg = dest (extended by REX.R), ModRM.rm = base (extended by REX.B).
    emit_u8(REXQ_WRB(dest, base));
    emit_u8(0x8B);
    emit_u8(MODRMQ_RD32(dest, base));
    emit_u32((uint32_t) disp);
}
// Emit `mov dest, [base + disp]`, picking the shortest encoding:
// no displacement when it is zero, disp8 when it fits a signed byte, else disp32.
void inst_mov_from_disp(reg dest, reg base, int32_t disp) {
    if (disp == 0) {
        inst_mov_from(dest, base);
        return;
    }
    if (disp < INT8_MIN || disp > INT8_MAX) {
        inst_mov_from_disp32(dest, base, disp);
        return;
    }
    inst_mov_from_disp8(dest, base, (int8_t) disp);
}
// Emit `mov [base], src` (64-bit store).
// Exits via check_base() for base registers that are not supported.
void inst_mov_to(reg base, reg src) {
    check_base(base);
    // mov Evqp Gvqp
    // ModRM.reg = src (extended by REX.R), ModRM.rm = base (extended by REX.B).
    // The mode must be memory-indirect; register-direct would encode a plain
    // register-to-register move instead of a store.
    emit_u8(REXQ_WRB(src, base));
    emit_u8(0x89);
    emit_u8(MODRMQ_RM(src, base));
}
// Emit `mov [base + disp8], src` (64-bit store).
static void inst_mov_to_disp8(reg base, reg src, int8_t disp) {
    check_base(base);
    // mov Evqp Gvqp
    // ModRM.reg = src (extended by REX.R), ModRM.rm = base (extended by REX.B).
    emit_u8(REXQ_WRB(src, base));
    emit_u8(0x89);
    emit_u8(MODRMQ_RD8(src, base));
    emit_u8((uint8_t) disp);
}
// Emit `mov [base + disp32], src` (64-bit store).
static void inst_mov_to_disp32(reg base, reg src, int32_t disp) {
    check_base(base);
    // mov Evqp Gvqp
    // ModRM.reg = src (extended by REX.R), ModRM.rm = base (extended by REX.B).
    emit_u8(REXQ_WRB(src, base));
    emit_u8(0x89);
    emit_u8(MODRMQ_RD32(src, base));
    emit_u32((uint32_t) disp);
}
// Emit `mov [base + disp], src`, picking the shortest encoding:
// no displacement when it is zero, disp8 when it fits a signed byte, else disp32.
void inst_mov_to_disp(reg base, reg src, int32_t disp) {
    if (disp == 0) {
        inst_mov_to(base, src);
        return;
    }
    if (disp < INT8_MIN || disp > INT8_MAX) {
        inst_mov_to_disp32(base, src, disp);
        return;
    }
    inst_mov_to_disp8(base, src, (int8_t) disp);
}
// Emit the two-byte `syscall` instruction (0f 05).
void inst_syscall(void) {
    emit_u8(0x0f);
    emit_u8(0x05);
}
// Emit `push reg` (64-bit).
void inst_push(reg reg) {
    // push Zvq
    const int extended = (reg >= R8);
    if (extended) {
        emit_u8(REX_B); // upper bit of the opcode-embedded register
    }
    emit_u8(0x50 + REG(reg));
}
// Emit `pop reg` (64-bit).
void inst_pop(reg reg) {
    // pop Zvq
    const int extended = (reg >= R8);
    if (extended) {
        emit_u8(REX_B); // upper bit of the opcode-embedded register
    }
    emit_u8(0x58 + REG(reg));
}

51
src/asm.h Normal file
View File

@ -0,0 +1,51 @@
// Guard name avoids the reserved `_` + uppercase identifier space.
#ifndef PASSC_ASM_H
#define PASSC_ASM_H
#include <stddef.h>
#include <stdint.h>
/// A position in the generated code.
typedef size_t ip;
/// Current output position (defined in asm.c).
extern ip here;
// A general-purpose x86 register.
// The specific register size (e.g. al/ax/eax/rax) depends on the instruction.
// All registers are valid for all instructions; we will perform exchanges if necessary.
// NOTE(review): the indirect `inst_mov_from*` / `inst_mov_to*` forms currently
// reject SP, BP, R12 and R13 as base registers.
typedef enum reg {
    RA = 0, // rax, eax, ax, al
    RC = 1, // rcx, ecx, cx, cl
    RD = 2, // rdx, edx, dx, dl
    RB = 3, // rbx, ebx, bx, bl
    SP = 4, // rsp, esp, sp, spl (we do not use ah)
    BP = 5, // rbp, ebp, bp, bpl (we do not use ch)
    SI = 6, // rsi, esi, si, sil (we do not use dh)
    DI = 7, // rdi, edi, di, dil (we do not use bh)
    R8 = 8, // r8, r8d, r8w, r8l
    R9 = 9, // r9, r9d, r9w, r9l
    R10 = 10, // r10, r10d, r10w, r10l
    R11 = 11, // r11, r11d, r11w, r11l
    R12 = 12, // r12, r12d, r12w, r12l
    R13 = 13, // r13, r13d, r13w, r13l
    R14 = 14, // r14, r14d, r14w, r14l
    R15 = 15, // r15, r15d, r15w, r15l
} reg;
/// Jump to a known address.
void inst_jump(ip there);
/// Jump to an unresolved address.
/// Returns the position to hand to `inst_jump_resolve` once the target is known.
ip inst_jump_unresolved(void);
/// Patch the jump identified by `disp` (from `inst_jump_unresolved`) to target `there`.
void inst_jump_resolve(ip disp, ip there);
/// Load an unsigned immediate into `reg`.
void inst_mov_imm(reg reg, uint64_t imm);
/// Load a signed immediate into `reg`.
void inst_mov_imm_i64(reg reg, int64_t imm);
/// Emit a `syscall` instruction.
void inst_syscall(void);
/// Push / pop a 64-bit register.
void inst_push(reg reg);
void inst_pop(reg reg);
/// Register-to-register move: `dest = src`.
void inst_mov(reg dest, reg src);
/// Load: `dest = [base]` and `dest = [base + disp]`.
void inst_mov_from(reg dest, reg base);
void inst_mov_from_disp(reg dest, reg base, int32_t disp);
/// Store: `[base] = src` and `[base + disp] = src`.
void inst_mov_to(reg base, reg src);
void inst_mov_to_disp(reg base, reg src, int32_t disp);
#endif

157
src/io.c Normal file
View File

@ -0,0 +1,157 @@
#ifdef __unix__
#define _GNU_SOURCE
#endif
#include "io.h"
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#ifdef __unix__
// This program can be trivially converted to work with only the C standard library
// at the cost of not being able to link the output file atomically.
#include <fcntl.h>
#include <libgen.h>
#include <sys/stat.h>
#include <unistd.h>
#endif
// Path of the final output file; read by close_files().
static const char* outfile_name;
// Source file stream, opened by open_files().
FILE* infile;
// Output stream that emit() and patch() write to.
FILE* outfile;
#ifdef __unix__
// Open the source file for reading and create an unnamed temporary output
// file in the destination directory. Sets `infile`, `outfile` and remembers the
// output path for close_files(). Any failure prints a message and exits(1).
void open_files(const char* infile_name, const char* outfile_name_) {
    outfile_name = outfile_name_;
    // To avoid creating a corrupt or incomplete output file,
    // we operate on a temporary file and atomically link it only once compilation has succeeded.
    // Any previous output is removed up front (presumably so the later linkat()
    // does not fail because the destination already exists).
    unlink(outfile_name);

    int infile_fd = open(infile_name, O_RDONLY);
    if (infile_fd == -1) {
        fprintf(stderr, "failed to open source file: %s\n", strerror(errno));
        exit(1);
    }
    off_t infile_len = lseek(infile_fd, 0, SEEK_END);
    if (infile_len == (off_t) -1) {
        fprintf(stderr, "failed to get length of source file: %s\n", strerror(errno));
        exit(1);
    }
    // Measuring the length moved the offset to EOF; rewind so reads start at the beginning.
    if (lseek(infile_fd, 0, SEEK_SET) == (off_t) -1) {
        fprintf(stderr, "failed to rewind source file: %s\n", strerror(errno));
        exit(1);
    }
    // There'll probably never be a source file large enough for this to make a difference,
    // and I *certainly* haven't profiled, but... I've always wanted to use these syscalls. :)
    posix_fadvise(infile_fd, 0, infile_len, POSIX_FADV_SEQUENTIAL);
    posix_fadvise(infile_fd, 0, infile_len, POSIX_FADV_NOREUSE);
    infile = fdopen(infile_fd, "rb");
    if (infile == NULL) {
        fprintf(stderr, "failed to open source file fd as file handle: %s\n", strerror(errno));
        exit(1);
    }

    // dirname() may modify the string it is given, and close_files() still needs
    // the full output path, so hand it a private copy.
    size_t path_size = strlen(outfile_name) + 1;
    char* path_copy = malloc(path_size);
    if (path_copy == NULL) {
        fprintf(stderr, "failed to allocate memory for output path\n");
        exit(1);
    }
    memcpy(path_copy, outfile_name, path_size);
    int outfile_fd = open(dirname(path_copy), O_WRONLY | O_TMPFILE, S_IRWXU | S_IRWXG | S_IRWXO);
    int open_errno = errno; // free() below must not disturb the reported error
    free(path_copy);
    if (outfile_fd == -1) {
        fprintf(stderr, "failed to create temporary output file: %s\n", strerror(open_errno));
        exit(1);
    }
    outfile = fdopen(outfile_fd, "wb");
    if (outfile == NULL) {
        fprintf(stderr, "failed to open output file fd as file handle: %s\n", strerror(errno));
        exit(1);
    }
}
// Flush the temporary output file, link it into the file system under the
// requested output name, and close both streams.
// Any failure prints a message and exits(1).
void close_files(void) {
    if (fflush(outfile) != 0) {
        fprintf(stderr, "failed to flush output file: %s\n", strerror(errno));
        exit(1);
    }
    // Room for the prefix (sizeof includes the NUL), a sign,
    // and every decimal digit an int can have.
    char outfile_tempname[sizeof("/proc/self/fd/") + 3 * sizeof(int) + 1];
    int len = snprintf(outfile_tempname, sizeof outfile_tempname, "/proc/self/fd/%d", fileno(outfile));
    if (len < 0 || (size_t) len >= sizeof outfile_tempname) {
        fprintf(stderr, "failed to format path of temporary output file\n");
        exit(1);
    }
    // The temporary file has no name; give it one by linking through its /proc fd entry.
    if (linkat(AT_FDCWD, outfile_tempname, AT_FDCWD, outfile_name, AT_SYMLINK_FOLLOW) == -1) {
        fprintf(stderr, "failed to link output file into file system: %s\n", strerror(errno));
        exit(1);
    }
    if (fclose(outfile) != 0) {
        fprintf(stderr, "failed to close output file: %s\n", strerror(errno));
        exit(1);
    }
    fclose(infile);
}
#else
void open_files(const char* infile_name, const char* outfile_name) {
infile = fopen(infile_name, "rb");
if (infile == NULL) {
fprintf(stderr, "failed to open source file: %s\n", strerror(errno));
exit(1);
}
// There is no way for us to mark the file as executable.
// Then again, if it's not Unix, that probably doesn't matter.
outfile = fopen(outfile_name, "wb");
if (outfile == NULL) {
fprintf(stderr, "failed to open output file: %s\n", strerror(errno));
exit(1);
}
}
// Portable fallback: close the output and input streams. If closing the output
// fails, report it, try to remove the output file, and exit(1).
void close_files(void) {
    if (fclose(outfile) == 0) {
        fclose(infile);
        return;
    }
    fprintf(stderr, "failed to close output file: %s\n", strerror(errno));
    // NOTE: ideally we'd do this on any dirty exit
    // TODO: use portable tempfiles and then just copy the entire file at the end?
    if (remove(outfile_name) != 0) {
        fprintf(stderr, "failed to remove output file, if it exists, it is corrupt: %s\n", strerror(errno));
    }
    exit(1);
}
#endif
// Append `count` bytes starting at `ptr` to the output file.
// Prints a message and exits(1) if the stream is in an error state afterwards.
void emit(const void* restrict ptr, size_t count) {
    (void) fwrite(ptr, 1, count, outfile);
    if (!ferror(outfile)) {
        return;
    }
    fprintf(stderr, "failed to write to output file\n");
    exit(1);
}
// Append a single byte to the output file.
void emit_u8(uint8_t x) {
    const uint8_t byte = x;
    emit(&byte, sizeof byte);
}
// Append a 32-bit value to the output file in host byte order.
void emit_u32(uint32_t x) {
    const uint32_t word = x;
    emit(&word, sizeof word);
}
// Append a 64-bit value to the output file in host byte order.
void emit_u64(uint64_t x) {
    const uint64_t quad = x;
    emit(&quad, sizeof quad);
}
// Overwrite `count` bytes at absolute file offset `off` with the bytes at `ptr`,
// then restore the previous write position.
// Every failing step prints a message and exits(1).
void patch(size_t off, const void* ptr, size_t count) {
    fpos_t resume_at;
    const int saved = fgetpos(outfile, &resume_at);
    if (saved != 0) {
        fprintf(stderr, "failed to save file position before patch: %s\n", strerror(errno));
        exit(1);
    }

    const int moved = fseek(outfile, (long) off, SEEK_SET);
    if (moved != 0) {
        fprintf(stderr, "failed to set file position for patch: %s\n", strerror(errno));
        exit(1);
    }

    (void) fwrite(ptr, 1, count, outfile);
    if (ferror(outfile)) {
        fprintf(stderr, "failed to patch output file: %s\n", strerror(errno));
        exit(1);
    }

    const int restored = fsetpos(outfile, &resume_at);
    if (restored != 0) {
        fprintf(stderr, "failed to restore file position after patch: %s\n", strerror(errno));
        exit(1);
    }
}
// Overwrite the 32-bit value at file offset `off` (host byte order).
void patch_u32(size_t off, uint32_t x) {
    const uint32_t word = x;
    patch(off, &word, sizeof word);
}
// Signed variant of patch_u32(): writes the same 32 bits reinterpreted as unsigned.
void patch_i32(size_t off, int32_t x) {
    const uint32_t bits = (uint32_t) x;
    patch_u32(off, bits);
}

22
src/io.h Normal file
View File

@ -0,0 +1,22 @@
// Guard name avoids the reserved `_` + uppercase identifier space.
#ifndef PASSC_IO_H
#define PASSC_IO_H
#include <stdint.h>
#include <stdio.h>
/// Stream the generated program is written to.
extern FILE* outfile;
/// Stream the source program is read from.
extern FILE* infile;
/// Open the source and output files; exits the process on failure.
void open_files(const char* infile_name, const char* outfile_name);
/// Finalize the output file and close both streams; exits the process on failure.
void close_files(void);
/// Append `count` bytes at `ptr` to the output; exits the process on write error.
void emit(const void* ptr, size_t count);
/// Append a fixed-width integer to the output in host byte order.
void emit_u8(uint8_t x);
void emit_u32(uint32_t x);
void emit_u64(uint64_t x);
/// Overwrite `count` bytes at absolute file offset `off`, preserving the current write position.
void patch(size_t off, const void* ptr, size_t count);
/// Overwrite a 32-bit integer at absolute file offset `off` (host byte order).
void patch_u32(size_t off, uint32_t x);
void patch_i32(size_t off, int32_t x);
#endif

219
src/ir.c Normal file
View File

@ -0,0 +1,219 @@
#include "asm.h"
#include "ir.h"
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
// Singly linked list of jumps to a label that were emitted before the label
// was defined and still need their displacement patched.
struct fixups {
struct fixups* next;
// value returned by inst_jump_unresolved() for the pending jump
ip disp;
};
// Everything known about one label; `label` (ir.h) is a pointer to this.
struct label_info {
// frame that was current when the label was declared
const struct frame* frame;
// number of arguments the label accepts
size_t argc;
// position of the label's code, or (ip) -1 while still undefined
ip definition;
// jumps waiting for the definition
struct fixups* fixups;
};
// Linked list of the labels declared within one frame.
struct labels {
struct labels* prev;
struct label_info info;
};
// One stack frame of the IR's bookkeeping.
struct frame {
// enclosing frame
struct frame* prev;
// value of `top_of_stack` when the frame was entered
size_t depth;
// labels declared in this frame
struct labels* labels;
};
// Innermost frame; updated by enter() and leave().
static struct frame* current_frame;
// Where a variable currently lives: in a register, or at an offset on the stack.
struct storage {
enum { STORE_REG, STORE_STACK } type;
union { reg reg; size_t off; };
};
// Index that the next allocated variable will receive.
static size_t top_of_stack;
// Enter a new stack frame.
// The frame records the current `top_of_stack` so leave() can restore it,
// and starts with an empty label list. Aborts if memory is exhausted.
static void enter(void) {
    struct frame* next = malloc(sizeof *next);
    if (next == NULL) {
        abort();
    }
    next->prev = current_frame;
    next->depth = top_of_stack;
    // Must be initialized: declare_label() and leave() both read this list.
    next->labels = NULL;
    current_frame = next;
}
// Leave the current stack frame.
// Restores the enclosing frame and its stack depth, then frees the frame
// together with every label declared in it.
static void leave(void) {
    struct frame* dying = current_frame;
    struct labels* node = dying->labels;

    current_frame = dying->prev;
    top_of_stack = dying->depth;
    free(dying);

    for (struct labels* prev; node != NULL; node = prev) {
        prev = node->prev;
        free(node);
    }
}
// Allocate registers or stack space for the arguments.
// Currently this only advances `top_of_stack` by `argc` slots.
static void reserve(size_t argc) {
    top_of_stack = top_of_stack + argc;
}
// Allocate the next variable slot and return its index.
// Emits `push rax` as part of the allocation.
static var new_var(void) {
    const var fresh = top_of_stack;
    top_of_stack += 1;
    inst_push(RA);
    return fresh;
}
// Describe where `var` currently lives: the most recently allocated variable
// is in RA, every other one is at a stack offset computed from its index.
static struct storage storage(var var) {
    struct storage where;
    const bool is_newest = (var == top_of_stack - 1);
    if (!is_newest) {
        where.type = STORE_STACK;
        where.off = -var * 8 - 16;
        return where;
    }
    where.type = STORE_REG;
    where.reg = RA;
    return where;
}
// Copy the value of variable `src` into variable `dest`, emitting whichever
// register/stack move matches where the two variables currently live.
// Stack slots are addressed relative to RB.
static void move(var dest, var src) {
    if (dest == src) return;
    struct storage ds = storage(dest);
    struct storage ss = storage(src);
    if (ds.type == STORE_REG && ss.type == STORE_REG) {
        inst_mov(ds.reg, ss.reg);
    } else if (ds.type == STORE_STACK && ss.type == STORE_REG) {
        inst_mov_to_disp(RB, ss.reg, ds.off);
    } else if (ds.type == STORE_REG && ss.type == STORE_STACK) {
        // The source (not the destination) must be the stack operand here;
        // testing `ds` twice made this branch unreachable.
        inst_mov_from_disp(ds.reg, RB, ss.off);
    } else {
        // stack to stack: go through a scratch register
        // FIXME: DI is scratch register?
        inst_mov_from_disp(DI, RB, ss.off);
        inst_mov_to_disp(RB, DI, ds.off);
    }
}
// Swap the values of variables `x` and `y`.
// Only the trivial case (x == y) is handled; anything else asserts.
static void exchange(var x, var y) {
    if (x != y) {
        assert(0); // UNIMPLEMENTED
    }
}
// Restore the stack and registers to a previous frame,
// in preparation for a jump out of the current frame.
//
// This involves loading spilled variables into registers,
// restoring the stack pointer,
// spilling variables onto the stack to make space for arguments,
// and relocating arguments to the correct registers.
//
// As implemented: argument i must end up in slot `frame->depth + i`. If a later
// argument currently occupies that slot the two are exchanged (and `args` is
// updated to reflect the swap); otherwise the argument is simply moved.
static void restore(const struct frame* frame, size_t argc, var* args) {
    const size_t none = (size_t) -1;
    for (size_t i = 0; i < argc; i++) {
        const size_t slot = frame->depth + i;
        if (args[i] == slot) {
            continue; // already in place
        }

        // Find the first later argument that lives in the target slot.
        size_t conflict = none;
        size_t j = i + 1;
        while (j < argc && conflict == none) {
            if (args[j] == slot) {
                conflict = j;
            }
            j++;
        }

        if (conflict != none) {
            // TODO: an algorithm which produces fewer exchanges
            exchange(args[conflict], args[i]);
            args[conflict] = args[i];
        } else {
            move(slot, args[i]);
        }
    }
}
// Declare a new, not-yet-defined label taking `argc` arguments in the current
// frame. The returned handle is owned by the frame and freed by leave().
// Requires that a frame has been entered; aborts if memory is exhausted.
label declare_label(size_t argc) {
    assert(current_frame != NULL);
    struct labels* labels = malloc(sizeof *labels);
    if (labels == NULL) {
        abort();
    }
    labels->prev = current_frame->labels;
    labels->info.frame = current_frame;
    labels->info.argc = argc;
    labels->info.definition = (ip) -1; // sentinel: not defined yet
    // Must be initialized: queue_fixup() and define_label() walk this list.
    labels->info.fixups = NULL;
    current_frame->labels = labels;
    return &labels->info;
}
// Define `label` at the current output position: patch every jump that was
// queued while the label was undefined, then enter a new frame with
// `label->argc` slots reserved. The label must belong to the current frame.
// `args` is currently unused.
void define_label(label label, var* args) {
    (void) args;
    assert(label->frame == current_frame);
    label->definition = here;
    struct fixups* fixups = label->fixups;
    while (fixups != NULL) {
        struct fixups* fixup = fixups;
        inst_jump_resolve(fixup->disp, here);
        fixups = fixup->next;
        free(fixup);
    }
    // Every node was just freed; don't leave a dangling list head behind.
    label->fixups = NULL;
    enter();
    reserve(label->argc);
}
// Remember that the jump identified by `disp` (as returned by
// inst_jump_unresolved) must be patched once `label` is defined.
// Aborts if memory is exhausted.
void queue_fixup(label label, ip disp) {
    struct fixups* fixup = malloc(sizeof *fixup);
    if (fixup == NULL) {
        abort();
    }
    fixup->next = label->fixups;
    fixup->disp = disp;
    label->fixups = fixup;
}
// Jump to `label` unconditionally: arrange the arguments for the label's
// frame, emit the jump (queued for later patching if the label is not defined
// yet), and leave the current frame.
void jump(label label, var* args) {
    restore(label->frame, label->argc, args);
    const bool unresolved = (label->definition == (ip) -1);
    if (unresolved) {
        const ip disp = inst_jump_unresolved();
        queue_fixup(label, disp);
    } else {
        inst_jump(label->definition);
    }
    leave();
}
// Jump to one of `branches` labels selected by `index`. Not implemented yet.
void jump_table(size_t branches, label* labels, var index, var* args) {
    (void) branches;
    (void) labels;
    (void) index;
    (void) args;
    assert(0); // UNIMPLEMENTED
}
// Jump to `label` when `cond` is non-zero. Not implemented yet.
void jump_if(label label, var cond, var* args) {
    (void) label;
    (void) cond;
    (void) args;
    assert(0); // UNIMPLEMENTED
}
// Placeholder: currently does nothing with `vars`.
static void save(var* vars) {
    (void) vars;
}
var lit(uint64_t x) {
var var = new_var();
struct storage stg = storage(var);
assert(stg.type == STORE_REG);
inst_mov_imm(stg.reg, x);
return var;
}
// Emit a system call. `args[0]` is not loaded explicitly (see below);
// `args[1..3]`, when present, are loaded from their stack slots into DI, SI
// and RD in that order. Arguments beyond the fourth are ignored.
void syscall(size_t argc, var* args) {
    assert(argc > 0);
    // rax already populated by top of stack
    // FIXME: this won't work forever
    // FIXME: save args in case we don't want to sysexit
    // FIXME: save other registers
    static const reg arg_regs[] = { DI, SI, RD };
    const size_t max_loaded = sizeof arg_regs / sizeof arg_regs[0];
    for (size_t i = 1; i < argc && i <= max_loaded; i++) {
        inst_mov_from_disp(arg_regs[i - 1], RB, -args[i] * 8 - 16);
    }
    inst_syscall();
}
void init(void) {
inst_mov(RB, SP);
}

47
src/ir.h Normal file
View File

@ -0,0 +1,47 @@
// Guard named after this header; avoids the reserved `_` + uppercase identifier space.
#ifndef PASSC_IR_H
#define PASSC_IR_H
#include <stddef.h>
#include <stdint.h>
/// Handle to a variable (an index into the IR's variable stack).
typedef size_t var;
/// Handle to a label; owned by the frame it was declared in.
typedef struct label_info* label;
/// Emit the program prologue.
///
/// Sets up the base register that stack-slot accesses are relative to,
/// so call this before generating any other code.
void init(void);
/// Declare a new label.
///
/// A label is the destination of a jump,
/// located with a fixed stack context and fixed argument types.
label declare_label(size_t args);
/// Define a previously-declared label.
///
/// The label must be defined in the same stack context
/// which it was declared in.
///
/// The definition of the label is... here, which is to say
/// whatever code you proceed to generate after this.
///
/// A label is implicitly terminated by an unconditional jump or exit.
/// However, it may exit at multiple locations via unconditional jumps.
void define_label(label label, var* args);
/// Jump to label, unconditionally. Terminates a block.
///
/// It is only possible to jump to a label in a parent or adjacent stack frame
/// (you can't jump *deeper* into the stack).
void jump(label label, var* args);
/// Jump to label in table; never returns. Terminates a block.
///
/// All labels must be at the same depth and accept the same arguments.
/// `index` must not be out of bounds.
void jump_table(size_t branches, label* labels, var index, var* args);
/// Jump to label if `cond` is not zero. Does not terminate a block.
void jump_if(label label, var cond, var* args);
/// Create a new variable holding the literal `x`.
var lit(uint64_t x);
/// Emit a system call with `argc` arguments taken from `args` (`argc` must be at least 1).
void syscall(size_t argc, var* args);
#endif

83
src/main.c Normal file
View File

@ -0,0 +1,83 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include "io.h"
#include "ir.h"
// Size in bytes of the hardcoded ELF header + program headers written by write_elf().
#define ELF_HEADER_SIZE 0xb0
// Generate the program body. For now this is a fixed program: it creates the
// literals 42 and 60 and issues a two-argument system call with them.
// Returns the entry point, which is the first byte after the ELF header.
size_t compile(void) {
    init();
    var exit_status = lit(42);
    var syscall_number = lit(60);
    var args[] = { syscall_number, exit_status };
    syscall(sizeof args / sizeof args[0], args);
    return ELF_HEADER_SIZE;
}
// Write the ELF header and program headers at the start of the output file.
//
// Must be called after all code has been emitted: the current file position is
// taken as the total file length. `entry_point` is stored as the ELF entry
// address. Prints a message and exits(1) if the file position cannot be
// queried or reset.
static void write_elf(uint64_t entry_point) {
    long end = ftell(outfile);
    if (end < 0) {
        fprintf(stderr, "failed to get length of output file: %s\n", strerror(errno));
        exit(1);
    }
    uint64_t file_len = (uint64_t) end;
    if (fseek(outfile, 0, SEEK_SET) != 0) {
        fprintf(stderr, "failed to seek to start of output file: %s\n", strerror(errno));
        exit(1);
    }
    // Hardcoded ELF header for statically-linked position-independent executable.
    // Since we only support Linux amd64 static PIE, there's no need to abstract over this for now.
    uint8_t elf_header[ELF_HEADER_SIZE] = {
        // ELF header
        0x7F, 'E', 'L', 'F', // ELF magic
        2, 1, 1, 3, 0, // 64-bit little-endian Linux, ELF version 1
        0, 0, 0, 0, 0, 0, 0, // padding
        3, 0, 0x3E, 0, 1, 0, 0, 0, // dynamic executable, amd64, ELF version 1 again
        0, 0, 0, 0, 0, 0, 0, 0, // PATCHME: entry point address
        0x40, 0, 0, 0, 0, 0, 0, 0, // program header table offset (immediately after ELF)
        0, 0, 0, 0, 0, 0, 0, 0, // section header table offset (none)
        0, 0, 0, 0, 0x40, 0, 0x38, 0, // flags (none), header sizes
        2, 0, 0, 0, 0, 0, 0, 0, // 2 segments, no sections
        // program header segment
        6, 0, 0, 0, 4, 0, 0, 0, // program header segment, readable
        0x40, 0, 0, 0, 0, 0, 0, 0, // immediately after ELF header
        0x40, 0, 0, 0, 0, 0, 0, 0, // virtual address
        0, 0, 0, 0, 0, 0, 0, 0, // physical address
        0x70, 0, 0, 0, 0, 0, 0, 0, // size in file (2 * size of program header)
        0x70, 0, 0, 0, 0, 0, 0, 0, // size in memory
        8, 0, 0, 0, 0, 0, 0, 0, // alignment
        // executable segment
        1, 0, 0, 0, 5, 0, 0, 0, // loadable segment, readable and executable
        0, 0, 0, 0, 0, 0, 0, 0, // whole file
        0, 0, 0, 0, 0, 0, 0, 0, // virtual address
        0, 0, 0, 0, 0, 0, 0, 0, // physical address
        0, 0, 0, 0, 0, 0, 0, 0, // PATCHME: size in file
        0, 0, 0, 0, 0, 0, 0, 0, // PATCHME: size in memory
        0, 0x10, 0, 0, 0, 0, 0, 0, // alignment (4K)
    };
    // Fill in the PATCHME fields (values are copied in host byte order).
    memcpy(&elf_header[0x18], &entry_point, sizeof(uint64_t));
    memcpy(&elf_header[0x98], &file_len, sizeof(uint64_t));
    memcpy(&elf_header[0x98 + sizeof(uint64_t)], &file_len, sizeof(uint64_t));
    emit(elf_header, ELF_HEADER_SIZE);
}
// Entry point: `passc <output file> <source file>`.
// Reserves room for the ELF header, generates the code, then goes back and
// writes the header before finalizing the output file.
int main(int argc, char** argv) {
    if (argc != 3) {
        fprintf(stderr, "usage: %s <output file> <source file>\n", argv[0]);
        exit(1);
    }
    open_files(argv[2], argv[1]);
    // Skip over the header area; if this failed silently the code would be
    // written at offset 0 and then overwritten by the header.
    if (fseek(outfile, ELF_HEADER_SIZE, SEEK_SET) != 0) {
        fprintf(stderr, "failed to reserve space for ELF header: %s\n", strerror(errno));
        exit(1);
    }
    size_t entry_point = compile();
    write_elf((uint64_t) entry_point);
    close_files();
    return 0;
}