Separate instruction encoding into a separate file.
I describe the intended file structure in comments at the top of each file.
master
parent
b5667c61ec
commit
4e06f8d00f
2
Makefile
2
Makefile
|
@ -6,7 +6,7 @@ SHELL = /bin/sh
|
|||
CFLAGS = -std=c99 -pedantic -Wextra -Os
|
||||
LDFLAGS = -lc
|
||||
|
||||
OBJECTS = main.o asm.o io.o ir.o
|
||||
OBJECTS = asm.o io.o ir.o main.o x86encode.o
|
||||
|
||||
.PHONY: passc
|
||||
passc: .bin $(OBJECTS)
|
||||
|
|
224
src/asm.c
224
src/asm.c
|
@ -1,7 +1,13 @@
|
|||
// REFERENCES:
|
||||
// http://ref.x86asm.net/index.html (geek64-abc)
|
||||
// https://wiki.osdev.org/X86-64_Instruction_Encoding
|
||||
// https://defuse.ca/online-x86-assembler.htm
|
||||
/// This file handles the contextual generation of machine code.
|
||||
/// It abstracts over quirks like the limitations of addressing modes,
|
||||
/// provides higher-level functionality, and can perform peephole optimization.
|
||||
///
|
||||
/// Reserved registers:
|
||||
///
|
||||
/// * rsp: the stack pointer
|
||||
/// * rbp: the base of the stack frame
|
||||
/// * rax: the top of the stack
|
||||
/// * r13: a scratch register for compound instructions
|
||||
|
||||
#include "asm.h"
|
||||
#include "io.h"
|
||||
|
@ -9,218 +15,16 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
ip here;
|
||||
|
||||
#define REX 0x40
|
||||
// REX prefix with 64-bit operands set
|
||||
#define REX_W 0x48
|
||||
#define REX_R 0x44
|
||||
#define REX_X 0x42
|
||||
#define REX_B 0x41
|
||||
#define REXQ_R(r) (REX | ((r >= R8) ? REX_R : 0))
|
||||
#define REXQ_B(r) (REX | ((r >= R8) ? REX_B : 0))
|
||||
// REX prefix including upper bit of register
|
||||
#define REXQ_WR(r) (REX_W | REXQ_R(r))
|
||||
#define REXQ_WRB(r, b) (REX_W | REXQ_R(r) | REXQ_B(b))
|
||||
// lower 3 bits of register (not including part encoded in REX)
|
||||
#define REG(r) (r & 7)
|
||||
|
||||
#define MODRM_RR (3 << 6)
|
||||
#define MODRM_RM (0 << 6)
|
||||
#define MODRM_RD8 (1 << 6)
|
||||
#define MODRM_RD32 (2 << 6)
|
||||
#define MODRMQ(reg, rm) ((REG(reg) << 3) | REG(rm))
|
||||
#define MODRMQ_RR(reg, rm) (MODRM_RR | MODRMQ(reg, rm))
|
||||
#define MODRMQ_RM(reg, base) (MODRM_RM | MODRMQ(reg, base))
|
||||
#define MODRMQ_RD8(reg, base) (MODRM_RD8 | MODRMQ(reg, base))
|
||||
#define MODRMQ_RD32(reg, base) (MODRM_RD32 | MODRMQ(reg, base))
|
||||
|
||||
static int32_t rel_offs(ip from, ip to) {
|
||||
// jumping through hoops to hopefully avoid UB
|
||||
int64_t off = (int64_t) to - (int64_t) from;
|
||||
if (off > INT32_MAX || off < INT32_MIN) {
|
||||
fprintf(stderr, "displacement greater than 32 bits!\n");
|
||||
fprintf(stderr, "if you ever encounter this error, let me know and I'll implement it.\n");
|
||||
exit(1);
|
||||
}
|
||||
return (int32_t) off;
|
||||
}
|
||||
|
||||
static int32_t rip_rel(ip there) {
|
||||
return rel_offs(here, there);
|
||||
}
|
||||
|
||||
#define JUMP_DISP8_SIZE 2
|
||||
static void inst_jump_disp8(uint8_t disp) {
|
||||
uint8_t inst[JUMP_DISP8_SIZE] = {
|
||||
0xeb, // jmp Jbs
|
||||
disp
|
||||
};
|
||||
emit(&inst, JUMP_DISP8_SIZE);
|
||||
}
|
||||
|
||||
#define JUMP_DISP32_SIZE 5
|
||||
static void inst_jump_disp32(uint32_t disp) {
|
||||
emit_u8(0xe9); // jmp Jvds
|
||||
emit_u32(disp);
|
||||
}
|
||||
|
||||
void inst_jump(ip there) {
|
||||
int32_t disp8 = rel_offs(here + JUMP_DISP8_SIZE, there);
|
||||
if (disp8 < INT8_MAX && disp8 > INT8_MIN) {
|
||||
inst_jump_disp8(disp8);
|
||||
} else {
|
||||
inst_jump_disp32(rel_offs(here + JUMP_DISP32_SIZE, there));
|
||||
}
|
||||
x86_inst_jmp_disp((int32_t) (there - here));
|
||||
// TODO: support 64-bit jumps?
|
||||
}
|
||||
|
||||
ip inst_jump_unresolved(void) {
|
||||
inst_jump_disp32(0);
|
||||
x86_inst_jmp_disp32(0);
|
||||
return here;
|
||||
}
|
||||
|
||||
void inst_jump_resolve(ip disp, ip there) {
|
||||
patch_i32(disp - 4, rel_offs(disp, there));
|
||||
}
|
||||
|
||||
static void inst_mov_imm32(reg reg, uint32_t imm) {
|
||||
// mov Zvqp Ivqp
|
||||
if (reg >= R8) {
|
||||
emit_u8(REXQ_R(reg));
|
||||
}
|
||||
emit_u8(0xb8 + REG(reg));
|
||||
emit_u32(imm);
|
||||
}
|
||||
|
||||
static void inst_mov_imm64(reg reg, uint64_t imm) {
|
||||
// mov Zvqp Ivqp
|
||||
uint8_t buf[10] = { REXQ_WR(reg), 0xb8 + REG(reg), 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
memcpy(&buf[2], &imm, sizeof(uint64_t));
|
||||
emit(buf, 10);
|
||||
}
|
||||
|
||||
void inst_mov_imm(reg reg, uint64_t imm) {
|
||||
// TODO: emit `mov ax`, `mov al`, xor, xor+inc, xor+neg
|
||||
if (imm <= UINT32_MAX) {
|
||||
inst_mov_imm32(reg, (uint32_t) imm);
|
||||
} else {
|
||||
inst_mov_imm64(reg, imm);
|
||||
}
|
||||
}
|
||||
|
||||
void inst_mov_imm_i64(reg reg, int64_t imm) {
|
||||
// TODO: emit sign extensions
|
||||
if (imm >= 0 && imm <= UINT32_MAX) {
|
||||
inst_mov_imm32(reg, (uint32_t) imm);
|
||||
} else {
|
||||
inst_mov_imm64(reg, (uint64_t) imm);
|
||||
}
|
||||
}
|
||||
|
||||
static void check_base(reg base) {
|
||||
if (base == SP || base == BP || base == R12 || base == R13) {
|
||||
fprintf(stderr, "indirect addressing not implemented for sp & co\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
void inst_mov(reg dest, reg src) {
|
||||
// mov Evqp Gvqp
|
||||
emit_u8(REXQ_WRB(dest, src));
|
||||
emit_u8(0x89);
|
||||
emit_u8(MODRMQ_RR(src, dest));
|
||||
}
|
||||
|
||||
void inst_mov_from(reg dest, reg base) {
|
||||
check_base(base);
|
||||
// mov Gvqp Evqp
|
||||
emit_u8(REXQ_WRB(base, dest));
|
||||
emit_u8(0x8B);
|
||||
emit_u8(MODRMQ_RM(dest, base));
|
||||
}
|
||||
|
||||
static void inst_mov_from_disp8(reg dest, reg base, int8_t disp) {
|
||||
check_base(base);
|
||||
// mov Gvqp Evqp
|
||||
emit_u8(REXQ_WRB(base, dest));
|
||||
emit_u8(0x8B);
|
||||
emit_u8(MODRMQ_RD8(dest, base));
|
||||
emit_u8(disp);
|
||||
}
|
||||
|
||||
static void inst_mov_from_disp32(reg dest, reg base, int32_t disp) {
|
||||
check_base(base);
|
||||
// mov Gvqp Evqp
|
||||
emit_u8(REXQ_WRB(base, dest));
|
||||
emit_u8(0x8B);
|
||||
emit_u8(MODRMQ_RD32(dest, base));
|
||||
emit_u32((int32_t) disp);
|
||||
}
|
||||
|
||||
void inst_mov_from_disp(reg dest, reg base, int32_t disp) {
|
||||
if (disp == 0) {
|
||||
inst_mov_from(dest, base);
|
||||
} else if (disp <= INT8_MAX && disp >= INT8_MIN) {
|
||||
inst_mov_from_disp8(dest, base, (int8_t) disp);
|
||||
} else {
|
||||
inst_mov_from_disp32(dest, base, disp);
|
||||
}
|
||||
}
|
||||
|
||||
void inst_mov_to(reg base, reg src) {
|
||||
check_base(base);
|
||||
// mov Evqp Gvqp
|
||||
emit_u8(REXQ_WRB(base, src));
|
||||
emit_u8(0x89);
|
||||
emit_u8(MODRMQ_RR(base, src));
|
||||
}
|
||||
|
||||
static void inst_mov_to_disp8(reg base, reg src, int8_t disp) {
|
||||
check_base(base);
|
||||
// mov Evqp Gvqp
|
||||
emit_u8(REXQ_WRB(base, src));
|
||||
emit_u8(0x89);
|
||||
emit_u8(MODRMQ_RD8(base, src));
|
||||
emit_u8(disp);
|
||||
}
|
||||
|
||||
static void inst_mov_to_disp32(reg base, reg src, int32_t disp) {
|
||||
check_base(base);
|
||||
// mov Evqp Gvqp
|
||||
emit_u8(REXQ_WRB(base, src));
|
||||
emit_u8(0x89);
|
||||
emit_u8(MODRMQ_RD32(base, src));
|
||||
emit_u32((uint32_t) disp);
|
||||
}
|
||||
|
||||
void inst_mov_to_disp(reg base, reg src, int32_t disp) {
|
||||
if (disp == 0) {
|
||||
inst_mov_to(base, src);
|
||||
} else if (disp <= INT8_MAX && disp >= INT8_MIN) {
|
||||
inst_mov_to_disp8(base, src, (int8_t) disp);
|
||||
} else {
|
||||
inst_mov_to_disp32(base, src, disp);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void inst_syscall(void) {
|
||||
const uint8_t buf[2] = { 0x0f, 0x05 };
|
||||
emit(&buf, 2);
|
||||
}
|
||||
|
||||
void inst_push(reg reg) {
|
||||
// push Zvq
|
||||
if (reg >= R8) {
|
||||
emit_u8(REX_B);
|
||||
}
|
||||
emit_u8(0x50 + REG(reg));
|
||||
}
|
||||
|
||||
void inst_pop(reg reg) {
|
||||
// pop Zvq
|
||||
if (reg >= R8) {
|
||||
emit_u8(REX_B);
|
||||
}
|
||||
emit_u8(0x58 + REG(reg));
|
||||
patch_i32(disp - 4, (int32_t) (there - here));
|
||||
}
|
||||
|
|
24
src/asm.h
24
src/asm.h
|
@ -1,34 +1,14 @@
|
|||
#ifndef _ASM_H
|
||||
#define _ASM_H
|
||||
|
||||
#include "x86encode.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
typedef size_t ip;
|
||||
extern ip here;
|
||||
|
||||
// A general-purpose x86 register.
|
||||
// The specific register size (e.g. al/ax/eax/rax) depends on the instruction.
|
||||
// All registers are valid for all instructions; we will perform exchanges if necessary.
|
||||
typedef enum reg {
|
||||
RA = 0, // rax, eax, ax, al
|
||||
RC = 1, // rcx, ecx, cx, cl
|
||||
RD = 2, // rdx, edx, dx, dl
|
||||
RB = 3, // rbx, ebx, bx, bl
|
||||
SP = 4, // rsp, esp, sp, spl (we do not use ah)
|
||||
BP = 5, // rbp, ebp, bp, bpl (we do not use ch)
|
||||
SI = 6, // rsi, esi, si, sil (we do not use dh)
|
||||
DI = 7, // rdi, edi, di, dil (we do not use bh)
|
||||
R8 = 8, // r8, r8d, r8w, r8l
|
||||
R9 = 9, // r9, r9d, r9w, r9l
|
||||
R10 = 10, // r10, r10d, r10w, r10l
|
||||
R11 = 11, // r11, r11d, r11w, r11l
|
||||
R12 = 12, // r12, r12d, r12w, r12l
|
||||
R13 = 13, // r13, r13d, r13w, r13l
|
||||
R14 = 14, // r14, r14d, r14w, r14l
|
||||
R15 = 15, // r15, r15d, r15w, r15l
|
||||
} reg;
|
||||
|
||||
/// Jump to a known address.
|
||||
void inst_jump(ip there);
|
||||
|
||||
|
|
21
src/io.c
21
src/io.c
|
@ -1,4 +1,4 @@
|
|||
#ifdef __unix__
|
||||
#ifdef __linux__
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
|
||||
|
@ -8,9 +8,10 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef __unix__
|
||||
// This program can be trivially converted to work with only the C standard library
|
||||
// at the cost of not being able to link the output file atomically.
|
||||
#ifdef __linux__
|
||||
// On Linux, we create a temporary output file using O_TMPFILE
|
||||
// so that the output file is never in a corrupt or incomplete state.
|
||||
// However, O_TMPFILE is not portable, so we only enable it on Linux.
|
||||
#include <fcntl.h>
|
||||
#include <libgen.h>
|
||||
#include <sys/stat.h>
|
||||
|
@ -20,8 +21,10 @@
|
|||
static const char* outfile_name;
|
||||
FILE* infile;
|
||||
FILE* outfile;
|
||||
// HACK: "here" tracking should be handled by the assembler, not IO.
|
||||
size_t here = 0;
|
||||
|
||||
#ifdef __unix__
|
||||
#ifdef __linux__
|
||||
void open_files(const char* infile_name, const char* outfile_name_) {
|
||||
outfile_name = outfile_name_;
|
||||
// To avoid creating a corrupt or incomplete output file,
|
||||
|
@ -84,8 +87,10 @@ void open_files(const char* infile_name, const char* outfile_name) {
|
|||
exit(1);
|
||||
}
|
||||
|
||||
// FIXME: portable temporary output file
|
||||
// There is no way for us to mark the file as executable.
|
||||
// Then again, if it's not Unix, that probably doesn't matter.
|
||||
// Then again, if it's not Linux, that probably doesn't matter,
|
||||
// because that's the only target platform we support.
|
||||
outfile = fopen(outfile_name, "wb");
|
||||
if (outfile == NULL) {
|
||||
fprintf(stderr, "failed to open output file: %s\n", strerror(errno));
|
||||
|
@ -94,6 +99,9 @@ void open_files(const char* infile_name, const char* outfile_name) {
|
|||
}
|
||||
|
||||
void close_files(void) {
|
||||
// FIXME: set executable permission on unixen other than Linux
|
||||
// (relevant for e.g. the BSDs, some of which can transparently
|
||||
// emulate Linux executables.)
|
||||
if (fclose(outfile) != 0) {
|
||||
fprintf(stderr, "failed to close output file: %s\n", strerror(errno));
|
||||
// NOTE: ideally we'd do this on any dirty exit
|
||||
|
@ -113,6 +121,7 @@ void emit(const void* restrict ptr, size_t count) {
|
|||
fprintf(stderr, "failed to write to output file\n");
|
||||
exit(1);
|
||||
}
|
||||
here += count;
|
||||
}
|
||||
|
||||
void emit_u8(uint8_t x) {
|
||||
|
|
1
src/io.h
1
src/io.h
|
@ -6,6 +6,7 @@
|
|||
|
||||
extern FILE* outfile;
|
||||
extern FILE* infile;
|
||||
extern size_t here;
|
||||
|
||||
void open_files(const char* infile_name, const char* outfile_name);
|
||||
void close_files(void);
|
||||
|
|
30
src/ir.c
30
src/ir.c
|
@ -1,8 +1,12 @@
|
|||
/// This file serves conceptually as the intermediate representation (IR)
|
||||
/// of the compiler. Compared to "asm", this file is aware of stack frames,
|
||||
/// control flow blocks and labels, compound types like structs and enums,
|
||||
/// and register allocation.
|
||||
|
||||
#include "asm.h"
|
||||
#include "ir.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -69,7 +73,7 @@ static void reserve(size_t argc) {
|
|||
static var new_var(void) {
|
||||
var var = top_of_stack;
|
||||
top_of_stack++;
|
||||
inst_push(RA);
|
||||
x86_inst_push_r64(RA);
|
||||
return var;
|
||||
}
|
||||
|
||||
|
@ -90,15 +94,15 @@ static void move(var dest, var src) {
|
|||
struct storage ds = storage(dest);
|
||||
struct storage ss = storage(src);
|
||||
if (ds.type == STORE_REG && ss.type == STORE_REG) {
|
||||
inst_mov(ds.reg, ss.reg);
|
||||
x86_inst_mov_r64_r64(ds.reg, ss.reg);
|
||||
} else if (ds.type == STORE_STACK && ss.type == STORE_REG) {
|
||||
inst_mov_to_disp(RB, ss.reg, ds.off);
|
||||
x86_inst_mov_m64_r64_disp(RB, ss.reg, ds.off);
|
||||
} else if (ds.type == STORE_REG && ds.type == STORE_STACK) {
|
||||
inst_mov_from_disp(ds.reg, RB, ss.off);
|
||||
x86_inst_mov_r64_m64_disp(ds.reg, RB, ss.off);
|
||||
} else {
|
||||
// FIXME: DI is scratch register?
|
||||
inst_mov_from_disp(DI, RB, ss.off);
|
||||
inst_mov_to_disp(RB, DI, ds.off);
|
||||
x86_inst_mov_r64_m64_disp(DI, RB, ss.off);
|
||||
x86_inst_mov_m64_r64_disp(RB, DI, ds.off);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -195,7 +199,7 @@ var lit(uint64_t x) {
|
|||
var var = new_var();
|
||||
struct storage stg = storage(var);
|
||||
assert(stg.type == STORE_REG);
|
||||
inst_mov_imm(stg.reg, x);
|
||||
x86_inst_mov_r64_imm(stg.reg, x);
|
||||
return var;
|
||||
}
|
||||
|
||||
|
@ -206,14 +210,14 @@ void syscall(size_t argc, var* args) {
|
|||
// FIXME: save args in case we don't want to sysexit
|
||||
// FIXME: save other registers
|
||||
if (argc > 1)
|
||||
inst_mov_from_disp(DI, RB, -args[1] * 8 - 16);
|
||||
x86_inst_mov_r64_m64_disp(DI, RB, -args[1] * 8 - 16);
|
||||
if (argc > 2)
|
||||
inst_mov_from_disp(SI, RB, -args[2] * 8 - 16);
|
||||
x86_inst_mov_r64_m64_disp(SI, RB, -args[2] * 8 - 16);
|
||||
if (argc > 3)
|
||||
inst_mov_from_disp(RD, RB, -args[3] * 8 - 16);
|
||||
inst_syscall();
|
||||
x86_inst_mov_r64_m64_disp(RD, RB, -args[3] * 8 - 16);
|
||||
x86_inst_syscall();
|
||||
}
|
||||
|
||||
void init(void) {
|
||||
inst_mov(RB, SP);
|
||||
x86_inst_mov_r64_r64(RB, SP);
|
||||
}
|
||||
|
|
2
src/ir.h
2
src/ir.h
|
@ -44,4 +44,6 @@ var lit(uint64_t x);
|
|||
|
||||
void syscall(size_t argc, var* args);
|
||||
|
||||
void init(void);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,226 @@
|
|||
/// This file handles the (context-free) encoding of x86 instructions.
|
||||
/// It may substitute an instruction for a shorter one, but is unable to fuse instructions.
|
||||
|
||||
// REFERENCES:
|
||||
// http://ref.x86asm.net/index.html (geek64-abc)
|
||||
// https://wiki.osdev.org/X86-64_Instruction_Encoding
|
||||
// https://defuse.ca/online-x86-assembler.htm
|
||||
|
||||
#include "io.h"
|
||||
#include "x86encode.h"
|
||||
|
||||
// Bare REX prefix (0100WRXB) with no extension bits set.
#define REX 0x40
// REX prefix with 64-bit operands set
#define REX_W 0x48
// REX prefix with the R bit set (extends the ModRM reg field)
#define REX_R 0x44
// REX prefix with the X bit set (extends the SIB index field)
#define REX_X 0x42
// REX prefix with the B bit set (extends ModRM rm, SIB base, or an
// opcode-embedded register)
#define REX_B 0x41

// lower 3 bits of register (not including part encoded in REX)
// The argument is parenthesized so that expressions such as REG(a | b)
// are masked as a whole.
#define REG(r) ((r) & 7)

// ModRM "mod" field values, already shifted into bits 7..6.
#define MODRM_RR (3 << 6)   // register-direct
#define MODRM_RM (0 << 6)   // register-indirect, no displacement
#define MODRM_RM8 (1 << 6)  // register-indirect + 8-bit displacement
#define MODRM_RM32 (2 << 6) // register-indirect + 32-bit displacement
|
||||
|
||||
/// Emit a REX prefix only if `reg` is one of r8..r15.
///
/// Every caller in this file places `reg` in the low three bits of the
/// opcode byte (`op + REG(reg)`). That field is extended by REX.B, not
/// REX.R, so the prefix emitted here is 0x41. Emitting REX.R instead would
/// silently encode the low register (e.g. `push r8` would become `push rax`).
static void x86_opt_rexr(reg reg) {
    if (reg >= R8) {
        emit_u8(REX_B);
    }
}
|
||||
|
||||
/// Emit a REX.W prefix, adding the extension bit for `reg` when it is
/// one of r8..r15.
///
/// All users of this helper in this file are `op + REG(reg)` encodings,
/// where the register's high bit lives in REX.B (e.g. `mov r8, imm64`
/// is 49 B8 ...), so REX.B is the bit that gets set.
static void x86_rexwr(reg reg) {
    uint8_t rex = REX_W;
    if (reg >= R8) {
        rex |= REX_B;
    }
    emit_u8(rex);
}
|
||||
|
||||
/// Emit a REX.W prefix for a ModRM-encoded instruction.
///
/// `r` is the register that goes into the ModRM reg field (extended by
/// REX.R); `b` is the register in the ModRM rm field (extended by REX.B).
static void x86_rexwrb(reg r, reg b) {
    const uint8_t ext_r = (r >= R8) ? REX_R : 0;
    const uint8_t ext_b = (b >= R8) ? REX_B : 0;
    emit_u8((uint8_t) (REX | REX_W | ext_r | ext_b));
}
|
||||
|
||||
/// Emit a register-direct ModRM byte (mod = 11).
///
/// `r` fills the reg field and `b` fills the rm field; only the low three
/// bits of each register number are used here.
static void x86_modrr(reg r, reg b) {
    const uint8_t reg_field = (uint8_t) (REG(r) << 3);
    const uint8_t rm_field = (uint8_t) REG(b);
    emit_u8((uint8_t) (MODRM_RR | reg_field | rm_field));
}
|
||||
|
||||
/// Emit a ModRM byte for `[b]` (mod = 00, no displacement), followed by a
/// SIB byte when the base register requires one.
///
/// rm = 100 (rsp / r12) does not name a base register directly; it means
/// "a SIB byte follows". For those bases we append SIB 0x24
/// (scale = 1, no index, base = rm), which addresses `[rsp]` / `[r12]`.
///
/// NOTE: rm = 101 (rbp / r13) with mod = 00 selects RIP-relative disp32
/// addressing; callers must not use this helper for those bases.
static void x86_modrm(reg r, reg b) {
    emit_u8(MODRM_RM | (REG(r) << 3) | REG(b));
    if (REG(b) == SP) {
        emit_u8(0x24); // SIB: no index, base taken from rm (+ REX.B)
    }
}
|
||||
|
||||
/// Emit a ModRM byte for `[b + disp8]` (mod = 01), followed by a SIB byte
/// when the base register requires one. The caller emits the displacement.
///
/// rm = 100 (rsp / r12) means "a SIB byte follows", so for those bases we
/// append SIB 0x24 (scale = 1, no index, base = rm) before the displacement.
static void x86_modrm8(reg r, reg b) {
    emit_u8(MODRM_RM8 | (REG(r) << 3) | REG(b));
    if (REG(b) == SP) {
        emit_u8(0x24); // SIB: no index, base taken from rm (+ REX.B)
    }
}
|
||||
|
||||
/// Emit a ModRM byte for `[b + disp32]` (mod = 10), followed by a SIB byte
/// when the base register requires one. The caller emits the displacement.
///
/// rm = 100 (rsp / r12) means "a SIB byte follows", so for those bases we
/// append SIB 0x24 (scale = 1, no index, base = rm) before the displacement.
static void x86_modrm32(reg r, reg b) {
    emit_u8(MODRM_RM32 | (REG(r) << 3) | REG(b));
    if (REG(b) == SP) {
        emit_u8(0x24); // SIB: no index, base taken from rm (+ REX.B)
    }
}
|
||||
|
||||
/// Encode an instruction whose register operand is embedded in the opcode
/// byte: an optional REX prefix followed by `op + (low 3 bits of reg)`.
static void x86_enc_opr(uint8_t op, reg reg) {
    const uint8_t opcode = (uint8_t) (op + REG(reg));
    x86_opt_rexr(reg);
    emit_u8(opcode);
}
|
||||
|
||||
/// Encode a 64-bit instruction whose register operand is embedded in the
/// opcode byte: a REX.W prefix followed by `op + (low 3 bits of reg)`.
static void x86_enc_rexw_opr(uint8_t op, reg reg) {
    const uint8_t opcode = (uint8_t) (op + REG(reg));
    x86_rexwr(reg);
    emit_u8(opcode);
}
|
||||
|
||||
/// Encode `[REX] op+reg imm32`: the opcode-embedded-register form followed
/// by a 32-bit immediate.
static void x86_enc_opr_imm32(uint8_t op, reg reg, uint32_t imm) {
    // Prefix and opcode byte are exactly the plain op+reg encoding.
    x86_enc_opr(op, reg);
    emit_u32(imm);
}
|
||||
|
||||
/// Encode `REX.W op+reg imm32`: the 64-bit opcode-embedded-register form
/// followed by a 32-bit immediate.
static void x86_enc_rexw_opr_imm32(uint8_t op, reg reg, uint32_t imm) {
    // Prefix and opcode byte are exactly the REX.W op+reg encoding.
    x86_enc_rexw_opr(op, reg);
    emit_u32(imm);
}
|
||||
|
||||
/// Encode `REX.W op+reg imm64`: the 64-bit opcode-embedded-register form
/// followed by a 64-bit immediate.
static void x86_enc_rexw_opr_imm64(uint8_t op, reg reg, uint64_t imm) {
    const uint8_t opcode = (uint8_t) (op + REG(reg));
    x86_rexwr(reg);
    emit_u8(opcode);
    emit_u64(imm);
}
|
||||
|
||||
/// Encode a 64-bit register-to-register instruction:
/// REX.W prefix, opcode, then a register-direct ModRM byte with `r` in the
/// reg field and `m` in the rm field.
static void x86_enc_rexw_modrr(uint8_t op, reg r, reg m) {
    x86_rexwrb(r, m); // prefix carries the high bits of both registers
    emit_u8(op);
    x86_modrr(r, m);
}
|
||||
|
||||
/// Encode a 64-bit instruction with a register operand `r` and a memory
/// operand `[b]` (no displacement): REX.W prefix, opcode, ModRM.
///
/// With mod = 00, rm = 101 (rbp / r13) does not mean `[rbp]`; it selects
/// RIP-relative addressing and expects a 32-bit displacement to follow.
/// For those bases we therefore emit the `[b + disp8]` form with a zero
/// displacement, which addresses the same location.
static void x86_enc_rexw_modrm(uint8_t op, reg r, reg b) {
    x86_rexwrb(r, b);
    emit_u8(op);
    if (REG(b) == BP) {
        x86_modrm8(r, b);
        emit_u8(0); // disp8 = 0
    } else {
        x86_modrm(r, b);
    }
}
|
||||
|
||||
/// Encode a 64-bit instruction with a register operand `r` and a memory
/// operand `[b + disp]`, where `disp` is an 8-bit displacement:
/// REX.W prefix, opcode, ModRM (mod = 01), displacement byte.
static void x86_enc_rexw_modrm8(uint8_t op, reg r, reg b, int8_t disp) {
    const uint8_t disp_byte = (uint8_t) disp;
    x86_rexwrb(r, b);
    emit_u8(op);
    x86_modrm8(r, b);
    emit_u8(disp_byte);
}
|
||||
|
||||
/// Encode a 64-bit instruction with a register operand `r` and a memory
/// operand `[b + disp]`, where `disp` is a 32-bit displacement:
/// REX.W prefix, opcode, ModRM (mod = 10), 32-bit displacement.
static void x86_enc_rexw_modrm32(uint8_t op, reg r, reg b, int32_t disp) {
    const uint32_t disp_bits = (uint32_t) disp;
    x86_rexwrb(r, b);
    emit_u8(op);
    x86_modrm32(r, b);
    emit_u32(disp_bits);
}
|
||||
|
||||
/// Encode a 64-bit `reg, [base + disp]` instruction using the shortest
/// displacement form available.
///
/// * no displacement when `disp` is zero and the base is neither rbp nor r13
///   (those two bases cannot be encoded without a displacement);
/// * an 8-bit displacement when `disp` fits in int8_t;
/// * a 32-bit displacement otherwise.
static void x86_enc_rexw_modrmd(uint8_t op, reg r, reg b, int32_t disp) {
    const int needs_disp = (disp != 0) || (b == BP) || (b == R13);
    if (!needs_disp) {
        x86_enc_rexw_modrm(op, r, b);
        return;
    }

    const int fits_disp8 = (disp >= INT8_MIN) && (disp <= INT8_MAX);
    if (fits_disp8) {
        x86_enc_rexw_modrm8(op, r, b, (int8_t) disp);
        return;
    }

    x86_enc_rexw_modrm32(op, r, b, disp);
}
|
||||
|
||||
/// Encode a two-byte instruction: opcode followed by an 8-bit displacement.
/// Both bytes are written with a single emit() call.
static void x86_enc_disp8(uint8_t op, int8_t disp) {
    const uint8_t bytes[2] = { op, (uint8_t) disp };
    emit(bytes, sizeof bytes);
}
|
||||
|
||||
/// Encode an opcode byte followed by a 32-bit displacement.
static void x86_enc_disp32(uint8_t op, int32_t disp) {
    const uint32_t disp_bits = (uint32_t) disp;
    emit_u8(op);
    emit_u32(disp_bits);
}
|
||||
|
||||
/// mov r64, imm64 — load a full 64-bit immediate into `dest`
/// (REX.W + B8+r, followed by the 8-byte immediate).
void x86_inst_mov_r64_imm64(reg dest, uint64_t imm) {
    const uint8_t opcode = 0xb8; // mov Zvqp, Ivqp
    x86_enc_rexw_opr_imm64(opcode, dest, imm);
}
|
||||
|
||||
/// Load a 32-bit immediate into `dest` using the B8+r encoding without
/// REX.W (i.e. the 32-bit-operand form of the instruction).
void x86_inst_mov_r64_imm32(reg dest, uint32_t imm) {
    const uint8_t opcode = 0xb8; // mov Zvqp, Ivqp
    x86_enc_opr_imm32(opcode, dest, imm);
}
|
||||
|
||||
/// Load an unsigned immediate into `dest`, choosing the shorter 32-bit
/// encoding whenever the value fits in 32 bits.
void x86_inst_mov_r64_imm(reg dest, uint64_t imm) {
    // TODO: xor if 0, use inc and dec, 16-bit and 8-bit immediates
    if (imm > UINT32_MAX) {
        x86_inst_mov_r64_imm64(dest, imm);
        return;
    }
    x86_inst_mov_r64_imm32(dest, (uint32_t) imm);
}
|
||||
|
||||
/// Load a signed immediate into `dest`.
///
/// Non-negative values are forwarded to the unsigned variant, which picks
/// the shortest encoding. Negative values are emitted as a full 64-bit
/// immediate carrying the two's-complement bit pattern.
void x86_inst_mov_r64_imms(reg dest, int64_t imm) {
    // TODO: sign-extend
    if (imm >= 0) {
        x86_inst_mov_r64_imm(dest, (uint64_t) imm);
    } else {
        // The callee takes uint64_t; convert explicitly rather than relying
        // on an implicit signed-to-unsigned conversion.
        x86_inst_mov_r64_imm64(dest, (uint64_t) imm);
    }
}
|
||||
|
||||
/// mov dest, src — copy one 64-bit register to another
/// (opcode 8B, with `dest` in the ModRM reg field and `src` in rm).
void x86_inst_mov_r64_r64(reg dest, reg src) {
    const uint8_t opcode = 0x8b; // mov Gvqp, Evqp
    x86_enc_rexw_modrr(opcode, dest, src);
}
|
||||
|
||||
/// mov dest, [src] — load 64 bits from the address held in `src`.
void x86_inst_mov_r64_m64(reg dest, reg src) {
    const uint8_t opcode = 0x8b; // mov Gvqp, Evqp
    x86_enc_rexw_modrm(opcode, dest, src);
}
|
||||
|
||||
/// mov dest, [src + disp8] — load 64 bits using an 8-bit displacement.
void x86_inst_mov_r64_m64_disp8(reg dest, reg src, int8_t disp) {
    const uint8_t opcode = 0x8b; // mov Gvqp, Evqp
    x86_enc_rexw_modrm8(opcode, dest, src, disp);
}
|
||||
|
||||
/// mov dest, [src + disp32] — load 64 bits using a 32-bit displacement.
void x86_inst_mov_r64_m64_disp32(reg dest, reg src, int32_t disp) {
    const uint8_t opcode = 0x8b; // mov Gvqp, Evqp
    x86_enc_rexw_modrm32(opcode, dest, src, disp);
}
|
||||
|
||||
/// mov dest, [src + disp] — load 64 bits, letting the encoder pick the
/// shortest displacement form for `disp`.
void x86_inst_mov_r64_m64_disp(reg dest, reg src, int32_t disp) {
    const uint8_t opcode = 0x8b; // mov Gvqp, Evqp
    x86_enc_rexw_modrmd(opcode, dest, src, disp);
}
|
||||
|
||||
/// mov [dest], src — store the 64-bit register `src` at the address held
/// in `dest`.
///
/// The store form is opcode 89 (mov Evqp, Gvqp) with `src` in the ModRM reg
/// field and `dest` as the rm/base. Opcode 8A is the 8-bit *load*
/// (mov Gb, Eb) and must not be used here.
void x86_inst_mov_m64_r64(reg dest, reg src) {
    x86_enc_rexw_modrm(0x89, src, dest);
}
|
||||
|
||||
/// mov [dest + disp8], src — store the 64-bit register `src` using an
/// 8-bit displacement.
///
/// Uses opcode 89 (mov Evqp, Gvqp); opcode 8A is the 8-bit load form.
void x86_inst_mov_m64_r64_disp8(reg dest, reg src, int8_t disp) {
    x86_enc_rexw_modrm8(0x89, src, dest, disp);
}
|
||||
|
||||
/// mov [dest + disp32], src — store the 64-bit register `src` using a
/// 32-bit displacement.
///
/// Uses opcode 89 (mov Evqp, Gvqp); opcode 8A is the 8-bit load form.
void x86_inst_mov_m64_r64_disp32(reg dest, reg src, int32_t disp) {
    x86_enc_rexw_modrm32(0x89, src, dest, disp);
}
|
||||
|
||||
/// mov [dest + disp], src — store the 64-bit register `src`, letting the
/// encoder pick the shortest displacement form for `disp`.
///
/// Uses opcode 89 (mov Evqp, Gvqp); opcode 8A is the 8-bit load form.
void x86_inst_mov_m64_r64_disp(reg dest, reg src, int32_t disp) {
    x86_enc_rexw_modrmd(0x89, src, dest, disp);
}
|
||||
|
||||
/// push reg — opcode 50+r, with an optional REX prefix for r8..r15.
void x86_inst_push_r64(reg reg) {
    const uint8_t opcode = 0x50; // push Zvq
    x86_enc_opr(opcode, reg);
}
|
||||
|
||||
/// pop reg — opcode 58+r, with an optional REX prefix for r8..r15.
void x86_inst_pop_r64(reg reg) {
    const uint8_t opcode = 0x58; // pop Zvq
    x86_enc_opr(opcode, reg);
}
|
||||
|
||||
/// jmp rel8 — emit opcode EB followed by `disp` verbatim.
/// No adjustment is applied to `disp` here.
void x86_inst_jmp_disp8(int8_t disp) {
    const uint8_t opcode = 0xeb; // jmp Jbs
    x86_enc_disp8(opcode, disp);
}
|
||||
|
||||
/// jmp rel32 — emit opcode E9 followed by `disp` verbatim.
/// No adjustment is applied to `disp` here.
void x86_inst_jmp_disp32(int32_t disp) {
    const uint8_t opcode = 0xe9; // jmp Jvds
    x86_enc_disp32(opcode, disp);
}
|
||||
|
||||
void x86_inst_jmp_disp(int32_t disp) {
|
||||
int32_t disp8 = disp + X86_JMP_DISP8_SIZE;
|
||||
if (disp8 >= INT8_MIN && disp8 <= INT8_MAX) {
|
||||
x86_inst_jmp_disp8((int8_t) disp8);
|
||||
} else {
|
||||
x86_inst_jmp_disp32(disp + X86_JMP_DISP32_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
/// syscall — emit the two-byte opcode 0F 05.
void x86_inst_syscall(void) {
    static const uint8_t opcode[2] = { 0x0f, 0x05 };
    emit(opcode, sizeof opcode);
}
|
|
@ -0,0 +1,51 @@
|
|||
#ifndef _X86ENCODE_H
|
||||
#define _X86ENCODE_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// A general-purpose x86 register.
|
||||
// The specific register size (e.g. al/ax/eax/rax) depends on the instruction.
|
||||
// All registers are valid for all instructions; we will perform exchanges if necessary.
|
||||
typedef enum reg {
    // Legacy registers, numbered as in the hardware encoding (0..7).
    RA = 0, // rax, eax, ax, al
    RC,     // rcx, ecx, cx, cl
    RD,     // rdx, edx, dx, dl
    RB,     // rbx, ebx, bx, bl
    SP,     // rsp, esp, sp, spl (we do not use ah)
    BP,     // rbp, ebp, bp, bpl (we do not use ch)
    SI,     // rsi, esi, si, sil (we do not use dh)
    DI,     // rdi, edi, di, dil (we do not use bh)
    // Extended registers (8..15); these need a REX prefix bit.
    R8,     // r8, r8d, r8w, r8l
    R9,     // r9, r9d, r9w, r9l
    R10,    // r10, r10d, r10w, r10l
    R11,    // r11, r11d, r11w, r11l
    R12,    // r12, r12d, r12w, r12l
    R13,    // r13, r13d, r13w, r13l
    R14,    // r14, r14d, r14w, r14l
    R15,    // r15, r15d, r15w, r15l
} reg;
|
||||
|
||||
|
||||
/// Load an immediate into a register.
/// The `_imm64` / `_imm32` variants force a specific encoding; `_imm` picks
/// the shorter one that fits, and `_imms` accepts a signed value.
void x86_inst_mov_r64_imm64(reg dest, uint64_t imm);
void x86_inst_mov_r64_imm32(reg dest, uint32_t imm);
void x86_inst_mov_r64_imm(reg dest, uint64_t imm);
void x86_inst_mov_r64_imms(reg dest, int64_t imm);

/// Copy one 64-bit register to another.
void x86_inst_mov_r64_r64(reg dest, reg src);

/// Load a 64-bit register from memory at `[src]` / `[src + disp]`.
/// The `_disp` variant picks the shortest displacement encoding.
void x86_inst_mov_r64_m64(reg dest, reg src);
void x86_inst_mov_r64_m64_disp8(reg dest, reg src, int8_t disp);
void x86_inst_mov_r64_m64_disp32(reg dest, reg src, int32_t disp);
void x86_inst_mov_r64_m64_disp(reg dest, reg src, int32_t disp);

/// Store a 64-bit register to memory at `[dest]` / `[dest + disp]`.
/// The `_disp` variant picks the shortest displacement encoding.
void x86_inst_mov_m64_r64(reg dest, reg src);
void x86_inst_mov_m64_r64_disp8(reg dest, reg src, int8_t disp);
void x86_inst_mov_m64_r64_disp32(reg dest, reg src, int32_t disp);
void x86_inst_mov_m64_r64_disp(reg dest, reg src, int32_t disp);

/// Push / pop a 64-bit register.
void x86_inst_push_r64(reg reg);
void x86_inst_pop_r64(reg reg);

/// Relative jumps. The `_disp8` / `_disp32` variants emit `disp` as given;
/// `_disp` chooses between the two encodings.
/// The *_SIZE constants are the encoded lengths of each form in bytes.
#define X86_JMP_DISP8_SIZE 2
void x86_inst_jmp_disp8(int8_t disp);
#define X86_JMP_DISP32_SIZE 5
void x86_inst_jmp_disp32(int32_t disp);
void x86_inst_jmp_disp(int32_t disp);

/// Emit the `syscall` instruction.
void x86_inst_syscall(void);
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue