2022-09-10 22:06:21 -07:00
|
|
|
/// This file handles concerns of the executable file format.
|
|
|
|
/// This includes keeping track of the current virtual address,
|
|
|
|
/// performing relocations, and creating the executable file header.
|
|
|
|
///
|
|
|
|
/// The set of features we actually use is very small, so hopefully
|
|
|
|
/// it will turn out to be possible to port this across executable formats
|
|
|
|
/// and architectures with relatively few modifications.
|
|
|
|
#include "format.h"
|
|
|
|
#include "io.h"
|
|
|
|
|
|
|
|
#include <assert.h>
|
|
|
|
#include <stdbool.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
/// One entry in the symbol table.
struct symbol {
    /// Address assigned to the symbol, or `(uint64_t) -1` while the
    /// symbol has been created but not yet defined.
    uint64_t vaddr;
};
|
|
|
|
|
|
|
|
#define MAX_SYMBOLS 65535
|
|
|
|
static uint32_t symbol_count = 0;
|
|
|
|
static struct symbol symbols[MAX_SYMBOLS];
|
|
|
|
|
|
|
|
/// Kinds of relocation this file knows about.
///
/// The numeric values follow the R_X86_64_* relocation numbering of the
/// System V AMD64 ABI. Only REL_PC32 is handled by this file at present;
/// the other kinds are declared but their emitters are still TODO stubs.
enum relocation_type {
    REL_PC32     = 2,   // 32-bit PC-relative displacement
    REL_GOTPCREL = 9,   // 32-bit PC-relative offset to a GOT entry
    REL_SIZE32   = 32,  // 32-bit symbol size
    REL_SIZE64   = 33,  // 64-bit symbol size
};
|
|
|
|
|
|
|
|
struct relocation {
|
|
|
|
enum relocation_type type;
|
|
|
|
symbol symbol;
|
|
|
|
uint64_t offset;
|
|
|
|
};
|
|
|
|
|
|
|
|
#define MAX_RELOCATIONS 65535
|
|
|
|
static uint32_t relocation_count = 0;
|
|
|
|
static struct relocation relocations[MAX_RELOCATIONS];
|
|
|
|
|
|
|
|
/// Running count of bytes reserved/appended so far, i.e. the current
/// position in the output. Symbols defined via define_executable_symbol()
/// take this value as their address.
static uint64_t file_here;
|
|
|
|
|
|
|
|
/// Not the size of the ELF header per se, but rather the ELF header
/// plus the program headers we include:
/// a 0x40-byte ELF header followed by two 0x38-byte program headers.
/// We reserve this much space at the beginning of every file
/// to fill in once the executable is finished.
#define ELF_HEADER_SIZE (0x40 + 2 * 0x38)
|
|
|
|
|
|
|
|
void elf_executable(void) {
|
|
|
|
reserve(ELF_HEADER_SIZE);
|
|
|
|
file_here += ELF_HEADER_SIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Completes the executable: writes the real ELF/program headers over the
/// space reserved by elf_executable() and resolves all pending relocations.
///
/// `entry_point` is the symbol whose address becomes the ELF entry point;
/// it must already be defined (asserted). Every symbol referenced by a
/// pending relocation must be defined as well (asserted), and only
/// REL_PC32 relocations are supported (asserted).
void finish_executable(symbol entry_point) {
    uint64_t file_len = file_here;

    // Hardcoded ELF header for statically-linked position-independent executable.
    // Since we only support Linux amd64 static PIE, there's no need to abstract over this for now.
    uint8_t elf_header[ELF_HEADER_SIZE] = {
        // ELF header
        0x7F, 'E', 'L', 'F', // ELF magic
        2, 1, 1, 3, 0, // 64-bit little-endian Linux, ELF version 1
        0, 0, 0, 0, 0, 0, 0, // padding
        3, 0, 0x3E, 0, 1, 0, 0, 0, // dynamic executable, amd64, ELF version 1 again
        0, 0, 0, 0, 0, 0, 0, 0, // PATCHME: entry point address
        0x40, 0, 0, 0, 0, 0, 0, 0, // program header table offset (immediately after ELF)
        0, 0, 0, 0, 0, 0, 0, 0, // section header table offset (none)
        0, 0, 0, 0, 0x40, 0, 0x38, 0, // flags (none), header sizes
        2, 0, 0, 0, 0, 0, 0, 0, // 2 segments, no sections

        // program header segment
        6, 0, 0, 0, 4, 0, 0, 0, // program header segment, readable
        0x40, 0, 0, 0, 0, 0, 0, 0, // immediately after ELF header
        0x40, 0, 0, 0, 0, 0, 0, 0, // virtual address
        0, 0, 0, 0, 0, 0, 0, 0, // physical address
        0x70, 0, 0, 0, 0, 0, 0, 0, // size in file (2 * size of program header)
        0x70, 0, 0, 0, 0, 0, 0, 0, // size in memory
        8, 0, 0, 0, 0, 0, 0, 0, // alignment

        // executable segment
        1, 0, 0, 0, 5, 0, 0, 0, // loadable segment, readable and executable
        0, 0, 0, 0, 0, 0, 0, 0, // whole file
        0, 0, 0, 0, 0, 0, 0, 0, // virtual address
        0, 0, 0, 0, 0, 0, 0, 0, // physical address
        0, 0, 0, 0, 0, 0, 0, 0, // PATCHME: size in file
        0, 0, 0, 0, 0, 0, 0, 0, // PATCHME: size in memory
        0, 0x10, 0, 0, 0, 0, 0, 0, // alignment (4K)
    };

    // An undefined entry point would otherwise be written into the header
    // as the "not yet defined" sentinel address, producing a broken file.
    uint64_t ep = symbols[entry_point].vaddr;
    assert(ep != (uint64_t) -1);

    // Fill in the PATCHME fields. The values are copied in host byte order,
    // so this assumes a little-endian host to match the header above.
    memcpy(&elf_header[0x18], &ep, sizeof(uint64_t));                         // entry point
    memcpy(&elf_header[0x98], &file_len, sizeof(uint64_t));                   // size in file
    memcpy(&elf_header[0x98 + sizeof(uint64_t)], &file_len, sizeof(uint64_t)); // size in memory

    patch(0, elf_header, ELF_HEADER_SIZE);

    for (uint32_t i = 0; i < relocation_count; i++) {
        struct relocation rel = relocations[i];
        assert(rel.type == REL_PC32);
        uint64_t vaddr = symbols[rel.symbol].vaddr;
        assert(vaddr != (uint64_t) -1);
        // PC-relative is from the *end* of the instruction,
        // and the displacement is 4 bytes (32 bits).
        int64_t disp = (int64_t) vaddr - ((int64_t) rel.offset + 4);
        assert(disp >= INT32_MIN && disp <= INT32_MAX);
        patch_u32(rel.offset, (int32_t) disp);
    }
}
|
|
|
|
|
|
|
|
symbol new_symbol(void) {
|
|
|
|
struct symbol* sym = &symbols[symbol_count];
|
|
|
|
sym->vaddr = (uint64_t) -1;
|
|
|
|
if (symbol_count == MAX_SYMBOLS) {
|
|
|
|
fprintf(stderr, "error: exceeded maximum number of symbols\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
return symbol_count++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Defines symbol `s` at the current output position.
///
/// A symbol may be defined only once; defining it again trips the assert.
void define_executable_symbol(symbol s) {
    // (uint64_t) -1 is the "not yet defined" marker set by new_symbol().
    assert(symbols[s].vaddr == (uint64_t) -1);
    symbols[s].vaddr = file_here;
}
|
|
|
|
|
|
|
|
/// Not implemented yet: always fails an assertion.
void define_readonly_symbol(symbol sym) {
    (void) sym;  // unused until this is implemented
    // TODO:
    assert(0);
}
|
|
|
|
|
|
|
|
void append_data(size_t size, const void* buf) {
|
|
|
|
file_here += size;
|
|
|
|
emit(buf, size);
|
|
|
|
}
|
|
|
|
|
|
|
|
void append_u8(uint8_t x) {
|
2022-10-19 12:21:46 -07:00
|
|
|
file_here += 1;
|
2022-09-10 22:06:21 -07:00
|
|
|
emit_u8(x);
|
|
|
|
}
|
|
|
|
|
|
|
|
void append_u32(uint32_t x) {
|
2022-10-19 12:21:46 -07:00
|
|
|
file_here += 4;
|
2022-09-10 22:06:21 -07:00
|
|
|
emit_u32(x);
|
|
|
|
}
|
|
|
|
|
|
|
|
void append_u64(uint64_t x) {
|
2022-10-19 12:21:46 -07:00
|
|
|
file_here += 8;
|
2022-09-10 22:06:21 -07:00
|
|
|
emit_u64(x);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct relocation* new_relocation(void) {
|
|
|
|
if (relocation_count == MAX_RELOCATIONS) {
|
|
|
|
fprintf(stderr, "error: exceeded maximum number of relocations\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
struct relocation* rel = &relocations[relocation_count];
|
|
|
|
relocation_count++;
|
|
|
|
return rel;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Emits a 4-byte PC-relative reference to `sym` at the current position.
///
/// If the displacement can be computed right away it is written directly.
/// Otherwise a REL_PC32 relocation is recorded and four zero bytes are
/// written as a placeholder for finish_executable() to patch.
void relocate_pc32(symbol sym) {
    int32_t disp = symbol_offset(sym, 4);

    if (disp == INT32_MAX) {
        // INT32_MAX is symbol_offset()'s "not available" answer.
        struct relocation* pending = new_relocation();
        pending->symbol = sym;
        pending->offset = file_here;
        pending->type = REL_PC32;
        append_u32(0);
    } else {
        append_u32((uint32_t) disp);
    }
}
|
|
|
|
|
|
|
|
/// Computes the displacement from `off` bytes past the current position
/// to the address of `sym`.
///
/// Returns INT32_MAX when no usable displacement exists: either the symbol
/// is not defined yet, or the displacement is not strictly between
/// INT32_MIN and INT32_MAX.
int32_t symbol_offset(symbol sym, int8_t off) {
    const uint64_t target = symbols[sym].vaddr;

    // (uint64_t) -1 means the symbol has not been defined yet.
    if (target != (uint64_t) -1) {
        int64_t delta = (int64_t) target - ((int64_t) file_here + off);
        if (delta > INT32_MIN && delta < INT32_MAX) {
            return (int32_t) delta;
        }
    }
    return INT32_MAX;
}
|
|
|
|
|
|
|
|
/// Not implemented yet: always fails an assertion.
void relocate_gotpcrel(symbol sym) {
    (void) sym;  // unused until this is implemented
    // TODO
    assert(0);
}
|
|
|
|
|
|
|
|
/// Not implemented yet: always fails an assertion.
void relocate_size32(symbol sym) {
    (void) sym;  // unused until this is implemented
    // TODO
    assert(0);
}
|
|
|
|
|
|
|
|
/// Not implemented yet: always fails an assertion.
void relocate_size64(symbol sym) {
    (void) sym;  // unused until this is implemented
    // TODO
    assert(0);
}
|