(Algebraic language) Fibonacci sequence works.

master
James T. Martin 2023-07-28 20:03:10 -07:00
parent 40f88918ef
commit 86fea958f4
Signed by: james
GPG Key ID: D6FB2F9892F9B225
13 changed files with 334 additions and 2213 deletions

View File

@ -6,7 +6,7 @@ SHELL = /bin/sh
CFLAGS = -std=c99 -pedantic -Wextra -Os
LDFLAGS = -lc
OBJECTS = bytecode.o format.o io.o main.o x86encode.o
OBJECTS = bytecode.o format.o io.o lex.o main.o x86encode.o
.PHONY: passc
passc: .bin $(OBJECTS)

View File

@ -1,111 +0,0 @@
# Intermediate Representations
## Bytecode
### Instructions
Instructions for times:
* `comm : a * b <=> b * a`
* `assocl : a * (b * c) => (a * b) * c`
* `assocr : (a * b) * c => a * (b * c)`
* `mapl (f : a => b) : a * c => b * c`
* `mapr (f : b => c) : a * b => a * c`
* `unitil : a => a * 1`
* `unitir : a => 1 * a`
* `unitel : a * 1 => a`
* `uniter : 1 * a => a`
Instructions for plus:
* `comm : a + b <=> b + a`
* `assocl : a + (b + c) => (a + b) + c`
* `assocr : (a + b) + c => a + (b + c)`
* `mapl (f : a => b) : a + c => b + c`
* `mapr (f : b => c) : a + b => a + c`
* `inl (b : type) : a => a + b`
* `inr (a : type) : b => a + b`
* `out : a + a => a`
Distributivity:
* `distl : a * (b + c) => (a * b) + (a * c)`
* `distr : (a + b) * c => (a * c) + (b * c)`
* `factl : (a * b) + (a * c) => a * (b + c)`
* `factr : (a * c) + (b * c) => (a + b) * c`
Recursion:
* `project: rec r. f(r) -> f(rec r. f(r))`
* `embed: f(rec r. f(r)) -> rec r. f(r)`
`project` and `embed` are no-ops which exist to make type-checking easier
(i.e. isorecursive over equirecursive types).
#### Most instructions are redundant
Most of these instructions are redundant:
* All of the l/r variants can be implemented in terms of each other
using commutativity.
* All of the plus instructions can be implemented in terms of `map`, `in`, and `out`.
* Alternatively, we could have replaced `map` and `out` with a single instruction,
`if (f : a => c) (g : b => c) : a + b => c`.
So "morally", there are only about 10 instructions: `comm`, `assoc`, `map`, `uniti`, `unite`,
`inl`, `inr`, `if`, `dist`, and `fact`.
#### Most instructions are reversible
Inverses of instructions:
* `comm` / `comm`
* `assocl` / `assocr`
* `map f` / `map f*`
* `uniti` / `unite`
* `dist` / `fact`
The only irreversible instructions are `in` and `out`.
#### Instructions are algebraic laws
We have a symmetric monoidal category with coproducts where `*` distributes over `+`.
This isn't quite a distributive symmetric monoidal category, because `*` isn't a product.
Likewise, we *almost* have a distributive lattice (characterized as a meet-semilattice
with binary joins), but `*` isn't guaranteed to be idempotent.
The reversible fragment is a wide dagger symmetric monoidal subcategory.
#### That's really all we need
We simply don't need functions, polymorphism, or `0`.
`0` isn't very interesting when characterized as an initial object
or as the unit for `+`; I find it's only interesting in the context of
second-order polymorphism, as `forall a. a`.
## Finite-state 1-bit cons machine
Instructions:
* `comm`
* `assoc`
* `factor`
* `dist`
* `map`
* `unite`
* `uniti`
* `inl`
* `inr`
Redundant instructions:
* `l`/`r` variants
* `out`
There is a finite number of states, and a state transition table
which determines the next state based on the current state and
a single bit extracted using `dist`.
## Finite-state random-access 1-bit register machine
Instructions:
* `x <- enum(imm, y)`
* `w <- struct(x, y, z)`
* `free x`

View File

@ -61,7 +61,7 @@ void assocr(void) {
x86_inst_xchg_r64_m64(AX, DX);
}
void distl(void) {
void distr(void) {
// a, b + c
// a * b + a * c
@ -74,7 +74,7 @@ void distl(void) {
// Awfully convenient how that works out, huh?
}
void distr(void) {
void distl(void) {
// The intermediate states here are ill-typed, but ultimately everything
// gets shuffled around to the right locations.
@ -96,7 +96,7 @@ void distr(void) {
x86_inst_xchg_r64_rax(DX);
}
void factl(void) {
void factr(void) {
// a * b + a * c:
// a * (b + c)
@ -107,7 +107,7 @@ void factl(void) {
x86_inst_xchg_r64_m64(AX, DX);
}
void factr(void) {
void factl(void) {
// a * c + b * c
// (a + b) * c
@ -172,23 +172,23 @@ void mapr_end(void) {
x86_inst_pop_r64(AX);
}
void unitil(void) {
void unitir(void) {
allocate_cons();
x86_inst_xchg_r64_rax(DX);
x86_inst_mov_r64_r64(DX, DI);
}
void unitir(void) {
void unitil(void) {
allocate_cons();
x86_inst_mov_r64_r64(AX, DI);
}
void unitel(void) {
void uniter(void) {
x86_inst_xchg_r64_rax(DX);
free_cons();
}
void uniter(void) {
void unitel(void) {
free_cons();
}
@ -377,6 +377,17 @@ void out(void) {
free_cons();
}
/// Emit an unconditional jump to `sym`.
void jump(symbol sym) {
inst_jump(sym);
}
/// Emit a two-way branch: test the low byte of AX, run `out`, then jump
/// to `a` if the zero condition holds and to `b` otherwise.
void jump_if(symbol a, symbol b) {
x86_inst_test_r8_r8(AX, AX);
// NOTE(review): the conditional jump below presumably consumes the flags
// set by the test above, so the code emitted by out() must leave the
// flags untouched -- confirm.
out();
inst_jump_if_zero(a);
inst_jump(b);
}
static void inst_load(reg dest, symbol sym) {
x86_inst_lea_r64_rip_disp32_op(dest);
relocate_pc32(sym);
@ -386,7 +397,7 @@ static symbol one_symbol;
static symbol loop_point;
static symbol exit_point;
void quit(void) {
void halt(void) {
inst_jump(exit_point);
}
@ -469,14 +480,13 @@ symbol init_bytecode(void) {
//x86_inst_lea_r64_m64_disp8(DI, SP, -16);
x86_inst_mov_r64_r64(DI, SP);
x86_inst_sub_r64_imm8(DI, 16);
x86_inst_push_r64(DI);
x86_inst_push_r64(DI);
x86_inst_xor_r32_r32(R14, R14);
x86_inst_push_r64(R14);
x86_inst_push_r64(R14);
// Initial state is a unit in the left.
// (Right states will be loop states.)
// Initial state is a unit.
x86_inst_mov_r64_r64(AX, DI);
x86_inst_mov_r64_r64(DX, DI);
inl();
loop_point = new_symbol();
define_executable_symbol(loop_point);

View File

@ -10,13 +10,13 @@ void assocl(void);
/// (a * b) * c => a * (b * c)
void assocr(void);
/// a * (b + c) => (a * b) + (a * c)
void distl(void);
/// (a + b) * c => (a * c) + (b * c)
void distr(void);
/// (a + b) * c => (a * c) + (b * c)
void distl(void);
/// (a * b) + (a * c) => a * (b + c)
void factl(void);
/// (a * c) + (b * c) => (a + b) * c
void factr(void);
/// (a * c) + (b * c) => (a + b) * c
void factl(void);
/// (a => b) => (a * c => b * c)
void mapl_begin(void);
void mapl_end(void);
@ -24,13 +24,13 @@ void mapl_end(void);
void mapr_begin(void);
void mapr_end(void);
/// a => a * 1
void unitil(void);
/// a => 1 * a
void unitir(void);
/// a => 1 * a
void unitil(void);
/// a * 1 => a
void unitel(void);
/// 1 * a => a
void uniter(void);
/// 1 * a => a
void unitel(void);
/// a + b <=> b + a
void comm_plus(void);
/// a + (b + c) => (a + b) + c
@ -49,9 +49,12 @@ void inl(void);
void inr(void);
/// a + a => a
void out(void);
void quit(void);
/// end the program
void halt(void);
symbol init_bytecode(void);
void finish_bytecode(void);
void jump(symbol sym);
void jump_if(symbol a, symbol b);
#endif

196
src/ir.c
View File

@ -1,196 +0,0 @@
/// This file serves conceptually as the intermediate representation (IR)
/// of the compiler. Compared to "asm", this file is aware of stack frames,
/// control flow blocks and labels, compound types like structs and enums,
/// and register allocation.
#include "asm.h"
#include "format.h"
#include "ir.h"
#include "x86encode.h"
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_STACK_FRAMES 32
#define MAX_LABELS 256
#define MAX_FIXUPS 256
struct stack_frame {
uint32_t depth;
uint32_t label_depth;
};
struct label {
uint32_t frame;
uint32_t argc;
symbol symbol;
};
static uint32_t stack_depth = 0;
static uint32_t stack_frame = 0;
static struct stack_frame stack_frames[MAX_STACK_FRAMES];
static uint32_t label_depth = 0;
static struct label labels[MAX_LABELS];
/// Initialize the IR state at program entry and hand out the three
/// initial variables.
///
/// Must be called while no variables and no frames exist (asserted).
/// Emits code that sets BP to SP + 24, then assigns variable slots in the
/// order env, argv, argc (slots 0, 1 and 2).
void init_ir(var* argc, var* argv, var* env) {
assert(stack_depth == 0 && stack_frame == 0);
x86_inst_mov_r64_r64(BP, SP);
// three 8-byte slots
// NOTE(review): presumably this places BP just past argc/argv/env on the
// entry stack -- confirm against the startup stack layout.
x86_inst_add_r64_imm8(BP, 8 * 3);
*env = stack_depth++;
*argv = stack_depth++;
*argc = stack_depth++;
}
void enter(void) {
assert(stack_frame < MAX_STACK_FRAMES);
printf("ENTERING: %i, %i\n", stack_depth, label_depth);
struct stack_frame frame = { .depth = stack_depth, .label_depth = label_depth };
stack_frames[stack_frame] = frame;
stack_frame++;
// exit label
declare(0);
}
/// Close the innermost block: restore the variable and label depths saved
/// by `enter`, define the block's exit label (filling `args` with its
/// arguments), and pop the frame.
void leave(var* args) {
    assert(stack_frame > 0);
    const struct stack_frame* top = &stack_frames[stack_frame - 1];
    const uint32_t exit_label = top->label_depth;
    stack_depth = top->depth;
    label_depth = exit_label;
    // the frame is popped only afterwards: `define` checks the label against the current frame
    define(exit_label, args);
    stack_frame--;
}
label declare(uint32_t argc) {
assert(label_depth < MAX_LABELS);
symbol sym = new_symbol();
struct label label = { .frame = stack_frame, .argc = argc, .symbol = sym };
labels[label_depth] = label;
return label_depth++;
}
/// Set the argument count of the innermost block's exit label and return
/// that label.
///
/// The exit label is the first label declared by `enter`, so its index is
/// the label depth saved in the frame. A block must be open (asserted);
/// without one, `stack_frame - 1` would wrap around and index far outside
/// `stack_frames`.
label declare_exit(uint32_t argc) {
    assert(stack_frame > 0);
    label label = stack_frames[stack_frame - 1].label_depth;
    labels[label].argc = argc;
    return label;
}
/// Define label `l` at the current code position and bring its arguments
/// into scope.
///
/// `args` receives one fresh variable slot per label argument; it is not
/// touched when the label takes no arguments.
void define(label l, var* args) {
    struct label* label = labels + l;
    printf("DEFINING %i (%i)\n", l, label->argc);
    define_executable_symbol(label->symbol);
    assert(label->frame == stack_frame);
    const uint32_t argc = label->argc;
    for (uint32_t i = 0; i < argc; i++) {
        // arguments occupy consecutive slots on top of the current stack
        args[i] = stack_depth++;
    }
}
/// Emit code that loads variable slot `var` into register `reg`,
/// addressing the slot relative to BP.
void load_var(reg reg, var var) {
// the stack grows downward, so the bottom of the stack, BP, points to nothing;
// subtracting 8 causes it to point to the first variable, 0.
// (each variable is 8 bytes.)
// NOTE(review): `var` is unsigned, so the displacement is computed with
// wrap-around arithmetic and relies on conversion to a signed parameter
// -- confirm the parameter type of x86_inst_mov_r64_m64_disp.
x86_inst_mov_r64_m64_disp(reg, BP, -(var * 8) - 8);
}
/// Emit a push of `reg` and return the variable slot that now holds it.
var push_var(reg reg) {
    const uint32_t slot = stack_depth;
    x86_inst_push_r64(reg);
    stack_depth = slot + 1;
    return slot;
}
/// Emit code that unwinds the stack to the depth of `label`'s frame and
/// pushes the `label->argc` argument variables in `args` on top of it.
///
/// Afterwards `stack_depth` reflects the destination frame plus the
/// label's arguments.
void load_args(struct label* label, var* args) {
    // A label declared outside any `enter` block has frame 0 and no frame
    // record; `label->frame - 1` would wrap around.
    assert(label->frame > 0);
    struct stack_frame* dest_frame = &stack_frames[label->frame - 1];
    uint32_t depth_diff = stack_depth - dest_frame->depth;
    if (depth_diff > 0) {
        // FIXME: should be immX!!!
        // An imm8 operand is sign-extended, so byte counts above 127 would
        // silently move SP the wrong way; refuse to emit them for now.
        assert(depth_diff <= INT8_MAX / 8);
        x86_inst_add_r64_imm8(SP, depth_diff * 8);
    }
    for (uint32_t arg = 0; arg < label->argc; arg++) {
        load_var(AX, args[arg]);
        x86_inst_push_r64(AX);
    }
    stack_depth = dest_frame->depth + label->argc;
}
/// Emit an unconditional jump to label `l`, first passing `args` via
/// `load_args` (which also updates the tracked stack depth).
void jump(label l, var* args) {
struct label* label = &labels[l];
printf("JUMP %i (%i)\n", l, label->argc);
load_args(label, args);
inst_jump(label->symbol);
}
void jump_table(size_t branches, label* labels, var index, var* args) {
assert(0); // UNIMPLEMENTED
}
/// Emit a conditional jump: go to label `t` when variable `cond` is not
/// zero, otherwise to label `e`.
///
/// Both labels must live in the same frame and take the same number of
/// arguments (asserted); `args` is loaded once and shared by both.
void jump_if(label t, label e, var cond, var* args) {
struct label* then = &labels[t];
struct label* else_ = &labels[e];
printf("JUMP_IF %i ELSE %i (%i)\n", t, e, then->argc);
assert(then->argc == else_->argc && then->frame == else_->frame);
// the condition is read into BX before load_args emits its stack adjustment
load_var(BX, cond);
load_args(then, args);
inst_jump_if_not_zero(then->symbol, BX);
inst_jump(else_->symbol);
}
/// Emit code that materializes the integer literal `lit` and return the
/// variable slot holding it.
var lit(uint64_t lit) {
    x86_inst_mov_r64_imm(AX, lit);
    // pushing AX and claiming the next slot is exactly what push_var does
    return push_var(AX);
}
var lit_string(char* str) {
fprintf(stderr, "error: string literals not yet implemented\n");
exit(1);
}
/// Emit code that adds two variables (loaded into AX and BX) and return
/// a new variable slot holding the result pushed from AX.
var add(var addend1, var addend2) {
load_var(AX, addend1);
load_var(BX, addend2);
x86_inst_add_r64_r64(AX, BX);
return push_var(AX);
}
/// Emit code that subtracts one variable from another and return a new
/// variable slot holding the result pushed from AX.
///
/// NOTE(review): the first parameter is loaded into AX and the second
/// into BX, so (assuming the encoder computes AX -= BX) the result is
/// `subtrahend - minuend`; the parameter names are swapped relative to
/// the usual arithmetic meaning -- confirm.
var sub(var subtrahend, var minuend) {
// TODO: use modr/m
load_var(AX, subtrahend);
load_var(BX, minuend);
x86_inst_sub_r64_r64(AX, BX);
return push_var(AX);
}
// Linux system call: https://blog.rchapman.org/posts/Linux_System_Call_Table_for_x86_64/
/// Emit a system call and return a new variable slot holding the value
/// left in AX.
///
/// `args[0]` is the system call number; `args[1..argc-1]` are up to six
/// arguments, loaded into DI, SI, DX, R10, R8, R9 in that order.
/// `argc` counts the call number too, so it must be between 1 and 7.
var syscall(size_t argc, var* args) {
assert(argc > 0 && argc <= 7);
// Each case loads one register and falls through, so exactly the first
// `argc` entries of `args` are loaded.
switch(argc) {
case 7:
load_var(R9, args[6]);
__attribute__((fallthrough));
case 6:
load_var(R8, args[5]);
__attribute__ ((fallthrough));
case 5:
load_var(R10, args[4]);
__attribute__ ((fallthrough));
case 4:
load_var(DX, args[3]);
__attribute__ ((fallthrough));
case 3:
load_var(SI, args[2]);
__attribute__ ((fallthrough));
case 2:
load_var(DI, args[1]);
__attribute__ ((fallthrough));
case 1:
// the system call number, not an argument
load_var(AX, args[0]);
}
// NOTE: syscall clobbers rcx and r11.
x86_inst_syscall();
return push_var(AX);
}

116
src/ir.h
View File

@ -1,116 +0,0 @@
#ifndef _IR_H
#define _IR_H
#include <stddef.h>
#include <stdint.h>
typedef uint32_t var;
typedef uint32_t label;
struct jump_target {
label label;
var* args;
};
/// Declare a new label in the current scope with the provided number
/// of arguments.
///
/// Local variables (not part of a stack frame generated by `define` or `enter`)
/// will not be in scope of the definition of the label.
label declare(uint32_t argc);
/// Define a label and create a new scope for local variables.
///
/// The new scope will have access to all of the variables
/// of the parent scope of the label and the label's arguments,
/// but not any local variables from previous definitions.
void define(label label, var* args);
/// Create a new scope which encompasses all local variables defined up to this point.
///
/// This allows nested definitions to have access to local variables.
void enter(void);
/// Jump to label, unconditionally. Ends the continuation.
void jump(struct jump_target dest);
/// Jump to `then` if `cond` is not zero; jump to `else` otherwise.
/// Ends the continuation.
void jump_if(struct jump_target then, struct jump_target else_, var cond);
/// Jump to the `index`th destination. Ends the continuation.
void jump_table(uint32_t destc, struct jump_target* destinations, var index);
/// Call this at the beginning of execution.
/// It performs initialization and stuff.
void init_ir(var* argc, var* argv, var* env);
/// Enter a new block.
///
/// All labels defined in this block will have access to all variables
/// which are in scope as of calling `enter`. You will be able to jump
/// to any label which is defined in this block from here
/// to the symmetric `leave`.
///
/// This also generates a new label corresponding with the end of the block,
/// which will be automatically defined when you call `leave`.
void enter(void);
/// Leave a block.
///
/// This will restore the context to how it was when `enter` was called,
/// plus the return values declared by the call to `declare_exit`.
void leave(var* rets);
/// Declare a new label in the innermost block.
///
/// This label can only be called from the block or nested blocks.
/// This label must be called with the given number of arguments.
label declare(uint32_t argc);
/// Declare an exit label for the surrounding block.
///
/// Calling this label will exit the surrounding blocks.
/// The usual restrictions for labels apply.
label declare_exit(uint32_t retc);
/// Define a label in the innermost block, automatically terminating
/// any previous labels.
///
/// All variables defined prior to the beginning of this block will be in scope.
/// The arguments associated with the label will be in scope.
/// Variables defined *after* the beginning of the block but *prior* to this label
/// will *not* be in scope.
///
/// From this label you can jump to any label in the enclosing block
/// or any parent block.
void define(label label, var* args);
/// Jump to label, unconditionally; never returns.
void jump(label label, var* args);
/// Jump to `index`th label in table; never returns.
///
/// All labels must be at the same depth and accept the same arguments.
/// `index` must not be out of bounds.
void jump_table(size_t branches, label* labels, var index, var* args);
/// Jump to `then` if cond is not zero, `else` if cond is zero.
void jump_if(label then, label else_, var cond, var* args);
/// Integer literal.
var lit(uint64_t lit);
/// String literal.
var lit_string(char* str);
/// Addition.
var add(var addend1, var addend2);
/// Subtraction.
var sub(var subtrahend, var minuend);
/// Perform a system call.
var syscall(size_t argc, var* args);
#endif

View File

@ -1,655 +0,0 @@
#include "ir.h"
#include "lang.h"
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_CONTEXT 32
#define MAX_ASSIGNMENTS 256
#define MAX_ARGUMENTS 256
#define MAX_OPERATORS 256
struct assignment {
char* name;
var ref;
};
enum block_state {
BLOCK_CLEAN,
BLOCK_ASSIGN,
BLOCK_EXPR,
};
struct block_crumb {
enum block_state state;
uint32_t assignment_count;
struct assignment assignments[MAX_ASSIGNMENTS];
var final;
};
enum if_state {
IF_COND,
IF_THEN,
IF_ELSE,
IF_END,
};
struct if_crumb {
enum if_state state;
label then;
label else_;
label end;
};
enum loop_state {
LOOP_CLEAN,
LOOP_CVAR_INIT,
LOOP_BODY,
};
struct loop_crumb {
enum loop_state state;
char* label_name;
label next;
label exit;
uint32_t assignment_count;
var initializers[MAX_ASSIGNMENTS];
struct assignment assignments[MAX_ASSIGNMENTS];
};
struct expr_crumb {
uint32_t argument_count;
uint32_t operator_count;
var arguments[MAX_ARGUMENTS];
enum operator_ operators[MAX_OPERATORS];
};
struct jump_crumb {
label label;
uint32_t arity;
uint32_t argument_count;
var arguments[MAX_ARGUMENTS];
};
enum crumb_type {
BLOCK_CRUMB,
IF_CRUMB,
LOOP_CRUMB,
EXPR_CRUMB,
JUMP_CRUMB,
};
union crumb_data {
struct block_crumb block;
struct if_crumb if_;
struct loop_crumb loop;
struct expr_crumb expr;
struct jump_crumb jump;
};
struct crumb {
enum crumb_type type;
union crumb_data data;
};
static uint32_t context_depth = 1;
static struct crumb context[MAX_CONTEXT];
/// Return a freshly allocated, NUL-terminated copy of `str`.
///
/// The caller owns the result and must release it with `free`.
/// Exits with an error message if memory cannot be allocated, instead of
/// writing through a null pointer.
static char* copy_str(char* str) {
    size_t len = strlen(str);
    char* copy = malloc(len + 1);
    if (copy == NULL) {
        fprintf(stderr, "error: out of memory\n");
        exit(1);
    }
    // len + 1 copies the terminator along with the characters
    memcpy(copy, str, len + 1);
    return copy;
}
static void push(struct crumb crumb) {
context[context_depth] = crumb;
context_depth++;
}
static void push_new_block(void) {
union crumb_data data;
struct block_crumb block = {
.state = BLOCK_CLEAN,
.assignment_count = 0,
.final = (var) -1,
};
data.block = block;
struct crumb crumb = {
.type = BLOCK_CRUMB,
.data = data,
};
push(crumb);
}
static void push_new_expr(void) {
struct expr_crumb exprc = {
.argument_count = 0,
.operator_count = 0,
};
union crumb_data data;
data.expr = exprc;
struct crumb crumb = {
.type = EXPR_CRUMB,
.data = data,
};
push(crumb);
}
struct label_and_arity {
label label;
uint32_t arity;
};
static void push_new_jump(struct label_and_arity label) {
union crumb_data data;
data.jump.label = label.label;
data.jump.arity = label.arity;
data.jump.argument_count = 0;
struct crumb crumb = {
.type = JUMP_CRUMB,
.data = data,
};
push(crumb);
}
/// Append `ref` to the argument list of the expression crumb on top of
/// the context stack.
///
/// The top crumb must be an expression (asserted). Exits with an error
/// when the argument list is already full.
static void push_argument(var ref) {
    struct crumb* ctx = &context[context_depth - 1];
    assert(ctx->type == EXPR_CRUMB);
    struct expr_crumb* exprc = &ctx->data.expr;
    // `>=`, not `>`: index MAX_ARGUMENTS is already one past the end
    if (exprc->argument_count >= MAX_ARGUMENTS) {
        fprintf(stderr, "error: exceeded maximum number of arguments in expression\n");
        exit(1);
    }
    exprc->arguments[exprc->argument_count] = ref;
    exprc->argument_count++;
}
/// Record (a copy of) `name` as the name of the next loop variable of the
/// loop crumb on top of the context stack.
///
/// This does not advance `assignment_count`; the slot is completed, and
/// the count incremented, by the following `push_cvar`.
static void push_cvar_name(char* name) {
struct crumb* ctx = &context[context_depth - 1];
assert(ctx->type == LOOP_CRUMB);
struct loop_crumb* loopc = &ctx->data.loop;
if (loopc->assignment_count == MAX_ASSIGNMENTS) {
fprintf(stderr, "error: exceed maximum number of assignments in loop cvars\n");
exit(1);
}
loopc->assignments[loopc->assignment_count].name = copy_str(name);
}
/// Record `ref` as the initializer of the next loop variable of the loop
/// crumb on top of the context stack and advance the variable count.
///
/// The top crumb must be a loop (asserted). Exits with an error when the
/// initializer list is already full.
static void push_cvar(var ref) {
    struct crumb* ctx = &context[context_depth - 1];
    assert(ctx->type == LOOP_CRUMB);
    struct loop_crumb* loopc = &ctx->data.loop;
    // `>=`, not `>`: index MAX_ASSIGNMENTS is already one past the end
    if (loopc->assignment_count >= MAX_ASSIGNMENTS) {
        fprintf(stderr, "error: exceed maximum number of assignments in loop cvars\n");
        exit(1);
    }
    loopc->initializers[loopc->assignment_count] = ref;
    loopc->assignment_count++;
}
static var lookup_assignment(
uint32_t assignment_count,
struct assignment* assignments,
char* name
) {
for (uint32_t i = assignment_count; i > 0; i--) {
struct assignment asgn = assignments[i - 1];
if (strcmp(asgn.name, name) == 0) {
return asgn.ref;
}
}
return (var) -1;
}
static var lookup_var(char* name) {
for (uint32_t i = context_depth; i > 0; i--) {
struct crumb ctx = context[i - 1];
var ref = (var) -1;
switch (ctx.type) {
case LOOP_CRUMB:
if (ctx.data.loop.state != LOOP_BODY) {
break;
}
ref = lookup_assignment(
ctx.data.loop.assignment_count,
ctx.data.loop.assignments,
name
);
break;
case BLOCK_CRUMB:
ref = lookup_assignment(
ctx.data.block.assignment_count,
ctx.data.block.assignments,
name
);
break;
default:
continue;
}
if (ref != (var) -1) {
return ref;
}
}
fprintf(stderr, "name resolution error: unknown variable %s\n", name);
exit(1);
}
/// The kinds of jump target a `next`/`exit`/`return` expression can name.
enum label_type {
    NEXT_LABEL,
    EXIT_LABEL,
    RETURN_LABEL,
};

/// Return the keyword spelling of `type`, for use in diagnostics.
///
/// Returns "unknown" for a value that is not a valid enumerator, instead
/// of falling off the end of a non-void function.
static const char* label_type_name(enum label_type type) {
    switch (type) {
    case NEXT_LABEL:
        return "next";
    case EXIT_LABEL:
        return "exit";
    case RETURN_LABEL:
        return "return";
    }
    return "unknown";
}
static struct label_and_arity lookup_label(enum label_type type, char* name) {
for (uint32_t i = context_depth; i > 0; i--) {
struct crumb ctx = context[i - 1];
switch (ctx.type) {
case LOOP_CRUMB:
if (name == NULL || strcmp(name, ctx.data.loop.label_name) == 0) {
struct label_and_arity label;
if (type == NEXT_LABEL) {
label.label = ctx.data.loop.next;
label.arity = ctx.data.loop.assignment_count;
return label;
}
if (type == EXIT_LABEL) {
label.label = ctx.data.loop.exit;
label.arity = 1;
return label;
}
}
break;
default:
continue;
}
}
if (name == NULL) {
fprintf(stderr, "name resolution error: no %s label in scope\n", label_type_name(type));
} else {
fprintf(stderr, "name resolution error: unknown label %s\n", name);
}
exit(1);
}
/// Replace the first two arguments of `exprc` with the single result of
/// `emit(arg1, arg2)` and shift the remaining arguments down by one.
///
/// Requires at least two arguments (asserted).
static void reduce_expression_binop(struct expr_crumb* exprc, var (*emit)(var arg1, var arg2)) {
    assert(exprc->argument_count >= 2);
    var arg1 = exprc->arguments[0];
    var arg2 = exprc->arguments[1];
    exprc->arguments[0] = emit(arg1, arg2);
    // memmove counts bytes, so scale the element count by the element size
    memmove(
        &exprc->arguments[1],
        &exprc->arguments[2],
        (exprc->argument_count - 2) * sizeof exprc->arguments[0]
    );
    exprc->argument_count--;
}
/// Collapse the arguments and operators collected in `exprc` into a
/// single variable and return it.
///
/// Operators are applied strictly in the order they were recorded, each
/// one combining the first two remaining arguments; precedence is not
/// taken into account (hence the warning). Only OP_ADD and OP_SUB are
/// implemented; any other operator is a fatal error.
static var reduce_expression(struct expr_crumb* exprc) {
// TODO FIXME: operator precedence
if (exprc->operator_count > 0 || exprc->argument_count > 1) {
fprintf(stderr, "warning: expression reduction may be incorrect\n");
//exit(1);
}
for (uint32_t op_ix = 0; op_ix < exprc->operator_count; op_ix++) {
switch (exprc->operators[op_ix]) {
case OP_ADD:
reduce_expression_binop(exprc, add);
break;
case OP_SUB:
reduce_expression_binop(exprc, sub);
break;
default:
fprintf(stderr, "error: operator not implemented: %i", exprc->operators[op_ix]);
exit(1);
}
}
exprc->operator_count = 0;
// every operator consumed one argument, so exactly one must remain
assert(exprc->argument_count == 1);
return exprc->arguments[0];
}
/// Parser callback: a block begins.
///
/// What happens first depends on the enclosing crumb: inside an `if` the
/// pending branch label is defined; inside a loop the `next` label is
/// declared and defined and the loop variables are bound to its
/// arguments. In every case a fresh block crumb is then pushed.
void enter_block(void) {
printf("** enter_block\n");
struct crumb* ctx = &context[context_depth - 1];
switch (ctx->type) {
case BLOCK_CRUMB:
// we should have seen a stmt_assign or stmt_expr first,
// either of which pushes an expr crumb.
assert(0);
case EXPR_CRUMB: {
// this block is purely a scope/sequencing thing
// with no special semantics
break;
}
case IF_CRUMB: {
struct if_crumb ifc = ctx->data.if_;
switch (ifc.state) {
case IF_COND:
case IF_END:
// a block may only start a `then` or `else` branch
assert(0);
case IF_THEN:
define(ifc.then, NULL);
break;
case IF_ELSE:
define(ifc.else_, NULL);
break;
}
break;
}
case LOOP_CRUMB: {
struct loop_crumb* loopc = &ctx->data.loop;
assert(loopc->state == LOOP_CLEAN);
loopc->state = LOOP_BODY;
// the `next` label takes one argument per loop variable
loopc->next = declare(loopc->assignment_count);
printf("LOOP %i END %i\n", loopc->next, loopc->exit);
var args[MAX_ASSIGNMENTS];
define(loopc->next, args);
// TODO NOTE: is this the correct order?
// bind each loop variable name to the matching label argument
for (uint32_t i = 0; i < loopc->assignment_count; i++) {
loopc->assignments[i].ref = args[i];
}
break;
}
default:
assert(0);
}
push_new_block();
}
/// Parser callback: an assignment statement to `name` begins.
///
/// Stores a copy of the name in the block's next assignment slot and
/// pushes an expression crumb for the right-hand side. The slot's
/// variable and the assignment count are filled in by `exit_expr`.
void stmt_assign(char* name) {
printf("** stmt_assign\n");
struct crumb* ctx = &context[context_depth - 1];
assert(ctx->type == BLOCK_CRUMB);
struct block_crumb* blockc = &ctx->data.block;
assert(blockc->state == BLOCK_CLEAN);
if (blockc->assignment_count == MAX_ASSIGNMENTS) {
fprintf(stderr, "error: exceeded maximum number of assignments in block\n");
exit(1);
}
blockc->state = BLOCK_ASSIGN;
blockc->assignments[blockc->assignment_count].name = copy_str(name);
push_new_expr();
}
/// Parser callback: an expression statement begins.
///
/// Marks the enclosing block as evaluating an expression and pushes an
/// expression crumb to collect it.
void stmt_expr(void) {
printf("** stmt_expr\n");
struct crumb* ctx = &context[context_depth - 1];
assert(ctx->type == BLOCK_CRUMB);
struct block_crumb* blockc = &ctx->data.block;
assert(blockc->state == BLOCK_CLEAN);
blockc->state = BLOCK_EXPR;
push_new_expr();
}
/// Parser callback: the current block ends.
///
/// Pops the block crumb and hands the block's value (its last statement,
/// or literal 0 for an empty block) to the enclosing construct:
/// * expression: the value becomes an argument;
/// * if: jump to the `if`'s end label and advance then -> else -> end;
/// * loop: jump to the loop's exit label, pop and release the loop crumb,
///   leave the loop's IR block, and pass the result to the enclosing
///   expression.
/// Finally the block's assignment names are released.
void exit_block(void) {
    printf("** exit_block\n");
    struct crumb* ctx = &context[context_depth - 1];
    assert(ctx->type == BLOCK_CRUMB);
    // work on a copy: the crumb's slot is popped below, but its names are
    // still needed for the cleanup at the end
    struct block_crumb blockc = ctx->data.block;
    assert(blockc.state == BLOCK_CLEAN);
    var ret = blockc.final;
    if (ret == (var) -1) {
        // TODO: better way to handle empty blocks
        ret = lit(0);
    }
    context_depth--;
    ctx = &context[context_depth - 1];
    switch (ctx->type) {
    case EXPR_CRUMB: {
        push_argument(ret);
        break;
    }
    case IF_CRUMB: {
        struct if_crumb* ifc = &ctx->data.if_;
        assert(ifc->state == IF_THEN || ifc->state == IF_ELSE);
        jump(ifc->end, &ret);
        if (ifc->state == IF_THEN) {
            ifc->state = IF_ELSE;
        } else if (ifc->state == IF_ELSE) {
            ifc->state = IF_END;
        }
        break;
    }
    case LOOP_CRUMB: {
        // unlike with `if`, there is no `exit_loop`, so we do clean-up here.
        struct loop_crumb loopc = ctx->data.loop;
        assert(loopc.state == LOOP_BODY);
        jump(loopc.exit, &ret);
        context_depth--;
        for (uint32_t i = 0; i < loopc.assignment_count; i++) {
            free(loopc.assignments[i].name);
        }
        // the label name was duplicated by enter_loop and is owned by this
        // crumb; without this it leaks once per loop
        free(loopc.label_name);
        leave(&ret);
        push_argument(ret);
        break;
    }
    default:
        assert(0);
    }
    for (uint32_t i = 0; i < blockc.assignment_count; i++) {
        free(blockc.assignments[i].name);
    }
}
/// Parser callback: the current expression ends.
///
/// Reduces the expression crumb to a single variable, pops it, and hands
/// the value to the enclosing construct:
/// * block: it becomes the block's latest value and, for an assignment,
///   completes the pending binding;
/// * if: it is the condition, so the conditional jump is emitted;
/// * expression: it becomes an argument;
/// * loop: it is a loop-variable initializer;
/// * jump: it is the final jump argument, so the jump is emitted.
void exit_expr(void) {
printf("** exit_expr\n");
struct crumb* ctx = &context[context_depth - 1];
assert(ctx->type == EXPR_CRUMB);
struct expr_crumb* exprc = &ctx->data.expr;
assert(exprc->argument_count > 0);
var ret = reduce_expression(exprc);
context_depth--;
ctx = &context[context_depth - 1];
switch (ctx->type) {
case BLOCK_CRUMB: {
struct block_crumb* blockc = &ctx->data.block;
blockc->final = ret;
switch (blockc->state) {
case BLOCK_CLEAN:
// an expression can only end inside a statement
assert(0);
case BLOCK_EXPR:
blockc->state = BLOCK_CLEAN;
break;
case BLOCK_ASSIGN:
// the name was stored by stmt_assign; the binding becomes visible now
blockc->assignments[blockc->assignment_count].ref = ret;
blockc->assignment_count++;
blockc->state = BLOCK_CLEAN;
break;
}
break;
}
case IF_CRUMB: {
struct if_crumb* ifc = &ctx->data.if_;
assert(ifc->state == IF_COND);
jump_if(ifc->then, ifc->else_, ret, NULL);
ifc->state = IF_THEN;
break;
}
case EXPR_CRUMB:
push_argument(ret);
break;
case LOOP_CRUMB: {
struct loop_crumb* loopc = &ctx->data.loop;
assert(loopc->state == LOOP_CVAR_INIT);
push_cvar(ret);
loopc->state = LOOP_CLEAN;
break;
}
case JUMP_CRUMB: {
// TODO FIXME: this is *completely wrong* for `next`!
// works on a copy of the crumb, which is popped right after the jump
struct jump_crumb jumpc = ctx->data.jump;
fprintf(stderr, "args: %i, arity: %i\n", jumpc.argument_count, jumpc.arity);
assert(jumpc.argument_count + 1 == jumpc.arity);
jumpc.arguments[jumpc.argument_count] = ret;
jump(jumpc.label, jumpc.arguments);
// TODO: better way to handle returning impossible value
context_depth--;
push_argument(ret);
break;
}
}
}
/// Parser callback: an `if` expression begins.
///
/// Opens an IR block, declares the `then` and `else` labels (no
/// arguments) and the block's exit label (one argument, the result), then
/// pushes an `if` crumb followed by an expression crumb for the condition.
void enter_if(void) {
printf("** enter_if\n");
enter();
label then = declare(0);
label else_ = declare(0);
label end = declare_exit(1);
printf("IF THEN %i ELSE %i EXIT %i\n", then, else_, end);
struct if_crumb ifc = {
.state = IF_COND,
.then = then,
.else_ = else_,
.end = end,
};
union crumb_data data;
data.if_ = ifc;
struct crumb ctx = {
.type = IF_CRUMB,
.data = data,
};
push(ctx);
push_new_expr();
}
/// Parser callback: the current `if` expression ends.
///
/// If no `else` branch was seen, one is synthesized that yields literal
/// 0. The IR block is then left, and its result is passed to the
/// enclosing expression.
void exit_if(void) {
printf("** exit_if\n");
struct crumb ctx = context[context_depth - 1];
assert(ctx.type == IF_CRUMB);
struct if_crumb ifc = ctx.data.if_;
switch (ifc.state) {
case IF_COND:
case IF_THEN:
// the `then` branch must have been completed
assert(0);
case IF_ELSE: {
// still waiting for an `else` block: none was written
define(ifc.else_, NULL);
var ret = lit(0);
jump(ifc.end, &ret);
break;
}
case IF_END:
break;
}
var ret;
leave(&ret);
context_depth--;
push_argument(ret);
}
/// Parser callback: a loop named `label_name` begins.
///
/// Opens an IR block, declares its exit label (one argument, the result)
/// and pushes a loop crumb that owns a copy of the name. The `next` label
/// is not set here; `enter_block` declares it once all loop variables are
/// known.
// NOTE(review): `label_name` is passed straight to copy_str, which calls
// strlen on it, so it presumably must not be NULL -- confirm with the parser.
void enter_loop(char* label_name) {
printf("** enter_loop\n");
enter();
label exit = declare_exit(1);
struct loop_crumb loopc = {
.state = LOOP_CLEAN,
.label_name = copy_str(label_name),
.assignment_count = 0,
.exit = exit
};
union crumb_data data;
data.loop = loopc;
struct crumb ctx = {
.type = LOOP_CRUMB,
.data = data,
};
push(ctx);
}
/// Parser callback: declare loop variable `name`, initialized from the
/// variable of the same name that is currently in scope.
void cvar_pass(char* name) {
printf("** cvar_pass\n");
push_cvar_name(name);
push_cvar(lookup_var(name));
}
/// Parser callback: declare loop variable `name` whose initializer
/// expression follows.
///
/// Records the name and pushes an expression crumb; `exit_expr` stores
/// the initializer when that expression ends.
void cvar_init(char* name) {
printf("** cvar_init\n");
struct crumb* ctx = &context[context_depth - 1];
assert(ctx->type == LOOP_CRUMB);
struct loop_crumb* loopc = &ctx->data.loop;
loopc->state = LOOP_CVAR_INIT;
push_cvar_name(name);
push_new_expr();
}
void expr_next(char* label) {
printf("** expr_next\n");
push_new_jump(lookup_label(NEXT_LABEL, label));
push_new_expr();
}
void expr_exit(char* label) {
printf("** expr_exit\n");
push_new_jump(lookup_label(EXIT_LABEL, label));
push_new_expr();
}
/// Parser callback: a `return` expression begins.
///
/// NOTE(review): `lookup_label` only ever returns targets for the `next`
/// and `exit` kinds, so this lookup currently always ends in its
/// "no return label in scope" error -- `return` appears unimplemented.
void expr_return(void) {
printf("** expr_return\n");
push_new_jump(lookup_label(RETURN_LABEL, NULL));
push_new_expr();
}
void enter_group(void) {
printf("** enter_group\n");
push_new_expr();
}
void exit_group(void) {
printf("** exit_group\n");
// exit_expr is sufficient
}
/// Parser callback: operator `op` was seen in the current expression.
///
/// Normally the operator is appended to the expression's operator list.
/// As a special case, juxtaposition directly inside a jump separates
/// jump arguments: the expression so far is reduced, stored as the next
/// jump argument, and a fresh expression crumb is started.
///
/// Exits with an error when the operator list is already full.
void expr_op(enum operator_ op) {
    printf("** expr_op %i\n", op);
    struct crumb* ctx = &context[context_depth - 1];
    assert(ctx->type == EXPR_CRUMB);
    struct expr_crumb* exprc = &ctx->data.expr;
    if (op == OP_JUXT && context_depth > 1) {
        // HACK: should handle continuations separately from expressions
        struct crumb* ctx2 = &context[context_depth - 2];
        if (ctx2->type == JUMP_CRUMB) {
            struct jump_crumb* jumpc = &ctx2->data.jump;
            var ret = reduce_expression(exprc);
            assert(jumpc->argument_count < MAX_ARGUMENTS);
            jumpc->arguments[jumpc->argument_count] = ret;
            jumpc->argument_count++;
            // replace the finished expression crumb with an empty one
            context_depth--;
            push_new_expr();
            return;
        }
    }
    // `>=`, not `>`: index MAX_OPERATORS is already one past the end
    if (exprc->operator_count >= MAX_OPERATORS) {
        fprintf(stderr, "error: exceeded maximum number of operators in expression\n");
        exit(1);
    }
    exprc->operators[exprc->operator_count] = op;
    exprc->operator_count++;
}
void expr_string(char* string) {
printf("** expr_string %s\n", string);
push_argument(lit_string(string));
}
/// Parser callback: integer literal `num` was seen; it becomes an
/// argument of the current expression.
void expr_integer(int64_t num) {
    // %lli expects a long long; int64_t need not be that type, so convert explicitly
    printf("** expr_integer %lli\n", (long long) num);
    push_argument(lit((uint64_t) num));
}
void expr_var(char* var) {
printf("** expr_var %s\n", var);
push_argument(lookup_var(var));
}

View File

@ -1,32 +0,0 @@
#ifndef LANG_H
#define LANG_H
#include "lex.h"
void enter_block(void);
void stmt_assign(char* name);
void stmt_expr(void);
void exit_block(void);
void exit_expr(void);
void enter_if(void);
void exit_if(void);
void enter_loop(char* label);
void cvar_pass(char* name);
void cvar_init(char* name);
void expr_next(char* label);
void expr_exit(char* label);
void expr_return(void);
void enter_group(void);
void exit_group(void);
void expr_op(enum operator_ op);
void expr_string(char* string);
void expr_integer(int64_t num);
void expr_var(char* var);
#endif

520
src/lex.c
View File

@ -7,37 +7,58 @@
#include "lex/indent.h"
#include "io.h"
_Bool is_unary(enum operator_ op) {
return op == OP_SUB
|| op == OP_INV
|| op == OP_NOT;
static const char* const keywords[KEYWORD_COUNT] = {
"comm",
"assocl",
"assocr",
"distl",
"distr",
"factl",
"factr",
"mapl",
"mapr",
"unitil",
"unitir",
"unitel",
"uniter",
"comm+",
"assocl+",
"assocr+",
"mapl+",
"mapr+",
"inl",
"inr",
"out",
"halt",
"if",
};
static struct token simple(enum token_type type) {
struct token tok = { type, 0 };
return tok;
}
_Bool is_binary(enum operator_ op) {
return op == OP_EQ
|| op == OP_ADD
|| op == OP_SUB
|| op == OP_MUL
|| op == OP_DIV
|| op == OP_MOD
|| op == OP_AND
|| op == OP_OR
|| op == OP_XOR
|| op == OP_SHL
|| op == OP_SAR
|| op == OP_SHR
|| op == OP_GT
|| op == OP_LT
|| op == OP_GTE
|| op == OP_LTE
|| op == OP_NE
|| op == OP_TYPE
|| op == OP_FUN;
#define MAX_STR_LEN 4096
static size_t str_index;
// Token text is kept in a small ring of static buffers so that a token's
// string is not overwritten while following tokens are being lexed.
// Two buffers were enough while the lexer was used strictly LL(1);
// later changes needed a third.
static int which_buf = 0;
static char str_buf_1[MAX_STR_LEN];
static char str_buf_2[MAX_STR_LEN];
static char str_buf_3[MAX_STR_LEN];

/// Advance to the next buffer in the ring and return it.
static char* str_buf(void) {
    char* const ring[3] = { str_buf_1, str_buf_2, str_buf_3 };
    which_buf = (which_buf + 1) % 3;
    assert(which_buf >= 0 && which_buf < 3);
    return ring[which_buf];
}
_Bool is_lit(struct token tok) {
return tok.type == TOK_INTEGER || tok.type == TOK_STRING || tok.type == TOK_NAME;
}
static _Bool is_alpha(char c) {
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
@ -56,142 +77,10 @@ static _Bool is_id_char(char c) {
return is_alphanumeric(c) || c == '_';
}
static struct token simple(enum token_type type) {
struct token tok = { type, 0 };
return tok;
/// True for the four whitespace characters the lexer recognizes:
/// space, tab, carriage return and line feed.
_Bool is_whitespace(char c) {
    switch (c) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
static struct token op(enum operator_ op) {
union token_data data;
data.op = op;
struct token tok = { TOK_OPERATOR, data };
return tok;
}
// Numeric value of a single digit character: characters accepted by
// is_digit() map to c - '0', and ASCII letters map case-insensitively to
// 10 ('a'/'A') through 35 ('z'/'Z'). Any other character trips the assert.
static uint8_t digit_value(char c) {
    if (is_digit(c)) {
        return c - '0';
    }
    const _Bool upper = c >= 'A' && c <= 'Z';
    const _Bool lower = c >= 'a' && c <= 'z';
    if (upper || lower) {
        return 0xA + (c - (upper ? 'A' : 'a'));
    }
    assert(0);
}
// True when c is an alphanumeric character whose digit value is below base.
static _Bool is_digit_in(uint8_t base, char c) {
    return is_alphanumeric(c) && digit_value(c) < base;
}
// Consumes a run of digits valid in `base` from the input and returns their
// value as an unsigned 64-bit integer. Stops at the first character that is
// not a digit in that base (leaving it unconsumed). Prints an error and exits
// with status 1 if the accumulated value would exceed UINT64_MAX.
static uint64_t lex_digits(uint8_t base) {
uint64_t acc = 0;
while (true) {
char c = peekc();
if (!is_digit_in(base, c)) {
// commas are legal digit separators
// NOTE(review): `c` was itself obtained from peekc() with no nextc() in
// between, so if peekc() does not consume input this second peekc() sees
// the same ',' again and this branch can never be taken. Accepting
// separators would need a look at the character *after* the comma —
// confirm against io.h.
if (c == ',' && is_digit_in(base, peekc())) {
nextc();
continue;
}
break;
}
nextc();
uint8_t digit = digit_value(c);
// Overflow guard, rearranged to avoid overflowing itself:
// (acc * base + digit) > UINT64_MAX
if (acc > ((UINT64_MAX - digit) / base)) {
fprintf(stderr, "lexical error: integer literal overflow\n");
exit(1);
}
acc *= base;
acc += digit;
}
return acc;
}
// Wraps an integer value in a TOK_INTEGER token.
static struct token integer_tok(uint64_t integer) {
    struct token tok;
    tok.type = TOK_INTEGER;
    tok.data.int_ = integer;
    return tok;
}
// Lexes an integer literal starting at the current input position and returns
// it as a TOK_INTEGER token.
//
// sign: true when the caller has already consumed a leading '-'; the value is
//       then negated and must fit in a signed 64-bit integer.
//
// Accepts decimal literals, and binary / hexadecimal literals introduced by
// the prefixes "0b" / "0x". Any lexical error is reported on stderr and
// terminates the program with status 1.
static struct token lex_integer(_Bool sign) {
    uint8_t base = 10;
    _Bool has_prefix = false;
    if (peekc() == '0') {
        nextc();
        if (peekc() == 'b') {
            base = 2;
            has_prefix = true;
            nextc();
        } else if (peekc() == 'x') {
            base = 16;
            has_prefix = true;
            nextc();
        }
        // A lone "0" is a complete literal. The original condition had a
        // misplaced parenthesis (`!is_digit(peekc() && ...)`) and was
        // therefore always true, truncating every literal that starts with
        // '0'. A base prefix with no digits after it is not a literal, so it
        // falls through to the "expected digits" error below.
        if (!has_prefix && !is_digit(peekc()) && !is_id_char(peekc())) {
            return integer_tok(0);
        }
    }
    if (!is_digit_in(base, peekc())) {
        fprintf(stderr, "lexical error: expected base-%i digits\n", base);
        exit(1);
    }
    // Read the digits in the base selected above (previously hard-coded to 10,
    // which made the 0b/0x prefixes ineffective).
    uint64_t acc = lex_digits(base);
    if (is_id_char(peekc())) {
        fprintf(stderr, "lexical error: must put space between integer and following identifier\n");
        exit(1);
    }
    if (sign && acc > INT64_MAX) {
        fprintf(stderr, "lexical error: signed integer literal overflow\n");
        exit(1);
    }
    int64_t val = (int64_t) acc;
    if (sign) {
        val = -val;
    }
    return integer_tok(val);
}
#define MAX_STR_LEN 4096
static size_t str_index;
// alternate string buffers between tokens to prevent overwriting buffer.
// we're LL(1) so 2 buffers is sufficient.
static _Bool which_buf = false;
static char str_buf_1[MAX_STR_LEN];
static char str_buf_2[MAX_STR_LEN];
// Flips between the two string buffers and returns the newly selected one.
static char* str_buf(void) {
    which_buf = !which_buf;
    return which_buf ? str_buf_1 : str_buf_2;
}
// Lexes the body of a string literal; the opening '"' has already been
// consumed by the caller. Reads up to and including the closing '"' and
// returns the characters in between as a NUL-terminated string stored in the
// buffer obtained from str_buf().
//
// Reports an error on stderr and exits with status 1 if the string reaches
// end of file or end of line before the closing quote, or if its contents do
// not fit in MAX_STR_LEN - 1 characters.
static char* lex_string(void) {
    // TODO: string escapes, multi-line strings, no length limit on strings
    str_index = 0;
    char* buf = str_buf();
    while (true) {
        char c = nextc();
        if (c == 0) {
            fprintf(stderr, "lexical error: unclosed string (reached end of file)\n");
            exit(1);
        }
        if (c == '"') {
            break;
        }
        if (c == '\n') {
            fprintf(stderr, "lexical error: unclosed string (reached end of line)\n");
            exit(1);
        }
        // Only check capacity when a character is actually about to be stored.
        // Checking before the terminator tests (as before) rejected strings
        // that exactly fill the buffer, even though the closing quote needs no
        // storage. One slot is kept free for the trailing NUL.
        if (str_index == MAX_STR_LEN - 1) {
            fprintf(stderr, "lexical error: string too long\n");
            exit(1);
        }
        buf[str_index] = c;
        str_index++;
    }
    buf[str_index] = 0;
    return buf;
}
static char* lex_identifier(void) {
str_index = 0;
@ -211,204 +100,53 @@ static char* lex_identifier(void) {
return buf;
}
static uint32_t indent_level = 0;
static uint32_t pending_level = 0;
static _Bool level_is_block[MAX_INDENTS] = {true};
// going back to a previous indentation level.
// if we're going back, then we insert a terminator.
static _Bool going_back = false;
static struct token lex(void) {
char c = peekc();
if (is_newline(c)) {
indent_level = lex_indentation();
if (indent_level <= pending_level) {
going_back = true;
}
}
while (indent_level > pending_level) {
pending_level++;
if (level_is_block[pending_level]) {
return simple(TOK_OPEN_BLOCK);
}
}
while (indent_level < pending_level) {
_Bool was_block = level_is_block[pending_level];
level_is_block[pending_level] = false;
pending_level--;
if (was_block) {
return simple(TOK_CLOSE_BLOCK);
}
}
if (going_back) {
going_back = false;
if (level_is_block[indent_level]) {
return simple(TOK_TERMINATOR);
}
}
c = peekc();
while (is_indent(c)) {
nextc();
c = peekc();
}
_Bool sign = false;
switch (c) {
case 0:
nextc();
return simple(TOK_EOF);
case '"': {
nextc();
union token_data data;
data.string = lex_string();
struct token tok = { TOK_STRING, data };
return tok;
}
case '\'': {
nextc();
union token_data data;
data.label = lex_identifier();
struct token tok = { TOK_LABEL, data };
return tok;
}
case ':':
nextc();
while (is_indent(peekc())) {
nextc();
}
if (is_newline(peekc())) {
level_is_block[indent_level + 1] = true;
return lex();
}
return op(OP_TYPE);
case '{':
nextc();
return simple(TOK_OPEN_BLOCK);
case '}':
nextc();
return simple(TOK_CLOSE_BLOCK);
case '(':
nextc();
return simple(TOK_OPEN_GROUP);
case ')':
nextc();
return simple(TOK_CLOSE_GROUP);
case ';':
nextc();
return simple(TOK_TERMINATOR);
case ',':
nextc();
return simple(TOK_SEPARATOR);
case '=':
nextc();
return simple(TOK_EQUALS);
case '-':
nextc();
if (peekc() == '>') {
nextc();
return op(OP_FUN);
}
if (is_digit(peekc())) {
return lex_integer(true);
}
return op(OP_SUB);
case '+':
nextc();
return op(OP_ADD);
case '*':
nextc();
return op(OP_MUL);
case '/':
nextc();
return op(OP_DIV);
case '%':
nextc();
return op(OP_MOD);
case '~':
nextc();
return op(OP_INV);
case '&':
nextc();
return op(OP_AND);
case '|':
nextc();
return op(OP_OR);
case '^':
nextc();
return op(OP_XOR);
case '!':
nextc();
if (peekc() == '=') {
nextc();
return op(OP_NE);
}
return op(OP_NOT);
case '>':
while (true) {
// skip whitespace
while (is_whitespace(c)) {
nextc();
c = peekc();
if (c == '=') {
nextc();
return op(OP_GTE);
}
if (c == '>') {
nextc();
if (peekc() == '>') {
nextc();
return op(OP_SHR);
}
return op(OP_SAR);
}
return op(OP_GT);
case '<':
nextc();
c = peekc();
if (c == '<') {
nextc();
return op(OP_SHL);
}
if (c == '=') {
nextc();
return op(OP_LTE);
}
return op(OP_LT);
}
if (is_digit(c)) {
return lex_integer(false);
}
char* name = lex_identifier();
if (strcmp(name, "if") == 0) {
return simple(TOK_IF);
}
if (strcmp(name, "else") == 0) {
return simple(TOK_ELSE);
}
if (strcmp(name, "match") == 0) {
return simple(TOK_MATCH);
}
if (strcmp(name, "case") == 0) {
return simple(TOK_CASE);
}
if (strcmp(name, "loop") == 0) {
return simple(TOK_LOOP);
}
if (strcmp(name, "fn") == 0) {
return simple(TOK_FN);
}
if (strcmp(name, "next") == 0) {
return simple(TOK_NEXT);
}
if (strcmp(name, "exit") == 0) {
return simple(TOK_EXIT);
}
if (strcmp(name, "recurse") == 0) {
return simple(TOK_RECURSE);
}
if (strcmp(name, "return") == 0) {
return simple(TOK_RETURN);
}
// skip line comments
if (c == '!') {
do {
c = nextc();
} while (c != '\n');
} else {
break;
}
}
union token_data data;
data.name = name;
struct token tok = { TOK_NAME, data };
// syntax
switch (c) {
case 0:
return simple(TOK_EOF);
case '{':
nextc();
return simple(TOK_MAP_BEGIN);
case '}':
nextc();
return simple(TOK_MAP_END);
}
char* name = lex_identifier();
// keywords
for (size_t kwd = 0; kwd < KEYWORD_COUNT; kwd++) {
if (strcmp(name, keywords[kwd]) == 0) {
return simple((enum token_type) kwd);
}
}
enum token_type type = TOK_JUMP;
// labels
if (peekc() == ':') {
type = TOK_LABEL;
nextc();
}
struct token tok = { type, name };
return tok;
}
@ -418,7 +156,6 @@ static struct token peek_buf;
struct token next(void) {
if (!init) {
init = true;
indent_level = lex_indentation();
next();
}
struct token tmp = peek_buf;
@ -429,78 +166,3 @@ struct token next(void) {
// Returns a copy of the token currently held in peek_buf; nothing is
// modified by the call.
// NOTE(review): presumably peek_buf only holds a real token once next() has
// been called at least once — confirm against the callers.
struct token peek(void) {
return peek_buf;
}
// Writes a textual rendering of `tok` to stdout (no trailing newline).
// Names, labels, integers and strings print their payload; punctuation and
// keyword tokens print their spelling; operators print as "OP:<number>".
void print_token(struct token tok) {
    switch (tok.type) {
    case TOK_NAME:
        fprintf(stdout, "%s", tok.data.name);
        break;
    case TOK_LABEL:
        fprintf(stdout, "'%s", tok.data.label);
        break;
    case TOK_INTEGER:
        // int_ is an int64_t; "%zi" is the conversion for a size_t-sized
        // signed value and is not guaranteed to match. Cast to long long,
        // which is at least 64 bits wide, and print with "%lld".
        fprintf(stdout, "%lld", (long long) tok.data.int_);
        break;
    case TOK_STRING:
        fprintf(stdout, "\"%s\"", tok.data.string);
        break;
    case TOK_OPEN_GROUP:
        fprintf(stdout, "(");
        break;
    case TOK_CLOSE_GROUP:
        fprintf(stdout, ")");
        break;
    case TOK_OPEN_BLOCK:
        fprintf(stdout, "{");
        break;
    case TOK_CLOSE_BLOCK:
        fprintf(stdout, "}");
        break;
    case TOK_TERMINATOR:
        fprintf(stdout, ";");
        break;
    case TOK_SEPARATOR:
        fprintf(stdout, ",");
        break;
    case TOK_OPERATOR:
        // TODO: printing for operators
        fprintf(stdout, "OP:%i", tok.data.op);
        break;
    case TOK_EOF:
        fprintf(stdout, "<EOF>");
        break;
    case TOK_CASE:
        fprintf(stdout, "case");
        break;
    case TOK_ELSE:
        fprintf(stdout, "else");
        break;
    case TOK_EQUALS:
        fprintf(stdout, "=");
        break;
    case TOK_EXIT:
        fprintf(stdout, "exit");
        break;
    case TOK_FN:
        fprintf(stdout, "fn");
        break;
    case TOK_IF:
        fprintf(stdout, "if");
        break;
    case TOK_LOOP:
        fprintf(stdout, "loop");
        break;
    case TOK_NEXT:
        fprintf(stdout, "next");
        break;
    case TOK_RETURN:
        fprintf(stdout, "return");
        break;
    case TOK_RECURSE:
        fprintf(stdout, "recurse");
        break;
    case TOK_MATCH:
        fprintf(stdout, "match");
        break;
    }
}

View File

@ -4,79 +4,44 @@
#include <stdbool.h>
#include <stdint.h>
#define KEYWORD_COUNT 23
enum token_type {
TOK_COMM = 0,
TOK_ASSOCL = 1,
TOK_ASSOCR = 2,
TOK_DISTL = 3,
TOK_DISTR = 4,
TOK_FACTL = 5,
TOK_FACTR = 6,
TOK_MAPL = 7,
TOK_MAPR = 8,
TOK_UNITIL = 9,
TOK_UNITIR = 10,
TOK_UNITEL = 11,
TOK_UNITER = 12,
TOK_COMM_PLUS = 13,
TOK_ASSOCL_PLUS = 14,
TOK_ASSOCR_PLUS = 15,
TOK_MAPL_PLUS = 16,
TOK_MAPR_PLUS = 17,
TOK_INL = 18,
TOK_INR = 19,
TOK_OUT = 20,
TOK_HALT = 21,
TOK_IF = 22,
TOK_LABEL,
TOK_JUMP,
TOK_MAP_BEGIN,
TOK_MAP_END,
TOK_EOF, // end of file
TOK_NAME, // foo, bar_quux123, loop
TOK_LABEL, // 'my_loop
TOK_INTEGER, // -123, 0xDEADBEEF
TOK_STRING, // "..."
TOK_OPERATOR,
TOK_OPEN_BLOCK, // `{` or `:` at the end of a line
TOK_CLOSE_BLOCK, // `}` or inferred from indentation
TOK_OPEN_GROUP, // `(`
TOK_CLOSE_GROUP, // `)`
TOK_TERMINATOR, // `;` or inferred from indentation, used to separate statements in blocks
TOK_SEPARATOR, // `,`, used to separate variables in initializers
TOK_EQUALS, // `=`, used for assignments or as an equality operator
TOK_IF, // if
TOK_ELSE, // else
TOK_MATCH, // match
TOK_CASE, // case
TOK_LOOP, // loop
TOK_FN, // fn
TOK_NEXT, // next
TOK_EXIT, // exit
TOK_RECURSE, // recurse
TOK_RETURN, // return
};
enum operator_ {
OP_EQ, // =
OP_ADD, // +
OP_SUB, // -
OP_MUL, // *
OP_DIV, // /
OP_MOD, // %
OP_INV, // ~
OP_AND, // &
OP_OR, // |
OP_XOR, // ^
OP_SHL, // <<
OP_SAR, // >>
OP_SHR, // >>>
OP_NOT, // !
OP_GT, // >
OP_LT, // <
OP_GTE, // >=
OP_LTE, // <=
OP_NE, // !=
OP_TYPE, // :
OP_FUN, // ->
OP_JUXT, // space! but this is not emitted by the lexer.
};
// Payload carried by a token; which member is meaningful depends on the
// token's type.
union token_data {
char* name; // identifier text, set for TOK_NAME tokens
char* label; // label text (without the leading '), set for TOK_LABEL tokens
char* string; // contents of a string literal, set for TOK_STRING tokens
int64_t int_; // value of an integer literal, set for TOK_INTEGER tokens
enum operator_ op; // which operator, set for TOK_OPERATOR tokens
};
struct token {
enum token_type type;
union token_data data;
char* identifier;
};
_Bool is_unary(enum operator_ op);
_Bool is_binary(enum operator_ op);
_Bool is_lit(struct token tok);
struct token next(void);
struct token peek(void);

View File

@ -2,310 +2,209 @@
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include "bytecode.h"
#include "format.h"
#include "io.h"
#include "lex.h"
#define ELF_HEADER_SIZE 0xb0
enum map_type {
MAP_LEFT_TIMES,
MAP_RIGHT_TIMES,
MAP_LEFT_PLUS,
MAP_RIGHT_PLUS,
};
// a + (b + (c + d))
// (a + b) + (c + d)
// (b + a) + (c + d)
// b + (a + (c + d))
//
static enum map_type maps[16];
static size_t mapi = 0;
void transition_right(void) {
assocl_plus();
mapl_plus_begin();
out();
inr();
mapl_plus_end();
assocr_plus();
static char* label_names[2048];
static symbol label_symbols[2048];
static size_t labeli = 0;
// Removes and returns the most recently pushed map type.
// If no map is open, prints "unmatched }" to stderr and exits with status 1.
enum map_type pop(void) {
    if (mapi == 0) {
        fprintf(stderr, "unmatched }\n");
        exit(1);
    }
    mapi -= 1;
    return maps[mapi];
}
void transition_left(void) {
out();
inl();
// Records a newly opened map on the map stack.
// If the stack is already full, prints "out of maps" to stderr and exits with
// status 1.
void push(enum map_type type) {
    // Derive the capacity from the array itself so the limit cannot drift out
    // of sync with the declaration of `maps`.
    if (mapi >= sizeof maps / sizeof maps[0]) {
        fprintf(stderr, "out of maps\n");
        exit(1);
    }
    maps[mapi++] = type;
}
void jump_from_to(size_t from, size_t to) {
if (from < to) {
mapl_plus_begin();
inl();
for(; from <= to; to--) {
inr();
}
mapl_plus_end();
mapr_plus_begin();
inr();
mapr_plus_end();
out();
} else if (to > from) {
for (size_t i = 0; i < from - to; i++) {
mapr_plus_end();
mapl_plus_begin();
inl();
mapl_plus_end();
symbol lookup_label(const char* name) {
for (size_t i = 0; i < labeli; i++) {
if (strcmp(label_names[i], name) == 0) {
fprintf(stderr, "%s\n", name);
return label_symbols[i];
}
}
}
void transition_into(void) {
assocl_plus();
mapl_plus_begin();
mapl_plus_begin();
inl();
mapl_plus_end();
out();
inr();
mapl_plus_end();
assocr_plus();
}
void transition_while(void) {
assocl_plus();
mapl_plus_begin();
mapr_plus_begin();
inr();
mapr_plus_end();
out();
mapl_plus_end();
assocr_plus();
}
void inc(void) {
inr();
factl();
}
void new_nat(void) {
// ctx
unitil(); // ctx * 1
inl(); // ctx * 1 + ctx * 1
factl(); // ctx * (1 + 1)
}
void swap(void) {
assocr();
mapr_begin();
comm();
mapr_end();
assocl();
}
static void select_var(size_t var) {
// (... * a) * (b * (c * ...))
for (size_t i = 0; i < var; i++) {
assocr();
// ((... * a) * b) * (c * ...)
if (labeli >= 2048) {
fprintf(stderr, "out of labels\n");
exit(1);
}
comm();
// (c * ...) * ((... * a) * b)
assocl();
// ((c * ...) * (... * a)) * b
fprintf(stderr, "%s:\n", name);
unsigned long len = strlen(name) + 1;
label_names[labeli] = malloc(len);
memcpy(label_names[labeli], name, len);
label_symbols[labeli] = new_symbol();
return label_symbols[labeli++];
}
static void unselect_var(size_t var) {
assocr();
comm();
for (size_t i = 0; i < var; i++) {
assocl();
// Checks that no map is currently open; otherwise reports the error on stderr
// and exits with status 1.
void nomap(void) {
    if (mapi == 0) {
        return;
    }
    fprintf(stderr, "expected all maps to be closed before new label or EOF\n");
    exit(1);
}
static void case_on(size_t var) {
select_var(var);
distr();
mapl_plus_begin(); {
unselect_var(var);
} mapl_plus_end();
mapr_plus_begin(); {
unselect_var(var);
} mapr_plus_end();
}
static void snipe(size_t var) {
select_var(var);
unitel();
comm();
for (size_t i = 0; i < var; i++) {
assocl();
void begin_map(enum map_type type) {
if (next().type != TOK_MAP_BEGIN) {
fprintf(stderr, "expected {\n");
exit(1);
}
push(type);
}
symbol compile(void) {
symbol entry_point = init_bytecode();
// This is the program we're trying to execute:
//
// fib n = fib_acc n 0 1
// fib_acc 0 a b = a
// fib_acc (S n) a b = fib_acc n b (a + b)
//
// Looks simple, right? Well, things are a bit more complicated than that.
//
// 1. In `fib_acc 0`, we implicitly drop the value of `b`. Because we do not have
// weakening, we will have to free `b` explicitly here.
//
// fib_acc 0 a 0 = a
// fib_acc 0 a (S b) = fib_acc 0 a b
//
// 2. In `fib_acc (S n)`, we use `b` twice. We do not have contraction, so we must
// explicitly duplicate it, or implicitly duplicate it when we consume `b`.
//
// 3. We do not have addition as a built-in; we will need to define it ourselves.
// Moreover, we do not have functions, so it must be fused into the definition
// of fib_acc.
//
// -- We will duplicate `b` into the first argument (the new `a`)
// -- while adding it to the second argument (`a`, which will become the new `b`).
// fib_acc (S n) a b = fib_acc_plus n 0 a b
// fib_acc_plus n a b' 0 = fib_acc n a b'
// fib_acc_plus n a b' (S b) = fib_acc_plus n (S a) (S b') b
//
// 4. We'll have to do a lot of tedious work shuffling variables around.
// We don't even have implicit associativity, much less commutativity!
//
// We have this hierarchy of states:
//
// 1. start(1)
// 2. fib(n)
// 3. fib_acc(n, a, b)
// 4. fib_acc(0, a, b)
// 5. fib_acc_0(a b)
//
// States:
// * start(1)
// * fib(n)
// * fib_acc(n, a, b)
// * fib_acc_Z(1, (a, b))
// * fib_acc_Z_free(a, b)
// * fib_acc_Z_done
// * fib_acc_S
// * fib_acc_S_copy
// * fib_acc_S_copy_done
// * fib_acc_S_copy_S
// State 0: starting state
mapl_plus_begin();
// Initialize with integer (5).
inl();
inr();
inr();
inr();
inr();
inr();
mapl_plus_end();
transition_right();
mapr_plus_begin();
// State 1: fib(n);
mapl_plus_begin();
// a = 0
new_nat();
// b = 1
new_nat();
inc();
mapl_plus_end();
transition_right();
mapr_plus_begin();
// State 2: fib_acc(n, a, b)
mapl_plus_begin();
// if n=1, we return the accumulated value
assocr();
distl();
mapl_plus_end();
transition_right();
mapr_plus_begin();
mapl_plus_begin();
// State 3.1.1: fib_acc_Z(1, (a, b))
mapl_plus_begin();
uniter();
// (a, b)
mapl_plus_end();
transition_into();
mapr_plus_begin();
// State 3.1.2.1: fib_acc_Z_free(a, b)
mapl_plus_begin();
// n * (1 + n)
distr();
mapl_plus_end();
transition_while();
// State 3.1.2.2: fib_acc_Z_done
mapr_plus_begin();
uniter();
quit();
mapr_plus_end();
mapr_plus_end();
mapl_plus_end();
mapr_plus_begin();
// State 4: fib_acc_S
mapl_plus_begin();
assocl();
new_nat();
swap();
new_nat();
swap();
mapl_plus_end();
transition_into();
mapr_plus_begin();
mapl_plus_begin();
// State 5.1: fib_acc_S_copy(n, a, b1, b2, b)
mapl_plus_begin();
distl();
mapl_plus_end();
transition_into();
mapr_plus_begin();
mapl_plus_begin();
// State 5.2.1: fib_acc_S_copy_done(n, a, b, b, 1)
uniter();
// TODO:
mapl_plus_end();
mapr_plus_begin();
// State 5.2.2: fib_acc_S_copy_S(n, a, b1, b2, b)
mapr_plus_end();
mapr_plus_end();
mapl_plus_end();
mapr_plus_end();
mapr_plus_end();
mapr_plus_end();
mapr_plus_end();
mapr_plus_end();
// State 1: fib(n)
assocl_plus();
mapl_plus_begin();
// switch to state 2
out();
inr();
mapl_plus_end();
assocr_plus();
mapr_plus_begin();
// State 2: fib_acc(n, a, b)
mapl_plus_begin();
// State 2.1: transition to state 3
mapl_plus_begin();
mapr_plus_end();
mapr_plus_end();
while (true) {
struct token tok = next();
switch (tok.type) {
case TOK_COMM:
comm();
break;
case TOK_ASSOCL:
assocl();
break;
case TOK_ASSOCR:
assocr();
break;
case TOK_DISTL:
distl();
break;
case TOK_DISTR:
distr();
break;
case TOK_FACTL:
factl();
break;
case TOK_FACTR:
factr();
break;
case TOK_MAPL:
begin_map(MAP_LEFT_TIMES);
mapl_begin();
break;
case TOK_MAPR:
begin_map(MAP_RIGHT_TIMES);
mapr_begin();
break;
case TOK_UNITIL:
unitil();
break;
case TOK_UNITIR:
unitir();
break;
case TOK_UNITEL:
unitel();
break;
case TOK_UNITER:
uniter();
break;
case TOK_COMM_PLUS:
comm_plus();
break;
case TOK_ASSOCL_PLUS:
assocl_plus();
break;
case TOK_ASSOCR_PLUS:
assocr_plus();
break;
case TOK_MAPL_PLUS:
begin_map(MAP_LEFT_PLUS);
mapl_plus_begin();
break;
case TOK_MAPR_PLUS:
begin_map(MAP_RIGHT_PLUS);
mapr_plus_begin();
break;
case TOK_INL:
inl();
break;
case TOK_INR:
inr();
break;
case TOK_OUT:
out();
break;
case TOK_HALT:
halt();
break;
case TOK_LABEL:
nomap();
define_executable_symbol(lookup_label(tok.identifier));
break;
case TOK_JUMP:
fprintf(stderr, "!jump %s\n", tok.identifier);
nomap();
jump(lookup_label(tok.identifier));
break;
case TOK_MAP_BEGIN:
fprintf(stderr, "unexpected {\n");
exit(1);
break;
case TOK_MAP_END:
; enum map_type type = pop();
switch (type) {
case MAP_LEFT_TIMES:
mapl_end();
break;
case MAP_RIGHT_TIMES:
mapr_end();
break;
case MAP_LEFT_PLUS:
mapl_plus_end();
break;
case MAP_RIGHT_PLUS:
mapr_plus_end();
break;
}
break;
case TOK_EOF:
goto eof;
case TOK_IF:
nomap();
struct token a = next();
struct token b = next();
fprintf(stderr, "!if %s %s\n", a.identifier, b.identifier);
symbol aa = lookup_label(a.identifier);
symbol bb = lookup_label(b.identifier);
if (a.type != TOK_JUMP || b.type != TOK_JUMP) {
fprintf(stderr, "arguments to 'if' should be labels\n");
exit(1);
}
jump_if(aa, bb);
break;
}
}
eof:
nomap();
finish_bytecode();
return entry_point;
}

View File

@ -1,302 +0,0 @@
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lang.h"
#include "lex.h"
#include "parse.h"
enum state {
ST_BLOCK,
ST_BLOCK_BODY,
ST_BLOCK_CONT,
ST_BLOCK_CLOSE,
ST_ASSIGN,
ST_EXPR,
// HACK: The existence of this state.
// Also, the entire structure of the parser is ugly.
ST_EXPR_HACK,
ST_EXPR_CONT,
ST_EXPR_END,
ST_GROUP,
ST_IF_ELSE,
ST_IF_END,
ST_LOOP_VARS,
ST_LOOP_VARS_CONT,
};
// Maps a parser state to the one-character tag used when the parser stack is
// printed for debugging.
const char* state_name(enum state st) {
    switch (st) {
    case ST_BLOCK:          return "{";
    case ST_BLOCK_BODY:     return "B";
    case ST_BLOCK_CONT:     return ";";
    case ST_BLOCK_CLOSE:    return "}";
    case ST_ASSIGN:         return "=";
    case ST_EXPR:           return "x";
    case ST_EXPR_HACK:      return "H";
    case ST_EXPR_CONT:      return "c";
    case ST_EXPR_END:       return "E";
    case ST_GROUP:          return "(";
    case ST_IF_ELSE:        return "|";
    case ST_IF_END:         return "i";
    case ST_LOOP_VARS:      return "v";
    case ST_LOOP_VARS_CONT: return ",";
    }
}
#define MAX_CONTEXT 256
static uint32_t sp = 0;
static enum state stack[MAX_CONTEXT];
// Prints one debug line to stdout: the tags of every state on the parser
// stack (bottom first), then the current token and the lookahead token,
// separated by single spaces.
static void debug_print(struct token tok, struct token next) {
    uint32_t depth = 0;
    while (depth < sp) {
        fputs(state_name(stack[depth]), stdout);
        depth++;
    }
    putchar(' ');
    print_token(tok);
    putchar(' ');
    print_token(next);
    putchar('\n');
}
static void push(enum state state) {
stack[sp] = state;
sp++;
}
// Removes and returns the top state of the parser stack.
// Asserts that the stack is not empty.
static enum state pop(void) {
    assert(sp > 0);
    return stack[--sp];
}
// True when `tok` is a name and the token after it is `=`.
static _Bool is_assignment(struct token tok, struct token next) {
    if (tok.type != TOK_NAME) {
        return false;
    }
    return next.type == TOK_EQUALS;
}
// True when `tok` can begin an expression: anything is_lit() accepts, or one
// of the token types listed below.
static _Bool is_expr(struct token tok) {
    if (is_lit(tok)) {
        return true;
    }
    switch (tok.type) {
    case TOK_NAME:
    case TOK_OPEN_GROUP:
    case TOK_IF:
    case TOK_MATCH:
    case TOK_FN:
    case TOK_LOOP:
    case TOK_NEXT:
    case TOK_EXIT:
    case TOK_RETURN:
    case TOK_RECURSE:
        return true;
    default:
        return false;
    }
}
// Reports a syntax error on stderr and terminates the program with status 1.
// The body is wrapped in do { ... } while (0) so the expansion is a single
// statement: it stays correct under an unbraced `if`/`else`, where the
// previous two-statement expansion would have run exit(1) unconditionally or
// failed to compile.
#define syntax_error(msg) do { fprintf(stderr, "syntax error: %s\n", (msg)); exit(1); } while (0)
void parse(void) {
sp = 0;
// TODO: add support for the top-level instead of this block hack
push(ST_BLOCK_BODY);
struct token tok = next();
struct token nxt = peek();
while (sp > 0) {
debug_print(tok, nxt);
// FIXME: stack underflow because we're faking the top-level with blocks
switch (pop()) {
case ST_BLOCK:
if (tok.type == TOK_OPEN_BLOCK) {
push(ST_BLOCK_CLOSE);
push(ST_BLOCK_BODY);
enter_block();
break;
}
syntax_error("expected beginning of block");
break;
case ST_BLOCK_BODY:
if (is_assignment(tok, nxt)) {
push(ST_BLOCK_CONT);
push(ST_ASSIGN);
stmt_assign(tok.data.name);
break;
}
if (is_expr(tok)) {
push(ST_BLOCK_CONT);
push(ST_EXPR);
stmt_expr();
continue;
}
continue;
case ST_BLOCK_CONT:
if (tok.type == TOK_TERMINATOR) {
push(ST_BLOCK_BODY);
break;
}
continue;
case ST_BLOCK_CLOSE:
if (tok.type == TOK_CLOSE_BLOCK) {
exit_block();
break;
}
syntax_error("expected end of block");
case ST_ASSIGN:
assert(tok.type == TOK_OPERATOR || tok.data.op == OP_EQ);
push(ST_EXPR);
break;
case ST_EXPR:
push(ST_EXPR_END);
push(ST_EXPR_HACK);
continue;
case ST_EXPR_HACK:
switch (tok.type) {
case TOK_STRING:
push(ST_EXPR_CONT);
expr_string(tok.data.string);
break;
case TOK_INTEGER:
push(ST_EXPR_CONT);
expr_integer(tok.data.int_);
break;
case TOK_IF:
push(ST_IF_END);
push(ST_IF_ELSE);
push(ST_BLOCK);
push(ST_EXPR);
enter_if();
break;
case TOK_LOOP:
push(ST_BLOCK);
push(ST_LOOP_VARS);
if (nxt.type == TOK_LABEL) {
next();
enter_loop(nxt.data.label);
} else {
enter_loop(NULL);
}
break;
case TOK_NEXT:
push(ST_EXPR);
if (nxt.type == TOK_LABEL) {
next();
expr_next(nxt.data.label);
} else {
expr_next(NULL);
}
break;
case TOK_EXIT:
push(ST_EXPR);
if (nxt.type == TOK_LABEL) {
next();
expr_exit(nxt.data.label);
} else {
expr_exit(NULL);
}
break;
case TOK_RETURN:
push(ST_EXPR);
expr_return();
break;
case TOK_NAME:
push(ST_EXPR_CONT);
expr_var(tok.data.name);
break;
case TOK_OPEN_GROUP:
push(ST_EXPR_CONT);
push(ST_GROUP);
push(ST_EXPR);
enter_group();
break;
case TOK_OPERATOR:
if (is_unary(tok.data.op)) {
push(ST_EXPR_CONT);
push(ST_EXPR_HACK);
expr_op(tok.data.op);
break;
}
syntax_error("only unary operators allowed at beginning of expression");
case TOK_OPEN_BLOCK:
push(ST_BLOCK);
continue;
default:
syntax_error("expected expression");
}
break;
case ST_EXPR_CONT:
if (is_expr(tok)) {
push(ST_EXPR_HACK);
expr_op(OP_JUXT);
continue;
}
if (tok.type == TOK_OPERATOR && is_binary(tok.data.op)) {
push(ST_EXPR_HACK);
expr_op(tok.data.op);
break;
}
continue;
case ST_EXPR_END:
exit_expr();
continue;
case ST_GROUP:
if (tok.type == TOK_CLOSE_GROUP) {
exit_group();
break;
}
syntax_error("mismatched parentheses");
case ST_IF_ELSE:
if (tok.type == TOK_ELSE) {
push(ST_BLOCK);
break;
}
continue;
case ST_IF_END:
exit_if();
continue;
case ST_LOOP_VARS:
if (is_assignment(tok, nxt)) {
push(ST_LOOP_VARS_CONT);
push(ST_ASSIGN);
cvar_init(tok.data.name);
break;
}
if (tok.type == TOK_NAME) {
push(ST_LOOP_VARS_CONT);
cvar_pass(tok.data.name);
break;
}
continue;
case ST_LOOP_VARS_CONT:
if (tok.type == TOK_SEPARATOR) {
push(ST_LOOP_VARS);
break;
}
continue;
}
tok = next();
nxt = peek();
}
if (tok.type != TOK_EOF) {
fprintf(stderr, "syntax error: finished parsing before end of file\n");
exit(1);
}
if (sp > 0) {
fprintf(stderr, "syntax error: unfinished business at end of file: %i, %i\n", sp, stack[0]);
exit(1);
}
}

View File

@ -1,6 +0,0 @@
#ifndef PARSE_H
#define PARSE_H
void parse(void);
#endif