(Algebraic language) Fibonacci sequence works.
parent
40f88918ef
commit
86fea958f4
2
Makefile
2
Makefile
|
@ -6,7 +6,7 @@ SHELL = /bin/sh
|
|||
CFLAGS = -std=c99 -pedantic -Wextra -Os
|
||||
LDFLAGS = -lc
|
||||
|
||||
OBJECTS = bytecode.o format.o io.o main.o x86encode.o
|
||||
OBJECTS = bytecode.o format.o io.o lex.o main.o x86encode.o
|
||||
|
||||
.PHONY: passc
|
||||
passc: .bin $(OBJECTS)
|
||||
|
|
|
@ -1,111 +0,0 @@
|
|||
# Intermediate Representations
|
||||
|
||||
## Bytecode
|
||||
|
||||
### Instructions
|
||||
Instructions for times:
|
||||
|
||||
* `comm : a * b <=> b * a`
|
||||
* `assocl : a * (b * c) => (a * b) * c`
|
||||
* `assocr : (a * b) * c => a * (b * c)`
|
||||
* `mapl (f : a => b) : a * c => b * c`
|
||||
* `mapr (f : b => c) : a * b => a * c`
|
||||
* `unitil : a => a * 1`
|
||||
* `unitir : a => 1 * a`
|
||||
* `unitel : a * 1 => a`
|
||||
* `uniter : 1 * a => a`
|
||||
|
||||
Instructions for plus:
|
||||
|
||||
* `comm : a + b <=> b + a`
|
||||
* `assocl : a + (b + c) => (a + b) + c`
|
||||
* `assocr : (a + b) + c => a + (b + c)`
|
||||
* `mapl (f : a => b) : a + c => b + c`
|
||||
* `mapr (f : b => c) : a + b => a + c`
|
||||
* `inl (b : type) : a => a + b`
|
||||
* `inr (a : type) : b => a + b`
|
||||
* `out : a + a => a`
|
||||
|
||||
Distributivity:
|
||||
|
||||
* `distl : a * (b + c) => (a * b) + (a * c)`
|
||||
* `distr : (a + b) * c => (a * c) + (b * c)`
|
||||
* `factl : (a * b) + (a * c) => a * (b + c)`
|
||||
* `factr : (a * c) + (b * c) => (a + b) * c`
|
||||
|
||||
Recursion:
|
||||
|
||||
* `project: rec r. f(r) -> f(rec r. f(r))`
|
||||
* `embed: f(rec r. f(r)) -> rec r. f(r)`
|
||||
|
||||
`project` and `embed` are no-ops which exist to make type-checking easier
|
||||
(i.e. isorecursive over equirecursive types).
|
||||
|
||||
#### Most instructions are redundant
|
||||
Most of these instructions are redundant:
|
||||
|
||||
* All of the l/r variants can be implemented in terms of each other
|
||||
using commutativity.
|
||||
* All of the plus instructions can be implemented in terms of `map`, `in`, and `out`.
|
||||
* Alternatively, we could have replaced `map` and `out` with a single instruction,
|
||||
`if (f : a => c) (g : b => c) : a + b => c`.
|
||||
|
||||
So "morally", there are only about 10 instructions: `comm`, `assoc`, `map`, `uniti`, `unite`,
|
||||
`inl`, `inr`, `if`, `dist`, and `fact`.
|
||||
|
||||
#### Most instructions are reversible
|
||||
Inverses of instructions:
|
||||
|
||||
* `comm` / `comm`
|
||||
* `assocl` / `assocr`
|
||||
* `map f` / `map f*`
|
||||
* `uniti` / `unite`
|
||||
* `dist` / `fact`
|
||||
|
||||
The only irreversible instructions are `in` and `out`.
|
||||
|
||||
#### Instructions are algebraic laws
|
||||
We have a symmetric monoidal category with coproducts where `*` distributes over `+`.
|
||||
This isn't quite a distributive symmetric monoidal category, because `*` isn't a product.
|
||||
|
||||
Likewise, we *almost* have a distributive lattice (characterized as a meet-semilattice
|
||||
with binary joins), but `*` isn't guaranteed to be idempotent.
|
||||
|
||||
The reversible fragment is a wide dagger symmetric monoidal subcategory.
|
||||
|
||||
#### That's really all we need
|
||||
We simply don't need functions, polymorphism, or `0`.
|
||||
|
||||
`0` isn't very interesting when characterized as an initial object
|
||||
or as the unit for `+`; I find it's only interesting in the context of
|
||||
second-order polymorphism, as `forall a. a`.
|
||||
|
||||
## Finite-state 1-bit cons machine
|
||||
Instructions:
|
||||
|
||||
* `comm`
|
||||
* `assoc`
|
||||
* `factor`
|
||||
* `dist`
|
||||
* `map`
|
||||
* `unite`
|
||||
* `uniti`
|
||||
* `inl`
|
||||
* `inr`
|
||||
|
||||
Redundant instructions:
|
||||
|
||||
* `l`/`r` variants
|
||||
* `out`
|
||||
|
||||
There is a finite number of states, and a state transition table
|
||||
which determines the next state based on the current state and
|
||||
a single bit extracted using `dist`.
|
||||
|
||||
## Finite-state random-access 1-bit register machine
|
||||
Instructions:
|
||||
|
||||
* `x <- enum(imm, y)`
|
||||
* `w <- struct(x, y, z)`
|
||||
* `free x`
|
||||
|
|
@ -61,7 +61,7 @@ void assocr(void) {
|
|||
x86_inst_xchg_r64_m64(AX, DX);
|
||||
}
|
||||
|
||||
void distl(void) {
|
||||
void distr(void) {
|
||||
// a, b + c
|
||||
// a * b + a * c
|
||||
|
||||
|
@ -74,7 +74,7 @@ void distl(void) {
|
|||
// Awfully convenient how that works out, huh?
|
||||
}
|
||||
|
||||
void distr(void) {
|
||||
void distl(void) {
|
||||
// The intermediate states here are ill-typed, but ultimately everything
|
||||
// gets shuffled around to the right locations.
|
||||
|
||||
|
@ -96,7 +96,7 @@ void distr(void) {
|
|||
x86_inst_xchg_r64_rax(DX);
|
||||
}
|
||||
|
||||
void factl(void) {
|
||||
void factr(void) {
|
||||
// a * b + a * c:
|
||||
// a * (b + c)
|
||||
|
||||
|
@ -107,7 +107,7 @@ void factl(void) {
|
|||
x86_inst_xchg_r64_m64(AX, DX);
|
||||
}
|
||||
|
||||
void factr(void) {
|
||||
void factl(void) {
|
||||
// a * c + b * c
|
||||
// (a + b) * c
|
||||
|
||||
|
@ -172,23 +172,23 @@ void mapr_end(void) {
|
|||
x86_inst_pop_r64(AX);
|
||||
}
|
||||
|
||||
void unitil(void) {
|
||||
void unitir(void) {
|
||||
allocate_cons();
|
||||
x86_inst_xchg_r64_rax(DX);
|
||||
x86_inst_mov_r64_r64(DX, DI);
|
||||
}
|
||||
|
||||
void unitir(void) {
|
||||
void unitil(void) {
|
||||
allocate_cons();
|
||||
x86_inst_mov_r64_r64(AX, DI);
|
||||
}
|
||||
|
||||
void unitel(void) {
|
||||
void uniter(void) {
|
||||
x86_inst_xchg_r64_rax(DX);
|
||||
free_cons();
|
||||
}
|
||||
|
||||
void uniter(void) {
|
||||
void unitel(void) {
|
||||
free_cons();
|
||||
}
|
||||
|
||||
|
@ -377,6 +377,17 @@ void out(void) {
|
|||
free_cons();
|
||||
}
|
||||
|
||||
/// Emit an unconditional jump to `sym`.
///
/// Thin public wrapper around `inst_jump`.
void jump(symbol sym)
{
    inst_jump(sym);
}
|
||||
|
||||
/// Emit a two-way branch on the current value.
///
/// Emits a byte-register test of AX against itself, then the `out`
/// sequence, then a jump-if-zero to `a` followed by an unconditional
/// jump to `b`.
void jump_if(symbol a, symbol b)
{
    x86_inst_test_r8_r8(AX, AX);
    // NOTE(review): the conditional jump below consumes the result of the
    // test above, so this presumably relies on the code emitted by `out`
    // leaving that result intact -- confirm against `out`/`free_cons`.
    out();
    inst_jump_if_zero(a);
    inst_jump(b);
}
|
||||
|
||||
static void inst_load(reg dest, symbol sym) {
|
||||
x86_inst_lea_r64_rip_disp32_op(dest);
|
||||
relocate_pc32(sym);
|
||||
|
@ -386,7 +397,7 @@ static symbol one_symbol;
|
|||
static symbol loop_point;
|
||||
static symbol exit_point;
|
||||
|
||||
void quit(void) {
|
||||
void halt(void) {
|
||||
inst_jump(exit_point);
|
||||
}
|
||||
|
||||
|
@ -469,14 +480,13 @@ symbol init_bytecode(void) {
|
|||
//x86_inst_lea_r64_m64_disp8(DI, SP, -16);
|
||||
x86_inst_mov_r64_r64(DI, SP);
|
||||
x86_inst_sub_r64_imm8(DI, 16);
|
||||
x86_inst_push_r64(DI);
|
||||
x86_inst_push_r64(DI);
|
||||
x86_inst_xor_r32_r32(R14, R14);
|
||||
x86_inst_push_r64(R14);
|
||||
x86_inst_push_r64(R14);
|
||||
|
||||
// Initial state is a unit in the left.
|
||||
// (Right states will be loop states.)
|
||||
// Initial state is a unit.
|
||||
x86_inst_mov_r64_r64(AX, DI);
|
||||
x86_inst_mov_r64_r64(DX, DI);
|
||||
inl();
|
||||
|
||||
loop_point = new_symbol();
|
||||
define_executable_symbol(loop_point);
|
||||
|
|
|
@ -10,13 +10,13 @@ void assocl(void);
|
|||
/// (a * b) * c => a * (b * c)
|
||||
void assocr(void);
|
||||
/// a * (b + c) => (a * b) + (a * c)
|
||||
void distl(void);
|
||||
/// (a + b) * c => (a * c) + (b * c)
|
||||
void distr(void);
|
||||
/// (a + b) * c => (a * c) + (b * c)
|
||||
void distl(void);
|
||||
/// (a * b) + (a * c) => a * (b + c)
|
||||
void factl(void);
|
||||
/// (a * c) + (b * c) => (a + b) * c
|
||||
void factr(void);
|
||||
/// (a * c) + (b * c) => (a + b) * c
|
||||
void factl(void);
|
||||
/// (a => b) => (a * c => b * c)
|
||||
void mapl_begin(void);
|
||||
void mapl_end(void);
|
||||
|
@ -24,13 +24,13 @@ void mapl_end(void);
|
|||
void mapr_begin(void);
|
||||
void mapr_end(void);
|
||||
/// a => a * 1
|
||||
void unitil(void);
|
||||
/// a => 1 * a
|
||||
void unitir(void);
|
||||
/// a => 1 * a
|
||||
void unitil(void);
|
||||
/// a * 1 => a
|
||||
void unitel(void);
|
||||
/// 1 * a => a
|
||||
void uniter(void);
|
||||
/// 1 * a => a
|
||||
void unitel(void);
|
||||
/// a + b <=> b + a
|
||||
void comm_plus(void);
|
||||
/// a + (b + c) => (a + b) + c
|
||||
|
@ -49,9 +49,12 @@ void inl(void);
|
|||
void inr(void);
|
||||
/// a + a => a
|
||||
void out(void);
|
||||
|
||||
void quit(void);
|
||||
/// end the program
|
||||
void halt(void);
|
||||
symbol init_bytecode(void);
|
||||
void finish_bytecode(void);
|
||||
|
||||
void jump(symbol sym);
|
||||
void jump_if(symbol a, symbol b);
|
||||
|
||||
#endif
|
||||
|
|
196
src/ir.c
196
src/ir.c
|
@ -1,196 +0,0 @@
|
|||
/// This file serves conceptually as the intermediate representation (IR)
|
||||
/// of the compiler. Compared to "asm", this file is aware of stack frames,
|
||||
/// control flow blocks and labels, compound types like structs and enums,
|
||||
/// and register allocation.
|
||||
|
||||
#include "asm.h"
|
||||
#include "format.h"
|
||||
#include "ir.h"
|
||||
#include "x86encode.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define MAX_STACK_FRAMES 32
|
||||
#define MAX_LABELS 256
|
||||
#define MAX_FIXUPS 256
|
||||
|
||||
struct stack_frame {
|
||||
uint32_t depth;
|
||||
uint32_t label_depth;
|
||||
};
|
||||
|
||||
struct label {
|
||||
uint32_t frame;
|
||||
uint32_t argc;
|
||||
symbol symbol;
|
||||
};
|
||||
|
||||
static uint32_t stack_depth = 0;
|
||||
static uint32_t stack_frame = 0;
|
||||
static struct stack_frame stack_frames[MAX_STACK_FRAMES];
|
||||
static uint32_t label_depth = 0;
|
||||
static struct label labels[MAX_LABELS];
|
||||
|
||||
/// Emit the prologue and hand out the first three variable slots.
///
/// BP is set to SP + 24, and `*env`, `*argv` and `*argc` receive slots
/// 0, 1 and 2 respectively (in that order of assignment).
/// Must be called before any other variable or frame is created.
void init_ir(var* argc, var* argv, var* env)
{
    assert(stack_depth == 0 && stack_frame == 0);

    x86_inst_mov_r64_r64(BP, SP);
    x86_inst_add_r64_imm8(BP, 8 * 3);

    const uint32_t first_slot = stack_depth;
    *env = first_slot;
    *argv = first_slot + 1;
    *argc = first_slot + 2;
    stack_depth = first_slot + 3;
}
|
||||
|
||||
void enter(void) {
|
||||
assert(stack_frame < MAX_STACK_FRAMES);
|
||||
printf("ENTERING: %i, %i\n", stack_depth, label_depth);
|
||||
struct stack_frame frame = { .depth = stack_depth, .label_depth = label_depth };
|
||||
stack_frames[stack_frame] = frame;
|
||||
stack_frame++;
|
||||
// exit label
|
||||
declare(0);
|
||||
}
|
||||
|
||||
/// Close the innermost block.
///
/// Restores the stack and label depths saved by `enter`, defines the
/// block's exit label (writing its argument slots into `args`), and then
/// pops the frame.
void leave(var* args)
{
    assert(stack_frame > 0);

    const struct stack_frame* top = &stack_frames[stack_frame - 1];
    const uint32_t exit_label = top->label_depth;

    stack_depth = top->depth;
    label_depth = exit_label;

    // Must happen before the frame is popped: `define` checks that the
    // label belongs to the current frame.
    define(exit_label, args);
    stack_frame--;
}
|
||||
|
||||
label declare(uint32_t argc) {
|
||||
assert(label_depth < MAX_LABELS);
|
||||
symbol sym = new_symbol();
|
||||
struct label label = { .frame = stack_frame, .argc = argc, .symbol = sym };
|
||||
labels[label_depth] = label;
|
||||
return label_depth++;
|
||||
}
|
||||
|
||||
label declare_exit(uint32_t argc) {
|
||||
label label = stack_frames[stack_frame - 1].label_depth;
|
||||
labels[label].argc = argc;
|
||||
return label;
|
||||
}
|
||||
|
||||
/// Bind label `l` to the current code position and bring its arguments
/// into scope.
///
/// `args[0 .. argc)` receive consecutive slots starting at the current
/// stack depth, which is then advanced by `argc`. `args` is not touched
/// when the label takes no arguments.
void define(label l, var* args)
{
    struct label* label = &labels[l];
    printf("DEFINING %i (%i)\n", l, label->argc);
    define_executable_symbol(label->symbol);
    assert(label->frame == stack_frame);

    const uint32_t base = stack_depth;
    uint32_t n = 0;
    while (n < label->argc) {
        args[n] = base + n;
        n++;
    }
    stack_depth = base + label->argc;
}
|
||||
|
||||
/// Emit a load of variable slot `var` into register `reg`.
void load_var(reg reg, var var)
{
    // Slots are 8 bytes wide and live below BP: the stack grows downward
    // and BP itself points at nothing, so slot 0 is at BP - 8, slot 1 at
    // BP - 16, and so on.
    // NOTE(review): the offset is computed in unsigned 32-bit arithmetic
    // and relies on the conversion to the helper's displacement parameter
    // to come out negative -- confirm that parameter's type.
    uint32_t displacement = -(var * 8) - 8;
    x86_inst_mov_r64_m64_disp(reg, BP, displacement);
}
|
||||
|
||||
/// Emit a push of `reg` and return the variable slot that now holds it.
var push_var(reg reg)
{
    x86_inst_push_r64(reg);

    const uint32_t slot = stack_depth;
    stack_depth = slot + 1;
    return slot;
}
|
||||
|
||||
/// Emit the code that prepares the stack for a jump to `label`.
///
/// Discards every slot above the depth of the frame the label belongs to,
/// then pushes `args[0 .. label->argc)` in order. Afterwards `stack_depth`
/// is the frame's depth plus the label's argument count.
///
/// NOTE(review): slots are released before the arguments are re-read and
/// pushed, so an argument whose slot lies in the released region may be
/// overwritten by an earlier push -- confirm callers never depend on that.
void load_args(struct label* label, var* args) {
    // A label declared outside any `enter` block has frame 0; `frame - 1`
    // would wrap around and index far outside `stack_frames`.
    assert(label->frame > 0);
    struct stack_frame* dest_frame = &stack_frames[label->frame - 1];
    // The difference below is unsigned; a target deeper than the current
    // depth would wrap to a huge positive value.
    assert(stack_depth >= dest_frame->depth);
    uint32_t depth_diff = stack_depth - dest_frame->depth;
    if (depth_diff > 0) {
        // FIXME: should be immX!!!
        // Until a wider immediate is used, refuse adjustments that do not
        // fit in a signed 8-bit immediate (assuming the helper encodes the
        // sign-extended imm8 form) instead of silently emitting a wrong one.
        assert(depth_diff <= INT8_MAX / 8);
        x86_inst_add_r64_imm8(SP, depth_diff * 8);
    }
    for (uint32_t arg = 0; arg < label->argc; arg++) {
        load_var(AX, args[arg]);
        x86_inst_push_r64(AX);
    }
    stack_depth = dest_frame->depth + label->argc;
}
|
||||
|
||||
/// Emit an unconditional jump to label `l`, passing `args`.
///
/// `args` must hold as many slots as the label's declared argument count.
void jump(label l, var* args)
{
    struct label* target = &labels[l];

    printf("JUMP %i (%i)\n", l, target->argc);
    load_args(target, args);
    inst_jump(target->symbol);
}
|
||||
|
||||
/// Jump to the `index`th label of `labels`. Not implemented: always
/// fails the assertion below.
void jump_table(size_t branches, label* labels, var index, var* args)
{
    (void) branches;
    (void) labels;
    (void) index;
    (void) args;
    assert(0); // UNIMPLEMENTED
}
|
||||
|
||||
/// Emit a conditional jump: to label `t` when `cond` is non-zero, to label
/// `e` otherwise. Both labels receive the same `args`, so they must have
/// the same argument count and belong to the same frame.
void jump_if(label t, label e, var cond, var* args)
{
    struct label* then = &labels[t];
    struct label* else_ = &labels[e];

    printf("JUMP_IF %i ELSE %i (%i)\n", t, e, then->argc);
    assert(then->argc == else_->argc && then->frame == else_->frame);

    // The condition is fetched into BX before the stack is rearranged for
    // the jump; one argument set-up serves both destinations.
    load_var(BX, cond);
    load_args(then, args);

    inst_jump_if_not_zero(then->symbol, BX);
    inst_jump(else_->symbol);
}
|
||||
|
||||
var lit(uint64_t lit) {
|
||||
x86_inst_mov_r64_imm(AX, lit);
|
||||
x86_inst_push_r64(AX);
|
||||
return stack_depth++;
|
||||
}
|
||||
|
||||
/// String literal. Not implemented: reports an error on stderr and
/// terminates the process with status 1, so it never returns.
var lit_string(char* str)
{
    (void) str;
    fputs("error: string literals not yet implemented\n", stderr);
    exit(1);
}
|
||||
|
||||
/// Emit `addend1 + addend2` and return the new variable slot holding the sum.
var add(var addend1, var addend2)
{
    load_var(AX, addend1);
    load_var(BX, addend2);
    x86_inst_add_r64_r64(AX, BX);

    // Store the result in a fresh slot.
    x86_inst_push_r64(AX);
    return stack_depth++;
}
|
||||
|
||||
/// Emit a subtraction and return the new variable slot holding the result.
///
/// The first argument is loaded into AX, the second into BX, and the
/// emitted instruction is `sub AX, BX`.
/// NOTE(review): if that helper follows the usual destination-first
/// convention the result is `subtrahend - minuend`, which is the reverse of
/// what the parameter names mean arithmetically -- confirm and rename.
var sub(var subtrahend, var minuend)
{
    // TODO: use modr/m
    load_var(AX, subtrahend);
    load_var(BX, minuend);
    x86_inst_sub_r64_r64(AX, BX);

    // Store the result in a fresh slot.
    x86_inst_push_r64(AX);
    return stack_depth++;
}
|
||||
|
||||
// Linux system call: https://blog.rchapman.org/posts/Linux_System_Call_Table_for_x86_64/
|
||||
var syscall(size_t argc, var* args) {
|
||||
assert(argc > 0 && argc <= 7);
|
||||
switch(argc) {
|
||||
case 7:
|
||||
load_var(R9, args[6]);
|
||||
__attribute__((fallthrough));
|
||||
case 6:
|
||||
load_var(R8, args[5]);
|
||||
__attribute__ ((fallthrough));
|
||||
case 5:
|
||||
load_var(R10, args[4]);
|
||||
__attribute__ ((fallthrough));
|
||||
case 4:
|
||||
load_var(DX, args[3]);
|
||||
__attribute__ ((fallthrough));
|
||||
case 3:
|
||||
load_var(SI, args[2]);
|
||||
__attribute__ ((fallthrough));
|
||||
case 2:
|
||||
load_var(DI, args[1]);
|
||||
__attribute__ ((fallthrough));
|
||||
case 1:
|
||||
// the system call number, not an argument
|
||||
load_var(AX, args[0]);
|
||||
}
|
||||
// NOTE: syscall clobbers rcx and r11.
|
||||
x86_inst_syscall();
|
||||
return push_var(AX);
|
||||
}
|
116
src/ir.h
116
src/ir.h
|
@ -1,116 +0,0 @@
|
|||
#ifndef _IR_H
#define _IR_H

#include <stddef.h>
#include <stdint.h>

/// Handle to a variable slot.
typedef uint32_t var;
/// Handle to a label.
typedef uint32_t label;

/// A jump destination paired with the arguments to pass to it.
///
/// NOTE(review): none of the prototypes below take this type. The older
/// `jump`/`jump_if`/`jump_table` prototypes that did were removed from this
/// header because they conflicted with the ones below.
struct jump_target {
    label label;
    var* args;
};

/// Call this at the beginning of execution.
/// It performs initialization and stuff.
void init_ir(var* argc, var* argv, var* env);

/// Enter a new block.
///
/// All labels defined in this block will have access to all variables
/// which are in scope as of calling `enter`. You will be able to jump
/// to any label which is defined in this block from here
/// to the symmetric `leave`.
///
/// This also generates a new label corresponding with the end of the block,
/// which will be automatically defined when you call `leave`.
void enter(void);

/// Leave a block.
///
/// This will restore the context to how it was when `enter` was called,
/// plus the return values declared by the call to `declare_exit`.
void leave(var* rets);

/// Declare a new label in the innermost block.
///
/// This label can only be called from the block or nested blocks.
/// This label must be called with the given number of arguments.
///
/// Local variables (not part of a stack frame generated by `define` or `enter`)
/// will not be in scope of the definition of the label.
label declare(uint32_t argc);

/// Declare an exit label for the surrounding block.
///
/// Calling this label will exit the surrounding blocks.
/// The usual restrictions for labels apply.
label declare_exit(uint32_t retc);

/// Define a label in the innermost block, automatically terminating
/// any previous labels.
///
/// All variables defined prior to the beginning of this block will be in scope.
/// The arguments associated with the label will be in scope.
/// Variables defined *after* the beginning of the block but *prior* to this label
/// will *not* be in scope.
///
/// From this label you can jump to any label in the enclosing block
/// or any parent block.
void define(label label, var* args);

/// Jump to label, unconditionally; never returns.
void jump(label label, var* args);

/// Jump to `index`th label in table; never returns.
///
/// All labels must be at the same depth and accept the same arguments.
/// `index` must not be out of bounds.
void jump_table(size_t branches, label* labels, var index, var* args);

/// Jump to `then` if cond is not zero, `else` if cond is zero.
void jump_if(label then, label else_, var cond, var* args);

/// Integer literal.
var lit(uint64_t lit);

/// String literal.
var lit_string(char* str);

/// Addition.
var add(var addend1, var addend2);

/// Subtraction.
var sub(var subtrahend, var minuend);

/// Perform a system call.
var syscall(size_t argc, var* args);

#endif
|
655
src/lang.c
655
src/lang.c
|
@ -1,655 +0,0 @@
|
|||
#include "ir.h"
|
||||
#include "lang.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define MAX_CONTEXT 32
|
||||
#define MAX_ASSIGNMENTS 256
|
||||
#define MAX_ARGUMENTS 256
|
||||
#define MAX_OPERATORS 256
|
||||
|
||||
struct assignment {
|
||||
char* name;
|
||||
var ref;
|
||||
};
|
||||
|
||||
enum block_state {
|
||||
BLOCK_CLEAN,
|
||||
BLOCK_ASSIGN,
|
||||
BLOCK_EXPR,
|
||||
};
|
||||
|
||||
struct block_crumb {
|
||||
enum block_state state;
|
||||
uint32_t assignment_count;
|
||||
struct assignment assignments[MAX_ASSIGNMENTS];
|
||||
var final;
|
||||
};
|
||||
|
||||
enum if_state {
|
||||
IF_COND,
|
||||
IF_THEN,
|
||||
IF_ELSE,
|
||||
IF_END,
|
||||
};
|
||||
|
||||
struct if_crumb {
|
||||
enum if_state state;
|
||||
label then;
|
||||
label else_;
|
||||
label end;
|
||||
};
|
||||
|
||||
enum loop_state {
|
||||
LOOP_CLEAN,
|
||||
LOOP_CVAR_INIT,
|
||||
LOOP_BODY,
|
||||
};
|
||||
|
||||
struct loop_crumb {
|
||||
enum loop_state state;
|
||||
char* label_name;
|
||||
label next;
|
||||
label exit;
|
||||
uint32_t assignment_count;
|
||||
var initializers[MAX_ASSIGNMENTS];
|
||||
struct assignment assignments[MAX_ASSIGNMENTS];
|
||||
};
|
||||
|
||||
struct expr_crumb {
|
||||
uint32_t argument_count;
|
||||
uint32_t operator_count;
|
||||
var arguments[MAX_ARGUMENTS];
|
||||
enum operator_ operators[MAX_OPERATORS];
|
||||
};
|
||||
|
||||
struct jump_crumb {
|
||||
label label;
|
||||
uint32_t arity;
|
||||
uint32_t argument_count;
|
||||
var arguments[MAX_ARGUMENTS];
|
||||
};
|
||||
|
||||
enum crumb_type {
|
||||
BLOCK_CRUMB,
|
||||
IF_CRUMB,
|
||||
LOOP_CRUMB,
|
||||
EXPR_CRUMB,
|
||||
JUMP_CRUMB,
|
||||
};
|
||||
|
||||
union crumb_data {
|
||||
struct block_crumb block;
|
||||
struct if_crumb if_;
|
||||
struct loop_crumb loop;
|
||||
struct expr_crumb expr;
|
||||
struct jump_crumb jump;
|
||||
};
|
||||
|
||||
struct crumb {
|
||||
enum crumb_type type;
|
||||
union crumb_data data;
|
||||
};
|
||||
|
||||
static uint32_t context_depth = 1;
|
||||
static struct crumb context[MAX_CONTEXT];
|
||||
|
||||
/// Return a freshly allocated, NUL-terminated copy of `str`.
///
/// The caller owns the result and must `free` it. If the allocation fails
/// an error is printed and the process exits with status 1, so the result
/// is never NULL.
static char* copy_str(const char* str) {
    size_t len = strlen(str);
    char* copy = malloc(len + 1);
    if (copy == NULL) {
        fprintf(stderr, "error: out of memory\n");
        exit(1);
    }
    // Copies the terminator along with the characters.
    memcpy(copy, str, len + 1);
    return copy;
}
|
||||
|
||||
static void push(struct crumb crumb) {
|
||||
context[context_depth] = crumb;
|
||||
context_depth++;
|
||||
}
|
||||
|
||||
static void push_new_block(void) {
|
||||
union crumb_data data;
|
||||
struct block_crumb block = {
|
||||
.state = BLOCK_CLEAN,
|
||||
.assignment_count = 0,
|
||||
.final = (var) -1,
|
||||
};
|
||||
data.block = block;
|
||||
struct crumb crumb = {
|
||||
.type = BLOCK_CRUMB,
|
||||
.data = data,
|
||||
};
|
||||
push(crumb);
|
||||
}
|
||||
|
||||
static void push_new_expr(void) {
|
||||
struct expr_crumb exprc = {
|
||||
.argument_count = 0,
|
||||
.operator_count = 0,
|
||||
};
|
||||
union crumb_data data;
|
||||
data.expr = exprc;
|
||||
struct crumb crumb = {
|
||||
.type = EXPR_CRUMB,
|
||||
.data = data,
|
||||
};
|
||||
push(crumb);
|
||||
}
|
||||
|
||||
struct label_and_arity {
|
||||
label label;
|
||||
uint32_t arity;
|
||||
};
|
||||
|
||||
static void push_new_jump(struct label_and_arity label) {
|
||||
union crumb_data data;
|
||||
data.jump.label = label.label;
|
||||
data.jump.arity = label.arity;
|
||||
data.jump.argument_count = 0;
|
||||
struct crumb crumb = {
|
||||
.type = JUMP_CRUMB,
|
||||
.data = data,
|
||||
};
|
||||
push(crumb);
|
||||
}
|
||||
|
||||
/// Append operand `ref` to the expression on top of the context stack.
///
/// Exits with an error once the expression already holds `MAX_ARGUMENTS`
/// operands.
static void push_argument(var ref) {
    struct crumb* ctx = &context[context_depth - 1];
    assert(ctx->type == EXPR_CRUMB);
    struct expr_crumb* exprc = &ctx->data.expr;
    // `>=`, not `>`: index MAX_ARGUMENTS is already one past the end of
    // the array.
    if (exprc->argument_count >= MAX_ARGUMENTS) {
        fprintf(stderr, "error: exceeded maximum number of arguments in expression\n");
        exit(1);
    }
    exprc->arguments[exprc->argument_count] = ref;
    exprc->argument_count++;
}
|
||||
|
||||
static void push_cvar_name(char* name) {
|
||||
struct crumb* ctx = &context[context_depth - 1];
|
||||
assert(ctx->type == LOOP_CRUMB);
|
||||
struct loop_crumb* loopc = &ctx->data.loop;
|
||||
if (loopc->assignment_count == MAX_ASSIGNMENTS) {
|
||||
fprintf(stderr, "error: exceed maximum number of assignments in loop cvars\n");
|
||||
exit(1);
|
||||
}
|
||||
loopc->assignments[loopc->assignment_count].name = copy_str(name);
|
||||
}
|
||||
|
||||
/// Record `ref` as the initializer of the next loop variable of the loop on
/// top of the context stack, and advance the loop's variable count.
///
/// Exits with an error once the loop already holds `MAX_ASSIGNMENTS`
/// variables.
static void push_cvar(var ref) {
    struct crumb* ctx = &context[context_depth - 1];
    assert(ctx->type == LOOP_CRUMB);
    struct loop_crumb* loopc = &ctx->data.loop;
    // `>=`, not `>`: index MAX_ASSIGNMENTS is already one past the end of
    // the array.
    if (loopc->assignment_count >= MAX_ASSIGNMENTS) {
        fprintf(stderr, "error: exceed maximum number of assignments in loop cvars\n");
        exit(1);
    }
    loopc->initializers[loopc->assignment_count] = ref;
    loopc->assignment_count++;
}
|
||||
|
||||
static var lookup_assignment(
|
||||
uint32_t assignment_count,
|
||||
struct assignment* assignments,
|
||||
char* name
|
||||
) {
|
||||
for (uint32_t i = assignment_count; i > 0; i--) {
|
||||
struct assignment asgn = assignments[i - 1];
|
||||
if (strcmp(asgn.name, name) == 0) {
|
||||
return asgn.ref;
|
||||
}
|
||||
}
|
||||
return (var) -1;
|
||||
}
|
||||
|
||||
static var lookup_var(char* name) {
|
||||
for (uint32_t i = context_depth; i > 0; i--) {
|
||||
struct crumb ctx = context[i - 1];
|
||||
var ref = (var) -1;
|
||||
switch (ctx.type) {
|
||||
case LOOP_CRUMB:
|
||||
if (ctx.data.loop.state != LOOP_BODY) {
|
||||
break;
|
||||
}
|
||||
ref = lookup_assignment(
|
||||
ctx.data.loop.assignment_count,
|
||||
ctx.data.loop.assignments,
|
||||
name
|
||||
);
|
||||
break;
|
||||
case BLOCK_CRUMB:
|
||||
ref = lookup_assignment(
|
||||
ctx.data.block.assignment_count,
|
||||
ctx.data.block.assignments,
|
||||
name
|
||||
);
|
||||
break;
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
if (ref != (var) -1) {
|
||||
return ref;
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "name resolution error: unknown variable %s\n", name);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/// The kinds of implicit labels a jump statement can refer to.
enum label_type {
    NEXT_LABEL,
    EXIT_LABEL,
    RETURN_LABEL,
};

/// Return the keyword for `type`, for use in diagnostics.
///
/// Returns "unknown" for a value outside the enumeration, so control never
/// reaches the end of the function without a return value.
static const char* label_type_name(enum label_type type) {
    switch (type) {
    case NEXT_LABEL:
        return "next";
    case EXIT_LABEL:
        return "exit";
    case RETURN_LABEL:
        return "return";
    }
    return "unknown";
}
|
||||
|
||||
static struct label_and_arity lookup_label(enum label_type type, char* name) {
|
||||
for (uint32_t i = context_depth; i > 0; i--) {
|
||||
struct crumb ctx = context[i - 1];
|
||||
switch (ctx.type) {
|
||||
case LOOP_CRUMB:
|
||||
if (name == NULL || strcmp(name, ctx.data.loop.label_name) == 0) {
|
||||
struct label_and_arity label;
|
||||
if (type == NEXT_LABEL) {
|
||||
label.label = ctx.data.loop.next;
|
||||
label.arity = ctx.data.loop.assignment_count;
|
||||
return label;
|
||||
}
|
||||
if (type == EXIT_LABEL) {
|
||||
label.label = ctx.data.loop.exit;
|
||||
label.arity = 1;
|
||||
return label;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (name == NULL) {
|
||||
fprintf(stderr, "name resolution error: no %s label in scope\n", label_type_name(type));
|
||||
} else {
|
||||
fprintf(stderr, "name resolution error: unknown label %s\n", name);
|
||||
}
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/// Combine the first two operands of `exprc` with `emit` and replace them
/// with the single result, shifting the remaining operands down by one.
static void reduce_expression_binop(struct expr_crumb* exprc, var (*emit)(var arg1, var arg2)) {
    assert(exprc->argument_count >= 2);
    var arg1 = exprc->arguments[0];
    var arg2 = exprc->arguments[1];
    exprc->arguments[0] = emit(arg1, arg2);
    // memmove counts bytes, not elements; without the sizeof factor only a
    // quarter of the remaining operands were moved.
    memmove(
        &exprc->arguments[1],
        &exprc->arguments[2],
        (exprc->argument_count - 2) * sizeof exprc->arguments[0]
    );
    exprc->argument_count--;
}
|
||||
|
||||
static var reduce_expression(struct expr_crumb* exprc) {
|
||||
// TODO FIXME: operator precedence
|
||||
if (exprc->operator_count > 0 || exprc->argument_count > 1) {
|
||||
fprintf(stderr, "warning: expression reduction may be incorrect\n");
|
||||
//exit(1);
|
||||
}
|
||||
for (uint32_t op_ix = 0; op_ix < exprc->operator_count; op_ix++) {
|
||||
switch (exprc->operators[op_ix]) {
|
||||
case OP_ADD:
|
||||
reduce_expression_binop(exprc, add);
|
||||
break;
|
||||
case OP_SUB:
|
||||
reduce_expression_binop(exprc, sub);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "error: operator not implemented: %i", exprc->operators[op_ix]);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
exprc->operator_count = 0;
|
||||
assert(exprc->argument_count == 1);
|
||||
return exprc->arguments[0];
|
||||
}
|
||||
|
||||
void enter_block(void) {
|
||||
printf("** enter_block\n");
|
||||
struct crumb* ctx = &context[context_depth - 1];
|
||||
switch (ctx->type) {
|
||||
case BLOCK_CRUMB:
|
||||
// we should have seen a stmt_assign or stmt_expr first,
|
||||
// either of which pushes an expr crumb.
|
||||
assert(0);
|
||||
case EXPR_CRUMB: {
|
||||
// this block is purely a scope/sequencing thing
|
||||
// with no special semantics
|
||||
break;
|
||||
}
|
||||
case IF_CRUMB: {
|
||||
struct if_crumb ifc = ctx->data.if_;
|
||||
switch (ifc.state) {
|
||||
case IF_COND:
|
||||
case IF_END:
|
||||
assert(0);
|
||||
case IF_THEN:
|
||||
define(ifc.then, NULL);
|
||||
break;
|
||||
case IF_ELSE:
|
||||
define(ifc.else_, NULL);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case LOOP_CRUMB: {
|
||||
struct loop_crumb* loopc = &ctx->data.loop;
|
||||
assert(loopc->state == LOOP_CLEAN);
|
||||
loopc->state = LOOP_BODY;
|
||||
loopc->next = declare(loopc->assignment_count);
|
||||
printf("LOOP %i END %i\n", loopc->next, loopc->exit);
|
||||
var args[MAX_ASSIGNMENTS];
|
||||
define(loopc->next, args);
|
||||
// TODO NOTE: is this the correct order?
|
||||
for (uint32_t i = 0; i < loopc->assignment_count; i++) {
|
||||
loopc->assignments[i].ref = args[i];
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
push_new_block();
|
||||
}
|
||||
|
||||
void stmt_assign(char* name) {
|
||||
printf("** stmt_assign\n");
|
||||
struct crumb* ctx = &context[context_depth - 1];
|
||||
assert(ctx->type == BLOCK_CRUMB);
|
||||
struct block_crumb* blockc = &ctx->data.block;
|
||||
assert(blockc->state == BLOCK_CLEAN);
|
||||
if (blockc->assignment_count == MAX_ASSIGNMENTS) {
|
||||
fprintf(stderr, "error: exceeded maximum number of assignments in block\n");
|
||||
exit(1);
|
||||
}
|
||||
blockc->state = BLOCK_ASSIGN;
|
||||
blockc->assignments[blockc->assignment_count].name = copy_str(name);
|
||||
push_new_expr();
|
||||
}
|
||||
|
||||
void stmt_expr(void) {
|
||||
printf("** stmt_expr\n");
|
||||
struct crumb* ctx = &context[context_depth - 1];
|
||||
assert(ctx->type == BLOCK_CRUMB);
|
||||
struct block_crumb* blockc = &ctx->data.block;
|
||||
assert(blockc->state == BLOCK_CLEAN);
|
||||
blockc->state = BLOCK_EXPR;
|
||||
push_new_expr();
|
||||
}
|
||||
|
||||
void exit_block(void) {
|
||||
printf("** exit_block\n");
|
||||
struct crumb* ctx = &context[context_depth - 1];
|
||||
assert(ctx->type == BLOCK_CRUMB);
|
||||
struct block_crumb blockc = ctx->data.block;
|
||||
assert(blockc.state == BLOCK_CLEAN);
|
||||
var ret = blockc.final;
|
||||
if (ret == (var) -1) {
|
||||
// TODO: better way to handle empty blocks
|
||||
ret = lit(0);
|
||||
}
|
||||
context_depth--;
|
||||
ctx = &context[context_depth - 1];
|
||||
switch (ctx->type) {
|
||||
case EXPR_CRUMB: {
|
||||
push_argument(ret);
|
||||
break;
|
||||
}
|
||||
case IF_CRUMB: {
|
||||
struct if_crumb* ifc = &ctx->data.if_;
|
||||
assert(ifc->state == IF_THEN || ifc->state == IF_ELSE);
|
||||
jump(ifc->end, &ret);
|
||||
if (ifc->state == IF_THEN) {
|
||||
ifc->state = IF_ELSE;
|
||||
} else if (ifc->state == IF_ELSE) {
|
||||
ifc->state = IF_END;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case LOOP_CRUMB: {
|
||||
// unlike with `if`, there is no `exit_loop`, so we do clean-up here.
|
||||
struct loop_crumb loopc = ctx->data.loop;
|
||||
assert(loopc.state == LOOP_BODY);
|
||||
jump(loopc.exit, &ret);
|
||||
context_depth--;
|
||||
for (uint32_t i = 0; i < loopc.assignment_count; i++) {
|
||||
free(loopc.assignments[i].name);
|
||||
}
|
||||
leave(&ret);
|
||||
push_argument(ret);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
for (uint32_t i = 0; i < blockc.assignment_count; i++) {
|
||||
free(blockc.assignments[i].name);
|
||||
}
|
||||
}
|
||||
|
||||
void exit_expr(void) {
|
||||
printf("** exit_expr\n");
|
||||
struct crumb* ctx = &context[context_depth - 1];
|
||||
assert(ctx->type == EXPR_CRUMB);
|
||||
struct expr_crumb* exprc = &ctx->data.expr;
|
||||
assert(exprc->argument_count > 0);
|
||||
var ret = reduce_expression(exprc);
|
||||
context_depth--;
|
||||
ctx = &context[context_depth - 1];
|
||||
switch (ctx->type) {
|
||||
case BLOCK_CRUMB: {
|
||||
struct block_crumb* blockc = &ctx->data.block;
|
||||
blockc->final = ret;
|
||||
switch (blockc->state) {
|
||||
case BLOCK_CLEAN:
|
||||
assert(0);
|
||||
case BLOCK_EXPR:
|
||||
blockc->state = BLOCK_CLEAN;
|
||||
break;
|
||||
case BLOCK_ASSIGN:
|
||||
blockc->assignments[blockc->assignment_count].ref = ret;
|
||||
blockc->assignment_count++;
|
||||
blockc->state = BLOCK_CLEAN;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case IF_CRUMB: {
|
||||
struct if_crumb* ifc = &ctx->data.if_;
|
||||
assert(ifc->state == IF_COND);
|
||||
jump_if(ifc->then, ifc->else_, ret, NULL);
|
||||
ifc->state = IF_THEN;
|
||||
break;
|
||||
}
|
||||
case EXPR_CRUMB:
|
||||
push_argument(ret);
|
||||
break;
|
||||
case LOOP_CRUMB: {
|
||||
struct loop_crumb* loopc = &ctx->data.loop;
|
||||
assert(loopc->state == LOOP_CVAR_INIT);
|
||||
push_cvar(ret);
|
||||
loopc->state = LOOP_CLEAN;
|
||||
break;
|
||||
}
|
||||
case JUMP_CRUMB: {
|
||||
// TODO FIXME: this is *completely wrong* for `next`!
|
||||
struct jump_crumb jumpc = ctx->data.jump;
|
||||
fprintf(stderr, "args: %i, arity: %i\n", jumpc.argument_count, jumpc.arity);
|
||||
assert(jumpc.argument_count + 1 == jumpc.arity);
|
||||
jumpc.arguments[jumpc.argument_count] = ret;
|
||||
jump(jumpc.label, jumpc.arguments);
|
||||
// TODO: better way to handle returning impossible value
|
||||
context_depth--;
|
||||
push_argument(ret);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void enter_if(void) {
|
||||
printf("** enter_if\n");
|
||||
enter();
|
||||
label then = declare(0);
|
||||
label else_ = declare(0);
|
||||
label end = declare_exit(1);
|
||||
printf("IF THEN %i ELSE %i EXIT %i\n", then, else_, end);
|
||||
struct if_crumb ifc = {
|
||||
.state = IF_COND,
|
||||
.then = then,
|
||||
.else_ = else_,
|
||||
.end = end,
|
||||
};
|
||||
union crumb_data data;
|
||||
data.if_ = ifc;
|
||||
struct crumb ctx = {
|
||||
.type = IF_CRUMB,
|
||||
.data = data,
|
||||
};
|
||||
push(ctx);
|
||||
push_new_expr();
|
||||
}
|
||||
|
||||
void exit_if(void) {
|
||||
printf("** exit_if\n");
|
||||
struct crumb ctx = context[context_depth - 1];
|
||||
assert(ctx.type == IF_CRUMB);
|
||||
struct if_crumb ifc = ctx.data.if_;
|
||||
switch (ifc.state) {
|
||||
case IF_COND:
|
||||
case IF_THEN:
|
||||
assert(0);
|
||||
case IF_ELSE: {
|
||||
define(ifc.else_, NULL);
|
||||
var ret = lit(0);
|
||||
jump(ifc.end, &ret);
|
||||
break;
|
||||
}
|
||||
case IF_END:
|
||||
break;
|
||||
}
|
||||
var ret;
|
||||
leave(&ret);
|
||||
context_depth--;
|
||||
push_argument(ret);
|
||||
}
|
||||
|
||||
void enter_loop(char* label_name) {
|
||||
printf("** enter_loop\n");
|
||||
enter();
|
||||
label exit = declare_exit(1);
|
||||
struct loop_crumb loopc = {
|
||||
.state = LOOP_CLEAN,
|
||||
.label_name = copy_str(label_name),
|
||||
.assignment_count = 0,
|
||||
.exit = exit
|
||||
};
|
||||
union crumb_data data;
|
||||
data.loop = loopc;
|
||||
struct crumb ctx = {
|
||||
.type = LOOP_CRUMB,
|
||||
.data = data,
|
||||
};
|
||||
push(ctx);
|
||||
}
|
||||
|
||||
void cvar_pass(char* name) {
|
||||
printf("** cvar_pass\n");
|
||||
push_cvar_name(name);
|
||||
push_cvar(lookup_var(name));
|
||||
}
|
||||
|
||||
void cvar_init(char* name) {
|
||||
printf("** cvar_init\n");
|
||||
struct crumb* ctx = &context[context_depth - 1];
|
||||
assert(ctx->type == LOOP_CRUMB);
|
||||
struct loop_crumb* loopc = &ctx->data.loop;
|
||||
loopc->state = LOOP_CVAR_INIT;
|
||||
push_cvar_name(name);
|
||||
push_new_expr();
|
||||
}
|
||||
|
||||
void expr_next(char* label) {
|
||||
printf("** expr_next\n");
|
||||
push_new_jump(lookup_label(NEXT_LABEL, label));
|
||||
push_new_expr();
|
||||
}
|
||||
|
||||
void expr_exit(char* label) {
|
||||
printf("** expr_exit\n");
|
||||
push_new_jump(lookup_label(EXIT_LABEL, label));
|
||||
push_new_expr();
|
||||
}
|
||||
|
||||
void expr_return(void) {
|
||||
printf("** expr_return\n");
|
||||
push_new_jump(lookup_label(RETURN_LABEL, NULL));
|
||||
push_new_expr();
|
||||
}
|
||||
|
||||
// Open a parenthesised group, which is simply a nested expression.
void enter_group(void) {
    fputs("** enter_group\n", stdout);
    push_new_expr();
}
|
||||
|
||||
// Close a parenthesised group. Only logs the event; this function itself
// does no other work.
void exit_group(void) {
    fputs("** exit_group\n", stdout);
    // exit_expr is sufficient
}
|
||||
|
||||
void expr_op(enum operator_ op) {
|
||||
printf("** expr_op %i\n", op);
|
||||
struct crumb* ctx = &context[context_depth - 1];
|
||||
assert(ctx->type == EXPR_CRUMB);
|
||||
struct expr_crumb* exprc = &ctx->data.expr;
|
||||
if (op == OP_JUXT && context_depth > 1) {
|
||||
// HACK: should handle continuations separately from expressions
|
||||
struct crumb* ctx2 = &context[context_depth - 2];
|
||||
if (ctx2->type == JUMP_CRUMB) {
|
||||
struct jump_crumb* jumpc = &ctx2->data.jump;
|
||||
var ret = reduce_expression(exprc);
|
||||
assert(jumpc->argument_count < MAX_ARGUMENTS);
|
||||
jumpc->arguments[jumpc->argument_count] = ret;
|
||||
jumpc->argument_count++;
|
||||
context_depth--;
|
||||
push_new_expr();
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (exprc->operator_count > MAX_OPERATORS) {
|
||||
fprintf(stderr, "error: exceeded maximum number of operators in expression\n");
|
||||
exit(1);
|
||||
}
|
||||
exprc->operators[exprc->operator_count] = op;
|
||||
exprc->operator_count++;
|
||||
}
|
||||
|
||||
// Push a string literal as the next argument of the innermost expression.
void expr_string(char* string) {
    fprintf(stdout, "** expr_string %s\n", string);
    push_argument(lit_string(string));
}
|
||||
|
||||
// Push an integer literal as the next argument of the innermost expression.
// The value is passed to lit() reinterpreted as an unsigned 64-bit integer.
void expr_integer(int64_t num) {
    // %lli requires a `long long`; int64_t may be a different type (e.g.
    // `long`), so convert explicitly instead of relying on matching widths.
    printf("** expr_integer %lli\n", (long long) num);
    push_argument(lit((uint64_t) num));
}
|
||||
|
||||
// Push the value bound to the variable named `var` as the next argument of
// the innermost expression.
void expr_var(char* var) {
    fprintf(stdout, "** expr_var %s\n", var);
    push_argument(lookup_var(var));
}
|
32
src/lang.h
32
src/lang.h
|
@ -1,32 +0,0 @@
|
|||
#ifndef LANG_H
#define LANG_H

#include "lex.h"

// Blocks and statements.
// (enter_block and stmt_assign are presumably the counterparts of exit_block
// and stmt_expr — confirm against their definitions.)
void enter_block(void);
void stmt_assign(char *name);
// Start an expression statement in the current block.
void stmt_expr(void);
// Close the current block and deliver its value to the enclosing construct.
void exit_block(void);

// Finish the current expression and deliver its value to the enclosing construct.
void exit_expr(void);

// `if` expressions: enter_if opens the condition, exit_if closes the whole `if`.
void enter_if(void);
void exit_if(void);

// Loops: enter_loop opens a loop with an optional label name; cvar_pass and
// cvar_init declare loop variables (from an existing variable, or from an
// initialiser expression that follows).
void enter_loop(char *label);
void cvar_pass(char *name);
void cvar_init(char *name);

// Jumps: each opens a jump to the named label and an expression for its argument.
void expr_next(char *label);
void expr_exit(char *label);
void expr_return(void);

// Parenthesised groups.
void enter_group(void);
void exit_group(void);

// Expression atoms and operators.
void expr_op(enum operator_ op);
void expr_string(char *string);
void expr_integer(int64_t num);
void expr_var(char *var);

#endif
|
520
src/lex.c
520
src/lex.c
|
@ -7,37 +7,58 @@
|
|||
#include "lex/indent.h"
|
||||
#include "io.h"
|
||||
|
||||
_Bool is_unary(enum operator_ op) {
|
||||
return op == OP_SUB
|
||||
|| op == OP_INV
|
||||
|| op == OP_NOT;
|
||||
static const char* const keywords[KEYWORD_COUNT] = {
|
||||
"comm",
|
||||
"assocl",
|
||||
"assocr",
|
||||
"distl",
|
||||
"distr",
|
||||
"factl",
|
||||
"factr",
|
||||
"mapl",
|
||||
"mapr",
|
||||
"unitil",
|
||||
"unitir",
|
||||
"unitel",
|
||||
"uniter",
|
||||
"comm+",
|
||||
"assocl+",
|
||||
"assocr+",
|
||||
"mapl+",
|
||||
"mapr+",
|
||||
"inl",
|
||||
"inr",
|
||||
"out",
|
||||
"halt",
|
||||
"if",
|
||||
};
|
||||
|
||||
static struct token simple(enum token_type type) {
|
||||
struct token tok = { type, 0 };
|
||||
return tok;
|
||||
}
|
||||
|
||||
_Bool is_binary(enum operator_ op) {
|
||||
return op == OP_EQ
|
||||
|| op == OP_ADD
|
||||
|| op == OP_SUB
|
||||
|| op == OP_MUL
|
||||
|| op == OP_DIV
|
||||
|| op == OP_MOD
|
||||
|| op == OP_AND
|
||||
|| op == OP_OR
|
||||
|| op == OP_XOR
|
||||
|| op == OP_SHL
|
||||
|| op == OP_SAR
|
||||
|| op == OP_SHR
|
||||
|| op == OP_GT
|
||||
|| op == OP_LT
|
||||
|| op == OP_GTE
|
||||
|| op == OP_LTE
|
||||
|| op == OP_NE
|
||||
|| op == OP_TYPE
|
||||
|| op == OP_FUN;
|
||||
#define MAX_STR_LEN 4096
|
||||
static size_t str_index;
|
||||
// alternate string buffers between tokens to prevent overwriting buffer.
|
||||
// we're LL(1) so 2 buffers is sufficient.
|
||||
|
||||
// NOTE: I later changed the code and it wasn't sufficient anymore, lmao.
|
||||
static int which_buf = 0;
|
||||
static char str_buf_1[MAX_STR_LEN];
|
||||
static char str_buf_2[MAX_STR_LEN];
|
||||
static char str_buf_3[MAX_STR_LEN];
|
||||
|
||||
static char* str_buf(void) {
|
||||
which_buf = (which_buf + 1) % 3;
|
||||
switch (which_buf) {
|
||||
case 0: return str_buf_1;
|
||||
case 1: return str_buf_2;
|
||||
case 2: return str_buf_3;
|
||||
}
|
||||
assert(false);
|
||||
}
|
||||
|
||||
_Bool is_lit(struct token tok) {
|
||||
return tok.type == TOK_INTEGER || tok.type == TOK_STRING || tok.type == TOK_NAME;
|
||||
}
|
||||
|
||||
static _Bool is_alpha(char c) {
|
||||
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
|
||||
|
@ -56,142 +77,10 @@ static _Bool is_id_char(char c) {
|
|||
return is_alphanumeric(c) || c == '_';
|
||||
}
|
||||
|
||||
static struct token simple(enum token_type type) {
|
||||
struct token tok = { type, 0 };
|
||||
return tok;
|
||||
_Bool is_whitespace(char c) {
|
||||
return c == ' ' || c == '\t' || c == '\r' || c == '\n';
|
||||
}
|
||||
|
||||
static struct token op(enum operator_ op) {
|
||||
union token_data data;
|
||||
data.op = op;
|
||||
struct token tok = { TOK_OPERATOR, data };
|
||||
return tok;
|
||||
}
|
||||
|
||||
// Numeric value of a digit character: '0'-'9' map to 0-9, and letters map to
// 10 and up ('A'/'a' = 10, 'B'/'b' = 11, ...), case-insensitively.
// Any other character is a caller error: it trips the assertion, and when
// assertions are compiled out UINT8_MAX is returned instead.
static uint8_t digit_value(char c) {
    if (is_digit(c)) { return c - '0'; }
    if (c >= 'A' && c <= 'Z') { return c - 'A' + 0xA; }
    if (c >= 'a' && c <= 'z') { return c - 'a' + 0xA; }
    assert(0);
    // never fall off the end of a non-void function when NDEBUG is defined.
    return UINT8_MAX;
}
|
||||
|
||||
// True when `c` is an alphanumeric character whose digit value is below `base`.
static _Bool is_digit_in(uint8_t base, char c) {
    return is_alphanumeric(c) && digit_value(c) < base;
}
|
||||
|
||||
// Consume a run of digits in the given base and return their value.
// Exits with a lexical error if the value does not fit in 64 bits.
static uint64_t lex_digits(uint8_t base) {
    uint64_t value = 0;

    for (;;) {
        char ch = peekc();

        if (is_digit_in(base, ch)) {
            nextc();
            uint8_t d = digit_value(ch);
            // (value * base + d) > UINT64_MAX
            if (value > ((UINT64_MAX - d) / base)) {
                fprintf(stderr, "lexical error: integer literal overflow\n");
                exit(1);
            }
            value = value * base + d;
            continue;
        }

        // commas are legal digit separators
        // NOTE(review): nothing is consumed between the two peekc() calls, so
        // the second one presumably sees the same ',' and this branch never
        // fires — confirm against peekc() in io.h.
        if (ch == ',' && is_digit_in(base, peekc())) {
            nextc();
            continue;
        }

        break;
    }

    return value;
}
|
||||
|
||||
static struct token integer_tok(uint64_t integer) {
|
||||
union token_data data;
|
||||
data.int_ = integer;
|
||||
struct token tok = { TOK_INTEGER, data };
|
||||
return tok;
|
||||
}
|
||||
|
||||
// Lex an integer literal starting at the current character.
//
// A leading "0b" selects base 2 and "0x" base 16; otherwise the literal is
// decimal. A lone "0" (not followed by a digit or identifier character) is
// the literal zero. `sign` is true when the caller has already consumed a
// leading '-', in which case the value is negated.
//
// Exits with a lexical error when digits are missing, when an identifier
// character directly follows the literal, or when the value overflows.
static struct token lex_integer(_Bool sign) {
    uint8_t base = 10;

    if (peekc() == '0') {
        nextc();
        _Bool prefixed = true;
        if (peekc() == 'b') {
            base = 2;
            nextc();
        } else if (peekc() == 'x') {
            base = 16;
            nextc();
        } else {
            prefixed = false;
        }
        // A bare "0" is complete on its own. This must not apply after a
        // base prefix: "0x" without digits is reported as an error below.
        if (!prefixed && !is_digit(peekc()) && !is_id_char(peekc())) {
            return integer_tok(0);
        }
    }

    if (!is_digit_in(base, peekc())) {
        fprintf(stderr, "lexical error: expected base-%i digits\n", base);
        exit(1);
    }

    // read the digits in the base selected by the prefix.
    uint64_t acc = lex_digits(base);

    if (is_id_char(peekc())) {
        fprintf(stderr, "lexical error: must put space between integer and following identifier\n");
        exit(1);
    }

    if (sign && acc > INT64_MAX) {
        fprintf(stderr, "lexical error: signed integer literal overflow\n");
        exit(1);
    }

    int64_t val = (int64_t) acc;
    if (sign) {
        val = -val;
    }
    return integer_tok(val);
}
|
||||
|
||||
#define MAX_STR_LEN 4096
|
||||
static size_t str_index;
|
||||
// alternate string buffers between tokens to prevent overwriting buffer.
|
||||
// we're LL(1) so 2 buffers is sufficient.
|
||||
static _Bool which_buf = false;
|
||||
static char str_buf_1[MAX_STR_LEN];
|
||||
static char str_buf_2[MAX_STR_LEN];
|
||||
|
||||
static char* str_buf(void) {
|
||||
which_buf = !which_buf;
|
||||
if (which_buf) {
|
||||
return str_buf_1;
|
||||
}
|
||||
return str_buf_2;
|
||||
}
|
||||
|
||||
static char* lex_string(void) {
|
||||
// TODO: string escapes, multi-line strings, no length limit on strings
|
||||
str_index = 0;
|
||||
char* buf = str_buf();
|
||||
while (true) {
|
||||
char c = nextc();
|
||||
if (str_index == MAX_STR_LEN - 1) {
|
||||
fprintf(stderr, "lexical error: string too long\n");
|
||||
exit(1);
|
||||
}
|
||||
if (c == 0) {
|
||||
fprintf(stderr, "lexical error: unclosed string (reached end of file)\n");
|
||||
exit(1);
|
||||
}
|
||||
if (c == '"') {
|
||||
break;
|
||||
}
|
||||
if (c == '\n') {
|
||||
fprintf(stderr, "lexical error: unclosed string (reached end of line)\n");
|
||||
exit(1);
|
||||
}
|
||||
buf[str_index] = c;
|
||||
str_index++;
|
||||
}
|
||||
buf[str_index] = 0;
|
||||
return buf;
|
||||
}
|
||||
|
||||
static char* lex_identifier(void) {
|
||||
str_index = 0;
|
||||
|
@ -211,204 +100,53 @@ static char* lex_identifier(void) {
|
|||
return buf;
|
||||
}
|
||||
|
||||
static uint32_t indent_level = 0;
|
||||
static uint32_t pending_level = 0;
|
||||
static _Bool level_is_block[MAX_INDENTS] = {true};
|
||||
// going back to a previous indentation level.
|
||||
// if we're going back, then we insert a terminator.
|
||||
static _Bool going_back = false;
|
||||
|
||||
static struct token lex(void) {
|
||||
char c = peekc();
|
||||
if (is_newline(c)) {
|
||||
indent_level = lex_indentation();
|
||||
if (indent_level <= pending_level) {
|
||||
going_back = true;
|
||||
}
|
||||
}
|
||||
while (indent_level > pending_level) {
|
||||
pending_level++;
|
||||
if (level_is_block[pending_level]) {
|
||||
return simple(TOK_OPEN_BLOCK);
|
||||
}
|
||||
}
|
||||
while (indent_level < pending_level) {
|
||||
_Bool was_block = level_is_block[pending_level];
|
||||
level_is_block[pending_level] = false;
|
||||
pending_level--;
|
||||
if (was_block) {
|
||||
return simple(TOK_CLOSE_BLOCK);
|
||||
}
|
||||
}
|
||||
if (going_back) {
|
||||
going_back = false;
|
||||
if (level_is_block[indent_level]) {
|
||||
return simple(TOK_TERMINATOR);
|
||||
}
|
||||
}
|
||||
c = peekc();
|
||||
while (is_indent(c)) {
|
||||
nextc();
|
||||
c = peekc();
|
||||
}
|
||||
_Bool sign = false;
|
||||
switch (c) {
|
||||
case 0:
|
||||
nextc();
|
||||
return simple(TOK_EOF);
|
||||
case '"': {
|
||||
nextc();
|
||||
union token_data data;
|
||||
data.string = lex_string();
|
||||
struct token tok = { TOK_STRING, data };
|
||||
return tok;
|
||||
}
|
||||
case '\'': {
|
||||
nextc();
|
||||
union token_data data;
|
||||
data.label = lex_identifier();
|
||||
struct token tok = { TOK_LABEL, data };
|
||||
return tok;
|
||||
}
|
||||
case ':':
|
||||
nextc();
|
||||
while (is_indent(peekc())) {
|
||||
nextc();
|
||||
}
|
||||
if (is_newline(peekc())) {
|
||||
level_is_block[indent_level + 1] = true;
|
||||
return lex();
|
||||
}
|
||||
return op(OP_TYPE);
|
||||
case '{':
|
||||
nextc();
|
||||
return simple(TOK_OPEN_BLOCK);
|
||||
case '}':
|
||||
nextc();
|
||||
return simple(TOK_CLOSE_BLOCK);
|
||||
case '(':
|
||||
nextc();
|
||||
return simple(TOK_OPEN_GROUP);
|
||||
case ')':
|
||||
nextc();
|
||||
return simple(TOK_CLOSE_GROUP);
|
||||
case ';':
|
||||
nextc();
|
||||
return simple(TOK_TERMINATOR);
|
||||
case ',':
|
||||
nextc();
|
||||
return simple(TOK_SEPARATOR);
|
||||
case '=':
|
||||
nextc();
|
||||
return simple(TOK_EQUALS);
|
||||
case '-':
|
||||
nextc();
|
||||
if (peekc() == '>') {
|
||||
nextc();
|
||||
return op(OP_FUN);
|
||||
}
|
||||
if (is_digit(peekc())) {
|
||||
return lex_integer(true);
|
||||
}
|
||||
return op(OP_SUB);
|
||||
case '+':
|
||||
nextc();
|
||||
return op(OP_ADD);
|
||||
case '*':
|
||||
nextc();
|
||||
return op(OP_MUL);
|
||||
case '/':
|
||||
nextc();
|
||||
return op(OP_DIV);
|
||||
case '%':
|
||||
nextc();
|
||||
return op(OP_MOD);
|
||||
case '~':
|
||||
nextc();
|
||||
return op(OP_INV);
|
||||
case '&':
|
||||
nextc();
|
||||
return op(OP_AND);
|
||||
case '|':
|
||||
nextc();
|
||||
return op(OP_OR);
|
||||
case '^':
|
||||
nextc();
|
||||
return op(OP_XOR);
|
||||
case '!':
|
||||
nextc();
|
||||
if (peekc() == '=') {
|
||||
nextc();
|
||||
return op(OP_NE);
|
||||
}
|
||||
return op(OP_NOT);
|
||||
case '>':
|
||||
while (true) {
|
||||
// skip whitespace
|
||||
while (is_whitespace(c)) {
|
||||
nextc();
|
||||
c = peekc();
|
||||
if (c == '=') {
|
||||
nextc();
|
||||
return op(OP_GTE);
|
||||
}
|
||||
if (c == '>') {
|
||||
nextc();
|
||||
if (peekc() == '>') {
|
||||
nextc();
|
||||
return op(OP_SHR);
|
||||
}
|
||||
return op(OP_SAR);
|
||||
}
|
||||
return op(OP_GT);
|
||||
case '<':
|
||||
nextc();
|
||||
c = peekc();
|
||||
if (c == '<') {
|
||||
nextc();
|
||||
return op(OP_SHL);
|
||||
}
|
||||
if (c == '=') {
|
||||
nextc();
|
||||
return op(OP_LTE);
|
||||
}
|
||||
return op(OP_LT);
|
||||
}
|
||||
if (is_digit(c)) {
|
||||
return lex_integer(false);
|
||||
}
|
||||
char* name = lex_identifier();
|
||||
if (strcmp(name, "if") == 0) {
|
||||
return simple(TOK_IF);
|
||||
}
|
||||
if (strcmp(name, "else") == 0) {
|
||||
return simple(TOK_ELSE);
|
||||
}
|
||||
if (strcmp(name, "match") == 0) {
|
||||
return simple(TOK_MATCH);
|
||||
}
|
||||
if (strcmp(name, "case") == 0) {
|
||||
return simple(TOK_CASE);
|
||||
}
|
||||
if (strcmp(name, "loop") == 0) {
|
||||
return simple(TOK_LOOP);
|
||||
}
|
||||
if (strcmp(name, "fn") == 0) {
|
||||
return simple(TOK_FN);
|
||||
}
|
||||
if (strcmp(name, "next") == 0) {
|
||||
return simple(TOK_NEXT);
|
||||
}
|
||||
if (strcmp(name, "exit") == 0) {
|
||||
return simple(TOK_EXIT);
|
||||
}
|
||||
if (strcmp(name, "recurse") == 0) {
|
||||
return simple(TOK_RECURSE);
|
||||
}
|
||||
if (strcmp(name, "return") == 0) {
|
||||
return simple(TOK_RETURN);
|
||||
}
|
||||
// skip line comments
|
||||
if (c == '!') {
|
||||
do {
|
||||
c = nextc();
|
||||
} while (c != '\n');
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
union token_data data;
|
||||
data.name = name;
|
||||
struct token tok = { TOK_NAME, data };
|
||||
// syntax
|
||||
switch (c) {
|
||||
case 0:
|
||||
return simple(TOK_EOF);
|
||||
case '{':
|
||||
nextc();
|
||||
return simple(TOK_MAP_BEGIN);
|
||||
case '}':
|
||||
nextc();
|
||||
return simple(TOK_MAP_END);
|
||||
}
|
||||
|
||||
char* name = lex_identifier();
|
||||
|
||||
// keywords
|
||||
for (size_t kwd = 0; kwd < KEYWORD_COUNT; kwd++) {
|
||||
if (strcmp(name, keywords[kwd]) == 0) {
|
||||
return simple((enum token_type) kwd);
|
||||
}
|
||||
}
|
||||
|
||||
enum token_type type = TOK_JUMP;
|
||||
// labels
|
||||
if (peekc() == ':') {
|
||||
type = TOK_LABEL;
|
||||
nextc();
|
||||
}
|
||||
|
||||
struct token tok = { type, name };
|
||||
return tok;
|
||||
}
|
||||
|
||||
|
@ -418,7 +156,6 @@ static struct token peek_buf;
|
|||
struct token next(void) {
|
||||
if (!init) {
|
||||
init = true;
|
||||
indent_level = lex_indentation();
|
||||
next();
|
||||
}
|
||||
struct token tmp = peek_buf;
|
||||
|
@ -429,78 +166,3 @@ struct token next(void) {
|
|||
struct token peek(void) {
|
||||
return peek_buf;
|
||||
}
|
||||
|
||||
void print_token(struct token tok) {
|
||||
switch (tok.type) {
|
||||
case TOK_NAME:
|
||||
fprintf(stdout, "%s", tok.data.name);
|
||||
break;
|
||||
case TOK_LABEL:
|
||||
fprintf(stdout, "'%s", tok.data.label);
|
||||
break;
|
||||
case TOK_INTEGER:
|
||||
fprintf(stdout, "%zi", tok.data.int_);
|
||||
break;
|
||||
case TOK_STRING:
|
||||
fprintf(stdout, "\"%s\"", tok.data.string);
|
||||
break;
|
||||
case TOK_OPEN_GROUP:
|
||||
fprintf(stdout, "(");
|
||||
break;
|
||||
case TOK_CLOSE_GROUP:
|
||||
fprintf(stdout, ")");
|
||||
break;
|
||||
case TOK_OPEN_BLOCK:
|
||||
fprintf(stdout, "{");
|
||||
break;
|
||||
case TOK_CLOSE_BLOCK:
|
||||
fprintf(stdout, "}");
|
||||
break;
|
||||
case TOK_TERMINATOR:
|
||||
fprintf(stdout, ";");
|
||||
break;
|
||||
case TOK_SEPARATOR:
|
||||
fprintf(stdout, ",");
|
||||
break;
|
||||
case TOK_OPERATOR:
|
||||
// TODO: printing for operators
|
||||
fprintf(stdout, "OP:%i", tok.data.op);
|
||||
break;
|
||||
case TOK_EOF:
|
||||
fprintf(stdout, "<EOF>");
|
||||
break;
|
||||
case TOK_CASE:
|
||||
fprintf(stdout, "case");
|
||||
break;
|
||||
case TOK_ELSE:
|
||||
fprintf(stdout, "else");
|
||||
break;
|
||||
case TOK_EQUALS:
|
||||
fprintf(stdout, "=");
|
||||
break;
|
||||
case TOK_EXIT:
|
||||
fprintf(stdout, "exit");
|
||||
break;
|
||||
case TOK_FN:
|
||||
fprintf(stdout, "fn");
|
||||
break;
|
||||
case TOK_IF:
|
||||
fprintf(stdout, "if");
|
||||
break;
|
||||
case TOK_LOOP:
|
||||
fprintf(stdout, "loop");
|
||||
break;
|
||||
case TOK_NEXT:
|
||||
fprintf(stdout, "next");
|
||||
break;
|
||||
case TOK_RETURN:
|
||||
fprintf(stdout, "return");
|
||||
break;
|
||||
case TOK_RECURSE:
|
||||
fprintf(stdout, "recurse");
|
||||
break;
|
||||
case TOK_MATCH:
|
||||
fprintf(stdout, "match");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
95
src/lex.h
95
src/lex.h
|
@ -4,79 +4,44 @@
|
|||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define KEYWORD_COUNT 23
|
||||
|
||||
enum token_type {
|
||||
TOK_COMM = 0,
|
||||
TOK_ASSOCL = 1,
|
||||
TOK_ASSOCR = 2,
|
||||
TOK_DISTL = 3,
|
||||
TOK_DISTR = 4,
|
||||
TOK_FACTL = 5,
|
||||
TOK_FACTR = 6,
|
||||
TOK_MAPL = 7,
|
||||
TOK_MAPR = 8,
|
||||
TOK_UNITIL = 9,
|
||||
TOK_UNITIR = 10,
|
||||
TOK_UNITEL = 11,
|
||||
TOK_UNITER = 12,
|
||||
TOK_COMM_PLUS = 13,
|
||||
TOK_ASSOCL_PLUS = 14,
|
||||
TOK_ASSOCR_PLUS = 15,
|
||||
TOK_MAPL_PLUS = 16,
|
||||
TOK_MAPR_PLUS = 17,
|
||||
TOK_INL = 18,
|
||||
TOK_INR = 19,
|
||||
TOK_OUT = 20,
|
||||
TOK_HALT = 21,
|
||||
TOK_IF = 22,
|
||||
TOK_LABEL,
|
||||
TOK_JUMP,
|
||||
TOK_MAP_BEGIN,
|
||||
TOK_MAP_END,
|
||||
TOK_EOF, // end of file
|
||||
TOK_NAME, // foo, bar_quux123, loop
|
||||
TOK_LABEL, // 'my_loop
|
||||
TOK_INTEGER, // -123, 0xDEADBEEF
|
||||
TOK_STRING, // "..."
|
||||
TOK_OPERATOR,
|
||||
TOK_OPEN_BLOCK, // `{` or `:` at the end of a line
|
||||
TOK_CLOSE_BLOCK, // `}` or inferred from indentation
|
||||
TOK_OPEN_GROUP, // `(`
|
||||
TOK_CLOSE_GROUP, // `)`
|
||||
TOK_TERMINATOR, // `;` or inferred from indentation, used to separate statements in blocks
|
||||
TOK_SEPARATOR, // `,`, used to separate variables in initializers
|
||||
TOK_EQUALS, // `=`, used for assignments or as an equality operator
|
||||
TOK_IF, // if
|
||||
TOK_ELSE, // else
|
||||
TOK_MATCH, // match
|
||||
TOK_CASE, // case
|
||||
TOK_LOOP, // loop
|
||||
TOK_FN, // fn
|
||||
TOK_NEXT, // next
|
||||
TOK_EXIT, // exit
|
||||
TOK_RECURSE, // recurse
|
||||
TOK_RETURN, // return
|
||||
};
|
||||
|
||||
enum operator_ {
|
||||
OP_EQ, // =
|
||||
|
||||
OP_ADD, // +
|
||||
OP_SUB, // -
|
||||
OP_MUL, // *
|
||||
OP_DIV, // /
|
||||
OP_MOD, // %
|
||||
|
||||
OP_INV, // ~
|
||||
OP_AND, // &
|
||||
OP_OR, // |
|
||||
OP_XOR, // ^
|
||||
OP_SHL, // <<
|
||||
OP_SAR, // >>
|
||||
OP_SHR, // >>>
|
||||
|
||||
OP_NOT, // !
|
||||
OP_GT, // >
|
||||
OP_LT, // <
|
||||
OP_GTE, // >=
|
||||
OP_LTE, // <=
|
||||
OP_NE, // !=
|
||||
|
||||
OP_TYPE, // :
|
||||
OP_FUN, // ->
|
||||
|
||||
OP_JUXT, // space! but this is not emitted by the lexer.
|
||||
};
|
||||
|
||||
union token_data {
|
||||
char* name;
|
||||
char* label;
|
||||
char* string;
|
||||
int64_t int_;
|
||||
enum operator_ op;
|
||||
};
|
||||
|
||||
struct token {
|
||||
enum token_type type;
|
||||
union token_data data;
|
||||
char* identifier;
|
||||
};
|
||||
|
||||
_Bool is_unary(enum operator_ op);
|
||||
_Bool is_binary(enum operator_ op);
|
||||
_Bool is_lit(struct token tok);
|
||||
|
||||
struct token next(void);
|
||||
struct token peek(void);
|
||||
|
||||
|
|
451
src/main.c
451
src/main.c
|
@ -2,310 +2,209 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "bytecode.h"
|
||||
#include "format.h"
|
||||
#include "io.h"
|
||||
#include "lex.h"
|
||||
|
||||
#define ELF_HEADER_SIZE 0xb0
|
||||
|
||||
enum map_type {
|
||||
MAP_LEFT_TIMES,
|
||||
MAP_RIGHT_TIMES,
|
||||
MAP_LEFT_PLUS,
|
||||
MAP_RIGHT_PLUS,
|
||||
};
|
||||
|
||||
// a + (b + (c + d))
|
||||
// (a + b) + (c + d)
|
||||
// (b + a) + (c + d)
|
||||
// b + (a + (c + d))
|
||||
//
|
||||
static enum map_type maps[16];
|
||||
static size_t mapi = 0;
|
||||
|
||||
void transition_right(void) {
|
||||
assocl_plus();
|
||||
mapl_plus_begin();
|
||||
out();
|
||||
inr();
|
||||
mapl_plus_end();
|
||||
assocr_plus();
|
||||
static char* label_names[2048];
|
||||
static symbol label_symbols[2048];
|
||||
static size_t labeli = 0;
|
||||
|
||||
enum map_type pop(void) {
|
||||
if (mapi <= 0) {
|
||||
fprintf(stderr, "unmatched }\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return maps[--mapi];
|
||||
}
|
||||
|
||||
void transition_left(void) {
|
||||
out();
|
||||
inl();
|
||||
void push(enum map_type type) {
|
||||
if (mapi >= 16) {
|
||||
fprintf(stderr, "out of maps\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
maps[mapi++] = type;
|
||||
}
|
||||
|
||||
void jump_from_to(size_t from, size_t to) {
|
||||
if (from < to) {
|
||||
mapl_plus_begin();
|
||||
inl();
|
||||
for(; from <= to; to--) {
|
||||
inr();
|
||||
}
|
||||
mapl_plus_end();
|
||||
mapr_plus_begin();
|
||||
inr();
|
||||
mapr_plus_end();
|
||||
out();
|
||||
} else if (to > from) {
|
||||
for (size_t i = 0; i < from - to; i++) {
|
||||
mapr_plus_end();
|
||||
mapl_plus_begin();
|
||||
inl();
|
||||
mapl_plus_end();
|
||||
symbol lookup_label(const char* name) {
|
||||
for (size_t i = 0; i < labeli; i++) {
|
||||
if (strcmp(label_names[i], name) == 0) {
|
||||
fprintf(stderr, "%s\n", name);
|
||||
return label_symbols[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void transition_into(void) {
|
||||
assocl_plus();
|
||||
mapl_plus_begin();
|
||||
mapl_plus_begin();
|
||||
inl();
|
||||
mapl_plus_end();
|
||||
out();
|
||||
inr();
|
||||
mapl_plus_end();
|
||||
assocr_plus();
|
||||
}
|
||||
|
||||
void transition_while(void) {
|
||||
assocl_plus();
|
||||
mapl_plus_begin();
|
||||
mapr_plus_begin();
|
||||
inr();
|
||||
mapr_plus_end();
|
||||
out();
|
||||
mapl_plus_end();
|
||||
assocr_plus();
|
||||
}
|
||||
|
||||
void inc(void) {
|
||||
inr();
|
||||
factl();
|
||||
}
|
||||
|
||||
void new_nat(void) {
|
||||
// ctx
|
||||
unitil(); // ctx * 1
|
||||
inl(); // ctx * 1 + ctx * 1
|
||||
factl(); // ctx * (1 + 1)
|
||||
}
|
||||
|
||||
void swap(void) {
|
||||
assocr();
|
||||
mapr_begin();
|
||||
comm();
|
||||
mapr_end();
|
||||
assocl();
|
||||
}
|
||||
|
||||
static void select_var(size_t var) {
|
||||
// (... * a) * (b * (c * ...))
|
||||
for (size_t i = 0; i < var; i++) {
|
||||
assocr();
|
||||
// ((... * a) * b) * (c * ...)
|
||||
if (labeli >= 2048) {
|
||||
fprintf(stderr, "out of labels\n");
|
||||
exit(1);
|
||||
}
|
||||
comm();
|
||||
// (c * ...) * ((... * a) * b)
|
||||
assocl();
|
||||
// ((c * ...) * (... * a)) * b
|
||||
|
||||
fprintf(stderr, "%s:\n", name);
|
||||
|
||||
unsigned long len = strlen(name) + 1;
|
||||
label_names[labeli] = malloc(len);
|
||||
memcpy(label_names[labeli], name, len);
|
||||
label_symbols[labeli] = new_symbol();
|
||||
return label_symbols[labeli++];
|
||||
}
|
||||
|
||||
static void unselect_var(size_t var) {
|
||||
assocr();
|
||||
comm();
|
||||
for (size_t i = 0; i < var; i++) {
|
||||
assocl();
|
||||
void nomap(void) {
|
||||
if (mapi > 0) {
|
||||
fprintf(stderr, "expected all maps to be closed before new label or EOF\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
static void case_on(size_t var) {
|
||||
select_var(var);
|
||||
distr();
|
||||
mapl_plus_begin(); {
|
||||
unselect_var(var);
|
||||
} mapl_plus_end();
|
||||
mapr_plus_begin(); {
|
||||
unselect_var(var);
|
||||
} mapr_plus_end();
|
||||
}
|
||||
|
||||
static void snipe(size_t var) {
|
||||
select_var(var);
|
||||
unitel();
|
||||
comm();
|
||||
for (size_t i = 0; i < var; i++) {
|
||||
assocl();
|
||||
void begin_map(enum map_type type) {
|
||||
if (next().type != TOK_MAP_BEGIN) {
|
||||
fprintf(stderr, "expected {\n");
|
||||
exit(1);
|
||||
}
|
||||
push(type);
|
||||
}
|
||||
|
||||
symbol compile(void) {
|
||||
symbol entry_point = init_bytecode();
|
||||
|
||||
// This is the program we're trying to execute:
|
||||
//
|
||||
// fib n = fib_acc n 0 1
|
||||
// fib_acc 0 a b = a
|
||||
// fib_acc (S n) a b = fib_acc n b (a + b)
|
||||
//
|
||||
// Looks simple, right? Well, things are a bit more complicated than that.
|
||||
//
|
||||
// 1. In `fib_acc 0`, we implicitly drop the value of `b`. Because we do not have
|
||||
// weakening, we will have to free `b` explicitly here.
|
||||
//
|
||||
// fib_acc 0 a 0 = a
|
||||
// fib_acc 0 a (S b) = fib_acc 0 a b
|
||||
//
|
||||
// 2. In `fib_acc (S n)`, we use `b` twice. We do not have contraction, so we must
|
||||
// explicitly duplicate it, or implicitly duplicate it when we consume `b`.
|
||||
//
|
||||
// 3. We do not have addition as a built-in; we will need to define it ourselves.
|
||||
// Moreover, we do not have functions, so it must be fused into the definition
|
||||
// of fib_acc.
|
||||
//
|
||||
// -- We will duplicate `b` into the first argument (the new `a`)
|
||||
// -- while adding it to the second argument (`a`, which will become the new `b`).
|
||||
// fib_acc (S n) a b = fib_acc_plus n 0 a b
|
||||
// fib_acc_plus n a b' 0 = fib_acc n a b'
|
||||
// fib_acc_plus n a b' (S b) = fib_acc_plus n (S a) (S b') b
|
||||
//
|
||||
// 4. We'll have to do a lot of tedious work shuffling variables around.
|
||||
// We don't even have implicit associativity, much less commutativity!
|
||||
//
|
||||
// We have this hierarchy of states:
|
||||
//
|
||||
// 1. start(1)
|
||||
// 2. fib(n)
|
||||
// 3. fib_acc(n, a, b)
|
||||
// 4. fib_acc(0, a, b)
|
||||
// 5. fib_acc_0(a b)
|
||||
//
|
||||
|
||||
// States:
|
||||
// * start(1)
|
||||
// * fib(n)
|
||||
// * fib_acc(n, a, b)
|
||||
// * fib_acc_Z(1, (a, b))
|
||||
// * fib_acc_Z_free(a, b)
|
||||
// * fib_acc_Z_done
|
||||
// * fib_acc_S
|
||||
// * fib_acc_S_copy
|
||||
// * fib_acc_S_copy_done
|
||||
// * fib_acc_S_copy_S
|
||||
|
||||
// State 0: starting state
|
||||
mapl_plus_begin();
|
||||
// Initialize with integer (5).
|
||||
inl();
|
||||
inr();
|
||||
inr();
|
||||
inr();
|
||||
inr();
|
||||
inr();
|
||||
mapl_plus_end();
|
||||
transition_right();
|
||||
|
||||
mapr_plus_begin();
|
||||
// State 1: fib(n);
|
||||
mapl_plus_begin();
|
||||
// a = 0
|
||||
new_nat();
|
||||
// b = 1
|
||||
new_nat();
|
||||
inc();
|
||||
mapl_plus_end();
|
||||
transition_right();
|
||||
|
||||
mapr_plus_begin();
|
||||
// State 2: fib_acc(n, a, b)
|
||||
mapl_plus_begin();
|
||||
// if n=1, we return the accumulated value
|
||||
assocr();
|
||||
distl();
|
||||
mapl_plus_end();
|
||||
transition_right();
|
||||
|
||||
mapr_plus_begin();
|
||||
mapl_plus_begin();
|
||||
// State 3.1.1: fib_acc_Z(1, (a, b))
|
||||
mapl_plus_begin();
|
||||
uniter();
|
||||
// (a, b)
|
||||
mapl_plus_end();
|
||||
transition_into();
|
||||
|
||||
mapr_plus_begin();
|
||||
// State 3.1.2.1: fib_acc_Z_free(a, b)
|
||||
mapl_plus_begin();
|
||||
// n * (1 + n)
|
||||
distr();
|
||||
mapl_plus_end();
|
||||
transition_while();
|
||||
|
||||
// State 3.1.2.2: fib_acc_Z_done
|
||||
mapr_plus_begin();
|
||||
uniter();
|
||||
quit();
|
||||
mapr_plus_end();
|
||||
mapr_plus_end();
|
||||
mapl_plus_end();
|
||||
|
||||
mapr_plus_begin();
|
||||
// State 4: fib_acc_S
|
||||
mapl_plus_begin();
|
||||
assocl();
|
||||
new_nat();
|
||||
swap();
|
||||
new_nat();
|
||||
swap();
|
||||
mapl_plus_end();
|
||||
transition_into();
|
||||
|
||||
mapr_plus_begin();
|
||||
mapl_plus_begin();
|
||||
// State 5.1: fib_acc_S_copy(n, a, b1, b2, b)
|
||||
mapl_plus_begin();
|
||||
distl();
|
||||
mapl_plus_end();
|
||||
transition_into();
|
||||
mapr_plus_begin();
|
||||
mapl_plus_begin();
|
||||
// State 5.2.1: fib_acc_S_copy_done(n, a, b, b, 1)
|
||||
uniter();
|
||||
// TODO:
|
||||
mapl_plus_end();
|
||||
mapr_plus_begin();
|
||||
// State 5.2.2: fib_acc_S_copy_S(n, a, b1, b2, b)
|
||||
mapr_plus_end();
|
||||
mapr_plus_end();
|
||||
mapl_plus_end();
|
||||
|
||||
mapr_plus_end();
|
||||
mapr_plus_end();
|
||||
mapr_plus_end();
|
||||
mapr_plus_end();
|
||||
mapr_plus_end();
|
||||
|
||||
|
||||
// State 1: fib(n)
|
||||
assocl_plus();
|
||||
mapl_plus_begin();
|
||||
|
||||
// switch to state 2
|
||||
out();
|
||||
inr();
|
||||
mapl_plus_end();
|
||||
assocr_plus();
|
||||
|
||||
mapr_plus_begin();
|
||||
// State 2: fib_acc(n, a, b)
|
||||
mapl_plus_begin();
|
||||
|
||||
|
||||
// State 2.1: transition to state 3
|
||||
mapl_plus_begin();
|
||||
|
||||
|
||||
mapr_plus_end();
|
||||
mapr_plus_end();
|
||||
|
||||
while (true) {
|
||||
struct token tok = next();
|
||||
switch (tok.type) {
|
||||
case TOK_COMM:
|
||||
comm();
|
||||
break;
|
||||
case TOK_ASSOCL:
|
||||
assocl();
|
||||
break;
|
||||
case TOK_ASSOCR:
|
||||
assocr();
|
||||
break;
|
||||
case TOK_DISTL:
|
||||
distl();
|
||||
break;
|
||||
case TOK_DISTR:
|
||||
distr();
|
||||
break;
|
||||
case TOK_FACTL:
|
||||
factl();
|
||||
break;
|
||||
case TOK_FACTR:
|
||||
factr();
|
||||
break;
|
||||
case TOK_MAPL:
|
||||
begin_map(MAP_LEFT_TIMES);
|
||||
mapl_begin();
|
||||
break;
|
||||
case TOK_MAPR:
|
||||
begin_map(MAP_RIGHT_TIMES);
|
||||
mapr_begin();
|
||||
break;
|
||||
case TOK_UNITIL:
|
||||
unitil();
|
||||
break;
|
||||
case TOK_UNITIR:
|
||||
unitir();
|
||||
break;
|
||||
case TOK_UNITEL:
|
||||
unitel();
|
||||
break;
|
||||
case TOK_UNITER:
|
||||
uniter();
|
||||
break;
|
||||
case TOK_COMM_PLUS:
|
||||
comm_plus();
|
||||
break;
|
||||
case TOK_ASSOCL_PLUS:
|
||||
assocl_plus();
|
||||
break;
|
||||
case TOK_ASSOCR_PLUS:
|
||||
assocr_plus();
|
||||
break;
|
||||
case TOK_MAPL_PLUS:
|
||||
begin_map(MAP_LEFT_PLUS);
|
||||
mapl_plus_begin();
|
||||
break;
|
||||
case TOK_MAPR_PLUS:
|
||||
begin_map(MAP_RIGHT_PLUS);
|
||||
mapr_plus_begin();
|
||||
break;
|
||||
case TOK_INL:
|
||||
inl();
|
||||
break;
|
||||
case TOK_INR:
|
||||
inr();
|
||||
break;
|
||||
case TOK_OUT:
|
||||
out();
|
||||
break;
|
||||
case TOK_HALT:
|
||||
halt();
|
||||
break;
|
||||
case TOK_LABEL:
|
||||
nomap();
|
||||
define_executable_symbol(lookup_label(tok.identifier));
|
||||
break;
|
||||
case TOK_JUMP:
|
||||
fprintf(stderr, "!jump %s\n", tok.identifier);
|
||||
nomap();
|
||||
jump(lookup_label(tok.identifier));
|
||||
break;
|
||||
case TOK_MAP_BEGIN:
|
||||
fprintf(stderr, "unexpected {\n");
|
||||
exit(1);
|
||||
break;
|
||||
case TOK_MAP_END:
|
||||
; enum map_type type = pop();
|
||||
switch (type) {
|
||||
case MAP_LEFT_TIMES:
|
||||
mapl_end();
|
||||
break;
|
||||
case MAP_RIGHT_TIMES:
|
||||
mapr_end();
|
||||
break;
|
||||
case MAP_LEFT_PLUS:
|
||||
mapl_plus_end();
|
||||
break;
|
||||
case MAP_RIGHT_PLUS:
|
||||
mapr_plus_end();
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case TOK_EOF:
|
||||
goto eof;
|
||||
case TOK_IF:
|
||||
nomap();
|
||||
struct token a = next();
|
||||
struct token b = next();
|
||||
fprintf(stderr, "!if %s %s\n", a.identifier, b.identifier);
|
||||
symbol aa = lookup_label(a.identifier);
|
||||
symbol bb = lookup_label(b.identifier);
|
||||
if (a.type != TOK_JUMP || b.type != TOK_JUMP) {
|
||||
fprintf(stderr, "arguments to 'if' should be labels\n");
|
||||
exit(1);
|
||||
}
|
||||
jump_if(aa, bb);
|
||||
break;
|
||||
}
|
||||
}
|
||||
eof:
|
||||
nomap();
|
||||
finish_bytecode();
|
||||
return entry_point;
|
||||
}
|
||||
|
|
302
src/parse.c
302
src/parse.c
|
@ -1,302 +0,0 @@
|
|||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "lang.h"
|
||||
#include "lex.h"
|
||||
#include "parse.h"
|
||||
|
||||
/*
 * States of the parser's pushdown automaton. Values of this type are what
 * parse() keeps on its explicit stack. The enumerator order is kept stable
 * because the numeric values are observable (e.g. when printed as integers).
 */
enum state {
    ST_BLOCK = 0,       // expects the token that opens a block
    ST_BLOCK_BODY,      // start of a statement (assignment or expression), or nothing
    ST_BLOCK_CONT,      // after a statement: a terminator leads to another body
    ST_BLOCK_CLOSE,     // expects the token that closes a block
    ST_ASSIGN,          // at the '=' of an assignment; an expression follows
    ST_EXPR,            // start of an expression
    // HACK: The existence of this state.
    // Also, the entire structure of the parser is ugly.
    ST_EXPR_HACK,       // dispatches on the first token of an operand
    ST_EXPR_CONT,       // after an operand: juxtaposition or a binary operator
    ST_EXPR_END,        // expression finished
    ST_GROUP,           // expects the token that closes a parenthesised group
    ST_IF_ELSE,         // after the 'if' block: optional 'else'
    ST_IF_END,          // 'if' expression finished
    ST_LOOP_VARS,       // a loop variable (initialised or passed through), or nothing
    ST_LOOP_VARS_CONT,  // after a loop variable: a separator leads to another one
};
|
||||
|
||||
const char* state_name(enum state st) {
|
||||
switch (st) {
|
||||
case ST_BLOCK:
|
||||
return "{";
|
||||
case ST_BLOCK_BODY:
|
||||
return "B";
|
||||
case ST_BLOCK_CONT:
|
||||
return ";";
|
||||
case ST_BLOCK_CLOSE:
|
||||
return "}";
|
||||
case ST_ASSIGN:
|
||||
return "=";
|
||||
case ST_EXPR:
|
||||
return "x";
|
||||
case ST_EXPR_CONT:
|
||||
return "c";
|
||||
case ST_GROUP:
|
||||
return "(";
|
||||
case ST_IF_ELSE:
|
||||
return "|";
|
||||
case ST_LOOP_VARS:
|
||||
return "v";
|
||||
case ST_LOOP_VARS_CONT:
|
||||
return ",";
|
||||
case ST_EXPR_END:
|
||||
return "E";
|
||||
case ST_EXPR_HACK:
|
||||
return "H";
|
||||
case ST_IF_END:
|
||||
return "i";
|
||||
}
|
||||
}
|
||||
|
||||
#define MAX_CONTEXT 256
|
||||
static uint32_t sp = 0;
|
||||
static enum state stack[MAX_CONTEXT];
|
||||
|
||||
static void debug_print(struct token tok, struct token next) {
|
||||
for (uint32_t i = 0; i < sp; i++) {
|
||||
printf("%s", state_name(stack[i]));
|
||||
}
|
||||
printf(" ");
|
||||
print_token(tok);
|
||||
printf(" ");
|
||||
print_token(next);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
|
||||
static void push(enum state state) {
|
||||
stack[sp] = state;
|
||||
sp++;
|
||||
}
|
||||
|
||||
static enum state pop(void) {
|
||||
assert(sp != 0);
|
||||
sp--;
|
||||
return stack[sp];
|
||||
}
|
||||
|
||||
static _Bool is_assignment(struct token tok, struct token next) {
|
||||
return tok.type == TOK_NAME && next.type == TOK_EQUALS;
|
||||
}
|
||||
|
||||
|
||||
static _Bool is_expr(struct token tok) {
|
||||
return is_lit(tok)
|
||||
|| tok.type == TOK_NAME
|
||||
|| tok.type == TOK_OPEN_GROUP
|
||||
|| tok.type == TOK_IF
|
||||
|| tok.type == TOK_MATCH
|
||||
|| tok.type == TOK_FN
|
||||
|| tok.type == TOK_LOOP
|
||||
|| tok.type == TOK_NEXT
|
||||
|| tok.type == TOK_EXIT
|
||||
|| tok.type == TOK_NEXT
|
||||
|| tok.type == TOK_RETURN
|
||||
|| tok.type == TOK_RECURSE
|
||||
|| tok.type == TOK_MATCH;
|
||||
}
|
||||
|
||||
/*
 * Print "syntax error: <msg>" to stderr and terminate with exit status 1.
 *
 * msg: a NUL-terminated string describing the problem.
 *
 * Wrapped in do { ... } while (0) so that the macro expands to exactly one
 * statement: `if (cond) syntax_error("...");` then guards both the message
 * and the exit, instead of leaving an unconditional exit(1) behind.
 */
#define syntax_error(msg) \
    do { \
        fprintf(stderr, "syntax error: %s\n", (msg)); \
        exit(1); \
    } while (0)
|
||||
|
||||
void parse(void) {
|
||||
sp = 0;
|
||||
// TODO: add support for the top-level instead of this block hack
|
||||
push(ST_BLOCK_BODY);
|
||||
struct token tok = next();
|
||||
struct token nxt = peek();
|
||||
while (sp > 0) {
|
||||
debug_print(tok, nxt);
|
||||
// FIXME: stack underflow because we're faking the top-level with blocks
|
||||
switch (pop()) {
|
||||
case ST_BLOCK:
|
||||
if (tok.type == TOK_OPEN_BLOCK) {
|
||||
push(ST_BLOCK_CLOSE);
|
||||
push(ST_BLOCK_BODY);
|
||||
enter_block();
|
||||
break;
|
||||
}
|
||||
syntax_error("expected beginning of block");
|
||||
break;
|
||||
case ST_BLOCK_BODY:
|
||||
if (is_assignment(tok, nxt)) {
|
||||
push(ST_BLOCK_CONT);
|
||||
push(ST_ASSIGN);
|
||||
stmt_assign(tok.data.name);
|
||||
break;
|
||||
}
|
||||
if (is_expr(tok)) {
|
||||
push(ST_BLOCK_CONT);
|
||||
push(ST_EXPR);
|
||||
stmt_expr();
|
||||
continue;
|
||||
}
|
||||
continue;
|
||||
case ST_BLOCK_CONT:
|
||||
if (tok.type == TOK_TERMINATOR) {
|
||||
push(ST_BLOCK_BODY);
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
case ST_BLOCK_CLOSE:
|
||||
if (tok.type == TOK_CLOSE_BLOCK) {
|
||||
exit_block();
|
||||
break;
|
||||
}
|
||||
syntax_error("expected end of block");
|
||||
case ST_ASSIGN:
|
||||
assert(tok.type == TOK_OPERATOR || tok.data.op == OP_EQ);
|
||||
push(ST_EXPR);
|
||||
break;
|
||||
case ST_EXPR:
|
||||
push(ST_EXPR_END);
|
||||
push(ST_EXPR_HACK);
|
||||
continue;
|
||||
case ST_EXPR_HACK:
|
||||
switch (tok.type) {
|
||||
case TOK_STRING:
|
||||
push(ST_EXPR_CONT);
|
||||
expr_string(tok.data.string);
|
||||
break;
|
||||
case TOK_INTEGER:
|
||||
push(ST_EXPR_CONT);
|
||||
expr_integer(tok.data.int_);
|
||||
break;
|
||||
case TOK_IF:
|
||||
push(ST_IF_END);
|
||||
push(ST_IF_ELSE);
|
||||
push(ST_BLOCK);
|
||||
push(ST_EXPR);
|
||||
enter_if();
|
||||
break;
|
||||
case TOK_LOOP:
|
||||
push(ST_BLOCK);
|
||||
push(ST_LOOP_VARS);
|
||||
if (nxt.type == TOK_LABEL) {
|
||||
next();
|
||||
enter_loop(nxt.data.label);
|
||||
} else {
|
||||
enter_loop(NULL);
|
||||
}
|
||||
break;
|
||||
case TOK_NEXT:
|
||||
push(ST_EXPR);
|
||||
if (nxt.type == TOK_LABEL) {
|
||||
next();
|
||||
expr_next(nxt.data.label);
|
||||
} else {
|
||||
expr_next(NULL);
|
||||
}
|
||||
break;
|
||||
case TOK_EXIT:
|
||||
push(ST_EXPR);
|
||||
if (nxt.type == TOK_LABEL) {
|
||||
next();
|
||||
expr_exit(nxt.data.label);
|
||||
} else {
|
||||
expr_exit(NULL);
|
||||
}
|
||||
break;
|
||||
case TOK_RETURN:
|
||||
push(ST_EXPR);
|
||||
expr_return();
|
||||
break;
|
||||
case TOK_NAME:
|
||||
push(ST_EXPR_CONT);
|
||||
expr_var(tok.data.name);
|
||||
break;
|
||||
case TOK_OPEN_GROUP:
|
||||
push(ST_EXPR_CONT);
|
||||
push(ST_GROUP);
|
||||
push(ST_EXPR);
|
||||
enter_group();
|
||||
break;
|
||||
case TOK_OPERATOR:
|
||||
if (is_unary(tok.data.op)) {
|
||||
push(ST_EXPR_CONT);
|
||||
push(ST_EXPR_HACK);
|
||||
expr_op(tok.data.op);
|
||||
break;
|
||||
}
|
||||
syntax_error("only unary operators allowed at beginning of expression");
|
||||
case TOK_OPEN_BLOCK:
|
||||
push(ST_BLOCK);
|
||||
continue;
|
||||
default:
|
||||
syntax_error("expected expression");
|
||||
}
|
||||
break;
|
||||
case ST_EXPR_CONT:
|
||||
if (is_expr(tok)) {
|
||||
push(ST_EXPR_HACK);
|
||||
expr_op(OP_JUXT);
|
||||
continue;
|
||||
}
|
||||
if (tok.type == TOK_OPERATOR && is_binary(tok.data.op)) {
|
||||
push(ST_EXPR_HACK);
|
||||
expr_op(tok.data.op);
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
case ST_EXPR_END:
|
||||
exit_expr();
|
||||
continue;
|
||||
case ST_GROUP:
|
||||
if (tok.type == TOK_CLOSE_GROUP) {
|
||||
exit_group();
|
||||
break;
|
||||
}
|
||||
syntax_error("mismatched parentheses");
|
||||
case ST_IF_ELSE:
|
||||
if (tok.type == TOK_ELSE) {
|
||||
push(ST_BLOCK);
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
case ST_IF_END:
|
||||
exit_if();
|
||||
continue;
|
||||
case ST_LOOP_VARS:
|
||||
if (is_assignment(tok, nxt)) {
|
||||
push(ST_LOOP_VARS_CONT);
|
||||
push(ST_ASSIGN);
|
||||
cvar_init(tok.data.name);
|
||||
break;
|
||||
}
|
||||
if (tok.type == TOK_NAME) {
|
||||
push(ST_LOOP_VARS_CONT);
|
||||
cvar_pass(tok.data.name);
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
case ST_LOOP_VARS_CONT:
|
||||
if (tok.type == TOK_SEPARATOR) {
|
||||
push(ST_LOOP_VARS);
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
tok = next();
|
||||
nxt = peek();
|
||||
}
|
||||
if (tok.type != TOK_EOF) {
|
||||
fprintf(stderr, "syntax error: finished parsing before end of file\n");
|
||||
exit(1);
|
||||
}
|
||||
if (sp > 0) {
|
||||
fprintf(stderr, "syntax error: unfinished business at end of file: %i, %i\n", sp, stack[0]);
|
||||
exit(1);
|
||||
}
|
||||
}
|
|
@ -1,6 +0,0 @@
|
|||
#ifndef PARSE_H
#define PARSE_H

/*
 * Parse the entire token stream supplied by the lexer, invoking the
 * language's semantic actions as constructs are recognised. Takes no
 * arguments and returns nothing; on a syntax error it prints a message to
 * stderr and terminates the process with exit status 1.
 */
void parse(void);

#endif /* PARSE_H */
|
Loading…
Reference in New Issue