diff --git a/Makefile b/Makefile index 709ab72..186cbcb 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ SHELL = /bin/sh CFLAGS = -std=c99 -pedantic -Wextra -Os LDFLAGS = -lc -OBJECTS = asm.o io.o ir.o lex.o lex/indent.o main.o parse.o x86encode.o +OBJECTS = asm.o io.o ir.o lex.o lex/indent.o lang.o main.o parse.o x86encode.o .PHONY: passc passc: .bin $(OBJECTS) diff --git a/src/ir.c b/src/ir.c index 61f940c..12d81db 100644 --- a/src/ir.c +++ b/src/ir.c @@ -7,8 +7,10 @@ #include "ir.h" #include +#include #include #include +#include #include #include @@ -45,12 +47,13 @@ void init(var* argc, var* argv, var* env) { *argc = stack_depth++; } -label enter(uint32_t retc) { +void enter(void) { assert(stack_frame < MAX_STACK_FRAMES); struct stack_frame frame = { stack_depth, label_depth }; stack_frames[stack_frame] = frame; stack_frame++; - return declare(retc); + // exit label + declare(0); } void leave(var* args) { @@ -68,6 +71,12 @@ label declare(uint32_t argc) { return label_depth++; } +label declare_exit(uint32_t argc) { + label label = stack_frames[stack_frame].label_depth; + labels[label].argc = argc; + return label; +} + void define(label l, var* args) { struct label* label = &labels[l]; label->definition = here; @@ -114,6 +123,11 @@ var lit(uint64_t lit) { return stack_depth++; } +var lit_string(char* str) { + fprintf(stderr, "error: string literals not yet implemented\n"); + exit(1); +} + var sub(var subtrahend, var minuend) { // TODO: use modr/m load_var(AX, subtrahend); diff --git a/src/ir.h b/src/ir.h index 9aec2de..3548e54 100644 --- a/src/ir.h +++ b/src/ir.h @@ -20,15 +20,13 @@ void init(var* argc, var* argv, var* env); /// /// This also generates a new label corresponding with the end of the block, /// which will be automatically defined when you call `leave`. -label enter(uint32_t retc); +void enter(void); /// Leave a block. 
/// /// This will restore the context to how it was when `enter` was called, -/// plus the return values declared by the call to `enter`. -void leave(var* args); - -label declare_continue(uint32_t retc); +/// plus the return values declared by the call to `declare_exit`. +void leave(var* rets); /// Declare a new label in the innermost block. /// @@ -36,7 +34,13 @@ label declare_continue(uint32_t retc); /// This label must be called with the given number of arguments. label declare(uint32_t argc); -/// Define a label in the innermost block, automatically terminating +/// Declare an exit label for the surrounding block. +/// +/// Calling this label will exit the surrounding blocks. +/// The usual restrictions for labels apply. +label declare_exit(uint32_t retc); + +/// Define a label in the innermost block, automatically terminating /// any previous labels. /// /// All variables defined prior to the beginning of this block will be in scope. @@ -63,6 +67,9 @@ void jump_if(label label, var cond, var* args); /// Integer literal. var lit(uint64_t lit); +/// String literal. +var lit_string(char* str); + /// Subtraction. 
var sub(var subtrahend, var minuend); diff --git a/src/lang.c b/src/lang.c new file mode 100644 index 0000000..d12fca7 --- /dev/null +++ b/src/lang.c @@ -0,0 +1,558 @@ +#include "ir.h" +#include "lang.h" + +#include +#include +#include +#include +#include + +#define MAX_CONTEXT 32 +#define MAX_ASSIGNMENTS 256 +#define MAX_ARGUMENTS 256 +#define MAX_OPERATORS 256 + +struct assignment { + char* name; + var ref; +}; + +enum block_state { + BLOCK_CLEAN, + BLOCK_ASSIGN, + BLOCK_EXPR, +}; + +struct block_crumb { + enum block_state state; + uint32_t assignment_count; + struct assignment assignments[MAX_ASSIGNMENTS]; + var final; +}; + +enum if_state { + IF_COND, + IF_THEN, + IF_ELSE, +}; + +struct if_crumb { + enum if_state state; + label then; + label else_; + label end; +}; + +enum loop_state { + LOOP_CLEAN, + LOOP_CVAR_INIT, + LOOP_BODY, +}; + +struct loop_crumb { + enum loop_state state; + char* label_name; + label next; + label exit; + uint32_t assignment_count; + var initializers[MAX_ASSIGNMENTS]; + struct assignment assignments[MAX_ASSIGNMENTS]; +}; + +struct expr_crumb { + uint32_t argument_count; + uint32_t operator_count; + var arguments[MAX_ARGUMENTS]; + enum operator_ operators[MAX_OPERATORS]; +}; + +enum crumb_type { + BLOCK_CRUMB, + IF_CRUMB, + LOOP_CRUMB, + EXPR_CRUMB, + JUMP_CRUMB, +}; + +union crumb_data { + struct block_crumb block; + struct if_crumb if_; + struct loop_crumb loop; + struct expr_crumb expr; + label jump; +}; + +struct crumb { + enum crumb_type type; + union crumb_data data; +}; + +static uint32_t context_depth = 1; +static struct crumb context[MAX_CONTEXT]; + +static char* copy_str(char* str) { + unsigned long len = strlen(str); + char* new = malloc(len * sizeof(char) + 1); + memcpy(new, str, len); + new[len] = 0; + return new; +} + +static void push(struct crumb crumb) { + context[context_depth] = crumb; + context_depth++; +} + +static void push_new_block(void) { + union crumb_data data; + struct block_crumb block = { + .state = 
BLOCK_CLEAN, + .assignment_count = 0, + .final = (var) -1, + }; + data.block = block; + struct crumb crumb = { + .type = BLOCK_CRUMB, + .data = data, + }; + push(crumb); +} + +static void push_new_expr(void) { + struct expr_crumb exprc = { + .argument_count = 0, + .operator_count = 0, + }; + union crumb_data data; + data.expr = exprc; + struct crumb crumb = { + .type = EXPR_CRUMB, + .data = data, + }; + push(crumb); +} + +static void push_new_jump(label label) { + union crumb_data data; + data.jump = label; + struct crumb crumb = { + .type = JUMP_CRUMB, + .data = data, + }; + push(crumb); +} + +static void push_argument(var ref) { + struct crumb ctx = context[context_depth - 1]; + assert(ctx.type == EXPR_CRUMB); + struct expr_crumb exprc = ctx.data.expr; + if (exprc.argument_count > MAX_ARGUMENTS) { + fprintf(stderr, "error: exceeded maximum number of arguments in expression\n"); + exit(1); + } + exprc.arguments[exprc.argument_count] = ref; + exprc.argument_count++; +} + +static void push_cvar_name(char* name) { + struct crumb ctx = context[context_depth - 1]; + assert(ctx.type == LOOP_CRUMB); + struct loop_crumb loopc = ctx.data.loop; + if (loopc.assignment_count == MAX_ASSIGNMENTS) { + fprintf(stderr, "error: exceed maximum number of assignments in loop cvars\n"); + exit(1); + } + loopc.assignments[loopc.assignment_count].name = copy_str(name); +} + +static void push_cvar(var ref) { + struct crumb ctx = context[context_depth - 1]; + assert(ctx.type == LOOP_CRUMB); + struct loop_crumb loopc = ctx.data.loop; + if (loopc.assignment_count > MAX_ASSIGNMENTS) { + fprintf(stderr, "error: exceed maximum number of assignments in loop cvars\n"); + exit(1); + } + loopc.initializers[loopc.assignment_count] = ref; + loopc.assignment_count++; +} + +static var lookup_assignment( + uint32_t assignment_count, + struct assignment* assignments, + char* name +) { + for (uint32_t i = assignment_count; i > 0; i--) { + struct assignment asgn = assignments[i - 1]; + if 
(strcmp(asgn.name, name) == 0) { + return asgn.ref; + } + } + return (var) -1; +} + +static var lookup_var(char* name) { + for (uint32_t i = context_depth; i > 0; i--) { + struct crumb ctx = context[i - 1]; + var ref = (var) -1; + switch (ctx.type) { + case LOOP_CRUMB: + if (ctx.data.loop.state != LOOP_BODY) { + break; + } + ref = lookup_assignment( + ctx.data.loop.assignment_count, + ctx.data.loop.assignments, + name + ); + break; + case BLOCK_CRUMB: + ref = lookup_assignment( + ctx.data.block.assignment_count, + ctx.data.block.assignments, + name + ); + break; + default: + continue; + } + if (ref != (var) -1) { + return ref; + } + } + fprintf(stderr, "name resolution error: unknown variable %s\n", name); + exit(1); +} + +enum label_type { + NEXT_LABEL, + EXIT_LABEL, + RETURN_LABEL, +}; + +static const char* label_type_name(enum label_type type) { + switch (type) { + case NEXT_LABEL: + return "next"; + case EXIT_LABEL: + return "exit"; + case RETURN_LABEL: + return "return"; + } +} + +static label lookup_label(enum label_type type, char* name) { + for (uint32_t i = context_depth; i > 0; i--) { + struct crumb ctx = context[i - 1]; + switch (ctx.type) { + case LOOP_CRUMB: + if (name == NULL || strcmp(name, ctx.data.loop.label_name) == 0) { + if (type == NEXT_LABEL) { + return ctx.data.loop.next; + } + if (type == EXIT_LABEL) { + return ctx.data.loop.exit; + } + } + break; + default: + continue; + } + } + if (name == NULL) { + fprintf(stderr, "name resolution error: no %s label in scope\n", label_type_name(type)); + } else { + fprintf(stderr, "name resolution error: unknown label %s\n", name); + } + exit(1); +} + +void enter_block(void) { + struct crumb ctx = context[context_depth - 1]; + switch (ctx.type) { + case BLOCK_CRUMB: + // we should have seen a stmt_assign or stmt_expr first, + // either of which pushes an expr crumb. 
+ assert(0); + case EXPR_CRUMB: { + // this block is purely a scope/sequencing thing + // with no special semantics + break; + } + case IF_CRUMB: { + struct if_crumb ifc = ctx.data.if_; + switch (ifc.state) { + case IF_COND: + assert(0); + case IF_THEN: + define(ifc.then, NULL); + break; + case IF_ELSE: + define(ifc.else_, NULL); + break; + } + break; + } + case LOOP_CRUMB: { + struct loop_crumb loopc = ctx.data.loop; + assert(loopc.state == LOOP_CLEAN); + loopc.state = LOOP_BODY; + var args[MAX_ASSIGNMENTS]; + define(loopc.next, args); + // TODO NOTE: is this the correct order? + for (uint32_t i = 0; i < loopc.assignment_count; i++) { + loopc.assignments[i].ref = args[i]; + } + break; + } + default: + assert(0); + } + push_new_block(); +} + +void stmt_assign(char* name) { + struct crumb ctx = context[context_depth - 1]; + assert(ctx.type == BLOCK_CRUMB); + struct block_crumb blockc = ctx.data.block; + assert(blockc.state == BLOCK_CLEAN); + if (blockc.assignment_count == MAX_ASSIGNMENTS) { + fprintf(stderr, "error: exceeded maximum number of assignments in block\n"); + exit(1); + } + blockc.state = BLOCK_ASSIGN; + blockc.assignments[blockc.assignment_count].name = copy_str(name); + push_new_expr(); +} + +void stmt_expr(void) { + struct crumb ctx = context[context_depth - 1]; + assert(ctx.type == BLOCK_CRUMB); + struct block_crumb blockc = ctx.data.block; + assert(blockc.state == BLOCK_CLEAN); + blockc.state = BLOCK_EXPR; + push_new_expr(); +} + +void exit_block(void) { + struct crumb ctx = context[context_depth - 1]; + assert(ctx.type == BLOCK_CRUMB); + struct block_crumb blockc = ctx.data.block; + assert(blockc.state == BLOCK_CLEAN); + var ret = blockc.final; + if (ret == (var) -1) { + // TODO: better way to handle empty blocks + ret = lit(0); + } + context_depth--; + ctx = context[context_depth - 1]; + switch (ctx.type) { + case EXPR_CRUMB: { + push_argument(ret); + break; + } + case IF_CRUMB: { + struct if_crumb ifc = ctx.data.if_; + assert(ifc.state != 
IF_COND); + jump(ifc.end, &ret); + break; + } + case LOOP_CRUMB: { + // unlike with `if`, there is no `exit_loop`, so we do clean-up here. + struct loop_crumb loopc = ctx.data.loop; + assert(loopc.state == LOOP_CLEAN); + jump(loopc.exit, &ret); + context_depth--; + for (uint32_t i = 0; i < loopc.assignment_count; i++) { + free(loopc.assignments[i].name); + } + leave(&ret); + push_argument(ret); + break; + } + default: + assert(0); + } + for (uint32_t i = 0; i < blockc.assignment_count; i++) { + free(blockc.assignments[i].name); + } +} + +void exit_expr(void) { + struct crumb ctx = context[context_depth - 1]; + assert(ctx.type == EXPR_CRUMB); + struct expr_crumb exprc = ctx.data.expr; + assert(exprc.argument_count > 0); + if (exprc.operator_count > 0 || exprc.argument_count > 1) { + // TODO FIXME + fprintf(stderr, "error: I don't know how to evaluate exprs yet\n"); + exit(1); + } + var ret = exprc.arguments[0]; + context_depth--; + ctx = context[context_depth - 1]; + switch (ctx.type) { + case BLOCK_CRUMB: { + struct block_crumb blockc = ctx.data.block; + blockc.final = ret; + switch (blockc.state) { + case BLOCK_CLEAN: + assert(0); + case BLOCK_EXPR: + break; + case BLOCK_ASSIGN: + blockc.assignments[blockc.assignment_count].ref = ret; + blockc.assignment_count++; + break; + } + break; + } + case IF_CRUMB: { + struct if_crumb ifc = ctx.data.if_; + assert(ifc.state == IF_COND); + jump_if(ifc.then, ret, NULL); + jump(ifc.else_, NULL); + ifc.state = IF_THEN; + break; + } + case EXPR_CRUMB: + push_argument(ret); + break; + case LOOP_CRUMB: { + struct loop_crumb loopc = ctx.data.loop; + assert(loopc.state == LOOP_CVAR_INIT); + push_cvar(ret); + loopc.state = LOOP_CLEAN; + break; + } + case JUMP_CRUMB: { + // TODO FIXME: this is *completely wrong* for `next`! 
+ label label = ctx.data.jump; + jump(label, &ret); + // TODO: better way to handle returning impossible value + push_argument(ret); + break; + } + } +} + +void enter_if(void) { + enter(); + label then = declare(0); + label else_ = declare(0); + label end = declare_exit(1); + struct if_crumb ifc = { + .state = IF_COND, + .then = then, + .else_ = else_, + .end = end, + }; + union crumb_data data; + data.if_ = ifc; + struct crumb ctx = { + .type = IF_CRUMB, + .data = data, + }; + push(ctx); + push_new_expr(); +} + +void exit_if(void) { + struct crumb ctx = context[context_depth - 1]; + assert(ctx.type == IF_CRUMB); + struct if_crumb ifc = ctx.data.if_; + switch (ifc.state) { + case IF_COND: + assert(0); + case IF_THEN: { + define(ifc.else_, NULL); + var ret = lit(0); + jump(ifc.end, &ret); + break; + } + case IF_ELSE: + break; + } + var ret; + leave(&ret); + push_argument(ret); +} + +void enter_loop(char* label_name) { + enter(); + label exit = declare_exit(1); + struct loop_crumb loopc = { + .state = LOOP_CLEAN, + .label_name = copy_str(label_name), + .assignment_count = 0, + .exit = exit + }; + union crumb_data data; + data.loop = loopc; + struct crumb ctx = { + .type = LOOP_CRUMB, + .data = data, + }; + push(ctx); +} + +void cvar_pass(char* name) { + push_cvar_name(name); + push_cvar(lookup_var(name)); +} + +void cvar_init(char* name) { + struct crumb ctx = context[context_depth - 1]; + assert(ctx.type == LOOP_CRUMB); + struct loop_crumb loopc = ctx.data.loop; + loopc.state = LOOP_CVAR_INIT; + push_cvar_name(name); + push_new_expr(); +} + +void expr_next(char* label) { + push_new_jump(lookup_label(NEXT_LABEL, label)); + push_new_expr(); +} + +void expr_exit(char* label) { + push_new_jump(lookup_label(EXIT_LABEL, label)); + push_new_expr(); +} + +void expr_return(void) { + push_new_jump(lookup_label(RETURN_LABEL, NULL)); + push_new_expr(); +} + +void enter_group(void) { + push_new_expr(); +} + +void exit_group(void) { + // exit_expr is sufficient +} + +void 
expr_op(enum operator_ op) { + struct crumb ctx = context[context_depth - 1]; + assert(ctx.type == EXPR_CRUMB); + struct expr_crumb exprc = ctx.data.expr; + if (exprc.operator_count > MAX_OPERATORS) { + fprintf(stderr, "error: exceeded maximum number of operators in expression\n"); + exit(1); + } + exprc.operators[exprc.operator_count] = op; + exprc.operator_count++; +} + +void expr_string(char* string) { + push_argument(lit_string(string)); +} + +void expr_integer(int64_t num) { + push_argument(lit((uint64_t) num)); +} + +void expr_var(char* var) { + push_argument(lookup_var(var)); +} diff --git a/src/lang.h b/src/lang.h new file mode 100644 index 0000000..d493d22 --- /dev/null +++ b/src/lang.h @@ -0,0 +1,32 @@ +#ifndef LANG_H +#define LANG_H + +#include "lex.h" + +void enter_block(void); +void stmt_assign(char* name); +void stmt_expr(void); +void exit_block(void); + +void exit_expr(void); + +void enter_if(void); +void exit_if(void); + +void enter_loop(char* label); +void cvar_pass(char* name); +void cvar_init(char* name); + +void expr_next(char* label); +void expr_exit(char* label); +void expr_return(void); + +void enter_group(void); +void exit_group(void); + +void expr_op(enum operator_ op); +void expr_string(char* string); +void expr_integer(int64_t num); +void expr_var(char* var); + +#endif diff --git a/src/parse.c b/src/parse.c index 872590f..f08974b 100644 --- a/src/parse.c +++ b/src/parse.c @@ -192,6 +192,10 @@ void parse(void) { push(ST_EXPR); break; } + if (tok.type == TOK_OPEN_BLOCK) { + push(ST_BLOCK); + continue; + } syntax_error("expected expression"); case ST_EXPR_CONT: if (is_expr(tok)) {