pass-lang/src/lang.c

657 lines
18 KiB
C

#include "ir.h"
#include "lang.h"
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Capacity limits for the fixed-size arrays used by the crumb stack below.
// Number of slots in the `context` stack (maximum nesting depth).
#define MAX_CONTEXT 32
// Maximum named assignments per block, and maximum cvars per loop.
#define MAX_ASSIGNMENTS 256
// Maximum pending operands per expression, and maximum arguments per jump.
#define MAX_ARGUMENTS 256
// Maximum pending operators per expression.
#define MAX_OPERATORS 256
// A name bound to an IR variable reference.
struct assignment {
// Heap-allocated copy of the name (made with copy_str, freed in exit_block).
char* name;
// IR variable the name currently refers to.
var ref;
};
// What a block crumb is currently waiting for.
enum block_state {
// No statement in progress (also required when the block exits).
BLOCK_CLEAN,
// stmt_assign was seen; the assigned expression is still being parsed.
BLOCK_ASSIGN,
// stmt_expr was seen; the statement expression is still being parsed.
BLOCK_EXPR,
};
// Crumb for a `{ ... }` block: its scope of named assignments and its value.
struct block_crumb {
enum block_state state;
// Number of completed assignments; bumped in exit_expr once the value is known.
uint32_t assignment_count;
// Slot [assignment_count] holds the name of an assignment still in progress.
struct assignment assignments[MAX_ASSIGNMENTS];
// Value of the most recently completed statement; (var) -1 while the block
// is empty (see push_new_block / exit_block).
var final;
};
// Progress through an `if` expression.
enum if_state {
// Condition expression is being parsed (initial state, set in enter_if).
IF_COND,
// Condition finished (exit_expr); the `then` block comes next.
IF_THEN,
// `then` block finished (exit_block); an `else` block may follow.
IF_ELSE,
// `else` block finished (exit_block).
IF_END,
};
// Crumb for an `if` expression; all three labels are declared in enter_if.
struct if_crumb {
enum if_state state;
// Label defined when the `then` block is entered.
label then;
// Label defined when the `else` block is entered (or in exit_if when there
// is no `else` block); target of the conditional jump in exit_expr.
label else_;
// Label both branches jump to with their result value.
label end;
};
// Progress through a loop header and body.
enum loop_state {
// In the loop header, between cvars (initial state).
LOOP_CLEAN,
// cvar_init was seen; the initializer expression is still being parsed.
LOOP_CVAR_INIT,
// The body block has been entered; cvar names are now visible to lookup_var.
LOOP_BODY,
};
// Crumb for a loop: its label name, jump targets and cvars.
struct loop_crumb {
enum loop_state state;
// Heap-allocated copy of the loop's label name (made in enter_loop).
char* label_name;
// Label declared and defined when the body block is entered (enter_block).
label next;
// Label declared in enter_loop; jumped to with the loop's result.
label exit;
// Number of cvars whose name and initializer have both been recorded.
uint32_t assignment_count;
// initializers[i] is the initial value recorded for cvar i (push_cvar).
var initializers[MAX_ASSIGNMENTS];
// assignments[i] holds cvar i's name; its ref is filled in by enter_block.
struct assignment assignments[MAX_ASSIGNMENTS];
};
// Crumb for an expression being parsed: pending operands and operators,
// reduced to a single value by reduce_expression.
struct expr_crumb {
uint32_t argument_count;
uint32_t operator_count;
// Operand values in the order they were pushed.
var arguments[MAX_ARGUMENTS];
// Operators in the order they were pushed.
enum operator_ operators[MAX_OPERATORS];
};
// Crumb for a `next` / `exit` / `return` expression collecting its arguments.
struct jump_crumb {
// Target label, resolved by lookup_label.
label label;
// Number of arguments the target expects (from lookup_label).
uint32_t arity;
// Number of arguments collected so far.
uint32_t argument_count;
var arguments[MAX_ARGUMENTS];
};
// Discriminator for `union crumb_data`.
// BLOCK_CRUMB is the first enumerator (value 0), so a zero-initialized
// crumb reads as a block crumb.
enum crumb_type {
BLOCK_CRUMB,
IF_CRUMB,
LOOP_CRUMB,
EXPR_CRUMB,
JUMP_CRUMB,
};
// Payload of a crumb; the active member is selected by `enum crumb_type`.
union crumb_data {
struct block_crumb block;
struct if_crumb if_;
struct loop_crumb loop;
struct expr_crumb expr;
struct jump_crumb jump;
};
// One entry of the context stack: a tagged union describing a syntactic
// construct that is currently open.
struct crumb {
enum crumb_type type;
union crumb_data data;
};
// Number of live entries in `context`; the innermost crumb is
// context[context_depth - 1].
// The depth starts at 1: slot 0 is never pushed explicitly. Being static it
// is zero-initialized, and because BLOCK_CRUMB and BLOCK_CLEAN are the first
// enumerators it reads as a clean, empty block crumb (its `final` is
// all-zero rather than (var) -1).
static uint32_t context_depth = 1;
// Stack of currently open constructs, outermost first.
static struct crumb context[MAX_CONTEXT];
// Returns a freshly malloc'ed, NUL-terminated copy of `str`.
// `str` must be a valid NUL-terminated string. The caller owns the result
// and must free() it. Prints an error and exits if the allocation fails,
// instead of writing through a NULL pointer.
static char* copy_str(const char* str) {
    size_t len = strlen(str);
    char* copy = malloc(len + 1);
    if (copy == NULL) {
        fprintf(stderr, "error: out of memory\n");
        exit(1);
    }
    // len + 1 also copies the terminating NUL.
    memcpy(copy, str, len + 1);
    return copy;
}
static void push(struct crumb crumb) {
context[context_depth] = crumb;
context_depth++;
}
static void push_new_block(void) {
union crumb_data data;
struct block_crumb block = {
.state = BLOCK_CLEAN,
.assignment_count = 0,
.final = (var) -1,
};
data.block = block;
struct crumb crumb = {
.type = BLOCK_CRUMB,
.data = data,
};
push(crumb);
}
static void push_new_expr(void) {
struct expr_crumb exprc = {
.argument_count = 0,
.operator_count = 0,
};
union crumb_data data;
data.expr = exprc;
struct crumb crumb = {
.type = EXPR_CRUMB,
.data = data,
};
push(crumb);
}
// Result of lookup_label: a jump target and the number of arguments a jump
// to it must supply.
struct label_and_arity {
label label;
uint32_t arity;
};
static void push_new_jump(struct label_and_arity label) {
union crumb_data data;
data.jump.label = label.label;
data.jump.arity = label.arity;
data.jump.argument_count = 0;
struct crumb crumb = {
.type = JUMP_CRUMB,
.data = data,
};
push(crumb);
}
// Appends `ref` as the next operand of the innermost crumb, which must be an
// expression crumb. Prints an error and exits when the operand array is full.
static void push_argument(var ref) {
    struct crumb* ctx = &context[context_depth - 1];
    assert(ctx->type == EXPR_CRUMB);
    struct expr_crumb* exprc = &ctx->data.expr;
    // `>=`, not `>`: index MAX_ARGUMENTS is already one past the end.
    if (exprc->argument_count >= MAX_ARGUMENTS) {
        fprintf(stderr, "error: exceeded maximum number of arguments in expression\n");
        exit(1);
    }
    exprc->arguments[exprc->argument_count] = ref;
    exprc->argument_count++;
}
// Records a copy of `name` as the name of the next cvar of the innermost
// crumb, which must be a loop crumb. The cvar count is not advanced here;
// push_cvar does that once the initial value is known.
static void push_cvar_name(char* name) {
    struct crumb* top = &context[context_depth - 1];
    assert(top->type == LOOP_CRUMB);
    struct loop_crumb* loop = &top->data.loop;
    uint32_t slot = loop->assignment_count;
    if (slot == MAX_ASSIGNMENTS) {
        fprintf(stderr, "error: exceed maximum number of assignments in loop cvars\n");
        exit(1);
    }
    loop->assignments[slot].name = copy_str(name);
}
// Records `ref` as the initial value of the next cvar of the innermost
// crumb, which must be a loop crumb, and advances the cvar count.
// Prints an error and exits when the cvar arrays are full.
static void push_cvar(var ref) {
    struct crumb* ctx = &context[context_depth - 1];
    assert(ctx->type == LOOP_CRUMB);
    struct loop_crumb* loopc = &ctx->data.loop;
    // `>=`, not `>`: index MAX_ASSIGNMENTS is already one past the end.
    if (loopc->assignment_count >= MAX_ASSIGNMENTS) {
        fprintf(stderr, "error: exceed maximum number of assignments in loop cvars\n");
        exit(1);
    }
    loopc->initializers[loopc->assignment_count] = ref;
    loopc->assignment_count++;
}
// Searches the first `assignment_count` entries of `assignments` for `name`,
// newest entry first so that later assignments shadow earlier ones.
// Returns the bound variable, or (var) -1 when the name is not present.
static var lookup_assignment(
    uint32_t assignment_count,
    struct assignment* assignments,
    char* name
) {
    for (uint32_t ix = assignment_count; ix-- > 0; ) {
        struct assignment* candidate = &assignments[ix];
        if (strcmp(candidate->name, name) != 0) {
            continue;
        }
        return candidate->ref;
    }
    return (var) -1;
}
static var lookup_var(char* name) {
for (uint32_t i = context_depth; i > 0; i--) {
struct crumb ctx = context[i - 1];
var ref = (var) -1;
switch (ctx.type) {
case LOOP_CRUMB:
if (ctx.data.loop.state != LOOP_BODY) {
break;
}
ref = lookup_assignment(
ctx.data.loop.assignment_count,
ctx.data.loop.assignments,
name
);
break;
case BLOCK_CRUMB:
ref = lookup_assignment(
ctx.data.block.assignment_count,
ctx.data.block.assignments,
name
);
break;
default:
continue;
}
if (ref != (var) -1) {
return ref;
}
}
fprintf(stderr, "name resolution error: unknown variable %s\n", name);
exit(1);
}
// Kinds of jump target that lookup_label can be asked to resolve.
enum label_type {
    NEXT_LABEL,
    EXIT_LABEL,
    RETURN_LABEL,
};
// Returns the keyword spelling of `type` for use in diagnostics.
// A value outside the enumeration yields "unknown" instead of running off
// the end of a non-void function.
static const char* label_type_name(enum label_type type) {
    switch (type) {
    case NEXT_LABEL:
        return "next";
    case EXIT_LABEL:
        return "exit";
    case RETURN_LABEL:
        return "return";
    }
    return "unknown";
}
static struct label_and_arity lookup_label(enum label_type type, char* name) {
for (uint32_t i = context_depth; i > 0; i--) {
struct crumb ctx = context[i - 1];
switch (ctx.type) {
case LOOP_CRUMB:
if (name == NULL || strcmp(name, ctx.data.loop.label_name) == 0) {
struct label_and_arity label;
if (type == NEXT_LABEL) {
label.label = ctx.data.loop.next;
label.arity = ctx.data.loop.assignment_count;
return label;
}
if (type == EXIT_LABEL) {
label.label = ctx.data.loop.exit;
label.arity = 1;
return label;
}
}
break;
default:
continue;
}
}
if (name == NULL) {
fprintf(stderr, "name resolution error: no %s label in scope\n", label_type_name(type));
} else {
fprintf(stderr, "name resolution error: unknown label %s\n", name);
}
exit(1);
}
// Applies one binary operator to the two leftmost operands of `exprc`.
// `emit` receives arguments[0] and arguments[1]; its result replaces
// arguments[0], and the remaining operands shift down by one slot.
// Requires at least two pending operands.
static void reduce_expression_binop(struct expr_crumb* exprc, var (*emit)(var arg1, var arg2)) {
    assert(exprc->argument_count >= 2);
    var lhs = exprc->arguments[0];
    var rhs = exprc->arguments[1];
    exprc->arguments[0] = emit(lhs, rhs);
    // memmove takes a byte count, so scale the element count by the element
    // size; otherwise only part of the tail would be moved.
    size_t tail = exprc->argument_count - 2;
    memmove(&exprc->arguments[1], &exprc->arguments[2], tail * sizeof exprc->arguments[0]);
    exprc->argument_count--;
}
static var reduce_expression(struct expr_crumb* exprc) {
// TODO FIXME: operator precedence
if (exprc->operator_count > 0 || exprc->argument_count > 1) {
fprintf(stderr, "warning: expression reduction may be incorrect\n");
//exit(1);
}
for (uint32_t op_ix = 0; op_ix < exprc->operator_count; op_ix++) {
switch (exprc->operators[op_ix]) {
case OP_ADD:
reduce_expression_binop(exprc, add);
break;
case OP_SUB:
reduce_expression_binop(exprc, sub);
break;
default:
fprintf(stderr, "error: operator not implemented: %i", exprc->operators[op_ix]);
exit(1);
}
}
exprc->operator_count = 0;
assert(exprc->argument_count == 1);
return exprc->arguments[0];
}
// Parser hook: a block begins. Performs the set-up required by the enclosing
// crumb, then pushes a fresh block crumb.
//   - expression parent: nothing extra to do;
//   - `if` parent: defines the `then` or `else_` label matching the if state;
//   - loop parent: declares and defines the loop's `next` label and binds
//     the cvar names to the variables obtained from define().
// A block crumb as parent is rejected: a statement hook must have pushed an
// expression crumb first.
// NOTE: the assert(0) calls vanish under NDEBUG, in which case control falls
// through into the following case label.
void enter_block(void) {
printf("** enter_block\n");
struct crumb* ctx = &context[context_depth - 1];
switch (ctx->type) {
case BLOCK_CRUMB:
// we should have seen a stmt_assign or stmt_expr first,
// either of which pushes an expr crumb.
assert(0);
case EXPR_CRUMB: {
// this block is purely a scope/sequencing thing
// with no special semantics
break;
}
case IF_CRUMB: {
struct if_crumb ifc = ctx->data.if_;
switch (ifc.state) {
case IF_COND:
case IF_END:
// a block is only valid as the `then` or `else` branch
assert(0);
case IF_THEN:
define(ifc.then, NULL);
break;
case IF_ELSE:
define(ifc.else_, NULL);
break;
}
break;
}
case LOOP_CRUMB: {
struct loop_crumb* loopc = &ctx->data.loop;
// the header must be complete (no cvar initializer still open)
assert(loopc->state == LOOP_CLEAN);
loopc->state = LOOP_BODY;
// the `next` label takes one parameter per cvar
loopc->next = declare(loopc->assignment_count);
printf("LOOP %i END %i\n", loopc->next, loopc->exit);
// `args` is uninitialized here; presumably define() fills it with the
// label's parameter variables — TODO confirm against ir.h
var args[MAX_ASSIGNMENTS];
define(loopc->next, args);
// NOTE(review): loopc->initializers is not read here or anywhere else in
// the visible code; confirm where the initial cvar values reach `next`.
// TODO NOTE: is this the correct order?
for (uint32_t i = 0; i < loopc->assignment_count; i++) {
loopc->assignments[i].ref = args[i];
}
break;
}
default:
assert(0);
}
push_new_block();
}
// Parser hook: an assignment statement `name = <expr>` begins inside the
// innermost block. Stores a copy of the name in the block's next free
// assignment slot and opens an expression crumb for the value; the slot only
// becomes visible to lookups once exit_expr advances the count.
void stmt_assign(char* name) {
    printf("** stmt_assign\n");
    struct crumb* top = &context[context_depth - 1];
    assert(top->type == BLOCK_CRUMB);
    struct block_crumb* block = &top->data.block;
    assert(block->state == BLOCK_CLEAN);
    uint32_t slot = block->assignment_count;
    if (slot == MAX_ASSIGNMENTS) {
        fprintf(stderr, "error: exceeded maximum number of assignments in block\n");
        exit(1);
    }
    block->assignments[slot].name = copy_str(name);
    block->state = BLOCK_ASSIGN;
    push_new_expr();
}
// Parser hook: an expression statement begins inside the innermost block.
// Marks the block as waiting for a statement value and opens an expression
// crumb for it.
void stmt_expr(void) {
    printf("** stmt_expr\n");
    struct crumb* top = &context[context_depth - 1];
    assert(top->type == BLOCK_CRUMB);
    assert(top->data.block.state == BLOCK_CLEAN);
    top->data.block.state = BLOCK_EXPR;
    push_new_expr();
}
// Parser hook: the innermost block ends. Pops the block crumb and hands the
// block's value (its last statement's value, or lit(0) for an empty block)
// to the enclosing crumb:
//   - expression parent: the value becomes an operand;
//   - `if` parent: jumps to the `end` label with the value and advances the
//     if state (THEN -> ELSE -> END);
//   - loop parent: jumps to the loop's `exit` label with the value, pops the
//     loop crumb as well, releases its names, and pushes the value produced
//     by leave() as an operand of the crumb enclosing the loop.
// Finally frees the names owned by the block.
void exit_block(void) {
    printf("** exit_block\n");
    struct crumb* ctx = &context[context_depth - 1];
    assert(ctx->type == BLOCK_CRUMB);
    // Work on a copy: the slot is popped below, the names are freed last.
    struct block_crumb blockc = ctx->data.block;
    assert(blockc.state == BLOCK_CLEAN);
    var ret = blockc.final;
    if (ret == (var) -1) {
        // TODO: better way to handle empty blocks
        ret = lit(0);
    }
    context_depth--;
    ctx = &context[context_depth - 1];
    switch (ctx->type) {
    case EXPR_CRUMB: {
        push_argument(ret);
        break;
    }
    case IF_CRUMB: {
        struct if_crumb* ifc = &ctx->data.if_;
        assert(ifc->state == IF_THEN || ifc->state == IF_ELSE);
        jump(ifc->end, &ret);
        if (ifc->state == IF_THEN) {
            ifc->state = IF_ELSE;
        } else if (ifc->state == IF_ELSE) {
            ifc->state = IF_END;
        }
        break;
    }
    case LOOP_CRUMB: {
        // unlike with `if`, there is no `exit_loop`, so we do clean-up here.
        struct loop_crumb loopc = ctx->data.loop;
        assert(loopc.state == LOOP_BODY);
        jump(loopc.exit, &ret);
        context_depth--;
        for (uint32_t i = 0; i < loopc.assignment_count; i++) {
            free(loopc.assignments[i].name);
        }
        // The label name was allocated in enter_loop and nothing refers to it
        // once the loop crumb is popped; release it here to avoid a leak.
        free(loopc.label_name);
        leave(&ret);
        push_argument(ret);
        break;
    }
    default:
        assert(0);
    }
    for (uint32_t i = 0; i < blockc.assignment_count; i++) {
        free(blockc.assignments[i].name);
    }
}
// Parser hook: the innermost expression ends. Reduces it to a single value,
// pops the expression crumb and hands the value to the enclosing crumb:
//   - block parent: becomes the block's latest value and, for an assignment
//     statement, completes the pending assignment;
//   - `if` parent: the value is the condition; emits the conditional jump to
//     the `else_` label and moves the if to IF_THEN;
//   - expression parent: becomes an operand;
//   - loop parent: becomes the initial value of the cvar being declared;
//   - jump parent: becomes the final jump argument; the jump is emitted and
//     the jump crumb is popped.
// NOTE: the switch has no default case, so any other parent is ignored.
void exit_expr(void) {
printf("** exit_expr\n");
struct crumb* ctx = &context[context_depth - 1];
assert(ctx->type == EXPR_CRUMB);
struct expr_crumb* exprc = &ctx->data.expr;
assert(exprc->argument_count > 0);
var ret = reduce_expression(exprc);
// pop the expression crumb; ctx now refers to its parent
context_depth--;
ctx = &context[context_depth - 1];
switch (ctx->type) {
case BLOCK_CRUMB: {
struct block_crumb* blockc = &ctx->data.block;
blockc->final = ret;
switch (blockc->state) {
case BLOCK_CLEAN:
// an expression can only end inside a block after stmt_assign/stmt_expr;
// under NDEBUG this falls through to BLOCK_EXPR
assert(0);
case BLOCK_EXPR:
blockc->state = BLOCK_CLEAN;
break;
case BLOCK_ASSIGN:
// the name was stored by stmt_assign; bumping the count makes the
// assignment visible to lookups
blockc->assignments[blockc->assignment_count].ref = ret;
blockc->assignment_count++;
blockc->state = BLOCK_CLEAN;
break;
}
break;
}
case IF_CRUMB: {
struct if_crumb* ifc = &ctx->data.if_;
assert(ifc->state == IF_COND);
jump_unless(ifc->else_, ret, NULL);
//jump(ifc->then_, NULL);
ifc->state = IF_THEN;
break;
}
case EXPR_CRUMB:
push_argument(ret);
break;
case LOOP_CRUMB: {
struct loop_crumb* loopc = &ctx->data.loop;
assert(loopc->state == LOOP_CVAR_INIT);
// the cvar's name was stored by cvar_init; this records its initial
// value and advances the cvar count
push_cvar(ret);
loopc->state = LOOP_CLEAN;
break;
}
case JUMP_CRUMB: {
// TODO FIXME: this is *completely wrong* for `next`!
// jumpc is a local copy; the crumb itself is popped just below
struct jump_crumb jumpc = ctx->data.jump;
fprintf(stderr, "args: %i, arity: %i\n", jumpc.argument_count, jumpc.arity);
// earlier arguments were collected by expr_op; this value is the last one
assert(jumpc.argument_count + 1 == jumpc.arity);
jumpc.arguments[jumpc.argument_count] = ret;
jump(jumpc.label, jumpc.arguments);
// TODO: better way to handle returning impossible value
context_depth--;
// the jump expression still has to yield an operand for the crumb that
// encloses it; the last argument value is reused for that
push_argument(ret);
break;
}
}
}
void enter_if(void) {
printf("** enter_if\n");
enter();
label then = declare(0);
label else_ = declare(0);
label end = declare_exit(1);
printf("IF THEN %i ELSE %i EXIT %i\n", then, else_, end);
struct if_crumb ifc = {
.state = IF_COND,
.then = then,
.else_ = else_,
.end = end,
};
union crumb_data data;
data.if_ = ifc;
struct crumb ctx = {
.type = IF_CRUMB,
.data = data,
};
push(ctx);
push_new_expr();
}
void exit_if(void) {
printf("** exit_if\n");
struct crumb ctx = context[context_depth - 1];
assert(ctx.type == IF_CRUMB);
struct if_crumb ifc = ctx.data.if_;
switch (ifc.state) {
case IF_COND:
case IF_THEN:
assert(0);
case IF_ELSE: {
define(ifc.else_, NULL);
var ret = lit(0);
jump(ifc.end, &ret);
break;
}
case IF_END:
break;
}
var ret;
leave(&ret);
context_depth--;
push_argument(ret);
}
// Parser hook: a loop begins. Calls enter(), declares the loop's exit label
// (which carries one value) and pushes a loop crumb in the LOOP_CLEAN state
// with no cvars. The crumb stores its own heap copy of `label_name`.
// A NULL `label_name` is stored as the empty string, so it never reaches
// strlen() and the stored name is always a valid string for strcmp().
void enter_loop(char* label_name) {
    printf("** enter_loop\n");
    enter();
    // Named exit_label rather than `exit` so the local does not shadow exit().
    label exit_label = declare_exit(1);
    struct loop_crumb loopc = {
        .state = LOOP_CLEAN,
        .label_name = copy_str(label_name != NULL ? label_name : ""),
        .assignment_count = 0,
        .exit = exit_label,
    };
    union crumb_data data;
    data.loop = loopc;
    struct crumb ctx = {
        .type = LOOP_CRUMB,
        .data = data,
    };
    push(ctx);
}
void cvar_pass(char* name) {
printf("** cvar_pass\n");
push_cvar_name(name);
push_cvar(lookup_var(name));
}
// Parser hook: a loop cvar with an explicit initializer begins. Marks the
// innermost (loop) crumb as waiting for an initializer, records the cvar's
// name, and opens an expression crumb for the initializer; exit_expr
// completes the cvar.
void cvar_init(char* name) {
    printf("** cvar_init\n");
    struct crumb* top = &context[context_depth - 1];
    assert(top->type == LOOP_CRUMB);
    top->data.loop.state = LOOP_CVAR_INIT;
    push_cvar_name(name);
    push_new_expr();
}
void expr_next(char* label) {
printf("** expr_next\n");
push_new_jump(lookup_label(NEXT_LABEL, label));
push_new_expr();
}
void expr_exit(char* label) {
printf("** expr_exit\n");
push_new_jump(lookup_label(EXIT_LABEL, label));
push_new_expr();
}
void expr_return(void) {
printf("** expr_return\n");
push_new_jump(lookup_label(RETURN_LABEL, NULL));
push_new_expr();
}
// Parser hook: a parenthesized group begins; it is parsed as a nested
// expression whose value becomes an operand of the enclosing expression.
void enter_group(void) {
    fputs("** enter_group\n", stdout);
    push_new_expr();
}
// Parser hook: a parenthesized group ends. Only logs the event; the nested
// expression is closed by exit_expr.
void exit_group(void) {
    // exit_expr is sufficient
    fputs("** exit_group\n", stdout);
}
// Parser hook: operator `op` was seen in the innermost expression.
// Normally the operator is appended to the expression's pending operators.
// Special case: OP_JUXT directly inside a jump crumb separates jump
// arguments, so the expression parsed so far is reduced, stored as the next
// jump argument, and replaced by a fresh expression crumb.
// Prints an error and exits when the operator array is full.
void expr_op(enum operator_ op) {
    printf("** expr_op %i\n", op);
    struct crumb* ctx = &context[context_depth - 1];
    assert(ctx->type == EXPR_CRUMB);
    struct expr_crumb* exprc = &ctx->data.expr;
    if (op == OP_JUXT && context_depth > 1) {
        // HACK: should handle continuations separately from expressions
        struct crumb* ctx2 = &context[context_depth - 2];
        if (ctx2->type == JUMP_CRUMB) {
            struct jump_crumb* jumpc = &ctx2->data.jump;
            var ret = reduce_expression(exprc);
            assert(jumpc->argument_count < MAX_ARGUMENTS);
            jumpc->arguments[jumpc->argument_count] = ret;
            jumpc->argument_count++;
            // pop the finished argument expression and start the next one
            context_depth--;
            push_new_expr();
            return;
        }
    }
    // `>=`, not `>`: index MAX_OPERATORS is already one past the end.
    if (exprc->operator_count >= MAX_OPERATORS) {
        fprintf(stderr, "error: exceeded maximum number of operators in expression\n");
        exit(1);
    }
    exprc->operators[exprc->operator_count] = op;
    exprc->operator_count++;
}
void expr_string(char* string) {
printf("** expr_string %s\n", string);
push_argument(lit_string(string));
}
// Parser hook: an integer literal was seen; its IR literal becomes an
// operand of the innermost expression. The value is passed to lit() as its
// two's-complement bit pattern.
void expr_integer(int64_t num) {
    // %lli expects a long long; int64_t may be plain long, so cast explicitly
    // to keep the format and the argument type in agreement.
    printf("** expr_integer %lli\n", (long long) num);
    push_argument(lit((uint64_t) num));
}
// Parser hook: a variable reference was seen; the variable it resolves to
// becomes an operand of the innermost expression (lookup_var exits with an
// error when the name is unknown).
// NOTE: the parameter name `var` shadows the `var` type inside this function,
// so no local of that type can be declared here without renaming it.
void expr_var(char* var) {
printf("** expr_var %s\n", var);
push_argument(lookup_var(var));
}