/*
 * pass-lang/src/parse.c
 *
 * 303 lines
 * 9.0 KiB
 * C
 */

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lang.h"
#include "lex.h"
#include "parse.h"
enum state {
ST_BLOCK,
ST_BLOCK_BODY,
ST_BLOCK_CONT,
ST_BLOCK_CLOSE,
ST_ASSIGN,
ST_EXPR,
// HACK: The existence of this state.
// Also, the entire structure of the parser is ugly.
ST_EXPR_HACK,
ST_EXPR_CONT,
ST_EXPR_END,
ST_GROUP,
ST_IF_ELSE,
ST_IF_END,
ST_LOOP_VARS,
ST_LOOP_VARS_CONT,
};
const char* state_name(enum state st) {
switch (st) {
case ST_BLOCK:
return "{";
case ST_BLOCK_BODY:
return "B";
case ST_BLOCK_CONT:
return ";";
case ST_BLOCK_CLOSE:
return "}";
case ST_ASSIGN:
return "=";
case ST_EXPR:
return "x";
case ST_EXPR_CONT:
return "c";
case ST_GROUP:
return "(";
case ST_IF_ELSE:
return "|";
case ST_LOOP_VARS:
return "v";
case ST_LOOP_VARS_CONT:
return ",";
case ST_EXPR_END:
return "E";
case ST_EXPR_HACK:
return "H";
case ST_IF_END:
return "i";
}
}
// Capacity of the parser's state stack.
#define MAX_CONTEXT 256
// Number of states currently on the stack; also the index of the next free slot.
static uint32_t sp = 0;
// Pending parser states: stack[0] is the bottom, stack[sp - 1] is the top.
static enum state stack[MAX_CONTEXT];
static void debug_print(struct token tok, struct token next) {
for (uint32_t i = 0; i < sp; i++) {
printf("%s", state_name(stack[i]));
}
printf(" ");
print_token(tok);
printf(" ");
print_token(next);
printf("\n");
}
static void push(enum state state) {
stack[sp] = state;
sp++;
}
static enum state pop(void) {
assert(sp != 0);
sp--;
return stack[sp];
}
static _Bool is_assignment(struct token tok, struct token next) {
return tok.type == TOK_NAME && next.type == TOK_EQUALS;
}
static _Bool is_expr(struct token tok) {
return is_lit(tok)
|| tok.type == TOK_NAME
|| tok.type == TOK_OPEN_GROUP
|| tok.type == TOK_IF
|| tok.type == TOK_MATCH
|| tok.type == TOK_FN
|| tok.type == TOK_LOOP
|| tok.type == TOK_NEXT
|| tok.type == TOK_EXIT
|| tok.type == TOK_NEXT
|| tok.type == TOK_RETURN
|| tok.type == TOK_RECURSE
|| tok.type == TOK_MATCH;
}
#define syntax_error(msg) fprintf(stderr, "syntax error: %s\n", msg); exit(1)
void parse(void) {
sp = 0;
// TODO: add support for the top-level instead of this block hack
push(ST_BLOCK_BODY);
struct token tok = next();
struct token nxt = peek();
while (sp > 0) {
debug_print(tok, nxt);
// FIXME: stack underflow because we're faking the top-level with blocks
switch (pop()) {
case ST_BLOCK:
if (tok.type == TOK_OPEN_BLOCK) {
push(ST_BLOCK_CLOSE);
push(ST_BLOCK_BODY);
enter_block();
break;
}
syntax_error("expected beginning of block");
break;
case ST_BLOCK_BODY:
if (is_assignment(tok, nxt)) {
push(ST_BLOCK_CONT);
push(ST_ASSIGN);
stmt_assign(tok.data.name);
break;
}
if (is_expr(tok)) {
push(ST_BLOCK_CONT);
push(ST_EXPR);
stmt_expr();
continue;
}
continue;
case ST_BLOCK_CONT:
if (tok.type == TOK_TERMINATOR) {
push(ST_BLOCK_BODY);
break;
}
continue;
case ST_BLOCK_CLOSE:
if (tok.type == TOK_CLOSE_BLOCK) {
exit_block();
break;
}
syntax_error("expected end of block");
case ST_ASSIGN:
assert(tok.type == TOK_OPERATOR || tok.data.op == OP_EQ);
push(ST_EXPR);
break;
case ST_EXPR:
push(ST_EXPR_END);
push(ST_EXPR_HACK);
continue;
case ST_EXPR_HACK:
switch (tok.type) {
case TOK_STRING:
push(ST_EXPR_CONT);
expr_string(tok.data.string);
break;
case TOK_INTEGER:
push(ST_EXPR_CONT);
expr_integer(tok.data.int_);
break;
case TOK_IF:
push(ST_IF_END);
push(ST_IF_ELSE);
push(ST_BLOCK);
push(ST_EXPR);
enter_if();
break;
case TOK_LOOP:
push(ST_BLOCK);
push(ST_LOOP_VARS);
if (nxt.type == TOK_LABEL) {
next();
enter_loop(nxt.data.label);
} else {
enter_loop(NULL);
}
break;
case TOK_NEXT:
push(ST_EXPR_CONT);
if (nxt.type == TOK_LABEL) {
next();
expr_next(nxt.data.label);
} else {
expr_next(NULL);
}
break;
case TOK_EXIT:
push(ST_EXPR);
if (nxt.type == TOK_LABEL) {
next();
expr_exit(nxt.data.label);
} else {
expr_exit(NULL);
}
break;
case TOK_RETURN:
push(ST_EXPR);
expr_return();
break;
case TOK_NAME:
push(ST_EXPR_CONT);
expr_var(tok.data.name);
break;
case TOK_OPEN_GROUP:
push(ST_EXPR_CONT);
push(ST_GROUP);
push(ST_EXPR);
enter_group();
break;
case TOK_OPERATOR:
if (is_unary(tok.data.op)) {
push(ST_EXPR_CONT);
push(ST_EXPR_HACK);
expr_op(tok.data.op);
break;
}
syntax_error("only unary operators allowed at beginning of expression");
case TOK_OPEN_BLOCK:
push(ST_BLOCK);
continue;
default:
syntax_error("expected expression");
}
break;
case ST_EXPR_CONT:
if (is_expr(tok)) {
push(ST_EXPR_HACK);
expr_op(OP_JUXT);
continue;
}
if (tok.type == TOK_OPERATOR && is_binary(tok.data.op)) {
push(ST_EXPR_HACK);
expr_op(tok.data.op);
break;
}
continue;
case ST_EXPR_END:
exit_expr();
continue;
case ST_GROUP:
if (tok.type == TOK_CLOSE_GROUP) {
exit_group();
break;
}
syntax_error("mismatched parentheses");
case ST_IF_ELSE:
if (tok.type == TOK_ELSE) {
push(ST_BLOCK);
break;
}
continue;
case ST_IF_END:
exit_if();
continue;
case ST_LOOP_VARS:
if (is_assignment(tok, nxt)) {
push(ST_LOOP_VARS_CONT);
push(ST_ASSIGN);
cvar_init(tok.data.name);
break;
}
if (tok.type == TOK_NAME) {
push(ST_LOOP_VARS_CONT);
cvar_pass(tok.data.name);
break;
}
continue;
case ST_LOOP_VARS_CONT:
if (tok.type == TOK_SEPARATOR) {
push(ST_LOOP_VARS);
break;
}
continue;
}
tok = next();
nxt = peek();
}
if (tok.type != TOK_EOF) {
fprintf(stderr, "syntax error: finished parsing before end of file\n");
exit(1);
}
if (sp > 0) {
fprintf(stderr, "syntax error: unfinished business at end of file: %i, %i\n", sp, stack[0]);
exit(1);
}
}