#include #include #include #include #include #include #include "lang.h" #include "lex.h" #include "parse.h" enum state { ST_BLOCK, ST_BLOCK_BODY, ST_BLOCK_CONT, ST_BLOCK_CLOSE, ST_ASSIGN, ST_EXPR, // HACK: The existence of this state. // Also, the entire structure of the parser is ugly. ST_EXPR_HACK, ST_EXPR_CONT, ST_EXPR_END, ST_GROUP, ST_IF_ELSE, ST_IF_END, ST_LOOP_VARS, ST_LOOP_VARS_CONT, }; const char* state_name(enum state st) { switch (st) { case ST_BLOCK: return "{"; case ST_BLOCK_BODY: return "B"; case ST_BLOCK_CONT: return ";"; case ST_BLOCK_CLOSE: return "}"; case ST_ASSIGN: return "="; case ST_EXPR: return "x"; case ST_EXPR_CONT: return "c"; case ST_GROUP: return "("; case ST_IF_ELSE: return "|"; case ST_LOOP_VARS: return "v"; case ST_LOOP_VARS_CONT: return ","; case ST_EXPR_END: return "E"; case ST_EXPR_HACK: return "H"; case ST_IF_END: return "i"; } } #define MAX_CONTEXT 256 static uint32_t sp = 0; static enum state stack[MAX_CONTEXT]; static void debug_print(struct token tok, struct token next) { for (uint32_t i = 0; i < sp; i++) { printf("%s", state_name(stack[i])); } printf(" "); print_token(tok); printf(" "); print_token(next); printf("\n"); } static void push(enum state state) { stack[sp] = state; sp++; } static enum state pop(void) { assert(sp != 0); sp--; return stack[sp]; } static _Bool is_assignment(struct token tok, struct token next) { return tok.type == TOK_NAME && next.type == TOK_EQUALS; } static _Bool is_expr(struct token tok) { return is_lit(tok) || tok.type == TOK_NAME || tok.type == TOK_OPEN_GROUP || tok.type == TOK_IF || tok.type == TOK_MATCH || tok.type == TOK_FN || tok.type == TOK_LOOP || tok.type == TOK_NEXT || tok.type == TOK_EXIT || tok.type == TOK_NEXT || tok.type == TOK_RETURN || tok.type == TOK_RECURSE || tok.type == TOK_MATCH; } #define syntax_error(msg) fprintf(stderr, "syntax error: %s\n", msg); exit(1) void parse(void) { sp = 0; // TODO: add support for the top-level instead of this block hack push(ST_BLOCK_BODY); struct token tok = next(); struct token nxt = peek(); while (sp > 0) { debug_print(tok, nxt); // FIXME: stack underflow because we're faking the top-level with blocks switch (pop()) { case ST_BLOCK: if (tok.type == TOK_OPEN_BLOCK) { push(ST_BLOCK_CLOSE); push(ST_BLOCK_BODY); enter_block(); break; } syntax_error("expected beginning of block"); break; case ST_BLOCK_BODY: if (is_assignment(tok, nxt)) { push(ST_BLOCK_CONT); push(ST_ASSIGN); stmt_assign(tok.data.name); break; } if (is_expr(tok)) { push(ST_BLOCK_CONT); push(ST_EXPR); stmt_expr(); continue; } continue; case ST_BLOCK_CONT: if (tok.type == TOK_TERMINATOR) { push(ST_BLOCK_BODY); break; } continue; case ST_BLOCK_CLOSE: if (tok.type == TOK_CLOSE_BLOCK) { exit_block(); break; } syntax_error("expected end of block"); case ST_ASSIGN: assert(tok.type == TOK_OPERATOR || tok.data.op == OP_EQ); push(ST_EXPR); break; case ST_EXPR: push(ST_EXPR_END); push(ST_EXPR_HACK); continue; case ST_EXPR_HACK: switch (tok.type) { case TOK_STRING: push(ST_EXPR_CONT); expr_string(tok.data.string); break; case TOK_INTEGER: push(ST_EXPR_CONT); expr_integer(tok.data.int_); break; case TOK_IF: push(ST_IF_END); push(ST_IF_ELSE); push(ST_BLOCK); push(ST_EXPR); enter_if(); break; case TOK_LOOP: push(ST_BLOCK); push(ST_LOOP_VARS); if (nxt.type == TOK_LABEL) { next(); enter_loop(nxt.data.label); } else { enter_loop(NULL); } break; case TOK_NEXT: push(ST_EXPR); if (nxt.type == TOK_LABEL) { next(); expr_next(nxt.data.label); } else { expr_next(NULL); } break; case TOK_EXIT: push(ST_EXPR); if (nxt.type == TOK_LABEL) { next(); expr_exit(nxt.data.label); } else { expr_exit(NULL); } break; case TOK_RETURN: push(ST_EXPR); expr_return(); break; case TOK_NAME: push(ST_EXPR_CONT); expr_var(tok.data.name); break; case TOK_OPEN_GROUP: push(ST_EXPR_CONT); push(ST_GROUP); push(ST_EXPR); enter_group(); break; case TOK_OPERATOR: if (is_unary(tok.data.op)) { push(ST_EXPR_CONT); push(ST_EXPR_HACK); expr_op(tok.data.op); break; } syntax_error("only unary operators allowed at beginning of expression"); case TOK_OPEN_BLOCK: push(ST_BLOCK); continue; default: syntax_error("expected expression"); } break; case ST_EXPR_CONT: if (is_expr(tok)) { push(ST_EXPR_HACK); expr_op(OP_JUXT); continue; } if (tok.type == TOK_OPERATOR && is_binary(tok.data.op)) { push(ST_EXPR_HACK); expr_op(tok.data.op); break; } continue; case ST_EXPR_END: exit_expr(); continue; case ST_GROUP: if (tok.type == TOK_CLOSE_GROUP) { exit_group(); break; } syntax_error("mismatched parentheses"); case ST_IF_ELSE: if (tok.type == TOK_ELSE) { push(ST_BLOCK); break; } continue; case ST_IF_END: exit_if(); continue; case ST_LOOP_VARS: if (is_assignment(tok, nxt)) { push(ST_LOOP_VARS_CONT); push(ST_ASSIGN); cvar_init(tok.data.name); break; } if (tok.type == TOK_NAME) { push(ST_LOOP_VARS_CONT); cvar_pass(tok.data.name); break; } continue; case ST_LOOP_VARS_CONT: if (tok.type == TOK_SEPARATOR) { push(ST_LOOP_VARS); break; } continue; } tok = next(); nxt = peek(); } if (tok.type != TOK_EOF) { fprintf(stderr, "syntax error: finished parsing before end of file\n"); exit(1); } if (sp > 0) { fprintf(stderr, "syntax error: unfinished business at end of file: %i, %i\n", sp, stack[0]); exit(1); } }