pass-lang/src/parse.c

247 lines
7.0 KiB
C

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lex.h"
#include "parse.h"
/*
 * Parser states held on the explicit context stack that parse() drives.
 * Each state names what the parser is waiting for when it is popped.
 * Values are sequential, starting at zero.
 */
enum state {
    ST_BLOCK = 0,      /* expects the token that opens a block */
    ST_BLOCK_BODY,     /* start of a statement (assignment or expression) in a block */
    ST_BLOCK_CONT,     /* after a statement: a terminator continues the block body */
    ST_BLOCK_CLOSE,    /* expects the token that closes a block */
    ST_ASSIGN,         /* positioned on the `=` of an assignment; an expression follows */
    ST_EXPR,           /* expects the start of an expression */
    ST_EXPR_CONT,      /* after an operand: another operand or a binary operator may follow */
    ST_GROUP,          /* expects the token that closes a parenthesised group */
    ST_IF_ELSE,        /* after an `if` block: an optional `else` block may follow */
    ST_LOOP_VARS,      /* expects a loop variable name, optionally with an assignment */
    ST_LOOP_VARS_CONT, /* after a loop variable: a separator continues the list */
};
/*
 * Returns the short tag used for a parser state in the debug trace.
 *
 * st: the state to describe.
 *
 * Returns a pointer to a string literal (never NULL). A value that is not
 * one of the enumerators of `enum state` yields "?".
 */
const char* state_name(enum state st) {
    switch (st) {
    case ST_BLOCK:
        return "{";
    case ST_BLOCK_BODY:
        return "B";
    case ST_BLOCK_CONT:
        return ";";
    case ST_BLOCK_CLOSE:
        return "}";
    case ST_ASSIGN:
        return "=";
    case ST_EXPR:
        return "x";
    case ST_EXPR_CONT:
        return "c";
    case ST_GROUP:
        return "(";
    case ST_IF_ELSE:
        return "|";
    case ST_LOOP_VARS:
        return "v";
    case ST_LOOP_VARS_CONT:
        return ",";
    }
    /*
     * Deliberately placed after the switch rather than in a `default:` label,
     * so compilers can still warn when a new enumerator is left unhandled.
     * Without this return, control would fall off the end of a non-void
     * function and the caller would use an indeterminate pointer.
     */
    return "?";
}
/* Capacity of the parser's context stack, in entries. */
#define MAX_CONTEXT 256
/* Storage for pending parser states; indices below `sp` hold live entries. */
static enum state stack[MAX_CONTEXT];
/* Number of live entries in `stack`; the next push writes stack[sp]. */
static uint32_t sp = 0;
/*
 * Emits one trace line: the tag of every state on the stack from bottom to
 * top, a space, `tok`, a space, `next`, then a newline.
 *
 * tok:  the token currently being examined.
 * next: the lookahead token.
 *
 * The stack tags and separators go to stdout; print_token() is defined
 * elsewhere and presumably writes to stdout as well.
 */
static void debug_print(struct token tok, struct token next) {
    uint32_t depth = 0;
    while (depth < sp) {
        printf("%s", state_name(stack[depth]));
        depth++;
    }
    putchar(' ');
    print_token(tok);
    putchar(' ');
    print_token(next);
    putchar('\n');
}
static void push(enum state state) {
stack[sp] = state;
sp++;
}
/*
 * Removes and returns the most recently pushed state.
 *
 * The stack must not be empty; this is checked with assert().
 */
static enum state pop(void) {
    assert(sp != 0);
    return stack[--sp];
}
/*
 * Tells whether `tok` followed by `next` begins an assignment, i.e. a name
 * followed by the `=` operator.
 *
 * tok:  the current token.
 * next: the lookahead token.
 *
 * `next.data.op` is only read once `next` is known to be an operator.
 */
static _Bool is_assignment(struct token tok, struct token next) {
    if (tok.type != TOK_NAME) {
        return false;
    }
    if (next.type != TOK_OPERATOR) {
        return false;
    }
    return next.data.op == OP_EQ;
}
/*
 * Tells whether `tok` can begin an expression: a literal (as decided by
 * is_lit()), the opening token of a group, or a name.
 */
static _Bool is_expr(struct token tok) {
    return is_lit(tok) || tok.type == TOK_OPEN_GROUP || tok.type == TOK_NAME;
}
/*
 * Reports `msg` as a syntax error on stderr and terminates the process with
 * exit status 1.
 *
 * The body is wrapped in do { ... } while (0) so the expansion is a single
 * statement. Without the wrapper, `if (cond) syntax_error("...");` would make
 * only the fprintf conditional and exit(1) would run unconditionally.
 */
#define syntax_error(msg) \
    do { \
        fprintf(stderr, "syntax error: %s\n", (msg)); \
        exit(1); \
    } while (0)
/*
 * Checks the token stream delivered by next()/peek() against the grammar,
 * using an explicit stack of parser states instead of recursion.
 *
 * Each iteration pops one state and inspects the current token `tok` (and
 * sometimes the lookahead `nxt`):
 *   - `break` out of the switch consumes `tok` and advances to the next token;
 *   - `continue` re-examines the same token with the next state on the stack.
 *
 * No result is built here; the function returns normally only when the
 * stack empties exactly at end of file. Any syntax error is reported on
 * stderr and terminates the process with exit status 1. A trace line is
 * printed through debug_print() on every iteration.
 */
void parse(void) {
    sp = 0;
    // TODO: add support for the top-level instead of this block hack
    push(ST_BLOCK_BODY);
    struct token tok = next();
    struct token nxt = peek();
    while (sp > 0) {
        debug_print(tok, nxt);
        // FIXME: stack underflow because we're faking the top-level with blocks
        switch (pop()) {
        case ST_BLOCK:
            if (tok.type == TOK_OPEN_BLOCK) {
                push(ST_BLOCK_CLOSE);
                push(ST_BLOCK_BODY);
                break;
            }
            syntax_error("expected beginning of block");
            break;
        case ST_BLOCK_BODY:
            if (is_assignment(tok, nxt)) {
                /* Consume the name; ST_ASSIGN then sees the `=`. */
                push(ST_BLOCK_CONT);
                push(ST_ASSIGN);
                break;
            }
            if (is_expr(tok)) {
                push(ST_BLOCK_CONT);
                push(ST_EXPR);
                continue;
            }
            /* Neither: the body is empty here, let the enclosing state decide. */
            continue;
        case ST_BLOCK_CONT:
            if (tok.type == TOK_TERMINATOR) {
                push(ST_BLOCK_BODY);
                break;
            }
            continue;
        case ST_BLOCK_CLOSE:
            if (tok.type == TOK_CLOSE_BLOCK) {
                break;
            }
            syntax_error("expected end of block");
            break;
        case ST_ASSIGN:
            /*
             * ST_ASSIGN is only pushed after is_assignment() succeeded, so the
             * current token must be the `=` operator. Both conditions have to
             * hold; `data.op` is only meaningful for operator tokens.
             */
            assert(tok.type == TOK_OPERATOR && tok.data.op == OP_EQ);
            push(ST_EXPR);
            break;
        case ST_EXPR:
            if (tok.type == TOK_STRING || tok.type == TOK_INTEGER) {
                push(ST_EXPR_CONT);
                break;
            }
            if (tok.type == TOK_NAME) {
                const char *name = tok.data.name;
                if (strcmp(name, "if") == 0) {
                    /* condition expression, then a block, then optional else */
                    push(ST_IF_ELSE);
                    push(ST_BLOCK);
                    push(ST_EXPR);
                    break;
                }
                if (strcmp(name, "loop") == 0) {
                    push(ST_BLOCK);
                    push(ST_LOOP_VARS);
                    if (nxt.type == TOK_LABEL) {
                        /* Skip the label; the advance below moves past it. */
                        next();
                    }
                    break;
                }
                if (strcmp(name, "next") == 0) {
                    push(ST_LOOP_VARS);
                    if (nxt.type == TOK_LABEL) {
                        next();
                    }
                    break;
                }
                if (strcmp(name, "exit") == 0) {
                    push(ST_EXPR);
                    if (nxt.type == TOK_LABEL) {
                        next();
                    }
                    break;
                }
                if (strcmp(name, "return") == 0) {
                    push(ST_EXPR);
                    break;
                }
                /* Any other name is an ordinary operand. */
                push(ST_EXPR_CONT);
                break;
            }
            if (tok.type == TOK_OPEN_GROUP) {
                push(ST_EXPR_CONT);
                push(ST_GROUP);
                push(ST_EXPR);
                break;
            }
            if (tok.type == TOK_OPERATOR && is_unary(tok.data.op)) {
                push(ST_EXPR_CONT);
                push(ST_EXPR);
                break;
            }
            syntax_error("expected expression");
            break;
        case ST_EXPR_CONT:
            if (is_expr(tok)) {
                /* An operand directly after an operand continues the expression. */
                push(ST_EXPR);
                continue;
            }
            if (tok.type == TOK_OPERATOR && is_binary(tok.data.op)) {
                push(ST_EXPR);
                break;
            }
            continue;
        case ST_GROUP:
            if (tok.type == TOK_CLOSE_GROUP) {
                break;
            }
            syntax_error("mismatched parentheses");
            break;
        case ST_IF_ELSE:
            if (tok.type == TOK_NAME && strcmp(tok.data.name, "else") == 0) {
                push(ST_BLOCK);
                break;
            }
            continue;
        case ST_LOOP_VARS:
            if (is_assignment(tok, nxt)) {
                push(ST_LOOP_VARS_CONT);
                push(ST_ASSIGN);
                break;
            }
            if (tok.type == TOK_NAME) {
                push(ST_LOOP_VARS_CONT);
                break;
            }
            continue;
        case ST_LOOP_VARS_CONT:
            if (tok.type == TOK_SEPARATOR) {
                push(ST_LOOP_VARS);
                break;
            }
            continue;
        }
        /* Reached via `break`: the current token was consumed. */
        tok = next();
        nxt = peek();
    }
    if (tok.type != TOK_EOF) {
        fprintf(stderr, "syntax error: finished parsing before end of file\n");
        exit(1);
    }
    /*
     * Defensive check: the loop above only ends once the stack is empty, so
     * this cannot currently trigger. The casts make the arguments match the
     * conversion specifiers exactly.
     */
    if (sp > 0) {
        fprintf(stderr, "syntax error: unfinished business at end of file: %u, %d\n",
                (unsigned)sp, (int)stack[0]);
        exit(1);
    }
}