247 lines
7.0 KiB
C
247 lines
7.0 KiB
C
#include <assert.h>
|
|
#include <stdbool.h>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "lex.h"
|
|
#include "parse.h"
|
|
|
|
/*
 * Parser states kept on the context stack. Each one names what the parser
 * expects to see next when that state is popped.
 */
enum state {
    ST_BLOCK,          /* an opening block token */
    ST_BLOCK_BODY,     /* an optional statement: assignment or expression */
    ST_BLOCK_CONT,     /* an optional terminator followed by more body */
    ST_BLOCK_CLOSE,    /* the closing block token */
    ST_ASSIGN,         /* the operator of an assignment, then its expression */
    ST_EXPR,           /* the start of an expression */
    ST_EXPR_CONT,      /* an optional continuation of an expression */
    ST_GROUP,          /* the closing group token */
    ST_IF_ELSE,        /* an optional "else" followed by a block */
    ST_LOOP_VARS,      /* an optional loop variable, possibly assigned */
    ST_LOOP_VARS_CONT, /* an optional separator followed by more variables */
};

/*
 * Return the short mnemonic used to render a parser state in debug traces.
 *
 * The returned pointer refers to a string literal and must not be freed.
 * Values that do not match any enumerator yield "?" instead of flowing off
 * the end of the function.
 */
const char* state_name(enum state st) {
    switch (st) {
    case ST_BLOCK:
        return "{";
    case ST_BLOCK_BODY:
        return "B";
    case ST_BLOCK_CONT:
        return ";";
    case ST_BLOCK_CLOSE:
        return "}";
    case ST_ASSIGN:
        return "=";
    case ST_EXPR:
        return "x";
    case ST_EXPR_CONT:
        return "c";
    case ST_GROUP:
        return "(";
    case ST_IF_ELSE:
        return "|";
    case ST_LOOP_VARS:
        return "v";
    case ST_LOOP_VARS_CONT:
        return ",";
    }
    /* Kept outside the switch (no `default`) so the compiler can still warn
     * when a newly added enumerator is missing a case above. */
    return "?";
}
|
|
|
|
#define MAX_CONTEXT 256
|
|
static uint32_t sp = 0;
|
|
static enum state stack[MAX_CONTEXT];
|
|
|
|
static void debug_print(struct token tok, struct token next) {
|
|
for (uint32_t i = 0; i < sp; i++) {
|
|
printf("%s", state_name(stack[i]));
|
|
}
|
|
printf(" ");
|
|
print_token(tok);
|
|
printf(" ");
|
|
print_token(next);
|
|
printf("\n");
|
|
}
|
|
|
|
|
|
static void push(enum state state) {
|
|
stack[sp] = state;
|
|
sp++;
|
|
}
|
|
|
|
static enum state pop(void) {
|
|
assert(sp != 0);
|
|
sp--;
|
|
return stack[sp];
|
|
}
|
|
|
|
static _Bool is_assignment(struct token tok, struct token next) {
|
|
return tok.type == TOK_NAME && next.type == TOK_OPERATOR && next.data.op == OP_EQ;
|
|
}
|
|
|
|
|
|
static _Bool is_expr(struct token tok) {
|
|
if (is_lit(tok) || tok.type == TOK_OPEN_GROUP) {
|
|
return true;
|
|
}
|
|
return tok.type == TOK_NAME;
|
|
}
|
|
|
|
/*
 * Report a fatal syntax error on stderr and terminate with exit status 1.
 * Wrapped in do { } while (0) so the macro expands to a single statement
 * and stays correct inside an unbraced if/else.
 */
#define syntax_error(msg) \
    do { \
        fprintf(stderr, "syntax error: %s\n", (msg)); \
        exit(1); \
    } while (0)
|
|
|
|
void parse(void) {
|
|
sp = 0;
|
|
// TODO: add support for the top-level instead of this block hack
|
|
push(ST_BLOCK_BODY);
|
|
struct token tok = next();
|
|
struct token nxt = peek();
|
|
while (sp > 0) {
|
|
debug_print(tok, nxt);
|
|
// FIXME: stack underflow because we're faking the top-level with blocks
|
|
switch (pop()) {
|
|
case ST_BLOCK:
|
|
if (tok.type == TOK_OPEN_BLOCK) {
|
|
push(ST_BLOCK_CLOSE);
|
|
push(ST_BLOCK_BODY);
|
|
break;
|
|
}
|
|
syntax_error("expected beginning of block");
|
|
break;
|
|
case ST_BLOCK_BODY:
|
|
if (is_assignment(tok, nxt)) {
|
|
push(ST_BLOCK_CONT);
|
|
push(ST_ASSIGN);
|
|
break;
|
|
}
|
|
if (is_expr(tok)) {
|
|
push(ST_BLOCK_CONT);
|
|
push(ST_EXPR);
|
|
continue;
|
|
}
|
|
continue;
|
|
case ST_BLOCK_CONT:
|
|
if (tok.type == TOK_TERMINATOR) {
|
|
push(ST_BLOCK_BODY);
|
|
break;
|
|
}
|
|
continue;
|
|
case ST_BLOCK_CLOSE:
|
|
if (tok.type == TOK_CLOSE_BLOCK) {
|
|
break;
|
|
}
|
|
syntax_error("expected end of block");
|
|
case ST_ASSIGN:
|
|
assert(tok.type == TOK_OPERATOR || tok.data.op == OP_EQ);
|
|
push(ST_EXPR);
|
|
break;
|
|
case ST_EXPR:
|
|
if (tok.type == TOK_STRING) {
|
|
push(ST_EXPR_CONT);
|
|
break;
|
|
}
|
|
if (tok.type == TOK_INTEGER) {
|
|
push(ST_EXPR_CONT);
|
|
break;
|
|
}
|
|
if (tok.type == TOK_NAME) {
|
|
char* name = tok.data.name;
|
|
if (strcmp(name, "if") == 0) {
|
|
push(ST_IF_ELSE);
|
|
push(ST_BLOCK);
|
|
push(ST_EXPR);
|
|
break;
|
|
}
|
|
if (strcmp(name, "loop") == 0) {
|
|
push(ST_BLOCK);
|
|
push(ST_LOOP_VARS);
|
|
if (nxt.type == TOK_LABEL) {
|
|
next();
|
|
}
|
|
break;
|
|
}
|
|
if (strcmp(name, "next") == 0) {
|
|
push(ST_LOOP_VARS);
|
|
if (nxt.type == TOK_LABEL) {
|
|
next();
|
|
}
|
|
break;
|
|
}
|
|
if (strcmp(name, "exit") == 0) {
|
|
push(ST_EXPR);
|
|
if (nxt.type == TOK_LABEL) {
|
|
next();
|
|
}
|
|
break;
|
|
}
|
|
if (strcmp(name, "return") == 0) {
|
|
push(ST_EXPR);
|
|
break;
|
|
}
|
|
push(ST_EXPR_CONT);
|
|
break;
|
|
}
|
|
if (tok.type == TOK_OPEN_GROUP) {
|
|
push(ST_EXPR_CONT);
|
|
push(ST_GROUP);
|
|
push(ST_EXPR);
|
|
break;
|
|
}
|
|
if (tok.type == TOK_OPERATOR && is_unary(tok.data.op)) {
|
|
push(ST_EXPR_CONT);
|
|
push(ST_EXPR);
|
|
break;
|
|
}
|
|
syntax_error("expected expression");
|
|
case ST_EXPR_CONT:
|
|
if (is_expr(tok)) {
|
|
push(ST_EXPR);
|
|
continue;
|
|
}
|
|
if (tok.type == TOK_OPERATOR && is_binary(tok.data.op)) {
|
|
push(ST_EXPR);
|
|
break;
|
|
}
|
|
continue;
|
|
case ST_GROUP:
|
|
if (tok.type == TOK_CLOSE_GROUP) {
|
|
break;
|
|
}
|
|
syntax_error("mismatched parentheses");
|
|
case ST_IF_ELSE:
|
|
if (tok.type == TOK_NAME && strcmp(tok.data.name, "else") == 0) {
|
|
push(ST_BLOCK);
|
|
break;
|
|
}
|
|
continue;
|
|
case ST_LOOP_VARS:
|
|
if (is_assignment(tok, nxt)) {
|
|
push(ST_LOOP_VARS_CONT);
|
|
push(ST_ASSIGN);
|
|
break;
|
|
}
|
|
if (tok.type == TOK_NAME) {
|
|
push(ST_LOOP_VARS_CONT);
|
|
break;
|
|
}
|
|
continue;
|
|
case ST_LOOP_VARS_CONT:
|
|
if (tok.type == TOK_SEPARATOR) {
|
|
push(ST_LOOP_VARS);
|
|
break;
|
|
}
|
|
continue;
|
|
}
|
|
tok = next();
|
|
nxt = peek();
|
|
}
|
|
if (tok.type != TOK_EOF) {
|
|
fprintf(stderr, "syntax error: finished parsing before end of file\n");
|
|
exit(1);
|
|
}
|
|
if (sp > 0) {
|
|
fprintf(stderr, "syntax error: unfinished business at end of file: %i, %i\n", sp, stack[0]);
|
|
exit(1);
|
|
}
|
|
}
|