pass-lang/src/parse.c

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "lex.h"
#include "parse.h"

/*
 * Pending work items for the parser's explicit state stack.
 * Each value names what parse() expects to see next when the item is popped.
 */
enum state {
    ST_BLOCK = 0,           /* expects the token that opens a block */
    ST_BLOCK_BODY = 1,      /* optional statement: an assignment or an expression */
    ST_BLOCK_CONT = 2,      /* after a statement: a terminator continues the body */
    ST_BLOCK_CLOSE = 3,     /* expects the token that closes a block */
    ST_ASSIGN = 4,          /* positioned on the `=` of an assignment; an expression follows */
    ST_EXPR = 5,            /* expects the start of an expression */
    ST_EXPR_CONT = 6,       /* after an operand: another expression or a binary operator may follow */
    ST_GROUP = 7,           /* expects the token that closes a parenthesised group */
    ST_IF_ELSE = 8,         /* after an `if` block: an optional `else` block */
    ST_LOOP_VARS = 9,       /* optional loop variable: a name or an assignment */
    ST_LOOP_VARS_CONT = 10, /* after a loop variable: a separator introduces another */
};

/*
 * Map a parser state to the short symbol used in the debug trace.
 *
 * Returns a pointer to a string literal (never NULL, must not be freed).
 * A value that is not a known enumerator yields "?" instead of falling off
 * the end of the function, which would be undefined behaviour once the
 * caller uses the result.
 */
const char* state_name(enum state st) {
    // No `default:` label on purpose: the compiler can still warn when a new
    // enumerator is added without a matching case here.
    switch (st) {
        case ST_BLOCK:
            return "{";
        case ST_BLOCK_BODY:
            return "B";
        case ST_BLOCK_CONT:
            return ";";
        case ST_BLOCK_CLOSE:
            return "}";
        case ST_ASSIGN:
            return "=";
        case ST_EXPR:
            return "x";
        case ST_EXPR_CONT:
            return "c";
        case ST_GROUP:
            return "(";
        case ST_IF_ELSE:
            return "|";
        case ST_LOOP_VARS:
            return "v";
        case ST_LOOP_VARS_CONT:
            return ",";
    }
    // Reached only for a value outside the enumeration.
    return "?";
}

// Capacity of the parser's state stack, in entries.
#define MAX_CONTEXT 256
// Storage for the pending parser states; entries [0, sp) are live.
static enum state stack[MAX_CONTEXT];
// Number of live entries in `stack`; also the index of the next free slot.
static uint32_t sp = 0;

static void debug_print(struct token tok, struct token next) {
    for (uint32_t i = 0; i < sp; i++) {
        printf("%s", state_name(stack[i]));
    }
    printf("    ");
    print_token(tok);
    printf(" ");
    print_token(next);
    printf("\n");
}


static void push(enum state state) {
    stack[sp] = state;
    sp++;
}

/*
 * Remove and return the most recently pushed state.
 * The stack must not be empty; this is checked with assert().
 */
static enum state pop(void) {
    assert(sp != 0);
    return stack[--sp];
}

/*
 * True when `tok` is a name and the token after it (`next`) is the `=`
 * operator, i.e. the two tokens start an assignment.
 */
static _Bool is_assignment(struct token tok, struct token next) {
    if (tok.type != TOK_NAME) {
        return false;
    }
    if (next.type != TOK_OPERATOR) {
        return false;
    }
    // Only inspect the operator payload once the token is known to be an operator.
    return next.data.op == OP_EQ;
}


/*
 * True when `tok` can begin an expression as far as the block-level checks
 * are concerned: a literal, an opening group token, or a name.
 */
static _Bool is_expr(struct token tok) {
    return is_lit(tok) || tok.type == TOK_OPEN_GROUP || tok.type == TOK_NAME;
}

// Report a syntax error on stderr and terminate the process with status 1.
// The body is wrapped in do { ... } while (0) so the expansion is a single
// statement: it stays fully inside an unbraced `if` and composes with `else`.
#define syntax_error(msg) \
    do { \
        fprintf(stderr, "syntax error: %s\n", (msg)); \
        exit(1); \
    } while (0)

void parse(void) {
    sp = 0;
    // TODO: add support for the top-level instead of this block hack
    push(ST_BLOCK_BODY);
    struct token tok = next();
    struct token nxt = peek();
    while (sp > 0) {
        debug_print(tok, nxt);
        // FIXME: stack underflow because we're faking the top-level with blocks
        switch (pop()) {
            case ST_BLOCK:
                if (tok.type == TOK_OPEN_BLOCK) {
                    push(ST_BLOCK_CLOSE);
                    push(ST_BLOCK_BODY);
                    break;
                }
                syntax_error("expected beginning of block");
                break;
            case ST_BLOCK_BODY:
                if (is_assignment(tok, nxt)) {
                    push(ST_BLOCK_CONT);
                    push(ST_ASSIGN);
                    break;
                }
                if (is_expr(tok)) {
                    push(ST_BLOCK_CONT);
                    push(ST_EXPR);
                    continue;
                }
                continue;
            case ST_BLOCK_CONT:
                if (tok.type == TOK_TERMINATOR) {
                    push(ST_BLOCK_BODY);
                    break;
                }
                continue;
            case ST_BLOCK_CLOSE:
                if (tok.type == TOK_CLOSE_BLOCK) {
                    break;
                }
                syntax_error("expected end of block");
            case ST_ASSIGN:
                assert(tok.type == TOK_OPERATOR || tok.data.op == OP_EQ);
                push(ST_EXPR);
                break;
            case ST_EXPR:
                if (tok.type == TOK_STRING) {
                    push(ST_EXPR_CONT);
                    break;
                }
                if (tok.type == TOK_INTEGER) {
                    push(ST_EXPR_CONT);
                    break;
                }
                if (tok.type == TOK_NAME) {
                    char* name = tok.data.name;
                    if (strcmp(name, "if") == 0) {
                        push(ST_IF_ELSE);
                        push(ST_BLOCK);
                        push(ST_EXPR);
                        break;
                    }
                    if (strcmp(name, "loop") == 0) {
                        push(ST_BLOCK);
                        push(ST_LOOP_VARS);
                        if (nxt.type == TOK_LABEL) {
                            next();
                        }
                        break;
                    }
                    if (strcmp(name, "next") == 0) {
                        push(ST_LOOP_VARS);
                        if (nxt.type == TOK_LABEL) {
                            next();
                        }
                        break;
                    }
                    if (strcmp(name, "exit") == 0) {
                        push(ST_EXPR);
                        if (nxt.type == TOK_LABEL) {
                            next();
                        }
                        break;
                    }
                    if (strcmp(name, "return") == 0) {
                        push(ST_EXPR);
                        break;
                    }
                    push(ST_EXPR_CONT);
                    break;
                }
                if (tok.type == TOK_OPEN_GROUP) {
                    push(ST_EXPR_CONT);
                    push(ST_GROUP);
                    push(ST_EXPR);
                    break;
                }
                if (tok.type == TOK_OPERATOR && is_unary(tok.data.op)) {
                    push(ST_EXPR_CONT);
                    push(ST_EXPR);
                    break;
                }
                syntax_error("expected expression");
            case ST_EXPR_CONT:
                if (is_expr(tok)) {
                    push(ST_EXPR);
                    continue;
                }
                if (tok.type == TOK_OPERATOR && is_binary(tok.data.op)) {
                    push(ST_EXPR);
                    break;
                }
                continue;
            case ST_GROUP:
                if (tok.type == TOK_CLOSE_GROUP) {
                    break;
                }
                syntax_error("mismatched parentheses");
            case ST_IF_ELSE:
                if (tok.type == TOK_NAME && strcmp(tok.data.name, "else") == 0) {
                    push(ST_BLOCK);
                    break;
                }
                continue;
            case ST_LOOP_VARS:
                if (is_assignment(tok, nxt)) {
                    push(ST_LOOP_VARS_CONT);
                    push(ST_ASSIGN);
                    break;
                }
                if (tok.type == TOK_NAME) {
                    push(ST_LOOP_VARS_CONT);
                    break;
                }
                continue;
            case ST_LOOP_VARS_CONT:
                if (tok.type == TOK_SEPARATOR) {
                    push(ST_LOOP_VARS);
                    break;
                }
                continue;
        }
        tok = next();
        nxt = peek();
    }
    if (tok.type != TOK_EOF) {
        fprintf(stderr, "syntax error: finished parsing before end of file\n");
        exit(1);
    }
    if (sp > 0) {
        fprintf(stderr, "syntax error: unfinished business at end of file: %i, %i\n", sp, stack[0]);
        exit(1);
    }
}