diff --git a/Makefile b/Makefile index 9584e56..729d4e7 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ SHELL = /bin/sh CFLAGS = -std=c99 -pedantic -Wextra -Os LDFLAGS = -lc -OBJECTS = asm.o io.o ir.o lex.o main.o x86encode.o +OBJECTS = asm.o io.o ir.o lex.o main.o parse.o x86encode.o .PHONY: passc passc: .bin $(OBJECTS) diff --git a/src/io.c b/src/io.c index aaea749..6c946f9 100644 --- a/src/io.c +++ b/src/io.c @@ -2,6 +2,7 @@ #include #include +#include #include #ifdef __unix__ diff --git a/src/io.h b/src/io.h index b981f65..f865434 100644 --- a/src/io.h +++ b/src/io.h @@ -1,8 +1,8 @@ -#ifndef _IO_H -#define _IO_H +#ifndef IO_H +#define IO_H +#include #include -#include extern uint32_t here; diff --git a/src/ir.h b/src/ir.h index cd61831..9aec2de 100644 --- a/src/ir.h +++ b/src/ir.h @@ -28,6 +28,7 @@ label enter(uint32_t retc); /// plus the return values declared by the call to `enter`. void leave(var* args); +label declare_continue(uint32_t retc); /* NOTE(review): the only declaration in this hunk without a `///` doc comment; presumably declares a label for continuing a loop that carries retc values -- confirm and document. */ /// Declare a new label in the innermost block. 
/// diff --git a/src/lex.c b/src/lex.c index 18f835c..5c294c6 100644 --- a/src/lex.c +++ b/src/lex.c @@ -1,12 +1,45 @@ #include #include #include +#include #include -#include +#include #include "lex.h" #include "io.h" +_Bool is_unary(enum operator_ op) { + return op == OP_SUB + || op == OP_INV + || op == OP_NOT; +} + +_Bool is_binary(enum operator_ op) { + return op == OP_EQ + || op == OP_ADD + || op == OP_SUB + || op == OP_MUL + || op == OP_DIV + || op == OP_MOD + || op == OP_AND + || op == OP_OR + || op == OP_XOR + || op == OP_SHL + || op == OP_SAR + || op == OP_SHR + || op == OP_GT + || op == OP_LT + || op == OP_GTE + || op == OP_LTE + || op == OP_NE + || op == OP_TYPE + || op == OP_FUN; +} + +_Bool is_lit(struct token tok) { + return tok.type == TOK_INTEGER || tok.type == TOK_STRING || tok.type == TOK_NAME; +} + static _Bool is_whitespace(char c) { return c == ' ' || c == '\t' || c == '\r' || c == '\n'; } @@ -153,6 +186,18 @@ static struct token lex_integer(void) { static size_t str_index; static char str_buf[MAX_STR_LEN]; +static char* leak_buf(void) { /* Return a heap-allocated, NUL-terminated copy of the first str_index bytes of str_buf; nothing frees the copy. */ + // FIXME: memory leak + char* str = malloc(str_index + 1); + if (str == NULL) { /* malloc may fail; never hand a null pointer to memcpy */ + fprintf(stderr, "out of memory\n"); + exit(1); + } + memcpy(str, str_buf, str_index); + str[str_index] = 0; + return str; +} + static char* lex_string(void) { char* buf; size_t len = 1; @@ -185,7 +230,7 @@ static char* lex_string(void) { skip(1); } str_buf[str_index] = 0; - return str_buf; + return leak_buf(); } static char* lex_identifier(void) { @@ -208,7 +253,7 @@ static char* lex_identifier(void) { exit(1); } str_buf[str_index] = 0; - return str_buf; + return leak_buf(); } struct token lex(void) { @@ -348,3 +393,44 @@ struct token lex(void) { struct token tok = { TOK_NAME, data }; return tok; } + +void print_token(struct token tok) { /* Write a textual rendering of tok to stdout, without a trailing newline. */ + switch (tok.type) { + case TOK_NAME: + fprintf(stdout, "%s", tok.data.name); + break; + case TOK_LABEL: + fprintf(stdout, "'%s", tok.data.label); + break; + case TOK_INTEGER: + fprintf(stdout, "%zi", tok.data.int_); + break; + case TOK_STRING: + fprintf(stdout, 
"\"%s\"", tok.data.string); + break; + case TOK_OPEN_GROUP: + fprintf(stdout, "("); + break; + case TOK_CLOSE_GROUP: + fprintf(stdout, ")"); + break; + case TOK_OPEN_BLOCK: + fprintf(stdout, "{"); + break; + case TOK_CLOSE_BLOCK: + fprintf(stdout, "}"); + break; + case TOK_TERMINATOR: + fprintf(stdout, ";"); + break; + case TOK_SEPARATOR: + fprintf(stdout, ","); + break; + case TOK_OPERATOR: + fprintf(stdout, "OP: %i", tok.data.op); + break; + case TOK_EOF: + fprintf(stdout, "EOF"); + break; + } +} diff --git a/src/lex.h b/src/lex.h index efd7ec6..b981382 100644 --- a/src/lex.h +++ b/src/lex.h @@ -1,6 +1,7 @@ #ifndef LEX_H #define LEX_H +#include #include enum token_type { @@ -59,6 +60,12 @@ struct token { union token_data data; }; +_Bool is_unary(enum operator_ op); +_Bool is_binary(enum operator_ op); +_Bool is_lit(struct token tok); + struct token lex(void); +void print_token(struct token tok); + #endif diff --git a/src/main.c b/src/main.c index 148fa7e..90b81d0 100644 --- a/src/main.c +++ b/src/main.c @@ -9,7 +9,7 @@ #include "io.h" #include "ir.h" -#include "lex.h" +#include "parse.h" #define ELF_HEADER_SIZE 0xb0 @@ -76,48 +76,7 @@ int main(int argc, char** argv) { } open_files(argv[2], argv[1]); - struct token tok; - do { - tok = lex(); - switch (tok.type) { - case TOK_NAME: - fprintf(stdout, "%s\n", tok.data.name); - break; - case TOK_LABEL: - fprintf(stdout, "'%s\n", tok.data.label); - break; - case TOK_INTEGER: - fprintf(stdout, "%zi\n", tok.data.int_); - break; - case TOK_STRING: - fprintf(stdout, "\"%s\"\n", tok.data.string); - break; - case TOK_OPEN_GROUP: - fprintf(stdout, "(\n"); - break; - case TOK_CLOSE_GROUP: - fprintf(stdout, ")\n"); - break; - case TOK_OPEN_BLOCK: - fprintf(stdout, "{\n"); - break; - case TOK_CLOSE_BLOCK: - fprintf(stdout, "}\n"); - break; - case TOK_TERMINATOR: - fprintf(stdout, ";\n"); - break; - case TOK_SEPARATOR: - fprintf(stdout, ",\n"); - break; - case TOK_OPERATOR: - fprintf(stdout, "OP: %i\n", tok.data.op); - break; - 
case TOK_EOF: - fprintf(stdout, "EOF\n"); - break; - } - } while (tok.type != TOK_EOF); + parse(); reserve(ELF_HEADER_SIZE); size_t entry_point = compile(); diff --git a/src/parse.c b/src/parse.c new file mode 100644 index 0000000..4d3cd7f --- /dev/null +++ b/src/parse.c @@ -0,0 +1,240 @@ +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "lex.h" +#include "parse.h" + +enum state { + ST_BLOCK_OPEN, + ST_BLOCK, + ST_ASSIGN, + ST_EXPR, + ST_EXPR_CONT, + ST_GROUP, + ST_IF_ELSE, + ST_LOOP_VARS, + ST_LOOP_VARS_CONT, + ST_LABEL, +}; + +const char* state_name(enum state st) { /* Map a parser state to the short tag debug_print shows for it. */ + switch (st) { + case ST_BLOCK_OPEN: + return "{"; + case ST_BLOCK: + return ";"; + case ST_ASSIGN: + return "="; + case ST_EXPR: + return "x"; + case ST_EXPR_CONT: + return "c"; + case ST_GROUP: + return "("; + case ST_IF_ELSE: + return "|"; + case ST_LOOP_VARS: + return "v"; + case ST_LOOP_VARS_CONT: + return ","; + case ST_LABEL: + return "'"; + } + return "?"; /* value outside enum state: still return a string instead of falling off the end */ +} + +#define MAX_CONTEXT 256 +static uint32_t sp = 0; +static enum state stack[MAX_CONTEXT]; + +static void debug_print(struct token tok, struct token next) { + for (uint32_t i = 0; i < sp; i++) { + printf("%s", state_name(stack[i])); + } + printf(" "); + print_token(tok); + printf(" "); + print_token(next); + printf("\n"); +} + + +static void push(enum state state) { /* Push a pending state; exits with an error rather than writing past the end of stack. */ + if (sp >= MAX_CONTEXT) { + fprintf(stderr, "syntax error: nesting too deep\n"); + exit(1); + } + stack[sp] = state; + sp++; +} + +static enum state pop(void) { + assert(sp != 0); + sp--; + return stack[sp]; +} + +static _Bool is_assignment(struct token tok, struct token next) { + return tok.type == TOK_NAME && next.type == TOK_OPERATOR && next.data.op == OP_EQ; +} + + +static _Bool is_expr(struct token tok) { + if (is_lit(tok) || tok.type == TOK_OPEN_GROUP) { + return true; + } + return tok.type == TOK_NAME; +} + +#define syntax_error(msg) do { fprintf(stderr, "syntax error: %s\n", (msg)); exit(1); } while (0) + +void parse(void) { /* Check the token stream from lex() against the grammar using an explicit stack of pending states; prints a debug trace and exits on a syntax error. */ + // TODO: add support for the top-level instead of this block hack + push(ST_BLOCK); + struct token tok = lex(); + struct token next = lex(); + while 
(sp > 0) { + debug_print(tok, next); + // FIXME: stack underflow because we're faking the top-level with blocks + switch (pop()) { + case ST_BLOCK_OPEN: + if (tok.type != TOK_OPEN_BLOCK) { + syntax_error("expected open block (`{`)"); + } + push(ST_BLOCK); + break; + case ST_BLOCK: + if (tok.type == TOK_CLOSE_BLOCK) { + break; + } + if (tok.type == TOK_TERMINATOR) { + push(ST_BLOCK); + break; + } + if (is_assignment(tok, next)) { + push(ST_BLOCK); + push(ST_ASSIGN); + break; + } + if (is_expr(tok)) { + push(ST_BLOCK); + push(ST_EXPR); + continue; + } + break; + case ST_ASSIGN: + assert(tok.type == TOK_OPERATOR && tok.data.op == OP_EQ); /* both must hold: the pending token is the `=` that is_assignment saw as lookahead */ + push(ST_EXPR); + break; + case ST_EXPR: + push(ST_EXPR_CONT); + if (tok.type == TOK_STRING) { + break; + } + if (tok.type == TOK_INTEGER) { + break; + } + if (tok.type == TOK_NAME) { + char* name = tok.data.name; + if (strcmp(name, "if") == 0) { + push(ST_IF_ELSE); + push(ST_BLOCK_OPEN); + push(ST_EXPR); + break; + } + if (strcmp(name, "loop") == 0) { + push(ST_BLOCK_OPEN); + push(ST_LOOP_VARS); + if (next.type == TOK_LABEL) { + push(ST_LABEL); + } + break; + } + if (strcmp(name, "next") == 0) { + push(ST_LOOP_VARS); + if (next.type == TOK_LABEL) { + push(ST_LABEL); + } + break; + } + if (strcmp(name, "exit") == 0) { + push(ST_EXPR); + if (next.type == TOK_LABEL) { + push(ST_LABEL); + } + break; + } + if (strcmp(name, "return") == 0) { + push(ST_EXPR); + break; + } + break; + } + if (tok.type == TOK_OPEN_GROUP) { + push(ST_GROUP); + push(ST_EXPR); + break; + } + if (tok.type == TOK_OPERATOR && is_unary(tok.data.op)) { + push(ST_EXPR); + break; + } + syntax_error("expected expression"); + case ST_EXPR_CONT: + if (is_expr(tok)) { + push(ST_EXPR); + continue; + } + if (tok.type == TOK_OPERATOR && is_binary(tok.data.op)) { + push(ST_EXPR); + break; + } + continue; + case ST_GROUP: + if (tok.type == TOK_CLOSE_GROUP) { + break; + } + syntax_error("mismatched parentheses"); + case ST_IF_ELSE: + if (tok.type == TOK_NAME && 
strcmp(tok.data.name, "else") == 0) { + push(ST_BLOCK_OPEN); + break; + } + continue; + case ST_LOOP_VARS: + if (is_assignment(tok, next)) { + push(ST_LOOP_VARS_CONT); + push(ST_ASSIGN); + break; + } + if (tok.type == TOK_NAME) { + push(ST_LOOP_VARS_CONT); + break; + } + continue; + case ST_LOOP_VARS_CONT: + if (tok.type == TOK_SEPARATOR) { + push(ST_LOOP_VARS); + break; + } + continue; + case ST_LABEL: + assert(tok.type == TOK_LABEL); + break; + } + tok = next; + next = lex(); + } + if (tok.type != TOK_EOF) { + fprintf(stderr, "syntax error: finished parsing before end of file\n"); + exit(1); + } + if (sp > 0) { + fprintf(stderr, "syntax error: unfinished business at end of file: %u, %i\n", (unsigned)sp, (int)stack[0]); /* sp is uint32_t, so print it as unsigned */ + exit(1); + } +} diff --git a/src/parse.h b/src/parse.h new file mode 100644 index 0000000..1acd6dc --- /dev/null +++ b/src/parse.h @@ -0,0 +1,6 @@ +#ifndef PARSE_H +#define PARSE_H + +void parse(void); + +#endif