#include #include #include #include #include "lex.h" #include "lex/indent.h" #include "io.h" static const char* const keywords[KEYWORD_COUNT] = { "comm", "assocl", "assocr", "distl", "distr", "factl", "factr", "mapl", "mapr", "unitil", "unitir", "unitel", "uniter", "comm+", "assocl+", "assocr+", "mapl+", "mapr+", "inl", "inr", "out", "halt", "if", }; static struct token simple(enum token_type type) { struct token tok = { type, 0 }; return tok; } #define MAX_STR_LEN 4096 static size_t str_index; // alternate string buffers between tokens to prevent overwriting buffer. // we're LL(1) so 2 buffers is sufficient. // NOTE: I later changed the code and it wasn't sufficient anymore, lmao. static int which_buf = 0; static char str_buf_1[MAX_STR_LEN]; static char str_buf_2[MAX_STR_LEN]; static char str_buf_3[MAX_STR_LEN]; static char* str_buf(void) { which_buf = (which_buf + 1) % 3; switch (which_buf) { case 0: return str_buf_1; case 1: return str_buf_2; case 2: return str_buf_3; } assert(false); } static _Bool is_alpha(char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); } static _Bool is_digit(char c) { return c >= '0' && c <= '9'; } static _Bool is_alphanumeric(char c) { return is_alpha(c) || is_digit(c); } static _Bool is_id_char(char c) { // TODO: allow unicode identifiers return is_alphanumeric(c) || c == '_'; } _Bool is_whitespace(char c) { return c == ' ' || c == '\t' || c == '\r' || c == '\n'; } static char* lex_identifier(void) { str_index = 0; char* buf = str_buf(); while (true) { char c = peekc(); if (!is_id_char(c)) break; nextc(); buf[str_index] = c; str_index++; } if (str_index == 0) { fprintf(stderr, "lexical error: expected identifier (possibly illegal character?)\n"); exit(1); } buf[str_index] = 0; return buf; } static struct token lex(void) { char c = peekc(); while (true) { // skip whitespace while (is_whitespace(c)) { nextc(); c = peekc(); } // skip line comments if (c == '!') { do { c = nextc(); } while (c != '\n'); } else { break; } } // syntax switch (c) { case 0: return simple(TOK_EOF); case '{': nextc(); return simple(TOK_MAP_BEGIN); case '}': nextc(); return simple(TOK_MAP_END); } char* name = lex_identifier(); // keywords for (size_t kwd = 0; kwd < KEYWORD_COUNT; kwd++) { if (strcmp(name, keywords[kwd]) == 0) { return simple((enum token_type) kwd); } } enum token_type type = TOK_JUMP; // labels if (peekc() == ':') { type = TOK_LABEL; nextc(); } struct token tok = { type, name }; return tok; } static _Bool init = false; static struct token peek_buf; struct token next(void) { if (!init) { init = true; next(); } struct token tmp = peek_buf; peek_buf = lex(); return tmp; } struct token peek(void) { return peek_buf; }