diff options
Diffstat (limited to 'reader.c')
-rw-r--r-- | reader.c | 419 |
1 files changed, 0 insertions, 419 deletions
diff --git a/reader.c b/reader.c deleted file mode 100644 index b9217d3..0000000 --- a/reader.c +++ /dev/null @@ -1,419 +0,0 @@ -/* Copyright (C) 2025 Aryadev Chavali - - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for - * details. - - * You may distribute and modify this code under the terms of the GNU General - * Public License Version 2, which you should have received a copy of along with - * this program. If not, please go to <https://www.gnu.org/licenses/>. - - * Created: 2025-04-16 - * Description: Implementation of parser - */ - -#include "./reader.h" - -#include <ctype.h> -#include <string.h> - -bool is_digit(char c) -{ - return isdigit(c); -} - -bool is_alpha(char c) -{ - return isalpha(c); -} - -bool is_space(char c) -{ - return isspace(c); -} - -bool is_skip(char c) -{ - return is_space(c) || c == ';'; -} - -bool is_sym(char c) -{ - return strchr(SYM_CHARS, c) != NULL; -} - -void input_from_sv(context_t *ctx, input_t *inp, const char *name, sv_t sv) -{ - inp->name = name; - inp->str = sv_copy(&ctx->read, sv); -} - -void input_from_fp(context_t *ctx, input_t *input, const char *name, FILE *fp) -{ - input->name = name; - // TODO: Choose a best fit (i.e. maximal capacity, unused) page - page_t *page = page_create(INPUT_CHUNK_SIZE); - // chunk should be in scratch space so we can reset it later. - char *chunk = context_salloc(ctx, INPUT_CHUNK_SIZE); - - u64 total_size = 0, size_read = 0; - while (!feof(fp)) - { - size_read = fread(chunk, 1, INPUT_CHUNK_SIZE, fp); - if (size_read > 0) - { - page_rappend(&page, chunk, size_read); - total_size += size_read; - } - } - - input->str = SV((char *)page->data, total_size); - - // Memory cleanup - context_reset_scratch(ctx); - arena_attach(&ctx->read, page); -} - -bool input_eof(input_t *input) -{ - return !input || (input->offset >= input->str.size) || - (input->str.data[input->offset] == '\0'); -} - -char input_peek(input_t *input, u64 offset) -{ - if (input_eof(input) || input->offset + offset >= input->str.size) - return '\0'; - return input->str.data[input->offset + offset]; -} - -char input_next(input_t *input, u64 offset) -{ - if (input_eof(input) || input->offset + offset >= input->str.size) - return '\0'; - input->offset += offset; - return input->str.data[input->offset]; -} - -void input_skip(input_t *inp) -{ - // n + 2 lookup - sv_t current = sv_cut(inp->str, inp->offset); - sv_t lookup = sv_chop(current, 2); - while ((!input_eof(inp) && is_space(lookup.data[0])) || - lookup.data[0] == ';' || strncmp(lookup.data, "#|", 2) == 0) - { - if (lookup.data[0] == ';') - { - i64 newline = sv_find_subcstr(current, "\n", 1); - if (newline < 0) - inp->offset = inp->str.size; - else - inp->offset += newline + 1; - } - else if (strncmp(lookup.data, "#|", 2) == 0) - { - i64 offset = sv_find_subcstr(current, "|#", 2); - if (offset < 0) - inp->offset = inp->str.size; - else - inp->offset += offset + 2; - } - - inp->offset += sv_while(sv_cut(inp->str, inp->offset), is_space); - current = sv_cut(inp->str, inp->offset); - lookup = sv_chop(current, 2); - } -} - -perr_t parse_int(context_t *ctx, input_t *inp, lisp_t **ret) -{ - debug("parse_int[%lu] => ", inp->offset); - - // TODO: Parse arbitrary sized integers - (void)ctx; - - bool negative = (input_peek(inp, 0) == '-'); - sv_t current = sv_cut(inp->str, inp->offset + (negative ? 1 : 0)); - sv_t digits = sv_chop(current, sv_while(current, is_digit)); - - debug("`" PR_SV "` => ", SV_FMT(digits)); - i64 x = (negative ? -1L : 1L) * strtol(digits.data, NULL, 10); - debug("%ld\n", x); - - input_next(inp, digits.size + (negative ? 1 : 0)); - - *ret = make_int(x); - return PERR_OK; -} - -perr_t parse_sym(context_t *ctx, input_t *inp, lisp_t **ret) -{ - debug("parse_sym[%lu] => ", inp->offset); - - sv_t current = sv_cut(inp->str, inp->offset); - sv_t sym = sv_chop(current, sv_while(current, is_sym)); - debug("`" PR_SV "`\n", SV_FMT(sym)); - - if (sym.size == 3) - { - // NOTE: We can't mutate sym directly because it's on `read` space. - - // TODO: Make this beautiful please. - char buf[3]; - for (u64 i = 0; i < 3; ++i) - buf[i] = toupper(sym.data[i]); - - // NOTE: NIL symbol to actual NIL - if (strncmp(buf, "NIL", 3) == 0) - { - input_next(inp, 3); - return NIL; - } - } - - lisp_t *lsym = make_sym(ctx, sym.data, sym.size); - input_next(inp, sym.size); - *ret = lsym; - - return PERR_OK; -} - -perr_t parse_bool(context_t *ctx, input_t *inp, lisp_t **ret) -{ - (void)ctx; - debug("parse_bool[%lu] => ", inp->offset); - char c = input_peek(inp, 1); - bool b = -1; - if (c == 't') - b = true; - else if (c == 'f') - b = false; - else - return PERR_EXPECTED_BOOLEAN; - *ret = tag_bool(b); - input_next(inp, 2); - return PERR_OK; -} - -perr_t parse_cons(context_t *ctx, input_t *inp, lisp_t **ret) -{ - // TODO: Put this in a symbol table - lisp_t *lisp_dot = make_sym(ctx, ".", 1); - debug("parse_cons[%lu] => (\n", inp->offset); - inp->offset += 1; - - lisp_t *root = NIL; - lisp_t **cur = NIL; - bool dotted = false; - - while (!input_eof(inp) && input_peek(inp, 0) != ')') - { - lisp_t *lisp = NIL; - perr_t res = parse(ctx, inp, &lisp); - if (res) - return res; - - // This is cheap to do - if (lisp == lisp_dot) - { - dotted = true; - continue; - } - - if (!root) - { - root = make_cons(ctx, lisp, NIL); - cur = &root; - } - else if (!dotted) - *cur = make_cons(ctx, lisp, NIL); - else - *cur = lisp; - - if (cur && !dotted) - cur = &as_cons(*cur)->cdr; - - input_skip(inp); - } - - if (input_peek(inp, 0) != ')') - return PERR_EXPECTED_CLOSE_BRACKET; - - input_next(inp, 1); - - debug(")\n"); - *ret = root; - return PERR_OK; -} - -perr_t parse_vec(context_t *ctx, input_t *inp, lisp_t **ret) -{ - debug("parse_vec[%lu] => [\n", inp->offset); - input_next(inp, 2); - - lisp_t *lvec = make_vec(ctx, 0); - vec_t *vec = as_vec(lvec); - - while (!input_eof(inp) && input_peek(inp, 0) != ')') - { - lisp_t *lisp = NIL; - perr_t res = parse(ctx, inp, &lisp); - if (res) - return res; - - vec_append(&ctx->memory, vec, &lisp, sizeof(lisp)); - input_skip(inp); - } - - if (input_peek(inp, 0) != ')') - return PERR_EXPECTED_CLOSE_BRACKET; - - input_next(inp, 1); - - debug("]\n"); - *ret = lvec; - return PERR_OK; -} - -perr_t parse_str(context_t *ctx, input_t *inp, lisp_t **ret) -{ - debug("parse_str[%lu] => ", inp->offset); - input_next(inp, 1); // 1 for the first speechmark - sv_t sv = sv_cut(inp->str, inp->offset); - i64 size = sv_find_subcstr(sv, "\"", 1); - if (size < 0) - return PERR_EXPECTED_SPEECH_MARK; - - input_next(inp, size + 1); // 1 for that last speechmark - sv_t str_content = sv_chop(sv, size); - debug("\"" PR_SV "\"\n", SV_FMT(str_content)); - *ret = make_str(ctx, str_content.data, str_content.size); - return PERR_OK; -} - -perr_t parse_quote(context_t *ctx, input_t *inp, lisp_t **ret) -{ - char c = input_peek(inp, 0); - if (!(c == '\'' || c == '`')) - return PERR_UNEXPECTED_CHAR; - input_next(inp, 1); - sv_t prefix = {0}; - if (c == '\'') - prefix = SV("quote", 5); - else if (c == '`') - prefix = SV("quasiquote", 10); - lisp_t *root = make_cons(ctx, make_sym(ctx, prefix.data, prefix.size), NIL); - lisp_t *rest = NIL; - perr_t perr = parse(ctx, inp, &rest); - if (perr) - return perr; - CDR(root) = make_cons(ctx, rest, NIL); - *ret = root; - return PERR_OK; -} - -// TODO: Make this interactable with user once we have evaluation -perr_t parse_reader_macro(context_t *ctx, input_t *inp, lisp_t **ret) -{ - char c = input_peek(inp, 1); - if (c == '\\') - { - // character or weird base integer - TODO("Not implemented reader macro for characters or weird bases"); - } - else if (c == '(') - { - return parse_vec(ctx, inp, ret); - } - else if (c == 't' || c == 'f') - return parse_bool(ctx, inp, ret); - return PERR_UNEXPECTED_READER_MACRO_SYMBOL; -} - -static_assert(NUM_TAGS == 9); -perr_t parse(context_t *ctx, input_t *inp, lisp_t **ret) -{ - debug("parse => "); - input_skip(inp); - if (input_eof(inp)) - return PERR_EOF; - - char c = input_peek(inp, 0); - - if (is_digit(c) || (c == '-' && is_digit(input_peek(inp, 1)))) - return parse_int(ctx, inp, ret); - else if (c == '#') - return parse_reader_macro(ctx, inp, ret); - else if (is_sym(c)) - return parse_sym(ctx, inp, ret); - else if (c == '(') - return parse_cons(ctx, inp, ret); - else if (c == '\'' || c == '`') - return parse_quote(ctx, inp, ret); - else if (c == '\"') - return parse_str(ctx, inp, ret); - else - return PERR_UNEXPECTED_CHAR; -} - -perr_t parse_all(context_t *ctx, input_t *inp, vec_t *vec) -{ - while (!input_eof(inp)) - { - lisp_t *member = NIL; - perr_t err = parse(ctx, inp, &member); - - if (err) - return err; - else - vec_append(&ctx->scratch, vec, &member, sizeof(member)); - - input_skip(inp); - } - return PERR_OK; -} - -int print_perror(FILE *fp, input_t *inp, perr_t error) -{ - pos_t pos = input_offset_to_pos(inp); - fprintf(fp, "%s:%lu:%lu: %s", inp->name, pos.line, pos.col, - perr_to_cstr(error)); - switch (error) - { - case PERR_UNEXPECTED_CHAR: - fprintf(fp, "(`%c`)", input_peek(inp, 0)); - break; - case PERR_OK: - case PERR_EOF: - case PERR_EXPECTED_BOOLEAN: - case PERR_UNEXPECTED_READER_MACRO_SYMBOL: - case PERR_EXPECTED_CLOSE_BRACKET: - case PERR_EXPECTED_SPEECH_MARK: - default: - break; - } - fprintf(stderr, "\n"); - - return error; -} - -pos_t input_offset_to_pos(input_t *inp) -{ - pos_t pos = {.col = 1, .line = 1}; - for (u64 i = 0; i < inp->offset && i < inp->str.size; ++i) - { - char c = (inp->str.data[i]); - if (c == '\n') - { - ++pos.line; - pos.col = 1; - } - else - { - ++pos.col; - } - } - return pos; -} |