aboutsummaryrefslogtreecommitdiff
path: root/reader.c
diff options
context:
space:
mode:
author Aryadev Chavali <aryadev@aryadevchavali.com> 2025-05-14 21:12:58 +0100
committer Aryadev Chavali <aryadev@aryadevchavali.com> 2025-05-15 22:25:45 +0100
commit 12de1e8db90bccd5a0eefd21075f07c7b7e3dfaa (patch)
tree 0434141f2bfd24207a2864f613a1c2e3ee7181fc /reader.c
parent ba5c0a4579ece5d53c009a14d00e683e70b982f4 (diff)
download oats-12de1e8db90bccd5a0eefd21075f07c7b7e3dfaa.tar.gz
oats-12de1e8db90bccd5a0eefd21075f07c7b7e3dfaa.tar.bz2
oats-12de1e8db90bccd5a0eefd21075f07c7b7e3dfaa.zip
Refactor for cleanliness
Move files into separate folders for ease of reading, include source directory so we can use angle bracket includes, adjust build system to make directories for objects
Diffstat (limited to 'reader.c')
-rw-r--r-- reader.c 419
1 files changed, 0 insertions, 419 deletions
diff --git a/reader.c b/reader.c
deleted file mode 100644
index b9217d3..0000000
--- a/reader.c
+++ /dev/null
@@ -1,419 +0,0 @@
-/* Copyright (C) 2025 Aryadev Chavali
-
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
- * details.
-
- * You may distribute and modify this code under the terms of the GNU General
- * Public License Version 2, which you should have received a copy of along with
- * this program. If not, please go to <https://www.gnu.org/licenses/>.
-
- * Created: 2025-04-16
- * Description: Implementation of parser
- */
-
-#include "./reader.h"
-
-#include <ctype.h>
-#include <string.h>
-
-// Report whether `c` is a decimal digit according to isdigit().
-bool is_digit(char c)
-{
-  // The <ctype.h> classifiers require a value representable as unsigned char
-  // (or EOF); a negative plain char is undefined behaviour, so convert first.
-  return isdigit((unsigned char)c) != 0;
-}
-
-// Report whether `c` is alphabetic according to isalpha().
-bool is_alpha(char c)
-{
-  // The <ctype.h> classifiers require a value representable as unsigned char
-  // (or EOF); a negative plain char is undefined behaviour, so convert first.
-  return isalpha((unsigned char)c) != 0;
-}
-
-// Report whether `c` is whitespace according to isspace().
-bool is_space(char c)
-{
-  // The <ctype.h> classifiers require a value representable as unsigned char
-  // (or EOF); a negative plain char is undefined behaviour, so convert first.
-  return isspace((unsigned char)c) != 0;
-}
-
-// Report whether `c` is skippable: either whitespace (per is_space) or the
-// `;` character.
-bool is_skip(char c)
-{
-  if (c == ';')
-    return true;
-  return is_space(c);
-}
-
-// Report whether `c` is one of the characters listed in SYM_CHARS.
-bool is_sym(char c)
-{
-  // strchr() treats the terminating NUL as part of the string, so searching
-  // for '\0' would always succeed; reject it explicitly.
-  return c != '\0' && strchr(SYM_CHARS, c) != NULL;
-}
-
-// Initialise `inp` from a string view: the text is duplicated through
-// sv_copy() using ctx->read, and `name` is stored as given (the pointer is not
-// copied).  The cursor (inp->offset) is not touched here.
-void input_from_sv(context_t *ctx, input_t *inp, const char *name, sv_t sv)
-{
-  sv_t duplicate = sv_copy(&ctx->read, sv);
-
-  inp->str = duplicate;
-  inp->name = name;
-}
-
-// Initialise `input` by reading the remainder of `fp` into a fresh page.
-//
-// The stream is read in INPUT_CHUNK_SIZE pieces through a scratch buffer and
-// appended to the page; once finished the scratch space is reset and the page
-// is attached to ctx->read.  `name` is stored as given (pointer not copied).
-// The cursor (input->offset) is not touched here.
-void input_from_fp(context_t *ctx, input_t *input, const char *name, FILE *fp)
-{
-  input->name = name;
-  // TODO: Choose a best fit (i.e. maximal capacity, unused) page
-  page_t *page = page_create(INPUT_CHUNK_SIZE);
-  // chunk should be in scratch space so we can reset it later.
-  char *chunk = context_salloc(ctx, INPUT_CHUNK_SIZE);
-
-  u64 total_size = 0;
-  for (;;)
-  {
-    u64 size_read = fread(chunk, 1, INPUT_CHUNK_SIZE, fp);
-    if (size_read > 0)
-    {
-      page_rappend(&page, chunk, size_read);
-      total_size += size_read;
-    }
-    // fread() only returns a short count at end-of-file or on a read error.
-    // Testing the count (instead of looping on !feof) also terminates when
-    // the stream is in an error state, where feof() would never become true.
-    if (size_read < INPUT_CHUNK_SIZE)
-      break;
-  }
-
-  input->str = SV((char *)page->data, total_size);
-
-  // Memory cleanup
-  context_reset_scratch(ctx);
-  arena_attach(&ctx->read, page);
-}
-
-// Report whether there is nothing left to read from `input`.  That is the
-// case when `input` is NULL, when the cursor is at or beyond the end of the
-// text, or when the character under the cursor is NUL.
-bool input_eof(input_t *input)
-{
-  if (!input)
-    return true;
-  if (input->offset >= input->str.size)
-    return true;
-  return input->str.data[input->offset] == '\0';
-}
-
-// Return the character `offset` positions ahead of the cursor without moving
-// the cursor.  Yields '\0' when the input is exhausted or the requested
-// position lies at or beyond the end of the text.
-char input_peek(input_t *input, u64 offset)
-{
-  if (input_eof(input))
-    return '\0';
-
-  u64 target = input->offset + offset;
-  if (target >= input->str.size)
-    return '\0';
-
-  return input->str.data[target];
-}
-
-// Advance the cursor by `offset` characters and return the character it then
-// rests on.
-//
-// If the step would reach or pass the end of the text, the cursor is clamped
-// to the end and '\0' is returned.  Clamping (rather than leaving the cursor
-// where it was) matters: a token that ends exactly on the last character must
-// still be consumed, otherwise the caller sees the same token again forever.
-// When the input is already exhausted nothing moves and '\0' is returned.
-char input_next(input_t *input, u64 offset)
-{
-  if (input_eof(input))
-    return '\0';
-
-  // input_eof() guarantees input->offset < input->str.size here, so the
-  // subtraction cannot wrap; comparing against it also avoids overflow in
-  // `input->offset + offset`.
-  u64 remaining = input->str.size - input->offset;
-  if (offset >= remaining)
-  {
-    input->offset = input->str.size;
-    return '\0';
-  }
-
-  input->offset += offset;
-  return input->str.data[input->offset];
-}
-
-// Move the cursor past anything that carries no meaning for the parser:
-// whitespace, `;` line comments (up to and including the newline) and
-// `#| ... |#` block comments.  An unterminated comment consumes the rest of
-// the input.
-//
-// Every character inspection goes through input_eof()/input_peek(), so no
-// byte at or beyond the end of the text is ever read.
-void input_skip(input_t *inp)
-{
-  while (!input_eof(inp))
-  {
-    char c = input_peek(inp, 0);
-    sv_t current = sv_cut(inp->str, inp->offset);
-
-    if (c == ';')
-    {
-      // Line comment: resume after the newline, or at the end if there is none
-      i64 newline = sv_find_subcstr(current, "\n", 1);
-      if (newline < 0)
-        inp->offset = inp->str.size;
-      else
-        inp->offset += newline + 1;
-    }
-    else if (c == '#' && input_peek(inp, 1) == '|')
-    {
-      // Block comment: resume after the closing "|#"
-      // NOTE(review): the search starts at the opening "#|", so the text
-      // "#|#" is treated as a complete comment - confirm this is intended.
-      i64 offset = sv_find_subcstr(current, "|#", 2);
-      if (offset < 0)
-        inp->offset = inp->str.size;
-      else
-        inp->offset += offset + 2;
-    }
-    else if (!is_space(c))
-    {
-      // Something meaningful: stop here
-      break;
-    }
-
-    // Swallow the whitespace run at the cursor (if any) before looking again
-    inp->offset += sv_while(sv_cut(inp->str, inp->offset), is_space);
-  }
-}
-
-// Parse an optionally negative decimal integer at the cursor, store it in
-// `*ret` via make_int() and move the cursor past it.  Always returns PERR_OK.
-//
-// The digits are accumulated by hand, bounded by the length of the digit run,
-// so nothing outside the input text is read (the text is not guaranteed to be
-// NUL terminated).  Magnitudes that do not fit saturate at the largest
-// positive value instead of overflowing.
-perr_t parse_int(context_t *ctx, input_t *inp, lisp_t **ret)
-{
-  debug("parse_int[%lu] => ", inp->offset);
-
-  // TODO: Parse arbitrary sized integers
-  (void)ctx;
-
-  bool negative = (input_peek(inp, 0) == '-');
-  u64 sign_len = negative ? 1 : 0;
-  sv_t current = sv_cut(inp->str, inp->offset + sign_len);
-  sv_t digits = sv_chop(current, sv_while(current, is_digit));
-
-  debug("`" PR_SV "` => ", SV_FMT(digits));
-
-  // Largest magnitude we keep; assumes u64/i64 are the 64-bit unsigned/signed
-  // integer types their names suggest - TODO confirm.
-  const u64 limit = ~(u64)0 >> 1;
-  u64 magnitude = 0;
-  for (u64 i = 0; i < digits.size; ++i)
-  {
-    u64 digit = (u64)(digits.data[i] - '0');
-    // Would magnitude * 10 + digit exceed the limit?
-    if (magnitude > (limit - digit) / 10)
-    {
-      magnitude = limit;
-      break;
-    }
-    magnitude = magnitude * 10 + digit;
-  }
-
-  i64 x = negative ? -(i64)magnitude : (i64)magnitude;
-  debug("%ld\n", x);
-
-  // The sign and digits were all read from inside the text, so this cannot
-  // move the cursor beyond the end - and it still consumes the token when it
-  // is the very last thing in the input.
-  inp->offset += digits.size + sign_len;
-
-  *ret = make_int(x);
-  return PERR_OK;
-}
-
-// Parse a symbol (a run of is_sym characters) at the cursor and move the
-// cursor past it.  The three-letter symbol NIL, in any letter case, yields
-// NIL in `*ret`; anything else yields a symbol from make_sym().  Always
-// returns PERR_OK.
-perr_t parse_sym(context_t *ctx, input_t *inp, lisp_t **ret)
-{
-  debug("parse_sym[%lu] => ", inp->offset);
-
-  sv_t current = sv_cut(inp->str, inp->offset);
-  sv_t sym = sv_chop(current, sv_while(current, is_sym));
-  debug("`" PR_SV "`\n", SV_FMT(sym));
-
-  // NOTE: NIL symbol to actual NIL.  Compared character by character so the
-  // text in `read` space is never mutated; unsigned char casts keep toupper()
-  // well defined for every byte value.
-  if (sym.size == 3 && toupper((unsigned char)sym.data[0]) == 'N' &&
-      toupper((unsigned char)sym.data[1]) == 'I' &&
-      toupper((unsigned char)sym.data[2]) == 'L')
-  {
-    inp->offset += 3;
-    // Report the value through *ret and return a proper status code (the
-    // value NIL itself is not a perr_t).
-    *ret = NIL;
-    return PERR_OK;
-  }
-
-  *ret = make_sym(ctx, sym.data, sym.size);
-  // `sym` lies inside the text, so this stays in bounds and also consumes a
-  // symbol that ends exactly at the end of the input.
-  inp->offset += sym.size;
-
-  return PERR_OK;
-}
-
-// Parse a two character boolean literal whose second character is `t` (true)
-// or `f` (false); the first character is not inspected here.  On success the
-// tagged boolean is stored in `*ret`, the cursor moves past both characters
-// and PERR_OK is returned.  Any other second character yields
-// PERR_EXPECTED_BOOLEAN and leaves the cursor alone.
-perr_t parse_bool(context_t *ctx, input_t *inp, lisp_t **ret)
-{
-  (void)ctx;
-  debug("parse_bool[%lu] => ", inp->offset);
-
-  bool value;
-  switch (input_peek(inp, 1))
-  {
-  case 't':
-    value = true;
-    break;
-  case 'f':
-    value = false;
-    break;
-  default:
-    return PERR_EXPECTED_BOOLEAN;
-  }
-
-  *ret = tag_bool(value);
-  // input_peek() returned a real character for position +1, so +2 is at most
-  // the end of the text.  Advancing directly also consumes a literal that is
-  // the last thing in the input.
-  inp->offset += 2;
-  return PERR_OK;
-}
-
-// Parse a parenthesised list.  The cursor is expected to rest on `(`.
-//
-// Elements are parsed with parse() and chained with make_cons(); an element
-// equal to the `.` symbol switches to dotted mode, where the next element is
-// stored directly as the tail instead of being wrapped in a new cons.  An
-// empty list yields NIL.  Returns the first error reported by parse(), or
-// PERR_EXPECTED_CLOSE_BRACKET when the input ends before `)`.
-perr_t parse_cons(context_t *ctx, input_t *inp, lisp_t **ret)
-{
-  // TODO: Put this in a symbol table
-  // NOTE(review): the dot is detected by pointer equality below, which
-  // presumably relies on make_sym returning the same object for equal names -
-  // confirm.
-  lisp_t *lisp_dot = make_sym(ctx, ".", 1);
-  debug("parse_cons[%lu] => (\n", inp->offset);
-  inp->offset += 1;
-  // Skip blanks and comments after the bracket so that "( )" is read as the
-  // empty list rather than an attempt to parse `)` as an element.
-  input_skip(inp);
-
-  lisp_t *root = NIL;
-  lisp_t **cur = NIL;
-  bool dotted = false;
-
-  while (!input_eof(inp) && input_peek(inp, 0) != ')')
-  {
-    lisp_t *lisp = NIL;
-    perr_t res = parse(ctx, inp, &lisp);
-    if (res)
-      return res;
-
-    // This is cheap to do
-    if (lisp == lisp_dot)
-    {
-      dotted = true;
-      continue;
-    }
-
-    if (!root)
-    {
-      root = make_cons(ctx, lisp, NIL);
-      cur = &root;
-    }
-    else if (!dotted)
-      *cur = make_cons(ctx, lisp, NIL);
-    else
-      *cur = lisp;
-
-    // Point `cur` at the cdr slot the next element should fill
-    if (cur && !dotted)
-      cur = &as_cons(*cur)->cdr;
-
-    input_skip(inp);
-  }
-
-  if (input_peek(inp, 0) != ')')
-    return PERR_EXPECTED_CLOSE_BRACKET;
-
-  // The cursor is on `)`, so stepping by one stays in bounds and consumes the
-  // bracket even when it is the final character of the input.
-  inp->offset += 1;
-
-  debug(")\n");
-  *ret = root;
-  return PERR_OK;
-}
-
-// Parse a vector literal.  The cursor is expected to rest on the two
-// character opener (parse_reader_macro dispatches here for `#(`); elements
-// are parsed with parse() and appended to a fresh vector until `)`.
-// Returns the first error reported by parse(), or
-// PERR_EXPECTED_CLOSE_BRACKET when the input ends before `)`.
-perr_t parse_vec(context_t *ctx, input_t *inp, lisp_t **ret)
-{
-  debug("parse_vec[%lu] => [\n", inp->offset);
-  // Step over the opener directly so it is consumed even when nothing follows
-  // it; otherwise parse() would be handed the same `#(` again and recurse
-  // without bound.
-  inp->offset += 2;
-  // Skip blanks and comments so that "#( )" is read as an empty vector.
-  input_skip(inp);
-
-  lisp_t *lvec = make_vec(ctx, 0);
-  vec_t *vec = as_vec(lvec);
-
-  while (!input_eof(inp) && input_peek(inp, 0) != ')')
-  {
-    lisp_t *lisp = NIL;
-    perr_t res = parse(ctx, inp, &lisp);
-    if (res)
-      return res;
-
-    vec_append(&ctx->memory, vec, &lisp, sizeof(lisp));
-    input_skip(inp);
-  }
-
-  if (input_peek(inp, 0) != ')')
-    return PERR_EXPECTED_CLOSE_BRACKET;
-
-  // The cursor is on `)`, so stepping by one stays in bounds and consumes the
-  // bracket even when it is the final character of the input.
-  inp->offset += 1;
-
-  debug("]\n");
-  *ret = lvec;
-  return PERR_OK;
-}
-
-// Parse a string literal delimited by speech marks.  The cursor is expected
-// to rest on the opening `"`; the content runs up to the next `"` (there is no
-// escape handling).  On success the string is stored in `*ret` via make_str()
-// and the cursor moves past the closing mark.  Returns
-// PERR_EXPECTED_SPEECH_MARK when either mark is missing.
-perr_t parse_str(context_t *ctx, input_t *inp, lisp_t **ret)
-{
-  debug("parse_str[%lu] => ", inp->offset);
-  if (input_peek(inp, 0) != '"')
-    return PERR_EXPECTED_SPEECH_MARK;
-
-  inp->offset += 1; // 1 for the first speechmark
-  sv_t sv = sv_cut(inp->str, inp->offset);
-  i64 size = sv_find_subcstr(sv, "\"", 1);
-  if (size < 0)
-    return PERR_EXPECTED_SPEECH_MARK;
-
-  // The closing mark was found inside the text, so this stays in bounds and
-  // also consumes a string that ends exactly at the end of the input.
-  inp->offset += size + 1; // 1 for that last speechmark
-  sv_t str_content = sv_chop(sv, size);
-  debug("\"" PR_SV "\"\n", SV_FMT(str_content));
-  *ret = make_str(ctx, str_content.data, str_content.size);
-  return PERR_OK;
-}
-
-// Parse a quoted form: `'x` becomes the list (quote x) and a backtick form
-// becomes (quasiquote x).  Returns PERR_UNEXPECTED_CHAR when the cursor is not
-// on one of those two characters, or whatever error parse() reports for the
-// quoted expression.
-perr_t parse_quote(context_t *ctx, input_t *inp, lisp_t **ret)
-{
-  char c = input_peek(inp, 0);
-  if (!(c == '\'' || c == '`'))
-    return PERR_UNEXPECTED_CHAR;
-  // `c` is a real character, so one step stays in bounds.  Stepping directly
-  // consumes the quote even when nothing follows it; parse() then reports
-  // PERR_EOF instead of being handed the same quote again.
-  inp->offset += 1;
-
-  sv_t prefix = {0};
-  if (c == '\'')
-    prefix = SV("quote", 5);
-  else if (c == '`')
-    prefix = SV("quasiquote", 10);
-
-  lisp_t *root = make_cons(ctx, make_sym(ctx, prefix.data, prefix.size), NIL);
-  lisp_t *rest = NIL;
-  perr_t perr = parse(ctx, inp, &rest);
-  if (perr)
-    return perr;
-  CDR(root) = make_cons(ctx, rest, NIL);
-  *ret = root;
-  return PERR_OK;
-}
-
-// TODO: Make this interactable with user once we have evaluation
-// Dispatch on the character following the one under the cursor:
-//   `\`        -> not implemented yet (TODO)
-//   `(`        -> parse_vec
-//   `t` or `f` -> parse_bool
-// Anything else yields PERR_UNEXPECTED_READER_MACRO_SYMBOL.
-perr_t parse_reader_macro(context_t *ctx, input_t *inp, lisp_t **ret)
-{
-  switch (input_peek(inp, 1))
-  {
-  case '\\':
-    // character or weird base integer
-    TODO("Not implemented reader macro for characters or weird bases");
-    break;
-  case '(':
-    return parse_vec(ctx, inp, ret);
-  case 't':
-  case 'f':
-    return parse_bool(ctx, inp, ret);
-  default:
-    break;
-  }
-  return PERR_UNEXPECTED_READER_MACRO_SYMBOL;
-}
-
-static_assert(NUM_TAGS == 9);
-// Parse one expression at the cursor into `*ret`.
-//
-// Leading whitespace and comments are skipped first; PERR_EOF is returned if
-// nothing remains.  The first character then selects the sub-parser, tested
-// in this order: integer (digit, or `-` followed by a digit), reader macro
-// (`#`), symbol, list (`(`), quote/quasiquote, string (`"`).  Any other
-// character yields PERR_UNEXPECTED_CHAR.
-perr_t parse(context_t *ctx, input_t *inp, lisp_t **ret)
-{
-  debug("parse => ");
-  input_skip(inp);
-  if (input_eof(inp))
-    return PERR_EOF;
-
-  const char head = input_peek(inp, 0);
-
-  if (is_digit(head) || (head == '-' && is_digit(input_peek(inp, 1))))
-    return parse_int(ctx, inp, ret);
-  if (head == '#')
-    return parse_reader_macro(ctx, inp, ret);
-  if (is_sym(head))
-    return parse_sym(ctx, inp, ret);
-  if (head == '(')
-    return parse_cons(ctx, inp, ret);
-  if (head == '\'' || head == '`')
-    return parse_quote(ctx, inp, ret);
-  if (head == '\"')
-    return parse_str(ctx, inp, ret);
-
-  return PERR_UNEXPECTED_CHAR;
-}
-
-// Parse expressions until the input is exhausted, appending each result to
-// `vec` (using ctx->scratch).  Stops at, and returns, the first error that
-// parse() reports; returns PERR_OK otherwise.
-perr_t parse_all(context_t *ctx, input_t *inp, vec_t *vec)
-{
-  // input_skip() after every expression lets trailing blanks and comments
-  // count as end of input
-  for (; !input_eof(inp); input_skip(inp))
-  {
-    lisp_t *member = NIL;
-    perr_t err = parse(ctx, inp, &member);
-    if (err)
-      return err;
-
-    vec_append(&ctx->scratch, vec, &member, sizeof(member));
-  }
-  return PERR_OK;
-}
-
-// Write a one line diagnostic of the form `name:line:col: message` to `fp`,
-// where the position is derived from the current cursor.  For
-// PERR_UNEXPECTED_CHAR the character under the cursor is appended.  Returns
-// `error` unchanged so the call can be used as a return expression.
-int print_perror(FILE *fp, input_t *inp, perr_t error)
-{
-  pos_t pos = input_offset_to_pos(inp);
-  fprintf(fp, "%s:%lu:%lu: %s", inp->name, pos.line, pos.col,
-          perr_to_cstr(error));
-  switch (error)
-  {
-  case PERR_UNEXPECTED_CHAR:
-    fprintf(fp, "(`%c`)", input_peek(inp, 0));
-    break;
-  case PERR_OK:
-  case PERR_EOF:
-  case PERR_EXPECTED_BOOLEAN:
-  case PERR_UNEXPECTED_READER_MACRO_SYMBOL:
-  case PERR_EXPECTED_CLOSE_BRACKET:
-  case PERR_EXPECTED_SPEECH_MARK:
-  default:
-    break;
-  }
-  // Terminate the line on the same stream as the rest of the message (this
-  // used to go to stderr regardless of `fp`).
-  fprintf(fp, "\n");
-
-  return error;
-}
-
-// Translate the cursor of `inp` into a 1-based line/column pair by scanning
-// the text from the start: every newline before the cursor starts a new line
-// and resets the column, every other character advances the column.
-pos_t input_offset_to_pos(input_t *inp)
-{
-  pos_t pos = {.col = 1, .line = 1};
-
-  // Never look beyond the text, even if the cursor has run past it
-  u64 limit = inp->offset < inp->str.size ? inp->offset : inp->str.size;
-
-  for (u64 i = 0; i < limit; ++i)
-  {
-    if (inp->str.data[i] != '\n')
-    {
-      ++pos.col;
-      continue;
-    }
-    ++pos.line;
-    pos.col = 1;
-  }
-  return pos;
-}