aboutsummaryrefslogtreecommitdiff
path: root/reader.c
diff options
context:
space:
mode:
Diffstat (limited to 'reader.c')
-rw-r--r--reader.c419
1 files changed, 0 insertions, 419 deletions
diff --git a/reader.c b/reader.c
deleted file mode 100644
index b9217d3..0000000
--- a/reader.c
+++ /dev/null
@@ -1,419 +0,0 @@
-/* Copyright (C) 2025 Aryadev Chavali
-
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
- * details.
-
- * You may distribute and modify this code under the terms of the GNU General
- * Public License Version 2, which you should have received a copy of along with
- * this program. If not, please go to <https://www.gnu.org/licenses/>.
-
- * Created: 2025-04-16
- * Description: Implementation of parser
- */
-
-#include "./reader.h"
-
-#include <ctype.h>
-#include <string.h>
-
-bool is_digit(char c)
-{
- return isdigit(c);
-}
-
-bool is_alpha(char c)
-{
- return isalpha(c);
-}
-
-bool is_space(char c)
-{
- return isspace(c);
-}
-
-bool is_skip(char c)
-{
- return is_space(c) || c == ';';
-}
-
-bool is_sym(char c)
-{
- return strchr(SYM_CHARS, c) != NULL;
-}
-
-void input_from_sv(context_t *ctx, input_t *inp, const char *name, sv_t sv)
-{
- inp->name = name;
- inp->str = sv_copy(&ctx->read, sv);
-}
-
-void input_from_fp(context_t *ctx, input_t *input, const char *name, FILE *fp)
-{
- input->name = name;
- // TODO: Choose a best fit (i.e. maximal capacity, unused) page
- page_t *page = page_create(INPUT_CHUNK_SIZE);
- // chunk should be in scratch space so we can reset it later.
- char *chunk = context_salloc(ctx, INPUT_CHUNK_SIZE);
-
- u64 total_size = 0, size_read = 0;
- while (!feof(fp))
- {
- size_read = fread(chunk, 1, INPUT_CHUNK_SIZE, fp);
- if (size_read > 0)
- {
- page_rappend(&page, chunk, size_read);
- total_size += size_read;
- }
- }
-
- input->str = SV((char *)page->data, total_size);
-
- // Memory cleanup
- context_reset_scratch(ctx);
- arena_attach(&ctx->read, page);
-}
-
-bool input_eof(input_t *input)
-{
- return !input || (input->offset >= input->str.size) ||
- (input->str.data[input->offset] == '\0');
-}
-
-char input_peek(input_t *input, u64 offset)
-{
- if (input_eof(input) || input->offset + offset >= input->str.size)
- return '\0';
- return input->str.data[input->offset + offset];
-}
-
-char input_next(input_t *input, u64 offset)
-{
- if (input_eof(input) || input->offset + offset >= input->str.size)
- return '\0';
- input->offset += offset;
- return input->str.data[input->offset];
-}
-
-void input_skip(input_t *inp)
-{
- // n + 2 lookup
- sv_t current = sv_cut(inp->str, inp->offset);
- sv_t lookup = sv_chop(current, 2);
- while ((!input_eof(inp) && is_space(lookup.data[0])) ||
- lookup.data[0] == ';' || strncmp(lookup.data, "#|", 2) == 0)
- {
- if (lookup.data[0] == ';')
- {
- i64 newline = sv_find_subcstr(current, "\n", 1);
- if (newline < 0)
- inp->offset = inp->str.size;
- else
- inp->offset += newline + 1;
- }
- else if (strncmp(lookup.data, "#|", 2) == 0)
- {
- i64 offset = sv_find_subcstr(current, "|#", 2);
- if (offset < 0)
- inp->offset = inp->str.size;
- else
- inp->offset += offset + 2;
- }
-
- inp->offset += sv_while(sv_cut(inp->str, inp->offset), is_space);
- current = sv_cut(inp->str, inp->offset);
- lookup = sv_chop(current, 2);
- }
-}
-
-perr_t parse_int(context_t *ctx, input_t *inp, lisp_t **ret)
-{
- debug("parse_int[%lu] => ", inp->offset);
-
- // TODO: Parse arbitrary sized integers
- (void)ctx;
-
- bool negative = (input_peek(inp, 0) == '-');
- sv_t current = sv_cut(inp->str, inp->offset + (negative ? 1 : 0));
- sv_t digits = sv_chop(current, sv_while(current, is_digit));
-
- debug("`" PR_SV "` => ", SV_FMT(digits));
- i64 x = (negative ? -1L : 1L) * strtol(digits.data, NULL, 10);
- debug("%ld\n", x);
-
- input_next(inp, digits.size + (negative ? 1 : 0));
-
- *ret = make_int(x);
- return PERR_OK;
-}
-
-perr_t parse_sym(context_t *ctx, input_t *inp, lisp_t **ret)
-{
- debug("parse_sym[%lu] => ", inp->offset);
-
- sv_t current = sv_cut(inp->str, inp->offset);
- sv_t sym = sv_chop(current, sv_while(current, is_sym));
- debug("`" PR_SV "`\n", SV_FMT(sym));
-
- if (sym.size == 3)
- {
- // NOTE: We can't mutate sym directly because it's on `read` space.
-
- // TODO: Make this beautiful please.
- char buf[3];
- for (u64 i = 0; i < 3; ++i)
- buf[i] = toupper(sym.data[i]);
-
- // NOTE: NIL symbol to actual NIL
- if (strncmp(buf, "NIL", 3) == 0)
- {
- input_next(inp, 3);
- return NIL;
- }
- }
-
- lisp_t *lsym = make_sym(ctx, sym.data, sym.size);
- input_next(inp, sym.size);
- *ret = lsym;
-
- return PERR_OK;
-}
-
-perr_t parse_bool(context_t *ctx, input_t *inp, lisp_t **ret)
-{
- (void)ctx;
- debug("parse_bool[%lu] => ", inp->offset);
- char c = input_peek(inp, 1);
- bool b = -1;
- if (c == 't')
- b = true;
- else if (c == 'f')
- b = false;
- else
- return PERR_EXPECTED_BOOLEAN;
- *ret = tag_bool(b);
- input_next(inp, 2);
- return PERR_OK;
-}
-
-perr_t parse_cons(context_t *ctx, input_t *inp, lisp_t **ret)
-{
- // TODO: Put this in a symbol table
- lisp_t *lisp_dot = make_sym(ctx, ".", 1);
- debug("parse_cons[%lu] => (\n", inp->offset);
- inp->offset += 1;
-
- lisp_t *root = NIL;
- lisp_t **cur = NIL;
- bool dotted = false;
-
- while (!input_eof(inp) && input_peek(inp, 0) != ')')
- {
- lisp_t *lisp = NIL;
- perr_t res = parse(ctx, inp, &lisp);
- if (res)
- return res;
-
- // This is cheap to do
- if (lisp == lisp_dot)
- {
- dotted = true;
- continue;
- }
-
- if (!root)
- {
- root = make_cons(ctx, lisp, NIL);
- cur = &root;
- }
- else if (!dotted)
- *cur = make_cons(ctx, lisp, NIL);
- else
- *cur = lisp;
-
- if (cur && !dotted)
- cur = &as_cons(*cur)->cdr;
-
- input_skip(inp);
- }
-
- if (input_peek(inp, 0) != ')')
- return PERR_EXPECTED_CLOSE_BRACKET;
-
- input_next(inp, 1);
-
- debug(")\n");
- *ret = root;
- return PERR_OK;
-}
-
-perr_t parse_vec(context_t *ctx, input_t *inp, lisp_t **ret)
-{
- debug("parse_vec[%lu] => [\n", inp->offset);
- input_next(inp, 2);
-
- lisp_t *lvec = make_vec(ctx, 0);
- vec_t *vec = as_vec(lvec);
-
- while (!input_eof(inp) && input_peek(inp, 0) != ')')
- {
- lisp_t *lisp = NIL;
- perr_t res = parse(ctx, inp, &lisp);
- if (res)
- return res;
-
- vec_append(&ctx->memory, vec, &lisp, sizeof(lisp));
- input_skip(inp);
- }
-
- if (input_peek(inp, 0) != ')')
- return PERR_EXPECTED_CLOSE_BRACKET;
-
- input_next(inp, 1);
-
- debug("]\n");
- *ret = lvec;
- return PERR_OK;
-}
-
-perr_t parse_str(context_t *ctx, input_t *inp, lisp_t **ret)
-{
- debug("parse_str[%lu] => ", inp->offset);
- input_next(inp, 1); // 1 for the first speechmark
- sv_t sv = sv_cut(inp->str, inp->offset);
- i64 size = sv_find_subcstr(sv, "\"", 1);
- if (size < 0)
- return PERR_EXPECTED_SPEECH_MARK;
-
- input_next(inp, size + 1); // 1 for that last speechmark
- sv_t str_content = sv_chop(sv, size);
- debug("\"" PR_SV "\"\n", SV_FMT(str_content));
- *ret = make_str(ctx, str_content.data, str_content.size);
- return PERR_OK;
-}
-
-perr_t parse_quote(context_t *ctx, input_t *inp, lisp_t **ret)
-{
- char c = input_peek(inp, 0);
- if (!(c == '\'' || c == '`'))
- return PERR_UNEXPECTED_CHAR;
- input_next(inp, 1);
- sv_t prefix = {0};
- if (c == '\'')
- prefix = SV("quote", 5);
- else if (c == '`')
- prefix = SV("quasiquote", 10);
- lisp_t *root = make_cons(ctx, make_sym(ctx, prefix.data, prefix.size), NIL);
- lisp_t *rest = NIL;
- perr_t perr = parse(ctx, inp, &rest);
- if (perr)
- return perr;
- CDR(root) = make_cons(ctx, rest, NIL);
- *ret = root;
- return PERR_OK;
-}
-
-// TODO: Make this interactable with user once we have evaluation
-perr_t parse_reader_macro(context_t *ctx, input_t *inp, lisp_t **ret)
-{
- char c = input_peek(inp, 1);
- if (c == '\\')
- {
- // character or weird base integer
- TODO("Not implemented reader macro for characters or weird bases");
- }
- else if (c == '(')
- {
- return parse_vec(ctx, inp, ret);
- }
- else if (c == 't' || c == 'f')
- return parse_bool(ctx, inp, ret);
- return PERR_UNEXPECTED_READER_MACRO_SYMBOL;
-}
-
-static_assert(NUM_TAGS == 9);
-perr_t parse(context_t *ctx, input_t *inp, lisp_t **ret)
-{
- debug("parse => ");
- input_skip(inp);
- if (input_eof(inp))
- return PERR_EOF;
-
- char c = input_peek(inp, 0);
-
- if (is_digit(c) || (c == '-' && is_digit(input_peek(inp, 1))))
- return parse_int(ctx, inp, ret);
- else if (c == '#')
- return parse_reader_macro(ctx, inp, ret);
- else if (is_sym(c))
- return parse_sym(ctx, inp, ret);
- else if (c == '(')
- return parse_cons(ctx, inp, ret);
- else if (c == '\'' || c == '`')
- return parse_quote(ctx, inp, ret);
- else if (c == '\"')
- return parse_str(ctx, inp, ret);
- else
- return PERR_UNEXPECTED_CHAR;
-}
-
-perr_t parse_all(context_t *ctx, input_t *inp, vec_t *vec)
-{
- while (!input_eof(inp))
- {
- lisp_t *member = NIL;
- perr_t err = parse(ctx, inp, &member);
-
- if (err)
- return err;
- else
- vec_append(&ctx->scratch, vec, &member, sizeof(member));
-
- input_skip(inp);
- }
- return PERR_OK;
-}
-
-int print_perror(FILE *fp, input_t *inp, perr_t error)
-{
- pos_t pos = input_offset_to_pos(inp);
- fprintf(fp, "%s:%lu:%lu: %s", inp->name, pos.line, pos.col,
- perr_to_cstr(error));
- switch (error)
- {
- case PERR_UNEXPECTED_CHAR:
- fprintf(fp, "(`%c`)", input_peek(inp, 0));
- break;
- case PERR_OK:
- case PERR_EOF:
- case PERR_EXPECTED_BOOLEAN:
- case PERR_UNEXPECTED_READER_MACRO_SYMBOL:
- case PERR_EXPECTED_CLOSE_BRACKET:
- case PERR_EXPECTED_SPEECH_MARK:
- default:
- break;
- }
- fprintf(stderr, "\n");
-
- return error;
-}
-
-pos_t input_offset_to_pos(input_t *inp)
-{
- pos_t pos = {.col = 1, .line = 1};
- for (u64 i = 0; i < inp->offset && i < inp->str.size; ++i)
- {
- char c = (inp->str.data[i]);
- if (c == '\n')
- {
- ++pos.line;
- pos.col = 1;
- }
- else
- {
- ++pos.col;
- }
- }
- return pos;
-}