aboutsummaryrefslogtreecommitdiff
path: root/lisp
diff options
context:
space:
mode:
Diffstat (limited to 'lisp')
-rw-r--r--lisp/context.c92
-rw-r--r--lisp/context.h38
-rw-r--r--lisp/eval.c24
-rw-r--r--lisp/eval.h28
-rw-r--r--lisp/lisp.c337
-rw-r--r--lisp/lisp.h57
-rw-r--r--lisp/reader.c426
-rw-r--r--lisp/reader.h98
-rw-r--r--lisp/tag.c94
-rw-r--r--lisp/tag.h92
10 files changed, 1286 insertions, 0 deletions
diff --git a/lisp/context.c b/lisp/context.c
new file mode 100644
index 0000000..3b94e54
--- /dev/null
+++ b/lisp/context.c
@@ -0,0 +1,92 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+ Public License Version 2
+ * for details.
+
+ * You may distribute and modify this code under the terms of the
+ * GNU General Public License Version 2, which you should have received a copy
+ of along with this
+ * program. If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-05-12
+ * Description:
+ */
+
+#include <lisp/context.h>
+
+#include <string.h>
+
+// Allocates against stable memory i.e. we can have pointers of this lying
+// around without any fear of them being thrown away.
+void *context_alloc(context_t *context, u64 size)
+{
+ return arena_alloc(&context->memory, size);
+}
+
+// Allocate against a "scratch space", separate from main memory, for internal
+// use.
+void *context_salloc(context_t *context, u64 size)
+{
+ return arena_alloc(&context->scratch, size);
+}
+
+void context_reset_read(context_t *context)
+{
+ arena_reset(&context->read);
+}
+
+void context_reset_scratch(context_t *context)
+{
+ arena_reset(&context->scratch);
+}
+
+void context_reset(context_t *context)
+{
+ arena_reset(&context->memory);
+ arena_reset(&context->read);
+ arena_reset(&context->scratch);
+}
+
+void context_cleanup(context_t *context)
+{
+ if (!context)
+ return;
+ arena_cleanup(&context->memory);
+ arena_cleanup(&context->read);
+ arena_cleanup(&context->scratch);
+ memset(context, 0, sizeof(*context));
+}
+
+void context_report(context_t *context)
+{
+#if DEBUG
+ // Figure this out at runtime
+ u64 mem_used = 0, mem_cap = 0;
+ for (page_t *page = context->memory.start; page; page = page->next)
+ {
+ mem_used += page->size;
+ mem_cap += page->capacity;
+ }
+
+ u64 read_used = 0, read_cap = 0;
+ for (page_t *page = context->read.start; page; page = page->next)
+ {
+ read_used += page->size;
+ read_cap += page->capacity;
+ }
+
+ u64 scr_used = 0, scr_cap = 0;
+ for (page_t *page = context->scratch.start; page; page = page->next)
+ {
+ scr_used += page->size;
+ scr_cap += page->capacity;
+ }
+
+ info("<Context>: %luB/%luB main memory used\n", mem_used, mem_cap);
+ info("<Context>: %luB/%luB read space used\n", read_used, read_cap);
+ info("<Context>: %luB/%luB scratch space used\n", scr_used, scr_cap);
+#endif
+}
diff --git a/lisp/context.h b/lisp/context.h
new file mode 100644
index 0000000..2923ad9
--- /dev/null
+++ b/lisp/context.h
@@ -0,0 +1,38 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+ Public License Version 2
+ * for details.
+
+ * You may distribute and modify this code under the terms of the
+ * GNU General Public License Version 2, which you should have received a copy
+ of along with this
+ * program. If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-05-12
+ * Description:
+ */
+
+#ifndef CONTEXT_H
+#define CONTEXT_H
+
+#include <lib/arena.h>
+
+typedef struct Context
+{
+ arena_t memory, read, scratch;
+} context_t;
+
+void *context_alloc(context_t *context, u64 size);
+void *context_salloc(context_t *context, u64 size);
+page_t *context_get_read_page(context_t *ctx);
+void context_reset_read(context_t *context);
+void context_reset_scratch(context_t *context);
+
+void context_reset(context_t *context);
+void context_cleanup(context_t *context);
+void context_report(context_t *context);
+
+#endif
diff --git a/lisp/eval.c b/lisp/eval.c
new file mode 100644
index 0000000..3cd05ca
--- /dev/null
+++ b/lisp/eval.c
@@ -0,0 +1,24 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program. If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-18
+ * Description: Evaluator implementation
+ */
+
+#include <lisp/eval.h>
+
+err_t eval(context_t *ctx, lisp_t *obj, lisp_t **ret)
+{
+ (void)ctx;
+ (void)obj;
+ (void)ret;
+ TODO("implement evaluator");
+}
diff --git a/lisp/eval.h b/lisp/eval.h
new file mode 100644
index 0000000..bc7fb60
--- /dev/null
+++ b/lisp/eval.h
@@ -0,0 +1,28 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program. If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-18
+ * Description: Evaluator
+ */
+
+#ifndef EVAL_H
+#define EVAL_H
+
+#include <lisp/lisp.h>
+
+typedef enum
+{
+ ERR_OK = 0,
+} err_t;
+
+err_t eval(context_t *ctx, lisp_t *obj, lisp_t **ret);
+
+#endif
diff --git a/lisp/lisp.c b/lisp/lisp.c
new file mode 100644
index 0000000..24a4788
--- /dev/null
+++ b/lisp/lisp.c
@@ -0,0 +1,337 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program. If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-06
+ * Description:
+ */
+
+#include <lib/sv.h>
+#include <lisp/lisp.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <wchar.h>
+
+lisp_t *make_int(i64 integer)
+{
+ return tag_int(integer);
+}
+
+lisp_t *make_sym(context_t *ctx, char *data, u64 size)
+{
+ // Small symbol optimisation
+ if (size <= 7)
+ return tag_ssym(data, size);
+
+ // Make a cell with the data we need
+ cell_t *cell = context_alloc(ctx, sizeof(*cell) + size);
+ cell->size = size;
+ memcpy(cell->data, data, size);
+
+ return tag_sym(cell);
+}
+
+lisp_t *make_cons(context_t *ctx, lisp_t *car, lisp_t *cdr)
+{
+ cons_t *cons = context_alloc(ctx, sizeof(*cons));
+ memset(cons, 0, sizeof(*cons));
+ cons->car = car;
+ cons->cdr = cdr;
+ return tag_cons(cons);
+}
+
+lisp_t *make_list(context_t *ctx, lisp_t **lisps, u64 size)
+{
+ if (!lisps)
+ return NIL;
+ lisp_t *cur = NIL;
+ for (u64 i = size; i > 0; --i)
+ cur = make_cons(ctx, lisps[i - 1], cur);
+ return cur;
+}
+
+lisp_t *make_vec(context_t *ctx, u32 size)
+{
+ // Make a vector with the content
+ lisp_t *container = NIL;
+ vec_t *vec = context_alloc(ctx, sizeof(*vec));
+ vec->size = 0;
+ vec->cap = size * sizeof(&container);
+ if (size == 0)
+ vec->data = NULL;
+ else
+ vec->data = context_alloc(ctx, vec->cap);
+ container = tag_vec(vec);
+
+ return container;
+}
+
+lisp_t *make_str(context_t *ctx, char *data, u64 size)
+{
+ if (size == 0)
+ // No need to allocate unless necessary
+ return tag_str(NIL);
+ // Make a vector with the content
+ vec_t *vec = context_alloc(ctx, sizeof(*vec));
+ vec->data = context_alloc(ctx, sizeof(*vec->data) * size);
+ vec->cap = size;
+ vec->size = size;
+ memcpy(vec->data, data, size);
+
+ return tag_str(vec);
+}
+
+i64 as_int(lisp_t *obj)
+{
+ assert(IS_TAG(obj, INT));
+ u64 p_obj = (u64)obj;
+ return UNTAG(p_obj, INT) | // Delete the tag
+ (NTH_BYTE(p_obj, 7) & 0x80) << 56 // duplicate the MSB (preserve sign)
+ ;
+}
+
+cons_t *as_cons(lisp_t *obj)
+{
+ assert(IS_TAG(obj, CONS));
+ return (cons_t *)UNTAG(obj, CONS);
+}
+
+u32 as_char(lisp_t *obj)
+{
+ return (u32)UNTAG(obj, CHAR);
+}
+
+cell_t *as_sym(lisp_t *obj)
+{
+ assert(IS_TAG(obj, SYM));
+ return (cell_t *)UNTAG(obj, SYM);
+}
+
+void as_ssym(lisp_t *obj, sv_t *container)
+{
+ assert(IS_TAG(obj, SSYM));
+ u64 p_obj = (u64)obj;
+ container->size = NTH_BYTE(p_obj, 0) >> SHIFT_SSYM;
+ p_obj >>= 8;
+ memcpy(container->data, &p_obj, container->size);
+}
+
+bool as_bool(lisp_t *obj)
+{
+ assert(IS_TAG(obj, BOOL));
+ return (bool)UNTAG(obj, BOOL);
+}
+
+f64 as_float(lisp_t *obj)
+{
+ assert(IS_TAG(obj, FLOAT));
+ f64 *f = (f64 *)UNTAG(obj, FLOAT);
+ return f ? *f : 0;
+}
+
+vec_t *as_vec(lisp_t *obj)
+{
+ assert(IS_TAG(obj, VEC));
+ return (vec_t *)UNTAG(obj, VEC);
+}
+
+vec_t *as_str(lisp_t *obj)
+{
+ assert(IS_TAG(obj, STR));
+ return (vec_t *)UNTAG(obj, STR);
+}
+
+sv_t serialise(context_t *ctx, lisp_t *ptr)
+{
+ enum Tag t = tag_get(ptr);
+ switch (t)
+ {
+ case TAG_NIL:
+ {
+ return SV("NIL", 3);
+ }
+ case TAG_INT:
+ {
+ i64 i = as_int(ptr);
+ sv_t s = sv_fmt(&ctx->scratch,
+#if DEBUG > 1
+ "int["
+#endif
+ "%ld"
+#if DEBUG > 1
+ "]"
+#endif
+ ,
+ i);
+ return s;
+ }
+ case TAG_CHAR:
+ {
+ u32 codepoint = as_char(ptr);
+ return sv_fmt(&ctx->scratch,
+#if DEBUG > 1
+ "char["
+#endif
+ "%lc"
+#if DEBUG > 1
+ "]"
+#endif
+ ,
+ codepoint);
+ }
+ case TAG_SYM:
+ {
+ cell_t *cell = as_sym(ptr);
+ sv_t s = sv_make(&ctx->scratch, (char *)cell->data, cell->size);
+#if DEBUG > 1
+ s = sv_fmt(&ctx->scratch, "sym[" PR_SV "]", SV_FMT(s));
+#endif
+ return s;
+ }
+ case TAG_SSYM:
+ {
+ char data[7];
+ sv_t rsym = SV(data, 0);
+ as_ssym(ptr, &rsym);
+ sv_t s = sv_copy(&ctx->scratch, rsym);
+
+#if DEBUG > 1
+ s = sv_fmt(&ctx->scratch, "ssym[" PR_SV "]", SV_FMT(s));
+#endif
+ return s;
+ }
+ case TAG_BOOL:
+ {
+ return sv_fmt(&ctx->scratch,
+#if DEBUG > 1
+ "bool["
+#endif
+ "%s"
+#if DEBUG > 1
+ "]"
+#endif
+ ,
+ as_bool(ptr) ? "#t" : "#f");
+ }
+ case TAG_FLOAT:
+ {
+ return sv_fmt(&ctx->scratch,
+#if DEBUG > 1
+ "float["
+#endif
+ "%lf"
+#if DEBUG > 1
+ "]"
+#endif
+ ,
+ as_float(ptr));
+ }
+ case TAG_CONS:
+ {
+ if (!CAR(ptr) && !CDR(ptr))
+#if DEBUG > 1
+ return SV("lst[NIL]", 8);
+#else
+ return SV("NIL", 3);
+#endif
+
+ sv_t s = {0};
+
+ for (lisp_t *lsp = ptr; lsp; lsp = IS_TAG(lsp, CONS) ? CDR(lsp) : NIL)
+ {
+ lisp_t *lmember = lsp;
+ if (IS_TAG(lsp, CONS))
+ lmember = CAR(lmember);
+
+ sv_t member = serialise(ctx, lmember);
+ s = sv_concat(&ctx->scratch, s, member);
+ if (IS_TAG(lsp, CONS) && !CDR(lsp))
+ continue;
+ else if (IS_TAG(CDR(lsp), CONS))
+ // normal list
+ s = sv_append(&ctx->scratch, s, " ", 1);
+ else
+ // dotted list
+ s = sv_append(&ctx->scratch, s, " . ", 3);
+ }
+
+#if DEBUG > 1
+ s = sv_fmt(&ctx->scratch, "lst[" PR_SV "]", SV_FMT(s));
+#else
+ s = sv_fmt(&ctx->scratch, "(" PR_SV ")", SV_FMT(s));
+#endif
+ return s;
+ }
+ case TAG_VEC:
+ {
+ vec_t *vec = as_vec(ptr);
+ if (!vec)
+#if DEBUG > 1
+ return SV("vec[NIL]", 8);
+#else
+ return SV("#()", 3);
+#endif
+ else if (vec->size < sizeof(&ptr))
+#if DEBUG > 1
+ return SV("vec[0/0 #()]", 13);
+#else
+ return SV("#()", 3);
+#endif
+
+ sv_t s = {0};
+ for (u64 i = 0; i < vec->size / sizeof(&ptr); ++i)
+ {
+ lisp_t *lmember = ((lisp_t **)vec->data)[i];
+ sv_t member = serialise(ctx, lmember);
+ s = sv_concat(&ctx->scratch, s, member);
+ if (i == ((vec->size / sizeof(&ptr)) - 1))
+ continue;
+ s = sv_append(&ctx->scratch, s, " ", 1);
+ }
+
+#if DEBUG > 1
+ s = sv_fmt(&ctx->scratch, "vec[%lu/%lu #(" PR_SV ")]", vec->size, vec->cap,
+ SV_FMT(s));
+#else
+ s = sv_fmt(&ctx->scratch, "#(" PR_SV ")", SV_FMT(s));
+#endif
+ return s;
+ break;
+ }
+ case TAG_STR:
+ {
+ vec_t *vec = as_str(ptr);
+ sv_t sv = {0};
+ if (vec)
+ sv = SV((char *)vec->data, vec->size);
+ else
+ sv = SV("", 0);
+ return sv_fmt(&ctx->scratch,
+#if DEBUG > 1
+ "str["
+#else
+ "\""
+#endif
+ PR_SV
+#if DEBUG > 1
+ "]"
+#else
+ "\""
+#endif
+ ,
+ SV_FMT(sv));
+ }
+ case NUM_TAGS:
+ default:
+ assert(false && "serialise: unreachable");
+ return SV(0, 0);
+ }
+}
diff --git a/lisp/lisp.h b/lisp/lisp.h
new file mode 100644
index 0000000..2892bdb
--- /dev/null
+++ b/lisp/lisp.h
@@ -0,0 +1,57 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program. If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-06
+ * Description: Object model where we deal with contexts
+ */
+
+#ifndef LISP_H
+#define LISP_H
+
+#include <lib/sv.h>
+#include <lib/vec.h>
+#include <lisp/context.h>
+#include <lisp/tag.h>
+
+typedef struct Cons
+{
+ lisp_t *car, *cdr;
+} cons_t;
+
+typedef struct Cell
+{
+ u64 size;
+ u8 data[];
+} cell_t;
+
+lisp_t *make_int(i64 integer);
+lisp_t *make_sym(context_t *ctx, char *data, u64 size);
+lisp_t *make_cons(context_t *ctx, lisp_t *car, lisp_t *cdr);
+lisp_t *make_list(context_t *ctx, lisp_t **lisps, u64 size);
+lisp_t *make_vec(context_t *ctx, u32 size);
+lisp_t *make_str(context_t *ctx, char *data, u64 size);
+
+i64 as_int(lisp_t *obj);
+u32 as_char(lisp_t *obj);
+cell_t *as_sym(lisp_t *obj);
+void as_ssym(lisp_t *obj, sv_t *sv);
+bool as_bool(lisp_t *obj);
+f64 as_float(lisp_t *obj);
+cons_t *as_cons(lisp_t *obj);
+vec_t *as_vec(lisp_t *obj);
+vec_t *as_str(lisp_t *obj);
+
+sv_t serialise(context_t *ctx, lisp_t *lisp);
+
+#define CAR(PTR) (as_cons(PTR)->car)
+#define CDR(PTR) (as_cons(PTR)->cdr)
+
+#endif
diff --git a/lisp/reader.c b/lisp/reader.c
new file mode 100644
index 0000000..0c8a914
--- /dev/null
+++ b/lisp/reader.c
@@ -0,0 +1,426 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program. If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-16
+ * Description: Implementation of parser
+ */
+
+#include <lisp/reader.h>
+
+#include <ctype.h>
+#include <string.h>
+
+bool is_digit(char c)
+{
+ return isdigit(c);
+}
+
+bool is_alpha(char c)
+{
+ return isalpha(c);
+}
+
+bool is_space(char c)
+{
+ return isspace(c);
+}
+
+bool is_skip(char c)
+{
+ return is_space(c) || c == ';';
+}
+
+bool is_sym(char c)
+{
+ return strchr(SYM_CHARS, c) != NULL;
+}
+
+void input_from_sv(context_t *ctx, input_t *inp, const char *name, sv_t sv)
+{
+ inp->name = name;
+ inp->str = sv_copy(&ctx->read, sv);
+}
+
+void input_from_fp(context_t *ctx, input_t *input, const char *name, FILE *fp)
+{
+ input->name = name;
+ // TODO: Choose a best fit (i.e. maximal capacity, unused) page
+ page_t *page = page_create(INPUT_CHUNK_SIZE);
+ // chunk should be in scratch space so we can reset it later.
+ char *chunk = context_salloc(ctx, INPUT_CHUNK_SIZE);
+
+ u64 total_size = 0, size_read = 0;
+ while (!feof(fp))
+ {
+ size_read = fread(chunk, 1, INPUT_CHUNK_SIZE, fp);
+ if (size_read > 0)
+ {
+ page_rappend(&page, chunk, size_read);
+ total_size += size_read;
+ }
+ }
+
+ input->str = SV((char *)page->data, total_size);
+
+ // Memory cleanup
+ context_reset_scratch(ctx);
+ arena_attach(&ctx->read, page);
+}
+
+bool input_eof(input_t *input)
+{
+ return !input || (input->offset >= input->str.size) ||
+ (input->str.data[input->offset] == '\0');
+}
+
+char input_peek(input_t *input, u64 offset)
+{
+ if (input_eof(input) || input->offset + offset >= input->str.size)
+ return '\0';
+ return input->str.data[input->offset + offset];
+}
+
+char input_next(input_t *input, u64 offset)
+{
+ if (input_eof(input) || input->offset + offset >= input->str.size)
+ return '\0';
+ input->offset += offset;
+ return input->str.data[input->offset];
+}
+
+void input_skip(input_t *inp)
+{
+ while (!input_eof(inp))
+ {
+ sv_t current = sv_cut(inp->str, inp->offset);
+ // Skip any whitespace
+ inp->offset += sv_while(current, is_space);
+ current = sv_cut(inp->str, inp->offset);
+ // Is there a comment to skip?
+ if (current.size && current.data[0] == ';')
+ {
+ // Skip till newline
+ i64 newline = sv_find_subcstr(current, "\n", 1);
+ if (newline < 0)
+ inp->offset = inp->str.size;
+ else
+ inp->offset += newline + 1;
+ // Then skip any whitespace
+ }
+ // Multiline comment to skip?
+ else if (current.size > 2 && strncmp(current.data, "#|", 2) == 0)
+ {
+ i64 offset = sv_find_subcstr(current, "|#", 2);
+ if (offset < 0)
+ inp->offset = inp->str.size;
+ else
+ inp->offset += offset + 2;
+ // Then skip any whitespace
+ }
+ // Nothing to skip, stop.
+ else
+ break;
+ }
+ return;
+}
+
+perr_t parse_int(context_t *ctx, input_t *inp, lisp_t **ret)
+{
+ debug("parse_int[%lu] => ", inp->offset);
+
+ // TODO: Parse arbitrary sized integers
+ (void)ctx;
+
+ bool negative = (input_peek(inp, 0) == '-');
+ sv_t current = sv_cut(inp->str, inp->offset + (negative ? 1 : 0));
+ sv_t digits = sv_chop(current, sv_while(current, is_digit));
+
+ debug("`" PR_SV "` => ", SV_FMT(digits));
+ i64 x = (negative ? -1L : 1L) * strtol(digits.data, NULL, 10);
+ debug("%ld\n", x);
+
+ input_next(inp, digits.size + (negative ? 1 : 0));
+
+ *ret = make_int(x);
+ return PERR_OK;
+}
+
+perr_t parse_sym(context_t *ctx, input_t *inp, lisp_t **ret)
+{
+ debug("parse_sym[%lu] => ", inp->offset);
+
+ sv_t current = sv_cut(inp->str, inp->offset);
+ sv_t sym = sv_chop(current, sv_while(current, is_sym));
+ debug("`" PR_SV "`\n", SV_FMT(sym));
+
+ if (sym.size == 3)
+ {
+ // NOTE: We can't mutate sym directly because it's on `read` space.
+
+ // TODO: Make this beautiful please.
+ char buf[3];
+ for (u64 i = 0; i < 3; ++i)
+ buf[i] = toupper(sym.data[i]);
+
+ // NOTE: NIL symbol to actual NIL
+ if (strncmp(buf, "NIL", 3) == 0)
+ {
+ input_next(inp, 3);
+ return NIL;
+ }
+ }
+
+ lisp_t *lsym = make_sym(ctx, sym.data, sym.size);
+ input_next(inp, sym.size);
+ *ret = lsym;
+
+ return PERR_OK;
+}
+
+perr_t parse_bool(context_t *ctx, input_t *inp, lisp_t **ret)
+{
+ (void)ctx;
+ debug("parse_bool[%lu] => ", inp->offset);
+ char c = input_peek(inp, 1);
+ bool b = -1;
+ if (c == 't')
+ b = true;
+ else if (c == 'f')
+ b = false;
+ else
+ return PERR_EXPECTED_BOOLEAN;
+ *ret = tag_bool(b);
+ input_next(inp, 2);
+ return PERR_OK;
+}
+
+perr_t parse_cons(context_t *ctx, input_t *inp, lisp_t **ret)
+{
+ // TODO: Put this in a symbol table
+ lisp_t *lisp_dot = make_sym(ctx, ".", 1);
+ debug("parse_cons[%lu] => (\n", inp->offset);
+ inp->offset += 1;
+
+ lisp_t *root = NIL;
+ lisp_t **cur = NIL;
+ bool dotted = false;
+
+ while (!input_eof(inp) && input_peek(inp, 0) != ')')
+ {
+ lisp_t *lisp = NIL;
+ perr_t res = parse(ctx, inp, &lisp);
+ if (res)
+ return res;
+
+ // This is cheap to do
+ if (lisp == lisp_dot)
+ {
+ dotted = true;
+ continue;
+ }
+
+ if (!root)
+ {
+ root = make_cons(ctx, lisp, NIL);
+ cur = &root;
+ }
+ else if (!dotted)
+ *cur = make_cons(ctx, lisp, NIL);
+ else
+ *cur = lisp;
+
+ if (cur && !dotted)
+ cur = &as_cons(*cur)->cdr;
+
+ input_skip(inp);
+ }
+
+ if (input_peek(inp, 0) != ')')
+ return PERR_EXPECTED_CLOSE_BRACKET;
+
+ input_next(inp, 1);
+
+ debug(")\n");
+ *ret = root;
+ return PERR_OK;
+}
+
+perr_t parse_vec(context_t *ctx, input_t *inp, lisp_t **ret)
+{
+ debug("parse_vec[%lu] => [\n", inp->offset);
+ input_next(inp, 2);
+
+ lisp_t *lvec = make_vec(ctx, 0);
+ vec_t *vec = as_vec(lvec);
+
+ while (!input_eof(inp) && input_peek(inp, 0) != ')')
+ {
+ lisp_t *lisp = NIL;
+ perr_t res = parse(ctx, inp, &lisp);
+ if (res)
+ return res;
+
+ vec_append(&ctx->memory, vec, &lisp, sizeof(lisp));
+ input_skip(inp);
+ }
+
+ if (input_peek(inp, 0) != ')')
+ return PERR_EXPECTED_CLOSE_BRACKET;
+
+ input_next(inp, 1);
+
+ debug("]\n");
+ *ret = lvec;
+ return PERR_OK;
+}
+
+perr_t parse_str(context_t *ctx, input_t *inp, lisp_t **ret)
+{
+ debug("parse_str[%lu] => ", inp->offset);
+ input_next(inp, 1); // 1 for the first speechmark
+ sv_t sv = sv_cut(inp->str, inp->offset);
+ i64 size = sv_find_subcstr(sv, "\"", 1);
+ if (size < 0)
+ return PERR_EXPECTED_SPEECH_MARK;
+
+ input_next(inp, size + 1); // 1 for that last speechmark
+ sv_t str_content = sv_chop(sv, size);
+ debug("\"" PR_SV "\"\n", SV_FMT(str_content));
+ *ret = make_str(ctx, str_content.data, str_content.size);
+ return PERR_OK;
+}
+
+perr_t parse_quote(context_t *ctx, input_t *inp, lisp_t **ret)
+{
+ char c = input_peek(inp, 0);
+ if (!(c == '\'' || c == '`'))
+ return PERR_UNEXPECTED_CHAR;
+ input_next(inp, 1);
+ sv_t prefix = {0};
+ if (c == '\'')
+ prefix = SV("quote", 5);
+ else if (c == '`')
+ prefix = SV("quasiquote", 10);
+ lisp_t *root = make_cons(ctx, make_sym(ctx, prefix.data, prefix.size), NIL);
+ lisp_t *rest = NIL;
+ perr_t perr = parse(ctx, inp, &rest);
+ if (perr)
+ return perr;
+ CDR(root) = make_cons(ctx, rest, NIL);
+ *ret = root;
+ return PERR_OK;
+}
+
+// TODO: Make this interactable with user once we have evaluation
+perr_t parse_reader_macro(context_t *ctx, input_t *inp, lisp_t **ret)
+{
+ char c = input_peek(inp, 1);
+ if (c == '\\')
+ {
+ // character or weird base integer
+ TODO("Not implemented reader macro for characters or weird bases");
+ }
+ else if (c == '(')
+ return parse_vec(ctx, inp, ret);
+ else if (c == 't' || c == 'f')
+ return parse_bool(ctx, inp, ret);
+ else if (c == 'e')
+ {
+ // Scientific notation for floats
+ }
+ return PERR_UNEXPECTED_READER_MACRO_SYMBOL;
+}
+
+static_assert(NUM_TAGS == 9);
+perr_t parse(context_t *ctx, input_t *inp, lisp_t **ret)
+{
+ debug("parse => ");
+ input_skip(inp);
+ if (input_eof(inp))
+ return PERR_EOF;
+
+ char c = input_peek(inp, 0);
+
+ if (is_digit(c) || (c == '-' && is_digit(input_peek(inp, 1))))
+ return parse_int(ctx, inp, ret);
+ else if (c == '#')
+ return parse_reader_macro(ctx, inp, ret);
+ else if (is_sym(c))
+ return parse_sym(ctx, inp, ret);
+ else if (c == '(')
+ return parse_cons(ctx, inp, ret);
+ else if (c == '\'' || c == '`')
+ return parse_quote(ctx, inp, ret);
+ else if (c == '\"')
+ return parse_str(ctx, inp, ret);
+ else
+ return PERR_UNEXPECTED_CHAR;
+}
+
+perr_t parse_all(context_t *ctx, input_t *inp, vec_t *vec)
+{
+ while (!input_eof(inp))
+ {
+ lisp_t *member = NIL;
+ perr_t err = parse(ctx, inp, &member);
+
+ if (err)
+ return err;
+ else
+ vec_append(&ctx->scratch, vec, &member, sizeof(member));
+
+ input_skip(inp);
+ }
+ return PERR_OK;
+}
+
+int print_perror(FILE *fp, input_t *inp, perr_t error)
+{
+ pos_t pos = input_offset_to_pos(inp);
+ fprintf(fp, "%s:%lu:%lu: %s", inp->name, pos.line, pos.col,
+ perr_to_cstr(error));
+ switch (error)
+ {
+ case PERR_UNEXPECTED_CHAR:
+ fprintf(fp, "(`%c`)", input_peek(inp, 0));
+ break;
+ case PERR_OK:
+ case PERR_EOF:
+ case PERR_EXPECTED_BOOLEAN:
+ case PERR_UNEXPECTED_READER_MACRO_SYMBOL:
+ case PERR_EXPECTED_CLOSE_BRACKET:
+ case PERR_EXPECTED_SPEECH_MARK:
+ default:
+ break;
+ }
+ fprintf(stderr, "\n");
+
+ return error;
+}
+
+pos_t input_offset_to_pos(input_t *inp)
+{
+ pos_t pos = {.col = 1, .line = 1};
+ for (u64 i = 0; i < inp->offset && i < inp->str.size; ++i)
+ {
+ char c = (inp->str.data[i]);
+ if (c == '\n')
+ {
+ ++pos.line;
+ pos.col = 1;
+ }
+ else
+ {
+ ++pos.col;
+ }
+ }
+ return pos;
+}
diff --git a/lisp/reader.h b/lisp/reader.h
new file mode 100644
index 0000000..4bd0578
--- /dev/null
+++ b/lisp/reader.h
@@ -0,0 +1,98 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program. If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-16
+ * Description: Parsing Lisp!
+ */
+
+#ifndef READER_H
+#define READER_H
+
+#include <lib/vec.h>
+#include <lisp/context.h>
+#include <lisp/lisp.h>
+
+#define INPUT_CHUNK_SIZE 512
+static const char SYM_CHARS[] =
+ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+ "¬!£$%^&*_-+={[]}:@~#<,>.?/";
+
+typedef struct
+{
+ const char *name;
+ u64 offset;
+ sv_t str;
+} input_t;
+
+void input_from_sv(context_t *ctx, input_t *inp, const char *name, sv_t sv);
+void input_from_fp(context_t *ctx, input_t *input, const char *name, FILE *fp);
+bool input_eof(input_t *input);
+
+typedef struct
+{
+ u64 col, line;
+} pos_t;
+
+pos_t input_offset_to_pos(input_t *inp);
+
+typedef enum
+{
+ PERR_OK = 0,
+ PERR_EOF,
+ PERR_UNEXPECTED_CHAR,
+ PERR_EXPECTED_CLOSE_BRACKET,
+ PERR_EXPECTED_SPEECH_MARK,
+ PERR_UNEXPECTED_READER_MACRO_SYMBOL,
+ PERR_EXPECTED_BOOLEAN,
+} perr_t;
+
+static inline const char *perr_to_cstr(perr_t perr)
+{
+ switch (perr)
+ {
+ case PERR_OK:
+ return "OK";
+ break;
+ case PERR_EOF:
+ return "EOF";
+ break;
+ case PERR_UNEXPECTED_CHAR:
+ return "UNEXPECTED_CHAR";
+ break;
+ case PERR_EXPECTED_CLOSE_BRACKET:
+ return "EXPECTED_CLOSE_BRACKET";
+ break;
+ case PERR_EXPECTED_SPEECH_MARK:
+ return "EXPECTED_SPEECH_MARK";
+ break;
+ case PERR_UNEXPECTED_READER_MACRO_SYMBOL:
+ return "UNEXPECTED_READER_MACRO_SYMBOL";
+ break;
+ case PERR_EXPECTED_BOOLEAN:
+ return "EXPECTED_BOOLEAN";
+ break;
+ }
+ assert(false && "perr_to_cstr: unreachable");
+ return "";
+}
+
+typedef struct
+{
+ lisp_t *result;
+ perr_t error;
+} pres_t;
+
+perr_t parse(context_t *ctx, input_t *str, lisp_t **ret);
+perr_t parse_all(context_t *ctx, input_t *str, vec_t *vec);
+
+int print_perror(FILE *fp, input_t *inp, perr_t error);
+
+#endif
diff --git a/lisp/tag.c b/lisp/tag.c
new file mode 100644
index 0000000..26db6d5
--- /dev/null
+++ b/lisp/tag.c
@@ -0,0 +1,94 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program. If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-06
+ * Description:
+ */
+
+#include <lisp/tag.h>
+#include <string.h>
+
+lisp_t *tag_int(i64 i)
+{
+ return TAG((u64)i, INT);
+}
+
+lisp_t *tag_cons(void *ptr)
+{
+ return TAG((u64)ptr, CONS);
+}
+
+lisp_t *tag_sym(void *ptr)
+{
+ return TAG((u64)ptr, SYM);
+}
+
+lisp_t *tag_ssym(const char *data, size_t size)
+{
+ assert(size <= 7);
+ u8 buffer[sizeof(u64)];
+ memset(buffer, 0, sizeof(buffer));
+ // in 8 bits we have:
+ // size - 3 bits (up to 7)
+ // tag - 5 bits
+ buffer[0] = size;
+ buffer[0] <<= SHIFT_SSYM;
+ buffer[0] |= TAG_SSYM;
+ memcpy(buffer + 1, data, size);
+ u64 word = 0;
+ memcpy(&word, buffer, sizeof(u64));
+ return (lisp_t *)word;
+}
+
+lisp_t *tag_bool(bool b)
+{
+ return TAG((u64)b, BOOL);
+}
+
+lisp_t *tag_vec(void *ptr)
+{
+ return TAG((u64)ptr, VEC);
+}
+
+lisp_t *tag_str(void *ptr)
+{
+ return TAG((u64)ptr, STR);
+}
+
+lisp_t *tag_char(u32 codepoint)
+{
+ u64 w = codepoint;
+ return TAG(w, CHAR);
+}
+
+enum Tag tag_get(lisp_t *ptr)
+{
+ static_assert(NUM_TAGS == 9);
+ if (!ptr)
+ return TAG_NIL;
+ else if (IS_TAG(ptr, INT))
+ return TAG_INT;
+ else if (IS_TAG(ptr, CHAR))
+ return TAG_CHAR;
+ else if (IS_TAG(ptr, SYM))
+ return TAG_SYM;
+ else if (IS_TAG(ptr, SSYM))
+ return TAG_SSYM;
+ else if (IS_TAG(ptr, BOOL))
+ return TAG_BOOL;
+ else if (IS_TAG(ptr, VEC))
+ return TAG_VEC;
+ else if (IS_TAG(ptr, STR))
+ return TAG_STR;
+ else if (IS_TAG(ptr, CONS))
+ return TAG_CONS;
+ return 0;
+}
diff --git a/lisp/tag.h b/lisp/tag.h
new file mode 100644
index 0000000..06b7b3f
--- /dev/null
+++ b/lisp/tag.h
@@ -0,0 +1,92 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program. If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-06
+ * Description:
+ */
+
+#ifndef TAG_H
+#define TAG_H
+
+#include <base.h>
+
+#include <stdbool.h>
+#include <stdlib.h>
+
+// Opaque structure to make tagged pointers a separate type from general
+// pointers
+typedef struct Obj lisp_t;
+
+#define NIL NULL
+
+enum Tag
+{
+ TAG_NIL = 0b00000000, // Atomic types
+ TAG_INT = 0b00000001, // special so we can encode 63 bit integers
+ TAG_SYM = 0b00000100,
+ TAG_SSYM = 0b00001000,
+ TAG_CHAR = 0b00001100,
+ TAG_BOOL = 0b00010000,
+ TAG_FLOAT = 0b00010100,
+ TAG_CONS = 0b00000010, // Container types
+ TAG_VEC = 0b00000110,
+ TAG_STR = 0b00001010,
+
+ NUM_TAGS = 9,
+};
+
+enum Shift
+{
+ SHIFT_INT = 1,
+ SHIFT_SSYM = 5,
+ SHIFT_CHAR = 8,
+ SHIFT_SYM = 8,
+ SHIFT_BOOL = 8,
+ SHIFT_FLOAT = 8,
+
+ SHIFT_CONS = 8,
+ SHIFT_VEC = 8,
+ SHIFT_STR = 8,
+};
+
+enum Mask
+{
+ MASK_INT = 0b00000001,
+ MASK_SSYM = 0b00011111,
+ MASK_SYM = 0b11111111,
+ MASK_CHAR = 0b11111111,
+ MASK_BOOL = 0b11111111,
+ MASK_FLOAT = 0b11111111,
+
+ MASK_CONS = 0b11111111,
+ MASK_VEC = 0b11111111,
+ MASK_STR = 0b11111111,
+};
+
+#define TAG(PTR, TYPE) ((lisp_t *)(((PTR) << SHIFT_##TYPE) | TAG_##TYPE))
+#define IS_TAG(PTR, TYPE) (((u64)(PTR) & MASK_##TYPE) == TAG_##TYPE)
+#define UNTAG(PTR, TYPE) (((u64)PTR) >> SHIFT_##TYPE)
+
+enum Tag tag_get(lisp_t *ptr);
+
+lisp_t *tag_int(i64 i);
+lisp_t *tag_char(u32 codepoint);
+lisp_t *tag_sym(void *ptr);
+lisp_t *tag_ssym(const char *data, size_t size);
+lisp_t *tag_bool(bool b);
+lisp_t *tag_vec(void *ptr);
+lisp_t *tag_str(void *ptr);
+lisp_t *tag_cons(void *ptr);
+
+#define INT_MAX ((1L << 62) - 1)
+#define INT_MIN (-(1L << 62))
+
+#endif