Initial implementation

2025-05-09 18:29:52 +01:00
parent 576bf0f308
commit ba5c0a4579
18 changed files with 2217 additions and 0 deletions
--- a/51
+++ b/51
@@ -0,0 +1,51 @@
+CC=gcc
+OUT=oats.out
+LIBS=-I.
+ARGS=
+
+RELEASE=0
+GFLAGS=-Wall -Wextra -Wswitch-enum -Werror -std=c11
+DFLAGS=-ggdb -fsanitize=address -fsanitize=undefined
+RFLAGS=-O3
+DEPFLAGS=-MT $@ -MMD -MP -MF
+
+ifeq ($(RELEASE), 1)
+CFLAGS=$(GFLAGS) $(RFLAGS) -DDEBUG=0
+else
+CFLAGS=$(GFLAGS) $(DFLAGS)
+endif
+
+DIST=build
+SRC:=tag.c memory.c vec.c sv.c lisp.c reader.c eval.c main.c
+OBJ:=$(SRC:%.c=$(DIST)/%.o)
+DEPDIR=$(DIST)/dependencies
+DEPS:=$(SRC:%.c=$(DEPDIR)/%.d)
+
+.PHONY: all
+all: $(DIST)/$(OUT)
+
+$(DIST)/%.o: %.c | $(DIST) $(DEPDIR)
+	$(CC) $(CFLAGS) $(DEPFLAGS) $(DEPDIR)/$*.d -c $< -o $@ $(LIBS)
+
+$(DIST)/$(OUT): $(OBJ) | $(DIST)
+	$(CC) $(CFLAGS) $^ -o $@ $(LIBS)
+
+.PHONY: run
+run: $(DIST)/$(OUT)
+	./$^ $(ARGS)
+
+.PHONY:
+clean:
+	rm -rfv $(DIST)/*
+
+$(DIST):
+	mkdir -p $(DIST)
+
+$(DEPDIR):
+	mkdir -p $(DEPDIR)
+
+.PHONY:
+watch:
+	find . -type 'f' -regex ".*.c\\|.*.h" | entr -cs "make run"
+
+-include $(DEPS)
--- a/base.h
+++ b/base.h
@@ -0,0 +1,78 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program.  If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-03-31
+ * Description: Base
+ */
+
+#ifndef BASE_H
+#define BASE_H
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+
+typedef int8_t i8;
+typedef int16_t i16;
+typedef int32_t i32;
+typedef int64_t i64;
+
+typedef float f32;
+typedef double f64;
+static_assert(sizeof(f32) == sizeof(u32));
+static_assert(sizeof(f64) == sizeof(u64));
+
+#define MAX(A, B)      ((A) > (B) ? (A) : (B))
+#define MIN(A, B)      ((A) < (B) ? (A) : (B))
+#define NTH_BYTE(X, N) (((X) >> (8 * N)) & ((1 << 8) - 1))
+#define ARR_SIZE(XS)   (sizeof(XS) / sizeof((XS)[0]))
+
+#define TODO(MSG) (assert(false && MSG));
+
+#ifndef DEBUG
+#define DEBUG 2
+#endif
+
+static inline void debug(char *fmt, ...)
+{
+#if DEBUG > 1
+  va_list ap;
+  va_start(ap, fmt);
+  vprintf(fmt, ap);
+  va_end(ap);
+#endif
+}
+
+static inline void info(char *fmt, ...)
+{
+  va_list ap;
+  va_start(ap, fmt);
+  vprintf(fmt, ap);
+  va_end(ap);
+}
+
+static inline void print_bits(u64 w)
+{
+  for (u64 i = 8 * sizeof(w); i > 0; --i)
+  {
+    printf("%c", ((w >> (i - 1)) & 1) == 1 ? '1' : '0');
+  }
+  printf("\n");
+}
+
+#endif
--- a/eval.c
+++ b/eval.c
@@ -0,0 +1,24 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program.  If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-18
+ * Description: Evaluator implementation
+ */
+
+#include "./eval.h"
+
+err_t eval(context_t *ctx, lisp_t *obj, lisp_t **ret)
+{
+  (void)ctx;
+  (void)obj;
+  (void)ret;
+  TODO("implement evaluator");
+}
--- a/eval.h
+++ b/eval.h
@@ -0,0 +1,28 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program.  If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-18
+ * Description: Evaluator
+ */
+
+#ifndef EVAL_H
+#define EVAL_H
+
+#include "./lisp.h"
+
+typedef enum
+{
+  ERR_OK = 0,
+} err_t;
+
+err_t eval(context_t *ctx, lisp_t *obj, lisp_t **ret);
+
+#endif
--- a/lisp.c
+++ b/lisp.c
@@ -0,0 +1,317 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program.  If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-06
+ * Description:
+ */
+
+#include "./lisp.h"
+#include "./sv.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <wchar.h>
+
+lisp_t *make_int(i64 integer)
+{
+  return tag_int(integer);
+}
+
+lisp_t *make_sym(context_t *ctx, char *data, u64 size)
+{
+  // Small symbol optimisation
+  if (size <= 7)
+    return tag_ssym(data, size);
+
+  // Make a cell with the data we need
+  cell_t *cell = context_alloc(ctx, sizeof(*cell) + size);
+  cell->size   = size;
+  memcpy(cell->data, data, size);
+
+  return tag_sym(cell);
+}
+
+lisp_t *make_cons(context_t *ctx, lisp_t *car, lisp_t *cdr)
+{
+  cons_t *cons = context_alloc(ctx, sizeof(*cons));
+  memset(cons, 0, sizeof(*cons));
+  cons->car = car;
+  cons->cdr = cdr;
+  return tag_cons(cons);
+}
+
+lisp_t *make_list(context_t *ctx, lisp_t **lisps, u64 size)
+{
+  if (!lisps)
+    return NIL;
+  lisp_t *cur = NIL;
+  for (u64 i = size; i > 0; --i)
+    cur = make_cons(ctx, lisps[i - 1], cur);
+  return cur;
+}
+
+lisp_t *make_vec(context_t *ctx, u32 size)
+{
+  // Make a vector with the content
+  lisp_t *container = NIL;
+  vec_t *vec        = context_alloc(ctx, sizeof(*vec));
+  vec->size         = 0;
+  vec->cap          = size * sizeof(&container);
+  if (size == 0)
+    vec->data = NULL;
+  else
+    vec->data = context_alloc(ctx, vec->cap);
+  container = tag_vec(vec);
+
+  return container;
+}
+
+lisp_t *make_str(context_t *ctx, char *data, u64 size)
+{
+  if (size == 0)
+    // No need to allocate unless necessary
+    return tag_str(NIL);
+  // Make a vector with the content
+  vec_t *vec = context_alloc(ctx, sizeof(*vec));
+  vec->data  = context_alloc(ctx, sizeof(*vec->data) * size);
+  vec->cap   = size;
+  vec->size  = size;
+  memcpy(vec->data, data, size);
+
+  return tag_str(vec);
+}
+
+i64 as_int(lisp_t *obj)
+{
+  assert(IS_TAG(obj, INT));
+  u64 p_obj = (u64)obj;
+  return UNTAG(p_obj, INT) |               // Delete the tag
+         (NTH_BYTE(p_obj, 7) & 0x80) << 56 // duplicate the MSB (preserve sign)
+      ;
+}
+
+cons_t *as_cons(lisp_t *obj)
+{
+  assert(IS_TAG(obj, CONS));
+  return (cons_t *)UNTAG(obj, CONS);
+}
+
+u32 as_char(lisp_t *obj)
+{
+  return (u32)UNTAG(obj, CHAR);
+}
+
+cell_t *as_sym(lisp_t *obj)
+{
+  assert(IS_TAG(obj, SYM));
+  return (cell_t *)UNTAG(obj, SYM);
+}
+
+void as_ssym(lisp_t *obj, sv_t *container)
+{
+  assert(IS_TAG(obj, SSYM));
+  u64 p_obj       = (u64)obj;
+  container->size = NTH_BYTE(p_obj, 0) >> SHIFT_SSYM;
+  p_obj >>= 8;
+  memcpy(container->data, &p_obj, container->size);
+}
+
+bool as_bool(lisp_t *obj)
+{
+  assert(IS_TAG(obj, BOOL));
+  return (bool)UNTAG(obj, BOOL);
+}
+
+vec_t *as_vec(lisp_t *obj)
+{
+  assert(IS_TAG(obj, VEC));
+  return (vec_t *)UNTAG(obj, VEC);
+}
+
+vec_t *as_str(lisp_t *obj)
+{
+  assert(IS_TAG(obj, STR));
+  return (vec_t *)UNTAG(obj, STR);
+}
+
+sv_t serialise(context_t *ctx, lisp_t *ptr)
+{
+  enum Tag t = tag_get(ptr);
+  switch (t)
+  {
+  case TAG_NIL:
+  {
+    return SV("NIL", 3);
+  }
+  case TAG_INT:
+  {
+    i64 i  = as_int(ptr);
+    sv_t s = sv_fmt(&ctx->scratch,
+#if DEBUG > 1
+                    "int["
+#endif
+                    "%ld"
+#if DEBUG > 1
+                    "]"
+#endif
+                    ,
+                    i);
+    return s;
+  }
+  case TAG_CHAR:
+  {
+    u32 codepoint = as_char(ptr);
+    return sv_fmt(&ctx->scratch,
+#if DEBUG > 1
+                  "char["
+#endif
+                  "%lc"
+#if DEBUG > 1
+                  "]"
+#endif
+                  ,
+                  codepoint);
+  }
+  case TAG_SYM:
+  {
+    cell_t *cell = as_sym(ptr);
+    sv_t s       = sv_make(&ctx->scratch, (char *)cell->data, cell->size);
+#if DEBUG > 1
+    s = sv_fmt(&ctx->scratch, "sym[" PR_SV "]", SV_FMT(s));
+#endif
+    return s;
+  }
+  case TAG_SSYM:
+  {
+    char data[7];
+    sv_t rsym = SV(data, 0);
+    as_ssym(ptr, &rsym);
+    sv_t s = sv_copy(&ctx->scratch, rsym);
+
+#if DEBUG > 1
+    s = sv_fmt(&ctx->scratch, "ssym[" PR_SV "]", SV_FMT(s));
+#endif
+    return s;
+  }
+  case TAG_BOOL:
+  {
+    return sv_fmt(&ctx->scratch,
+#if DEBUG > 1
+                  "bool["
+#endif
+                  "%s"
+#if DEBUG > 1
+                  "]"
+#endif
+                  ,
+                  as_bool(ptr) ? "#t" : "#f");
+  }
+  case TAG_CONS:
+  {
+    if (!CAR(ptr) && !CDR(ptr))
+#if DEBUG > 1
+      return SV("lst[NIL]", 8);
+#else
+      return SV("NIL", 3);
+#endif
+
+    sv_t s = {0};
+
+    for (lisp_t *lsp = ptr; lsp; lsp = IS_TAG(lsp, CONS) ? CDR(lsp) : NIL)
+    {
+      lisp_t *lmember = lsp;
+      if (IS_TAG(lsp, CONS))
+        lmember = CAR(lmember);
+
+      sv_t member = serialise(ctx, lmember);
+      s           = sv_concat(&ctx->scratch, s, member);
+      if (IS_TAG(lsp, CONS) && !CDR(lsp))
+        continue;
+      else if (IS_TAG(CDR(lsp), CONS))
+        // normal list
+        s = sv_append(&ctx->scratch, s, " ", 1);
+      else
+        // dotted list
+        s = sv_append(&ctx->scratch, s, " . ", 3);
+    }
+
+#if DEBUG > 1
+    s = sv_fmt(&ctx->scratch, "lst[" PR_SV "]", SV_FMT(s));
+#else
+    s = sv_fmt(&ctx->scratch, "(" PR_SV ")", SV_FMT(s));
+#endif
+    return s;
+  }
+  case TAG_VEC:
+  {
+    vec_t *vec = as_vec(ptr);
+    if (!vec)
+#if DEBUG > 1
+      return SV("vec[NIL]", 8);
+#else
+      return SV("#()", 3);
+#endif
+    else if (vec->size < sizeof(&ptr))
+#if DEBUG > 1
+      return SV("vec[0/0 #()]", 13);
+#else
+      return SV("#()", 3);
+#endif
+
+    sv_t s = {0};
+    for (u64 i = 0; i < vec->size / sizeof(&ptr); ++i)
+    {
+      lisp_t *lmember = ((lisp_t **)vec->data)[i];
+      sv_t member     = serialise(ctx, lmember);
+      s               = sv_concat(&ctx->scratch, s, member);
+      if (i == ((vec->size / sizeof(&ptr)) - 1))
+        continue;
+      s = sv_append(&ctx->scratch, s, " ", 1);
+    }
+
+#if DEBUG > 1
+    s = sv_fmt(&ctx->scratch, "vec[%lu/%lu #(" PR_SV ")]", vec->size, vec->cap,
+               SV_FMT(s));
+#else
+    s = sv_fmt(&ctx->scratch, "#(" PR_SV ")", SV_FMT(s));
+#endif
+    return s;
+    break;
+  }
+  case TAG_STR:
+  {
+    vec_t *vec = as_str(ptr);
+    sv_t sv    = {0};
+    if (vec)
+      sv = SV((char *)vec->data, vec->size);
+    else
+      sv = SV("", 0);
+    return sv_fmt(&ctx->scratch,
+#if DEBUG > 1
+                  "str["
+#else
+                  "\""
+#endif
+                  PR_SV
+#if DEBUG > 1
+                  "]"
+#else
+                  "\""
+#endif
+                  ,
+                  SV_FMT(sv));
+  }
+  case NUM_TAGS:
+  default:
+    assert(false && "serialise: unreachable");
+    return SV(0, 0);
+  }
+}
--- a/lisp.h
+++ b/lisp.h
@@ -0,0 +1,59 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program.  If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-06
+ * Description: Object model where we deal with contexts
+ */
+
+#ifndef LISP_H
+#define LISP_H
+
+#include "./base.h"
+#include "./memory.h"
+#include "./sv.h"
+#include "./tag.h"
+#include "./vec.h"
+
+typedef struct Cons
+{
+  lisp_t *car, *cdr;
+} cons_t;
+
+typedef struct Cell
+{
+  u64 size;
+  u8 data[];
+} cell_t;
+
+#define NIL 0
+lisp_t *make_int(i64 integer);
+lisp_t *make_sym(context_t *ctx, char *data, u64 size);
+lisp_t *make_cons(context_t *ctx, lisp_t *car, lisp_t *cdr);
+lisp_t *make_list(context_t *ctx, lisp_t **lisps, u64 size);
+lisp_t *make_vec(context_t *ctx, u32 size);
+lisp_t *make_str(context_t *ctx, char *data, u64 size);
+
+i64 as_int(lisp_t *obj);
+u32 as_char(lisp_t *obj);
+cell_t *as_sym(lisp_t *obj);
+void as_ssym(lisp_t *obj, sv_t *sv);
+bool as_bool(lisp_t *obj);
+f64 as_float(lisp_t *obj);
+cons_t *as_cons(lisp_t *obj);
+vec_t *as_vec(lisp_t *obj);
+vec_t *as_str(lisp_t *obj);
+
+sv_t serialise(context_t *ctx, lisp_t *lisp);
+
+#define CAR(PTR) (as_cons(PTR)->car)
+#define CDR(PTR) (as_cons(PTR)->cdr)
+
+#endif
--- a/main.c
+++ b/main.c
@@ -0,0 +1,92 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program.  If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-05
+ * Description: Entrypoint
+ */
+
+#include "./base.h"
+#include "./lisp.h"
+#include "./memory.h"
+#include "./reader.h"
+#include "./sv.h"
+#include "./vec.h"
+
+#include <assert.h>
+#include <ctype.h>
+#include <malloc.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+int main(int argc, char *argv[])
+{
+  int exit             = 0;
+  context_t ctx        = {0};
+  const char *filename = NULL;
+#if 1
+  if (argc > 1)
+    filename = argv[1];
+  else
+    filename = "./r7rs-tests.scm";
+
+  FILE *fp = fopen(filename, "r");
+  if (!fp)
+  {
+    fprintf(stderr, "[ERROR]: File `%s` does not exist\n", filename);
+    exit = 1;
+    goto end;
+  }
+  input_t inp = {0};
+  input_from_fp(&ctx, &inp, filename, fp);
+  fclose(fp);
+#else
+  filename        = "<example>";
+  char inp_data[] = "(print (+ 34 35))";
+  input_t inp     = {0};
+  sv_t inp_sv     = SV(inp_data, sizeof(inp_data));
+  input_from_sv(&ctx, &inp, filename, inp_sv);
+#endif
+
+  if (inp.str.size == 0)
+  {
+    info("[WARNING] `%s` is empty.\n", filename);
+    goto end;
+  }
+  debug("[file read]: %luB read from `%s`\n", inp.str.size, filename);
+
+  // Setup a vector to hold all the lisps
+  vec_t results = {0};
+  perr_t perr   = parse_all(&ctx, &inp, &results);
+  if (perr)
+  {
+    exit = perr;
+    print_perror(stderr, &inp, perr);
+    goto end;
+  }
+  context_reset_read(&ctx);
+
+  lisp_t **lisps = (lisp_t **)results.data;
+  u64 size       = results.size / sizeof(*lisps);
+  for (u64 i = 0; i < size; ++i)
+  {
+    lisp_t *lisp = lisps[i];
+    // printf("tag=%x\n", tag_get(lisp));
+    sv_t serialised = serialise(&ctx, lisp);
+    info("lisp[%lu]: %p => " PR_SV "\n", i, lisp, SV_FMT(serialised));
+  }
+  context_report(&ctx);
+
+end:
+
+  context_cleanup(&ctx);
+  return exit;
+}
--- a/memory.c
+++ b/memory.c
@@ -0,0 +1,275 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program.  If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-05
+ * Description: Implementations for memory models.
+ */
+
+#include "./memory.h"
+
+#include <malloc.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+page_t *page_create(u64 size)
+{
+  size           = MAX(size, PAGE_DEFAULT_SIZE);
+  page_t *page   = calloc(1, sizeof(*page) + size);
+  page->next     = NULL;
+  page->size     = 0;
+  page->capacity = size;
+  return page;
+}
+
+void page_resize(page_t **page, u64 size)
+{
+  if (!page)
+    return;
+  else if (!(*page))
+    *page = page_create(size);
+  else if (page[0]->capacity < size)
+  {
+    page[0]->capacity = MAX(size, page[0]->capacity * 1.5);
+    page[0]           = realloc(page[0], sizeof(*page[0]) + page[0]->capacity);
+  }
+}
+
+i64 page_append(page_t *page, void *data, u64 size)
+{
+  if (!page || page->size + size >= page->capacity)
+    return -1;
+  if (data)
+    memcpy(page->data + page->size, data, size);
+  u64 ptr = page->size;
+  page->size += size;
+  return ptr;
+}
+
+u64 page_rappend(page_t **page, void *data, u64 size)
+{
+  page_resize(page, page[0]->size + size);
+  if (data)
+    memcpy(page[0]->data + page[0]->size, data, size);
+  u64 ptr = page[0]->size;
+  page[0]->size += size;
+  return ptr;
+}
+
+void *arena_alloc(arena_t *arena, u64 size)
+{
+  if (!arena)
+    return NULL;
+  else if (!arena->start)
+  {
+    arena->start = page_create(1);
+    arena->end   = arena->start;
+  }
+
+  page_t *best_fit;
+  for (best_fit = arena->start; best_fit; best_fit = best_fit->next)
+    if (best_fit->size + size + MEMORY_ALIGNMENT < best_fit->capacity)
+      break;
+
+  if (!best_fit)
+  {
+    best_fit         = page_create(size);
+    arena->end->next = best_fit;
+    arena->end       = best_fit;
+  }
+
+  // NOTE: Fsanitize has a hissy fit if we don't align memory "correctly"
+  u64 offset = 0;
+
+  if (size % MEMORY_ALIGNMENT == 0)
+  {
+    u64 div = (((u64)(best_fit->data + best_fit->size)) % MEMORY_ALIGNMENT);
+    if (div != 0)
+      offset = MEMORY_ALIGNMENT - div;
+  }
+
+  void *start = best_fit->data + best_fit->size + offset;
+
+  best_fit->size += size + offset;
+
+  return start;
+}
+
+void *arena_realloc(arena_t *arena, void *ptr, u64 oldsize, u64 newsize)
+{
+  if (!ptr)
+    return arena_alloc(arena, newsize);
+  else if (!arena || !arena->start)
+    return NULL;
+  else if (newsize <= oldsize)
+    // No need to change anything.
+    return ptr;
+
+  bool copy_into_new = true;
+  void *start        = NULL;
+  page_t *old_page = NULL, *best_fit = NULL;
+
+  for (page_t *page = arena->start; page; page = page->next)
+  {
+    if (!best_fit && page->size + newsize < page->capacity)
+      best_fit = page;
+    if (page->data <= (u8 *)ptr &&
+        (u8 *)(ptr) + oldsize <= page->data + page->capacity)
+      old_page = page;
+  }
+
+  // If the old page exists, ptr is the latest allocation in it, and it has
+  // enough space to contain the new size, just resize and return.
+  if (old_page && old_page->data + old_page->size - oldsize == ptr &&
+      old_page->size - oldsize + newsize < old_page->capacity)
+  {
+    start = ptr;
+    old_page->size += newsize - oldsize;
+    copy_into_new = false;
+  }
+  else
+  {
+    if (!old_page)
+      copy_into_new = false;
+    if (!best_fit)
+    {
+      best_fit         = page_create(newsize);
+      arena->end->next = best_fit;
+      arena->end       = best_fit;
+    }
+
+    start = best_fit->data + best_fit->size;
+    best_fit->size += newsize;
+  }
+
+  if (copy_into_new)
+  {
+    memcpy(start, ptr, oldsize);
+    memset(start + oldsize, 0, newsize - oldsize);
+  }
+
+  return start;
+}
+
+void arena_attach(arena_t *arena, page_t *page)
+{
+  if (!arena || !page)
+    return;
+  else if (!arena->start || !arena->end)
+  {
+    arena->start = page;
+    arena->end   = page;
+  }
+  else
+  {
+    page->next   = arena->start;
+    arena->start = page;
+  }
+}
+
+void arena_reset(arena_t *arena)
+{
+  if (!arena || !arena->start)
+    return;
+
+  for (page_t *cur = arena->start; cur; cur = cur->next)
+  {
+    if (cur->size == 0)
+      continue;
+    cur->size = 0;
+    memset(cur->data, 0, cur->capacity);
+  }
+}
+
+void arena_cleanup(arena_t *arena)
+{
+  if (!arena || !arena->start)
+    return;
+
+  for (page_t *cur = arena->start, *next = NULL; cur;
+       next = cur->next, free(cur), cur = next)
+    continue;
+
+  memset(arena, 0, sizeof(*arena));
+}
+
+// Allocates against stable memory i.e. we can have pointers of this lying
+// around without any fear of them being thrown away.
+void *context_alloc(context_t *context, u64 size)
+{
+  return arena_alloc(&context->memory, size);
+}
+
+// Allocate against a "scratch space", separate from main memory, for internal
+// use.
+void *context_salloc(context_t *context, u64 size)
+{
+  return arena_alloc(&context->scratch, size);
+}
+
+void context_reset_read(context_t *context)
+{
+  arena_reset(&context->read);
+}
+
+void context_reset_scratch(context_t *context)
+{
+  arena_reset(&context->scratch);
+}
+
+void context_reset(context_t *context)
+{
+  arena_reset(&context->memory);
+  arena_reset(&context->read);
+  arena_reset(&context->scratch);
+}
+
+void context_cleanup(context_t *context)
+{
+  if (!context)
+    return;
+  arena_cleanup(&context->memory);
+  arena_cleanup(&context->read);
+  arena_cleanup(&context->scratch);
+  memset(context, 0, sizeof(*context));
+}
+
+void context_report(context_t *context)
+{
+#if DEBUG
+  // Figure this out at runtime
+  u64 mem_used = 0, mem_cap = 0;
+  for (page_t *page = context->memory.start; page; page = page->next)
+  {
+    mem_used += page->size;
+    mem_cap += page->capacity;
+  }
+
+  u64 read_used = 0, read_cap = 0;
+  for (page_t *page = context->read.start; page; page = page->next)
+  {
+    read_used += page->size;
+    read_cap += page->capacity;
+  }
+
+  u64 scr_used = 0, scr_cap = 0;
+  for (page_t *page = context->scratch.start; page; page = page->next)
+  {
+    scr_used += page->size;
+    scr_cap += page->capacity;
+  }
+
+  info("<Context>: %luB/%luB main memory used\n", mem_used, mem_cap);
+  info("<Context>: %luB/%luB read space used\n", read_used, read_cap);
+  info("<Context>: %luB/%luB scratch space used\n", scr_used, scr_cap);
+#endif
+}
--- a/memory.h
+++ b/memory.h
@@ -0,0 +1,66 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program.  If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-05
+ * Description: Memory management structures
+ */
+
+#ifndef MEMORY_H
+#define MEMORY_H
+
+#include "./base.h"
+#include <stdio.h>
+
+#define PAGE_DEFAULT_SIZE 1024
+#define MEMORY_ALIGNMENT  8
+
+typedef struct Page
+{
+  struct Page *next;
+  u64 size, capacity;
+  u8 data[];
+} page_t;
+
+page_t *page_create(u64 size);
+void page_resize(page_t **page, u64 newsize);
+// Append - fail (by returning <0) if not enough space.
+i64 page_append(page_t *page, void *data, u64 size);
+// Append - will resize if necessary
+u64 page_rappend(page_t **page, void *data, u64 size);
+
+typedef struct Aren
+{
+  page_t *start, *end;
+} arena_t;
+
+void *arena_alloc(arena_t *arena, u64 size);
+void *arena_realloc(arena_t *arena, void *ptr, u64 oldsize, u64 newsize);
+void *arena_copy(arena_t *arena, void *ptr, u64 size);
+void arena_attach(arena_t *arena, page_t *page);
+void arena_reset(arena_t *arena);
+void arena_cleanup(arena_t *arena);
+
+typedef struct Context
+{
+  arena_t memory, read, scratch;
+} context_t;
+
+void *context_alloc(context_t *context, u64 size);
+void *context_salloc(context_t *context, u64 size);
+page_t *context_get_read_page(context_t *ctx);
+void context_reset_read(context_t *context);
+void context_reset_scratch(context_t *context);
+
+void context_reset(context_t *context);
+void context_cleanup(context_t *context);
+void context_report(context_t *context);
+
+#endif
--- a/reader.c
+++ b/reader.c
@@ -0,0 +1,419 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program.  If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-16
+ * Description: Implementation of parser
+ */
+
+#include "./reader.h"
+
+#include <ctype.h>
+#include <string.h>
+
+bool is_digit(char c)
+{
+  return isdigit(c);
+}
+
+bool is_alpha(char c)
+{
+  return isalpha(c);
+}
+
+bool is_space(char c)
+{
+  return isspace(c);
+}
+
+bool is_skip(char c)
+{
+  return is_space(c) || c == ';';
+}
+
+bool is_sym(char c)
+{
+  return strchr(SYM_CHARS, c) != NULL;
+}
+
+void input_from_sv(context_t *ctx, input_t *inp, const char *name, sv_t sv)
+{
+  inp->name = name;
+  inp->str  = sv_copy(&ctx->read, sv);
+}
+
+void input_from_fp(context_t *ctx, input_t *input, const char *name, FILE *fp)
+{
+  input->name = name;
+  // TODO: Choose a best fit (i.e. maximal capacity, unused) page
+  page_t *page = page_create(INPUT_CHUNK_SIZE);
+  // chunk should be in scratch space so we can reset it later.
+  char *chunk = context_salloc(ctx, INPUT_CHUNK_SIZE);
+
+  u64 total_size = 0, size_read = 0;
+  while (!feof(fp))
+  {
+    size_read = fread(chunk, 1, INPUT_CHUNK_SIZE, fp);
+    if (size_read > 0)
+    {
+      page_rappend(&page, chunk, size_read);
+      total_size += size_read;
+    }
+  }
+
+  input->str = SV((char *)page->data, total_size);
+
+  // Memory cleanup
+  context_reset_scratch(ctx);
+  arena_attach(&ctx->read, page);
+}
+
+bool input_eof(input_t *input)
+{
+  return !input || (input->offset >= input->str.size) ||
+         (input->str.data[input->offset] == '\0');
+}
+
+char input_peek(input_t *input, u64 offset)
+{
+  if (input_eof(input) || input->offset + offset >= input->str.size)
+    return '\0';
+  return input->str.data[input->offset + offset];
+}
+
+char input_next(input_t *input, u64 offset)
+{
+  if (input_eof(input) || input->offset + offset >= input->str.size)
+    return '\0';
+  input->offset += offset;
+  return input->str.data[input->offset];
+}
+
+void input_skip(input_t *inp)
+{
+  // n + 2 lookup
+  sv_t current = sv_cut(inp->str, inp->offset);
+  sv_t lookup  = sv_chop(current, 2);
+  while ((!input_eof(inp) && is_space(lookup.data[0])) ||
+         lookup.data[0] == ';' || strncmp(lookup.data, "#|", 2) == 0)
+  {
+    if (lookup.data[0] == ';')
+    {
+      i64 newline = sv_find_subcstr(current, "\n", 1);
+      if (newline < 0)
+        inp->offset = inp->str.size;
+      else
+        inp->offset += newline + 1;
+    }
+    else if (strncmp(lookup.data, "#|", 2) == 0)
+    {
+      i64 offset = sv_find_subcstr(current, "|#", 2);
+      if (offset < 0)
+        inp->offset = inp->str.size;
+      else
+        inp->offset += offset + 2;
+    }
+
+    inp->offset += sv_while(sv_cut(inp->str, inp->offset), is_space);
+    current = sv_cut(inp->str, inp->offset);
+    lookup  = sv_chop(current, 2);
+  }
+}
+
+perr_t parse_int(context_t *ctx, input_t *inp, lisp_t **ret)
+{
+  debug("parse_int[%lu] => ", inp->offset);
+
+  // TODO: Parse arbitrary sized integers
+  (void)ctx;
+
+  bool negative = (input_peek(inp, 0) == '-');
+  sv_t current  = sv_cut(inp->str, inp->offset + (negative ? 1 : 0));
+  sv_t digits   = sv_chop(current, sv_while(current, is_digit));
+
+  debug("`" PR_SV "` => ", SV_FMT(digits));
+  i64 x = (negative ? -1L : 1L) * strtol(digits.data, NULL, 10);
+  debug("%ld\n", x);
+
+  input_next(inp, digits.size + (negative ? 1 : 0));
+
+  *ret = make_int(x);
+  return PERR_OK;
+}
+
+perr_t parse_sym(context_t *ctx, input_t *inp, lisp_t **ret)
+{
+  debug("parse_sym[%lu] => ", inp->offset);
+
+  sv_t current = sv_cut(inp->str, inp->offset);
+  sv_t sym     = sv_chop(current, sv_while(current, is_sym));
+  debug("`" PR_SV "`\n", SV_FMT(sym));
+
+  if (sym.size == 3)
+  {
+    // NOTE: We can't mutate sym directly because it's on `read` space.
+
+    // TODO: Make this beautiful please.
+    char buf[3];
+    for (u64 i = 0; i < 3; ++i)
+      buf[i] = toupper(sym.data[i]);
+
+    // NOTE: NIL symbol to actual NIL
+    if (strncmp(buf, "NIL", 3) == 0)
+    {
+      input_next(inp, 3);
+      return NIL;
+    }
+  }
+
+  lisp_t *lsym = make_sym(ctx, sym.data, sym.size);
+  input_next(inp, sym.size);
+  *ret = lsym;
+
+  return PERR_OK;
+}
+
+perr_t parse_bool(context_t *ctx, input_t *inp, lisp_t **ret)
+{
+  (void)ctx;
+  debug("parse_bool[%lu] => ", inp->offset);
+  char c = input_peek(inp, 1);
+  bool b = -1;
+  if (c == 't')
+    b = true;
+  else if (c == 'f')
+    b = false;
+  else
+    return PERR_EXPECTED_BOOLEAN;
+  *ret = tag_bool(b);
+  input_next(inp, 2);
+  return PERR_OK;
+}
+
+perr_t parse_cons(context_t *ctx, input_t *inp, lisp_t **ret)
+{
+  // TODO: Put this in a symbol table
+  lisp_t *lisp_dot = make_sym(ctx, ".", 1);
+  debug("parse_cons[%lu] => (\n", inp->offset);
+  inp->offset += 1;
+
+  lisp_t *root = NIL;
+  lisp_t **cur = NIL;
+  bool dotted  = false;
+
+  while (!input_eof(inp) && input_peek(inp, 0) != ')')
+  {
+    lisp_t *lisp = NIL;
+    perr_t res   = parse(ctx, inp, &lisp);
+    if (res)
+      return res;
+
+    // This is cheap to do
+    if (lisp == lisp_dot)
+    {
+      dotted = true;
+      continue;
+    }
+
+    if (!root)
+    {
+      root = make_cons(ctx, lisp, NIL);
+      cur  = &root;
+    }
+    else if (!dotted)
+      *cur = make_cons(ctx, lisp, NIL);
+    else
+      *cur = lisp;
+
+    if (cur && !dotted)
+      cur = &as_cons(*cur)->cdr;
+
+    input_skip(inp);
+  }
+
+  if (input_peek(inp, 0) != ')')
+    return PERR_EXPECTED_CLOSE_BRACKET;
+
+  input_next(inp, 1);
+
+  debug(")\n");
+  *ret = root;
+  return PERR_OK;
+}
+
+perr_t parse_vec(context_t *ctx, input_t *inp, lisp_t **ret)
+{
+  debug("parse_vec[%lu] => [\n", inp->offset);
+  input_next(inp, 2);
+
+  lisp_t *lvec = make_vec(ctx, 0);
+  vec_t *vec   = as_vec(lvec);
+
+  while (!input_eof(inp) && input_peek(inp, 0) != ')')
+  {
+    lisp_t *lisp = NIL;
+    perr_t res   = parse(ctx, inp, &lisp);
+    if (res)
+      return res;
+
+    vec_append(&ctx->memory, vec, &lisp, sizeof(lisp));
+    input_skip(inp);
+  }
+
+  if (input_peek(inp, 0) != ')')
+    return PERR_EXPECTED_CLOSE_BRACKET;
+
+  input_next(inp, 1);
+
+  debug("]\n");
+  *ret = lvec;
+  return PERR_OK;
+}
+
+perr_t parse_str(context_t *ctx, input_t *inp, lisp_t **ret)
+{
+  debug("parse_str[%lu] => ", inp->offset);
+  input_next(inp, 1); // 1 for the first speechmark
+  sv_t sv  = sv_cut(inp->str, inp->offset);
+  i64 size = sv_find_subcstr(sv, "\"", 1);
+  if (size < 0)
+    return PERR_EXPECTED_SPEECH_MARK;
+
+  input_next(inp, size + 1); // 1 for that last speechmark
+  sv_t str_content = sv_chop(sv, size);
+  debug("\"" PR_SV "\"\n", SV_FMT(str_content));
+  *ret = make_str(ctx, str_content.data, str_content.size);
+  return PERR_OK;
+}
+
+perr_t parse_quote(context_t *ctx, input_t *inp, lisp_t **ret)
+{
+  char c = input_peek(inp, 0);
+  if (!(c == '\'' || c == '`'))
+    return PERR_UNEXPECTED_CHAR;
+  input_next(inp, 1);
+  sv_t prefix = {0};
+  if (c == '\'')
+    prefix = SV("quote", 5);
+  else if (c == '`')
+    prefix = SV("quasiquote", 10);
+  lisp_t *root = make_cons(ctx, make_sym(ctx, prefix.data, prefix.size), NIL);
+  lisp_t *rest = NIL;
+  perr_t perr  = parse(ctx, inp, &rest);
+  if (perr)
+    return perr;
+  CDR(root) = make_cons(ctx, rest, NIL);
+  *ret      = root;
+  return PERR_OK;
+}
+
+// TODO: Make this interactable with user once we have evaluation
+perr_t parse_reader_macro(context_t *ctx, input_t *inp, lisp_t **ret)
+{
+  char c = input_peek(inp, 1);
+  if (c == '\\')
+  {
+    // character or weird base integer
+    TODO("Not implemented reader macro for characters or weird bases");
+  }
+  else if (c == '(')
+  {
+    return parse_vec(ctx, inp, ret);
+  }
+  else if (c == 't' || c == 'f')
+    return parse_bool(ctx, inp, ret);
+  return PERR_UNEXPECTED_READER_MACRO_SYMBOL;
+}
+
+static_assert(NUM_TAGS == 9);
+perr_t parse(context_t *ctx, input_t *inp, lisp_t **ret)
+{
+  debug("parse => ");
+  input_skip(inp);
+  if (input_eof(inp))
+    return PERR_EOF;
+
+  char c = input_peek(inp, 0);
+
+  if (is_digit(c) || (c == '-' && is_digit(input_peek(inp, 1))))
+    return parse_int(ctx, inp, ret);
+  else if (c == '#')
+    return parse_reader_macro(ctx, inp, ret);
+  else if (is_sym(c))
+    return parse_sym(ctx, inp, ret);
+  else if (c == '(')
+    return parse_cons(ctx, inp, ret);
+  else if (c == '\'' || c == '`')
+    return parse_quote(ctx, inp, ret);
+  else if (c == '\"')
+    return parse_str(ctx, inp, ret);
+  else
+    return PERR_UNEXPECTED_CHAR;
+}
+
+perr_t parse_all(context_t *ctx, input_t *inp, vec_t *vec)
+{
+  while (!input_eof(inp))
+  {
+    lisp_t *member = NIL;
+    perr_t err     = parse(ctx, inp, &member);
+
+    if (err)
+      return err;
+    else
+      vec_append(&ctx->scratch, vec, &member, sizeof(member));
+
+    input_skip(inp);
+  }
+  return PERR_OK;
+}
+
+int print_perror(FILE *fp, input_t *inp, perr_t error)
+{
+  pos_t pos = input_offset_to_pos(inp);
+  fprintf(fp, "%s:%lu:%lu: %s", inp->name, pos.line, pos.col,
+          perr_to_cstr(error));
+  switch (error)
+  {
+  case PERR_UNEXPECTED_CHAR:
+    fprintf(fp, "(`%c`)", input_peek(inp, 0));
+    break;
+  case PERR_OK:
+  case PERR_EOF:
+  case PERR_EXPECTED_BOOLEAN:
+  case PERR_UNEXPECTED_READER_MACRO_SYMBOL:
+  case PERR_EXPECTED_CLOSE_BRACKET:
+  case PERR_EXPECTED_SPEECH_MARK:
+  default:
+    break;
+  }
+  fprintf(stderr, "\n");
+
+  return error;
+}
+
+pos_t input_offset_to_pos(input_t *inp)
+{
+  pos_t pos = {.col = 1, .line = 1};
+  for (u64 i = 0; i < inp->offset && i < inp->str.size; ++i)
+  {
+    char c = (inp->str.data[i]);
+    if (c == '\n')
+    {
+      ++pos.line;
+      pos.col = 1;
+    }
+    else
+    {
+      ++pos.col;
+    }
+  }
+  return pos;
+}
--- a/reader.h
+++ b/reader.h
@@ -0,0 +1,98 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program.  If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-16
+ * Description: Parsing Lisp!
+ */
+
+#ifndef READER_H
+#define READER_H
+
+#include "./lisp.h"
+#include "./memory.h"
+#include "./vec.h"
+
+#define INPUT_CHUNK_SIZE 512
+static const char SYM_CHARS[] =
+    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+    "¬!£$%^&*_-+={[]}:@~#<,>.?/";
+
+typedef struct
+{
+  const char *name;
+  u64 offset;
+  sv_t str;
+} input_t;
+
+void input_from_sv(context_t *ctx, input_t *inp, const char *name, sv_t sv);
+void input_from_fp(context_t *ctx, input_t *input, const char *name, FILE *fp);
+bool input_eof(input_t *input);
+
+typedef struct
+{
+  u64 col, line;
+} pos_t;
+
+pos_t input_offset_to_pos(input_t *inp);
+
+typedef enum
+{
+  PERR_OK = 0,
+  PERR_EOF,
+  PERR_UNEXPECTED_CHAR,
+  PERR_EXPECTED_CLOSE_BRACKET,
+  PERR_EXPECTED_SPEECH_MARK,
+  PERR_UNEXPECTED_READER_MACRO_SYMBOL,
+  PERR_EXPECTED_BOOLEAN,
+} perr_t;
+
+static inline const char *perr_to_cstr(perr_t perr)
+{
+  switch (perr)
+  {
+  case PERR_OK:
+    return "OK";
+    break;
+  case PERR_EOF:
+    return "EOF";
+    break;
+  case PERR_UNEXPECTED_CHAR:
+    return "UNEXPECTED_CHAR";
+    break;
+  case PERR_EXPECTED_CLOSE_BRACKET:
+    return "EXPECTED_CLOSE_BRACKET";
+    break;
+  case PERR_EXPECTED_SPEECH_MARK:
+    return "EXPECTED_SPEECH_MARK";
+    break;
+  case PERR_UNEXPECTED_READER_MACRO_SYMBOL:
+    return "UNEXPECTED_READER_MACRO_SYMBOL";
+    break;
+  case PERR_EXPECTED_BOOLEAN:
+    return "EXPECTED_BOOLEAN";
+    break;
+  }
+  assert(false && "perr_to_cstr: unreachable");
+  return "";
+}
+
+typedef struct
+{
+  lisp_t *result;
+  perr_t error;
+} pres_t;
+
+perr_t parse(context_t *ctx, input_t *str, lisp_t **ret);
+perr_t parse_all(context_t *ctx, input_t *str, vec_t *vec);
+
+int print_perror(FILE *fp, input_t *inp, perr_t error);
+
+#endif
--- a/sv.c
+++ b/sv.c
@@ -0,0 +1,158 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program.  If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-14
+ * Description: String View implementation
+ */
+
+#include "./sv.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+sv_t sv_make(arena_t *allocator, const char *data, u64 size)
+{
+  sv_t s = {0};
+  if (data)
+    s = sv_append(allocator, s, data, size);
+  else
+  {
+    s.data = arena_alloc(allocator, size);
+    s.size = size;
+  }
+  return s;
+}
+
+sv_t sv_copy(arena_t *allocator, sv_t sv)
+{
+  return sv_make(allocator, sv.data, sv.size);
+}
+
+sv_t sv_substr(sv_t sv, u64 index, u64 size)
+{
+  sv_t newsv = {0};
+  if (index + size > sv.size)
+    return newsv;
+  newsv.data = sv.data + index;
+  newsv.size = size;
+  return newsv;
+}
+
+sv_t sv_cut(sv_t sv, u64 index)
+{
+  return sv_substr(sv, index, sv.size < index ? 0 : sv.size - index);
+}
+
+sv_t sv_chop(sv_t sv, u64 size)
+{
+  return sv_substr(sv, 0, size);
+}
+
+sv_t sv_concat(arena_t *allocator, sv_t a, sv_t b)
+{
+  sv_t c = sv_make(allocator, a.data, a.size + b.size);
+  memcpy(c.data + a.size, b.data, b.size);
+  return c;
+}
+
+sv_t sv_append(arena_t *allocator, sv_t sv, const char *data, u64 size)
+{
+  if (!allocator)
+    return (sv_t){0};
+
+  sv_t newsv = {0};
+  newsv.size = sv.size + size;
+  newsv.data = arena_realloc(allocator, sv.data, sv.size, newsv.size);
+  if (data)
+    memcpy(newsv.data + sv.size, data, size);
+  return newsv;
+}
+
+sv_t sv_prepend(arena_t *allocator, sv_t sv, const char *data, u64 size)
+{
+  if (!allocator)
+    return (sv_t){0};
+
+  // TODO: Can we make this cheaper to do?
+  sv_t newsv = sv_make(allocator, NULL, size + sv.size);
+  // Copy over `data` to the left side
+  memcpy(newsv.data, data, size);
+  // Copy old string to the right side
+  if (sv.data)
+    memcpy(newsv.data + size, sv.data, sv.size);
+
+  return newsv;
+}
+
+sv_t sv_fmt(arena_t *allocator, char *fmt, ...)
+{
+  if (!allocator)
+    return (sv_t){0};
+
+  va_list ap_1, ap_2;
+  va_start(ap_1, fmt);
+
+  va_copy(ap_2, ap_1);
+  u64 size = vsnprintf(NULL, 0, fmt, ap_2);
+  va_end(ap_2);
+
+  sv_t sv = sv_make(allocator, NULL, size);
+  vsprintf(sv.data, fmt, ap_1);
+  va_end(ap_1);
+
+  return sv;
+}
+
+i64 sv_find_substr(const sv_t sv, const sv_t substr)
+{
+  if (substr.size == 0)
+    return 0;
+  else if (sv.size < substr.size)
+    return -1;
+  else if (sv.size == substr.size)
+    return strncmp(sv.data, substr.data, sv.size) == 0 ? 0 : -1;
+
+  for (u64 i = 0; i < (sv.size - substr.size); ++i)
+    if (strncmp(sv.data + i, substr.data, substr.size) == 0)
+      return i;
+  return -1;
+}
+
+i64 sv_find_subcstr(const sv_t sv, const char *substr, u64 size)
+{
+  return sv_find_substr(sv, SV((char *)substr, size));
+}
+
+i64 sv_find_any(const sv_t sv, const char *bag)
+{
+  for (u64 i = 0; i < sv.size; ++i)
+    if (strchr(bag, sv.data[i]))
+      return i;
+  return -1;
+}
+
+u64 sv_while(const sv_t sv, bool (*pred)(char))
+{
+  u64 i;
+  for (i = 0; i < sv.size && pred(sv.data[i]); ++i)
+    continue;
+  return i;
+}
+
+u64 sv_till(const sv_t sv, bool (*pred)(char))
+{
+  u64 i;
+  for (i = 0; i < sv.size && !pred(sv.data[i]); ++i)
+    continue;
+  return i;
+}
--- a/sv.h
+++ b/sv.h
@@ -0,0 +1,70 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program.  If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-14
+ * Description: String Views
+ */
+
+#ifndef SV_H
+#define SV_H
+
+#include "./memory.h"
+
+typedef struct SV
+{
+  u64 size;
+  char *data;
+} sv_t;
+
+#define SV(DATA, SIZE) ((const sv_t){.size = (SIZE), .data = (DATA)})
+#define SV_FMT(SV)     (int)(SV).size, (SV).data
+#define PR_SV          "%.*s"
+
+sv_t sv_make(arena_t *allocator, const char *data, u64 size);
+sv_t sv_copy(arena_t *allocator, sv_t sv);
+sv_t sv_append(arena_t *allocator, sv_t sv, const char *data, u64 size);
+sv_t sv_prepend(arena_t *allocator, sv_t sv, const char *data, u64 size);
+
+/**
+ * @brief Concatenate two string views, returning that concatenation.
+
+ * Allocates memory.
+ */
+sv_t sv_concat(arena_t *allocator, sv_t a, sv_t b);
+
+/**
+ * @brief Allocates a string view with the given `printf` format.
+ */
+sv_t sv_fmt(arena_t *allocator, char *fmt, ...);
+
+/**
+ * @brief Constructs a new string view at a different offset.  Does not allocate
+ * new memory.
+ */
+sv_t sv_substr(sv_t sv, u64 index, u64 size);
+/**
+ * @brief Return a string view INDEX characters ahead (i.e. cut the string from
+ * the left).
+ */
+sv_t sv_cut(sv_t sv, u64 index);
+/**
+ * @brief Return a string view with SIZE (i.e. chopping the string from the
+ * right)
+ */
+sv_t sv_chop(sv_t sv, u64 size);
+
+i64 sv_find_substr(const sv_t sv, const sv_t substr);
+i64 sv_find_subcstr(const sv_t sv, const char *substr, u64 size);
+i64 sv_find_any(const sv_t sv, const char *bag);
+u64 sv_while(const sv_t sv, bool (*pred)(char));
+u64 sv_till(const sv_t sv, bool (*pred)(char));
+
+#endif
--- a/tag.c
+++ b/tag.c
@@ -0,0 +1,94 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program.  If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-06
+ * Description:
+ */
+
+#include <string.h>
+#include <tag.h>
+
+lisp_t *tag_int(i64 i)
+{
+  return TAG((u64)i, INT);
+}
+
+lisp_t *tag_cons(void *ptr)
+{
+  return TAG((u64)ptr, CONS);
+}
+
+lisp_t *tag_sym(void *ptr)
+{
+  return TAG((u64)ptr, SYM);
+}
+
+lisp_t *tag_ssym(const char *data, size_t size)
+{
+  assert(size <= 7);
+  u8 buffer[sizeof(u64)];
+  memset(buffer, 0, sizeof(buffer));
+  // in 8 bits we have:
+  //   size - 3 bits (up to 7)
+  //   tag - 5 bits
+  buffer[0] = size;
+  buffer[0] <<= SHIFT_SSYM;
+  buffer[0] |= TAG_SSYM;
+  memcpy(buffer + 1, data, size);
+  u64 word = 0;
+  memcpy(&word, buffer, sizeof(u64));
+  return (lisp_t *)word;
+}
+
+lisp_t *tag_bool(bool b)
+{
+  return TAG((u64)b, BOOL);
+}
+
+lisp_t *tag_vec(void *ptr)
+{
+  return TAG((u64)ptr, VEC);
+}
+
+lisp_t *tag_str(void *ptr)
+{
+  return TAG((u64)ptr, STR);
+}
+
+lisp_t *tag_char(u32 codepoint)
+{
+  u64 w = codepoint;
+  return TAG(w, CHAR);
+}
+
+enum Tag tag_get(lisp_t *ptr)
+{
+  static_assert(NUM_TAGS == 9);
+  if (!ptr)
+    return TAG_NIL;
+  else if (IS_TAG(ptr, INT))
+    return TAG_INT;
+  else if (IS_TAG(ptr, CHAR))
+    return TAG_CHAR;
+  else if (IS_TAG(ptr, SYM))
+    return TAG_SYM;
+  else if (IS_TAG(ptr, SSYM))
+    return TAG_SSYM;
+  else if (IS_TAG(ptr, BOOL))
+    return TAG_BOOL;
+  else if (IS_TAG(ptr, VEC))
+    return TAG_VEC;
+  else if (IS_TAG(ptr, STR))
+    return TAG_STR;
+  else if (IS_TAG(ptr, CONS))
+    return TAG_CONS;
+  return 0;
+}
--- a/tag.h
+++ b/tag.h
@@ -0,0 +1,90 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program.  If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-06
+ * Description:
+ */
+
+#ifndef TAG_H
+#define TAG_H
+
+#include "./base.h"
+
+#include <stdbool.h>
+#include <stdlib.h>
+
+// Opaque structure to make tagged pointers a separate type from general
+// pointers
+typedef struct Obj lisp_t;
+
+enum Tag
+{
+  TAG_NIL  = 0b00000000, // Atomic types
+  TAG_INT  = 0b00000001, // special so we can encode 63 bit integers
+  TAG_SYM  = 0b00000100,
+  TAG_SSYM = 0b00001000,
+  TAG_CHAR = 0b00001100,
+  TAG_BOOL = 0b00010000,
+  // TAG_FLOAT = 0b00010100,
+  TAG_CONS = 0b00000010, // Container types
+  TAG_VEC  = 0b00000110,
+  TAG_STR  = 0b00001010,
+
+  NUM_TAGS = 9,
+};
+
+enum Shift
+{
+  SHIFT_INT   = 1,
+  SHIFT_SSYM  = 5,
+  SHIFT_CHAR  = 8,
+  SHIFT_SYM   = 8,
+  SHIFT_BOOL  = 8,
+  SHIFT_FLOAT = 8,
+
+  SHIFT_CONS = 8,
+  SHIFT_VEC  = 8,
+  SHIFT_STR  = 8,
+};
+
+enum Mask
+{
+  MASK_INT   = 0b00000001,
+  MASK_SSYM  = 0b00011111,
+  MASK_SYM   = 0b11111111,
+  MASK_CHAR  = 0b11111111,
+  MASK_BOOL  = 0b11111111,
+  MASK_FLOAT = 0b11111111,
+
+  MASK_CONS = 0b11111111,
+  MASK_VEC  = 0b11111111,
+  MASK_STR  = 0b11111111,
+};
+
+#define TAG(PTR, TYPE)    ((lisp_t *)(((PTR) << SHIFT_##TYPE) | TAG_##TYPE))
+#define IS_TAG(PTR, TYPE) (((u64)(PTR) & MASK_##TYPE) == TAG_##TYPE)
+#define UNTAG(PTR, TYPE)  (((u64)PTR) >> SHIFT_##TYPE)
+
+enum Tag tag_get(lisp_t *ptr);
+
+lisp_t *tag_int(i64 i);
+lisp_t *tag_char(u32 codepoint);
+lisp_t *tag_sym(void *ptr);
+lisp_t *tag_ssym(const char *data, size_t size);
+lisp_t *tag_bool(bool b);
+lisp_t *tag_vec(void *ptr);
+lisp_t *tag_str(void *ptr);
+lisp_t *tag_cons(void *ptr);
+
+#define INT_MAX ((1L << 62) - 1)
+#define INT_MIN (-(1L << 62))
+
+#endif
--- a/tasks.org
+++ b/tasks.org
@@ -0,0 +1,230 @@
+#+title: Tasks
+#+date: 2025-02-18
+
+* WIP Implement a reader
+We want something a bit generic: able to handle reading from some
+buffer of memory (a string, or contents of a file where we can read
+the entire thing at once) or directly from a file stream (STDIN,
+network streams, etc).
+
+We don't need a tokeniser - the basic grammar of a Lisp is really easy
+to narrow down, so we can skip tokenisation and go straight for
+parsing.
+
+We also want to be able to admit when reading went wrong for some
+reason with proper errors messages (i.e. can be read by Emacs) - this
+will need to be refactored when we introduce errors within the Lisp
+runtime itself.
+** TODO Implement floats and rationals
+Rationals are pretty easy - just two integers (quotient and divisor) -
+so a tagged cons cell would do the job.  Floats are a bit more
+difficult since I'd either need to box them or find a creative way of
+sticking IEEE-754 floats into < 64 bits.
+
+Also implement a reader macro for #e<scientific form>.  Also deal with
+[-,+,]inf(.0) and [-,+,]nan(.0).
+
+Need to do some reading.
+
+[[file:r7rs-tests.scm::test #t (real? #e1e10)][trigger]]
+** TODO Consider user instantiated reader macros
+We don't have an evaluator so we can't really interpret whatever a
+user wants for a reader macro currently, but it would be useful to
+think about it now.  Currently I have a single function which deals
+with parsing reader macros but that's just static.
+
+Thing is, it does have the context as a parameter to pass to delegate
+functions (such as ~parse_vec~) - wouldn't be a massive jump to also
+consider user environments via the context.
+
+[[file:reader.c::perr_t parse_reader_macro(context_t *ctx, input_t
+*inp, lisp_t **ret)][function link]]
+* TODO Consider Lisp runtime errors
+* TODO Admit arbitrarily sized integers
+Currently we admit fixed size integers of 63 bits.  They use 2s
+complement due to x86 which means our max and min are 62 bit based.
+
+However, to even try to be a scheme implementation we need to allow
+arbitrarily sized integers.  What are the specific tasks we need to
+complete in our model to achieve this?:
+ Allow "reading" of unfixed size integers
+  + This will require reading a sequence of base 10 digits without
+    relying on strtold
+ Implement unfixed size integers into our memory model
+  + Certainly, unfixed size integers cannot be carried around like
+    fixnums wherein we can embed an integer into the pointer.
+    Thus we have to allocate them in memory.
+    + NOTE: There will be definitely be an optimisation to be done
+      here; integers that are within the bound of a fixnum could be
+      left as a fixnum then "elevated" to an integer when needed
+  + I think the big idea is allocating them as a fixed set of bytes
+    like big symbols.  For big integers we have to read the memory
+    associated thus we need a pointer.  Due to 2s complement it should
+    be trivial to increase the size of an integer to fit a new result
+    i.e. if I'm adding two integers and that leads to an "overflow"
+    where the result is of greater width than its inputs, we should
+    just allocate new memory for it.
+
+Consequences:
+- Greater memory use
+  - In fact exponential if we need to allocate a whole new integer per
+    operation rather than utilising the input memory
+- Possible loss of performance due to making integers over fixnums
+  when they don't need to be
+- Comparison is harder on integers
+- Harder to cache for the CPU
+
+but all of this is to be expected when the user is an idiot.
+* TODO Think about how to perform operations on different types
+** TODO Integers
+** TODO Symbols
+** TODO Pairs
+* DONE More efficient memory model for symbols
+The primitive model for symbol allocation is an 8 byte number
+representing the size of the symbol, followed by a variable number of
+characters (as bytes).  This is stored somewhere in the memory
+context, which will likely be in the heap.
+
+We're actually wasting a ridiculous amount of memory with this model.
+We'll almost never be using the full 64 bits of the size to represent
+a symbol (who's ever going to go close to 1.8 quintillion bytes for a
+single symbol?) - and even more annoying, there are tons of small
+sized symbols where we actually need _more space_ for the 8 byte size
+than the underlying symbol data.
+
+I think there's definitely a better model available, at least for
+smaller symbols.  We already have inlined integers where the pointer
+itself is the integer, why can't we do the same for symbols?
+A pointer has 8 bytes of data to use - one character is one byte -
+thus we can represent 8 character symbols in one pointer.
+
+If we want this to still be within the remit of our pointer tagging
+scheme, we'll need a bit of space for our administrative purposes
+(admin slop...).  So let's take _one_ byte out of that 8 for that.  So
+we can represent any symbols 7 bytes long in a single pointer.  We'll
+need to keep in mind we want to represent symbols that may be less
+than 7 characters, so that one admin byte is going to be doing some
+heavy lifting.
+
+Let's examine that one admin byte:
+ At least 3 bits are necessary for the actual pointer tag: "at least"
+  because we might increase the size of the tag based on demand
+ Thus, 5 bits left for our use - let's fit the size in there.
+  pow(2,6) - 1 = 63, so we have _way_ more than we need
+
+What are the benefits of doing this?:
+ Symbol equality for small symbols is a cinch: just compare the two
+  tagged "pointers"
+ 7 or less character symbols require no memory allocation, just
+  working off the stack
+
+One thing to note is that for more than 7 character symbols, we'll
+need to allocate memory.  But, in the worst case of 8 character
+symbols, we're only allocating two 64 bit integers: these are easy to
+walk on x86 and we've reached at least parity between the memory
+required for administration (the size number) and the actual data.
+** Being more aggressive?
+Technically, ANSI bytes only need 7 bits.  For each of the 7 bytes
+used for the character data, we can take one bit off, leaving us with
+7 bits to use for an additional character.  We don't need to adjust
+anything else in the schema.
+
+So, hypothetically we could represent up to 8 character symbols!  This
+would require packing the characters more aggressively into a single
+pointer.  Let's look at the layout of our pointers.  This table is
+indexed from most significant to least i.e. 0 is the MSB and 63 is the
+LSB:
+
+|-------+------------|
+| Index | Usage      |
+|-------+------------|
+|  0-55 | Characters |
+| 56-60 | Size       |
+| 61-63 | Tag        |
+|-------+------------|
+
+Honestly though, for an extra byte of information we'll probably have
+to do a lot more work.  x86-64 CPUs are much better at walking bytes
+than they are walking 7 bit offsets.  This may be something to
+consider if CPU time is cheaper than allocating 8 byte symbols
+somewhere.
+* DONE Tagging scheme based on arena pages
+2025-04-09:21:59:29: We went for option (2) of just taking a byte for
+free from the memory address and using it as our management byte.
+** 1) Page-offset schema
+I've realised arenas are way better than the standard array dynamic I
+was going for before.  However, we lose the nicer semantics of using
+an array index for pointers, where we can implement our own semantics
+regarding what bits in that pointer are free to use, when using a
+normal stable pointer into the arena; the host operating system has
+its own semantics regarding how pointers are arranged and this _will_
+change between operating systems.  In particular, because of the way
+I've arranged pages, we can't use the classic "div by 8" methodology
+where new allocations on the heap generally must be aligned by 8 bytes
+(u64), so we can use those 3 bits at the bottom for our tagging;
+offsets into pages are where our pointers will lie and they won't
+necessarily be divisible by 8.
+
+So we can't use the pointers directly into the pages - we'll call
+these types of pointers `host pointers`, because once we have them
+it's trivial to access the underlying data.  We'll call the pointers
+we want to make `managed pointers` because we're managing the memory
+system associated with them.  We want to be able to translate from
+managed pointers to host pointers.
+
+Managed pointers are really just encodings for direct access into the
+arena memory.  So in 8 bytes, we need to encode both the page and the
+specific offset in that page where the pointer is pointing to.  We
+also want to leave space for tagging and any metadata we might want to
+store in the pointer to that data.  A schema I could think of was:
+
+|------------------+--------------------|
+| Index (in bytes) | Representation     |
+|------------------+--------------------|
+|                0 | Metadata (tagging) |
+|              1-4 | Offset in page     |
+|              4-7 | Page choice        |
+|------------------+--------------------|
+
+This gives us pow(2, 24) - 1 = 16777215 possible pages and
+pow(2, 32) - 1 = 4294967295 offsets in each page.  Thus our total
+addressable memory would be pow(2, 56) - 1 = 72057594037927935 bytes.
+
+Certainly no machine would ever have this much memory and so we're
+quite safe for most machines.  That reserved management byte for our
+purposes (tagging, currently) will make the math to translate it a bit
+easier.
+
+Let's reason about how we'd encode and decode these addresses.  The
+arena itself should provide addresses with the management byte set to
+0 for the user to encode what they wish.  The top bytes should be
+encoded as per the above i.e. top 3 bytes as the page index, next 4
+bytes as the offset in that page.  This shouldn't be super difficult
+when we're doing it within the management functions of the arena
+itself as this data should be handy when performing the allocation.
+
+When decoding these addresses i.e. retrieving data i.e. translating
+from a managed pointer to a host pointer, all it will need to do is
+convert the pointer into a byte buffer and copy the top 3 bytes as a
+page index and the next 4 bytes as the offset in the page.  Once these
+are verified to be valid, we can just access the underlying pages and
+get the host pointer.  Because of how arenas work, those host pointers
+will be stable regardless of any further memory management functions
+performed on the arena (excluding cleanup) - so once you have a host
+pointer, you can use it as much as you want without having to worry
+about the pointer becoming invalid in the next second.
+** 2) 48-bit addressing exploit
+Most x86 CPUs only use around 48-56 bits for actual memory addresses -
+mostly as a result of not needing _nearly_ as many addresses as a full
+64 bit word would provide.
+
+So we /could/ get away with using one of those bytes for our
+administrative tasks.  Since the smallest remit we have is one byte,
+we'll stick to that (but maybe we could go for two bytes - need to
+investigate further).
+
+This byte should be the MSB, but using that for tagging will require
+more work than the lowest byte (to look at it we'll need to push that
+byte all the way down).  So we'll be going for a low byte strategy by
+shifting the pointer up by one byte.  This leaves us with the lowest
+byte to play with as we choose.
--- a/vec.c
+++ b/vec.c
@@ -0,0 +1,38 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program.  If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-15
+ * Description: Vector implementation
+ */
+
+#include "./vec.h"
+
+#include <malloc.h>
+#include <string.h>
+
+void vec_reserve(arena_t *allocator, vec_t *vec, u32 size)
+{
+  if (vec->cap - vec->size < size)
+  {
+    u32 old_cap = vec->cap;
+    vec->cap    = MAX(vec->cap * 1.5, vec->size + size);
+    vec->data   = arena_realloc(allocator, vec->data, old_cap, vec->cap);
+  }
+}
+
+u32 vec_append(arena_t *allocator, vec_t *vec, const void *data, u32 size)
+{
+  vec_reserve(allocator, vec, size);
+  memcpy(vec->data + vec->size, data, size);
+  u32 ptr = vec->size;
+  vec->size += size;
+  return ptr;
+}
--- a/vec.h
+++ b/vec.h
@@ -0,0 +1,30 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License Version 2 for
+ * details.
+
+ * You may distribute and modify this code under the terms of the GNU General
+ * Public License Version 2, which you should have received a copy of along with
+ * this program.  If not, please go to <https://www.gnu.org/licenses/>.
+
+ * Created: 2025-04-15
+ * Description: Vectors (dynamic sized arrays)
+ */
+
+#ifndef VEC_H
+#define VEC_H
+
+#include "./memory.h"
+
+typedef struct
+{
+  u32 size, cap;
+  u8 *data;
+} vec_t;
+
+void vec_reserve(arena_t *alloactor, vec_t *vec, u32 size);
+u32 vec_append(arena_t *allocator, vec_t *vec, const void *data, u32 size);
+
+#endif