Initial implementation
This commit is contained in:
51
Makefile
Normal file
51
Makefile
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
CC=gcc
|
||||||
|
OUT=oats.out
|
||||||
|
LIBS=-I.
|
||||||
|
ARGS=
|
||||||
|
|
||||||
|
RELEASE=0
|
||||||
|
GFLAGS=-Wall -Wextra -Wswitch-enum -Werror -std=c11
|
||||||
|
DFLAGS=-ggdb -fsanitize=address -fsanitize=undefined
|
||||||
|
RFLAGS=-O3
|
||||||
|
DEPFLAGS=-MT $@ -MMD -MP -MF
|
||||||
|
|
||||||
|
ifeq ($(RELEASE), 1)
|
||||||
|
CFLAGS=$(GFLAGS) $(RFLAGS) -DDEBUG=0
|
||||||
|
else
|
||||||
|
CFLAGS=$(GFLAGS) $(DFLAGS)
|
||||||
|
endif
|
||||||
|
|
||||||
|
DIST=build
|
||||||
|
SRC:=tag.c memory.c vec.c sv.c lisp.c reader.c eval.c main.c
|
||||||
|
OBJ:=$(SRC:%.c=$(DIST)/%.o)
|
||||||
|
DEPDIR=$(DIST)/dependencies
|
||||||
|
DEPS:=$(SRC:%.c=$(DEPDIR)/%.d)
|
||||||
|
|
||||||
|
.PHONY: all
|
||||||
|
all: $(DIST)/$(OUT)
|
||||||
|
|
||||||
|
$(DIST)/%.o: %.c | $(DIST) $(DEPDIR)
|
||||||
|
$(CC) $(CFLAGS) $(DEPFLAGS) $(DEPDIR)/$*.d -c $< -o $@ $(LIBS)
|
||||||
|
|
||||||
|
$(DIST)/$(OUT): $(OBJ) | $(DIST)
|
||||||
|
$(CC) $(CFLAGS) $^ -o $@ $(LIBS)
|
||||||
|
|
||||||
|
.PHONY: run
|
||||||
|
run: $(DIST)/$(OUT)
|
||||||
|
./$^ $(ARGS)
|
||||||
|
|
||||||
|
.PHONY:
|
||||||
|
clean:
|
||||||
|
rm -rfv $(DIST)/*
|
||||||
|
|
||||||
|
$(DIST):
|
||||||
|
mkdir -p $(DIST)
|
||||||
|
|
||||||
|
$(DEPDIR):
|
||||||
|
mkdir -p $(DEPDIR)
|
||||||
|
|
||||||
|
.PHONY:
|
||||||
|
watch:
|
||||||
|
find . -type 'f' -regex ".*.c\\|.*.h" | entr -cs "make run"
|
||||||
|
|
||||||
|
-include $(DEPS)
|
||||||
78
base.h
Normal file
78
base.h
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
/* Copyright (C) 2025 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||||
|
* details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the GNU General
|
||||||
|
* Public License Version 2, which you should have received a copy of along with
|
||||||
|
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
* Created: 2025-03-31
|
||||||
|
* Description: Base
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef BASE_H
|
||||||
|
#define BASE_H
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
typedef uint8_t u8;
|
||||||
|
typedef uint16_t u16;
|
||||||
|
typedef uint32_t u32;
|
||||||
|
typedef uint64_t u64;
|
||||||
|
|
||||||
|
typedef int8_t i8;
|
||||||
|
typedef int16_t i16;
|
||||||
|
typedef int32_t i32;
|
||||||
|
typedef int64_t i64;
|
||||||
|
|
||||||
|
typedef float f32;
|
||||||
|
typedef double f64;
|
||||||
|
static_assert(sizeof(f32) == sizeof(u32));
|
||||||
|
static_assert(sizeof(f64) == sizeof(u64));
|
||||||
|
|
||||||
|
#define MAX(A, B) ((A) > (B) ? (A) : (B))
|
||||||
|
#define MIN(A, B) ((A) < (B) ? (A) : (B))
|
||||||
|
#define NTH_BYTE(X, N) (((X) >> (8 * N)) & ((1 << 8) - 1))
|
||||||
|
#define ARR_SIZE(XS) (sizeof(XS) / sizeof((XS)[0]))
|
||||||
|
|
||||||
|
#define TODO(MSG) (assert(false && MSG));
|
||||||
|
|
||||||
|
#ifndef DEBUG
|
||||||
|
#define DEBUG 2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static inline void debug(char *fmt, ...)
|
||||||
|
{
|
||||||
|
#if DEBUG > 1
|
||||||
|
va_list ap;
|
||||||
|
va_start(ap, fmt);
|
||||||
|
vprintf(fmt, ap);
|
||||||
|
va_end(ap);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void info(char *fmt, ...)
|
||||||
|
{
|
||||||
|
va_list ap;
|
||||||
|
va_start(ap, fmt);
|
||||||
|
vprintf(fmt, ap);
|
||||||
|
va_end(ap);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void print_bits(u64 w)
|
||||||
|
{
|
||||||
|
for (u64 i = 8 * sizeof(w); i > 0; --i)
|
||||||
|
{
|
||||||
|
printf("%c", ((w >> (i - 1)) & 1) == 1 ? '1' : '0');
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
24
eval.c
Normal file
24
eval.c
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
/* Copyright (C) 2025 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||||
|
* details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the GNU General
|
||||||
|
* Public License Version 2, which you should have received a copy of along with
|
||||||
|
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
* Created: 2025-04-18
|
||||||
|
* Description: Evaluator implementation
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "./eval.h"
|
||||||
|
|
||||||
|
err_t eval(context_t *ctx, lisp_t *obj, lisp_t **ret)
|
||||||
|
{
|
||||||
|
(void)ctx;
|
||||||
|
(void)obj;
|
||||||
|
(void)ret;
|
||||||
|
TODO("implement evaluator");
|
||||||
|
}
|
||||||
28
eval.h
Normal file
28
eval.h
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
/* Copyright (C) 2025 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||||
|
* details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the GNU General
|
||||||
|
* Public License Version 2, which you should have received a copy of along with
|
||||||
|
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
* Created: 2025-04-18
|
||||||
|
* Description: Evaluator
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef EVAL_H
|
||||||
|
#define EVAL_H
|
||||||
|
|
||||||
|
#include "./lisp.h"
|
||||||
|
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
ERR_OK = 0,
|
||||||
|
} err_t;
|
||||||
|
|
||||||
|
err_t eval(context_t *ctx, lisp_t *obj, lisp_t **ret);
|
||||||
|
|
||||||
|
#endif
|
||||||
317
lisp.c
Normal file
317
lisp.c
Normal file
@@ -0,0 +1,317 @@
|
|||||||
|
/* Copyright (C) 2025 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||||
|
* details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the GNU General
|
||||||
|
* Public License Version 2, which you should have received a copy of along with
|
||||||
|
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
* Created: 2025-04-06
|
||||||
|
* Description:
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "./lisp.h"
|
||||||
|
#include "./sv.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <wchar.h>
|
||||||
|
|
||||||
|
lisp_t *make_int(i64 integer)
|
||||||
|
{
|
||||||
|
return tag_int(integer);
|
||||||
|
}
|
||||||
|
|
||||||
|
lisp_t *make_sym(context_t *ctx, char *data, u64 size)
|
||||||
|
{
|
||||||
|
// Small symbol optimisation
|
||||||
|
if (size <= 7)
|
||||||
|
return tag_ssym(data, size);
|
||||||
|
|
||||||
|
// Make a cell with the data we need
|
||||||
|
cell_t *cell = context_alloc(ctx, sizeof(*cell) + size);
|
||||||
|
cell->size = size;
|
||||||
|
memcpy(cell->data, data, size);
|
||||||
|
|
||||||
|
return tag_sym(cell);
|
||||||
|
}
|
||||||
|
|
||||||
|
lisp_t *make_cons(context_t *ctx, lisp_t *car, lisp_t *cdr)
|
||||||
|
{
|
||||||
|
cons_t *cons = context_alloc(ctx, sizeof(*cons));
|
||||||
|
memset(cons, 0, sizeof(*cons));
|
||||||
|
cons->car = car;
|
||||||
|
cons->cdr = cdr;
|
||||||
|
return tag_cons(cons);
|
||||||
|
}
|
||||||
|
|
||||||
|
lisp_t *make_list(context_t *ctx, lisp_t **lisps, u64 size)
|
||||||
|
{
|
||||||
|
if (!lisps)
|
||||||
|
return NIL;
|
||||||
|
lisp_t *cur = NIL;
|
||||||
|
for (u64 i = size; i > 0; --i)
|
||||||
|
cur = make_cons(ctx, lisps[i - 1], cur);
|
||||||
|
return cur;
|
||||||
|
}
|
||||||
|
|
||||||
|
lisp_t *make_vec(context_t *ctx, u32 size)
|
||||||
|
{
|
||||||
|
// Make a vector with the content
|
||||||
|
lisp_t *container = NIL;
|
||||||
|
vec_t *vec = context_alloc(ctx, sizeof(*vec));
|
||||||
|
vec->size = 0;
|
||||||
|
vec->cap = size * sizeof(&container);
|
||||||
|
if (size == 0)
|
||||||
|
vec->data = NULL;
|
||||||
|
else
|
||||||
|
vec->data = context_alloc(ctx, vec->cap);
|
||||||
|
container = tag_vec(vec);
|
||||||
|
|
||||||
|
return container;
|
||||||
|
}
|
||||||
|
|
||||||
|
lisp_t *make_str(context_t *ctx, char *data, u64 size)
|
||||||
|
{
|
||||||
|
if (size == 0)
|
||||||
|
// No need to allocate unless necessary
|
||||||
|
return tag_str(NIL);
|
||||||
|
// Make a vector with the content
|
||||||
|
vec_t *vec = context_alloc(ctx, sizeof(*vec));
|
||||||
|
vec->data = context_alloc(ctx, sizeof(*vec->data) * size);
|
||||||
|
vec->cap = size;
|
||||||
|
vec->size = size;
|
||||||
|
memcpy(vec->data, data, size);
|
||||||
|
|
||||||
|
return tag_str(vec);
|
||||||
|
}
|
||||||
|
|
||||||
|
i64 as_int(lisp_t *obj)
|
||||||
|
{
|
||||||
|
assert(IS_TAG(obj, INT));
|
||||||
|
u64 p_obj = (u64)obj;
|
||||||
|
return UNTAG(p_obj, INT) | // Delete the tag
|
||||||
|
(NTH_BYTE(p_obj, 7) & 0x80) << 56 // duplicate the MSB (preserve sign)
|
||||||
|
;
|
||||||
|
}
|
||||||
|
|
||||||
|
cons_t *as_cons(lisp_t *obj)
|
||||||
|
{
|
||||||
|
assert(IS_TAG(obj, CONS));
|
||||||
|
return (cons_t *)UNTAG(obj, CONS);
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 as_char(lisp_t *obj)
|
||||||
|
{
|
||||||
|
return (u32)UNTAG(obj, CHAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
cell_t *as_sym(lisp_t *obj)
|
||||||
|
{
|
||||||
|
assert(IS_TAG(obj, SYM));
|
||||||
|
return (cell_t *)UNTAG(obj, SYM);
|
||||||
|
}
|
||||||
|
|
||||||
|
void as_ssym(lisp_t *obj, sv_t *container)
|
||||||
|
{
|
||||||
|
assert(IS_TAG(obj, SSYM));
|
||||||
|
u64 p_obj = (u64)obj;
|
||||||
|
container->size = NTH_BYTE(p_obj, 0) >> SHIFT_SSYM;
|
||||||
|
p_obj >>= 8;
|
||||||
|
memcpy(container->data, &p_obj, container->size);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool as_bool(lisp_t *obj)
|
||||||
|
{
|
||||||
|
assert(IS_TAG(obj, BOOL));
|
||||||
|
return (bool)UNTAG(obj, BOOL);
|
||||||
|
}
|
||||||
|
|
||||||
|
vec_t *as_vec(lisp_t *obj)
|
||||||
|
{
|
||||||
|
assert(IS_TAG(obj, VEC));
|
||||||
|
return (vec_t *)UNTAG(obj, VEC);
|
||||||
|
}
|
||||||
|
|
||||||
|
vec_t *as_str(lisp_t *obj)
|
||||||
|
{
|
||||||
|
assert(IS_TAG(obj, STR));
|
||||||
|
return (vec_t *)UNTAG(obj, STR);
|
||||||
|
}
|
||||||
|
|
||||||
|
sv_t serialise(context_t *ctx, lisp_t *ptr)
|
||||||
|
{
|
||||||
|
enum Tag t = tag_get(ptr);
|
||||||
|
switch (t)
|
||||||
|
{
|
||||||
|
case TAG_NIL:
|
||||||
|
{
|
||||||
|
return SV("NIL", 3);
|
||||||
|
}
|
||||||
|
case TAG_INT:
|
||||||
|
{
|
||||||
|
i64 i = as_int(ptr);
|
||||||
|
sv_t s = sv_fmt(&ctx->scratch,
|
||||||
|
#if DEBUG > 1
|
||||||
|
"int["
|
||||||
|
#endif
|
||||||
|
"%ld"
|
||||||
|
#if DEBUG > 1
|
||||||
|
"]"
|
||||||
|
#endif
|
||||||
|
,
|
||||||
|
i);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
case TAG_CHAR:
|
||||||
|
{
|
||||||
|
u32 codepoint = as_char(ptr);
|
||||||
|
return sv_fmt(&ctx->scratch,
|
||||||
|
#if DEBUG > 1
|
||||||
|
"char["
|
||||||
|
#endif
|
||||||
|
"%lc"
|
||||||
|
#if DEBUG > 1
|
||||||
|
"]"
|
||||||
|
#endif
|
||||||
|
,
|
||||||
|
codepoint);
|
||||||
|
}
|
||||||
|
case TAG_SYM:
|
||||||
|
{
|
||||||
|
cell_t *cell = as_sym(ptr);
|
||||||
|
sv_t s = sv_make(&ctx->scratch, (char *)cell->data, cell->size);
|
||||||
|
#if DEBUG > 1
|
||||||
|
s = sv_fmt(&ctx->scratch, "sym[" PR_SV "]", SV_FMT(s));
|
||||||
|
#endif
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
case TAG_SSYM:
|
||||||
|
{
|
||||||
|
char data[7];
|
||||||
|
sv_t rsym = SV(data, 0);
|
||||||
|
as_ssym(ptr, &rsym);
|
||||||
|
sv_t s = sv_copy(&ctx->scratch, rsym);
|
||||||
|
|
||||||
|
#if DEBUG > 1
|
||||||
|
s = sv_fmt(&ctx->scratch, "ssym[" PR_SV "]", SV_FMT(s));
|
||||||
|
#endif
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
case TAG_BOOL:
|
||||||
|
{
|
||||||
|
return sv_fmt(&ctx->scratch,
|
||||||
|
#if DEBUG > 1
|
||||||
|
"bool["
|
||||||
|
#endif
|
||||||
|
"%s"
|
||||||
|
#if DEBUG > 1
|
||||||
|
"]"
|
||||||
|
#endif
|
||||||
|
,
|
||||||
|
as_bool(ptr) ? "#t" : "#f");
|
||||||
|
}
|
||||||
|
case TAG_CONS:
|
||||||
|
{
|
||||||
|
if (!CAR(ptr) && !CDR(ptr))
|
||||||
|
#if DEBUG > 1
|
||||||
|
return SV("lst[NIL]", 8);
|
||||||
|
#else
|
||||||
|
return SV("NIL", 3);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
sv_t s = {0};
|
||||||
|
|
||||||
|
for (lisp_t *lsp = ptr; lsp; lsp = IS_TAG(lsp, CONS) ? CDR(lsp) : NIL)
|
||||||
|
{
|
||||||
|
lisp_t *lmember = lsp;
|
||||||
|
if (IS_TAG(lsp, CONS))
|
||||||
|
lmember = CAR(lmember);
|
||||||
|
|
||||||
|
sv_t member = serialise(ctx, lmember);
|
||||||
|
s = sv_concat(&ctx->scratch, s, member);
|
||||||
|
if (IS_TAG(lsp, CONS) && !CDR(lsp))
|
||||||
|
continue;
|
||||||
|
else if (IS_TAG(CDR(lsp), CONS))
|
||||||
|
// normal list
|
||||||
|
s = sv_append(&ctx->scratch, s, " ", 1);
|
||||||
|
else
|
||||||
|
// dotted list
|
||||||
|
s = sv_append(&ctx->scratch, s, " . ", 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if DEBUG > 1
|
||||||
|
s = sv_fmt(&ctx->scratch, "lst[" PR_SV "]", SV_FMT(s));
|
||||||
|
#else
|
||||||
|
s = sv_fmt(&ctx->scratch, "(" PR_SV ")", SV_FMT(s));
|
||||||
|
#endif
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
case TAG_VEC:
|
||||||
|
{
|
||||||
|
vec_t *vec = as_vec(ptr);
|
||||||
|
if (!vec)
|
||||||
|
#if DEBUG > 1
|
||||||
|
return SV("vec[NIL]", 8);
|
||||||
|
#else
|
||||||
|
return SV("#()", 3);
|
||||||
|
#endif
|
||||||
|
else if (vec->size < sizeof(&ptr))
|
||||||
|
#if DEBUG > 1
|
||||||
|
return SV("vec[0/0 #()]", 13);
|
||||||
|
#else
|
||||||
|
return SV("#()", 3);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
sv_t s = {0};
|
||||||
|
for (u64 i = 0; i < vec->size / sizeof(&ptr); ++i)
|
||||||
|
{
|
||||||
|
lisp_t *lmember = ((lisp_t **)vec->data)[i];
|
||||||
|
sv_t member = serialise(ctx, lmember);
|
||||||
|
s = sv_concat(&ctx->scratch, s, member);
|
||||||
|
if (i == ((vec->size / sizeof(&ptr)) - 1))
|
||||||
|
continue;
|
||||||
|
s = sv_append(&ctx->scratch, s, " ", 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if DEBUG > 1
|
||||||
|
s = sv_fmt(&ctx->scratch, "vec[%lu/%lu #(" PR_SV ")]", vec->size, vec->cap,
|
||||||
|
SV_FMT(s));
|
||||||
|
#else
|
||||||
|
s = sv_fmt(&ctx->scratch, "#(" PR_SV ")", SV_FMT(s));
|
||||||
|
#endif
|
||||||
|
return s;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case TAG_STR:
|
||||||
|
{
|
||||||
|
vec_t *vec = as_str(ptr);
|
||||||
|
sv_t sv = {0};
|
||||||
|
if (vec)
|
||||||
|
sv = SV((char *)vec->data, vec->size);
|
||||||
|
else
|
||||||
|
sv = SV("", 0);
|
||||||
|
return sv_fmt(&ctx->scratch,
|
||||||
|
#if DEBUG > 1
|
||||||
|
"str["
|
||||||
|
#else
|
||||||
|
"\""
|
||||||
|
#endif
|
||||||
|
PR_SV
|
||||||
|
#if DEBUG > 1
|
||||||
|
"]"
|
||||||
|
#else
|
||||||
|
"\""
|
||||||
|
#endif
|
||||||
|
,
|
||||||
|
SV_FMT(sv));
|
||||||
|
}
|
||||||
|
case NUM_TAGS:
|
||||||
|
default:
|
||||||
|
assert(false && "serialise: unreachable");
|
||||||
|
return SV(0, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
59
lisp.h
Normal file
59
lisp.h
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
/* Copyright (C) 2025 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||||
|
* details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the GNU General
|
||||||
|
* Public License Version 2, which you should have received a copy of along with
|
||||||
|
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
* Created: 2025-04-06
|
||||||
|
* Description: Object model where we deal with contexts
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LISP_H
|
||||||
|
#define LISP_H
|
||||||
|
|
||||||
|
#include "./base.h"
|
||||||
|
#include "./memory.h"
|
||||||
|
#include "./sv.h"
|
||||||
|
#include "./tag.h"
|
||||||
|
#include "./vec.h"
|
||||||
|
|
||||||
|
typedef struct Cons
|
||||||
|
{
|
||||||
|
lisp_t *car, *cdr;
|
||||||
|
} cons_t;
|
||||||
|
|
||||||
|
typedef struct Cell
|
||||||
|
{
|
||||||
|
u64 size;
|
||||||
|
u8 data[];
|
||||||
|
} cell_t;
|
||||||
|
|
||||||
|
#define NIL 0
|
||||||
|
lisp_t *make_int(i64 integer);
|
||||||
|
lisp_t *make_sym(context_t *ctx, char *data, u64 size);
|
||||||
|
lisp_t *make_cons(context_t *ctx, lisp_t *car, lisp_t *cdr);
|
||||||
|
lisp_t *make_list(context_t *ctx, lisp_t **lisps, u64 size);
|
||||||
|
lisp_t *make_vec(context_t *ctx, u32 size);
|
||||||
|
lisp_t *make_str(context_t *ctx, char *data, u64 size);
|
||||||
|
|
||||||
|
i64 as_int(lisp_t *obj);
|
||||||
|
u32 as_char(lisp_t *obj);
|
||||||
|
cell_t *as_sym(lisp_t *obj);
|
||||||
|
void as_ssym(lisp_t *obj, sv_t *sv);
|
||||||
|
bool as_bool(lisp_t *obj);
|
||||||
|
f64 as_float(lisp_t *obj);
|
||||||
|
cons_t *as_cons(lisp_t *obj);
|
||||||
|
vec_t *as_vec(lisp_t *obj);
|
||||||
|
vec_t *as_str(lisp_t *obj);
|
||||||
|
|
||||||
|
sv_t serialise(context_t *ctx, lisp_t *lisp);
|
||||||
|
|
||||||
|
#define CAR(PTR) (as_cons(PTR)->car)
|
||||||
|
#define CDR(PTR) (as_cons(PTR)->cdr)
|
||||||
|
|
||||||
|
#endif
|
||||||
92
main.c
Normal file
92
main.c
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
/* Copyright (C) 2025 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||||
|
* details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the GNU General
|
||||||
|
* Public License Version 2, which you should have received a copy of along with
|
||||||
|
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
* Created: 2025-04-05
|
||||||
|
* Description: Entrypoint
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "./base.h"
|
||||||
|
#include "./lisp.h"
|
||||||
|
#include "./memory.h"
|
||||||
|
#include "./reader.h"
|
||||||
|
#include "./sv.h"
|
||||||
|
#include "./vec.h"
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <malloc.h>
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int exit = 0;
|
||||||
|
context_t ctx = {0};
|
||||||
|
const char *filename = NULL;
|
||||||
|
#if 1
|
||||||
|
if (argc > 1)
|
||||||
|
filename = argv[1];
|
||||||
|
else
|
||||||
|
filename = "./r7rs-tests.scm";
|
||||||
|
|
||||||
|
FILE *fp = fopen(filename, "r");
|
||||||
|
if (!fp)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "[ERROR]: File `%s` does not exist\n", filename);
|
||||||
|
exit = 1;
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
input_t inp = {0};
|
||||||
|
input_from_fp(&ctx, &inp, filename, fp);
|
||||||
|
fclose(fp);
|
||||||
|
#else
|
||||||
|
filename = "<example>";
|
||||||
|
char inp_data[] = "(print (+ 34 35))";
|
||||||
|
input_t inp = {0};
|
||||||
|
sv_t inp_sv = SV(inp_data, sizeof(inp_data));
|
||||||
|
input_from_sv(&ctx, &inp, filename, inp_sv);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (inp.str.size == 0)
|
||||||
|
{
|
||||||
|
info("[WARNING] `%s` is empty.\n", filename);
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
debug("[file read]: %luB read from `%s`\n", inp.str.size, filename);
|
||||||
|
|
||||||
|
// Setup a vector to hold all the lisps
|
||||||
|
vec_t results = {0};
|
||||||
|
perr_t perr = parse_all(&ctx, &inp, &results);
|
||||||
|
if (perr)
|
||||||
|
{
|
||||||
|
exit = perr;
|
||||||
|
print_perror(stderr, &inp, perr);
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
context_reset_read(&ctx);
|
||||||
|
|
||||||
|
lisp_t **lisps = (lisp_t **)results.data;
|
||||||
|
u64 size = results.size / sizeof(*lisps);
|
||||||
|
for (u64 i = 0; i < size; ++i)
|
||||||
|
{
|
||||||
|
lisp_t *lisp = lisps[i];
|
||||||
|
// printf("tag=%x\n", tag_get(lisp));
|
||||||
|
sv_t serialised = serialise(&ctx, lisp);
|
||||||
|
info("lisp[%lu]: %p => " PR_SV "\n", i, lisp, SV_FMT(serialised));
|
||||||
|
}
|
||||||
|
context_report(&ctx);
|
||||||
|
|
||||||
|
end:
|
||||||
|
|
||||||
|
context_cleanup(&ctx);
|
||||||
|
return exit;
|
||||||
|
}
|
||||||
275
memory.c
Normal file
275
memory.c
Normal file
@@ -0,0 +1,275 @@
|
|||||||
|
/* Copyright (C) 2025 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||||
|
* details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the GNU General
|
||||||
|
* Public License Version 2, which you should have received a copy of along with
|
||||||
|
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
* Created: 2025-04-05
|
||||||
|
* Description: Implementations for memory models.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "./memory.h"
|
||||||
|
|
||||||
|
#include <malloc.h>
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
page_t *page_create(u64 size)
|
||||||
|
{
|
||||||
|
size = MAX(size, PAGE_DEFAULT_SIZE);
|
||||||
|
page_t *page = calloc(1, sizeof(*page) + size);
|
||||||
|
page->next = NULL;
|
||||||
|
page->size = 0;
|
||||||
|
page->capacity = size;
|
||||||
|
return page;
|
||||||
|
}
|
||||||
|
|
||||||
|
void page_resize(page_t **page, u64 size)
|
||||||
|
{
|
||||||
|
if (!page)
|
||||||
|
return;
|
||||||
|
else if (!(*page))
|
||||||
|
*page = page_create(size);
|
||||||
|
else if (page[0]->capacity < size)
|
||||||
|
{
|
||||||
|
page[0]->capacity = MAX(size, page[0]->capacity * 1.5);
|
||||||
|
page[0] = realloc(page[0], sizeof(*page[0]) + page[0]->capacity);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
i64 page_append(page_t *page, void *data, u64 size)
|
||||||
|
{
|
||||||
|
if (!page || page->size + size >= page->capacity)
|
||||||
|
return -1;
|
||||||
|
if (data)
|
||||||
|
memcpy(page->data + page->size, data, size);
|
||||||
|
u64 ptr = page->size;
|
||||||
|
page->size += size;
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 page_rappend(page_t **page, void *data, u64 size)
|
||||||
|
{
|
||||||
|
page_resize(page, page[0]->size + size);
|
||||||
|
if (data)
|
||||||
|
memcpy(page[0]->data + page[0]->size, data, size);
|
||||||
|
u64 ptr = page[0]->size;
|
||||||
|
page[0]->size += size;
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void *arena_alloc(arena_t *arena, u64 size)
|
||||||
|
{
|
||||||
|
if (!arena)
|
||||||
|
return NULL;
|
||||||
|
else if (!arena->start)
|
||||||
|
{
|
||||||
|
arena->start = page_create(1);
|
||||||
|
arena->end = arena->start;
|
||||||
|
}
|
||||||
|
|
||||||
|
page_t *best_fit;
|
||||||
|
for (best_fit = arena->start; best_fit; best_fit = best_fit->next)
|
||||||
|
if (best_fit->size + size + MEMORY_ALIGNMENT < best_fit->capacity)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (!best_fit)
|
||||||
|
{
|
||||||
|
best_fit = page_create(size);
|
||||||
|
arena->end->next = best_fit;
|
||||||
|
arena->end = best_fit;
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: Fsanitize has a hissy fit if we don't align memory "correctly"
|
||||||
|
u64 offset = 0;
|
||||||
|
|
||||||
|
if (size % MEMORY_ALIGNMENT == 0)
|
||||||
|
{
|
||||||
|
u64 div = (((u64)(best_fit->data + best_fit->size)) % MEMORY_ALIGNMENT);
|
||||||
|
if (div != 0)
|
||||||
|
offset = MEMORY_ALIGNMENT - div;
|
||||||
|
}
|
||||||
|
|
||||||
|
void *start = best_fit->data + best_fit->size + offset;
|
||||||
|
|
||||||
|
best_fit->size += size + offset;
|
||||||
|
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
|
void *arena_realloc(arena_t *arena, void *ptr, u64 oldsize, u64 newsize)
|
||||||
|
{
|
||||||
|
if (!ptr)
|
||||||
|
return arena_alloc(arena, newsize);
|
||||||
|
else if (!arena || !arena->start)
|
||||||
|
return NULL;
|
||||||
|
else if (newsize <= oldsize)
|
||||||
|
// No need to change anything.
|
||||||
|
return ptr;
|
||||||
|
|
||||||
|
bool copy_into_new = true;
|
||||||
|
void *start = NULL;
|
||||||
|
page_t *old_page = NULL, *best_fit = NULL;
|
||||||
|
|
||||||
|
for (page_t *page = arena->start; page; page = page->next)
|
||||||
|
{
|
||||||
|
if (!best_fit && page->size + newsize < page->capacity)
|
||||||
|
best_fit = page;
|
||||||
|
if (page->data <= (u8 *)ptr &&
|
||||||
|
(u8 *)(ptr) + oldsize <= page->data + page->capacity)
|
||||||
|
old_page = page;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the old page exists, ptr is the latest allocation in it, and it has
|
||||||
|
// enough space to contain the new size, just resize and return.
|
||||||
|
if (old_page && old_page->data + old_page->size - oldsize == ptr &&
|
||||||
|
old_page->size - oldsize + newsize < old_page->capacity)
|
||||||
|
{
|
||||||
|
start = ptr;
|
||||||
|
old_page->size += newsize - oldsize;
|
||||||
|
copy_into_new = false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (!old_page)
|
||||||
|
copy_into_new = false;
|
||||||
|
if (!best_fit)
|
||||||
|
{
|
||||||
|
best_fit = page_create(newsize);
|
||||||
|
arena->end->next = best_fit;
|
||||||
|
arena->end = best_fit;
|
||||||
|
}
|
||||||
|
|
||||||
|
start = best_fit->data + best_fit->size;
|
||||||
|
best_fit->size += newsize;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (copy_into_new)
|
||||||
|
{
|
||||||
|
memcpy(start, ptr, oldsize);
|
||||||
|
memset(start + oldsize, 0, newsize - oldsize);
|
||||||
|
}
|
||||||
|
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
|
void arena_attach(arena_t *arena, page_t *page)
|
||||||
|
{
|
||||||
|
if (!arena || !page)
|
||||||
|
return;
|
||||||
|
else if (!arena->start || !arena->end)
|
||||||
|
{
|
||||||
|
arena->start = page;
|
||||||
|
arena->end = page;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
page->next = arena->start;
|
||||||
|
arena->start = page;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void arena_reset(arena_t *arena)
|
||||||
|
{
|
||||||
|
if (!arena || !arena->start)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (page_t *cur = arena->start; cur; cur = cur->next)
|
||||||
|
{
|
||||||
|
if (cur->size == 0)
|
||||||
|
continue;
|
||||||
|
cur->size = 0;
|
||||||
|
memset(cur->data, 0, cur->capacity);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void arena_cleanup(arena_t *arena)
|
||||||
|
{
|
||||||
|
if (!arena || !arena->start)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (page_t *cur = arena->start, *next = NULL; cur;
|
||||||
|
next = cur->next, free(cur), cur = next)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
memset(arena, 0, sizeof(*arena));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocates against stable memory i.e. we can have pointers of this lying
|
||||||
|
// around without any fear of them being thrown away.
|
||||||
|
void *context_alloc(context_t *context, u64 size)
|
||||||
|
{
|
||||||
|
return arena_alloc(&context->memory, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate against a "scratch space", separate from main memory, for internal
|
||||||
|
// use.
|
||||||
|
void *context_salloc(context_t *context, u64 size)
|
||||||
|
{
|
||||||
|
return arena_alloc(&context->scratch, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void context_reset_read(context_t *context)
|
||||||
|
{
|
||||||
|
arena_reset(&context->read);
|
||||||
|
}
|
||||||
|
|
||||||
|
void context_reset_scratch(context_t *context)
|
||||||
|
{
|
||||||
|
arena_reset(&context->scratch);
|
||||||
|
}
|
||||||
|
|
||||||
|
void context_reset(context_t *context)
|
||||||
|
{
|
||||||
|
arena_reset(&context->memory);
|
||||||
|
arena_reset(&context->read);
|
||||||
|
arena_reset(&context->scratch);
|
||||||
|
}
|
||||||
|
|
||||||
|
void context_cleanup(context_t *context)
|
||||||
|
{
|
||||||
|
if (!context)
|
||||||
|
return;
|
||||||
|
arena_cleanup(&context->memory);
|
||||||
|
arena_cleanup(&context->read);
|
||||||
|
arena_cleanup(&context->scratch);
|
||||||
|
memset(context, 0, sizeof(*context));
|
||||||
|
}
|
||||||
|
|
||||||
|
void context_report(context_t *context)
|
||||||
|
{
|
||||||
|
#if DEBUG
|
||||||
|
// Figure this out at runtime
|
||||||
|
u64 mem_used = 0, mem_cap = 0;
|
||||||
|
for (page_t *page = context->memory.start; page; page = page->next)
|
||||||
|
{
|
||||||
|
mem_used += page->size;
|
||||||
|
mem_cap += page->capacity;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 read_used = 0, read_cap = 0;
|
||||||
|
for (page_t *page = context->read.start; page; page = page->next)
|
||||||
|
{
|
||||||
|
read_used += page->size;
|
||||||
|
read_cap += page->capacity;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 scr_used = 0, scr_cap = 0;
|
||||||
|
for (page_t *page = context->scratch.start; page; page = page->next)
|
||||||
|
{
|
||||||
|
scr_used += page->size;
|
||||||
|
scr_cap += page->capacity;
|
||||||
|
}
|
||||||
|
|
||||||
|
info("<Context>: %luB/%luB main memory used\n", mem_used, mem_cap);
|
||||||
|
info("<Context>: %luB/%luB read space used\n", read_used, read_cap);
|
||||||
|
info("<Context>: %luB/%luB scratch space used\n", scr_used, scr_cap);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
66
memory.h
Normal file
66
memory.h
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
/* Copyright (C) 2025 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||||
|
* details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the GNU General
|
||||||
|
* Public License Version 2, which you should have received a copy of along with
|
||||||
|
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
* Created: 2025-04-05
|
||||||
|
* Description: Memory management structures
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MEMORY_H
|
||||||
|
#define MEMORY_H
|
||||||
|
|
||||||
|
#include "./base.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#define PAGE_DEFAULT_SIZE 1024
|
||||||
|
#define MEMORY_ALIGNMENT 8
|
||||||
|
|
||||||
|
typedef struct Page
|
||||||
|
{
|
||||||
|
struct Page *next;
|
||||||
|
u64 size, capacity;
|
||||||
|
u8 data[];
|
||||||
|
} page_t;
|
||||||
|
|
||||||
|
page_t *page_create(u64 size);
|
||||||
|
void page_resize(page_t **page, u64 newsize);
|
||||||
|
// Append - fail (by returning <0) if not enough space.
|
||||||
|
i64 page_append(page_t *page, void *data, u64 size);
|
||||||
|
// Append - will resize if necessary
|
||||||
|
u64 page_rappend(page_t **page, void *data, u64 size);
|
||||||
|
|
||||||
|
typedef struct Aren
|
||||||
|
{
|
||||||
|
page_t *start, *end;
|
||||||
|
} arena_t;
|
||||||
|
|
||||||
|
void *arena_alloc(arena_t *arena, u64 size);
|
||||||
|
void *arena_realloc(arena_t *arena, void *ptr, u64 oldsize, u64 newsize);
|
||||||
|
void *arena_copy(arena_t *arena, void *ptr, u64 size);
|
||||||
|
void arena_attach(arena_t *arena, page_t *page);
|
||||||
|
void arena_reset(arena_t *arena);
|
||||||
|
void arena_cleanup(arena_t *arena);
|
||||||
|
|
||||||
|
typedef struct Context
|
||||||
|
{
|
||||||
|
arena_t memory, read, scratch;
|
||||||
|
} context_t;
|
||||||
|
|
||||||
|
void *context_alloc(context_t *context, u64 size);
|
||||||
|
void *context_salloc(context_t *context, u64 size);
|
||||||
|
page_t *context_get_read_page(context_t *ctx);
|
||||||
|
void context_reset_read(context_t *context);
|
||||||
|
void context_reset_scratch(context_t *context);
|
||||||
|
|
||||||
|
void context_reset(context_t *context);
|
||||||
|
void context_cleanup(context_t *context);
|
||||||
|
void context_report(context_t *context);
|
||||||
|
|
||||||
|
#endif
|
||||||
419
reader.c
Normal file
419
reader.c
Normal file
@@ -0,0 +1,419 @@
|
|||||||
|
/* Copyright (C) 2025 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||||
|
* details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the GNU General
|
||||||
|
* Public License Version 2, which you should have received a copy of along with
|
||||||
|
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
* Created: 2025-04-16
|
||||||
|
* Description: Implementation of parser
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "./reader.h"
|
||||||
|
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
bool is_digit(char c)
|
||||||
|
{
|
||||||
|
return isdigit(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_alpha(char c)
|
||||||
|
{
|
||||||
|
return isalpha(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_space(char c)
|
||||||
|
{
|
||||||
|
return isspace(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_skip(char c)
|
||||||
|
{
|
||||||
|
return is_space(c) || c == ';';
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_sym(char c)
|
||||||
|
{
|
||||||
|
return strchr(SYM_CHARS, c) != NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void input_from_sv(context_t *ctx, input_t *inp, const char *name, sv_t sv)
|
||||||
|
{
|
||||||
|
inp->name = name;
|
||||||
|
inp->str = sv_copy(&ctx->read, sv);
|
||||||
|
}
|
||||||
|
|
||||||
|
void input_from_fp(context_t *ctx, input_t *input, const char *name, FILE *fp)
|
||||||
|
{
|
||||||
|
input->name = name;
|
||||||
|
// TODO: Choose a best fit (i.e. maximal capacity, unused) page
|
||||||
|
page_t *page = page_create(INPUT_CHUNK_SIZE);
|
||||||
|
// chunk should be in scratch space so we can reset it later.
|
||||||
|
char *chunk = context_salloc(ctx, INPUT_CHUNK_SIZE);
|
||||||
|
|
||||||
|
u64 total_size = 0, size_read = 0;
|
||||||
|
while (!feof(fp))
|
||||||
|
{
|
||||||
|
size_read = fread(chunk, 1, INPUT_CHUNK_SIZE, fp);
|
||||||
|
if (size_read > 0)
|
||||||
|
{
|
||||||
|
page_rappend(&page, chunk, size_read);
|
||||||
|
total_size += size_read;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
input->str = SV((char *)page->data, total_size);
|
||||||
|
|
||||||
|
// Memory cleanup
|
||||||
|
context_reset_scratch(ctx);
|
||||||
|
arena_attach(&ctx->read, page);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool input_eof(input_t *input)
|
||||||
|
{
|
||||||
|
return !input || (input->offset >= input->str.size) ||
|
||||||
|
(input->str.data[input->offset] == '\0');
|
||||||
|
}
|
||||||
|
|
||||||
|
char input_peek(input_t *input, u64 offset)
|
||||||
|
{
|
||||||
|
if (input_eof(input) || input->offset + offset >= input->str.size)
|
||||||
|
return '\0';
|
||||||
|
return input->str.data[input->offset + offset];
|
||||||
|
}
|
||||||
|
|
||||||
|
char input_next(input_t *input, u64 offset)
|
||||||
|
{
|
||||||
|
if (input_eof(input) || input->offset + offset >= input->str.size)
|
||||||
|
return '\0';
|
||||||
|
input->offset += offset;
|
||||||
|
return input->str.data[input->offset];
|
||||||
|
}
|
||||||
|
|
||||||
|
void input_skip(input_t *inp)
|
||||||
|
{
|
||||||
|
// n + 2 lookup
|
||||||
|
sv_t current = sv_cut(inp->str, inp->offset);
|
||||||
|
sv_t lookup = sv_chop(current, 2);
|
||||||
|
while ((!input_eof(inp) && is_space(lookup.data[0])) ||
|
||||||
|
lookup.data[0] == ';' || strncmp(lookup.data, "#|", 2) == 0)
|
||||||
|
{
|
||||||
|
if (lookup.data[0] == ';')
|
||||||
|
{
|
||||||
|
i64 newline = sv_find_subcstr(current, "\n", 1);
|
||||||
|
if (newline < 0)
|
||||||
|
inp->offset = inp->str.size;
|
||||||
|
else
|
||||||
|
inp->offset += newline + 1;
|
||||||
|
}
|
||||||
|
else if (strncmp(lookup.data, "#|", 2) == 0)
|
||||||
|
{
|
||||||
|
i64 offset = sv_find_subcstr(current, "|#", 2);
|
||||||
|
if (offset < 0)
|
||||||
|
inp->offset = inp->str.size;
|
||||||
|
else
|
||||||
|
inp->offset += offset + 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
inp->offset += sv_while(sv_cut(inp->str, inp->offset), is_space);
|
||||||
|
current = sv_cut(inp->str, inp->offset);
|
||||||
|
lookup = sv_chop(current, 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
perr_t parse_int(context_t *ctx, input_t *inp, lisp_t **ret)
|
||||||
|
{
|
||||||
|
debug("parse_int[%lu] => ", inp->offset);
|
||||||
|
|
||||||
|
// TODO: Parse arbitrary sized integers
|
||||||
|
(void)ctx;
|
||||||
|
|
||||||
|
bool negative = (input_peek(inp, 0) == '-');
|
||||||
|
sv_t current = sv_cut(inp->str, inp->offset + (negative ? 1 : 0));
|
||||||
|
sv_t digits = sv_chop(current, sv_while(current, is_digit));
|
||||||
|
|
||||||
|
debug("`" PR_SV "` => ", SV_FMT(digits));
|
||||||
|
i64 x = (negative ? -1L : 1L) * strtol(digits.data, NULL, 10);
|
||||||
|
debug("%ld\n", x);
|
||||||
|
|
||||||
|
input_next(inp, digits.size + (negative ? 1 : 0));
|
||||||
|
|
||||||
|
*ret = make_int(x);
|
||||||
|
return PERR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
perr_t parse_sym(context_t *ctx, input_t *inp, lisp_t **ret)
|
||||||
|
{
|
||||||
|
debug("parse_sym[%lu] => ", inp->offset);
|
||||||
|
|
||||||
|
sv_t current = sv_cut(inp->str, inp->offset);
|
||||||
|
sv_t sym = sv_chop(current, sv_while(current, is_sym));
|
||||||
|
debug("`" PR_SV "`\n", SV_FMT(sym));
|
||||||
|
|
||||||
|
if (sym.size == 3)
|
||||||
|
{
|
||||||
|
// NOTE: We can't mutate sym directly because it's on `read` space.
|
||||||
|
|
||||||
|
// TODO: Make this beautiful please.
|
||||||
|
char buf[3];
|
||||||
|
for (u64 i = 0; i < 3; ++i)
|
||||||
|
buf[i] = toupper(sym.data[i]);
|
||||||
|
|
||||||
|
// NOTE: NIL symbol to actual NIL
|
||||||
|
if (strncmp(buf, "NIL", 3) == 0)
|
||||||
|
{
|
||||||
|
input_next(inp, 3);
|
||||||
|
return NIL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
lisp_t *lsym = make_sym(ctx, sym.data, sym.size);
|
||||||
|
input_next(inp, sym.size);
|
||||||
|
*ret = lsym;
|
||||||
|
|
||||||
|
return PERR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
perr_t parse_bool(context_t *ctx, input_t *inp, lisp_t **ret)
|
||||||
|
{
|
||||||
|
(void)ctx;
|
||||||
|
debug("parse_bool[%lu] => ", inp->offset);
|
||||||
|
char c = input_peek(inp, 1);
|
||||||
|
bool b = -1;
|
||||||
|
if (c == 't')
|
||||||
|
b = true;
|
||||||
|
else if (c == 'f')
|
||||||
|
b = false;
|
||||||
|
else
|
||||||
|
return PERR_EXPECTED_BOOLEAN;
|
||||||
|
*ret = tag_bool(b);
|
||||||
|
input_next(inp, 2);
|
||||||
|
return PERR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
perr_t parse_cons(context_t *ctx, input_t *inp, lisp_t **ret)
|
||||||
|
{
|
||||||
|
// TODO: Put this in a symbol table
|
||||||
|
lisp_t *lisp_dot = make_sym(ctx, ".", 1);
|
||||||
|
debug("parse_cons[%lu] => (\n", inp->offset);
|
||||||
|
inp->offset += 1;
|
||||||
|
|
||||||
|
lisp_t *root = NIL;
|
||||||
|
lisp_t **cur = NIL;
|
||||||
|
bool dotted = false;
|
||||||
|
|
||||||
|
while (!input_eof(inp) && input_peek(inp, 0) != ')')
|
||||||
|
{
|
||||||
|
lisp_t *lisp = NIL;
|
||||||
|
perr_t res = parse(ctx, inp, &lisp);
|
||||||
|
if (res)
|
||||||
|
return res;
|
||||||
|
|
||||||
|
// This is cheap to do
|
||||||
|
if (lisp == lisp_dot)
|
||||||
|
{
|
||||||
|
dotted = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!root)
|
||||||
|
{
|
||||||
|
root = make_cons(ctx, lisp, NIL);
|
||||||
|
cur = &root;
|
||||||
|
}
|
||||||
|
else if (!dotted)
|
||||||
|
*cur = make_cons(ctx, lisp, NIL);
|
||||||
|
else
|
||||||
|
*cur = lisp;
|
||||||
|
|
||||||
|
if (cur && !dotted)
|
||||||
|
cur = &as_cons(*cur)->cdr;
|
||||||
|
|
||||||
|
input_skip(inp);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (input_peek(inp, 0) != ')')
|
||||||
|
return PERR_EXPECTED_CLOSE_BRACKET;
|
||||||
|
|
||||||
|
input_next(inp, 1);
|
||||||
|
|
||||||
|
debug(")\n");
|
||||||
|
*ret = root;
|
||||||
|
return PERR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
perr_t parse_vec(context_t *ctx, input_t *inp, lisp_t **ret)
|
||||||
|
{
|
||||||
|
debug("parse_vec[%lu] => [\n", inp->offset);
|
||||||
|
input_next(inp, 2);
|
||||||
|
|
||||||
|
lisp_t *lvec = make_vec(ctx, 0);
|
||||||
|
vec_t *vec = as_vec(lvec);
|
||||||
|
|
||||||
|
while (!input_eof(inp) && input_peek(inp, 0) != ')')
|
||||||
|
{
|
||||||
|
lisp_t *lisp = NIL;
|
||||||
|
perr_t res = parse(ctx, inp, &lisp);
|
||||||
|
if (res)
|
||||||
|
return res;
|
||||||
|
|
||||||
|
vec_append(&ctx->memory, vec, &lisp, sizeof(lisp));
|
||||||
|
input_skip(inp);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (input_peek(inp, 0) != ')')
|
||||||
|
return PERR_EXPECTED_CLOSE_BRACKET;
|
||||||
|
|
||||||
|
input_next(inp, 1);
|
||||||
|
|
||||||
|
debug("]\n");
|
||||||
|
*ret = lvec;
|
||||||
|
return PERR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
perr_t parse_str(context_t *ctx, input_t *inp, lisp_t **ret)
|
||||||
|
{
|
||||||
|
debug("parse_str[%lu] => ", inp->offset);
|
||||||
|
input_next(inp, 1); // 1 for the first speechmark
|
||||||
|
sv_t sv = sv_cut(inp->str, inp->offset);
|
||||||
|
i64 size = sv_find_subcstr(sv, "\"", 1);
|
||||||
|
if (size < 0)
|
||||||
|
return PERR_EXPECTED_SPEECH_MARK;
|
||||||
|
|
||||||
|
input_next(inp, size + 1); // 1 for that last speechmark
|
||||||
|
sv_t str_content = sv_chop(sv, size);
|
||||||
|
debug("\"" PR_SV "\"\n", SV_FMT(str_content));
|
||||||
|
*ret = make_str(ctx, str_content.data, str_content.size);
|
||||||
|
return PERR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
perr_t parse_quote(context_t *ctx, input_t *inp, lisp_t **ret)
|
||||||
|
{
|
||||||
|
char c = input_peek(inp, 0);
|
||||||
|
if (!(c == '\'' || c == '`'))
|
||||||
|
return PERR_UNEXPECTED_CHAR;
|
||||||
|
input_next(inp, 1);
|
||||||
|
sv_t prefix = {0};
|
||||||
|
if (c == '\'')
|
||||||
|
prefix = SV("quote", 5);
|
||||||
|
else if (c == '`')
|
||||||
|
prefix = SV("quasiquote", 10);
|
||||||
|
lisp_t *root = make_cons(ctx, make_sym(ctx, prefix.data, prefix.size), NIL);
|
||||||
|
lisp_t *rest = NIL;
|
||||||
|
perr_t perr = parse(ctx, inp, &rest);
|
||||||
|
if (perr)
|
||||||
|
return perr;
|
||||||
|
CDR(root) = make_cons(ctx, rest, NIL);
|
||||||
|
*ret = root;
|
||||||
|
return PERR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Make this interactable with user once we have evaluation
|
||||||
|
perr_t parse_reader_macro(context_t *ctx, input_t *inp, lisp_t **ret)
|
||||||
|
{
|
||||||
|
char c = input_peek(inp, 1);
|
||||||
|
if (c == '\\')
|
||||||
|
{
|
||||||
|
// character or weird base integer
|
||||||
|
TODO("Not implemented reader macro for characters or weird bases");
|
||||||
|
}
|
||||||
|
else if (c == '(')
|
||||||
|
{
|
||||||
|
return parse_vec(ctx, inp, ret);
|
||||||
|
}
|
||||||
|
else if (c == 't' || c == 'f')
|
||||||
|
return parse_bool(ctx, inp, ret);
|
||||||
|
return PERR_UNEXPECTED_READER_MACRO_SYMBOL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static_assert(NUM_TAGS == 9);
|
||||||
|
perr_t parse(context_t *ctx, input_t *inp, lisp_t **ret)
|
||||||
|
{
|
||||||
|
debug("parse => ");
|
||||||
|
input_skip(inp);
|
||||||
|
if (input_eof(inp))
|
||||||
|
return PERR_EOF;
|
||||||
|
|
||||||
|
char c = input_peek(inp, 0);
|
||||||
|
|
||||||
|
if (is_digit(c) || (c == '-' && is_digit(input_peek(inp, 1))))
|
||||||
|
return parse_int(ctx, inp, ret);
|
||||||
|
else if (c == '#')
|
||||||
|
return parse_reader_macro(ctx, inp, ret);
|
||||||
|
else if (is_sym(c))
|
||||||
|
return parse_sym(ctx, inp, ret);
|
||||||
|
else if (c == '(')
|
||||||
|
return parse_cons(ctx, inp, ret);
|
||||||
|
else if (c == '\'' || c == '`')
|
||||||
|
return parse_quote(ctx, inp, ret);
|
||||||
|
else if (c == '\"')
|
||||||
|
return parse_str(ctx, inp, ret);
|
||||||
|
else
|
||||||
|
return PERR_UNEXPECTED_CHAR;
|
||||||
|
}
|
||||||
|
|
||||||
|
perr_t parse_all(context_t *ctx, input_t *inp, vec_t *vec)
|
||||||
|
{
|
||||||
|
while (!input_eof(inp))
|
||||||
|
{
|
||||||
|
lisp_t *member = NIL;
|
||||||
|
perr_t err = parse(ctx, inp, &member);
|
||||||
|
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
else
|
||||||
|
vec_append(&ctx->scratch, vec, &member, sizeof(member));
|
||||||
|
|
||||||
|
input_skip(inp);
|
||||||
|
}
|
||||||
|
return PERR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
int print_perror(FILE *fp, input_t *inp, perr_t error)
|
||||||
|
{
|
||||||
|
pos_t pos = input_offset_to_pos(inp);
|
||||||
|
fprintf(fp, "%s:%lu:%lu: %s", inp->name, pos.line, pos.col,
|
||||||
|
perr_to_cstr(error));
|
||||||
|
switch (error)
|
||||||
|
{
|
||||||
|
case PERR_UNEXPECTED_CHAR:
|
||||||
|
fprintf(fp, "(`%c`)", input_peek(inp, 0));
|
||||||
|
break;
|
||||||
|
case PERR_OK:
|
||||||
|
case PERR_EOF:
|
||||||
|
case PERR_EXPECTED_BOOLEAN:
|
||||||
|
case PERR_UNEXPECTED_READER_MACRO_SYMBOL:
|
||||||
|
case PERR_EXPECTED_CLOSE_BRACKET:
|
||||||
|
case PERR_EXPECTED_SPEECH_MARK:
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
pos_t input_offset_to_pos(input_t *inp)
|
||||||
|
{
|
||||||
|
pos_t pos = {.col = 1, .line = 1};
|
||||||
|
for (u64 i = 0; i < inp->offset && i < inp->str.size; ++i)
|
||||||
|
{
|
||||||
|
char c = (inp->str.data[i]);
|
||||||
|
if (c == '\n')
|
||||||
|
{
|
||||||
|
++pos.line;
|
||||||
|
pos.col = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
++pos.col;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return pos;
|
||||||
|
}
|
||||||
98
reader.h
Normal file
98
reader.h
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
/* Copyright (C) 2025 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||||
|
* details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the GNU General
|
||||||
|
* Public License Version 2, which you should have received a copy of along with
|
||||||
|
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
* Created: 2025-04-16
|
||||||
|
* Description: Parsing Lisp!
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef READER_H
|
||||||
|
#define READER_H
|
||||||
|
|
||||||
|
#include "./lisp.h"
|
||||||
|
#include "./memory.h"
|
||||||
|
#include "./vec.h"
|
||||||
|
|
||||||
|
#define INPUT_CHUNK_SIZE 512
|
||||||
|
static const char SYM_CHARS[] =
|
||||||
|
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
|
||||||
|
"¬!£$%^&*_-+={[]}:@~#<,>.?/";
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
const char *name;
|
||||||
|
u64 offset;
|
||||||
|
sv_t str;
|
||||||
|
} input_t;
|
||||||
|
|
||||||
|
void input_from_sv(context_t *ctx, input_t *inp, const char *name, sv_t sv);
|
||||||
|
void input_from_fp(context_t *ctx, input_t *input, const char *name, FILE *fp);
|
||||||
|
bool input_eof(input_t *input);
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
u64 col, line;
|
||||||
|
} pos_t;
|
||||||
|
|
||||||
|
pos_t input_offset_to_pos(input_t *inp);
|
||||||
|
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
PERR_OK = 0,
|
||||||
|
PERR_EOF,
|
||||||
|
PERR_UNEXPECTED_CHAR,
|
||||||
|
PERR_EXPECTED_CLOSE_BRACKET,
|
||||||
|
PERR_EXPECTED_SPEECH_MARK,
|
||||||
|
PERR_UNEXPECTED_READER_MACRO_SYMBOL,
|
||||||
|
PERR_EXPECTED_BOOLEAN,
|
||||||
|
} perr_t;
|
||||||
|
|
||||||
|
static inline const char *perr_to_cstr(perr_t perr)
|
||||||
|
{
|
||||||
|
switch (perr)
|
||||||
|
{
|
||||||
|
case PERR_OK:
|
||||||
|
return "OK";
|
||||||
|
break;
|
||||||
|
case PERR_EOF:
|
||||||
|
return "EOF";
|
||||||
|
break;
|
||||||
|
case PERR_UNEXPECTED_CHAR:
|
||||||
|
return "UNEXPECTED_CHAR";
|
||||||
|
break;
|
||||||
|
case PERR_EXPECTED_CLOSE_BRACKET:
|
||||||
|
return "EXPECTED_CLOSE_BRACKET";
|
||||||
|
break;
|
||||||
|
case PERR_EXPECTED_SPEECH_MARK:
|
||||||
|
return "EXPECTED_SPEECH_MARK";
|
||||||
|
break;
|
||||||
|
case PERR_UNEXPECTED_READER_MACRO_SYMBOL:
|
||||||
|
return "UNEXPECTED_READER_MACRO_SYMBOL";
|
||||||
|
break;
|
||||||
|
case PERR_EXPECTED_BOOLEAN:
|
||||||
|
return "EXPECTED_BOOLEAN";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
assert(false && "perr_to_cstr: unreachable");
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
lisp_t *result;
|
||||||
|
perr_t error;
|
||||||
|
} pres_t;
|
||||||
|
|
||||||
|
perr_t parse(context_t *ctx, input_t *str, lisp_t **ret);
|
||||||
|
perr_t parse_all(context_t *ctx, input_t *str, vec_t *vec);
|
||||||
|
|
||||||
|
int print_perror(FILE *fp, input_t *inp, perr_t error);
|
||||||
|
|
||||||
|
#endif
|
||||||
158
sv.c
Normal file
158
sv.c
Normal file
@@ -0,0 +1,158 @@
|
|||||||
|
/* Copyright (C) 2025 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||||
|
* details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the GNU General
|
||||||
|
* Public License Version 2, which you should have received a copy of along with
|
||||||
|
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
* Created: 2025-04-14
|
||||||
|
* Description: String View implementation
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "./sv.h"
|
||||||
|
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
sv_t sv_make(arena_t *allocator, const char *data, u64 size)
|
||||||
|
{
|
||||||
|
sv_t s = {0};
|
||||||
|
if (data)
|
||||||
|
s = sv_append(allocator, s, data, size);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
s.data = arena_alloc(allocator, size);
|
||||||
|
s.size = size;
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
sv_t sv_copy(arena_t *allocator, sv_t sv)
|
||||||
|
{
|
||||||
|
return sv_make(allocator, sv.data, sv.size);
|
||||||
|
}
|
||||||
|
|
||||||
|
sv_t sv_substr(sv_t sv, u64 index, u64 size)
|
||||||
|
{
|
||||||
|
sv_t newsv = {0};
|
||||||
|
if (index + size > sv.size)
|
||||||
|
return newsv;
|
||||||
|
newsv.data = sv.data + index;
|
||||||
|
newsv.size = size;
|
||||||
|
return newsv;
|
||||||
|
}
|
||||||
|
|
||||||
|
sv_t sv_cut(sv_t sv, u64 index)
|
||||||
|
{
|
||||||
|
return sv_substr(sv, index, sv.size < index ? 0 : sv.size - index);
|
||||||
|
}
|
||||||
|
|
||||||
|
sv_t sv_chop(sv_t sv, u64 size)
|
||||||
|
{
|
||||||
|
return sv_substr(sv, 0, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
sv_t sv_concat(arena_t *allocator, sv_t a, sv_t b)
|
||||||
|
{
|
||||||
|
sv_t c = sv_make(allocator, a.data, a.size + b.size);
|
||||||
|
memcpy(c.data + a.size, b.data, b.size);
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
sv_t sv_append(arena_t *allocator, sv_t sv, const char *data, u64 size)
|
||||||
|
{
|
||||||
|
if (!allocator)
|
||||||
|
return (sv_t){0};
|
||||||
|
|
||||||
|
sv_t newsv = {0};
|
||||||
|
newsv.size = sv.size + size;
|
||||||
|
newsv.data = arena_realloc(allocator, sv.data, sv.size, newsv.size);
|
||||||
|
if (data)
|
||||||
|
memcpy(newsv.data + sv.size, data, size);
|
||||||
|
return newsv;
|
||||||
|
}
|
||||||
|
|
||||||
|
sv_t sv_prepend(arena_t *allocator, sv_t sv, const char *data, u64 size)
|
||||||
|
{
|
||||||
|
if (!allocator)
|
||||||
|
return (sv_t){0};
|
||||||
|
|
||||||
|
// TODO: Can we make this cheaper to do?
|
||||||
|
sv_t newsv = sv_make(allocator, NULL, size + sv.size);
|
||||||
|
// Copy over `data` to the left side
|
||||||
|
memcpy(newsv.data, data, size);
|
||||||
|
// Copy old string to the right side
|
||||||
|
if (sv.data)
|
||||||
|
memcpy(newsv.data + size, sv.data, sv.size);
|
||||||
|
|
||||||
|
return newsv;
|
||||||
|
}
|
||||||
|
|
||||||
|
sv_t sv_fmt(arena_t *allocator, char *fmt, ...)
|
||||||
|
{
|
||||||
|
if (!allocator)
|
||||||
|
return (sv_t){0};
|
||||||
|
|
||||||
|
va_list ap_1, ap_2;
|
||||||
|
va_start(ap_1, fmt);
|
||||||
|
|
||||||
|
va_copy(ap_2, ap_1);
|
||||||
|
u64 size = vsnprintf(NULL, 0, fmt, ap_2);
|
||||||
|
va_end(ap_2);
|
||||||
|
|
||||||
|
sv_t sv = sv_make(allocator, NULL, size);
|
||||||
|
vsprintf(sv.data, fmt, ap_1);
|
||||||
|
va_end(ap_1);
|
||||||
|
|
||||||
|
return sv;
|
||||||
|
}
|
||||||
|
|
||||||
|
i64 sv_find_substr(const sv_t sv, const sv_t substr)
|
||||||
|
{
|
||||||
|
if (substr.size == 0)
|
||||||
|
return 0;
|
||||||
|
else if (sv.size < substr.size)
|
||||||
|
return -1;
|
||||||
|
else if (sv.size == substr.size)
|
||||||
|
return strncmp(sv.data, substr.data, sv.size) == 0 ? 0 : -1;
|
||||||
|
|
||||||
|
for (u64 i = 0; i < (sv.size - substr.size); ++i)
|
||||||
|
if (strncmp(sv.data + i, substr.data, substr.size) == 0)
|
||||||
|
return i;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
i64 sv_find_subcstr(const sv_t sv, const char *substr, u64 size)
|
||||||
|
{
|
||||||
|
return sv_find_substr(sv, SV((char *)substr, size));
|
||||||
|
}
|
||||||
|
|
||||||
|
i64 sv_find_any(const sv_t sv, const char *bag)
|
||||||
|
{
|
||||||
|
for (u64 i = 0; i < sv.size; ++i)
|
||||||
|
if (strchr(bag, sv.data[i]))
|
||||||
|
return i;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 sv_while(const sv_t sv, bool (*pred)(char))
|
||||||
|
{
|
||||||
|
u64 i;
|
||||||
|
for (i = 0; i < sv.size && pred(sv.data[i]); ++i)
|
||||||
|
continue;
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 sv_till(const sv_t sv, bool (*pred)(char))
|
||||||
|
{
|
||||||
|
u64 i;
|
||||||
|
for (i = 0; i < sv.size && !pred(sv.data[i]); ++i)
|
||||||
|
continue;
|
||||||
|
return i;
|
||||||
|
}
|
||||||
70
sv.h
Normal file
70
sv.h
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
/* Copyright (C) 2025 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||||
|
* details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the GNU General
|
||||||
|
* Public License Version 2, which you should have received a copy of along with
|
||||||
|
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
* Created: 2025-04-14
|
||||||
|
* Description: String Views
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef SV_H
|
||||||
|
#define SV_H
|
||||||
|
|
||||||
|
#include "./memory.h"
|
||||||
|
|
||||||
|
typedef struct SV
|
||||||
|
{
|
||||||
|
u64 size;
|
||||||
|
char *data;
|
||||||
|
} sv_t;
|
||||||
|
|
||||||
|
#define SV(DATA, SIZE) ((const sv_t){.size = (SIZE), .data = (DATA)})
|
||||||
|
#define SV_FMT(SV) (int)(SV).size, (SV).data
|
||||||
|
#define PR_SV "%.*s"
|
||||||
|
|
||||||
|
sv_t sv_make(arena_t *allocator, const char *data, u64 size);
|
||||||
|
sv_t sv_copy(arena_t *allocator, sv_t sv);
|
||||||
|
sv_t sv_append(arena_t *allocator, sv_t sv, const char *data, u64 size);
|
||||||
|
sv_t sv_prepend(arena_t *allocator, sv_t sv, const char *data, u64 size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Concatenate two string views, returning that concatenation.
|
||||||
|
|
||||||
|
* Allocates memory.
|
||||||
|
*/
|
||||||
|
sv_t sv_concat(arena_t *allocator, sv_t a, sv_t b);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Allocates a string view with the given `printf` format.
|
||||||
|
*/
|
||||||
|
sv_t sv_fmt(arena_t *allocator, char *fmt, ...);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Constructs a new string view at a different offset. Does not allocate
|
||||||
|
* new memory.
|
||||||
|
*/
|
||||||
|
sv_t sv_substr(sv_t sv, u64 index, u64 size);
|
||||||
|
/**
|
||||||
|
* @brief Return a string view INDEX characters ahead (i.e. cut the string from
|
||||||
|
* the left).
|
||||||
|
*/
|
||||||
|
sv_t sv_cut(sv_t sv, u64 index);
|
||||||
|
/**
|
||||||
|
* @brief Return a string view with SIZE (i.e. chopping the string from the
|
||||||
|
* right)
|
||||||
|
*/
|
||||||
|
sv_t sv_chop(sv_t sv, u64 size);
|
||||||
|
|
||||||
|
i64 sv_find_substr(const sv_t sv, const sv_t substr);
|
||||||
|
i64 sv_find_subcstr(const sv_t sv, const char *substr, u64 size);
|
||||||
|
i64 sv_find_any(const sv_t sv, const char *bag);
|
||||||
|
u64 sv_while(const sv_t sv, bool (*pred)(char));
|
||||||
|
u64 sv_till(const sv_t sv, bool (*pred)(char));
|
||||||
|
|
||||||
|
#endif
|
||||||
94
tag.c
Normal file
94
tag.c
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
/* Copyright (C) 2025 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||||
|
* details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the GNU General
|
||||||
|
* Public License Version 2, which you should have received a copy of along with
|
||||||
|
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
* Created: 2025-04-06
|
||||||
|
* Description:
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
#include <tag.h>
|
||||||
|
|
||||||
|
lisp_t *tag_int(i64 i)
|
||||||
|
{
|
||||||
|
return TAG((u64)i, INT);
|
||||||
|
}
|
||||||
|
|
||||||
|
lisp_t *tag_cons(void *ptr)
|
||||||
|
{
|
||||||
|
return TAG((u64)ptr, CONS);
|
||||||
|
}
|
||||||
|
|
||||||
|
lisp_t *tag_sym(void *ptr)
|
||||||
|
{
|
||||||
|
return TAG((u64)ptr, SYM);
|
||||||
|
}
|
||||||
|
|
||||||
|
lisp_t *tag_ssym(const char *data, size_t size)
|
||||||
|
{
|
||||||
|
assert(size <= 7);
|
||||||
|
u8 buffer[sizeof(u64)];
|
||||||
|
memset(buffer, 0, sizeof(buffer));
|
||||||
|
// in 8 bits we have:
|
||||||
|
// size - 3 bits (up to 7)
|
||||||
|
// tag - 5 bits
|
||||||
|
buffer[0] = size;
|
||||||
|
buffer[0] <<= SHIFT_SSYM;
|
||||||
|
buffer[0] |= TAG_SSYM;
|
||||||
|
memcpy(buffer + 1, data, size);
|
||||||
|
u64 word = 0;
|
||||||
|
memcpy(&word, buffer, sizeof(u64));
|
||||||
|
return (lisp_t *)word;
|
||||||
|
}
|
||||||
|
|
||||||
|
lisp_t *tag_bool(bool b)
|
||||||
|
{
|
||||||
|
return TAG((u64)b, BOOL);
|
||||||
|
}
|
||||||
|
|
||||||
|
lisp_t *tag_vec(void *ptr)
|
||||||
|
{
|
||||||
|
return TAG((u64)ptr, VEC);
|
||||||
|
}
|
||||||
|
|
||||||
|
lisp_t *tag_str(void *ptr)
|
||||||
|
{
|
||||||
|
return TAG((u64)ptr, STR);
|
||||||
|
}
|
||||||
|
|
||||||
|
lisp_t *tag_char(u32 codepoint)
|
||||||
|
{
|
||||||
|
u64 w = codepoint;
|
||||||
|
return TAG(w, CHAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
enum Tag tag_get(lisp_t *ptr)
|
||||||
|
{
|
||||||
|
static_assert(NUM_TAGS == 9);
|
||||||
|
if (!ptr)
|
||||||
|
return TAG_NIL;
|
||||||
|
else if (IS_TAG(ptr, INT))
|
||||||
|
return TAG_INT;
|
||||||
|
else if (IS_TAG(ptr, CHAR))
|
||||||
|
return TAG_CHAR;
|
||||||
|
else if (IS_TAG(ptr, SYM))
|
||||||
|
return TAG_SYM;
|
||||||
|
else if (IS_TAG(ptr, SSYM))
|
||||||
|
return TAG_SSYM;
|
||||||
|
else if (IS_TAG(ptr, BOOL))
|
||||||
|
return TAG_BOOL;
|
||||||
|
else if (IS_TAG(ptr, VEC))
|
||||||
|
return TAG_VEC;
|
||||||
|
else if (IS_TAG(ptr, STR))
|
||||||
|
return TAG_STR;
|
||||||
|
else if (IS_TAG(ptr, CONS))
|
||||||
|
return TAG_CONS;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
90
tag.h
Normal file
90
tag.h
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
/* Copyright (C) 2025 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||||
|
* details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the GNU General
|
||||||
|
* Public License Version 2, which you should have received a copy of along with
|
||||||
|
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
* Created: 2025-04-06
|
||||||
|
* Description:
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TAG_H
|
||||||
|
#define TAG_H
|
||||||
|
|
||||||
|
#include "./base.h"
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
// Opaque structure to make tagged pointers a separate type from general
|
||||||
|
// pointers
|
||||||
|
typedef struct Obj lisp_t;
|
||||||
|
|
||||||
|
enum Tag
|
||||||
|
{
|
||||||
|
TAG_NIL = 0b00000000, // Atomic types
|
||||||
|
TAG_INT = 0b00000001, // special so we can encode 63 bit integers
|
||||||
|
TAG_SYM = 0b00000100,
|
||||||
|
TAG_SSYM = 0b00001000,
|
||||||
|
TAG_CHAR = 0b00001100,
|
||||||
|
TAG_BOOL = 0b00010000,
|
||||||
|
// TAG_FLOAT = 0b00010100,
|
||||||
|
TAG_CONS = 0b00000010, // Container types
|
||||||
|
TAG_VEC = 0b00000110,
|
||||||
|
TAG_STR = 0b00001010,
|
||||||
|
|
||||||
|
NUM_TAGS = 9,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum Shift
|
||||||
|
{
|
||||||
|
SHIFT_INT = 1,
|
||||||
|
SHIFT_SSYM = 5,
|
||||||
|
SHIFT_CHAR = 8,
|
||||||
|
SHIFT_SYM = 8,
|
||||||
|
SHIFT_BOOL = 8,
|
||||||
|
SHIFT_FLOAT = 8,
|
||||||
|
|
||||||
|
SHIFT_CONS = 8,
|
||||||
|
SHIFT_VEC = 8,
|
||||||
|
SHIFT_STR = 8,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum Mask
|
||||||
|
{
|
||||||
|
MASK_INT = 0b00000001,
|
||||||
|
MASK_SSYM = 0b00011111,
|
||||||
|
MASK_SYM = 0b11111111,
|
||||||
|
MASK_CHAR = 0b11111111,
|
||||||
|
MASK_BOOL = 0b11111111,
|
||||||
|
MASK_FLOAT = 0b11111111,
|
||||||
|
|
||||||
|
MASK_CONS = 0b11111111,
|
||||||
|
MASK_VEC = 0b11111111,
|
||||||
|
MASK_STR = 0b11111111,
|
||||||
|
};
|
||||||
|
|
||||||
|
#define TAG(PTR, TYPE) ((lisp_t *)(((PTR) << SHIFT_##TYPE) | TAG_##TYPE))
|
||||||
|
#define IS_TAG(PTR, TYPE) (((u64)(PTR) & MASK_##TYPE) == TAG_##TYPE)
|
||||||
|
#define UNTAG(PTR, TYPE) (((u64)PTR) >> SHIFT_##TYPE)
|
||||||
|
|
||||||
|
enum Tag tag_get(lisp_t *ptr);
|
||||||
|
|
||||||
|
lisp_t *tag_int(i64 i);
|
||||||
|
lisp_t *tag_char(u32 codepoint);
|
||||||
|
lisp_t *tag_sym(void *ptr);
|
||||||
|
lisp_t *tag_ssym(const char *data, size_t size);
|
||||||
|
lisp_t *tag_bool(bool b);
|
||||||
|
lisp_t *tag_vec(void *ptr);
|
||||||
|
lisp_t *tag_str(void *ptr);
|
||||||
|
lisp_t *tag_cons(void *ptr);
|
||||||
|
|
||||||
|
#define INT_MAX ((1L << 62) - 1)
|
||||||
|
#define INT_MIN (-(1L << 62))
|
||||||
|
|
||||||
|
#endif
|
||||||
230
tasks.org
Normal file
230
tasks.org
Normal file
@@ -0,0 +1,230 @@
|
|||||||
|
#+title: Tasks
|
||||||
|
#+date: 2025-02-18
|
||||||
|
|
||||||
|
* WIP Implement a reader
|
||||||
|
We want something a bit generic: able to handle reading from some
|
||||||
|
buffer of memory (a string, or contents of a file where we can read
|
||||||
|
the entire thing at once) or directly from a file stream (STDIN,
|
||||||
|
network streams, etc).
|
||||||
|
|
||||||
|
We don't need a tokeniser - the basic grammar of a Lisp is really easy
|
||||||
|
to narrow down, so we can skip tokenisation and go straight for
|
||||||
|
parsing.
|
||||||
|
|
||||||
|
We also want to be able to admit when reading went wrong for some
|
||||||
|
reason with proper errors messages (i.e. can be read by Emacs) - this
|
||||||
|
will need to be refactored when we introduce errors within the Lisp
|
||||||
|
runtime itself.
|
||||||
|
** TODO Implement floats and rationals
|
||||||
|
Rationals are pretty easy - just two integers (quotient and divisor) -
|
||||||
|
so a tagged cons cell would do the job. Floats are a bit more
|
||||||
|
difficult since I'd either need to box them or find a creative way of
|
||||||
|
sticking IEEE-754 floats into < 64 bits.
|
||||||
|
|
||||||
|
Also implement a reader macro for #e<scientific form>. Also deal with
|
||||||
|
[-,+,]inf(.0) and [-,+,]nan(.0).
|
||||||
|
|
||||||
|
Need to do some reading.
|
||||||
|
|
||||||
|
[[file:r7rs-tests.scm::test #t (real? #e1e10)][trigger]]
|
||||||
|
** TODO Consider user instantiated reader macros
|
||||||
|
We don't have an evaluator so we can't really interpret whatever a
|
||||||
|
user wants for a reader macro currently, but it would be useful to
|
||||||
|
think about it now. Currently I have a single function which deals
|
||||||
|
with parsing reader macros but that's just static.
|
||||||
|
|
||||||
|
Thing is, it does have the context as a parameter to pass to delegate
|
||||||
|
functions (such as ~parse_vec~) - wouldn't be a massive jump to also
|
||||||
|
consider user environments via the context.
|
||||||
|
|
||||||
|
[[file:reader.c::perr_t parse_reader_macro(context_t *ctx, input_t
|
||||||
|
*inp, lisp_t **ret)][function link]]
|
||||||
|
* TODO Consider Lisp runtime errors
|
||||||
|
* TODO Admit arbitrarily sized integers
|
||||||
|
Currently we admit fixed size integers of 63 bits. They use 2s
|
||||||
|
complement due to x86 which means our max and min are 62 bit based.
|
||||||
|
|
||||||
|
However, to even try to be a scheme implementation we need to allow
|
||||||
|
arbitrarily sized integers. What are the specific tasks we need to
|
||||||
|
complete in our model to achieve this?:
|
||||||
|
+ Allow "reading" of unfixed size integers
|
||||||
|
+ This will require reading a sequence of base 10 digits without
|
||||||
|
relying on strtold
|
||||||
|
+ Implement unfixed size integers into our memory model
|
||||||
|
+ Certainly, unfixed size integers cannot be carried around like
|
||||||
|
fixnums wherein we can embed an integer into the pointer.
|
||||||
|
Thus we have to allocate them in memory.
|
||||||
|
+ NOTE: There will be definitely be an optimisation to be done
|
||||||
|
here; integers that are within the bound of a fixnum could be
|
||||||
|
left as a fixnum then "elevated" to an integer when needed
|
||||||
|
+ I think the big idea is allocating them as a fixed set of bytes
|
||||||
|
like big symbols. For big integers we have to read the memory
|
||||||
|
associated thus we need a pointer. Due to 2s complement it should
|
||||||
|
be trivial to increase the size of an integer to fit a new result
|
||||||
|
i.e. if I'm adding two integers and that leads to an "overflow"
|
||||||
|
where the result is of greater width than its inputs, we should
|
||||||
|
just allocate new memory for it.
|
||||||
|
|
||||||
|
Consequences:
|
||||||
|
- Greater memory use
|
||||||
|
- In fact exponential if we need to allocate a whole new integer per
|
||||||
|
operation rather than utilising the input memory
|
||||||
|
- Possible loss of performance due to making integers over fixnums
|
||||||
|
when they don't need to be
|
||||||
|
- Comparison is harder on integers
|
||||||
|
- Harder to cache for the CPU
|
||||||
|
|
||||||
|
but all of this is to be expected when the user is an idiot.
|
||||||
|
* TODO Think about how to perform operations on different types
|
||||||
|
** TODO Integers
|
||||||
|
** TODO Symbols
|
||||||
|
** TODO Pairs
|
||||||
|
* DONE More efficient memory model for symbols
|
||||||
|
The primitive model for symbol allocation is an 8 byte number
|
||||||
|
representing the size of the symbol, followed by a variable number of
|
||||||
|
characters (as bytes). This is stored somewhere in the memory
|
||||||
|
context, which will likely be in the heap.
|
||||||
|
|
||||||
|
We're actually wasting a ridiculous amount of memory with this model.
|
||||||
|
We'll almost never be using the full 64 bits of the size to represent
|
||||||
|
a symbol (who's ever going to go close to 1.8 quintillion bytes for a
|
||||||
|
single symbol?) - and even more annoying, there are tons of small
|
||||||
|
sized symbols where we actually need _more space_ for the 8 byte size
|
||||||
|
than the underlying symbol data.
|
||||||
|
|
||||||
|
I think there's definitely a better model available, at least for
|
||||||
|
smaller symbols. We already have inlined integers where the pointer
|
||||||
|
itself is the integer, why can't we do the same for symbols?
|
||||||
|
A pointer has 8 bytes of data to use - one character is one byte -
|
||||||
|
thus we can represent 8 character symbols in one pointer.
|
||||||
|
|
||||||
|
If we want this to still be within the remit of our pointer tagging
|
||||||
|
scheme, we'll need a bit of space for our administrative purposes
|
||||||
|
(admin slop...). So let's take _one_ byte out of that 8 for that. So
|
||||||
|
we can represent any symbols 7 bytes long in a single pointer. We'll
|
||||||
|
need to keep in mind we want to represent symbols that may be less
|
||||||
|
than 7 characters, so that one admin byte is going to be doing some
|
||||||
|
heavy lifting.
|
||||||
|
|
||||||
|
Let's examine that one admin byte:
|
||||||
|
+ At least 3 bits are necessary for the actual pointer tag: "at least"
|
||||||
|
because we might increase the size of the tag based on demand
|
||||||
|
+ Thus, 5 bits left for our use - let's fit the size in there.
|
||||||
|
pow(2,6) - 1 = 63, so we have _way_ more than we need
|
||||||
|
|
||||||
|
What are the benefits of doing this?:
|
||||||
|
+ Symbol equality for small symbols is a cinch: just compare the two
|
||||||
|
tagged "pointers"
|
||||||
|
+ 7 or less character symbols require no memory allocation, just
|
||||||
|
working off the stack
|
||||||
|
|
||||||
|
One thing to note is that for more than 7 character symbols, we'll
|
||||||
|
need to allocate memory. But, in the worst case of 8 character
|
||||||
|
symbols, we're only allocating two 64 bit integers: these are easy to
|
||||||
|
walk on x86 and we've reached at least parity between the memory
|
||||||
|
required for administration (the size number) and the actual data.
|
||||||
|
** Being more aggressive?
|
||||||
|
Technically, ANSI bytes only need 7 bits. For each of the 7 bytes
|
||||||
|
used for the character data, we can take one bit off, leaving us with
|
||||||
|
7 bits to use for an additional character. We don't need to adjust
|
||||||
|
anything else in the schema.
|
||||||
|
|
||||||
|
So, hypothetically we could represent up to 8 character symbols! This
|
||||||
|
would require packing the characters more aggressively into a single
|
||||||
|
pointer. Let's look at the layout of our pointers. This table is
|
||||||
|
indexed from most significant to least i.e. 0 is the MSB and 63 is the
|
||||||
|
LSB:
|
||||||
|
|
||||||
|
|-------+------------|
|
||||||
|
| Index | Usage |
|
||||||
|
|-------+------------|
|
||||||
|
| 0-55 | Characters |
|
||||||
|
| 56-60 | Size |
|
||||||
|
| 61-63 | Tag |
|
||||||
|
|-------+------------|
|
||||||
|
|
||||||
|
Honestly though, for an extra byte of information we'll probably have
|
||||||
|
to do a lot more work. x86-64 CPUs are much better at walking bytes
|
||||||
|
than they are walking 7 bit offsets. This may be something to
|
||||||
|
consider if CPU time is cheaper than allocating 8 byte symbols
|
||||||
|
somewhere.
|
||||||
|
* DONE Tagging scheme based on arena pages
|
||||||
|
2025-04-09:21:59:29: We went for option (2) of just taking a byte for
|
||||||
|
free from the memory address and using it as our management byte.
|
||||||
|
** 1) Page-offset schema
|
||||||
|
I've realised arenas are way better than the standard array dynamic I
|
||||||
|
was going for before. However, we lose the nicer semantics of using
|
||||||
|
an array index for pointers, where we can implement our own semantics
|
||||||
|
regarding what bits in that pointer are free to use, when using a
|
||||||
|
normal stable pointer into the arena; the host operating system has
|
||||||
|
its own semantics regarding how pointers are arranged and this _will_
|
||||||
|
change between operating systems. In particular, because of the way
|
||||||
|
I've arranged pages, we can't use the classic "div by 8" methodology
|
||||||
|
where new allocations on the heap generally must be aligned by 8 bytes
|
||||||
|
(u64), so we can use those 3 bits at the bottom for our tagging;
|
||||||
|
offsets into pages are where our pointers will lie and they won't
|
||||||
|
necessarily be divisible by 8.
|
||||||
|
|
||||||
|
So we can't use the pointers directly into the pages - we'll call
|
||||||
|
these types of pointers `host pointers`, because once we have them
|
||||||
|
it's trivial to access the underlying data. We'll call the pointers
|
||||||
|
we want to make `managed pointers` because we're managing the memory
|
||||||
|
system associated with them. We want to be able to translate from
|
||||||
|
managed pointers to host pointers.
|
||||||
|
|
||||||
|
Managed pointers are really just encodings for direct access into the
|
||||||
|
arena memory. So in 8 bytes, we need to encode both the page and the
|
||||||
|
specific offset in that page where the pointer is pointing to. We
|
||||||
|
also want to leave space for tagging and any metadata we might want to
|
||||||
|
store in the pointer to that data. A schema I could think of was:
|
||||||
|
|
||||||
|
|------------------+--------------------|
|
||||||
|
| Index (in bytes) | Representation |
|
||||||
|
|------------------+--------------------|
|
||||||
|
| 0 | Metadata (tagging) |
|
||||||
|
| 1-4 | Offset in page |
|
||||||
|
| 4-7 | Page choice |
|
||||||
|
|------------------+--------------------|
|
||||||
|
|
||||||
|
This gives us pow(2, 24) - 1 = 16777215 possible pages and
|
||||||
|
pow(2, 32) - 1 = 4294967295 offsets in each page. Thus our total
|
||||||
|
addressable memory would be pow(2, 56) - 1 = 72057594037927935 bytes.
|
||||||
|
|
||||||
|
Certainly no machine would ever have this much memory and so we're
|
||||||
|
quite safe for most machines. That reserved management byte for our
|
||||||
|
purposes (tagging, currently) will make the math to translate it a bit
|
||||||
|
easier.
|
||||||
|
|
||||||
|
Let's reason about how we'd encode and decode these addresses. The
|
||||||
|
arena itself should provide addresses with the management byte set to
|
||||||
|
0 for the user to encode what they wish. The top bytes should be
|
||||||
|
encoded as per the above i.e. top 3 bytes as the page index, next 4
|
||||||
|
bytes as the offset in that page. This shouldn't be super difficult
|
||||||
|
when we're doing it within the management functions of the arena
|
||||||
|
itself as this data should be handy when performing the allocation.
|
||||||
|
|
||||||
|
When decoding these addresses i.e. retrieving data i.e. translating
|
||||||
|
from a managed pointer to a host pointer, all it will need to do is
|
||||||
|
convert the pointer into a byte buffer and copy the top 3 bytes as a
|
||||||
|
page index and the next 4 bytes as the offset in the page. Once these
|
||||||
|
are verified to be valid, we can just access the underlying pages and
|
||||||
|
get the host pointer. Because of how arenas work, those host pointers
|
||||||
|
will be stable regardless of any further memory management functions
|
||||||
|
performed on the arena (excluding cleanup) - so once you have a host
|
||||||
|
pointer, you can use it as much as you want without having to worry
|
||||||
|
about the pointer becoming invalid in the next second.
|
||||||
|
** 2) 48-bit addressing exploit
|
||||||
|
Most x86 CPUs only use around 48-56 bits for actual memory addresses -
|
||||||
|
mostly as a result of not needing _nearly_ as many addresses as a full
|
||||||
|
64 bit word would provide.
|
||||||
|
|
||||||
|
So we /could/ get away with using one of those bytes for our
|
||||||
|
administrative tasks. Since the smallest remit we have is one byte,
|
||||||
|
we'll stick to that (but maybe we could go for two bytes - need to
|
||||||
|
investigate further).
|
||||||
|
|
||||||
|
This byte should be the MSB, but using that for tagging will require
|
||||||
|
more work than the lowest byte (to look at it we'll need to push that
|
||||||
|
byte all the way down). So we'll be going for a low byte strategy by
|
||||||
|
shifting the pointer up by one byte. This leaves us with the lowest
|
||||||
|
byte to play with as we choose.
|
||||||
38
vec.c
Normal file
38
vec.c
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
/* Copyright (C) 2025 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||||
|
* details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the GNU General
|
||||||
|
* Public License Version 2, which you should have received a copy of along with
|
||||||
|
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
* Created: 2025-04-15
|
||||||
|
* Description: Vector implementation
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "./vec.h"
|
||||||
|
|
||||||
|
#include <malloc.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
void vec_reserve(arena_t *allocator, vec_t *vec, u32 size)
|
||||||
|
{
|
||||||
|
if (vec->cap - vec->size < size)
|
||||||
|
{
|
||||||
|
u32 old_cap = vec->cap;
|
||||||
|
vec->cap = MAX(vec->cap * 1.5, vec->size + size);
|
||||||
|
vec->data = arena_realloc(allocator, vec->data, old_cap, vec->cap);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 vec_append(arena_t *allocator, vec_t *vec, const void *data, u32 size)
|
||||||
|
{
|
||||||
|
vec_reserve(allocator, vec, size);
|
||||||
|
memcpy(vec->data + vec->size, data, size);
|
||||||
|
u32 ptr = vec->size;
|
||||||
|
vec->size += size;
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
30
vec.h
Normal file
30
vec.h
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
/* Copyright (C) 2025 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||||
|
* details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the GNU General
|
||||||
|
* Public License Version 2, which you should have received a copy of along with
|
||||||
|
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
* Created: 2025-04-15
|
||||||
|
* Description: Vectors (dynamic sized arrays)
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef VEC_H
|
||||||
|
#define VEC_H
|
||||||
|
|
||||||
|
#include "./memory.h"
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
u32 size, cap;
|
||||||
|
u8 *data;
|
||||||
|
} vec_t;
|
||||||
|
|
||||||
|
void vec_reserve(arena_t *alloactor, vec_t *vec, u32 size);
|
||||||
|
u32 vec_append(arena_t *allocator, vec_t *vec, const void *data, u32 size);
|
||||||
|
|
||||||
|
#endif
|
||||||
Reference in New Issue
Block a user