Initial implementation
This commit is contained in:
51
Makefile
Normal file
51
Makefile
Normal file
@@ -0,0 +1,51 @@
|
||||
CC=gcc
|
||||
OUT=oats.out
|
||||
LIBS=-I.
|
||||
ARGS=
|
||||
|
||||
RELEASE=0
|
||||
GFLAGS=-Wall -Wextra -Wswitch-enum -Werror -std=c11
|
||||
DFLAGS=-ggdb -fsanitize=address -fsanitize=undefined
|
||||
RFLAGS=-O3
|
||||
DEPFLAGS=-MT $@ -MMD -MP -MF
|
||||
|
||||
ifeq ($(RELEASE), 1)
|
||||
CFLAGS=$(GFLAGS) $(RFLAGS) -DDEBUG=0
|
||||
else
|
||||
CFLAGS=$(GFLAGS) $(DFLAGS)
|
||||
endif
|
||||
|
||||
DIST=build
|
||||
SRC:=tag.c memory.c vec.c sv.c lisp.c reader.c eval.c main.c
|
||||
OBJ:=$(SRC:%.c=$(DIST)/%.o)
|
||||
DEPDIR=$(DIST)/dependencies
|
||||
DEPS:=$(SRC:%.c=$(DEPDIR)/%.d)
|
||||
|
||||
.PHONY: all
|
||||
all: $(DIST)/$(OUT)
|
||||
|
||||
$(DIST)/%.o: %.c | $(DIST) $(DEPDIR)
|
||||
$(CC) $(CFLAGS) $(DEPFLAGS) $(DEPDIR)/$*.d -c $< -o $@ $(LIBS)
|
||||
|
||||
$(DIST)/$(OUT): $(OBJ) | $(DIST)
|
||||
$(CC) $(CFLAGS) $^ -o $@ $(LIBS)
|
||||
|
||||
.PHONY: run
|
||||
run: $(DIST)/$(OUT)
|
||||
./$^ $(ARGS)
|
||||
|
||||
.PHONY:
|
||||
clean:
|
||||
rm -rfv $(DIST)/*
|
||||
|
||||
$(DIST):
|
||||
mkdir -p $(DIST)
|
||||
|
||||
$(DEPDIR):
|
||||
mkdir -p $(DEPDIR)
|
||||
|
||||
.PHONY:
|
||||
watch:
|
||||
find . -type 'f' -regex ".*.c\\|.*.h" | entr -cs "make run"
|
||||
|
||||
-include $(DEPS)
|
||||
78
base.h
Normal file
78
base.h
Normal file
@@ -0,0 +1,78 @@
|
||||
/* Copyright (C) 2025 Aryadev Chavali
|
||||
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||
* details.
|
||||
|
||||
* You may distribute and modify this code under the terms of the GNU General
|
||||
* Public License Version 2, which you should have received a copy of along with
|
||||
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||
|
||||
* Created: 2025-03-31
|
||||
* Description: Base
|
||||
*/
|
||||
|
||||
#ifndef BASE_H
|
||||
#define BASE_H
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
typedef uint8_t u8;
|
||||
typedef uint16_t u16;
|
||||
typedef uint32_t u32;
|
||||
typedef uint64_t u64;
|
||||
|
||||
typedef int8_t i8;
|
||||
typedef int16_t i16;
|
||||
typedef int32_t i32;
|
||||
typedef int64_t i64;
|
||||
|
||||
typedef float f32;
|
||||
typedef double f64;
|
||||
static_assert(sizeof(f32) == sizeof(u32));
|
||||
static_assert(sizeof(f64) == sizeof(u64));
|
||||
|
||||
#define MAX(A, B) ((A) > (B) ? (A) : (B))
|
||||
#define MIN(A, B) ((A) < (B) ? (A) : (B))
|
||||
#define NTH_BYTE(X, N) (((X) >> (8 * N)) & ((1 << 8) - 1))
|
||||
#define ARR_SIZE(XS) (sizeof(XS) / sizeof((XS)[0]))
|
||||
|
||||
#define TODO(MSG) (assert(false && MSG));
|
||||
|
||||
#ifndef DEBUG
|
||||
#define DEBUG 2
|
||||
#endif
|
||||
|
||||
static inline void debug(char *fmt, ...)
|
||||
{
|
||||
#if DEBUG > 1
|
||||
va_list ap;
|
||||
va_start(ap, fmt);
|
||||
vprintf(fmt, ap);
|
||||
va_end(ap);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void info(char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
va_start(ap, fmt);
|
||||
vprintf(fmt, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
static inline void print_bits(u64 w)
|
||||
{
|
||||
for (u64 i = 8 * sizeof(w); i > 0; --i)
|
||||
{
|
||||
printf("%c", ((w >> (i - 1)) & 1) == 1 ? '1' : '0');
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
#endif
|
||||
24
eval.c
Normal file
24
eval.c
Normal file
@@ -0,0 +1,24 @@
|
||||
/* Copyright (C) 2025 Aryadev Chavali
|
||||
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||
* details.
|
||||
|
||||
* You may distribute and modify this code under the terms of the GNU General
|
||||
* Public License Version 2, which you should have received a copy of along with
|
||||
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||
|
||||
* Created: 2025-04-18
|
||||
* Description: Evaluator implementation
|
||||
*/
|
||||
|
||||
#include "./eval.h"
|
||||
|
||||
err_t eval(context_t *ctx, lisp_t *obj, lisp_t **ret)
|
||||
{
|
||||
(void)ctx;
|
||||
(void)obj;
|
||||
(void)ret;
|
||||
TODO("implement evaluator");
|
||||
}
|
||||
28
eval.h
Normal file
28
eval.h
Normal file
@@ -0,0 +1,28 @@
|
||||
/* Copyright (C) 2025 Aryadev Chavali
|
||||
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||
* details.
|
||||
|
||||
* You may distribute and modify this code under the terms of the GNU General
|
||||
* Public License Version 2, which you should have received a copy of along with
|
||||
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||
|
||||
* Created: 2025-04-18
|
||||
* Description: Evaluator
|
||||
*/
|
||||
|
||||
#ifndef EVAL_H
|
||||
#define EVAL_H
|
||||
|
||||
#include "./lisp.h"
|
||||
|
||||
typedef enum
|
||||
{
|
||||
ERR_OK = 0,
|
||||
} err_t;
|
||||
|
||||
err_t eval(context_t *ctx, lisp_t *obj, lisp_t **ret);
|
||||
|
||||
#endif
|
||||
317
lisp.c
Normal file
317
lisp.c
Normal file
@@ -0,0 +1,317 @@
|
||||
/* Copyright (C) 2025 Aryadev Chavali
|
||||
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||
* details.
|
||||
|
||||
* You may distribute and modify this code under the terms of the GNU General
|
||||
* Public License Version 2, which you should have received a copy of along with
|
||||
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||
|
||||
* Created: 2025-04-06
|
||||
* Description:
|
||||
*/
|
||||
|
||||
#include "./lisp.h"
|
||||
#include "./sv.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <wchar.h>
|
||||
|
||||
lisp_t *make_int(i64 integer)
|
||||
{
|
||||
return tag_int(integer);
|
||||
}
|
||||
|
||||
lisp_t *make_sym(context_t *ctx, char *data, u64 size)
|
||||
{
|
||||
// Small symbol optimisation
|
||||
if (size <= 7)
|
||||
return tag_ssym(data, size);
|
||||
|
||||
// Make a cell with the data we need
|
||||
cell_t *cell = context_alloc(ctx, sizeof(*cell) + size);
|
||||
cell->size = size;
|
||||
memcpy(cell->data, data, size);
|
||||
|
||||
return tag_sym(cell);
|
||||
}
|
||||
|
||||
lisp_t *make_cons(context_t *ctx, lisp_t *car, lisp_t *cdr)
|
||||
{
|
||||
cons_t *cons = context_alloc(ctx, sizeof(*cons));
|
||||
memset(cons, 0, sizeof(*cons));
|
||||
cons->car = car;
|
||||
cons->cdr = cdr;
|
||||
return tag_cons(cons);
|
||||
}
|
||||
|
||||
lisp_t *make_list(context_t *ctx, lisp_t **lisps, u64 size)
|
||||
{
|
||||
if (!lisps)
|
||||
return NIL;
|
||||
lisp_t *cur = NIL;
|
||||
for (u64 i = size; i > 0; --i)
|
||||
cur = make_cons(ctx, lisps[i - 1], cur);
|
||||
return cur;
|
||||
}
|
||||
|
||||
lisp_t *make_vec(context_t *ctx, u32 size)
|
||||
{
|
||||
// Make a vector with the content
|
||||
lisp_t *container = NIL;
|
||||
vec_t *vec = context_alloc(ctx, sizeof(*vec));
|
||||
vec->size = 0;
|
||||
vec->cap = size * sizeof(&container);
|
||||
if (size == 0)
|
||||
vec->data = NULL;
|
||||
else
|
||||
vec->data = context_alloc(ctx, vec->cap);
|
||||
container = tag_vec(vec);
|
||||
|
||||
return container;
|
||||
}
|
||||
|
||||
lisp_t *make_str(context_t *ctx, char *data, u64 size)
|
||||
{
|
||||
if (size == 0)
|
||||
// No need to allocate unless necessary
|
||||
return tag_str(NIL);
|
||||
// Make a vector with the content
|
||||
vec_t *vec = context_alloc(ctx, sizeof(*vec));
|
||||
vec->data = context_alloc(ctx, sizeof(*vec->data) * size);
|
||||
vec->cap = size;
|
||||
vec->size = size;
|
||||
memcpy(vec->data, data, size);
|
||||
|
||||
return tag_str(vec);
|
||||
}
|
||||
|
||||
i64 as_int(lisp_t *obj)
|
||||
{
|
||||
assert(IS_TAG(obj, INT));
|
||||
u64 p_obj = (u64)obj;
|
||||
return UNTAG(p_obj, INT) | // Delete the tag
|
||||
(NTH_BYTE(p_obj, 7) & 0x80) << 56 // duplicate the MSB (preserve sign)
|
||||
;
|
||||
}
|
||||
|
||||
cons_t *as_cons(lisp_t *obj)
|
||||
{
|
||||
assert(IS_TAG(obj, CONS));
|
||||
return (cons_t *)UNTAG(obj, CONS);
|
||||
}
|
||||
|
||||
u32 as_char(lisp_t *obj)
|
||||
{
|
||||
return (u32)UNTAG(obj, CHAR);
|
||||
}
|
||||
|
||||
cell_t *as_sym(lisp_t *obj)
|
||||
{
|
||||
assert(IS_TAG(obj, SYM));
|
||||
return (cell_t *)UNTAG(obj, SYM);
|
||||
}
|
||||
|
||||
void as_ssym(lisp_t *obj, sv_t *container)
|
||||
{
|
||||
assert(IS_TAG(obj, SSYM));
|
||||
u64 p_obj = (u64)obj;
|
||||
container->size = NTH_BYTE(p_obj, 0) >> SHIFT_SSYM;
|
||||
p_obj >>= 8;
|
||||
memcpy(container->data, &p_obj, container->size);
|
||||
}
|
||||
|
||||
bool as_bool(lisp_t *obj)
|
||||
{
|
||||
assert(IS_TAG(obj, BOOL));
|
||||
return (bool)UNTAG(obj, BOOL);
|
||||
}
|
||||
|
||||
vec_t *as_vec(lisp_t *obj)
|
||||
{
|
||||
assert(IS_TAG(obj, VEC));
|
||||
return (vec_t *)UNTAG(obj, VEC);
|
||||
}
|
||||
|
||||
vec_t *as_str(lisp_t *obj)
|
||||
{
|
||||
assert(IS_TAG(obj, STR));
|
||||
return (vec_t *)UNTAG(obj, STR);
|
||||
}
|
||||
|
||||
sv_t serialise(context_t *ctx, lisp_t *ptr)
|
||||
{
|
||||
enum Tag t = tag_get(ptr);
|
||||
switch (t)
|
||||
{
|
||||
case TAG_NIL:
|
||||
{
|
||||
return SV("NIL", 3);
|
||||
}
|
||||
case TAG_INT:
|
||||
{
|
||||
i64 i = as_int(ptr);
|
||||
sv_t s = sv_fmt(&ctx->scratch,
|
||||
#if DEBUG > 1
|
||||
"int["
|
||||
#endif
|
||||
"%ld"
|
||||
#if DEBUG > 1
|
||||
"]"
|
||||
#endif
|
||||
,
|
||||
i);
|
||||
return s;
|
||||
}
|
||||
case TAG_CHAR:
|
||||
{
|
||||
u32 codepoint = as_char(ptr);
|
||||
return sv_fmt(&ctx->scratch,
|
||||
#if DEBUG > 1
|
||||
"char["
|
||||
#endif
|
||||
"%lc"
|
||||
#if DEBUG > 1
|
||||
"]"
|
||||
#endif
|
||||
,
|
||||
codepoint);
|
||||
}
|
||||
case TAG_SYM:
|
||||
{
|
||||
cell_t *cell = as_sym(ptr);
|
||||
sv_t s = sv_make(&ctx->scratch, (char *)cell->data, cell->size);
|
||||
#if DEBUG > 1
|
||||
s = sv_fmt(&ctx->scratch, "sym[" PR_SV "]", SV_FMT(s));
|
||||
#endif
|
||||
return s;
|
||||
}
|
||||
case TAG_SSYM:
|
||||
{
|
||||
char data[7];
|
||||
sv_t rsym = SV(data, 0);
|
||||
as_ssym(ptr, &rsym);
|
||||
sv_t s = sv_copy(&ctx->scratch, rsym);
|
||||
|
||||
#if DEBUG > 1
|
||||
s = sv_fmt(&ctx->scratch, "ssym[" PR_SV "]", SV_FMT(s));
|
||||
#endif
|
||||
return s;
|
||||
}
|
||||
case TAG_BOOL:
|
||||
{
|
||||
return sv_fmt(&ctx->scratch,
|
||||
#if DEBUG > 1
|
||||
"bool["
|
||||
#endif
|
||||
"%s"
|
||||
#if DEBUG > 1
|
||||
"]"
|
||||
#endif
|
||||
,
|
||||
as_bool(ptr) ? "#t" : "#f");
|
||||
}
|
||||
case TAG_CONS:
|
||||
{
|
||||
if (!CAR(ptr) && !CDR(ptr))
|
||||
#if DEBUG > 1
|
||||
return SV("lst[NIL]", 8);
|
||||
#else
|
||||
return SV("NIL", 3);
|
||||
#endif
|
||||
|
||||
sv_t s = {0};
|
||||
|
||||
for (lisp_t *lsp = ptr; lsp; lsp = IS_TAG(lsp, CONS) ? CDR(lsp) : NIL)
|
||||
{
|
||||
lisp_t *lmember = lsp;
|
||||
if (IS_TAG(lsp, CONS))
|
||||
lmember = CAR(lmember);
|
||||
|
||||
sv_t member = serialise(ctx, lmember);
|
||||
s = sv_concat(&ctx->scratch, s, member);
|
||||
if (IS_TAG(lsp, CONS) && !CDR(lsp))
|
||||
continue;
|
||||
else if (IS_TAG(CDR(lsp), CONS))
|
||||
// normal list
|
||||
s = sv_append(&ctx->scratch, s, " ", 1);
|
||||
else
|
||||
// dotted list
|
||||
s = sv_append(&ctx->scratch, s, " . ", 3);
|
||||
}
|
||||
|
||||
#if DEBUG > 1
|
||||
s = sv_fmt(&ctx->scratch, "lst[" PR_SV "]", SV_FMT(s));
|
||||
#else
|
||||
s = sv_fmt(&ctx->scratch, "(" PR_SV ")", SV_FMT(s));
|
||||
#endif
|
||||
return s;
|
||||
}
|
||||
case TAG_VEC:
|
||||
{
|
||||
vec_t *vec = as_vec(ptr);
|
||||
if (!vec)
|
||||
#if DEBUG > 1
|
||||
return SV("vec[NIL]", 8);
|
||||
#else
|
||||
return SV("#()", 3);
|
||||
#endif
|
||||
else if (vec->size < sizeof(&ptr))
|
||||
#if DEBUG > 1
|
||||
return SV("vec[0/0 #()]", 13);
|
||||
#else
|
||||
return SV("#()", 3);
|
||||
#endif
|
||||
|
||||
sv_t s = {0};
|
||||
for (u64 i = 0; i < vec->size / sizeof(&ptr); ++i)
|
||||
{
|
||||
lisp_t *lmember = ((lisp_t **)vec->data)[i];
|
||||
sv_t member = serialise(ctx, lmember);
|
||||
s = sv_concat(&ctx->scratch, s, member);
|
||||
if (i == ((vec->size / sizeof(&ptr)) - 1))
|
||||
continue;
|
||||
s = sv_append(&ctx->scratch, s, " ", 1);
|
||||
}
|
||||
|
||||
#if DEBUG > 1
|
||||
s = sv_fmt(&ctx->scratch, "vec[%lu/%lu #(" PR_SV ")]", vec->size, vec->cap,
|
||||
SV_FMT(s));
|
||||
#else
|
||||
s = sv_fmt(&ctx->scratch, "#(" PR_SV ")", SV_FMT(s));
|
||||
#endif
|
||||
return s;
|
||||
break;
|
||||
}
|
||||
case TAG_STR:
|
||||
{
|
||||
vec_t *vec = as_str(ptr);
|
||||
sv_t sv = {0};
|
||||
if (vec)
|
||||
sv = SV((char *)vec->data, vec->size);
|
||||
else
|
||||
sv = SV("", 0);
|
||||
return sv_fmt(&ctx->scratch,
|
||||
#if DEBUG > 1
|
||||
"str["
|
||||
#else
|
||||
"\""
|
||||
#endif
|
||||
PR_SV
|
||||
#if DEBUG > 1
|
||||
"]"
|
||||
#else
|
||||
"\""
|
||||
#endif
|
||||
,
|
||||
SV_FMT(sv));
|
||||
}
|
||||
case NUM_TAGS:
|
||||
default:
|
||||
assert(false && "serialise: unreachable");
|
||||
return SV(0, 0);
|
||||
}
|
||||
}
|
||||
59
lisp.h
Normal file
59
lisp.h
Normal file
@@ -0,0 +1,59 @@
|
||||
/* Copyright (C) 2025 Aryadev Chavali
|
||||
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||
* details.
|
||||
|
||||
* You may distribute and modify this code under the terms of the GNU General
|
||||
* Public License Version 2, which you should have received a copy of along with
|
||||
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||
|
||||
* Created: 2025-04-06
|
||||
* Description: Object model where we deal with contexts
|
||||
*/
|
||||
|
||||
#ifndef LISP_H
|
||||
#define LISP_H
|
||||
|
||||
#include "./base.h"
|
||||
#include "./memory.h"
|
||||
#include "./sv.h"
|
||||
#include "./tag.h"
|
||||
#include "./vec.h"
|
||||
|
||||
typedef struct Cons
|
||||
{
|
||||
lisp_t *car, *cdr;
|
||||
} cons_t;
|
||||
|
||||
typedef struct Cell
|
||||
{
|
||||
u64 size;
|
||||
u8 data[];
|
||||
} cell_t;
|
||||
|
||||
#define NIL 0
|
||||
lisp_t *make_int(i64 integer);
|
||||
lisp_t *make_sym(context_t *ctx, char *data, u64 size);
|
||||
lisp_t *make_cons(context_t *ctx, lisp_t *car, lisp_t *cdr);
|
||||
lisp_t *make_list(context_t *ctx, lisp_t **lisps, u64 size);
|
||||
lisp_t *make_vec(context_t *ctx, u32 size);
|
||||
lisp_t *make_str(context_t *ctx, char *data, u64 size);
|
||||
|
||||
i64 as_int(lisp_t *obj);
|
||||
u32 as_char(lisp_t *obj);
|
||||
cell_t *as_sym(lisp_t *obj);
|
||||
void as_ssym(lisp_t *obj, sv_t *sv);
|
||||
bool as_bool(lisp_t *obj);
|
||||
f64 as_float(lisp_t *obj);
|
||||
cons_t *as_cons(lisp_t *obj);
|
||||
vec_t *as_vec(lisp_t *obj);
|
||||
vec_t *as_str(lisp_t *obj);
|
||||
|
||||
sv_t serialise(context_t *ctx, lisp_t *lisp);
|
||||
|
||||
#define CAR(PTR) (as_cons(PTR)->car)
|
||||
#define CDR(PTR) (as_cons(PTR)->cdr)
|
||||
|
||||
#endif
|
||||
92
main.c
Normal file
92
main.c
Normal file
@@ -0,0 +1,92 @@
|
||||
/* Copyright (C) 2025 Aryadev Chavali
|
||||
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||
* details.
|
||||
|
||||
* You may distribute and modify this code under the terms of the GNU General
|
||||
* Public License Version 2, which you should have received a copy of along with
|
||||
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||
|
||||
* Created: 2025-04-05
|
||||
* Description: Entrypoint
|
||||
*/
|
||||
|
||||
#include "./base.h"
|
||||
#include "./lisp.h"
|
||||
#include "./memory.h"
|
||||
#include "./reader.h"
|
||||
#include "./sv.h"
|
||||
#include "./vec.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <malloc.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int exit = 0;
|
||||
context_t ctx = {0};
|
||||
const char *filename = NULL;
|
||||
#if 1
|
||||
if (argc > 1)
|
||||
filename = argv[1];
|
||||
else
|
||||
filename = "./r7rs-tests.scm";
|
||||
|
||||
FILE *fp = fopen(filename, "r");
|
||||
if (!fp)
|
||||
{
|
||||
fprintf(stderr, "[ERROR]: File `%s` does not exist\n", filename);
|
||||
exit = 1;
|
||||
goto end;
|
||||
}
|
||||
input_t inp = {0};
|
||||
input_from_fp(&ctx, &inp, filename, fp);
|
||||
fclose(fp);
|
||||
#else
|
||||
filename = "<example>";
|
||||
char inp_data[] = "(print (+ 34 35))";
|
||||
input_t inp = {0};
|
||||
sv_t inp_sv = SV(inp_data, sizeof(inp_data));
|
||||
input_from_sv(&ctx, &inp, filename, inp_sv);
|
||||
#endif
|
||||
|
||||
if (inp.str.size == 0)
|
||||
{
|
||||
info("[WARNING] `%s` is empty.\n", filename);
|
||||
goto end;
|
||||
}
|
||||
debug("[file read]: %luB read from `%s`\n", inp.str.size, filename);
|
||||
|
||||
// Setup a vector to hold all the lisps
|
||||
vec_t results = {0};
|
||||
perr_t perr = parse_all(&ctx, &inp, &results);
|
||||
if (perr)
|
||||
{
|
||||
exit = perr;
|
||||
print_perror(stderr, &inp, perr);
|
||||
goto end;
|
||||
}
|
||||
context_reset_read(&ctx);
|
||||
|
||||
lisp_t **lisps = (lisp_t **)results.data;
|
||||
u64 size = results.size / sizeof(*lisps);
|
||||
for (u64 i = 0; i < size; ++i)
|
||||
{
|
||||
lisp_t *lisp = lisps[i];
|
||||
// printf("tag=%x\n", tag_get(lisp));
|
||||
sv_t serialised = serialise(&ctx, lisp);
|
||||
info("lisp[%lu]: %p => " PR_SV "\n", i, lisp, SV_FMT(serialised));
|
||||
}
|
||||
context_report(&ctx);
|
||||
|
||||
end:
|
||||
|
||||
context_cleanup(&ctx);
|
||||
return exit;
|
||||
}
|
||||
275
memory.c
Normal file
275
memory.c
Normal file
@@ -0,0 +1,275 @@
|
||||
/* Copyright (C) 2025 Aryadev Chavali
|
||||
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||
* details.
|
||||
|
||||
* You may distribute and modify this code under the terms of the GNU General
|
||||
* Public License Version 2, which you should have received a copy of along with
|
||||
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||
|
||||
* Created: 2025-04-05
|
||||
* Description: Implementations for memory models.
|
||||
*/
|
||||
|
||||
#include "./memory.h"
|
||||
|
||||
#include <malloc.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
page_t *page_create(u64 size)
|
||||
{
|
||||
size = MAX(size, PAGE_DEFAULT_SIZE);
|
||||
page_t *page = calloc(1, sizeof(*page) + size);
|
||||
page->next = NULL;
|
||||
page->size = 0;
|
||||
page->capacity = size;
|
||||
return page;
|
||||
}
|
||||
|
||||
void page_resize(page_t **page, u64 size)
|
||||
{
|
||||
if (!page)
|
||||
return;
|
||||
else if (!(*page))
|
||||
*page = page_create(size);
|
||||
else if (page[0]->capacity < size)
|
||||
{
|
||||
page[0]->capacity = MAX(size, page[0]->capacity * 1.5);
|
||||
page[0] = realloc(page[0], sizeof(*page[0]) + page[0]->capacity);
|
||||
}
|
||||
}
|
||||
|
||||
i64 page_append(page_t *page, void *data, u64 size)
|
||||
{
|
||||
if (!page || page->size + size >= page->capacity)
|
||||
return -1;
|
||||
if (data)
|
||||
memcpy(page->data + page->size, data, size);
|
||||
u64 ptr = page->size;
|
||||
page->size += size;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
u64 page_rappend(page_t **page, void *data, u64 size)
|
||||
{
|
||||
page_resize(page, page[0]->size + size);
|
||||
if (data)
|
||||
memcpy(page[0]->data + page[0]->size, data, size);
|
||||
u64 ptr = page[0]->size;
|
||||
page[0]->size += size;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void *arena_alloc(arena_t *arena, u64 size)
|
||||
{
|
||||
if (!arena)
|
||||
return NULL;
|
||||
else if (!arena->start)
|
||||
{
|
||||
arena->start = page_create(1);
|
||||
arena->end = arena->start;
|
||||
}
|
||||
|
||||
page_t *best_fit;
|
||||
for (best_fit = arena->start; best_fit; best_fit = best_fit->next)
|
||||
if (best_fit->size + size + MEMORY_ALIGNMENT < best_fit->capacity)
|
||||
break;
|
||||
|
||||
if (!best_fit)
|
||||
{
|
||||
best_fit = page_create(size);
|
||||
arena->end->next = best_fit;
|
||||
arena->end = best_fit;
|
||||
}
|
||||
|
||||
// NOTE: Fsanitize has a hissy fit if we don't align memory "correctly"
|
||||
u64 offset = 0;
|
||||
|
||||
if (size % MEMORY_ALIGNMENT == 0)
|
||||
{
|
||||
u64 div = (((u64)(best_fit->data + best_fit->size)) % MEMORY_ALIGNMENT);
|
||||
if (div != 0)
|
||||
offset = MEMORY_ALIGNMENT - div;
|
||||
}
|
||||
|
||||
void *start = best_fit->data + best_fit->size + offset;
|
||||
|
||||
best_fit->size += size + offset;
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
void *arena_realloc(arena_t *arena, void *ptr, u64 oldsize, u64 newsize)
|
||||
{
|
||||
if (!ptr)
|
||||
return arena_alloc(arena, newsize);
|
||||
else if (!arena || !arena->start)
|
||||
return NULL;
|
||||
else if (newsize <= oldsize)
|
||||
// No need to change anything.
|
||||
return ptr;
|
||||
|
||||
bool copy_into_new = true;
|
||||
void *start = NULL;
|
||||
page_t *old_page = NULL, *best_fit = NULL;
|
||||
|
||||
for (page_t *page = arena->start; page; page = page->next)
|
||||
{
|
||||
if (!best_fit && page->size + newsize < page->capacity)
|
||||
best_fit = page;
|
||||
if (page->data <= (u8 *)ptr &&
|
||||
(u8 *)(ptr) + oldsize <= page->data + page->capacity)
|
||||
old_page = page;
|
||||
}
|
||||
|
||||
// If the old page exists, ptr is the latest allocation in it, and it has
|
||||
// enough space to contain the new size, just resize and return.
|
||||
if (old_page && old_page->data + old_page->size - oldsize == ptr &&
|
||||
old_page->size - oldsize + newsize < old_page->capacity)
|
||||
{
|
||||
start = ptr;
|
||||
old_page->size += newsize - oldsize;
|
||||
copy_into_new = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!old_page)
|
||||
copy_into_new = false;
|
||||
if (!best_fit)
|
||||
{
|
||||
best_fit = page_create(newsize);
|
||||
arena->end->next = best_fit;
|
||||
arena->end = best_fit;
|
||||
}
|
||||
|
||||
start = best_fit->data + best_fit->size;
|
||||
best_fit->size += newsize;
|
||||
}
|
||||
|
||||
if (copy_into_new)
|
||||
{
|
||||
memcpy(start, ptr, oldsize);
|
||||
memset(start + oldsize, 0, newsize - oldsize);
|
||||
}
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
void arena_attach(arena_t *arena, page_t *page)
|
||||
{
|
||||
if (!arena || !page)
|
||||
return;
|
||||
else if (!arena->start || !arena->end)
|
||||
{
|
||||
arena->start = page;
|
||||
arena->end = page;
|
||||
}
|
||||
else
|
||||
{
|
||||
page->next = arena->start;
|
||||
arena->start = page;
|
||||
}
|
||||
}
|
||||
|
||||
void arena_reset(arena_t *arena)
|
||||
{
|
||||
if (!arena || !arena->start)
|
||||
return;
|
||||
|
||||
for (page_t *cur = arena->start; cur; cur = cur->next)
|
||||
{
|
||||
if (cur->size == 0)
|
||||
continue;
|
||||
cur->size = 0;
|
||||
memset(cur->data, 0, cur->capacity);
|
||||
}
|
||||
}
|
||||
|
||||
void arena_cleanup(arena_t *arena)
|
||||
{
|
||||
if (!arena || !arena->start)
|
||||
return;
|
||||
|
||||
for (page_t *cur = arena->start, *next = NULL; cur;
|
||||
next = cur->next, free(cur), cur = next)
|
||||
continue;
|
||||
|
||||
memset(arena, 0, sizeof(*arena));
|
||||
}
|
||||
|
||||
// Allocates against stable memory i.e. we can have pointers of this lying
|
||||
// around without any fear of them being thrown away.
|
||||
void *context_alloc(context_t *context, u64 size)
|
||||
{
|
||||
return arena_alloc(&context->memory, size);
|
||||
}
|
||||
|
||||
// Allocate against a "scratch space", separate from main memory, for internal
|
||||
// use.
|
||||
void *context_salloc(context_t *context, u64 size)
|
||||
{
|
||||
return arena_alloc(&context->scratch, size);
|
||||
}
|
||||
|
||||
void context_reset_read(context_t *context)
|
||||
{
|
||||
arena_reset(&context->read);
|
||||
}
|
||||
|
||||
void context_reset_scratch(context_t *context)
|
||||
{
|
||||
arena_reset(&context->scratch);
|
||||
}
|
||||
|
||||
void context_reset(context_t *context)
|
||||
{
|
||||
arena_reset(&context->memory);
|
||||
arena_reset(&context->read);
|
||||
arena_reset(&context->scratch);
|
||||
}
|
||||
|
||||
void context_cleanup(context_t *context)
|
||||
{
|
||||
if (!context)
|
||||
return;
|
||||
arena_cleanup(&context->memory);
|
||||
arena_cleanup(&context->read);
|
||||
arena_cleanup(&context->scratch);
|
||||
memset(context, 0, sizeof(*context));
|
||||
}
|
||||
|
||||
void context_report(context_t *context)
|
||||
{
|
||||
#if DEBUG
|
||||
// Figure this out at runtime
|
||||
u64 mem_used = 0, mem_cap = 0;
|
||||
for (page_t *page = context->memory.start; page; page = page->next)
|
||||
{
|
||||
mem_used += page->size;
|
||||
mem_cap += page->capacity;
|
||||
}
|
||||
|
||||
u64 read_used = 0, read_cap = 0;
|
||||
for (page_t *page = context->read.start; page; page = page->next)
|
||||
{
|
||||
read_used += page->size;
|
||||
read_cap += page->capacity;
|
||||
}
|
||||
|
||||
u64 scr_used = 0, scr_cap = 0;
|
||||
for (page_t *page = context->scratch.start; page; page = page->next)
|
||||
{
|
||||
scr_used += page->size;
|
||||
scr_cap += page->capacity;
|
||||
}
|
||||
|
||||
info("<Context>: %luB/%luB main memory used\n", mem_used, mem_cap);
|
||||
info("<Context>: %luB/%luB read space used\n", read_used, read_cap);
|
||||
info("<Context>: %luB/%luB scratch space used\n", scr_used, scr_cap);
|
||||
#endif
|
||||
}
|
||||
66
memory.h
Normal file
66
memory.h
Normal file
@@ -0,0 +1,66 @@
|
||||
/* Copyright (C) 2025 Aryadev Chavali
|
||||
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||
* details.
|
||||
|
||||
* You may distribute and modify this code under the terms of the GNU General
|
||||
* Public License Version 2, which you should have received a copy of along with
|
||||
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||
|
||||
* Created: 2025-04-05
|
||||
* Description: Memory management structures
|
||||
*/
|
||||
|
||||
#ifndef MEMORY_H
|
||||
#define MEMORY_H
|
||||
|
||||
#include "./base.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#define PAGE_DEFAULT_SIZE 1024
|
||||
#define MEMORY_ALIGNMENT 8
|
||||
|
||||
typedef struct Page
|
||||
{
|
||||
struct Page *next;
|
||||
u64 size, capacity;
|
||||
u8 data[];
|
||||
} page_t;
|
||||
|
||||
page_t *page_create(u64 size);
|
||||
void page_resize(page_t **page, u64 newsize);
|
||||
// Append - fail (by returning <0) if not enough space.
|
||||
i64 page_append(page_t *page, void *data, u64 size);
|
||||
// Append - will resize if necessary
|
||||
u64 page_rappend(page_t **page, void *data, u64 size);
|
||||
|
||||
typedef struct Aren
|
||||
{
|
||||
page_t *start, *end;
|
||||
} arena_t;
|
||||
|
||||
void *arena_alloc(arena_t *arena, u64 size);
|
||||
void *arena_realloc(arena_t *arena, void *ptr, u64 oldsize, u64 newsize);
|
||||
void *arena_copy(arena_t *arena, void *ptr, u64 size);
|
||||
void arena_attach(arena_t *arena, page_t *page);
|
||||
void arena_reset(arena_t *arena);
|
||||
void arena_cleanup(arena_t *arena);
|
||||
|
||||
typedef struct Context
|
||||
{
|
||||
arena_t memory, read, scratch;
|
||||
} context_t;
|
||||
|
||||
void *context_alloc(context_t *context, u64 size);
|
||||
void *context_salloc(context_t *context, u64 size);
|
||||
page_t *context_get_read_page(context_t *ctx);
|
||||
void context_reset_read(context_t *context);
|
||||
void context_reset_scratch(context_t *context);
|
||||
|
||||
void context_reset(context_t *context);
|
||||
void context_cleanup(context_t *context);
|
||||
void context_report(context_t *context);
|
||||
|
||||
#endif
|
||||
419
reader.c
Normal file
419
reader.c
Normal file
@@ -0,0 +1,419 @@
|
||||
/* Copyright (C) 2025 Aryadev Chavali
|
||||
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||
* details.
|
||||
|
||||
* You may distribute and modify this code under the terms of the GNU General
|
||||
* Public License Version 2, which you should have received a copy of along with
|
||||
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||
|
||||
* Created: 2025-04-16
|
||||
* Description: Implementation of parser
|
||||
*/
|
||||
|
||||
#include "./reader.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
|
||||
bool is_digit(char c)
|
||||
{
|
||||
return isdigit(c);
|
||||
}
|
||||
|
||||
bool is_alpha(char c)
|
||||
{
|
||||
return isalpha(c);
|
||||
}
|
||||
|
||||
bool is_space(char c)
|
||||
{
|
||||
return isspace(c);
|
||||
}
|
||||
|
||||
bool is_skip(char c)
|
||||
{
|
||||
return is_space(c) || c == ';';
|
||||
}
|
||||
|
||||
bool is_sym(char c)
|
||||
{
|
||||
return strchr(SYM_CHARS, c) != NULL;
|
||||
}
|
||||
|
||||
void input_from_sv(context_t *ctx, input_t *inp, const char *name, sv_t sv)
|
||||
{
|
||||
inp->name = name;
|
||||
inp->str = sv_copy(&ctx->read, sv);
|
||||
}
|
||||
|
||||
void input_from_fp(context_t *ctx, input_t *input, const char *name, FILE *fp)
|
||||
{
|
||||
input->name = name;
|
||||
// TODO: Choose a best fit (i.e. maximal capacity, unused) page
|
||||
page_t *page = page_create(INPUT_CHUNK_SIZE);
|
||||
// chunk should be in scratch space so we can reset it later.
|
||||
char *chunk = context_salloc(ctx, INPUT_CHUNK_SIZE);
|
||||
|
||||
u64 total_size = 0, size_read = 0;
|
||||
while (!feof(fp))
|
||||
{
|
||||
size_read = fread(chunk, 1, INPUT_CHUNK_SIZE, fp);
|
||||
if (size_read > 0)
|
||||
{
|
||||
page_rappend(&page, chunk, size_read);
|
||||
total_size += size_read;
|
||||
}
|
||||
}
|
||||
|
||||
input->str = SV((char *)page->data, total_size);
|
||||
|
||||
// Memory cleanup
|
||||
context_reset_scratch(ctx);
|
||||
arena_attach(&ctx->read, page);
|
||||
}
|
||||
|
||||
bool input_eof(input_t *input)
|
||||
{
|
||||
return !input || (input->offset >= input->str.size) ||
|
||||
(input->str.data[input->offset] == '\0');
|
||||
}
|
||||
|
||||
char input_peek(input_t *input, u64 offset)
|
||||
{
|
||||
if (input_eof(input) || input->offset + offset >= input->str.size)
|
||||
return '\0';
|
||||
return input->str.data[input->offset + offset];
|
||||
}
|
||||
|
||||
char input_next(input_t *input, u64 offset)
|
||||
{
|
||||
if (input_eof(input) || input->offset + offset >= input->str.size)
|
||||
return '\0';
|
||||
input->offset += offset;
|
||||
return input->str.data[input->offset];
|
||||
}
|
||||
|
||||
void input_skip(input_t *inp)
|
||||
{
|
||||
// n + 2 lookup
|
||||
sv_t current = sv_cut(inp->str, inp->offset);
|
||||
sv_t lookup = sv_chop(current, 2);
|
||||
while ((!input_eof(inp) && is_space(lookup.data[0])) ||
|
||||
lookup.data[0] == ';' || strncmp(lookup.data, "#|", 2) == 0)
|
||||
{
|
||||
if (lookup.data[0] == ';')
|
||||
{
|
||||
i64 newline = sv_find_subcstr(current, "\n", 1);
|
||||
if (newline < 0)
|
||||
inp->offset = inp->str.size;
|
||||
else
|
||||
inp->offset += newline + 1;
|
||||
}
|
||||
else if (strncmp(lookup.data, "#|", 2) == 0)
|
||||
{
|
||||
i64 offset = sv_find_subcstr(current, "|#", 2);
|
||||
if (offset < 0)
|
||||
inp->offset = inp->str.size;
|
||||
else
|
||||
inp->offset += offset + 2;
|
||||
}
|
||||
|
||||
inp->offset += sv_while(sv_cut(inp->str, inp->offset), is_space);
|
||||
current = sv_cut(inp->str, inp->offset);
|
||||
lookup = sv_chop(current, 2);
|
||||
}
|
||||
}
|
||||
|
||||
perr_t parse_int(context_t *ctx, input_t *inp, lisp_t **ret)
|
||||
{
|
||||
debug("parse_int[%lu] => ", inp->offset);
|
||||
|
||||
// TODO: Parse arbitrary sized integers
|
||||
(void)ctx;
|
||||
|
||||
bool negative = (input_peek(inp, 0) == '-');
|
||||
sv_t current = sv_cut(inp->str, inp->offset + (negative ? 1 : 0));
|
||||
sv_t digits = sv_chop(current, sv_while(current, is_digit));
|
||||
|
||||
debug("`" PR_SV "` => ", SV_FMT(digits));
|
||||
i64 x = (negative ? -1L : 1L) * strtol(digits.data, NULL, 10);
|
||||
debug("%ld\n", x);
|
||||
|
||||
input_next(inp, digits.size + (negative ? 1 : 0));
|
||||
|
||||
*ret = make_int(x);
|
||||
return PERR_OK;
|
||||
}
|
||||
|
||||
perr_t parse_sym(context_t *ctx, input_t *inp, lisp_t **ret)
|
||||
{
|
||||
debug("parse_sym[%lu] => ", inp->offset);
|
||||
|
||||
sv_t current = sv_cut(inp->str, inp->offset);
|
||||
sv_t sym = sv_chop(current, sv_while(current, is_sym));
|
||||
debug("`" PR_SV "`\n", SV_FMT(sym));
|
||||
|
||||
if (sym.size == 3)
|
||||
{
|
||||
// NOTE: We can't mutate sym directly because it's on `read` space.
|
||||
|
||||
// TODO: Make this beautiful please.
|
||||
char buf[3];
|
||||
for (u64 i = 0; i < 3; ++i)
|
||||
buf[i] = toupper(sym.data[i]);
|
||||
|
||||
// NOTE: NIL symbol to actual NIL
|
||||
if (strncmp(buf, "NIL", 3) == 0)
|
||||
{
|
||||
input_next(inp, 3);
|
||||
return NIL;
|
||||
}
|
||||
}
|
||||
|
||||
lisp_t *lsym = make_sym(ctx, sym.data, sym.size);
|
||||
input_next(inp, sym.size);
|
||||
*ret = lsym;
|
||||
|
||||
return PERR_OK;
|
||||
}
|
||||
|
||||
perr_t parse_bool(context_t *ctx, input_t *inp, lisp_t **ret)
|
||||
{
|
||||
(void)ctx;
|
||||
debug("parse_bool[%lu] => ", inp->offset);
|
||||
char c = input_peek(inp, 1);
|
||||
bool b = -1;
|
||||
if (c == 't')
|
||||
b = true;
|
||||
else if (c == 'f')
|
||||
b = false;
|
||||
else
|
||||
return PERR_EXPECTED_BOOLEAN;
|
||||
*ret = tag_bool(b);
|
||||
input_next(inp, 2);
|
||||
return PERR_OK;
|
||||
}
|
||||
|
||||
perr_t parse_cons(context_t *ctx, input_t *inp, lisp_t **ret)
|
||||
{
|
||||
// TODO: Put this in a symbol table
|
||||
lisp_t *lisp_dot = make_sym(ctx, ".", 1);
|
||||
debug("parse_cons[%lu] => (\n", inp->offset);
|
||||
inp->offset += 1;
|
||||
|
||||
lisp_t *root = NIL;
|
||||
lisp_t **cur = NIL;
|
||||
bool dotted = false;
|
||||
|
||||
while (!input_eof(inp) && input_peek(inp, 0) != ')')
|
||||
{
|
||||
lisp_t *lisp = NIL;
|
||||
perr_t res = parse(ctx, inp, &lisp);
|
||||
if (res)
|
||||
return res;
|
||||
|
||||
// This is cheap to do
|
||||
if (lisp == lisp_dot)
|
||||
{
|
||||
dotted = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!root)
|
||||
{
|
||||
root = make_cons(ctx, lisp, NIL);
|
||||
cur = &root;
|
||||
}
|
||||
else if (!dotted)
|
||||
*cur = make_cons(ctx, lisp, NIL);
|
||||
else
|
||||
*cur = lisp;
|
||||
|
||||
if (cur && !dotted)
|
||||
cur = &as_cons(*cur)->cdr;
|
||||
|
||||
input_skip(inp);
|
||||
}
|
||||
|
||||
if (input_peek(inp, 0) != ')')
|
||||
return PERR_EXPECTED_CLOSE_BRACKET;
|
||||
|
||||
input_next(inp, 1);
|
||||
|
||||
debug(")\n");
|
||||
*ret = root;
|
||||
return PERR_OK;
|
||||
}
|
||||
|
||||
perr_t parse_vec(context_t *ctx, input_t *inp, lisp_t **ret)
|
||||
{
|
||||
debug("parse_vec[%lu] => [\n", inp->offset);
|
||||
input_next(inp, 2);
|
||||
|
||||
lisp_t *lvec = make_vec(ctx, 0);
|
||||
vec_t *vec = as_vec(lvec);
|
||||
|
||||
while (!input_eof(inp) && input_peek(inp, 0) != ')')
|
||||
{
|
||||
lisp_t *lisp = NIL;
|
||||
perr_t res = parse(ctx, inp, &lisp);
|
||||
if (res)
|
||||
return res;
|
||||
|
||||
vec_append(&ctx->memory, vec, &lisp, sizeof(lisp));
|
||||
input_skip(inp);
|
||||
}
|
||||
|
||||
if (input_peek(inp, 0) != ')')
|
||||
return PERR_EXPECTED_CLOSE_BRACKET;
|
||||
|
||||
input_next(inp, 1);
|
||||
|
||||
debug("]\n");
|
||||
*ret = lvec;
|
||||
return PERR_OK;
|
||||
}
|
||||
|
||||
perr_t parse_str(context_t *ctx, input_t *inp, lisp_t **ret)
|
||||
{
|
||||
debug("parse_str[%lu] => ", inp->offset);
|
||||
input_next(inp, 1); // 1 for the first speechmark
|
||||
sv_t sv = sv_cut(inp->str, inp->offset);
|
||||
i64 size = sv_find_subcstr(sv, "\"", 1);
|
||||
if (size < 0)
|
||||
return PERR_EXPECTED_SPEECH_MARK;
|
||||
|
||||
input_next(inp, size + 1); // 1 for that last speechmark
|
||||
sv_t str_content = sv_chop(sv, size);
|
||||
debug("\"" PR_SV "\"\n", SV_FMT(str_content));
|
||||
*ret = make_str(ctx, str_content.data, str_content.size);
|
||||
return PERR_OK;
|
||||
}
|
||||
|
||||
perr_t parse_quote(context_t *ctx, input_t *inp, lisp_t **ret)
|
||||
{
|
||||
char c = input_peek(inp, 0);
|
||||
if (!(c == '\'' || c == '`'))
|
||||
return PERR_UNEXPECTED_CHAR;
|
||||
input_next(inp, 1);
|
||||
sv_t prefix = {0};
|
||||
if (c == '\'')
|
||||
prefix = SV("quote", 5);
|
||||
else if (c == '`')
|
||||
prefix = SV("quasiquote", 10);
|
||||
lisp_t *root = make_cons(ctx, make_sym(ctx, prefix.data, prefix.size), NIL);
|
||||
lisp_t *rest = NIL;
|
||||
perr_t perr = parse(ctx, inp, &rest);
|
||||
if (perr)
|
||||
return perr;
|
||||
CDR(root) = make_cons(ctx, rest, NIL);
|
||||
*ret = root;
|
||||
return PERR_OK;
|
||||
}
|
||||
|
||||
// TODO: Make this interactable with user once we have evaluation
|
||||
perr_t parse_reader_macro(context_t *ctx, input_t *inp, lisp_t **ret)
|
||||
{
|
||||
char c = input_peek(inp, 1);
|
||||
if (c == '\\')
|
||||
{
|
||||
// character or weird base integer
|
||||
TODO("Not implemented reader macro for characters or weird bases");
|
||||
}
|
||||
else if (c == '(')
|
||||
{
|
||||
return parse_vec(ctx, inp, ret);
|
||||
}
|
||||
else if (c == 't' || c == 'f')
|
||||
return parse_bool(ctx, inp, ret);
|
||||
return PERR_UNEXPECTED_READER_MACRO_SYMBOL;
|
||||
}
|
||||
|
||||
static_assert(NUM_TAGS == 9);
|
||||
perr_t parse(context_t *ctx, input_t *inp, lisp_t **ret)
|
||||
{
|
||||
debug("parse => ");
|
||||
input_skip(inp);
|
||||
if (input_eof(inp))
|
||||
return PERR_EOF;
|
||||
|
||||
char c = input_peek(inp, 0);
|
||||
|
||||
if (is_digit(c) || (c == '-' && is_digit(input_peek(inp, 1))))
|
||||
return parse_int(ctx, inp, ret);
|
||||
else if (c == '#')
|
||||
return parse_reader_macro(ctx, inp, ret);
|
||||
else if (is_sym(c))
|
||||
return parse_sym(ctx, inp, ret);
|
||||
else if (c == '(')
|
||||
return parse_cons(ctx, inp, ret);
|
||||
else if (c == '\'' || c == '`')
|
||||
return parse_quote(ctx, inp, ret);
|
||||
else if (c == '\"')
|
||||
return parse_str(ctx, inp, ret);
|
||||
else
|
||||
return PERR_UNEXPECTED_CHAR;
|
||||
}
|
||||
|
||||
perr_t parse_all(context_t *ctx, input_t *inp, vec_t *vec)
|
||||
{
|
||||
while (!input_eof(inp))
|
||||
{
|
||||
lisp_t *member = NIL;
|
||||
perr_t err = parse(ctx, inp, &member);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
else
|
||||
vec_append(&ctx->scratch, vec, &member, sizeof(member));
|
||||
|
||||
input_skip(inp);
|
||||
}
|
||||
return PERR_OK;
|
||||
}
|
||||
|
||||
int print_perror(FILE *fp, input_t *inp, perr_t error)
|
||||
{
|
||||
pos_t pos = input_offset_to_pos(inp);
|
||||
fprintf(fp, "%s:%lu:%lu: %s", inp->name, pos.line, pos.col,
|
||||
perr_to_cstr(error));
|
||||
switch (error)
|
||||
{
|
||||
case PERR_UNEXPECTED_CHAR:
|
||||
fprintf(fp, "(`%c`)", input_peek(inp, 0));
|
||||
break;
|
||||
case PERR_OK:
|
||||
case PERR_EOF:
|
||||
case PERR_EXPECTED_BOOLEAN:
|
||||
case PERR_UNEXPECTED_READER_MACRO_SYMBOL:
|
||||
case PERR_EXPECTED_CLOSE_BRACKET:
|
||||
case PERR_EXPECTED_SPEECH_MARK:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
pos_t input_offset_to_pos(input_t *inp)
|
||||
{
|
||||
pos_t pos = {.col = 1, .line = 1};
|
||||
for (u64 i = 0; i < inp->offset && i < inp->str.size; ++i)
|
||||
{
|
||||
char c = (inp->str.data[i]);
|
||||
if (c == '\n')
|
||||
{
|
||||
++pos.line;
|
||||
pos.col = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
++pos.col;
|
||||
}
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
98
reader.h
Normal file
98
reader.h
Normal file
@@ -0,0 +1,98 @@
|
||||
/* Copyright (C) 2025 Aryadev Chavali
|
||||
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||
* details.
|
||||
|
||||
* You may distribute and modify this code under the terms of the GNU General
|
||||
* Public License Version 2, which you should have received a copy of along with
|
||||
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||
|
||||
* Created: 2025-04-16
|
||||
* Description: Parsing Lisp!
|
||||
*/
|
||||
|
||||
#ifndef READER_H
|
||||
#define READER_H
|
||||
|
||||
#include "./lisp.h"
|
||||
#include "./memory.h"
|
||||
#include "./vec.h"
|
||||
|
||||
#define INPUT_CHUNK_SIZE 512
|
||||
static const char SYM_CHARS[] =
|
||||
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
|
||||
"¬!£$%^&*_-+={[]}:@~#<,>.?/";
|
||||
|
||||
typedef struct
|
||||
{
|
||||
const char *name;
|
||||
u64 offset;
|
||||
sv_t str;
|
||||
} input_t;
|
||||
|
||||
void input_from_sv(context_t *ctx, input_t *inp, const char *name, sv_t sv);
|
||||
void input_from_fp(context_t *ctx, input_t *input, const char *name, FILE *fp);
|
||||
bool input_eof(input_t *input);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
u64 col, line;
|
||||
} pos_t;
|
||||
|
||||
pos_t input_offset_to_pos(input_t *inp);
|
||||
|
||||
typedef enum
|
||||
{
|
||||
PERR_OK = 0,
|
||||
PERR_EOF,
|
||||
PERR_UNEXPECTED_CHAR,
|
||||
PERR_EXPECTED_CLOSE_BRACKET,
|
||||
PERR_EXPECTED_SPEECH_MARK,
|
||||
PERR_UNEXPECTED_READER_MACRO_SYMBOL,
|
||||
PERR_EXPECTED_BOOLEAN,
|
||||
} perr_t;
|
||||
|
||||
static inline const char *perr_to_cstr(perr_t perr)
|
||||
{
|
||||
switch (perr)
|
||||
{
|
||||
case PERR_OK:
|
||||
return "OK";
|
||||
break;
|
||||
case PERR_EOF:
|
||||
return "EOF";
|
||||
break;
|
||||
case PERR_UNEXPECTED_CHAR:
|
||||
return "UNEXPECTED_CHAR";
|
||||
break;
|
||||
case PERR_EXPECTED_CLOSE_BRACKET:
|
||||
return "EXPECTED_CLOSE_BRACKET";
|
||||
break;
|
||||
case PERR_EXPECTED_SPEECH_MARK:
|
||||
return "EXPECTED_SPEECH_MARK";
|
||||
break;
|
||||
case PERR_UNEXPECTED_READER_MACRO_SYMBOL:
|
||||
return "UNEXPECTED_READER_MACRO_SYMBOL";
|
||||
break;
|
||||
case PERR_EXPECTED_BOOLEAN:
|
||||
return "EXPECTED_BOOLEAN";
|
||||
break;
|
||||
}
|
||||
assert(false && "perr_to_cstr: unreachable");
|
||||
return "";
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
lisp_t *result;
|
||||
perr_t error;
|
||||
} pres_t;
|
||||
|
||||
perr_t parse(context_t *ctx, input_t *str, lisp_t **ret);
|
||||
perr_t parse_all(context_t *ctx, input_t *str, vec_t *vec);
|
||||
|
||||
int print_perror(FILE *fp, input_t *inp, perr_t error);
|
||||
|
||||
#endif
|
||||
158
sv.c
Normal file
158
sv.c
Normal file
@@ -0,0 +1,158 @@
|
||||
/* Copyright (C) 2025 Aryadev Chavali
|
||||
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||
* details.
|
||||
|
||||
* You may distribute and modify this code under the terms of the GNU General
|
||||
* Public License Version 2, which you should have received a copy of along with
|
||||
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||
|
||||
* Created: 2025-04-14
|
||||
* Description: String View implementation
|
||||
*/
|
||||
|
||||
#include "./sv.h"
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
sv_t sv_make(arena_t *allocator, const char *data, u64 size)
|
||||
{
|
||||
sv_t s = {0};
|
||||
if (data)
|
||||
s = sv_append(allocator, s, data, size);
|
||||
else
|
||||
{
|
||||
s.data = arena_alloc(allocator, size);
|
||||
s.size = size;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
sv_t sv_copy(arena_t *allocator, sv_t sv)
|
||||
{
|
||||
return sv_make(allocator, sv.data, sv.size);
|
||||
}
|
||||
|
||||
sv_t sv_substr(sv_t sv, u64 index, u64 size)
|
||||
{
|
||||
sv_t newsv = {0};
|
||||
if (index + size > sv.size)
|
||||
return newsv;
|
||||
newsv.data = sv.data + index;
|
||||
newsv.size = size;
|
||||
return newsv;
|
||||
}
|
||||
|
||||
sv_t sv_cut(sv_t sv, u64 index)
|
||||
{
|
||||
return sv_substr(sv, index, sv.size < index ? 0 : sv.size - index);
|
||||
}
|
||||
|
||||
sv_t sv_chop(sv_t sv, u64 size)
|
||||
{
|
||||
return sv_substr(sv, 0, size);
|
||||
}
|
||||
|
||||
sv_t sv_concat(arena_t *allocator, sv_t a, sv_t b)
|
||||
{
|
||||
sv_t c = sv_make(allocator, a.data, a.size + b.size);
|
||||
memcpy(c.data + a.size, b.data, b.size);
|
||||
return c;
|
||||
}
|
||||
|
||||
sv_t sv_append(arena_t *allocator, sv_t sv, const char *data, u64 size)
|
||||
{
|
||||
if (!allocator)
|
||||
return (sv_t){0};
|
||||
|
||||
sv_t newsv = {0};
|
||||
newsv.size = sv.size + size;
|
||||
newsv.data = arena_realloc(allocator, sv.data, sv.size, newsv.size);
|
||||
if (data)
|
||||
memcpy(newsv.data + sv.size, data, size);
|
||||
return newsv;
|
||||
}
|
||||
|
||||
sv_t sv_prepend(arena_t *allocator, sv_t sv, const char *data, u64 size)
|
||||
{
|
||||
if (!allocator)
|
||||
return (sv_t){0};
|
||||
|
||||
// TODO: Can we make this cheaper to do?
|
||||
sv_t newsv = sv_make(allocator, NULL, size + sv.size);
|
||||
// Copy over `data` to the left side
|
||||
memcpy(newsv.data, data, size);
|
||||
// Copy old string to the right side
|
||||
if (sv.data)
|
||||
memcpy(newsv.data + size, sv.data, sv.size);
|
||||
|
||||
return newsv;
|
||||
}
|
||||
|
||||
sv_t sv_fmt(arena_t *allocator, char *fmt, ...)
|
||||
{
|
||||
if (!allocator)
|
||||
return (sv_t){0};
|
||||
|
||||
va_list ap_1, ap_2;
|
||||
va_start(ap_1, fmt);
|
||||
|
||||
va_copy(ap_2, ap_1);
|
||||
u64 size = vsnprintf(NULL, 0, fmt, ap_2);
|
||||
va_end(ap_2);
|
||||
|
||||
sv_t sv = sv_make(allocator, NULL, size);
|
||||
vsprintf(sv.data, fmt, ap_1);
|
||||
va_end(ap_1);
|
||||
|
||||
return sv;
|
||||
}
|
||||
|
||||
i64 sv_find_substr(const sv_t sv, const sv_t substr)
|
||||
{
|
||||
if (substr.size == 0)
|
||||
return 0;
|
||||
else if (sv.size < substr.size)
|
||||
return -1;
|
||||
else if (sv.size == substr.size)
|
||||
return strncmp(sv.data, substr.data, sv.size) == 0 ? 0 : -1;
|
||||
|
||||
for (u64 i = 0; i < (sv.size - substr.size); ++i)
|
||||
if (strncmp(sv.data + i, substr.data, substr.size) == 0)
|
||||
return i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
i64 sv_find_subcstr(const sv_t sv, const char *substr, u64 size)
|
||||
{
|
||||
return sv_find_substr(sv, SV((char *)substr, size));
|
||||
}
|
||||
|
||||
i64 sv_find_any(const sv_t sv, const char *bag)
|
||||
{
|
||||
for (u64 i = 0; i < sv.size; ++i)
|
||||
if (strchr(bag, sv.data[i]))
|
||||
return i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
u64 sv_while(const sv_t sv, bool (*pred)(char))
|
||||
{
|
||||
u64 i;
|
||||
for (i = 0; i < sv.size && pred(sv.data[i]); ++i)
|
||||
continue;
|
||||
return i;
|
||||
}
|
||||
|
||||
u64 sv_till(const sv_t sv, bool (*pred)(char))
|
||||
{
|
||||
u64 i;
|
||||
for (i = 0; i < sv.size && !pred(sv.data[i]); ++i)
|
||||
continue;
|
||||
return i;
|
||||
}
|
||||
70
sv.h
Normal file
70
sv.h
Normal file
@@ -0,0 +1,70 @@
|
||||
/* Copyright (C) 2025 Aryadev Chavali
|
||||
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||
* details.
|
||||
|
||||
* You may distribute and modify this code under the terms of the GNU General
|
||||
* Public License Version 2, which you should have received a copy of along with
|
||||
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||
|
||||
* Created: 2025-04-14
|
||||
* Description: String Views
|
||||
*/
|
||||
|
||||
#ifndef SV_H
|
||||
#define SV_H
|
||||
|
||||
#include "./memory.h"
|
||||
|
||||
typedef struct SV
|
||||
{
|
||||
u64 size;
|
||||
char *data;
|
||||
} sv_t;
|
||||
|
||||
#define SV(DATA, SIZE) ((const sv_t){.size = (SIZE), .data = (DATA)})
|
||||
#define SV_FMT(SV) (int)(SV).size, (SV).data
|
||||
#define PR_SV "%.*s"
|
||||
|
||||
sv_t sv_make(arena_t *allocator, const char *data, u64 size);
|
||||
sv_t sv_copy(arena_t *allocator, sv_t sv);
|
||||
sv_t sv_append(arena_t *allocator, sv_t sv, const char *data, u64 size);
|
||||
sv_t sv_prepend(arena_t *allocator, sv_t sv, const char *data, u64 size);
|
||||
|
||||
/**
|
||||
* @brief Concatenate two string views, returning that concatenation.
|
||||
|
||||
* Allocates memory.
|
||||
*/
|
||||
sv_t sv_concat(arena_t *allocator, sv_t a, sv_t b);
|
||||
|
||||
/**
|
||||
* @brief Allocates a string view with the given `printf` format.
|
||||
*/
|
||||
sv_t sv_fmt(arena_t *allocator, char *fmt, ...);
|
||||
|
||||
/**
|
||||
* @brief Constructs a new string view at a different offset. Does not allocate
|
||||
* new memory.
|
||||
*/
|
||||
sv_t sv_substr(sv_t sv, u64 index, u64 size);
|
||||
/**
|
||||
* @brief Return a string view INDEX characters ahead (i.e. cut the string from
|
||||
* the left).
|
||||
*/
|
||||
sv_t sv_cut(sv_t sv, u64 index);
|
||||
/**
|
||||
* @brief Return a string view with SIZE (i.e. chopping the string from the
|
||||
* right)
|
||||
*/
|
||||
sv_t sv_chop(sv_t sv, u64 size);
|
||||
|
||||
i64 sv_find_substr(const sv_t sv, const sv_t substr);
|
||||
i64 sv_find_subcstr(const sv_t sv, const char *substr, u64 size);
|
||||
i64 sv_find_any(const sv_t sv, const char *bag);
|
||||
u64 sv_while(const sv_t sv, bool (*pred)(char));
|
||||
u64 sv_till(const sv_t sv, bool (*pred)(char));
|
||||
|
||||
#endif
|
||||
94
tag.c
Normal file
94
tag.c
Normal file
@@ -0,0 +1,94 @@
|
||||
/* Copyright (C) 2025 Aryadev Chavali
|
||||
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||
* details.
|
||||
|
||||
* You may distribute and modify this code under the terms of the GNU General
|
||||
* Public License Version 2, which you should have received a copy of along with
|
||||
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||
|
||||
* Created: 2025-04-06
|
||||
* Description:
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <tag.h>
|
||||
|
||||
lisp_t *tag_int(i64 i)
|
||||
{
|
||||
return TAG((u64)i, INT);
|
||||
}
|
||||
|
||||
lisp_t *tag_cons(void *ptr)
|
||||
{
|
||||
return TAG((u64)ptr, CONS);
|
||||
}
|
||||
|
||||
lisp_t *tag_sym(void *ptr)
|
||||
{
|
||||
return TAG((u64)ptr, SYM);
|
||||
}
|
||||
|
||||
lisp_t *tag_ssym(const char *data, size_t size)
|
||||
{
|
||||
assert(size <= 7);
|
||||
u8 buffer[sizeof(u64)];
|
||||
memset(buffer, 0, sizeof(buffer));
|
||||
// in 8 bits we have:
|
||||
// size - 3 bits (up to 7)
|
||||
// tag - 5 bits
|
||||
buffer[0] = size;
|
||||
buffer[0] <<= SHIFT_SSYM;
|
||||
buffer[0] |= TAG_SSYM;
|
||||
memcpy(buffer + 1, data, size);
|
||||
u64 word = 0;
|
||||
memcpy(&word, buffer, sizeof(u64));
|
||||
return (lisp_t *)word;
|
||||
}
|
||||
|
||||
lisp_t *tag_bool(bool b)
|
||||
{
|
||||
return TAG((u64)b, BOOL);
|
||||
}
|
||||
|
||||
lisp_t *tag_vec(void *ptr)
|
||||
{
|
||||
return TAG((u64)ptr, VEC);
|
||||
}
|
||||
|
||||
lisp_t *tag_str(void *ptr)
|
||||
{
|
||||
return TAG((u64)ptr, STR);
|
||||
}
|
||||
|
||||
lisp_t *tag_char(u32 codepoint)
|
||||
{
|
||||
u64 w = codepoint;
|
||||
return TAG(w, CHAR);
|
||||
}
|
||||
|
||||
enum Tag tag_get(lisp_t *ptr)
|
||||
{
|
||||
static_assert(NUM_TAGS == 9);
|
||||
if (!ptr)
|
||||
return TAG_NIL;
|
||||
else if (IS_TAG(ptr, INT))
|
||||
return TAG_INT;
|
||||
else if (IS_TAG(ptr, CHAR))
|
||||
return TAG_CHAR;
|
||||
else if (IS_TAG(ptr, SYM))
|
||||
return TAG_SYM;
|
||||
else if (IS_TAG(ptr, SSYM))
|
||||
return TAG_SSYM;
|
||||
else if (IS_TAG(ptr, BOOL))
|
||||
return TAG_BOOL;
|
||||
else if (IS_TAG(ptr, VEC))
|
||||
return TAG_VEC;
|
||||
else if (IS_TAG(ptr, STR))
|
||||
return TAG_STR;
|
||||
else if (IS_TAG(ptr, CONS))
|
||||
return TAG_CONS;
|
||||
return 0;
|
||||
}
|
||||
90
tag.h
Normal file
90
tag.h
Normal file
@@ -0,0 +1,90 @@
|
||||
/* Copyright (C) 2025 Aryadev Chavali
|
||||
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||
* details.
|
||||
|
||||
* You may distribute and modify this code under the terms of the GNU General
|
||||
* Public License Version 2, which you should have received a copy of along with
|
||||
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||
|
||||
* Created: 2025-04-06
|
||||
* Description:
|
||||
*/
|
||||
|
||||
#ifndef TAG_H
|
||||
#define TAG_H
|
||||
|
||||
#include "./base.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// Opaque structure to make tagged pointers a separate type from general
|
||||
// pointers
|
||||
typedef struct Obj lisp_t;
|
||||
|
||||
enum Tag
|
||||
{
|
||||
TAG_NIL = 0b00000000, // Atomic types
|
||||
TAG_INT = 0b00000001, // special so we can encode 63 bit integers
|
||||
TAG_SYM = 0b00000100,
|
||||
TAG_SSYM = 0b00001000,
|
||||
TAG_CHAR = 0b00001100,
|
||||
TAG_BOOL = 0b00010000,
|
||||
// TAG_FLOAT = 0b00010100,
|
||||
TAG_CONS = 0b00000010, // Container types
|
||||
TAG_VEC = 0b00000110,
|
||||
TAG_STR = 0b00001010,
|
||||
|
||||
NUM_TAGS = 9,
|
||||
};
|
||||
|
||||
enum Shift
|
||||
{
|
||||
SHIFT_INT = 1,
|
||||
SHIFT_SSYM = 5,
|
||||
SHIFT_CHAR = 8,
|
||||
SHIFT_SYM = 8,
|
||||
SHIFT_BOOL = 8,
|
||||
SHIFT_FLOAT = 8,
|
||||
|
||||
SHIFT_CONS = 8,
|
||||
SHIFT_VEC = 8,
|
||||
SHIFT_STR = 8,
|
||||
};
|
||||
|
||||
enum Mask
|
||||
{
|
||||
MASK_INT = 0b00000001,
|
||||
MASK_SSYM = 0b00011111,
|
||||
MASK_SYM = 0b11111111,
|
||||
MASK_CHAR = 0b11111111,
|
||||
MASK_BOOL = 0b11111111,
|
||||
MASK_FLOAT = 0b11111111,
|
||||
|
||||
MASK_CONS = 0b11111111,
|
||||
MASK_VEC = 0b11111111,
|
||||
MASK_STR = 0b11111111,
|
||||
};
|
||||
|
||||
#define TAG(PTR, TYPE) ((lisp_t *)(((PTR) << SHIFT_##TYPE) | TAG_##TYPE))
|
||||
#define IS_TAG(PTR, TYPE) (((u64)(PTR) & MASK_##TYPE) == TAG_##TYPE)
|
||||
#define UNTAG(PTR, TYPE) (((u64)PTR) >> SHIFT_##TYPE)
|
||||
|
||||
enum Tag tag_get(lisp_t *ptr);
|
||||
|
||||
lisp_t *tag_int(i64 i);
|
||||
lisp_t *tag_char(u32 codepoint);
|
||||
lisp_t *tag_sym(void *ptr);
|
||||
lisp_t *tag_ssym(const char *data, size_t size);
|
||||
lisp_t *tag_bool(bool b);
|
||||
lisp_t *tag_vec(void *ptr);
|
||||
lisp_t *tag_str(void *ptr);
|
||||
lisp_t *tag_cons(void *ptr);
|
||||
|
||||
#define INT_MAX ((1L << 62) - 1)
|
||||
#define INT_MIN (-(1L << 62))
|
||||
|
||||
#endif
|
||||
230
tasks.org
Normal file
230
tasks.org
Normal file
@@ -0,0 +1,230 @@
|
||||
#+title: Tasks
|
||||
#+date: 2025-02-18
|
||||
|
||||
* WIP Implement a reader
|
||||
We want something a bit generic: able to handle reading from some
|
||||
buffer of memory (a string, or contents of a file where we can read
|
||||
the entire thing at once) or directly from a file stream (STDIN,
|
||||
network streams, etc).
|
||||
|
||||
We don't need a tokeniser - the basic grammar of a Lisp is really easy
|
||||
to narrow down, so we can skip tokenisation and go straight for
|
||||
parsing.
|
||||
|
||||
We also want to be able to admit when reading went wrong for some
|
||||
reason with proper errors messages (i.e. can be read by Emacs) - this
|
||||
will need to be refactored when we introduce errors within the Lisp
|
||||
runtime itself.
|
||||
** TODO Implement floats and rationals
|
||||
Rationals are pretty easy - just two integers (quotient and divisor) -
|
||||
so a tagged cons cell would do the job. Floats are a bit more
|
||||
difficult since I'd either need to box them or find a creative way of
|
||||
sticking IEEE-754 floats into < 64 bits.
|
||||
|
||||
Also implement a reader macro for #e<scientific form>. Also deal with
|
||||
[-,+,]inf(.0) and [-,+,]nan(.0).
|
||||
|
||||
Need to do some reading.
|
||||
|
||||
[[file:r7rs-tests.scm::test #t (real? #e1e10)][trigger]]
|
||||
** TODO Consider user instantiated reader macros
|
||||
We don't have an evaluator so we can't really interpret whatever a
|
||||
user wants for a reader macro currently, but it would be useful to
|
||||
think about it now. Currently I have a single function which deals
|
||||
with parsing reader macros but that's just static.
|
||||
|
||||
Thing is, it does have the context as a parameter to pass to delegate
|
||||
functions (such as ~parse_vec~) - wouldn't be a massive jump to also
|
||||
consider user environments via the context.
|
||||
|
||||
[[file:reader.c::perr_t parse_reader_macro(context_t *ctx, input_t
|
||||
*inp, lisp_t **ret)][function link]]
|
||||
* TODO Consider Lisp runtime errors
|
||||
* TODO Admit arbitrarily sized integers
|
||||
Currently we admit fixed size integers of 63 bits. They use 2s
|
||||
complement due to x86 which means our max and min are 62 bit based.
|
||||
|
||||
However, to even try to be a scheme implementation we need to allow
|
||||
arbitrarily sized integers. What are the specific tasks we need to
|
||||
complete in our model to achieve this?:
|
||||
+ Allow "reading" of unfixed size integers
|
||||
+ This will require reading a sequence of base 10 digits without
|
||||
relying on strtold
|
||||
+ Implement unfixed size integers into our memory model
|
||||
+ Certainly, unfixed size integers cannot be carried around like
|
||||
fixnums wherein we can embed an integer into the pointer.
|
||||
Thus we have to allocate them in memory.
|
||||
+ NOTE: There will be definitely be an optimisation to be done
|
||||
here; integers that are within the bound of a fixnum could be
|
||||
left as a fixnum then "elevated" to an integer when needed
|
||||
+ I think the big idea is allocating them as a fixed set of bytes
|
||||
like big symbols. For big integers we have to read the memory
|
||||
associated thus we need a pointer. Due to 2s complement it should
|
||||
be trivial to increase the size of an integer to fit a new result
|
||||
i.e. if I'm adding two integers and that leads to an "overflow"
|
||||
where the result is of greater width than its inputs, we should
|
||||
just allocate new memory for it.
|
||||
|
||||
Consequences:
|
||||
- Greater memory use
|
||||
- In fact exponential if we need to allocate a whole new integer per
|
||||
operation rather than utilising the input memory
|
||||
- Possible loss of performance due to making integers over fixnums
|
||||
when they don't need to be
|
||||
- Comparison is harder on integers
|
||||
- Harder to cache for the CPU
|
||||
|
||||
but all of this is to be expected when the user is an idiot.
|
||||
* TODO Think about how to perform operations on different types
|
||||
** TODO Integers
|
||||
** TODO Symbols
|
||||
** TODO Pairs
|
||||
* DONE More efficient memory model for symbols
|
||||
The primitive model for symbol allocation is an 8 byte number
|
||||
representing the size of the symbol, followed by a variable number of
|
||||
characters (as bytes). This is stored somewhere in the memory
|
||||
context, which will likely be in the heap.
|
||||
|
||||
We're actually wasting a ridiculous amount of memory with this model.
|
||||
We'll almost never be using the full 64 bits of the size to represent
|
||||
a symbol (who's ever going to go close to 1.8 quintillion bytes for a
|
||||
single symbol?) - and even more annoying, there are tons of small
|
||||
sized symbols where we actually need _more space_ for the 8 byte size
|
||||
than the underlying symbol data.
|
||||
|
||||
I think there's definitely a better model available, at least for
|
||||
smaller symbols. We already have inlined integers where the pointer
|
||||
itself is the integer, why can't we do the same for symbols?
|
||||
A pointer has 8 bytes of data to use - one character is one byte -
|
||||
thus we can represent 8 character symbols in one pointer.
|
||||
|
||||
If we want this to still be within the remit of our pointer tagging
|
||||
scheme, we'll need a bit of space for our administrative purposes
|
||||
(admin slop...). So let's take _one_ byte out of that 8 for that. So
|
||||
we can represent any symbols 7 bytes long in a single pointer. We'll
|
||||
need to keep in mind we want to represent symbols that may be less
|
||||
than 7 characters, so that one admin byte is going to be doing some
|
||||
heavy lifting.
|
||||
|
||||
Let's examine that one admin byte:
|
||||
+ At least 3 bits are necessary for the actual pointer tag: "at least"
|
||||
because we might increase the size of the tag based on demand
|
||||
+ Thus, 5 bits left for our use - let's fit the size in there.
|
||||
pow(2,6) - 1 = 63, so we have _way_ more than we need
|
||||
|
||||
What are the benefits of doing this?:
|
||||
+ Symbol equality for small symbols is a cinch: just compare the two
|
||||
tagged "pointers"
|
||||
+ 7 or less character symbols require no memory allocation, just
|
||||
working off the stack
|
||||
|
||||
One thing to note is that for more than 7 character symbols, we'll
|
||||
need to allocate memory. But, in the worst case of 8 character
|
||||
symbols, we're only allocating two 64 bit integers: these are easy to
|
||||
walk on x86 and we've reached at least parity between the memory
|
||||
required for administration (the size number) and the actual data.
|
||||
** Being more aggressive?
|
||||
Technically, ANSI bytes only need 7 bits. For each of the 7 bytes
|
||||
used for the character data, we can take one bit off, leaving us with
|
||||
7 bits to use for an additional character. We don't need to adjust
|
||||
anything else in the schema.
|
||||
|
||||
So, hypothetically we could represent up to 8 character symbols! This
|
||||
would require packing the characters more aggressively into a single
|
||||
pointer. Let's look at the layout of our pointers. This table is
|
||||
indexed from most significant to least i.e. 0 is the MSB and 63 is the
|
||||
LSB:
|
||||
|
||||
|-------+------------|
|
||||
| Index | Usage |
|
||||
|-------+------------|
|
||||
| 0-55 | Characters |
|
||||
| 56-60 | Size |
|
||||
| 61-63 | Tag |
|
||||
|-------+------------|
|
||||
|
||||
Honestly though, for an extra byte of information we'll probably have
|
||||
to do a lot more work. x86-64 CPUs are much better at walking bytes
|
||||
than they are walking 7 bit offsets. This may be something to
|
||||
consider if CPU time is cheaper than allocating 8 byte symbols
|
||||
somewhere.
|
||||
* DONE Tagging scheme based on arena pages
|
||||
2025-04-09:21:59:29: We went for option (2) of just taking a byte for
|
||||
free from the memory address and using it as our management byte.
|
||||
** 1) Page-offset schema
|
||||
I've realised arenas are way better than the standard array dynamic I
|
||||
was going for before. However, we lose the nicer semantics of using
|
||||
an array index for pointers, where we can implement our own semantics
|
||||
regarding what bits in that pointer are free to use, when using a
|
||||
normal stable pointer into the arena; the host operating system has
|
||||
its own semantics regarding how pointers are arranged and this _will_
|
||||
change between operating systems. In particular, because of the way
|
||||
I've arranged pages, we can't use the classic "div by 8" methodology
|
||||
where new allocations on the heap generally must be aligned by 8 bytes
|
||||
(u64), so we can use those 3 bits at the bottom for our tagging;
|
||||
offsets into pages are where our pointers will lie and they won't
|
||||
necessarily be divisible by 8.
|
||||
|
||||
So we can't use the pointers directly into the pages - we'll call
|
||||
these types of pointers `host pointers`, because once we have them
|
||||
it's trivial to access the underlying data. We'll call the pointers
|
||||
we want to make `managed pointers` because we're managing the memory
|
||||
system associated with them. We want to be able to translate from
|
||||
managed pointers to host pointers.
|
||||
|
||||
Managed pointers are really just encodings for direct access into the
|
||||
arena memory. So in 8 bytes, we need to encode both the page and the
|
||||
specific offset in that page where the pointer is pointing to. We
|
||||
also want to leave space for tagging and any metadata we might want to
|
||||
store in the pointer to that data. A schema I could think of was:
|
||||
|
||||
|------------------+--------------------|
|
||||
| Index (in bytes) | Representation |
|
||||
|------------------+--------------------|
|
||||
| 0 | Metadata (tagging) |
|
||||
| 1-4 | Offset in page |
|
||||
| 4-7 | Page choice |
|
||||
|------------------+--------------------|
|
||||
|
||||
This gives us pow(2, 24) - 1 = 16777215 possible pages and
|
||||
pow(2, 32) - 1 = 4294967295 offsets in each page. Thus our total
|
||||
addressable memory would be pow(2, 56) - 1 = 72057594037927935 bytes.
|
||||
|
||||
Certainly no machine would ever have this much memory and so we're
|
||||
quite safe for most machines. That reserved management byte for our
|
||||
purposes (tagging, currently) will make the math to translate it a bit
|
||||
easier.
|
||||
|
||||
Let's reason about how we'd encode and decode these addresses. The
|
||||
arena itself should provide addresses with the management byte set to
|
||||
0 for the user to encode what they wish. The top bytes should be
|
||||
encoded as per the above i.e. top 3 bytes as the page index, next 4
|
||||
bytes as the offset in that page. This shouldn't be super difficult
|
||||
when we're doing it within the management functions of the arena
|
||||
itself as this data should be handy when performing the allocation.
|
||||
|
||||
When decoding these addresses i.e. retrieving data i.e. translating
|
||||
from a managed pointer to a host pointer, all it will need to do is
|
||||
convert the pointer into a byte buffer and copy the top 3 bytes as a
|
||||
page index and the next 4 bytes as the offset in the page. Once these
|
||||
are verified to be valid, we can just access the underlying pages and
|
||||
get the host pointer. Because of how arenas work, those host pointers
|
||||
will be stable regardless of any further memory management functions
|
||||
performed on the arena (excluding cleanup) - so once you have a host
|
||||
pointer, you can use it as much as you want without having to worry
|
||||
about the pointer becoming invalid in the next second.
|
||||
** 2) 48-bit addressing exploit
|
||||
Most x86 CPUs only use around 48-56 bits for actual memory addresses -
|
||||
mostly as a result of not needing _nearly_ as many addresses as a full
|
||||
64 bit word would provide.
|
||||
|
||||
So we /could/ get away with using one of those bytes for our
|
||||
administrative tasks. Since the smallest remit we have is one byte,
|
||||
we'll stick to that (but maybe we could go for two bytes - need to
|
||||
investigate further).
|
||||
|
||||
This byte should be the MSB, but using that for tagging will require
|
||||
more work than the lowest byte (to look at it we'll need to push that
|
||||
byte all the way down). So we'll be going for a low byte strategy by
|
||||
shifting the pointer up by one byte. This leaves us with the lowest
|
||||
byte to play with as we choose.
|
||||
38
vec.c
Normal file
38
vec.c
Normal file
@@ -0,0 +1,38 @@
|
||||
/* Copyright (C) 2025 Aryadev Chavali
|
||||
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||
* details.
|
||||
|
||||
* You may distribute and modify this code under the terms of the GNU General
|
||||
* Public License Version 2, which you should have received a copy of along with
|
||||
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||
|
||||
* Created: 2025-04-15
|
||||
* Description: Vector implementation
|
||||
*/
|
||||
|
||||
#include "./vec.h"
|
||||
|
||||
#include <malloc.h>
|
||||
#include <string.h>
|
||||
|
||||
void vec_reserve(arena_t *allocator, vec_t *vec, u32 size)
|
||||
{
|
||||
if (vec->cap - vec->size < size)
|
||||
{
|
||||
u32 old_cap = vec->cap;
|
||||
vec->cap = MAX(vec->cap * 1.5, vec->size + size);
|
||||
vec->data = arena_realloc(allocator, vec->data, old_cap, vec->cap);
|
||||
}
|
||||
}
|
||||
|
||||
u32 vec_append(arena_t *allocator, vec_t *vec, const void *data, u32 size)
|
||||
{
|
||||
vec_reserve(allocator, vec, size);
|
||||
memcpy(vec->data + vec->size, data, size);
|
||||
u32 ptr = vec->size;
|
||||
vec->size += size;
|
||||
return ptr;
|
||||
}
|
||||
30
vec.h
Normal file
30
vec.h
Normal file
@@ -0,0 +1,30 @@
|
||||
/* Copyright (C) 2025 Aryadev Chavali
|
||||
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for
|
||||
* details.
|
||||
|
||||
* You may distribute and modify this code under the terms of the GNU General
|
||||
* Public License Version 2, which you should have received a copy of along with
|
||||
* this program. If not, please go to <https://www.gnu.org/licenses/>.
|
||||
|
||||
* Created: 2025-04-15
|
||||
* Description: Vectors (dynamic sized arrays)
|
||||
*/
|
||||
|
||||
#ifndef VEC_H
|
||||
#define VEC_H
|
||||
|
||||
#include "./memory.h"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
u32 size, cap;
|
||||
u8 *data;
|
||||
} vec_t;
|
||||
|
||||
void vec_reserve(arena_t *alloactor, vec_t *vec, u32 size);
|
||||
u32 vec_append(arena_t *allocator, vec_t *vec, const void *data, u32 size);
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user