diff options
Diffstat (limited to 'lisp')
-rw-r--r-- | lisp/context.c | 92 | ||||
-rw-r--r-- | lisp/context.h | 38 | ||||
-rw-r--r-- | lisp/eval.c | 24 | ||||
-rw-r--r-- | lisp/eval.h | 28 | ||||
-rw-r--r-- | lisp/lisp.c | 337 | ||||
-rw-r--r-- | lisp/lisp.h | 57 | ||||
-rw-r--r-- | lisp/reader.c | 426 | ||||
-rw-r--r-- | lisp/reader.h | 98 | ||||
-rw-r--r-- | lisp/tag.c | 94 | ||||
-rw-r--r-- | lisp/tag.h | 92 |
10 files changed, 1286 insertions, 0 deletions
diff --git a/lisp/context.c b/lisp/context.c new file mode 100644 index 0000000..3b94e54 --- /dev/null +++ b/lisp/context.c @@ -0,0 +1,92 @@ +/* Copyright (C) 2025 Aryadev Chavali + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General + Public License Version 2 + * for details. + + * You may distribute and modify this code under the terms of the + * GNU General Public License Version 2, which you should have received a copy + of along with this + * program. If not, please go to <https://www.gnu.org/licenses/>. + + * Created: 2025-05-12 + * Description: + */ + +#include <lisp/context.h> + +#include <string.h> + +// Allocates against stable memory i.e. we can have pointers of this lying +// around without any fear of them being thrown away. +void *context_alloc(context_t *context, u64 size) +{ + return arena_alloc(&context->memory, size); +} + +// Allocate against a "scratch space", separate from main memory, for internal +// use. +void *context_salloc(context_t *context, u64 size) +{ + return arena_alloc(&context->scratch, size); +} + +void context_reset_read(context_t *context) +{ + arena_reset(&context->read); +} + +void context_reset_scratch(context_t *context) +{ + arena_reset(&context->scratch); +} + +void context_reset(context_t *context) +{ + arena_reset(&context->memory); + arena_reset(&context->read); + arena_reset(&context->scratch); +} + +void context_cleanup(context_t *context) +{ + if (!context) + return; + arena_cleanup(&context->memory); + arena_cleanup(&context->read); + arena_cleanup(&context->scratch); + memset(context, 0, sizeof(*context)); +} + +void context_report(context_t *context) +{ +#if DEBUG + // Figure this out at runtime + u64 mem_used = 0, mem_cap = 0; + for (page_t *page = context->memory.start; page; page = page->next) + { + mem_used += page->size; + mem_cap += page->capacity; + } + + u64 read_used = 0, read_cap = 0; + for (page_t *page = context->read.start; page; page = page->next) + { + read_used += page->size; + read_cap += page->capacity; + } + + u64 scr_used = 0, scr_cap = 0; + for (page_t *page = context->scratch.start; page; page = page->next) + { + scr_used += page->size; + scr_cap += page->capacity; + } + + info("<Context>: %luB/%luB main memory used\n", mem_used, mem_cap); + info("<Context>: %luB/%luB read space used\n", read_used, read_cap); + info("<Context>: %luB/%luB scratch space used\n", scr_used, scr_cap); +#endif +} diff --git a/lisp/context.h b/lisp/context.h new file mode 100644 index 0000000..2923ad9 --- /dev/null +++ b/lisp/context.h @@ -0,0 +1,38 @@ +/* Copyright (C) 2025 Aryadev Chavali + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General + Public License Version 2 + * for details. + + * You may distribute and modify this code under the terms of the + * GNU General Public License Version 2, which you should have received a copy + of along with this + * program. If not, please go to <https://www.gnu.org/licenses/>. + + * Created: 2025-05-12 + * Description: + */ + +#ifndef CONTEXT_H +#define CONTEXT_H + +#include <lib/arena.h> + +typedef struct Context +{ + arena_t memory, read, scratch; +} context_t; + +void *context_alloc(context_t *context, u64 size); +void *context_salloc(context_t *context, u64 size); +page_t *context_get_read_page(context_t *ctx); +void context_reset_read(context_t *context); +void context_reset_scratch(context_t *context); + +void context_reset(context_t *context); +void context_cleanup(context_t *context); +void context_report(context_t *context); + +#endif diff --git a/lisp/eval.c b/lisp/eval.c new file mode 100644 index 0000000..3cd05ca --- /dev/null +++ b/lisp/eval.c @@ -0,0 +1,24 @@ +/* Copyright (C) 2025 Aryadev Chavali + + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for + * details. + + * You may distribute and modify this code under the terms of the GNU General + * Public License Version 2, which you should have received a copy of along with + * this program. If not, please go to <https://www.gnu.org/licenses/>. + + * Created: 2025-04-18 + * Description: Evaluator implementation + */ + +#include <lisp/eval.h> + +err_t eval(context_t *ctx, lisp_t *obj, lisp_t **ret) +{ + (void)ctx; + (void)obj; + (void)ret; + TODO("implement evaluator"); +} diff --git a/lisp/eval.h b/lisp/eval.h new file mode 100644 index 0000000..bc7fb60 --- /dev/null +++ b/lisp/eval.h @@ -0,0 +1,28 @@ +/* Copyright (C) 2025 Aryadev Chavali + + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for + * details. + + * You may distribute and modify this code under the terms of the GNU General + * Public License Version 2, which you should have received a copy of along with + * this program. If not, please go to <https://www.gnu.org/licenses/>. + + * Created: 2025-04-18 + * Description: Evaluator + */ + +#ifndef EVAL_H +#define EVAL_H + +#include <lisp/lisp.h> + +typedef enum +{ + ERR_OK = 0, +} err_t; + +err_t eval(context_t *ctx, lisp_t *obj, lisp_t **ret); + +#endif diff --git a/lisp/lisp.c b/lisp/lisp.c new file mode 100644 index 0000000..24a4788 --- /dev/null +++ b/lisp/lisp.c @@ -0,0 +1,337 @@ +/* Copyright (C) 2025 Aryadev Chavali + + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for + * details. + + * You may distribute and modify this code under the terms of the GNU General + * Public License Version 2, which you should have received a copy of along with + * this program. If not, please go to <https://www.gnu.org/licenses/>. + + * Created: 2025-04-06 + * Description: + */ + +#include <lib/sv.h> +#include <lisp/lisp.h> + +#include <stdio.h> +#include <string.h> +#include <wchar.h> + +lisp_t *make_int(i64 integer) +{ + return tag_int(integer); +} + +lisp_t *make_sym(context_t *ctx, char *data, u64 size) +{ + // Small symbol optimisation + if (size <= 7) + return tag_ssym(data, size); + + // Make a cell with the data we need + cell_t *cell = context_alloc(ctx, sizeof(*cell) + size); + cell->size = size; + memcpy(cell->data, data, size); + + return tag_sym(cell); +} + +lisp_t *make_cons(context_t *ctx, lisp_t *car, lisp_t *cdr) +{ + cons_t *cons = context_alloc(ctx, sizeof(*cons)); + memset(cons, 0, sizeof(*cons)); + cons->car = car; + cons->cdr = cdr; + return tag_cons(cons); +} + +lisp_t *make_list(context_t *ctx, lisp_t **lisps, u64 size) +{ + if (!lisps) + return NIL; + lisp_t *cur = NIL; + for (u64 i = size; i > 0; --i) + cur = make_cons(ctx, lisps[i - 1], cur); + return cur; +} + +lisp_t *make_vec(context_t *ctx, u32 size) +{ + // Make a vector with the content + lisp_t *container = NIL; + vec_t *vec = context_alloc(ctx, sizeof(*vec)); + vec->size = 0; + vec->cap = size * sizeof(&container); + if (size == 0) + vec->data = NULL; + else + vec->data = context_alloc(ctx, vec->cap); + container = tag_vec(vec); + + return container; +} + +lisp_t *make_str(context_t *ctx, char *data, u64 size) +{ + if (size == 0) + // No need to allocate unless necessary + return tag_str(NIL); + // Make a vector with the content + vec_t *vec = context_alloc(ctx, sizeof(*vec)); + vec->data = context_alloc(ctx, sizeof(*vec->data) * size); + vec->cap = size; + vec->size = size; + memcpy(vec->data, data, size); + + return tag_str(vec); +} + +i64 as_int(lisp_t *obj) +{ + assert(IS_TAG(obj, INT)); + u64 p_obj = (u64)obj; + return UNTAG(p_obj, INT) | // Delete the tag + (NTH_BYTE(p_obj, 7) & 0x80) << 56 // duplicate the MSB (preserve sign) + ; +} + +cons_t *as_cons(lisp_t *obj) +{ + assert(IS_TAG(obj, CONS)); + return (cons_t *)UNTAG(obj, CONS); +} + +u32 as_char(lisp_t *obj) +{ + return (u32)UNTAG(obj, CHAR); +} + +cell_t *as_sym(lisp_t *obj) +{ + assert(IS_TAG(obj, SYM)); + return (cell_t *)UNTAG(obj, SYM); +} + +void as_ssym(lisp_t *obj, sv_t *container) +{ + assert(IS_TAG(obj, SSYM)); + u64 p_obj = (u64)obj; + container->size = NTH_BYTE(p_obj, 0) >> SHIFT_SSYM; + p_obj >>= 8; + memcpy(container->data, &p_obj, container->size); +} + +bool as_bool(lisp_t *obj) +{ + assert(IS_TAG(obj, BOOL)); + return (bool)UNTAG(obj, BOOL); +} + +f64 as_float(lisp_t *obj) +{ + assert(IS_TAG(obj, FLOAT)); + f64 *f = (f64 *)UNTAG(obj, FLOAT); + return f ? *f : 0; +} + +vec_t *as_vec(lisp_t *obj) +{ + assert(IS_TAG(obj, VEC)); + return (vec_t *)UNTAG(obj, VEC); +} + +vec_t *as_str(lisp_t *obj) +{ + assert(IS_TAG(obj, STR)); + return (vec_t *)UNTAG(obj, STR); +} + +sv_t serialise(context_t *ctx, lisp_t *ptr) +{ + enum Tag t = tag_get(ptr); + switch (t) + { + case TAG_NIL: + { + return SV("NIL", 3); + } + case TAG_INT: + { + i64 i = as_int(ptr); + sv_t s = sv_fmt(&ctx->scratch, +#if DEBUG > 1 + "int[" +#endif + "%ld" +#if DEBUG > 1 + "]" +#endif + , + i); + return s; + } + case TAG_CHAR: + { + u32 codepoint = as_char(ptr); + return sv_fmt(&ctx->scratch, +#if DEBUG > 1 + "char[" +#endif + "%lc" +#if DEBUG > 1 + "]" +#endif + , + codepoint); + } + case TAG_SYM: + { + cell_t *cell = as_sym(ptr); + sv_t s = sv_make(&ctx->scratch, (char *)cell->data, cell->size); +#if DEBUG > 1 + s = sv_fmt(&ctx->scratch, "sym[" PR_SV "]", SV_FMT(s)); +#endif + return s; + } + case TAG_SSYM: + { + char data[7]; + sv_t rsym = SV(data, 0); + as_ssym(ptr, &rsym); + sv_t s = sv_copy(&ctx->scratch, rsym); + +#if DEBUG > 1 + s = sv_fmt(&ctx->scratch, "ssym[" PR_SV "]", SV_FMT(s)); +#endif + return s; + } + case TAG_BOOL: + { + return sv_fmt(&ctx->scratch, +#if DEBUG > 1 + "bool[" +#endif + "%s" +#if DEBUG > 1 + "]" +#endif + , + as_bool(ptr) ? "#t" : "#f"); + } + case TAG_FLOAT: + { + return sv_fmt(&ctx->scratch, +#if DEBUG > 1 + "float[" +#endif + "%lf" +#if DEBUG > 1 + "]" +#endif + , + as_float(ptr)); + } + case TAG_CONS: + { + if (!CAR(ptr) && !CDR(ptr)) +#if DEBUG > 1 + return SV("lst[NIL]", 8); +#else + return SV("NIL", 3); +#endif + + sv_t s = {0}; + + for (lisp_t *lsp = ptr; lsp; lsp = IS_TAG(lsp, CONS) ? CDR(lsp) : NIL) + { + lisp_t *lmember = lsp; + if (IS_TAG(lsp, CONS)) + lmember = CAR(lmember); + + sv_t member = serialise(ctx, lmember); + s = sv_concat(&ctx->scratch, s, member); + if (IS_TAG(lsp, CONS) && !CDR(lsp)) + continue; + else if (IS_TAG(CDR(lsp), CONS)) + // normal list + s = sv_append(&ctx->scratch, s, " ", 1); + else + // dotted list + s = sv_append(&ctx->scratch, s, " . ", 3); + } + +#if DEBUG > 1 + s = sv_fmt(&ctx->scratch, "lst[" PR_SV "]", SV_FMT(s)); +#else + s = sv_fmt(&ctx->scratch, "(" PR_SV ")", SV_FMT(s)); +#endif + return s; + } + case TAG_VEC: + { + vec_t *vec = as_vec(ptr); + if (!vec) +#if DEBUG > 1 + return SV("vec[NIL]", 8); +#else + return SV("#()", 3); +#endif + else if (vec->size < sizeof(&ptr)) +#if DEBUG > 1 + return SV("vec[0/0 #()]", 13); +#else + return SV("#()", 3); +#endif + + sv_t s = {0}; + for (u64 i = 0; i < vec->size / sizeof(&ptr); ++i) + { + lisp_t *lmember = ((lisp_t **)vec->data)[i]; + sv_t member = serialise(ctx, lmember); + s = sv_concat(&ctx->scratch, s, member); + if (i == ((vec->size / sizeof(&ptr)) - 1)) + continue; + s = sv_append(&ctx->scratch, s, " ", 1); + } + +#if DEBUG > 1 + s = sv_fmt(&ctx->scratch, "vec[%lu/%lu #(" PR_SV ")]", vec->size, vec->cap, + SV_FMT(s)); +#else + s = sv_fmt(&ctx->scratch, "#(" PR_SV ")", SV_FMT(s)); +#endif + return s; + break; + } + case TAG_STR: + { + vec_t *vec = as_str(ptr); + sv_t sv = {0}; + if (vec) + sv = SV((char *)vec->data, vec->size); + else + sv = SV("", 0); + return sv_fmt(&ctx->scratch, +#if DEBUG > 1 + "str[" +#else + "\"" +#endif + PR_SV +#if DEBUG > 1 + "]" +#else + "\"" +#endif + , + SV_FMT(sv)); + } + case NUM_TAGS: + default: + assert(false && "serialise: unreachable"); + return SV(0, 0); + } +} diff --git a/lisp/lisp.h b/lisp/lisp.h new file mode 100644 index 0000000..2892bdb --- /dev/null +++ b/lisp/lisp.h @@ -0,0 +1,57 @@ +/* Copyright (C) 2025 Aryadev Chavali + + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for + * details. + + * You may distribute and modify this code under the terms of the GNU General + * Public License Version 2, which you should have received a copy of along with + * this program. If not, please go to <https://www.gnu.org/licenses/>. + + * Created: 2025-04-06 + * Description: Object model where we deal with contexts + */ + +#ifndef LISP_H +#define LISP_H + +#include <lib/sv.h> +#include <lib/vec.h> +#include <lisp/context.h> +#include <lisp/tag.h> + +typedef struct Cons +{ + lisp_t *car, *cdr; +} cons_t; + +typedef struct Cell +{ + u64 size; + u8 data[]; +} cell_t; + +lisp_t *make_int(i64 integer); +lisp_t *make_sym(context_t *ctx, char *data, u64 size); +lisp_t *make_cons(context_t *ctx, lisp_t *car, lisp_t *cdr); +lisp_t *make_list(context_t *ctx, lisp_t **lisps, u64 size); +lisp_t *make_vec(context_t *ctx, u32 size); +lisp_t *make_str(context_t *ctx, char *data, u64 size); + +i64 as_int(lisp_t *obj); +u32 as_char(lisp_t *obj); +cell_t *as_sym(lisp_t *obj); +void as_ssym(lisp_t *obj, sv_t *sv); +bool as_bool(lisp_t *obj); +f64 as_float(lisp_t *obj); +cons_t *as_cons(lisp_t *obj); +vec_t *as_vec(lisp_t *obj); +vec_t *as_str(lisp_t *obj); + +sv_t serialise(context_t *ctx, lisp_t *lisp); + +#define CAR(PTR) (as_cons(PTR)->car) +#define CDR(PTR) (as_cons(PTR)->cdr) + +#endif diff --git a/lisp/reader.c b/lisp/reader.c new file mode 100644 index 0000000..0c8a914 --- /dev/null +++ b/lisp/reader.c @@ -0,0 +1,426 @@ +/* Copyright (C) 2025 Aryadev Chavali + + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for + * details. + + * You may distribute and modify this code under the terms of the GNU General + * Public License Version 2, which you should have received a copy of along with + * this program. If not, please go to <https://www.gnu.org/licenses/>. + + * Created: 2025-04-16 + * Description: Implementation of parser + */ + +#include <lisp/reader.h> + +#include <ctype.h> +#include <string.h> + +bool is_digit(char c) +{ + return isdigit(c); +} + +bool is_alpha(char c) +{ + return isalpha(c); +} + +bool is_space(char c) +{ + return isspace(c); +} + +bool is_skip(char c) +{ + return is_space(c) || c == ';'; +} + +bool is_sym(char c) +{ + return strchr(SYM_CHARS, c) != NULL; +} + +void input_from_sv(context_t *ctx, input_t *inp, const char *name, sv_t sv) +{ + inp->name = name; + inp->str = sv_copy(&ctx->read, sv); +} + +void input_from_fp(context_t *ctx, input_t *input, const char *name, FILE *fp) +{ + input->name = name; + // TODO: Choose a best fit (i.e. maximal capacity, unused) page + page_t *page = page_create(INPUT_CHUNK_SIZE); + // chunk should be in scratch space so we can reset it later. + char *chunk = context_salloc(ctx, INPUT_CHUNK_SIZE); + + u64 total_size = 0, size_read = 0; + while (!feof(fp)) + { + size_read = fread(chunk, 1, INPUT_CHUNK_SIZE, fp); + if (size_read > 0) + { + page_rappend(&page, chunk, size_read); + total_size += size_read; + } + } + + input->str = SV((char *)page->data, total_size); + + // Memory cleanup + context_reset_scratch(ctx); + arena_attach(&ctx->read, page); +} + +bool input_eof(input_t *input) +{ + return !input || (input->offset >= input->str.size) || + (input->str.data[input->offset] == '\0'); +} + +char input_peek(input_t *input, u64 offset) +{ + if (input_eof(input) || input->offset + offset >= input->str.size) + return '\0'; + return input->str.data[input->offset + offset]; +} + +char input_next(input_t *input, u64 offset) +{ + if (input_eof(input) || input->offset + offset >= input->str.size) + return '\0'; + input->offset += offset; + return input->str.data[input->offset]; +} + +void input_skip(input_t *inp) +{ + while (!input_eof(inp)) + { + sv_t current = sv_cut(inp->str, inp->offset); + // Skip any whitespace + inp->offset += sv_while(current, is_space); + current = sv_cut(inp->str, inp->offset); + // Is there a comment to skip? + if (current.size && current.data[0] == ';') + { + // Skip till newline + i64 newline = sv_find_subcstr(current, "\n", 1); + if (newline < 0) + inp->offset = inp->str.size; + else + inp->offset += newline + 1; + // Then skip any whitespace + } + // Multiline comment to skip? + else if (current.size > 2 && strncmp(current.data, "#|", 2) == 0) + { + i64 offset = sv_find_subcstr(current, "|#", 2); + if (offset < 0) + inp->offset = inp->str.size; + else + inp->offset += offset + 2; + // Then skip any whitespace + } + // Nothing to skip, stop. + else + break; + } + return; +} + +perr_t parse_int(context_t *ctx, input_t *inp, lisp_t **ret) +{ + debug("parse_int[%lu] => ", inp->offset); + + // TODO: Parse arbitrary sized integers + (void)ctx; + + bool negative = (input_peek(inp, 0) == '-'); + sv_t current = sv_cut(inp->str, inp->offset + (negative ? 1 : 0)); + sv_t digits = sv_chop(current, sv_while(current, is_digit)); + + debug("`" PR_SV "` => ", SV_FMT(digits)); + i64 x = (negative ? -1L : 1L) * strtol(digits.data, NULL, 10); + debug("%ld\n", x); + + input_next(inp, digits.size + (negative ? 1 : 0)); + + *ret = make_int(x); + return PERR_OK; +} + +perr_t parse_sym(context_t *ctx, input_t *inp, lisp_t **ret) +{ + debug("parse_sym[%lu] => ", inp->offset); + + sv_t current = sv_cut(inp->str, inp->offset); + sv_t sym = sv_chop(current, sv_while(current, is_sym)); + debug("`" PR_SV "`\n", SV_FMT(sym)); + + if (sym.size == 3) + { + // NOTE: We can't mutate sym directly because it's on `read` space. + + // TODO: Make this beautiful please. + char buf[3]; + for (u64 i = 0; i < 3; ++i) + buf[i] = toupper(sym.data[i]); + + // NOTE: NIL symbol to actual NIL + if (strncmp(buf, "NIL", 3) == 0) + { + input_next(inp, 3); + return NIL; + } + } + + lisp_t *lsym = make_sym(ctx, sym.data, sym.size); + input_next(inp, sym.size); + *ret = lsym; + + return PERR_OK; +} + +perr_t parse_bool(context_t *ctx, input_t *inp, lisp_t **ret) +{ + (void)ctx; + debug("parse_bool[%lu] => ", inp->offset); + char c = input_peek(inp, 1); + bool b = -1; + if (c == 't') + b = true; + else if (c == 'f') + b = false; + else + return PERR_EXPECTED_BOOLEAN; + *ret = tag_bool(b); + input_next(inp, 2); + return PERR_OK; +} + +perr_t parse_cons(context_t *ctx, input_t *inp, lisp_t **ret) +{ + // TODO: Put this in a symbol table + lisp_t *lisp_dot = make_sym(ctx, ".", 1); + debug("parse_cons[%lu] => (\n", inp->offset); + inp->offset += 1; + + lisp_t *root = NIL; + lisp_t **cur = NIL; + bool dotted = false; + + while (!input_eof(inp) && input_peek(inp, 0) != ')') + { + lisp_t *lisp = NIL; + perr_t res = parse(ctx, inp, &lisp); + if (res) + return res; + + // This is cheap to do + if (lisp == lisp_dot) + { + dotted = true; + continue; + } + + if (!root) + { + root = make_cons(ctx, lisp, NIL); + cur = &root; + } + else if (!dotted) + *cur = make_cons(ctx, lisp, NIL); + else + *cur = lisp; + + if (cur && !dotted) + cur = &as_cons(*cur)->cdr; + + input_skip(inp); + } + + if (input_peek(inp, 0) != ')') + return PERR_EXPECTED_CLOSE_BRACKET; + + input_next(inp, 1); + + debug(")\n"); + *ret = root; + return PERR_OK; +} + +perr_t parse_vec(context_t *ctx, input_t *inp, lisp_t **ret) +{ + debug("parse_vec[%lu] => [\n", inp->offset); + input_next(inp, 2); + + lisp_t *lvec = make_vec(ctx, 0); + vec_t *vec = as_vec(lvec); + + while (!input_eof(inp) && input_peek(inp, 0) != ')') + { + lisp_t *lisp = NIL; + perr_t res = parse(ctx, inp, &lisp); + if (res) + return res; + + vec_append(&ctx->memory, vec, &lisp, sizeof(lisp)); + input_skip(inp); + } + + if (input_peek(inp, 0) != ')') + return PERR_EXPECTED_CLOSE_BRACKET; + + input_next(inp, 1); + + debug("]\n"); + *ret = lvec; + return PERR_OK; +} + +perr_t parse_str(context_t *ctx, input_t *inp, lisp_t **ret) +{ + debug("parse_str[%lu] => ", inp->offset); + input_next(inp, 1); // 1 for the first speechmark + sv_t sv = sv_cut(inp->str, inp->offset); + i64 size = sv_find_subcstr(sv, "\"", 1); + if (size < 0) + return PERR_EXPECTED_SPEECH_MARK; + + input_next(inp, size + 1); // 1 for that last speechmark + sv_t str_content = sv_chop(sv, size); + debug("\"" PR_SV "\"\n", SV_FMT(str_content)); + *ret = make_str(ctx, str_content.data, str_content.size); + return PERR_OK; +} + +perr_t parse_quote(context_t *ctx, input_t *inp, lisp_t **ret) +{ + char c = input_peek(inp, 0); + if (!(c == '\'' || c == '`')) + return PERR_UNEXPECTED_CHAR; + input_next(inp, 1); + sv_t prefix = {0}; + if (c == '\'') + prefix = SV("quote", 5); + else if (c == '`') + prefix = SV("quasiquote", 10); + lisp_t *root = make_cons(ctx, make_sym(ctx, prefix.data, prefix.size), NIL); + lisp_t *rest = NIL; + perr_t perr = parse(ctx, inp, &rest); + if (perr) + return perr; + CDR(root) = make_cons(ctx, rest, NIL); + *ret = root; + return PERR_OK; +} + +// TODO: Make this interactable with user once we have evaluation +perr_t parse_reader_macro(context_t *ctx, input_t *inp, lisp_t **ret) +{ + char c = input_peek(inp, 1); + if (c == '\\') + { + // character or weird base integer + TODO("Not implemented reader macro for characters or weird bases"); + } + else if (c == '(') + return parse_vec(ctx, inp, ret); + else if (c == 't' || c == 'f') + return parse_bool(ctx, inp, ret); + else if (c == 'e') + { + // Scientific notation for floats + } + return PERR_UNEXPECTED_READER_MACRO_SYMBOL; +} + +static_assert(NUM_TAGS == 9); +perr_t parse(context_t *ctx, input_t *inp, lisp_t **ret) +{ + debug("parse => "); + input_skip(inp); + if (input_eof(inp)) + return PERR_EOF; + + char c = input_peek(inp, 0); + + if (is_digit(c) || (c == '-' && is_digit(input_peek(inp, 1)))) + return parse_int(ctx, inp, ret); + else if (c == '#') + return parse_reader_macro(ctx, inp, ret); + else if (is_sym(c)) + return parse_sym(ctx, inp, ret); + else if (c == '(') + return parse_cons(ctx, inp, ret); + else if (c == '\'' || c == '`') + return parse_quote(ctx, inp, ret); + else if (c == '\"') + return parse_str(ctx, inp, ret); + else + return PERR_UNEXPECTED_CHAR; +} + +perr_t parse_all(context_t *ctx, input_t *inp, vec_t *vec) +{ + while (!input_eof(inp)) + { + lisp_t *member = NIL; + perr_t err = parse(ctx, inp, &member); + + if (err) + return err; + else + vec_append(&ctx->scratch, vec, &member, sizeof(member)); + + input_skip(inp); + } + return PERR_OK; +} + +int print_perror(FILE *fp, input_t *inp, perr_t error) +{ + pos_t pos = input_offset_to_pos(inp); + fprintf(fp, "%s:%lu:%lu: %s", inp->name, pos.line, pos.col, + perr_to_cstr(error)); + switch (error) + { + case PERR_UNEXPECTED_CHAR: + fprintf(fp, "(`%c`)", input_peek(inp, 0)); + break; + case PERR_OK: + case PERR_EOF: + case PERR_EXPECTED_BOOLEAN: + case PERR_UNEXPECTED_READER_MACRO_SYMBOL: + case PERR_EXPECTED_CLOSE_BRACKET: + case PERR_EXPECTED_SPEECH_MARK: + default: + break; + } + fprintf(stderr, "\n"); + + return error; +} + +pos_t input_offset_to_pos(input_t *inp) +{ + pos_t pos = {.col = 1, .line = 1}; + for (u64 i = 0; i < inp->offset && i < inp->str.size; ++i) + { + char c = (inp->str.data[i]); + if (c == '\n') + { + ++pos.line; + pos.col = 1; + } + else + { + ++pos.col; + } + } + return pos; +} diff --git a/lisp/reader.h b/lisp/reader.h new file mode 100644 index 0000000..4bd0578 --- /dev/null +++ b/lisp/reader.h @@ -0,0 +1,98 @@ +/* Copyright (C) 2025 Aryadev Chavali + + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for + * details. + + * You may distribute and modify this code under the terms of the GNU General + * Public License Version 2, which you should have received a copy of along with + * this program. If not, please go to <https://www.gnu.org/licenses/>. + + * Created: 2025-04-16 + * Description: Parsing Lisp! + */ + +#ifndef READER_H +#define READER_H + +#include <lib/vec.h> +#include <lisp/context.h> +#include <lisp/lisp.h> + +#define INPUT_CHUNK_SIZE 512 +static const char SYM_CHARS[] = + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + "¬!£$%^&*_-+={[]}:@~#<,>.?/"; + +typedef struct +{ + const char *name; + u64 offset; + sv_t str; +} input_t; + +void input_from_sv(context_t *ctx, input_t *inp, const char *name, sv_t sv); +void input_from_fp(context_t *ctx, input_t *input, const char *name, FILE *fp); +bool input_eof(input_t *input); + +typedef struct +{ + u64 col, line; +} pos_t; + +pos_t input_offset_to_pos(input_t *inp); + +typedef enum +{ + PERR_OK = 0, + PERR_EOF, + PERR_UNEXPECTED_CHAR, + PERR_EXPECTED_CLOSE_BRACKET, + PERR_EXPECTED_SPEECH_MARK, + PERR_UNEXPECTED_READER_MACRO_SYMBOL, + PERR_EXPECTED_BOOLEAN, +} perr_t; + +static inline const char *perr_to_cstr(perr_t perr) +{ + switch (perr) + { + case PERR_OK: + return "OK"; + break; + case PERR_EOF: + return "EOF"; + break; + case PERR_UNEXPECTED_CHAR: + return "UNEXPECTED_CHAR"; + break; + case PERR_EXPECTED_CLOSE_BRACKET: + return "EXPECTED_CLOSE_BRACKET"; + break; + case PERR_EXPECTED_SPEECH_MARK: + return "EXPECTED_SPEECH_MARK"; + break; + case PERR_UNEXPECTED_READER_MACRO_SYMBOL: + return "UNEXPECTED_READER_MACRO_SYMBOL"; + break; + case PERR_EXPECTED_BOOLEAN: + return "EXPECTED_BOOLEAN"; + break; + } + assert(false && "perr_to_cstr: unreachable"); + return ""; +} + +typedef struct +{ + lisp_t *result; + perr_t error; +} pres_t; + +perr_t parse(context_t *ctx, input_t *str, lisp_t **ret); +perr_t parse_all(context_t *ctx, input_t *str, vec_t *vec); + +int print_perror(FILE *fp, input_t *inp, perr_t error); + +#endif diff --git a/lisp/tag.c b/lisp/tag.c new file mode 100644 index 0000000..26db6d5 --- /dev/null +++ b/lisp/tag.c @@ -0,0 +1,94 @@ +/* Copyright (C) 2025 Aryadev Chavali + + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for + * details. + + * You may distribute and modify this code under the terms of the GNU General + * Public License Version 2, which you should have received a copy of along with + * this program. If not, please go to <https://www.gnu.org/licenses/>. + + * Created: 2025-04-06 + * Description: + */ + +#include <lisp/tag.h> +#include <string.h> + +lisp_t *tag_int(i64 i) +{ + return TAG((u64)i, INT); +} + +lisp_t *tag_cons(void *ptr) +{ + return TAG((u64)ptr, CONS); +} + +lisp_t *tag_sym(void *ptr) +{ + return TAG((u64)ptr, SYM); +} + +lisp_t *tag_ssym(const char *data, size_t size) +{ + assert(size <= 7); + u8 buffer[sizeof(u64)]; + memset(buffer, 0, sizeof(buffer)); + // in 8 bits we have: + // size - 3 bits (up to 7) + // tag - 5 bits + buffer[0] = size; + buffer[0] <<= SHIFT_SSYM; + buffer[0] |= TAG_SSYM; + memcpy(buffer + 1, data, size); + u64 word = 0; + memcpy(&word, buffer, sizeof(u64)); + return (lisp_t *)word; +} + +lisp_t *tag_bool(bool b) +{ + return TAG((u64)b, BOOL); +} + +lisp_t *tag_vec(void *ptr) +{ + return TAG((u64)ptr, VEC); +} + +lisp_t *tag_str(void *ptr) +{ + return TAG((u64)ptr, STR); +} + +lisp_t *tag_char(u32 codepoint) +{ + u64 w = codepoint; + return TAG(w, CHAR); +} + +enum Tag tag_get(lisp_t *ptr) +{ + static_assert(NUM_TAGS == 9); + if (!ptr) + return TAG_NIL; + else if (IS_TAG(ptr, INT)) + return TAG_INT; + else if (IS_TAG(ptr, CHAR)) + return TAG_CHAR; + else if (IS_TAG(ptr, SYM)) + return TAG_SYM; + else if (IS_TAG(ptr, SSYM)) + return TAG_SSYM; + else if (IS_TAG(ptr, BOOL)) + return TAG_BOOL; + else if (IS_TAG(ptr, VEC)) + return TAG_VEC; + else if (IS_TAG(ptr, STR)) + return TAG_STR; + else if (IS_TAG(ptr, CONS)) + return TAG_CONS; + return 0; +} diff --git a/lisp/tag.h b/lisp/tag.h new file mode 100644 index 0000000..06b7b3f --- /dev/null +++ b/lisp/tag.h @@ -0,0 +1,92 @@ +/* Copyright (C) 2025 Aryadev Chavali + + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License Version 2 for + * details. + + * You may distribute and modify this code under the terms of the GNU General + * Public License Version 2, which you should have received a copy of along with + * this program. If not, please go to <https://www.gnu.org/licenses/>. + + * Created: 2025-04-06 + * Description: + */ + +#ifndef TAG_H +#define TAG_H + +#include <base.h> + +#include <stdbool.h> +#include <stdlib.h> + +// Opaque structure to make tagged pointers a separate type from general +// pointers +typedef struct Obj lisp_t; + +#define NIL NULL + +enum Tag +{ + TAG_NIL = 0b00000000, // Atomic types + TAG_INT = 0b00000001, // special so we can encode 63 bit integers + TAG_SYM = 0b00000100, + TAG_SSYM = 0b00001000, + TAG_CHAR = 0b00001100, + TAG_BOOL = 0b00010000, + TAG_FLOAT = 0b00010100, + TAG_CONS = 0b00000010, // Container types + TAG_VEC = 0b00000110, + TAG_STR = 0b00001010, + + NUM_TAGS = 9, +}; + +enum Shift +{ + SHIFT_INT = 1, + SHIFT_SSYM = 5, + SHIFT_CHAR = 8, + SHIFT_SYM = 8, + SHIFT_BOOL = 8, + SHIFT_FLOAT = 8, + + SHIFT_CONS = 8, + SHIFT_VEC = 8, + SHIFT_STR = 8, +}; + +enum Mask +{ + MASK_INT = 0b00000001, + MASK_SSYM = 0b00011111, + MASK_SYM = 0b11111111, + MASK_CHAR = 0b11111111, + MASK_BOOL = 0b11111111, + MASK_FLOAT = 0b11111111, + + MASK_CONS = 0b11111111, + MASK_VEC = 0b11111111, + MASK_STR = 0b11111111, +}; + +#define TAG(PTR, TYPE) ((lisp_t *)(((PTR) << SHIFT_##TYPE) | TAG_##TYPE)) +#define IS_TAG(PTR, TYPE) (((u64)(PTR) & MASK_##TYPE) == TAG_##TYPE) +#define UNTAG(PTR, TYPE) (((u64)PTR) >> SHIFT_##TYPE) + +enum Tag tag_get(lisp_t *ptr); + +lisp_t *tag_int(i64 i); +lisp_t *tag_char(u32 codepoint); +lisp_t *tag_sym(void *ptr); +lisp_t *tag_ssym(const char *data, size_t size); +lisp_t *tag_bool(bool b); +lisp_t *tag_vec(void *ptr); +lisp_t *tag_str(void *ptr); +lisp_t *tag_cons(void *ptr); + +#define INT_MAX ((1L << 62) - 1) +#define INT_MIN (-(1L << 62)) + +#endif |