diff options
author | Aryadev Chavali <aryadev@aryadevchavali.com> | 2025-08-19 22:39:45 +0100 |
---|---|---|
committer | Aryadev Chavali <aryadev@aryadevchavali.com> | 2025-08-19 22:40:50 +0100 |
commit | f14a2680fd030fbfa75a63bea8110db7c414e805 (patch) | |
tree | c4d580a45cf9bedd4060c5a8d261e913a0893566 | |
download | alisp-f14a2680fd030fbfa75a63bea8110db7c414e805.tar.gz alisp-f14a2680fd030fbfa75a63bea8110db7c414e805.tar.bz2 alisp-f14a2680fd030fbfa75a63bea8110db7c414e805.zip |
200 line symbol table implementation and the first commit
Set up build system (POSIX sh), gitignore, basic C file with an
implementation of something I really wanted to set up.
It just hashes a snippet of lorem ipsum. Testing seems to indicate
it's working. That's all it does lol.
This is a really pressing matter; all my previous Lisps always just
made the strings on the fly and that irked me deeply. I want a smart
implementation that really tries to save memory on something as
intensive as symbols.
-rw-r--r-- | .dir-locals.el | 6 | ||||
-rw-r--r-- | .gitignore | 5 | ||||
-rw-r--r-- | LICENSE | 24 | ||||
-rw-r--r-- | build.sh | 9 | ||||
-rw-r--r-- | main.c | 201 |
5 files changed, 245 insertions, 0 deletions
diff --git a/.dir-locals.el b/.dir-locals.el new file mode 100644 index 0000000..9dd9c84 --- /dev/null +++ b/.dir-locals.el @@ -0,0 +1,6 @@ +;;; Directory Local Variables -*- no-byte-compile: t -*- +;;; For more information see (info "(emacs) Directory Variables") + +((nil . ((compile-command . "sh build.sh") + (+license/license-choice . "Unlicense"))) + (c-mode . ((mode . clang-format)))) diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..af974b9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.o +*.out +.cache/ +compile_commands.json +TAGS
\ No newline at end of file @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to <https://unlicense.org>
/*
 * Build: this file is compiled by the accompanying build.sh, reproduced
 * here for reference:
 *
 *   #!/usr/bin/env sh
 *
 *   CFLAGS="-Wall -Wextra -std=c11 -ggdb"
 *   SRC="main.c"
 *   OUT="main.out"
 *
 *   set -xe
 *
 *   cc $CFLAGS -o $OUT $SRC;
 */

/* Copyright (C) 2025 Aryadev Chavali

 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the Unlicense
 * for details.

 * You may distribute and modify this code under the terms of the
 * Unlicense, which you should have received a copy of along with this
 * program.  If not, please go to <https://unlicense.org/>.

 * Created: 2025-08-19
 * Description: Entrypoint
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* NOTE: both macros evaluate their arguments more than once; do not pass
 * expressions with side effects. */
#define MAX(A, B) ((A) > (B) ? (A) : (B))
#define MIN(A, B) ((A) < (B) ? (A) : (B))

typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;

typedef int8_t i8;
typedef int16_t i16;
typedef int32_t i32;
typedef int64_t i64;

/*
 * Growable byte vector.  The header lives immediately *before* the pointer
 * handed to the user, so user code can treat the vector as a plain array
 * while VEC_GET() recovers the header from that pointer.
 */
typedef struct
{
  u64 size, capacity;
  u8 bytes[];
} vec_t;

#define VEC_GET(P)  (((vec_t *)(P)) - 1)
#define VEC_SIZE(P) (VEC_GET(P)->size)
#define VEC_CAP(P)  (VEC_GET(P)->capacity)
#define VEC_MULT    2

/*
 * Allocate a zeroed vector with room for `size` bytes and store the pointer
 * to its payload in *ptr.  On allocation failure (or if the requested size
 * cannot be represented) *ptr is set to NULL.  Does nothing if ptr is NULL.
 */
void vec_make(void **ptr, u64 size)
{
  if (!ptr)
    return;
  *ptr = NULL;
  // Header + payload must fit in a size_t.
  if (size > SIZE_MAX - sizeof(vec_t))
    return;
  vec_t *vector = calloc(1, sizeof(*vector) + size);
  if (!vector)
    return;
  vector->size     = 0;
  vector->capacity = size;
  *ptr             = (vector + 1);
}

/*
 * Release a vector created by vec_make() and reset *data to NULL.  Safe to
 * call with a NULL handle or an already-freed (NULL) vector.
 */
void vec_free(void **data)
{
  if (!data || !*data)
    return;
  free(VEC_GET(*data));
  *data = NULL;
}

/*
 * Make sure at least `space` unused bytes are available after the current
 * contents, growing (and therefore possibly moving) the vector if needed.
 * If growth fails the vector is left untouched; callers that then write
 * must re-check the capacity themselves.
 */
void vec_ensure_remaining(void **ptr, u64 space)
{
  if (!ptr || !*ptr)
    return;
  vec_t *vec = VEC_GET(*ptr);
  if (vec->capacity - vec->size >= space)
    return;
  // The required total size must be representable.
  if (space > UINT64_MAX - vec->size)
    return;

  u64 needed  = vec->size + space;
  u64 doubled = vec->capacity <= UINT64_MAX / VEC_MULT
                    ? vec->capacity * VEC_MULT
                    : needed;

  void *new_vec = NULL;
  vec_make(&new_vec, MAX(doubled, needed));
  if (!new_vec)
    return;
  VEC_SIZE(new_vec) = vec->size;
  memcpy(new_vec, *ptr, vec->size);
  vec_free(ptr);
  *ptr = new_vec;
}

/*
 * Append one byte to the vector.  The byte is dropped (vector unchanged) if
 * the handle is NULL or the vector could not be grown.
 */
void vec_append_byte(void **ptr, u8 byte)
{
  if (!ptr || !*ptr)
    return;
  vec_ensure_remaining(ptr, 1);
  vec_t *vec = VEC_GET(*ptr);
  if (vec->size >= vec->capacity)
    return;
  vec->bytes[vec->size++] = byte;
}

/*
 * Append `size` bytes from `data` to the vector.  Nothing is written if the
 * handle or data is NULL, size is 0, or the vector could not be grown.
 */
void vec_append(void **ptr, void *data, u64 size)
{
  if (!ptr || !*ptr || !data || size == 0)
    return;
  vec_ensure_remaining(ptr, size);
  vec_t *vec = VEC_GET(*ptr);
  if (vec->capacity - vec->size < size)
    return;
  // Index through the u8 payload: arithmetic on void * is not ISO C.
  memcpy(vec->bytes + vec->size, data, size);
  vec->size += size;
}

/*
 * Create a new vector in *dest holding a copy of the used bytes of *src.
 * *dest is NULL afterwards if allocation failed.  Cloning a handle onto
 * itself is rejected, since it would orphan the source vector.
 */
void vec_clone(void **dest, void **src)
{
  if (!dest || !src || !*src || dest == src)
    return;
  void *source = *src;
  u64 size     = VEC_SIZE(source);
  vec_make(dest, size);
  if (!*dest)
    return;
  memcpy(*dest, source, size);
  VEC_SIZE(*dest) = size;
}

/* Sized string view: `size` bytes starting at `data` (not necessarily
 * NUL-terminated). */
typedef struct
{
  u64 size;
  char *data;
} sv_t;

#define SV(DATA, SIZE) ((sv_t){.data = (DATA), .size = (SIZE)})
#define SV_FMT(SV)     (int)(SV).size, (SV).data
#define PR_SV          "%.*s"

/*
 * Duplicate a string view onto the heap.  The copy always owns at least one
 * byte and is NUL-terminated, so even an empty view yields a non-NULL data
 * pointer.  Returns SV(NULL, 0) on allocation failure or if `old` claims a
 * non-zero size with a NULL data pointer.  The caller owns (and must
 * free()) the returned data.
 */
sv_t sv_copy(sv_t old)
{
  if (old.size > SIZE_MAX - 1 || (old.size > 0 && !old.data))
    return SV(NULL, 0);
  char *newstr = calloc(old.size + 1, sizeof(*newstr));
  if (!newstr)
    return SV(NULL, 0);
  if (old.size > 0)
    memcpy(newstr, old.data, old.size);
  return SV(newstr, old.size);
}

/*
 * Open-addressed (linear probing) table of interned strings.  A slot whose
 * `data` is NULL is empty.  `capacity` is always a power of two so that a
 * hash can be reduced with a mask.
 */
typedef struct
{
  u64 count;     // How many strings?
  u64 capacity;  // How many entry buckets?
  sv_t *entries; // this is actually a vector on the inside lol
} sym_table_t;

/*
 * djb2 string hash (hash * 33 + byte).  Bytes are read as unsigned so the
 * result does not depend on whether plain `char` is signed.
 */
u64 djb2(sv_t string)
{
  u64 hash = 5381;
  for (u64 i = 0; i < string.size; ++i)
    hash = (u8)string.data[i] + (hash + (hash << 5));
  return hash;
}

#define SYM_TABLE_INIT_SIZE 1024

/* Allocate a zeroed bucket array of `capacity` slots, or NULL on failure. */
static sv_t *sym_table_alloc_entries(u64 capacity)
{
  if (capacity > UINT64_MAX / sizeof(sv_t))
    return NULL;
  void *entries = NULL;
  vec_make(&entries, capacity * sizeof(sv_t));
  if (entries)
    VEC_SIZE(entries) = capacity * sizeof(sv_t); // every slot is in use
  return entries;
}

/*
 * Linear-probe for `sv`.  Returns the index of the slot that holds an equal
 * string, or of the first empty slot on its probe sequence.  Requires at
 * least one empty slot in the table, otherwise the probe cannot terminate.
 */
static u64 sym_table_probe(const sym_table_t *table, sv_t sv)
{
  u64 mask  = table->capacity - 1;
  u64 index = djb2(sv) & mask;
  while (table->entries[index].data)
  {
    sv_t comp = table->entries[index];
    // memcmp, not strncmp: symbols are sized and may contain NUL bytes.
    if (sv.size == comp.size &&
        (sv.size == 0 || memcmp(sv.data, comp.data, sv.size) == 0))
      break;
    index = (index + 1) & mask;
  }
  return index;
}

/*
 * Double the bucket array and re-insert every entry.  Only the sv_t slots
 * move; the interned character data (and thus every pointer previously
 * returned by sym_table_find) stays where it is.  Returns 1 on success, 0
 * if the table could not be grown (the table is then unchanged).
 */
static int sym_table_grow(sym_table_t *table)
{
  if (table->capacity > UINT64_MAX / 2)
    return 0;
  u64 new_capacity  = table->capacity * 2;
  sv_t *new_entries = sym_table_alloc_entries(new_capacity);
  if (!new_entries)
    return 0;

  u64 mask = new_capacity - 1;
  for (u64 i = 0; i < table->capacity; ++i)
  {
    sv_t entry = table->entries[i];
    if (!entry.data)
      continue;
    u64 index = djb2(entry) & mask;
    while (new_entries[index].data)
      index = (index + 1) & mask;
    new_entries[index] = entry;
  }

  void *old_entries = table->entries;
  vec_free(&old_entries);
  table->entries  = new_entries;
  table->capacity = new_capacity;
  return 1;
}

/*
 * Prepare an empty table.  table->capacity on entry is treated as a minimum
 * bucket count; it is raised to at least SYM_TABLE_INIT_SIZE and rounded up
 * to a power of two.  On allocation failure table->entries is NULL and
 * table->capacity is 0.
 */
void sym_table_init(sym_table_t *table)
{
  if (!table)
    return;
  u64 capacity = SYM_TABLE_INIT_SIZE;
  while (capacity < table->capacity && capacity <= UINT64_MAX / 2)
    capacity <<= 1;

  table->count    = 0;
  table->entries  = sym_table_alloc_entries(capacity);
  table->capacity = table->entries ? capacity : 0;
}

/*
 * Intern `sv`: return the table's canonical copy of the string, inserting a
 * heap-allocated duplicate on first sight.  Equal strings always yield the
 * same data pointer.  The table grows once it would become more than half
 * full.  Returns SV(NULL, 0) if the table is unusable, `sv` is malformed
 * (NULL data with non-zero size), or memory is exhausted.
 */
sv_t sym_table_find(sym_table_t *table, sv_t sv)
{
  if (!table || !table->entries || table->capacity == 0 ||
      (!sv.data && sv.size > 0))
    return SV(NULL, 0);

  // Is it present in the table?
  u64 index = sym_table_probe(table, sv);
  if (table->entries[index].data)
    return table->entries[index];

  // About to insert: keep the load factor at or below 1/2.
  if (table->count + 1 > table->capacity / 2)
  {
    if (sym_table_grow(table))
      index = sym_table_probe(table, sv);
    else if (table->count + 1 >= table->capacity)
      // Never fill the last empty slot, or probing would loop forever.
      return SV(NULL, 0);
  }

  // Otherwise we need to duplicate and make it permanently interned
  sv_t newsv = sv_copy(sv);
  if (!newsv.data)
    return SV(NULL, 0);
  table->entries[index] = newsv;
  ++table->count;

  return newsv;
}

/*
 * Free every interned string and the bucket array, then zero the table so
 * it can be re-initialised (or cleaned up again) safely.
 */
void sym_table_cleanup(sym_table_t *table)
{
  if (!table)
    return;
  if (table->entries)
    for (u64 i = 0; i < table->capacity; ++i)
      free(table->entries[i].data);
  void *entries = table->entries;
  vec_free(&entries);
  memset(table, 0, sizeof(*table));
}

/* Demo: intern the words of a lorem ipsum snippet and print, for each word,
 * the address of its canonical copy (repeated words print the same one). */
int main(void)
{
  sym_table_t table = {0};
  sym_table_init(&table);
  if (!table.entries)
  {
    fprintf(stderr, "failed to allocate symbol table\n");
    return 1;
  }
  // Let's hash the words of lorem ipsum
  const char *words[] = {
      "aliquam",      "erat",      "volutpat",  "nunc",      "eleifend",
      "leo",          "vitae",     "magna",     "in",        "id",
      "erat",         "non",       "orci",      "commodo",   "lobortis",
      "proin",        "neque",     "massa",     "cursus",    "ut",
      "gravida",      "ut",        "lobortis",  "eget",      "lacus",
      "sed",          "diam",      "praesent",  "fermentum", "tempor",
      "tellus",       "nullam",    "tempus",    "mauris",    "ac",
      "felis",        "vel",       "velit",     "tristique", "imperdiet",
      "donec",        "at",        "pede",      "etiam",     "vel",
      "neque",        "nec",       "dui",       "dignissim", "bibendum",
      "vivamus",      "id",        "enim",      "phasellus", "neque",
      "orci",         "porta",     "a",         "aliquet",   "quis",
      "semper",       "a",         "massa",     "phasellus", "purus",
      "pellentesque", "tristique", "imperdiet", "tortor",    "nam",
      "euismod",      "tellus",    "id",        "erat",
  };

  for (u64 i = 0; i < sizeof(words) / sizeof(words[0]); ++i)
  {
    // sym_table_find only reads its argument, so dropping const is safe.
    sv_t sv = sym_table_find(&table, SV((char *)words[i], strlen(words[i])));
    printf("%s => %p\n", words[i], (void *)sv.data);
  }

  sym_table_cleanup(&table);
  return 0;
}