aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAryadev Chavali <aryadev@aryadevchavali.com>2025-08-19 22:39:45 +0100
committerAryadev Chavali <aryadev@aryadevchavali.com>2025-08-19 22:40:50 +0100
commitf14a2680fd030fbfa75a63bea8110db7c414e805 (patch)
treec4d580a45cf9bedd4060c5a8d261e913a0893566
downloadalisp-f14a2680fd030fbfa75a63bea8110db7c414e805.tar.gz
alisp-f14a2680fd030fbfa75a63bea8110db7c414e805.tar.bz2
alisp-f14a2680fd030fbfa75a63bea8110db7c414e805.zip
200 line symbol table implementation and the first commit
Set up build system (POSIX sh), gitignore, basic C file with an implementation of something I really wanted to set up. It just hashes a snippet of lorem ipsum. Testing seems to indicate it's working. That's all it does lol. This is a really pressing matter; all my previous Lisps always just made the strings on the fly and that irked me deeply. I want a smart implementation that really tries to save memory on something as intensive as symbols.
-rw-r--r--.dir-locals.el6
-rw-r--r--.gitignore5
-rw-r--r--LICENSE24
-rw-r--r--build.sh9
-rw-r--r--main.c201
5 files changed, 245 insertions, 0 deletions
diff --git a/.dir-locals.el b/.dir-locals.el
new file mode 100644
index 0000000..9dd9c84
--- /dev/null
+++ b/.dir-locals.el
@@ -0,0 +1,6 @@
+;;; Directory Local Variables -*- no-byte-compile: t -*-
+;;; For more information see (info "(emacs) Directory Variables")
+
+;; The `nil' entry applies to every major mode: it sets `compile-command'
+;; to run the project's build script and sets `+license/license-choice'
+;; to "Unlicense".  The `c-mode' entry sets `mode' to `clang-format' for
+;; C buffers.
+((nil . ((compile-command . "sh build.sh")
+ (+license/license-choice . "Unlicense")))
+ (c-mode . ((mode . clang-format))))
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..af974b9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+*.o
+*.out
+.cache/
+compile_commands.json
+TAGS \ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..3c577b0
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <https://unlicense.org> \ No newline at end of file
diff --git a/build.sh b/build.sh
new file mode 100644
index 0000000..8669824
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env sh
+
+CFLAGS="-Wall -Wextra -std=c11 -ggdb"
+SRC="main.c"
+OUT="main.out"
+
+set -xe
+
+cc $CFLAGS -o $OUT $SRC;
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..23557d9
--- /dev/null
+++ b/main.c
@@ -0,0 +1,201 @@
+/* Copyright (C) 2025 Aryadev Chavali
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Unlicense
+ * for details.
+
+ * You may distribute and modify this code under the terms of the
+ * Unlicense, which you should have received a copy of along with this
+ * program. If not, please go to <https://unlicense.org/>.
+
+ * Created: 2025-08-19
+ * Description: Entrypoint
+ */
+
+#include <malloc.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Larger / smaller of two values.  Both are plain macros, so the selected
+// argument is evaluated twice: do not pass expressions with side effects.
+#define MAX(A, B) ((A) > (B) ? (A) : (B))
+#define MIN(A, B) ((A) < (B) ? (A) : (B))
+
+// Short aliases for the <stdint.h> fixed-width unsigned integer types.
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+
+// Short aliases for the <stdint.h> fixed-width signed integer types.
+typedef int8_t i8;
+typedef int16_t i16;
+typedef int32_t i32;
+typedef int64_t i64;
+
+// Header of a growable byte vector.  vec_make hands callers a pointer to
+// the memory just past this header (the payload); VEC_GET steps back from
+// such a payload pointer to recover the header.
+typedef struct
+{
+ u64 size, capacity; // size: payload bytes in use; capacity: payload bytes allocated
+ u8 bytes[];         // payload, stored directly after the two counters
+} vec_t;
+
+// Accessors that take a payload pointer P (as produced by vec_make).
+// VEC_GET:  the vec_t header located immediately before P.
+// VEC_SIZE: lvalue for the number of payload bytes in use.
+// VEC_CAP:  lvalue for the number of payload bytes allocated.
+// VEC_MULT: factor applied to the capacity by vec_ensure_remaining.
+#define VEC_GET(P) (((vec_t *)(P)) - 1)
+#define VEC_SIZE(P) (VEC_GET(P)->size)
+#define VEC_CAP(P) (VEC_GET(P)->capacity)
+#define VEC_MULT 2
+
+/* Allocate a zero-filled vector with room for `size` payload bytes.
+ *
+ * ptr:  out-parameter; receives a pointer to the payload (the vec_t header
+ *       sits immediately before it, see VEC_GET).  Ignored when NULL.
+ * size: payload capacity in bytes.
+ *
+ * On allocation failure, or when header + payload would not fit in a
+ * size_t, *ptr is set to NULL so the caller can detect the error.  The
+ * result must be released with vec_free.
+ */
+void vec_make(void **ptr, u64 size)
+{
+  if (!ptr)
+    return;
+  *ptr = NULL;
+
+  // Refuse requests whose total byte count would wrap around.
+  if (size > SIZE_MAX - sizeof(vec_t))
+    return;
+
+  vec_t *vector = calloc(1, sizeof(*vector) + size);
+  if (!vector)
+    return;
+
+  vector->size     = 0;
+  vector->capacity = size;
+  // Hand out the payload, not the header.
+  *ptr = (vector + 1);
+}
+
+/* Release a vector created by vec_make and clear the caller's handle.
+ *
+ * data: address of a payload pointer.  Nothing happens when either `data`
+ *       or the pointer it refers to is NULL.
+ */
+void vec_free(void **data)
+{
+  if (data && *data)
+  {
+    // The allocation starts at the header, one vec_t before the payload.
+    vec_t *header = VEC_GET(*data);
+    free(header);
+    *data = NULL;
+  }
+}
+
+/* Make sure at least `space` unused payload bytes follow the current
+ * contents of the vector, reallocating when necessary.
+ *
+ * ptr:   address of a payload pointer; may be rewritten to point at a
+ *        larger vector.  Nothing happens when `ptr` or `*ptr` is NULL.
+ * space: number of free bytes required.
+ *
+ * The new capacity is the larger of (capacity * VEC_MULT) and
+ * (size + space).  If the request overflows or the allocation fails the
+ * vector is left untouched, so callers should re-check the free space.
+ */
+void vec_ensure_remaining(void **ptr, u64 space)
+{
+  if (!ptr || !*ptr)
+    return;
+
+  vec_t *vec = VEC_GET(*ptr);
+  if (vec->capacity - vec->size >= space)
+    return;
+
+  // size + space must be representable.
+  if (space > UINT64_MAX - vec->size)
+    return;
+  u64 needed = vec->size + space;
+
+  // Skip the geometric growth step if it would wrap around.
+  u64 grown = needed;
+  if (vec->capacity <= UINT64_MAX / VEC_MULT)
+    grown = vec->capacity * VEC_MULT;
+
+  void *new_vec = NULL;
+  vec_make(&new_vec, MAX(grown, needed));
+  if (!new_vec)
+    return; // allocation failed: keep the old vector intact
+
+  VEC_SIZE(new_vec) = vec->size;
+  memcpy(new_vec, *ptr, vec->size);
+  vec_free(ptr);
+  *ptr = new_vec;
+}
+
+/* Append one byte to the end of the vector, growing it if needed.
+ *
+ * ptr:  address of a payload pointer; may be rewritten when the vector
+ *       grows.  Nothing happens when `ptr` or `*ptr` is NULL.
+ * byte: value to store.
+ *
+ * The byte is dropped if the vector could not be grown.
+ */
+void vec_append_byte(void **ptr, u8 byte)
+{
+  if (!ptr || !*ptr)
+    return;
+
+  vec_ensure_remaining(ptr, 1);
+  if (!*ptr)
+    return;
+
+  vec_t *vec = VEC_GET(*ptr);
+  // Growth may have failed; never write past the allocation.
+  if (vec->size >= vec->capacity)
+    return;
+  vec->bytes[vec->size++] = byte;
+}
+
+/* Append `size` bytes read from `data` to the end of the vector, growing
+ * it if needed.
+ *
+ * ptr:  address of a payload pointer; may be rewritten when the vector
+ *       grows.  Nothing happens when `ptr` or `*ptr` is NULL.
+ * data: source bytes; must not overlap the vector's own storage.
+ * size: number of bytes to copy; 0 is a no-op.
+ *
+ * Nothing is appended if the vector could not be grown.
+ */
+void vec_append(void **ptr, void *data, u64 size)
+{
+  if (!ptr || !*ptr || !data || size == 0)
+    return;
+
+  vec_ensure_remaining(ptr, size);
+  if (!*ptr)
+    return;
+
+  vec_t *vec = VEC_GET(*ptr);
+  // Growth may have failed; never write past the allocation.
+  if (vec->capacity - vec->size < size)
+    return;
+
+  // Index through the u8 payload: arithmetic on void * is not standard C.
+  memcpy(vec->bytes + vec->size, data, size);
+  vec->size += size;
+}
+
+/* Create an independent copy of the vector `*src` and store it in `*dest`.
+ *
+ * dest: out-parameter; receives the payload pointer of the copy, or NULL
+ *       if the copy could not be allocated.  Any previous value is
+ *       overwritten without being freed.
+ * src:  address of the payload pointer to copy from.
+ *
+ * Nothing happens when `dest`, `src` or `*src` is NULL.  The copy's
+ * capacity equals the source's size.
+ */
+void vec_clone(void **dest, void **src)
+{
+  if (!dest || !src || !*src)
+    return;
+
+  u64 size = VEC_SIZE(*src);
+
+  // Build the copy in a temporary so that dest == src still reads the
+  // original contents before the handle is overwritten.
+  void *copy = NULL;
+  vec_make(&copy, size);
+  if (copy)
+  {
+    memcpy(copy, *src, size);
+    VEC_SIZE(copy) = size;
+  }
+  *dest = copy;
+}
+
+// String view: `size` bytes starting at `data`.  The length is carried
+// explicitly rather than found by scanning for a terminator.
+typedef struct
+{
+ u64 size;   // number of bytes in the string
+ char *data; // first byte; sym_table_find treats a NULL here as "empty bucket"
+} sv_t;
+
+// SV:     build an sv_t value from a pointer and a byte count.
+// SV_FMT: expands to the two printf arguments (length as int, then data)
+//         consumed by the PR_SV conversion; the size is narrowed to int.
+// PR_SV:  printf conversion that prints exactly the given number of bytes.
+#define SV(DATA, SIZE) ((sv_t){.data = (DATA), .size = (SIZE)})
+#define SV_FMT(SV) (int)(SV).size, (SV).data
+#define PR_SV "%.*s"
+
+/* Duplicate a string view into freshly allocated memory.
+ *
+ * old: view to copy; its bytes are only read.
+ *
+ * Returns a view of the same size whose data the caller owns and must
+ * free().  One extra zero byte is allocated past the end, so the copy is
+ * also usable as a C string and a zero-length copy still gets a non-NULL
+ * pointer.  On allocation failure the result has data == NULL and size 0.
+ */
+sv_t sv_copy(sv_t old)
+{
+  // Room for the terminator must not wrap the byte count.
+  if (old.size >= SIZE_MAX)
+    return SV(NULL, 0);
+
+  char *newstr = calloc(old.size + 1, sizeof(*newstr));
+  if (!newstr)
+    return SV(NULL, 0);
+
+  if (old.size > 0 && old.data)
+    memcpy(newstr, old.data, old.size);
+  return SV(newstr, old.size);
+}
+
+// Table of interned strings.  sym_table_find hashes a string to a bucket
+// and scans forward (wrapping around) until it finds a match or an empty
+// bucket, i.e. one whose `data` is NULL.
+typedef struct
+{
+ u64 count; // How many strings?
+ u64 capacity; // How many entry buckets?
+ sv_t *entries; // this is actually a vector on the inside lol
+} sym_table_t;
+
+/* djb2 string hash: start from 5381 and, for every byte, compute
+ * hash = hash * 33 + byte.
+ *
+ * string: view to hash; an empty view hashes to 5381.
+ *
+ * Each byte is read as an unsigned value so the result does not depend on
+ * whether plain `char` is signed on the target platform.
+ */
+u64 djb2(sv_t string)
+{
+  u64 hash = 5381;
+  for (u64 i = 0; i < string.size; ++i)
+    hash = (hash << 5) + hash + (u8)string.data[i];
+  return hash;
+}
+
+// Minimum number of buckets; must be a power of two.
+#define SYM_TABLE_INIT_SIZE 1024
+
+/* Prepare an empty symbol table.
+ *
+ * table: table to initialise.  Its `capacity` field is read as a hint for
+ *        the desired bucket count (0 means "use the default"); any
+ *        previous `entries` pointer is overwritten without being freed.
+ *
+ * The bucket count is rounded up to a power of two no smaller than
+ * SYM_TABLE_INIT_SIZE, because lookups reduce the hash with
+ * `& (capacity - 1)`.  If the buckets cannot be allocated, `entries` is
+ * NULL and `capacity` is 0 afterwards.
+ */
+void sym_table_init(sym_table_t *table)
+{
+  u64 requested = table->capacity;
+  u64 capacity  = SYM_TABLE_INIT_SIZE;
+  // Stop doubling before the shift could wrap to zero.
+  while (capacity < requested && capacity <= (UINT64_MAX >> 1))
+    capacity <<= 1;
+
+  table->capacity = capacity;
+  table->count    = 0;
+  table->entries  = NULL;
+  vec_make((void **)&table->entries, table->capacity * sizeof(*table->entries));
+  if (!table->entries)
+    table->capacity = 0;
+}
+
+/* Scan the table for `sv`, starting at its hash bucket and wrapping around.
+ * Returns 1 with *slot set to the matching bucket, 0 with *slot set to the
+ * first empty bucket met, or -1 when every probed bucket holds some other
+ * string.
+ */
+static int sym_table_probe(const sym_table_t *table, sv_t sv, u64 *slot)
+{
+  u64 mask  = table->capacity - 1;
+  u64 index = djb2(sv) & mask;
+
+  // Bounding the number of probes keeps a full table from looping forever.
+  for (u64 probes = 0; probes < table->capacity;
+       ++probes, index = (index + 1) & mask)
+  {
+    sv_t comp = table->entries[index];
+    *slot     = index;
+    if (!comp.data)
+      return 0;
+    // memcmp compares all bytes; strncmp would stop at an embedded NUL.
+    if (sv.size == comp.size &&
+        (sv.size == 0 || memcmp(sv.data, comp.data, sv.size) == 0))
+      return 1;
+  }
+  return -1;
+}
+
+/* Double the number of buckets (or create the initial ones) and move every
+ * interned string into its new bucket.  The strings themselves are not
+ * copied, so views handed out earlier stay valid.  Returns 1 on success
+ * and 0 if the larger table could not be allocated, in which case the
+ * table is unchanged.
+ */
+static int sym_table_grow(sym_table_t *table)
+{
+  if (table->capacity > UINT64_MAX / (2 * sizeof(*table->entries)))
+    return 0;
+
+  u64 new_capacity =
+      table->capacity ? table->capacity * 2 : SYM_TABLE_INIT_SIZE;
+  sv_t *new_entries = NULL;
+  vec_make((void **)&new_entries, new_capacity * sizeof(*new_entries));
+  if (!new_entries)
+    return 0;
+
+  u64 mask = new_capacity - 1;
+  for (u64 i = 0; i < table->capacity; ++i)
+  {
+    sv_t entry = table->entries[i];
+    if (!entry.data)
+      continue;
+    u64 index = djb2(entry) & mask;
+    while (new_entries[index].data)
+      index = (index + 1) & mask;
+    new_entries[index] = entry;
+  }
+
+  vec_free((void **)&table->entries);
+  table->entries  = new_entries;
+  table->capacity = new_capacity;
+  return 1;
+}
+
+/* Intern a string: return the table's own copy of `sv`, creating it on
+ * first sight.
+ *
+ * table: symbol table to search and, if needed, extend.
+ * sv:    string to look up; its bytes are only read.
+ *
+ * Returns a view of the interned copy, which is owned by the table and
+ * released by sym_table_cleanup.  Equal strings always yield the same
+ * data pointer.  The table grows once it is half full; if memory runs out
+ * and no bucket is free, a view with data == NULL is returned.
+ */
+sv_t sym_table_find(sym_table_t *table, sv_t sv)
+{
+  u64 slot   = 0;
+  int status = sym_table_probe(table, sv, &slot);
+  if (status > 0)
+    return table->entries[slot];
+
+  // Not present yet.  Keep the load factor at or below one half.
+  if (status < 0 || table->count >= table->capacity / 2)
+  {
+    if (sym_table_grow(table))
+      status = sym_table_probe(table, sv, &slot);
+    if (status < 0)
+      return SV(NULL, 0);
+  }
+
+  // Duplicate the string so it stays interned for the table's lifetime.
+  sv_t newsv = sv_copy(sv);
+  if (!newsv.data)
+    return newsv;
+  table->entries[slot] = newsv;
+  ++table->count;
+
+  return newsv;
+}
+
+/* Free every interned string and the bucket array, then zero the table so
+ * that all of its fields read as 0 / NULL.
+ *
+ * table: table previously set up with sym_table_init.
+ */
+void sym_table_cleanup(sym_table_t *table)
+{
+  sv_t *bucket = table->entries;
+  // Empty buckets hold a NULL data pointer, which free() ignores.
+  for (u64 remaining = table->capacity; remaining > 0; --remaining, ++bucket)
+    free(bucket->data);
+
+  vec_free((void **)&table->entries);
+  memset(table, 0, sizeof(*table));
+}
+
+/* Demo entry point: intern each word of a lorem ipsum snippet and print
+ * the address of its interned copy.  Repeated words print the same
+ * address.  Returns 0.
+ */
+int main(void)
+{
+  sym_table_t table = {0};
+  sym_table_init(&table);
+  // Let's hash the words of lorem ipsum
+  const char *words[] = {
+      "aliquam",      "erat",      "volutpat",  "nunc",      "eleifend",
+      "leo",          "vitae",     "magna",     "in",        "id",
+      "erat",         "non",       "orci",      "commodo",   "lobortis",
+      "proin",        "neque",     "massa",     "cursus",    "ut",
+      "gravida",      "ut",        "lobortis",  "eget",      "lacus",
+      "sed",          "diam",      "praesent",  "fermentum", "tempor",
+      "tellus",       "nullam",    "tempus",    "mauris",    "ac",
+      "felis",        "vel",       "velit",     "tristique", "imperdiet",
+      "donec",        "at",        "pede",      "etiam",     "vel",
+      "neque",        "nec",       "dui",       "dignissim", "bibendum",
+      "vivamus",      "id",        "enim",      "phasellus", "neque",
+      "orci",         "porta",     "a",         "aliquet",   "quis",
+      "semper",       "a",         "massa",     "phasellus", "purus",
+      "pellentesque", "tristique", "imperdiet", "tortor",    "nam",
+      "euismod",      "tellus",    "id",        "erat",
+  };
+
+  for (u64 i = 0; i < sizeof(words) / sizeof(words[0]); ++i)
+  {
+    // sv_t holds a mutable char *, but the table only reads and copies
+    // the bytes, so dropping const here is safe.
+    sv_t sv = sym_table_find(&table, SV((char *)words[i], strlen(words[i])));
+    // %p expects a void *.
+    printf("%s => %p\n", words[i], (void *)sv.data);
+  }
+
+  // Release the interned strings and the bucket array before exiting.
+  sym_table_cleanup(&table);
+  return 0;
+}