200 line symbol table implementation and the first commit
Set up the build system (POSIX sh), a gitignore, and a basic C file with an implementation of something I really wanted to set up. It just hashes a snippet of lorem ipsum. Testing seems to indicate it's working. That's all it does lol. This is a really pressing matter; all my previous Lisps always just made the strings on the fly and that irked me deeply. I want a smart implementation that really tries to save memory on something as intensive as symbols.
This commit is contained in:
6
.dir-locals.el
Normal file
6
.dir-locals.el
Normal file
@@ -0,0 +1,6 @@
|
||||
;;; Directory Local Variables -*- no-byte-compile: t -*-
|
||||
;;; For more information see (info "(emacs) Directory Variables")
|
||||
|
||||
((nil . ((compile-command . "sh build.sh")
|
||||
(+license/license-choice . "Unlicense")))
|
||||
(c-mode . ((mode . clang-format))))
|
||||
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
*.o
|
||||
*.out
|
||||
.cache/
|
||||
compile_commands.json
|
||||
TAGS
|
||||
24
LICENSE
Normal file
24
LICENSE
Normal file
@@ -0,0 +1,24 @@
|
||||
This is free and unencumbered software released into the public domain.
|
||||
|
||||
Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
distribute this software, either in source code form or as a compiled
|
||||
binary, for any purpose, commercial or non-commercial, and by any
|
||||
means.
|
||||
|
||||
In jurisdictions that recognize copyright laws, the author or authors
|
||||
of this software dedicate any and all copyright interest in the
|
||||
software to the public domain. We make this dedication for the benefit
|
||||
of the public at large and to the detriment of our heirs and
|
||||
successors. We intend this dedication to be an overt act of
|
||||
relinquishment in perpetuity of all present and future rights to this
|
||||
software under copyright law.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
For more information, please refer to <https://unlicense.org>
|
||||
9
build.sh
Normal file
9
build.sh
Normal file
@@ -0,0 +1,9 @@
|
||||
#!/usr/bin/env sh

# Build script: compiles the single source file into a debug executable.

# Compiler flags.  Expanded unquoted at the call site on purpose so the shell
# splits them into separate arguments.
CFLAGS="-Wall -Wextra -std=c11 -ggdb"
# Input translation unit and output executable.
SRC="main.c"
OUT="main.out"

# -x: echo every command before running it; -e: stop at the first failure.
set -xe

cc $CFLAGS -o "$OUT" "$SRC"
|
||||
201
main.c
Normal file
201
main.c
Normal file
@@ -0,0 +1,201 @@
|
||||
/* Copyright (C) 2025 Aryadev Chavali
|
||||
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Unlicense
|
||||
* for details.
|
||||
|
||||
* You may distribute and modify this code under the terms of the
|
||||
* Unlicense, which you should have received a copy of along with this
|
||||
* program. If not, please go to <https://unlicense.org/>.
|
||||
|
||||
* Created: 2025-08-19
|
||||
* Description: Entrypoint
|
||||
*/
|
||||
|
||||
#include <malloc.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|
||||
|
||||
/* Larger / smaller of two values.  Each argument may be evaluated twice, so
 * do not pass expressions with side effects. */
#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
|
||||
|
||||
/* Short aliases for the fixed-width integer types of <stdint.h>. */

/* Unsigned integers. */
typedef uint8_t  u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;

/* Signed integers. */
typedef int8_t  i8;
typedef int16_t i16;
typedef int32_t i32;
typedef int64_t i64;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
u64 size, capacity;
|
||||
u8 bytes[];
|
||||
} vec_t;
|
||||
|
||||
/* Step back from a payload pointer to the vec_t header placed just before it. */
#define VEC_GET(P) ((vec_t *)(P) - 1)
/* Lvalue access to the header fields of the vector owning payload pointer P. */
#define VEC_SIZE(P) (VEC_GET(P)->size)
#define VEC_CAP(P) (VEC_GET(P)->capacity)
/* Factor the capacity is multiplied by when a vector has to grow. */
#define VEC_MULT 2
|
||||
|
||||
/**
 * Allocate a zero-filled vector with room for `size` payload bytes.
 *
 * The allocation is a vec_t header immediately followed by the payload.  The
 * pointer written to *ptr addresses the payload, so the header is recovered
 * with VEC_GET().  The new vector starts with size 0 and capacity `size`.
 *
 * ptr:  where to store the payload pointer; the call is a no-op when NULL.
 * size: payload capacity in bytes.
 *
 * On allocation failure, or when header + size would overflow, *ptr is set to
 * NULL.  Whatever *ptr held before the call is overwritten, not freed.
 * Release the vector with vec_free().
 */
void vec_make(void **ptr, u64 size)
{
  if (!ptr)
    return;

  *ptr = NULL;

  // sizeof(header) + size must not wrap around
  if (size > SIZE_MAX - sizeof(vec_t))
    return;

  vec_t *vector = calloc(1, sizeof(*vector) + size);
  if (!vector)
    return;

  vector->size     = 0;
  vector->capacity = size;
  *ptr             = vector + 1; // first byte past the header == payload
}
|
||||
|
||||
void vec_free(void **data)
|
||||
{
|
||||
if (!data || !*data)
|
||||
return;
|
||||
free(VEC_GET(*data));
|
||||
*data = NULL;
|
||||
}
|
||||
|
||||
/**
 * Make sure at least `space` unused payload bytes are available.
 *
 * If the vector is too small, a new vector is allocated with capacity
 * max(capacity * VEC_MULT, size + space), the used bytes are copied over, the
 * old vector is freed and *ptr is updated to the new payload.
 *
 * ptr:   address of the payload pointer; no-op when ptr or *ptr is NULL.
 * space: number of free bytes required after the current contents.
 *
 * If the new size cannot be represented or the allocation fails, the vector
 * is left untouched; callers that are about to write must re-check the
 * remaining capacity.
 */
void vec_ensure_remaining(void **ptr, u64 space)
{
  if (!ptr || !*ptr)
    return;

  vec_t *vec = VEC_GET(*ptr);
  if (vec->capacity - vec->size >= space)
    return;

  // size + space would wrap around: the request cannot be satisfied
  if (space > UINT64_MAX - vec->size)
    return;

  const u64 wanted = vec->size + space;
  // Fall back to the exact requirement if multiplying the capacity would wrap
  const u64 grown = vec->capacity > UINT64_MAX / VEC_MULT
                        ? wanted
                        : vec->capacity * VEC_MULT;

  void *new_vec = NULL;
  vec_make(&new_vec, MAX(grown, wanted));
  if (!new_vec)
    return;

  VEC_SIZE(new_vec) = vec->size;
  memcpy(new_vec, *ptr, vec->size);
  vec_free(ptr);
  *ptr = new_vec;
}
|
||||
|
||||
/**
 * Append a single byte to the vector, growing it when necessary.
 *
 * ptr:  address of the payload pointer; may be updated if the vector grows.
 * byte: value to append.
 *
 * Does nothing when ptr or *ptr is NULL, or when no room could be obtained.
 */
void vec_append_byte(void **ptr, u8 byte)
{
  if (!ptr || !*ptr)
    return;

  vec_ensure_remaining(ptr, 1);
  if (!*ptr)
    return;

  vec_t *vec = VEC_GET(*ptr);
  // Growth may not have happened; never write past the capacity
  if (vec->size >= vec->capacity)
    return;

  vec->bytes[vec->size++] = byte;
}
|
||||
|
||||
/**
 * Append `size` bytes read from `data` to the vector, growing it when
 * necessary.
 *
 * ptr:  address of the payload pointer; may be updated if the vector grows.
 * data: source bytes; must not overlap the vector's own storage.
 * size: number of bytes to copy.
 *
 * Does nothing when ptr, *ptr or data is NULL, when size is 0, or when no
 * room could be obtained.
 */
void vec_append(void **ptr, void *data, u64 size)
{
  if (!ptr || !*ptr || !data || size == 0)
    return;

  vec_ensure_remaining(ptr, size);
  if (!*ptr)
    return;

  vec_t *vec = VEC_GET(*ptr);
  // Growth may not have happened; never write past the capacity
  if (vec->capacity - vec->size < size)
    return;

  // Index through the typed payload: arithmetic on void * is not standard C
  memcpy(vec->bytes + vec->size, data, size);
  vec->size += size;
}
|
||||
|
||||
void vec_clone(void **dest, void **src)
|
||||
{
|
||||
if (!dest || !src || !*src)
|
||||
return;
|
||||
vec_make(dest, VEC_SIZE(*src));
|
||||
memcpy(*dest, *src, VEC_SIZE(*src));
|
||||
VEC_SIZE(*dest) = VEC_SIZE(*src);
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
u64 size;
|
||||
char *data;
|
||||
} sv_t;
|
||||
|
||||
/* Build an sv_t value from a pointer and a length. */
#define SV(PTR, LEN) ((sv_t){.data = (PTR), .size = (LEN)})
/* Expands to the two printf arguments consumed by PR_SV: the length as an int
 * precision, then the data pointer. */
#define SV_FMT(VIEW) (int)(VIEW).size, (VIEW).data
/* printf conversion that prints an sv_t passed through SV_FMT. */
#define PR_SV "%.*s"
|
||||
|
||||
/**
 * Duplicate a string view into freshly allocated heap memory.
 *
 * One extra zeroed byte is allocated after the copied bytes.  The copy is
 * therefore NUL terminated, and an empty string still gets a non-NULL
 * pointer (calloc with a size of 0 is allowed to return NULL).
 *
 * old: view to duplicate; old.data must point to at least old.size bytes.
 *
 * Returns a view with the same size whose data the caller must free().
 * Returns SV(NULL, 0) if old has a non-zero size but NULL data, if the size
 * is too large to allocate, or if the allocation fails.
 */
sv_t sv_copy(sv_t old)
{
  if (old.size > 0 && !old.data)
    return SV(NULL, 0);
  // size + 1 must not wrap around
  if (old.size >= SIZE_MAX)
    return SV(NULL, 0);

  char *newstr = calloc(old.size + 1, sizeof(*newstr));
  if (!newstr)
    return SV(NULL, 0);

  if (old.size > 0)
    memcpy(newstr, old.data, old.size);
  return SV(newstr, old.size);
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
u64 count; // How many strings?
|
||||
u64 capacity; // How many entry buckets?
|
||||
sv_t *entries; // this is actually a vector on the inside lol
|
||||
} sym_table_t;
|
||||
|
||||
/**
 * djb2 string hash: starting from 5381, hash = hash * 33 + byte for every
 * byte of the string.
 *
 * Each byte is read as unsigned char so the result does not depend on whether
 * plain char is signed on the target platform.
 *
 * string: view to hash; string.data must point to at least string.size bytes.
 *
 * Returns the 64-bit hash (5381 for an empty string).
 */
u64 djb2(sv_t string)
{
  u64 hash = 5381;
  for (u64 i = 0; i < string.size; ++i)
    hash = (hash << 5) + hash + (unsigned char)string.data[i];
  return hash;
}
|
||||
|
||||
#define SYM_TABLE_INIT_SIZE 1024
|
||||
|
||||
void sym_table_init(sym_table_t *table)
|
||||
{
|
||||
table->capacity = MAX(table->capacity, SYM_TABLE_INIT_SIZE);
|
||||
table->count = 0;
|
||||
vec_make((void **)&table->entries, table->capacity * sizeof(*table->entries));
|
||||
}
|
||||
|
||||
/**
 * Look up a string in the table, interning a copy of it if it is absent.
 *
 * Buckets are probed linearly, starting at djb2(sv) masked by capacity - 1;
 * the mask requires the capacity to be a power of two.
 *
 * table: table to search.
 * sv:    string to find; only read, never stored (a copy is stored instead).
 *
 * Returns the interned view, whose data is owned by the table and is the same
 * pointer for every lookup of an equal string.  Returns a view with NULL data
 * when the table is unusable, when every bucket is occupied by other strings,
 * or when the copy could not be allocated.
 */
sv_t sym_table_find(sym_table_t *table, sv_t sv)
{
  if (!table || !table->entries || table->capacity == 0)
    return SV(NULL, 0);

  // TODO: Deal with resizing this when table->count > table->capacity / 2
  const u64 mask = table->capacity - 1;
  u64 index      = djb2(sv) & mask;

  // Visit each bucket at most once so a full table cannot loop forever
  for (u64 probes = 0; probes < table->capacity;
       ++probes, index = (index + 1) & mask)
  {
    sv_t comp = table->entries[index];

    if (!comp.data)
    {
      // Empty bucket: the string is not present, so intern a permanent copy
      sv_t newsv = sv_copy(sv);
      if (!newsv.data)
        return newsv; // copy failed; leave the bucket empty and uncounted
      table->entries[index] = newsv;
      ++table->count;
      return newsv;
    }

    // Sized comparison: memcmp does not stop at embedded NUL bytes
    if (sv.size == comp.size &&
        (sv.size == 0 || memcmp(sv.data, comp.data, sv.size) == 0))
      return comp;
  }

  return SV(NULL, 0);
}
|
||||
|
||||
/**
 * Release everything owned by the table: each interned string, then the
 * bucket vector.  The struct is zeroed afterwards.
 */
void sym_table_cleanup(sym_table_t *table)
{
  sv_t *buckets = table->entries;

  // free(NULL) is a no-op, so empty buckets need no special casing
  for (u64 slot = 0; slot < table->capacity; ++slot)
    free(buckets[slot].data);

  vec_free((void **)&table->entries);
  memset(table, 0, sizeof(*table));
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
sym_table_t table = {0};
|
||||
sym_table_init(&table);
|
||||
// Let's hash the words of lorem ipsum
|
||||
const char *words[] = {
|
||||
"aliquam", "erat", "volutpat", "nunc", "eleifend",
|
||||
"leo", "vitae", "magna", "in", "id",
|
||||
"erat", "non", "orci", "commodo", "lobortis",
|
||||
"proin", "neque", "massa", "cursus", "ut",
|
||||
"gravida", "ut", "lobortis", "eget", "lacus",
|
||||
"sed", "diam", "praesent", "fermentum", "tempor",
|
||||
"tellus", "nullam", "tempus", "mauris", "ac",
|
||||
"felis", "vel", "velit", "tristique", "imperdiet",
|
||||
"donec", "at", "pede", "etiam", "vel",
|
||||
"neque", "nec", "dui", "dignissim", "bibendum",
|
||||
"vivamus", "id", "enim", "phasellus", "neque",
|
||||
"orci", "porta", "a", "aliquet", "quis",
|
||||
"semper", "a", "massa", "phasellus", "purus",
|
||||
"pellentesque", "tristique", "imperdiet", "tortor", "nam",
|
||||
"euismod", "tellus", "id", "erat",
|
||||
};
|
||||
|
||||
for (u64 i = 0; i < sizeof(words) / sizeof(words[0]); ++i)
|
||||
{
|
||||
sv_t sv = sym_table_find(&table, SV(words[i], strlen(words[i])));
|
||||
printf("%s => %p\n", words[i], sv.data);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user