Compare commits

...

10 Commits

Author SHA1 Message Date
Aryadev Chavali
88c9d01677 stream: Copy on stream_init_string
Just safer!
2025-10-19 23:04:40 +01:00
Aryadev Chavali
66c5134eb5 Remove impl folder 2025-10-19 23:04:04 +01:00
Aryadev Chavali
913b34588f build: simplify looking for library files 2025-10-19 22:25:24 +01:00
Aryadev Chavali
10c391367d build: Add some more warning flags for compilation-based-refactoring 2025-10-19 22:25:13 +01:00
Aryadev Chavali
cbfcf24ca2 stream: Introduce PIPE type
Main reason to have this at all is to make char-by-char reading
feasible.  This occurs at `stream_chunk`, and previously if we passed
in STDIN for `stream_init_file`, STDIN will only terminate once
STREAM_DEFAULT_CHUNK number of characters have been fed into the pipe.

This isn't desirable for STDIN (we really want to read char-by-char
for expressions), nor would it necessarily be desirable in network
applications.  So any stream marked STREAM_TYPE_PIPE will only chunk
character-by-character rather than genuine chunks.
2025-10-19 22:25:10 +01:00
Aryadev Chavali
030a289497 Some changes to how streams work, clean up alisp.org 2025-09-01 21:47:25 +01:00
Aryadev Chavali
1aa01d2a89 Merge remote-tracking branch 'origin/master' 2025-09-01 21:45:18 +01:00
Aryadev Chavali
700c3b1d1b Move implementation files into their own folder
main.c and test.c generate binary executables so they can stay in the
main folder, but the rest can go into their own dedicated folder to
make it look nicer
2025-09-01 21:26:01 +01:00
Aryadev Chavali
a9b08d3a11 Add printer for SV's that provides debug information 2025-08-29 20:04:35 +01:00
Aryadev Chavali
d62a11bb35 Modify main.c to better test stream 2025-08-29 20:04:21 +01:00
5 changed files with 97 additions and 82 deletions

30
alisp.h
View File

@@ -50,6 +50,7 @@ typedef struct
#define SV(DATA, SIZE) ((sv_t){.data = (DATA), .size = (SIZE)})
#define SV_FMT(SV) (int)(SV).size, (SV).data
#define PR_SV "%.*s"
#define PRD_SV "%d@%p"
sv_t sv_copy(sv_t);
@@ -99,6 +100,7 @@ void sym_table_cleanup(sym_table_t *);
// Kinds of data source a stream_t can wrap.  The tag is stored in
// stream->type and switched on by the stream_* functions.
typedef enum
{
// Backed by an in-memory sv_t (stream->string).
STREAM_TYPE_STRING,
// Backed by a FILE * plus a cache (stream->pipe); stream_chunk requests a
// single byte per call for this type.
STREAM_TYPE_PIPE,
// Backed by a FILE * plus a cache (stream->pipe); stream_chunk requests
// STREAM_DEFAULT_CHUNK bytes per call for this type.
STREAM_TYPE_FILE,
} stream_type_t;
@@ -132,22 +134,28 @@ typedef struct
#define STREAM_DEFAULT_CHUNK 64
stream_err_t stream_init_string(stream_t *, char *, sv_t);
stream_err_t stream_init_pipe(stream_t *, char *, FILE *);
stream_err_t stream_init_file(stream_t *, char *, FILE *);
void stream_stop(stream_t *);
// end of stream
bool stream_eos(stream_t *);
// end of cache
// End of Content (i.e. we've consumed all cached content/file)
bool stream_eoc(stream_t *);
// size of immediately accessible content
u64 stream_size(stream_t *);
bool stream_chunk(stream_t *);
// Return current character, push position by 1
char stream_next(stream_t *);
// Peek current character, do not push position
char stream_peek(stream_t *);
// Seek forward or backward in the stream, return success
bool stream_seek(stream_t *, i64);
bool stream_seek_forward(stream_t *, u64);
bool stream_seek_backward(stream_t *, u64);
// Return a relative substring (using sv_t) of a given size
sv_t stream_substr(stream_t *, u64);
// Return an absolutely located substring (using sv_t) at given index and of
// given size.
sv_t stream_substr_abs(stream_t *, u64, u64);
/// Basic definitions for a Lisp
@@ -230,4 +238,18 @@ lisp_t *tag_sym(char *);
lisp_t *tag_cons(cons_t *);
lisp_t *tag_vec(vec_t *);
/// Reader
typedef enum
{
READ_OK = 0,
} read_err_t;
// Attempt to read an expression from the stream, storing it in a pointer,
// returning any errors if failed
read_err_t read(sys_t *, stream_t *, lisp_t **);
// Attempt to read all expressions from a stream till end of content, storing
// them in the given vector. Return any error at any point during the read.
read_err_t read_all(sys_t *, stream_t *, vec_t *);
#endif

View File

@@ -38,11 +38,14 @@ seamless in that regard. But we'll need to set a calling convention
in order to make calling into this seamless from a runtime
perspective.
* Tasks
** TODO Potentially optimise symbol table ingress :optimisation:design:
** TODO Capitalise symbols (TBD) :optimisation:design:
Should we capitalise symbols? This way, we limit the symbol table's
possible options a bit (potentially we could design a better hashing
algorithm?) and it would be kinda like an actual Lisp.
** TODO Reader system
** TODO Design Strings
We have ~sv_t~ so our basic C API is done. We just need pluggable
functions to construct and deconstruct strings as lisps.
** WIP Reader system
We need to design a reader system. The big idea: given a "stream" of
data, we can break out expressions from it. An expression could be
either an atomic value or a container.
@@ -51,7 +54,7 @@ The natural method is doing this one at a time (the runtime provides a
~read~ function to do this), but we can also convert an entire stream
into expressions by consuming it fully. So the principal function
here is ~read: stream -> expr~.
*** TODO Design streams
*** DONE Design streams
A stream needs to be able to provide characters for us to interpret in
our parsing. Lisp is an LL(1) grammar so we only really need one
character of lookahead, but seeking is very useful.
@@ -74,21 +77,17 @@ parsing isn't as concerned with the specifics of the underlying data
stream. We can use a tagged union of data structures representing the
different underlying stream types, then generate abstract functions
that provide common functionality.
**** TODO Design the tagged union
**** TODO Design the API
#+begin_src c
bool stream_eos(stream_t *);
char stream_next(stream_t *);
char stream_peek(stream_t *);
sv_t stream_substr(stream_t *, u64, u64);
bool stream_seek(stream_t *, i64);
bool stream_close(stream_t *);
#+end_src
*** TODO Figure out the possible parse errors
*** TODO Design what a "parser function" would look like
2025-08-29: A really basic interface that makes the parse stage a bit
easier. We're not going to do anything more advanced than the API
i.e. no parsing.
**** DONE Design the tagged union
**** DONE Design the API
*** WIP Figure out the possible parse errors
*** DONE Design what a "parser function" would look like
The general function is something like ~stream -> T | Err~. What
other state do we need to encode?
*** TODO Write a parser for integers
*** WIP Write a parser for integers
*** TODO Write a parser for symbols
*** TODO Write a parser for lists
*** TODO Write a parser for vectors

View File

@@ -2,9 +2,9 @@
set -xe
CFLAGS="-Wall -Wextra -std=c11 -ggdb -fsanitize=address -fsanitize=undefined"
LINK=""
LIB="sv.c vec.c symtable.c tag.c constructor.c stream.c sys.c"
CFLAGS="-Wall -Wextra -std=c11 -ggdb -fsanitize=address -fsanitize=undefined -Wswitch -Wswitch-enum"
LINK="-I."
LIB=$(find "./" -name "*.c" -not -name "main.c" -not -name "test.c")
OUT="alisp.out"
build() {

50
main.c
View File

@@ -21,53 +21,9 @@
int main(void)
{
stream_t stream = {0};
char _data[] = "Hello, world!";
sv_t data = SV(_data, ARRSIZE(_data) - 1);
char filename[] = "lorem.txt";
// stream_init_string(&stream, NULL, data);
FILE *fp = fopen(filename, "rb");
stream_init_file(&stream, filename, fp);
// stream_init_file(&stream, "stdin", stdin);
/// test 1
// printf("[debug]: setup stream pipe\n");
// do
// {
// printf("%s[%lu]: `%c`\n", stream.name, stream.position,
// stream_next(&stream));
// } while (!stream_eoc(&stream));
// printf("%lu/%lu\n", stream.position, stream_size(&stream));
/// test 2
sv_t a = stream_substr(&stream, 100);
sv_t a_ = sv_copy(a);
printf("`" PR_SV "`\n", SV_FMT(a));
stream_seek(&stream, 100);
sv_t b = stream_substr_abs(&stream, 0, 100);
sv_t b_ = sv_copy(b);
printf("`" PR_SV "`\n", SV_FMT(b));
printf("a=b ? %s\na_=b_ ? %s\n",
memcmp(&a, &b, sizeof(a)) == 0 ? "yes" : "no",
a_.size == b_.size && strncmp(a_.data, b_.data, a_.size) == 0 ? "yes"
: "no");
sv_t c = stream_substr(&stream, 100);
sv_t c_ = sv_copy(c);
printf("`" PR_SV "`\n", SV_FMT(c));
stream_seek(&stream, 100);
sv_t d = stream_substr_abs(&stream, stream.position - 100, 100);
sv_t d_ = sv_copy(d);
printf("`" PR_SV "`\n", SV_FMT(d));
printf("c=d ? %s\nc_=d_ ? %s\n",
memcmp(&c, &d, sizeof(a)) == 0 ? "yes" : "no",
c_.size == d_.size && strncmp(c_.data, d_.data, c_.size) == 0 ? "yes"
: "no");
printf("eos?=%s\n", stream_eos(&stream) ? "yes" : "no");
stream_init_pipe(&stream, "<stdin>", stdin);
sv_t sv = stream_substr(&stream, 10);
printf("=> `" PR_SV "`\n", SV_FMT(sv));
stream_stop(&stream);
// fclose(fp);
return 0;
}

View File

@@ -12,6 +12,7 @@
* Description: Stream implementation
*/
#include <stdlib.h>
#include <string.h>
#include "./alisp.h"
@@ -25,7 +26,23 @@ stream_err_t stream_init_string(stream_t *stream, char *name, sv_t contents)
stream->type = STREAM_TYPE_STRING;
stream->name = name;
stream->string = contents;
stream->string = sv_copy(contents);
return STREAM_ERR_OK;
}
// Initialise `stream` as a STREAM_TYPE_PIPE stream over the FILE handle
// `pipe`.
//
// stream: stream object to (re)initialise; all of its fields are overwritten.
// name:   label stored in stream->name; may be NULL, in which case
//         "<stream>" is used.
// pipe:   FILE handle stored in stream->pipe.file.
//
// Returns STREAM_ERR_INVALID_PTR if `stream` or `pipe` is NULL, otherwise
// STREAM_ERR_OK.  No data is read from `pipe` here.
stream_err_t stream_init_pipe(stream_t *stream, char *name, FILE *pipe)
{
if (!stream || !pipe)
return STREAM_ERR_INVALID_PTR;
// Fall back to a placeholder label when the caller does not supply one.
name = name ? name : "<stream>";
// Zero every field before filling in the ones this stream type uses.
memset(stream, 0, sizeof(*stream));
stream->type = STREAM_TYPE_PIPE;
// The pointer itself is stored; the name is not copied.
stream->name = name;
stream->pipe.file = pipe;
// Set up the cache that stream_chunk appends read bytes to.
// NOTE(review): second argument is presumably the initial capacity -- confirm
// against vec_init.
vec_init(&stream->pipe.cache, STREAM_DEFAULT_CHUNK);
return STREAM_ERR_OK;
}
@@ -42,8 +59,6 @@ stream_err_t stream_init_file(stream_t *stream, char *name, FILE *pipe)
stream->pipe.file = pipe;
vec_init(&stream->pipe.cache, STREAM_DEFAULT_CHUNK);
// try to read an initial chunk
stream_chunk(stream);
return STREAM_ERR_OK;
}
@@ -55,8 +70,9 @@ void stream_stop(stream_t *stream)
switch (stream->type)
{
case STREAM_TYPE_STRING:
// Nothing to do, all dealt with outside of stream
free(stream->string.data);
break;
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE:
// Must cleanup vector
vec_free(&stream->pipe.cache);
@@ -72,6 +88,7 @@ u64 stream_size(stream_t *stream)
{
case STREAM_TYPE_STRING:
return stream->string.size;
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE:
return stream->pipe.cache.size;
default:
@@ -87,6 +104,7 @@ bool stream_eos(stream_t *stream)
{
case STREAM_TYPE_STRING:
return stream->position >= stream->string.size;
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE:
return feof(stream->pipe.file);
default:
@@ -102,6 +120,7 @@ bool stream_eoc(stream_t *stream)
{
case STREAM_TYPE_STRING:
return stream->position >= stream->string.size;
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE:
return feof(stream->pipe.file) &&
stream->position >= stream->pipe.cache.size;
@@ -114,20 +133,32 @@ bool stream_eoc(stream_t *stream)
bool stream_chunk(stream_t *stream)
{
assert(stream);
u64 to_read = STREAM_DEFAULT_CHUNK;
switch (stream->type)
{
case STREAM_TYPE_STRING:
// vacuously true
return true;
case STREAM_TYPE_PIPE:
to_read = 1;
// fallthrough
case STREAM_TYPE_FILE:
{
if (feof(stream->pipe.file))
// We can't read anymore. End of the line
return false;
vec_ensure_free(&stream->pipe.cache, STREAM_DEFAULT_CHUNK);
vec_ensure_free(&stream->pipe.cache, to_read);
int read = fread(vec_data(&stream->pipe.cache) + stream->pipe.cache.size, 1,
STREAM_DEFAULT_CHUNK, stream->pipe.file);
stream->pipe.cache.size += read;
return true;
to_read, stream->pipe.file);
// If we read something it's a good thing
if (read > 0)
{
stream->pipe.cache.size += read;
return true;
}
else
return false;
}
default:
FAIL("Unreachable");
@@ -154,11 +185,15 @@ char stream_peek(stream_t *stream)
{
case STREAM_TYPE_STRING:
return stream->string.data[stream->position];
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE:
{
// Cached already? We are done.
if (stream->position < stream->pipe.cache.size)
return ((char *)vec_data(&stream->pipe.cache))[stream->position];
{
const char *const str = vec_data(&stream->pipe.cache);
return str[stream->position];
}
// Try to read chunks in till we've reached it or we're at the end of the
// file.
@@ -204,6 +239,7 @@ bool stream_seek_forward(stream_t *stream, u64 offset)
stream->position += offset;
return true;
}
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE:
{
// Similar principle as stream_peek really...
@@ -223,7 +259,7 @@ bool stream_seek_forward(stream_t *stream, u64 offset)
continue;
// Same principle as the stream_eoc(stream) check.
if (stream->position + offset >= stream->pipe.cache.size)
if (stream->position + offset > stream->pipe.cache.size)
return false;
stream->position += offset;
return true;
@@ -248,7 +284,7 @@ sv_t stream_substr(stream_t *stream, u64 size)
if (stream_eoc(stream))
return SV(NULL, 0);
// TODO: this is kinda disgusting, any better way of doing this
// See if I can go forward enough to make this substring
u64 current_position = stream->position;
bool successful = stream_seek_forward(stream, size);
// Reset the position in either situation
@@ -263,6 +299,7 @@ sv_t stream_substr(stream_t *stream, u64 size)
case STREAM_TYPE_STRING:
ptr = stream->string.data;
break;
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE:
ptr = vec_data(&stream->pipe.cache);
break;
@@ -282,11 +319,12 @@ sv_t stream_substr_abs(stream_t *stream, u64 index, u64 size)
if (index + size <= stream_size(stream))
return SV(stream->string.data + index, size);
return SV(NULL, 0);
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE:
{
if (index + size <= stream_size(stream))
return SV(vec_data(&stream->pipe.cache) + index, size);
// stream_size(stream) <= index + size => try reading chunks
// (index + size > stream_size(stream)) => try reading chunks
for (bool read_chunk = stream_chunk(stream);
read_chunk && index + size >= stream->pipe.cache.size;
read_chunk = stream_chunk(stream))