Compare commits

...

10 Commits

Author SHA1 Message Date
Aryadev Chavali
88c9d01677 stream: Copy on stream_init_string
Just safer!
2025-10-19 23:04:40 +01:00
Aryadev Chavali
66c5134eb5 Remove impl folder 2025-10-19 23:04:04 +01:00
Aryadev Chavali
913b34588f build: simplify looking for library files 2025-10-19 22:25:24 +01:00
Aryadev Chavali
10c391367d build: Add some more warning flags for compilation-based-refactoring 2025-10-19 22:25:13 +01:00
Aryadev Chavali
cbfcf24ca2 stream: Introduce PIPE type
The main reason to have this at all is to make char-by-char reading
feasible.  Chunking occurs in `stream_chunk`, and previously, if we passed
STDIN to `stream_init_file`, the read would only return once
STREAM_DEFAULT_CHUNK characters had been fed into the pipe.

This isn't desirable for STDIN (we really want to read char-by-char
for expressions), nor would it necessarily be desirable in network
applications.  So any stream marked STREAM_TYPE_PIPE will only chunk
character-by-character rather than genuine chunks.
2025-10-19 22:25:10 +01:00
Aryadev Chavali
030a289497 Some changes to how streams work, clean up alisp.org 2025-09-01 21:47:25 +01:00
Aryadev Chavali
1aa01d2a89 Merge remote-tracking branch 'origin/master' 2025-09-01 21:45:18 +01:00
Aryadev Chavali
700c3b1d1b Move implementation files into their own folder
main.c and test.c generate binary executables so they can stay in the
main folder, but the rest can go into their own dedicated folder to
make it look nicer
2025-09-01 21:26:01 +01:00
Aryadev Chavali
a9b08d3a11 Add printer for SV's that provides debug information 2025-08-29 20:04:35 +01:00
Aryadev Chavali
d62a11bb35 Modify main.c to better test stream 2025-08-29 20:04:21 +01:00
5 changed files with 97 additions and 82 deletions

30
alisp.h
View File

@@ -50,6 +50,7 @@ typedef struct
#define SV(DATA, SIZE) ((sv_t){.data = (DATA), .size = (SIZE)}) #define SV(DATA, SIZE) ((sv_t){.data = (DATA), .size = (SIZE)})
#define SV_FMT(SV) (int)(SV).size, (SV).data #define SV_FMT(SV) (int)(SV).size, (SV).data
#define PR_SV "%.*s" #define PR_SV "%.*s"
#define PRD_SV "%d@%p"
sv_t sv_copy(sv_t); sv_t sv_copy(sv_t);
@@ -99,6 +100,7 @@ void sym_table_cleanup(sym_table_t *);
typedef enum typedef enum
{ {
STREAM_TYPE_STRING, STREAM_TYPE_STRING,
STREAM_TYPE_PIPE,
STREAM_TYPE_FILE, STREAM_TYPE_FILE,
} stream_type_t; } stream_type_t;
@@ -132,22 +134,28 @@ typedef struct
#define STREAM_DEFAULT_CHUNK 64 #define STREAM_DEFAULT_CHUNK 64
stream_err_t stream_init_string(stream_t *, char *, sv_t); stream_err_t stream_init_string(stream_t *, char *, sv_t);
stream_err_t stream_init_pipe(stream_t *, char *, FILE *);
stream_err_t stream_init_file(stream_t *, char *, FILE *); stream_err_t stream_init_file(stream_t *, char *, FILE *);
void stream_stop(stream_t *); void stream_stop(stream_t *);
// end of stream // End of Content (i.e. we've consumed all cached content/file)
bool stream_eos(stream_t *);
// end of cache
bool stream_eoc(stream_t *); bool stream_eoc(stream_t *);
// size of immediately accessible content
u64 stream_size(stream_t *); u64 stream_size(stream_t *);
bool stream_chunk(stream_t *); // Return current character, push position by 1
char stream_next(stream_t *); char stream_next(stream_t *);
// Peek current character, do not push position
char stream_peek(stream_t *); char stream_peek(stream_t *);
// Seek forward or backward in the stream, return success
bool stream_seek(stream_t *, i64); bool stream_seek(stream_t *, i64);
bool stream_seek_forward(stream_t *, u64); bool stream_seek_forward(stream_t *, u64);
bool stream_seek_backward(stream_t *, u64); bool stream_seek_backward(stream_t *, u64);
// Return a relative substring (using sv_t) of a given size
sv_t stream_substr(stream_t *, u64); sv_t stream_substr(stream_t *, u64);
// Return an absolutely located substring (using sv_t) at given index and of
// given size.
sv_t stream_substr_abs(stream_t *, u64, u64); sv_t stream_substr_abs(stream_t *, u64, u64);
/// Basic definitions for a Lisp /// Basic definitions for a Lisp
@@ -230,4 +238,18 @@ lisp_t *tag_sym(char *);
lisp_t *tag_cons(cons_t *); lisp_t *tag_cons(cons_t *);
lisp_t *tag_vec(vec_t *); lisp_t *tag_vec(vec_t *);
/// Reader
typedef enum
{
READ_OK = 0,
} read_err_t;
// Attempt to read an expression from the stream, storing it in a pointer,
// returning any errors if failed
read_err_t read(sys_t *, stream_t *, lisp_t **);
// Attempt to read all expressions from a stream till end of content, storing
// them in the given vector. Return any error at any point during the read.
read_err_t read_all(sys_t *, stream_t *, vec_t *);
#endif #endif

View File

@@ -38,11 +38,14 @@ seamless in that regard. But we'll need to set a calling convention
in order to make calling into this seamless from a runtime in order to make calling into this seamless from a runtime
perspective. perspective.
* Tasks * Tasks
** TODO Potentially optimise symbol table ingress :optimisation:design: ** TODO Capitalise symbols (TBD) :optimisation:design:
Should we capitalise symbols? This way, we limit the symbol table's Should we capitalise symbols? This way, we limit the symbol table's
possible options a bit (potentially we could design a better hashing possible options a bit (potentially we could design a better hashing
algorithm?) and it would be kinda like an actual Lisp. algorithm?) and it would be kinda like an actual Lisp.
** TODO Reader system ** TODO Design Strings
We have ~sv_t~ so our basic C API is done. We just need pluggable
functions to construct and deconstruct strings as lisps.
** WIP Reader system
We need to design a reader system. The big idea: given a "stream" of We need to design a reader system. The big idea: given a "stream" of
data, we can break out expressions from it. An expression could be data, we can break out expressions from it. An expression could be
either an atomic value or a container. either an atomic value or a container.
@@ -51,7 +54,7 @@ The natural method is doing this one at a time (the runtime provides a
~read~ function to do this), but we can also convert an entire stream ~read~ function to do this), but we can also convert an entire stream
into expressions by consuming it fully. So the principal function into expressions by consuming it fully. So the principal function
here is ~read: stream -> expr~. here is ~read: stream -> expr~.
*** TODO Design streams *** DONE Design streams
A stream needs to be able to provide characters for us to interpret in A stream needs to be able to provide characters for us to interpret in
our parsing. Lisp is an LL(1) grammar so we only really need one our parsing. Lisp is an LL(1) grammar so we only really need one
character lookahead, but seeking is very useful. character lookahead, but seeking is very useful.
@@ -74,21 +77,17 @@ parsing isn't as concerned with the specifics of the underlying data
stream. We can use a tagged union of data structures representing the stream. We can use a tagged union of data structures representing the
different underlying stream types, then generate abstract functions different underlying stream types, then generate abstract functions
that provide common functionality. that provide common functionality.
**** TODO Design the tagged union
**** TODO Design the API 2025-08-29: A really basic interface that makes the parse stage a bit
#+begin_src c easier. We're not going to do anything more advanced than the API
bool stream_eos(stream_t *); i.e. no parsing.
char stream_next(stream_t *); **** DONE Design the tagged union
char stream_peek(stream_t *); **** DONE Design the API
sv_t stream_substr(stream_t *, u64, u64); *** WIP Figure out the possible parse errors
bool stream_seek(stream_t *, i64); *** DONE Design what a "parser function" would look like
bool stream_close(stream_t *);
#+end_src
*** TODO Figure out the possible parse errors
*** TODO Design what a "parser function" would look like
The general function is something like ~stream -> T | Err~. What The general function is something like ~stream -> T | Err~. What
other state do we need to encode? other state do we need to encode?
*** TODO Write a parser for integers *** WIP Write a parser for integers
*** TODO Write a parser for symbols *** TODO Write a parser for symbols
*** TODO Write a parser for lists *** TODO Write a parser for lists
*** TODO Write a parser for vectors *** TODO Write a parser for vectors

View File

@@ -2,9 +2,9 @@
set -xe set -xe
CFLAGS="-Wall -Wextra -std=c11 -ggdb -fsanitize=address -fsanitize=undefined" CFLAGS="-Wall -Wextra -std=c11 -ggdb -fsanitize=address -fsanitize=undefined -Wswitch -Wswitch-enum"
LINK="" LINK="-I."
LIB="sv.c vec.c symtable.c tag.c constructor.c stream.c sys.c" LIB=$(find "./" -name "*.c" -not -name "main.c" -not -name "test.c")
OUT="alisp.out" OUT="alisp.out"
build() { build() {

50
main.c
View File

@@ -21,53 +21,9 @@
int main(void) int main(void)
{ {
stream_t stream = {0}; stream_t stream = {0};
char _data[] = "Hello, world!"; stream_init_pipe(&stream, "<stdin>", stdin);
sv_t data = SV(_data, ARRSIZE(_data) - 1); sv_t sv = stream_substr(&stream, 10);
char filename[] = "lorem.txt"; printf("=> `" PR_SV "`\n", SV_FMT(sv));
// stream_init_string(&stream, NULL, data);
FILE *fp = fopen(filename, "rb");
stream_init_file(&stream, filename, fp);
// stream_init_file(&stream, "stdin", stdin);
/// test 1
// printf("[debug]: setup stream pipe\n");
// do
// {
// printf("%s[%lu]: `%c`\n", stream.name, stream.position,
// stream_next(&stream));
// } while (!stream_eoc(&stream));
// printf("%lu/%lu\n", stream.position, stream_size(&stream));
/// test 2
sv_t a = stream_substr(&stream, 100);
sv_t a_ = sv_copy(a);
printf("`" PR_SV "`\n", SV_FMT(a));
stream_seek(&stream, 100);
sv_t b = stream_substr_abs(&stream, 0, 100);
sv_t b_ = sv_copy(b);
printf("`" PR_SV "`\n", SV_FMT(b));
printf("a=b ? %s\na_=b_ ? %s\n",
memcmp(&a, &b, sizeof(a)) == 0 ? "yes" : "no",
a_.size == b_.size && strncmp(a_.data, b_.data, a_.size) == 0 ? "yes"
: "no");
sv_t c = stream_substr(&stream, 100);
sv_t c_ = sv_copy(c);
printf("`" PR_SV "`\n", SV_FMT(c));
stream_seek(&stream, 100);
sv_t d = stream_substr_abs(&stream, stream.position - 100, 100);
sv_t d_ = sv_copy(d);
printf("`" PR_SV "`\n", SV_FMT(d));
printf("c=d ? %s\nc_=d_ ? %s\n",
memcmp(&c, &d, sizeof(a)) == 0 ? "yes" : "no",
c_.size == d_.size && strncmp(c_.data, d_.data, c_.size) == 0 ? "yes"
: "no");
printf("eos?=%s\n", stream_eos(&stream) ? "yes" : "no");
stream_stop(&stream); stream_stop(&stream);
// fclose(fp);
return 0; return 0;
} }

View File

@@ -12,6 +12,7 @@
* Description: Stream implementation * Description: Stream implementation
*/ */
#include <stdlib.h>
#include <string.h> #include <string.h>
#include "./alisp.h" #include "./alisp.h"
@@ -25,7 +26,23 @@ stream_err_t stream_init_string(stream_t *stream, char *name, sv_t contents)
stream->type = STREAM_TYPE_STRING; stream->type = STREAM_TYPE_STRING;
stream->name = name; stream->name = name;
stream->string = contents; stream->string = sv_copy(contents);
return STREAM_ERR_OK;
}
stream_err_t stream_init_pipe(stream_t *stream, char *name, FILE *pipe)
{
if (!stream || !pipe)
return STREAM_ERR_INVALID_PTR;
name = name ? name : "<stream>";
memset(stream, 0, sizeof(*stream));
stream->type = STREAM_TYPE_PIPE;
stream->name = name;
stream->pipe.file = pipe;
vec_init(&stream->pipe.cache, STREAM_DEFAULT_CHUNK);
return STREAM_ERR_OK; return STREAM_ERR_OK;
} }
@@ -42,8 +59,6 @@ stream_err_t stream_init_file(stream_t *stream, char *name, FILE *pipe)
stream->pipe.file = pipe; stream->pipe.file = pipe;
vec_init(&stream->pipe.cache, STREAM_DEFAULT_CHUNK); vec_init(&stream->pipe.cache, STREAM_DEFAULT_CHUNK);
// try to read an initial chunk
stream_chunk(stream);
return STREAM_ERR_OK; return STREAM_ERR_OK;
} }
@@ -55,8 +70,9 @@ void stream_stop(stream_t *stream)
switch (stream->type) switch (stream->type)
{ {
case STREAM_TYPE_STRING: case STREAM_TYPE_STRING:
// Nothing to do, all dealt with outside of stream free(stream->string.data);
break; break;
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE: case STREAM_TYPE_FILE:
// Must cleanup vector // Must cleanup vector
vec_free(&stream->pipe.cache); vec_free(&stream->pipe.cache);
@@ -72,6 +88,7 @@ u64 stream_size(stream_t *stream)
{ {
case STREAM_TYPE_STRING: case STREAM_TYPE_STRING:
return stream->string.size; return stream->string.size;
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE: case STREAM_TYPE_FILE:
return stream->pipe.cache.size; return stream->pipe.cache.size;
default: default:
@@ -87,6 +104,7 @@ bool stream_eos(stream_t *stream)
{ {
case STREAM_TYPE_STRING: case STREAM_TYPE_STRING:
return stream->position >= stream->string.size; return stream->position >= stream->string.size;
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE: case STREAM_TYPE_FILE:
return feof(stream->pipe.file); return feof(stream->pipe.file);
default: default:
@@ -102,6 +120,7 @@ bool stream_eoc(stream_t *stream)
{ {
case STREAM_TYPE_STRING: case STREAM_TYPE_STRING:
return stream->position >= stream->string.size; return stream->position >= stream->string.size;
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE: case STREAM_TYPE_FILE:
return feof(stream->pipe.file) && return feof(stream->pipe.file) &&
stream->position >= stream->pipe.cache.size; stream->position >= stream->pipe.cache.size;
@@ -114,21 +133,33 @@ bool stream_eoc(stream_t *stream)
bool stream_chunk(stream_t *stream) bool stream_chunk(stream_t *stream)
{ {
assert(stream); assert(stream);
u64 to_read = STREAM_DEFAULT_CHUNK;
switch (stream->type) switch (stream->type)
{ {
case STREAM_TYPE_STRING: case STREAM_TYPE_STRING:
// vacuously true // vacuously true
return true; return true;
case STREAM_TYPE_PIPE:
to_read = 1;
// fallthrough
case STREAM_TYPE_FILE: case STREAM_TYPE_FILE:
{ {
if (feof(stream->pipe.file)) if (feof(stream->pipe.file))
// We can't read anymore. End of the line
return false; return false;
vec_ensure_free(&stream->pipe.cache, STREAM_DEFAULT_CHUNK); vec_ensure_free(&stream->pipe.cache, to_read);
int read = fread(vec_data(&stream->pipe.cache) + stream->pipe.cache.size, 1, int read = fread(vec_data(&stream->pipe.cache) + stream->pipe.cache.size, 1,
STREAM_DEFAULT_CHUNK, stream->pipe.file); to_read, stream->pipe.file);
// If we read something it's a good thing
if (read > 0)
{
stream->pipe.cache.size += read; stream->pipe.cache.size += read;
return true; return true;
} }
else
return false;
}
default: default:
FAIL("Unreachable"); FAIL("Unreachable");
return 0; return 0;
@@ -154,11 +185,15 @@ char stream_peek(stream_t *stream)
{ {
case STREAM_TYPE_STRING: case STREAM_TYPE_STRING:
return stream->string.data[stream->position]; return stream->string.data[stream->position];
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE: case STREAM_TYPE_FILE:
{ {
// Cached already? We are done. // Cached already? We are done.
if (stream->position < stream->pipe.cache.size) if (stream->position < stream->pipe.cache.size)
return ((char *)vec_data(&stream->pipe.cache))[stream->position]; {
const char *const str = vec_data(&stream->pipe.cache);
return str[stream->position];
}
// Try to read chunks in till we've reached it or we're at the end of the // Try to read chunks in till we've reached it or we're at the end of the
// file. // file.
@@ -204,6 +239,7 @@ bool stream_seek_forward(stream_t *stream, u64 offset)
stream->position += offset; stream->position += offset;
return true; return true;
} }
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE: case STREAM_TYPE_FILE:
{ {
// Similar principle as stream_peek really... // Similar principle as stream_peek really...
@@ -223,7 +259,7 @@ bool stream_seek_forward(stream_t *stream, u64 offset)
continue; continue;
// Same principle as the stream_eoc(stream) check. // Same principle as the stream_eoc(stream) check.
if (stream->position + offset >= stream->pipe.cache.size) if (stream->position + offset > stream->pipe.cache.size)
return false; return false;
stream->position += offset; stream->position += offset;
return true; return true;
@@ -248,7 +284,7 @@ sv_t stream_substr(stream_t *stream, u64 size)
if (stream_eoc(stream)) if (stream_eoc(stream))
return SV(NULL, 0); return SV(NULL, 0);
// TODO: this is kinda disgusting, any better way of doing this // See if I can go forward enough to make this substring
u64 current_position = stream->position; u64 current_position = stream->position;
bool successful = stream_seek_forward(stream, size); bool successful = stream_seek_forward(stream, size);
// Reset the position in either situation // Reset the position in either situation
@@ -263,6 +299,7 @@ sv_t stream_substr(stream_t *stream, u64 size)
case STREAM_TYPE_STRING: case STREAM_TYPE_STRING:
ptr = stream->string.data; ptr = stream->string.data;
break; break;
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE: case STREAM_TYPE_FILE:
ptr = vec_data(&stream->pipe.cache); ptr = vec_data(&stream->pipe.cache);
break; break;
@@ -282,11 +319,12 @@ sv_t stream_substr_abs(stream_t *stream, u64 index, u64 size)
if (index + size <= stream_size(stream)) if (index + size <= stream_size(stream))
return SV(stream->string.data + index, size); return SV(stream->string.data + index, size);
return SV(NULL, 0); return SV(NULL, 0);
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE: case STREAM_TYPE_FILE:
{ {
if (index + size <= stream_size(stream)) if (index + size <= stream_size(stream))
return SV(vec_data(&stream->pipe.cache) + index, size); return SV(vec_data(&stream->pipe.cache) + index, size);
// stream_size(stream) <= index + size => try reading chunks // (index + size > stream_size(stream)) => try reading chunks
for (bool read_chunk = stream_chunk(stream); for (bool read_chunk = stream_chunk(stream);
read_chunk && index + size >= stream->pipe.cache.size; read_chunk && index + size >= stream->pipe.cache.size;
read_chunk = stream_chunk(stream)) read_chunk = stream_chunk(stream))