Compare commits

..

13 Commits

Author SHA1 Message Date
Aryadev Chavali
fe727d75e4 remove breaks after return in switch-case 2026-02-09 09:57:04 +00:00
Aryadev Chavali
06a4eafbb9 stream: optimise stream_substr_abs, stream_till, stream_while
In the base cases of STRINGS and FILES, these functions should be very
quick (operating on stuff in memory), and amortized for pipes.
2026-02-09 09:57:04 +00:00
Aryadev Chavali
fc5a6eb8fb stream: refactor stream_seek_forward for changes in FILE API 2026-02-09 09:57:04 +00:00
Aryadev Chavali
5c7fb0fabd stream: stream_stop -> stream_free 2026-02-09 09:57:04 +00:00
Aryadev Chavali
f164427b47 stream: stream_reset
We can reuse the same stream by resetting it.  We don't delete the
cached data.
2026-02-09 09:57:04 +00:00
Aryadev Chavali
2238f348e1 stream: stream_line_col includes current position in computation 2026-02-09 09:57:04 +00:00
Aryadev Chavali
f56a59ff7a stream: STREAM_TYPE_FILE will now read file upfront
No more chunked reading - if ~stream_init_file~ is used, the
constructor slurps the entire file into the cache.  This essentially
pays up front for a bunch of checks.

~stream_init_pipe~ should be used for chunked reading.
2026-02-09 09:57:04 +00:00
Aryadev Chavali
6a54c54bfb stream: a bit of tidying up 2026-02-09 09:57:04 +00:00
Aryadev Chavali
2855536bdd stream: a few further refactors due to new sv functions 2026-02-09 09:57:04 +00:00
Aryadev Chavali
8426f04734 sv: refactor to use sv_truncate 2026-02-09 09:57:04 +00:00
Aryadev Chavali
459f434e5d stream: bug fix on stream_while/stream_till (due to eda2711)
The new versions of stream_substr require exact sizes.
2026-02-09 09:57:04 +00:00
Aryadev Chavali
9ee8955908 stream: Refactor stream_substr and stream_substr_abs using stream_sv
Bit more elegant, and allows the caller code to focus on the task
rather than string management.
2026-02-09 09:57:04 +00:00
Aryadev Chavali
477abc9aa1 stream: stream_sv and stream_sv_abs
Just straight string views into the current stream content - obviously
may not be stable.  Helpful when analysing a non-moving stream.
2026-02-09 09:57:04 +00:00
5 changed files with 202 additions and 154 deletions

View File

@@ -53,8 +53,11 @@ typedef struct
stream_err_t stream_init_string(stream_t *, const char *, sv_t);
stream_err_t stream_init_pipe(stream_t *, const char *, FILE *);
// NOTE: stream_init_file will attempt to read all content from the FILE
// descriptor. Use with caution.
stream_err_t stream_init_file(stream_t *, const char *, FILE *);
void stream_stop(stream_t *);
void stream_reset(stream_t *);
void stream_free(stream_t *);
// End of Content (i.e. we've consumed all cached content/file)
bool stream_eoc(stream_t *);
@@ -70,6 +73,12 @@ u64 stream_seek(stream_t *, i64);
u64 stream_seek_forward(stream_t *, u64);
u64 stream_seek_backward(stream_t *, u64);
// Return a string view to stream data relative to the current position of the
// stream
sv_t stream_sv(stream_t *);
// Return a string view to data from the start of the stream
sv_t stream_sv_abs(stream_t *);
// Return a relative substring of a given size
sv_t stream_substr(stream_t *, u64);
// Return an absolute substring at given index and of given size.

View File

@@ -17,13 +17,10 @@ const char *read_err_to_cstr(read_err_t err)
{
case READ_ERR_OK:
return "OK";
break;
case READ_ERR_EOF:
return "EOF";
break;
case READ_ERR_UNKNOWN_CHAR:
return "UNKNOWN_CHAR";
break;
default:
FAIL("Unreachable");
}

View File

@@ -11,6 +11,8 @@
#include <alisp/base.h>
#include <alisp/stream.h>
#include <alisp/sv.h>
#include <alisp/vec.h>
const char *stream_err_to_cstr(stream_err_t err)
{
@@ -18,19 +20,14 @@ const char *stream_err_to_cstr(stream_err_t err)
{
case STREAM_ERR_INVALID_PTR:
return "INVALID PTR";
break;
case STREAM_ERR_FILE_NONEXISTENT:
return "FILE NONEXISTENT";
break;
case STREAM_ERR_FILE_READ:
return "FILE READ";
break;
case STREAM_ERR_PIPE_NONEXISTENT:
return "PIPE NONEXISTENT";
break;
case STREAM_ERR_OK:
return "OK";
break;
default:
FAIL("Unreachable");
}
@@ -65,8 +62,6 @@ stream_err_t stream_init_pipe(stream_t *stream, const char *name, FILE *pipe)
stream->name = name;
stream->pipe.file = pipe;
vec_init(&stream->pipe.cache, STREAM_DEFAULT_CHUNK);
return STREAM_ERR_OK;
}
@@ -82,12 +77,30 @@ stream_err_t stream_init_file(stream_t *stream, const char *name, FILE *pipe)
stream->type = STREAM_TYPE_FILE;
stream->name = name;
stream->pipe.file = pipe;
stream->pipe.file = NULL;
// NOTE: We're reading all the data from the file descriptor now.
fseek(pipe, 0, SEEK_END);
long size = ftell(pipe);
fseek(pipe, 0, SEEK_SET);
vec_ensure_free(&stream->pipe.cache, size);
int read = fread(vec_data(&stream->pipe.cache), 1, size, pipe);
// These must be equivalent for this function.
assert(read == size);
stream->pipe.cache.size += size;
return STREAM_ERR_OK;
}
void stream_stop(stream_t *stream)
// Rewind a stream so it can be reused: only the read position is moved back
// to the start.  Nothing else on the stream is touched here.  A NULL stream is
// ignored.
void stream_reset(stream_t *stream)
{
  if (stream != NULL)
  {
    stream->position = 0;
  }
}
void stream_free(stream_t *stream)
{
if (!stream)
return;
@@ -97,11 +110,8 @@ void stream_stop(stream_t *stream)
free(stream->string.data);
break;
case STREAM_TYPE_FILE:
// ensure we reset the FILE pointer to the start
fseek(stream->pipe.file, 0, SEEK_SET);
// fallthrough
case STREAM_TYPE_PIPE:
// Must cleanup vector
// Must cleanup caching vector
vec_free(&stream->pipe.cache);
break;
}
@@ -124,31 +134,15 @@ u64 stream_size(stream_t *stream)
}
}
// End of Stream: reports whether the underlying source has been exhausted.
//  - STRING streams: true once the position has reached the string's size.
//  - PIPE/FILE streams: whatever feof() reports for the backing FILE.
// The stream must be non-NULL (asserted).
bool stream_eos(stream_t *stream)
{
  assert(stream);

  if (stream->type == STREAM_TYPE_STRING)
    return stream->position >= stream->string.size;

  if (stream->type == STREAM_TYPE_PIPE || stream->type == STREAM_TYPE_FILE)
    return feof(stream->pipe.file);

  // Any other type value is a programming error.
  FAIL("Unreachable");
  return 0;
}
bool stream_eoc(stream_t *stream)
{
assert(stream);
switch (stream->type)
{
case STREAM_TYPE_STRING:
return stream->position >= stream->string.size;
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE:
case STREAM_TYPE_STRING:
return stream->position >= stream_size(stream);
case STREAM_TYPE_PIPE:
return feof(stream->pipe.file) &&
stream->position >= stream->pipe.cache.size;
default:
@@ -160,23 +154,20 @@ bool stream_eoc(stream_t *stream)
bool stream_chunk(stream_t *stream)
{
assert(stream);
u64 to_read = STREAM_DEFAULT_CHUNK;
switch (stream->type)
{
case STREAM_TYPE_STRING:
// vacuously true
return true;
case STREAM_TYPE_PIPE:
to_read = 1;
// fallthrough
case STREAM_TYPE_FILE:
case STREAM_TYPE_STRING:
// nothing to chunk, hence false
return false;
case STREAM_TYPE_PIPE:
{
if (feof(stream->pipe.file))
// We can't read anymore. End of the line
return false;
vec_ensure_free(&stream->pipe.cache, to_read);
vec_ensure_free(&stream->pipe.cache, STREAM_DEFAULT_CHUNK);
int read = fread(vec_data(&stream->pipe.cache) + stream->pipe.cache.size, 1,
to_read, stream->pipe.file);
STREAM_DEFAULT_CHUNK, stream->pipe.file);
// If we read something it's a good thing
if (read > 0)
@@ -185,7 +176,9 @@ bool stream_chunk(stream_t *stream)
return true;
}
else
{
return false;
}
}
default:
FAIL("Unreachable");
@@ -202,24 +195,20 @@ char stream_next(stream_t *stream)
char stream_peek(stream_t *stream)
{
// If we've reached end of stream, and end of content, there's really nothing
// to check here.
// End of the line? We're done.
if (stream_eoc(stream))
return '\0';
switch (stream->type)
{
case STREAM_TYPE_STRING:
return stream->string.data[stream->position];
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE:
case STREAM_TYPE_STRING:
return stream_sv(stream).data[0];
case STREAM_TYPE_PIPE:
{
// Cached already? We are done.
if (stream->position < stream->pipe.cache.size)
{
const char *const str = (char *)vec_data(&stream->pipe.cache);
return str[stream->position];
}
return stream_sv(stream).data[0];
// Try to read chunks in till we've reached it or we're at the end of the
// file.
@@ -231,7 +220,7 @@ char stream_peek(stream_t *stream)
// Same principle as the stream_eos(stream) check.
if (stream->position >= stream->pipe.cache.size)
return '\0';
return ((char *)vec_data(&stream->pipe.cache))[stream->position];
return stream_sv(stream).data[0];
}
default:
FAIL("Unreachable");
@@ -246,36 +235,38 @@ u64 stream_seek(stream_t *stream, i64 offset)
else if (offset > 0)
return stream_seek_forward(stream, offset);
else
// vacuously successful
return true;
return 0;
}
u64 stream_seek_forward(stream_t *stream, u64 offset)
{
if (stream_eoc(stream))
return 0;
else if (stream->position + offset < stream_size(stream))
{
stream->position += offset;
return offset;
}
// NOTE: The only case not caught by the above branches is exact-to-end
// movement (i.e. offset puts us exactly at the end of the stream) or movement
// beyond what we've cached.
switch (stream->type)
{
case STREAM_TYPE_FILE:
case STREAM_TYPE_STRING:
{
if (stream->position + offset >= stream->string.size)
return 0;
// Clamp in the case of FILE and STRING movement since they're already
// fully cached.
if (stream->position + offset >= stream_size(stream))
offset = stream_size(stream) - stream->position;
stream->position += offset;
return offset;
}
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE:
{
// Similar principle as stream_peek really...
// Cached already? We are done.
if (stream->position + offset < stream->pipe.cache.size)
{
stream->position += offset;
return offset;
}
// Pipes may have data remaining that hasn't been cached - we need to chunk
// before we can be sure to stop.
// Try to read chunks in till we've reached it or we're at the end of the
// file.
@@ -284,32 +275,58 @@ u64 stream_seek_forward(stream_t *stream, u64 offset)
read_chunk = stream_chunk(stream))
continue;
// Same principle as the stream_eoc(stream) check.
if (stream->position + offset > stream->pipe.cache.size)
{
offset = stream->pipe.cache.size - stream->position;
}
// NOTE: We've read everything from the pipe, but the offset is greater. We
// must clamp here.
if (stream->position + offset > stream_size(stream))
offset = stream_size(stream) - stream->position;
stream->position += offset;
return offset;
}
default:
FAIL("Unreachable");
return 0;
}
return 0;
}
// Move the stream position backwards by up to `offset` bytes.
// Returns the number of bytes actually moved, which is smaller than `offset`
// when fewer than `offset` bytes lie behind the current position.
u64 stream_seek_backward(stream_t *stream, u64 offset)
{
// Debug builds trap on a NULL stream here...
assert(stream);
// ...while builds with assertions compiled out (NDEBUG) fall back to
// reporting that no movement happened.
if (!stream)
return 0;
// Clamp so the unsigned position can never wrap below zero.
if (stream->position < offset)
{
offset = stream->position;
}
stream->position -= offset;
return offset;
}
// Return a string view of the stream's data relative to the current position,
// built by chopping `stream->position` bytes off the left of the absolute
// view.
//
// The view points directly into the stream's own storage rather than a copy,
// so it may not remain stable if the stream changes afterwards.
//
// A NULL stream yields whatever stream_sv_abs returns for NULL (an empty
// view); previously this path dereferenced `stream->position` regardless.
sv_t stream_sv(stream_t *stream)
{
  sv_t sv = stream_sv_abs(stream);
  // stream_sv_abs already tolerates NULL; we must not read the position of a
  // stream that does not exist.
  if (!stream)
    return sv;
  return sv_chop_left(sv, stream->position);
}
// Return a string view of the stream's data from the very start of the
// stream, ignoring the current position.
//  - NULL stream: an empty view, SV(NULL, 0).
//  - STRING streams: the string the stream holds.
//  - FILE/PIPE streams: the cache vector's data, sized by stream_size().
// The view aliases the stream's storage; no copy is made.
sv_t stream_sv_abs(stream_t *stream)
{
  if (stream == NULL)
    return SV(NULL, 0);

  switch (stream->type)
  {
  case STREAM_TYPE_STRING:
    return stream->string;
  case STREAM_TYPE_FILE:
  case STREAM_TYPE_PIPE:
  {
    // Both file-backed kinds keep their content in the same cache vector.
    sv_t cached = SV((char *)vec_data(&stream->pipe.cache), stream_size(stream));
    return cached;
  }
  default:
    FAIL("Unreachable");
    return SV(NULL, 0);
  }
}
sv_t stream_substr(stream_t *stream, u64 size)
{
if (stream_eoc(stream))
@@ -324,104 +341,130 @@ sv_t stream_substr(stream_t *stream, u64 size)
if (successful != size)
return SV(NULL, 0);
char *ptr = NULL;
switch (stream->type)
{
case STREAM_TYPE_STRING:
ptr = stream->string.data;
break;
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE:
ptr = (char *)vec_data(&stream->pipe.cache);
break;
default:
FAIL("Unreachable");
return SV(NULL, 0);
}
return SV(ptr + stream->position, size);
sv_t sv = stream_sv(stream);
sv = sv_truncate(sv, size);
return sv;
}
sv_t stream_substr_abs(stream_t *stream, u64 index, u64 size)
{
switch (stream->type)
{
case STREAM_TYPE_STRING:
if (index + size <= stream_size(stream))
return SV(stream->string.data + index, size);
return SV(NULL, 0);
case STREAM_TYPE_PIPE:
case STREAM_TYPE_FILE:
{
if (index + size <= stream_size(stream))
return SV((char *)vec_data(&stream->pipe.cache) + index, size);
// (index + size > stream_size(stream)) => try reading chunks
for (bool read_chunk = stream_chunk(stream);
read_chunk && index + size >= stream->pipe.cache.size;
read_chunk = stream_chunk(stream))
continue;
if (index + size > stream_size(stream))
return SV(NULL, 0);
return SV((char *)vec_data(&stream->pipe.cache) + index, size);
{
// => try reading chunks till either we drop or we have enough space
for (bool read_chunk = stream_chunk(stream);
read_chunk && index + size >= stream->pipe.cache.size;
read_chunk = stream_chunk(stream))
continue;
}
break;
}
case STREAM_TYPE_STRING:
case STREAM_TYPE_FILE:
break;
default:
assert("Unreachable");
return SV(NULL, 0);
FAIL("Unreachable");
}
sv_t sv = stream_sv_abs(stream);
sv = sv_chop_left(sv, index);
sv = sv_truncate(sv, size);
return sv;
}
sv_t stream_till(stream_t *stream, const char *str)
{
if (stream_eoc(stream))
return SV(NULL, 0);
u64 current_position = stream->position;
for (char c = stream_peek(stream); c != '\0' && strchr(str, c) == NULL;
c = stream_next(stream))
continue;
u64 size = stream->position - current_position;
if (size == 0)
return SV(NULL, 0);
return stream_substr_abs(stream, current_position, size - 1);
sv_t cur_sv = stream_sv(stream);
sv_t sv = sv_till(cur_sv, str);
stream_seek_forward(stream, sv.size);
switch (stream->type)
{
case STREAM_TYPE_FILE:
case STREAM_TYPE_STRING:
return sv;
case STREAM_TYPE_PIPE:
{
if (cur_sv.size > sv.size)
return sv;
// Build a substring by hand while chunking data.
u64 index, size;
for (index = stream->position - sv.size, size = sv.size;
cur_sv.size == sv.size; size += sv.size)
{
cur_sv = stream_sv(stream);
sv = sv_till(cur_sv, str);
stream_seek_forward(stream, sv.size);
if (sv.size == 0)
// Must stop if this has happened; nothing else to pick up.
break;
}
return stream_substr_abs(stream, index, size);
}
default:
FAIL("Unreachable");
}
}
sv_t stream_while(stream_t *stream, const char *str)
{
if (stream_eoc(stream))
return SV(NULL, 0);
u64 current_position = stream->position;
for (char c = stream_peek(stream); c != '\0' && strchr(str, c);
c = stream_next(stream))
continue;
u64 size = stream->position - current_position;
if (size == 0)
return SV(NULL, 0);
return stream_substr_abs(stream, current_position, size - 1);
sv_t cur_sv = stream_sv(stream);
sv_t sv = sv_while(cur_sv, str);
stream_seek_forward(stream, sv.size);
switch (stream->type)
{
case STREAM_TYPE_FILE:
case STREAM_TYPE_STRING:
return sv;
case STREAM_TYPE_PIPE:
{
if (cur_sv.size > sv.size)
return sv;
// Build a substring by hand while chunking data.
u64 index, size;
for (index = stream->position - sv.size, size = sv.size;
cur_sv.size == sv.size; size += sv.size)
{
cur_sv = stream_sv(stream);
sv = sv_while(cur_sv, str);
stream_seek_forward(stream, sv.size);
if (sv.size == 0)
// Must stop if this has happened; nothing else to pick up.
break;
}
return stream_substr_abs(stream, index, size);
}
default:
FAIL("Unreachable");
}
}
void stream_line_col(stream_t *stream, u64 *line, u64 *col)
{
if (!stream || !line || !col)
return;
// Go through the cache, byte by byte.
char *cache = NULL;
u64 size = 0;
if (stream->type == STREAM_TYPE_STRING)
{
cache = stream->string.data;
size = stream->string.size;
}
else
{
cache = (char *)vec_data(&stream->pipe.cache);
size = stream->pipe.cache.size;
}
// Generate a string view from the stream of exactly the content /up to/
// stream.position.
sv_t sv = stream_sv_abs(stream);
sv = sv_truncate(sv, stream->position + 1);
*line = 1;
*col = 0;
for (u64 i = 0; i < size; ++i)
// TODO: Could this be faster? Does it matter?
for (u64 i = 0; i < sv.size; ++i)
{
char c = cache[i];
char c = sv.data[i];
if (c == '\n')
{
*line += 1;

View File

@@ -60,7 +60,7 @@ sv_t sv_till(sv_t sv, const char *reject)
if (offset == sv.size)
return sv;
return sv_chop_right(sv, sv.size - offset);
return sv_truncate(sv, offset);
}
sv_t sv_while(sv_t sv, const char *accept)
@@ -75,7 +75,7 @@ sv_t sv_while(sv_t sv, const char *accept)
if (offset == sv.size)
return sv;
return sv_chop_right(sv, sv.size - offset);
return sv_truncate(sv, offset);
}
/* Copyright (C) 2025, 2026 Aryadev Chavali

View File

@@ -82,7 +82,7 @@ void stream_test_string(void)
test_strings[i].size);
TEST(!stream_eoc(&stream), "Not end of content already");
stream_stop(&stream);
stream_free(&stream);
TEST(strncmp(copy.data, test_strings[i].data, copy.size) == 0,
"Freeing a stream does not free the underlying memory it was derived "
"from");
@@ -96,7 +96,7 @@ void stream_test_string(void)
stream_err_to_cstr(err));
TEST(stream_size(&stream) == 0, "NULL stream size is 0");
TEST(stream_eoc(&stream), "NULL stream is always at end of content");
stream_stop(&stream);
stream_free(&stream);
TEST_END();
}
@@ -113,9 +113,8 @@ void stream_test_file(void)
TEST(err == STREAM_ERR_OK, "Expected initialisating to be okay: %s",
stream_err_to_cstr(err));
}
TEST(stream_size(&stream) == 0, "Stream doesn't read on init: size = %lu",
stream_size(&stream));
TEST(!stream_eoc(&stream), "Stream should not be at the EoC from init.");
stream_free(&stream);
}
// try to initialise the stream again but against a nonexistent file - we're
@@ -164,7 +163,7 @@ void stream_test_peek_next(void)
"(%c)",
c3, c2);
stream_stop(&stream);
stream_free(&stream);
}
// Invalid streams
@@ -181,7 +180,7 @@ void stream_test_peek_next(void)
"Next on an invalid stream should not affect position (%lu -> %lu)",
old_position, stream.position);
stream_stop(&stream);
stream_free(&stream);
}
TEST_END();
}
@@ -210,7 +209,7 @@ void stream_test_seek(void)
"stream (%lu -> %lu)",
old_position, stream.position);
stream_stop(&stream);
stream_free(&stream);
}
// Valid streams
@@ -268,7 +267,7 @@ void stream_test_seek(void)
"above.",
stream.position);
stream_stop(&stream);
stream_free(&stream);
}
TEST_END();
@@ -302,7 +301,7 @@ void stream_test_substr(void)
position, size);
}
stream_stop(&stream);
stream_free(&stream);
}
// Taking substrings of valid streams
@@ -377,7 +376,7 @@ void stream_test_substr(void)
}
}
stream_stop(&stream);
stream_free(&stream);
}
TEST_END();
}