Compare commits

..

8 Commits

Author SHA1 Message Date
Aryadev Chavali
7e3dd2679d parser/ast: Remove union name in ast_node_t
Destructures the names of the union into the namespace of the
structure itself; bit easier to use IMO.
2026-01-29 03:15:51 +00:00
Aryadev Chavali
8764b65aff parser: remove NIL as a known value
Not really needed or necessary.
2026-01-29 03:15:12 +00:00
Aryadev Chavali
2e24d3a618 parser/parser: slight tidy up in parse_string/parse_symbol 2026-01-29 03:14:36 +00:00
Aryadev Chavali
645ea5a04e main: Fix trivial pipes error in read_pipe, better comments overall. 2026-01-28 09:49:42 +00:00
Aryadev Chavali
9d8a0c1e22 examples: Remove newline from end of hello-world 2026-01-28 09:07:22 +00:00
Aryadev Chavali
65e4dc0b29 main: command line arguments for filename, and allow stdin parsing
- Now take a single command line argument for the filename to read and
  compile.
- If filename is "--", then read stdin until EOF using a different
  read handler (using ~vec_t~ along with buffered reading).
2026-01-28 09:06:00 +00:00
Aryadev Chavali
afc0f9c034 main: deal with file read errors more appropriately, unify error interface
- ~read_file~ now returns an error code and takes the ~sv_t~ (which
  contains the file contents) by pointer.  We can now deal with the
  error in ~main~ directly.
- Make the return code of ~main~ a variable which error branches can
  set.  Unify the error branch and normal branch code.  Pattern for
  error handling is now unified.
2026-01-28 09:02:46 +00:00
Aryadev Chavali
84996130b7 base: Added some extra logging macros
Adds LOG and LOG_ERR.  LOG_ERR always compiles to a /stderr/ print.
LOG, on the other hand, compiles to nothing unless VERBOSE_LOGS is
non-zero.  It defaults to 0, so it must be defined at compile time to
enable LOG output - hence the adjustment of the Makefile.
2026-01-28 08:59:29 +00:00
7 changed files with 143 additions and 50 deletions

View File

@@ -9,7 +9,7 @@ OBJECTS:=$(patsubst %,$(DIST)/%.o, $(UNITS))
LDFLAGS=
GFLAGS=-Wall -Wextra -Wpedantic -std=c23 -I./include/
DFLAGS=-ggdb -fsanitize=address -fsanitize=undefined
DFLAGS=-ggdb -fsanitize=address -fsanitize=undefined -DVERBOSE_LOGS=1
RFLAGS=-O3
MODE=release

View File

@@ -1 +1 @@
"Hello, world!\n" putstr
"Hello, world!\n" putstr

View File

@@ -33,13 +33,35 @@ typedef double f64;
#define MIN(A, B) ((A) > (B) ? (B) : (A))
#define ARRSIZE(A) ((sizeof(A)) / sizeof((A)[0]))
#define FAIL(...) \
#ifndef VERBOSE_LOGS
#define VERBOSE_LOGS 0
#endif
#if VERBOSE_LOGS
#define LOG(...) \
do \
{ \
fprintf(stdout, "LOG: "); \
fprintf(stdout, __VA_ARGS__); \
} while (0);
#else
#define LOG(...)
#endif
#define LOG_ERR(...) \
do \
{ \
fprintf(stderr, "FAIL: "); \
fprintf(stderr, __VA_ARGS__); \
assert(0); \
} while (0);
#define FAIL(...) \
do \
{ \
LOG_ERR("FAIL: "); \
LOG_ERR(__VA_ARGS__); \
assert(0); \
} while (0)
#endif
/* Copyright (C) 2026 Aryadev Chavali

View File

@@ -25,7 +25,6 @@ typedef enum
/// Known symbols - may reference callables or values.
typedef enum
{
AST_KNOWN_NIL = 0,
AST_KNOWN_PUTSTR,
NUM_AST_KNOWNS,
@@ -43,7 +42,7 @@ typedef struct
ast_known_t as_known;
sv_t as_symbol;
sv_t as_string;
} value;
};
} ast_node_t;
ast_node_t ast_node_known(u64 byte, ast_known_t known);

View File

@@ -18,28 +18,101 @@
#include <arl/parser/ast.h>
#include <arl/parser/parser.h>
// NOTE(review): This hunk shows two versions of read_file with their lines
// interleaved: one returning an sv_t by value, and one returning an int and
// writing through RET.  Going by the lines that use RET, that version opens
// FILENAME in binary mode, stores the file's length in ret->size and a
// calloc'd, null terminated copy of the file's bytes in ret->data.  It
// returns 1 when fopen fails and 0 otherwise.
sv_t read_file(const char *filename)
int read_file(const char *filename, sv_t *ret)
{
// NOTE: Stupidly simple. Presumes the file is NOT three pipes in a trench
// coat.
FILE *fp = fopen(filename, "rb");
if (!fp)
FAIL("File `%s` does not exist\n", filename);
return 1;
// Seek to the end so that ftell reports the file's length.
// NOTE(review): neither fseek nor ftell is checked here.  ftell reports
// failure as -1L, and that value would flow straight into the allocation
// size below.
fseek(fp, 0, SEEK_END);
long size = ftell(fp);
ret->size = ftell(fp);
fseek(fp, 0, SEEK_SET);
// One extra byte is allocated for the null terminator written further down.
// NOTE(review): the results of calloc and fread are not checked in either
// version, so an allocation failure or short read goes unnoticed.
char *buffer = calloc(1, size + 1);
fread(buffer, size, 1, fp);
ret->data = calloc(1, ret->size + 1);
fread(ret->data, ret->size, 1, fp);
fclose(fp);
buffer[size] = '\0';
return SV(buffer, size);
ret->data[ret->size] = '\0';
return 0;
}
int main(void)
// Reads PIPE in chunks of up to 1024 bytes until feof(pipe) is true,
// appending every chunk to a vec_t.  ret->size is set to the number of bytes
// accumulated before the null terminator is appended, and ret->data is set
// to a buffer holding those bytes plus the terminator.  Always returns 0.
int read_pipe(FILE *pipe, sv_t *ret)
{
// NOTE(review): the next three statements look like lines from the old
// main(void) that the diff rendering has interleaved here; they mention
// neither PIPE nor RET.
const char *filename = "./examples/hello-world.arl";
sv_t contents = read_file(filename);
printf("%s\n=> `" PR_SV "`\n", filename, SV_FMT(contents));
// NOTE: We can't read an entire pipe at once like we did for read_file. So
// let's read in buffered chunks, with a vector to keep them contiguous.
vec_t contents = {0};
char buffer[1024];
// NOTE(review): feof only reports the end-of-file indicator.  ferror is
// never consulted, so a read error that leaves fread returning 0 without
// setting EOF would keep this loop spinning - worth confirming and handling.
while (!feof(pipe))
{
size_t bytes_read = fread(buffer, 1, sizeof(buffer), pipe);
vec_append(&contents, buffer, bytes_read);
}
ret->size = contents.size;
// Get that null terminator in, but only after we've recorded the actual size
// of what's been read.
vec_append_byte(&contents, '\0');
if (contents.not_inlined)
{
// Take the heap pointer from us.
ret->data = vec_data(&contents);
}
else
{
// vec_data(&contents) is stack allocated; can't carry that out of this
// function!
// contents.size already counts the terminator appended above, so the copy
// carries it along.
// NOTE(review): the calloc result is passed to memcpy without a check.
ret->data = calloc(1, contents.size);
memcpy(ret->data, vec_data(&contents), contents.size);
}
return 0;
}
/// Write the command line help text for `arl` to the stream FP.
void usage(FILE *fp)
{
  // The help text is fixed and has no format directives, so it is written
  // out verbatim.
  static const char help_text[] =
      "Usage: arl [FILE]\n"
      "Compiles [FILE] as ARL source code.\n"
      " [FILE]: File to compile.\n"
      "If FILE is \"--\", then read from stdin.\n";

  fputs(help_text, fp);
}
// Entry point.  With no argument, prints the usage text to stderr and
// finishes with status 1.  Otherwise argv[1] names the input: "--" selects
// stdin via read_pipe, anything else is handed to read_file.  Every exit
// path visible here that belongs to this signature funnels through the
// `end` label and returns RET.
// NOTE(review): this span is a diff rendering; lines from the previous
// main(void) are interleaved with the current ones, and a hunk header elides
// a few lines in the middle.
int main(int argc, char *argv[])
{
int ret = 0;
char *filename = "";
if (argc == 1)
{
usage(stderr);
ret = 1;
// NOTE(review): this jump skips the initialisers of `contents` and `ast`,
// which are declared further down in the same block, yet the code after
// `end:` reads contents.data and ast.nodes.capacity.  On this path both
// objects hold indeterminate values, so the free/ast_free decisions are
// made on garbage.  Consider declaring both before the first goto.
goto end;
}
else
{
filename = argv[1];
}
int read_err = 0;
sv_t contents = {0};
if (strcmp(filename, "--") == 0)
{
// Swap in a readable name for the messages below.
filename = "stdin";
read_err = read_pipe(stdin, &contents);
}
else
{
read_err = read_file(filename, &contents);
}
if (read_err)
{
// NOTE(review): perror reports errno as it stands when it is called; the
// logging call before it could change errno first - confirm or save errno
// straight after the failing read.
LOG_ERR("ERROR: Reading `%s`: ", filename);
perror("");
ret = 1;
goto end;
}
LOG("%s => `" PR_SV "`\n", filename, SV_FMT(contents));
parse_stream_t stream = {.byte = 0, .contents = contents};
ast_t ast = {0};
@@ -49,23 +122,23 @@ int main(void)
u64 line = 1, col = 0;
parse_stream_get_line_col(&stream, &line, &col);
fprintf(stderr, "%s:%lu:%lu: %s\n", filename, line, col,
parse_err_to_string(perr));
goto fail;
LOG_ERR("%s:%lu:%lu: %s\n", filename, line, col, parse_err_to_string(perr));
ret = 1;
goto end;
}
printf("=> Parsed %lu nodes\n", ast.nodes.size / sizeof(ast_node_t));
LOG("Parsed %lu nodes\n", ast.nodes.size / sizeof(ast_node_t));
// The AST dump is only compiled in when VERBOSE_LOGS is non-zero.
#if VERBOSE_LOGS
ast_print(stdout, &ast);
#endif
printf("\n");
free(contents.data);
ast_free(&ast);
return 0;
fail:
end:
// Cleanup: release whatever was allocated.
if (contents.data)
free(contents.data);
if (ast.nodes.capacity > 0)
ast_free(&ast);
return 1;
return ret;
}
/* Copyright (C) 2026 Aryadev Chavali

View File

@@ -13,8 +13,6 @@ const char *ast_known_to_cstr(ast_known_t known)
{
switch (known)
{
case AST_KNOWN_NIL:
return "nil";
case AST_KNOWN_PUTSTR:
return "putstr";
default:
@@ -27,7 +25,7 @@ ast_node_t ast_node_known(u64 byte, ast_known_t known)
return (ast_node_t){
.byte_location = byte,
.type = AST_NODE_TYPE_KNOWN,
.value = {.as_known = known},
.as_known = known,
};
}
@@ -36,7 +34,7 @@ ast_node_t ast_node_string(u64 byte, sv_t string)
return (ast_node_t){
.byte_location = byte,
.type = AST_NODE_TYPE_STRING,
.value = {.as_string = string},
.as_string = string,
};
}
@@ -45,7 +43,7 @@ ast_node_t ast_node_symbol(u64 byte, sv_t symbol)
return (ast_node_t){
.byte_location = byte,
.type = AST_NODE_TYPE_SYMBOL,
.value = {.as_symbol = symbol},
.as_symbol = symbol,
};
}
@@ -59,13 +57,13 @@ void ast_node_print(FILE *fp, ast_node_t *node)
switch (node->type)
{
case AST_NODE_TYPE_KNOWN:
fprintf(fp, "KNOWN(%s)", ast_known_to_cstr(node->value.as_known));
fprintf(fp, "KNOWN(%s)", ast_known_to_cstr(node->as_known));
break;
case AST_NODE_TYPE_SYMBOL:
fprintf(fp, "SYMBOL(" PR_SV ")", SV_FMT(node->value.as_symbol));
fprintf(fp, "SYMBOL(" PR_SV ")", SV_FMT(node->as_symbol));
break;
case AST_NODE_TYPE_STRING:
fprintf(fp, "STRING(" PR_SV ")", SV_FMT(node->value.as_string));
fprintf(fp, "STRING(" PR_SV ")", SV_FMT(node->as_string));
break;
case NUM_AST_NODE_TYPES:
default:

View File

@@ -105,31 +105,32 @@ parse_err_t parse_string(parse_stream_t *stream, ast_node_t *ret)
{
// Increment the cursor just past the first speechmark
stream_advance(stream, 1);
sv_t current_contents = sv_chop_left(stream->contents, stream->byte);
u64 string_size = sv_till(current_contents, "\"");
if (string_size + stream->byte == stream_size(stream))
sv_t string = sv_chop_left(stream->contents, stream->byte);
string.size = sv_till(string, "\"");
// If we're at the edge of the stream, there must not have been any
// speechmarks.
if (string.size + stream->byte == stream_size(stream))
return PARSE_ERR_EXPECTED_SPEECH_MARKS;
// Bounds of string are well defined, generate an object and advance the
// stream
*ret =
ast_node_string(stream->byte - 1, SV(current_contents.data, string_size));
stream_advance(stream, string_size + 1);
// `string` is well defined, package and throw it back.
*ret = ast_node_string(stream->byte - 1, string);
stream_advance(stream, string.size + 1);
return PARSE_ERR_OK;
}
parse_err_t parse_symbol(parse_stream_t *stream, ast_node_t *ret)
{
sv_t current_contents = sv_chop_left(stream->contents, stream->byte);
sv_t symbol =
SV(current_contents.data, sv_while(current_contents, SYMBOL_CHARS));
sv_t symbol = sv_chop_left(stream->contents, stream->byte);
symbol.size = sv_while(symbol, SYMBOL_CHARS);
// see if symbol is one of the already known symbols
static_assert(NUM_AST_KNOWNS == 2, "Expected number of AST KNOWN");
static_assert(NUM_AST_KNOWNS == 1, "Expected number of AST_KNOWNs");
for (ast_known_t i = 0; i < NUM_AST_KNOWNS; ++i)
{
const char *possible_prim = ast_known_to_cstr(i);
if (strlen(possible_prim) == symbol.size &&
strncmp(possible_prim, symbol.data, symbol.size) == 0)
const char *possible_known = ast_known_to_cstr(i);
if (strlen(possible_known) == symbol.size &&
strncmp(possible_known, symbol.data, symbol.size) == 0)
{
// Found a matching known symbol
*ret = ast_node_known(stream->byte, i);