Compare commits

...

8 Commits

Author SHA1 Message Date
Aryadev Chavali
7e3dd2679d parser/ast: Remove union name in ast_node_t
Makes the union inside ast_node_t anonymous, so its members are accessed
directly through the structure itself; a bit easier to use IMO.
2026-01-29 03:15:51 +00:00
Aryadev Chavali
8764b65aff parser: remove NIL as a known value
Not really needed or necessary.
2026-01-29 03:15:12 +00:00
Aryadev Chavali
2e24d3a618 parser/parser: slight tidy up in parse_string/parse_symbol 2026-01-29 03:14:36 +00:00
Aryadev Chavali
645ea5a04e main: Fix trivial pipes error in read_pipe, better comments overall. 2026-01-28 09:49:42 +00:00
Aryadev Chavali
9d8a0c1e22 examples: Remove newline from end of hello-world 2026-01-28 09:07:22 +00:00
Aryadev Chavali
65e4dc0b29 main: command line arguments for filename, and allow stdin parsing
- Now take a single command line argument for the filename to read and
  compile.
- If filename is "--", then read stdin until EOF using a different
  read handler (using ~vec_t~ along with buffered reading).
2026-01-28 09:06:00 +00:00
Aryadev Chavali
afc0f9c034 main: deal with file read errors more appropriately, unify error interface
- ~read_file~ now returns an error code and takes the ~sv_t~ (which
  contains the file contents) by pointer.  We can now deal with the
  error in ~main~ directly.
- Make the return code of ~main~ a variable which error branches can
  set.  Unify the error branch and normal branch code.  Pattern for
  error handling is now unified.
2026-01-28 09:02:46 +00:00
Aryadev Chavali
84996130b7 base: Added some extra logging macros
Adds LOG and LOG_ERR.  LOG_ERR always compiles to a /stderr/ print.
LOG, on the other hand, compiles to nothing unless VERBOSE_LOGS is
nonzero.  By default it is 0, so it must be defined when compiling to
enable logging - hence the adjustment of the Makefile.
2026-01-28 08:59:29 +00:00
7 changed files with 143 additions and 50 deletions

View File

@@ -9,7 +9,7 @@ OBJECTS:=$(patsubst %,$(DIST)/%.o, $(UNITS))
LDFLAGS= LDFLAGS=
GFLAGS=-Wall -Wextra -Wpedantic -std=c23 -I./include/ GFLAGS=-Wall -Wextra -Wpedantic -std=c23 -I./include/
DFLAGS=-ggdb -fsanitize=address -fsanitize=undefined DFLAGS=-ggdb -fsanitize=address -fsanitize=undefined -DVERBOSE_LOGS=1
RFLAGS=-O3 RFLAGS=-O3
MODE=release MODE=release

View File

@@ -1 +1 @@
"Hello, world!\n" putstr "Hello, world!\n" putstr

View File

@@ -33,13 +33,35 @@ typedef double f64;
#define MIN(A, B) ((A) > (B) ? (B) : (A)) #define MIN(A, B) ((A) > (B) ? (B) : (A))
#define ARRSIZE(A) ((sizeof(A)) / sizeof((A)[0])) #define ARRSIZE(A) ((sizeof(A)) / sizeof((A)[0]))
#define FAIL(...) \ #ifndef VERBOSE_LOGS
#define VERBOSE_LOGS 0
#endif
#if VERBOSE_LOGS
#define LOG(...) \
do \
{ \
fprintf(stdout, "LOG: "); \
fprintf(stdout, __VA_ARGS__); \
} while (0);
#else
#define LOG(...)
#endif
#define LOG_ERR(...) \
do \ do \
{ \ { \
fprintf(stderr, "FAIL: "); \
fprintf(stderr, __VA_ARGS__); \ fprintf(stderr, __VA_ARGS__); \
assert(0); \ } while (0);
#define FAIL(...) \
do \
{ \
LOG_ERR("FAIL: "); \
LOG_ERR(__VA_ARGS__); \
assert(0); \
} while (0) } while (0)
#endif #endif
/* Copyright (C) 2026 Aryadev Chavali /* Copyright (C) 2026 Aryadev Chavali

View File

@@ -25,7 +25,6 @@ typedef enum
/// Known symbols - may reference callables or values. /// Known symbols - may reference callables or values.
typedef enum typedef enum
{ {
AST_KNOWN_NIL = 0,
AST_KNOWN_PUTSTR, AST_KNOWN_PUTSTR,
NUM_AST_KNOWNS, NUM_AST_KNOWNS,
@@ -43,7 +42,7 @@ typedef struct
ast_known_t as_known; ast_known_t as_known;
sv_t as_symbol; sv_t as_symbol;
sv_t as_string; sv_t as_string;
} value; };
} ast_node_t; } ast_node_t;
ast_node_t ast_node_known(u64 byte, ast_known_t known); ast_node_t ast_node_known(u64 byte, ast_known_t known);

View File

@@ -18,28 +18,101 @@
#include <arl/parser/ast.h> #include <arl/parser/ast.h>
#include <arl/parser/parser.h> #include <arl/parser/parser.h>
sv_t read_file(const char *filename) int read_file(const char *filename, sv_t *ret)
{ {
// NOTE: Stupidly simple. Presumes the file is NOT three pipes in a trench
// coat.
FILE *fp = fopen(filename, "rb"); FILE *fp = fopen(filename, "rb");
if (!fp) if (!fp)
FAIL("File `%s` does not exist\n", filename); return 1;
fseek(fp, 0, SEEK_END); fseek(fp, 0, SEEK_END);
long size = ftell(fp); ret->size = ftell(fp);
fseek(fp, 0, SEEK_SET); fseek(fp, 0, SEEK_SET);
char *buffer = calloc(1, size + 1); ret->data = calloc(1, ret->size + 1);
fread(buffer, size, 1, fp); fread(ret->data, ret->size, 1, fp);
fclose(fp); fclose(fp);
buffer[size] = '\0'; ret->data[ret->size] = '\0';
return SV(buffer, size); return 0;
} }
int main(void) int read_pipe(FILE *pipe, sv_t *ret)
{ {
const char *filename = "./examples/hello-world.arl"; // NOTE: We can't read an entire pipe at once like we did for read_file. So
sv_t contents = read_file(filename); // let's read in buffered chunks, with a vector to keep them contiguous.
printf("%s\n=> `" PR_SV "`\n", filename, SV_FMT(contents)); vec_t contents = {0};
char buffer[1024];
while (!feof(pipe))
{
size_t bytes_read = fread(buffer, 1, sizeof(buffer), pipe);
vec_append(&contents, buffer, bytes_read);
}
ret->size = contents.size;
// Get that null terminator in, but only after we've recorded the actual size
// of what's been read.
vec_append_byte(&contents, '\0');
if (contents.not_inlined)
{
// Take the heap pointer from us.
ret->data = vec_data(&contents);
}
else
{
// vec_data(&contents) is stack allocated; can't carry that out of this
// function!
ret->data = calloc(1, contents.size);
memcpy(ret->data, vec_data(&contents), contents.size);
}
return 0;
}
void usage(FILE *fp)
{
fprintf(fp, "Usage: arl [FILE]\n"
"Compiles [FILE] as ARL source code.\n"
" [FILE]: File to compile.\n"
"If FILE is \"--\", then read from stdin.\n");
}
int main(int argc, char *argv[])
{
int ret = 0;
char *filename = "";
if (argc == 1)
{
usage(stderr);
ret = 1;
goto end;
}
else
{
filename = argv[1];
}
int read_err = 0;
sv_t contents = {0};
if (strcmp(filename, "--") == 0)
{
filename = "stdin";
read_err = read_pipe(stdin, &contents);
}
else
{
read_err = read_file(filename, &contents);
}
if (read_err)
{
LOG_ERR("ERROR: Reading `%s`: ", filename);
perror("");
ret = 1;
goto end;
}
LOG("%s => `" PR_SV "`\n", filename, SV_FMT(contents));
parse_stream_t stream = {.byte = 0, .contents = contents}; parse_stream_t stream = {.byte = 0, .contents = contents};
ast_t ast = {0}; ast_t ast = {0};
@@ -49,23 +122,23 @@ int main(void)
u64 line = 1, col = 0; u64 line = 1, col = 0;
parse_stream_get_line_col(&stream, &line, &col); parse_stream_get_line_col(&stream, &line, &col);
fprintf(stderr, "%s:%lu:%lu: %s\n", filename, line, col, LOG_ERR("%s:%lu:%lu: %s\n", filename, line, col, parse_err_to_string(perr));
parse_err_to_string(perr)); ret = 1;
goto fail; goto end;
} }
printf("=> Parsed %lu nodes\n", ast.nodes.size / sizeof(ast_node_t));
LOG("Parsed %lu nodes\n", ast.nodes.size / sizeof(ast_node_t));
#if VERBOSE_LOGS
ast_print(stdout, &ast); ast_print(stdout, &ast);
#endif
printf("\n"); printf("\n");
free(contents.data); end:
ast_free(&ast);
return 0;
fail:
if (contents.data) if (contents.data)
free(contents.data); free(contents.data);
if (ast.nodes.capacity > 0) if (ast.nodes.capacity > 0)
ast_free(&ast); ast_free(&ast);
return 1; return ret;
} }
/* Copyright (C) 2026 Aryadev Chavali /* Copyright (C) 2026 Aryadev Chavali

View File

@@ -13,8 +13,6 @@ const char *ast_known_to_cstr(ast_known_t known)
{ {
switch (known) switch (known)
{ {
case AST_KNOWN_NIL:
return "nil";
case AST_KNOWN_PUTSTR: case AST_KNOWN_PUTSTR:
return "putstr"; return "putstr";
default: default:
@@ -27,7 +25,7 @@ ast_node_t ast_node_known(u64 byte, ast_known_t known)
return (ast_node_t){ return (ast_node_t){
.byte_location = byte, .byte_location = byte,
.type = AST_NODE_TYPE_KNOWN, .type = AST_NODE_TYPE_KNOWN,
.value = {.as_known = known}, .as_known = known,
}; };
} }
@@ -36,7 +34,7 @@ ast_node_t ast_node_string(u64 byte, sv_t string)
return (ast_node_t){ return (ast_node_t){
.byte_location = byte, .byte_location = byte,
.type = AST_NODE_TYPE_STRING, .type = AST_NODE_TYPE_STRING,
.value = {.as_string = string}, .as_string = string,
}; };
} }
@@ -45,7 +43,7 @@ ast_node_t ast_node_symbol(u64 byte, sv_t symbol)
return (ast_node_t){ return (ast_node_t){
.byte_location = byte, .byte_location = byte,
.type = AST_NODE_TYPE_SYMBOL, .type = AST_NODE_TYPE_SYMBOL,
.value = {.as_symbol = symbol}, .as_symbol = symbol,
}; };
} }
@@ -59,13 +57,13 @@ void ast_node_print(FILE *fp, ast_node_t *node)
switch (node->type) switch (node->type)
{ {
case AST_NODE_TYPE_KNOWN: case AST_NODE_TYPE_KNOWN:
fprintf(fp, "KNOWN(%s)", ast_known_to_cstr(node->value.as_known)); fprintf(fp, "KNOWN(%s)", ast_known_to_cstr(node->as_known));
break; break;
case AST_NODE_TYPE_SYMBOL: case AST_NODE_TYPE_SYMBOL:
fprintf(fp, "SYMBOL(" PR_SV ")", SV_FMT(node->value.as_symbol)); fprintf(fp, "SYMBOL(" PR_SV ")", SV_FMT(node->as_symbol));
break; break;
case AST_NODE_TYPE_STRING: case AST_NODE_TYPE_STRING:
fprintf(fp, "STRING(" PR_SV ")", SV_FMT(node->value.as_string)); fprintf(fp, "STRING(" PR_SV ")", SV_FMT(node->as_string));
break; break;
case NUM_AST_NODE_TYPES: case NUM_AST_NODE_TYPES:
default: default:

View File

@@ -105,31 +105,32 @@ parse_err_t parse_string(parse_stream_t *stream, ast_node_t *ret)
{ {
// Increment the cursor just past the first speechmark // Increment the cursor just past the first speechmark
stream_advance(stream, 1); stream_advance(stream, 1);
sv_t current_contents = sv_chop_left(stream->contents, stream->byte); sv_t string = sv_chop_left(stream->contents, stream->byte);
u64 string_size = sv_till(current_contents, "\""); string.size = sv_till(string, "\"");
if (string_size + stream->byte == stream_size(stream))
// If we're at the edge of the stream, there must not have been any
// speechmarks.
if (string.size + stream->byte == stream_size(stream))
return PARSE_ERR_EXPECTED_SPEECH_MARKS; return PARSE_ERR_EXPECTED_SPEECH_MARKS;
// Bounds of string are well defined, generate an object and advance the
// stream // `string` is well defined, package and throw it back.
*ret = *ret = ast_node_string(stream->byte - 1, string);
ast_node_string(stream->byte - 1, SV(current_contents.data, string_size)); stream_advance(stream, string.size + 1);
stream_advance(stream, string_size + 1);
return PARSE_ERR_OK; return PARSE_ERR_OK;
} }
parse_err_t parse_symbol(parse_stream_t *stream, ast_node_t *ret) parse_err_t parse_symbol(parse_stream_t *stream, ast_node_t *ret)
{ {
sv_t current_contents = sv_chop_left(stream->contents, stream->byte); sv_t symbol = sv_chop_left(stream->contents, stream->byte);
sv_t symbol = symbol.size = sv_while(symbol, SYMBOL_CHARS);
SV(current_contents.data, sv_while(current_contents, SYMBOL_CHARS));
// see if symbol is one of the already known symbols // see if symbol is one of the already known symbols
static_assert(NUM_AST_KNOWNS == 2, "Expected number of AST KNOWN"); static_assert(NUM_AST_KNOWNS == 1, "Expected number of AST_KNOWNs");
for (ast_known_t i = 0; i < NUM_AST_KNOWNS; ++i) for (ast_known_t i = 0; i < NUM_AST_KNOWNS; ++i)
{ {
const char *possible_prim = ast_known_to_cstr(i); const char *possible_known = ast_known_to_cstr(i);
if (strlen(possible_prim) == symbol.size && if (strlen(possible_known) == symbol.size &&
strncmp(possible_prim, symbol.data, symbol.size) == 0) strncmp(possible_known, symbol.data, symbol.size) == 0)
{ {
// Found a matching known symbol // Found a matching known symbol
*ret = ast_node_known(stream->byte, i); *ret = ast_node_known(stream->byte, i);