From 6d507e239c3d7ddeafebcdbc1b12b9fc5510dd70 Mon Sep 17 00:00:00 2001 From: Aryadev Chavali Date: Sat, 24 Jan 2026 02:58:07 +0000 Subject: [PATCH] parser: Better AST primitives commit 1588e7b46d82a98776e582d133889554497ecd2d Author: Aryadev Chavali Date: Sat Jan 24 02:55:12 2026 +0000 parser/parser: parse_symbol now supports primitives parse_symbol now checks whether the parsed symbol text names a primitive (via a linear search through all primitives). If it does, a primitive node is returned; otherwise a symbol node is generated, as the routine did previously. commit 62c91990c43dbebbcd2a06cdc86f3f0c900f2e07 Author: Aryadev Chavali Date: Sat Jan 24 02:40:26 2026 +0000 parser/ast: Added support for node level primitives These are just an enumeration of primitives we already expect to be present within a program. Instead of leaving everything as a symbol, we can compile certain symbols into the enumeration ahead of time to make later stages easier. --- src/arl/parser/ast.c | 43 ++++++++++++++++++++++++++++++++++------- src/arl/parser/ast.h | 24 +++++++++++++++++++---- src/arl/parser/parser.c | 30 ++++++++++++++++++++++------ 3 files changed, 80 insertions(+), 17 deletions(-) diff --git a/src/arl/parser/ast.c b/src/arl/parser/ast.c index 9b77e04..174f128 100644 --- a/src/arl/parser/ast.c +++ b/src/arl/parser/ast.c @@ -5,15 +5,38 @@ * Commentary: See ast.h. 
*/ +#include #include #include +const char *ast_prim_to_cstr(ast_prim_t prim) +{ + switch (prim) + { + case AST_PRIM_NIL: + return "nil"; + case AST_PRIM_PRINTLN: + return "println"; + default: + FAIL("Unexpected AST primitive value: %d\n", prim); + } +} + +ast_node_t ast_node_prim(u64 byte, ast_prim_t primitive) +{ + return (ast_node_t){ + .byte_location = byte, + .type = AST_NODE_TYPE_PRIMITIVE, + .value = {.as_prim = primitive}, + }; +} + ast_node_t ast_node_string(u64 byte, sv_t string) { return (ast_node_t){ .byte_location = byte, .type = AST_NODE_TYPE_STRING, - .value = {string}, + .value = {.as_string = string}, }; } @@ -22,25 +45,31 @@ ast_node_t ast_node_symbol(u64 byte, sv_t symbol) return (ast_node_t){ .byte_location = byte, .type = AST_NODE_TYPE_SYMBOL, - .value = {symbol}, + .value = {.as_symbol = symbol}, }; } -void ast_node_print(FILE *fp, ast_node_t *obj) +void ast_node_print(FILE *fp, ast_node_t *node) { - if (!obj) + if (!node) { fprintf(fp, "NIL"); return; } - switch (obj->type) + switch (node->type) { + case AST_NODE_TYPE_PRIMITIVE: + fprintf(fp, "PRIMITIVE(%s)", ast_prim_to_cstr(node->value.as_prim)); + break; case AST_NODE_TYPE_SYMBOL: - fprintf(fp, "SYMBOL(" PR_SV ")", SV_FMT(obj->value.as_symbol)); + fprintf(fp, "SYMBOL(" PR_SV ")", SV_FMT(node->value.as_symbol)); break; case AST_NODE_TYPE_STRING: - fprintf(fp, "STRING(" PR_SV ")", SV_FMT(obj->value.as_string)); + fprintf(fp, "STRING(" PR_SV ")", SV_FMT(node->value.as_string)); break; + case NUM_AST_NODE_TYPES: + default: + FAIL("Unexpected node type: %d\n", node->type); } } diff --git a/src/arl/parser/ast.h b/src/arl/parser/ast.h index 4f641d6..6e174c8 100644 --- a/src/arl/parser/ast.h +++ b/src/arl/parser/ast.h @@ -15,10 +15,24 @@ /// Types the AST can encode typedef enum { - AST_NODE_TYPE_SYMBOL = 0, + AST_NODE_TYPE_PRIMITIVE = 0, + AST_NODE_TYPE_SYMBOL, AST_NODE_TYPE_STRING, + + NUM_AST_NODE_TYPES, } ast_node_type_t; +/// Primitives (values, callables, etc) as symbols +typedef enum +{ + 
AST_PRIM_NIL = 0, + AST_PRIM_PRINTLN, + + NUM_AST_PRIMS, +} ast_prim_t; + +const char *ast_prim_to_cstr(ast_prim_t); + /// Node of the AST as a tagged union typedef struct { @@ -26,14 +40,16 @@ typedef struct ast_node_type_t type; union { - sv_t as_string; + ast_prim_t as_prim; sv_t as_symbol; + sv_t as_string; } value; } ast_node_t; -ast_node_t ast_node_string(u64 byte, sv_t string); +ast_node_t ast_node_prim(u64 byte, ast_prim_t primitive); ast_node_t ast_node_symbol(u64 byte, sv_t symbol); -void ast_node_print(FILE *fp, ast_node_t *obj); +ast_node_t ast_node_string(u64 byte, sv_t string); +void ast_node_print(FILE *fp, ast_node_t *node); /// The AST as a flat collection of nodes typedef struct diff --git a/src/arl/parser/parser.c b/src/arl/parser/parser.c index 4d3aee9..03e660f 100644 --- a/src/arl/parser/parser.c +++ b/src/arl/parser/parser.c @@ -5,10 +5,11 @@ * Commentary: See parser.h */ -#include "arl/lib/sv.h" #include #include +#include +#include #include /// Expected characters in a symbol @@ -27,7 +28,7 @@ const char *parse_err_to_string(parse_err_t err) case PARSE_ERR_UNKNOWN_CHAR: return "UNKNOWN_CHAR"; default: - FAIL("Unexpected value for parse_err_t: %d\n", err); + FAIL("Unexpected parse_err_t value: %d\n", err); } } @@ -119,10 +120,27 @@ parse_err_t parse_string(parse_stream_t *stream, ast_node_t *ret) parse_err_t parse_symbol(parse_stream_t *stream, ast_node_t *ret) { sv_t current_contents = sv_chop_left(stream->contents, stream->byte); - u64 symbol_size = sv_while(current_contents, SYMBOL_CHARS); - // Generate symbol - *ret = ast_node_symbol(stream->byte, SV(current_contents.data, symbol_size)); - stream_advance(stream, symbol_size); + sv_t symbol = + SV(current_contents.data, sv_while(current_contents, SYMBOL_CHARS)); + + // see if symbol is one of the AST primitives we can parse AOT + static_assert(NUM_AST_PRIMS == 2, "Expected number of AST primitives"); + for (ast_prim_t i = 0; i < NUM_AST_PRIMS; ++i) + { + const char *possible_prim = 
ast_prim_to_cstr(i); + if (strlen(possible_prim) == symbol.size && + strncmp(possible_prim, symbol.data, symbol.size) == 0) + { + // Found a matching primitive + *ret = ast_node_prim(stream->byte, i); + goto end; + } + } + + // otherwise, it must be a fresh symbol i.e. user defined + *ret = ast_node_symbol(stream->byte, symbol); +end: + stream_advance(stream, symbol.size); return PARSE_ERR_OK; }