parser: Better AST primitives

commit 1588e7b46d
Author: Aryadev Chavali <aryadev@aryadevchavali.com>
Date:   Sat Jan 24 02:55:12 2026 +0000

    parser/parser: parse_symbol now supports primitives

    parse_symbol now checks whether the parsed symbol text is actually
    the name of a primitive (linear search through all primitives).  If
    it is, a primitive node is returned instead.  Otherwise, a symbol is
    generated as in the previous form of the routine.

commit 62c91990c4
Author: Aryadev Chavali <aryadev@aryadevchavali.com>
Date:   Sat Jan 24 02:40:26 2026 +0000

    parser/ast: Added support for node level primitives

    These are just an enumeration of primitives we already expect to be
    present within a program.  Instead of leaving everything as a symbol,
    we can compile certain symbols into the enumeration ahead of time to
    make later stages easier.
This commit is contained in:
2026-01-24 02:58:07 +00:00
parent 51d59b6a83
commit 6d507e239c
3 changed files with 80 additions and 17 deletions

View File

@@ -5,15 +5,38 @@
* Commentary: See ast.h. * Commentary: See ast.h.
*/ */
#include <arl/lib/base.h>
#include <arl/lib/vec.h> #include <arl/lib/vec.h>
#include <arl/parser/ast.h> #include <arl/parser/ast.h>
/// Give the textual name of an AST primitive as a C string literal.
/// A value with no name listed here is reported through FAIL.
const char *ast_prim_to_cstr(ast_prim_t prim)
{
  if (prim == AST_PRIM_NIL)
  {
    return "nil";
  }
  if (prim == AST_PRIM_PRINTLN)
  {
    return "println";
  }

  // None of the named primitives matched
  FAIL("Unexpected AST primitive value: %d\n", prim);
}
/// Build a primitive node: `byte` is recorded as the node's byte location,
/// the node is tagged AST_NODE_TYPE_PRIMITIVE and carries `primitive` as its
/// value.
ast_node_t ast_node_prim(u64 byte, ast_prim_t primitive)
{
  ast_node_t node = {0};

  node.byte_location = byte;
  node.type          = AST_NODE_TYPE_PRIMITIVE;
  node.value.as_prim = primitive;

  return node;
}
/// Build a string node: `byte` is recorded as the node's byte_location, the
/// node is tagged AST_NODE_TYPE_STRING and `string` is stored as its value.
ast_node_t ast_node_string(u64 byte, sv_t string) ast_node_t ast_node_string(u64 byte, sv_t string)
{ {
return (ast_node_t){ return (ast_node_t){
.byte_location = byte, .byte_location = byte,
.type = AST_NODE_TYPE_STRING, .type = AST_NODE_TYPE_STRING,
.value = {string}, .value = {.as_string = string},
}; };
} }
@@ -22,25 +45,31 @@ ast_node_t ast_node_symbol(u64 byte, sv_t symbol)
return (ast_node_t){ return (ast_node_t){
.byte_location = byte, .byte_location = byte,
.type = AST_NODE_TYPE_SYMBOL, .type = AST_NODE_TYPE_SYMBOL,
.value = {symbol}, .value = {.as_symbol = symbol},
}; };
} }
/// Write a one-token description of a node to `fp` (no trailing newline).
/// A null node pointer prints "NIL"; otherwise the output is the node's type
/// wrapped around its value: PRIMITIVE(name), SYMBOL(text) or STRING(text).
/// Any other type tag is reported through FAIL.
void ast_node_print(FILE *fp, ast_node_t *obj) void ast_node_print(FILE *fp, ast_node_t *node)
{ {
// Null pointer: print a placeholder rather than dereferencing it
if (!obj) if (!node)
{ {
fprintf(fp, "NIL"); fprintf(fp, "NIL");
return; return;
} }
switch (obj->type) switch (node->type)
{ {
case AST_NODE_TYPE_PRIMITIVE:
// Primitives are printed by name, via ast_prim_to_cstr
fprintf(fp, "PRIMITIVE(%s)", ast_prim_to_cstr(node->value.as_prim));
break;
case AST_NODE_TYPE_SYMBOL: case AST_NODE_TYPE_SYMBOL:
fprintf(fp, "SYMBOL(" PR_SV ")", SV_FMT(obj->value.as_symbol)); fprintf(fp, "SYMBOL(" PR_SV ")", SV_FMT(node->value.as_symbol));
break; break;
case AST_NODE_TYPE_STRING: case AST_NODE_TYPE_STRING:
fprintf(fp, "STRING(" PR_SV ")", SV_FMT(obj->value.as_string)); fprintf(fp, "STRING(" PR_SV ")", SV_FMT(node->value.as_string));
break; break;
// The count enumerator is handled together with any unknown tag
case NUM_AST_NODE_TYPES:
default:
FAIL("Unexpected node type: %d\n", node->type);
} }
} }

View File

@@ -15,10 +15,24 @@
/// Types the AST can encode /// Types the AST can encode
/// The value is used as the `type` tag of a node.  The trailing
/// NUM_AST_NODE_TYPES enumerator takes the next value after the last listed
/// type, so it equals the number of types rather than naming one.
typedef enum typedef enum
{ {
AST_NODE_TYPE_SYMBOL = 0, AST_NODE_TYPE_PRIMITIVE = 0,
AST_NODE_TYPE_SYMBOL,
AST_NODE_TYPE_STRING, AST_NODE_TYPE_STRING,
NUM_AST_NODE_TYPES,
} ast_node_type_t; } ast_node_type_t;
/// Built-in primitives (values, callables, etc) that are written as symbols
/// but are given their own enumerated encoding.
typedef enum
{
  AST_PRIM_NIL     = 0,
  AST_PRIM_PRINTLN = 1,

  /// Number of primitives listed above; takes the next free value.
  NUM_AST_PRIMS,
} ast_prim_t;

/// Name of the primitive `prim` as a C string.
const char *ast_prim_to_cstr(ast_prim_t prim);
/// Node of the AST as a tagged union /// Node of the AST as a tagged union
typedef struct typedef struct
{ {
@@ -26,14 +40,16 @@ typedef struct
ast_node_type_t type; ast_node_type_t type;
union union
{ {
sv_t as_string; ast_prim_t as_prim;
sv_t as_symbol; sv_t as_symbol;
sv_t as_string;
} value; } value;
} ast_node_t; } ast_node_t;
ast_node_t ast_node_string(u64 byte, sv_t string); ast_node_t ast_node_prim(u64 byte, ast_prim_t primitive);
ast_node_t ast_node_symbol(u64 byte, sv_t symbol); ast_node_t ast_node_symbol(u64 byte, sv_t symbol);
void ast_node_print(FILE *fp, ast_node_t *obj); ast_node_t ast_node_string(u64 byte, sv_t string);
void ast_node_print(FILE *fp, ast_node_t *node);
/// The AST as a flat collection of nodes /// The AST as a flat collection of nodes
typedef struct typedef struct

View File

@@ -5,10 +5,11 @@
* Commentary: See parser.h * Commentary: See parser.h
*/ */
#include "arl/lib/sv.h"
#include <ctype.h> #include <ctype.h>
#include <string.h> #include <string.h>
#include <arl/lib/sv.h>
#include <arl/parser/ast.h>
#include <arl/parser/parser.h> #include <arl/parser/parser.h>
/// Expected characters in a symbol /// Expected characters in a symbol
@@ -27,7 +28,7 @@ const char *parse_err_to_string(parse_err_t err)
case PARSE_ERR_UNKNOWN_CHAR: case PARSE_ERR_UNKNOWN_CHAR:
return "UNKNOWN_CHAR"; return "UNKNOWN_CHAR";
default: default:
FAIL("Unexpected value for parse_err_t: %d\n", err); FAIL("Unexpected parse_err_t value: %d\n", err);
} }
} }
@@ -119,10 +120,27 @@ parse_err_t parse_string(parse_stream_t *stream, ast_node_t *ret)
/// Parse a symbol starting at the stream's current byte and write the
/// resulting node to `ret`.
///
/// The symbol text is the leading run of SYMBOL_CHARS in the remaining
/// contents (assuming sv_while returns the length of that run - TODO confirm
/// against sv.h).  If the text is exactly the name of an AST primitive, `ret`
/// becomes a primitive node; otherwise it becomes a symbol node holding the
/// text.  Either way the stream is advanced past the symbol and PARSE_ERR_OK
/// is returned.
parse_err_t parse_symbol(parse_stream_t *stream, ast_node_t *ret) parse_err_t parse_symbol(parse_stream_t *stream, ast_node_t *ret)
{ {
sv_t current_contents = sv_chop_left(stream->contents, stream->byte); sv_t current_contents = sv_chop_left(stream->contents, stream->byte);
u64 symbol_size = sv_while(current_contents, SYMBOL_CHARS); sv_t symbol =
// Generate symbol SV(current_contents.data, sv_while(current_contents, SYMBOL_CHARS));
*ret = ast_node_symbol(stream->byte, SV(current_contents.data, symbol_size));
stream_advance(stream, symbol_size); // see if symbol is one of the AST primitives we can parse AOT
// Compile-time tripwire: fails the build when the primitive count changes
// (presumably so this routine gets revisited alongside the enum).
static_assert(NUM_AST_PRIMS == 2, "Expected number of AST primitives");
// Linear search over every primitive name
for (ast_prim_t i = 0; i < NUM_AST_PRIMS; ++i)
{
const char *possible_prim = ast_prim_to_cstr(i);
// Lengths must agree before the bytes are compared, so a name that is only
// a prefix of the symbol (or the reverse) does not count as a match.
if (strlen(possible_prim) == symbol.size &&
strncmp(possible_prim, symbol.data, symbol.size) == 0)
{
// Found a matching primitive
*ret = ast_node_prim(stream->byte, i);
goto end;
}
}
// otherwise, it must be a fresh symbol i.e. user defined
*ret = ast_node_symbol(stream->byte, symbol);
// Shared exit: both outcomes consume the same number of bytes
end:
stream_advance(stream, symbol.size);
return PARSE_ERR_OK; return PARSE_ERR_OK;
} }