parser: Better AST primitives
commit 1588e7b46d
Author: Aryadev Chavali <aryadev@aryadevchavali.com>
Date: Sat Jan 24 02:55:12 2026 +0000

parser/parser: parse_symbol now supports primitives

parse_symbol now checks whether the parsed symbol data is actually just a primitive (linear search through all primitives). If it is, a primitive node is returned. Otherwise, a symbol is generated as in the previous form of the routine.

commit 62c91990c4
Author: Aryadev Chavali <aryadev@aryadevchavali.com>
Date: Sat Jan 24 02:40:26 2026 +0000

parser/ast: Added support for node level primitives

These are just an enumeration of primitives we already expect to be present within a program. Instead of leaving everything as a symbol, we can compile certain symbols into the enumeration ahead of time to make later stages easier.
This commit is contained in:
@@ -5,15 +5,38 @@
|
||||
* Commentary: See ast.h.
|
||||
*/
|
||||
|
||||
#include <arl/lib/base.h>
|
||||
#include <arl/lib/vec.h>
|
||||
#include <arl/parser/ast.h>
|
||||
|
||||
/// Return the source-level spelling of an AST primitive as a C string literal
/// ("nil" for AST_PRIM_NIL, "println" for AST_PRIM_PRINTLN).
///
/// Any value without a case below (NUM_AST_PRIMS included) is reported through
/// FAIL.
/// NOTE(review): assumes FAIL takes printf-style arguments and does not
/// return, since nothing is returned after the switch - confirm in base.h.
const char *ast_prim_to_cstr(ast_prim_t prim)
{
  switch (prim)
  {
  case AST_PRIM_NIL:
    return "nil";
  case AST_PRIM_PRINTLN:
    return "println";
  default:
    // The integer type underlying an enum is implementation-defined, so
    // convert explicitly to int to match the %d conversion.
    FAIL("Unexpected AST primitive value: %d\n", (int)prim);
  }
}
|
||||
|
||||
/// Build a primitive node: the node's type is AST_NODE_TYPE_PRIMITIVE, its
/// byte_location is `byte`, and its value holds `primitive`.  Every other
/// field of the node is zeroed.
ast_node_t ast_node_prim(u64 byte, ast_prim_t primitive)
{
  ast_node_t node = {0};

  node.byte_location = byte;
  node.type          = AST_NODE_TYPE_PRIMITIVE;
  node.value.as_prim = primitive;

  return node;
}
|
||||
|
||||
/// Build a string node: the node's type is AST_NODE_TYPE_STRING, its
/// byte_location is `byte`, and its value holds the string view `string`.
ast_node_t ast_node_string(u64 byte, sv_t string)
{
  return (ast_node_t){
      .byte_location = byte,
      .type          = AST_NODE_TYPE_STRING,
      // Name the union member explicitly: a positional `{string}` would
      // target the union's first member, which is as_prim rather than a
      // string view.
      .value = {.as_string = string},
  };
}
|
||||
|
||||
@@ -22,25 +45,31 @@ ast_node_t ast_node_symbol(u64 byte, sv_t symbol)
|
||||
return (ast_node_t){
|
||||
.byte_location = byte,
|
||||
.type = AST_NODE_TYPE_SYMBOL,
|
||||
.value = {symbol},
|
||||
.value = {.as_symbol = symbol},
|
||||
};
|
||||
}
|
||||
|
||||
/// Write a textual form of `node` to `fp`.
///
/// A NULL node prints "NIL".  Otherwise the output is tagged by node type:
/// PRIMITIVE(<name>), SYMBOL(<text>) or STRING(<text>).  Any other type value
/// (NUM_AST_NODE_TYPES included) is reported through FAIL.
void ast_node_print(FILE *fp, ast_node_t *node)
{
  if (!node)
  {
    fprintf(fp, "NIL");
    return;
  }

  switch (node->type)
  {
  case AST_NODE_TYPE_PRIMITIVE:
    fprintf(fp, "PRIMITIVE(%s)", ast_prim_to_cstr(node->value.as_prim));
    break;
  case AST_NODE_TYPE_SYMBOL:
    fprintf(fp, "SYMBOL(" PR_SV ")", SV_FMT(node->value.as_symbol));
    break;
  case AST_NODE_TYPE_STRING:
    fprintf(fp, "STRING(" PR_SV ")", SV_FMT(node->value.as_string));
    break;
  case NUM_AST_NODE_TYPES:
  default:
    // The integer type underlying an enum is implementation-defined, so
    // convert explicitly to int to match the %d conversion.
    FAIL("Unexpected node type: %d\n", (int)node->type);
  }
}
|
||||
|
||||
|
||||
@@ -15,10 +15,24 @@
|
||||
/// Types the AST can encode.  Used as the tag of the ast_node_t tagged union.
typedef enum
{
  AST_NODE_TYPE_PRIMITIVE = 0, ///< value is an ast_prim_t (as_prim)
  AST_NODE_TYPE_SYMBOL,        ///< value is a string view (as_symbol)
  AST_NODE_TYPE_STRING,        ///< value is a string view (as_string)

  NUM_AST_NODE_TYPES, ///< number of node types; not a valid type itself
} ast_node_type_t;
|
||||
|
||||
/// Primitives (values, callables, etc) that are recognised ahead of time
/// instead of being left as plain symbols.
typedef enum
{
  AST_PRIM_NIL = 0, ///< spelled "nil"
  AST_PRIM_PRINTLN, ///< spelled "println"

  NUM_AST_PRIMS, ///< number of primitives; not a valid primitive itself
} ast_prim_t;
|
||||
|
||||
const char *ast_prim_to_cstr(ast_prim_t);
|
||||
|
||||
/// Node of the AST as a tagged union
|
||||
typedef struct
|
||||
{
|
||||
@@ -26,14 +40,16 @@ typedef struct
|
||||
ast_node_type_t type;
|
||||
union
|
||||
{
|
||||
sv_t as_string;
|
||||
ast_prim_t as_prim;
|
||||
sv_t as_symbol;
|
||||
sv_t as_string;
|
||||
} value;
|
||||
} ast_node_t;
|
||||
|
||||
ast_node_t ast_node_string(u64 byte, sv_t string);
|
||||
ast_node_t ast_node_prim(u64 byte, ast_prim_t primitive);
|
||||
ast_node_t ast_node_symbol(u64 byte, sv_t symbol);
|
||||
void ast_node_print(FILE *fp, ast_node_t *obj);
|
||||
ast_node_t ast_node_string(u64 byte, sv_t string);
|
||||
void ast_node_print(FILE *fp, ast_node_t *node);
|
||||
|
||||
/// The AST as a flat collection of nodes
|
||||
typedef struct
|
||||
|
||||
@@ -5,10 +5,11 @@
|
||||
* Commentary: See parser.h
|
||||
*/
|
||||
|
||||
#include "arl/lib/sv.h"
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <arl/lib/sv.h>
|
||||
#include <arl/parser/ast.h>
|
||||
#include <arl/parser/parser.h>
|
||||
|
||||
/// Expected characters in a symbol
|
||||
@@ -27,7 +28,7 @@ const char *parse_err_to_string(parse_err_t err)
|
||||
case PARSE_ERR_UNKNOWN_CHAR:
|
||||
return "UNKNOWN_CHAR";
|
||||
default:
|
||||
FAIL("Unexpected value for parse_err_t: %d\n", err);
|
||||
FAIL("Unexpected parse_err_t value: %d\n", err);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -119,10 +120,27 @@ parse_err_t parse_string(parse_stream_t *stream, ast_node_t *ret)
|
||||
/// Parse one symbol starting at the stream's current byte.
///
/// The symbol is the longest run of SYMBOL_CHARS at that position.  If its
/// text exactly matches the spelling of an AST primitive (see
/// ast_prim_to_cstr), `*ret` becomes a primitive node; otherwise it becomes a
/// symbol node.  Either way the node records the byte position the symbol
/// started at, and the stream is advanced past the symbol exactly once.
///
/// Always returns PARSE_ERR_OK.
parse_err_t parse_symbol(parse_stream_t *stream, ast_node_t *ret)
{
  sv_t current_contents = sv_chop_left(stream->contents, stream->byte);
  sv_t symbol =
      SV(current_contents.data, sv_while(current_contents, SYMBOL_CHARS));

  // Assume a fresh (user defined) symbol until a primitive spelling matches.
  *ret = ast_node_symbol(stream->byte, symbol);

  // See if the symbol is one of the AST primitives we can resolve ahead of
  // time.  The assert flags this routine for review whenever a primitive is
  // added.
  static_assert(NUM_AST_PRIMS == 2, "Expected number of AST primitives");
  for (ast_prim_t i = 0; i < NUM_AST_PRIMS; ++i)
  {
    const char *possible_prim = ast_prim_to_cstr(i);
    // Check the length first: symbol.data is a view and is only compared
    // over its own size.
    if (strlen(possible_prim) == symbol.size &&
        strncmp(possible_prim, symbol.data, symbol.size) == 0)
    {
      *ret = ast_node_prim(stream->byte, i);
      break;
    }
  }

  stream_advance(stream, symbol.size);
  return PARSE_ERR_OK;
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user