parser: Better AST primitives
commit 1588e7b46d
Author: Aryadev Chavali <aryadev@aryadevchavali.com>
Date: Sat Jan 24 02:55:12 2026 +0000

parser/parser: parse_symbol now supports primitives

parse_symbol now investigates whether the parsed symbol data is actually just a primitive (linear search through all primitives). If it is, it returns a primitive node first. Otherwise, it generates a symbol as per the previous form of the routine.

commit 62c91990c4
Author: Aryadev Chavali <aryadev@aryadevchavali.com>
Date: Sat Jan 24 02:40:26 2026 +0000

parser/ast: Added support for node-level primitives

These are just an enumeration of primitives we already expect to be present within a program. Instead of leaving everything as a symbol, we can compile certain symbols into the enumeration ahead of time to make later stages easier.
This commit is contained in:
@@ -5,15 +5,38 @@
|
|||||||
* Commentary: See ast.h.
|
* Commentary: See ast.h.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <arl/lib/base.h>
|
||||||
#include <arl/lib/vec.h>
|
#include <arl/lib/vec.h>
|
||||||
#include <arl/parser/ast.h>
|
#include <arl/parser/ast.h>
|
||||||
|
|
||||||
|
const char *ast_prim_to_cstr(ast_prim_t prim)
|
||||||
|
{
|
||||||
|
switch (prim)
|
||||||
|
{
|
||||||
|
case AST_PRIM_NIL:
|
||||||
|
return "nil";
|
||||||
|
case AST_PRIM_PRINTLN:
|
||||||
|
return "println";
|
||||||
|
default:
|
||||||
|
FAIL("Unexpected AST primitive value: %d\n", prim);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ast_node_t ast_node_prim(u64 byte, ast_prim_t primitive)
|
||||||
|
{
|
||||||
|
return (ast_node_t){
|
||||||
|
.byte_location = byte,
|
||||||
|
.type = AST_NODE_TYPE_PRIMITIVE,
|
||||||
|
.value = {.as_prim = primitive},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
ast_node_t ast_node_string(u64 byte, sv_t string)
|
ast_node_t ast_node_string(u64 byte, sv_t string)
|
||||||
{
|
{
|
||||||
return (ast_node_t){
|
return (ast_node_t){
|
||||||
.byte_location = byte,
|
.byte_location = byte,
|
||||||
.type = AST_NODE_TYPE_STRING,
|
.type = AST_NODE_TYPE_STRING,
|
||||||
.value = {string},
|
.value = {.as_string = string},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -22,25 +45,31 @@ ast_node_t ast_node_symbol(u64 byte, sv_t symbol)
|
|||||||
return (ast_node_t){
|
return (ast_node_t){
|
||||||
.byte_location = byte,
|
.byte_location = byte,
|
||||||
.type = AST_NODE_TYPE_SYMBOL,
|
.type = AST_NODE_TYPE_SYMBOL,
|
||||||
.value = {symbol},
|
.value = {.as_symbol = symbol},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_node_print(FILE *fp, ast_node_t *obj)
|
void ast_node_print(FILE *fp, ast_node_t *node)
|
||||||
{
|
{
|
||||||
if (!obj)
|
if (!node)
|
||||||
{
|
{
|
||||||
fprintf(fp, "NIL");
|
fprintf(fp, "NIL");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
switch (obj->type)
|
switch (node->type)
|
||||||
{
|
{
|
||||||
|
case AST_NODE_TYPE_PRIMITIVE:
|
||||||
|
fprintf(fp, "PRIMITIVE(%s)", ast_prim_to_cstr(node->value.as_prim));
|
||||||
|
break;
|
||||||
case AST_NODE_TYPE_SYMBOL:
|
case AST_NODE_TYPE_SYMBOL:
|
||||||
fprintf(fp, "SYMBOL(" PR_SV ")", SV_FMT(obj->value.as_symbol));
|
fprintf(fp, "SYMBOL(" PR_SV ")", SV_FMT(node->value.as_symbol));
|
||||||
break;
|
break;
|
||||||
case AST_NODE_TYPE_STRING:
|
case AST_NODE_TYPE_STRING:
|
||||||
fprintf(fp, "STRING(" PR_SV ")", SV_FMT(obj->value.as_string));
|
fprintf(fp, "STRING(" PR_SV ")", SV_FMT(node->value.as_string));
|
||||||
break;
|
break;
|
||||||
|
case NUM_AST_NODE_TYPES:
|
||||||
|
default:
|
||||||
|
FAIL("Unexpected node type: %d\n", node->type);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -15,10 +15,24 @@
|
|||||||
/// Types the AST can encode
|
/// Types the AST can encode
|
||||||
typedef enum
|
typedef enum
|
||||||
{
|
{
|
||||||
AST_NODE_TYPE_SYMBOL = 0,
|
AST_NODE_TYPE_PRIMITIVE = 0,
|
||||||
|
AST_NODE_TYPE_SYMBOL,
|
||||||
AST_NODE_TYPE_STRING,
|
AST_NODE_TYPE_STRING,
|
||||||
|
|
||||||
|
NUM_AST_NODE_TYPES,
|
||||||
} ast_node_type_t;
|
} ast_node_type_t;
|
||||||
|
|
||||||
|
/// Primitives (values, callables, etc) as symbols
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
AST_PRIM_NIL = 0,
|
||||||
|
AST_PRIM_PRINTLN,
|
||||||
|
|
||||||
|
NUM_AST_PRIMS,
|
||||||
|
} ast_prim_t;
|
||||||
|
|
||||||
|
const char *ast_prim_to_cstr(ast_prim_t);
|
||||||
|
|
||||||
/// Node of the AST as a tagged union
|
/// Node of the AST as a tagged union
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
@@ -26,14 +40,16 @@ typedef struct
|
|||||||
ast_node_type_t type;
|
ast_node_type_t type;
|
||||||
union
|
union
|
||||||
{
|
{
|
||||||
sv_t as_string;
|
ast_prim_t as_prim;
|
||||||
sv_t as_symbol;
|
sv_t as_symbol;
|
||||||
|
sv_t as_string;
|
||||||
} value;
|
} value;
|
||||||
} ast_node_t;
|
} ast_node_t;
|
||||||
|
|
||||||
ast_node_t ast_node_string(u64 byte, sv_t string);
|
ast_node_t ast_node_prim(u64 byte, ast_prim_t primitive);
|
||||||
ast_node_t ast_node_symbol(u64 byte, sv_t symbol);
|
ast_node_t ast_node_symbol(u64 byte, sv_t symbol);
|
||||||
void ast_node_print(FILE *fp, ast_node_t *obj);
|
ast_node_t ast_node_string(u64 byte, sv_t string);
|
||||||
|
void ast_node_print(FILE *fp, ast_node_t *node);
|
||||||
|
|
||||||
/// The AST as a flat collection of nodes
|
/// The AST as a flat collection of nodes
|
||||||
typedef struct
|
typedef struct
|
||||||
|
|||||||
@@ -5,10 +5,11 @@
|
|||||||
* Commentary: See parser.h
|
* Commentary: See parser.h
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "arl/lib/sv.h"
|
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
#include <arl/lib/sv.h>
|
||||||
|
#include <arl/parser/ast.h>
|
||||||
#include <arl/parser/parser.h>
|
#include <arl/parser/parser.h>
|
||||||
|
|
||||||
/// Expected characters in a symbol
|
/// Expected characters in a symbol
|
||||||
@@ -27,7 +28,7 @@ const char *parse_err_to_string(parse_err_t err)
|
|||||||
case PARSE_ERR_UNKNOWN_CHAR:
|
case PARSE_ERR_UNKNOWN_CHAR:
|
||||||
return "UNKNOWN_CHAR";
|
return "UNKNOWN_CHAR";
|
||||||
default:
|
default:
|
||||||
FAIL("Unexpected value for parse_err_t: %d\n", err);
|
FAIL("Unexpected parse_err_t value: %d\n", err);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -119,10 +120,27 @@ parse_err_t parse_string(parse_stream_t *stream, ast_node_t *ret)
|
|||||||
parse_err_t parse_symbol(parse_stream_t *stream, ast_node_t *ret)
|
parse_err_t parse_symbol(parse_stream_t *stream, ast_node_t *ret)
|
||||||
{
|
{
|
||||||
sv_t current_contents = sv_chop_left(stream->contents, stream->byte);
|
sv_t current_contents = sv_chop_left(stream->contents, stream->byte);
|
||||||
u64 symbol_size = sv_while(current_contents, SYMBOL_CHARS);
|
sv_t symbol =
|
||||||
// Generate symbol
|
SV(current_contents.data, sv_while(current_contents, SYMBOL_CHARS));
|
||||||
*ret = ast_node_symbol(stream->byte, SV(current_contents.data, symbol_size));
|
|
||||||
stream_advance(stream, symbol_size);
|
// see if symbol is one of the AST primitives we can parse AOT
|
||||||
|
static_assert(NUM_AST_PRIMS == 2, "Expected number of AST primitives");
|
||||||
|
for (ast_prim_t i = 0; i < NUM_AST_PRIMS; ++i)
|
||||||
|
{
|
||||||
|
const char *possible_prim = ast_prim_to_cstr(i);
|
||||||
|
if (strlen(possible_prim) == symbol.size &&
|
||||||
|
strncmp(possible_prim, symbol.data, symbol.size) == 0)
|
||||||
|
{
|
||||||
|
// Found a matching primitive
|
||||||
|
*ret = ast_node_prim(stream->byte, i);
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// otherwise, it must be a fresh symbol i.e. user defined
|
||||||
|
*ret = ast_node_symbol(stream->byte, symbol);
|
||||||
|
end:
|
||||||
|
stream_advance(stream, symbol.size);
|
||||||
return PARSE_ERR_OK;
|
return PARSE_ERR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user