parser -> lexer
That's the real purpose of this module; it isn't really generating an AST, since ARL's syntax isn't tree-like at all. The next stage will be something closer to an AST, in the sense that it will introduce: - Syntactic analysis - Type checking
This commit is contained in:
4
Makefile
4
Makefile
@@ -3,8 +3,8 @@ CC=cc
|
|||||||
DIST=build
|
DIST=build
|
||||||
OUT=$(DIST)/arl.out
|
OUT=$(DIST)/arl.out
|
||||||
|
|
||||||
MODULES=. lib parser
|
MODULES=. lib lexer
|
||||||
UNITS=main lib/vec lib/sv parser/ast parser/parser
|
UNITS=main lib/vec lib/sv lexer/token lexer/lexer
|
||||||
OBJECTS:=$(patsubst %,$(DIST)/%.o, $(UNITS))
|
OBJECTS:=$(patsubst %,$(DIST)/%.o, $(UNITS))
|
||||||
|
|
||||||
LDFLAGS=
|
LDFLAGS=
|
||||||
|
|||||||
@@ -1,38 +1,38 @@
|
|||||||
/* parser.h: Parser which takes character buffers and yields an AST
|
/* lexer.h: Lexer which takes character buffers and yields a sequence of tokens.
|
||||||
* Created: 2026-01-22
|
* Created: 2026-01-22
|
||||||
* Author: Aryadev Chavali
|
* Author: Aryadev Chavali
|
||||||
* License: See end of file
|
* License: See end of file
|
||||||
* Commentary:
|
* Commentary:
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef PARSER_H
|
#ifndef LEXER_H
|
||||||
#define PARSER_H
|
#define LEXER_H
|
||||||
|
|
||||||
#include <arl/parser/ast.h>
|
#include <arl/lexer/token.h>
|
||||||
|
|
||||||
/// Parser streams, utilised when generating an AST.
|
/// Lexer input streams: a character buffer plus a byte cursor, utilised when lexing.
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
u64 byte;
|
u64 byte;
|
||||||
sv_t contents;
|
sv_t contents;
|
||||||
} parse_stream_t;
|
} lex_stream_t;
|
||||||
|
|
||||||
/// Types of errors that may occur during parsing
|
/// Types of errors that may occur during lexing
|
||||||
typedef enum
|
typedef enum
|
||||||
{
|
{
|
||||||
PARSE_ERR_OK = 0,
|
LEX_ERR_OK = 0,
|
||||||
PARSE_ERR_EXPECTED_SPEECH_MARKS,
|
LEX_ERR_EXPECTED_SPEECH_MARKS,
|
||||||
PARSE_ERR_UNKNOWN_CHAR,
|
LEX_ERR_UNKNOWN_CHAR,
|
||||||
} parse_err_t;
|
} lex_err_t;
|
||||||
const char *parse_err_to_string(parse_err_t err);
|
const char *lex_err_to_string(lex_err_t err);
|
||||||
|
|
||||||
// Generates an AST from STREAM, storing it in OUT. Returns any errors it may
|
// Generates a token stream from a lex_stream_t, storing it in OUT. Returns any
|
||||||
// generate.
|
// errors it may generate.
|
||||||
parse_err_t parse(ast_t *out, parse_stream_t *stream);
|
lex_err_t lex_stream(token_stream_t *out, lex_stream_t *stream);
|
||||||
|
|
||||||
// Computes the line and column that STREAM is currently pointing at in its
|
// Computes the line and column that STREAM is currently pointing at in its
|
||||||
// buffer, storing it in LINE and COL.
|
// buffer, storing it in LINE and COL.
|
||||||
void parse_stream_get_line_col(parse_stream_t *stream, u64 *line, u64 *col);
|
void lex_stream_get_line_col(lex_stream_t *stream, u64 *line, u64 *col);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
73
include/arl/lexer/token.h
Normal file
73
include/arl/lexer/token.h
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
/* token.h: General definition of tokens, and a sequence of them.
|
||||||
|
* Created: 2026-01-22
|
||||||
|
* Author: Aryadev Chavali
|
||||||
|
* License: See end of file
|
||||||
|
* Commentary:
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TOKEN_H
|
||||||
|
#define TOKEN_H
|
||||||
|
|
||||||
|
#include <arl/lib/base.h>
|
||||||
|
#include <arl/lib/sv.h>
|
||||||
|
#include <arl/lib/vec.h>
|
||||||
|
|
||||||
|
/// Types of tokens
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
TOKEN_TYPE_KNOWN = 0,
|
||||||
|
TOKEN_TYPE_SYMBOL,
|
||||||
|
TOKEN_TYPE_STRING,
|
||||||
|
|
||||||
|
NUM_TOKEN_TYPES,
|
||||||
|
} token_type_t;
|
||||||
|
|
||||||
|
/// Known symbols which later stages would benefit from.
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
TOKEN_KNOWN_PUTSTR,
|
||||||
|
NUM_TOKEN_KNOWNS,
|
||||||
|
} token_known_t;
|
||||||
|
|
||||||
|
const char *token_known_to_cstr(token_known_t);
|
||||||
|
|
||||||
|
/// Tokens are a tagged union
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
u64 byte_location;
|
||||||
|
token_type_t type;
|
||||||
|
union
|
||||||
|
{
|
||||||
|
token_known_t as_known;
|
||||||
|
sv_t as_symbol;
|
||||||
|
sv_t as_string;
|
||||||
|
};
|
||||||
|
} token_t;
|
||||||
|
|
||||||
|
token_t token_known(u64 byte, token_known_t known);
|
||||||
|
token_t token_symbol(u64 byte, sv_t symbol);
|
||||||
|
token_t token_string(u64 byte, sv_t string);
|
||||||
|
void token_print(FILE *fp, token_t *token);
|
||||||
|
|
||||||
|
/// Sequence of tokens
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
vec_t vec;
|
||||||
|
} token_stream_t;
|
||||||
|
|
||||||
|
void token_stream_free(token_stream_t *token);
|
||||||
|
void token_stream_print(FILE *fp, token_stream_t *token);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Copyright (C) 2026 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the MIT License for details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the MIT License,
|
||||||
|
* which you should have received a copy of along with this program. If not,
|
||||||
|
* please go to <https://opensource.org/license/MIT>.
|
||||||
|
|
||||||
|
*/
|
||||||
@@ -1,74 +0,0 @@
|
|||||||
/* ast.h: General definition of the AST and nodes within it.
|
|
||||||
* Created: 2026-01-22
|
|
||||||
* Author: Aryadev Chavali
|
|
||||||
* License: See end of file
|
|
||||||
* Commentary:
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef AST_H
|
|
||||||
#define AST_H
|
|
||||||
|
|
||||||
#include <arl/lib/base.h>
|
|
||||||
#include <arl/lib/sv.h>
|
|
||||||
#include <arl/lib/vec.h>
|
|
||||||
|
|
||||||
/// Types the AST can encode
|
|
||||||
typedef enum
|
|
||||||
{
|
|
||||||
AST_NODE_TYPE_KNOWN = 0,
|
|
||||||
AST_NODE_TYPE_SYMBOL,
|
|
||||||
AST_NODE_TYPE_STRING,
|
|
||||||
|
|
||||||
NUM_AST_NODE_TYPES,
|
|
||||||
} ast_node_type_t;
|
|
||||||
|
|
||||||
/// Known symbols - may reference callables or values.
|
|
||||||
typedef enum
|
|
||||||
{
|
|
||||||
AST_KNOWN_PUTSTR,
|
|
||||||
|
|
||||||
NUM_AST_KNOWNS,
|
|
||||||
} ast_known_t;
|
|
||||||
|
|
||||||
const char *ast_known_to_cstr(ast_known_t);
|
|
||||||
|
|
||||||
/// Node of the AST as a tagged union
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
u64 byte_location;
|
|
||||||
ast_node_type_t type;
|
|
||||||
union
|
|
||||||
{
|
|
||||||
ast_known_t as_known;
|
|
||||||
sv_t as_symbol;
|
|
||||||
sv_t as_string;
|
|
||||||
};
|
|
||||||
} ast_node_t;
|
|
||||||
|
|
||||||
ast_node_t ast_node_known(u64 byte, ast_known_t known);
|
|
||||||
ast_node_t ast_node_symbol(u64 byte, sv_t symbol);
|
|
||||||
ast_node_t ast_node_string(u64 byte, sv_t string);
|
|
||||||
void ast_node_print(FILE *fp, ast_node_t *node);
|
|
||||||
|
|
||||||
/// The AST as a flat collection of nodes
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
vec_t nodes;
|
|
||||||
} ast_t;
|
|
||||||
|
|
||||||
void ast_free(ast_t *ast);
|
|
||||||
void ast_print(FILE *fp, ast_t *ast);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Copyright (C) 2026 Aryadev Chavali
|
|
||||||
|
|
||||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
|
||||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE. See the MIT License for details.
|
|
||||||
|
|
||||||
* You may distribute and modify this code under the terms of the MIT License,
|
|
||||||
* which you should have received a copy of along with this program. If not,
|
|
||||||
* please go to <https://opensource.org/license/MIT>.
|
|
||||||
|
|
||||||
*/
|
|
||||||
@@ -1,44 +1,44 @@
|
|||||||
/* parser.c: Implementation of parser.
|
/* lexer.c: Implementation of lexer.
|
||||||
* Created: 2026-01-22
|
* Created: 2026-01-22
|
||||||
* Author: Aryadev Chavali
|
* Author: Aryadev Chavali
|
||||||
* License: See end of file
|
* License: See end of file
|
||||||
* Commentary: See /include/arl/parser/parser.h
|
* Commentary: See /include/arl/lexer/lexer.h
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
#include <arl/lexer/lexer.h>
|
||||||
|
#include <arl/lexer/token.h>
|
||||||
#include <arl/lib/sv.h>
|
#include <arl/lib/sv.h>
|
||||||
#include <arl/parser/ast.h>
|
|
||||||
#include <arl/parser/parser.h>
|
|
||||||
|
|
||||||
/// Expected characters in a symbol
|
/// Expected characters in a symbol
|
||||||
static const char *SYMBOL_CHARS =
|
static const char *SYMBOL_CHARS =
|
||||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&'()*+,-./"
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&'()*+,-./"
|
||||||
":;<=>?@\\^_`{|}~0123456789";
|
":;<=>?@\\^_`{|}~0123456789";
|
||||||
|
|
||||||
const char *parse_err_to_string(parse_err_t err)
|
const char *lex_err_to_string(lex_err_t err)
|
||||||
{
|
{
|
||||||
switch (err)
|
switch (err)
|
||||||
{
|
{
|
||||||
case PARSE_ERR_OK:
|
case LEX_ERR_OK:
|
||||||
return "OK";
|
return "OK";
|
||||||
case PARSE_ERR_EXPECTED_SPEECH_MARKS:
|
case LEX_ERR_EXPECTED_SPEECH_MARKS:
|
||||||
return "EXPECTED_SPEECH_MARKS";
|
return "EXPECTED_SPEECH_MARKS";
|
||||||
case PARSE_ERR_UNKNOWN_CHAR:
|
case LEX_ERR_UNKNOWN_CHAR:
|
||||||
return "UNKNOWN_CHAR";
|
return "UNKNOWN_CHAR";
|
||||||
default:
|
default:
|
||||||
FAIL("Unexpected parse_err_t value: %d\n", err);
|
FAIL("Unexpected lex_err_t value: %d\n", err);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Prototypes for streams
|
/// Prototypes for streams
|
||||||
bool stream_eos(parse_stream_t *stream);
|
bool stream_eos(lex_stream_t *stream);
|
||||||
char stream_peek(parse_stream_t *stream);
|
char stream_peek(lex_stream_t *stream);
|
||||||
void stream_advance(parse_stream_t *stream, u64 size);
|
void stream_advance(lex_stream_t *stream, u64 size);
|
||||||
u64 stream_size(parse_stream_t *stream);
|
u64 stream_size(lex_stream_t *stream);
|
||||||
|
|
||||||
void parse_stream_get_line_col(parse_stream_t *stream, u64 *line, u64 *col)
|
void lex_stream_get_line_col(lex_stream_t *stream, u64 *line, u64 *col)
|
||||||
{
|
{
|
||||||
assert(stream && line && col && "Expected valid pointers.");
|
assert(stream && line && col && "Expected valid pointers.");
|
||||||
for (u64 i = 0; i < stream->byte; ++i)
|
for (u64 i = 0; i < stream->byte; ++i)
|
||||||
@@ -56,11 +56,11 @@ void parse_stream_get_line_col(parse_stream_t *stream, u64 *line, u64 *col)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Prototypes for parsing subroutines
|
/// Prototypes for lexing subroutines
|
||||||
parse_err_t parse_string(parse_stream_t *stream, ast_node_t *ret);
|
lex_err_t lex_string(lex_stream_t *stream, token_t *ret);
|
||||||
parse_err_t parse_symbol(parse_stream_t *stream, ast_node_t *ret);
|
lex_err_t lex_symbol(lex_stream_t *stream, token_t *ret);
|
||||||
|
|
||||||
parse_err_t parse(ast_t *out, parse_stream_t *stream)
|
lex_err_t lex_stream(token_stream_t *out, lex_stream_t *stream)
|
||||||
{
|
{
|
||||||
assert(out && stream && "Expected valid pointers");
|
assert(out && stream && "Expected valid pointers");
|
||||||
while (!stream_eos(stream))
|
while (!stream_eos(stream))
|
||||||
@@ -76,32 +76,32 @@ parse_err_t parse(ast_t *out, parse_stream_t *stream)
|
|||||||
}
|
}
|
||||||
else if (cur == '"')
|
else if (cur == '"')
|
||||||
{
|
{
|
||||||
// we make a copy for parse_string to mess with
|
// we make a copy for lex_string to mess with
|
||||||
ast_node_t ret = {0};
|
token_t ret = {0};
|
||||||
parse_err_t perr = parse_string(stream, &ret);
|
lex_err_t perr = lex_string(stream, &ret);
|
||||||
if (perr)
|
if (perr)
|
||||||
return perr;
|
return perr;
|
||||||
vec_append(&out->nodes, &ret, sizeof(ret));
|
vec_append(&out->vec, &ret, sizeof(ret));
|
||||||
}
|
}
|
||||||
else if (strchr(SYMBOL_CHARS, cur) && !isdigit(cur))
|
else if (strchr(SYMBOL_CHARS, cur) && !isdigit(cur))
|
||||||
{
|
{
|
||||||
// we make a copy for parse_symbol to mess with
|
// we make a copy for lex_symbol to mess with
|
||||||
ast_node_t ret = {0};
|
token_t ret = {0};
|
||||||
parse_err_t perr = parse_symbol(stream, &ret);
|
lex_err_t perr = lex_symbol(stream, &ret);
|
||||||
if (perr)
|
if (perr)
|
||||||
return perr;
|
return perr;
|
||||||
|
|
||||||
vec_append(&out->nodes, &ret, sizeof(ret));
|
vec_append(&out->vec, &ret, sizeof(ret));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
return PARSE_ERR_UNKNOWN_CHAR;
|
return LEX_ERR_UNKNOWN_CHAR;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return PARSE_ERR_OK;
|
return LEX_ERR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
parse_err_t parse_string(parse_stream_t *stream, ast_node_t *ret)
|
lex_err_t lex_string(lex_stream_t *stream, token_t *ret)
|
||||||
{
|
{
|
||||||
// Increment the cursor just past the first speechmark
|
// Increment the cursor just past the first speechmark
|
||||||
stream_advance(stream, 1);
|
stream_advance(stream, 1);
|
||||||
@@ -111,46 +111,46 @@ parse_err_t parse_string(parse_stream_t *stream, ast_node_t *ret)
|
|||||||
// If we're at the edge of the stream, there must not have been any
|
// If we're at the edge of the stream, there must not have been any
|
||||||
// speechmarks.
|
// speechmarks.
|
||||||
if (string.size + stream->byte == stream_size(stream))
|
if (string.size + stream->byte == stream_size(stream))
|
||||||
return PARSE_ERR_EXPECTED_SPEECH_MARKS;
|
return LEX_ERR_EXPECTED_SPEECH_MARKS;
|
||||||
|
|
||||||
// `string` is well defined, package and throw it back.
|
// `string` is well defined, package and throw it back.
|
||||||
*ret = ast_node_string(stream->byte - 1, string);
|
*ret = token_string(stream->byte - 1, string);
|
||||||
stream_advance(stream, string.size + 1);
|
stream_advance(stream, string.size + 1);
|
||||||
return PARSE_ERR_OK;
|
return LEX_ERR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
parse_err_t parse_symbol(parse_stream_t *stream, ast_node_t *ret)
|
lex_err_t lex_symbol(lex_stream_t *stream, token_t *ret)
|
||||||
{
|
{
|
||||||
sv_t symbol = sv_chop_left(stream->contents, stream->byte);
|
sv_t symbol = sv_chop_left(stream->contents, stream->byte);
|
||||||
symbol.size = sv_while(symbol, SYMBOL_CHARS);
|
symbol.size = sv_while(symbol, SYMBOL_CHARS);
|
||||||
|
|
||||||
// see if symbol is one of the already known symbols
|
// see if symbol is one of the already known symbols
|
||||||
static_assert(NUM_AST_KNOWNS == 1, "Expected number of AST_KNOWNs");
|
static_assert(NUM_TOKEN_KNOWNS == 1, "Expected number of TOKEN_KNOWNs");
|
||||||
for (ast_known_t i = 0; i < NUM_AST_KNOWNS; ++i)
|
for (token_known_t i = 0; i < NUM_TOKEN_KNOWNS; ++i)
|
||||||
{
|
{
|
||||||
const char *possible_known = ast_known_to_cstr(i);
|
const char *possible_known = token_known_to_cstr(i);
|
||||||
if (strlen(possible_known) == symbol.size &&
|
if (strlen(possible_known) == symbol.size &&
|
||||||
strncmp(possible_known, symbol.data, symbol.size) == 0)
|
strncmp(possible_known, symbol.data, symbol.size) == 0)
|
||||||
{
|
{
|
||||||
// Found a matching known symbol
|
// Found a matching known symbol
|
||||||
*ret = ast_node_known(stream->byte, i);
|
*ret = token_known(stream->byte, i);
|
||||||
goto end;
|
goto end;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// otherwise, it must be a fresh symbol i.e. user defined
|
// otherwise, it must be a fresh symbol i.e. user defined
|
||||||
*ret = ast_node_symbol(stream->byte, symbol);
|
*ret = token_symbol(stream->byte, symbol);
|
||||||
end:
|
end:
|
||||||
stream_advance(stream, symbol.size);
|
stream_advance(stream, symbol.size);
|
||||||
return PARSE_ERR_OK;
|
return LEX_ERR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool stream_eos(parse_stream_t *stream)
|
bool stream_eos(lex_stream_t *stream)
|
||||||
{
|
{
|
||||||
return stream->byte >= stream->contents.size;
|
return stream->byte >= stream->contents.size;
|
||||||
}
|
}
|
||||||
|
|
||||||
char stream_peek(parse_stream_t *stream)
|
char stream_peek(lex_stream_t *stream)
|
||||||
{
|
{
|
||||||
if (stream_eos(stream))
|
if (stream_eos(stream))
|
||||||
return '\0';
|
return '\0';
|
||||||
@@ -158,7 +158,7 @@ char stream_peek(parse_stream_t *stream)
|
|||||||
return stream->contents.data[stream->byte];
|
return stream->contents.data[stream->byte];
|
||||||
}
|
}
|
||||||
|
|
||||||
void stream_advance(parse_stream_t *stream, u64 size)
|
void stream_advance(lex_stream_t *stream, u64 size)
|
||||||
{
|
{
|
||||||
if (stream->byte + size >= stream->contents.size)
|
if (stream->byte + size >= stream->contents.size)
|
||||||
stream->byte = stream->contents.size;
|
stream->byte = stream->contents.size;
|
||||||
@@ -166,7 +166,7 @@ void stream_advance(parse_stream_t *stream, u64 size)
|
|||||||
stream->byte += size;
|
stream->byte += size;
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 stream_size(parse_stream_t *stream)
|
u64 stream_size(lex_stream_t *stream)
|
||||||
{
|
{
|
||||||
return stream->contents.size;
|
return stream->contents.size;
|
||||||
}
|
}
|
||||||
115
src/lexer/token.c
Normal file
115
src/lexer/token.c
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
/* token.c: Implementation of TOKEN constructor/destructor functions
|
||||||
|
* Created: 2026-01-22
|
||||||
|
* Author: Aryadev Chavali
|
||||||
|
* License: See end of file
|
||||||
|
* Commentary: See /include/arl/lexer/token.h.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <arl/lexer/token.h>
|
||||||
|
#include <arl/lib/base.h>
|
||||||
|
#include <arl/lib/vec.h>
|
||||||
|
|
||||||
|
const char *token_known_to_cstr(token_known_t known)
|
||||||
|
{
|
||||||
|
switch (known)
|
||||||
|
{
|
||||||
|
case TOKEN_KNOWN_PUTSTR:
|
||||||
|
return "putstr";
|
||||||
|
default:
|
||||||
|
FAIL("Unexpected TOKEN_KNOWN value: %d\n", known);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
token_t token_known(u64 byte, token_known_t known)
|
||||||
|
{
|
||||||
|
return (token_t){
|
||||||
|
.byte_location = byte,
|
||||||
|
.type = TOKEN_TYPE_KNOWN,
|
||||||
|
.as_known = known,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
token_t token_string(u64 byte, sv_t string)
|
||||||
|
{
|
||||||
|
return (token_t){
|
||||||
|
.byte_location = byte,
|
||||||
|
.type = TOKEN_TYPE_STRING,
|
||||||
|
.as_string = string,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
token_t token_symbol(u64 byte, sv_t symbol)
|
||||||
|
{
|
||||||
|
return (token_t){
|
||||||
|
.byte_location = byte,
|
||||||
|
.type = TOKEN_TYPE_SYMBOL,
|
||||||
|
.as_symbol = symbol,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
void token_print(FILE *fp, token_t *token)
|
||||||
|
{
|
||||||
|
if (!token)
|
||||||
|
{
|
||||||
|
fprintf(fp, "NIL");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
switch (token->type)
|
||||||
|
{
|
||||||
|
case TOKEN_TYPE_KNOWN:
|
||||||
|
fprintf(fp, "KNOWN(%s)", token_known_to_cstr(token->as_known));
|
||||||
|
break;
|
||||||
|
case TOKEN_TYPE_SYMBOL:
|
||||||
|
fprintf(fp, "SYMBOL(" PR_SV ")", SV_FMT(token->as_symbol));
|
||||||
|
break;
|
||||||
|
case TOKEN_TYPE_STRING:
|
||||||
|
fprintf(fp, "STRING(" PR_SV ")", SV_FMT(token->as_string));
|
||||||
|
break;
|
||||||
|
case NUM_TOKEN_TYPES:
|
||||||
|
default:
|
||||||
|
FAIL("Unexpected token type: %d\n", token->type);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void token_stream_print(FILE *fp, token_stream_t *token)
|
||||||
|
{
|
||||||
|
if (!token)
|
||||||
|
{
|
||||||
|
fprintf(fp, "{}");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
fprintf(fp, "{");
|
||||||
|
if (token->vec.size == 0)
|
||||||
|
{
|
||||||
|
fprintf(fp, "}\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(fp, "\n");
|
||||||
|
for (u64 i = 0; i < token->vec.size / sizeof(token_t); ++i)
|
||||||
|
{
|
||||||
|
token_t item = VEC_GET(&token->vec, i, token_t);
|
||||||
|
fprintf(fp, "\t[%lu]: ", i);
|
||||||
|
token_print(fp, &item);
|
||||||
|
fprintf(fp, "\n");
|
||||||
|
}
|
||||||
|
fprintf(fp, "}");
|
||||||
|
}
|
||||||
|
|
||||||
|
void token_stream_free(token_stream_t *stream)
|
||||||
|
{
|
||||||
|
// we can free the vector itself and we're done
|
||||||
|
vec_free(&stream->vec);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copyright (C) 2026 Aryadev Chavali
|
||||||
|
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the MIT License for details.
|
||||||
|
|
||||||
|
* You may distribute and modify this code under the terms of the MIT License,
|
||||||
|
* which you should have received a copy of along with this program. If not,
|
||||||
|
* please go to <https://opensource.org/license/MIT>.
|
||||||
|
|
||||||
|
*/
|
||||||
21
src/main.c
21
src/main.c
@@ -12,11 +12,11 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
#include <arl/lexer/lexer.h>
|
||||||
|
#include <arl/lexer/token.h>
|
||||||
#include <arl/lib/base.h>
|
#include <arl/lib/base.h>
|
||||||
#include <arl/lib/sv.h>
|
#include <arl/lib/sv.h>
|
||||||
#include <arl/lib/vec.h>
|
#include <arl/lib/vec.h>
|
||||||
#include <arl/parser/ast.h>
|
|
||||||
#include <arl/parser/parser.h>
|
|
||||||
|
|
||||||
int read_file(const char *filename, sv_t *ret)
|
int read_file(const char *filename, sv_t *ret)
|
||||||
{
|
{
|
||||||
@@ -114,30 +114,29 @@ int main(int argc, char *argv[])
|
|||||||
|
|
||||||
LOG("%s => `" PR_SV "`\n", filename, SV_FMT(contents));
|
LOG("%s => `" PR_SV "`\n", filename, SV_FMT(contents));
|
||||||
|
|
||||||
parse_stream_t stream = {.byte = 0, .contents = contents};
|
lex_stream_t stream = {.byte = 0, .contents = contents};
|
||||||
ast_t ast = {0};
|
token_stream_t tokens = {0};
|
||||||
parse_err_t perr = parse(&ast, &stream);
|
lex_err_t perr = lex_stream(&tokens, &stream);
|
||||||
if (perr)
|
if (perr)
|
||||||
{
|
{
|
||||||
u64 line = 1, col = 0;
|
u64 line = 1, col = 0;
|
||||||
parse_stream_get_line_col(&stream, &line, &col);
|
lex_stream_get_line_col(&stream, &line, &col);
|
||||||
|
|
||||||
LOG_ERR("%s:%lu:%lu: %s\n", filename, line, col, parse_err_to_string(perr));
|
LOG_ERR("%s:%lu:%lu: %s\n", filename, line, col, lex_err_to_string(perr));
|
||||||
ret = 1;
|
ret = 1;
|
||||||
goto end;
|
goto end;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG("Parsed %lu nodes\n", ast.nodes.size / sizeof(ast_node_t));
|
LOG("Lexed %lu tokens\n", tokens.vec.size / sizeof(token_t));
|
||||||
#if VERBOSE_LOGS
|
#if VERBOSE_LOGS
|
||||||
ast_print(stdout, &ast);
|
token_stream_print(stdout, &tokens);
|
||||||
#endif
|
#endif
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
||||||
end:
|
end:
|
||||||
if (contents.data)
|
if (contents.data)
|
||||||
free(contents.data);
|
free(contents.data);
|
||||||
if (ast.nodes.capacity > 0)
|
token_stream_free(&tokens);
|
||||||
ast_free(&ast);
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
115
src/parser/ast.c
115
src/parser/ast.c
@@ -1,115 +0,0 @@
|
|||||||
/* ast.c: Implementation of AST constructor/destructor functions
|
|
||||||
* Created: 2026-01-22
|
|
||||||
* Author: Aryadev Chavali
|
|
||||||
* License: See end of file
|
|
||||||
* Commentary: See /include/arl/parser/ast.h.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <arl/lib/base.h>
|
|
||||||
#include <arl/lib/vec.h>
|
|
||||||
#include <arl/parser/ast.h>
|
|
||||||
|
|
||||||
const char *ast_known_to_cstr(ast_known_t known)
|
|
||||||
{
|
|
||||||
switch (known)
|
|
||||||
{
|
|
||||||
case AST_KNOWN_PUTSTR:
|
|
||||||
return "putstr";
|
|
||||||
default:
|
|
||||||
FAIL("Unexpected AST_KNOWN value: %d\n", known);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ast_node_t ast_node_known(u64 byte, ast_known_t known)
|
|
||||||
{
|
|
||||||
return (ast_node_t){
|
|
||||||
.byte_location = byte,
|
|
||||||
.type = AST_NODE_TYPE_KNOWN,
|
|
||||||
.as_known = known,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
ast_node_t ast_node_string(u64 byte, sv_t string)
|
|
||||||
{
|
|
||||||
return (ast_node_t){
|
|
||||||
.byte_location = byte,
|
|
||||||
.type = AST_NODE_TYPE_STRING,
|
|
||||||
.as_string = string,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
ast_node_t ast_node_symbol(u64 byte, sv_t symbol)
|
|
||||||
{
|
|
||||||
return (ast_node_t){
|
|
||||||
.byte_location = byte,
|
|
||||||
.type = AST_NODE_TYPE_SYMBOL,
|
|
||||||
.as_symbol = symbol,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
void ast_node_print(FILE *fp, ast_node_t *node)
|
|
||||||
{
|
|
||||||
if (!node)
|
|
||||||
{
|
|
||||||
fprintf(fp, "NIL");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
switch (node->type)
|
|
||||||
{
|
|
||||||
case AST_NODE_TYPE_KNOWN:
|
|
||||||
fprintf(fp, "KNOWN(%s)", ast_known_to_cstr(node->as_known));
|
|
||||||
break;
|
|
||||||
case AST_NODE_TYPE_SYMBOL:
|
|
||||||
fprintf(fp, "SYMBOL(" PR_SV ")", SV_FMT(node->as_symbol));
|
|
||||||
break;
|
|
||||||
case AST_NODE_TYPE_STRING:
|
|
||||||
fprintf(fp, "STRING(" PR_SV ")", SV_FMT(node->as_string));
|
|
||||||
break;
|
|
||||||
case NUM_AST_NODE_TYPES:
|
|
||||||
default:
|
|
||||||
FAIL("Unexpected node type: %d\n", node->type);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void ast_print(FILE *fp, ast_t *ast)
|
|
||||||
{
|
|
||||||
if (!ast)
|
|
||||||
{
|
|
||||||
fprintf(fp, "{}");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
fprintf(fp, "{");
|
|
||||||
if (ast->nodes.size == 0)
|
|
||||||
{
|
|
||||||
fprintf(fp, "}\n");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf(fp, "\n");
|
|
||||||
for (u64 i = 0; i < ast->nodes.size / sizeof(ast_node_t); ++i)
|
|
||||||
{
|
|
||||||
ast_node_t item = VEC_GET(&ast->nodes, i, ast_node_t);
|
|
||||||
fprintf(fp, "\t[%lu]: ", i);
|
|
||||||
ast_node_print(fp, &item);
|
|
||||||
fprintf(fp, "\n");
|
|
||||||
}
|
|
||||||
fprintf(fp, "}");
|
|
||||||
}
|
|
||||||
|
|
||||||
void ast_free(ast_t *ast)
|
|
||||||
{
|
|
||||||
// we can free the vector itself and we're done
|
|
||||||
vec_free(&ast->nodes);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Copyright (C) 2026 Aryadev Chavali
|
|
||||||
|
|
||||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
|
||||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE. See the MIT License for details.
|
|
||||||
|
|
||||||
* You may distribute and modify this code under the terms of the MIT License,
|
|
||||||
* which you should have received a copy of along with this program. If not,
|
|
||||||
* please go to <https://opensource.org/license/MIT>.
|
|
||||||
|
|
||||||
*/
|
|
||||||
Reference in New Issue
Block a user