parser/ast|parser: Use byte as indicator of position instead of line/col
Tracking a single byte offset is much faster than updating the line and column as we go. In the vast majority of cases the line/column data is never needed, so maintaining it on every advance is wasted effort. At the point where we do need accurate line/column information, we can compute it from the byte offset - in an error state, it really doesn't matter that we're spending the extra time to compute it.
This commit is contained in:
@@ -8,23 +8,21 @@
|
|||||||
#include <arl/lib/vec.h>
|
#include <arl/lib/vec.h>
|
||||||
#include <arl/parser/ast.h>
|
#include <arl/parser/ast.h>
|
||||||
|
|
||||||
obj_t obj_string(u64 line, u64 col, sv_t string)
|
obj_t obj_string(u64 byte, sv_t string)
|
||||||
{
|
{
|
||||||
return (obj_t){
|
return (obj_t){
|
||||||
.line = line,
|
.byte = byte,
|
||||||
.column = col,
|
.type = TYPE_STRING,
|
||||||
.type = TYPE_STRING,
|
.value = {string},
|
||||||
.value = {string},
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
obj_t obj_symbol(u64 line, u64 col, sv_t symbol)
|
obj_t obj_symbol(u64 byte, sv_t symbol)
|
||||||
{
|
{
|
||||||
return (obj_t){
|
return (obj_t){
|
||||||
.line = line,
|
.byte = byte,
|
||||||
.column = col,
|
.type = TYPE_SYMBOL,
|
||||||
.type = TYPE_SYMBOL,
|
.value = {symbol},
|
||||||
.value = {symbol},
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -20,8 +20,7 @@ typedef enum
|
|||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
u64 line, column;
|
u64 byte;
|
||||||
|
|
||||||
type_t type;
|
type_t type;
|
||||||
union
|
union
|
||||||
{
|
{
|
||||||
@@ -30,8 +29,8 @@ typedef struct
|
|||||||
} value;
|
} value;
|
||||||
} obj_t;
|
} obj_t;
|
||||||
|
|
||||||
obj_t obj_string(u64 line, u64 col, sv_t string);
|
obj_t obj_string(u64 byte, sv_t string);
|
||||||
obj_t obj_symbol(u64 line, u64 col, sv_t symbol);
|
obj_t obj_symbol(u64 byte, sv_t symbol);
|
||||||
void obj_print(FILE *fp, obj_t *obj);
|
void obj_print(FILE *fp, obj_t *obj);
|
||||||
|
|
||||||
// Our AST is simply a vector of objects. Nesting and tree like structure is
|
// Our AST is simply a vector of objects. Nesting and tree like structure is
|
||||||
|
|||||||
@@ -33,16 +33,37 @@ const char *parse_err_to_string(parse_err_t err)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Prototypes for parsing
|
/// Prototypes for streams
|
||||||
bool stream_eos(parse_stream_t *stream);
|
bool stream_eos(parse_stream_t *stream);
|
||||||
char stream_peek(parse_stream_t *stream);
|
char stream_peek(parse_stream_t *stream);
|
||||||
void stream_advance(parse_stream_t *stream, u64 size);
|
void stream_advance(parse_stream_t *stream, u64 size);
|
||||||
u64 stream_size(parse_stream_t *stream);
|
u64 stream_size(parse_stream_t *stream);
|
||||||
|
|
||||||
|
void parse_stream_get_line_col(parse_stream_t *stream, u64 *line, u64 *col)
|
||||||
|
{
|
||||||
|
assert(stream && line && col && "Expected valid pointers.");
|
||||||
|
for (u64 i = 0; i < stream->byte; ++i)
|
||||||
|
{
|
||||||
|
char c = stream->contents.data[i];
|
||||||
|
if (c == '\n')
|
||||||
|
{
|
||||||
|
*line += 1;
|
||||||
|
*col = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
*col += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Prototypes for parsing subroutines
|
||||||
parse_err_t parse_string(parse_stream_t *stream, obj_t *ret);
|
parse_err_t parse_string(parse_stream_t *stream, obj_t *ret);
|
||||||
parse_err_t parse_symbol(parse_stream_t *stream, obj_t *ret);
|
parse_err_t parse_symbol(parse_stream_t *stream, obj_t *ret);
|
||||||
|
|
||||||
parse_err_t parse(ast_t *out, parse_stream_t *stream)
|
parse_err_t parse(ast_t *out, parse_stream_t *stream)
|
||||||
{
|
{
|
||||||
|
assert(out && stream && "Expected valid pointers");
|
||||||
while (!stream_eos(stream))
|
while (!stream_eos(stream))
|
||||||
{
|
{
|
||||||
char cur = stream_peek(stream);
|
char cur = stream_peek(stream);
|
||||||
@@ -85,32 +106,30 @@ parse_err_t parse_string(parse_stream_t *stream, obj_t *ret)
|
|||||||
{
|
{
|
||||||
// Increment the cursor just past the first speechmark
|
// Increment the cursor just past the first speechmark
|
||||||
stream_advance(stream, 1);
|
stream_advance(stream, 1);
|
||||||
sv_t current_contents = sv_chop_left(stream->contents, stream->cursor);
|
sv_t current_contents = sv_chop_left(stream->contents, stream->byte);
|
||||||
u64 string_size = sv_till(current_contents, "\"");
|
u64 string_size = sv_till(current_contents, "\"");
|
||||||
if (string_size + stream->cursor == stream_size(stream))
|
if (string_size + stream->byte == stream_size(stream))
|
||||||
return PARSE_ERR_EXPECTED_SPEECH_MARKS;
|
return PARSE_ERR_EXPECTED_SPEECH_MARKS;
|
||||||
// Bounds of string are well defined, generate an object and advance the
|
// Bounds of string are well defined, generate an object and advance the
|
||||||
// stream
|
// stream
|
||||||
*ret = obj_string(stream->line, stream->column - 1,
|
*ret = obj_string(stream->byte - 1, SV(current_contents.data, string_size));
|
||||||
SV(current_contents.data, string_size));
|
|
||||||
stream_advance(stream, string_size + 1);
|
stream_advance(stream, string_size + 1);
|
||||||
return PARSE_ERR_OK;
|
return PARSE_ERR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
parse_err_t parse_symbol(parse_stream_t *stream, obj_t *ret)
|
parse_err_t parse_symbol(parse_stream_t *stream, obj_t *ret)
|
||||||
{
|
{
|
||||||
sv_t current_contents = sv_chop_left(stream->contents, stream->cursor);
|
sv_t current_contents = sv_chop_left(stream->contents, stream->byte);
|
||||||
u64 symbol_size = sv_while(current_contents, SYMBOL_CHARS);
|
u64 symbol_size = sv_while(current_contents, SYMBOL_CHARS);
|
||||||
// Generate symbol
|
// Generate symbol
|
||||||
*ret = obj_symbol(stream->line, stream->column,
|
*ret = obj_symbol(stream->byte, SV(current_contents.data, symbol_size));
|
||||||
SV(current_contents.data, symbol_size));
|
|
||||||
stream_advance(stream, symbol_size);
|
stream_advance(stream, symbol_size);
|
||||||
return PARSE_ERR_OK;
|
return PARSE_ERR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool stream_eos(parse_stream_t *stream)
|
bool stream_eos(parse_stream_t *stream)
|
||||||
{
|
{
|
||||||
return stream->cursor >= stream->contents.size;
|
return stream->byte >= stream->contents.size;
|
||||||
}
|
}
|
||||||
|
|
||||||
char stream_peek(parse_stream_t *stream)
|
char stream_peek(parse_stream_t *stream)
|
||||||
@@ -118,29 +137,15 @@ char stream_peek(parse_stream_t *stream)
|
|||||||
if (stream_eos(stream))
|
if (stream_eos(stream))
|
||||||
return '\0';
|
return '\0';
|
||||||
else
|
else
|
||||||
return stream->contents.data[stream->cursor];
|
return stream->contents.data[stream->byte];
|
||||||
}
|
}
|
||||||
|
|
||||||
void stream_advance(parse_stream_t *stream, u64 size)
|
void stream_advance(parse_stream_t *stream, u64 size)
|
||||||
{
|
{
|
||||||
if (stream->cursor + size >= stream->contents.size)
|
if (stream->byte + size >= stream->contents.size)
|
||||||
stream->cursor = stream->contents.size;
|
stream->byte = stream->contents.size;
|
||||||
else
|
else
|
||||||
{
|
stream->byte += size;
|
||||||
for (u64 i = 0; i < size; ++i)
|
|
||||||
{
|
|
||||||
++stream->cursor;
|
|
||||||
if (stream_peek(stream) == '\n')
|
|
||||||
{
|
|
||||||
stream->line++;
|
|
||||||
stream->column = 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
stream->column++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 stream_size(parse_stream_t *stream)
|
u64 stream_size(parse_stream_t *stream)
|
||||||
|
|||||||
@@ -21,10 +21,11 @@ const char *parse_err_to_string(parse_err_t err);
|
|||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
u64 line, column, cursor;
|
u64 byte;
|
||||||
sv_t contents;
|
sv_t contents;
|
||||||
} parse_stream_t;
|
} parse_stream_t;
|
||||||
|
|
||||||
|
void parse_stream_get_line_col(parse_stream_t *stream, u64 *line, u64 *col);
|
||||||
parse_err_t parse(ast_t *out, parse_stream_t *stream);
|
parse_err_t parse(ast_t *out, parse_stream_t *stream);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user