From 16dcc88a53467046de3f4cf8fb5249686e760eae Mon Sep 17 00:00:00 2001
From: Aryadev Chavali
Date: Wed, 29 Nov 2023 15:43:53 +0000
Subject: Refactored preprocessor to preprocess_(use|macro)_blocks and
 process_presults

We have distinct functions for the use blocks and the macro blocks,
which each generate wholesale new token streams via `token_copy` so we
don't run into weird errors around ownership of the internal strings of
each token.  Furthermore, process_presults now uses the stream index in
each presult to report errors when stuff goes wrong.
---
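For context, a rough sketch of the call sequence this refactor feeds into.
This is illustrative only: assemble_file, the "lexer.h" include and the
error-code choices are assumptions, not code from this patch; only
darr_read_file, tokenise_buffer, parse_stream and the types they use are
real names taken from the diff below.

    #include <stdio.h>
    #include <stdlib.h>

    #include "./parser.h" /* header touched by this patch */
    #include "./lexer.h"  /* assumed name for the header declaring tokenise_buffer */

    /* Hypothetical driver: lex a file, then let parse_stream preprocess
       (use blocks, then macro blocks), parse and assemble in one call. */
    perr_t assemble_file(const char *path, prog_t **program)
    {
      FILE *fp = fopen(path, "rb");
      if (!fp)
        return PERR_PREPROCESSOR_FILE_NONEXISTENT;
      buffer_t buffer = darr_read_file(fp);
      fclose(fp);

      token_stream_t stream = {0};
      lerr_t lerr = tokenise_buffer(&buffer, &stream);
      free(buffer.data);
      if (lerr)
        return PERR_PREPROCESSOR_FILE_PARSE_ERROR;

      /* On failure stream.used is now left pointing at the offending
         token, so the caller can report exactly where parsing stopped.
         (Freeing the remaining tokens is elided in this sketch.) */
      return parse_stream(&stream, program);
    }
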
 asm/parser.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++++++---------
 asm/parser.h |  13 ++++--
 2 files changed, 125 insertions(+), 23 deletions(-)

(limited to 'asm')

diff --git a/asm/parser.c b/asm/parser.c
index 7e60c0d..c97028d 100644
--- a/asm/parser.c
+++ b/asm/parser.c
@@ -365,10 +365,80 @@ block_t search_blocks(block_t *blocks, size_t n, char *name, size_t name_size)
   return (block_t){0};
 }
 
-perr_t preprocessor(token_stream_t *stream)
+perr_t preprocess_use_blocks(token_stream_t *stream, token_stream_t *new)
+{
+  token_stream_t new_stream = {0};
+  darr_init(&new_stream, sizeof(token_t));
+  // %USE -> #TOKENS_IN(FILENAME)
+  for (size_t i = 0; i < stream->available; ++i)
+  {
+    token_t t = DARR_AT(token_t, stream->data, i);
+    if (t.type == TOKEN_PP_USE)
+    {
+      if (i + 1 >= stream->available ||
+          DARR_AT(token_t, stream->data, i + 1).type != TOKEN_LITERAL_STRING)
+      {
+        stream->used = i;
+        for (size_t i = 0; i < new_stream.available; ++i)
+          free(TOKEN_STREAM_AT(new_stream.data, i).str);
+        free(new_stream.data);
+        return PERR_PREPROCESSOR_EXPECTED_STRING;
+      }
+      // Load and tokenise another file
+      ++i;
+      t = DARR_AT(token_t, stream->data, i);
+      FILE *fp = fopen(t.str, "rb");
+      if (!fp)
+      {
+        for (size_t i = 0; i < new_stream.available; ++i)
+          free(TOKEN_STREAM_AT(new_stream.data, i).str);
+        free(new_stream.data);
+        stream->used = i;
+        return PERR_PREPROCESSOR_FILE_NONEXISTENT;
+      }
+      buffer_t buffer = darr_read_file(fp);
+      fclose(fp);
+
+      token_stream_t fstream = {0};
+      lerr_t lerr = tokenise_buffer(&buffer, &fstream);
+      free(buffer.data);
+      if (lerr)
+      {
+        if (fstream.data)
+        {
+          for (size_t i = 0; i < fstream.available; ++i)
+            free(TOKEN_STREAM_AT(fstream.data, i).str);
+          free(fstream.data);
+        }
+        for (size_t i = 0; i < new_stream.available; ++i)
+          free(TOKEN_STREAM_AT(new_stream.data, i).str);
+        free(new_stream.data);
+        stream->used = i;
+        return PERR_PREPROCESSOR_FILE_PARSE_ERROR;
+      }
+      darr_append_bytes(&new_stream, fstream.data,
+                        sizeof(token_t) * fstream.available);
+      free(fstream.data);
+    }
+    else
+    {
+      token_t copy = token_copy(t);
+      darr_append_bytes(&new_stream, (byte *)&copy, sizeof(copy));
+    }
+  }
+
+  new_stream.available = new_stream.used / sizeof(token_t);
+  new_stream.used = 0;
+  *new = new_stream;
+
+  return PERR_OK;
+}
+
+perr_t preprocess_macro_blocks(token_stream_t *stream, token_stream_t *new)
 {
   darr_t block_registry = {0};
   darr_init(&block_registry, sizeof(block_t));
+
   for (size_t i = 0; i < stream->available; ++i)
   {
     token_t t = DARR_AT(token_t, stream->data, i);
@@ -467,25 +537,40 @@ perr_t preprocessor(token_stream_t *stream)
   // Free block registry
   free(block_registry.data);
 
-  // Free the old stream inline code
-  for (size_t i = 0; i < stream->available; ++i)
+  new_stream.available = new_stream.used / sizeof(token_t);
+  new_stream.used = 0;
+  *new = new_stream;
+
+  return PERR_OK;
+}
+
+perr_t preprocessor(token_stream_t *stream)
+{
+  token_stream_t use_blocks = {0};
+  perr_t perr = preprocess_use_blocks(stream, &use_blocks);
+  if (perr)
+    return perr;
+
+  token_stream_t macro_blocks = {0};
+  perr = preprocess_macro_blocks(&use_blocks, &macro_blocks);
+  if (perr)
   {
-    token_t t = DARR_AT(token_t, stream->data, i);
-    if (t.type == TOKEN_PP_CONST)
-    {
-      // Free till end
-      for (; i < stream->available && t.type != TOKEN_PP_END;
-           ++i, t = DARR_AT(token_t, stream->data, i))
-        free(t.str);
-      free(t.str);
-    }
-    else if (t.type == TOKEN_PP_REFERENCE)
-      free(t.str);
+    stream->used = use_blocks.used;
+    for (size_t i = 0; i < use_blocks.available; ++i)
+      free(TOKEN_STREAM_AT(use_blocks.data, i).str);
+    free(use_blocks.data);
+    return perr;
   }
+
+  for (size_t i = 0; i < use_blocks.available; ++i)
+    free(TOKEN_STREAM_AT(use_blocks.data, i).str);
+  free(use_blocks.data);
+
+  for (size_t i = 0; i < stream->available; ++i)
+    free(TOKEN_STREAM_AT(stream->data, i).str);
   free(stream->data);
-  new_stream.available = new_stream.used / sizeof(token_t);
-  new_stream.used = 0;
-  *stream = new_stream;
+
+  *stream = macro_blocks;
 
   return PERR_OK;
 }
@@ -698,6 +783,8 @@ perr_t process_presults(presult_t *results, size_t res_count,
     }
   }
 #endif
+  assert(result_reached && "process_presults: result_reached is NULL?!");
+  *result_reached = 0;
 
   label_t start_label = {0};
   darr_t label_registry = {0};
@@ -720,6 +807,7 @@ perr_t process_presults(presult_t *results, size_t res_count,
     if (offset < 0 && ((word)(-offset)) > inst_count)
     {
       free(label_registry.data);
+      *result_reached = i;
      return PERR_INVALID_RELATIVE_ADDRESS;
     }
     results[i].instruction.operand.as_word = ((s_word)inst_count) + offset;
@@ -774,6 +862,7 @@ perr_t process_presults(presult_t *results, size_t res_count,
     {
       free(instr_darr.data);
       free(label_registry.data);
+      *result_reached = i;
      return PERR_UNKNOWN_LABEL;
     }
 
@@ -827,9 +916,15 @@ perr_t parse_stream(token_stream_t *stream, prog_t **program_ptr)
       ++stream->used;
   }
 
-  perr = process_presults((presult_t *)presults.data,
-                          presults.used / sizeof(presult_t), program_ptr);
-
+  size_t results_processed = 0;
+  perr = process_presults((presult_t *)presults.data,
+                          presults.used / sizeof(presult_t), &results_processed,
+                          program_ptr);
+  if (results_processed != presults.used / sizeof(presult_t))
+  {
+    presult_t pres = DARR_AT(presult_t, presults.data, results_processed);
+    stream->used = pres.stream_index;
+  }
   presults_free((presult_t *)presults.data, presults.used / sizeof(presult_t));
   free(presults.data);
   return perr;
diff --git a/asm/parser.h b/asm/parser.h
index cef73b9..7e2d1b7 100644
--- a/asm/parser.h
+++ b/asm/parser.h
@@ -86,12 +86,19 @@ typedef struct
 
 block_t search_blocks(block_t *, size_t, char *, size_t);
 
+perr_t preprocess_use_blocks(token_stream_t *, token_stream_t *);
+perr_t preprocess_macro_blocks(token_stream_t *, token_stream_t *);
 // Analyses then inlines corresponding tokens into stream directly
 perr_t preprocessor(token_stream_t *);
-// Parses from the preprocessed stream
+
+// Parses the next "parse result" from stream
 perr_t parse_next(token_stream_t *, presult_t *);
-// Deals with address building
-perr_t process_presults(presult_t *, size_t, prog_t **);
+// Constructs a program from the set of parse results (from repeatedly
+// calling parse_next)
+perr_t process_presults(presult_t *, size_t, size_t *, prog_t **);
+
+// Preprocesses, generates results then constructs a program all in
+// one routine (thing to call in most use cases).
 perr_t parse_stream(token_stream_t *, prog_t **);
 
 #endif
-- 
cgit v1.2.3-13-gbd6f
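
The commit message leans on `token_copy` to give the new streams ownership
of their strings, but its definition is not part of this diff.  Assuming
`token_t.str` is a heap-allocated, NUL-terminated string (as the
`free(...str)` and `fopen(t.str, "rb")` calls above suggest), the deep copy
it has to perform is roughly the following.  This is an illustrative guess
only, not the project's actual definition:

    #include <stdlib.h>
    #include <string.h>

    /* Sketch of a token_copy-style deep copy: duplicate the string so the
       copied token can be freed independently of the original.  That is
       what lets preprocess_(use|macro)_blocks build wholly new streams
       without double-free or use-after-free bugs on the token strings. */
    token_t token_copy(token_t t)
    {
      token_t copy = t;               /* copies type and any other fields verbatim */
      if (t.str)
      {
        size_t n = strlen(t.str) + 1;
        copy.str = malloc(n);
        memcpy(copy.str, t.str, n);   /* the new stream now owns copy.str */
      }
      return copy;
    }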