diff options
-rw-r--r-- | asm/lexer.c | 45 |
-rw-r--r-- | asm/lexer.h | 2 |
-rw-r--r-- | asm/parser.c | 54 |
-rw-r--r-- | asm/parser.h | 3 |
-rw-r--r-- | todo.org | 23 |
5 files changed, 86 insertions, 41 deletions
diff --git a/asm/lexer.c b/asm/lexer.c index 108cb64..8b0a061 100644 --- a/asm/lexer.c +++ b/asm/lexer.c @@ -28,6 +28,8 @@ const char *token_type_as_cstr(token_type_t type) return "PP_CONST"; case TOKEN_PP_END: return "PP_END"; + case TOKEN_PP_REFERENCE: + return "PP_REFERENCE"; case TOKEN_GLOBAL: return "GLOBAL"; case TOKEN_STAR: @@ -114,12 +116,12 @@ const char *lerr_as_cstr(lerr_t lerr) { switch (lerr) { - case LERR_INVALID_CHAR_LITERAL: - return "INVALID_CHAR_LITERAL"; - break; case LERR_OK: return "OK"; - break; + case LERR_INVALID_CHAR_LITERAL: + return "INVALID_CHAR_LITERAL"; + case LERR_INVALID_PREPROCESSOR_DIRECTIVE: + return "INVALID_PREPROCESSOR_DIRECTIVE"; } return ""; } @@ -150,7 +152,7 @@ bool is_valid_hex_char(char c) (c >= 'A' && c <= 'F'); } -token_t tokenise_symbol(buffer_t *buffer, size_t *column) +lerr_t tokenise_symbol(buffer_t *buffer, size_t *column, token_t *token) { static_assert(NUMBER_OF_OPCODES == 98, "tokenise_buffer: Out of date!"); @@ -170,8 +172,25 @@ token_t tokenise_symbol(buffer_t *buffer, size_t *column) if (sym_size > 1 && strncmp(opcode, "%", 1) == 0) { - // Some kind of preprocessing directive - // TODO: Implement tokeniser for preprocessing directive + // Some preprocessing directive + if (sym_size > 6 && strncmp(opcode + 1, "CONST", 5) == 0) + { + type = TOKEN_PP_CONST; + offset = 6; + } + else if (sym_size == 4 && strncmp(opcode + 1, "END", 3) == 0) + { + type = TOKEN_PP_END; + offset = 4; + } + else + return LERR_INVALID_PREPROCESSOR_DIRECTIVE; + } + else if (sym_size > 1 && strncmp(opcode, "$", 1) == 0) + { + // A reference to a preprocessing constant + offset = 1; + type = TOKEN_PP_REFERENCE; } else if (sym_size == 4 && strncmp(opcode, "NOOP", 4) == 0) { @@ -384,7 +403,8 @@ token_t tokenise_symbol(buffer_t *buffer, size_t *column) } *column += sym_size - 1; buffer->used += sym_size; - return ret; + *token = ret; + return LERR_OK; } token_t tokenise_number_literal(buffer_t *buffer, size_t *column) @@ -494,7 +514,14 @@ 
lerr_t tokenise_buffer(buffer_t *buffer, token_stream_t *tokens_ptr) is_valid_hex_char(buffer->data[buffer->used + 1])) t = tokenise_hex_literal(buffer, &column); else if (is_symbol(c)) - t = tokenise_symbol(buffer, &column); + { + lerr_t lerr = tokenise_symbol(buffer, &column, &t); + if (lerr) + { + free(tokens.data); + return lerr; + } + } else if (c == '\'') { if (space_left(buffer) < 2) diff --git a/asm/lexer.h b/asm/lexer.h index 8470a18..8d62440 100644 --- a/asm/lexer.h +++ b/asm/lexer.h @@ -19,6 +19,7 @@ typedef enum TokenType { TOKEN_PP_CONST, TOKEN_PP_END, + TOKEN_PP_REFERENCE, TOKEN_GLOBAL, TOKEN_STAR, TOKEN_LITERAL_NUMBER, @@ -72,6 +73,7 @@ typedef enum { LERR_OK = 0, LERR_INVALID_CHAR_LITERAL, + LERR_INVALID_PREPROCESSOR_DIRECTIVE, } lerr_t; const char *lerr_as_cstr(lerr_t); diff --git a/asm/parser.c b/asm/parser.c index 06ed580..ce71116 100644 --- a/asm/parser.c +++ b/asm/parser.c @@ -59,10 +59,13 @@ void presult_free(presult_t res) free(res.label.name); break; case PRES_PP_CONST: + for (size_t i = 0; i < res.instructions.used / sizeof(presult_t); ++i) + presult_free(DARR_AT(presult_t, res.instructions.data, i)); + free(res.instructions.data); + break; case PRES_LABEL_ADDRESS: case PRES_RELATIVE_ADDRESS: case PRES_COMPLETE_RESULT: - free(res.instructions.data); break; } } @@ -291,12 +294,24 @@ perr_t parse_next(token_stream_t *stream, presult_t *ret) perr_t perr = PERR_OK; switch (token.type) { - case TOKEN_PP_CONST: - - break; + case TOKEN_PP_END: case TOKEN_LITERAL_NUMBER: case TOKEN_LITERAL_CHAR: return PERR_EXPECTED_SYMBOL; + case TOKEN_PP_CONST: { + ++stream->used; + ret->type = PRES_PP_CONST; + darr_init(&ret->instructions, ); + while (stream->used < stream->available && + TOKEN_STREAM_AT(stream->data, stream->used).type != TOKEN_PP_END) + { + presult_t body = {0}; + perr_t perr = parse_next(stream, &body); + } + break; + } + case TOKEN_PP_REFERENCE: + break; case TOKEN_GLOBAL: { if (stream->used + 1 >= stream->available || 
TOKEN_STREAM_AT(stream->data, stream->used + 1).type != TOKEN_SYMBOL) @@ -461,9 +476,8 @@ perr_t parse_next(token_stream_t *stream, presult_t *ret) return perr; } -label_t search_labels(label_t *labels, size_t n, char *name) +label_t search_labels(label_t *labels, size_t n, char *name, size_t name_size) { - size_t name_size = strlen(name); for (size_t i = 0; i < n; ++i) { label_t label = labels[i]; @@ -557,9 +571,9 @@ perr_t process_presults(presult_t *results, size_t res_count, prog_header_t header = {0}; if (start_label.name_size > 0) { - label_t label = - search_labels((label_t *)label_registry.data, - label_registry.used / sizeof(label_t), start_label); + label_t label = search_labels((label_t *)label_registry.data, + label_registry.used / sizeof(label_t), + start_label.name, start_label.name_size); if (!label.name) { free(instr_darr.data); @@ -575,10 +589,10 @@ perr_t process_presults(presult_t *results, size_t res_count, switch (res.type) { case PRES_LABEL_ADDRESS: { - inst_t inst = {0}; - label_t label = - search_labels((label_t *)label_registry.data, - label_registry.used / sizeof(label_t), res.label); + inst_t inst = {0}; + label_t label = search_labels((label_t *)label_registry.data, + label_registry.used / sizeof(label_t), + res.label.name, res.label.size); if (!label.name) { @@ -625,12 +639,7 @@ perr_t parse_stream(token_stream_t *stream, prog_t **program_ptr) if (err) { for (size_t i = 0; i < (presults.used / sizeof(presult_t)); ++i) - { - presult_t res = ((presult_t *)presults.data)[i]; - if (res.type == PRES_LABEL_ADDRESS || res.type == PRES_LABEL || - res.type == PRES_GLOBAL_LABEL) - free(res.label); - } + presult_free(DARR_AT(presult_t, presults.data, i)); free(presults.data); return err; } @@ -642,12 +651,7 @@ perr_t parse_stream(token_stream_t *stream, prog_t **program_ptr) process_presults((presult_t *)presults.data, presults.used / sizeof(presult_t), program_ptr); for (size_t i = 0; i < (presults.used / sizeof(presult_t)); ++i) - { - presult_t 
res = ((presult_t *)presults.data)[i]; - if (res.type == PRES_LABEL_ADDRESS || res.type == PRES_LABEL || - res.type == PRES_GLOBAL_LABEL) - free(res.label); - } + presult_free(DARR_AT(presult_t, presults.data, i)); free(presults.data); return perr; } diff --git a/asm/parser.h b/asm/parser.h index 46f8bc8..01358fc 100644 --- a/asm/parser.h +++ b/asm/parser.h @@ -72,9 +72,10 @@ typedef struct word addr; } label_t; -label_t search_labels(label_t *, size_t, char *); +label_t search_labels(label_t *, size_t, char *, size_t); perr_t parse_next(token_stream_t *, presult_t *); +perr_t preprocessor(presult_t *, size_t, presult_t *); perr_t process_presults(presult_t *, size_t, prog_t **); perr_t parse_stream(token_stream_t *, prog_t **); @@ -43,22 +43,33 @@ directives. Essentially a directive which assigns some literal to a symbol as a constant. Something like #+begin_src asm -%const:n:20%end +%const(n) 20 %end #+end_src -Where you'd refer to the definition using ~$n~. The assembler will -look for symbols like ~$n~ and insert definitions it finds for them. -Can be useful when defining data type bounds and other useful constants. +Then, during my program I could use it like so +#+begin_src asm +... + push.word $n + print.word +#+end_src + +The preprocessor should convert this to the equivalent code of +#+begin_src asm +... + push.word 20 + print.word +#+end_src 2023-11-04: You could even put full program instructions for a constant potentially #+begin_src asm -%const:print-1: +%const(print-1) push.word 1 print.word %end #+end_src -which when referred to ~$print-1~ would just insert the bytecode given inline. +which when referred to ~$print-1~ would just insert the bytecode given +in sequence. * Completed ** DONE Write a label/jump system :ASM: Essentially a user should be able to write arbitrary labels (maybe |