diff options
-rw-r--r-- | asm/parser.c | 274 | ||||
-rw-r--r-- | asm/parser.h | 29 |
2 files changed, 233 insertions, 70 deletions
diff --git a/asm/parser.c b/asm/parser.c index ce71116..282a6c5 100644 --- a/asm/parser.c +++ b/asm/parser.c @@ -39,10 +39,16 @@ const char *perr_as_cstr(perr_t perr) return "EXPECTED_LABEL"; case PERR_EXPECTED_OPERAND: return "EXPECTED_OPERAND"; - case PERR_UNKNOWN_LABEL: - return "UNKNOWN_LABEL"; + case PERR_PREPROCESSOR_EXPECTED_END: + return "PREPROCESSOR_EXPECTED_END"; + case PERR_PREPROCESSOR_EXPECTED_NAME: + return "PREPROCESSOR_EXPECTED_NAME"; + case PERR_PREPROCESSOR_UNKNOWN_NAME: + return "PREPROCESSOR_UNKNOWN_NAME"; case PERR_INVALID_RELATIVE_ADDRESS: return "INVALID_RELATIVE_ADDRESS"; + case PERR_UNKNOWN_LABEL: + return "UNKNOWN_LABEL"; case PERR_UNKNOWN_OPERATOR: return "UNKNOWN_OPERATOR"; default: @@ -50,26 +56,62 @@ const char *perr_as_cstr(perr_t perr) } } +presult_t presult_label(const char *name, size_t size, s_word addr) +{ + presult_t res = {.address = addr, + .label = {.name = malloc(size + 1), .size = size}}; + memcpy(res.label.name, name, size); + res.label.name[size] = '\0'; + return res; +} + +presult_t presult_label_ref(inst_t base, const char *label, size_t size) +{ + presult_t pres = presult_label(label, size, 0); + pres.instruction = base; + pres.type = PRES_LABEL_ADDRESS; + return pres; +} + +presult_t presult_instruction(inst_t inst) +{ + return (presult_t){.instruction = inst, .type = PRES_COMPLETE_RESULT}; +} + +presult_t presult_relative(inst_t inst, s_word addr) +{ + return (presult_t){ + .instruction = inst, .address = addr, .type = PRES_RELATIVE_ADDRESS}; +} + +presult_t presult_global(const char *name, size_t size, s_word addr) +{ + presult_t res = presult_label(name, size, addr); + res.type = PRES_GLOBAL_LABEL; + return res; +} + void presult_free(presult_t res) { switch (res.type) { + case PRES_LABEL_ADDRESS: case PRES_GLOBAL_LABEL: case PRES_LABEL: free(res.label.name); break; - case PRES_PP_CONST: - for (size_t i = 0; i < res.instructions.used / sizeof(presult_t); ++i) - presult_free(DARR_AT(presult_t, res.instructions.data, i)); - free(res.instructions.data); - break; - case PRES_LABEL_ADDRESS: case PRES_RELATIVE_ADDRESS: case PRES_COMPLETE_RESULT: break; } } +void presults_free(presult_t *ptr, size_t number) +{ + for (size_t i = 0; i < number; ++i) + presult_free(ptr[i]); +} + perr_t parse_word(token_t token, word *ret) { if (token.type == TOKEN_LITERAL_NUMBER) @@ -144,19 +186,14 @@ perr_t parse_word_label_or_relative(token_stream_t *stream, presult_t *res) token_t token = TOKEN_STREAM_AT(stream->data, stream->used); if (token.type == TOKEN_SYMBOL) { - res->type = PRES_LABEL_ADDRESS; - res->label.size = token.str_size; - res->label.name = calloc(res->label.size + 1, 1); - memcpy(res->label.name, token.str, res->label.size + 1); + *res = presult_label_ref(res->instruction, token.str, token.str_size); return PERR_OK; } else if (token.type == TOKEN_LITERAL_CHAR || token.type == TOKEN_LITERAL_NUMBER) { res->type = PRES_COMPLETE_RESULT; - darr_init(&res->instructions, sizeof(inst_t)); - return parse_word( - token, &DARR_AT(inst_t, res->instructions.data, 0).operand.as_word); + return parse_word(token, &res->instruction.operand.as_word); } else if (token.type == TOKEN_STAR) { @@ -263,9 +300,8 @@ perr_t parse_utype_inst_with_operand(token_stream_t *stream, inst_t *ret) perr_t parse_jump_inst_operand(token_stream_t *stream, presult_t *res) { - perr_t inst_err = parse_utype_inst( - stream, &DARR_AT(inst_t, res->instructions.data, - res->instructions.used / sizeof(inst_t))); + perr_t inst_err = parse_utype_inst(stream, &res->instruction); + if (inst_err) return inst_err; ++stream->used; @@ -288,41 +324,164 @@ perr_t parse_type_inst_with_operand(token_stream_t *stream, inst_t *ret) return PERR_OK; } +label_t search_labels(label_t *labels, size_t n, char *name, size_t name_size) +{ + for (size_t i = 0; i < n; ++i) + { + label_t label = labels[i]; + if (label.name_size == name_size && + strncmp(label.name, name, name_size) == 0) + return label; + } + + return (label_t){0}; +} + +block_t search_blocks(block_t *blocks, size_t n, char *name, size_t name_size) +{ + for (size_t i = 0; i < n; ++i) + { + block_t block = blocks[i]; + if (block.name_size == name_size && + strncmp(block.name, name, name_size) == 0) + return block; + } + + return (block_t){0}; +} + +perr_t preprocessor(token_stream_t *stream) +{ + darr_t block_registry = {0}; + darr_init(&block_registry, sizeof(block_t)); + for (size_t i = 0; i < stream->available; ++i) + { + token_t t = DARR_AT(token_t, stream->data, i); + if (t.type == TOKEN_PP_CONST) + { + char *sym = t.str; + size_t start = strcspn(sym, "("); + size_t end = strcspn(sym, ")"); + if (end == t.str_size || start == t.str_size || start == end + 1) + { + free(block_registry.data); + return PERR_PREPROCESSOR_EXPECTED_NAME; + } + block_t block = {.name = sym + start + 1, .name_size = end - start - 1}; + ++i; + size_t prev = i; + token_t t = {0}; + for (t = DARR_AT(token_t, stream->data, i); + i < stream->available && t.type != TOKEN_PP_END; + ++i, t = DARR_AT(token_t, stream->data, i)) + continue; + if (t.type != TOKEN_PP_END) + { + stream->used = i; + free(block_registry.data); + return PERR_PREPROCESSOR_EXPECTED_END; + } + + block.code.data = stream->data + (prev * sizeof(token_t)); + block.code.available = i - prev; + block.code.used = block.code.available; + darr_append_bytes(&block_registry, (byte *)&block, sizeof(block)); + } + } + + if (block_registry.used == 0) + { + // Nothing to preprocess + free(block_registry.data); + return PERR_OK; + } + + token_stream_t new_stream = {0}; + darr_init(&new_stream, sizeof(token_t)); + + for (size_t i = 0; i < stream->available; ++i) + { + token_t t = DARR_AT(token_t, stream->data, i); + if (t.type == TOKEN_PP_CONST) + { + // Skip till after end + for (; i < stream->available && t.type != TOKEN_PP_END; + ++i, t = DARR_AT(token_t, stream->data, i)) + continue; + } + else if (t.type == TOKEN_PP_REFERENCE) + { + // Find the reference in the block registry + block_t block = search_blocks((block_t *)block_registry.data, + block_registry.used, t.str, t.str_size); + if (!block.name) + { + free(new_stream.data); + free(block_registry.data); + stream->used = i; + return PERR_PREPROCESSOR_UNKNOWN_NAME; + } + + // Inline the block found + for (size_t j = 0; j < block.code.used; j++) + { + token_t b_token = DARR_AT(token_t, block.code.data, j); + token_t copy = b_token; + copy.str = malloc(copy.str_size + 1); + memcpy(copy.str, b_token.str, copy.str_size + 1); + darr_append_bytes(&new_stream, (byte *)©, sizeof(token_t)); + } + } + else + // Insert into stream as is + darr_append_bytes(&new_stream, (byte *)&t, sizeof(t)); + } + + // Free block registry + free(block_registry.data); + + // Free the old stream inline code + for (size_t i = 0; i < stream->available; ++i) + { + token_t t = DARR_AT(token_t, stream->data, i); + if (t.type == TOKEN_PP_CONST) + { + // Free till end + for (; i < stream->available && t.type != TOKEN_PP_END; + ++i, t = DARR_AT(token_t, stream->data, i)) + free(t.str); + free(t.str); + } + else if (t.type == TOKEN_PP_REFERENCE) + free(t.str); + } + free(stream->data); + new_stream.available = new_stream.used / sizeof(token_t); + new_stream.used = 0; + *stream = new_stream; + + return PERR_OK; +} + perr_t parse_next(token_stream_t *stream, presult_t *ret) { token_t token = TOKEN_STREAM_AT(stream->data, stream->used); perr_t perr = PERR_OK; switch (token.type) { + case TOKEN_PP_CONST: + case TOKEN_PP_REFERENCE: case TOKEN_PP_END: case TOKEN_LITERAL_NUMBER: case TOKEN_LITERAL_CHAR: return PERR_EXPECTED_SYMBOL; - case TOKEN_PP_CONST: { - ++stream->used; - ret->type = PRES_PP_CONST; - darr_init(&ret->instructions, ); - while (stream->used < stream->available && - TOKEN_STREAM_AT(stream->data, stream->used).type != TOKEN_PP_END) - { - presult_t body = {0}; - perr_t perr = parse_next(stream, &body); - } - break; - } - case TOKEN_PP_REFERENCE: - break; case TOKEN_GLOBAL: { if (stream->used + 1 >= stream->available || TOKEN_STREAM_AT(stream->data, stream->used + 1).type != TOKEN_SYMBOL) return PERR_EXPECTED_LABEL; ++stream->used; token_t label = TOKEN_STREAM_AT(stream->data, stream->used); - *ret = - (presult_t){.type = PRES_GLOBAL_LABEL, - .label = (struct PLabel){.name = malloc(label.str_size + 1), - .size = label.str_size}}; - memcpy(ret->label.name, label.str, label.str_size + 1); + *ret = presult_global(label.str, label.str_size, 0); return PERR_OK; } case TOKEN_NOOP: @@ -346,7 +505,7 @@ perr_t parse_next(token_stream_t *stream, presult_t *ret) perr = parse_utype_inst_with_operand(stream, &ret->instruction); break; case TOKEN_MOV: - *ret = presult_instruction(INST_PUSH_REG(BYTE, 0)); + *ret = presult_instruction(INST_MOV(BYTE, 0)); perr = parse_utype_inst_with_operand(stream, &ret->instruction); break; case TOKEN_DUP: @@ -476,19 +635,6 @@ perr_t parse_next(token_stream_t *stream, presult_t *ret) return perr; } -label_t search_labels(label_t *labels, size_t n, char *name, size_t name_size) -{ - for (size_t i = 0; i < n; ++i) - { - label_t label = labels[i]; - if (label.name_size == name_size && - strncmp(label.name, name, name_size) == 0) - return label; - } - - return (label_t){0}; -} - perr_t process_presults(presult_t *results, size_t res_count, prog_t **program_ptr) { @@ -500,10 +646,10 @@ perr_t process_presults(presult_t *results, size_t res_count, switch (pres.type) { case PRES_LABEL: - printf("\tLABEL: label=%s\n", pres.label); + printf("\tLABEL: label=%s\n", pres.label.name); break; case PRES_LABEL_ADDRESS: - printf("\tLABEL_CALL: label=%s, inst=", pres.label); + printf("\tLABEL_CALL: label=%s, inst=", pres.label.name); inst_print(pres.instruction, stdout); printf("\n"); break; @@ -513,7 +659,7 @@ perr_t process_presults(presult_t *results, size_t res_count, printf("\n"); break; case PRES_GLOBAL_LABEL: - printf("\tSET_GLOBAL_START: name=%s\n", pres.label); + printf("\tSET_GLOBAL_START: name=%s\n", pres.label.name); break; case PRES_COMPLETE_RESULT: printf("\tCOMPLETE: inst="); @@ -559,9 +705,10 @@ perr_t process_presults(presult_t *results, size_t res_count, } case PRES_LABEL_ADDRESS: case PRES_COMPLETE_RESULT: - default: inst_count++; break; + default: + break; } } @@ -630,6 +777,10 @@ perr_t process_presults(presult_t *results, size_t res_count, perr_t parse_stream(token_stream_t *stream, prog_t **program_ptr) { + // Preprocessor + perr_t perr = preprocessor(stream); + if (perr) + return perr; darr_t presults = {0}; darr_init(&presults, sizeof(presult_t)); while (stream->used < stream->available) @@ -638,8 +789,8 @@ perr_t parse_stream(token_stream_t *stream, prog_t **program_ptr) perr_t err = parse_next(stream, &pres); if (err) { - for (size_t i = 0; i < (presults.used / sizeof(presult_t)); ++i) - presult_free(DARR_AT(presult_t, presults.data, i)); + presults_free((presult_t *)presults.data, + presults.used / sizeof(presult_t)); free(presults.data); return err; } @@ -647,11 +798,10 @@ perr_t parse_stream(token_stream_t *stream, prog_t **program_ptr) ++stream->used; } - perr_t perr = - process_presults((presult_t *)presults.data, - presults.used / sizeof(presult_t), program_ptr); - for (size_t i = 0; i < (presults.used / sizeof(presult_t)); ++i) - presult_free(DARR_AT(presult_t, presults.data, i)); + perr = process_presults((presult_t *)presults.data, + presults.used / sizeof(presult_t), program_ptr); + + presults_free((presult_t *)presults.data, presults.used / sizeof(presult_t)); free(presults.data); return perr; } diff --git a/asm/parser.h b/asm/parser.h index 01358fc..a8748d0 100644 --- a/asm/parser.h +++ b/asm/parser.h @@ -27,6 +27,9 @@ typedef enum PERR_EXPECTED_SYMBOL, PERR_EXPECTED_LABEL, PERR_EXPECTED_OPERAND, + PERR_PREPROCESSOR_EXPECTED_NAME, + PERR_PREPROCESSOR_EXPECTED_END, + PERR_PREPROCESSOR_UNKNOWN_NAME, PERR_INVALID_RELATIVE_ADDRESS, PERR_UNKNOWN_OPERATOR, PERR_UNKNOWN_LABEL, @@ -38,32 +41,30 @@ const char *perr_as_cstr(perr_t); typedef struct { inst_t instruction; - darr_t instructions; + s_word address; struct PLabel { char *name; size_t size; } label; - s_word address; enum PResult_Type { PRES_LABEL = 0, PRES_LABEL_ADDRESS, PRES_GLOBAL_LABEL, PRES_RELATIVE_ADDRESS, - PRES_PP_CONST, PRES_COMPLETE_RESULT, } type; } presult_t; // TODO: Implement these presult_t presult_label(const char *, size_t, s_word); +presult_t presult_label_ref(inst_t, const char *, size_t); presult_t presult_instruction(inst_t); -presult_t presult_instructions(size_t); -presult_t presult_addr(s_word); -presult_t pres_pp_const(const char *, s_word, size_t); -// TODO: Refactor parser.c to use this instead +presult_t presult_relative(inst_t, s_word); +presult_t presult_global(const char *, size_t, s_word); void presult_free(presult_t); +void presults_free(presult_t *, size_t); typedef struct { @@ -74,8 +75,20 @@ typedef struct label_t search_labels(label_t *, size_t, char *, size_t); +typedef struct +{ + char *name; + size_t name_size; + darr_t code; +} block_t; + +block_t search_blocks(block_t *, size_t, char *, size_t); + +// Analyses then inlines corresponding tokens into stream directly +perr_t preprocessor(token_stream_t *); +// Parses from the preprocessed stream perr_t parse_next(token_stream_t *, presult_t *); -perr_t preprocessor(presult_t *, size_t, presult_t *); +// Deals with address building perr_t process_presults(presult_t *, size_t, prog_t **); perr_t parse_stream(token_stream_t *, prog_t **); |