diff options
author | Aryadev Chavali <aryadev@aryadevchavali.com> | 2023-10-26 07:15:52 +0100 |
---|---|---|
committer | Aryadev Chavali <aryadev@aryadevchavali.com> | 2023-10-26 07:15:52 +0100 |
commit | 7bf6b53230868f419331bc7ef2044b02502aae76 (patch) | |
tree | 91e36d235a9f125f80a8bd388abffacfd766cf27 | |
parent | 131d70a9a37bdbfacaea9cfe2f78867dd9e16942 (diff) | |
download | ovm-7bf6b53230868f419331bc7ef2044b02502aae76.tar.gz ovm-7bf6b53230868f419331bc7ef2044b02502aae76.tar.bz2 ovm-7bf6b53230868f419331bc7ef2044b02502aae76.zip |
Unified literal for numbers, main program now tokenises
-rw-r--r-- | asm/lexer.c | 81 | ||||
-rw-r--r-- | asm/lexer.h | 8 | ||||
-rw-r--r-- | asm/main.c | 18 | ||||
-rw-r--r-- | asm/parser.c | 2 | ||||
-rw-r--r-- | asm/parser.h | 2 |
5 files changed, 33 insertions, 78 deletions
```diff
diff --git a/asm/lexer.c b/asm/lexer.c
index 03f7d05..2f64e80 100644
--- a/asm/lexer.c
+++ b/asm/lexer.c
@@ -20,12 +20,8 @@ const char *token_type_as_cstr(token_type_t type)
 {
   switch (type)
   {
-  case TOKEN_LITERAL_BYTE:
-    return "LITERAL_BYTE";
-  case TOKEN_LITERAL_HWORD:
-    return "LITERAL_HWORD";
-  case TOKEN_LITERAL_WORD:
-    return "LITERAL_WORD";
+  case TOKEN_LITERAL_NUMBER:
+    return "LITERAL_NUMBER";
   case TOKEN_LITERAL_CHAR:
     return "LITERAL_CHAR";
   case TOKEN_SYMBOL:
@@ -43,7 +39,7 @@ size_t space_left(buffer_t *buffer)
 
 bool is_symbol(char c)
 {
-  return isalpha(c) || c == '-' || c == '_';
+  return isalpha(c) || c == '-' || c == '_' || c == '.';
 }
 
 token_t tokenise_symbol(buffer_t *buffer)
@@ -60,37 +56,17 @@ token_t tokenise_symbol(buffer_t *buffer)
   return token;
 }
 
-void tokenise_literal(buffer_t *buffer, token_t *token)
+token_t tokenise_number_literal(buffer_t *buffer)
 {
-  token->str_size = 0;
-  for (; token->str_size < space_left(buffer) &&
-         isdigit(buffer->data[buffer->used + token->str_size]);
-       ++token->str_size)
+  token_t token = {.type = TOKEN_LITERAL_NUMBER, .str_size = 0};
+  for (; token.str_size < space_left(buffer) &&
+         isdigit(buffer->data[buffer->used + token.str_size]);
+       ++token.str_size)
     continue;
-  token->str = calloc(token->str_size + 1, 1);
-  memcpy(token->str, buffer->data + buffer->used, token->str_size);
-  token->str[token->str_size] = '\0';
-  buffer->used += token->str_size;
-}
-
-token_t tokenise_byte_literal(buffer_t *buffer)
-{
-  token_t token = {.type = TOKEN_LITERAL_BYTE};
-  tokenise_literal(buffer, &token);
-  return token;
-}
-
-token_t tokenise_hword_literal(buffer_t *buffer)
-{
-  token_t token = {.type = TOKEN_LITERAL_HWORD};
-  tokenise_literal(buffer, &token);
-  return token;
-}
-
-token_t tokenise_word_literal(buffer_t *buffer)
-{
-  token_t token = {.type = TOKEN_LITERAL_WORD};
-  tokenise_literal(buffer, &token);
+  token.str = calloc(token.str_size + 1, 1);
+  memcpy(token.str, buffer->data + buffer->used, token.str_size);
+  token.str[token.str_size] = '\0';
+  buffer->used += token.str_size;
   return token;
 }
 
@@ -103,7 +79,7 @@ token_t tokenise_char_literal(buffer_t *buffer)
   return token;
 }
 
-token_t *tokenise_buffer(buffer_t *buffer, size_t *n_tokens)
+token_stream_t tokenise_buffer(buffer_t *buffer)
 {
   buffer_t tokens = {0};
   darr_init(&tokens, sizeof(token_t));
@@ -120,28 +96,8 @@ token_t *tokenise_buffer(buffer_t *buffer, size_t *n_tokens)
         continue;
       is_token = false;
     }
-    else if (space_left(buffer) > 1 && isdigit(buffer->data[buffer->used + 1]))
-    {
-      // Parsing numeric literals
-      switch (c)
-      {
-      case 'b':
-        buffer->used++;
-        t = tokenise_byte_literal(buffer);
-        break;
-      case 'h':
-        buffer->used++;
-        t = tokenise_hword_literal(buffer);
-        break;
-      case 'w':
-        buffer->used++;
-        t = tokenise_word_literal(buffer);
-        break;
-      default:
-        // TODO: Lex Error (INVALID_NUMERIC_LITERAL)
-        exit(1);
-      }
-    }
+    else if (isdigit(c))
+      t = tokenise_number_literal(buffer);
     else if (is_symbol(c))
       t = tokenise_symbol(buffer);
     else if (c == '\'')
@@ -151,9 +107,12 @@ token_t *tokenise_buffer(buffer_t *buffer, size_t *n_tokens)
         exit(1);
       t = tokenise_char_literal(buffer);
     }
+
     if (is_token)
       darr_append_bytes(&tokens, (byte *)&t, sizeof(t));
   }
-  *n_tokens = tokens.used / sizeof(token_t);
-  return (token_t *)tokens.data;
+  size_t n_tokens = tokens.used / sizeof(token_t);
+  tokens.available = n_tokens;
+  tokens.used = 0;
+  return tokens;
 }
diff --git a/asm/lexer.h b/asm/lexer.h
index 01badf4..bd9dfb2 100644
--- a/asm/lexer.h
+++ b/asm/lexer.h
@@ -16,10 +16,8 @@
 
 typedef enum TokenType
 {
-  TOKEN_LITERAL_BYTE,
+  TOKEN_LITERAL_NUMBER,
   TOKEN_LITERAL_CHAR,
-  TOKEN_LITERAL_HWORD,
-  TOKEN_LITERAL_WORD,
   TOKEN_SYMBOL,
 } token_type_t;
 
@@ -31,9 +29,9 @@ typedef struct
 } token_t;
 
 typedef darr_t buffer_t;
+typedef darr_t token_stream_t;
 
 const char *token_type_as_cstr(token_type_t type);
-
-token_t *tokenise_buffer(buffer_t *, size_t *);
+token_stream_t tokenise_buffer(buffer_t *);
 
 #endif
diff --git a/asm/main.c b/asm/main.c
--- a/asm/main.c
+++ b/asm/main.c
@@ -20,18 +20,18 @@ int main(void)
   darr_t buffer = darr_read_file(fp);
   fclose(fp);
 
-  size_t n = 0;
-  token_t *tokens = tokenise_buffer(&buffer, &n);
-  printf("%lu bytes -> %lu tokens\n", buffer.used, n);
+  token_stream_t tokens = tokenise_buffer(&buffer);
+  printf("%lu bytes -> %lu tokens\n", buffer.used, tokens.available);
   free(buffer.data);
 
-  for (size_t i = 0; i < n; ++i)
-    printf("%s(%.*s)\n", token_type_as_cstr(tokens[i].type),
-           (int)tokens[i].str_size, tokens[i].str);
+  for (size_t i = 0; i < tokens.available; ++i)
+    printf("%s(%.*s)\n", token_type_as_cstr(((token_t *)tokens.data)[i].type),
+           (int)((token_t *)tokens.data)[i].str_size,
+           ((token_t *)tokens.data)[i].str);
 
   // Free the tokens
-  for (size_t i = 0; i < n; ++i)
-    free(tokens[i].str);
-  free(tokens);
+  for (size_t i = 0; i < tokens.available; ++i)
+    free(((token_t *)tokens.data)[i].str);
+  free(tokens.data);
   return 0;
 }
diff --git a/asm/parser.c b/asm/parser.c
index f796dfc..5521585 100644
--- a/asm/parser.c
+++ b/asm/parser.c
@@ -7,5 +7,5 @@
 
  * Created: 2023-10-24
  * Author: Aryadev Chavali
- * Description:
+ * Description: Parser for assembly language
  */
diff --git a/asm/parser.h b/asm/parser.h
index 699539b..88dc0e9 100644
--- a/asm/parser.h
+++ b/asm/parser.h
@@ -16,8 +16,6 @@
 
 #include <vm/inst.h>
 
-typedef darr_t token_stream_t;
-
 inst_t parse_next_inst(token_stream_t *);
 inst_t *parse_stream(token_stream_t *, size_t *);
 
```