From 75dc36cd197ab47ffd1dbbad887dd7bac88c8add Mon Sep 17 00:00:00 2001 From: Aryadev Chavali Date: Tue, 31 Oct 2023 20:39:26 +0000 Subject: Lexer now returns errors on failure Currently only for invalid character literals, but still a possible problem. --- asm/lexer.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++------- asm/lexer.h | 9 +++++++- asm/main.c | 22 +++++++++++++++++++- 3 files changed, 91 insertions(+), 9 deletions(-) (limited to 'asm') diff --git a/asm/lexer.c b/asm/lexer.c index 51a8ec6..6a1f027 100644 --- a/asm/lexer.c +++ b/asm/lexer.c @@ -31,6 +31,20 @@ const char *token_type_as_cstr(token_type_t type) return ""; } +const char *lerr_as_cstr(lerr_t lerr) +{ + switch (lerr) + { + case LERR_INVALID_CHAR_LITERAL: + return "INVALID_CHAR_LITERAL"; + break; + case LERR_OK: + return "OK"; + break; + } + return ""; +} + size_t space_left(buffer_t *buffer) { if (buffer->available == buffer->used) @@ -95,10 +109,10 @@ token_t tokenise_char_literal(buffer_t *buffer, size_t *column) return token; } -token_stream_t tokenise_buffer(buffer_t *buffer) +lerr_t tokenise_buffer(buffer_t *buffer, token_stream_t *tokens_ptr) { size_t column = 0, line = 1; - buffer_t tokens = {0}; + token_stream_t tokens = {0}; darr_init(&tokens, sizeof(token_t)); while (space_left(buffer) != 0) { @@ -139,10 +153,50 @@ token_stream_t tokenise_buffer(buffer_t *buffer) t = tokenise_symbol(buffer, &column); else if (c == '\'') { - if (space_left(buffer) < 2 || buffer->data[buffer->used + 2] != '\'') - // TODO: Lex Error (INVALID_CHAR_LITERAL) - exit(1); - t = tokenise_char_literal(buffer, &column); + if (space_left(buffer) < 2) + { + free(tokens.data); + return LERR_INVALID_CHAR_LITERAL; + } + else if (buffer->data[buffer->used + 1] == '\\') + { + char escape = '\0'; + if (space_left(buffer) < 3 || buffer->data[buffer->used + 3] != '\'') + { + free(tokens.data); + return LERR_INVALID_CHAR_LITERAL; + } + switch (buffer->data[buffer->used + 2]) + { + case 'n': + escape = '\n'; + break; + case 't': + escape = '\t'; + break; + case 'r': + escape = '\r'; + break; + case '\\': + escape = '\\'; + break; + default: + column += 2; + free(tokens.data); + return LERR_INVALID_CHAR_LITERAL; + break; + } + + t = (token_t){.type = TOKEN_LITERAL_CHAR, + .str = malloc(1), + .str_size = 1, + .column = column}; + column += 4; + buffer->used += 4; + t.str[0] = escape; + } + else + t = tokenise_char_literal(buffer, &column); } if (is_token) @@ -154,5 +208,6 @@ token_stream_t tokenise_buffer(buffer_t *buffer) size_t n_tokens = tokens.used / sizeof(token_t); tokens.available = n_tokens; tokens.used = 0; - return tokens; + *tokens_ptr = tokens; + return LERR_OK; } diff --git a/asm/lexer.h b/asm/lexer.h index d2e0028..1e68d8b 100644 --- a/asm/lexer.h +++ b/asm/lexer.h @@ -30,11 +30,18 @@ typedef struct size_t str_size; } token_t; +typedef enum +{ + LERR_OK = 0, + LERR_INVALID_CHAR_LITERAL, +} lerr_t; +const char *lerr_as_cstr(lerr_t); + typedef darr_t buffer_t; typedef darr_t token_stream_t; #define TOKEN_STREAM_AT(STREAM_DATA, INDEX) (((token_t *)(STREAM_DATA))[INDEX]) const char *token_type_as_cstr(token_type_t type); -token_stream_t tokenise_buffer(buffer_t *); +lerr_t tokenise_buffer(buffer_t *, token_stream_t *); #endif diff --git a/asm/main.c b/asm/main.c index 2f1102b..bfa12e0 100644 --- a/asm/main.c +++ b/asm/main.c @@ -41,7 +41,27 @@ int main(int argc, char *argv[]) darr_t buffer = darr_read_file(fp); fclose(fp); - token_stream_t tokens = tokenise_buffer(&buffer); + token_stream_t tokens = {0}; + lerr_t lex_error = tokenise_buffer(&buffer, &tokens); + if (lex_error) + { + // Compute the line/newlines by hand + size_t column = 0, line = 1; + for (size_t i = 0; i < buffer.used; ++i) + { + if (buffer.data[i] == '\n') + { + column = 0; + ++line; + } + else + ++column; + } + fprintf(stderr, "%s:%lu:%lu: %s\n", source_file, line, column, + lerr_as_cstr(lex_error)); + ret = 255 - lex_error; + goto end; + } #if VERBOSE >= 1 printf("[%sTOKENISER%s]: %lu bytes -> %lu tokens\n", TERM_GREEN, TERM_RESET, buffer.used, tokens.available); -- cgit v1.2.3-13-gbd6f