author    Aryadev Chavali <aryadev@aryadevchavali.com>  2023-10-31 20:39:26 +0000
committer Aryadev Chavali <aryadev@aryadevchavali.com>  2023-10-31 20:39:26 +0000
commit    75dc36cd197ab47ffd1dbbad887dd7bac88c8add (patch)
tree      913d6c944dd9be9286fe36e5fe7462efb191b539
parent    fa640f13e8a231ec3a619cc360289828cc4fc114 (diff)
Lexer now returns errors on failure
Currently errors are only produced for invalid character literals, but those were still a possible source of failure.
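Below, as orientation before the diff, is a minimal sketch of how a caller is expected to use the new error-returning lexer interface. The types and functions (buffer_t, token_stream_t, lerr_t, LERR_OK, tokenise_buffer, lerr_as_cstr) are the ones introduced or changed by this commit; the wrapper lex_or_report and its return convention are illustrative assumptions, not part of the commit.

#include <stdio.h>

#include "lexer.h" /* buffer_t, token_stream_t, lerr_t, tokenise_buffer, lerr_as_cstr */

/* Hypothetical helper: tokenise a buffer and report any lexer error by name.
   Returns 0 on success, non-zero on failure. */
static int lex_or_report(const char *source_name, buffer_t *buffer,
                         token_stream_t *tokens)
{
  lerr_t err = tokenise_buffer(buffer, tokens);
  if (err != LERR_OK)
  {
    /* tokenise_buffer frees its partial token data and returns an error code
       instead of calling exit(1), so the caller decides how to recover. */
    fprintf(stderr, "%s: lex error: %s\n", source_name, lerr_as_cstr(err));
    return 1;
  }
  return 0;
}

main.c (see the last diff below) follows essentially this pattern, additionally re-scanning the consumed buffer to recover a line and column for the error message.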
-rw-r--r--  asm/lexer.c | 69
-rw-r--r--  asm/lexer.h |  9
-rw-r--r--  asm/main.c  | 22
3 files changed, 91 insertions(+), 9 deletions(-)
diff --git a/asm/lexer.c b/asm/lexer.c
index 51a8ec6..6a1f027 100644
--- a/asm/lexer.c
+++ b/asm/lexer.c
@@ -31,6 +31,20 @@ const char *token_type_as_cstr(token_type_t type)
return "";
}
+const char *lerr_as_cstr(lerr_t lerr)
+{
+ switch (lerr)
+ {
+ case LERR_INVALID_CHAR_LITERAL:
+ return "INVALID_CHAR_LITERAL";
+ break;
+ case LERR_OK:
+ return "OK";
+ break;
+ }
+ return "";
+}
+
size_t space_left(buffer_t *buffer)
{
if (buffer->available == buffer->used)
@@ -95,10 +109,10 @@ token_t tokenise_char_literal(buffer_t *buffer, size_t *column)
   return token;
 }
-token_stream_t tokenise_buffer(buffer_t *buffer)
+lerr_t tokenise_buffer(buffer_t *buffer, token_stream_t *tokens_ptr)
 {
   size_t column = 0, line = 1;
-  buffer_t tokens = {0};
+  token_stream_t tokens = {0};
   darr_init(&tokens, sizeof(token_t));
   while (space_left(buffer) != 0)
   {
@@ -139,10 +153,50 @@ token_stream_t tokenise_buffer(buffer_t *buffer)
       t = tokenise_symbol(buffer, &column);
     else if (c == '\'')
     {
-      if (space_left(buffer) < 2 || buffer->data[buffer->used + 2] != '\'')
-        // TODO: Lex Error (INVALID_CHAR_LITERAL)
-        exit(1);
-      t = tokenise_char_literal(buffer, &column);
+      if (space_left(buffer) < 2)
+      {
+        free(tokens.data);
+        return LERR_INVALID_CHAR_LITERAL;
+      }
+      else if (buffer->data[buffer->used + 1] == '\\')
+      {
+        char escape = '\0';
+        if (space_left(buffer) < 3 || buffer->data[buffer->used + 3] != '\'')
+        {
+          free(tokens.data);
+          return LERR_INVALID_CHAR_LITERAL;
+        }
+        switch (buffer->data[buffer->used + 2])
+        {
+        case 'n':
+          escape = '\n';
+          break;
+        case 't':
+          escape = '\t';
+          break;
+        case 'r':
+          escape = '\r';
+          break;
+        case '\\':
+          escape = '\\';
+          break;
+        default:
+          column += 2;
+          free(tokens.data);
+          return LERR_INVALID_CHAR_LITERAL;
+          break;
+        }
+
+        t = (token_t){.type = TOKEN_LITERAL_CHAR,
+                      .str = malloc(1),
+                      .str_size = 1,
+                      .column = column};
+        column += 4;
+        buffer->used += 4;
+        t.str[0] = escape;
+      }
+      else
+        t = tokenise_char_literal(buffer, &column);
     }
     if (is_token)
@@ -154,5 +208,6 @@ token_stream_t tokenise_buffer(buffer_t *buffer)
   size_t n_tokens = tokens.used / sizeof(token_t);
   tokens.available = n_tokens;
   tokens.used = 0;
-  return tokens;
+  *tokens_ptr = tokens;
+  return LERR_OK;
 }
diff --git a/asm/lexer.h b/asm/lexer.h
index d2e0028..1e68d8b 100644
--- a/asm/lexer.h
+++ b/asm/lexer.h
@@ -30,11 +30,18 @@ typedef struct
   size_t str_size;
 } token_t;
+typedef enum
+{
+  LERR_OK = 0,
+  LERR_INVALID_CHAR_LITERAL,
+} lerr_t;
+const char *lerr_as_cstr(lerr_t);
+
 typedef darr_t buffer_t;
 typedef darr_t token_stream_t;
 #define TOKEN_STREAM_AT(STREAM_DATA, INDEX) (((token_t *)(STREAM_DATA))[INDEX])
 const char *token_type_as_cstr(token_type_t type);
-token_stream_t tokenise_buffer(buffer_t *);
+lerr_t tokenise_buffer(buffer_t *, token_stream_t *);
 #endif
diff --git a/asm/main.c b/asm/main.c
index 2f1102b..bfa12e0 100644
--- a/asm/main.c
+++ b/asm/main.c
@@ -41,7 +41,27 @@ int main(int argc, char *argv[])
   darr_t buffer = darr_read_file(fp);
   fclose(fp);
-  token_stream_t tokens = tokenise_buffer(&buffer);
+  token_stream_t tokens = {0};
+  lerr_t lex_error = tokenise_buffer(&buffer, &tokens);
+  if (lex_error)
+  {
+    // Compute the line/newlines by hand
+    size_t column = 0, line = 1;
+    for (size_t i = 0; i < buffer.used; ++i)
+    {
+      if (buffer.data[i] == '\n')
+      {
+        column = 0;
+        ++line;
+      }
+      else
+        ++column;
+    }
+    fprintf(stderr, "%s:%lu:%lu: %s\n", source_file, line, column,
+            lerr_as_cstr(lex_error));
+    ret = 255 - lex_error;
+    goto end;
+  }
 #if VERBOSE >= 1
   printf("[%sTOKENISER%s]: %lu bytes -> %lu tokens\n", TERM_GREEN, TERM_RESET,
          buffer.used, tokens.available);