Lexer now returns errors on failure

Currently this only covers invalid character literals, but those were
still a possible problem: the lexer previously just called exit(1).
2023-10-31 20:39:26 +00:00
parent fa640f13e8
commit 75dc36cd19
3 changed files with 91 additions and 9 deletions
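
From a caller's side the new interface looks roughly like this (a minimal sketch, not code from this commit; it assumes the declarations below live in a header named lexer.h, and that a filled buffer_t comes from darr_read_file as in main.c):

    #include <stdio.h>
    #include <stdlib.h>
    #include "lexer.h" /* assumed header name for the declarations in this commit */

    /* Sketch of the new calling convention: the token stream comes back
       through an out-parameter and the return value is a status code
       instead of exit(1). */
    static int lex_file(buffer_t *buffer)
    {
      token_stream_t tokens = {0};
      lerr_t err = tokenise_buffer(buffer, &tokens);
      if (err != LERR_OK)
      {
        /* On failure tokenise_buffer frees its partial token array itself,
           so the caller only reports the error. */
        fprintf(stderr, "lex error: %s\n", lerr_as_cstr(err));
        return 1;
      }
      /* ... consume tokens via TOKEN_STREAM_AT(tokens.data, i) ... */
      free(tokens.data);
      return 0;
    }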

@@ -31,6 +31,20 @@ const char *token_type_as_cstr(token_type_t type)
   return "";
 }
 
+const char *lerr_as_cstr(lerr_t lerr)
+{
+  switch (lerr)
+  {
+  case LERR_INVALID_CHAR_LITERAL:
+    return "INVALID_CHAR_LITERAL";
+    break;
+  case LERR_OK:
+    return "OK";
+    break;
+  }
+  return "";
+}
+
 size_t space_left(buffer_t *buffer)
 {
   if (buffer->available == buffer->used)
@@ -95,10 +109,10 @@ token_t tokenise_char_literal(buffer_t *buffer, size_t *column)
   return token;
 }
 
-token_stream_t tokenise_buffer(buffer_t *buffer)
+lerr_t tokenise_buffer(buffer_t *buffer, token_stream_t *tokens_ptr)
 {
   size_t column = 0, line = 1;
-  buffer_t tokens = {0};
+  token_stream_t tokens = {0};
   darr_init(&tokens, sizeof(token_t));
   while (space_left(buffer) != 0)
   {
@@ -139,10 +153,50 @@ token_stream_t tokenise_buffer(buffer_t *buffer)
       t = tokenise_symbol(buffer, &column);
     else if (c == '\'')
     {
-      if (space_left(buffer) < 2 || buffer->data[buffer->used + 2] != '\'')
-        // TODO: Lex Error (INVALID_CHAR_LITERAL)
-        exit(1);
-      t = tokenise_char_literal(buffer, &column);
+      if (space_left(buffer) < 2)
+      {
+        free(tokens.data);
+        return LERR_INVALID_CHAR_LITERAL;
+      }
+      else if (buffer->data[buffer->used + 1] == '\\')
+      {
+        char escape = '\0';
+        if (space_left(buffer) < 3 || buffer->data[buffer->used + 3] != '\'')
+        {
+          free(tokens.data);
+          return LERR_INVALID_CHAR_LITERAL;
+        }
+        switch (buffer->data[buffer->used + 2])
+        {
+        case 'n':
+          escape = '\n';
+          break;
+        case 't':
+          escape = '\t';
+          break;
+        case 'r':
+          escape = '\r';
+          break;
+        case '\\':
+          escape = '\\';
+          break;
+        default:
+          column += 2;
+          free(tokens.data);
+          return LERR_INVALID_CHAR_LITERAL;
+          break;
+        }
+        t = (token_t){.type = TOKEN_LITERAL_CHAR,
+                      .str = malloc(1),
+                      .str_size = 1,
+                      .column = column};
+        column += 4;
+        buffer->used += 4;
+        t.str[0] = escape;
+      }
+      else
+        t = tokenise_char_literal(buffer, &column);
     }
 
     if (is_token)
@@ -154,5 +208,6 @@ token_stream_t tokenise_buffer(buffer_t *buffer)
   size_t n_tokens = tokens.used / sizeof(token_t);
   tokens.available = n_tokens;
   tokens.used = 0;
-  return tokens;
+  *tokens_ptr = tokens;
+  return LERR_OK;
 }
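
To make the new failure modes concrete, here is a hypothetical harness (not part of the commit) that feeds short strings to the lexer. It builds a buffer_t by hand from the darr_t fields the diff itself uses (data, used, available); the cast assumes data is a char pointer, which may not match how the project normally constructs buffers:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include "lexer.h" /* assumed header name */

    static void try_lex(const char *src)
    {
      /* Hand-built buffer over a string literal; nothing here is freed. */
      buffer_t buffer = {0};
      buffer.data = (char *)src;
      buffer.available = strlen(src);
      buffer.used = 0;
      token_stream_t tokens = {0};
      lerr_t err = tokenise_buffer(&buffer, &tokens);
      printf("%-6s -> %s\n", src, lerr_as_cstr(err));
      if (err == LERR_OK)
        free(tokens.data);
    }

    int main(void)
    {
      try_lex("'a'");   /* plain character literal, unescaped path        */
      try_lex("'\\n'"); /* escape resolved by the new switch table        */
      try_lex("'\\q'"); /* unknown escape -> LERR_INVALID_CHAR_LITERAL    */
      try_lex("'");     /* truncated literal -> LERR_INVALID_CHAR_LITERAL */
      return 0;
    }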

@@ -30,11 +30,18 @@ typedef struct
   size_t str_size;
 } token_t;
 
+typedef enum
+{
+  LERR_OK = 0,
+  LERR_INVALID_CHAR_LITERAL,
+} lerr_t;
+
+const char *lerr_as_cstr(lerr_t);
+
 typedef darr_t buffer_t;
 typedef darr_t token_stream_t;
 
 #define TOKEN_STREAM_AT(STREAM_DATA, INDEX) (((token_t *)(STREAM_DATA))[INDEX])
 const char *token_type_as_cstr(token_type_t type);
-token_stream_t tokenise_buffer(buffer_t *);
+lerr_t tokenise_buffer(buffer_t *, token_stream_t *);
 
 #endif

@@ -41,7 +41,27 @@ int main(int argc, char *argv[])
   darr_t buffer = darr_read_file(fp);
   fclose(fp);
-  token_stream_t tokens = tokenise_buffer(&buffer);
+  token_stream_t tokens = {0};
+  lerr_t lex_error = tokenise_buffer(&buffer, &tokens);
+  if (lex_error)
+  {
+    // Compute the line/newlines by hand
+    size_t column = 0, line = 1;
+    for (size_t i = 0; i < buffer.used; ++i)
+    {
+      if (buffer.data[i] == '\n')
+      {
+        column = 0;
+        ++line;
+      }
+      else
+        ++column;
+    }
+    fprintf(stderr, "%s:%lu:%lu: %s\n", source_file, line, column,
+            lerr_as_cstr(lex_error));
+    ret = 255 - lex_error;
+    goto end;
+  }
 
 #if VERBOSE >= 1
   printf("[%sTOKENISER%s]: %lu bytes -> %lu tokens\n", TERM_GREEN, TERM_RESET,
          buffer.used, tokens.available);