Current work on preprocessor

This commit is contained in:
2023-11-06 08:16:15 +00:00
parent 4ae6c05276
commit 6e524569c3
5 changed files with 86 additions and 41 deletions

View File

@@ -28,6 +28,8 @@ const char *token_type_as_cstr(token_type_t type)
return "PP_CONST";
case TOKEN_PP_END:
return "PP_END";
case TOKEN_PP_REFERENCE:
return "PP_REFERENCE";
case TOKEN_GLOBAL:
return "GLOBAL";
case TOKEN_STAR:
@@ -114,12 +116,12 @@ const char *lerr_as_cstr(lerr_t lerr)
{
switch (lerr)
{
case LERR_INVALID_CHAR_LITERAL:
return "INVALID_CHAR_LITERAL";
break;
case LERR_OK:
return "OK";
break;
case LERR_INVALID_CHAR_LITERAL:
return "INVALID_CHAR_LITERAL";
case LERR_INVALID_PREPROCESSOR_DIRECTIVE:
return "INVALID_PREPROCESSOR_DIRECTIVE";
}
return "";
}
@@ -150,7 +152,7 @@ bool is_valid_hex_char(char c)
(c >= 'A' && c <= 'F');
}
token_t tokenise_symbol(buffer_t *buffer, size_t *column)
lerr_t tokenise_symbol(buffer_t *buffer, size_t *column, token_t *token)
{
static_assert(NUMBER_OF_OPCODES == 98, "tokenise_buffer: Out of date!");
@@ -170,8 +172,25 @@ token_t tokenise_symbol(buffer_t *buffer, size_t *column)
if (sym_size > 1 && strncmp(opcode, "%", 1) == 0)
{
// Some kind of preprocessing directive
// TODO: Implement tokeniser for preprocessing directive
// Some preprocessing directive
if (sym_size > 6 && strncmp(opcode + 1, "CONST", 5) == 0)
{
type = TOKEN_PP_CONST;
offset = 6;
}
else if (sym_size == 4 && strncmp(opcode + 1, "END", 3) == 0)
{
type = TOKEN_PP_END;
offset = 4;
}
else
return LERR_INVALID_PREPROCESSOR_DIRECTIVE;
}
else if (sym_size > 1 && strncmp(opcode, "$", 1) == 0)
{
// A reference to a preprocessing constant
offset = 1;
type = TOKEN_PP_REFERENCE;
}
else if (sym_size == 4 && strncmp(opcode, "NOOP", 4) == 0)
{
@@ -384,7 +403,8 @@ token_t tokenise_symbol(buffer_t *buffer, size_t *column)
}
*column += sym_size - 1;
buffer->used += sym_size;
return ret;
*token = ret;
return LERR_OK;
}
token_t tokenise_number_literal(buffer_t *buffer, size_t *column)
@@ -494,7 +514,14 @@ lerr_t tokenise_buffer(buffer_t *buffer, token_stream_t *tokens_ptr)
is_valid_hex_char(buffer->data[buffer->used + 1]))
t = tokenise_hex_literal(buffer, &column);
else if (is_symbol(c))
t = tokenise_symbol(buffer, &column);
{
lerr_t lerr = tokenise_symbol(buffer, &column, &t);
if (lerr)
{
free(tokens.data);
return lerr;
}
}
else if (c == '\'')
{
if (space_left(buffer) < 2)

View File

@@ -19,6 +19,7 @@ typedef enum TokenType
{
TOKEN_PP_CONST,
TOKEN_PP_END,
TOKEN_PP_REFERENCE,
TOKEN_GLOBAL,
TOKEN_STAR,
TOKEN_LITERAL_NUMBER,
@@ -72,6 +73,7 @@ typedef enum
{
LERR_OK = 0,
LERR_INVALID_CHAR_LITERAL,
LERR_INVALID_PREPROCESSOR_DIRECTIVE,
} lerr_t;
const char *lerr_as_cstr(lerr_t);

View File

@@ -59,10 +59,13 @@ void presult_free(presult_t res)
free(res.label.name);
break;
case PRES_PP_CONST:
for (size_t i = 0; i < res.instructions.used / sizeof(presult_t); ++i)
presult_free(DARR_AT(presult_t, res.instructions.data, i));
free(res.instructions.data);
break;
case PRES_LABEL_ADDRESS:
case PRES_RELATIVE_ADDRESS:
case PRES_COMPLETE_RESULT:
free(res.instructions.data);
break;
}
}
@@ -291,12 +294,24 @@ perr_t parse_next(token_stream_t *stream, presult_t *ret)
perr_t perr = PERR_OK;
switch (token.type)
{
case TOKEN_PP_CONST:
break;
case TOKEN_PP_END:
case TOKEN_LITERAL_NUMBER:
case TOKEN_LITERAL_CHAR:
return PERR_EXPECTED_SYMBOL;
case TOKEN_PP_CONST: {
++stream->used;
ret->type = PRES_PP_CONST;
darr_init(&ret->instructions, );
while (stream->used < stream->available &&
TOKEN_STREAM_AT(stream->data, stream->used).type != TOKEN_PP_END)
{
presult_t body = {0};
perr_t perr = parse_next(stream, &body);
}
break;
}
case TOKEN_PP_REFERENCE:
break;
case TOKEN_GLOBAL: {
if (stream->used + 1 >= stream->available ||
TOKEN_STREAM_AT(stream->data, stream->used + 1).type != TOKEN_SYMBOL)
@@ -461,9 +476,8 @@ perr_t parse_next(token_stream_t *stream, presult_t *ret)
return perr;
}
label_t search_labels(label_t *labels, size_t n, char *name)
label_t search_labels(label_t *labels, size_t n, char *name, size_t name_size)
{
size_t name_size = strlen(name);
for (size_t i = 0; i < n; ++i)
{
label_t label = labels[i];
@@ -557,9 +571,9 @@ perr_t process_presults(presult_t *results, size_t res_count,
prog_header_t header = {0};
if (start_label.name_size > 0)
{
label_t label =
search_labels((label_t *)label_registry.data,
label_registry.used / sizeof(label_t), start_label);
label_t label = search_labels((label_t *)label_registry.data,
label_registry.used / sizeof(label_t),
start_label.name, start_label.name_size);
if (!label.name)
{
free(instr_darr.data);
@@ -575,10 +589,10 @@ perr_t process_presults(presult_t *results, size_t res_count,
switch (res.type)
{
case PRES_LABEL_ADDRESS: {
inst_t inst = {0};
label_t label =
search_labels((label_t *)label_registry.data,
label_registry.used / sizeof(label_t), res.label);
inst_t inst = {0};
label_t label = search_labels((label_t *)label_registry.data,
label_registry.used / sizeof(label_t),
res.label.name, res.label.size);
if (!label.name)
{
@@ -625,12 +639,7 @@ perr_t parse_stream(token_stream_t *stream, prog_t **program_ptr)
if (err)
{
for (size_t i = 0; i < (presults.used / sizeof(presult_t)); ++i)
{
presult_t res = ((presult_t *)presults.data)[i];
if (res.type == PRES_LABEL_ADDRESS || res.type == PRES_LABEL ||
res.type == PRES_GLOBAL_LABEL)
free(res.label);
}
presult_free(DARR_AT(presult_t, presults.data, i));
free(presults.data);
return err;
}
@@ -642,12 +651,7 @@ perr_t parse_stream(token_stream_t *stream, prog_t **program_ptr)
process_presults((presult_t *)presults.data,
presults.used / sizeof(presult_t), program_ptr);
for (size_t i = 0; i < (presults.used / sizeof(presult_t)); ++i)
{
presult_t res = ((presult_t *)presults.data)[i];
if (res.type == PRES_LABEL_ADDRESS || res.type == PRES_LABEL ||
res.type == PRES_GLOBAL_LABEL)
free(res.label);
}
presult_free(DARR_AT(presult_t, presults.data, i));
free(presults.data);
return perr;
}

View File

@@ -72,9 +72,10 @@ typedef struct
word addr;
} label_t;
label_t search_labels(label_t *, size_t, char *);
label_t search_labels(label_t *, size_t, char *, size_t);
perr_t parse_next(token_stream_t *, presult_t *);
perr_t preprocessor(presult_t *, size_t, presult_t *);
perr_t process_presults(presult_t *, size_t, prog_t **);
perr_t parse_stream(token_stream_t *, prog_t **);

View File

@@ -43,22 +43,33 @@ directives.
Essentially a directive which assigns some literal to a symbol as a
constant. Something like
#+begin_src asm
%const:n:20%end
%const(n) 20 %end
#+end_src
Where you'd refer to the definition using ~$n~. The assembler will
look for symbols like ~$n~ and insert definitions it finds for them.
This can be useful when defining data type bounds and other constants.
Then, in my program, I could use it like so:
#+begin_src asm
...
push.word $n
print.word
#+end_src
The preprocessor should convert this to the equivalent code of
#+begin_src asm
...
push.word 20
print.word
#+end_src
2023-11-04: You could potentially even put full program instructions
in a constant:
#+begin_src asm
%const:print-1:
%const(print-1)
push.word 1
print.word
%end
#+end_src
which when referred to ~$print-1~ would just insert the bytecode given inline.
which, when referred to as ~$print-1~, would just insert the given
bytecode in sequence.
* Completed
** DONE Write a label/jump system :ASM:
Essentially a user should be able to write arbitrary labels (maybe