Current work on preprocessor
asm/lexer.c (45)

@@ -28,6 +28,8 @@ const char *token_type_as_cstr(token_type_t type)
     return "PP_CONST";
   case TOKEN_PP_END:
     return "PP_END";
+  case TOKEN_PP_REFERENCE:
+    return "PP_REFERENCE";
   case TOKEN_GLOBAL:
     return "GLOBAL";
   case TOKEN_STAR:
@@ -114,12 +116,12 @@ const char *lerr_as_cstr(lerr_t lerr)
 {
   switch (lerr)
   {
-  case LERR_INVALID_CHAR_LITERAL:
-    return "INVALID_CHAR_LITERAL";
-    break;
   case LERR_OK:
     return "OK";
-    break;
+  case LERR_INVALID_CHAR_LITERAL:
+    return "INVALID_CHAR_LITERAL";
+  case LERR_INVALID_PREPROCESSOR_DIRECTIVE:
+    return "INVALID_PREPROCESSOR_DIRECTIVE";
   }
   return "";
 }
@@ -150,7 +152,7 @@ bool is_valid_hex_char(char c)
          (c >= 'A' && c <= 'F');
 }
 
-token_t tokenise_symbol(buffer_t *buffer, size_t *column)
+lerr_t tokenise_symbol(buffer_t *buffer, size_t *column, token_t *token)
 {
   static_assert(NUMBER_OF_OPCODES == 98, "tokenise_buffer: Out of date!");
 
@@ -170,8 +172,25 @@ token_t tokenise_symbol(buffer_t *buffer, size_t *column)
 
   if (sym_size > 1 && strncmp(opcode, "%", 1) == 0)
   {
-    // Some kind of preprocessing directive
-    // TODO: Implement tokeniser for preprocessing directive
+    // Some preprocessing directive
+    if (sym_size > 6 && strncmp(opcode + 1, "CONST", 5) == 0)
+    {
+      type = TOKEN_PP_CONST;
+      offset = 6;
+    }
+    else if (sym_size == 4 && strncmp(opcode + 1, "END", 3) == 0)
+    {
+      type = TOKEN_PP_END;
+      offset = 4;
+    }
+    else
+      return LERR_INVALID_PREPROCESSOR_DIRECTIVE;
+  }
+  else if (sym_size > 1 && strncmp(opcode, "$", 1) == 0)
+  {
+    // A reference to a preprocessing constant
+    offset = 1;
+    type = TOKEN_PP_REFERENCE;
   }
   else if (sym_size == 4 && strncmp(opcode, "NOOP", 4) == 0)
   {
@@ -384,7 +403,8 @@ token_t tokenise_symbol(buffer_t *buffer, size_t *column)
   }
   *column += sym_size - 1;
   buffer->used += sym_size;
-  return ret;
+  *token = ret;
+  return LERR_OK;
 }
 
 token_t tokenise_number_literal(buffer_t *buffer, size_t *column)
@@ -494,7 +514,14 @@ lerr_t tokenise_buffer(buffer_t *buffer, token_stream_t *tokens_ptr)
              is_valid_hex_char(buffer->data[buffer->used + 1]))
       t = tokenise_hex_literal(buffer, &column);
     else if (is_symbol(c))
-      t = tokenise_symbol(buffer, &column);
+    {
+      lerr_t lerr = tokenise_symbol(buffer, &column, &t);
+      if (lerr)
+      {
+        free(tokens.data);
+        return lerr;
+      }
+    }
    else if (c == '\'')
     {
       if (space_left(buffer) < 2)
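
The new branch in tokenise_symbol classifies a symbol beginning with % as a preprocessor directive (%CONST, which must carry a payload beyond its first six characters, or %END) and a symbol beginning with $ as a reference to a previously defined constant. A minimal standalone sketch of that classification logic follows; the directive_t enum, the recognise_directive helper and the sample strings are illustrative stand-ins, not part of the repository's lexer.

#+begin_src c
#include <stdio.h>
#include <string.h>

/* Illustrative classification result; the real lexer uses token_type_t. */
typedef enum
{
  DIRECTIVE_CONST,
  DIRECTIVE_END,
  DIRECTIVE_REFERENCE,
  DIRECTIVE_INVALID,
} directive_t;

/* Mirrors the prefix checks added to tokenise_symbol: "%CONST..." and
   "%END" are preprocessor directives, "$..." refers to a constant. */
directive_t recognise_directive(const char *sym, size_t size)
{
  if (size > 1 && sym[0] == '%')
  {
    if (size > 6 && strncmp(sym + 1, "CONST", 5) == 0)
      return DIRECTIVE_CONST;
    else if (size == 4 && strncmp(sym + 1, "END", 3) == 0)
      return DIRECTIVE_END;
    return DIRECTIVE_INVALID;
  }
  else if (size > 1 && sym[0] == '$')
    return DIRECTIVE_REFERENCE;
  return DIRECTIVE_INVALID;
}

int main(void)
{
  const char *samples[] = {"%CONST(N)", "%END", "$N", "%WHAT"};
  for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); ++i)
    printf("%-10s -> %d\n", samples[i],
           recognise_directive(samples[i], strlen(samples[i])));
  return 0;
}
#+end_src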
asm/lexer.h

@@ -19,6 +19,7 @@ typedef enum TokenType
 {
   TOKEN_PP_CONST,
   TOKEN_PP_END,
+  TOKEN_PP_REFERENCE,
   TOKEN_GLOBAL,
   TOKEN_STAR,
   TOKEN_LITERAL_NUMBER,
@@ -72,6 +73,7 @@ typedef enum
 {
   LERR_OK = 0,
   LERR_INVALID_CHAR_LITERAL,
+  LERR_INVALID_PREPROCESSOR_DIRECTIVE,
 } lerr_t;
 const char *lerr_as_cstr(lerr_t);
 
asm/parser.c (54)

@@ -59,10 +59,13 @@ void presult_free(presult_t res)
     free(res.label.name);
     break;
   case PRES_PP_CONST:
+    for (size_t i = 0; i < res.instructions.used / sizeof(presult_t); ++i)
+      presult_free(DARR_AT(presult_t, res.instructions.data, i));
+    free(res.instructions.data);
+    break;
   case PRES_LABEL_ADDRESS:
   case PRES_RELATIVE_ADDRESS:
   case PRES_COMPLETE_RESULT:
-    free(res.instructions.data);
     break;
   }
 }
@@ -291,12 +294,24 @@ perr_t parse_next(token_stream_t *stream, presult_t *ret)
   perr_t perr = PERR_OK;
   switch (token.type)
   {
-  case TOKEN_PP_CONST:
-
-    break;
+  case TOKEN_PP_END:
   case TOKEN_LITERAL_NUMBER:
   case TOKEN_LITERAL_CHAR:
     return PERR_EXPECTED_SYMBOL;
+  case TOKEN_PP_CONST: {
+    ++stream->used;
+    ret->type = PRES_PP_CONST;
+    darr_init(&ret->instructions, );
+    while (stream->used < stream->available &&
+           TOKEN_STREAM_AT(stream->data, stream->used).type != TOKEN_PP_END)
+    {
+      presult_t body = {0};
+      perr_t perr = parse_next(stream, &body);
+    }
+    break;
+  }
+  case TOKEN_PP_REFERENCE:
+    break;
   case TOKEN_GLOBAL: {
     if (stream->used + 1 >= stream->available ||
         TOKEN_STREAM_AT(stream->data, stream->used + 1).type != TOKEN_SYMBOL)
@@ -461,9 +476,8 @@ perr_t parse_next(token_stream_t *stream, presult_t *ret)
   return perr;
 }
 
-label_t search_labels(label_t *labels, size_t n, char *name)
+label_t search_labels(label_t *labels, size_t n, char *name, size_t name_size)
 {
-  size_t name_size = strlen(name);
   for (size_t i = 0; i < n; ++i)
   {
     label_t label = labels[i];
@@ -557,9 +571,9 @@ perr_t process_presults(presult_t *results, size_t res_count,
   prog_header_t header = {0};
   if (start_label.name_size > 0)
   {
-    label_t label =
-        search_labels((label_t *)label_registry.data,
-                      label_registry.used / sizeof(label_t), start_label);
+    label_t label = search_labels((label_t *)label_registry.data,
+                                  label_registry.used / sizeof(label_t),
+                                  start_label.name, start_label.name_size);
     if (!label.name)
     {
       free(instr_darr.data);
@@ -575,10 +589,10 @@ perr_t process_presults(presult_t *results, size_t res_count,
     switch (res.type)
     {
     case PRES_LABEL_ADDRESS: {
       inst_t inst = {0};
-      label_t label =
-          search_labels((label_t *)label_registry.data,
-                        label_registry.used / sizeof(label_t), res.label);
+      label_t label = search_labels((label_t *)label_registry.data,
+                                    label_registry.used / sizeof(label_t),
+                                    res.label.name, res.label.size);
 
       if (!label.name)
       {
@@ -625,12 +639,7 @@ perr_t parse_stream(token_stream_t *stream, prog_t **program_ptr)
   if (err)
   {
     for (size_t i = 0; i < (presults.used / sizeof(presult_t)); ++i)
-    {
-      presult_t res = ((presult_t *)presults.data)[i];
-      if (res.type == PRES_LABEL_ADDRESS || res.type == PRES_LABEL ||
-          res.type == PRES_GLOBAL_LABEL)
-        free(res.label);
-    }
+      presult_free(DARR_AT(presult_t, presults.data, i));
     free(presults.data);
     return err;
   }
@@ -642,12 +651,7 @@ perr_t parse_stream(token_stream_t *stream, prog_t **program_ptr)
       process_presults((presult_t *)presults.data,
                        presults.used / sizeof(presult_t), program_ptr);
   for (size_t i = 0; i < (presults.used / sizeof(presult_t)); ++i)
-  {
-    presult_t res = ((presult_t *)presults.data)[i];
-    if (res.type == PRES_LABEL_ADDRESS || res.type == PRES_LABEL ||
-        res.type == PRES_GLOBAL_LABEL)
-      free(res.label);
-  }
+    presult_free(DARR_AT(presult_t, presults.data, i));
   free(presults.data);
   return perr;
 }
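
The TOKEN_PP_CONST case above gathers the directive body by calling parse_next in a loop, but in this commit each body result and its perr are still discarded, even though presult_free now expects a PRES_PP_CONST result to carry packed presult_t values in res.instructions (the used / sizeof(presult_t) loop). A sketch of that accumulation step with a simplified growable array follows; the darr_t layout and the darr_push_presult helper are assumptions for illustration, not the repository's darr API.

#+begin_src c
#include <stdlib.h>
#include <string.h>

/* Simplified stand-ins; field names follow the usage visible in the diff
   (data/used/available), everything else is assumed. */
typedef struct { int dummy; } presult_t;

typedef struct
{
  char *data;
  size_t used;      /* bytes in use */
  size_t available; /* bytes allocated */
} darr_t;

/* Append one presult_t to the array, growing the allocation as needed. */
void darr_push_presult(darr_t *darr, presult_t res)
{
  if (darr->used + sizeof(res) > darr->available)
  {
    darr->available = darr->available == 0 ? 4 * sizeof(res) : darr->available * 2;
    darr->data      = realloc(darr->data, darr->available);
  }
  memcpy(darr->data + darr->used, &res, sizeof(res));
  darr->used += sizeof(res);
}

int main(void)
{
  darr_t body_results = {0};
  for (int i = 0; i < 3; ++i)
    darr_push_presult(&body_results, (presult_t){i});
  /* body_results.used / sizeof(presult_t) == 3, mirroring presult_free */
  free(body_results.data);
  return 0;
}
#+end_src

Inside parse_next this would sit directly after the parse_next(stream, &body) call: push body on success, otherwise free what has been collected so far and propagate the error.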
asm/parser.h

@@ -72,9 +72,10 @@ typedef struct
   word addr;
 } label_t;
 
-label_t search_labels(label_t *, size_t, char *);
+label_t search_labels(label_t *, size_t, char *, size_t);
 
 perr_t parse_next(token_stream_t *, presult_t *);
+perr_t preprocessor(presult_t *, size_t, presult_t *);
 perr_t process_presults(presult_t *, size_t, prog_t **);
 perr_t parse_stream(token_stream_t *, prog_t **);
 
todo.org (23)

@@ -43,22 +43,33 @@ directives.
 Essentially a directive which assigns some literal to a symbol as a
 constant. Something like
 #+begin_src asm
-%const:n:20%end
+%const(n) 20 %end
 #+end_src
 
-Where you'd refer to the definition using ~$n~. The assembler will
-look for symbols like ~$n~ and insert definitions it finds for them.
-Can be useful when defining data type bounds and other useful constants.
+Then, during my program I could use it like so
+#+begin_src asm
+...
+push.word $n
+print.word
+#+end_src
+
+The preprocessor should convert this to the equivalent code of
+#+begin_src asm
+...
+push.word 20
+print.word
+#+end_src
 
 2023-11-04: You could even put full program instructions for a
 constant potentially
 #+begin_src asm
-%const:print-1:
+%const(print-1)
 push.word 1
 print.word
 %end
 #+end_src
-which when referred to ~$print-1~ would just insert the bytecode given inline.
+which when referred to ~$print-1~ would just insert the bytecode given
+in sequence.
 * Completed
 ** DONE Write a label/jump system :ASM:
 Essentially a user should be able to write arbitrary labels (maybe
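
As described above, a reference such as ~$n~ or ~$print-1~ should expand to whatever was recorded between ~%const(...)~ and ~%end~, whether that is a single literal or a run of instructions. That amounts to a lookup table from constant names to recorded token sequences plus a splice over the token stream. A minimal sketch of the idea follows, using plain strings rather than the repository's token and presult types; constant_t and expand are illustrative names only.

#+begin_src c
#include <stdio.h>
#include <string.h>

/* One recorded constant: its name and the tokens of its body. */
typedef struct
{
  const char *name;  /* referred to as $name */
  const char **body; /* tokens recorded between %const(...) and %end */
  size_t body_size;
} constant_t;

/* Print the stream, splicing in the body of any constant referenced via $name. */
void expand(const char **stream, size_t size, const constant_t *consts, size_t n_consts)
{
  for (size_t i = 0; i < size; ++i)
  {
    if (stream[i][0] == '$')
    {
      for (size_t j = 0; j < n_consts; ++j)
        if (strcmp(stream[i] + 1, consts[j].name) == 0)
          for (size_t k = 0; k < consts[j].body_size; ++k)
            printf("%s\n", consts[j].body[k]);
    }
    else
      printf("%s\n", stream[i]);
  }
}

int main(void)
{
  /* %const(n) 20 %end */
  const char *n_body[] = {"20"};
  /* %const(print-1) push.word 1 / print.word %end */
  const char *print1_body[] = {"push.word", "1", "print.word"};
  constant_t consts[] = {{"n", n_body, 1}, {"print-1", print1_body, 3}};

  const char *program[] = {"push.word", "$n", "print.word", "$print-1"};
  expand(program, 4, consts, 2);
  return 0;
}
#+end_src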