aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAryadev Chavali <aryadev@aryadevchavali.com>2023-11-06 08:16:15 +0000
committerAryadev Chavali <aryadev@aryadevchavali.com>2023-11-06 08:16:15 +0000
commit6e524569c34b0fb41c85280af5ea3f924999bbdf (patch)
tree425b790210dadb7832edc4e6f75fc126da36422f
parent4ae6c052764767f8281576a4ed938e1d6cf7c688 (diff)
downloadovm-6e524569c34b0fb41c85280af5ea3f924999bbdf.tar.gz
ovm-6e524569c34b0fb41c85280af5ea3f924999bbdf.tar.bz2
ovm-6e524569c34b0fb41c85280af5ea3f924999bbdf.zip
Current work on preprocessor
-rw-r--r--asm/lexer.c45
-rw-r--r--asm/lexer.h2
-rw-r--r--asm/parser.c54
-rw-r--r--asm/parser.h3
-rw-r--r--todo.org23
5 files changed, 86 insertions, 41 deletions
diff --git a/asm/lexer.c b/asm/lexer.c
index 108cb64..8b0a061 100644
--- a/asm/lexer.c
+++ b/asm/lexer.c
@@ -28,6 +28,8 @@ const char *token_type_as_cstr(token_type_t type)
return "PP_CONST";
case TOKEN_PP_END:
return "PP_END";
+ case TOKEN_PP_REFERENCE:
+ return "PP_REFERENCE";
case TOKEN_GLOBAL:
return "GLOBAL";
case TOKEN_STAR:
@@ -114,12 +116,12 @@ const char *lerr_as_cstr(lerr_t lerr)
{
switch (lerr)
{
- case LERR_INVALID_CHAR_LITERAL:
- return "INVALID_CHAR_LITERAL";
- break;
case LERR_OK:
return "OK";
- break;
+ case LERR_INVALID_CHAR_LITERAL:
+ return "INVALID_CHAR_LITERAL";
+ case LERR_INVALID_PREPROCESSOR_DIRECTIVE:
+ return "INVALID_PREPROCESSOR_DIRECTIVE";
}
return "";
}
@@ -150,7 +152,7 @@ bool is_valid_hex_char(char c)
(c >= 'A' && c <= 'F');
}
-token_t tokenise_symbol(buffer_t *buffer, size_t *column)
+lerr_t tokenise_symbol(buffer_t *buffer, size_t *column, token_t *token)
{
static_assert(NUMBER_OF_OPCODES == 98, "tokenise_buffer: Out of date!");
@@ -170,8 +172,25 @@ token_t tokenise_symbol(buffer_t *buffer, size_t *column)
if (sym_size > 1 && strncmp(opcode, "%", 1) == 0)
{
- // Some kind of preprocessing directive
- // TODO: Implement tokeniser for preprocessing directive
+ // Some preprocessing directive
+ if (sym_size > 6 && strncmp(opcode + 1, "CONST", 5) == 0)
+ {
+ type = TOKEN_PP_CONST;
+ offset = 6;
+ }
+ else if (sym_size == 4 && strncmp(opcode + 1, "END", 3) == 0)
+ {
+ type = TOKEN_PP_END;
+ offset = 4;
+ }
+ else
+ return LERR_INVALID_PREPROCESSOR_DIRECTIVE;
+ }
+ else if (sym_size > 1 && strncmp(opcode, "$", 1) == 0)
+ {
+ // A reference to a preprocessing constant
+ offset = 1;
+ type = TOKEN_PP_REFERENCE;
}
else if (sym_size == 4 && strncmp(opcode, "NOOP", 4) == 0)
{
@@ -384,7 +403,8 @@ token_t tokenise_symbol(buffer_t *buffer, size_t *column)
}
*column += sym_size - 1;
buffer->used += sym_size;
- return ret;
+ *token = ret;
+ return LERR_OK;
}
token_t tokenise_number_literal(buffer_t *buffer, size_t *column)
@@ -494,7 +514,14 @@ lerr_t tokenise_buffer(buffer_t *buffer, token_stream_t *tokens_ptr)
is_valid_hex_char(buffer->data[buffer->used + 1]))
t = tokenise_hex_literal(buffer, &column);
else if (is_symbol(c))
- t = tokenise_symbol(buffer, &column);
+ {
+ lerr_t lerr = tokenise_symbol(buffer, &column, &t);
+ if (lerr)
+ {
+ free(tokens.data);
+ return lerr;
+ }
+ }
else if (c == '\'')
{
if (space_left(buffer) < 2)
diff --git a/asm/lexer.h b/asm/lexer.h
index 8470a18..8d62440 100644
--- a/asm/lexer.h
+++ b/asm/lexer.h
@@ -19,6 +19,7 @@ typedef enum TokenType
{
TOKEN_PP_CONST,
TOKEN_PP_END,
+ TOKEN_PP_REFERENCE,
TOKEN_GLOBAL,
TOKEN_STAR,
TOKEN_LITERAL_NUMBER,
@@ -72,6 +73,7 @@ typedef enum
{
LERR_OK = 0,
LERR_INVALID_CHAR_LITERAL,
+ LERR_INVALID_PREPROCESSOR_DIRECTIVE,
} lerr_t;
const char *lerr_as_cstr(lerr_t);
diff --git a/asm/parser.c b/asm/parser.c
index 06ed580..ce71116 100644
--- a/asm/parser.c
+++ b/asm/parser.c
@@ -59,10 +59,13 @@ void presult_free(presult_t res)
free(res.label.name);
break;
case PRES_PP_CONST:
+ for (size_t i = 0; i < res.instructions.used / sizeof(presult_t); ++i)
+ presult_free(DARR_AT(presult_t, res.instructions.data, i));
+ free(res.instructions.data);
+ break;
case PRES_LABEL_ADDRESS:
case PRES_RELATIVE_ADDRESS:
case PRES_COMPLETE_RESULT:
- free(res.instructions.data);
break;
}
}
@@ -291,12 +294,24 @@ perr_t parse_next(token_stream_t *stream, presult_t *ret)
perr_t perr = PERR_OK;
switch (token.type)
{
- case TOKEN_PP_CONST:
-
- break;
+ case TOKEN_PP_END:
case TOKEN_LITERAL_NUMBER:
case TOKEN_LITERAL_CHAR:
return PERR_EXPECTED_SYMBOL;
+ case TOKEN_PP_CONST: {
+ ++stream->used;
+ ret->type = PRES_PP_CONST;
+ darr_init(&ret->instructions, );
+ while (stream->used < stream->available &&
+ TOKEN_STREAM_AT(stream->data, stream->used).type != TOKEN_PP_END)
+ {
+ presult_t body = {0};
+ perr_t perr = parse_next(stream, &body);
+ }
+ break;
+ }
+ case TOKEN_PP_REFERENCE:
+ break;
case TOKEN_GLOBAL: {
if (stream->used + 1 >= stream->available ||
TOKEN_STREAM_AT(stream->data, stream->used + 1).type != TOKEN_SYMBOL)
@@ -461,9 +476,8 @@ perr_t parse_next(token_stream_t *stream, presult_t *ret)
return perr;
}
-label_t search_labels(label_t *labels, size_t n, char *name)
+label_t search_labels(label_t *labels, size_t n, char *name, size_t name_size)
{
- size_t name_size = strlen(name);
for (size_t i = 0; i < n; ++i)
{
label_t label = labels[i];
@@ -557,9 +571,9 @@ perr_t process_presults(presult_t *results, size_t res_count,
prog_header_t header = {0};
if (start_label.name_size > 0)
{
- label_t label =
- search_labels((label_t *)label_registry.data,
- label_registry.used / sizeof(label_t), start_label);
+ label_t label = search_labels((label_t *)label_registry.data,
+ label_registry.used / sizeof(label_t),
+ start_label.name, start_label.name_size);
if (!label.name)
{
free(instr_darr.data);
@@ -575,10 +589,10 @@ perr_t process_presults(presult_t *results, size_t res_count,
switch (res.type)
{
case PRES_LABEL_ADDRESS: {
- inst_t inst = {0};
- label_t label =
- search_labels((label_t *)label_registry.data,
- label_registry.used / sizeof(label_t), res.label);
+ inst_t inst = {0};
+ label_t label = search_labels((label_t *)label_registry.data,
+ label_registry.used / sizeof(label_t),
+ res.label.name, res.label.size);
if (!label.name)
{
@@ -625,12 +639,7 @@ perr_t parse_stream(token_stream_t *stream, prog_t **program_ptr)
if (err)
{
for (size_t i = 0; i < (presults.used / sizeof(presult_t)); ++i)
- {
- presult_t res = ((presult_t *)presults.data)[i];
- if (res.type == PRES_LABEL_ADDRESS || res.type == PRES_LABEL ||
- res.type == PRES_GLOBAL_LABEL)
- free(res.label);
- }
+ presult_free(DARR_AT(presult_t, presults.data, i));
free(presults.data);
return err;
}
@@ -642,12 +651,7 @@ perr_t parse_stream(token_stream_t *stream, prog_t **program_ptr)
process_presults((presult_t *)presults.data,
presults.used / sizeof(presult_t), program_ptr);
for (size_t i = 0; i < (presults.used / sizeof(presult_t)); ++i)
- {
- presult_t res = ((presult_t *)presults.data)[i];
- if (res.type == PRES_LABEL_ADDRESS || res.type == PRES_LABEL ||
- res.type == PRES_GLOBAL_LABEL)
- free(res.label);
- }
+ presult_free(DARR_AT(presult_t, presults.data, i));
free(presults.data);
return perr;
}
diff --git a/asm/parser.h b/asm/parser.h
index 46f8bc8..01358fc 100644
--- a/asm/parser.h
+++ b/asm/parser.h
@@ -72,9 +72,10 @@ typedef struct
word addr;
} label_t;
-label_t search_labels(label_t *, size_t, char *);
+label_t search_labels(label_t *, size_t, char *, size_t);
perr_t parse_next(token_stream_t *, presult_t *);
+perr_t preprocessor(presult_t *, size_t, presult_t *);
perr_t process_presults(presult_t *, size_t, prog_t **);
perr_t parse_stream(token_stream_t *, prog_t **);
diff --git a/todo.org b/todo.org
index 60c6585..30a1574 100644
--- a/todo.org
+++ b/todo.org
@@ -43,22 +43,33 @@ directives.
Essentially a directive which assigns some literal to a symbol as a
constant. Something like
#+begin_src asm
-%const:n:20%end
+%const(n) 20 %end
#+end_src
-Where you'd refer to the definition using ~$n~. The assembler will
-look for symbols like ~$n~ and insert definitions it finds for them.
-Can be useful when defining data type bounds and other useful constants.
+Then, in my program, I could use it like so:
+#+begin_src asm
+...
+ push.word $n
+ print.word
+#+end_src
+
+The preprocessor should convert this to the equivalent code of
+#+begin_src asm
+...
+ push.word 20
+ print.word
+#+end_src
2023-11-04: You could even put full program instructions for a
constant potentially
#+begin_src asm
-%const:print-1:
+%const(print-1)
push.word 1
print.word
%end
#+end_src
-which when referred to ~$print-1~ would just insert the bytecode given inline.
+which, when referenced as ~$print-1~, would just insert the given
+bytecode in sequence.
* Completed
** DONE Write a label/jump system :ASM:
Essentially a user should be able to write arbitrary labels (maybe