about | summary | refs | log | tree | commit | diff
path: root/asm/lexer.hpp
diff options
context:
space:
mode:
author: Aryadev Chavali <aryadev@aryadevchavali.com> 2024-04-14 16:52:58 +0630
committer: Aryadev Chavali <aryadev@aryadevchavali.com> 2024-04-14 16:52:58 +0630
commit: e7a09c0de457b8cc3512687b406137b8a471c3c4 (patch)
tree: 70abc9d28ca6987be6b4f2560d05e7cf9ab63f20 /asm/lexer.hpp
parent: 1e6f60a869e578bdd390029da71d5dc920ef1b70 (diff)
download: ovm-e7a09c0de457b8cc3512687b406137b8a471c3c4.tar.gz
ovm-e7a09c0de457b8cc3512687b406137b8a471c3c4.tar.bz2
ovm-e7a09c0de457b8cc3512687b406137b8a471c3c4.zip
Wrote a new lexer API in C++
Essentially a refactor of the C-style lexer into C++ style. I can already see some benefits from doing this, in particular the speed of prototyping.
Diffstat (limited to 'asm/lexer.hpp')
-rw-r--r-- asm/lexer.hpp 96
1 files changed, 96 insertions, 0 deletions
diff --git a/asm/lexer.hpp b/asm/lexer.hpp
new file mode 100644
index 0000000..4e8439b
--- /dev/null
+++ b/asm/lexer.hpp
@@ -0,0 +1,96 @@
+/* Copyright (C) 2024 Aryadev Chavali
+
+ * You may distribute and modify this code under the terms of the
+ * GPLv2 license. You should have received a copy of the GPLv2
+ * license with this file. If not, please write to:
+ * aryadev@aryadevchavali.com.
+
+ * Created: 2024-04-14
+ * Author: Aryadev Chavali
+ * Description: Lexer for assembly language
+ */
+
+#ifndef LEXER_HPP
+#define LEXER_HPP
+
+#include <ostream>
+#include <string>
+#include <tuple>
+#include <vector>
+
+enum token_type_t // Tag identifying what kind of token a token_t holds (see token_t::type).
+{ // Unscoped enum with no explicit values: enumerators are visible at namespace scope and numbered from 0 in declaration order.
+ PP_CONST, // %const(<symbol>)...
+ PP_USE, // %use <string>
+ PP_END, // %end
+ PP_REFERENCE, // $<symbol>
+ GLOBAL, // NOTE(review): source syntax for this token is not documented here — confirm against the lexer implementation
+ STAR, // presumably the '*' character — TODO confirm
+ LITERAL_NUMBER, // literal kinds; lerr_t declares a matching INVALID_*_LITERAL code for each of the three
+ LITERAL_CHAR,
+ LITERAL_STRING,
+ NOOP, // NOOP through RET look like instruction mnemonics — TODO confirm against the implementation
+ HALT,
+ PUSH,
+ POP,
+ PUSH_REG,
+ MOV,
+ DUP,
+ MALLOC,
+ MALLOC_STACK,
+ MSET,
+ MSET_STACK,
+ MGET,
+ MGET_STACK,
+ MDELETE,
+ MSIZE,
+ NOT,
+ OR,
+ AND,
+ XOR,
+ EQ,
+ LT,
+ LTE,
+ GT,
+ GTE,
+ PLUS,
+ SUB,
+ MULT,
+ PRINT,
+ JUMP_ABS,
+ JUMP_STACK,
+ JUMP_IF,
+ CALL,
+ CALL_STACK,
+ RET,
+ SYMBOL, // last enumerator; presumably any name not matched by a kind above — TODO confirm
+};
+
+const char *token_type_as_cstr(token_type_t type); // Maps a token type to a C string, presumably its name; only declared here, defined outside this header.
+
+struct token_t // A single token: its kind, a column/line pair and an associated string.
+{
+ token_type_t type; // which kind of token this is
+ size_t column, line; // presumably the token's position in the lexed text; whether 0- or 1-based is not visible here
+ std::string content; // string owned by the token — presumably the token's text; TODO confirm what is stored per kind
+
+ token_t(); // default constructor, defined elsewhere; the initial field values are not visible from this header
+ token_t(token_type_t, std::string, size_t col = 0, size_t line = 0); // col and line default to 0 when omitted
+};
+
+std::ostream &operator<<(std::ostream &, token_t &); // Stream insertion for a token. NOTE(review): the token is taken by non-const reference, so const tokens and temporaries cannot be streamed.
+
+enum lerr_t // Status code type returned by tokenise_string.
+{ // NOTE(review): unscoped enum, so the generic name OK lives at namespace scope.
+ OK = 0, // explicitly zero, so it converts to false in a boolean test; presumably means "no error"
+ INVALID_CHAR_LITERAL, // this and the following codes take 1, 2, ... in declaration order
+ INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE,
+ INVALID_STRING_LITERAL,
+ INVALID_NUMBER_LITERAL,
+ INVALID_PREPROCESSOR_DIRECTIVE,
+};
+const char *lerr_as_cstr(lerr_t); // Maps a lexer status code to a C string, presumably its name; only declared here, defined outside this header.
+
+lerr_t tokenise_string(std::string_view, std::vector<token_t> &); // Takes text by view and a token vector by mutable reference (presumably filled with the lexed tokens) and returns a lerr_t status. NOTE(review): std::string_view is used but <string_view> is not included directly.
+
+#endif