aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAryadev Chavali <aryadev@aryadevchavali.com>2024-04-14 16:52:58 +0630
committerAryadev Chavali <aryadev@aryadevchavali.com>2024-04-14 16:52:58 +0630
commite7a09c0de457b8cc3512687b406137b8a471c3c4 (patch)
tree70abc9d28ca6987be6b4f2560d05e7cf9ab63f20
parent1e6f60a869e578bdd390029da71d5dc920ef1b70 (diff)
downloadovm-e7a09c0de457b8cc3512687b406137b8a471c3c4.tar.gz
ovm-e7a09c0de457b8cc3512687b406137b8a471c3c4.tar.bz2
ovm-e7a09c0de457b8cc3512687b406137b8a471c3c4.zip
Wrote a new lexer API in C++
Essentially a refactor of the C-style lexer into C++ style. I can already see some benefits from doing this, in particular the speed of prototyping.
-rw-r--r--Makefile10
-rw-r--r--asm/lexer.hpp96
2 files changed, 101 insertions, 5 deletions
diff --git a/Makefile b/Makefile
index 3fc42a8..e0c076b 100644
--- a/Makefile
+++ b/Makefile
@@ -2,11 +2,11 @@ CC=gcc
CPP=g++
VERBOSE=0
-GENERAL-FLAGS=-Wall -Wextra -Werror -Wswitch-enum -I.
+GENERAL-FLAGS=-Wall -Wextra -Werror -Wswitch-enum -I$(shell pwd)
DEBUG-FLAGS=-ggdb -fsanitize=address
RELEASE-FLAGS=-O3
CFLAGS:=$(GENERAL-FLAGS) -std=c11 $(DEBUG-FLAGS) -D VERBOSE=$(VERBOSE)
-CPPFLAGS:=$(GENERAL_FLAGS) $(DEBUG-FLAGS) -D VERBOSE=$(VERBOSE)
+CPPFLAGS:=$(GENERAL-FLAGS) -std=c++17 $(DEBUG-FLAGS) -D VERBOSE=$(VERBOSE)
LIBS=-lm
DIST=build
@@ -35,7 +35,7 @@ VM_OUT=$(DIST)/ovm.out
## ASSEMBLY setup
ASM_DIST=$(DIST)/asm
ASM_SRC=asm
-ASM_CODE:=$(addprefix $(ASM_SRC)/, )
+ASM_CODE:=$(addprefix $(ASM_SRC)/, lexer.cpp)
ASM_OBJECTS:=$(ASM_CODE:$(ASM_SRC)/%.cpp=$(ASM_DIST)/%.o)
ASM_DEPS:=$(ASM_OBJECTS:%.o=%.d) $(ASM_DIST)/main.d
ASM_CFLAGS=$(CPPFLAGS)
@@ -81,8 +81,8 @@ $(ASM_OUT): $(LIB_OBJECTS) $(ASM_OBJECTS) $(ASM_DIST)/main.o
-include $(ASM_DEPS)
$(ASM_DIST)/%.o: $(ASM_SRC)/%.cpp
- @$(CPP) $(ASM_CFLAGS) -MMD -c $< -o $@ $(LIBS)
- @echo "$(TERM_YELLOW)$@$(TERM_RESET): $<"
+ $(CPP) $(ASM_CFLAGS) -MMD -c $< -o $@ $(LIBS)
+ echo "$(TERM_YELLOW)$@$(TERM_RESET): $<"
## EXAMPLES recipes
$(EXAMPLES_DIST)/%.out: $(EXAMPLES_SRC)/%.asm $(ASM_OUT)
diff --git a/asm/lexer.hpp b/asm/lexer.hpp
new file mode 100644
index 0000000..4e8439b
--- /dev/null
+++ b/asm/lexer.hpp
@@ -0,0 +1,96 @@
+/* Copyright (C) 2024 Aryadev Chavali
+
+ * You may distribute and modify this code under the terms of the
+ * GPLv2 license. You should have received a copy of the GPLv2
+ * license with this file. If not, please write to:
+ * aryadev@aryadevchavali.com.
+
+ * Created: 2024-04-14
+ * Author: Aryadev Chavali
+ * Description: Lexer for assembly language
+ */
+
+#ifndef LEXER_HPP
+#define LEXER_HPP
+
+#include <ostream>
+#include <string>
+#include <tuple>
+#include <vector>
+
+enum token_type_t // Every kind a token_t can be tagged with (stored in token_t::type).
+{ // NOTE(review): unscoped enum, so enumerators such as NOT, OR, AND are visible in the enclosing scope
+ PP_CONST, // %const(<symbol>)...
+ PP_USE, // %use <string>
+ PP_END, // %end
+ PP_REFERENCE, // $<symbol>
+ GLOBAL, // NOTE(review): surface syntax is not documented in this header — TODO confirm
+ STAR, // presumably a literal `*` — TODO confirm
+ LITERAL_NUMBER,
+ LITERAL_CHAR,
+ LITERAL_STRING,
+ NOOP, // NOOP..RET: presumably one enumerator per instruction mnemonic — TODO confirm against the VM opcode list
+ HALT,
+ PUSH,
+ POP,
+ PUSH_REG,
+ MOV,
+ DUP,
+ MALLOC,
+ MALLOC_STACK,
+ MSET,
+ MSET_STACK,
+ MGET,
+ MGET_STACK,
+ MDELETE,
+ MSIZE,
+ NOT,
+ OR,
+ AND,
+ XOR,
+ EQ,
+ LT,
+ LTE,
+ GT,
+ GTE,
+ PLUS,
+ SUB,
+ MULT,
+ PRINT,
+ JUMP_ABS,
+ JUMP_STACK,
+ JUMP_IF,
+ CALL,
+ CALL_STACK,
+ RET,
+ SYMBOL, // presumably any identifier not matched by an earlier kind — TODO confirm
+};
+
+const char *token_type_as_cstr(token_type_t type); // C-string for a token type; presumably its name — definition not visible here, TODO confirm
+
+struct token_t // Pairs a token_type_t with column/line numbers and a content string.
+{
+ token_type_t type;
+ size_t column, line; // NOTE(review): size_t is used unqualified and <cstddef> is not included directly by this header
+ std::string content; // presumably the token's source text — TODO confirm against the definition
+
+ token_t();
+ token_t(token_type_t, std::string, size_t col = 0, size_t line = 0); // col and line default to 0 when omitted
+};
+
+std::ostream &operator<<(std::ostream &, token_t &); // NOTE(review): token_t is taken by non-const reference, so const tokens and temporaries cannot be streamed
+
+enum lerr_t // Status codes; this is the return type of tokenise_string.
+{ // NOTE(review): unscoped enum, so OK and the other enumerators are visible in the enclosing scope
+ OK = 0, // explicitly pinned to zero; presumably means "no error" — TODO confirm
+ INVALID_CHAR_LITERAL,
+ INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE,
+ INVALID_STRING_LITERAL,
+ INVALID_NUMBER_LITERAL,
+ INVALID_PREPROCESSOR_DIRECTIVE,
+};
+const char *lerr_as_cstr(lerr_t); // C-string for a lerr_t; the exact text is chosen by the definition, which is not visible here
+
+lerr_t tokenise_string(std::string_view, std::vector<token_t> &); // Takes a string view and a mutable token vector, returns a lerr_t; presumably appends lexed tokens to the vector — TODO confirm. NOTE(review): <string_view> is not included directly by this header.
+
+#endif