Wrote a new lexer API in C++

Essentially a refactor of the C-style lexer into idiomatic C++. I can already see some benefits from doing this, in particular faster prototyping.
author: Aryadev Chavali <aryadev@aryadevchavali.com> 2024-04-14 16:52:58 +0630
committer: Aryadev Chavali <aryadev@aryadevchavali.com> 2024-04-14 16:52:58 +0630
commit: e7a09c0de457b8cc3512687b406137b8a471c3c4 (patch)
tree: 70abc9d28ca6987be6b4f2560d05e7cf9ab63f20
parent: 1e6f60a869e578bdd390029da71d5dc920ef1b70 (diff)
download: ovm-e7a09c0de457b8cc3512687b406137b8a471c3c4.tar.gz
ovm-e7a09c0de457b8cc3512687b406137b8a471c3c4.tar.bz2
ovm-e7a09c0de457b8cc3512687b406137b8a471c3c4.zip
2 files changed, 101 insertions, 5 deletions
diff --git a/Makefile b/Makefile
index 3fc42a8..e0c076b 100644
--- a/Makefile
+++ b/Makefile
@@ -2,11 +2,11 @@ CC=gcc
 CPP=g++
 
 VERBOSE=0
-GENERAL-FLAGS=-Wall -Wextra -Werror -Wswitch-enum -I.
+GENERAL-FLAGS=-Wall -Wextra -Werror -Wswitch-enum -I$(shell pwd)
 DEBUG-FLAGS=-ggdb -fsanitize=address
 RELEASE-FLAGS=-O3
 CFLAGS:=$(GENERAL-FLAGS) -std=c11 $(DEBUG-FLAGS) -D VERBOSE=$(VERBOSE)
-CPPFLAGS:=$(GENERAL_FLAGS) $(DEBUG-FLAGS) -D VERBOSE=$(VERBOSE)
+CPPFLAGS:=$(GENERAL-FLAGS) -std=c++17 $(DEBUG-FLAGS) -D VERBOSE=$(VERBOSE)
 
 LIBS=-lm
 DIST=build
@@ -35,7 +35,7 @@ VM_OUT=$(DIST)/ovm.out
 ## ASSEMBLY setup
 ASM_DIST=$(DIST)/asm
 ASM_SRC=asm
-ASM_CODE:=$(addprefix $(ASM_SRC)/, )
+ASM_CODE:=$(addprefix $(ASM_SRC)/, lexer.cpp)
 ASM_OBJECTS:=$(ASM_CODE:$(ASM_SRC)/%.cpp=$(ASM_DIST)/%.o)
 ASM_DEPS:=$(ASM_OBJECTS:%.o=%.d) $(ASM_DIST)/main.d
 ASM_CFLAGS=$(CPPFLAGS)
@@ -81,8 +81,8 @@ $(ASM_OUT): $(LIB_OBJECTS) $(ASM_OBJECTS) $(ASM_DIST)/main.o
 -include $(ASM_DEPS)
 
 $(ASM_DIST)/%.o: $(ASM_SRC)/%.cpp
-	@$(CPP) $(ASM_CFLAGS) -MMD -c $< -o $@ $(LIBS)
-	@echo "$(TERM_YELLOW)$@$(TERM_RESET): $<"
+	$(CPP) $(ASM_CFLAGS) -MMD -c $< -o $@ $(LIBS)
+	echo "$(TERM_YELLOW)$@$(TERM_RESET): $<"
 
 ## EXAMPLES recipes
 $(EXAMPLES_DIST)/%.out: $(EXAMPLES_SRC)/%.asm $(ASM_OUT)
diff --git a/asm/lexer.hpp b/asm/lexer.hpp
new file mode 100644
index 0000000..4e8439b
--- /dev/null
+++ b/asm/lexer.hpp
@@ -0,0 +1,96 @@
+/* Copyright (C) 2024 Aryadev Chavali
+
+ * You may distribute and modify this code under the terms of the
+ * GPLv2 license.  You should have received a copy of the GPLv2
+ * license with this file.  If not, please write to:
+ * aryadev@aryadevchavali.com.
+
+ * Created: 2024-04-14
+ * Author: Aryadev Chavali
+ * Description: Lexer for assembly language
+ */
+
+#ifndef LEXER_HPP
+#define LEXER_HPP
+
+#include <ostream>
+#include <string>
+#include <tuple>
+#include <vector>
+
+enum token_type_t // Kind tag stored in token_t::type. Unscoped enum: every enumerator is visible at global scope.
+{
+  PP_CONST,     // %const(<symbol>)...
+  PP_USE,       // %use <string>
+  PP_END,       // %end
+  PP_REFERENCE, // $<symbol>
+  GLOBAL,
+  STAR,
+  LITERAL_NUMBER, // LITERAL_*: literal kinds; the matching failures are the INVALID_*_LITERAL codes in lerr_t
+  LITERAL_CHAR,
+  LITERAL_STRING,
+  NOOP, // NOOP..RET: presumably one kind per instruction mnemonic -- TODO confirm against lexer.cpp
+  HALT,
+  PUSH,
+  POP,
+  PUSH_REG,
+  MOV,
+  DUP,
+  MALLOC,
+  MALLOC_STACK,
+  MSET,
+  MSET_STACK,
+  MGET,
+  MGET_STACK,
+  MDELETE,
+  MSIZE,
+  NOT, // NOTE(review): short generic names (NOT, OR, AND, EQ, ...) are unqualified because the enum is unscoped
+  OR,
+  AND,
+  XOR,
+  EQ,
+  LT,
+  LTE,
+  GT,
+  GTE,
+  PLUS,
+  SUB,
+  MULT,
+  PRINT,
+  JUMP_ABS,
+  JUMP_STACK,
+  JUMP_IF,
+  CALL,
+  CALL_STACK,
+  RET,
+  SYMBOL, // presumably any identifier not matched by a kind above -- TODO confirm
+};
+
+const char *token_type_as_cstr(token_type_t type); // presumably a printable name for `type`; string ownership not shown here -- TODO confirm
+
+struct token_t // One token: its kind, a column/line pair, and its text content
+{
+  token_type_t type;
+  size_t column, line; // presumably the token's position in the input -- TODO confirm 0- or 1-based. NOTE(review): size_t used without including <cstddef>
+  std::string content;
+  // Constructors: the default one takes no arguments; the other takes kind and content, with col and line defaulting to 0.
+  token_t();
+  token_t(token_type_t, std::string, size_t col = 0, size_t line = 0);
+};
+
+std::ostream &operator<<(std::ostream &, token_t &); // NOTE(review): token_t is taken by non-const reference, so const tokens and temporaries cannot be streamed
+
+enum lerr_t // Error code type returned by tokenise_string. Unscoped enum: OK etc. are visible at global scope.
+{
+  OK = 0, // explicitly zero; presumably the "no error" result -- TODO confirm
+  INVALID_CHAR_LITERAL,
+  INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE,
+  INVALID_STRING_LITERAL,
+  INVALID_NUMBER_LITERAL,
+  INVALID_PREPROCESSOR_DIRECTIVE,
+};
+const char *lerr_as_cstr(lerr_t); // presumably a printable name for the error code -- TODO confirm
+
+lerr_t tokenise_string(std::string_view, std::vector<token_t> &); // presumably lexes the view into the vector -- TODO confirm. NOTE(review): <string_view> is not included by this header
+
+#endif
author	Aryadev Chavali <aryadev@aryadevchavali.com>	2024-04-14 16:52:58 +0630
committer	Aryadev Chavali <aryadev@aryadevchavali.com>	2024-04-14 16:52:58 +0630
commit	e7a09c0de457b8cc3512687b406137b8a471c3c4 (patch)
tree	70abc9d28ca6987be6b4f2560d05e7cf9ab63f20
parent	1e6f60a869e578bdd390029da71d5dc920ef1b70 (diff)
download	ovm-e7a09c0de457b8cc3512687b406137b8a471c3c4.tar.gz ovm-e7a09c0de457b8cc3512687b406137b8a471c3c4.tar.bz2 ovm-e7a09c0de457b8cc3512687b406137b8a471c3c4.zip