diff options
author | Aryadev Chavali <aryadev@aryadevchavali.com> | 2024-04-14 16:52:58 +0630 |
---|---|---|
committer | Aryadev Chavali <aryadev@aryadevchavali.com> | 2024-04-14 16:52:58 +0630 |
commit | e7a09c0de457b8cc3512687b406137b8a471c3c4 (patch) | |
tree | 70abc9d28ca6987be6b4f2560d05e7cf9ab63f20 | |
parent | 1e6f60a869e578bdd390029da71d5dc920ef1b70 (diff) | |
download | ovm-e7a09c0de457b8cc3512687b406137b8a471c3c4.tar.gz ovm-e7a09c0de457b8cc3512687b406137b8a471c3c4.tar.bz2 ovm-e7a09c0de457b8cc3512687b406137b8a471c3c4.zip |
Wrote a new lexer API in C++
Essentially a refactor of the C-style lexer into idiomatic C++. I can
already see some benefits from doing this, in particular the speed of
prototyping.
-rw-r--r-- | Makefile | 10 | ||||
-rw-r--r-- | asm/lexer.hpp | 96 |
2 files changed, 101 insertions, 5 deletions
@@ -2,11 +2,11 @@ CC=gcc
 CPP=g++
 VERBOSE=0
-GENERAL-FLAGS=-Wall -Wextra -Werror -Wswitch-enum -I.
+GENERAL-FLAGS=-Wall -Wextra -Werror -Wswitch-enum -I$(shell pwd)
 DEBUG-FLAGS=-ggdb -fsanitize=address
 RELEASE-FLAGS=-O3
 CFLAGS:=$(GENERAL-FLAGS) -std=c11 $(DEBUG-FLAGS) -D VERBOSE=$(VERBOSE)
-CPPFLAGS:=$(GENERAL_FLAGS) $(DEBUG-FLAGS) -D VERBOSE=$(VERBOSE)
+CPPFLAGS:=$(GENERAL-FLAGS) -std=c++17 $(DEBUG-FLAGS) -D VERBOSE=$(VERBOSE)
 LIBS=-lm
 DIST=build
@@ -35,7 +35,7 @@ VM_OUT=$(DIST)/ovm.out
 ## ASSEMBLY setup
 ASM_DIST=$(DIST)/asm
 ASM_SRC=asm
-ASM_CODE:=$(addprefix $(ASM_SRC)/, )
+ASM_CODE:=$(addprefix $(ASM_SRC)/, lexer.cpp)
 ASM_OBJECTS:=$(ASM_CODE:$(ASM_SRC)/%.cpp=$(ASM_DIST)/%.o)
 ASM_DEPS:=$(ASM_OBJECTS:%.o=%.d) $(ASM_DIST)/main.d
 ASM_CFLAGS=$(CPPFLAGS)
@@ -81,8 +81,8 @@ $(ASM_OUT): $(LIB_OBJECTS) $(ASM_OBJECTS) $(ASM_DIST)/main.o
 -include $(ASM_DEPS)
 $(ASM_DIST)/%.o: $(ASM_SRC)/%.cpp
-	@$(CPP) $(ASM_CFLAGS) -MMD -c $< -o $@ $(LIBS)
-	@echo "$(TERM_YELLOW)$@$(TERM_RESET): $<"
+	$(CPP) $(ASM_CFLAGS) -MMD -c $< -o $@ $(LIBS)
+	echo "$(TERM_YELLOW)$@$(TERM_RESET): $<"
 ## EXAMPLES recipes
 $(EXAMPLES_DIST)/%.out: $(EXAMPLES_SRC)/%.asm $(ASM_OUT)
diff --git a/asm/lexer.hpp b/asm/lexer.hpp
new file mode 100644
index 0000000..4e8439b
--- /dev/null
+++ b/asm/lexer.hpp
@@ -0,0 +1,96 @@
+/* Copyright (C) 2024 Aryadev Chavali
+
+ * You may distribute and modify this code under the terms of the
+ * GPLv2 license. You should have received a copy of the GPLv2
+ * license with this file. If not, please write to:
+ * aryadev@aryadevchavali.com.
+
+ * Created: 2024-04-14
+ * Author: Aryadev Chavali
+ * Description: Lexer for assembly language
+ */
+
+#ifndef LEXER_HPP
+#define LEXER_HPP
+
+#include <ostream>
+#include <string>
+#include <tuple>
+#include <vector>
+
+enum token_type_t
+{
+  PP_CONST, // %const(<symbol>)...
+  PP_USE, // %use <string>
+  PP_END, // %end
+  PP_REFERENCE, // $<symbol>
+  GLOBAL,
+  STAR,
+  LITERAL_NUMBER,
+  LITERAL_CHAR,
+  LITERAL_STRING,
+  NOOP,
+  HALT,
+  PUSH,
+  POP,
+  PUSH_REG,
+  MOV,
+  DUP,
+  MALLOC,
+  MALLOC_STACK,
+  MSET,
+  MSET_STACK,
+  MGET,
+  MGET_STACK,
+  MDELETE,
+  MSIZE,
+  NOT,
+  OR,
+  AND,
+  XOR,
+  EQ,
+  LT,
+  LTE,
+  GT,
+  GTE,
+  PLUS,
+  SUB,
+  MULT,
+  PRINT,
+  JUMP_ABS,
+  JUMP_STACK,
+  JUMP_IF,
+  CALL,
+  CALL_STACK,
+  RET,
+  SYMBOL,
+};
+
+const char *token_type_as_cstr(token_type_t type);
+
+struct token_t
+{
+  token_type_t type;
+  size_t column, line;
+  std::string content;
+
+  token_t();
+  token_t(token_type_t, std::string, size_t col = 0, size_t line = 0);
+};
+
+std::ostream &operator<<(std::ostream &, token_t &);
+
+enum lerr_t
+{
+  OK = 0,
+  INVALID_CHAR_LITERAL,
+  INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE,
+  INVALID_STRING_LITERAL,
+  INVALID_NUMBER_LITERAL,
+  INVALID_PREPROCESSOR_DIRECTIVE,
+};
+const char *lerr_as_cstr(lerr_t);
+
+lerr_t tokenise_string(std::string_view, std::vector<token_t> &);
+
+#endif