From e7a09c0de457b8cc3512687b406137b8a471c3c4 Mon Sep 17 00:00:00 2001 From: Aryadev Chavali Date: Sun, 14 Apr 2024 16:52:58 +0630 Subject: Wrote a new lexer API in C++ Essentially a refactor of the C formed lexer into C++ style. I can already see some benefits from doing this, in particular speed of prototyping. --- Makefile | 10 +++---- asm/lexer.hpp | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 5 deletions(-) create mode 100644 asm/lexer.hpp diff --git a/Makefile b/Makefile index 3fc42a8..e0c076b 100644 --- a/Makefile +++ b/Makefile @@ -2,11 +2,11 @@ CC=gcc CPP=g++ VERBOSE=0 -GENERAL-FLAGS=-Wall -Wextra -Werror -Wswitch-enum -I. +GENERAL-FLAGS=-Wall -Wextra -Werror -Wswitch-enum -I$(shell pwd) DEBUG-FLAGS=-ggdb -fsanitize=address RELEASE-FLAGS=-O3 CFLAGS:=$(GENERAL-FLAGS) -std=c11 $(DEBUG-FLAGS) -D VERBOSE=$(VERBOSE) -CPPFLAGS:=$(GENERAL_FLAGS) $(DEBUG-FLAGS) -D VERBOSE=$(VERBOSE) +CPPFLAGS:=$(GENERAL-FLAGS) -std=c++17 $(DEBUG-FLAGS) -D VERBOSE=$(VERBOSE) LIBS=-lm DIST=build @@ -35,7 +35,7 @@ VM_OUT=$(DIST)/ovm.out ## ASSEMBLY setup ASM_DIST=$(DIST)/asm ASM_SRC=asm -ASM_CODE:=$(addprefix $(ASM_SRC)/, ) +ASM_CODE:=$(addprefix $(ASM_SRC)/, lexer.cpp) ASM_OBJECTS:=$(ASM_CODE:$(ASM_SRC)/%.cpp=$(ASM_DIST)/%.o) ASM_DEPS:=$(ASM_OBJECTS:%.o=%.d) $(ASM_DIST)/main.d ASM_CFLAGS=$(CPPFLAGS) @@ -81,8 +81,8 @@ $(ASM_OUT): $(LIB_OBJECTS) $(ASM_OBJECTS) $(ASM_DIST)/main.o -include $(ASM_DEPS) $(ASM_DIST)/%.o: $(ASM_SRC)/%.cpp - @$(CPP) $(ASM_CFLAGS) -MMD -c $< -o $@ $(LIBS) - @echo "$(TERM_YELLOW)$@$(TERM_RESET): $<" + $(CPP) $(ASM_CFLAGS) -MMD -c $< -o $@ $(LIBS) + echo "$(TERM_YELLOW)$@$(TERM_RESET): $<" ## EXAMPLES recipes $(EXAMPLES_DIST)/%.out: $(EXAMPLES_SRC)/%.asm $(ASM_OUT) diff --git a/asm/lexer.hpp b/asm/lexer.hpp new file mode 100644 index 0000000..4e8439b --- /dev/null +++ b/asm/lexer.hpp @@ -0,0 +1,96 @@ +/* Copyright (C) 2024 Aryadev Chavali + + * You may distribute and modify this code under the terms of the 
+ * GPLv2 license. You should have received a copy of the GPLv2
+ * license with this file. If not, please write to:
+ * aryadev@aryadevchavali.com.
+
+ * Created: 2024-04-14
+ * Author: Aryadev Chavali
+ * Description: Lexer for assembly language
+ */
+
+#ifndef LEXER_HPP
+#define LEXER_HPP
+
+#include <ostream>     // std::ostream, used by operator<< below
+#include <string>      // std::string, used by token_t::content
+#include <string_view> // std::string_view, input type of tokenise_string
+#include <vector>      // std::vector, output container of tokenise_string
+
+enum token_type_t // tag stored in token_t::type
+{
+  PP_CONST,     // %const()...
+  PP_USE,       // %use
+  PP_END,       // %end
+  PP_REFERENCE, // $
+  GLOBAL,
+  STAR,
+  LITERAL_NUMBER,
+  LITERAL_CHAR,
+  LITERAL_STRING,
+  NOOP,
+  HALT,
+  PUSH,
+  POP,
+  PUSH_REG,
+  MOV,
+  DUP,
+  MALLOC,
+  MALLOC_STACK,
+  MSET,
+  MSET_STACK,
+  MGET,
+  MGET_STACK,
+  MDELETE,
+  MSIZE,
+  NOT,
+  OR,
+  AND,
+  XOR,
+  EQ,
+  LT,
+  LTE,
+  GT,
+  GTE,
+  PLUS,
+  SUB,
+  MULT,
+  PRINT,
+  JUMP_ABS,
+  JUMP_STACK,
+  JUMP_IF,
+  CALL,
+  CALL_STACK,
+  RET,
+  SYMBOL,
+};
+
+const char *token_type_as_cstr(token_type_t type); // C-string form of a token_type_t; defined outside this header
+
+struct token_t // a token: type tag, column/line numbers and text content
+{
+  token_type_t type;
+  size_t column, line; // presumably the token's position in the source text -- confirm in lexer.cpp
+  std::string content;
+
+  token_t();
+  token_t(token_type_t, std::string, size_t col = 0, size_t line = 0); // col and line default to 0
+};
+
+std::ostream &operator<<(std::ostream &, token_t &); // stream insertion for a token; takes a non-const reference
+
+enum lerr_t // result code returned by tokenise_string
+{
+  OK = 0,
+  INVALID_CHAR_LITERAL,
+  INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE,
+  INVALID_STRING_LITERAL,
+  INVALID_NUMBER_LITERAL,
+  INVALID_PREPROCESSOR_DIRECTIVE,
+};
+const char *lerr_as_cstr(lerr_t); // C-string form of an lerr_t; defined outside this header
+
+lerr_t tokenise_string(std::string_view, std::vector<token_t> &); // NOTE(review): element type restored as token_t -- confirm against lexer.cpp
+
+#endif // LEXER_HPP
-- 
cgit v1.2.3-13-gbd6f