Wrote a new lexer API in C++

Essentially a refactor of the C-style lexer into C++ style.  I can
already see some benefits from doing this, in particular the speed of
prototyping.
This commit is contained in:
2024-04-14 16:52:58 +06:30
parent 1e6f60a869
commit e7a09c0de4
2 changed files with 101 additions and 5 deletions

View File

@@ -2,11 +2,11 @@ CC=gcc
CPP=g++
VERBOSE=0
GENERAL-FLAGS=-Wall -Wextra -Werror -Wswitch-enum -I.
GENERAL-FLAGS=-Wall -Wextra -Werror -Wswitch-enum -I$(shell pwd)
DEBUG-FLAGS=-ggdb -fsanitize=address
RELEASE-FLAGS=-O3
CFLAGS:=$(GENERAL-FLAGS) -std=c11 $(DEBUG-FLAGS) -D VERBOSE=$(VERBOSE)
CPPFLAGS:=$(GENERAL_FLAGS) $(DEBUG-FLAGS) -D VERBOSE=$(VERBOSE)
CPPFLAGS:=$(GENERAL-FLAGS) -std=c++17 $(DEBUG-FLAGS) -D VERBOSE=$(VERBOSE)
LIBS=-lm
DIST=build
@@ -35,7 +35,7 @@ VM_OUT=$(DIST)/ovm.out
## ASSEMBLY setup
ASM_DIST=$(DIST)/asm
ASM_SRC=asm
ASM_CODE:=$(addprefix $(ASM_SRC)/, )
ASM_CODE:=$(addprefix $(ASM_SRC)/, lexer.cpp)
ASM_OBJECTS:=$(ASM_CODE:$(ASM_SRC)/%.cpp=$(ASM_DIST)/%.o)
ASM_DEPS:=$(ASM_OBJECTS:%.o=%.d) $(ASM_DIST)/main.d
ASM_CFLAGS=$(CPPFLAGS)
@@ -81,8 +81,8 @@ $(ASM_OUT): $(LIB_OBJECTS) $(ASM_OBJECTS) $(ASM_DIST)/main.o
-include $(ASM_DEPS)
$(ASM_DIST)/%.o: $(ASM_SRC)/%.cpp
@$(CPP) $(ASM_CFLAGS) -MMD -c $< -o $@ $(LIBS)
@echo "$(TERM_YELLOW)$@$(TERM_RESET): $<"
$(CPP) $(ASM_CFLAGS) -MMD -c $< -o $@ $(LIBS)
echo "$(TERM_YELLOW)$@$(TERM_RESET): $<"
## EXAMPLES recipes
$(EXAMPLES_DIST)/%.out: $(EXAMPLES_SRC)/%.asm $(ASM_OUT)

96
asm/lexer.hpp Normal file
View File

@@ -0,0 +1,96 @@
/* Copyright (C) 2024 Aryadev Chavali
* You may distribute and modify this code under the terms of the
* GPLv2 license. You should have received a copy of the GPLv2
* license with this file. If not, please write to:
* aryadev@aryadevchavali.com.
* Created: 2024-04-14
* Author: Aryadev Chavali
* Description: Lexer for assembly language
*/
#ifndef LEXER_HPP
#define LEXER_HPP
#include <cstddef>
#include <ostream>
#include <string>
#include <string_view>
#include <tuple>
#include <vector>
/* Kinds of token used by the assembly lexer (stored in token_t::type).
 * The enumerators are unscoped and have no explicit values, so they are
 * numbered from 0 in declaration order. */
enum token_type_t
{
// Preprocessor tokens
PP_CONST, // %const(<symbol>)...
PP_USE, // %use <string>
PP_END, // %end
PP_REFERENCE, // $<symbol>
// NOTE(review): the syntax for GLOBAL and STAR is not documented here --
// confirm against the lexer implementation.
GLOBAL,
STAR,
// Literal values
LITERAL_NUMBER,
LITERAL_CHAR,
LITERAL_STRING,
// Presumably one token per assembly instruction mnemonic -- TODO confirm
// against the VM's opcode list.
NOOP,
HALT,
PUSH,
POP,
PUSH_REG,
MOV,
DUP,
MALLOC,
MALLOC_STACK,
MSET,
MSET_STACK,
MGET,
MGET_STACK,
MDELETE,
MSIZE,
NOT,
OR,
AND,
XOR,
EQ,
LT,
LTE,
GT,
GTE,
PLUS,
SUB,
MULT,
PRINT,
JUMP_ABS,
JUMP_STACK,
JUMP_IF,
CALL,
CALL_STACK,
RET,
// Presumably any identifier that is not one of the tokens above -- TODO
// confirm.
SYMBOL,
};
/* Returns a C string for the given token type.  The definition is not in
 * this header; presumably the string is a printable name for the
 * enumerator -- TODO confirm, including the result for out-of-range
 * values. */
const char *token_type_as_cstr(token_type_t type);
/* A single token: its kind, its textual content and a column/line pair
 * (presumably the token's position in the source text -- TODO confirm
 * whether these are 0- or 1-based). */
struct token_t
{
  // Kind of token.  No default initialiser here: the constructors (defined
  // outside this header) are responsible for setting it.
  token_type_t type;
  // Default to 0, matching the default arguments of the constructor below,
  // so the fields are never left indeterminate.
  std::size_t column{0};
  std::size_t line{0};
  // Text associated with the token.
  std::string content;

  // Default constructor (defined outside this header).
  token_t();
  // Builds a token from its type and content; `col` and `line` default to 0
  // when omitted.
  token_t(token_type_t, std::string, std::size_t col = 0,
          std::size_t line = 0);
};
/* Stream insertion for a token; returns a std::ostream reference
 * (presumably the stream passed in, to allow chaining -- the definition is
 * not in this header).
 * NOTE(review): the token is taken by non-const reference, so const tokens
 * and temporaries cannot be streamed.  Consider `const token_t &`, changed
 * together with the definition. */
std::ostream &operator<<(std::ostream &, token_t &);
/* Lexer error codes, as returned by tokenise_string.  OK is explicitly 0;
 * the remaining enumerators follow in declaration order. */
enum lerr_t
{
// Zero value; presumably means no error occurred -- TODO confirm.
OK = 0,
// The names below indicate which construct was rejected; the exact
// conditions are decided by the lexer implementation (not in this header).
INVALID_CHAR_LITERAL,
INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE,
INVALID_STRING_LITERAL,
INVALID_NUMBER_LITERAL,
INVALID_PREPROCESSOR_DIRECTIVE,
};
/* Returns a C string for the given lexer error code.  The definition is
 * not in this header; presumably a printable name or message -- TODO
 * confirm. */
const char *lerr_as_cstr(lerr_t);
/* Tokenises the given text.
 * First parameter: the text to lex, passed as a non-owning view.
 * Second parameter: vector of tokens passed by reference; presumably the
 * tokens produced are appended to it -- TODO confirm whether it is cleared
 * first and what it holds when an error is returned.
 * Returns a lerr_t error code. */
lerr_t tokenise_string(std::string_view, std::vector<token_t> &);
#endif