Reworked lexer to deal with invalid type suffixes

Now ~push.magic~ will result in an error about it being an invalid
type suffix.
This commit is contained in:
2024-06-01 14:37:24 +01:00
parent 42dbf515f0
commit 15d39dcfe7
2 changed files with 254 additions and 49 deletions

View File

@@ -42,6 +42,64 @@ namespace Lexer
return (src.size() > match.size() && src.substr(0, match.size()) == match); return (src.size() > match.size() && src.substr(0, match.size()) == match);
} }
Err tokenise_unsigned_type(const string_view &symbol,
                           Token::OperandType &type, size_t column,
                           size_t line)
{
  // Parse an unsigned operand-type suffix (BYTE, HWORD or WORD) into
  // `type`.  On an unrecognised suffix, report an
  // EXPECTED_UNSIGNED_TYPE_SUFFIX error at (column, line) and leave
  // `type` untouched.
  if (symbol == "BYTE")
    type = Token::OperandType::BYTE;
  else if (symbol == "HWORD")
    type = Token::OperandType::HWORD;
  else if (symbol == "WORD")
    type = Token::OperandType::WORD;
  else
    return Err{Err::Type::EXPECTED_UNSIGNED_TYPE_SUFFIX, column, line};
  return Err{};
}
Err tokenise_signed_type(const string_view &symbol, Token::OperandType &type,
                         size_t column, size_t line)
{
  // Parse an operand-type suffix where signed variants are allowed
  // (BYTE, CHAR, HWORD, INT, WORD or LONG) into `type`.  On an
  // unrecognised suffix, report an EXPECTED_TYPE_SUFFIX error at
  // (column, line) and leave `type` untouched.
  if (symbol == "BYTE")
    type = Token::OperandType::BYTE;
  else if (symbol == "CHAR")
    type = Token::OperandType::CHAR;
  else if (symbol == "HWORD")
    type = Token::OperandType::HWORD;
  else if (symbol == "INT")
    type = Token::OperandType::INT;
  else if (symbol == "WORD")
    type = Token::OperandType::WORD;
  else if (symbol == "LONG")
    type = Token::OperandType::LONG;
  else
    return Err{Err::Type::EXPECTED_TYPE_SUFFIX, column, line};
  return Err{};
}
Err tokenise_symbol(string_view &source, size_t &column, size_t line, Err tokenise_symbol(string_view &source, size_t &column, size_t line,
Token &token) Token &token)
{ {
@@ -84,47 +142,91 @@ namespace Lexer
} }
else if (initial_match(sym, "PUSH.REG.")) else if (initial_match(sym, "PUSH.REG."))
{ {
token = Token{Token::Type::PUSH_REG, sym.substr(9)}; token.type = Token::Type::PUSH_REG;
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(9),
token.operand_type, column + 9, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "PUSH.")) else if (initial_match(sym, "PUSH."))
{ {
token = Token{Token::Type::PUSH, sym.substr(5)}; token.type = Token::Type::PUSH;
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(5),
token.operand_type, column + 5, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "POP.")) else if (initial_match(sym, "POP."))
{ {
token = Token{Token::Type::POP, sym.substr(4)}; token.type = Token::Type::POP;
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(4),
token.operand_type, column + 4, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "MOV.")) else if (initial_match(sym, "MOV."))
{ {
token = Token{Token::Type::MOV, sym.substr(4)}; token.type = Token::Type::MOV;
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(4),
token.operand_type, column + 4, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "DUP.")) else if (initial_match(sym, "DUP."))
{ {
token = Token{Token::Type::DUP, sym.substr(4)}; token.type = Token::Type::DUP;
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(4),
token.operand_type, column + 4, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "MALLOC.STACK.")) else if (initial_match(sym, "MALLOC.STACK."))
{ {
token = Token{Token::Type::MALLOC_STACK, sym.substr(13)}; token.type = Token::Type::MALLOC_STACK;
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(13),
token.operand_type, column + 13, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "MALLOC.")) else if (initial_match(sym, "MALLOC."))
{ {
token = Token{Token::Type::MALLOC, sym.substr(7)}; token.type = Token::Type::MALLOC;
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(7),
token.operand_type, column + 7, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "MSET.STACK.")) else if (initial_match(sym, "MSET.STACK."))
{ {
token = Token{Token::Type::MSET_STACK, sym.substr(11)}; token.type = Token::Type::MSET_STACK;
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(11),
token.operand_type, column + 11, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "MSET.")) else if (initial_match(sym, "MSET."))
{ {
token = Token{Token::Type::MSET, sym.substr(5)}; token.type = Token::Type::MSET;
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(5),
token.operand_type, column + 5, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "MGET.STACK.")) else if (initial_match(sym, "MGET.STACK."))
{ {
token = Token{Token::Type::MGET_STACK, sym.substr(11)}; token.type = Token::Type::MGET_STACK;
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(11),
token.operand_type, column + 11, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "MGET.")) else if (initial_match(sym, "MGET."))
{ {
token = Token{Token::Type::MGET, sym.substr(5)}; token.type = Token::Type::MGET;
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(5),
token.operand_type, column + 5, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (sym == "MDELETE") else if (sym == "MDELETE")
{ {
@@ -134,57 +236,109 @@ namespace Lexer
{ {
token.type = Token::Type::MSIZE; token.type = Token::Type::MSIZE;
} }
else if (initial_match(sym, "NOT.")) else if (initial_match(sym, "NOT."))
{ {
token = Token{Token::Type::NOT, sym.substr(4)}; token.type = Token::Type::NOT;
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(4),
token.operand_type, column + 4, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "OR.")) else if (initial_match(sym, "OR."))
{ {
token = Token{Token::Type::OR, sym.substr(3)}; token.type = Token::Type::OR;
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(3),
token.operand_type, column + 3, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "AND.")) else if (initial_match(sym, "AND."))
{ {
token = Token{Token::Type::AND, sym.substr(4)}; token.type = Token::Type::AND;
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(4),
token.operand_type, column + 4, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "XOR.")) else if (initial_match(sym, "XOR."))
{ {
token = Token{Token::Type::XOR, sym.substr(4)}; token.type = Token::Type::XOR;
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(4),
token.operand_type, column + 4, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "EQ.")) else if (initial_match(sym, "EQ."))
{ {
token = Token{Token::Type::EQ, sym.substr(3)}; token.type = Token::Type::EQ;
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(3),
token.operand_type, column + 3, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "LTE.")) else if (initial_match(sym, "LTE."))
{ {
token = Token{Token::Type::LTE, sym.substr(4)}; token.type = Token::Type::LTE;
Err err = tokenise_signed_type(std::string_view{sym}.substr(4),
token.operand_type, column + 4, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "LT.")) else if (initial_match(sym, "LT."))
{ {
token = Token{Token::Type::LT, sym.substr(3)}; token.type = Token::Type::LT;
Err err = tokenise_signed_type(std::string_view{sym}.substr(3),
token.operand_type, column + 3, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "GTE.")) else if (initial_match(sym, "GTE."))
{ {
token = Token{Token::Type::GTE, sym.substr(4)}; token.type = Token::Type::GTE;
Err err = tokenise_signed_type(std::string_view{sym}.substr(4),
token.operand_type, column + 4, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "GT.")) else if (initial_match(sym, "GT."))
{ {
token = Token{Token::Type::GT, sym.substr(3)}; token.type = Token::Type::GT;
Err err = tokenise_signed_type(std::string_view{sym}.substr(3),
token.operand_type, column + 3, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "SUB.")) else if (initial_match(sym, "SUB."))
{ {
token = Token{Token::Type::SUB, sym.substr(4)}; token.type = Token::Type::SUB;
Err err = tokenise_signed_type(std::string_view{sym}.substr(4),
token.operand_type, column + 4, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "PLUS.")) else if (initial_match(sym, "PLUS."))
{ {
token = Token{Token::Type::PLUS, sym.substr(5)}; token.type = Token::Type::PLUS;
Err err = tokenise_signed_type(std::string_view{sym}.substr(5),
token.operand_type, column + 5, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "MULT.")) else if (initial_match(sym, "MULT."))
{ {
token = Token{Token::Type::MULT, sym.substr(5)}; token.type = Token::Type::MULT;
Err err = tokenise_signed_type(std::string_view{sym}.substr(5),
token.operand_type, column + 5, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (initial_match(sym, "PRINT.")) else if (initial_match(sym, "PRINT."))
{ {
token = Token{Token::Type::PRINT, sym.substr(6)}; token.type = Token::Type::PRINT;
Err err = tokenise_signed_type(std::string_view{sym}.substr(6),
token.operand_type, column + 6, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (sym == "JUMP.ABS") else if (sym == "JUMP.ABS")
{ {
@@ -196,7 +350,11 @@ namespace Lexer
} }
else if (initial_match(sym, "JUMP.IF.")) else if (initial_match(sym, "JUMP.IF."))
{ {
token = Token{Token::Type::JUMP_IF, sym.substr(8)}; token.type = Token::Type::JUMP_IF;
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(8),
token.operand_type, column + 8, line);
if (err.type != Err::Type::OK)
return err;
} }
else if (sym == "CALL.STACK") else if (sym == "CALL.STACK")
{ {
@@ -216,11 +374,10 @@ namespace Lexer
} }
else else
{ {
token.type = Token::Type::SYMBOL; token.type = Token::Type::SYMBOL;
token.content = sym;
} }
if (token.content == "")
token.content = sym;
token.column = column; token.column = column;
column += sym.size() - 1; column += sym.size() - 1;
return Err(); return Err();
@@ -421,8 +578,10 @@ namespace Lexer
Token::Token() Token::Token()
{} {}
Token::Token(Token::Type type, string content, size_t col, size_t line) Token::Token(Token::Type type, string_view content, size_t col, size_t line,
: type{type}, column{col}, line{line}, content{content} OperandType optype)
: type{type}, column{col}, line{line}, content{content},
operand_type{optype}
{} {}
Err::Err(Err::Type type, size_t col, size_t line) Err::Err(Err::Type type, size_t col, size_t line)
@@ -525,11 +684,38 @@ namespace Lexer
return ""; return "";
} }
std::string to_string(const Token::OperandType &type)
{
  // Human-readable name of an operand type; empty string for any value
  // outside the enumeration.
  if (type == Token::OperandType::NIL)
    return "NIL";
  else if (type == Token::OperandType::BYTE)
    return "BYTE";
  else if (type == Token::OperandType::CHAR)
    return "CHAR";
  else if (type == Token::OperandType::HWORD)
    return "HWORD";
  else if (type == Token::OperandType::INT)
    return "INT";
  else if (type == Token::OperandType::WORD)
    return "WORD";
  else if (type == Token::OperandType::LONG)
    return "LONG";
  return "";
}
std::string to_string(const Token &t) std::string to_string(const Token &t)
{ {
std::stringstream stream; std::stringstream stream;
stream << to_string(t.type) << "(`" << t.content << "`)@" << t.line << ", " stream << to_string(t.type);
<< t.column;
if (t.operand_type != Token::OperandType::NIL)
stream << "[" << to_string(t.operand_type) << "]";
if (t.content != "")
stream << "(`" << t.content << "`)";
stream << "@" << t.line << ", " << t.column;
return stream.str(); return stream.str();
} }
@@ -549,6 +735,10 @@ namespace Lexer
return "INVALID_NUMBER_LITERAL"; return "INVALID_NUMBER_LITERAL";
case Err::Type::INVALID_PREPROCESSOR_DIRECTIVE: case Err::Type::INVALID_PREPROCESSOR_DIRECTIVE:
return "INVALID_PREPROCESSOR_DIRECTIVE"; return "INVALID_PREPROCESSOR_DIRECTIVE";
case Err::Type::EXPECTED_TYPE_SUFFIX:
return "EXPECTED_TYPE_SUFFIX";
case Err::Type::EXPECTED_UNSIGNED_TYPE_SUFFIX:
return "EXPECTED_UNSIGNED_TYPE_SUFFIX";
case Err::Type::UNKNOWN_LEXEME: case Err::Type::UNKNOWN_LEXEME:
return "UNKNOWN_LEXEME"; return "UNKNOWN_LEXEME";
default: default:

View File

@@ -71,13 +71,21 @@ namespace Lexer
size_t column, line; size_t column, line;
std::string content; std::string content;
Token(); enum class OperandType
Token(Token::Type, std::string, size_t col = 0, size_t line = 0); {
}; NIL,
BYTE,
CHAR,
HWORD,
INT,
WORD,
LONG
} operand_type;
std::ostream &operator<<(std::ostream &, const Token &); Token();
std::string to_string(const Token::Type &); Token(Token::Type, std::string_view content = "", size_t col = 0,
std::string to_string(const Token &); size_t line = 0, OperandType optype = OperandType::NIL);
};
struct Err struct Err
{ {
@@ -90,17 +98,24 @@ namespace Lexer
INVALID_STRING_LITERAL, INVALID_STRING_LITERAL,
INVALID_NUMBER_LITERAL, INVALID_NUMBER_LITERAL,
INVALID_PREPROCESSOR_DIRECTIVE, INVALID_PREPROCESSOR_DIRECTIVE,
EXPECTED_TYPE_SUFFIX,
EXPECTED_UNSIGNED_TYPE_SUFFIX,
UNKNOWN_LEXEME, UNKNOWN_LEXEME,
} type; } type;
Err(Type type = Type::OK, size_t col = 0, size_t line = 0); Err(Type type = Type::OK, size_t col = 0, size_t line = 0);
}; };
std::ostream &operator<<(std::ostream &, const Err &); Err tokenise_buffer(std::string_view, std::vector<Token *> &);
std::string to_string(const Token::Type &);
std::string to_string(const Token::OperandType &);
std::string to_string(const Token &);
std::string to_string(const Err::Type &); std::string to_string(const Err::Type &);
std::string to_string(const Err &); std::string to_string(const Err &);
Err tokenise_buffer(std::string_view, std::vector<Token *> &); std::ostream &operator<<(std::ostream &, const Token &);
std::ostream &operator<<(std::ostream &, const Err &);
} // namespace Lexer } // namespace Lexer
#endif #endif