Reworked lexer to deal with invalid type suffixes
Now ~push.magic~ will result in an error about it being an invalid type suffix.
This commit is contained in:
272
src/lexer.cpp
272
src/lexer.cpp
@@ -42,6 +42,64 @@ namespace Lexer
|
||||
return (src.size() > match.size() && src.substr(0, match.size()) == match);
|
||||
}
|
||||
|
||||
/* Resolve an unsigned type suffix to its operand type.
 *
 * symbol: suffix text to classify; compared exactly (case-sensitive).
 * type:   written only when the suffix is recognised.
 * column, line: position stored in the returned error on failure.
 *
 * Returns a default-constructed Err on success, otherwise an
 * EXPECTED_UNSIGNED_TYPE_SUFFIX error at (column, line).
 */
Err tokenise_unsigned_type(const string_view &symbol,
                           Token::OperandType &type, size_t column,
                           size_t line)
{
  struct Suffix
  {
    const char *name;
    Token::OperandType operand;
  };

  // Every suffix accepted by instructions that take unsigned operands.
  const Suffix accepted[] = {
      {"BYTE", Token::OperandType::BYTE},
      {"HWORD", Token::OperandType::HWORD},
      {"WORD", Token::OperandType::WORD},
  };

  for (const Suffix &candidate : accepted)
  {
    if (symbol == candidate.name)
    {
      type = candidate.operand;
      return Err{};
    }
  }

  // Nothing matched: leave `type` untouched and report the position.
  return Err{Err::Type::EXPECTED_UNSIGNED_TYPE_SUFFIX, column, line};
}
|
||||
|
||||
/* Resolve a type suffix to its operand type, accepting both the signed
 * names (CHAR, INT, LONG) and the unsigned names (BYTE, HWORD, WORD).
 *
 * symbol: suffix text to classify; compared exactly (case-sensitive).
 * type:   written only when the suffix is recognised.
 * column, line: position stored in the returned error on failure.
 *
 * Returns a default-constructed Err on success, otherwise an
 * EXPECTED_TYPE_SUFFIX error at (column, line).
 */
Err tokenise_signed_type(const string_view &symbol, Token::OperandType &type,
                         size_t column, size_t line)
{
  struct Suffix
  {
    const char *name;
    Token::OperandType operand;
  };

  // Full set of operand type suffixes, in the order they were tested.
  const Suffix accepted[] = {
      {"BYTE", Token::OperandType::BYTE},
      {"CHAR", Token::OperandType::CHAR},
      {"HWORD", Token::OperandType::HWORD},
      {"INT", Token::OperandType::INT},
      {"WORD", Token::OperandType::WORD},
      {"LONG", Token::OperandType::LONG},
  };

  for (const Suffix &candidate : accepted)
  {
    if (symbol == candidate.name)
    {
      type = candidate.operand;
      return Err{};
    }
  }

  // Nothing matched: leave `type` untouched and report the position.
  return Err{Err::Type::EXPECTED_TYPE_SUFFIX, column, line};
}
|
||||
|
||||
Err tokenise_symbol(string_view &source, size_t &column, size_t line,
|
||||
Token &token)
|
||||
{
|
||||
@@ -84,47 +142,91 @@ namespace Lexer
|
||||
}
|
||||
else if (initial_match(sym, "PUSH.REG."))
|
||||
{
|
||||
token = Token{Token::Type::PUSH_REG, sym.substr(9)};
|
||||
token.type = Token::Type::PUSH_REG;
|
||||
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(9),
|
||||
token.operand_type, column + 9, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "PUSH."))
|
||||
{
|
||||
token = Token{Token::Type::PUSH, sym.substr(5)};
|
||||
token.type = Token::Type::PUSH;
|
||||
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(5),
|
||||
token.operand_type, column + 5, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "POP."))
|
||||
{
|
||||
token = Token{Token::Type::POP, sym.substr(4)};
|
||||
token.type = Token::Type::POP;
|
||||
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(4),
|
||||
token.operand_type, column + 4, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "MOV."))
|
||||
{
|
||||
token = Token{Token::Type::MOV, sym.substr(4)};
|
||||
token.type = Token::Type::MOV;
|
||||
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(4),
|
||||
token.operand_type, column + 4, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "DUP."))
|
||||
{
|
||||
token = Token{Token::Type::DUP, sym.substr(4)};
|
||||
token.type = Token::Type::DUP;
|
||||
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(4),
|
||||
token.operand_type, column + 4, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "MALLOC.STACK."))
|
||||
{
|
||||
token = Token{Token::Type::MALLOC_STACK, sym.substr(13)};
|
||||
token.type = Token::Type::MALLOC_STACK;
|
||||
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(13),
|
||||
token.operand_type, column + 13, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "MALLOC."))
|
||||
{
|
||||
token = Token{Token::Type::MALLOC, sym.substr(7)};
|
||||
token.type = Token::Type::MALLOC;
|
||||
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(7),
|
||||
token.operand_type, column + 7, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "MSETOKEN.STACK."))
|
||||
else if (initial_match(sym, "MSET.STACK."))
|
||||
{
|
||||
token = Token{Token::Type::MSET_STACK, sym.substr(11)};
|
||||
token.type = Token::Type::MSET_STACK;
|
||||
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(11),
|
||||
token.operand_type, column + 11, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "MSETOKEN."))
|
||||
else if (initial_match(sym, "MSET."))
|
||||
{
|
||||
token = Token{Token::Type::MSET, sym.substr(5)};
|
||||
token.type = Token::Type::MSET;
|
||||
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(5),
|
||||
token.operand_type, column + 5, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "MGETOKEN.STACK."))
|
||||
else if (initial_match(sym, "MGET.STACK."))
|
||||
{
|
||||
token = Token{Token::Type::MGET_STACK, sym.substr(11)};
|
||||
token.type = Token::Type::MGET_STACK;
|
||||
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(11),
|
||||
token.operand_type, column + 11, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "MGETOKEN."))
|
||||
else if (initial_match(sym, "MGET."))
|
||||
{
|
||||
token = Token{Token::Type::MGET, sym.substr(5)};
|
||||
token.type = Token::Type::MGET;
|
||||
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(5),
|
||||
token.operand_type, column + 5, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (sym == "MDELETE")
|
||||
{
|
||||
@@ -134,57 +236,109 @@ namespace Lexer
|
||||
{
|
||||
token.type = Token::Type::MSIZE;
|
||||
}
|
||||
else if (initial_match(sym, "NOTOKEN."))
|
||||
else if (initial_match(sym, "NOT."))
|
||||
{
|
||||
token = Token{Token::Type::NOT, sym.substr(4)};
|
||||
token.type = Token::Type::NOT;
|
||||
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(4),
|
||||
token.operand_type, column + 4, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "OR."))
|
||||
{
|
||||
token = Token{Token::Type::OR, sym.substr(3)};
|
||||
token.type = Token::Type::OR;
|
||||
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(3),
|
||||
token.operand_type, column + 3, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "AND."))
|
||||
{
|
||||
token = Token{Token::Type::AND, sym.substr(4)};
|
||||
token.type = Token::Type::AND;
|
||||
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(4),
|
||||
token.operand_type, column + 4, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "XOR."))
|
||||
{
|
||||
token = Token{Token::Type::XOR, sym.substr(4)};
|
||||
token.type = Token::Type::XOR;
|
||||
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(4),
|
||||
token.operand_type, column + 4, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "EQ."))
|
||||
{
|
||||
token = Token{Token::Type::EQ, sym.substr(3)};
|
||||
token.type = Token::Type::EQ;
|
||||
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(3),
|
||||
token.operand_type, column + 3, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "LTE."))
|
||||
{
|
||||
token = Token{Token::Type::LTE, sym.substr(4)};
|
||||
token.type = Token::Type::LTE;
|
||||
Err err = tokenise_signed_type(std::string_view{sym}.substr(4),
|
||||
token.operand_type, column + 4, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "LTOKEN."))
|
||||
else if (initial_match(sym, "LT."))
|
||||
{
|
||||
token = Token{Token::Type::LT, sym.substr(3)};
|
||||
token.type = Token::Type::LT;
|
||||
Err err = tokenise_signed_type(std::string_view{sym}.substr(3),
|
||||
token.operand_type, column + 3, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "GTE."))
|
||||
{
|
||||
token = Token{Token::Type::GTE, sym.substr(4)};
|
||||
token.type = Token::Type::GTE;
|
||||
Err err = tokenise_signed_type(std::string_view{sym}.substr(4),
|
||||
token.operand_type, column + 4, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "GTOKEN."))
|
||||
else if (initial_match(sym, "GT."))
|
||||
{
|
||||
token = Token{Token::Type::GT, sym.substr(3)};
|
||||
token.type = Token::Type::GT;
|
||||
Err err = tokenise_signed_type(std::string_view{sym}.substr(3),
|
||||
token.operand_type, column + 3, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "SUB."))
|
||||
{
|
||||
token = Token{Token::Type::SUB, sym.substr(4)};
|
||||
token.type = Token::Type::SUB;
|
||||
Err err = tokenise_signed_type(std::string_view{sym}.substr(4),
|
||||
token.operand_type, column + 4, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "PLUS."))
|
||||
{
|
||||
token = Token{Token::Type::PLUS, sym.substr(5)};
|
||||
token.type = Token::Type::PLUS;
|
||||
Err err = tokenise_signed_type(std::string_view{sym}.substr(5),
|
||||
token.operand_type, column + 5, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "MULTOKEN."))
|
||||
else if (initial_match(sym, "MULT."))
|
||||
{
|
||||
token = Token{Token::Type::MULT, sym.substr(5)};
|
||||
token.type = Token::Type::MULT;
|
||||
Err err = tokenise_signed_type(std::string_view{sym}.substr(5),
|
||||
token.operand_type, column + 5, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (initial_match(sym, "PRINTOKEN."))
|
||||
else if (initial_match(sym, "PRINT."))
|
||||
{
|
||||
token = Token{Token::Type::PRINT, sym.substr(6)};
|
||||
token.type = Token::Type::PRINT;
|
||||
Err err = tokenise_signed_type(std::string_view{sym}.substr(6),
|
||||
token.operand_type, column + 6, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (sym == "JUMP.ABS")
|
||||
{
|
||||
@@ -196,7 +350,11 @@ namespace Lexer
|
||||
}
|
||||
else if (initial_match(sym, "JUMP.IF."))
|
||||
{
|
||||
token = Token{Token::Type::JUMP_IF, sym.substr(8)};
|
||||
token.type = Token::Type::JUMP_IF;
|
||||
Err err = tokenise_unsigned_type(std::string_view{sym}.substr(8),
|
||||
token.operand_type, column + 8, line);
|
||||
if (err.type != Err::Type::OK)
|
||||
return err;
|
||||
}
|
||||
else if (sym == "CALL.STACK")
|
||||
{
|
||||
@@ -216,11 +374,10 @@ namespace Lexer
|
||||
}
|
||||
else
|
||||
{
|
||||
token.type = Token::Type::SYMBOL;
|
||||
token.type = Token::Type::SYMBOL;
|
||||
token.content = sym;
|
||||
}
|
||||
|
||||
if (token.content == "")
|
||||
token.content = sym;
|
||||
token.column = column;
|
||||
column += sym.size() - 1;
|
||||
return Err();
|
||||
@@ -421,8 +578,10 @@ namespace Lexer
|
||||
Token::Token()
|
||||
{}
|
||||
|
||||
Token::Token(Token::Type type, string content, size_t col, size_t line)
|
||||
: type{type}, column{col}, line{line}, content{content}
|
||||
Token::Token(Token::Type type, string_view content, size_t col, size_t line,
|
||||
OperandType optype)
|
||||
: type{type}, column{col}, line{line}, content{content},
|
||||
operand_type{optype}
|
||||
{}
|
||||
|
||||
Err::Err(Err::Type type, size_t col, size_t line)
|
||||
@@ -525,11 +684,38 @@ namespace Lexer
|
||||
return "";
|
||||
}
|
||||
|
||||
/* Name of an operand type as text.
 *
 * Returns the enumerator's name ("NIL", "BYTE", ...); a value matching no
 * enumerator yields the empty string.
 */
std::string to_string(const Token::OperandType &type)
{
  const char *label = "";

  switch (type)
  {
  case Token::OperandType::NIL:
    label = "NIL";
    break;
  case Token::OperandType::BYTE:
    label = "BYTE";
    break;
  case Token::OperandType::CHAR:
    label = "CHAR";
    break;
  case Token::OperandType::HWORD:
    label = "HWORD";
    break;
  case Token::OperandType::INT:
    label = "INT";
    break;
  case Token::OperandType::WORD:
    label = "WORD";
    break;
  case Token::OperandType::LONG:
    label = "LONG";
    break;
  }

  return label;
}
|
||||
|
||||
std::string to_string(const Token &t)
|
||||
{
|
||||
std::stringstream stream;
|
||||
stream << to_string(t.type) << "(`" << t.content << "`)@" << t.line << ", "
|
||||
<< t.column;
|
||||
stream << to_string(t.type);
|
||||
|
||||
if (t.operand_type != Token::OperandType::NIL)
|
||||
stream << "[" << to_string(t.operand_type) << "]";
|
||||
if (t.content != "")
|
||||
stream << "(`" << t.content << "`)";
|
||||
stream << "@" << t.line << ", " << t.column;
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
@@ -549,6 +735,10 @@ namespace Lexer
|
||||
return "INVALID_NUMBER_LITERAL";
|
||||
case Err::Type::INVALID_PREPROCESSOR_DIRECTIVE:
|
||||
return "INVALID_PREPROCESSOR_DIRECTIVE";
|
||||
case Err::Type::EXPECTED_TYPE_SUFFIX:
|
||||
return "EXPECTED_TYPE_SUFFIX";
|
||||
case Err::Type::EXPECTED_UNSIGNED_TYPE_SUFFIX:
|
||||
return "EXPECTED_UNSIGNED_TYPE_SUFFIX";
|
||||
case Err::Type::UNKNOWN_LEXEME:
|
||||
return "UNKNOWN_LEXEME";
|
||||
default:
|
||||
|
||||
@@ -71,13 +71,21 @@ namespace Lexer
|
||||
size_t column, line;
|
||||
std::string content;
|
||||
|
||||
Token();
|
||||
Token(Token::Type, std::string, size_t col = 0, size_t line = 0);
|
||||
};
|
||||
enum class OperandType
|
||||
{
|
||||
NIL,
|
||||
BYTE,
|
||||
CHAR,
|
||||
HWORD,
|
||||
INT,
|
||||
WORD,
|
||||
LONG
|
||||
} operand_type;
|
||||
|
||||
std::ostream &operator<<(std::ostream &, const Token &);
|
||||
std::string to_string(const Token::Type &);
|
||||
std::string to_string(const Token &);
|
||||
Token();
|
||||
Token(Token::Type, std::string_view content = "", size_t col = 0,
|
||||
size_t line = 0, OperandType optype = OperandType::NIL);
|
||||
};
|
||||
|
||||
struct Err
|
||||
{
|
||||
@@ -90,17 +98,24 @@ namespace Lexer
|
||||
INVALID_STRING_LITERAL,
|
||||
INVALID_NUMBER_LITERAL,
|
||||
INVALID_PREPROCESSOR_DIRECTIVE,
|
||||
EXPECTED_TYPE_SUFFIX,
|
||||
EXPECTED_UNSIGNED_TYPE_SUFFIX,
|
||||
UNKNOWN_LEXEME,
|
||||
} type;
|
||||
|
||||
Err(Type type = Type::OK, size_t col = 0, size_t line = 0);
|
||||
};
|
||||
|
||||
std::ostream &operator<<(std::ostream &, const Err &);
|
||||
Err tokenise_buffer(std::string_view, std::vector<Token *> &);
|
||||
|
||||
std::string to_string(const Token::Type &);
|
||||
std::string to_string(const Token::OperandType &);
|
||||
std::string to_string(const Token &);
|
||||
std::string to_string(const Err::Type &);
|
||||
std::string to_string(const Err &);
|
||||
|
||||
Err tokenise_buffer(std::string_view, std::vector<Token *> &);
|
||||
std::ostream &operator<<(std::ostream &, const Token &);
|
||||
std::ostream &operator<<(std::ostream &, const Err &);
|
||||
} // namespace Lexer
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user