Namespace the lexer module

Future-proofs against name collisions.
2024-06-01 01:52:17 +01:00
parent 83ad8b832b
commit 4b85f90a52
5 changed files with 593 additions and 579 deletions
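With the lexer wrapped in namespace Lexer, call sites now qualify its names explicitly or pull them in with using-declarations, as the main translation unit and the preprocessor do below. A minimal sketch of the new call-site pattern, assuming the tokenise_buffer API shown in this diff (the input buffer and cleanup loop are illustrative, not taken from the repository):

    #include <iostream>
    #include <string_view>
    #include <vector>

    #include "lexer.hpp"

    int main()
    {
      // Hypothetical input; any source accepted by the lexer would do.
      std::string_view source = "%const(answer) 42";

      // tokenise_buffer fills a vector of heap-allocated Token pointers,
      // all reached through the new Lexer:: qualifier.
      std::vector<Lexer::Token *> tokens;
      Lexer::lerr_t err = Lexer::tokenise_buffer(source, tokens);
      if (err.type != Lexer::lerr_type_t::OK)
        std::cerr << err << "\n";

      // Caller owns the tokens (cf. the VCLEAR macro in this diff).
      for (Lexer::Token *token : tokens)
        delete token;
      return 0;
    }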

@@ -24,23 +24,27 @@ static_assert(NUMBER_OF_OPCODES == 99, "ERROR: Lexer is out of date");
 
 using std::string, std::string_view, std::pair, std::make_pair;
 
-const auto VALID_SYMBOL = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUV"
-                          "WXYZ0123456789-_.:%#$",
-           VALID_DIGIT = "0123456789", VALID_HEX = "0123456789abcdefABCDEF";
+namespace Lexer
+{
+constexpr auto VALID_SYMBOL =
+    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUV"
+    "WXYZ0123456789-_.:%#$",
+    VALID_DIGIT = "0123456789",
+    VALID_HEX = "0123456789abcdefABCDEF";
 
 bool is_char_in_s(char c, const char *s)
 {
   return string_view(s).find(c) != string::npos;
 }
 
 bool initial_match(string_view src, string_view match)
 {
   return (src.size() > match.size() && src.substr(0, match.size()) == match);
 }
 
 pair<Token, lerr_t> tokenise_symbol(string_view &source, size_t &column,
                                     size_t line)
 {
   auto end = source.find_first_not_of(VALID_SYMBOL);
   if (end == string::npos)
     end = source.size() - 1;
@@ -221,10 +225,10 @@ pair<Token, lerr_t> tokenise_symbol(string_view &source, size_t &column,
   t.column = column;
   column += sym.size() - 1;
   return make_pair(t, lerr_t());
 }
 
 Token tokenise_literal_number(string_view &source, size_t &column)
 {
   bool is_negative = false;
   if (source[0] == '-')
   {
@@ -244,10 +248,10 @@ Token tokenise_literal_number(string_view &source, size_t &column)
   column += digits.size() + (is_negative ? 1 : 0);
   return t;
 }
 
 Token tokenise_literal_hex(string_view &source, size_t &column)
 {
   // Remove x char from source
   source.remove_prefix(1);
 
   auto end = source.find_first_not_of(VALID_HEX);
@@ -260,11 +264,11 @@ Token tokenise_literal_hex(string_view &source, size_t &column)
   column += digits.size() + 1;
   return t;
 }
 
 pair<Token, lerr_t> tokenise_literal_char(string_view &source, size_t &column,
                                           size_t &line)
 {
   Token t{};
 
   auto end = source.find('\'', 1);
   if (source.size() < 3 || end == 1 || end > 3)
@@ -275,9 +279,9 @@ pair<Token, lerr_t> tokenise_literal_char(string_view &source, size_t &column,
     // Escape sequence
     char escape = '\0';
     if (source.size() < 4 || source[3] != '\'')
-      return make_pair(t,
-                       lerr_t(lerr_type_t::INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE,
-                              column, line));
+      return make_pair(
+          t, lerr_t(lerr_type_t::INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE, column,
+                    line));
     switch (source[2])
     {
     case 'n':
@@ -294,9 +298,9 @@ pair<Token, lerr_t> tokenise_literal_char(string_view &source, size_t &column,
       break;
     default:
       column += 2;
-      return make_pair(t,
-                       lerr_t(lerr_type_t::INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE,
-                              column, line));
+      return make_pair(
+          t, lerr_t(lerr_type_t::INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE, column,
+                    line));
       break;
     }
     t = Token{Token::Type::LITERAL_CHAR, std::to_string(escape), column};
@@ -310,20 +314,20 @@ pair<Token, lerr_t> tokenise_literal_char(string_view &source, size_t &column,
     source.remove_prefix(3);
   }
   return make_pair(t, lerr_t());
 }
 
 Token tokenise_literal_string(string_view &source, size_t &column, size_t end)
 {
   source.remove_prefix(1);
   Token token{Token::Type::LITERAL_STRING, string(source.substr(0, end - 1)),
               column};
   source.remove_prefix(end);
   column += end + 1;
   return token;
 }
 
 lerr_t tokenise_buffer(string_view source, std::vector<Token *> &tokens)
 {
   size_t column = 0, line = 1;
   while (source.size() > 0)
   {
@@ -334,7 +338,8 @@ lerr_t tokenise_buffer(string_view source, std::vector<Token *> &tokens)
     {
       size_t i;
       for (i = 0;
-           i < source.size() && (isspace(source[i]) || source[i] == '\0'); ++i)
+           i < source.size() && (isspace(source[i]) || source[i] == '\0');
+           ++i)
       {
         ++column;
         if (source[i] == '\n')
@@ -417,23 +422,23 @@ lerr_t tokenise_buffer(string_view source, std::vector<Token *> &tokens)
     }
   }
   return lerr_t{};
 }
 
 std::ostream &operator<<(std::ostream &os, Token &t)
 {
   return os << token_type_as_cstr(t.type) << "(`" << t.content << "`)@"
             << t.line << ", " << t.column;
 }
 
 Token::Token()
 {}
 
 Token::Token(Token::Type type, string content, size_t col, size_t line)
     : type{type}, column{col}, line{line}, content{content}
 {}
 
 const char *token_type_as_cstr(Token::Type type)
 {
   switch (type)
   {
   case Token::Type::PP_USE:
@@ -526,10 +531,10 @@ const char *token_type_as_cstr(Token::Type type)
     return "SYMBOL";
   }
   return "";
 }
 
 std::ostream &operator<<(std::ostream &os, lerr_t &lerr)
 {
   os << lerr.line << ":" << lerr.col << ": ";
   switch (lerr.type)
   {
@@ -558,8 +563,9 @@ std::ostream &operator<<(std::ostream &os, lerr_t &lerr)
     break;
   }
   return os;
 }
 
 lerr_t::lerr_t(lerr_type_t type, size_t col, size_t line)
     : col{col}, line{line}, type{type}
 {}
+} // namespace Lexer

@@ -18,8 +18,10 @@
 #include <tuple>
 #include <vector>
 
-struct Token
-{
+namespace Lexer
+{
+struct Token
+{
   enum class Type
   {
     PP_CONST, // %const(<symbol>)...
@@ -72,14 +74,14 @@ struct Token
 
   Token();
   Token(Token::Type, std::string, size_t col = 0, size_t line = 0);
 };
 
 const char *token_type_as_cstr(Token::Type type);
 
 std::ostream &operator<<(std::ostream &, Token &);
 
 enum class lerr_type_t
 {
   OK = 0,
   INVALID_CHAR_LITERAL,
   INVALID_CHAR_LITERAL_ESCAPE_SEQUENCE,
@@ -87,18 +89,19 @@ enum class lerr_type_t
   INVALID_NUMBER_LITERAL,
   INVALID_PREPROCESSOR_DIRECTIVE,
   UNKNOWN_LEXEME,
 };
 
 struct lerr_t
 {
   size_t col, line;
   lerr_type_t type;
 
   lerr_t(lerr_type_t type = lerr_type_t::OK, size_t col = 0, size_t line = 0);
 };
 
 std::ostream &operator<<(std::ostream &, lerr_t &);
 
 lerr_t tokenise_buffer(std::string_view, std::vector<Token *> &);
+} // namespace Lexer
 
 #endif

@@ -30,6 +30,8 @@ extern "C"
 using std::cout, std::cerr, std::endl;
 using std::pair, std::string, std::string_view, std::vector;
+
+using Lexer::Token, Lexer::lerr_t, Lexer::lerr_type_t;
 
 void usage(const char *program_name, FILE *fp)
 {
   fprintf(fp,

@@ -18,6 +18,8 @@
 
 using std::pair, std::vector, std::make_pair, std::string, std::string_view;
+
+using Lexer::Token, Lexer::lerr_t, Lexer::lerr_type_t;
 
 #define VCLEAR(V)                               \
   std::for_each((V).begin(), (V).end(),         \
                 [](Token *t)                    \

@@ -30,33 +30,34 @@ enum pp_err_type_t
 struct pp_err_t
 {
-  const Token *reference;
+  const Lexer::Token *reference;
   pp_err_type_t type;
-  lerr_t lerr;
+  Lexer::lerr_t lerr;
 
   pp_err_t();
   pp_err_t(pp_err_type_t);
-  pp_err_t(pp_err_type_t, const Token *);
-  pp_err_t(pp_err_type_t, const Token *, lerr_t);
+  pp_err_t(pp_err_type_t, const Lexer::Token *);
+  pp_err_t(pp_err_type_t, const Lexer::Token *, Lexer::lerr_t);
 };
 
 std::ostream &operator<<(std::ostream &, pp_err_t &);
 
 struct pp_unit_t
 {
-  const Token *const token;
+  const Lexer::Token *const token;
 
   struct
   {
     std::string_view name;
     std::vector<pp_unit_t> elements;
   } container;
 
-  pp_unit_t(const Token *const);
+  pp_unit_t(const Lexer::Token *const);
   pp_unit_t(std::string_view, std::vector<pp_unit_t>);
 };
 
-std::vector<pp_unit_t> tokens_to_units(const std::vector<Token *> &);
+std::vector<pp_unit_t> tokens_to_units(const std::vector<Lexer::Token *> &);
 pp_err_t preprocess_use(std::vector<pp_unit_t> &);
-pp_err_t preprocesser(const std::vector<Token *> &, std::vector<Token *> &);
+pp_err_t preprocesser(const std::vector<Lexer::Token *> &,
+                      std::vector<Lexer::Token *> &);
 
 #endif