diff options
author | Aryadev Chavali <aryadev@aryadevchavali.com> | 2024-04-14 16:57:46 +0630 |
---|---|---|
committer | Aryadev Chavali <aryadev@aryadevchavali.com> | 2024-04-14 16:57:46 +0630 |
commit | 3c46fde66ac71e4804cdd5fe076ed930db5d7122 (patch) | |
tree | 081aa97020ffef2468d4d8dbe78adc0de6793413 | |
parent | 4f8f5111687355669d6df8db7a404fd9aaecacc4 (diff) | |
download | ovm-3c46fde66ac71e4804cdd5fe076ed930db5d7122.tar.gz ovm-3c46fde66ac71e4804cdd5fe076ed930db5d7122.tar.bz2 ovm-3c46fde66ac71e4804cdd5fe076ed930db5d7122.zip |
Implemented tokenise_literal_hex
Note the overall size of this function in comparison to the C version,
as well as its clarity.
Of course, it is doing allocations in the background through
std::string which requires more profiling if I want to make this super
efficient™ but honestly the assembler just needs to work, whereas the
runtime needs to be fast.
-rw-r--r-- | asm/lexer.cpp | 18 |
1 files changed, 17 insertions, 1 deletions
diff --git a/asm/lexer.cpp b/asm/lexer.cpp index 189b330..f8abf0a 100644 --- a/asm/lexer.cpp +++ b/asm/lexer.cpp @@ -20,7 +20,7 @@ using std::string, std::string_view, std::pair, std::make_pair; constexpr auto VALID_SYMBOL = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUV" "WXYZ0123456789-_.:()%#$", - VALID_DIGIT = "0123456789"; + VALID_DIGIT = "0123456789", VALID_HEX = "0123456789abcdefABCDEF"; bool is_char_in_s(char c, const char *s) { @@ -237,3 +237,19 @@ token_t tokenise_literal_number(string_view &source, size_t &column) return t; } + +token_t tokenise_literal_hex(string_view &source, size_t &column) +{ + // Remove x char from source + source.remove_prefix(1); + auto end = source.find_first_not_of(VALID_HEX); + if (end == string::npos) + end = source.size() - 1; + string digits{source.substr(0, end)}; + source.remove_prefix(end); + + token_t t = {token_type_t::LITERAL_NUMBER, "0x" + digits, column}; + + column += digits.size() + 1; + return t; +} |