aboutsummaryrefslogtreecommitdiff
path: root/asm
diff options
context:
space:
mode:
authorAryadev Chavali <aryadev@aryadevchavali.com>2024-04-14 16:57:46 +0630
committerAryadev Chavali <aryadev@aryadevchavali.com>2024-04-14 16:57:46 +0630
commit3c46fde66ac71e4804cdd5fe076ed930db5d7122 (patch)
tree081aa97020ffef2468d4d8dbe78adc0de6793413 /asm
parent4f8f5111687355669d6df8db7a404fd9aaecacc4 (diff)
downloadovm-3c46fde66ac71e4804cdd5fe076ed930db5d7122.tar.gz
ovm-3c46fde66ac71e4804cdd5fe076ed930db5d7122.tar.bz2
ovm-3c46fde66ac71e4804cdd5fe076ed930db5d7122.zip
Implemented tokenise_literal_hex
Note the overall size of this function in comparison to the C version, as well as its clarity. Of course, it is doing allocations in the background through std::string, which requires more profiling if I want to make this super efficient™, but honestly the assembler just needs to work, whereas the runtime needs to be fast.
Diffstat (limited to 'asm')
-rw-r--r--asm/lexer.cpp18
1 files changed, 17 insertions, 1 deletions
diff --git a/asm/lexer.cpp b/asm/lexer.cpp
index 189b330..f8abf0a 100644
--- a/asm/lexer.cpp
+++ b/asm/lexer.cpp
@@ -20,7 +20,7 @@ using std::string, std::string_view, std::pair, std::make_pair;
constexpr auto VALID_SYMBOL = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUV"
"WXYZ0123456789-_.:()%#$",
- VALID_DIGIT = "0123456789";
+ VALID_DIGIT = "0123456789", VALID_HEX = "0123456789abcdefABCDEF";
bool is_char_in_s(char c, const char *s)
{
@@ -237,3 +237,19 @@ token_t tokenise_literal_number(string_view &source, size_t &column)
return t;
}
+
+/// Tokenise a hexadecimal literal whose leading character in `source` is the
+/// `x` marker.
+///
+/// Drops the first character of `source`, then consumes the longest leading
+/// run of characters found in VALID_HEX.
+///
+/// @param source Remaining input; advanced past the marker and the digits.
+///               Must be non-empty on entry (the first character is removed
+///               unconditionally).
+/// @param column Current column; advanced by the number of digits plus one.
+/// @return A LITERAL_NUMBER token built from "0x" followed by the digits and
+///         the value `column` had on entry.
+token_t tokenise_literal_hex(string_view &source, size_t &column)
+{
+  // Remove x char from source
+  source.remove_prefix(1);
+  auto end = source.find_first_not_of(VALID_HEX);
+  // No terminator found: every remaining character is a hex digit, so the
+  // literal runs to the end of the input.  Using size() (rather than
+  // size() - 1) keeps the final digit and cannot wrap around when the
+  // remaining input is empty.
+  if (end == string_view::npos)
+    end = source.size();
+  string digits{source.substr(0, end)};
+  source.remove_prefix(end);
+
+  token_t t = {token_type_t::LITERAL_NUMBER, "0x" + digits, column};
+
+  // +1 accounts for the x character removed above.
+  column += digits.size() + 1;
+  return t;
+}