From 53bb3952b87ac4c0accee08291f9319a0fe779be Mon Sep 17 00:00:00 2001 From: Paul Beckingham Date: Sun, 20 Dec 2015 21:19:03 -0500 Subject: [PATCH] Lexer: Added hex number support --- src/Lexer.cpp | 35 +++++++++++++++++++++++++++++++++-- src/Lexer.h | 4 +++- test/lexer.t.cpp | 8 ++++++-- 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/Lexer.cpp b/src/Lexer.cpp index 431ddec5..e5052db2 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -51,8 +51,9 @@ bool Lexer::token (std::string& token, Lexer::Type& type) if (isEOS ()) return false; - if (isString (token, type, "'\"") || - isWord (token, type)) + if (isString (token, type, "'\"") || + isHexNumber (token, type) || + isWord (token, type)) return true; return false; @@ -64,6 +65,7 @@ const std::string Lexer::typeName (const Lexer::Type& type) { switch (type) { + case Lexer::Type::hex: return "hex"; case Lexer::Type::string: return "string"; case Lexer::Type::word: return "word"; } @@ -234,6 +236,34 @@ bool Lexer::isString (std::string& token, Lexer::Type& type, const std::string& return false; } +//////////////////////////////////////////////////////////////////////////////// +// Lexer::Type::hex +// 0xX+ +bool Lexer::isHexNumber (std::string& token, Lexer::Type& type) +{ + std::size_t marker = _cursor; + + if (_eos - marker >= 3 && + _text[marker + 0] == '0' && + _text[marker + 1] == 'x') + { + marker += 2; + + while (isHexDigit (_text[marker])) + ++marker; + + if (marker - _cursor > 2) + { + token = _text.substr (_cursor, marker - _cursor); + type = Lexer::Type::hex; + _cursor = marker; + return true; + } + } + + return false; +} + //////////////////////////////////////////////////////////////////////////////// // Lexer::Type::word // [^\s]+ @@ -262,6 +292,7 @@ bool Lexer::isWord (std::string& token, Lexer::Type& type) std::string Lexer::typeToString (Lexer::Type type) { if (type == Lexer::Type::string) return std::string ("\033[38;5;7m\033[48;5;3m") + "string" + "\033[0m"; + else if (type == Lexer::Type::hex) return std::string ("\033[38;5;7m\033[48;5;14m") + "hex" + "\033[0m"; else if (type == Lexer::Type::word) return std::string ("\033[38;5;15m\033[48;5;236m") + "word" + "\033[0m"; else return std::string ("\033[37;41m") + "unknown" + "\033[0m"; } diff --git a/src/Lexer.h b/src/Lexer.h index f3c5b20a..ebd5655c 100644 --- a/src/Lexer.h +++ b/src/Lexer.h @@ -35,7 +35,8 @@ class Lexer { public: - enum class Type { string, + enum class Type { hex, + string, word }; Lexer (const std::string&); @@ -60,6 +61,7 @@ public: // Stream Classifiers. bool isEOS () const; bool isString (std::string&, Lexer::Type&, const std::string&); + bool isHexNumber (std::string&, Lexer::Type&); bool isWord (std::string&, Lexer::Type&); private: diff --git a/test/lexer.t.cpp b/test/lexer.t.cpp index f90e6fc6..5f483bb2 100644 --- a/test/lexer.t.cpp +++ b/test/lexer.t.cpp @@ -34,7 +34,7 @@ //////////////////////////////////////////////////////////////////////////////// int main (int, char**) { - UnitTest t (124); + UnitTest t (131); std::vector > tokens; std::string token; @@ -77,7 +77,6 @@ int main (int, char**) Lexer l1 (" \t "); t.notok (l1.token (token, type), "' \\t ' --> no tokens"); - // static bool readWord (const std::string&, const std::string&, std::string::size_type&, std::string&); std::string::size_type cursor = 0; std::string word; @@ -149,6 +148,10 @@ int main (int, char**) { "\"\tfoo\t\"", { { "\"\tfoo\t\"", Lexer::Type::string }, NO, NO, NO, NO }, }, { "\"\\u20A43\"", { { "\"₤3\"", Lexer::Type::string }, NO, NO, NO, NO }, }, { "\"U+20AC4\"", { { "\"€4\"", Lexer::Type::string }, NO, NO, NO, NO }, }, + + // Number + { "0x2f", { { "0x2f", Lexer::Type::hex }, NO, NO, NO, NO }, }, + }; #define NUM_TESTS (sizeof (lexerTests) / sizeof (lexerTests[0])) @@ -183,6 +186,7 @@ int main (int, char**) } } + t.is (Lexer::typeName (Lexer::Type::hex), "hex", "Lexer::typeName (Lexer::Type::hex)"); t.is (Lexer::typeName (Lexer::Type::string), "string", "Lexer::typeName (Lexer::Type::string)"); t.is (Lexer::typeName (Lexer::Type::word), "word", "Lexer::typeName (Lexer::Type::word)");