Lexer: Added hex number support

This commit is contained in:
Paul Beckingham 2015-12-20 21:19:03 -05:00
parent b9e5d94178
commit 53bb3952b8
3 changed files with 42 additions and 5 deletions

View file

@ -51,8 +51,9 @@ bool Lexer::token (std::string& token, Lexer::Type& type)
if (isEOS ()) if (isEOS ())
return false; return false;
if (isString (token, type, "'\"") || if (isString (token, type, "'\"") ||
isWord (token, type)) isHexNumber (token, type) ||
isWord (token, type))
return true; return true;
return false; return false;
@ -64,6 +65,7 @@ const std::string Lexer::typeName (const Lexer::Type& type)
{ {
switch (type) switch (type)
{ {
case Lexer::Type::hex: return "hex";
case Lexer::Type::string: return "string"; case Lexer::Type::string: return "string";
case Lexer::Type::word: return "word"; case Lexer::Type::word: return "word";
} }
@ -234,6 +236,34 @@ bool Lexer::isString (std::string& token, Lexer::Type& type, const std::string&
return false; return false;
} }
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::hex
//   0xX+
//
// Recognizes a hexadecimal literal at the cursor: the lowercase prefix "0x"
// followed by one or more characters accepted by isHexDigit.
//
// On a match the literal (prefix included) is copied into 'token', 'type' is
// set to Lexer::Type::hex, the cursor is advanced past the literal, and true
// is returned.  Otherwise false is returned and 'token', 'type' and the cursor
// are left untouched.
bool Lexer::isHexNumber (std::string& token, Lexer::Type& type)
{
  std::size_t marker = _cursor;

  // There must be room for "0x" plus at least one digit.  The first comparison
  // keeps the subtraction from wrapping should the cursor ever sit past _eos.
  if (marker < _eos &&
      _eos - marker >= 3 &&
      _text[marker + 0] == '0' &&
      _text[marker + 1] == 'x')
  {
    marker += 2;

    // Stay inside the lexed range rather than relying on whatever character
    // lies at or beyond _eos to terminate the scan.
    while (marker < _eos &&
           isHexDigit (_text[marker]))
      ++marker;

    // A bare "0x" with no digit behind it is not a number.
    if (marker - _cursor > 2)
    {
      token = _text.substr (_cursor, marker - _cursor);
      type = Lexer::Type::hex;
      _cursor = marker;
      return true;
    }
  }

  return false;
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::word // Lexer::Type::word
// [^\s]+ // [^\s]+
@ -262,6 +292,7 @@ bool Lexer::isWord (std::string& token, Lexer::Type& type)
std::string Lexer::typeToString (Lexer::Type type) std::string Lexer::typeToString (Lexer::Type type)
{ {
if (type == Lexer::Type::string) return std::string ("\033[38;5;7m\033[48;5;3m") + "string" + "\033[0m"; if (type == Lexer::Type::string) return std::string ("\033[38;5;7m\033[48;5;3m") + "string" + "\033[0m";
else if (type == Lexer::Type::hex) return std::string ("\033[38;5;7m\033[48;5;14m") + "hex" + "\033[0m";
else if (type == Lexer::Type::word) return std::string ("\033[38;5;15m\033[48;5;236m") + "word" + "\033[0m"; else if (type == Lexer::Type::word) return std::string ("\033[38;5;15m\033[48;5;236m") + "word" + "\033[0m";
else return std::string ("\033[37;41m") + "unknown" + "\033[0m"; else return std::string ("\033[37;41m") + "unknown" + "\033[0m";
} }

View file

@ -35,7 +35,8 @@
class Lexer class Lexer
{ {
public: public:
enum class Type { string, enum class Type { hex,
string,
word }; word };
Lexer (const std::string&); Lexer (const std::string&);
@ -60,6 +61,7 @@ public:
// Stream Classifiers. // Stream Classifiers.
bool isEOS () const; bool isEOS () const;
bool isString (std::string&, Lexer::Type&, const std::string&); bool isString (std::string&, Lexer::Type&, const std::string&);
bool isHexNumber (std::string&, Lexer::Type&);
bool isWord (std::string&, Lexer::Type&); bool isWord (std::string&, Lexer::Type&);
private: private:

View file

@ -34,7 +34,7 @@
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
int main (int, char**) int main (int, char**)
{ {
UnitTest t (124); UnitTest t (131);
std::vector <std::pair <std::string, Lexer::Type>> tokens; std::vector <std::pair <std::string, Lexer::Type>> tokens;
std::string token; std::string token;
@ -77,7 +77,6 @@ int main (int, char**)
Lexer l1 (" \t "); Lexer l1 (" \t ");
t.notok (l1.token (token, type), "' \\t ' --> no tokens"); t.notok (l1.token (token, type), "' \\t ' --> no tokens");
// static bool readWord (const std::string&, const std::string&, std::string::size_type&, std::string&); // static bool readWord (const std::string&, const std::string&, std::string::size_type&, std::string&);
std::string::size_type cursor = 0; std::string::size_type cursor = 0;
std::string word; std::string word;
@ -149,6 +148,10 @@ int main (int, char**)
{ "\"\tfoo\t\"", { { "\"\tfoo\t\"", Lexer::Type::string }, NO, NO, NO, NO }, }, { "\"\tfoo\t\"", { { "\"\tfoo\t\"", Lexer::Type::string }, NO, NO, NO, NO }, },
{ "\"\\u20A43\"", { { "\"₤3\"", Lexer::Type::string }, NO, NO, NO, NO }, }, { "\"\\u20A43\"", { { "\"₤3\"", Lexer::Type::string }, NO, NO, NO, NO }, },
{ "\"U+20AC4\"", { { "\"€4\"", Lexer::Type::string }, NO, NO, NO, NO }, }, { "\"U+20AC4\"", { { "\"€4\"", Lexer::Type::string }, NO, NO, NO, NO }, },
// Number
{ "0x2f", { { "0x2f", Lexer::Type::hex }, NO, NO, NO, NO }, },
}; };
#define NUM_TESTS (sizeof (lexerTests) / sizeof (lexerTests[0])) #define NUM_TESTS (sizeof (lexerTests) / sizeof (lexerTests[0]))
@ -183,6 +186,7 @@ int main (int, char**)
} }
} }
t.is (Lexer::typeName (Lexer::Type::hex), "hex", "Lexer::typeName (Lexer::Type::hex)");
t.is (Lexer::typeName (Lexer::Type::string), "string", "Lexer::typeName (Lexer::Type::string)"); t.is (Lexer::typeName (Lexer::Type::string), "string", "Lexer::typeName (Lexer::Type::string)");
t.is (Lexer::typeName (Lexer::Type::word), "word", "Lexer::typeName (Lexer::Type::word)"); t.is (Lexer::typeName (Lexer::Type::word), "word", "Lexer::typeName (Lexer::Type::word)");