Lexer: Added hex number support

This commit is contained in:
Paul Beckingham 2015-12-20 21:19:03 -05:00
parent b9e5d94178
commit 53bb3952b8
3 changed files with 42 additions and 5 deletions

View file

@ -51,8 +51,9 @@ bool Lexer::token (std::string& token, Lexer::Type& type)
if (isEOS ())
return false;
if (isString (token, type, "'\"") ||
isWord (token, type))
if (isString (token, type, "'\"") ||
isHexNumber (token, type) ||
isWord (token, type))
return true;
return false;
@ -64,6 +65,7 @@ const std::string Lexer::typeName (const Lexer::Type& type)
{
switch (type)
{
case Lexer::Type::hex: return "hex";
case Lexer::Type::string: return "string";
case Lexer::Type::word: return "word";
}
@ -234,6 +236,34 @@ bool Lexer::isString (std::string& token, Lexer::Type& type, const std::string&
return false;
}
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::hex
//   0xX+
//
// Attempts to read a hexadecimal literal starting at the cursor: a lowercase
// "0x" prefix followed by one or more characters accepted by isHexDigit.
//
// On success the matched text (prefix included) is stored in 'token', 'type'
// is set to Lexer::Type::hex, the cursor is moved just past the literal, and
// true is returned. On failure 'token', 'type' and the cursor are left
// untouched and false is returned.
bool Lexer::isHexNumber (std::string& token, Lexer::Type& type)
{
  std::size_t marker = _cursor;

  // At least three characters must remain: "0x" plus one digit.
  if (_eos - marker >= 3 &&
      _text[marker + 0] == '0' &&
      _text[marker + 1] == 'x')
  {
    marker += 2;

    // Consume digits, but never look at anything at or beyond _eos; the scan
    // must not depend on what happens to be stored past the end marker.
    while (marker < _eos && isHexDigit (_text[marker]))
      ++marker;

    // A bare "0x" with no digit following is not a hex number.
    if (marker - _cursor > 2)
    {
      token = _text.substr (_cursor, marker - _cursor);
      type = Lexer::Type::hex;
      _cursor = marker;
      return true;
    }
  }

  return false;
}
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::word
// [^\s]+
@ -262,6 +292,7 @@ bool Lexer::isWord (std::string& token, Lexer::Type& type)
// Returns the name of a token type wrapped in ANSI color escape sequences,
// terminated by the attribute-reset sequence. Any type without a dedicated
// case is rendered as "unknown".
std::string Lexer::typeToString (Lexer::Type type)
{
  // Every label ends with the same attribute-reset sequence.
  const std::string reset ("\033[0m");

  switch (type)
  {
  case Lexer::Type::string:
    return std::string ("\033[38;5;7m\033[48;5;3m") + "string" + reset;

  case Lexer::Type::hex:
    return std::string ("\033[38;5;7m\033[48;5;14m") + "hex" + reset;

  case Lexer::Type::word:
    return std::string ("\033[38;5;15m\033[48;5;236m") + "word" + reset;

  default:
    return std::string ("\033[37;41m") + "unknown" + reset;
  }
}

View file

@ -35,7 +35,8 @@
class Lexer
{
public:
enum class Type { string,
enum class Type { hex,
string,
word };
Lexer (const std::string&);
@ -60,6 +61,7 @@ public:
// Stream Classifiers.
bool isEOS () const;
bool isString (std::string&, Lexer::Type&, const std::string&);
bool isHexNumber (std::string&, Lexer::Type&);
bool isWord (std::string&, Lexer::Type&);
private:

View file

@ -34,7 +34,7 @@
////////////////////////////////////////////////////////////////////////////////
int main (int, char**)
{
UnitTest t (124);
UnitTest t (131);
std::vector <std::pair <std::string, Lexer::Type>> tokens;
std::string token;
@ -77,7 +77,6 @@ int main (int, char**)
Lexer l1 (" \t ");
t.notok (l1.token (token, type), "' \\t ' --> no tokens");
// static bool readWord (const std::string&, const std::string&, std::string::size_type&, std::string&);
std::string::size_type cursor = 0;
std::string word;
@ -149,6 +148,10 @@ int main (int, char**)
{ "\"\tfoo\t\"", { { "\"\tfoo\t\"", Lexer::Type::string }, NO, NO, NO, NO }, },
{ "\"\\u20A43\"", { { "\"₤3\"", Lexer::Type::string }, NO, NO, NO, NO }, },
{ "\"U+20AC4\"", { { "\"€4\"", Lexer::Type::string }, NO, NO, NO, NO }, },
// Number
{ "0x2f", { { "0x2f", Lexer::Type::hex }, NO, NO, NO, NO }, },
};
#define NUM_TESTS (sizeof (lexerTests) / sizeof (lexerTests[0]))
@ -183,6 +186,7 @@ int main (int, char**)
}
}
t.is (Lexer::typeName (Lexer::Type::hex), "hex", "Lexer::typeName (Lexer::Type::hex)");
t.is (Lexer::typeName (Lexer::Type::string), "string", "Lexer::typeName (Lexer::Type::string)");
t.is (Lexer::typeName (Lexer::Type::word), "word", "Lexer::typeName (Lexer::Type::word)");