Lexer: Added pattern support

This commit is contained in:
Paul Beckingham 2015-12-22 15:26:57 -05:00
parent fbc47dc1e4
commit ee667cb68c
3 changed files with 33 additions and 1 deletion

View file

@ -54,6 +54,7 @@ bool Lexer::token (std::string& token, Lexer::Type& type)
if (isString (token, type, "'\"") ||
isHexNumber (token, type) ||
isNumber (token, type) ||
isPattern (token, type) ||
isWord (token, type))
return true;
@ -69,6 +70,7 @@ const std::string Lexer::typeName (const Lexer::Type& type)
case Lexer::Type::number: return "number";
case Lexer::Type::hex: return "hex";
case Lexer::Type::string: return "string";
case Lexer::Type::pattern: return "pattern";
case Lexer::Type::word: return "word";
}
@ -391,6 +393,27 @@ bool Lexer::isWord (std::string& token, Lexer::Type& type)
return false;
}
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::pattern
//   '/' <unquoted-string> '/'   followed by   <EOS> | <isWhitespace>
//
// Tries to lex a slash-delimited pattern starting at the current cursor.
// On success, 'token' receives the raw text spanning the starting cursor up to
// the new cursor, 'type' is set to Lexer::Type::pattern, and true is returned.
// On failure the cursor is put back where it started and false is returned;
// 'token' and 'type' are left untouched.
bool Lexer::isPattern (std::string& token, Lexer::Type& type)
{
  const std::size_t start = _cursor;

  // Only the cursor movement matters here; the text extracted by readWord is
  // discarded because the token is taken directly from _text below.
  std::string extracted;
  if (! readWord (_text, "/", _cursor, extracted))
  {
    _cursor = start;
    return false;
  }

  // What was read must be terminated by end-of-input or whitespace. The
  // isEOS check comes first so _text is not indexed when isEOS is true.
  if (! isEOS () && ! isWhitespace (_text[_cursor]))
  {
    _cursor = start;
    return false;
  }

  token = _text.substr (start, _cursor - start);
  type  = Lexer::Type::pattern;
  return true;
}
////////////////////////////////////////////////////////////////////////////////
// Static
std::string Lexer::typeToString (Lexer::Type type)
@ -398,6 +421,7 @@ std::string Lexer::typeToString (Lexer::Type type)
if (type == Lexer::Type::string) return std::string ("\033[38;5;7m\033[48;5;3m") + "string" + "\033[0m";
else if (type == Lexer::Type::hex) return std::string ("\033[38;5;7m\033[48;5;14m") + "hex" + "\033[0m";
else if (type == Lexer::Type::number) return std::string ("\033[38;5;7m\033[48;5;6m") + "number" + "\033[0m";
else if (type == Lexer::Type::pattern) return std::string ("\033[37;42m") + "pattern" + "\033[0m";
else if (type == Lexer::Type::word) return std::string ("\033[38;5;15m\033[48;5;236m") + "word" + "\033[0m";
else return std::string ("\033[37;41m") + "unknown" + "\033[0m";
}

View file

@ -37,6 +37,7 @@ class Lexer
public:
enum class Type { number, hex,
string,
pattern,
word };
Lexer (const std::string&);
@ -65,6 +66,7 @@ public:
bool isNumber (std::string&, Lexer::Type&);
bool isInteger (std::string&, Lexer::Type&);
bool isHexNumber (std::string&, Lexer::Type&);
bool isPattern (std::string&, Lexer::Type&);
bool isWord (std::string&, Lexer::Type&);
private:

View file

@ -34,7 +34,7 @@
////////////////////////////////////////////////////////////////////////////////
int main (int, char**)
{
UnitTest t (171);
UnitTest t (190);
std::vector <std::pair <std::string, Lexer::Type>> tokens;
std::string token;
@ -162,6 +162,11 @@ int main (int, char**)
} results[5];
} lexerTests[] =
{
// Pattern
{ "/foo/", { { "/foo/", Lexer::Type::pattern }, NO, NO, NO, NO }, },
{ "/a\\/b/", { { "/a\\/b/", Lexer::Type::pattern }, NO, NO, NO, NO }, },
{ "/'/", { { "/'/", Lexer::Type::pattern }, NO, NO, NO, NO }, },
// Word
{ "1.foo.bar", { { "1.foo.bar", Lexer::Type::word }, NO, NO, NO, NO }, },
@ -218,6 +223,7 @@ int main (int, char**)
t.is (Lexer::typeName (Lexer::Type::number), "number", "Lexer::typeName (Lexer::Type::number)");
t.is (Lexer::typeName (Lexer::Type::hex), "hex", "Lexer::typeName (Lexer::Type::hex)");
t.is (Lexer::typeName (Lexer::Type::string), "string", "Lexer::typeName (Lexer::Type::string)");
t.is (Lexer::typeName (Lexer::Type::pattern), "pattern", "Lexer::typeName (Lexer::Type::pattern)");
t.is (Lexer::typeName (Lexer::Type::word), "word", "Lexer::typeName (Lexer::Type::word)");
// std::string Lexer::trimLeft (const std::string& in, const std::string&)