Lexer: Added path support

This commit is contained in:
Paul Beckingham 2015-12-24 00:26:34 -05:00
parent 703ec44ad0
commit 476551c563
3 changed files with 54 additions and 1 deletions

View file

@ -55,6 +55,7 @@ bool Lexer::token (std::string& token, Lexer::Type& type)
isURL (token, type) || isURL (token, type) ||
isHexNumber (token, type) || isHexNumber (token, type) ||
isNumber (token, type) || isNumber (token, type) ||
isPath (token, type) ||
isPattern (token, type) || isPattern (token, type) ||
isWord (token, type)) isWord (token, type))
return true; return true;
@ -72,6 +73,7 @@ const std::string Lexer::typeName (const Lexer::Type& type)
case Lexer::Type::hex: return "hex"; case Lexer::Type::hex: return "hex";
case Lexer::Type::string: return "string"; case Lexer::Type::string: return "string";
case Lexer::Type::url: return "url"; case Lexer::Type::url: return "url";
case Lexer::Type::path: return "path";
case Lexer::Type::pattern: return "pattern"; case Lexer::Type::pattern: return "pattern";
case Lexer::Type::word: return "word"; case Lexer::Type::word: return "word";
} }
@ -432,6 +434,50 @@ bool Lexer::isURL (std::string& token, Lexer::Type& type)
return false; return false;
} }
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::path
//   ( / <non-slash, non-whitespace>+ )+ [ / ]
//
// Scans forward from _cursor over alternating '/' separators and non-empty
// segments. A segment ends at whitespace, another '/', or a NUL character.
// The scan is accepted only when more than three slashes were consumed. A
// final '/' that is not followed by a segment is still part of the token.
//
// On success: sets type to Lexer::Type::path, copies the matched text into
// token, moves _cursor past the match, and returns true.
// On failure: returns false, leaving token, type and _cursor untouched.
bool Lexer::isPath (std::string& token, Lexer::Type& type)
{
  std::size_t end = _cursor;
  int separators = 0;

  // Each pass consumes one '/' and then, if present, one segment.
  while (_text[end] == '/')
  {
    ++end;
    ++separators;

    // No segment after this slash: stop, keeping the slash already consumed.
    if (! _text[end] ||
        isWhitespace (_text[end]) ||
        _text[end] == '/')
      break;

    // Consume the segment; the first character is already known to qualify.
    do
    {
      utf8_next_char (_text, end);
    }
    while (_text[end] &&
           ! isWhitespace (_text[end]) &&
           _text[end] != '/');
  }

  // Reject anything that consumed nothing or has three or fewer slashes.
  if (end <= _cursor ||
      separators <= 3)
    return false;

  type = Lexer::Type::path;
  token = _text.substr (_cursor, end - _cursor);
  _cursor = end;
  return true;
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::pattern // Lexer::Type::pattern
// / <unquoted-string> / <EOS> | <isWhitespace> // / <unquoted-string> / <EOS> | <isWhitespace>
@ -461,6 +507,7 @@ std::string Lexer::typeToString (Lexer::Type type)
else if (type == Lexer::Type::hex) return std::string ("\033[38;5;7m\033[48;5;14m") + "hex" + "\033[0m"; else if (type == Lexer::Type::hex) return std::string ("\033[38;5;7m\033[48;5;14m") + "hex" + "\033[0m";
else if (type == Lexer::Type::number) return std::string ("\033[38;5;7m\033[48;5;6m") + "number" + "\033[0m"; else if (type == Lexer::Type::number) return std::string ("\033[38;5;7m\033[48;5;6m") + "number" + "\033[0m";
else if (type == Lexer::Type::url) return std::string ("\033[38;5;7m\033[48;5;4m") + "url" + "\033[0m"; else if (type == Lexer::Type::url) return std::string ("\033[38;5;7m\033[48;5;4m") + "url" + "\033[0m";
else if (type == Lexer::Type::path) return std::string ("\033[37;102m") + "path" + "\033[0m";
else if (type == Lexer::Type::pattern) return std::string ("\033[37;42m") + "pattern" + "\033[0m"; else if (type == Lexer::Type::pattern) return std::string ("\033[37;42m") + "pattern" + "\033[0m";
else if (type == Lexer::Type::word) return std::string ("\033[38;5;15m\033[48;5;236m") + "word" + "\033[0m"; else if (type == Lexer::Type::word) return std::string ("\033[38;5;15m\033[48;5;236m") + "word" + "\033[0m";
else return std::string ("\033[37;41m") + "unknown" + "\033[0m"; else return std::string ("\033[37;41m") + "unknown" + "\033[0m";

View file

@ -38,6 +38,7 @@ public:
enum class Type { number, hex, enum class Type { number, hex,
string, string,
url, url,
path,
pattern, pattern,
word }; word };
@ -68,6 +69,7 @@ public:
bool isInteger (std::string&, Lexer::Type&); bool isInteger (std::string&, Lexer::Type&);
bool isHexNumber (std::string&, Lexer::Type&); bool isHexNumber (std::string&, Lexer::Type&);
bool isURL (std::string&, Lexer::Type&); bool isURL (std::string&, Lexer::Type&);
bool isPath (std::string&, Lexer::Type&);
bool isPattern (std::string&, Lexer::Type&); bool isPattern (std::string&, Lexer::Type&);
bool isWord (std::string&, Lexer::Type&); bool isWord (std::string&, Lexer::Type&);

View file

@ -34,7 +34,7 @@
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
int main (int, char**) int main (int, char**)
{ {
UnitTest t (203); UnitTest t (210);
std::vector <std::pair <std::string, Lexer::Type>> tokens; std::vector <std::pair <std::string, Lexer::Type>> tokens;
std::string token; std::string token;
@ -167,6 +167,9 @@ int main (int, char**)
{ "/a\\/b/", { { "/a\\/b/", Lexer::Type::pattern }, NO, NO, NO, NO }, }, { "/a\\/b/", { { "/a\\/b/", Lexer::Type::pattern }, NO, NO, NO, NO }, },
{ "/'/", { { "/'/", Lexer::Type::pattern }, NO, NO, NO, NO }, }, { "/'/", { { "/'/", Lexer::Type::pattern }, NO, NO, NO, NO }, },
// Path
{ "/long/path/to/file.txt", { { "/long/path/to/file.txt", Lexer::Type::path }, NO, NO, NO, NO }, },
// Word // Word
{ "1.foo.bar", { { "1.foo.bar", Lexer::Type::word }, NO, NO, NO, NO }, }, { "1.foo.bar", { { "1.foo.bar", Lexer::Type::word }, NO, NO, NO, NO }, },
@ -228,6 +231,7 @@ int main (int, char**)
t.is (Lexer::typeName (Lexer::Type::hex), "hex", "Lexer::typeName (Lexer::Type::hex)"); t.is (Lexer::typeName (Lexer::Type::hex), "hex", "Lexer::typeName (Lexer::Type::hex)");
t.is (Lexer::typeName (Lexer::Type::string), "string", "Lexer::typeName (Lexer::Type::string)"); t.is (Lexer::typeName (Lexer::Type::string), "string", "Lexer::typeName (Lexer::Type::string)");
t.is (Lexer::typeName (Lexer::Type::url), "url", "Lexer::typeName (Lexer::Type::url)"); t.is (Lexer::typeName (Lexer::Type::url), "url", "Lexer::typeName (Lexer::Type::url)");
t.is (Lexer::typeName (Lexer::Type::path), "path", "Lexer::typeName (Lexer::Type::path)");
t.is (Lexer::typeName (Lexer::Type::pattern), "pattern", "Lexer::typeName (Lexer::Type::pattern)"); t.is (Lexer::typeName (Lexer::Type::pattern), "pattern", "Lexer::typeName (Lexer::Type::pattern)");
t.is (Lexer::typeName (Lexer::Type::word), "word", "Lexer::typeName (Lexer::Type::word)"); t.is (Lexer::typeName (Lexer::Type::word), "word", "Lexer::typeName (Lexer::Type::word)");