diff --git a/src/Lexer.cpp b/src/Lexer.cpp index 7cb0e5e4..91650c6d 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -55,6 +55,7 @@ bool Lexer::token (std::string& token, Lexer::Type& type) isURL (token, type) || isHexNumber (token, type) || isNumber (token, type) || + isPath (token, type) || isPattern (token, type) || isWord (token, type)) return true; @@ -72,6 +73,7 @@ const std::string Lexer::typeName (const Lexer::Type& type) case Lexer::Type::hex: return "hex"; case Lexer::Type::string: return "string"; case Lexer::Type::url: return "url"; + case Lexer::Type::path: return "path"; case Lexer::Type::pattern: return "pattern"; case Lexer::Type::word: return "word"; } @@ -432,6 +434,50 @@ bool Lexer::isURL (std::string& token, Lexer::Type& type) return false; } +//////////////////////////////////////////////////////////////////////////////// +// Lexer::Type::path +// ( / )+ +bool Lexer::isPath (std::string& token, Lexer::Type& type) +{ + std::size_t marker = _cursor; + int slashCount = 0; + + while (1) + { + if (_text[marker] == '/') + { + ++marker; + ++slashCount; + } + else + break; + + if (_text[marker] && + ! isWhitespace (_text[marker]) && + _text[marker] != '/') + { + utf8_next_char (_text, marker); + while (_text[marker] && + ! isWhitespace (_text[marker]) && + _text[marker] != '/') + utf8_next_char (_text, marker); + } + else + break; + } + + if (marker > _cursor && + slashCount > 3) + { + type = Lexer::Type::path; + token = _text.substr (_cursor, marker - _cursor); + _cursor = marker; + return true; + } + + return false; +} + //////////////////////////////////////////////////////////////////////////////// // Lexer::Type::pattern // / / | @@ -461,6 +507,7 @@ std::string Lexer::typeToString (Lexer::Type type) else if (type == Lexer::Type::hex) return std::string ("\033[38;5;7m\033[48;5;14m") + "hex" + "\033[0m"; else if (type == Lexer::Type::number) return std::string ("\033[38;5;7m\033[48;5;6m") + "number" + "\033[0m"; else if (type == Lexer::Type::url) return std::string ("\033[38;5;7m\033[48;5;4m") + "url" + "\033[0m"; + else if (type == Lexer::Type::path) return std::string ("\033[37;102m") + "path" + "\033[0m"; else if (type == Lexer::Type::pattern) return std::string ("\033[37;42m") + "pattern" + "\033[0m"; else if (type == Lexer::Type::word) return std::string ("\033[38;5;15m\033[48;5;236m") + "word" + "\033[0m"; else return std::string ("\033[37;41m") + "unknown" + "\033[0m"; diff --git a/src/Lexer.h b/src/Lexer.h index 66157bb2..a5e7ed19 100644 --- a/src/Lexer.h +++ b/src/Lexer.h @@ -38,6 +38,7 @@ public: enum class Type { number, hex, string, url, + path, pattern, word }; @@ -68,6 +69,7 @@ public: bool isInteger (std::string&, Lexer::Type&); bool isHexNumber (std::string&, Lexer::Type&); bool isURL (std::string&, Lexer::Type&); + bool isPath (std::string&, Lexer::Type&); bool isPattern (std::string&, Lexer::Type&); bool isWord (std::string&, Lexer::Type&); diff --git a/test/lexer.t.cpp b/test/lexer.t.cpp index 7ac29dfa..e016ae69 100644 --- a/test/lexer.t.cpp +++ b/test/lexer.t.cpp @@ -34,7 +34,7 @@ //////////////////////////////////////////////////////////////////////////////// int main (int, char**) { - UnitTest t (203); + UnitTest t (210); std::vector > tokens; std::string token; @@ -167,6 +167,9 @@ int main (int, char**) { "/a\\/b/", { { "/a\\/b/", Lexer::Type::pattern }, NO, NO, NO, NO }, }, { "/'/", { { "/'/", Lexer::Type::pattern }, NO, NO, NO, NO }, }, + // Path + { "/long/path/to/file.txt", { { "/long/path/to/file.txt", Lexer::Type::path }, NO, NO, NO, NO }, }, + // Word { "1.foo.bar", { { "1.foo.bar", Lexer::Type::word }, NO, NO, NO, NO }, }, @@ -228,6 +231,7 @@ int main (int, char**) t.is (Lexer::typeName (Lexer::Type::hex), "hex", "Lexer::typeName (Lexer::Type::hex)"); t.is (Lexer::typeName (Lexer::Type::string), "string", "Lexer::typeName (Lexer::Type::string)"); t.is (Lexer::typeName (Lexer::Type::url), "url", "Lexer::typeName (Lexer::Type::url)"); + t.is (Lexer::typeName (Lexer::Type::path), "path", "Lexer::typeName (Lexer::Type::path)"); t.is (Lexer::typeName (Lexer::Type::pattern), "pattern", "Lexer::typeName (Lexer::Type::pattern)"); t.is (Lexer::typeName (Lexer::Type::word), "word", "Lexer::typeName (Lexer::Type::word)");