mirror of
https://github.com/GothenburgBitFactory/timewarrior.git
synced 2025-07-07 20:06:39 +02:00
Lexer: Added URL support
This commit is contained in:
parent
78653a16cf
commit
703ec44ad0
3 changed files with 48 additions and 1 deletions
|
@ -52,6 +52,7 @@ bool Lexer::token (std::string& token, Lexer::Type& type)
|
|||
return false;
|
||||
|
||||
if (isString (token, type, "'\"") ||
|
||||
isURL (token, type) ||
|
||||
isHexNumber (token, type) ||
|
||||
isNumber (token, type) ||
|
||||
isPattern (token, type) ||
|
||||
|
@ -70,6 +71,7 @@ const std::string Lexer::typeName (const Lexer::Type& type)
|
|||
case Lexer::Type::number: return "number";
|
||||
case Lexer::Type::hex: return "hex";
|
||||
case Lexer::Type::string: return "string";
|
||||
case Lexer::Type::url: return "url";
|
||||
case Lexer::Type::pattern: return "pattern";
|
||||
case Lexer::Type::word: return "word";
|
||||
}
|
||||
|
@ -393,6 +395,43 @@ bool Lexer::isWord (std::string& token, Lexer::Type& type)
|
|||
return false;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Lexer::Type::url
|
||||
// http [s] :// ...
|
||||
bool Lexer::isURL (std::string& token, Lexer::Type& type)
|
||||
{
|
||||
std::size_t marker = _cursor;
|
||||
|
||||
if (_eos - _cursor > 9 && // length 'https://*'
|
||||
(_text[marker + 0] == 'h' || _text[marker + 0] == 'H') &&
|
||||
(_text[marker + 1] == 't' || _text[marker + 1] == 'T') &&
|
||||
(_text[marker + 2] == 't' || _text[marker + 2] == 'T') &&
|
||||
(_text[marker + 3] == 'p' || _text[marker + 3] == 'P'))
|
||||
{
|
||||
marker += 4;
|
||||
if (_text[marker + 0] == 's' || _text[marker + 0] == 'S')
|
||||
++marker;
|
||||
|
||||
if (_text[marker + 0] == ':' &&
|
||||
_text[marker + 1] == '/' &&
|
||||
_text[marker + 2] == '/')
|
||||
{
|
||||
marker += 3;
|
||||
|
||||
while (marker < _eos &&
|
||||
! isWhitespace (_text[marker]))
|
||||
utf8_next_char (_text, marker);
|
||||
|
||||
token = _text.substr (_cursor, marker - _cursor);
|
||||
type = Lexer::Type::url;
|
||||
_cursor = marker;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Lexer::Type::pattern
|
||||
// / <unquoted-string> / <EOS> | <isWhitespace>
|
||||
|
@ -421,6 +460,7 @@ std::string Lexer::typeToString (Lexer::Type type)
|
|||
if (type == Lexer::Type::string) return std::string ("\033[38;5;7m\033[48;5;3m") + "string" + "\033[0m";
|
||||
else if (type == Lexer::Type::hex) return std::string ("\033[38;5;7m\033[48;5;14m") + "hex" + "\033[0m";
|
||||
else if (type == Lexer::Type::number) return std::string ("\033[38;5;7m\033[48;5;6m") + "number" + "\033[0m";
|
||||
else if (type == Lexer::Type::url) return std::string ("\033[38;5;7m\033[48;5;4m") + "url" + "\033[0m";
|
||||
else if (type == Lexer::Type::pattern) return std::string ("\033[37;42m") + "pattern" + "\033[0m";
|
||||
else if (type == Lexer::Type::word) return std::string ("\033[38;5;15m\033[48;5;236m") + "word" + "\033[0m";
|
||||
else return std::string ("\033[37;41m") + "unknown" + "\033[0m";
|
||||
|
|
|
@ -37,6 +37,7 @@ class Lexer
|
|||
public:
|
||||
enum class Type { number, hex,
|
||||
string,
|
||||
url,
|
||||
pattern,
|
||||
word };
|
||||
|
||||
|
@ -66,6 +67,7 @@ public:
|
|||
bool isNumber (std::string&, Lexer::Type&);
|
||||
bool isInteger (std::string&, Lexer::Type&);
|
||||
bool isHexNumber (std::string&, Lexer::Type&);
|
||||
bool isURL (std::string&, Lexer::Type&);
|
||||
bool isPattern (std::string&, Lexer::Type&);
|
||||
bool isWord (std::string&, Lexer::Type&);
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
int main (int, char**)
|
||||
{
|
||||
UnitTest t (190);
|
||||
UnitTest t (203);
|
||||
|
||||
std::vector <std::pair <std::string, Lexer::Type>> tokens;
|
||||
std::string token;
|
||||
|
@ -170,6 +170,10 @@ int main (int, char**)
|
|||
// Word
|
||||
{ "1.foo.bar", { { "1.foo.bar", Lexer::Type::word }, NO, NO, NO, NO }, },
|
||||
|
||||
// URL
|
||||
{ "http://tasktools.org", { { "http://tasktools.org", Lexer::Type::url }, NO, NO, NO, NO }, },
|
||||
{ "https://bug.tasktools.org", { { "https://bug.tasktools.org", Lexer::Type::url }, NO, NO, NO, NO }, },
|
||||
|
||||
// String
|
||||
{ "'one two'", { { "'one two'", Lexer::Type::string }, NO, NO, NO, NO }, },
|
||||
{ "\"three\"", { { "\"three\"", Lexer::Type::string }, NO, NO, NO, NO }, },
|
||||
|
@ -223,6 +227,7 @@ int main (int, char**)
|
|||
t.is (Lexer::typeName (Lexer::Type::number), "number", "Lexer::typeName (Lexer::Type::number)");
|
||||
t.is (Lexer::typeName (Lexer::Type::hex), "hex", "Lexer::typeName (Lexer::Type::hex)");
|
||||
t.is (Lexer::typeName (Lexer::Type::string), "string", "Lexer::typeName (Lexer::Type::string)");
|
||||
t.is (Lexer::typeName (Lexer::Type::url), "url", "Lexer::typeName (Lexer::Type::url)");
|
||||
t.is (Lexer::typeName (Lexer::Type::pattern), "pattern", "Lexer::typeName (Lexer::Type::pattern)");
|
||||
t.is (Lexer::typeName (Lexer::Type::word), "word", "Lexer::typeName (Lexer::Type::word)");
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue