Mirror of https://github.com/GothenburgBitFactory/taskwarrior.git (synced 2025-06-26 10:54:26 +02:00)
Lexer: Removed experimental code, didn't help
Commit 1836ac29e2 (parent d8e48e1e2b)
3 changed files with 9 additions and 357 deletions
src/Lexer.cpp (192 changed lines)
@@ -38,14 +38,6 @@ static const unsigned int uuid_min_length = 8;
std::string Lexer::dateFormat = "";
bool Lexer::isoEnabled = true;

////////////////////////////////////////////////////////////////////////////////
Lexer::Lexer ()
: _text ("")
, _cursor (0)
, _eos (0)
{
}

////////////////////////////////////////////////////////////////////////////////
Lexer::Lexer (const std::string& text)
: _text (text)
@@ -73,14 +65,14 @@ bool Lexer::token (std::string& token, Lexer::Type& type)
    return false;

  // The sequence is specific, and must follow these rules:
  // - date < duration < uuid < identifier
  // - uuid < hex < number
  // - url < pair < identifier
  // - hex < number
  // - separator < tag < operator
  // - path < substitution < pattern
  // - set < number
  // - word last
  if (isString (token, type, '\'') ||
      isString (token, type, '"')  ||
      isDate   (token, type)       ||
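The ordering comment above governs the incremental interface, bool Lexer::token (std::string&, Lexer::Type&). A minimal usage sketch follows; it is not part of this commit, the input string is invented, and linking against Lexer.cpp may also need the global Context object that test/lexer.t.cpp declares:

// Sketch only: drive the streaming lexer and print each token with its type.
#include <iostream>
#include <string>
#include "Lexer.h"

int main ()
{
  // Hypothetical command-line fragment; any text would do.
  Lexer lexer ("due:eow +next 'pay rent'");

  std::string token;
  Lexer::Type type;

  // token () yields one classified token per call and returns false at end of input.
  while (lexer.token (token, type))
    std::cout << Lexer::typeToString (type) << " " << token << "\n";

  return 0;
}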
@@ -105,35 +97,6 @@ bool Lexer::token (std::string& token, Lexer::Type& type)
    return false;
}

////////////////////////////////////////////////////////////////////////////////
// Classify the whole token.
Lexer::Type Lexer::token (const std::string& token)
{
/*
  if      (isString (token, '\''))   return Lexer::Type:string;
  else if (isString (token, '"'))    return Lexer::Type:string;
  else if (isDate (token))           return Lexer::Type:date;
  else if (isDuration (token))       return Lexer::Type:duration;
  else if (isURL (token))            return Lexer::Type:url;
  else if (isPair (token))           return Lexer::Type:pair;
  else if (isSet (token))            return Lexer::Type:set;
  else if (isDOM (token))            return Lexer::Type:dom;
  else if (isUUID (token))           return Lexer::Type:uuid;
  else if (isHexNumber (token))      return Lexer::Type:hex;
  else if (isNumber (token))         return Lexer::Type:number;
  else if (isSeparator (token))      return Lexer::Type:separator;
  else*/ if (isTag (token))          return Lexer::Type::tag;
/*
  else if (isPath (token))           return Lexer::Type:path;
  else if (isSubstitution (token))   return Lexer::Type:substitution;
  else if (isPattern (token))        return Lexer::Type:pattern;
  else if (isOperator (token))       return Lexer::Type:op;
  else if (isIdentifier (token))     return Lexer::Type:identifier;
  else if (isWord (token))           return Lexer::Type:word;
*/
  return Lexer::Type::word;
}

////////////////////////////////////////////////////////////////////////////////
// This static method tokenizes the input and provides a vector of token/type
// results from a high-level lex.
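The comment above introduces the static tokens () helper declared in src/Lexer.h. A short sketch of how it could be called; this is not part of the commit and the input string is invented:

// Sketch only: one-shot, high-level lex of a whole string.
#include <iostream>
#include "Lexer.h"

int main ()
{
  // Each element pairs the raw token text with its classified Lexer::Type.
  for (const auto& t : Lexer::tokens ("project:Home +garden wait:eom"))
    std::cout << Lexer::typeToString (t.second) << " '" << t.first << "'\n";

  return 0;
}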
@@ -1257,143 +1220,4 @@ bool Lexer::isOneWord (const std::string& text)
  return true;
}

/*
////////////////////////////////////////////////////////////////////////////////
bool Lexer::isString (const std::string& input)
{
  return false;
}

////////////////////////////////////////////////////////////////////////////////
bool Lexer::isDate (const std::string& input)
{
  return false;
}

////////////////////////////////////////////////////////////////////////////////
bool Lexer::isDuration (const std::string& input)
{
  return false;
}

////////////////////////////////////////////////////////////////////////////////
bool Lexer::isUUID (const std::string& input)
{
  return false;
}

////////////////////////////////////////////////////////////////////////////////
bool Lexer::isNumber (const std::string& input)
{
  return false;
}

////////////////////////////////////////////////////////////////////////////////
bool Lexer::isHexNumber (const std::string& input)
{
  return false;
}

////////////////////////////////////////////////////////////////////////////////
bool Lexer::isSeparator (const std::string& input)
{
  return false;
}

////////////////////////////////////////////////////////////////////////////////
bool Lexer::isURL (const std::string& input)
{
  return false;
}

////////////////////////////////////////////////////////////////////////////////
bool Lexer::isPair (const std::string& input)
{
  return false;
}

////////////////////////////////////////////////////////////////////////////////
bool Lexer::isSet (const std::string& input)
{
  return false;
}
*/

////////////////////////////////////////////////////////////////////////////////
bool Lexer::isTag (const std::string& input)
{
  return (input[0] == '+' ||
          input[0] == '-')            &&
         isIdentifierStart (input[0]) &&
         input.length () > 1;
}
/*
////////////////////////////////////////////////////////////////////////////////
bool Lexer::isPath (const std::string& input)
{
  return false;
}

////////////////////////////////////////////////////////////////////////////////
bool Lexer::isSubstitution (const std::string& input)
{
  return false;
}

////////////////////////////////////////////////////////////////////////////////
bool Lexer::isPattern (const std::string& input)
{
  return false;
}

////////////////////////////////////////////////////////////////////////////////
bool Lexer::isOperator (const std::string& input)
{
  return false;
}

////////////////////////////////////////////////////////////////////////////////
bool Lexer::isDOM (const std::string& input)
{
  return false;
}

////////////////////////////////////////////////////////////////////////////////
bool Lexer::isIdentifier (const std::string& input)
{
  return false;
}

////////////////////////////////////////////////////////////////////////////////
bool Lexer::isWord (const std::string& input)
{
  return false;
}

////////////////////////////////////////////////////////////////////////////////
bool Lexer::isContiguous (const std::string& input)
{
  return false;
}
*/

////////////////////////////////////////////////////////////////////////////////
src/Lexer.h (27 changed lines)
@@ -51,11 +51,9 @@ public:
              dom, identifier, word,
              date, duration };

  Lexer ();
  Lexer (const std::string&);
  ~Lexer ();
  bool token (std::string&, Lexer::Type&);
  Lexer::Type token (const std::string&);
  static std::vector <std::pair <std::string, Lexer::Type>> tokens (const std::string&);
  static std::vector <std::string> split (const std::string&);
  static std::string typeToString (Lexer::Type);
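For reference, a hedged sketch exercising the other static helpers declared above, split () and typeToString (); it is not part of the commit and the strings are invented:

// Sketch only: the static helpers can be used without constructing a Lexer.
#include <iostream>
#include <string>
#include "Lexer.h"

int main ()
{
  // split () is declared above to return plain std::strings, with no type information.
  for (const auto& word : Lexer::split ("one  two   three"))
    std::cout << "[" << word << "]\n";

  // typeToString () renders an enum value, e.g. Lexer::Type::word, as text.
  std::cout << Lexer::typeToString (Lexer::Type::word) << "\n";

  return 0;
}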
@@ -105,31 +103,6 @@ public:
  bool isWord (std::string&, Lexer::Type&);
  bool isContiguous (std::string&, Lexer::Type&);

  // Token Classifiers.
/*
  bool isString (const std::string&);
  bool isDate (const std::string&);
  bool isDuration (const std::string&);
  bool isUUID (const std::string&);
  bool isNumber (const std::string&);
  bool isHexNumber (const std::string&);
  bool isSeparator (const std::string&);
  bool isURL (const std::string&);
  bool isPair (const std::string&);
  bool isSet (const std::string&);
*/
  bool isTag (const std::string&);
/*
  bool isPath (const std::string&);
  bool isSubstitution (const std::string&);
  bool isPattern (const std::string&);
  bool isOperator (const std::string&);
  bool isDOM (const std::string&);
  bool isIdentifier (const std::string&);
  bool isWord (const std::string&);
  bool isContiguous (const std::string&);
*/

private:
  std::string _text;
  std::size_t _cursor;
test/lexer.t.cpp (147 changed lines)
@@ -36,7 +36,7 @@ Context context;
////////////////////////////////////////////////////////////////////////////////
int main (int argc, char** argv)
{
-  UnitTest t (912);
+  UnitTest t (806);

  std::vector <std::pair <std::string, Lexer::Type>> tokens;
  std::string token;
@@ -390,151 +390,6 @@ int main (int argc, char** argv)
    }
  }

  // Test individual token classification.
  Lexer l4;

  // Pattern
  t.ok (l4.token ("/foo/") == Lexer::Type::pattern, "token Lexer::Type:pattern 1");
  t.ok (l4.token ("/a\\/b/") == Lexer::Type::pattern, "token Lexer::Type:pattern 2");
  t.ok (l4.token ("/'/") == Lexer::Type::pattern, "token Lexer::Type:pattern 3");

  // Substitution
  t.ok (l4.token ("/from/to/g") == Lexer::Type::substitution, "token Lexer::Type:substitution 1");
  t.ok (l4.token ("/from/to/") == Lexer::Type::substitution, "token Lexer::Type:substitution 2");

  // Tag
  t.ok (l4.token ("+tag") == Lexer::Type::tag, "token Lexer::Type:tag 1");
  t.ok (l4.token ("-tag") == Lexer::Type::tag, "token Lexer::Type:tag 2");
  t.ok (l4.token ("+@tag") == Lexer::Type::tag, "token Lexer::Type:tag 3");

  // Path
  t.ok (l4.token ("/long/path/to/file.txt") == Lexer::Type::path, "token Lexer::Type:path 1");

  // Word
  t.ok (l4.token ("9th") == Lexer::Type::word, "token Lexer::Type:word 1");
  t.ok (l4.token ("10th") == Lexer::Type::word, "token Lexer::Type:word 2");

  // DOM
  t.ok (l4.token ("foo") == Lexer::Type::dom, "token Lexer::Type:dom 1");
  t.ok (l4.token ("Çirçös") == Lexer::Type::dom, "token Lexer::Type:dom 2");
  t.ok (l4.token ("☺") == Lexer::Type::dom, "token Lexer::Type:dom 3");
  t.ok (l4.token ("name") == Lexer::Type::dom, "token Lexer::Type:dom 4");
  t.ok (l4.token ("f1") == Lexer::Type::dom, "token Lexer::Type:dom 5");
  t.ok (l4.token ("foo.bar") == Lexer::Type::dom, "token Lexer::Type:dom 6");
  t.ok (l4.token ("1.foo.bar") == Lexer::Type::dom, "token Lexer::Type:dom 7");
  t.ok (l4.token ("a360fc44-315c-4366-b70c-ea7e7520b749.foo.bar") == Lexer::Type::dom, "token Lexer::Type:dom 8");
  t.ok (l4.token ("today") == Lexer::Type::dom, "token Lexer::Type:dom 9");

  // URL
  t.ok (l4.token ("http://tasktools.org") == Lexer::Type::url, "token Lexer::Type:url 1");
  t.ok (l4.token ("https://bug.tasktools.org") == Lexer::Type::url, "token Lexer::Type:url 2");

  // String
  t.ok (l4.token ("'one two'") == Lexer::Type::string, "token Lexer::Type:string 1");
  t.ok (l4.token ("\"three\"") == Lexer::Type::string, "token Lexer::Type:string 2");
  t.ok (l4.token ("'\\''") == Lexer::Type::string, "token Lexer::Type:string 3");
  t.ok (l4.token ("\"\\\"\"") == Lexer::Type::string, "token Lexer::Type:string 4");
  t.ok (l4.token ("\"\tfoo\t\"") == Lexer::Type::string, "token Lexer::Type:string 5");
  t.ok (l4.token ("\"\\u20A43\"") == Lexer::Type::string, "token Lexer::Type:string 6");
  t.ok (l4.token ("\"U+20AC4\"") == Lexer::Type::string, "token Lexer::Type:string 7");

  // Number
  t.ok (l4.token ("1") == Lexer::Type::number, "token Lexer::Type:number 1");
  t.ok (l4.token ("3.14") == Lexer::Type::number, "token Lexer::Type:number 2");
  t.ok (l4.token ("6.02217e23") == Lexer::Type::number, "token Lexer::Type:number 3");
  t.ok (l4.token ("1.2e-3.4") == Lexer::Type::number, "token Lexer::Type:number 4");

  // Hex
  t.ok (l4.token ("0x2f") == Lexer::Type::hex, "token Lexer::Type:hex 1");

  // Set (1,2,4-7,9)
  t.ok (l4.token ("1,2") == Lexer::Type::set, "token Lexer::Type:set 1");
  t.ok (l4.token ("1-2") == Lexer::Type::set, "token Lexer::Type:set 2");
  t.ok (l4.token ("1-2,4") == Lexer::Type::set, "token Lexer::Type:set 3");
  t.ok (l4.token ("1-2,4,6-8") == Lexer::Type::set, "token Lexer::Type:set 4");
  t.ok (l4.token ("1-2,4,6-8,10-12") == Lexer::Type::set, "token Lexer::Type:set 5");

  // Pair
  t.ok (l4.token ("name:value") == Lexer::Type::pair, "token Lexer::Type:pair 1");
  t.ok (l4.token ("name=value") == Lexer::Type::pair, "token Lexer::Type:pair 2");
  t.ok (l4.token ("name:=value") == Lexer::Type::pair, "token Lexer::Type:pair 3");
  t.ok (l4.token ("name.mod:value") == Lexer::Type::pair, "token Lexer::Type:pair 4");
  t.ok (l4.token ("name.mod=value") == Lexer::Type::pair, "token Lexer::Type:pair 5");
  t.ok (l4.token ("name:") == Lexer::Type::pair, "token Lexer::Type:pair 6");
  t.ok (l4.token ("name=") == Lexer::Type::pair, "token Lexer::Type:pair 7");
  t.ok (l4.token ("name.mod:") == Lexer::Type::pair, "token Lexer::Type:pair 8");
  t.ok (l4.token ("name.mod=") == Lexer::Type::pair, "token Lexer::Type:pair 9");
  t.ok (l4.token ("pro:'P 1'") == Lexer::Type::pair, "token Lexer::Type:pair 10");
  t.ok (l4.token ("rc:x") == Lexer::Type::pair, "token Lexer::Type:pair 11");
  t.ok (l4.token ("rc.name:value") == Lexer::Type::pair, "token Lexer::Type:pair 12");
  t.ok (l4.token ("rc.name=value") == Lexer::Type::pair, "token Lexer::Type:pair 13");
  t.ok (l4.token ("rc.name:=value") == Lexer::Type::pair, "token Lexer::Type:pair 14");
  t.ok (l4.token ("due:='eow - 2d'") == Lexer::Type::pair, "token Lexer::Type:pair 15");

  // Operator - complete set
  t.ok (l4.token ("^") == Lexer::Type::op, "token Lexer::Type:op 1");
  t.ok (l4.token ("!") == Lexer::Type::op, "token Lexer::Type:op 2");
  t.ok (l4.token ("_neg_") == Lexer::Type::op, "token Lexer::Type:op 3");
  t.ok (l4.token ("_pos_") == Lexer::Type::op, "token Lexer::Type:op 4");
  t.ok (l4.token ("_hastag_") == Lexer::Type::op, "token Lexer::Type:op 5");
  t.ok (l4.token ("_notag_") == Lexer::Type::op, "token Lexer::Type:op 6");
  t.ok (l4.token ("*") == Lexer::Type::op, "token Lexer::Type:op 7");
  t.ok (l4.token ("/") == Lexer::Type::op, "token Lexer::Type:op 8");
  t.ok (l4.token ("%") == Lexer::Type::op, "token Lexer::Type:op 9");
  t.ok (l4.token ("+") == Lexer::Type::op, "token Lexer::Type:op 10");
  t.ok (l4.token ("-") == Lexer::Type::op, "token Lexer::Type:op 11");
  t.ok (l4.token ("<=") == Lexer::Type::op, "token Lexer::Type:op 12");
  t.ok (l4.token (">=") == Lexer::Type::op, "token Lexer::Type:op 13");
  t.ok (l4.token (">") == Lexer::Type::op, "token Lexer::Type:op 14");
  t.ok (l4.token ("<") == Lexer::Type::op, "token Lexer::Type:op 15");
  t.ok (l4.token ("=") == Lexer::Type::op, "token Lexer::Type:op 16");
  t.ok (l4.token ("==") == Lexer::Type::op, "token Lexer::Type:op 17");
  t.ok (l4.token ("!=") == Lexer::Type::op, "token Lexer::Type:op 18");
  t.ok (l4.token ("!==") == Lexer::Type::op, "token Lexer::Type:op 19");
  t.ok (l4.token ("~") == Lexer::Type::op, "token Lexer::Type:op 20");
  t.ok (l4.token ("!~") == Lexer::Type::op, "token Lexer::Type:op 21");
  t.ok (l4.token ("and") == Lexer::Type::op, "token Lexer::Type:op 22");
  t.ok (l4.token ("or") == Lexer::Type::op, "token Lexer::Type:op 23");
  t.ok (l4.token ("xor") == Lexer::Type::op, "token Lexer::Type:op 24");
  t.ok (l4.token ("(") == Lexer::Type::op, "token Lexer::Type:op 25");
  t.ok (l4.token (")") == Lexer::Type::op, "token Lexer::Type:op 26");

  // Word that starts wih 'or', which is an operator, but should be ignored.
  t.ok (l4.token ("ordinary") == Lexer::Type::dom, "token Lexer::Type:dom 1");

  // UUID
  t.ok (l4.token ("a360fc44-315c-4366-b70c-ea7e7520b749") == Lexer::Type::uuid, "token Lexer::Type:uuid 1");
  t.ok (l4.token ("a360fc44-315c-4366-b70c-ea7e752") == Lexer::Type::uuid, "token Lexer::Type:uuid 2");
  t.ok (l4.token ("a360fc44-315c-4366-b70c") == Lexer::Type::uuid, "token Lexer::Type:uuid 3");
  t.ok (l4.token ("a360fc44-315c-4366") == Lexer::Type::uuid, "token Lexer::Type:uuid 4");
  t.ok (l4.token ("a360fc44-315c") == Lexer::Type::uuid, "token Lexer::Type:uuid 5");
  t.ok (l4.token ("a360fc44") == Lexer::Type::uuid, "token Lexer::Type:uuid 6");

  // Date
  t.ok (l4.token ("2015-W01") == Lexer::Type::date, "token Lexer::Type:date 1");
  t.ok (l4.token ("2015-02-17") == Lexer::Type::date, "token Lexer::Type:date 2");
  t.ok (l4.token ("2013-11-29T22:58:00Z") == Lexer::Type::date, "token Lexer::Type:date 3");
  t.ok (l4.token ("20131129T225800Z") == Lexer::Type::date, "token Lexer::Type:date 4");

  // Duration
  t.ok (l4.token ("year") == Lexer::Type::duration, "token Lexer::Type:duration 1");
  t.ok (l4.token ("4weeks") == Lexer::Type::duration, "token Lexer::Type:duration 2");
  t.ok (l4.token ("PT23H") == Lexer::Type::duration, "token Lexer::Type:duration 3");
  t.ok (l4.token ("1second") == Lexer::Type::duration, "token Lexer::Type:duration 4");
  t.ok (l4.token ("1s") == Lexer::Type::duration, "token Lexer::Type:duration 5");
  t.ok (l4.token ("1minute") == Lexer::Type::duration, "token Lexer::Type:duration 6");
  t.ok (l4.token ("2hour") == Lexer::Type::duration, "token Lexer::Type:duration 7");
  t.ok (l4.token ("3 days") == Lexer::Type::duration, "token Lexer::Type:duration 8");
  t.ok (l4.token ("4w") == Lexer::Type::duration, "token Lexer::Type:duration 9");
  t.ok (l4.token ("5mo") == Lexer::Type::duration, "token Lexer::Type:duration 10");
  t.ok (l4.token ("6 years") == Lexer::Type::duration, "token Lexer::Type:duration 11");
  t.ok (l4.token ("P1Y") == Lexer::Type::duration, "token Lexer::Type:duration 12");
  t.ok (l4.token ("PT1H") == Lexer::Type::duration, "token Lexer::Type:duration 13");
  t.ok (l4.token ("P1Y1M1DT1H1M1S") == Lexer::Type::duration, "token Lexer::Type:duration 14");

  // Misc
  t.ok (l4.token ("--") == Lexer::Type::separator, "token Lexer::Type:separator 1");

  return 0;
}