diff --git a/src/Lexer.cpp b/src/Lexer.cpp index 1c9ccf644..b1bf136b4 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -38,14 +38,6 @@ static const unsigned int uuid_min_length = 8; std::string Lexer::dateFormat = ""; bool Lexer::isoEnabled = true; -//////////////////////////////////////////////////////////////////////////////// -Lexer::Lexer () -: _text ("") -, _cursor (0) -, _eos (0) -{ -} - //////////////////////////////////////////////////////////////////////////////// Lexer::Lexer (const std::string& text) : _text (text) @@ -73,14 +65,14 @@ bool Lexer::token (std::string& token, Lexer::Type& type) return false; // The sequence is specific, and must follow these rules: - // - date < duration < uuid < identifier - // - uuid < hex < number - // - url < pair < identifier - // - hex < number - // - separator < tag < operator - // - path < substitution < pattern - // - set < number - // - word last + // - date < duration < uuid < identifier + // - uuid < hex < number + // - url < pair < identifier + // - hex < number + // - separator < tag < operator + // - path < substitution < pattern + // - set < number + // - word last if (isString (token, type, '\'') || isString (token, type, '"') || isDate (token, type) || @@ -105,35 +97,6 @@ bool Lexer::token (std::string& token, Lexer::Type& type) return false; } -//////////////////////////////////////////////////////////////////////////////// -// Classify the whole token. -Lexer::Type Lexer::token (const std::string& token) -{ -/* - if (isString (token, '\'')) return Lexer::Type:string; - else if (isString (token, '"')) return Lexer::Type:string; - else if (isDate (token)) return Lexer::Type:date; - else if (isDuration (token)) return Lexer::Type:duration; - else if (isURL (token)) return Lexer::Type:url; - else if (isPair (token)) return Lexer::Type:pair; - else if (isSet (token)) return Lexer::Type:set; - else if (isDOM (token)) return Lexer::Type:dom; - else if (isUUID (token)) return Lexer::Type:uuid; - else if (isHexNumber (token)) return Lexer::Type:hex; - else if (isNumber (token)) return Lexer::Type:number; - else if (isSeparator (token)) return Lexer::Type:separator; - else*/ if (isTag (token)) return Lexer::Type::tag; -/* - else if (isPath (token)) return Lexer::Type:path; - else if (isSubstitution (token)) return Lexer::Type:substitution; - else if (isPattern (token)) return Lexer::Type:pattern; - else if (isOperator (token)) return Lexer::Type:op; - else if (isIdentifier (token)) return Lexer::Type:identifier; - else if (isWord (token)) return Lexer::Type:word; -*/ - return Lexer::Type::word; -} - //////////////////////////////////////////////////////////////////////////////// // This static method tokenizes the input and provides a vector of token/type // results from a high-level lex. @@ -1257,143 +1220,4 @@ bool Lexer::isOneWord (const std::string& text) return true; } -/* -//////////////////////////////////////////////////////////////////////////////// -bool Lexer::isString (const std::string& input) -{ - - return false; -} - -//////////////////////////////////////////////////////////////////////////////// -bool Lexer::isDate (const std::string& input) -{ - - return false; -} - -//////////////////////////////////////////////////////////////////////////////// -bool Lexer::isDuration (const std::string& input) -{ - - return false; -} - -//////////////////////////////////////////////////////////////////////////////// -bool Lexer::isUUID (const std::string& input) -{ - - return false; -} - -//////////////////////////////////////////////////////////////////////////////// -bool Lexer::isNumber (const std::string& input) -{ - - return false; -} - -//////////////////////////////////////////////////////////////////////////////// -bool Lexer::isHexNumber (const std::string& input) -{ - - return false; -} - -//////////////////////////////////////////////////////////////////////////////// -bool Lexer::isSeparator (const std::string& input) -{ - - return false; -} - -//////////////////////////////////////////////////////////////////////////////// -bool Lexer::isURL (const std::string& input) -{ - - return false; -} - -//////////////////////////////////////////////////////////////////////////////// -bool Lexer::isPair (const std::string& input) -{ - - return false; -} - -//////////////////////////////////////////////////////////////////////////////// -bool Lexer::isSet (const std::string& input) -{ - - return false; -} -*/ - -//////////////////////////////////////////////////////////////////////////////// -bool Lexer::isTag (const std::string& input) -{ - return (input[0] == '+' || - input[0] == '-') && - isIdentifierStart (input[0]) && - input.length () > 1; -} - -/* -//////////////////////////////////////////////////////////////////////////////// -bool Lexer::isPath (const std::string& input) -{ - - return false; -} - -//////////////////////////////////////////////////////////////////////////////// -bool Lexer::isSubstitution (const std::string& input) -{ - - return false; -} - -//////////////////////////////////////////////////////////////////////////////// -bool Lexer::isPattern (const std::string& input) -{ - - return false; -} - -//////////////////////////////////////////////////////////////////////////////// -bool Lexer::isOperator (const std::string& input) -{ - - return false; -} - -//////////////////////////////////////////////////////////////////////////////// -bool Lexer::isDOM (const std::string& input) -{ - - return false; -} - -//////////////////////////////////////////////////////////////////////////////// -bool Lexer::isIdentifier (const std::string& input) -{ - - return false; -} - -//////////////////////////////////////////////////////////////////////////////// -bool Lexer::isWord (const std::string& input) -{ - - return false; -} - -//////////////////////////////////////////////////////////////////////////////// -bool Lexer::isContiguous (const std::string& input) -{ - - return false; -} -*/ - //////////////////////////////////////////////////////////////////////////////// diff --git a/src/Lexer.h b/src/Lexer.h index 9ae46e9bd..34f706b43 100644 --- a/src/Lexer.h +++ b/src/Lexer.h @@ -51,11 +51,9 @@ public: dom, identifier, word, date, duration }; - Lexer (); Lexer (const std::string&); ~Lexer (); bool token (std::string&, Lexer::Type&); - Lexer::Type token (const std::string&); static std::vector > tokens (const std::string&); static std::vector split (const std::string&); static std::string typeToString (Lexer::Type); @@ -105,31 +103,6 @@ public: bool isWord (std::string&, Lexer::Type&); bool isContiguous (std::string&, Lexer::Type&); - // Token Classifiers. -/* - bool isString (const std::string&); - bool isDate (const std::string&); - bool isDuration (const std::string&); - bool isUUID (const std::string&); - bool isNumber (const std::string&); - bool isHexNumber (const std::string&); - bool isSeparator (const std::string&); - bool isURL (const std::string&); - bool isPair (const std::string&); - bool isSet (const std::string&); -*/ - bool isTag (const std::string&); -/* - bool isPath (const std::string&); - bool isSubstitution (const std::string&); - bool isPattern (const std::string&); - bool isOperator (const std::string&); - bool isDOM (const std::string&); - bool isIdentifier (const std::string&); - bool isWord (const std::string&); - bool isContiguous (const std::string&); -*/ - private: std::string _text; std::size_t _cursor; diff --git a/test/lexer.t.cpp b/test/lexer.t.cpp index 063c23de5..bbc30bd40 100644 --- a/test/lexer.t.cpp +++ b/test/lexer.t.cpp @@ -36,7 +36,7 @@ Context context; //////////////////////////////////////////////////////////////////////////////// int main (int argc, char** argv) { - UnitTest t (912); + UnitTest t (806); std::vector > tokens; std::string token; @@ -390,151 +390,6 @@ int main (int argc, char** argv) } } - // Test individual token classification. - Lexer l4; - - // Pattern - t.ok (l4.token ("/foo/") == Lexer::Type::pattern, "token Lexer::Type:pattern 1"); - t.ok (l4.token ("/a\\/b/") == Lexer::Type::pattern, "token Lexer::Type:pattern 2"); - t.ok (l4.token ("/'/") == Lexer::Type::pattern, "token Lexer::Type:pattern 3"); - - // Substitution - t.ok (l4.token ("/from/to/g") == Lexer::Type::substitution, "token Lexer::Type:substitution 1"); - t.ok (l4.token ("/from/to/") == Lexer::Type::substitution, "token Lexer::Type:substitution 2"); - - // Tag - t.ok (l4.token ("+tag") == Lexer::Type::tag, "token Lexer::Type:tag 1"); - t.ok (l4.token ("-tag") == Lexer::Type::tag, "token Lexer::Type:tag 2"); - t.ok (l4.token ("+@tag") == Lexer::Type::tag, "token Lexer::Type:tag 3"); - - // Path - t.ok (l4.token ("/long/path/to/file.txt") == Lexer::Type::path, "token Lexer::Type:path 1"); - - // Word - t.ok (l4.token ("9th") == Lexer::Type::word, "token Lexer::Type:word 1"); - t.ok (l4.token ("10th") == Lexer::Type::word, "token Lexer::Type:word 2"); - - // DOM - t.ok (l4.token ("foo") == Lexer::Type::dom, "token Lexer::Type:dom 1"); - t.ok (l4.token ("Çirçös") == Lexer::Type::dom, "token Lexer::Type:dom 2"); - t.ok (l4.token ("☺") == Lexer::Type::dom, "token Lexer::Type:dom 3"); - t.ok (l4.token ("name") == Lexer::Type::dom, "token Lexer::Type:dom 4"); - t.ok (l4.token ("f1") == Lexer::Type::dom, "token Lexer::Type:dom 5"); - t.ok (l4.token ("foo.bar") == Lexer::Type::dom, "token Lexer::Type:dom 6"); - t.ok (l4.token ("1.foo.bar") == Lexer::Type::dom, "token Lexer::Type:dom 7"); - t.ok (l4.token ("a360fc44-315c-4366-b70c-ea7e7520b749.foo.bar") == Lexer::Type::dom, "token Lexer::Type:dom 8"); - t.ok (l4.token ("today") == Lexer::Type::dom, "token Lexer::Type:dom 9"); - - // URL - t.ok (l4.token ("http://tasktools.org") == Lexer::Type::url, "token Lexer::Type:url 1"); - t.ok (l4.token ("https://bug.tasktools.org") == Lexer::Type::url, "token Lexer::Type:url 2"); - - // String - t.ok (l4.token ("'one two'") == Lexer::Type::string, "token Lexer::Type:string 1"); - t.ok (l4.token ("\"three\"") == Lexer::Type::string, "token Lexer::Type:string 2"); - t.ok (l4.token ("'\\''") == Lexer::Type::string, "token Lexer::Type:string 3"); - t.ok (l4.token ("\"\\\"\"") == Lexer::Type::string, "token Lexer::Type:string 4"); - t.ok (l4.token ("\"\tfoo\t\"") == Lexer::Type::string, "token Lexer::Type:string 5"); - t.ok (l4.token ("\"\\u20A43\"") == Lexer::Type::string, "token Lexer::Type:string 6"); - t.ok (l4.token ("\"U+20AC4\"") == Lexer::Type::string, "token Lexer::Type:string 7"); - - // Number - t.ok (l4.token ("1") == Lexer::Type::number, "token Lexer::Type:number 1"); - t.ok (l4.token ("3.14") == Lexer::Type::number, "token Lexer::Type:number 2"); - t.ok (l4.token ("6.02217e23") == Lexer::Type::number, "token Lexer::Type:number 3"); - t.ok (l4.token ("1.2e-3.4") == Lexer::Type::number, "token Lexer::Type:number 4"); - - // Hex - t.ok (l4.token ("0x2f") == Lexer::Type::hex, "token Lexer::Type:hex 1"); - - // Set (1,2,4-7,9) - t.ok (l4.token ("1,2") == Lexer::Type::set, "token Lexer::Type:set 1"); - t.ok (l4.token ("1-2") == Lexer::Type::set, "token Lexer::Type:set 2"); - t.ok (l4.token ("1-2,4") == Lexer::Type::set, "token Lexer::Type:set 3"); - t.ok (l4.token ("1-2,4,6-8") == Lexer::Type::set, "token Lexer::Type:set 4"); - t.ok (l4.token ("1-2,4,6-8,10-12") == Lexer::Type::set, "token Lexer::Type:set 5"); - - // Pair - t.ok (l4.token ("name:value") == Lexer::Type::pair, "token Lexer::Type:pair 1"); - t.ok (l4.token ("name=value") == Lexer::Type::pair, "token Lexer::Type:pair 2"); - t.ok (l4.token ("name:=value") == Lexer::Type::pair, "token Lexer::Type:pair 3"); - t.ok (l4.token ("name.mod:value") == Lexer::Type::pair, "token Lexer::Type:pair 4"); - t.ok (l4.token ("name.mod=value") == Lexer::Type::pair, "token Lexer::Type:pair 5"); - t.ok (l4.token ("name:") == Lexer::Type::pair, "token Lexer::Type:pair 6"); - t.ok (l4.token ("name=") == Lexer::Type::pair, "token Lexer::Type:pair 7"); - t.ok (l4.token ("name.mod:") == Lexer::Type::pair, "token Lexer::Type:pair 8"); - t.ok (l4.token ("name.mod=") == Lexer::Type::pair, "token Lexer::Type:pair 9"); - t.ok (l4.token ("pro:'P 1'") == Lexer::Type::pair, "token Lexer::Type:pair 10"); - t.ok (l4.token ("rc:x") == Lexer::Type::pair, "token Lexer::Type:pair 11"); - t.ok (l4.token ("rc.name:value") == Lexer::Type::pair, "token Lexer::Type:pair 12"); - t.ok (l4.token ("rc.name=value") == Lexer::Type::pair, "token Lexer::Type:pair 13"); - t.ok (l4.token ("rc.name:=value") == Lexer::Type::pair, "token Lexer::Type:pair 14"); - t.ok (l4.token ("due:='eow - 2d'") == Lexer::Type::pair, "token Lexer::Type:pair 15"); - - // Operator - complete set - t.ok (l4.token ("^") == Lexer::Type::op, "token Lexer::Type:op 1"); - t.ok (l4.token ("!") == Lexer::Type::op, "token Lexer::Type:op 2"); - t.ok (l4.token ("_neg_") == Lexer::Type::op, "token Lexer::Type:op 3"); - t.ok (l4.token ("_pos_") == Lexer::Type::op, "token Lexer::Type:op 4"); - t.ok (l4.token ("_hastag_") == Lexer::Type::op, "token Lexer::Type:op 5"); - t.ok (l4.token ("_notag_") == Lexer::Type::op, "token Lexer::Type:op 6"); - t.ok (l4.token ("*") == Lexer::Type::op, "token Lexer::Type:op 7"); - t.ok (l4.token ("/") == Lexer::Type::op, "token Lexer::Type:op 8"); - t.ok (l4.token ("%") == Lexer::Type::op, "token Lexer::Type:op 9"); - t.ok (l4.token ("+") == Lexer::Type::op, "token Lexer::Type:op 10"); - t.ok (l4.token ("-") == Lexer::Type::op, "token Lexer::Type:op 11"); - t.ok (l4.token ("<=") == Lexer::Type::op, "token Lexer::Type:op 12"); - t.ok (l4.token (">=") == Lexer::Type::op, "token Lexer::Type:op 13"); - t.ok (l4.token (">") == Lexer::Type::op, "token Lexer::Type:op 14"); - t.ok (l4.token ("<") == Lexer::Type::op, "token Lexer::Type:op 15"); - t.ok (l4.token ("=") == Lexer::Type::op, "token Lexer::Type:op 16"); - t.ok (l4.token ("==") == Lexer::Type::op, "token Lexer::Type:op 17"); - t.ok (l4.token ("!=") == Lexer::Type::op, "token Lexer::Type:op 18"); - t.ok (l4.token ("!==") == Lexer::Type::op, "token Lexer::Type:op 19"); - t.ok (l4.token ("~") == Lexer::Type::op, "token Lexer::Type:op 20"); - t.ok (l4.token ("!~") == Lexer::Type::op, "token Lexer::Type:op 21"); - t.ok (l4.token ("and") == Lexer::Type::op, "token Lexer::Type:op 22"); - t.ok (l4.token ("or") == Lexer::Type::op, "token Lexer::Type:op 23"); - t.ok (l4.token ("xor") == Lexer::Type::op, "token Lexer::Type:op 24"); - t.ok (l4.token ("(") == Lexer::Type::op, "token Lexer::Type:op 25"); - t.ok (l4.token (")") == Lexer::Type::op, "token Lexer::Type:op 26"); - - // Word that starts wih 'or', which is an operator, but should be ignored. - t.ok (l4.token ("ordinary") == Lexer::Type::dom, "token Lexer::Type:dom 1"); - - // UUID - t.ok (l4.token ("a360fc44-315c-4366-b70c-ea7e7520b749") == Lexer::Type::uuid, "token Lexer::Type:uuid 1"); - t.ok (l4.token ("a360fc44-315c-4366-b70c-ea7e752") == Lexer::Type::uuid, "token Lexer::Type:uuid 2"); - t.ok (l4.token ("a360fc44-315c-4366-b70c") == Lexer::Type::uuid, "token Lexer::Type:uuid 3"); - t.ok (l4.token ("a360fc44-315c-4366") == Lexer::Type::uuid, "token Lexer::Type:uuid 4"); - t.ok (l4.token ("a360fc44-315c") == Lexer::Type::uuid, "token Lexer::Type:uuid 5"); - t.ok (l4.token ("a360fc44") == Lexer::Type::uuid, "token Lexer::Type:uuid 6"); - - // Date - t.ok (l4.token ("2015-W01") == Lexer::Type::date, "token Lexer::Type:date 1"); - t.ok (l4.token ("2015-02-17") == Lexer::Type::date, "token Lexer::Type:date 2"); - t.ok (l4.token ("2013-11-29T22:58:00Z") == Lexer::Type::date, "token Lexer::Type:date 3"); - t.ok (l4.token ("20131129T225800Z") == Lexer::Type::date, "token Lexer::Type:date 4"); - - // Duration - t.ok (l4.token ("year") == Lexer::Type::duration, "token Lexer::Type:duration 1"); - t.ok (l4.token ("4weeks") == Lexer::Type::duration, "token Lexer::Type:duration 2"); - t.ok (l4.token ("PT23H") == Lexer::Type::duration, "token Lexer::Type:duration 3"); - t.ok (l4.token ("1second") == Lexer::Type::duration, "token Lexer::Type:duration 4"); - t.ok (l4.token ("1s") == Lexer::Type::duration, "token Lexer::Type:duration 5"); - t.ok (l4.token ("1minute") == Lexer::Type::duration, "token Lexer::Type:duration 6"); - t.ok (l4.token ("2hour") == Lexer::Type::duration, "token Lexer::Type:duration 7"); - t.ok (l4.token ("3 days") == Lexer::Type::duration, "token Lexer::Type:duration 8"); - t.ok (l4.token ("4w") == Lexer::Type::duration, "token Lexer::Type:duration 9"); - t.ok (l4.token ("5mo") == Lexer::Type::duration, "token Lexer::Type:duration 10"); - t.ok (l4.token ("6 years") == Lexer::Type::duration, "token Lexer::Type:duration 11"); - t.ok (l4.token ("P1Y") == Lexer::Type::duration, "token Lexer::Type:duration 12"); - t.ok (l4.token ("PT1H") == Lexer::Type::duration, "token Lexer::Type:duration 13"); - t.ok (l4.token ("P1Y1M1DT1H1M1S") == Lexer::Type::duration, "token Lexer::Type:duration 14"); - - // Misc - t.ok (l4.token ("--") == Lexer::Type::separator, "token Lexer::Type:separator 1"); - return 0; }