From 745aad0d275c4eb74f59f55c76058c7f1068d033 Mon Sep 17 00:00:00 2001 From: Paul Beckingham Date: Sun, 22 Feb 2015 17:46:22 -0500 Subject: [PATCH] Lexer - Renamed Lexer2 to Lexer, it looks good enough to assume control. --- src/CLI.cpp | 40 +-- src/CLI.h | 12 +- src/CMakeLists.txt | 2 +- src/Context.cpp | 4 +- src/Duration.cpp | 6 +- src/Eval.cpp | 146 +++++------ src/Eval.h | 32 +-- src/{Lexer2.cpp => Lexer.cpp} | 270 ++++++++++---------- src/{Lexer2.h => Lexer.h} | 56 ++--- src/Nibbler.cpp | 8 +- src/Variant.cpp | 56 ++--- src/commands/CmdCustom.cpp | 6 +- src/lex.cpp | 14 +- src/text.cpp | 6 +- test/lexer.t.cpp | 448 +++++++++++++++++----------------- 15 files changed, 553 insertions(+), 553 deletions(-) rename src/{Lexer2.cpp => Lexer.cpp} (76%) rename src/{Lexer2.h => Lexer.h} (66%) diff --git a/src/CLI.cpp b/src/CLI.cpp index b8af8092f..67f602aac 100644 --- a/src/CLI.cpp +++ b/src/CLI.cpp @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include @@ -661,13 +661,13 @@ void CLI::addArg (const std::string& arg) // that cause the lexemes to be ignored, and the original arugment used // intact. std::string lexeme; - Lexer2::Type type; - Lexer2 lex (raw); + Lexer::Type type; + Lexer lex (raw); lex.ambiguity (false); - std::vector > lexemes; + std::vector > lexemes; while (lex.token (lexeme, type)) - lexemes.push_back (std::pair (lexeme, type)); + lexemes.push_back (std::pair (lexeme, type)); if (disqualifyInsufficientTerms (lexemes) || disqualifyNoOps (lexemes) || @@ -681,7 +681,7 @@ void CLI::addArg (const std::string& arg) { // How often have I said to you that when you have eliminated the // impossible, whatever remains, however improbable, must be the truth? - std::vector >::iterator l; + std::vector >::iterator l; for (l = lexemes.begin (); l != lexemes.end (); ++l) _original_args.push_back (l->first); } @@ -713,7 +713,7 @@ void CLI::aliasExpansion () { if (_aliases.find (raw) != _aliases.end ()) { - std::vector lexed = Lexer2::split (_aliases[raw]); + std::vector lexed = Lexer::split (_aliases[raw]); std::vector ::iterator l; for (l = lexed.begin (); l != lexed.end (); ++l) { @@ -1636,7 +1636,7 @@ void CLI::desugarFilterPlainArgs () reconstructed.push_back (op); std::string pattern = a->attribute ("raw"); - Lexer2::dequote (pattern); + Lexer::dequote (pattern); A rhs ("argPattern", "'" + pattern + "'"); rhs.tag ("LITERAL"); rhs.tag ("FILTER"); @@ -1812,7 +1812,7 @@ void CLI::injectDefaults () if (defaultCommand != "") { // Split the defaultCommand into separate args. - std::vector tokens = Lexer2::split (defaultCommand); + std::vector tokens = Lexer::split (defaultCommand); // Modify _args to be: [ ...] [...] std::vector reconstructed; @@ -2302,9 +2302,9 @@ bool CLI::isName (const std::string& raw) const { for (int i = 0; i < raw.length (); ++i) { - if (i == 0 && ! Lexer2::isIdentifierStart (raw[i])) + if (i == 0 && ! Lexer::isIdentifierStart (raw[i])) return false; - else if (! Lexer2::isIdentifierNext (raw[i])) + else if (! Lexer::isIdentifierNext (raw[i])) return false; } @@ -2316,19 +2316,19 @@ bool CLI::isName (const std::string& raw) const //////////////////////////////////////////////////////////////////////////////// bool CLI::disqualifyInsufficientTerms ( - const std::vector >& lexemes) const + const std::vector >& lexemes) const { return lexemes.size () < 3 ? 
true : false; } //////////////////////////////////////////////////////////////////////////////// bool CLI::disqualifyNoOps ( - const std::vector >& lexemes) const + const std::vector >& lexemes) const { bool foundOP = false; - std::vector >::const_iterator l; + std::vector >::const_iterator l; for (l = lexemes.begin (); l != lexemes.end (); ++l) - if (l->second == Lexer2::Type::op) + if (l->second == Lexer::Type::op) foundOP = true; return ! foundOP; @@ -2336,16 +2336,16 @@ bool CLI::disqualifyNoOps ( //////////////////////////////////////////////////////////////////////////////// bool CLI::disqualifyOnlyParenOps ( - const std::vector >& lexemes) const + const std::vector >& lexemes) const { int opCount = 0; int opSugarCount = 0; int opParenCount = 0; - std::vector >::const_iterator l; + std::vector >::const_iterator l; for (l = lexemes.begin (); l != lexemes.end (); ++l) { - if (l->second == Lexer2::Type::op) + if (l->second == Lexer::Type::op) { ++opCount; @@ -2372,7 +2372,7 @@ bool CLI::disqualifyOnlyParenOps ( // as there are no operators in between, which includes syntactic sugar that // hides operators. bool CLI::disqualifyFirstLastBinary ( - const std::vector >& lexemes) const + const std::vector >& lexemes) const { bool firstBinary = false; bool lastBinary = false; @@ -2391,7 +2391,7 @@ bool CLI::disqualifyFirstLastBinary ( //////////////////////////////////////////////////////////////////////////////// // Disqualify terms when there operators hidden by syntactic sugar. bool CLI::disqualifySugarFree ( - const std::vector >& lexemes) const + const std::vector >& lexemes) const { bool sugared = true; for (unsigned int i = 1; i < lexemes.size () - 1; ++i) diff --git a/src/CLI.h b/src/CLI.h index 0a3c28221..e855db697 100644 --- a/src/CLI.h +++ b/src/CLI.h @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include @@ -126,11 +126,11 @@ private: bool isOperator (const std::string&) const; bool isName (const std::string&) const; - bool disqualifyInsufficientTerms (const std::vector >&) const; - bool disqualifyNoOps (const std::vector >&) const; - bool disqualifyOnlyParenOps (const std::vector >&) const; - bool disqualifyFirstLastBinary (const std::vector >&) const; - bool disqualifySugarFree (const std::vector >&) const; + bool disqualifyInsufficientTerms (const std::vector >&) const; + bool disqualifyNoOps (const std::vector >&) const; + bool disqualifyOnlyParenOps (const std::vector >&) const; + bool disqualifyFirstLastBinary (const std::vector >&) const; + bool disqualifySugarFree (const std::vector >&) const; public: std::multimap _entities; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 51fec6e28..ec1d5deed 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -20,7 +20,7 @@ set (task_SRCS CLI.cpp CLI.h Hooks.cpp Hooks.h ISO8601.cpp ISO8601.h JSON.cpp JSON.h - Lexer2.cpp Lexer2.h + Lexer.cpp Lexer.h Msg.cpp Msg.h Nibbler.cpp Nibbler.h Path.cpp Path.h diff --git a/src/Context.cpp b/src/Context.cpp index 33852e8a1..4d0e6bc0b 100644 --- a/src/Context.cpp +++ b/src/Context.cpp @@ -657,8 +657,8 @@ void Context::staticInitialization () Task::searchCaseSensitive = Variant::searchCaseSensitive = config.getBoolean ("search.case.sensitive"); Task::regex = Variant::searchUsingRegex = config.getBoolean ("regex"); - Lexer2::dateFormat = Variant::dateFormat = config.get ("dateformat"); - Lexer2::isoEnabled = Variant::isoEnabled = config.getBoolean ("date.iso"); + Lexer::dateFormat = Variant::dateFormat = config.get ("dateformat"); + Lexer::isoEnabled = 
Variant::isoEnabled = config.getBoolean ("date.iso"); std::map ::iterator i; for (i = columns.begin (); i != columns.end (); ++i) diff --git a/src/Duration.cpp b/src/Duration.cpp index e719dc227..fc98fecb4 100644 --- a/src/Duration.cpp +++ b/src/Duration.cpp @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include @@ -295,7 +295,7 @@ bool Duration::parse (const std::string& input, std::string::size_type& start) if (n.getOneOf (units, unit)) { if (n.depleted () || - Lexer2::isWhitespace (n.next ())) + Lexer::isWhitespace (n.next ())) { start = original_start + n.cursor (); @@ -319,7 +319,7 @@ bool Duration::parse (const std::string& input, std::string::size_type& start) if (n.getOneOf (units, unit)) { if (n.depleted () || - Lexer2::isWhitespace (n.next ())) + Lexer::isWhitespace (n.next ())) { start = original_start + n.cursor (); double quantity = strtod (number.c_str (), NULL); diff --git a/src/Eval.cpp b/src/Eval.cpp index df00acc50..acaa1a760 100644 --- a/src/Eval.cpp +++ b/src/Eval.cpp @@ -125,13 +125,13 @@ void Eval::addSource (bool (*source)(const std::string&, Variant&)) void Eval::evaluateInfixExpression (const std::string& e, Variant& v) const { // Reduce e to a vector of tokens. - Lexer2 l (e); + Lexer l (e); l.ambiguity (_ambiguity); - std::vector > tokens; + std::vector > tokens; std::string token; - Lexer2::Type type; + Lexer::Type type; while (l.token (token, type)) - tokens.push_back (std::pair (token, type)); + tokens.push_back (std::pair (token, type)); // Parse for syntax checking and operator replacement. if (_debug) @@ -153,13 +153,13 @@ void Eval::evaluateInfixExpression (const std::string& e, Variant& v) const void Eval::evaluatePostfixExpression (const std::string& e, Variant& v) const { // Reduce e to a vector of tokens. - Lexer2 l (e); + Lexer l (e); l.ambiguity (_ambiguity); - std::vector > tokens; + std::vector > tokens; std::string token; - Lexer2::Type type; + Lexer::Type type; while (l.token (token, type)) - tokens.push_back (std::pair (token, type)); + tokens.push_back (std::pair (token, type)); if (_debug) context.debug ("FILTER Postfix " + dump (tokens)); @@ -172,15 +172,15 @@ void Eval::evaluatePostfixExpression (const std::string& e, Variant& v) const void Eval::compileExpression (const std::string& e) { // Reduce e to a vector of tokens. - Lexer2 l (e); + Lexer l (e); l.ambiguity (_ambiguity); std::string token; - Lexer2::Type type; + Lexer::Type type; while (l.token (token, type)) { if (_debug) - context.debug ("Lexer '" + token + "' " + Lexer2::typeToString (type)); - _compiled.push_back (std::pair (token, type)); + context.debug ("Lexer '" + token + "' " + Lexer::typeToString (type)); + _compiled.push_back (std::pair (token, type)); } // Parse for syntax checking and operator replacement. @@ -236,7 +236,7 @@ void Eval::getBinaryOperators (std::vector & all) //////////////////////////////////////////////////////////////////////////////// void Eval::evaluatePostfixStack ( - const std::vector >& tokens, + const std::vector >& tokens, Variant& result) const { if (tokens.size () == 0) @@ -245,11 +245,11 @@ void Eval::evaluatePostfixStack ( // This is stack used by the postfix evaluator. std::vector values; - std::vector >::const_iterator token; + std::vector >::const_iterator token; for (token = tokens.begin (); token != tokens.end (); ++token) { // Unary operators. 
- if (token->second == Lexer2::Type::op && + if (token->second == Lexer::Type::op && token->first == "!") { if (values.size () < 1) @@ -262,7 +262,7 @@ void Eval::evaluatePostfixStack ( if (_debug) context.debug (format ("Eval {1} ↓'{2}' → ↑'{3}'", token->first, (std::string) right, (std::string) result)); } - else if (token->second == Lexer2::Type::op && + else if (token->second == Lexer::Type::op && token->first == "_neg_") { if (values.size () < 1) @@ -278,7 +278,7 @@ void Eval::evaluatePostfixStack ( if (_debug) context.debug (format ("Eval {1} ↓'{2}' → ↑'{3}'", token->first, (std::string) right, (std::string) result)); } - else if (token->second == Lexer2::Type::op && + else if (token->second == Lexer::Type::op && token->first == "_pos_") { // The _pos_ operator is a NOP. @@ -287,7 +287,7 @@ void Eval::evaluatePostfixStack ( } // Binary operators. - else if (token->second == Lexer2::Type::op) + else if (token->second == Lexer::Type::op) { if (values.size () < 2) throw std::string (STRING_EVAL_NO_EVAL); @@ -338,8 +338,8 @@ void Eval::evaluatePostfixStack ( Variant v (token->first); switch (token->second) { - case Lexer2::Type::number: - if (Lexer2::isAllDigits (token->first)) + case Lexer::Type::number: + if (Lexer::isAllDigits (token->first)) { v.cast (Variant::type_integer); if (_debug) @@ -354,11 +354,11 @@ void Eval::evaluatePostfixStack ( break; - case Lexer2::Type::op: + case Lexer::Type::op: throw std::string (STRING_EVAL_OP_EXPECTED); break; - case Lexer2::Type::identifier: + case Lexer::Type::identifier: { bool found = false; std::vector ::const_iterator source; @@ -383,13 +383,13 @@ void Eval::evaluatePostfixStack ( } break; - case Lexer2::Type::date: + case Lexer::Type::date: v.cast (Variant::type_date); if (_debug) context.debug (format ("Eval literal date ↑'{1}'", (std::string) v)); break; - case Lexer2::Type::duration: + case Lexer::Type::duration: v.cast (Variant::type_duration); if (_debug) context.debug (format ("Eval literal duration ↑'{1}'", (std::string) v)); @@ -397,19 +397,19 @@ void Eval::evaluatePostfixStack ( // Nothing to do. 
/* - case Lexer2::Type::uuid: - case Lexer2::Type::hex: - case Lexer2::Type::list: - case Lexer2::Type::url: - case Lexer2::Type::pair: - case Lexer2::Type::separator: - case Lexer2::Type::tag: - case Lexer2::Type::path: - case Lexer2::Type::substitution: - case Lexer2::Type::pattern: - case Lexer2::Type::word: + case Lexer::Type::uuid: + case Lexer::Type::hex: + case Lexer::Type::list: + case Lexer::Type::url: + case Lexer::Type::pair: + case Lexer::Type::separator: + case Lexer::Type::tag: + case Lexer::Type::path: + case Lexer::Type::substitution: + case Lexer::Type::pattern: + case Lexer::Type::word: */ - case Lexer2::Type::string: + case Lexer::Type::string: default: if (_debug) context.debug (format ("Eval literal string ↑'{1}'", (std::string) v)); @@ -443,7 +443,7 @@ void Eval::evaluatePostfixStack ( // Primitive --> "(" Logical ")" | Variant // void Eval::infixParse ( - std::vector >& infix) const + std::vector >& infix) const { int i = 0; parseLogical (infix, i); @@ -452,14 +452,14 @@ void Eval::infixParse ( //////////////////////////////////////////////////////////////////////////////// // Logical --> Regex {( "and" | "or" | "xor" ) Regex} bool Eval::parseLogical ( - std::vector >& infix, + std::vector >& infix, int &i) const { if (i < infix.size () && parseRegex (infix, i)) { while (i < infix.size () && - infix[i].second == Lexer2::Type::op && + infix[i].second == Lexer::Type::op && (infix[i].first == "and" || infix[i].first == "or" || infix[i].first == "xor")) @@ -478,14 +478,14 @@ bool Eval::parseLogical ( //////////////////////////////////////////////////////////////////////////////// // Regex --> Equality {( "~" | "!~" ) Equality} bool Eval::parseRegex ( - std::vector >& infix, + std::vector >& infix, int &i) const { if (i < infix.size () && parseEquality (infix, i)) { while (i < infix.size () && - infix[i].second == Lexer2::Type::op && + infix[i].second == Lexer::Type::op && (infix[i].first == "~" || infix[i].first == "!~")) { @@ -503,14 +503,14 @@ bool Eval::parseRegex ( //////////////////////////////////////////////////////////////////////////////// // Equality --> Comparative {( "==" | "=" | "!==" | "!=" ) Comparative} bool Eval::parseEquality ( - std::vector >& infix, + std::vector >& infix, int &i) const { if (i < infix.size () && parseComparative (infix, i)) { while (i < infix.size () && - infix[i].second == Lexer2::Type::op && + infix[i].second == Lexer::Type::op && (infix[i].first == "==" || infix[i].first == "=" || infix[i].first == "!==" || @@ -530,14 +530,14 @@ bool Eval::parseEquality ( //////////////////////////////////////////////////////////////////////////////// // Comparative --> Arithmetic {( "<=" | "<" | ">=" | ">" ) Arithmetic} bool Eval::parseComparative ( - std::vector >& infix, + std::vector >& infix, int &i) const { if (i < infix.size () && parseArithmetic (infix, i)) { while (i < infix.size () && - infix[i].second == Lexer2::Type::op && + infix[i].second == Lexer::Type::op && (infix[i].first == "<=" || infix[i].first == "<" || infix[i].first == ">=" || @@ -557,14 +557,14 @@ bool Eval::parseComparative ( //////////////////////////////////////////////////////////////////////////////// // Arithmetic --> Geometric {( "+" | "-" ) Geometric} bool Eval::parseArithmetic ( - std::vector >& infix, + std::vector >& infix, int &i) const { if (i < infix.size () && parseGeometric (infix, i)) { while (i < infix.size () && - infix[i].second == Lexer2::Type::op && + infix[i].second == Lexer::Type::op && (infix[i].first == "+" || infix[i].first == "-")) { @@ -582,14 
+582,14 @@ bool Eval::parseArithmetic ( //////////////////////////////////////////////////////////////////////////////// // Geometric --> Tag {( "*" | "/" | "%" ) Tag} bool Eval::parseGeometric ( - std::vector >& infix, + std::vector >& infix, int &i) const { if (i < infix.size () && parseTag (infix, i)) { while (i < infix.size () && - infix[i].second == Lexer2::Type::op && + infix[i].second == Lexer::Type::op && (infix[i].first == "*" || infix[i].first == "/" || infix[i].first == "%")) @@ -608,14 +608,14 @@ bool Eval::parseGeometric ( //////////////////////////////////////////////////////////////////////////////// // Tag --> Unary {( "_hastag_" | "_notag_" ) Unary} bool Eval::parseTag ( - std::vector >& infix, + std::vector >& infix, int &i) const { if (i < infix.size () && parseUnary (infix, i)) { while (i < infix.size () && - infix[i].second == Lexer2::Type::op && + infix[i].second == Lexer::Type::op && (infix[i].first == "_hastag_" || infix[i].first == "_notag_")) { @@ -633,7 +633,7 @@ bool Eval::parseTag ( //////////////////////////////////////////////////////////////////////////////// // Unary --> [( "-" | "+" | "!" )] Exponent bool Eval::parseUnary ( - std::vector >& infix, + std::vector >& infix, int &i) const { if (i < infix.size ()) @@ -660,14 +660,14 @@ bool Eval::parseUnary ( //////////////////////////////////////////////////////////////////////////////// // Exponent --> Primitive ["^" Primitive] bool Eval::parseExponent ( - std::vector >& infix, + std::vector >& infix, int &i) const { if (i < infix.size () && parsePrimitive (infix, i)) { while (i < infix.size () && - infix[i].second == Lexer2::Type::op && + infix[i].second == Lexer::Type::op && infix[i].first == "^") { ++i; @@ -684,7 +684,7 @@ bool Eval::parseExponent ( //////////////////////////////////////////////////////////////////////////////// // Primitive --> "(" Logical ")" | Variant bool Eval::parsePrimitive ( - std::vector >& infix, + std::vector >& infix, int &i) const { if (i < infix.size ()) @@ -722,7 +722,7 @@ bool Eval::parsePrimitive ( ++i; return true; } - else if (infix[i].second != Lexer2::Type::op) + else if (infix[i].second != Lexer::Type::op) { ++i; return true; @@ -766,32 +766,32 @@ bool Eval::parsePrimitive ( // Exit. // void Eval::infixToPostfix ( - std::vector >& infix) const + std::vector >& infix) const { // Short circuit. if (infix.size () == 1) return; // Result. - std::vector > postfix; + std::vector > postfix; // Shunting yard. - std::vector > op_stack; + std::vector > op_stack; // Operator characteristics. char type; int precedence; char associativity; - std::vector >::iterator token; + std::vector >::iterator token; for (token = infix.begin (); token != infix.end (); ++token) { - if (token->second == Lexer2::Type::op && + if (token->second == Lexer::Type::op && token->first == "(") { op_stack.push_back (*token); } - else if (token->second == Lexer2::Type::op && + else if (token->second == Lexer::Type::op && token->first == ")") { while (op_stack.size () && @@ -806,7 +806,7 @@ void Eval::infixToPostfix ( else throw std::string ("Mismatched parentheses in expression"); } - else if (token->second == Lexer2::Type::op && + else if (token->second == Lexer::Type::op && identifyOperator (token->first, type, precedence, associativity)) { char type2; @@ -865,20 +865,20 @@ bool Eval::identifyOperator ( //////////////////////////////////////////////////////////////////////////////// std::string Eval::dump ( - std::vector >& tokens) const + std::vector >& tokens) const { // Set up a color mapping. 
- std::map color_map; - color_map[Lexer2::Type::op] = Color ("gray14 on gray6"); - color_map[Lexer2::Type::number] = Color ("rgb530 on gray6"); - color_map[Lexer2::Type::hex] = Color ("rgb303 on gray6"); - color_map[Lexer2::Type::string] = Color ("rgb550 on gray6"); - color_map[Lexer2::Type::identifier] = Color ("rgb035 on gray6"); - color_map[Lexer2::Type::date] = Color ("rgb150 on gray6"); - color_map[Lexer2::Type::duration] = Color ("rgb531 on gray6"); + std::map color_map; + color_map[Lexer::Type::op] = Color ("gray14 on gray6"); + color_map[Lexer::Type::number] = Color ("rgb530 on gray6"); + color_map[Lexer::Type::hex] = Color ("rgb303 on gray6"); + color_map[Lexer::Type::string] = Color ("rgb550 on gray6"); + color_map[Lexer::Type::identifier] = Color ("rgb035 on gray6"); + color_map[Lexer::Type::date] = Color ("rgb150 on gray6"); + color_map[Lexer::Type::duration] = Color ("rgb531 on gray6"); std::string output; - std::vector >::const_iterator i; + std::vector >::const_iterator i; for (i = tokens.begin (); i != tokens.end (); ++i) { if (i != tokens.begin ()) diff --git a/src/Eval.h b/src/Eval.h index cfeb03721..b0062814b 100644 --- a/src/Eval.h +++ b/src/Eval.h @@ -29,7 +29,7 @@ #include #include -#include +#include #include class Eval @@ -53,28 +53,28 @@ public: static void getBinaryOperators (std::vector &); private: - void evaluatePostfixStack (const std::vector >&, Variant&) const; - void infixToPostfix (std::vector >&) const; - void infixParse (std::vector >&) const; - bool parseLogical (std::vector >&, int &) const; - bool parseRegex (std::vector >&, int &) const; - bool parseEquality (std::vector >&, int &) const; - bool parseComparative (std::vector >&, int &) const; - bool parseArithmetic (std::vector >&, int &) const; - bool parseGeometric (std::vector >&, int &) const; - bool parseTag (std::vector >&, int &) const; - bool parseUnary (std::vector >&, int &) const; - bool parseExponent (std::vector >&, int &) const; - bool parsePrimitive (std::vector >&, int &) const; + void evaluatePostfixStack (const std::vector >&, Variant&) const; + void infixToPostfix (std::vector >&) const; + void infixParse (std::vector >&) const; + bool parseLogical (std::vector >&, int &) const; + bool parseRegex (std::vector >&, int &) const; + bool parseEquality (std::vector >&, int &) const; + bool parseComparative (std::vector >&, int &) const; + bool parseArithmetic (std::vector >&, int &) const; + bool parseGeometric (std::vector >&, int &) const; + bool parseTag (std::vector >&, int &) const; + bool parseUnary (std::vector >&, int &) const; + bool parseExponent (std::vector >&, int &) const; + bool parsePrimitive (std::vector >&, int &) const; bool identifyOperator (const std::string&, char&, int&, char&) const; - std::string dump (std::vector >&) const; + std::string dump (std::vector >&) const; private: std::vector _sources; bool _ambiguity; bool _debug; - std::vector > _compiled; + std::vector > _compiled; }; diff --git a/src/Lexer2.cpp b/src/Lexer.cpp similarity index 76% rename from src/Lexer2.cpp rename to src/Lexer.cpp index 538730d80..6d8e36350 100644 --- a/src/Lexer2.cpp +++ b/src/Lexer.cpp @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include #include @@ -35,11 +35,11 @@ static const std::string uuid_pattern = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"; static const int uuid_min_length = 8; -std::string Lexer2::dateFormat = ""; -bool Lexer2::isoEnabled = true; +std::string Lexer::dateFormat = ""; +bool Lexer::isoEnabled = true; 
//////////////////////////////////////////////////////////////////////////////// -Lexer2::Lexer2 (const std::string& text) +Lexer::Lexer (const std::string& text) : _text (text) , _cursor (0) , _eos (text.size ()) @@ -48,20 +48,20 @@ Lexer2::Lexer2 (const std::string& text) } //////////////////////////////////////////////////////////////////////////////// -Lexer2::~Lexer2 () +Lexer::~Lexer () { } //////////////////////////////////////////////////////////////////////////////// -void Lexer2::ambiguity (bool value) +void Lexer::ambiguity (bool value) { _ambiguity = value; } //////////////////////////////////////////////////////////////////////////////// -// When a Lexer2 object is constructed with a string, this method walks through +// When a Lexer object is constructed with a string, this method walks through // the stream of low-level tokens. -bool Lexer2::token (std::string& token, Lexer2::Type& type) +bool Lexer::token (std::string& token, Lexer::Type& type) { // Eat white space. while (isWhitespace (_text[_cursor])) @@ -104,27 +104,27 @@ bool Lexer2::token (std::string& token, Lexer2::Type& type) //////////////////////////////////////////////////////////////////////////////// // This static method tokenizes the input and provides a vector of token/type // results from a high-level lex. -std::vector > Lexer2::tokens ( +std::vector > Lexer::tokens ( const std::string& text) { - std::vector > all; + std::vector > all; std::string token; - Lexer2::Type type; - Lexer2 l (text); + Lexer::Type type; + Lexer l (text); while (l.token (token, type)) - all.push_back (std::pair (token, type)); + all.push_back (std::pair (token, type)); return all; } //////////////////////////////////////////////////////////////////////////////// // This static method tokenizes the input, but discards the type information. -std::vector Lexer2::split (const std::string& text) +std::vector Lexer::split (const std::string& text) { std::vector all; std::string token; - Lexer2::Type ignored; - Lexer2 l (text); + Lexer::Type ignored; + Lexer l (text); while (l.token (token, ignored)) all.push_back (token); @@ -133,27 +133,27 @@ std::vector Lexer2::split (const std::string& text) //////////////////////////////////////////////////////////////////////////////// // No L10N - these are for internal purposes. 
-const std::string Lexer2::typeName (const Lexer2::Type& type) +const std::string Lexer::typeName (const Lexer::Type& type) { switch (type) { - case Lexer2::Type::uuid: return "uuid"; - case Lexer2::Type::number: return "number"; - case Lexer2::Type::hex: return "hex"; - case Lexer2::Type::string: return "string"; - case Lexer2::Type::list: return "list"; - case Lexer2::Type::url: return "url"; - case Lexer2::Type::pair: return "pair"; - case Lexer2::Type::separator: return "separator"; - case Lexer2::Type::tag: return "tag"; - case Lexer2::Type::path: return "path"; - case Lexer2::Type::substitution: return "substitution"; - case Lexer2::Type::pattern: return "pattern"; - case Lexer2::Type::op: return "op"; - case Lexer2::Type::identifier: return "identifier"; - case Lexer2::Type::word: return "word"; - case Lexer2::Type::date: return "date"; - case Lexer2::Type::duration: return "duration"; + case Lexer::Type::uuid: return "uuid"; + case Lexer::Type::number: return "number"; + case Lexer::Type::hex: return "hex"; + case Lexer::Type::string: return "string"; + case Lexer::Type::list: return "list"; + case Lexer::Type::url: return "url"; + case Lexer::Type::pair: return "pair"; + case Lexer::Type::separator: return "separator"; + case Lexer::Type::tag: return "tag"; + case Lexer::Type::path: return "path"; + case Lexer::Type::substitution: return "substitution"; + case Lexer::Type::pattern: return "pattern"; + case Lexer::Type::op: return "op"; + case Lexer::Type::identifier: return "identifier"; + case Lexer::Type::word: return "word"; + case Lexer::Type::date: return "date"; + case Lexer::Type::duration: return "duration"; } } @@ -163,7 +163,7 @@ const std::string Lexer2::typeName (const Lexer2::Type& type) // http://en.wikipedia.org/wiki/Whitespace_character // Updated 2013-11-18 // Static -bool Lexer2::isWhitespace (int c) +bool Lexer::isWhitespace (int c) { return (c == 0x0020 || // space Common Separator, space c == 0x0009 || // Common Other, control HT, Horizontal Tab @@ -195,14 +195,14 @@ bool Lexer2::isWhitespace (int c) //////////////////////////////////////////////////////////////////////////////// // Digits 0-9. -bool Lexer2::isDigit (int c) +bool Lexer::isDigit (int c) { return c >= 0x30 && c <= 0x39; } //////////////////////////////////////////////////////////////////////////////// // Digits 0-9 a-f A-F. -bool Lexer2::isHexDigit (int c) +bool Lexer::isHexDigit (int c) { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || @@ -210,7 +210,7 @@ bool Lexer2::isHexDigit (int c) } //////////////////////////////////////////////////////////////////////////////// -bool Lexer2::isIdentifierStart (int c) +bool Lexer::isIdentifierStart (int c) { return c && // Include null character check. ! isWhitespace (c) && @@ -220,7 +220,7 @@ bool Lexer2::isIdentifierStart (int c) } //////////////////////////////////////////////////////////////////////////////// -bool Lexer2::isIdentifierNext (int c) +bool Lexer::isIdentifierNext (int c) { return c && // Include null character check. c != ':' && // Used in isPair. 
@@ -229,7 +229,7 @@ bool Lexer2::isIdentifierNext (int c) } //////////////////////////////////////////////////////////////////////////////// -bool Lexer2::isSingleCharOperator (int c) +bool Lexer::isSingleCharOperator (int c) { return c == '+' || // Addition c == '-' || // Subtraction or unary minus = ambiguous @@ -247,7 +247,7 @@ bool Lexer2::isSingleCharOperator (int c) } //////////////////////////////////////////////////////////////////////////////// -bool Lexer2::isDoubleCharOperator (int c0, int c1, int c2) +bool Lexer::isDoubleCharOperator (int c0, int c1, int c2) { return (c0 == '=' && c1 == '=') || (c0 == '!' && c1 == '=') || @@ -260,7 +260,7 @@ bool Lexer2::isDoubleCharOperator (int c0, int c1, int c2) } //////////////////////////////////////////////////////////////////////////////// -bool Lexer2::isTripleCharOperator (int c0, int c1, int c2, int c3) +bool Lexer::isTripleCharOperator (int c0, int c1, int c2, int c3) { return (c0 == 'a' && c1 == 'n' && c2 == 'd' && isBoundary (c2, c3)) || (c0 == 'x' && c1 == 'o' && c2 == 'r' && isBoundary (c2, c3)) || @@ -268,7 +268,7 @@ bool Lexer2::isTripleCharOperator (int c0, int c1, int c2, int c3) } //////////////////////////////////////////////////////////////////////////////// -bool Lexer2::isBoundary (int left, int right) +bool Lexer::isBoundary (int left, int right) { // XOR if (isalpha (left) != isalpha (right)) return true; @@ -282,14 +282,14 @@ bool Lexer2::isBoundary (int left, int right) } //////////////////////////////////////////////////////////////////////////////// -bool Lexer2::isPunctuation (int c) +bool Lexer::isPunctuation (int c) { return c != '@' && ispunct (c); } //////////////////////////////////////////////////////////////////////////////// -void Lexer2::dequote (std::string& input) +void Lexer::dequote (std::string& input) { int quote = input[0]; size_t len = input.length (); @@ -301,7 +301,7 @@ void Lexer2::dequote (std::string& input) } //////////////////////////////////////////////////////////////////////////////// -bool Lexer2::isEOS () const +bool Lexer::isEOS () const { return _cursor >= _eos; } @@ -311,7 +311,7 @@ bool Lexer2::isEOS () const // '9' -> 9 // 'a'/'A' -> 10 // 'f'/'F' -> 15 -int Lexer2::hexToInt (int c) const +int Lexer::hexToInt (int c) const { if (c >= '0' && c <= '9') return (c - '0'); else if (c >= 'a' && c <= 'f') return (c - 'a' + 10); @@ -319,13 +319,13 @@ int Lexer2::hexToInt (int c) const } //////////////////////////////////////////////////////////////////////////////// -int Lexer2::hexToInt (int c0, int c1) const +int Lexer::hexToInt (int c0, int c1) const { return (hexToInt (c0) << 4) + hexToInt (c1); } //////////////////////////////////////////////////////////////////////////////// -int Lexer2::hexToInt (int c0, int c1, int c2, int c3) const +int Lexer::hexToInt (int c0, int c1, int c2, int c3) const { return (hexToInt (c0) << 12) + (hexToInt (c1) << 8) + @@ -334,11 +334,11 @@ int Lexer2::hexToInt (int c0, int c1, int c2, int c3) const } //////////////////////////////////////////////////////////////////////////////// -// Lexer2::Type::string +// Lexer::Type::string // '|" // [ U+XXXX | \uXXXX | \" | \' | \\ | \/ | \b | \f | \n | \r | \t | . 
] // '|" -bool Lexer2::isString (std::string& token, Lexer2::Type& type, int quote) +bool Lexer::isString (std::string& token, Lexer::Type& type, int quote) { std::size_t marker = _cursor; @@ -404,7 +404,7 @@ bool Lexer2::isString (std::string& token, Lexer2::Type& type, int quote) if (_text[marker] == quote) { ++marker; - type = Lexer2::Type::string; + type = Lexer::Type::string; _cursor = marker; return true; } @@ -414,19 +414,19 @@ bool Lexer2::isString (std::string& token, Lexer2::Type& type, int quote) } //////////////////////////////////////////////////////////////////////////////// -// Lexer2::Type::date -// -bool Lexer2::isDate (std::string& token, Lexer2::Type& type) +// Lexer::Type::date +// | +bool Lexer::isDate (std::string& token, Lexer::Type& type) { // Try an ISO date parse. - if (Lexer2::isoEnabled) + if (Lexer::isoEnabled) { std::size_t iso_i = 0; ISO8601d iso; iso.ambiguity (_ambiguity); if (iso.parse (_text.substr (_cursor), iso_i)) { - type = Lexer2::Type::date; + type = Lexer::Type::date; token = _text.substr (_cursor, iso_i); _cursor += iso_i; return true; @@ -434,14 +434,14 @@ bool Lexer2::isDate (std::string& token, Lexer2::Type& type) } // Try a legacy rc.dateformat parse here. - if (Lexer2::dateFormat != "") + if (Lexer::dateFormat != "") { try { std::size_t legacy_i = 0; - Date legacyDate (_text.substr (_cursor), legacy_i, Lexer2::dateFormat, false, false); + Date legacyDate (_text.substr (_cursor), legacy_i, Lexer::dateFormat, false, false); - type = Lexer2::Type::date; + type = Lexer::Type::date; token = _text.substr (_cursor, legacy_i); _cursor += legacy_i; return true; @@ -454,16 +454,16 @@ bool Lexer2::isDate (std::string& token, Lexer2::Type& type) } //////////////////////////////////////////////////////////////////////////////// -// Lexer2::Type::duration -// -bool Lexer2::isDuration (std::string& token, Lexer2::Type& type) +// Lexer::Type::duration +// | +bool Lexer::isDuration (std::string& token, Lexer::Type& type) { std::size_t marker = 0; ISO8601p iso; if (iso.parse (_text.substr (_cursor), marker)) { - type = Lexer2::Type::duration; + type = Lexer::Type::duration; token = _text.substr (_cursor, marker); _cursor += marker; return true; @@ -472,7 +472,7 @@ bool Lexer2::isDuration (std::string& token, Lexer2::Type& type) Duration dur; if (dur.parse (_text.substr (_cursor), marker)) { - type = Lexer2::Type::duration; + type = Lexer::Type::duration; token = _text.substr (_cursor, marker); _cursor += marker; return true; @@ -482,7 +482,7 @@ bool Lexer2::isDuration (std::string& token, Lexer2::Type& type) } //////////////////////////////////////////////////////////////////////////////// -// Lexer2::Type::uuid +// Lexer::Type::uuid // XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX // XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX // XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXX @@ -492,7 +492,7 @@ bool Lexer2::isDuration (std::string& token, Lexer2::Type& type) // XXXXXXXX-X // XXXXXXXX- // XXXXXXXX -bool Lexer2::isUUID (std::string& token, Lexer2::Type& type) +bool Lexer::isUUID (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; @@ -513,7 +513,7 @@ bool Lexer2::isUUID (std::string& token, Lexer2::Type& type) token = _text.substr (_cursor, i); if (! 
isAllDigits (token)) { - type = Lexer2::Type::uuid; + type = Lexer::Type::uuid; _cursor += i; return true; } @@ -523,9 +523,9 @@ bool Lexer2::isUUID (std::string& token, Lexer2::Type& type) } //////////////////////////////////////////////////////////////////////////////// -// Lexer2::Type::hex +// Lexer::Type::hex // 0xX+ -bool Lexer2::isHexNumber (std::string& token, Lexer2::Type& type) +bool Lexer::isHexNumber (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; @@ -541,7 +541,7 @@ bool Lexer2::isHexNumber (std::string& token, Lexer2::Type& type) if (marker - _cursor > 2) { token = _text.substr (_cursor, marker - _cursor); - type = Lexer2::Type::hex; + type = Lexer::Type::hex; _cursor = marker; return true; } @@ -551,11 +551,11 @@ bool Lexer2::isHexNumber (std::string& token, Lexer2::Type& type) } //////////////////////////////////////////////////////////////////////////////// -// Lexer2::Type::number +// Lexer::Type::number // \d+ // [ . \d+ ] // [ e|E [ +|- ] \d+ [ . \d+ ] ] -bool Lexer2::isNumber (std::string& token, Lexer2::Type& type) +bool Lexer::isNumber (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; @@ -605,7 +605,7 @@ bool Lexer2::isNumber (std::string& token, Lexer2::Type& type) } token = _text.substr (_cursor, marker - _cursor); - type = Lexer2::Type::number; + type = Lexer::Type::number; _cursor = marker; return true; } @@ -614,16 +614,16 @@ bool Lexer2::isNumber (std::string& token, Lexer2::Type& type) } //////////////////////////////////////////////////////////////////////////////// -// Lexer2::Type::separator +// Lexer::Type::separator // -- -bool Lexer2::isSeparator (std::string& token, Lexer2::Type& type) +bool Lexer::isSeparator (std::string& token, Lexer::Type& type) { if (_eos - _cursor >= 2 && _text[_cursor] == '-' && _text[_cursor + 1] == '-') { _cursor += 2; - type = Lexer2::Type::separator; + type = Lexer::Type::separator; token = "--"; return true; } @@ -632,15 +632,15 @@ bool Lexer2::isSeparator (std::string& token, Lexer2::Type& type) } //////////////////////////////////////////////////////////////////////////////// -// Lexer2::Type::list +// Lexer::Type::list // , -bool Lexer2::isList (std::string& token, Lexer2::Type& type) +bool Lexer::isList (std::string& token, Lexer::Type& type) { if (_eos - _cursor > 1 && _text[_cursor] == ',') { ++_cursor; - type = Lexer2::Type::list; + type = Lexer::Type::list; token = ","; return true; } @@ -649,9 +649,9 @@ bool Lexer2::isList (std::string& token, Lexer2::Type& type) } //////////////////////////////////////////////////////////////////////////////// -// Lexer2::Type::url +// Lexer::Type::url // http [s] :// ... 
-bool Lexer2::isURL (std::string& token, Lexer2::Type& type) +bool Lexer::isURL (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; @@ -676,7 +676,7 @@ bool Lexer2::isURL (std::string& token, Lexer2::Type& type) utf8_next_char (_text, marker); token = _text.substr (_cursor, marker - _cursor); - type = Lexer2::Type::url; + type = Lexer::Type::url; _cursor = marker; return true; } @@ -686,14 +686,14 @@ bool Lexer2::isURL (std::string& token, Lexer2::Type& type) } //////////////////////////////////////////////////////////////////////////////// -// Lexer2::Type::pair +// Lexer::Type::pair // :|= [ | ] -bool Lexer2::isPair (std::string& token, Lexer2::Type& type) +bool Lexer::isPair (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; std::string ignoredToken; - Lexer2::Type ignoredType; + Lexer::Type ignoredType; if (isIdentifier (ignoredToken, ignoredType)) { if (_eos - _cursor > 1 && @@ -706,7 +706,7 @@ bool Lexer2::isPair (std::string& token, Lexer2::Type& type) isWord (ignoredToken, ignoredType)) { token = _text.substr (marker, _cursor - marker); - type = Lexer2::Type::pair; + type = Lexer::Type::pair; return true; } } @@ -717,9 +717,9 @@ bool Lexer2::isPair (std::string& token, Lexer2::Type& type) } //////////////////////////////////////////////////////////////////////////////// -// Lexer2::Type::tag +// Lexer::Type::tag // ^ | [ +|- ] [ ]* -bool Lexer2::isTag (std::string& token, Lexer2::Type& type) +bool Lexer::isTag (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; @@ -743,7 +743,7 @@ bool Lexer2::isTag (std::string& token, Lexer2::Type& type) utf8_next_char (_text, marker); token = _text.substr (_cursor, marker - _cursor); - type = Lexer2::Type::tag; + type = Lexer::Type::tag; _cursor = marker; return true; } @@ -753,9 +753,9 @@ bool Lexer2::isTag (std::string& token, Lexer2::Type& type) } //////////////////////////////////////////////////////////////////////////////// -// Lexer2::Type::path +// Lexer::Type::path // ( / )+ -bool Lexer2::isPath (std::string& token, Lexer2::Type& type) +bool Lexer::isPath (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; int slashCount = 0; @@ -786,7 +786,7 @@ bool Lexer2::isPath (std::string& token, Lexer2::Type& type) if (marker > _cursor && slashCount > 3) { - type = Lexer2::Type::path; + type = Lexer::Type::path; token = _text.substr (_cursor, marker - _cursor); _cursor = marker; return true; @@ -796,14 +796,14 @@ bool Lexer2::isPath (std::string& token, Lexer2::Type& type) } //////////////////////////////////////////////////////////////////////////////// -// Lexer2::Type::substitution +// Lexer::Type::substitution // / / / [g] -bool Lexer2::isSubstitution (std::string& token, Lexer2::Type& type) +bool Lexer::isSubstitution (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; std::string extractedToken; - Lexer2::Type extractedType; + Lexer::Type extractedType; if (isString (extractedToken, extractedType, '/')) { --_cursor; // Step back over the '/'. 
@@ -816,7 +816,7 @@ bool Lexer2::isSubstitution (std::string& token, Lexer2::Type& type) if (isWhitespace (_text[_cursor])) { token = _text.substr (marker, _cursor - marker); - type = Lexer2::Type::substitution; + type = Lexer::Type::substitution; return true; } } @@ -827,19 +827,19 @@ bool Lexer2::isSubstitution (std::string& token, Lexer2::Type& type) } //////////////////////////////////////////////////////////////////////////////// -// Lexer2::Type::pattern +// Lexer::Type::pattern // / / -bool Lexer2::isPattern (std::string& token, Lexer2::Type& type) +bool Lexer::isPattern (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; std::string extractedToken; - Lexer2::Type extractedType; + Lexer::Type extractedType; if (isString (extractedToken, extractedType, '/') && isWhitespace (_text[_cursor])) { token = _text.substr (marker, _cursor - marker); - type = Lexer2::Type::pattern; + type = Lexer::Type::pattern; return true; } @@ -848,19 +848,19 @@ bool Lexer2::isPattern (std::string& token, Lexer2::Type& type) } //////////////////////////////////////////////////////////////////////////////// -// Lexer2::Type::op +// Lexer::Type::op // _hastag_ | _notag | _neg_ | _pos_ | // | // | // | -bool Lexer2::isOperator (std::string& token, Lexer2::Type& type) +bool Lexer::isOperator (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; if (_eos - marker >= 8 && _text.substr (marker, 8) == "_hastag_") { marker += 8; - type = Lexer2::Type::op; + type = Lexer::Type::op; token = _text.substr (_cursor, marker - _cursor); _cursor = marker; return true; @@ -869,7 +869,7 @@ bool Lexer2::isOperator (std::string& token, Lexer2::Type& type) else if (_eos - marker >= 7 && _text.substr (marker, 7) == "_notag_") { marker += 7; - type = Lexer2::Type::op; + type = Lexer::Type::op; token = _text.substr (_cursor, marker - _cursor); _cursor = marker; return true; @@ -878,7 +878,7 @@ bool Lexer2::isOperator (std::string& token, Lexer2::Type& type) else if (_eos - marker >= 5 && _text.substr (marker, 5) == "_neg_") { marker += 5; - type = Lexer2::Type::op; + type = Lexer::Type::op; token = _text.substr (_cursor, marker - _cursor); _cursor = marker; return true; @@ -887,7 +887,7 @@ bool Lexer2::isOperator (std::string& token, Lexer2::Type& type) else if (_eos - marker >= 5 && _text.substr (marker, 5) == "_pos_") { marker += 5; - type = Lexer2::Type::op; + type = Lexer::Type::op; token = _text.substr (_cursor, marker - _cursor); _cursor = marker; return true; @@ -897,7 +897,7 @@ bool Lexer2::isOperator (std::string& token, Lexer2::Type& type) isTripleCharOperator (_text[marker], _text[marker + 1], _text[marker + 2], _text[marker + 3])) { marker += 3; - type = Lexer2::Type::op; + type = Lexer::Type::op; token = _text.substr (_cursor, marker - _cursor); _cursor = marker; return true; @@ -907,7 +907,7 @@ bool Lexer2::isOperator (std::string& token, Lexer2::Type& type) isDoubleCharOperator (_text[marker], _text[marker + 1], _text[marker + 2])) { marker += 2; - type = Lexer2::Type::op; + type = Lexer::Type::op; token = _text.substr (_cursor, marker - _cursor); _cursor = marker; return true; @@ -916,7 +916,7 @@ bool Lexer2::isOperator (std::string& token, Lexer2::Type& type) else if (isSingleCharOperator (_text[marker])) { token = _text[marker]; - type = Lexer2::Type::op; + type = Lexer::Type::op; _cursor = ++marker; return true; } @@ -925,9 +925,9 @@ bool Lexer2::isOperator (std::string& token, Lexer2::Type& type) } 
//////////////////////////////////////////////////////////////////////////////// -// Lexer2::Type::identifier +// Lexer::Type::identifier // [ ]* -bool Lexer2::isIdentifier (std::string& token, Lexer2::Type& type) +bool Lexer::isIdentifier (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; @@ -939,7 +939,7 @@ bool Lexer2::isIdentifier (std::string& token, Lexer2::Type& type) utf8_next_char (_text, marker); token = _text.substr (_cursor, marker - _cursor); - type = Lexer2::Type::identifier; + type = Lexer::Type::identifier; _cursor = marker; return true; } @@ -948,9 +948,9 @@ bool Lexer2::isIdentifier (std::string& token, Lexer2::Type& type) } //////////////////////////////////////////////////////////////////////////////// -// Lexer2::Type::word +// Lexer::Type::word // [^\s]+ -bool Lexer2::isWord (std::string& token, Lexer2::Type& type) +bool Lexer::isWord (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; @@ -960,7 +960,7 @@ bool Lexer2::isWord (std::string& token, Lexer2::Type& type) if (marker > _cursor) { token = _text.substr (_cursor, marker - _cursor); - type = Lexer2::Type::word; + type = Lexer::Type::word; _cursor = marker; return true; } @@ -970,30 +970,30 @@ bool Lexer2::isWord (std::string& token, Lexer2::Type& type) //////////////////////////////////////////////////////////////////////////////// // Static -std::string Lexer2::typeToString (Lexer2::Type type) +std::string Lexer::typeToString (Lexer::Type type) { - if (type == Lexer2::Type::string) return std::string ("\033[38;5;7m\033[48;5;3m") + "string" + "\033[0m"; - else if (type == Lexer2::Type::uuid) return std::string ("\033[38;5;7m\033[48;5;10m") + "uuid" + "\033[0m"; - else if (type == Lexer2::Type::hex) return std::string ("\033[38;5;7m\033[48;5;14m") + "hex" + "\033[0m"; - else if (type == Lexer2::Type::number) return std::string ("\033[38;5;7m\033[48;5;6m") + "number" + "\033[0m"; - else if (type == Lexer2::Type::separator) return std::string ("\033[38;5;7m\033[48;5;4m") + "separator" + "\033[0m"; - else if (type == Lexer2::Type::list) return std::string ("\033[38;5;7m\033[48;5;4m") + "list" + "\033[0m"; - else if (type == Lexer2::Type::url) return std::string ("\033[38;5;7m\033[48;5;4m") + "url" + "\033[0m"; - else if (type == Lexer2::Type::pair) return std::string ("\033[38;5;7m\033[48;5;1m") + "pair" + "\033[0m"; - else if (type == Lexer2::Type::tag) return std::string ("\033[37;45m") + "tag" + "\033[0m"; - else if (type == Lexer2::Type::path) return std::string ("\033[37;102m") + "path" + "\033[0m"; - else if (type == Lexer2::Type::substitution) return std::string ("\033[37;102m") + "substitution" + "\033[0m"; - else if (type == Lexer2::Type::pattern) return std::string ("\033[37;42m") + "pattern" + "\033[0m"; - else if (type == Lexer2::Type::op) return std::string ("\033[38;5;7m\033[48;5;203m") + "op" + "\033[0m"; - else if (type == Lexer2::Type::identifier) return std::string ("\033[38;5;15m\033[48;5;244m") + "identifier" + "\033[0m"; - else if (type == Lexer2::Type::word) return std::string ("\033[38;5;15m\033[48;5;236m") + "word" + "\033[0m"; - else if (type == Lexer2::Type::date) return std::string ("\033[38;5;15m\033[48;5;34m") + "date" + "\033[0m"; - else if (type == Lexer2::Type::duration) return std::string ("\033[38;5;15m\033[48;5;34m") + "duration" + "\033[0m"; + if (type == Lexer::Type::string) return std::string ("\033[38;5;7m\033[48;5;3m") + "string" + "\033[0m"; + else if (type == Lexer::Type::uuid) return std::string ("\033[38;5;7m\033[48;5;10m") + "uuid" 
+ "\033[0m"; + else if (type == Lexer::Type::hex) return std::string ("\033[38;5;7m\033[48;5;14m") + "hex" + "\033[0m"; + else if (type == Lexer::Type::number) return std::string ("\033[38;5;7m\033[48;5;6m") + "number" + "\033[0m"; + else if (type == Lexer::Type::separator) return std::string ("\033[38;5;7m\033[48;5;4m") + "separator" + "\033[0m"; + else if (type == Lexer::Type::list) return std::string ("\033[38;5;7m\033[48;5;4m") + "list" + "\033[0m"; + else if (type == Lexer::Type::url) return std::string ("\033[38;5;7m\033[48;5;4m") + "url" + "\033[0m"; + else if (type == Lexer::Type::pair) return std::string ("\033[38;5;7m\033[48;5;1m") + "pair" + "\033[0m"; + else if (type == Lexer::Type::tag) return std::string ("\033[37;45m") + "tag" + "\033[0m"; + else if (type == Lexer::Type::path) return std::string ("\033[37;102m") + "path" + "\033[0m"; + else if (type == Lexer::Type::substitution) return std::string ("\033[37;102m") + "substitution" + "\033[0m"; + else if (type == Lexer::Type::pattern) return std::string ("\033[37;42m") + "pattern" + "\033[0m"; + else if (type == Lexer::Type::op) return std::string ("\033[38;5;7m\033[48;5;203m") + "op" + "\033[0m"; + else if (type == Lexer::Type::identifier) return std::string ("\033[38;5;15m\033[48;5;244m") + "identifier" + "\033[0m"; + else if (type == Lexer::Type::word) return std::string ("\033[38;5;15m\033[48;5;236m") + "word" + "\033[0m"; + else if (type == Lexer::Type::date) return std::string ("\033[38;5;15m\033[48;5;34m") + "date" + "\033[0m"; + else if (type == Lexer::Type::duration) return std::string ("\033[38;5;15m\033[48;5;34m") + "duration" + "\033[0m"; else return std::string ("\033[37;41m") + "unknown" + "\033[0m"; } //////////////////////////////////////////////////////////////////////////////// -bool Lexer2::isAllDigits (const std::string& text) +bool Lexer::isAllDigits (const std::string& text) { return text.find_first_not_of ("0123456789") == std::string::npos; } diff --git a/src/Lexer2.h b/src/Lexer.h similarity index 66% rename from src/Lexer2.h rename to src/Lexer.h index 9ede9be7d..3bb07ce6b 100644 --- a/src/Lexer2.h +++ b/src/Lexer.h @@ -24,17 +24,17 @@ // //////////////////////////////////////////////////////////////////////////////// -#ifndef INCLUDED_LEXER2 -#define INCLUDED_LEXER2 +#ifndef INCLUDED_LEXER +#define INCLUDED_LEXER #include #include #include -// Lexer2: A UTF8 lexical analyzer for every construct used on the Taskwarrior -// command line, with additional recognized types for disambiguation. +// Lexer: A UTF8 lexical analyzer for every construct used on the Taskwarrior +// command line, with additional recognized types for disambiguation. -class Lexer2 +class Lexer { public: // These are overridable. @@ -51,17 +51,17 @@ public: identifier, word, date, duration }; - Lexer2 (const std::string&); - ~Lexer2 (); + Lexer (const std::string&); + ~Lexer (); void ambiguity (bool); - bool token (std::string&, Lexer2::Type&); - static std::vector > tokens (const std::string&); + bool token (std::string&, Lexer::Type&); + static std::vector > tokens (const std::string&); static std::vector split (const std::string&); - static std::string typeToString (Lexer2::Type); + static std::string typeToString (Lexer::Type); static bool isAllDigits (const std::string&); // Static helpers. 
- static const std::string typeName (const Lexer2::Type&); + static const std::string typeName (const Lexer::Type&); static bool isWhitespace (int); static bool isDigit (int); static bool isHexDigit (int); @@ -81,23 +81,23 @@ public: int hexToInt (int, int, int, int) const; // Classifiers. - bool isString (std::string&, Lexer2::Type&, int quote); - bool isDate (std::string&, Lexer2::Type&); - bool isDuration (std::string&, Lexer2::Type&); - bool isUUID (std::string&, Lexer2::Type&); - bool isNumber (std::string&, Lexer2::Type&); - bool isHexNumber (std::string&, Lexer2::Type&); - bool isSeparator (std::string&, Lexer2::Type&); - bool isList (std::string&, Lexer2::Type&); - bool isURL (std::string&, Lexer2::Type&); - bool isPair (std::string&, Lexer2::Type&); - bool isTag (std::string&, Lexer2::Type&); - bool isPath (std::string&, Lexer2::Type&); - bool isSubstitution (std::string&, Lexer2::Type&); - bool isPattern (std::string&, Lexer2::Type&); - bool isOperator (std::string&, Lexer2::Type&); - bool isIdentifier (std::string&, Lexer2::Type&); - bool isWord (std::string&, Lexer2::Type&); + bool isString (std::string&, Lexer::Type&, int quote); + bool isDate (std::string&, Lexer::Type&); + bool isDuration (std::string&, Lexer::Type&); + bool isUUID (std::string&, Lexer::Type&); + bool isNumber (std::string&, Lexer::Type&); + bool isHexNumber (std::string&, Lexer::Type&); + bool isSeparator (std::string&, Lexer::Type&); + bool isList (std::string&, Lexer::Type&); + bool isURL (std::string&, Lexer::Type&); + bool isPair (std::string&, Lexer::Type&); + bool isTag (std::string&, Lexer::Type&); + bool isPath (std::string&, Lexer::Type&); + bool isSubstitution (std::string&, Lexer::Type&); + bool isPattern (std::string&, Lexer::Type&); + bool isOperator (std::string&, Lexer::Type&); + bool isIdentifier (std::string&, Lexer::Type&); + bool isWord (std::string&, Lexer::Type&); private: std::string _text; diff --git a/src/Nibbler.cpp b/src/Nibbler.cpp index ba283288a..495e7e2cd 100644 --- a/src/Nibbler.cpp +++ b/src/Nibbler.cpp @@ -37,7 +37,7 @@ #ifdef NIBBLER_FEATURE_REGEX #include #endif -#include +#include static const char* _uuid_pattern = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"; static const unsigned int _uuid_min_length = 8; @@ -1005,12 +1005,12 @@ bool Nibbler::getName (std::string& result) { if (! isdigit (_input[i]) && ! ispunct (_input[i]) && - ! Lexer2::isWhitespace (_input[i])) + ! Lexer::isWhitespace (_input[i])) { ++i; while (i < _length && (_input[i] == '_' || ! ispunct (_input[i])) && - ! Lexer2::isWhitespace (_input[i])) + ! 
Lexer::isWhitespace (_input[i])) { ++i; } @@ -1037,7 +1037,7 @@ bool Nibbler::getWord (std::string& result) { while (!isdigit (_input[i]) && !isPunctuation (_input[i]) && - !Lexer2::isWhitespace (_input[i])) + !Lexer::isWhitespace (_input[i])) { ++i; } diff --git a/src/Variant.cpp b/src/Variant.cpp index f12519ad1..d26aafe87 100644 --- a/src/Variant.cpp +++ b/src/Variant.cpp @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include @@ -196,10 +196,10 @@ bool Variant::operator&& (const Variant& other) const Variant right (other); if (left._type == type_string) - Lexer2::dequote (left._string); + Lexer::dequote (left._string); if (right._type == type_string) - Lexer2::dequote (right._string); + Lexer::dequote (right._string); left.cast (type_boolean); right.cast (type_boolean); @@ -214,10 +214,10 @@ bool Variant::operator|| (const Variant& other) const Variant right (other); if (left._type == type_string) - Lexer2::dequote (left._string); + Lexer::dequote (left._string); if (right._type == type_string) - Lexer2::dequote (right._string); + Lexer::dequote (right._string); left.cast (type_boolean); right.cast (type_boolean); @@ -232,10 +232,10 @@ bool Variant::operator_xor (const Variant& other) const Variant right (other); if (left._type == type_string) - Lexer2::dequote (left._string); + Lexer::dequote (left._string); if (right._type == type_string) - Lexer2::dequote (right._string); + Lexer::dequote (right._string); left.cast (type_boolean); right.cast (type_boolean); @@ -251,10 +251,10 @@ bool Variant::operator< (const Variant& other) const Variant right (other); if (left._type == type_string) - Lexer2::dequote (left._string); + Lexer::dequote (left._string); if (right._type == type_string) - Lexer2::dequote (right._string); + Lexer::dequote (right._string); switch (left._type) { @@ -396,10 +396,10 @@ bool Variant::operator<= (const Variant& other) const Variant right (other); if (left._type == type_string) - Lexer2::dequote (left._string); + Lexer::dequote (left._string); if (right._type == type_string) - Lexer2::dequote (right._string); + Lexer::dequote (right._string); switch (left._type) { @@ -542,10 +542,10 @@ bool Variant::operator> (const Variant& other) const Variant right (other); if (left._type == type_string) - Lexer2::dequote (left._string); + Lexer::dequote (left._string); if (right._type == type_string) - Lexer2::dequote (right._string); + Lexer::dequote (right._string); switch (left._type) { @@ -686,10 +686,10 @@ bool Variant::operator>= (const Variant& other) const Variant right (other); if (left._type == type_string) - Lexer2::dequote (left._string); + Lexer::dequote (left._string); if (right._type == type_string) - Lexer2::dequote (right._string); + Lexer::dequote (right._string); switch (left._type) { @@ -832,10 +832,10 @@ bool Variant::operator== (const Variant& other) const Variant right (other); if (left._type == type_string) - Lexer2::dequote (left._string); + Lexer::dequote (left._string); if (right._type == type_string) - Lexer2::dequote (right._string); + Lexer::dequote (right._string); switch (left._type) { @@ -962,16 +962,16 @@ bool Variant::operator_match (const Variant& other, const Task& task) const Variant right (other); if (left._type == type_string) - Lexer2::dequote (left._string); + Lexer::dequote (left._string); if (right._type == type_string) - Lexer2::dequote (right._string); + Lexer::dequote (right._string); left.cast (type_string); right.cast (type_string); std::string pattern = right._string; - Lexer2::dequote 
(pattern); + Lexer::dequote (pattern); if (searchUsingRegex) { @@ -1032,10 +1032,10 @@ bool Variant::operator_partial (const Variant& other) const Variant right (other); if (left._type == type_string) - Lexer2::dequote (left._string); + Lexer::dequote (left._string); if (right._type == type_string) - Lexer2::dequote (right._string); + Lexer::dequote (right._string); switch (left._type) { @@ -1220,7 +1220,7 @@ bool Variant::operator_hastag (const Variant& other, const Task& task) const { Variant right (other); right.cast (type_string); - Lexer2::dequote (right._string); + Lexer::dequote (right._string); return task.hasTag (right._string); } @@ -1236,7 +1236,7 @@ bool Variant::operator! () const Variant left (*this); if (left._type == type_string) - Lexer2::dequote (left._string); + Lexer::dequote (left._string); left.cast (type_boolean); return ! left._bool; @@ -1401,7 +1401,7 @@ Variant& Variant::operator+= (const Variant& other) Variant right (other); if (right._type == type_string) - Lexer2::dequote (right._string); + Lexer::dequote (right._string); switch (_type) { @@ -1513,7 +1513,7 @@ Variant& Variant::operator*= (const Variant& other) Variant right (other); if (right._type == type_string) - Lexer2::dequote (right._string); + Lexer::dequote (right._string); switch (_type) { @@ -1970,7 +1970,7 @@ Variant::operator std::string () const void Variant::sqrt () { if (_type == type_string) - Lexer2::dequote (_string); + Lexer::dequote (_string); cast (type_real); if (_real < 0.0) @@ -2046,7 +2046,7 @@ void Variant::cast (const enum type new_type) break; case type_string: - Lexer2::dequote (_string); + Lexer::dequote (_string); switch (new_type) { case type_unknown: break; diff --git a/src/commands/CmdCustom.cpp b/src/commands/CmdCustom.cpp index 77ec3b76f..1876dab23 100644 --- a/src/commands/CmdCustom.cpp +++ b/src/commands/CmdCustom.cpp @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include @@ -83,8 +83,8 @@ int CmdCustom::execute (std::string& output) // Prepend the argument list with those from the report filter. std::string lexeme; - Lexer2::Type type; - Lexer2 lex (reportFilter); + Lexer::Type type; + Lexer lex (reportFilter); lex.ambiguity (false); while (lex.token (lexeme, type)) context.cli.add (lexeme); diff --git a/src/lex.cpp b/src/lex.cpp index 5f13a82fc..295d8ab02 100644 --- a/src/lex.cpp +++ b/src/lex.cpp @@ -1,7 +1,7 @@ //////////////////////////////////////////////////////////////////////////////// #include -#include +#include #include Context context; @@ -12,17 +12,17 @@ int main (int argc, char** argv) { std::cout << "input '" << argv[i] << "'\n"; // Low-level tokens. - Lexer2 lexer (argv[i]); + Lexer lexer (argv[i]); std::string token; - Lexer2::Type type; + Lexer::Type type; while (lexer.token (token, type)) - std::cout << " token '" << token << "' " << Lexer2::typeToString (type) << "\n"; + std::cout << " token '" << token << "' " << Lexer::typeToString (type) << "\n"; /* // High-level tokens. 
-    auto all = Lexer2::tokens (argv[i]);
-    for (auto token : Lexer2::tokens (argv[i]))
-      std::cout << " token '" << token.first << "' " << Lexer2::typeToString (token.second) << "\n";
+    auto all = Lexer::tokens (argv[i]);
+    for (auto token : Lexer::tokens (argv[i]))
+      std::cout << " token '" << token.first << "' " << Lexer::typeToString (token.second) << "\n";
 */
   }
 }
diff --git a/src/text.cpp b/src/text.cpp
index d2efa8517..c68d65b5b 100644
--- a/src/text.cpp
+++ b/src/text.cpp
@@ -34,7 +34,7 @@
 #include
 #include
 #include
-#include <Lexer2.h>
+#include <Lexer.h>
 #include
 #include
 #include
@@ -473,7 +473,7 @@ bool nontrivial (const std::string& input)
   std::string::size_type i = 0;
   int character;
   while ((character = utf8_next_char (input, i)))
-    if (! Lexer2::isWhitespace (character))
+    if (! Lexer::isWhitespace (character))
       return true;
 
   return false;
@@ -495,7 +495,7 @@ bool noSpaces (const std::string& input)
   std::string::size_type i = 0;
   int character;
   while ((character = utf8_next_char (input, i)))
-    if (Lexer2::isWhitespace (character))
+    if (Lexer::isWhitespace (character))
       return false;
 
   return true;
diff --git a/test/lexer.t.cpp b/test/lexer.t.cpp
index 684680538..a04b723a1 100644
--- a/test/lexer.t.cpp
+++ b/test/lexer.t.cpp
@@ -28,7 +28,7 @@
 #include
 #include
 #include
-#include <Lexer2.h>
+#include <Lexer.h>
 #include
 
 Context context;
@@ -38,317 +38,317 @@ int main (int argc, char** argv)
 {
   UnitTest t (211);
 
-  std::vector <std::pair <std::string, Lexer2::Type> > tokens;
+  std::vector <std::pair <std::string, Lexer::Type> > tokens;
   std::string token;
-  Lexer2::Type type;
+  Lexer::Type type;
 
   // White space detection.
-  t.notok (Lexer2::isWhitespace (0x0041), "U+0041 (A) ! isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x0020), "U+0020 isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x0009), "U+0009 isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x000A), "U+000A isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x000B), "U+000B isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x000C), "U+000C isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x000D), "U+000D isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x0085), "U+0085 isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x00A0), "U+00A0 isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x1680), "U+1680 isWhitespace"); // 10
-  t.ok (Lexer2::isWhitespace (0x180E), "U+180E isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x2000), "U+2000 isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x2001), "U+2001 isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x2002), "U+2002 isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x2003), "U+2003 isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x2004), "U+2004 isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x2005), "U+2005 isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x2006), "U+2006 isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x2007), "U+2007 isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x2008), "U+2008 isWhitespace"); // 20
-  t.ok (Lexer2::isWhitespace (0x2009), "U+2009 isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x200A), "U+200A isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x2028), "U+2028 isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x2029), "U+2029 isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x202F), "U+202F isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x205F), "U+205F isWhitespace");
-  t.ok (Lexer2::isWhitespace (0x3000), "U+3000 isWhitespace");
+  t.notok (Lexer::isWhitespace (0x0041), "U+0041 (A) ! 
isWhitespace"); + t.ok (Lexer::isWhitespace (0x0020), "U+0020 isWhitespace"); + t.ok (Lexer::isWhitespace (0x0009), "U+0009 isWhitespace"); + t.ok (Lexer::isWhitespace (0x000A), "U+000A isWhitespace"); + t.ok (Lexer::isWhitespace (0x000B), "U+000B isWhitespace"); + t.ok (Lexer::isWhitespace (0x000C), "U+000C isWhitespace"); + t.ok (Lexer::isWhitespace (0x000D), "U+000D isWhitespace"); + t.ok (Lexer::isWhitespace (0x0085), "U+0085 isWhitespace"); + t.ok (Lexer::isWhitespace (0x00A0), "U+00A0 isWhitespace"); + t.ok (Lexer::isWhitespace (0x1680), "U+1680 isWhitespace"); // 10 + t.ok (Lexer::isWhitespace (0x180E), "U+180E isWhitespace"); + t.ok (Lexer::isWhitespace (0x2000), "U+2000 isWhitespace"); + t.ok (Lexer::isWhitespace (0x2001), "U+2001 isWhitespace"); + t.ok (Lexer::isWhitespace (0x2002), "U+2002 isWhitespace"); + t.ok (Lexer::isWhitespace (0x2003), "U+2003 isWhitespace"); + t.ok (Lexer::isWhitespace (0x2004), "U+2004 isWhitespace"); + t.ok (Lexer::isWhitespace (0x2005), "U+2005 isWhitespace"); + t.ok (Lexer::isWhitespace (0x2006), "U+2006 isWhitespace"); + t.ok (Lexer::isWhitespace (0x2007), "U+2007 isWhitespace"); + t.ok (Lexer::isWhitespace (0x2008), "U+2008 isWhitespace"); // 20 + t.ok (Lexer::isWhitespace (0x2009), "U+2009 isWhitespace"); + t.ok (Lexer::isWhitespace (0x200A), "U+200A isWhitespace"); + t.ok (Lexer::isWhitespace (0x2028), "U+2028 isWhitespace"); + t.ok (Lexer::isWhitespace (0x2029), "U+2029 isWhitespace"); + t.ok (Lexer::isWhitespace (0x202F), "U+202F isWhitespace"); + t.ok (Lexer::isWhitespace (0x205F), "U+205F isWhitespace"); + t.ok (Lexer::isWhitespace (0x3000), "U+3000 isWhitespace"); - // static bool Lexer2::isBoundary(int, int); - t.ok (Lexer2::isBoundary (' ', 'a'), "' ' --> 'a' = isBoundary"); - t.ok (Lexer2::isBoundary ('a', ' '), "'a' --> ' ' = isBoundary"); - t.ok (Lexer2::isBoundary (' ', '+'), "' ' --> '+' = isBoundary"); - t.ok (Lexer2::isBoundary (' ', ','), "' ' --> ',' = isBoundary"); - t.notok (Lexer2::isBoundary ('3', '4'), "'3' --> '4' = isBoundary"); - t.ok (Lexer2::isBoundary ('(', '('), "'(' --> '(' = isBoundary"); - t.notok (Lexer2::isBoundary ('r', 'd'), "'r' --> 'd' = isBoundary"); + // static bool Lexer::isBoundary(int, int); + t.ok (Lexer::isBoundary (' ', 'a'), "' ' --> 'a' = isBoundary"); + t.ok (Lexer::isBoundary ('a', ' '), "'a' --> ' ' = isBoundary"); + t.ok (Lexer::isBoundary (' ', '+'), "' ' --> '+' = isBoundary"); + t.ok (Lexer::isBoundary (' ', ','), "' ' --> ',' = isBoundary"); + t.notok (Lexer::isBoundary ('3', '4'), "'3' --> '4' = isBoundary"); + t.ok (Lexer::isBoundary ('(', '('), "'(' --> '(' = isBoundary"); + t.notok (Lexer::isBoundary ('r', 'd'), "'r' --> 'd' = isBoundary"); // Should result in no tokens. - Lexer2 l0 (""); + Lexer l0 (""); t.notok (l0.token (token, type), "'' --> no tokens"); // Should result in no tokens. - Lexer2 l1 (" \t "); + Lexer l1 (" \t "); t.notok (l1.token (token, type), "' \\t ' --> no tokens"); // \u20ac = Euro symbol. 
- Lexer2 l2 (" one 'two \\'three\\''+456-(1.3*2 - 0x12) 1.2e-3.4 foo.bar and '\\u20ac'"); + Lexer l2 (" one 'two \\'three\\''+456-(1.3*2 - 0x12) 1.2e-3.4 foo.bar and '\\u20ac'"); tokens.clear (); while (l2.token (token, type)) { - std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n"; - tokens.push_back (std::pair (token, type)); + std::cout << "# «" << token << "» " << Lexer::typeName (type) << "\n"; + tokens.push_back (std::pair (token, type)); } - t.is (tokens[0].first, "one", "tokens[0] = 'left'"); // 30 - t.is (Lexer2::typeName (tokens[0].second), "identifier", "tokens[0] = identifier"); + t.is (tokens[0].first, "one", "tokens[0] = 'left'"); // 30 + t.is (Lexer::typeName (tokens[0].second), "identifier", "tokens[0] = identifier"); t.is (tokens[1].first, "two 'three'", "tokens[1] = 'two 'three''"); - t.is (Lexer2::typeName (tokens[1].second), "string", "tokens[1] = string"); + t.is (Lexer::typeName (tokens[1].second), "string", "tokens[1] = string"); - t.is (tokens[2].first, "+", "tokens[2] = '+'"); - t.is (Lexer2::typeName (tokens[2].second), "op", "tokens[2] = op"); + t.is (tokens[2].first, "+", "tokens[2] = '+'"); + t.is (Lexer::typeName (tokens[2].second), "op", "tokens[2] = op"); - t.is (tokens[3].first, "456", "tokens[3] = '456'"); - t.is (Lexer2::typeName (tokens[3].second), "number", "tokens[3] = number"); + t.is (tokens[3].first, "456", "tokens[3] = '456'"); + t.is (Lexer::typeName (tokens[3].second), "number", "tokens[3] = number"); - t.is (tokens[4].first, "-", "tokens[4] = '-'"); - t.is (Lexer2::typeName (tokens[4].second), "op", "tokens[4] = op"); + t.is (tokens[4].first, "-", "tokens[4] = '-'"); + t.is (Lexer::typeName (tokens[4].second), "op", "tokens[4] = op"); - t.is (tokens[5].first, "(", "tokens[5] = '('"); // 40 - t.is (Lexer2::typeName (tokens[5].second), "op", "tokens[5] = op"); + t.is (tokens[5].first, "(", "tokens[5] = '('"); // 40 + t.is (Lexer::typeName (tokens[5].second), "op", "tokens[5] = op"); - t.is (tokens[6].first, "1.3", "tokens[6] = '1.3'"); - t.is (Lexer2::typeName (tokens[6].second), "number", "tokens[6] = number"); + t.is (tokens[6].first, "1.3", "tokens[6] = '1.3'"); + t.is (Lexer::typeName (tokens[6].second), "number", "tokens[6] = number"); - t.is (tokens[7].first, "*", "tokens[7] = '*'"); - t.is (Lexer2::typeName (tokens[7].second), "op", "tokens[7] = op"); + t.is (tokens[7].first, "*", "tokens[7] = '*'"); + t.is (Lexer::typeName (tokens[7].second), "op", "tokens[7] = op"); - t.is (tokens[8].first, "2", "tokens[8] = '2'"); - t.is (Lexer2::typeName (tokens[8].second), "number", "tokens[8] = number"); + t.is (tokens[8].first, "2", "tokens[8] = '2'"); + t.is (Lexer::typeName (tokens[8].second), "number", "tokens[8] = number"); - t.is (tokens[9].first, "-", "tokens[9] = '-'"); - t.is (Lexer2::typeName (tokens[9].second), "op", "tokens[9] = op"); + t.is (tokens[9].first, "-", "tokens[9] = '-'"); + t.is (Lexer::typeName (tokens[9].second), "op", "tokens[9] = op"); - t.is (tokens[10].first, "0x12", "tokens[10] = '0x12'"); // 50 - t.is (Lexer2::typeName (tokens[10].second), "hex", "tokens[10] = hex"); + t.is (tokens[10].first, "0x12", "tokens[10] = '0x12'"); // 50 + t.is (Lexer::typeName (tokens[10].second), "hex", "tokens[10] = hex"); - t.is (tokens[11].first, ")", "tokens[11] = ')'"); - t.is (Lexer2::typeName (tokens[11].second), "op", "tokens[11] = op"); + t.is (tokens[11].first, ")", "tokens[11] = ')'"); + t.is (Lexer::typeName (tokens[11].second), "op", "tokens[11] = op"); - t.is (tokens[12].first, "1.2e-3.4", "tokens[12] = 
'1.2e-3.4'"); - t.is (Lexer2::typeName (tokens[12].second), "number", "tokens[12] = number"); + t.is (tokens[12].first, "1.2e-3.4", "tokens[12] = '1.2e-3.4'"); + t.is (Lexer::typeName (tokens[12].second), "number", "tokens[12] = number"); - t.is (tokens[13].first, "foo.bar", "tokens[13] = 'foo.bar'"); - t.is (Lexer2::typeName (tokens[13].second), "identifier", "tokens[13] = identifier"); + t.is (tokens[13].first, "foo.bar", "tokens[13] = 'foo.bar'"); + t.is (Lexer::typeName (tokens[13].second), "identifier", "tokens[13] = identifier"); - t.is (tokens[14].first, "and", "tokens[14] = 'and'"); // 60 - t.is (Lexer2::typeName (tokens[14].second), "op", "tokens[14] = op"); + t.is (tokens[14].first, "and", "tokens[14] = 'and'"); // 60 + t.is (Lexer::typeName (tokens[14].second), "op", "tokens[14] = op"); - t.is (tokens[15].first, "€", "tokens[15] = \\u20ac --> '€'"); - t.is (Lexer2::typeName (tokens[15].second), "string", "tokens[15] = string"); + t.is (tokens[15].first, "€", "tokens[15] = \\u20ac --> '€'"); + t.is (Lexer::typeName (tokens[15].second), "string", "tokens[15] = string"); // Test for ISO-8601 dates (favoring dates in ambiguous cases). - Lexer2 l3 ("1 12 123 1234 12345 123456 1234567 12345678 20131129T225800Z 2013-11-29T22:58:00Z"); + Lexer l3 ("1 12 123 1234 12345 123456 1234567 12345678 20131129T225800Z 2013-11-29T22:58:00Z"); l3.ambiguity (true); tokens.clear (); while (l3.token (token, type)) { - std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n"; - tokens.push_back (std::pair (token, type)); + std::cout << "# «" << token << "» " << Lexer::typeName (type) << "\n"; + tokens.push_back (std::pair (token, type)); } - t.is ((int)tokens.size (), 10, "10 tokens"); - t.is (tokens[0].first, "1", "tokens[0] == '1'"); - t.is ((int) tokens[0].second, (int) Lexer2::Type::number, "tokens[0] == Type::number"); - t.is (tokens[1].first, "12", "tokens[1] == '12'"); - t.is ((int) tokens[1].second, (int) Lexer2::Type::date, "tokens[1] == Type::date"); - t.is (tokens[2].first, "123", "tokens[2] == '123'"); - t.is ((int) tokens[2].second, (int) Lexer2::Type::number, "tokens[2] == Type::number"); // 70 - t.is (tokens[3].first, "1234", "tokens[3] == '1234'"); - t.is ((int) tokens[3].second, (int) Lexer2::Type::date, "tokens[3] == Type::date"); - t.is (tokens[4].first, "12345", "tokens[4] == '12345'"); - t.is ((int) tokens[4].second, (int) Lexer2::Type::number, "tokens[4] == Type::number"); - t.is (tokens[5].first, "123456", "tokens[5] == '123456'"); - t.is ((int) tokens[5].second, (int) Lexer2::Type::date, "tokens[5] == Type::date"); - t.is (tokens[6].first, "1234567", "tokens[6] == '1234567'"); - t.is ((int) tokens[6].second, (int) Lexer2::Type::number, "tokens[6] == Type::number"); - t.is (tokens[7].first, "12345678", "tokens[7] == '12345678'"); - t.is ((int) tokens[7].second, (int) Lexer2::Type::number, "tokens[7] == Type::number"); // 80 - t.is (tokens[8].first, "20131129T225800Z", "tokens[8] == '20131129T225800Z'"); - t.is ((int) tokens[8].second, (int) Lexer2::Type::date, "tokens[8] == Type::date"); - t.is (tokens[9].first, "2013-11-29T22:58:00Z", "tokens[9] == '2013-11-29T22:58:00Z'"); - t.is ((int) tokens[9].second, (int) Lexer2::Type::date, "tokens[9] == Type::date"); + t.is ((int)tokens.size (), 10, "10 tokens"); + t.is (tokens[0].first, "1", "tokens[0] == '1'"); + t.is ((int) tokens[0].second, (int) Lexer::Type::number, "tokens[0] == Type::number"); + t.is (tokens[1].first, "12", "tokens[1] == '12'"); + t.is ((int) tokens[1].second, (int) Lexer::Type::date, "tokens[1] == 
Type::date"); + t.is (tokens[2].first, "123", "tokens[2] == '123'"); + t.is ((int) tokens[2].second, (int) Lexer::Type::number, "tokens[2] == Type::number"); // 70 + t.is (tokens[3].first, "1234", "tokens[3] == '1234'"); + t.is ((int) tokens[3].second, (int) Lexer::Type::date, "tokens[3] == Type::date"); + t.is (tokens[4].first, "12345", "tokens[4] == '12345'"); + t.is ((int) tokens[4].second, (int) Lexer::Type::number, "tokens[4] == Type::number"); + t.is (tokens[5].first, "123456", "tokens[5] == '123456'"); + t.is ((int) tokens[5].second, (int) Lexer::Type::date, "tokens[5] == Type::date"); + t.is (tokens[6].first, "1234567", "tokens[6] == '1234567'"); + t.is ((int) tokens[6].second, (int) Lexer::Type::number, "tokens[6] == Type::number"); + t.is (tokens[7].first, "12345678", "tokens[7] == '12345678'"); + t.is ((int) tokens[7].second, (int) Lexer::Type::number, "tokens[7] == Type::number"); // 80 + t.is (tokens[8].first, "20131129T225800Z", "tokens[8] == '20131129T225800Z'"); + t.is ((int) tokens[8].second, (int) Lexer::Type::date, "tokens[8] == Type::date"); + t.is (tokens[9].first, "2013-11-29T22:58:00Z", "tokens[9] == '2013-11-29T22:58:00Z'"); + t.is ((int) tokens[9].second, (int) Lexer::Type::date, "tokens[9] == Type::date"); // Test for ISO-8601 dates (favoring numbers in ambiguous cases). - Lexer2 l4 ("1 12 123 1234 12345 123456 1234567 12345678 20131129T225800Z 2013-11-29T22:58:00Z"); + Lexer l4 ("1 12 123 1234 12345 123456 1234567 12345678 20131129T225800Z 2013-11-29T22:58:00Z"); l4.ambiguity (false); tokens.clear (); while (l4.token (token, type)) { - std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n"; - tokens.push_back (std::pair (token, type)); + std::cout << "# «" << token << "» " << Lexer::typeName (type) << "\n"; + tokens.push_back (std::pair (token, type)); } - t.is ((int)tokens.size (), 10, "10 tokens"); - t.is (tokens[0].first, "1", "tokens[0] == '1'"); - t.is ((int) tokens[0].second, (int) Lexer2::Type::number, "tokens[0] == Type::number"); - t.is (tokens[1].first, "12", "tokens[1] == '12'"); - t.is ((int) tokens[1].second, (int) Lexer2::Type::number, "tokens[1] == Type::number"); - t.is (tokens[2].first, "123", "tokens[2] == '123'"); // 90 - t.is ((int) tokens[2].second, (int) Lexer2::Type::number, "tokens[2] == Type::number"); - t.is (tokens[3].first, "1234", "tokens[3] == '1234'"); - t.is ((int) tokens[3].second, (int) Lexer2::Type::number, "tokens[3] == Type::number"); - t.is (tokens[4].first, "12345", "tokens[4] == '12345'"); - t.is ((int) tokens[4].second, (int) Lexer2::Type::number, "tokens[4] == Type::number"); - t.is (tokens[5].first, "123456", "tokens[5] == '123456'"); - t.is ((int) tokens[5].second, (int) Lexer2::Type::number, "tokens[5] == Type::number"); - t.is (tokens[6].first, "1234567", "tokens[6] == '1234567'"); - t.is ((int) tokens[6].second, (int) Lexer2::Type::number, "tokens[6] == Type::number"); - t.is (tokens[7].first, "12345678", "tokens[7] == '12345678'"); // 100 - t.is ((int) tokens[7].second, (int) Lexer2::Type::number, "tokens[7] == Type::number"); - t.is (tokens[8].first, "20131129T225800Z", "tokens[8] == '20131129T225800Z'"); - t.is ((int) tokens[8].second, (int) Lexer2::Type::date, "tokens[8] == Type::date"); - t.is (tokens[9].first, "2013-11-29T22:58:00Z", "tokens[9] == '2013-11-29T22:58:00Z'"); - t.is ((int) tokens[9].second, (int) Lexer2::Type::date, "tokens[9] == Type::date"); + t.is ((int)tokens.size (), 10, "10 tokens"); + t.is (tokens[0].first, "1", "tokens[0] == '1'"); + t.is ((int) tokens[0].second, (int) 
Lexer::Type::number, "tokens[0] == Type::number"); + t.is (tokens[1].first, "12", "tokens[1] == '12'"); + t.is ((int) tokens[1].second, (int) Lexer::Type::number, "tokens[1] == Type::number"); + t.is (tokens[2].first, "123", "tokens[2] == '123'"); // 90 + t.is ((int) tokens[2].second, (int) Lexer::Type::number, "tokens[2] == Type::number"); + t.is (tokens[3].first, "1234", "tokens[3] == '1234'"); + t.is ((int) tokens[3].second, (int) Lexer::Type::number, "tokens[3] == Type::number"); + t.is (tokens[4].first, "12345", "tokens[4] == '12345'"); + t.is ((int) tokens[4].second, (int) Lexer::Type::number, "tokens[4] == Type::number"); + t.is (tokens[5].first, "123456", "tokens[5] == '123456'"); + t.is ((int) tokens[5].second, (int) Lexer::Type::number, "tokens[5] == Type::number"); + t.is (tokens[6].first, "1234567", "tokens[6] == '1234567'"); + t.is ((int) tokens[6].second, (int) Lexer::Type::number, "tokens[6] == Type::number"); + t.is (tokens[7].first, "12345678", "tokens[7] == '12345678'"); // 100 + t.is ((int) tokens[7].second, (int) Lexer::Type::number, "tokens[7] == Type::number"); + t.is (tokens[8].first, "20131129T225800Z", "tokens[8] == '20131129T225800Z'"); + t.is ((int) tokens[8].second, (int) Lexer::Type::date, "tokens[8] == Type::date"); + t.is (tokens[9].first, "2013-11-29T22:58:00Z", "tokens[9] == '2013-11-29T22:58:00Z'"); + t.is ((int) tokens[9].second, (int) Lexer::Type::date, "tokens[9] == Type::date"); // Test for durations - Lexer2 l5 ("1second 1minute 2hour 3 days 4w 5mo 6 years"); + Lexer l5 ("1second 1minute 2hour 3 days 4w 5mo 6 years"); tokens.clear (); while (l5.token (token, type)) { - std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n"; - tokens.push_back (std::pair (token, type)); + std::cout << "# «" << token << "» " << Lexer::typeName (type) << "\n"; + tokens.push_back (std::pair (token, type)); } - t.is ((int)tokens.size (), 7, "7 tokens"); - t.is (tokens[0].first, "1second", "tokens[0] == '1second'"); - t.is ((int) tokens[0].second, (int) Lexer2::Type::duration, "tokens[0] == Type::duration"); - t.is (tokens[1].first, "1minute", "tokens[1] == '1minute'"); - t.is ((int) tokens[1].second, (int) Lexer2::Type::duration, "tokens[1] == Type::duration"); // 110 - t.is (tokens[2].first, "2hour", "tokens[2] == '2hour'"); - t.is ((int) tokens[2].second, (int) Lexer2::Type::duration, "tokens[2] == Type::duration"); - t.is (tokens[3].first, "3 days", "tokens[3] == '3 days'"); - t.is ((int) tokens[3].second, (int) Lexer2::Type::duration, "tokens[3] == Type::duration"); - t.is (tokens[4].first, "4w", "tokens[4] == '4w'"); - t.is ((int) tokens[4].second, (int) Lexer2::Type::duration, "tokens[4] == Type::duration"); - t.is (tokens[5].first, "5mo", "tokens[5] == '5mo'"); - t.is ((int) tokens[5].second, (int) Lexer2::Type::duration, "tokens[5] == Type::duration"); - t.is (tokens[6].first, "6 years", "tokens[6] == '6 years'"); - t.is ((int) tokens[6].second, (int) Lexer2::Type::duration, "tokens[6] == Type::duration"); // 120 + t.is ((int)tokens.size (), 7, "7 tokens"); + t.is (tokens[0].first, "1second", "tokens[0] == '1second'"); + t.is ((int) tokens[0].second, (int) Lexer::Type::duration, "tokens[0] == Type::duration"); + t.is (tokens[1].first, "1minute", "tokens[1] == '1minute'"); + t.is ((int) tokens[1].second, (int) Lexer::Type::duration, "tokens[1] == Type::duration"); // 110 + t.is (tokens[2].first, "2hour", "tokens[2] == '2hour'"); + t.is ((int) tokens[2].second, (int) Lexer::Type::duration, "tokens[2] == Type::duration"); + t.is (tokens[3].first, "3 
days", "tokens[3] == '3 days'"); + t.is ((int) tokens[3].second, (int) Lexer::Type::duration, "tokens[3] == Type::duration"); + t.is (tokens[4].first, "4w", "tokens[4] == '4w'"); + t.is ((int) tokens[4].second, (int) Lexer::Type::duration, "tokens[4] == Type::duration"); + t.is (tokens[5].first, "5mo", "tokens[5] == '5mo'"); + t.is ((int) tokens[5].second, (int) Lexer::Type::duration, "tokens[5] == Type::duration"); + t.is (tokens[6].first, "6 years", "tokens[6] == '6 years'"); + t.is ((int) tokens[6].second, (int) Lexer::Type::duration, "tokens[6] == Type::duration"); // 120 // All the Eval operators. - Lexer2 l6 ("P1Y PT1H P1Y1M1DT1H1M1S 1s 1second"); + Lexer l6 ("P1Y PT1H P1Y1M1DT1H1M1S 1s 1second"); tokens.clear (); while (l6.token (token, type)) { - std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n"; - tokens.push_back (std::pair (token, type)); + std::cout << "# «" << token << "» " << Lexer::typeName (type) << "\n"; + tokens.push_back (std::pair (token, type)); } - t.is ((int)tokens.size (), 5, "5 ISO periods"); - t.is (tokens[0].first, "P1Y", "tokens[0] == 'P1Y'"); - t.is ((int) tokens[0].second, (int) Lexer2::Type::duration, "tokens[0] == Type::duration"); - t.is (tokens[1].first, "PT1H", "tokens[1] == 'PT1H'"); - t.is ((int) tokens[1].second, (int) Lexer2::Type::duration, "tokens[1] == Type::duration"); - t.is (tokens[2].first, "P1Y1M1DT1H1M1S", "tokens[2] == 'P1Y1M1DT1H1M1S'"); - t.is ((int) tokens[2].second, (int) Lexer2::Type::duration, "tokens[2] == Type::duration"); - t.is (tokens[3].first, "1s", "tokens[3] == '1s'"); - t.is ((int) tokens[3].second, (int) Lexer2::Type::duration, "tokens[3] == Type::duration"); - t.is (tokens[4].first, "1second", "tokens[4] == '1second'"); - t.is ((int) tokens[4].second, (int) Lexer2::Type::duration, "tokens[4] == Type::duration"); + t.is ((int)tokens.size (), 5, "5 ISO periods"); + t.is (tokens[0].first, "P1Y", "tokens[0] == 'P1Y'"); + t.is ((int) tokens[0].second, (int) Lexer::Type::duration, "tokens[0] == Type::duration"); + t.is (tokens[1].first, "PT1H", "tokens[1] == 'PT1H'"); + t.is ((int) tokens[1].second, (int) Lexer::Type::duration, "tokens[1] == Type::duration"); + t.is (tokens[2].first, "P1Y1M1DT1H1M1S", "tokens[2] == 'P1Y1M1DT1H1M1S'"); + t.is ((int) tokens[2].second, (int) Lexer::Type::duration, "tokens[2] == Type::duration"); + t.is (tokens[3].first, "1s", "tokens[3] == '1s'"); + t.is ((int) tokens[3].second, (int) Lexer::Type::duration, "tokens[3] == Type::duration"); + t.is (tokens[4].first, "1second", "tokens[4] == '1second'"); + t.is ((int) tokens[4].second, (int) Lexer::Type::duration, "tokens[4] == Type::duration"); // All (int) the Eval operators. - Lexer2 l7 ("and xor or <= >= !~ != == = ^ > ~ ! * / % + - < ( )"); + Lexer l7 ("and xor or <= >= !~ != == = ^ > ~ ! 
* / % + - < ( )"); tokens.clear (); while (l7.token (token, type)) { - std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n"; - tokens.push_back (std::pair (token, type)); + std::cout << "# «" << token << "» " << Lexer::typeName (type) << "\n"; + tokens.push_back (std::pair (token, type)); } - t.is ((int)tokens.size (), 21, "21 operators"); - t.is (tokens[0].first, "and", "tokens[0] == 'and'"); - t.is ((int) tokens[0].second, (int) Lexer2::Type::op, "tokens[0] == Type::op"); // 130 - t.is (tokens[1].first, "xor", "tokens[1] == 'xor'"); - t.is ((int) tokens[1].second, (int) Lexer2::Type::op, "tokens[1] == Type::op"); - t.is (tokens[2].first, "or", "tokens[2] == 'or'"); - t.is ((int) tokens[2].second, (int) Lexer2::Type::op, "tokens[2] == Type::op"); - t.is (tokens[3].first, "<=", "tokens[3] == '<='"); - t.is ((int) tokens[3].second, (int) Lexer2::Type::op, "tokens[3] == Type::op"); - t.is (tokens[4].first, ">=", "tokens[4] == '>='"); - t.is ((int) tokens[4].second, (int) Lexer2::Type::op, "tokens[4] == Type::op"); - t.is (tokens[5].first, "!~", "tokens[5] == '!~'"); - t.is ((int) tokens[5].second, (int) Lexer2::Type::op, "tokens[5] == Type::op"); // 140 - t.is (tokens[6].first, "!=", "tokens[6] == '!='"); - t.is ((int) tokens[6].second, (int) Lexer2::Type::op, "tokens[6] == Type::op"); - t.is (tokens[7].first, "==", "tokens[7] == '=='"); - t.is ((int) tokens[7].second, (int) Lexer2::Type::op, "tokens[7] == Type::op"); - t.is (tokens[8].first, "=", "tokens[8] == '='"); - t.is ((int) tokens[8].second, (int) Lexer2::Type::op, "tokens[8] == Type::op"); - t.is (tokens[9].first, "^", "tokens[9] == '^'"); - t.is ((int) tokens[9].second, (int) Lexer2::Type::op, "tokens[9] == Type::op"); - t.is (tokens[10].first, ">", "tokens[10] == '>'"); - t.is ((int) tokens[10].second, (int) Lexer2::Type::op, "tokens[10] == Type::op"); // 150 - t.is (tokens[11].first, "~", "tokens[11] == '~'"); - t.is ((int) tokens[11].second, (int) Lexer2::Type::op, "tokens[11] == Type::op"); - t.is (tokens[12].first, "!", "tokens[12] == '!'"); - t.is ((int) tokens[12].second, (int) Lexer2::Type::op, "tokens[12] == Type::op"); - t.is (tokens[13].first, "*", "tokens[13] == '*'"); - t.is ((int) tokens[13].second, (int) Lexer2::Type::op, "tokens[13] == Type::op"); - t.is (tokens[14].first, "/", "tokens[14] == '/'"); - t.is ((int) tokens[14].second, (int) Lexer2::Type::op, "tokens[14] == Type::op"); - t.is (tokens[15].first, "%", "tokens[15] == '%'"); - t.is ((int) tokens[15].second, (int) Lexer2::Type::op, "tokens[15] == Type::op"); // 160 - t.is (tokens[16].first, "+", "tokens[16] == '+'"); - t.is ((int) tokens[16].second, (int) Lexer2::Type::op, "tokens[16] == Type::op"); - t.is (tokens[17].first, "-", "tokens[17] == '-'"); - t.is ((int) tokens[17].second, (int) Lexer2::Type::op, "tokens[17] == Type::op"); - t.is (tokens[18].first, "<", "tokens[18] == '<'"); - t.is ((int) tokens[18].second, (int) Lexer2::Type::op, "tokens[18] == Type::op"); - t.is (tokens[19].first, "(", "tokens[19] == '('"); - t.is ((int) tokens[19].second, (int) Lexer2::Type::op, "tokens[19] == Type::op"); - t.is (tokens[20].first, ")", "tokens[20] == ')'"); - t.is ((int) tokens[20].second, (int)Lexer2::Type::op, "tokens[20] == Type::op"); // 170 + t.is ((int)tokens.size (), 21, "21 operators"); + t.is (tokens[0].first, "and", "tokens[0] == 'and'"); + t.is ((int) tokens[0].second, (int) Lexer::Type::op, "tokens[0] == Type::op"); // 130 + t.is (tokens[1].first, "xor", "tokens[1] == 'xor'"); + t.is ((int) tokens[1].second, (int) Lexer::Type::op, 
"tokens[1] == Type::op"); + t.is (tokens[2].first, "or", "tokens[2] == 'or'"); + t.is ((int) tokens[2].second, (int) Lexer::Type::op, "tokens[2] == Type::op"); + t.is (tokens[3].first, "<=", "tokens[3] == '<='"); + t.is ((int) tokens[3].second, (int) Lexer::Type::op, "tokens[3] == Type::op"); + t.is (tokens[4].first, ">=", "tokens[4] == '>='"); + t.is ((int) tokens[4].second, (int) Lexer::Type::op, "tokens[4] == Type::op"); + t.is (tokens[5].first, "!~", "tokens[5] == '!~'"); + t.is ((int) tokens[5].second, (int) Lexer::Type::op, "tokens[5] == Type::op"); // 140 + t.is (tokens[6].first, "!=", "tokens[6] == '!='"); + t.is ((int) tokens[6].second, (int) Lexer::Type::op, "tokens[6] == Type::op"); + t.is (tokens[7].first, "==", "tokens[7] == '=='"); + t.is ((int) tokens[7].second, (int) Lexer::Type::op, "tokens[7] == Type::op"); + t.is (tokens[8].first, "=", "tokens[8] == '='"); + t.is ((int) tokens[8].second, (int) Lexer::Type::op, "tokens[8] == Type::op"); + t.is (tokens[9].first, "^", "tokens[9] == '^'"); + t.is ((int) tokens[9].second, (int) Lexer::Type::op, "tokens[9] == Type::op"); + t.is (tokens[10].first, ">", "tokens[10] == '>'"); + t.is ((int) tokens[10].second, (int) Lexer::Type::op, "tokens[10] == Type::op"); // 150 + t.is (tokens[11].first, "~", "tokens[11] == '~'"); + t.is ((int) tokens[11].second, (int) Lexer::Type::op, "tokens[11] == Type::op"); + t.is (tokens[12].first, "!", "tokens[12] == '!'"); + t.is ((int) tokens[12].second, (int) Lexer::Type::op, "tokens[12] == Type::op"); + t.is (tokens[13].first, "*", "tokens[13] == '*'"); + t.is ((int) tokens[13].second, (int) Lexer::Type::op, "tokens[13] == Type::op"); + t.is (tokens[14].first, "/", "tokens[14] == '/'"); + t.is ((int) tokens[14].second, (int) Lexer::Type::op, "tokens[14] == Type::op"); + t.is (tokens[15].first, "%", "tokens[15] == '%'"); + t.is ((int) tokens[15].second, (int) Lexer::Type::op, "tokens[15] == Type::op"); // 160 + t.is (tokens[16].first, "+", "tokens[16] == '+'"); + t.is ((int) tokens[16].second, (int) Lexer::Type::op, "tokens[16] == Type::op"); + t.is (tokens[17].first, "-", "tokens[17] == '-'"); + t.is ((int) tokens[17].second, (int) Lexer::Type::op, "tokens[17] == Type::op"); + t.is (tokens[18].first, "<", "tokens[18] == '<'"); + t.is ((int) tokens[18].second, (int) Lexer::Type::op, "tokens[18] == Type::op"); + t.is (tokens[19].first, "(", "tokens[19] == '('"); + t.is ((int) tokens[19].second, (int) Lexer::Type::op, "tokens[19] == Type::op"); + t.is (tokens[20].first, ")", "tokens[20] == ')'"); + t.is ((int) tokens[20].second, (int)Lexer::Type::op, "tokens[20] == Type::op"); // 170 // Test ordinal dates. 
- Lexer2 l8 ("9th 10th"); + Lexer l8 ("9th 10th"); l8.ambiguity (false); tokens.clear (); while (l8.token (token, type)) { - std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n"; - tokens.push_back (std::pair (token, type)); + std::cout << "# «" << token << "» " << Lexer::typeName (type) << "\n"; + tokens.push_back (std::pair (token, type)); } - t.is ((int)tokens.size (), 2, "2 tokens"); - t.is (tokens[0].first, "9th", "tokens[0] == '9th'"); - t.is ((int) tokens[0].second, (int) Lexer2::Type::identifier, "tokens[0] == Type::identifier"); - t.is (tokens[1].first, "10th", "tokens[1] == '10th'"); - t.is ((int) tokens[1].second, (int) Lexer2::Type::identifier, "tokens[1] == Type::identifier"); + t.is ((int)tokens.size (), 2, "2 tokens"); + t.is (tokens[0].first, "9th", "tokens[0] == '9th'"); + t.is ((int) tokens[0].second, (int) Lexer::Type::identifier, "tokens[0] == Type::identifier"); + t.is (tokens[1].first, "10th", "tokens[1] == '10th'"); + t.is ((int) tokens[1].second, (int) Lexer::Type::identifier, "tokens[1] == Type::identifier"); // Test tag recognition. - Lexer2 l9 ("+with -WITHOUT + 2"); + Lexer l9 ("+with -WITHOUT + 2"); l9.ambiguity (false); tokens.clear (); while (l9.token (token, type)) { - std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n"; - tokens.push_back (std::pair (token, type)); + std::cout << "# «" << token << "» " << Lexer::typeName (type) << "\n"; + tokens.push_back (std::pair (token, type)); } - t.is ((int)tokens.size (), 4, "4 tokens"); - t.is (tokens[0].first, "+with", "tokens[0] == '+with'"); - t.is ((int) tokens[0].second, (int) Lexer2::Type::tag, "tokens[0] == Type::tag"); - t.is (tokens[1].first, "-WITHOUT", "tokens[1] == '-WITHOUT'"); - t.is ((int) tokens[1].second, (int) Lexer2::Type::tag, "tokens[1] == Type::tag"); - t.is (tokens[2].first, "+", "tokens[2] == '+'"); - t.is ((int) tokens[2].second, (int) Lexer2::Type::op, "tokens[2] == Type::op"); - t.is (tokens[3].first, "2", "tokens[3] == '2'"); - t.is ((int) tokens[3].second, (int) Lexer2::Type::number, "tokens[3] == Type::number"); + t.is ((int)tokens.size (), 4, "4 tokens"); + t.is (tokens[0].first, "+with", "tokens[0] == '+with'"); + t.is ((int) tokens[0].second, (int) Lexer::Type::tag, "tokens[0] == Type::tag"); + t.is (tokens[1].first, "-WITHOUT", "tokens[1] == '-WITHOUT'"); + t.is ((int) tokens[1].second, (int) Lexer::Type::tag, "tokens[1] == Type::tag"); + t.is (tokens[2].first, "+", "tokens[2] == '+'"); + t.is ((int) tokens[2].second, (int) Lexer::Type::op, "tokens[2] == Type::op"); + t.is (tokens[3].first, "2", "tokens[3] == '2'"); + t.is ((int) tokens[3].second, (int) Lexer::Type::number, "tokens[3] == Type::number"); // void split (std::vector&, const std::string&); std::string unsplit = " ( A or B ) "; std::vector items; - items = Lexer2::split (unsplit); + items = Lexer::split (unsplit); t.is (items.size (), (size_t) 5, "split ' ( A or B ) '"); t.is (items[0], "(", "split ' ( A or B ) ' -> [0] '('"); t.is (items[1], "A", "split ' ( A or B ) ' -> [1] 'A'"); @@ -358,7 +358,7 @@ int main (int argc, char** argv) // Test simple mode with contrived tokens that ordinarily split. unsplit = " +-* a+b 12.3e4 'c d'"; - items = Lexer2::split (unsplit); + items = Lexer::split (unsplit); t.is (items.size (), (size_t) 8, "split ' +-* a+b 12.3e4 'c d''"); t.is (items[0], "+", "split ' +-* a+b 12.3e4 'c d'' -> [0] '+'"); t.is (items[1], "-", "split ' +-* a+b 12.3e4 'c d'' -> [1] '-'"); @@ -371,12 +371,12 @@ int main (int argc, char** argv) // Test common expression element. 
unsplit = "name=value"; - items = Lexer2::split (unsplit); + items = Lexer::split (unsplit); t.is (items.size (), (size_t) 1, "split 'name=value'"); // Test unterminated tokens. unsplit = " ordinary "; - items = Lexer2::split (unsplit); + items = Lexer::split (unsplit); t.is (items.size (), (size_t) 1, "split 'ordinary' --> 1 token"); t.is (items[0], "ordinary", "split 'ordinary' --> 'ordinary'");