- Renamed Lexer2 to Lexer, it looks good enough to assume control.
This commit is contained in:
Paul Beckingham 2015-02-22 17:46:22 -05:00
parent e1c0d5b130
commit 745aad0d27
15 changed files with 553 additions and 553 deletions

View file

@ -29,7 +29,7 @@
#include <algorithm> #include <algorithm>
#include <Context.h> #include <Context.h>
#include <Nibbler.h> #include <Nibbler.h>
#include <Lexer2.h> #include <Lexer.h>
#include <CLI.h> #include <CLI.h>
#include <Color.h> #include <Color.h>
#include <text.h> #include <text.h>
@ -661,13 +661,13 @@ void CLI::addArg (const std::string& arg)
// that cause the lexemes to be ignored, and the original argument used // that cause the lexemes to be ignored, and the original argument used
// intact. // intact.
std::string lexeme; std::string lexeme;
Lexer2::Type type; Lexer::Type type;
Lexer2 lex (raw); Lexer lex (raw);
lex.ambiguity (false); lex.ambiguity (false);
std::vector <std::pair <std::string, Lexer2::Type> > lexemes; std::vector <std::pair <std::string, Lexer::Type> > lexemes;
while (lex.token (lexeme, type)) while (lex.token (lexeme, type))
lexemes.push_back (std::pair <std::string, Lexer2::Type> (lexeme, type)); lexemes.push_back (std::pair <std::string, Lexer::Type> (lexeme, type));
if (disqualifyInsufficientTerms (lexemes) || if (disqualifyInsufficientTerms (lexemes) ||
disqualifyNoOps (lexemes) || disqualifyNoOps (lexemes) ||
@ -681,7 +681,7 @@ void CLI::addArg (const std::string& arg)
{ {
// How often have I said to you that when you have eliminated the // How often have I said to you that when you have eliminated the
// impossible, whatever remains, however improbable, must be the truth? // impossible, whatever remains, however improbable, must be the truth?
std::vector <std::pair <std::string, Lexer2::Type> >::iterator l; std::vector <std::pair <std::string, Lexer::Type> >::iterator l;
for (l = lexemes.begin (); l != lexemes.end (); ++l) for (l = lexemes.begin (); l != lexemes.end (); ++l)
_original_args.push_back (l->first); _original_args.push_back (l->first);
} }
@ -713,7 +713,7 @@ void CLI::aliasExpansion ()
{ {
if (_aliases.find (raw) != _aliases.end ()) if (_aliases.find (raw) != _aliases.end ())
{ {
std::vector <std::string> lexed = Lexer2::split (_aliases[raw]); std::vector <std::string> lexed = Lexer::split (_aliases[raw]);
std::vector <std::string>::iterator l; std::vector <std::string>::iterator l;
for (l = lexed.begin (); l != lexed.end (); ++l) for (l = lexed.begin (); l != lexed.end (); ++l)
{ {
@ -1636,7 +1636,7 @@ void CLI::desugarFilterPlainArgs ()
reconstructed.push_back (op); reconstructed.push_back (op);
std::string pattern = a->attribute ("raw"); std::string pattern = a->attribute ("raw");
Lexer2::dequote (pattern); Lexer::dequote (pattern);
A rhs ("argPattern", "'" + pattern + "'"); A rhs ("argPattern", "'" + pattern + "'");
rhs.tag ("LITERAL"); rhs.tag ("LITERAL");
rhs.tag ("FILTER"); rhs.tag ("FILTER");
@ -1812,7 +1812,7 @@ void CLI::injectDefaults ()
if (defaultCommand != "") if (defaultCommand != "")
{ {
// Split the defaultCommand into separate args. // Split the defaultCommand into separate args.
std::vector <std::string> tokens = Lexer2::split (defaultCommand); std::vector <std::string> tokens = Lexer::split (defaultCommand);
// Modify _args to be: <args0> [<def0> ...] <args1> [...] // Modify _args to be: <args0> [<def0> ...] <args1> [...]
std::vector <A> reconstructed; std::vector <A> reconstructed;
@ -2302,9 +2302,9 @@ bool CLI::isName (const std::string& raw) const
{ {
for (int i = 0; i < raw.length (); ++i) for (int i = 0; i < raw.length (); ++i)
{ {
if (i == 0 && ! Lexer2::isIdentifierStart (raw[i])) if (i == 0 && ! Lexer::isIdentifierStart (raw[i]))
return false; return false;
else if (! Lexer2::isIdentifierNext (raw[i])) else if (! Lexer::isIdentifierNext (raw[i]))
return false; return false;
} }
@ -2316,19 +2316,19 @@ bool CLI::isName (const std::string& raw) const
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
bool CLI::disqualifyInsufficientTerms ( bool CLI::disqualifyInsufficientTerms (
const std::vector <std::pair <std::string, Lexer2::Type> >& lexemes) const const std::vector <std::pair <std::string, Lexer::Type> >& lexemes) const
{ {
return lexemes.size () < 3 ? true : false; return lexemes.size () < 3 ? true : false;
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
bool CLI::disqualifyNoOps ( bool CLI::disqualifyNoOps (
const std::vector <std::pair <std::string, Lexer2::Type> >& lexemes) const const std::vector <std::pair <std::string, Lexer::Type> >& lexemes) const
{ {
bool foundOP = false; bool foundOP = false;
std::vector <std::pair <std::string, Lexer2::Type> >::const_iterator l; std::vector <std::pair <std::string, Lexer::Type> >::const_iterator l;
for (l = lexemes.begin (); l != lexemes.end (); ++l) for (l = lexemes.begin (); l != lexemes.end (); ++l)
if (l->second == Lexer2::Type::op) if (l->second == Lexer::Type::op)
foundOP = true; foundOP = true;
return ! foundOP; return ! foundOP;
@ -2336,16 +2336,16 @@ bool CLI::disqualifyNoOps (
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
bool CLI::disqualifyOnlyParenOps ( bool CLI::disqualifyOnlyParenOps (
const std::vector <std::pair <std::string, Lexer2::Type> >& lexemes) const const std::vector <std::pair <std::string, Lexer::Type> >& lexemes) const
{ {
int opCount = 0; int opCount = 0;
int opSugarCount = 0; int opSugarCount = 0;
int opParenCount = 0; int opParenCount = 0;
std::vector <std::pair <std::string, Lexer2::Type> >::const_iterator l; std::vector <std::pair <std::string, Lexer::Type> >::const_iterator l;
for (l = lexemes.begin (); l != lexemes.end (); ++l) for (l = lexemes.begin (); l != lexemes.end (); ++l)
{ {
if (l->second == Lexer2::Type::op) if (l->second == Lexer::Type::op)
{ {
++opCount; ++opCount;
@ -2372,7 +2372,7 @@ bool CLI::disqualifyOnlyParenOps (
// as there are no operators in between, which includes syntactic sugar that // as there are no operators in between, which includes syntactic sugar that
// hides operators. // hides operators.
bool CLI::disqualifyFirstLastBinary ( bool CLI::disqualifyFirstLastBinary (
const std::vector <std::pair <std::string, Lexer2::Type> >& lexemes) const const std::vector <std::pair <std::string, Lexer::Type> >& lexemes) const
{ {
bool firstBinary = false; bool firstBinary = false;
bool lastBinary = false; bool lastBinary = false;
@ -2391,7 +2391,7 @@ bool CLI::disqualifyFirstLastBinary (
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Disqualify terms when there are operators hidden by syntactic sugar. // Disqualify terms when there are operators hidden by syntactic sugar.
bool CLI::disqualifySugarFree ( bool CLI::disqualifySugarFree (
const std::vector <std::pair <std::string, Lexer2::Type> >& lexemes) const const std::vector <std::pair <std::string, Lexer::Type> >& lexemes) const
{ {
bool sugared = true; bool sugared = true;
for (unsigned int i = 1; i < lexemes.size () - 1; ++i) for (unsigned int i = 1; i < lexemes.size () - 1; ++i)

View file

@ -29,7 +29,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <map> #include <map>
#include <Lexer2.h> #include <Lexer.h>
#include <Path.h> #include <Path.h>
#include <File.h> #include <File.h>
@ -126,11 +126,11 @@ private:
bool isOperator (const std::string&) const; bool isOperator (const std::string&) const;
bool isName (const std::string&) const; bool isName (const std::string&) const;
bool disqualifyInsufficientTerms (const std::vector <std::pair <std::string, Lexer2::Type> >&) const; bool disqualifyInsufficientTerms (const std::vector <std::pair <std::string, Lexer::Type> >&) const;
bool disqualifyNoOps (const std::vector <std::pair <std::string, Lexer2::Type> >&) const; bool disqualifyNoOps (const std::vector <std::pair <std::string, Lexer::Type> >&) const;
bool disqualifyOnlyParenOps (const std::vector <std::pair <std::string, Lexer2::Type> >&) const; bool disqualifyOnlyParenOps (const std::vector <std::pair <std::string, Lexer::Type> >&) const;
bool disqualifyFirstLastBinary (const std::vector <std::pair <std::string, Lexer2::Type> >&) const; bool disqualifyFirstLastBinary (const std::vector <std::pair <std::string, Lexer::Type> >&) const;
bool disqualifySugarFree (const std::vector <std::pair <std::string, Lexer2::Type> >&) const; bool disqualifySugarFree (const std::vector <std::pair <std::string, Lexer::Type> >&) const;
public: public:
std::multimap <std::string, std::string> _entities; std::multimap <std::string, std::string> _entities;

View file

@ -20,7 +20,7 @@ set (task_SRCS CLI.cpp CLI.h
Hooks.cpp Hooks.h Hooks.cpp Hooks.h
ISO8601.cpp ISO8601.h ISO8601.cpp ISO8601.h
JSON.cpp JSON.h JSON.cpp JSON.h
Lexer2.cpp Lexer2.h Lexer.cpp Lexer.h
Msg.cpp Msg.h Msg.cpp Msg.h
Nibbler.cpp Nibbler.h Nibbler.cpp Nibbler.h
Path.cpp Path.h Path.cpp Path.h

View file

@ -657,8 +657,8 @@ void Context::staticInitialization ()
Task::searchCaseSensitive = Variant::searchCaseSensitive = config.getBoolean ("search.case.sensitive"); Task::searchCaseSensitive = Variant::searchCaseSensitive = config.getBoolean ("search.case.sensitive");
Task::regex = Variant::searchUsingRegex = config.getBoolean ("regex"); Task::regex = Variant::searchUsingRegex = config.getBoolean ("regex");
Lexer2::dateFormat = Variant::dateFormat = config.get ("dateformat"); Lexer::dateFormat = Variant::dateFormat = config.get ("dateformat");
Lexer2::isoEnabled = Variant::isoEnabled = config.getBoolean ("date.iso"); Lexer::isoEnabled = Variant::isoEnabled = config.getBoolean ("date.iso");
std::map <std::string, Column*>::iterator i; std::map <std::string, Column*>::iterator i;
for (i = columns.begin (); i != columns.end (); ++i) for (i = columns.begin (); i != columns.end (); ++i)

View file

@ -31,7 +31,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <Nibbler.h> #include <Nibbler.h>
#include <Lexer2.h> #include <Lexer.h>
#include <Duration.h> #include <Duration.h>
#include <text.h> #include <text.h>
@ -295,7 +295,7 @@ bool Duration::parse (const std::string& input, std::string::size_type& start)
if (n.getOneOf (units, unit)) if (n.getOneOf (units, unit))
{ {
if (n.depleted () || if (n.depleted () ||
Lexer2::isWhitespace (n.next ())) Lexer::isWhitespace (n.next ()))
{ {
start = original_start + n.cursor (); start = original_start + n.cursor ();
@ -319,7 +319,7 @@ bool Duration::parse (const std::string& input, std::string::size_type& start)
if (n.getOneOf (units, unit)) if (n.getOneOf (units, unit))
{ {
if (n.depleted () || if (n.depleted () ||
Lexer2::isWhitespace (n.next ())) Lexer::isWhitespace (n.next ()))
{ {
start = original_start + n.cursor (); start = original_start + n.cursor ();
double quantity = strtod (number.c_str (), NULL); double quantity = strtod (number.c_str (), NULL);

View file

@ -125,13 +125,13 @@ void Eval::addSource (bool (*source)(const std::string&, Variant&))
void Eval::evaluateInfixExpression (const std::string& e, Variant& v) const void Eval::evaluateInfixExpression (const std::string& e, Variant& v) const
{ {
// Reduce e to a vector of tokens. // Reduce e to a vector of tokens.
Lexer2 l (e); Lexer l (e);
l.ambiguity (_ambiguity); l.ambiguity (_ambiguity);
std::vector <std::pair <std::string, Lexer2::Type> > tokens; std::vector <std::pair <std::string, Lexer::Type> > tokens;
std::string token; std::string token;
Lexer2::Type type; Lexer::Type type;
while (l.token (token, type)) while (l.token (token, type))
tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type)); tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
// Parse for syntax checking and operator replacement. // Parse for syntax checking and operator replacement.
if (_debug) if (_debug)
@ -153,13 +153,13 @@ void Eval::evaluateInfixExpression (const std::string& e, Variant& v) const
void Eval::evaluatePostfixExpression (const std::string& e, Variant& v) const void Eval::evaluatePostfixExpression (const std::string& e, Variant& v) const
{ {
// Reduce e to a vector of tokens. // Reduce e to a vector of tokens.
Lexer2 l (e); Lexer l (e);
l.ambiguity (_ambiguity); l.ambiguity (_ambiguity);
std::vector <std::pair <std::string, Lexer2::Type> > tokens; std::vector <std::pair <std::string, Lexer::Type> > tokens;
std::string token; std::string token;
Lexer2::Type type; Lexer::Type type;
while (l.token (token, type)) while (l.token (token, type))
tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type)); tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
if (_debug) if (_debug)
context.debug ("FILTER Postfix " + dump (tokens)); context.debug ("FILTER Postfix " + dump (tokens));
@ -172,15 +172,15 @@ void Eval::evaluatePostfixExpression (const std::string& e, Variant& v) const
void Eval::compileExpression (const std::string& e) void Eval::compileExpression (const std::string& e)
{ {
// Reduce e to a vector of tokens. // Reduce e to a vector of tokens.
Lexer2 l (e); Lexer l (e);
l.ambiguity (_ambiguity); l.ambiguity (_ambiguity);
std::string token; std::string token;
Lexer2::Type type; Lexer::Type type;
while (l.token (token, type)) while (l.token (token, type))
{ {
if (_debug) if (_debug)
context.debug ("Lexer '" + token + "' " + Lexer2::typeToString (type)); context.debug ("Lexer '" + token + "' " + Lexer::typeToString (type));
_compiled.push_back (std::pair <std::string, Lexer2::Type> (token, type)); _compiled.push_back (std::pair <std::string, Lexer::Type> (token, type));
} }
// Parse for syntax checking and operator replacement. // Parse for syntax checking and operator replacement.
@ -236,7 +236,7 @@ void Eval::getBinaryOperators (std::vector <std::string>& all)
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void Eval::evaluatePostfixStack ( void Eval::evaluatePostfixStack (
const std::vector <std::pair <std::string, Lexer2::Type> >& tokens, const std::vector <std::pair <std::string, Lexer::Type> >& tokens,
Variant& result) const Variant& result) const
{ {
if (tokens.size () == 0) if (tokens.size () == 0)
@ -245,11 +245,11 @@ void Eval::evaluatePostfixStack (
// This is the stack used by the postfix evaluator. // This is the stack used by the postfix evaluator.
std::vector <Variant> values; std::vector <Variant> values;
std::vector <std::pair <std::string, Lexer2::Type> >::const_iterator token; std::vector <std::pair <std::string, Lexer::Type> >::const_iterator token;
for (token = tokens.begin (); token != tokens.end (); ++token) for (token = tokens.begin (); token != tokens.end (); ++token)
{ {
// Unary operators. // Unary operators.
if (token->second == Lexer2::Type::op && if (token->second == Lexer::Type::op &&
token->first == "!") token->first == "!")
{ {
if (values.size () < 1) if (values.size () < 1)
@ -262,7 +262,7 @@ void Eval::evaluatePostfixStack (
if (_debug) if (_debug)
context.debug (format ("Eval {1} ↓'{2}' → ↑'{3}'", token->first, (std::string) right, (std::string) result)); context.debug (format ("Eval {1} ↓'{2}' → ↑'{3}'", token->first, (std::string) right, (std::string) result));
} }
else if (token->second == Lexer2::Type::op && else if (token->second == Lexer::Type::op &&
token->first == "_neg_") token->first == "_neg_")
{ {
if (values.size () < 1) if (values.size () < 1)
@ -278,7 +278,7 @@ void Eval::evaluatePostfixStack (
if (_debug) if (_debug)
context.debug (format ("Eval {1} ↓'{2}' → ↑'{3}'", token->first, (std::string) right, (std::string) result)); context.debug (format ("Eval {1} ↓'{2}' → ↑'{3}'", token->first, (std::string) right, (std::string) result));
} }
else if (token->second == Lexer2::Type::op && else if (token->second == Lexer::Type::op &&
token->first == "_pos_") token->first == "_pos_")
{ {
// The _pos_ operator is a NOP. // The _pos_ operator is a NOP.
@ -287,7 +287,7 @@ void Eval::evaluatePostfixStack (
} }
// Binary operators. // Binary operators.
else if (token->second == Lexer2::Type::op) else if (token->second == Lexer::Type::op)
{ {
if (values.size () < 2) if (values.size () < 2)
throw std::string (STRING_EVAL_NO_EVAL); throw std::string (STRING_EVAL_NO_EVAL);
@ -338,8 +338,8 @@ void Eval::evaluatePostfixStack (
Variant v (token->first); Variant v (token->first);
switch (token->second) switch (token->second)
{ {
case Lexer2::Type::number: case Lexer::Type::number:
if (Lexer2::isAllDigits (token->first)) if (Lexer::isAllDigits (token->first))
{ {
v.cast (Variant::type_integer); v.cast (Variant::type_integer);
if (_debug) if (_debug)
@ -354,11 +354,11 @@ void Eval::evaluatePostfixStack (
break; break;
case Lexer2::Type::op: case Lexer::Type::op:
throw std::string (STRING_EVAL_OP_EXPECTED); throw std::string (STRING_EVAL_OP_EXPECTED);
break; break;
case Lexer2::Type::identifier: case Lexer::Type::identifier:
{ {
bool found = false; bool found = false;
std::vector <bool (*)(const std::string&, Variant&)>::const_iterator source; std::vector <bool (*)(const std::string&, Variant&)>::const_iterator source;
@ -383,13 +383,13 @@ void Eval::evaluatePostfixStack (
} }
break; break;
case Lexer2::Type::date: case Lexer::Type::date:
v.cast (Variant::type_date); v.cast (Variant::type_date);
if (_debug) if (_debug)
context.debug (format ("Eval literal date ↑'{1}'", (std::string) v)); context.debug (format ("Eval literal date ↑'{1}'", (std::string) v));
break; break;
case Lexer2::Type::duration: case Lexer::Type::duration:
v.cast (Variant::type_duration); v.cast (Variant::type_duration);
if (_debug) if (_debug)
context.debug (format ("Eval literal duration ↑'{1}'", (std::string) v)); context.debug (format ("Eval literal duration ↑'{1}'", (std::string) v));
@ -397,19 +397,19 @@ void Eval::evaluatePostfixStack (
// Nothing to do. // Nothing to do.
/* /*
case Lexer2::Type::uuid: case Lexer::Type::uuid:
case Lexer2::Type::hex: case Lexer::Type::hex:
case Lexer2::Type::list: case Lexer::Type::list:
case Lexer2::Type::url: case Lexer::Type::url:
case Lexer2::Type::pair: case Lexer::Type::pair:
case Lexer2::Type::separator: case Lexer::Type::separator:
case Lexer2::Type::tag: case Lexer::Type::tag:
case Lexer2::Type::path: case Lexer::Type::path:
case Lexer2::Type::substitution: case Lexer::Type::substitution:
case Lexer2::Type::pattern: case Lexer::Type::pattern:
case Lexer2::Type::word: case Lexer::Type::word:
*/ */
case Lexer2::Type::string: case Lexer::Type::string:
default: default:
if (_debug) if (_debug)
context.debug (format ("Eval literal string ↑'{1}'", (std::string) v)); context.debug (format ("Eval literal string ↑'{1}'", (std::string) v));
@ -443,7 +443,7 @@ void Eval::evaluatePostfixStack (
// Primitive --> "(" Logical ")" | Variant // Primitive --> "(" Logical ")" | Variant
// //
void Eval::infixParse ( void Eval::infixParse (
std::vector <std::pair <std::string, Lexer2::Type> >& infix) const std::vector <std::pair <std::string, Lexer::Type> >& infix) const
{ {
int i = 0; int i = 0;
parseLogical (infix, i); parseLogical (infix, i);
@ -452,14 +452,14 @@ void Eval::infixParse (
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Logical --> Regex {( "and" | "or" | "xor" ) Regex} // Logical --> Regex {( "and" | "or" | "xor" ) Regex}
bool Eval::parseLogical ( bool Eval::parseLogical (
std::vector <std::pair <std::string, Lexer2::Type> >& infix, std::vector <std::pair <std::string, Lexer::Type> >& infix,
int &i) const int &i) const
{ {
if (i < infix.size () && if (i < infix.size () &&
parseRegex (infix, i)) parseRegex (infix, i))
{ {
while (i < infix.size () && while (i < infix.size () &&
infix[i].second == Lexer2::Type::op && infix[i].second == Lexer::Type::op &&
(infix[i].first == "and" || (infix[i].first == "and" ||
infix[i].first == "or" || infix[i].first == "or" ||
infix[i].first == "xor")) infix[i].first == "xor"))
@ -478,14 +478,14 @@ bool Eval::parseLogical (
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Regex --> Equality {( "~" | "!~" ) Equality} // Regex --> Equality {( "~" | "!~" ) Equality}
bool Eval::parseRegex ( bool Eval::parseRegex (
std::vector <std::pair <std::string, Lexer2::Type> >& infix, std::vector <std::pair <std::string, Lexer::Type> >& infix,
int &i) const int &i) const
{ {
if (i < infix.size () && if (i < infix.size () &&
parseEquality (infix, i)) parseEquality (infix, i))
{ {
while (i < infix.size () && while (i < infix.size () &&
infix[i].second == Lexer2::Type::op && infix[i].second == Lexer::Type::op &&
(infix[i].first == "~" || (infix[i].first == "~" ||
infix[i].first == "!~")) infix[i].first == "!~"))
{ {
@ -503,14 +503,14 @@ bool Eval::parseRegex (
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Equality --> Comparative {( "==" | "=" | "!==" | "!=" ) Comparative} // Equality --> Comparative {( "==" | "=" | "!==" | "!=" ) Comparative}
bool Eval::parseEquality ( bool Eval::parseEquality (
std::vector <std::pair <std::string, Lexer2::Type> >& infix, std::vector <std::pair <std::string, Lexer::Type> >& infix,
int &i) const int &i) const
{ {
if (i < infix.size () && if (i < infix.size () &&
parseComparative (infix, i)) parseComparative (infix, i))
{ {
while (i < infix.size () && while (i < infix.size () &&
infix[i].second == Lexer2::Type::op && infix[i].second == Lexer::Type::op &&
(infix[i].first == "==" || (infix[i].first == "==" ||
infix[i].first == "=" || infix[i].first == "=" ||
infix[i].first == "!==" || infix[i].first == "!==" ||
@ -530,14 +530,14 @@ bool Eval::parseEquality (
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Comparative --> Arithmetic {( "<=" | "<" | ">=" | ">" ) Arithmetic} // Comparative --> Arithmetic {( "<=" | "<" | ">=" | ">" ) Arithmetic}
bool Eval::parseComparative ( bool Eval::parseComparative (
std::vector <std::pair <std::string, Lexer2::Type> >& infix, std::vector <std::pair <std::string, Lexer::Type> >& infix,
int &i) const int &i) const
{ {
if (i < infix.size () && if (i < infix.size () &&
parseArithmetic (infix, i)) parseArithmetic (infix, i))
{ {
while (i < infix.size () && while (i < infix.size () &&
infix[i].second == Lexer2::Type::op && infix[i].second == Lexer::Type::op &&
(infix[i].first == "<=" || (infix[i].first == "<=" ||
infix[i].first == "<" || infix[i].first == "<" ||
infix[i].first == ">=" || infix[i].first == ">=" ||
@ -557,14 +557,14 @@ bool Eval::parseComparative (
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Arithmetic --> Geometric {( "+" | "-" ) Geometric} // Arithmetic --> Geometric {( "+" | "-" ) Geometric}
bool Eval::parseArithmetic ( bool Eval::parseArithmetic (
std::vector <std::pair <std::string, Lexer2::Type> >& infix, std::vector <std::pair <std::string, Lexer::Type> >& infix,
int &i) const int &i) const
{ {
if (i < infix.size () && if (i < infix.size () &&
parseGeometric (infix, i)) parseGeometric (infix, i))
{ {
while (i < infix.size () && while (i < infix.size () &&
infix[i].second == Lexer2::Type::op && infix[i].second == Lexer::Type::op &&
(infix[i].first == "+" || (infix[i].first == "+" ||
infix[i].first == "-")) infix[i].first == "-"))
{ {
@ -582,14 +582,14 @@ bool Eval::parseArithmetic (
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Geometric --> Tag {( "*" | "/" | "%" ) Tag} // Geometric --> Tag {( "*" | "/" | "%" ) Tag}
bool Eval::parseGeometric ( bool Eval::parseGeometric (
std::vector <std::pair <std::string, Lexer2::Type> >& infix, std::vector <std::pair <std::string, Lexer::Type> >& infix,
int &i) const int &i) const
{ {
if (i < infix.size () && if (i < infix.size () &&
parseTag (infix, i)) parseTag (infix, i))
{ {
while (i < infix.size () && while (i < infix.size () &&
infix[i].second == Lexer2::Type::op && infix[i].second == Lexer::Type::op &&
(infix[i].first == "*" || (infix[i].first == "*" ||
infix[i].first == "/" || infix[i].first == "/" ||
infix[i].first == "%")) infix[i].first == "%"))
@ -608,14 +608,14 @@ bool Eval::parseGeometric (
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Tag --> Unary {( "_hastag_" | "_notag_" ) Unary} // Tag --> Unary {( "_hastag_" | "_notag_" ) Unary}
bool Eval::parseTag ( bool Eval::parseTag (
std::vector <std::pair <std::string, Lexer2::Type> >& infix, std::vector <std::pair <std::string, Lexer::Type> >& infix,
int &i) const int &i) const
{ {
if (i < infix.size () && if (i < infix.size () &&
parseUnary (infix, i)) parseUnary (infix, i))
{ {
while (i < infix.size () && while (i < infix.size () &&
infix[i].second == Lexer2::Type::op && infix[i].second == Lexer::Type::op &&
(infix[i].first == "_hastag_" || (infix[i].first == "_hastag_" ||
infix[i].first == "_notag_")) infix[i].first == "_notag_"))
{ {
@ -633,7 +633,7 @@ bool Eval::parseTag (
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Unary --> [( "-" | "+" | "!" )] Exponent // Unary --> [( "-" | "+" | "!" )] Exponent
bool Eval::parseUnary ( bool Eval::parseUnary (
std::vector <std::pair <std::string, Lexer2::Type> >& infix, std::vector <std::pair <std::string, Lexer::Type> >& infix,
int &i) const int &i) const
{ {
if (i < infix.size ()) if (i < infix.size ())
@ -660,14 +660,14 @@ bool Eval::parseUnary (
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Exponent --> Primitive ["^" Primitive] // Exponent --> Primitive ["^" Primitive]
bool Eval::parseExponent ( bool Eval::parseExponent (
std::vector <std::pair <std::string, Lexer2::Type> >& infix, std::vector <std::pair <std::string, Lexer::Type> >& infix,
int &i) const int &i) const
{ {
if (i < infix.size () && if (i < infix.size () &&
parsePrimitive (infix, i)) parsePrimitive (infix, i))
{ {
while (i < infix.size () && while (i < infix.size () &&
infix[i].second == Lexer2::Type::op && infix[i].second == Lexer::Type::op &&
infix[i].first == "^") infix[i].first == "^")
{ {
++i; ++i;
@ -684,7 +684,7 @@ bool Eval::parseExponent (
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Primitive --> "(" Logical ")" | Variant // Primitive --> "(" Logical ")" | Variant
bool Eval::parsePrimitive ( bool Eval::parsePrimitive (
std::vector <std::pair <std::string, Lexer2::Type> >& infix, std::vector <std::pair <std::string, Lexer::Type> >& infix,
int &i) const int &i) const
{ {
if (i < infix.size ()) if (i < infix.size ())
@ -722,7 +722,7 @@ bool Eval::parsePrimitive (
++i; ++i;
return true; return true;
} }
else if (infix[i].second != Lexer2::Type::op) else if (infix[i].second != Lexer::Type::op)
{ {
++i; ++i;
return true; return true;
@ -766,32 +766,32 @@ bool Eval::parsePrimitive (
// Exit. // Exit.
// //
void Eval::infixToPostfix ( void Eval::infixToPostfix (
std::vector <std::pair <std::string, Lexer2::Type> >& infix) const std::vector <std::pair <std::string, Lexer::Type> >& infix) const
{ {
// Short circuit. // Short circuit.
if (infix.size () == 1) if (infix.size () == 1)
return; return;
// Result. // Result.
std::vector <std::pair <std::string, Lexer2::Type> > postfix; std::vector <std::pair <std::string, Lexer::Type> > postfix;
// Shunting yard. // Shunting yard.
std::vector <std::pair <std::string, Lexer2::Type> > op_stack; std::vector <std::pair <std::string, Lexer::Type> > op_stack;
// Operator characteristics. // Operator characteristics.
char type; char type;
int precedence; int precedence;
char associativity; char associativity;
std::vector <std::pair <std::string, Lexer2::Type> >::iterator token; std::vector <std::pair <std::string, Lexer::Type> >::iterator token;
for (token = infix.begin (); token != infix.end (); ++token) for (token = infix.begin (); token != infix.end (); ++token)
{ {
if (token->second == Lexer2::Type::op && if (token->second == Lexer::Type::op &&
token->first == "(") token->first == "(")
{ {
op_stack.push_back (*token); op_stack.push_back (*token);
} }
else if (token->second == Lexer2::Type::op && else if (token->second == Lexer::Type::op &&
token->first == ")") token->first == ")")
{ {
while (op_stack.size () && while (op_stack.size () &&
@ -806,7 +806,7 @@ void Eval::infixToPostfix (
else else
throw std::string ("Mismatched parentheses in expression"); throw std::string ("Mismatched parentheses in expression");
} }
else if (token->second == Lexer2::Type::op && else if (token->second == Lexer::Type::op &&
identifyOperator (token->first, type, precedence, associativity)) identifyOperator (token->first, type, precedence, associativity))
{ {
char type2; char type2;
@ -865,20 +865,20 @@ bool Eval::identifyOperator (
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
std::string Eval::dump ( std::string Eval::dump (
std::vector <std::pair <std::string, Lexer2::Type> >& tokens) const std::vector <std::pair <std::string, Lexer::Type> >& tokens) const
{ {
// Set up a color mapping. // Set up a color mapping.
std::map <Lexer2::Type, Color> color_map; std::map <Lexer::Type, Color> color_map;
color_map[Lexer2::Type::op] = Color ("gray14 on gray6"); color_map[Lexer::Type::op] = Color ("gray14 on gray6");
color_map[Lexer2::Type::number] = Color ("rgb530 on gray6"); color_map[Lexer::Type::number] = Color ("rgb530 on gray6");
color_map[Lexer2::Type::hex] = Color ("rgb303 on gray6"); color_map[Lexer::Type::hex] = Color ("rgb303 on gray6");
color_map[Lexer2::Type::string] = Color ("rgb550 on gray6"); color_map[Lexer::Type::string] = Color ("rgb550 on gray6");
color_map[Lexer2::Type::identifier] = Color ("rgb035 on gray6"); color_map[Lexer::Type::identifier] = Color ("rgb035 on gray6");
color_map[Lexer2::Type::date] = Color ("rgb150 on gray6"); color_map[Lexer::Type::date] = Color ("rgb150 on gray6");
color_map[Lexer2::Type::duration] = Color ("rgb531 on gray6"); color_map[Lexer::Type::duration] = Color ("rgb531 on gray6");
std::string output; std::string output;
std::vector <std::pair <std::string, Lexer2::Type> >::const_iterator i; std::vector <std::pair <std::string, Lexer::Type> >::const_iterator i;
for (i = tokens.begin (); i != tokens.end (); ++i) for (i = tokens.begin (); i != tokens.end (); ++i)
{ {
if (i != tokens.begin ()) if (i != tokens.begin ())

View file

@ -29,7 +29,7 @@
#include <vector> #include <vector>
#include <string> #include <string>
#include <Lexer2.h> #include <Lexer.h>
#include <Variant.h> #include <Variant.h>
class Eval class Eval
@ -53,28 +53,28 @@ public:
static void getBinaryOperators (std::vector <std::string>&); static void getBinaryOperators (std::vector <std::string>&);
private: private:
void evaluatePostfixStack (const std::vector <std::pair <std::string, Lexer2::Type> >&, Variant&) const; void evaluatePostfixStack (const std::vector <std::pair <std::string, Lexer::Type> >&, Variant&) const;
void infixToPostfix (std::vector <std::pair <std::string, Lexer2::Type> >&) const; void infixToPostfix (std::vector <std::pair <std::string, Lexer::Type> >&) const;
void infixParse (std::vector <std::pair <std::string, Lexer2::Type> >&) const; void infixParse (std::vector <std::pair <std::string, Lexer::Type> >&) const;
bool parseLogical (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const; bool parseLogical (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
bool parseRegex (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const; bool parseRegex (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
bool parseEquality (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const; bool parseEquality (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
bool parseComparative (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const; bool parseComparative (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
bool parseArithmetic (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const; bool parseArithmetic (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
bool parseGeometric (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const; bool parseGeometric (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
bool parseTag (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const; bool parseTag (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
bool parseUnary (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const; bool parseUnary (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
bool parseExponent (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const; bool parseExponent (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
bool parsePrimitive (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const; bool parsePrimitive (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
bool identifyOperator (const std::string&, char&, int&, char&) const; bool identifyOperator (const std::string&, char&, int&, char&) const;
std::string dump (std::vector <std::pair <std::string, Lexer2::Type> >&) const; std::string dump (std::vector <std::pair <std::string, Lexer::Type> >&) const;
private: private:
std::vector <bool (*)(const std::string&, Variant&)> _sources; std::vector <bool (*)(const std::string&, Variant&)> _sources;
bool _ambiguity; bool _ambiguity;
bool _debug; bool _debug;
std::vector <std::pair <std::string, Lexer2::Type> > _compiled; std::vector <std::pair <std::string, Lexer::Type> > _compiled;
}; };

View file

@ -26,7 +26,7 @@
#include <cmake.h> #include <cmake.h>
#include <ctype.h> #include <ctype.h>
#include <Lexer2.h> #include <Lexer.h>
#include <ISO8601.h> #include <ISO8601.h>
#include <Date.h> #include <Date.h>
#include <Duration.h> #include <Duration.h>
@ -35,11 +35,11 @@
static const std::string uuid_pattern = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"; static const std::string uuid_pattern = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";
static const int uuid_min_length = 8; static const int uuid_min_length = 8;
std::string Lexer2::dateFormat = ""; std::string Lexer::dateFormat = "";
bool Lexer2::isoEnabled = true; bool Lexer::isoEnabled = true;
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
Lexer2::Lexer2 (const std::string& text) Lexer::Lexer (const std::string& text)
: _text (text) : _text (text)
, _cursor (0) , _cursor (0)
, _eos (text.size ()) , _eos (text.size ())
@ -48,20 +48,20 @@ Lexer2::Lexer2 (const std::string& text)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
Lexer2::~Lexer2 () Lexer::~Lexer ()
{ {
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void Lexer2::ambiguity (bool value) void Lexer::ambiguity (bool value)
{ {
_ambiguity = value; _ambiguity = value;
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// When a Lexer2 object is constructed with a string, this method walks through // When a Lexer object is constructed with a string, this method walks through
// the stream of low-level tokens. // the stream of low-level tokens.
bool Lexer2::token (std::string& token, Lexer2::Type& type) bool Lexer::token (std::string& token, Lexer::Type& type)
{ {
// Eat white space. // Eat white space.
while (isWhitespace (_text[_cursor])) while (isWhitespace (_text[_cursor]))
@ -104,27 +104,27 @@ bool Lexer2::token (std::string& token, Lexer2::Type& type)
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// This static method tokenizes the input and provides a vector of token/type // This static method tokenizes the input and provides a vector of token/type
// results from a high-level lex. // results from a high-level lex.
std::vector <std::pair <std::string, Lexer2::Type>> Lexer2::tokens ( std::vector <std::pair <std::string, Lexer::Type>> Lexer::tokens (
const std::string& text) const std::string& text)
{ {
std::vector <std::pair <std::string, Lexer2::Type>> all; std::vector <std::pair <std::string, Lexer::Type>> all;
std::string token; std::string token;
Lexer2::Type type; Lexer::Type type;
Lexer2 l (text); Lexer l (text);
while (l.token (token, type)) while (l.token (token, type))
all.push_back (std::pair <std::string, Lexer2::Type> (token, type)); all.push_back (std::pair <std::string, Lexer::Type> (token, type));
return all; return all;
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// This static method tokenizes the input, but discards the type information. // This static method tokenizes the input, but discards the type information.
std::vector <std::string> Lexer2::split (const std::string& text) std::vector <std::string> Lexer::split (const std::string& text)
{ {
std::vector <std::string> all; std::vector <std::string> all;
std::string token; std::string token;
Lexer2::Type ignored; Lexer::Type ignored;
Lexer2 l (text); Lexer l (text);
while (l.token (token, ignored)) while (l.token (token, ignored))
all.push_back (token); all.push_back (token);
@ -133,27 +133,27 @@ std::vector <std::string> Lexer2::split (const std::string& text)
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// No L10N - these are for internal purposes. // No L10N - these are for internal purposes.
const std::string Lexer2::typeName (const Lexer2::Type& type) const std::string Lexer::typeName (const Lexer::Type& type)
{ {
switch (type) switch (type)
{ {
case Lexer2::Type::uuid: return "uuid"; case Lexer::Type::uuid: return "uuid";
case Lexer2::Type::number: return "number"; case Lexer::Type::number: return "number";
case Lexer2::Type::hex: return "hex"; case Lexer::Type::hex: return "hex";
case Lexer2::Type::string: return "string"; case Lexer::Type::string: return "string";
case Lexer2::Type::list: return "list"; case Lexer::Type::list: return "list";
case Lexer2::Type::url: return "url"; case Lexer::Type::url: return "url";
case Lexer2::Type::pair: return "pair"; case Lexer::Type::pair: return "pair";
case Lexer2::Type::separator: return "separator"; case Lexer::Type::separator: return "separator";
case Lexer2::Type::tag: return "tag"; case Lexer::Type::tag: return "tag";
case Lexer2::Type::path: return "path"; case Lexer::Type::path: return "path";
case Lexer2::Type::substitution: return "substitution"; case Lexer::Type::substitution: return "substitution";
case Lexer2::Type::pattern: return "pattern"; case Lexer::Type::pattern: return "pattern";
case Lexer2::Type::op: return "op"; case Lexer::Type::op: return "op";
case Lexer2::Type::identifier: return "identifier"; case Lexer::Type::identifier: return "identifier";
case Lexer2::Type::word: return "word"; case Lexer::Type::word: return "word";
case Lexer2::Type::date: return "date"; case Lexer::Type::date: return "date";
case Lexer2::Type::duration: return "duration"; case Lexer::Type::duration: return "duration";
} }
} }
@ -163,7 +163,7 @@ const std::string Lexer2::typeName (const Lexer2::Type& type)
// http://en.wikipedia.org/wiki/Whitespace_character // http://en.wikipedia.org/wiki/Whitespace_character
// Updated 2013-11-18 // Updated 2013-11-18
// Static // Static
bool Lexer2::isWhitespace (int c) bool Lexer::isWhitespace (int c)
{ {
return (c == 0x0020 || // space Common Separator, space return (c == 0x0020 || // space Common Separator, space
c == 0x0009 || // Common Other, control HT, Horizontal Tab c == 0x0009 || // Common Other, control HT, Horizontal Tab
@ -195,14 +195,14 @@ bool Lexer2::isWhitespace (int c)
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Digits 0-9. // Digits 0-9.
bool Lexer2::isDigit (int c) bool Lexer::isDigit (int c)
{ {
return c >= 0x30 && c <= 0x39; return c >= 0x30 && c <= 0x39;
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Digits 0-9 a-f A-F. // Digits 0-9 a-f A-F.
bool Lexer2::isHexDigit (int c) bool Lexer::isHexDigit (int c)
{ {
return (c >= '0' && c <= '9') || return (c >= '0' && c <= '9') ||
(c >= 'a' && c <= 'f') || (c >= 'a' && c <= 'f') ||
@ -210,7 +210,7 @@ bool Lexer2::isHexDigit (int c)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
bool Lexer2::isIdentifierStart (int c) bool Lexer::isIdentifierStart (int c)
{ {
return c && // Include null character check. return c && // Include null character check.
! isWhitespace (c) && ! isWhitespace (c) &&
@ -220,7 +220,7 @@ bool Lexer2::isIdentifierStart (int c)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
bool Lexer2::isIdentifierNext (int c) bool Lexer::isIdentifierNext (int c)
{ {
return c && // Include null character check. return c && // Include null character check.
c != ':' && // Used in isPair. c != ':' && // Used in isPair.
@ -229,7 +229,7 @@ bool Lexer2::isIdentifierNext (int c)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
bool Lexer2::isSingleCharOperator (int c) bool Lexer::isSingleCharOperator (int c)
{ {
return c == '+' || // Addition return c == '+' || // Addition
c == '-' || // Subtraction or unary minus = ambiguous c == '-' || // Subtraction or unary minus = ambiguous
@ -247,7 +247,7 @@ bool Lexer2::isSingleCharOperator (int c)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
bool Lexer2::isDoubleCharOperator (int c0, int c1, int c2) bool Lexer::isDoubleCharOperator (int c0, int c1, int c2)
{ {
return (c0 == '=' && c1 == '=') || return (c0 == '=' && c1 == '=') ||
(c0 == '!' && c1 == '=') || (c0 == '!' && c1 == '=') ||
@ -260,7 +260,7 @@ bool Lexer2::isDoubleCharOperator (int c0, int c1, int c2)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
bool Lexer2::isTripleCharOperator (int c0, int c1, int c2, int c3) bool Lexer::isTripleCharOperator (int c0, int c1, int c2, int c3)
{ {
return (c0 == 'a' && c1 == 'n' && c2 == 'd' && isBoundary (c2, c3)) || return (c0 == 'a' && c1 == 'n' && c2 == 'd' && isBoundary (c2, c3)) ||
(c0 == 'x' && c1 == 'o' && c2 == 'r' && isBoundary (c2, c3)) || (c0 == 'x' && c1 == 'o' && c2 == 'r' && isBoundary (c2, c3)) ||
@ -268,7 +268,7 @@ bool Lexer2::isTripleCharOperator (int c0, int c1, int c2, int c3)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
bool Lexer2::isBoundary (int left, int right) bool Lexer::isBoundary (int left, int right)
{ {
// XOR // XOR
if (isalpha (left) != isalpha (right)) return true; if (isalpha (left) != isalpha (right)) return true;
@ -282,14 +282,14 @@ bool Lexer2::isBoundary (int left, int right)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
bool Lexer2::isPunctuation (int c) bool Lexer::isPunctuation (int c)
{ {
return c != '@' && return c != '@' &&
ispunct (c); ispunct (c);
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void Lexer2::dequote (std::string& input) void Lexer::dequote (std::string& input)
{ {
int quote = input[0]; int quote = input[0];
size_t len = input.length (); size_t len = input.length ();
@ -301,7 +301,7 @@ void Lexer2::dequote (std::string& input)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
bool Lexer2::isEOS () const bool Lexer::isEOS () const
{ {
return _cursor >= _eos; return _cursor >= _eos;
} }
@ -311,7 +311,7 @@ bool Lexer2::isEOS () const
// '9' -> 9 // '9' -> 9
// 'a'/'A' -> 10 // 'a'/'A' -> 10
// 'f'/'F' -> 15 // 'f'/'F' -> 15
int Lexer2::hexToInt (int c) const int Lexer::hexToInt (int c) const
{ {
if (c >= '0' && c <= '9') return (c - '0'); if (c >= '0' && c <= '9') return (c - '0');
else if (c >= 'a' && c <= 'f') return (c - 'a' + 10); else if (c >= 'a' && c <= 'f') return (c - 'a' + 10);
@ -319,13 +319,13 @@ int Lexer2::hexToInt (int c) const
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
int Lexer2::hexToInt (int c0, int c1) const int Lexer::hexToInt (int c0, int c1) const
{ {
return (hexToInt (c0) << 4) + hexToInt (c1); return (hexToInt (c0) << 4) + hexToInt (c1);
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
int Lexer2::hexToInt (int c0, int c1, int c2, int c3) const int Lexer::hexToInt (int c0, int c1, int c2, int c3) const
{ {
return (hexToInt (c0) << 12) + return (hexToInt (c0) << 12) +
(hexToInt (c1) << 8) + (hexToInt (c1) << 8) +
@ -334,11 +334,11 @@ int Lexer2::hexToInt (int c0, int c1, int c2, int c3) const
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer2::Type::string // Lexer::Type::string
// '|" // '|"
// [ U+XXXX | \uXXXX | \" | \' | \\ | \/ | \b | \f | \n | \r | \t | . ] // [ U+XXXX | \uXXXX | \" | \' | \\ | \/ | \b | \f | \n | \r | \t | . ]
// '|" // '|"
bool Lexer2::isString (std::string& token, Lexer2::Type& type, int quote) bool Lexer::isString (std::string& token, Lexer::Type& type, int quote)
{ {
std::size_t marker = _cursor; std::size_t marker = _cursor;
@ -404,7 +404,7 @@ bool Lexer2::isString (std::string& token, Lexer2::Type& type, int quote)
if (_text[marker] == quote) if (_text[marker] == quote)
{ {
++marker; ++marker;
type = Lexer2::Type::string; type = Lexer::Type::string;
_cursor = marker; _cursor = marker;
return true; return true;
} }
@ -414,19 +414,19 @@ bool Lexer2::isString (std::string& token, Lexer2::Type& type, int quote)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer2::Type::date // Lexer::Type::date
// // <ISO8601d> | <Date>
bool Lexer2::isDate (std::string& token, Lexer2::Type& type) bool Lexer::isDate (std::string& token, Lexer::Type& type)
{ {
// Try an ISO date parse. // Try an ISO date parse.
if (Lexer2::isoEnabled) if (Lexer::isoEnabled)
{ {
std::size_t iso_i = 0; std::size_t iso_i = 0;
ISO8601d iso; ISO8601d iso;
iso.ambiguity (_ambiguity); iso.ambiguity (_ambiguity);
if (iso.parse (_text.substr (_cursor), iso_i)) if (iso.parse (_text.substr (_cursor), iso_i))
{ {
type = Lexer2::Type::date; type = Lexer::Type::date;
token = _text.substr (_cursor, iso_i); token = _text.substr (_cursor, iso_i);
_cursor += iso_i; _cursor += iso_i;
return true; return true;
@ -434,14 +434,14 @@ bool Lexer2::isDate (std::string& token, Lexer2::Type& type)
} }
// Try a legacy rc.dateformat parse here. // Try a legacy rc.dateformat parse here.
if (Lexer2::dateFormat != "") if (Lexer::dateFormat != "")
{ {
try try
{ {
std::size_t legacy_i = 0; std::size_t legacy_i = 0;
Date legacyDate (_text.substr (_cursor), legacy_i, Lexer2::dateFormat, false, false); Date legacyDate (_text.substr (_cursor), legacy_i, Lexer::dateFormat, false, false);
type = Lexer2::Type::date; type = Lexer::Type::date;
token = _text.substr (_cursor, legacy_i); token = _text.substr (_cursor, legacy_i);
_cursor += legacy_i; _cursor += legacy_i;
return true; return true;
@ -454,16 +454,16 @@ bool Lexer2::isDate (std::string& token, Lexer2::Type& type)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer2::Type::duration // Lexer::Type::duration
// // <ISO8601p> | <Duration>
bool Lexer2::isDuration (std::string& token, Lexer2::Type& type) bool Lexer::isDuration (std::string& token, Lexer::Type& type)
{ {
std::size_t marker = 0; std::size_t marker = 0;
ISO8601p iso; ISO8601p iso;
if (iso.parse (_text.substr (_cursor), marker)) if (iso.parse (_text.substr (_cursor), marker))
{ {
type = Lexer2::Type::duration; type = Lexer::Type::duration;
token = _text.substr (_cursor, marker); token = _text.substr (_cursor, marker);
_cursor += marker; _cursor += marker;
return true; return true;
@ -472,7 +472,7 @@ bool Lexer2::isDuration (std::string& token, Lexer2::Type& type)
Duration dur; Duration dur;
if (dur.parse (_text.substr (_cursor), marker)) if (dur.parse (_text.substr (_cursor), marker))
{ {
type = Lexer2::Type::duration; type = Lexer::Type::duration;
token = _text.substr (_cursor, marker); token = _text.substr (_cursor, marker);
_cursor += marker; _cursor += marker;
return true; return true;
@ -482,7 +482,7 @@ bool Lexer2::isDuration (std::string& token, Lexer2::Type& type)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer2::Type::uuid // Lexer::Type::uuid
// XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX // XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
// XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX // XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX
// XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXX // XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXX
@ -492,7 +492,7 @@ bool Lexer2::isDuration (std::string& token, Lexer2::Type& type)
// XXXXXXXX-X // XXXXXXXX-X
// XXXXXXXX- // XXXXXXXX-
// XXXXXXXX // XXXXXXXX
bool Lexer2::isUUID (std::string& token, Lexer2::Type& type) bool Lexer::isUUID (std::string& token, Lexer::Type& type)
{ {
std::size_t marker = _cursor; std::size_t marker = _cursor;
@ -513,7 +513,7 @@ bool Lexer2::isUUID (std::string& token, Lexer2::Type& type)
token = _text.substr (_cursor, i); token = _text.substr (_cursor, i);
if (! isAllDigits (token)) if (! isAllDigits (token))
{ {
type = Lexer2::Type::uuid; type = Lexer::Type::uuid;
_cursor += i; _cursor += i;
return true; return true;
} }
@ -523,9 +523,9 @@ bool Lexer2::isUUID (std::string& token, Lexer2::Type& type)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer2::Type::hex // Lexer::Type::hex
// 0xX+ // 0xX+
bool Lexer2::isHexNumber (std::string& token, Lexer2::Type& type) bool Lexer::isHexNumber (std::string& token, Lexer::Type& type)
{ {
std::size_t marker = _cursor; std::size_t marker = _cursor;
@ -541,7 +541,7 @@ bool Lexer2::isHexNumber (std::string& token, Lexer2::Type& type)
if (marker - _cursor > 2) if (marker - _cursor > 2)
{ {
token = _text.substr (_cursor, marker - _cursor); token = _text.substr (_cursor, marker - _cursor);
type = Lexer2::Type::hex; type = Lexer::Type::hex;
_cursor = marker; _cursor = marker;
return true; return true;
} }
@ -551,11 +551,11 @@ bool Lexer2::isHexNumber (std::string& token, Lexer2::Type& type)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer2::Type::number // Lexer::Type::number
// \d+ // \d+
// [ . \d+ ] // [ . \d+ ]
// [ e|E [ +|- ] \d+ [ . \d+ ] ] // [ e|E [ +|- ] \d+ [ . \d+ ] ]
bool Lexer2::isNumber (std::string& token, Lexer2::Type& type) bool Lexer::isNumber (std::string& token, Lexer::Type& type)
{ {
std::size_t marker = _cursor; std::size_t marker = _cursor;
@ -605,7 +605,7 @@ bool Lexer2::isNumber (std::string& token, Lexer2::Type& type)
} }
token = _text.substr (_cursor, marker - _cursor); token = _text.substr (_cursor, marker - _cursor);
type = Lexer2::Type::number; type = Lexer::Type::number;
_cursor = marker; _cursor = marker;
return true; return true;
} }
@ -614,16 +614,16 @@ bool Lexer2::isNumber (std::string& token, Lexer2::Type& type)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer2::Type::separator // Lexer::Type::separator
// -- // --
bool Lexer2::isSeparator (std::string& token, Lexer2::Type& type) bool Lexer::isSeparator (std::string& token, Lexer::Type& type)
{ {
if (_eos - _cursor >= 2 && if (_eos - _cursor >= 2 &&
_text[_cursor] == '-' && _text[_cursor] == '-' &&
_text[_cursor + 1] == '-') _text[_cursor + 1] == '-')
{ {
_cursor += 2; _cursor += 2;
type = Lexer2::Type::separator; type = Lexer::Type::separator;
token = "--"; token = "--";
return true; return true;
} }
@ -632,15 +632,15 @@ bool Lexer2::isSeparator (std::string& token, Lexer2::Type& type)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer2::Type::list // Lexer::Type::list
// , // ,
bool Lexer2::isList (std::string& token, Lexer2::Type& type) bool Lexer::isList (std::string& token, Lexer::Type& type)
{ {
if (_eos - _cursor > 1 && if (_eos - _cursor > 1 &&
_text[_cursor] == ',') _text[_cursor] == ',')
{ {
++_cursor; ++_cursor;
type = Lexer2::Type::list; type = Lexer::Type::list;
token = ","; token = ",";
return true; return true;
} }
@ -649,9 +649,9 @@ bool Lexer2::isList (std::string& token, Lexer2::Type& type)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer2::Type::url // Lexer::Type::url
// http [s] :// ... // http [s] :// ...
bool Lexer2::isURL (std::string& token, Lexer2::Type& type) bool Lexer::isURL (std::string& token, Lexer::Type& type)
{ {
std::size_t marker = _cursor; std::size_t marker = _cursor;
@ -676,7 +676,7 @@ bool Lexer2::isURL (std::string& token, Lexer2::Type& type)
utf8_next_char (_text, marker); utf8_next_char (_text, marker);
token = _text.substr (_cursor, marker - _cursor); token = _text.substr (_cursor, marker - _cursor);
type = Lexer2::Type::url; type = Lexer::Type::url;
_cursor = marker; _cursor = marker;
return true; return true;
} }
@ -686,14 +686,14 @@ bool Lexer2::isURL (std::string& token, Lexer2::Type& type)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer2::Type::pair // Lexer::Type::pair
// <identifier> :|= [ <string> | <word> ] // <identifier> :|= [ <string> | <word> ]
bool Lexer2::isPair (std::string& token, Lexer2::Type& type) bool Lexer::isPair (std::string& token, Lexer::Type& type)
{ {
std::size_t marker = _cursor; std::size_t marker = _cursor;
std::string ignoredToken; std::string ignoredToken;
Lexer2::Type ignoredType; Lexer::Type ignoredType;
if (isIdentifier (ignoredToken, ignoredType)) if (isIdentifier (ignoredToken, ignoredType))
{ {
if (_eos - _cursor > 1 && if (_eos - _cursor > 1 &&
@ -706,7 +706,7 @@ bool Lexer2::isPair (std::string& token, Lexer2::Type& type)
isWord (ignoredToken, ignoredType)) isWord (ignoredToken, ignoredType))
{ {
token = _text.substr (marker, _cursor - marker); token = _text.substr (marker, _cursor - marker);
type = Lexer2::Type::pair; type = Lexer::Type::pair;
return true; return true;
} }
} }
@ -717,9 +717,9 @@ bool Lexer2::isPair (std::string& token, Lexer2::Type& type)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer2::Type::tag // Lexer::Type::tag
// ^ | <isWhiteSpace> [ +|- ] <isIdentifierStart> [ <isIdentifierNext> ]* // ^ | <isWhiteSpace> [ +|- ] <isIdentifierStart> [ <isIdentifierNext> ]*
bool Lexer2::isTag (std::string& token, Lexer2::Type& type) bool Lexer::isTag (std::string& token, Lexer::Type& type)
{ {
std::size_t marker = _cursor; std::size_t marker = _cursor;
@ -743,7 +743,7 @@ bool Lexer2::isTag (std::string& token, Lexer2::Type& type)
utf8_next_char (_text, marker); utf8_next_char (_text, marker);
token = _text.substr (_cursor, marker - _cursor); token = _text.substr (_cursor, marker - _cursor);
type = Lexer2::Type::tag; type = Lexer::Type::tag;
_cursor = marker; _cursor = marker;
return true; return true;
} }
@ -753,9 +753,9 @@ bool Lexer2::isTag (std::string& token, Lexer2::Type& type)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer2::Type::path // Lexer::Type::path
// ( / <non-slash, non-whitespace> )+ // ( / <non-slash, non-whitespace> )+
bool Lexer2::isPath (std::string& token, Lexer2::Type& type) bool Lexer::isPath (std::string& token, Lexer::Type& type)
{ {
std::size_t marker = _cursor; std::size_t marker = _cursor;
int slashCount = 0; int slashCount = 0;
@ -786,7 +786,7 @@ bool Lexer2::isPath (std::string& token, Lexer2::Type& type)
if (marker > _cursor && if (marker > _cursor &&
slashCount > 3) slashCount > 3)
{ {
type = Lexer2::Type::path; type = Lexer::Type::path;
token = _text.substr (_cursor, marker - _cursor); token = _text.substr (_cursor, marker - _cursor);
_cursor = marker; _cursor = marker;
return true; return true;
@ -796,14 +796,14 @@ bool Lexer2::isPath (std::string& token, Lexer2::Type& type)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer2::Type::substitution // Lexer::Type::substitution
// / <unquoted-string> / <unquoted-string> / [g] // / <unquoted-string> / <unquoted-string> / [g]
bool Lexer2::isSubstitution (std::string& token, Lexer2::Type& type) bool Lexer::isSubstitution (std::string& token, Lexer::Type& type)
{ {
std::size_t marker = _cursor; std::size_t marker = _cursor;
std::string extractedToken; std::string extractedToken;
Lexer2::Type extractedType; Lexer::Type extractedType;
if (isString (extractedToken, extractedType, '/')) if (isString (extractedToken, extractedType, '/'))
{ {
--_cursor; // Step back over the '/'. --_cursor; // Step back over the '/'.
@ -816,7 +816,7 @@ bool Lexer2::isSubstitution (std::string& token, Lexer2::Type& type)
if (isWhitespace (_text[_cursor])) if (isWhitespace (_text[_cursor]))
{ {
token = _text.substr (marker, _cursor - marker); token = _text.substr (marker, _cursor - marker);
type = Lexer2::Type::substitution; type = Lexer::Type::substitution;
return true; return true;
} }
} }
@ -827,19 +827,19 @@ bool Lexer2::isSubstitution (std::string& token, Lexer2::Type& type)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer2::Type::pattern // Lexer::Type::pattern
// / <unquoted-string> / // / <unquoted-string> /
bool Lexer2::isPattern (std::string& token, Lexer2::Type& type) bool Lexer::isPattern (std::string& token, Lexer::Type& type)
{ {
std::size_t marker = _cursor; std::size_t marker = _cursor;
std::string extractedToken; std::string extractedToken;
Lexer2::Type extractedType; Lexer::Type extractedType;
if (isString (extractedToken, extractedType, '/') && if (isString (extractedToken, extractedType, '/') &&
isWhitespace (_text[_cursor])) isWhitespace (_text[_cursor]))
{ {
token = _text.substr (marker, _cursor - marker); token = _text.substr (marker, _cursor - marker);
type = Lexer2::Type::pattern; type = Lexer::Type::pattern;
return true; return true;
} }
@ -848,19 +848,19 @@ bool Lexer2::isPattern (std::string& token, Lexer2::Type& type)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer2::Type::op // Lexer::Type::op
// _hastag_ | _notag_ | _neg_ | _pos_ | // _hastag_ | _notag_ | _neg_ | _pos_ |
// <isTripleCharOperator> | // <isTripleCharOperator> |
// <isDoubleCharOperator> | // <isDoubleCharOperator> |
// <isSingleCharOperator> | // <isSingleCharOperator> |
bool Lexer2::isOperator (std::string& token, Lexer2::Type& type) bool Lexer::isOperator (std::string& token, Lexer::Type& type)
{ {
std::size_t marker = _cursor; std::size_t marker = _cursor;
if (_eos - marker >= 8 && _text.substr (marker, 8) == "_hastag_") if (_eos - marker >= 8 && _text.substr (marker, 8) == "_hastag_")
{ {
marker += 8; marker += 8;
type = Lexer2::Type::op; type = Lexer::Type::op;
token = _text.substr (_cursor, marker - _cursor); token = _text.substr (_cursor, marker - _cursor);
_cursor = marker; _cursor = marker;
return true; return true;
@ -869,7 +869,7 @@ bool Lexer2::isOperator (std::string& token, Lexer2::Type& type)
else if (_eos - marker >= 7 && _text.substr (marker, 7) == "_notag_") else if (_eos - marker >= 7 && _text.substr (marker, 7) == "_notag_")
{ {
marker += 7; marker += 7;
type = Lexer2::Type::op; type = Lexer::Type::op;
token = _text.substr (_cursor, marker - _cursor); token = _text.substr (_cursor, marker - _cursor);
_cursor = marker; _cursor = marker;
return true; return true;
@ -878,7 +878,7 @@ bool Lexer2::isOperator (std::string& token, Lexer2::Type& type)
else if (_eos - marker >= 5 && _text.substr (marker, 5) == "_neg_") else if (_eos - marker >= 5 && _text.substr (marker, 5) == "_neg_")
{ {
marker += 5; marker += 5;
type = Lexer2::Type::op; type = Lexer::Type::op;
token = _text.substr (_cursor, marker - _cursor); token = _text.substr (_cursor, marker - _cursor);
_cursor = marker; _cursor = marker;
return true; return true;
@ -887,7 +887,7 @@ bool Lexer2::isOperator (std::string& token, Lexer2::Type& type)
else if (_eos - marker >= 5 && _text.substr (marker, 5) == "_pos_") else if (_eos - marker >= 5 && _text.substr (marker, 5) == "_pos_")
{ {
marker += 5; marker += 5;
type = Lexer2::Type::op; type = Lexer::Type::op;
token = _text.substr (_cursor, marker - _cursor); token = _text.substr (_cursor, marker - _cursor);
_cursor = marker; _cursor = marker;
return true; return true;
@ -897,7 +897,7 @@ bool Lexer2::isOperator (std::string& token, Lexer2::Type& type)
isTripleCharOperator (_text[marker], _text[marker + 1], _text[marker + 2], _text[marker + 3])) isTripleCharOperator (_text[marker], _text[marker + 1], _text[marker + 2], _text[marker + 3]))
{ {
marker += 3; marker += 3;
type = Lexer2::Type::op; type = Lexer::Type::op;
token = _text.substr (_cursor, marker - _cursor); token = _text.substr (_cursor, marker - _cursor);
_cursor = marker; _cursor = marker;
return true; return true;
@ -907,7 +907,7 @@ bool Lexer2::isOperator (std::string& token, Lexer2::Type& type)
isDoubleCharOperator (_text[marker], _text[marker + 1], _text[marker + 2])) isDoubleCharOperator (_text[marker], _text[marker + 1], _text[marker + 2]))
{ {
marker += 2; marker += 2;
type = Lexer2::Type::op; type = Lexer::Type::op;
token = _text.substr (_cursor, marker - _cursor); token = _text.substr (_cursor, marker - _cursor);
_cursor = marker; _cursor = marker;
return true; return true;
@ -916,7 +916,7 @@ bool Lexer2::isOperator (std::string& token, Lexer2::Type& type)
else if (isSingleCharOperator (_text[marker])) else if (isSingleCharOperator (_text[marker]))
{ {
token = _text[marker]; token = _text[marker];
type = Lexer2::Type::op; type = Lexer::Type::op;
_cursor = ++marker; _cursor = ++marker;
return true; return true;
} }
@ -925,9 +925,9 @@ bool Lexer2::isOperator (std::string& token, Lexer2::Type& type)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer2::Type::identifier // Lexer::Type::identifier
// <isIdentifierStart> [ <isIdentifierNext> ]* // <isIdentifierStart> [ <isIdentifierNext> ]*
bool Lexer2::isIdentifier (std::string& token, Lexer2::Type& type) bool Lexer::isIdentifier (std::string& token, Lexer::Type& type)
{ {
std::size_t marker = _cursor; std::size_t marker = _cursor;
@ -939,7 +939,7 @@ bool Lexer2::isIdentifier (std::string& token, Lexer2::Type& type)
utf8_next_char (_text, marker); utf8_next_char (_text, marker);
token = _text.substr (_cursor, marker - _cursor); token = _text.substr (_cursor, marker - _cursor);
type = Lexer2::Type::identifier; type = Lexer::Type::identifier;
_cursor = marker; _cursor = marker;
return true; return true;
} }
@ -948,9 +948,9 @@ bool Lexer2::isIdentifier (std::string& token, Lexer2::Type& type)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Lexer2::Type::word // Lexer::Type::word
// [^\s]+ // [^\s]+
bool Lexer2::isWord (std::string& token, Lexer2::Type& type) bool Lexer::isWord (std::string& token, Lexer::Type& type)
{ {
std::size_t marker = _cursor; std::size_t marker = _cursor;
@ -960,7 +960,7 @@ bool Lexer2::isWord (std::string& token, Lexer2::Type& type)
if (marker > _cursor) if (marker > _cursor)
{ {
token = _text.substr (_cursor, marker - _cursor); token = _text.substr (_cursor, marker - _cursor);
type = Lexer2::Type::word; type = Lexer::Type::word;
_cursor = marker; _cursor = marker;
return true; return true;
} }
@ -970,30 +970,30 @@ bool Lexer2::isWord (std::string& token, Lexer2::Type& type)
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Static // Static
// Returns a display label for the given lexer token type.
//
// The label is the lower-case type name ("string", "uuid", "hex", ...) wrapped
// in a type-specific "\033[...m" terminal escape prefix and followed by the
// "\033[0m" reset sequence, so the result is meant for terminal output rather
// than for comparison against plain names.  Several types share the same
// prefix (separator/list/url, path/substitution, date/duration).  Any type
// not matched by the chain below yields the label "unknown".
std::string Lexer2::typeToString (Lexer2::Type type) std::string Lexer::typeToString (Lexer::Type type)
{ {
if (type == Lexer2::Type::string) return std::string ("\033[38;5;7m\033[48;5;3m") + "string" + "\033[0m"; if (type == Lexer::Type::string) return std::string ("\033[38;5;7m\033[48;5;3m") + "string" + "\033[0m";
else if (type == Lexer2::Type::uuid) return std::string ("\033[38;5;7m\033[48;5;10m") + "uuid" + "\033[0m"; else if (type == Lexer::Type::uuid) return std::string ("\033[38;5;7m\033[48;5;10m") + "uuid" + "\033[0m";
else if (type == Lexer2::Type::hex) return std::string ("\033[38;5;7m\033[48;5;14m") + "hex" + "\033[0m"; else if (type == Lexer::Type::hex) return std::string ("\033[38;5;7m\033[48;5;14m") + "hex" + "\033[0m";
else if (type == Lexer2::Type::number) return std::string ("\033[38;5;7m\033[48;5;6m") + "number" + "\033[0m"; else if (type == Lexer::Type::number) return std::string ("\033[38;5;7m\033[48;5;6m") + "number" + "\033[0m";
else if (type == Lexer2::Type::separator) return std::string ("\033[38;5;7m\033[48;5;4m") + "separator" + "\033[0m"; else if (type == Lexer::Type::separator) return std::string ("\033[38;5;7m\033[48;5;4m") + "separator" + "\033[0m";
else if (type == Lexer2::Type::list) return std::string ("\033[38;5;7m\033[48;5;4m") + "list" + "\033[0m"; else if (type == Lexer::Type::list) return std::string ("\033[38;5;7m\033[48;5;4m") + "list" + "\033[0m";
else if (type == Lexer2::Type::url) return std::string ("\033[38;5;7m\033[48;5;4m") + "url" + "\033[0m"; else if (type == Lexer::Type::url) return std::string ("\033[38;5;7m\033[48;5;4m") + "url" + "\033[0m";
else if (type == Lexer2::Type::pair) return std::string ("\033[38;5;7m\033[48;5;1m") + "pair" + "\033[0m"; else if (type == Lexer::Type::pair) return std::string ("\033[38;5;7m\033[48;5;1m") + "pair" + "\033[0m";
else if (type == Lexer2::Type::tag) return std::string ("\033[37;45m") + "tag" + "\033[0m"; else if (type == Lexer::Type::tag) return std::string ("\033[37;45m") + "tag" + "\033[0m";
else if (type == Lexer2::Type::path) return std::string ("\033[37;102m") + "path" + "\033[0m"; else if (type == Lexer::Type::path) return std::string ("\033[37;102m") + "path" + "\033[0m";
else if (type == Lexer2::Type::substitution) return std::string ("\033[37;102m") + "substitution" + "\033[0m"; else if (type == Lexer::Type::substitution) return std::string ("\033[37;102m") + "substitution" + "\033[0m";
else if (type == Lexer2::Type::pattern) return std::string ("\033[37;42m") + "pattern" + "\033[0m"; else if (type == Lexer::Type::pattern) return std::string ("\033[37;42m") + "pattern" + "\033[0m";
else if (type == Lexer2::Type::op) return std::string ("\033[38;5;7m\033[48;5;203m") + "op" + "\033[0m"; else if (type == Lexer::Type::op) return std::string ("\033[38;5;7m\033[48;5;203m") + "op" + "\033[0m";
else if (type == Lexer2::Type::identifier) return std::string ("\033[38;5;15m\033[48;5;244m") + "identifier" + "\033[0m"; else if (type == Lexer::Type::identifier) return std::string ("\033[38;5;15m\033[48;5;244m") + "identifier" + "\033[0m";
else if (type == Lexer2::Type::word) return std::string ("\033[38;5;15m\033[48;5;236m") + "word" + "\033[0m"; else if (type == Lexer::Type::word) return std::string ("\033[38;5;15m\033[48;5;236m") + "word" + "\033[0m";
else if (type == Lexer2::Type::date) return std::string ("\033[38;5;15m\033[48;5;34m") + "date" + "\033[0m"; else if (type == Lexer::Type::date) return std::string ("\033[38;5;15m\033[48;5;34m") + "date" + "\033[0m";
else if (type == Lexer2::Type::duration) return std::string ("\033[38;5;15m\033[48;5;34m") + "duration" + "\033[0m"; else if (type == Lexer::Type::duration) return std::string ("\033[38;5;15m\033[48;5;34m") + "duration" + "\033[0m";
else return std::string ("\033[37;41m") + "unknown" + "\033[0m"; else return std::string ("\033[37;41m") + "unknown" + "\033[0m";
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Returns true when 'text' contains no character other than the ASCII digits
// '0' through '9'.  Note that an empty string also returns true, because
// find_first_not_of finds no offending character and reports npos.
bool Lexer2::isAllDigits (const std::string& text) bool Lexer::isAllDigits (const std::string& text)
{ {
return text.find_first_not_of ("0123456789") == std::string::npos; return text.find_first_not_of ("0123456789") == std::string::npos;
} }

View file

@ -24,17 +24,17 @@
// //
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
#ifndef INCLUDED_LEXER2 #ifndef INCLUDED_LEXER
#define INCLUDED_LEXER2 #define INCLUDED_LEXER
#include <string> #include <string>
#include <vector> #include <vector>
#include <cstddef> #include <cstddef>
// Lexer2: A UTF8 lexical analyzer for every construct used on the Taskwarrior // Lexer: A UTF8 lexical analyzer for every construct used on the Taskwarrior
// command line, with additional recognized types for disambiguation. // command line, with additional recognized types for disambiguation.
class Lexer2 class Lexer
{ {
public: public:
// These are overridable. // These are overridable.
@ -51,17 +51,17 @@ public:
identifier, word, identifier, word,
date, duration }; date, duration };
Lexer2 (const std::string&); Lexer (const std::string&);
~Lexer2 (); ~Lexer ();
void ambiguity (bool); void ambiguity (bool);
bool token (std::string&, Lexer2::Type&); bool token (std::string&, Lexer::Type&);
static std::vector <std::pair <std::string, Lexer2::Type>> tokens (const std::string&); static std::vector <std::pair <std::string, Lexer::Type>> tokens (const std::string&);
static std::vector <std::string> split (const std::string&); static std::vector <std::string> split (const std::string&);
static std::string typeToString (Lexer2::Type); static std::string typeToString (Lexer::Type);
static bool isAllDigits (const std::string&); static bool isAllDigits (const std::string&);
// Static helpers. // Static helpers.
static const std::string typeName (const Lexer2::Type&); static const std::string typeName (const Lexer::Type&);
static bool isWhitespace (int); static bool isWhitespace (int);
static bool isDigit (int); static bool isDigit (int);
static bool isHexDigit (int); static bool isHexDigit (int);
@ -81,23 +81,23 @@ public:
int hexToInt (int, int, int, int) const; int hexToInt (int, int, int, int) const;
// Classifiers. // Classifiers.
bool isString (std::string&, Lexer2::Type&, int quote); bool isString (std::string&, Lexer::Type&, int quote);
bool isDate (std::string&, Lexer2::Type&); bool isDate (std::string&, Lexer::Type&);
bool isDuration (std::string&, Lexer2::Type&); bool isDuration (std::string&, Lexer::Type&);
bool isUUID (std::string&, Lexer2::Type&); bool isUUID (std::string&, Lexer::Type&);
bool isNumber (std::string&, Lexer2::Type&); bool isNumber (std::string&, Lexer::Type&);
bool isHexNumber (std::string&, Lexer2::Type&); bool isHexNumber (std::string&, Lexer::Type&);
bool isSeparator (std::string&, Lexer2::Type&); bool isSeparator (std::string&, Lexer::Type&);
bool isList (std::string&, Lexer2::Type&); bool isList (std::string&, Lexer::Type&);
bool isURL (std::string&, Lexer2::Type&); bool isURL (std::string&, Lexer::Type&);
bool isPair (std::string&, Lexer2::Type&); bool isPair (std::string&, Lexer::Type&);
bool isTag (std::string&, Lexer2::Type&); bool isTag (std::string&, Lexer::Type&);
bool isPath (std::string&, Lexer2::Type&); bool isPath (std::string&, Lexer::Type&);
bool isSubstitution (std::string&, Lexer2::Type&); bool isSubstitution (std::string&, Lexer::Type&);
bool isPattern (std::string&, Lexer2::Type&); bool isPattern (std::string&, Lexer::Type&);
bool isOperator (std::string&, Lexer2::Type&); bool isOperator (std::string&, Lexer::Type&);
bool isIdentifier (std::string&, Lexer2::Type&); bool isIdentifier (std::string&, Lexer::Type&);
bool isWord (std::string&, Lexer2::Type&); bool isWord (std::string&, Lexer::Type&);
private: private:
std::string _text; std::string _text;

View file

@ -37,7 +37,7 @@
#ifdef NIBBLER_FEATURE_REGEX #ifdef NIBBLER_FEATURE_REGEX
#include <RX.h> #include <RX.h>
#endif #endif
#include <Lexer2.h> #include <Lexer.h>
static const char* _uuid_pattern = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"; static const char* _uuid_pattern = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";
static const unsigned int _uuid_min_length = 8; static const unsigned int _uuid_min_length = 8;
@ -1005,12 +1005,12 @@ bool Nibbler::getName (std::string& result)
{ {
if (! isdigit (_input[i]) && if (! isdigit (_input[i]) &&
! ispunct (_input[i]) && ! ispunct (_input[i]) &&
! Lexer2::isWhitespace (_input[i])) ! Lexer::isWhitespace (_input[i]))
{ {
++i; ++i;
while (i < _length && while (i < _length &&
(_input[i] == '_' || ! ispunct (_input[i])) && (_input[i] == '_' || ! ispunct (_input[i])) &&
! Lexer2::isWhitespace (_input[i])) ! Lexer::isWhitespace (_input[i]))
{ {
++i; ++i;
} }
@ -1037,7 +1037,7 @@ bool Nibbler::getWord (std::string& result)
{ {
while (!isdigit (_input[i]) && while (!isdigit (_input[i]) &&
!isPunctuation (_input[i]) && !isPunctuation (_input[i]) &&
!Lexer2::isWhitespace (_input[i])) !Lexer::isWhitespace (_input[i]))
{ {
++i; ++i;
} }

View file

@ -31,7 +31,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <Variant.h> #include <Variant.h>
#include <ISO8601.h> #include <ISO8601.h>
#include <Lexer2.h> #include <Lexer.h>
#include <Date.h> #include <Date.h>
#include <Duration.h> #include <Duration.h>
#include <RX.h> #include <RX.h>
@ -196,10 +196,10 @@ bool Variant::operator&& (const Variant& other) const
Variant right (other); Variant right (other);
if (left._type == type_string) if (left._type == type_string)
Lexer2::dequote (left._string); Lexer::dequote (left._string);
if (right._type == type_string) if (right._type == type_string)
Lexer2::dequote (right._string); Lexer::dequote (right._string);
left.cast (type_boolean); left.cast (type_boolean);
right.cast (type_boolean); right.cast (type_boolean);
@ -214,10 +214,10 @@ bool Variant::operator|| (const Variant& other) const
Variant right (other); Variant right (other);
if (left._type == type_string) if (left._type == type_string)
Lexer2::dequote (left._string); Lexer::dequote (left._string);
if (right._type == type_string) if (right._type == type_string)
Lexer2::dequote (right._string); Lexer::dequote (right._string);
left.cast (type_boolean); left.cast (type_boolean);
right.cast (type_boolean); right.cast (type_boolean);
@ -232,10 +232,10 @@ bool Variant::operator_xor (const Variant& other) const
Variant right (other); Variant right (other);
if (left._type == type_string) if (left._type == type_string)
Lexer2::dequote (left._string); Lexer::dequote (left._string);
if (right._type == type_string) if (right._type == type_string)
Lexer2::dequote (right._string); Lexer::dequote (right._string);
left.cast (type_boolean); left.cast (type_boolean);
right.cast (type_boolean); right.cast (type_boolean);
@ -251,10 +251,10 @@ bool Variant::operator< (const Variant& other) const
Variant right (other); Variant right (other);
if (left._type == type_string) if (left._type == type_string)
Lexer2::dequote (left._string); Lexer::dequote (left._string);
if (right._type == type_string) if (right._type == type_string)
Lexer2::dequote (right._string); Lexer::dequote (right._string);
switch (left._type) switch (left._type)
{ {
@ -396,10 +396,10 @@ bool Variant::operator<= (const Variant& other) const
Variant right (other); Variant right (other);
if (left._type == type_string) if (left._type == type_string)
Lexer2::dequote (left._string); Lexer::dequote (left._string);
if (right._type == type_string) if (right._type == type_string)
Lexer2::dequote (right._string); Lexer::dequote (right._string);
switch (left._type) switch (left._type)
{ {
@ -542,10 +542,10 @@ bool Variant::operator> (const Variant& other) const
Variant right (other); Variant right (other);
if (left._type == type_string) if (left._type == type_string)
Lexer2::dequote (left._string); Lexer::dequote (left._string);
if (right._type == type_string) if (right._type == type_string)
Lexer2::dequote (right._string); Lexer::dequote (right._string);
switch (left._type) switch (left._type)
{ {
@ -686,10 +686,10 @@ bool Variant::operator>= (const Variant& other) const
Variant right (other); Variant right (other);
if (left._type == type_string) if (left._type == type_string)
Lexer2::dequote (left._string); Lexer::dequote (left._string);
if (right._type == type_string) if (right._type == type_string)
Lexer2::dequote (right._string); Lexer::dequote (right._string);
switch (left._type) switch (left._type)
{ {
@ -832,10 +832,10 @@ bool Variant::operator== (const Variant& other) const
Variant right (other); Variant right (other);
if (left._type == type_string) if (left._type == type_string)
Lexer2::dequote (left._string); Lexer::dequote (left._string);
if (right._type == type_string) if (right._type == type_string)
Lexer2::dequote (right._string); Lexer::dequote (right._string);
switch (left._type) switch (left._type)
{ {
@ -962,16 +962,16 @@ bool Variant::operator_match (const Variant& other, const Task& task) const
Variant right (other); Variant right (other);
if (left._type == type_string) if (left._type == type_string)
Lexer2::dequote (left._string); Lexer::dequote (left._string);
if (right._type == type_string) if (right._type == type_string)
Lexer2::dequote (right._string); Lexer::dequote (right._string);
left.cast (type_string); left.cast (type_string);
right.cast (type_string); right.cast (type_string);
std::string pattern = right._string; std::string pattern = right._string;
Lexer2::dequote (pattern); Lexer::dequote (pattern);
if (searchUsingRegex) if (searchUsingRegex)
{ {
@ -1032,10 +1032,10 @@ bool Variant::operator_partial (const Variant& other) const
Variant right (other); Variant right (other);
if (left._type == type_string) if (left._type == type_string)
Lexer2::dequote (left._string); Lexer::dequote (left._string);
if (right._type == type_string) if (right._type == type_string)
Lexer2::dequote (right._string); Lexer::dequote (right._string);
switch (left._type) switch (left._type)
{ {
@ -1220,7 +1220,7 @@ bool Variant::operator_hastag (const Variant& other, const Task& task) const
{ {
Variant right (other); Variant right (other);
right.cast (type_string); right.cast (type_string);
Lexer2::dequote (right._string); Lexer::dequote (right._string);
return task.hasTag (right._string); return task.hasTag (right._string);
} }
@ -1236,7 +1236,7 @@ bool Variant::operator! () const
Variant left (*this); Variant left (*this);
if (left._type == type_string) if (left._type == type_string)
Lexer2::dequote (left._string); Lexer::dequote (left._string);
left.cast (type_boolean); left.cast (type_boolean);
return ! left._bool; return ! left._bool;
@ -1401,7 +1401,7 @@ Variant& Variant::operator+= (const Variant& other)
Variant right (other); Variant right (other);
if (right._type == type_string) if (right._type == type_string)
Lexer2::dequote (right._string); Lexer::dequote (right._string);
switch (_type) switch (_type)
{ {
@ -1513,7 +1513,7 @@ Variant& Variant::operator*= (const Variant& other)
Variant right (other); Variant right (other);
if (right._type == type_string) if (right._type == type_string)
Lexer2::dequote (right._string); Lexer::dequote (right._string);
switch (_type) switch (_type)
{ {
@ -1970,7 +1970,7 @@ Variant::operator std::string () const
void Variant::sqrt () void Variant::sqrt ()
{ {
if (_type == type_string) if (_type == type_string)
Lexer2::dequote (_string); Lexer::dequote (_string);
cast (type_real); cast (type_real);
if (_real < 0.0) if (_real < 0.0)
@ -2046,7 +2046,7 @@ void Variant::cast (const enum type new_type)
break; break;
case type_string: case type_string:
Lexer2::dequote (_string); Lexer::dequote (_string);
switch (new_type) switch (new_type)
{ {
case type_unknown: break; case type_unknown: break;

View file

@ -32,7 +32,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <Context.h> #include <Context.h>
#include <Filter.h> #include <Filter.h>
#include <Lexer2.h> #include <Lexer.h>
#include <ViewTask.h> #include <ViewTask.h>
#include <i18n.h> #include <i18n.h>
#include <text.h> #include <text.h>
@ -83,8 +83,8 @@ int CmdCustom::execute (std::string& output)
// Prepend the argument list with those from the report filter. // Prepend the argument list with those from the report filter.
std::string lexeme; std::string lexeme;
Lexer2::Type type; Lexer::Type type;
Lexer2 lex (reportFilter); Lexer lex (reportFilter);
lex.ambiguity (false); lex.ambiguity (false);
while (lex.token (lexeme, type)) while (lex.token (lexeme, type))
context.cli.add (lexeme); context.cli.add (lexeme);

View file

@ -1,7 +1,7 @@
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
#include <iostream> #include <iostream>
#include <Lexer2.h> #include <Lexer.h>
#include <Context.h> #include <Context.h>
Context context; Context context;
@ -12,17 +12,17 @@ int main (int argc, char** argv)
{ {
std::cout << "input '" << argv[i] << "'\n"; std::cout << "input '" << argv[i] << "'\n";
// Low-level tokens. // Low-level tokens.
Lexer2 lexer (argv[i]); Lexer lexer (argv[i]);
std::string token; std::string token;
Lexer2::Type type; Lexer::Type type;
while (lexer.token (token, type)) while (lexer.token (token, type))
std::cout << " token '" << token << "' " << Lexer2::typeToString (type) << "\n"; std::cout << " token '" << token << "' " << Lexer::typeToString (type) << "\n";
/* /*
// High-level tokens. // High-level tokens.
auto all = Lexer2::tokens (argv[i]); auto all = Lexer::tokens (argv[i]);
for (auto token : Lexer2::tokens (argv[i])) for (auto token : Lexer::tokens (argv[i]))
std::cout << " token '" << token.first << "' " << Lexer2::typeToString (token.second) << "\n"; std::cout << " token '" << token.first << "' " << Lexer::typeToString (token.second) << "\n";
*/ */
} }
} }

View file

@ -34,7 +34,7 @@
#include <strings.h> #include <strings.h>
#include <ctype.h> #include <ctype.h>
#include <Context.h> #include <Context.h>
#include <Lexer2.h> #include <Lexer.h>
#include <math.h> #include <math.h>
#include <util.h> #include <util.h>
#include <text.h> #include <text.h>
@ -473,7 +473,7 @@ bool nontrivial (const std::string& input)
std::string::size_type i = 0; std::string::size_type i = 0;
int character; int character;
while ((character = utf8_next_char (input, i))) while ((character = utf8_next_char (input, i)))
if (! Lexer2::isWhitespace (character)) if (! Lexer::isWhitespace (character))
return true; return true;
return false; return false;
@ -495,7 +495,7 @@ bool noSpaces (const std::string& input)
std::string::size_type i = 0; std::string::size_type i = 0;
int character; int character;
while ((character = utf8_next_char (input, i))) while ((character = utf8_next_char (input, i)))
if (Lexer2::isWhitespace (character)) if (Lexer::isWhitespace (character))
return false; return false;
return true; return true;

View file

@ -28,7 +28,7 @@
#include <iostream> #include <iostream>
#include <vector> #include <vector>
#include <test.h> #include <test.h>
#include <Lexer2.h> #include <Lexer.h>
#include <Context.h> #include <Context.h>
Context context; Context context;
@ -38,317 +38,317 @@ int main (int argc, char** argv)
{ {
UnitTest t (211); UnitTest t (211);
std::vector <std::pair <std::string, Lexer2::Type> > tokens; std::vector <std::pair <std::string, Lexer::Type> > tokens;
std::string token; std::string token;
Lexer2::Type type; Lexer::Type type;
// White space detection. // White space detection.
t.notok (Lexer2::isWhitespace (0x0041), "U+0041 (A) ! isWhitespace"); t.notok (Lexer::isWhitespace (0x0041), "U+0041 (A) ! isWhitespace");
t.ok (Lexer2::isWhitespace (0x0020), "U+0020 isWhitespace"); t.ok (Lexer::isWhitespace (0x0020), "U+0020 isWhitespace");
t.ok (Lexer2::isWhitespace (0x0009), "U+0009 isWhitespace"); t.ok (Lexer::isWhitespace (0x0009), "U+0009 isWhitespace");
t.ok (Lexer2::isWhitespace (0x000A), "U+000A isWhitespace"); t.ok (Lexer::isWhitespace (0x000A), "U+000A isWhitespace");
t.ok (Lexer2::isWhitespace (0x000B), "U+000B isWhitespace"); t.ok (Lexer::isWhitespace (0x000B), "U+000B isWhitespace");
t.ok (Lexer2::isWhitespace (0x000C), "U+000C isWhitespace"); t.ok (Lexer::isWhitespace (0x000C), "U+000C isWhitespace");
t.ok (Lexer2::isWhitespace (0x000D), "U+000D isWhitespace"); t.ok (Lexer::isWhitespace (0x000D), "U+000D isWhitespace");
t.ok (Lexer2::isWhitespace (0x0085), "U+0085 isWhitespace"); t.ok (Lexer::isWhitespace (0x0085), "U+0085 isWhitespace");
t.ok (Lexer2::isWhitespace (0x00A0), "U+00A0 isWhitespace"); t.ok (Lexer::isWhitespace (0x00A0), "U+00A0 isWhitespace");
t.ok (Lexer2::isWhitespace (0x1680), "U+1680 isWhitespace"); // 10 t.ok (Lexer::isWhitespace (0x1680), "U+1680 isWhitespace"); // 10
t.ok (Lexer2::isWhitespace (0x180E), "U+180E isWhitespace"); t.ok (Lexer::isWhitespace (0x180E), "U+180E isWhitespace");
t.ok (Lexer2::isWhitespace (0x2000), "U+2000 isWhitespace"); t.ok (Lexer::isWhitespace (0x2000), "U+2000 isWhitespace");
t.ok (Lexer2::isWhitespace (0x2001), "U+2001 isWhitespace"); t.ok (Lexer::isWhitespace (0x2001), "U+2001 isWhitespace");
t.ok (Lexer2::isWhitespace (0x2002), "U+2002 isWhitespace"); t.ok (Lexer::isWhitespace (0x2002), "U+2002 isWhitespace");
t.ok (Lexer2::isWhitespace (0x2003), "U+2003 isWhitespace"); t.ok (Lexer::isWhitespace (0x2003), "U+2003 isWhitespace");
t.ok (Lexer2::isWhitespace (0x2004), "U+2004 isWhitespace"); t.ok (Lexer::isWhitespace (0x2004), "U+2004 isWhitespace");
t.ok (Lexer2::isWhitespace (0x2005), "U+2005 isWhitespace"); t.ok (Lexer::isWhitespace (0x2005), "U+2005 isWhitespace");
t.ok (Lexer2::isWhitespace (0x2006), "U+2006 isWhitespace"); t.ok (Lexer::isWhitespace (0x2006), "U+2006 isWhitespace");
t.ok (Lexer2::isWhitespace (0x2007), "U+2007 isWhitespace"); t.ok (Lexer::isWhitespace (0x2007), "U+2007 isWhitespace");
t.ok (Lexer2::isWhitespace (0x2008), "U+2008 isWhitespace"); // 20 t.ok (Lexer::isWhitespace (0x2008), "U+2008 isWhitespace"); // 20
t.ok (Lexer2::isWhitespace (0x2009), "U+2009 isWhitespace"); t.ok (Lexer::isWhitespace (0x2009), "U+2009 isWhitespace");
t.ok (Lexer2::isWhitespace (0x200A), "U+200A isWhitespace"); t.ok (Lexer::isWhitespace (0x200A), "U+200A isWhitespace");
t.ok (Lexer2::isWhitespace (0x2028), "U+2028 isWhitespace"); t.ok (Lexer::isWhitespace (0x2028), "U+2028 isWhitespace");
t.ok (Lexer2::isWhitespace (0x2029), "U+2029 isWhitespace"); t.ok (Lexer::isWhitespace (0x2029), "U+2029 isWhitespace");
t.ok (Lexer2::isWhitespace (0x202F), "U+202F isWhitespace"); t.ok (Lexer::isWhitespace (0x202F), "U+202F isWhitespace");
t.ok (Lexer2::isWhitespace (0x205F), "U+205F isWhitespace"); t.ok (Lexer::isWhitespace (0x205F), "U+205F isWhitespace");
t.ok (Lexer2::isWhitespace (0x3000), "U+3000 isWhitespace"); t.ok (Lexer::isWhitespace (0x3000), "U+3000 isWhitespace");
// static bool Lexer2::isBoundary(int, int); // static bool Lexer::isBoundary(int, int);
t.ok (Lexer2::isBoundary (' ', 'a'), "' ' --> 'a' = isBoundary"); t.ok (Lexer::isBoundary (' ', 'a'), "' ' --> 'a' = isBoundary");
t.ok (Lexer2::isBoundary ('a', ' '), "'a' --> ' ' = isBoundary"); t.ok (Lexer::isBoundary ('a', ' '), "'a' --> ' ' = isBoundary");
t.ok (Lexer2::isBoundary (' ', '+'), "' ' --> '+' = isBoundary"); t.ok (Lexer::isBoundary (' ', '+'), "' ' --> '+' = isBoundary");
t.ok (Lexer2::isBoundary (' ', ','), "' ' --> ',' = isBoundary"); t.ok (Lexer::isBoundary (' ', ','), "' ' --> ',' = isBoundary");
t.notok (Lexer2::isBoundary ('3', '4'), "'3' --> '4' = isBoundary"); t.notok (Lexer::isBoundary ('3', '4'), "'3' --> '4' = isBoundary");
t.ok (Lexer2::isBoundary ('(', '('), "'(' --> '(' = isBoundary"); t.ok (Lexer::isBoundary ('(', '('), "'(' --> '(' = isBoundary");
t.notok (Lexer2::isBoundary ('r', 'd'), "'r' --> 'd' = isBoundary"); t.notok (Lexer::isBoundary ('r', 'd'), "'r' --> 'd' = isBoundary");
// Should result in no tokens. // Should result in no tokens.
Lexer2 l0 (""); Lexer l0 ("");
t.notok (l0.token (token, type), "'' --> no tokens"); t.notok (l0.token (token, type), "'' --> no tokens");
// Should result in no tokens. // Should result in no tokens.
Lexer2 l1 (" \t "); Lexer l1 (" \t ");
t.notok (l1.token (token, type), "' \\t ' --> no tokens"); t.notok (l1.token (token, type), "' \\t ' --> no tokens");
// \u20ac = Euro symbol. // \u20ac = Euro symbol.
Lexer2 l2 (" one 'two \\'three\\''+456-(1.3*2 - 0x12) 1.2e-3.4 foo.bar and '\\u20ac'"); Lexer l2 (" one 'two \\'three\\''+456-(1.3*2 - 0x12) 1.2e-3.4 foo.bar and '\\u20ac'");
tokens.clear (); tokens.clear ();
while (l2.token (token, type)) while (l2.token (token, type))
{ {
std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n"; std::cout << "# «" << token << "» " << Lexer::typeName (type) << "\n";
tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type)); tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
} }
t.is (tokens[0].first, "one", "tokens[0] = 'left'"); // 30 t.is (tokens[0].first, "one", "tokens[0] = 'left'"); // 30
t.is (Lexer2::typeName (tokens[0].second), "identifier", "tokens[0] = identifier"); t.is (Lexer::typeName (tokens[0].second), "identifier", "tokens[0] = identifier");
t.is (tokens[1].first, "two 'three'", "tokens[1] = 'two 'three''"); t.is (tokens[1].first, "two 'three'", "tokens[1] = 'two 'three''");
t.is (Lexer2::typeName (tokens[1].second), "string", "tokens[1] = string"); t.is (Lexer::typeName (tokens[1].second), "string", "tokens[1] = string");
t.is (tokens[2].first, "+", "tokens[2] = '+'"); t.is (tokens[2].first, "+", "tokens[2] = '+'");
t.is (Lexer2::typeName (tokens[2].second), "op", "tokens[2] = op"); t.is (Lexer::typeName (tokens[2].second), "op", "tokens[2] = op");
t.is (tokens[3].first, "456", "tokens[3] = '456'"); t.is (tokens[3].first, "456", "tokens[3] = '456'");
t.is (Lexer2::typeName (tokens[3].second), "number", "tokens[3] = number"); t.is (Lexer::typeName (tokens[3].second), "number", "tokens[3] = number");
t.is (tokens[4].first, "-", "tokens[4] = '-'"); t.is (tokens[4].first, "-", "tokens[4] = '-'");
t.is (Lexer2::typeName (tokens[4].second), "op", "tokens[4] = op"); t.is (Lexer::typeName (tokens[4].second), "op", "tokens[4] = op");
t.is (tokens[5].first, "(", "tokens[5] = '('"); // 40 t.is (tokens[5].first, "(", "tokens[5] = '('"); // 40
t.is (Lexer2::typeName (tokens[5].second), "op", "tokens[5] = op"); t.is (Lexer::typeName (tokens[5].second), "op", "tokens[5] = op");
t.is (tokens[6].first, "1.3", "tokens[6] = '1.3'"); t.is (tokens[6].first, "1.3", "tokens[6] = '1.3'");
t.is (Lexer2::typeName (tokens[6].second), "number", "tokens[6] = number"); t.is (Lexer::typeName (tokens[6].second), "number", "tokens[6] = number");
t.is (tokens[7].first, "*", "tokens[7] = '*'"); t.is (tokens[7].first, "*", "tokens[7] = '*'");
t.is (Lexer2::typeName (tokens[7].second), "op", "tokens[7] = op"); t.is (Lexer::typeName (tokens[7].second), "op", "tokens[7] = op");
t.is (tokens[8].first, "2", "tokens[8] = '2'"); t.is (tokens[8].first, "2", "tokens[8] = '2'");
t.is (Lexer2::typeName (tokens[8].second), "number", "tokens[8] = number"); t.is (Lexer::typeName (tokens[8].second), "number", "tokens[8] = number");
t.is (tokens[9].first, "-", "tokens[9] = '-'"); t.is (tokens[9].first, "-", "tokens[9] = '-'");
t.is (Lexer2::typeName (tokens[9].second), "op", "tokens[9] = op"); t.is (Lexer::typeName (tokens[9].second), "op", "tokens[9] = op");
t.is (tokens[10].first, "0x12", "tokens[10] = '0x12'"); // 50 t.is (tokens[10].first, "0x12", "tokens[10] = '0x12'"); // 50
t.is (Lexer2::typeName (tokens[10].second), "hex", "tokens[10] = hex"); t.is (Lexer::typeName (tokens[10].second), "hex", "tokens[10] = hex");
t.is (tokens[11].first, ")", "tokens[11] = ')'"); t.is (tokens[11].first, ")", "tokens[11] = ')'");
t.is (Lexer2::typeName (tokens[11].second), "op", "tokens[11] = op"); t.is (Lexer::typeName (tokens[11].second), "op", "tokens[11] = op");
t.is (tokens[12].first, "1.2e-3.4", "tokens[12] = '1.2e-3.4'"); t.is (tokens[12].first, "1.2e-3.4", "tokens[12] = '1.2e-3.4'");
t.is (Lexer2::typeName (tokens[12].second), "number", "tokens[12] = number"); t.is (Lexer::typeName (tokens[12].second), "number", "tokens[12] = number");
t.is (tokens[13].first, "foo.bar", "tokens[13] = 'foo.bar'"); t.is (tokens[13].first, "foo.bar", "tokens[13] = 'foo.bar'");
t.is (Lexer2::typeName (tokens[13].second), "identifier", "tokens[13] = identifier"); t.is (Lexer::typeName (tokens[13].second), "identifier", "tokens[13] = identifier");
t.is (tokens[14].first, "and", "tokens[14] = 'and'"); // 60 t.is (tokens[14].first, "and", "tokens[14] = 'and'"); // 60
t.is (Lexer2::typeName (tokens[14].second), "op", "tokens[14] = op"); t.is (Lexer::typeName (tokens[14].second), "op", "tokens[14] = op");
t.is (tokens[15].first, "", "tokens[15] = \\u20ac --> '€'"); t.is (tokens[15].first, "", "tokens[15] = \\u20ac --> '€'");
t.is (Lexer2::typeName (tokens[15].second), "string", "tokens[15] = string"); t.is (Lexer::typeName (tokens[15].second), "string", "tokens[15] = string");
// Test for ISO-8601 dates (favoring dates in ambiguous cases). // Test for ISO-8601 dates (favoring dates in ambiguous cases).
Lexer2 l3 ("1 12 123 1234 12345 123456 1234567 12345678 20131129T225800Z 2013-11-29T22:58:00Z"); Lexer l3 ("1 12 123 1234 12345 123456 1234567 12345678 20131129T225800Z 2013-11-29T22:58:00Z");
l3.ambiguity (true); l3.ambiguity (true);
tokens.clear (); tokens.clear ();
while (l3.token (token, type)) while (l3.token (token, type))
{ {
std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n"; std::cout << "# «" << token << "» " << Lexer::typeName (type) << "\n";
tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type)); tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
} }
t.is ((int)tokens.size (), 10, "10 tokens"); t.is ((int)tokens.size (), 10, "10 tokens");
t.is (tokens[0].first, "1", "tokens[0] == '1'"); t.is (tokens[0].first, "1", "tokens[0] == '1'");
t.is ((int) tokens[0].second, (int) Lexer2::Type::number, "tokens[0] == Type::number"); t.is ((int) tokens[0].second, (int) Lexer::Type::number, "tokens[0] == Type::number");
t.is (tokens[1].first, "12", "tokens[1] == '12'"); t.is (tokens[1].first, "12", "tokens[1] == '12'");
t.is ((int) tokens[1].second, (int) Lexer2::Type::date, "tokens[1] == Type::date"); t.is ((int) tokens[1].second, (int) Lexer::Type::date, "tokens[1] == Type::date");
t.is (tokens[2].first, "123", "tokens[2] == '123'"); t.is (tokens[2].first, "123", "tokens[2] == '123'");
t.is ((int) tokens[2].second, (int) Lexer2::Type::number, "tokens[2] == Type::number"); // 70 t.is ((int) tokens[2].second, (int) Lexer::Type::number, "tokens[2] == Type::number"); // 70
t.is (tokens[3].first, "1234", "tokens[3] == '1234'"); t.is (tokens[3].first, "1234", "tokens[3] == '1234'");
t.is ((int) tokens[3].second, (int) Lexer2::Type::date, "tokens[3] == Type::date"); t.is ((int) tokens[3].second, (int) Lexer::Type::date, "tokens[3] == Type::date");
t.is (tokens[4].first, "12345", "tokens[4] == '12345'"); t.is (tokens[4].first, "12345", "tokens[4] == '12345'");
t.is ((int) tokens[4].second, (int) Lexer2::Type::number, "tokens[4] == Type::number"); t.is ((int) tokens[4].second, (int) Lexer::Type::number, "tokens[4] == Type::number");
t.is (tokens[5].first, "123456", "tokens[5] == '123456'"); t.is (tokens[5].first, "123456", "tokens[5] == '123456'");
t.is ((int) tokens[5].second, (int) Lexer2::Type::date, "tokens[5] == Type::date"); t.is ((int) tokens[5].second, (int) Lexer::Type::date, "tokens[5] == Type::date");
t.is (tokens[6].first, "1234567", "tokens[6] == '1234567'"); t.is (tokens[6].first, "1234567", "tokens[6] == '1234567'");
t.is ((int) tokens[6].second, (int) Lexer2::Type::number, "tokens[6] == Type::number"); t.is ((int) tokens[6].second, (int) Lexer::Type::number, "tokens[6] == Type::number");
t.is (tokens[7].first, "12345678", "tokens[7] == '12345678'"); t.is (tokens[7].first, "12345678", "tokens[7] == '12345678'");
t.is ((int) tokens[7].second, (int) Lexer2::Type::number, "tokens[7] == Type::number"); // 80 t.is ((int) tokens[7].second, (int) Lexer::Type::number, "tokens[7] == Type::number"); // 80
t.is (tokens[8].first, "20131129T225800Z", "tokens[8] == '20131129T225800Z'"); t.is (tokens[8].first, "20131129T225800Z", "tokens[8] == '20131129T225800Z'");
t.is ((int) tokens[8].second, (int) Lexer2::Type::date, "tokens[8] == Type::date"); t.is ((int) tokens[8].second, (int) Lexer::Type::date, "tokens[8] == Type::date");
t.is (tokens[9].first, "2013-11-29T22:58:00Z", "tokens[9] == '2013-11-29T22:58:00Z'"); t.is (tokens[9].first, "2013-11-29T22:58:00Z", "tokens[9] == '2013-11-29T22:58:00Z'");
t.is ((int) tokens[9].second, (int) Lexer2::Type::date, "tokens[9] == Type::date"); t.is ((int) tokens[9].second, (int) Lexer::Type::date, "tokens[9] == Type::date");
// Test for ISO-8601 dates (favoring numbers in ambiguous cases). // Test for ISO-8601 dates (favoring numbers in ambiguous cases).
Lexer2 l4 ("1 12 123 1234 12345 123456 1234567 12345678 20131129T225800Z 2013-11-29T22:58:00Z"); Lexer l4 ("1 12 123 1234 12345 123456 1234567 12345678 20131129T225800Z 2013-11-29T22:58:00Z");
l4.ambiguity (false); l4.ambiguity (false);
tokens.clear (); tokens.clear ();
while (l4.token (token, type)) while (l4.token (token, type))
{ {
std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n"; std::cout << "# «" << token << "» " << Lexer::typeName (type) << "\n";
tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type)); tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
} }
t.is ((int)tokens.size (), 10, "10 tokens"); t.is ((int)tokens.size (), 10, "10 tokens");
t.is (tokens[0].first, "1", "tokens[0] == '1'"); t.is (tokens[0].first, "1", "tokens[0] == '1'");
t.is ((int) tokens[0].second, (int) Lexer2::Type::number, "tokens[0] == Type::number"); t.is ((int) tokens[0].second, (int) Lexer::Type::number, "tokens[0] == Type::number");
t.is (tokens[1].first, "12", "tokens[1] == '12'"); t.is (tokens[1].first, "12", "tokens[1] == '12'");
t.is ((int) tokens[1].second, (int) Lexer2::Type::number, "tokens[1] == Type::number"); t.is ((int) tokens[1].second, (int) Lexer::Type::number, "tokens[1] == Type::number");
t.is (tokens[2].first, "123", "tokens[2] == '123'"); // 90 t.is (tokens[2].first, "123", "tokens[2] == '123'"); // 90
t.is ((int) tokens[2].second, (int) Lexer2::Type::number, "tokens[2] == Type::number"); t.is ((int) tokens[2].second, (int) Lexer::Type::number, "tokens[2] == Type::number");
t.is (tokens[3].first, "1234", "tokens[3] == '1234'"); t.is (tokens[3].first, "1234", "tokens[3] == '1234'");
t.is ((int) tokens[3].second, (int) Lexer2::Type::number, "tokens[3] == Type::number"); t.is ((int) tokens[3].second, (int) Lexer::Type::number, "tokens[3] == Type::number");
t.is (tokens[4].first, "12345", "tokens[4] == '12345'"); t.is (tokens[4].first, "12345", "tokens[4] == '12345'");
t.is ((int) tokens[4].second, (int) Lexer2::Type::number, "tokens[4] == Type::number"); t.is ((int) tokens[4].second, (int) Lexer::Type::number, "tokens[4] == Type::number");
t.is (tokens[5].first, "123456", "tokens[5] == '123456'"); t.is (tokens[5].first, "123456", "tokens[5] == '123456'");
t.is ((int) tokens[5].second, (int) Lexer2::Type::number, "tokens[5] == Type::number"); t.is ((int) tokens[5].second, (int) Lexer::Type::number, "tokens[5] == Type::number");
t.is (tokens[6].first, "1234567", "tokens[6] == '1234567'"); t.is (tokens[6].first, "1234567", "tokens[6] == '1234567'");
t.is ((int) tokens[6].second, (int) Lexer2::Type::number, "tokens[6] == Type::number"); t.is ((int) tokens[6].second, (int) Lexer::Type::number, "tokens[6] == Type::number");
t.is (tokens[7].first, "12345678", "tokens[7] == '12345678'"); // 100 t.is (tokens[7].first, "12345678", "tokens[7] == '12345678'"); // 100
t.is ((int) tokens[7].second, (int) Lexer2::Type::number, "tokens[7] == Type::number"); t.is ((int) tokens[7].second, (int) Lexer::Type::number, "tokens[7] == Type::number");
t.is (tokens[8].first, "20131129T225800Z", "tokens[8] == '20131129T225800Z'"); t.is (tokens[8].first, "20131129T225800Z", "tokens[8] == '20131129T225800Z'");
t.is ((int) tokens[8].second, (int) Lexer2::Type::date, "tokens[8] == Type::date"); t.is ((int) tokens[8].second, (int) Lexer::Type::date, "tokens[8] == Type::date");
t.is (tokens[9].first, "2013-11-29T22:58:00Z", "tokens[9] == '2013-11-29T22:58:00Z'"); t.is (tokens[9].first, "2013-11-29T22:58:00Z", "tokens[9] == '2013-11-29T22:58:00Z'");
t.is ((int) tokens[9].second, (int) Lexer2::Type::date, "tokens[9] == Type::date"); t.is ((int) tokens[9].second, (int) Lexer::Type::date, "tokens[9] == Type::date");
// Test for durations // Test for durations
Lexer2 l5 ("1second 1minute 2hour 3 days 4w 5mo 6 years"); Lexer l5 ("1second 1minute 2hour 3 days 4w 5mo 6 years");
tokens.clear (); tokens.clear ();
while (l5.token (token, type)) while (l5.token (token, type))
{ {
std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n"; std::cout << "# «" << token << "» " << Lexer::typeName (type) << "\n";
tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type)); tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
} }
t.is ((int)tokens.size (), 7, "7 tokens"); t.is ((int)tokens.size (), 7, "7 tokens");
t.is (tokens[0].first, "1second", "tokens[0] == '1second'"); t.is (tokens[0].first, "1second", "tokens[0] == '1second'");
t.is ((int) tokens[0].second, (int) Lexer2::Type::duration, "tokens[0] == Type::duration"); t.is ((int) tokens[0].second, (int) Lexer::Type::duration, "tokens[0] == Type::duration");
t.is (tokens[1].first, "1minute", "tokens[1] == '1minute'"); t.is (tokens[1].first, "1minute", "tokens[1] == '1minute'");
t.is ((int) tokens[1].second, (int) Lexer2::Type::duration, "tokens[1] == Type::duration"); // 110 t.is ((int) tokens[1].second, (int) Lexer::Type::duration, "tokens[1] == Type::duration"); // 110
t.is (tokens[2].first, "2hour", "tokens[2] == '2hour'"); t.is (tokens[2].first, "2hour", "tokens[2] == '2hour'");
t.is ((int) tokens[2].second, (int) Lexer2::Type::duration, "tokens[2] == Type::duration"); t.is ((int) tokens[2].second, (int) Lexer::Type::duration, "tokens[2] == Type::duration");
t.is (tokens[3].first, "3 days", "tokens[3] == '3 days'"); t.is (tokens[3].first, "3 days", "tokens[3] == '3 days'");
t.is ((int) tokens[3].second, (int) Lexer2::Type::duration, "tokens[3] == Type::duration"); t.is ((int) tokens[3].second, (int) Lexer::Type::duration, "tokens[3] == Type::duration");
t.is (tokens[4].first, "4w", "tokens[4] == '4w'"); t.is (tokens[4].first, "4w", "tokens[4] == '4w'");
t.is ((int) tokens[4].second, (int) Lexer2::Type::duration, "tokens[4] == Type::duration"); t.is ((int) tokens[4].second, (int) Lexer::Type::duration, "tokens[4] == Type::duration");
t.is (tokens[5].first, "5mo", "tokens[5] == '5mo'"); t.is (tokens[5].first, "5mo", "tokens[5] == '5mo'");
t.is ((int) tokens[5].second, (int) Lexer2::Type::duration, "tokens[5] == Type::duration"); t.is ((int) tokens[5].second, (int) Lexer::Type::duration, "tokens[5] == Type::duration");
t.is (tokens[6].first, "6 years", "tokens[6] == '6 years'"); t.is (tokens[6].first, "6 years", "tokens[6] == '6 years'");
t.is ((int) tokens[6].second, (int) Lexer2::Type::duration, "tokens[6] == Type::duration"); // 120 t.is ((int) tokens[6].second, (int) Lexer::Type::duration, "tokens[6] == Type::duration"); // 120
// Test for ISO-8601 periods and durations. // Test for ISO-8601 periods and durations.
Lexer2 l6 ("P1Y PT1H P1Y1M1DT1H1M1S 1s 1second"); Lexer l6 ("P1Y PT1H P1Y1M1DT1H1M1S 1s 1second");
tokens.clear (); tokens.clear ();
while (l6.token (token, type)) while (l6.token (token, type))
{ {
std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n"; std::cout << "# «" << token << "» " << Lexer::typeName (type) << "\n";
tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type)); tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
} }
t.is ((int)tokens.size (), 5, "5 ISO periods"); t.is ((int)tokens.size (), 5, "5 ISO periods");
t.is (tokens[0].first, "P1Y", "tokens[0] == 'P1Y'"); t.is (tokens[0].first, "P1Y", "tokens[0] == 'P1Y'");
t.is ((int) tokens[0].second, (int) Lexer2::Type::duration, "tokens[0] == Type::duration"); t.is ((int) tokens[0].second, (int) Lexer::Type::duration, "tokens[0] == Type::duration");
t.is (tokens[1].first, "PT1H", "tokens[1] == 'PT1H'"); t.is (tokens[1].first, "PT1H", "tokens[1] == 'PT1H'");
t.is ((int) tokens[1].second, (int) Lexer2::Type::duration, "tokens[1] == Type::duration"); t.is ((int) tokens[1].second, (int) Lexer::Type::duration, "tokens[1] == Type::duration");
t.is (tokens[2].first, "P1Y1M1DT1H1M1S", "tokens[2] == 'P1Y1M1DT1H1M1S'"); t.is (tokens[2].first, "P1Y1M1DT1H1M1S", "tokens[2] == 'P1Y1M1DT1H1M1S'");
t.is ((int) tokens[2].second, (int) Lexer2::Type::duration, "tokens[2] == Type::duration"); t.is ((int) tokens[2].second, (int) Lexer::Type::duration, "tokens[2] == Type::duration");
t.is (tokens[3].first, "1s", "tokens[3] == '1s'"); t.is (tokens[3].first, "1s", "tokens[3] == '1s'");
t.is ((int) tokens[3].second, (int) Lexer2::Type::duration, "tokens[3] == Type::duration"); t.is ((int) tokens[3].second, (int) Lexer::Type::duration, "tokens[3] == Type::duration");
t.is (tokens[4].first, "1second", "tokens[4] == '1second'"); t.is (tokens[4].first, "1second", "tokens[4] == '1second'");
t.is ((int) tokens[4].second, (int) Lexer2::Type::duration, "tokens[4] == Type::duration"); t.is ((int) tokens[4].second, (int) Lexer::Type::duration, "tokens[4] == Type::duration");
// All the Eval operators. // All the Eval operators.
Lexer2 l7 ("and xor or <= >= !~ != == = ^ > ~ ! * / % + - < ( )"); Lexer l7 ("and xor or <= >= !~ != == = ^ > ~ ! * / % + - < ( )");
tokens.clear (); tokens.clear ();
while (l7.token (token, type)) while (l7.token (token, type))
{ {
std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n"; std::cout << "# «" << token << "» " << Lexer::typeName (type) << "\n";
tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type)); tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
} }
t.is ((int)tokens.size (), 21, "21 operators"); t.is ((int)tokens.size (), 21, "21 operators");
t.is (tokens[0].first, "and", "tokens[0] == 'and'"); t.is (tokens[0].first, "and", "tokens[0] == 'and'");
t.is ((int) tokens[0].second, (int) Lexer2::Type::op, "tokens[0] == Type::op"); // 130 t.is ((int) tokens[0].second, (int) Lexer::Type::op, "tokens[0] == Type::op"); // 130
t.is (tokens[1].first, "xor", "tokens[1] == 'xor'"); t.is (tokens[1].first, "xor", "tokens[1] == 'xor'");
t.is ((int) tokens[1].second, (int) Lexer2::Type::op, "tokens[1] == Type::op"); t.is ((int) tokens[1].second, (int) Lexer::Type::op, "tokens[1] == Type::op");
t.is (tokens[2].first, "or", "tokens[2] == 'or'"); t.is (tokens[2].first, "or", "tokens[2] == 'or'");
t.is ((int) tokens[2].second, (int) Lexer2::Type::op, "tokens[2] == Type::op"); t.is ((int) tokens[2].second, (int) Lexer::Type::op, "tokens[2] == Type::op");
t.is (tokens[3].first, "<=", "tokens[3] == '<='"); t.is (tokens[3].first, "<=", "tokens[3] == '<='");
t.is ((int) tokens[3].second, (int) Lexer2::Type::op, "tokens[3] == Type::op"); t.is ((int) tokens[3].second, (int) Lexer::Type::op, "tokens[3] == Type::op");
t.is (tokens[4].first, ">=", "tokens[4] == '>='"); t.is (tokens[4].first, ">=", "tokens[4] == '>='");
t.is ((int) tokens[4].second, (int) Lexer2::Type::op, "tokens[4] == Type::op"); t.is ((int) tokens[4].second, (int) Lexer::Type::op, "tokens[4] == Type::op");
t.is (tokens[5].first, "!~", "tokens[5] == '!~'"); t.is (tokens[5].first, "!~", "tokens[5] == '!~'");
t.is ((int) tokens[5].second, (int) Lexer2::Type::op, "tokens[5] == Type::op"); // 140 t.is ((int) tokens[5].second, (int) Lexer::Type::op, "tokens[5] == Type::op"); // 140
t.is (tokens[6].first, "!=", "tokens[6] == '!='"); t.is (tokens[6].first, "!=", "tokens[6] == '!='");
t.is ((int) tokens[6].second, (int) Lexer2::Type::op, "tokens[6] == Type::op"); t.is ((int) tokens[6].second, (int) Lexer::Type::op, "tokens[6] == Type::op");
t.is (tokens[7].first, "==", "tokens[7] == '=='"); t.is (tokens[7].first, "==", "tokens[7] == '=='");
t.is ((int) tokens[7].second, (int) Lexer2::Type::op, "tokens[7] == Type::op"); t.is ((int) tokens[7].second, (int) Lexer::Type::op, "tokens[7] == Type::op");
t.is (tokens[8].first, "=", "tokens[8] == '='"); t.is (tokens[8].first, "=", "tokens[8] == '='");
t.is ((int) tokens[8].second, (int) Lexer2::Type::op, "tokens[8] == Type::op"); t.is ((int) tokens[8].second, (int) Lexer::Type::op, "tokens[8] == Type::op");
t.is (tokens[9].first, "^", "tokens[9] == '^'"); t.is (tokens[9].first, "^", "tokens[9] == '^'");
t.is ((int) tokens[9].second, (int) Lexer2::Type::op, "tokens[9] == Type::op"); t.is ((int) tokens[9].second, (int) Lexer::Type::op, "tokens[9] == Type::op");
t.is (tokens[10].first, ">", "tokens[10] == '>'"); t.is (tokens[10].first, ">", "tokens[10] == '>'");
t.is ((int) tokens[10].second, (int) Lexer2::Type::op, "tokens[10] == Type::op"); // 150 t.is ((int) tokens[10].second, (int) Lexer::Type::op, "tokens[10] == Type::op"); // 150
t.is (tokens[11].first, "~", "tokens[11] == '~'"); t.is (tokens[11].first, "~", "tokens[11] == '~'");
t.is ((int) tokens[11].second, (int) Lexer2::Type::op, "tokens[11] == Type::op"); t.is ((int) tokens[11].second, (int) Lexer::Type::op, "tokens[11] == Type::op");
t.is (tokens[12].first, "!", "tokens[12] == '!'"); t.is (tokens[12].first, "!", "tokens[12] == '!'");
t.is ((int) tokens[12].second, (int) Lexer2::Type::op, "tokens[12] == Type::op"); t.is ((int) tokens[12].second, (int) Lexer::Type::op, "tokens[12] == Type::op");
t.is (tokens[13].first, "*", "tokens[13] == '*'"); t.is (tokens[13].first, "*", "tokens[13] == '*'");
t.is ((int) tokens[13].second, (int) Lexer2::Type::op, "tokens[13] == Type::op"); t.is ((int) tokens[13].second, (int) Lexer::Type::op, "tokens[13] == Type::op");
t.is (tokens[14].first, "/", "tokens[14] == '/'"); t.is (tokens[14].first, "/", "tokens[14] == '/'");
t.is ((int) tokens[14].second, (int) Lexer2::Type::op, "tokens[14] == Type::op"); t.is ((int) tokens[14].second, (int) Lexer::Type::op, "tokens[14] == Type::op");
t.is (tokens[15].first, "%", "tokens[15] == '%'"); t.is (tokens[15].first, "%", "tokens[15] == '%'");
t.is ((int) tokens[15].second, (int) Lexer2::Type::op, "tokens[15] == Type::op"); // 160 t.is ((int) tokens[15].second, (int) Lexer::Type::op, "tokens[15] == Type::op"); // 160
t.is (tokens[16].first, "+", "tokens[16] == '+'"); t.is (tokens[16].first, "+", "tokens[16] == '+'");
t.is ((int) tokens[16].second, (int) Lexer2::Type::op, "tokens[16] == Type::op"); t.is ((int) tokens[16].second, (int) Lexer::Type::op, "tokens[16] == Type::op");
t.is (tokens[17].first, "-", "tokens[17] == '-'"); t.is (tokens[17].first, "-", "tokens[17] == '-'");
t.is ((int) tokens[17].second, (int) Lexer2::Type::op, "tokens[17] == Type::op"); t.is ((int) tokens[17].second, (int) Lexer::Type::op, "tokens[17] == Type::op");
t.is (tokens[18].first, "<", "tokens[18] == '<'"); t.is (tokens[18].first, "<", "tokens[18] == '<'");
t.is ((int) tokens[18].second, (int) Lexer2::Type::op, "tokens[18] == Type::op"); t.is ((int) tokens[18].second, (int) Lexer::Type::op, "tokens[18] == Type::op");
t.is (tokens[19].first, "(", "tokens[19] == '('"); t.is (tokens[19].first, "(", "tokens[19] == '('");
t.is ((int) tokens[19].second, (int) Lexer2::Type::op, "tokens[19] == Type::op"); t.is ((int) tokens[19].second, (int) Lexer::Type::op, "tokens[19] == Type::op");
t.is (tokens[20].first, ")", "tokens[20] == ')'"); t.is (tokens[20].first, ")", "tokens[20] == ')'");
t.is ((int) tokens[20].second, (int)Lexer2::Type::op, "tokens[20] == Type::op"); // 170 t.is ((int) tokens[20].second, (int)Lexer::Type::op, "tokens[20] == Type::op"); // 170
// Test ordinal dates. // Test ordinal dates.
Lexer2 l8 ("9th 10th"); Lexer l8 ("9th 10th");
l8.ambiguity (false); l8.ambiguity (false);
tokens.clear (); tokens.clear ();
while (l8.token (token, type)) while (l8.token (token, type))
{ {
std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n"; std::cout << "# «" << token << "» " << Lexer::typeName (type) << "\n";
tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type)); tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
} }
t.is ((int)tokens.size (), 2, "2 tokens"); t.is ((int)tokens.size (), 2, "2 tokens");
t.is (tokens[0].first, "9th", "tokens[0] == '9th'"); t.is (tokens[0].first, "9th", "tokens[0] == '9th'");
t.is ((int) tokens[0].second, (int) Lexer2::Type::identifier, "tokens[0] == Type::identifier"); t.is ((int) tokens[0].second, (int) Lexer::Type::identifier, "tokens[0] == Type::identifier");
t.is (tokens[1].first, "10th", "tokens[1] == '10th'"); t.is (tokens[1].first, "10th", "tokens[1] == '10th'");
t.is ((int) tokens[1].second, (int) Lexer2::Type::identifier, "tokens[1] == Type::identifier"); t.is ((int) tokens[1].second, (int) Lexer::Type::identifier, "tokens[1] == Type::identifier");
// Test tag recognition. // Test tag recognition.
Lexer2 l9 ("+with -WITHOUT + 2"); Lexer l9 ("+with -WITHOUT + 2");
l9.ambiguity (false); l9.ambiguity (false);
tokens.clear (); tokens.clear ();
while (l9.token (token, type)) while (l9.token (token, type))
{ {
std::cout << "# «" << token << "» " << Lexer2::typeName (type) << "\n"; std::cout << "# «" << token << "» " << Lexer::typeName (type) << "\n";
tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type)); tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
} }
t.is ((int)tokens.size (), 4, "4 tokens"); t.is ((int)tokens.size (), 4, "4 tokens");
t.is (tokens[0].first, "+with", "tokens[0] == '+with'"); t.is (tokens[0].first, "+with", "tokens[0] == '+with'");
t.is ((int) tokens[0].second, (int) Lexer2::Type::tag, "tokens[0] == Type::tag"); t.is ((int) tokens[0].second, (int) Lexer::Type::tag, "tokens[0] == Type::tag");
t.is (tokens[1].first, "-WITHOUT", "tokens[1] == '-WITHOUT'"); t.is (tokens[1].first, "-WITHOUT", "tokens[1] == '-WITHOUT'");
t.is ((int) tokens[1].second, (int) Lexer2::Type::tag, "tokens[1] == Type::tag"); t.is ((int) tokens[1].second, (int) Lexer::Type::tag, "tokens[1] == Type::tag");
t.is (tokens[2].first, "+", "tokens[2] == '+'"); t.is (tokens[2].first, "+", "tokens[2] == '+'");
t.is ((int) tokens[2].second, (int) Lexer2::Type::op, "tokens[2] == Type::op"); t.is ((int) tokens[2].second, (int) Lexer::Type::op, "tokens[2] == Type::op");
t.is (tokens[3].first, "2", "tokens[3] == '2'"); t.is (tokens[3].first, "2", "tokens[3] == '2'");
t.is ((int) tokens[3].second, (int) Lexer2::Type::number, "tokens[3] == Type::number"); t.is ((int) tokens[3].second, (int) Lexer::Type::number, "tokens[3] == Type::number");
// std::vector <std::string> split (const std::string&); // std::vector <std::string> split (const std::string&);
std::string unsplit = " ( A or B ) "; std::string unsplit = " ( A or B ) ";
std::vector <std::string> items; std::vector <std::string> items;
items = Lexer2::split (unsplit); items = Lexer::split (unsplit);
t.is (items.size (), (size_t) 5, "split ' ( A or B ) '"); t.is (items.size (), (size_t) 5, "split ' ( A or B ) '");
t.is (items[0], "(", "split ' ( A or B ) ' -> [0] '('"); t.is (items[0], "(", "split ' ( A or B ) ' -> [0] '('");
t.is (items[1], "A", "split ' ( A or B ) ' -> [1] 'A'"); t.is (items[1], "A", "split ' ( A or B ) ' -> [1] 'A'");
@ -358,7 +358,7 @@ int main (int argc, char** argv)
// Test simple mode with contrived tokens that ordinarily split. // Test simple mode with contrived tokens that ordinarily split.
unsplit = " +-* a+b 12.3e4 'c d'"; unsplit = " +-* a+b 12.3e4 'c d'";
items = Lexer2::split (unsplit); items = Lexer::split (unsplit);
t.is (items.size (), (size_t) 8, "split ' +-* a+b 12.3e4 'c d''"); t.is (items.size (), (size_t) 8, "split ' +-* a+b 12.3e4 'c d''");
t.is (items[0], "+", "split ' +-* a+b 12.3e4 'c d'' -> [0] '+'"); t.is (items[0], "+", "split ' +-* a+b 12.3e4 'c d'' -> [0] '+'");
t.is (items[1], "-", "split ' +-* a+b 12.3e4 'c d'' -> [1] '-'"); t.is (items[1], "-", "split ' +-* a+b 12.3e4 'c d'' -> [1] '-'");
@ -371,12 +371,12 @@ int main (int argc, char** argv)
// Test common expression element. // Test common expression element.
unsplit = "name=value"; unsplit = "name=value";
items = Lexer2::split (unsplit); items = Lexer::split (unsplit);
t.is (items.size (), (size_t) 1, "split 'name=value'"); t.is (items.size (), (size_t) 1, "split 'name=value'");
// Test unterminated tokens. // Test unterminated tokens.
unsplit = " ordinary "; unsplit = " ordinary ";
items = Lexer2::split (unsplit); items = Lexer::split (unsplit);
t.is (items.size (), (size_t) 1, "split 'ordinary' --> 1 token"); t.is (items.size (), (size_t) 1, "split 'ordinary' --> 1 token");
t.is (items[0], "ordinary", "split 'ordinary' --> 'ordinary'"); t.is (items[0], "ordinary", "split 'ordinary' --> 'ordinary'");