Lexer2

- Integrated Lexer2 in place of Lexer. Tests fail.
2025-08-29 17:07:19 +02:00 · 2015-02-22 13:52:14 -05:00 · 2015-02-22 13:52:14 -05:00 · 0cf18f3b16
commit 0cf18f3b16
parent 2155bd3969
12 changed files with 416 additions and 1408 deletions
--- a/src/CLI.cpp
+++ b/src/CLI.cpp
@ -29,7 +29,6 @@
 #include <algorithm>
 #include <Context.h>
 #include <Nibbler.h>
-#include <Lexer.h>
 #include <Lexer2.h>
 #include <CLI.h>
 #include <Color.h>
@ -662,13 +661,13 @@ void CLI::addArg (const std::string& arg)
    // that cause the lexemes to be ignored, and the original argument used
    // intact.
    std::string lexeme;
-    Lexer::Type type;
-    Lexer lex (raw);
+    Lexer2::Type type;
+    Lexer2 lex (raw);
    lex.ambiguity (false);

-    std::vector <std::pair <std::string, Lexer::Type> > lexemes;
+    std::vector <std::pair <std::string, Lexer2::Type> > lexemes;
    while (lex.token (lexeme, type))
-      lexemes.push_back (std::pair <std::string, Lexer::Type> (lexeme, type));
+      lexemes.push_back (std::pair <std::string, Lexer2::Type> (lexeme, type));

    if (disqualifyInsufficientTerms (lexemes) ||
        disqualifyNoOps             (lexemes) ||
@ -682,7 +681,7 @@ void CLI::addArg (const std::string& arg)
    {
      // How often have I said to you that when you have eliminated the
      // impossible, whatever remains, however improbable, must be the truth?
-      std::vector <std::pair <std::string, Lexer::Type> >::iterator l;
+      std::vector <std::pair <std::string, Lexer2::Type> >::iterator l;
      for (l = lexemes.begin (); l != lexemes.end (); ++l)
        _original_args.push_back (l->first);
    }
@ -714,9 +713,7 @@ void CLI::aliasExpansion ()
      {
        if (_aliases.find (raw) != _aliases.end ())
        {
-          std::vector <std::string> lexed;
-          Lexer::token_split (lexed, _aliases[raw]);
-
+          std::vector <std::string> lexed = Lexer2::split (_aliases[raw]);
          std::vector <std::string>::iterator l;
          for (l = lexed.begin (); l != lexed.end (); ++l)
          {
@ -1815,8 +1812,7 @@ void CLI::injectDefaults ()
      if (defaultCommand != "")
      {
        // Split the defaultCommand into separate args.
-        std::vector <std::string> tokens;
-        Lexer::token_split (tokens, defaultCommand);
+        std::vector <std::string> tokens = Lexer2::split (defaultCommand);

        // Modify _args to be:   <args0> [<def0> ...] <args1> [...]
        std::vector <A> reconstructed;
@ -2306,9 +2302,9 @@ bool CLI::isName (const std::string& raw) const
  {
    for (int i = 0; i < raw.length (); ++i)
    {
-      if (i == 0 && ! Lexer::is_ident_start (raw[i]))
+      if (i == 0 && ! Lexer2::isIdentifierStart (raw[i]))
        return false;
-      else if (! Lexer::is_ident (raw[i]))
+      else if (! Lexer2::isIdentifierNext (raw[i]))
        return false;
    }

@ -2320,19 +2316,19 @@ bool CLI::isName (const std::string& raw) const

 ////////////////////////////////////////////////////////////////////////////////
 bool CLI::disqualifyInsufficientTerms (
-  const std::vector <std::pair <std::string, Lexer::Type> >& lexemes) const
+  const std::vector <std::pair <std::string, Lexer2::Type> >& lexemes) const
 {
  return lexemes.size () < 3 ? true : false;
 }

 ////////////////////////////////////////////////////////////////////////////////
 bool CLI::disqualifyNoOps (
-  const std::vector <std::pair <std::string, Lexer::Type> >& lexemes) const
+  const std::vector <std::pair <std::string, Lexer2::Type> >& lexemes) const
 {
  bool foundOP = false;
-  std::vector <std::pair <std::string, Lexer::Type> >::const_iterator l;
+  std::vector <std::pair <std::string, Lexer2::Type> >::const_iterator l;
  for (l = lexemes.begin (); l != lexemes.end (); ++l)
-    if (l->second == Lexer::typeOperator)
+    if (l->second == Lexer2::Type::op)
      foundOP = true;

  return ! foundOP;
@ -2340,16 +2336,16 @@ bool CLI::disqualifyNoOps (

 ////////////////////////////////////////////////////////////////////////////////
 bool CLI::disqualifyOnlyParenOps (
-  const std::vector <std::pair <std::string, Lexer::Type> >& lexemes) const
+  const std::vector <std::pair <std::string, Lexer2::Type> >& lexemes) const
 {
  int opCount      = 0;
  int opSugarCount = 0;
  int opParenCount = 0;

-  std::vector <std::pair <std::string, Lexer::Type> >::const_iterator l;
+  std::vector <std::pair <std::string, Lexer2::Type> >::const_iterator l;
  for (l = lexemes.begin (); l != lexemes.end (); ++l)
  {
-    if (l->second == Lexer::typeOperator)
+    if (l->second == Lexer2::Type::op)
    {
      ++opCount;

@ -2376,7 +2372,7 @@ bool CLI::disqualifyOnlyParenOps (
 // as there are no operators in between, which includes syntactic sugar that
 // hides operators.
 bool CLI::disqualifyFirstLastBinary (
-  const std::vector <std::pair <std::string, Lexer::Type> >& lexemes) const
+  const std::vector <std::pair <std::string, Lexer2::Type> >& lexemes) const
 {
  bool firstBinary = false;
  bool lastBinary  = false;
@ -2395,7 +2391,7 @@ bool CLI::disqualifyFirstLastBinary (
 ////////////////////////////////////////////////////////////////////////////////
 // Disqualify terms when there are operators hidden by syntactic sugar.
 bool CLI::disqualifySugarFree (
-  const std::vector <std::pair <std::string, Lexer::Type> >& lexemes) const
+  const std::vector <std::pair <std::string, Lexer2::Type> >& lexemes) const
 {
  bool sugared = true;
  for (unsigned int i = 1; i < lexemes.size () - 1; ++i)
--- a/src/CLI.h
+++ b/src/CLI.h
@ -29,7 +29,7 @@
 #include <string>
 #include <vector>
 #include <map>
-#include <Lexer.h>
+#include <Lexer2.h>
 #include <Path.h>
 #include <File.h>

@ -126,11 +126,11 @@ private:
  bool isOperator       (const std::string&) const;
  bool isName           (const std::string&) const;

-  bool disqualifyInsufficientTerms (const std::vector <std::pair <std::string, Lexer::Type> >&) const;
-  bool disqualifyNoOps             (const std::vector <std::pair <std::string, Lexer::Type> >&) const;
-  bool disqualifyOnlyParenOps      (const std::vector <std::pair <std::string, Lexer::Type> >&) const;
-  bool disqualifyFirstLastBinary   (const std::vector <std::pair <std::string, Lexer::Type> >&) const;
-  bool disqualifySugarFree         (const std::vector <std::pair <std::string, Lexer::Type> >&) const;
+  bool disqualifyInsufficientTerms (const std::vector <std::pair <std::string, Lexer2::Type> >&) const;
+  bool disqualifyNoOps             (const std::vector <std::pair <std::string, Lexer2::Type> >&) const;
+  bool disqualifyOnlyParenOps      (const std::vector <std::pair <std::string, Lexer2::Type> >&) const;
+  bool disqualifyFirstLastBinary   (const std::vector <std::pair <std::string, Lexer2::Type> >&) const;
+  bool disqualifySugarFree         (const std::vector <std::pair <std::string, Lexer2::Type> >&) const;

 public:
  std::multimap <std::string, std::string> _entities;
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -20,7 +20,6 @@ set (task_SRCS CLI.cpp CLI.h
               Hooks.cpp Hooks.h
               ISO8601.cpp ISO8601.h
               JSON.cpp JSON.h
-               Lexer.cpp Lexer.h
               Lexer2.cpp Lexer2.h
               Msg.cpp Msg.h
               Nibbler.cpp Nibbler.h
--- a/src/Context.cpp
+++ b/src/Context.cpp
@ -657,8 +657,8 @@ void Context::staticInitialization ()

  Task::searchCaseSensitive = Variant::searchCaseSensitive = config.getBoolean ("search.case.sensitive");
  Task::regex               = Variant::searchUsingRegex    = config.getBoolean ("regex");
-  Lexer::dateFormat         = Variant::dateFormat          = config.get ("dateformat");
-  Lexer::isoEnabled         = Variant::isoEnabled          = config.getBoolean ("date.iso");
+  Lexer2::dateFormat        = Variant::dateFormat          = config.get ("dateformat");
+  Lexer2::isoEnabled        = Variant::isoEnabled          = config.getBoolean ("date.iso");

  std::map <std::string, Column*>::iterator i;
  for (i = columns.begin (); i != columns.end (); ++i)
--- a/src/Eval.cpp
+++ b/src/Eval.cpp
@ -125,13 +125,13 @@ void Eval::addSource (bool (*source)(const std::string&, Variant&))
 void Eval::evaluateInfixExpression (const std::string& e, Variant& v) const
 {
  // Reduce e to a vector of tokens.
-  Lexer l (e);
+  Lexer2 l (e);
  l.ambiguity (_ambiguity);
-  std::vector <std::pair <std::string, Lexer::Type> > tokens;
+  std::vector <std::pair <std::string, Lexer2::Type> > tokens;
  std::string token;
-  Lexer::Type type;
+  Lexer2::Type type;
  while (l.token (token, type))
-    tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
+    tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type));

  // Parse for syntax checking and operator replacement.
  if (_debug)
@ -153,13 +153,13 @@ void Eval::evaluateInfixExpression (const std::string& e, Variant& v) const
 void Eval::evaluatePostfixExpression (const std::string& e, Variant& v) const
 {
  // Reduce e to a vector of tokens.
-  Lexer l (e);
+  Lexer2 l (e);
  l.ambiguity (_ambiguity);
-  std::vector <std::pair <std::string, Lexer::Type> > tokens;
+  std::vector <std::pair <std::string, Lexer2::Type> > tokens;
  std::string token;
-  Lexer::Type type;
+  Lexer2::Type type;
  while (l.token (token, type))
-    tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
+    tokens.push_back (std::pair <std::string, Lexer2::Type> (token, type));

  if (_debug)
    context.debug ("[1;37;42mFILTER[0m Postfix      " + dump (tokens));
@ -172,15 +172,15 @@ void Eval::evaluatePostfixExpression (const std::string& e, Variant& v) const
 void Eval::compileExpression (const std::string& e)
 {
  // Reduce e to a vector of tokens.
-  Lexer l (e);
+  Lexer2 l (e);
  l.ambiguity (_ambiguity);
  std::string token;
-  Lexer::Type type;
+  Lexer2::Type type;
  while (l.token (token, type))
  {
    if (_debug)
-      context.debug ("Lexer '" + token + "' " + Lexer::type_name (type));
-    _compiled.push_back (std::pair <std::string, Lexer::Type> (token, type));
+      context.debug ("Lexer '" + token + "' " + Lexer2::typeToString (type));
+    _compiled.push_back (std::pair <std::string, Lexer2::Type> (token, type));
  }

  // Parse for syntax checking and operator replacement.
@ -236,7 +236,7 @@ void Eval::getBinaryOperators (std::vector <std::string>& all)

 ////////////////////////////////////////////////////////////////////////////////
 void Eval::evaluatePostfixStack (
-  const std::vector <std::pair <std::string, Lexer::Type> >& tokens,
+  const std::vector <std::pair <std::string, Lexer2::Type> >& tokens,
  Variant& result) const
 {
  if (tokens.size () == 0)
@ -245,11 +245,11 @@ void Eval::evaluatePostfixStack (
  // This is the stack used by the postfix evaluator.
  std::vector <Variant> values;

-  std::vector <std::pair <std::string, Lexer::Type> >::const_iterator token;
+  std::vector <std::pair <std::string, Lexer2::Type> >::const_iterator token;
  for (token = tokens.begin (); token != tokens.end (); ++token)
  {
    // Unary operators.
-    if (token->second == Lexer::typeOperator &&
+    if (token->second == Lexer2::Type::op &&
        token->first == "!")
    {
      if (values.size () < 1)
@ -262,7 +262,7 @@ void Eval::evaluatePostfixStack (
      if (_debug)
        context.debug (format ("Eval {1} ↓'{2}' → ↑'{3}'", token->first, (std::string) right, (std::string) result));
    }
-    else if (token->second == Lexer::typeOperator &&
+    else if (token->second == Lexer2::Type::op &&
             token->first == "_neg_")
    {
      if (values.size () < 1)
@ -278,7 +278,7 @@ void Eval::evaluatePostfixStack (
      if (_debug)
        context.debug (format ("Eval {1} ↓'{2}' → ↑'{3}'", token->first, (std::string) right, (std::string) result));
    }
-    else if (token->second == Lexer::typeOperator &&
+    else if (token->second == Lexer2::Type::op &&
             token->first == "_pos_")
    {
      // The _pos_ operator is a NOP.
@ -287,7 +287,7 @@ void Eval::evaluatePostfixStack (
    }

    // Binary operators.
-    else if (token->second == Lexer::typeOperator)
+    else if (token->second == Lexer2::Type::op)
    {
      if (values.size () < 2)
        throw std::string (STRING_EVAL_NO_EVAL);
@ -338,24 +338,27 @@ void Eval::evaluatePostfixStack (
      Variant v (token->first);
      switch (token->second)
      {
-      case Lexer::typeNumber:
-      case Lexer::typeHex:
-        v.cast (Variant::type_integer);
-        if (_debug)
-          context.debug (format ("Eval literal number ↑'{1}'", (std::string) v));
+      case Lexer2::Type::number:
+        if (Lexer2::isAllDigits (token->first))
+        {
+          v.cast (Variant::type_integer);
+          if (_debug)
+            context.debug (format ("Eval literal number ↑'{1}'", (std::string) v));
+        }
+        else
+        {
+          v.cast (Variant::type_real);
+          if (_debug)
+            context.debug (format ("Eval literal decimal ↑'{1}'", (std::string) v));
+        }
        break;

-      case Lexer::typeDecimal:
-        v.cast (Variant::type_real);
-        if (_debug)
-          context.debug (format ("Eval literal decimal ↑'{1}'", (std::string) v));
-        break;

-      case Lexer::typeOperator:
+      case Lexer2::Type::op:
        throw std::string (STRING_EVAL_OP_EXPECTED);
        break;

-      case Lexer::typeIdentifier:
+      case Lexer2::Type::identifier:
        {
          bool found = false;
          std::vector <bool (*)(const std::string&, Variant&)>::const_iterator source;
@ -380,20 +383,33 @@ void Eval::evaluatePostfixStack (
        }
        break;

-      case Lexer::typeDate:
+      case Lexer2::Type::date:
        v.cast (Variant::type_date);
        if (_debug)
          context.debug (format ("Eval literal date ↑'{1}'", (std::string) v));
        break;

-      case Lexer::typeDuration:
+      case Lexer2::Type::duration:
        v.cast (Variant::type_duration);
        if (_debug)
          context.debug (format ("Eval literal duration ↑'{1}'", (std::string) v));
        break;

      // Nothing to do.
-      case Lexer::typeString:
+/*
+      case Lexer2::Type::uuid:
+      case Lexer2::Type::hex:
+      case Lexer2::Type::list:
+      case Lexer2::Type::url:
+      case Lexer2::Type::pair:
+      case Lexer2::Type::separator:
+      case Lexer2::Type::tag:
+      case Lexer2::Type::path:
+      case Lexer2::Type::substitution:
+      case Lexer2::Type::pattern:
+      case Lexer2::Type::word:
+*/
+      case Lexer2::Type::string:
      default:
        if (_debug)
          context.debug (format ("Eval literal string ↑'{1}'", (std::string) v));
@ -427,7 +443,7 @@ void Eval::evaluatePostfixStack (
 //   Primitive   --> "(" Logical ")" | Variant
 //
 void Eval::infixParse (
-  std::vector <std::pair <std::string, Lexer::Type> >& infix) const
+  std::vector <std::pair <std::string, Lexer2::Type> >& infix) const
 {
  int i = 0;
  parseLogical (infix, i);
@ -436,17 +452,17 @@ void Eval::infixParse (
 ////////////////////////////////////////////////////////////////////////////////
 // Logical     --> Regex {( "and" | "or" | "xor" ) Regex}
 bool Eval::parseLogical (
-  std::vector <std::pair <std::string, Lexer::Type> >& infix,
+  std::vector <std::pair <std::string, Lexer2::Type> >& infix,
  int &i) const
 {
  if (i < infix.size () &&
      parseRegex (infix, i))
  {
    while (i < infix.size () &&
+           infix[i].second == Lexer2::Type::op &&
           (infix[i].first == "and"  ||
            infix[i].first == "or"   ||
-            infix[i].first == "xor") &&
-           infix[i].second == Lexer::typeOperator)
+            infix[i].first == "xor"))
    {
      ++i;
      if (! parseRegex (infix, i))
@ -462,16 +478,16 @@ bool Eval::parseLogical (
 ////////////////////////////////////////////////////////////////////////////////
 // Regex       --> Equality {( "~" | "!~" ) Equality}
 bool Eval::parseRegex (
-  std::vector <std::pair <std::string, Lexer::Type> >& infix,
+  std::vector <std::pair <std::string, Lexer2::Type> >& infix,
  int &i) const
 {
  if (i < infix.size () &&
      parseEquality (infix, i))
  {
    while (i < infix.size () &&
+           infix[i].second == Lexer2::Type::op &&
           (infix[i].first == "~" ||
-            infix[i].first == "!~") &&
-           infix[i].second == Lexer::typeOperator)
+            infix[i].first == "!~"))
    {
      ++i;
      if (! parseEquality (infix, i))
@ -487,18 +503,18 @@ bool Eval::parseRegex (
 ////////////////////////////////////////////////////////////////////////////////
 // Equality    --> Comparative {( "==" | "=" | "!==" | "!=" ) Comparative}
 bool Eval::parseEquality (
-  std::vector <std::pair <std::string, Lexer::Type> >& infix,
+  std::vector <std::pair <std::string, Lexer2::Type> >& infix,
  int &i) const
 {
  if (i < infix.size () &&
      parseComparative (infix, i))
  {
    while (i < infix.size () &&
+           infix[i].second == Lexer2::Type::op &&
           (infix[i].first == "=="  ||
            infix[i].first == "="   ||
            infix[i].first == "!==" ||
-            infix[i].first == "!=") &&
-           infix[i].second == Lexer::typeOperator)
+            infix[i].first == "!="))
    {
      ++i;
      if (! parseComparative (infix, i))
@ -514,18 +530,18 @@ bool Eval::parseEquality (
 ////////////////////////////////////////////////////////////////////////////////
 // Comparative --> Arithmetic {( "<=" | "<" | ">=" | ">" ) Arithmetic}
 bool Eval::parseComparative (
-  std::vector <std::pair <std::string, Lexer::Type> >& infix,
+  std::vector <std::pair <std::string, Lexer2::Type> >& infix,
  int &i) const
 {
  if (i < infix.size () &&
      parseArithmetic (infix, i))
  {
    while (i < infix.size () &&
+           infix[i].second == Lexer2::Type::op &&
           (infix[i].first == "<=" ||
            infix[i].first == "<"  ||
            infix[i].first == ">=" ||
-            infix[i].first == ">") &&
-           infix[i].second == Lexer::typeOperator)
+            infix[i].first == ">"))
    {
      ++i;
      if (! parseArithmetic (infix, i))
@ -541,16 +557,16 @@ bool Eval::parseComparative (
 ////////////////////////////////////////////////////////////////////////////////
 // Arithmetic  --> Geometric {( "+" | "-" ) Geometric}
 bool Eval::parseArithmetic (
-  std::vector <std::pair <std::string, Lexer::Type> >& infix,
+  std::vector <std::pair <std::string, Lexer2::Type> >& infix,
  int &i) const
 {
  if (i < infix.size () &&
      parseGeometric (infix, i))
  {
    while (i < infix.size () &&
+           infix[i].second == Lexer2::Type::op &&
           (infix[i].first == "+" ||
-            infix[i].first == "-") &&
-           infix[i].second == Lexer::typeOperator)
+            infix[i].first == "-"))
    {
      ++i;
      if (! parseGeometric (infix, i))
@ -566,17 +582,17 @@ bool Eval::parseArithmetic (
 ////////////////////////////////////////////////////////////////////////////////
 // Geometric   --> Tag {( "*" | "/" | "%" ) Tag}
 bool Eval::parseGeometric (
-  std::vector <std::pair <std::string, Lexer::Type> >& infix,
+  std::vector <std::pair <std::string, Lexer2::Type> >& infix,
  int &i) const
 {
  if (i < infix.size () &&
      parseTag (infix, i))
  {
    while (i < infix.size () &&
+           infix[i].second == Lexer2::Type::op &&
           (infix[i].first == "*" ||
            infix[i].first == "/" ||
-            infix[i].first == "%") &&
-           infix[i].second == Lexer::typeOperator)
+            infix[i].first == "%"))
    {
      ++i;
      if (! parseTag (infix, i))
@ -592,16 +608,16 @@ bool Eval::parseGeometric (
 ////////////////////////////////////////////////////////////////////////////////
 // Tag         --> Unary {( "_hastag_" | "_notag_" ) Unary}
 bool Eval::parseTag (
-  std::vector <std::pair <std::string, Lexer::Type> >& infix,
+  std::vector <std::pair <std::string, Lexer2::Type> >& infix,
  int &i) const
 {
  if (i < infix.size () &&
      parseUnary (infix, i))
  {
    while (i < infix.size () &&
+           infix[i].second == Lexer2::Type::op &&
           (infix[i].first == "_hastag_" ||
-            infix[i].first == "_notag_") &&
-           infix[i].second == Lexer::typeOperator)
+            infix[i].first == "_notag_"))
    {
      ++i;
      if (! parseUnary (infix, i))
@ -617,7 +633,7 @@ bool Eval::parseTag (
 ////////////////////////////////////////////////////////////////////////////////
 // Unary       --> [( "-" | "+" | "!" )] Exponent
 bool Eval::parseUnary (
-  std::vector <std::pair <std::string, Lexer::Type> >& infix,
+  std::vector <std::pair <std::string, Lexer2::Type> >& infix,
  int &i) const
 {
  if (i < infix.size ())
@ -644,15 +660,15 @@ bool Eval::parseUnary (
 ////////////////////////////////////////////////////////////////////////////////
 // Exponent    --> Primitive ["^" Primitive]
 bool Eval::parseExponent (
-  std::vector <std::pair <std::string, Lexer::Type> >& infix,
+  std::vector <std::pair <std::string, Lexer2::Type> >& infix,
  int &i) const
 {
  if (i < infix.size () &&
      parsePrimitive (infix, i))
  {
    while (i < infix.size () &&
-           infix[i].first == "^" &&
-           infix[i].second == Lexer::typeOperator)
+           infix[i].second == Lexer2::Type::op &&
+           infix[i].first == "^")
    {
      ++i;
      if (! parsePrimitive (infix, i))
@ -668,7 +684,7 @@ bool Eval::parseExponent (
 ////////////////////////////////////////////////////////////////////////////////
 // Primitive   --> "(" Logical ")" | Variant
 bool Eval::parsePrimitive (
-  std::vector <std::pair <std::string, Lexer::Type> >& infix,
+  std::vector <std::pair <std::string, Lexer2::Type> >& infix,
  int &i) const
 {
  if (i < infix.size ())
@ -706,7 +722,7 @@ bool Eval::parsePrimitive (
        ++i;
        return true;
      }
-      else if (infix[i].second != Lexer::typeOperator)
+      else if (infix[i].second != Lexer2::Type::op)
      {
        ++i;
        return true;
@ -750,32 +766,32 @@ bool Eval::parsePrimitive (
 //   Exit.
 //
 void Eval::infixToPostfix (
-  std::vector <std::pair <std::string, Lexer::Type> >& infix) const
+  std::vector <std::pair <std::string, Lexer2::Type> >& infix) const
 {
  // Short circuit.
  if (infix.size () == 1)
    return;

  // Result.
-  std::vector <std::pair <std::string, Lexer::Type> > postfix;
+  std::vector <std::pair <std::string, Lexer2::Type> > postfix;

  // Shunting yard.
-  std::vector <std::pair <std::string, Lexer::Type> > op_stack;
+  std::vector <std::pair <std::string, Lexer2::Type> > op_stack;

  // Operator characteristics.
  char type;
  int precedence;
  char associativity;

-  std::vector <std::pair <std::string, Lexer::Type> >::iterator token;
+  std::vector <std::pair <std::string, Lexer2::Type> >::iterator token;
  for (token = infix.begin (); token != infix.end (); ++token)
  {
-    if (token->second == Lexer::typeOperator &&
+    if (token->second == Lexer2::Type::op &&
        token->first == "(")
    {
      op_stack.push_back (*token);
    }
-    else if (token->second == Lexer::typeOperator &&
+    else if (token->second == Lexer2::Type::op &&
             token->first == ")")
    {
      while (op_stack.size () &&
@ -790,7 +806,7 @@ void Eval::infixToPostfix (
      else
        throw std::string ("Mismatched parentheses in expression");
    }
-    else if (token->second == Lexer::typeOperator &&
+    else if (token->second == Lexer2::Type::op &&
             identifyOperator (token->first, type, precedence, associativity))
    {
      char type2;
@ -849,22 +865,20 @@ bool Eval::identifyOperator (

 ////////////////////////////////////////////////////////////////////////////////
 std::string Eval::dump (
-  std::vector <std::pair <std::string, Lexer::Type> >& tokens) const
+  std::vector <std::pair <std::string, Lexer2::Type> >& tokens) const
 {
  // Set up a color mapping.
-  std::map <Lexer::Type, Color> color_map;
-  color_map[Lexer::typeNone]       = Color ("rgb000 on gray6");
-  color_map[Lexer::typeOperator]   = Color ("gray14 on gray6");
-  color_map[Lexer::typeNumber]     = Color ("rgb530 on gray6");
-  color_map[Lexer::typeHex]        = Color ("rgb303 on gray6");
-  color_map[Lexer::typeDecimal]    = Color ("rgb530 on gray6");
-  color_map[Lexer::typeString]     = Color ("rgb550 on gray6");
-  color_map[Lexer::typeIdentifier] = Color ("rgb035 on gray6");
-  color_map[Lexer::typeDate]       = Color ("rgb150 on gray6");
-  color_map[Lexer::typeDuration]   = Color ("rgb531 on gray6");
+  std::map <Lexer2::Type, Color> color_map;
+  color_map[Lexer2::Type::op]         = Color ("gray14 on gray6");
+  color_map[Lexer2::Type::number]     = Color ("rgb530 on gray6");
+  color_map[Lexer2::Type::hex]        = Color ("rgb303 on gray6");
+  color_map[Lexer2::Type::string]     = Color ("rgb550 on gray6");
+  color_map[Lexer2::Type::identifier] = Color ("rgb035 on gray6");
+  color_map[Lexer2::Type::date]       = Color ("rgb150 on gray6");
+  color_map[Lexer2::Type::duration]   = Color ("rgb531 on gray6");

  std::string output;
-  std::vector <std::pair <std::string, Lexer::Type> >::const_iterator i;
+  std::vector <std::pair <std::string, Lexer2::Type> >::const_iterator i;
  for (i = tokens.begin (); i != tokens.end (); ++i)
  {
    if (i != tokens.begin ())
@ -874,7 +888,7 @@ std::string Eval::dump (
    if (color_map[i->second].nontrivial ())
      c = color_map[i->second];
    else
-      c = color_map[Lexer::typeNone];
+      c = Color ("rgb000 on gray6");

    output += c.colorize (i->first);
  }
--- a/src/Eval.h
+++ b/src/Eval.h
@ -29,7 +29,7 @@

 #include <vector>
 #include <string>
-#include <Lexer.h>
+#include <Lexer2.h>
 #include <Variant.h>

 class Eval
@ -53,28 +53,28 @@ public:
  static void getBinaryOperators (std::vector <std::string>&);

 private:
-  void evaluatePostfixStack (const std::vector <std::pair <std::string, Lexer::Type> >&, Variant&) const;
-  void infixToPostfix (std::vector <std::pair <std::string, Lexer::Type> >&) const;
-  void infixParse (std::vector <std::pair <std::string, Lexer::Type> >&) const;
-  bool parseLogical (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
-  bool parseRegex (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
-  bool parseEquality (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
-  bool parseComparative (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
-  bool parseArithmetic (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
-  bool parseGeometric (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
-  bool parseTag (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
-  bool parseUnary (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
-  bool parseExponent (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
-  bool parsePrimitive (std::vector <std::pair <std::string, Lexer::Type> >&, int &) const;
+  void evaluatePostfixStack (const std::vector <std::pair <std::string, Lexer2::Type> >&, Variant&) const;
+  void infixToPostfix (std::vector <std::pair <std::string, Lexer2::Type> >&) const;
+  void infixParse (std::vector <std::pair <std::string, Lexer2::Type> >&) const;
+  bool parseLogical (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const;
+  bool parseRegex (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const;
+  bool parseEquality (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const;
+  bool parseComparative (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const;
+  bool parseArithmetic (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const;
+  bool parseGeometric (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const;
+  bool parseTag (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const;
+  bool parseUnary (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const;
+  bool parseExponent (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const;
+  bool parsePrimitive (std::vector <std::pair <std::string, Lexer2::Type> >&, int &) const;
  bool identifyOperator (const std::string&, char&, int&, char&) const;

-  std::string dump (std::vector <std::pair <std::string, Lexer::Type> >&) const;
+  std::string dump (std::vector <std::pair <std::string, Lexer2::Type> >&) const;

 private:
  std::vector <bool (*)(const std::string&, Variant&)> _sources;
  bool _ambiguity;
  bool _debug;
-  std::vector <std::pair <std::string, Lexer::Type> > _compiled;
+  std::vector <std::pair <std::string, Lexer2::Type> > _compiled;
 };


--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@ -1,898 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// Copyright 2013 - 2015, Paul Beckingham, Federico Hernandez.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included
-// in all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE.
-//
-// http://www.opensource.org/licenses/mit-license.php
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#include <cmake.h>
-#include <ctype.h>
-#include <utf8.h>
-#include <ISO8601.h>
-#include <Date.h>
-#include <Duration.h>
-#include <Lexer.h>
-#include <i18n.h>
-
-std::string Lexer::dateFormat = "";
-bool Lexer::isoEnabled = true;
-
-////////////////////////////////////////////////////////////////////////////////
-Lexer::Lexer (const std::string& input)
-: _input (input)
-, _i (0)
-, _shift_counter (0)
-, _n0 (32)
-, _n1 (32)
-, _n2 (32)
-, _n3 (32)
-, _boundary01 (false)
-, _boundary12 (false)
-, _boundary23 (false)
-, _ambiguity (true)
-{
-  // Read 4 chars in preparation.  Even if there are < 4.  Take a deep breath.
-  shift ();
-  shift ();
-  shift ();
-  shift ();
-
-  // Reset because the four shifts above do not represent advancement into the
-  // _input. All subsequent shifts do though.
-  _shift_counter = 0;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-Lexer::~Lexer ()
-{
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// Walk the input string, looking for transitions.
-bool Lexer::token (std::string& result, Type& type)
-{
-  // Start with nothing.
-  result = "";
-
-  // Different types of matching quote:  ', ".
-  int quote = 0;
-
-  type = typeNone;
-  while (_n0)
-  {
-    switch (type)
-    {
-    case typeNone:
-      if (is_ws (_n0))
-        shift ();
-      else if (_n0 == '"' || _n0 == '\'')
-      {
-        type = typeString;
-        quote = _n0;
-        result += utf8_character (_n0);
-        shift ();
-      }
-      else if (_n0 == '0' &&
-               _n1 == 'x' &&
-               is_hex_digit (_n2))
-      {
-        type = typeHex;
-        result += utf8_character (_n0);
-        shift ();
-        result += utf8_character (_n0);
-        shift ();
-        result += utf8_character (_n0);
-        shift ();
-      }
-      else if (is_dec_digit (_n0))
-      {
-        // Speculatively try a date and duration parse.  Longest wins.
-        if (is_date (result))
-        {
-          type = typeDate;
-          return true;
-        }
-
-        if (is_duration (result))
-        {
-          type = typeDuration;
-          return true;
-        }
-
-        type = typeNumber;
-        result += utf8_character (_n0);
-        shift ();
-      }
-      else if (_n0 == '.' && is_dec_digit (_n1))
-      {
-        type = typeDecimal;
-        result += utf8_character (_n0);
-        shift ();
-      }
-      else if ((_n0 == '+' || _n0 == '-') && is_ident_start (_n1))
-      {
-        type = typeTag;
-        result += utf8_character (_n0);
-        shift ();
-      }
-      else if (is_triple_op (_n0, _n1, _n2))
-      {
-        type = typeOperator;
-        result += utf8_character (_n0);
-        shift ();
-        result += utf8_character (_n0);
-        shift ();
-        result += utf8_character (_n0);
-        shift ();
-        return true;
-      }
-      else if (is_double_op (_n0, _n1, _n2))
-      {
-        type = typeOperator;
-        result += utf8_character (_n0);
-        shift ();
-        result += utf8_character (_n0);
-        shift ();
-        return true;
-      }
-      else if (is_single_op (_n0))
-      {
-        type = typeOperator;
-        result += utf8_character (_n0);
-        shift ();
-        return true;
-      }
-      else if (_n0 == '\\')
-      {
-        type = typeIdentifierEscape;
-        shift ();
-      }
-      else if (is_ident_start (_n0))
-      {
-        if (is_date (result))
-        {
-          type = typeDate;
-          return true;
-        }
-
-        if (is_duration (result))
-        {
-          type = typeDuration;
-          return true;
-        }
-
-        type = typeIdentifier;
-        result += utf8_character (_n0);
-        shift ();
-      }
-      else
-        throw std::string (STRING_LEX_IMMEDIATE_UNK);
-      break;
-
-    case typeString:
-      if (_n0 == quote)
-      {
-        result += utf8_character (_n0);
-        shift ();
-        quote = 0;
-        return true;
-      }
-      else if (_n0 == '\\')
-      {
-        type = typeEscape;
-        shift ();
-      }
-      else
-      {
-        result += utf8_character (_n0);
-        shift ();
-      }
-      break;
-
-    case typeTag:
-      if (is_ident_start (_n0))
-      {
-        result += utf8_character (_n0);
-        shift ();
-      }
-      else
-      {
-        return true;
-      }
-      break;
-
-    case typeIdentifier:
-      if (is_ident (_n0))
-      {
-        result += utf8_character (_n0);
-        shift ();
-      }
-      else
-      {
-        // typeIdentifier is a catch-all type. Anything word-like becomes an
-        // identifier. At this point in the processing, an identifier is found,
-        // and can be matched against a list of potential upgrades.
-        if (result == "_hastag_" ||
-            result == "_notag_"  ||
-            result == "_neg_"    ||
-            result == "_pos_")
-          type = typeOperator;
-
-        return true;
-      }
-      break;
-
-    case typeIdentifierEscape:
-      if (_n0 == 'u')
-      {
-        type = typeEscapeUnicode;
-        shift ();
-      }
-      else
-      {
-        type = quote ? typeString : typeIdentifier;
-        result += utf8_character (quote);
-        result += utf8_character (_n0);
-        shift ();
-      }
-      break;
-
-    case typeEscape:
-      if (_n0 == 'x')
-      {
-        type = typeEscapeHex;
-        shift ();
-      }
-      else if (_n0 == 'u')
-      {
-        type = typeEscapeUnicode;
-        shift ();
-      }
-      else
-      {
-        result += '\\';
-        result += utf8_character (_n0);
-        type = quote ? typeString : typeIdentifier;
-        shift ();
-      }
-      break;
-
-    case typeEscapeHex:
-      if (is_hex_digit (_n0) && is_hex_digit (_n1))
-      {
-        result += utf8_character (hex_to_int (_n0, _n1));
-        type = quote ? typeString : typeIdentifier;
-        shift ();
-        shift ();
-      }
-      else
-      {
-        type = quote ? typeString : typeIdentifier;
-        shift ();
-        quote = 0;
-        return true;
-      }
-      break;
-
-    case typeEscapeUnicode:
-      if (is_hex_digit (_n0) &&
-          is_hex_digit (_n1) &&
-          is_hex_digit (_n2) &&
-          is_hex_digit (_n3))
-      {
-        result += utf8_character (hex_to_int (_n0, _n1, _n2, _n3));
-        shift ();
-        shift ();
-        shift ();
-        shift ();
-        type = quote ? typeString : typeIdentifier;
-      }
-      else if (_n0 == quote)
-      {
-        type = typeString;
-        shift ();
-        quote = 0;
-        return true;
-      }
-      break;
-
-    case typeNumber:
-      if (is_dec_digit (_n0))
-      {
-        result += utf8_character (_n0);
-        shift ();
-      }
-      else if (_n0 == '.')
-      {
-        type = typeDecimal;
-        result += utf8_character (_n0);
-        shift ();
-      }
-      else if (_n0 == 'e' || _n0 == 'E')
-      {
-        type = typeExponentIndicator;
-        result += utf8_character (_n0);
-        shift ();
-      }
-      else if (is_ident_start (_n0))
-      {
-        type = typeIdentifier;
-        result += utf8_character (_n0);
-        shift ();
-      }
-      else
-      {
-        return true;
-      }
-      break;
-
-    case typeDecimal:
-      if (is_dec_digit (_n0))
-      {
-        result += utf8_character (_n0);
-        shift ();
-      }
-      else if (_n0 == 'e' || _n0 == 'E')
-      {
-        type = typeExponentIndicator;
-        result += utf8_character (_n0);
-        shift ();
-      }
-      else if (is_ident_start (_n0))
-      {
-        type = typeIdentifier;
-        result += utf8_character (_n0);
-        shift ();
-      }
-      else
-      {
-        return true;
-      }
-      break;
-
-    case typeExponentIndicator:
-      if (_n0 == '+' || _n0 == '-')
-      {
-        result += utf8_character (_n0);
-        shift ();
-      }
-      else if (is_dec_digit (_n0))
-      {
-        type = typeExponent;
-        result += utf8_character (_n0);
-        shift ();
-      }
-      else if (is_ident_start (_n0))
-      {
-        type = typeIdentifier;
-        result += utf8_character (_n0);
-        shift ();
-      }
-      break;
-
-    case typeExponent:
-      if (is_dec_digit (_n0) || _n0 == '.')
-      {
-        result += utf8_character (_n0);
-        shift ();
-      }
-      else
-      {
-        type = typeDecimal;
-        return true;
-      }
-      break;
-
-    case typeHex:
-      if (is_hex_digit (_n0))
-      {
-        result += utf8_character (_n0);
-        shift ();
-      }
-      else
-      {
-        return true;
-      }
-      break;
-
-    default:
-      throw std::string (STRING_LEX_TYPE_UNK);
-      break;
-    }
-
-    // Fence post.
-    if (!_n0 && result != "")
-      return true;
-  }
-
-  return false;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// Just like Lexer::token, but no operators, numbers, dates or durations.
-bool Lexer::word (std::string& token, Type& type)
-{
-  // Start with nothing.
-  token = "";
-
-  // Different types of matching quote:  ', ".
-  int quote = 0;
-
-  type = typeNone;
-  while (_n0)
-  {
-    switch (type)
-    {
-    case typeNone:
-      if (is_ws (_n0))
-        shift ();
-      else if (_n0 == '"' || _n0 == '\'')
-      {
-        type = typeString;
-        quote = _n0;
-        token += utf8_character (_n0);
-        shift ();
-      }
-      else
-      {
-        type = typeString;
-        token += utf8_character (_n0);
-        shift ();
-      }
-      break;
-
-    case typeString:
-      if (_n0 == quote)
-      {
-        token += utf8_character (_n0);
-        shift ();
-        quote = 0;
-        return true;
-      }
-      else if (_n0 == '\\')
-      {
-        type = typeEscape;
-        shift ();
-      }
-      else if (! quote && is_ws (_n0))
-      {
-        shift ();
-        return true;
-      }
-      else
-      {
-        token += utf8_character (_n0);
-        shift ();
-      }
-      break;
-
-    case typeEscape:
-      if (_n0 == 'x')
-      {
-        type = typeEscapeHex;
-        shift ();
-      }
-      else if (_n0 == 'u')
-      {
-        type = typeEscapeUnicode;
-        shift ();
-      }
-      else
-      {
-        token += '\\';
-        token += utf8_character (_n0);
-        type = typeString;
-        shift ();
-      }
-      break;
-
-    case typeEscapeHex:
-      if (is_hex_digit (_n0) && is_hex_digit (_n1))
-      {
-        token += utf8_character (hex_to_int (_n0, _n1));
-        type = typeString;
-        shift ();
-        shift ();
-      }
-      else
-      {
-        type = typeString;
-        shift ();
-        quote = 0;
-        return true;
-      }
-      break;
-
-    case typeEscapeUnicode:
-      if (is_hex_digit (_n0) &&
-          is_hex_digit (_n1) &&
-          is_hex_digit (_n2) &&
-          is_hex_digit (_n3))
-      {
-        token += utf8_character (hex_to_int (_n0, _n1, _n2, _n3));
-        shift ();
-        shift ();
-        shift ();
-        shift ();
-        type = typeString;
-      }
-      else if (_n0 == quote)
-      {
-        type = typeString;
-        shift ();
-        quote = 0;
-        return true;
-      }
-      break;
-
-    default:
-      throw std::string (STRING_LEX_TYPE_UNK);
-      break;
-    }
-
-    // Fence post.
-    if (!_n0 && token != "")
-      return true;
-  }
-
-  return false;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-void Lexer::ambiguity (bool value)
-{
-  _ambiguity = value;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// No L10N - these are for internal purposes.
-const std::string Lexer::type_name (const Type& type)
-{
-  switch (type)
-  {
-  case Lexer::typeNone:              return "None";
-  case Lexer::typeString:            return "String";
-  case Lexer::typeIdentifier:        return "Identifier";
-  case Lexer::typeIdentifierEscape:  return "IdentifierEscape";
-  case Lexer::typeNumber:            return "Number";
-  case Lexer::typeDecimal:           return "Decimal";
-  case Lexer::typeExponentIndicator: return "ExponentIndicator";
-  case Lexer::typeExponent:          return "Exponent";
-  case Lexer::typeHex:               return "Hex";
-  case Lexer::typeOperator:          return "Operator";
-  case Lexer::typeEscape:            return "Escape";
-  case Lexer::typeEscapeHex:         return "EscapeHex";
-  case Lexer::typeEscapeUnicode:     return "EscapeUnicode";
-  case Lexer::typeDate:              return "Date";
-  case Lexer::typeDuration:          return "Duration";
-  case Lexer::typeTag:               return "Tag";
-  }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// Complete Unicode whitespace list.
-//
-// http://en.wikipedia.org/wiki/Whitespace_character
-// Updated 2013-11-18
-bool Lexer::is_ws (int c)
-{
-  return (c == 0x0020 ||   // space Common  Separator, space
-          c == 0x0009 ||   // Common  Other, control  HT, Horizontal Tab
-          c == 0x000A ||   // Common  Other, control  LF, Line feed
-          c == 0x000B ||   // Common  Other, control  VT, Vertical Tab
-          c == 0x000C ||   // Common  Other, control  FF, Form feed
-          c == 0x000D ||   // Common  Other, control  CR, Carriage return
-          c == 0x0085 ||   // Common  Other, control  NEL, Next line
-          c == 0x00A0 ||   // no-break space  Common  Separator, space
-          c == 0x1680 ||   // ogham space mark  Ogham Separator, space
-          c == 0x180E ||   // mongolian vowel separator Mongolian Separator, space
-          c == 0x2000 ||   // en quad Common  Separator, space
-          c == 0x2001 ||   // em quad Common  Separator, space
-          c == 0x2002 ||   // en space  Common  Separator, space
-          c == 0x2003 ||   // em space  Common  Separator, space
-          c == 0x2004 ||   // three-per-em space  Common  Separator, space
-          c == 0x2005 ||   // four-per-em space Common  Separator, space
-          c == 0x2006 ||   // six-per-em space  Common  Separator, space
-          c == 0x2007 ||   // figure space  Common  Separator, space
-          c == 0x2008 ||   // punctuation space Common  Separator, space
-          c == 0x2009 ||   // thin space  Common  Separator, space
-          c == 0x200A ||   // hair space  Common  Separator, space
-          c == 0x2028 ||   // line separator  Common  Separator, line
-          c == 0x2029 ||   // paragraph separator Common  Separator, paragraph
-          c == 0x202F ||   // narrow no-break space Common  Separator, space
-          c == 0x205F ||   // medium mathematical space Common  Separator, space
-          c == 0x3000);    // ideographic space Common  Separator, space
-}
-
-////////////////////////////////////////////////////////////////////////////////
-bool Lexer::is_ident_start (int c)
-{
-  return c           &&       // Include null character check.
-         ! is_ws (c) &&
-         ! is_dec_digit (c) &&
-         ! is_single_op (c);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-bool Lexer::is_ident (int c)
-{
-  return c           &&       // Include null character check.
-         ! is_ws (c) &&
-         ! is_single_op (c);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-bool Lexer::is_single_op (int c)
-{
-  return c == '+' ||
-         c == '-' ||
-         c == '*' ||
-         c == '/' ||
-         c == '(' ||
-         c == ')' ||
-         c == '<' ||
-         c == '>' ||
-         c == '^' ||
-         c == '!' ||
-         c == '%' ||
-         c == '=' ||
-         c == '~';
-}
-
-////////////////////////////////////////////////////////////////////////////////
-bool Lexer::is_dec_digit (int c)
-{
-  return c >= '0' && c <= '9';
-}
-
-////////////////////////////////////////////////////////////////////////////////
-bool Lexer::boundary (int left, int right)
-{
-  // XOR
-  if (isalpha (left) != isalpha (right)) return true;
-  if (isdigit (left) != isdigit (right)) return true;
-  if (isspace (left) != isspace (right)) return true;
-
-  // OR
-  if (ispunct (left)  || ispunct (right))  return true;
-
-  return false;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// Split 'input' into 'words' on Lexer::is_ws boundaries, observing quotes.
-void Lexer::word_split (std::vector <std::string>& words, const std::string& input)
-{
-  words.clear ();
-
-  std::string word;
-  Lexer::Type type;
-  Lexer lex (input);
-  while (lex.word (word, type))
-    words.push_back (word);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// Split 'input' into 'tokens'.
-void Lexer::token_split (std::vector <std::string>& words, const std::string& input)
-{
-  words.clear ();
-
-  std::string word;
-  Lexer::Type type;
-  Lexer lex (input);
-  while (lex.token (word, type))
-    words.push_back (word);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// Split 'input' into 'tokens', preserving type.
-void Lexer::token_split (std::vector <std::pair <std::string, Lexer::Type> >& lexemes, const std::string& input)
-{
-  lexemes.clear ();
-
-  std::string word;
-  Lexer::Type type;
-  Lexer lex (input);
-  while (lex.token (word, type))
-    lexemes.push_back (std::pair <std::string, Lexer::Type>(word, type));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-void Lexer::dequote (std::string& input)
-{
-  int quote = input[0];
-  size_t len = input.length ();
-  if ((quote == '\'' || quote == '"') &&
-      quote == input[len - 1])
-  {
-    input = input.substr (1, len - 2);
-  }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-bool Lexer::is_date (std::string& result)
-{
-  // Try an ISO date parse.
-  if (isoEnabled)
-  {
-    std::string::size_type iso_i = 0;
-    std::string iso_result;
-    ISO8601d iso;
-    iso.ambiguity (_ambiguity);
-    if (iso.parse (_input.substr (_shift_counter), iso_i))
-    {
-      result = _input.substr (_shift_counter, iso_i);
-      while (iso_i--) shift ();
-      return true;
-    }
-  }
-
-  // Try a legacy rc.dateformat parse here.
-  if (Lexer::dateFormat != "")
-  {
-    try
-    {
-      std::string::size_type legacy_i = 0;
-      Date legacyDate (_input.substr (_shift_counter), legacy_i, Lexer::dateFormat, false, false);
-      result = _input.substr (_shift_counter, legacy_i);
-      while (legacy_i--) shift ();
-      return true;
-    }
-
-    catch (...) { /* Never mind. */ }
-  }
-
-  return false;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-bool Lexer::is_duration (std::string& result)
-{
-  std::string::size_type iso_i = 0;
-  std::string iso_result;
-  ISO8601p iso;
-  if (iso.parse (_input.substr (_shift_counter), iso_i))
-  {
-    result = _input.substr (_shift_counter, iso_i);
-    while (iso_i--) shift ();
-    return true;
-  }
-
-  std::string::size_type dur_i = 0;
-  std::string dur_result;
-  Duration dur;
-  if (dur.parse (_input.substr (_shift_counter), dur_i))
-  {
-    result = _input.substr (_shift_counter, dur_i);
-    while (dur_i--) shift ();
-    return true;
-  }
-
-  return false;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-bool Lexer::is_punct (int c) const
-{
-  if (c == ',' ||
-      c == '.')      // Period
-    return true;
-
-  return false;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-bool Lexer::is_num (int c) const
-{
-  if ((c >= '0' && c <= '9') ||
-      c == '.')
-    return true;
-
-  return false;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-bool Lexer::is_triple_op (int c0, int c1, int c2) const
-{
-  return (c0 == 'a' && c1 == 'n' && c2 == 'd' && _boundary23) ||
-         (c0 == 'x' && c1 == 'o' && c2 == 'r' && _boundary23) ||
-         (c0 == '!' && c1 == '=' && c2 == '=');
-}
-
-////////////////////////////////////////////////////////////////////////////////
-bool Lexer::is_double_op (int c0, int c1, int c2) const
-{
-  return (c0 == '=' && c1 == '=')                ||
-         (c0 == '!' && c1 == '=')                ||
-         (c0 == '<' && c1 == '=')                ||
-         (c0 == '>' && c1 == '=')                ||
-         (c0 == 'o' && c1 == 'r' && _boundary12) ||
-         (c0 == '|' && c1 == '|')                ||
-         (c0 == '&' && c1 == '&')                ||
-         (c0 == '!' && c1 == '~');
-}
-
-////////////////////////////////////////////////////////////////////////////////
-bool Lexer::is_hex_digit (int c) const
-{
-  return (c >= '0' && c <= '9') ||
-         (c >= 'a' && c <= 'f') ||
-         (c >= 'A' && c <= 'F');
-}
-
-////////////////////////////////////////////////////////////////////////////////
-int Lexer::decode_escape (int c) const
-{
-  switch (c)
-  {
-  case 'b':  return 0x08;
-  case 'f':  return 0x0C;
-  case 'n':  return 0x0A;
-  case 'r':  return 0x0D;
-  case 't':  return 0x09;
-  case 'v':  return 0x0B;
-  case '\'': return 0x27;
-  case '"':  return 0x22;
-  default:   return c;
-  }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-int Lexer::hex_to_int (int c) const
-{
-       if (c >= '0' && c <= '9') return (c - '0');
-  else if (c >= 'a' && c <= 'f') return (c - 'a' + 10);
-  else                           return (c - 'A' + 10);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-int Lexer::hex_to_int (int c0, int c1) const
-{
-  return (hex_to_int (c0) << 4) + hex_to_int (c1);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-int Lexer::hex_to_int (int c0, int c1, int c2, int c3) const
-{
-  return (hex_to_int (c0) << 12) +
-         (hex_to_int (c1) << 8)  +
-         (hex_to_int (c2) << 4)  +
-          hex_to_int (c3);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-void Lexer::shift ()
-{
-  _n0 = _n1;
-  _n1 = _n2;
-  _n2 = _n3;
-  _n3 = utf8_next_char (_input, _i);
-  ++_shift_counter;
-
-  // Detect type boundaries between characters.
-  _boundary01 = boundary (_n0, _n1);
-  _boundary12 = boundary (_n1, _n2);
-  _boundary23 = boundary (_n2, _n3);
-}
-
-////////////////////////////////////////////////////////////////////////////////
--- a/src/Lexer.h
+++ b/src/Lexer.h
@ -1,120 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-//
-// Copyright 2013 - 2015, Paul Beckingham, Federico Hernandez.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included
-// in all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE.
-//
-// http://www.opensource.org/licenses/mit-license.php
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef INCLUDED_LEXER
-#define INCLUDED_LEXER
-
-#include <vector>
-#include <string>
-
-class Lexer
-{
-public:
-  static std::string dateFormat;
-  static bool isoEnabled;
-
-  enum Type
-  {
-    typeNone = 0,
-    typeString,
-    typeIdentifier,
-    typeIdentifierEscape,    // Intermediate
-    typeEscape,              // Intermediate
-    typeEscapeHex,           // Intermediate
-    typeEscapeUnicode,       // Intermediate
-    typeNumber,
-    typeDecimal,
-    typeExponentIndicator,   // Intermediate
-    typeExponent,            // Intermediate
-    typeHex,
-    typeOperator,
-    typeDate,
-    typeDuration,
-    typeTag,
-/*
-    Recognizing more types means that Lexer::*_split and Lexer::token approach
-    the ideal form, whereby the command line becomes just one string that is
-    lexed into tokens. Those tokens are then simply dissected by type.
-
-    typeUUID,
-    typePattern,
-    typeSubstitution,
-    typeNameValue,
-*/
-  };
-
-  Lexer (const std::string&);
-  virtual ~Lexer ();
-  Lexer (const Lexer&);            // Not implemented.
-  Lexer& operator= (const Lexer&); // Not implemented.
-  bool operator== (const Lexer&);  // Not implemented.
-  bool token (std::string&, Type&);
-  bool word (std::string&, Type&);
-  void ambiguity (bool);
-
-  static const std::string type_name (const Type&);
-  static bool is_ws (int);
-  static bool is_ident_start (int);
-  static bool is_ident (int);
-  static bool is_single_op (int);
-  static bool is_dec_digit (int);
-  static bool boundary (int, int);
-  static void word_split (std::vector <std::string>&, const std::string&);
-  static void token_split (std::vector <std::string>&, const std::string&);
-  static void token_split (std::vector <std::pair <std::string, Lexer::Type> >&, const std::string&);
-  static void dequote (std::string&);
-
-private:
-  bool is_date (std::string&);
-  bool is_duration (std::string&);
-  bool is_punct (int) const;
-  bool is_num (int) const;
-  bool is_triple_op (int, int, int) const;
-  bool is_double_op (int, int, int) const;
-  bool is_hex_digit (int) const;
-  int decode_escape (int) const;
-  int hex_to_int (int) const;
-  int hex_to_int (int, int) const;
-  int hex_to_int (int, int, int, int) const;
-  void shift ();
-
-private:
-  const std::string _input;
-  std::string::size_type _i;
-  std::string::size_type _shift_counter;
-  int _n0;
-  int _n1;
-  int _n2;
-  int _n3;
-  bool _boundary01;
-  bool _boundary12;
-  bool _boundary23;
-  bool _ambiguity;
-};
-
-#endif
-
-////////////////////////////////////////////////////////////////////////////////
--- a/src/Lexer2.cpp
+++ b/src/Lexer2.cpp
@ -37,13 +37,13 @@ static const int uuid_min_length = 8;

 std::string Lexer2::dateFormat = "";
 bool Lexer2::isoEnabled = true;
-bool Lexer2::ambiguity = true;

 ////////////////////////////////////////////////////////////////////////////////
 Lexer2::Lexer2 (const std::string& text)
 : _text (text)
 , _cursor (0)
 , _eos (text.size ())
+, _ambiguity (false)
 {
 }

@ -52,6 +52,12 @@ Lexer2::~Lexer2 ()
 {
 }

+////////////////////////////////////////////////////////////////////////////////
+void Lexer2::ambiguity (bool value)
+{
+  _ambiguity = value;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // When a Lexer2 object is constructed with a string, this method walks through
 // the stream of low-level tokens.
@ -417,7 +423,7 @@ bool Lexer2::isDate (std::string& token, Lexer2::Type& type)
  {
    std::size_t iso_i = 0;
    ISO8601d iso;
-    iso.ambiguity (Lexer2::ambiguity);
+    iso.ambiguity (_ambiguity);
    if (iso.parse (_text.substr (_cursor), iso_i))
    {
      type = Lexer2::Type::date;
@ -504,10 +510,13 @@ bool Lexer2::isUUID (std::string& token, Lexer2::Type& type)

  if (i >= uuid_min_length)
  {
-    token = _text.substr (_cursor, i + 1);
-    type = Lexer2::Type::uuid;
-    _cursor += i;
-    return true;
+    token = _text.substr (_cursor, i);
+    if (! isAllDigits (token))
+    {
+      type = Lexer2::Type::uuid;
+      _cursor += i;
+      return true;
+    }
  }

  return false;
@ -545,7 +554,7 @@ bool Lexer2::isHexNumber (std::string& token, Lexer2::Type& type)
 // Lexer2::Type::number
 //   \d+
 //   [ . \d+ ]
-//   [ e|E [ +|- ] \d+ ]
+//   [ e|E [ +|- ] \d+ [ . \d+ ] ]
 bool Lexer2::isNumber (std::string& token, Lexer2::Type& type)
 {
  std::size_t marker = _cursor;
@ -581,6 +590,17 @@ bool Lexer2::isNumber (std::string& token, Lexer2::Type& type)
        ++marker;
        while (isDigit (_text[marker]))
          utf8_next_char (_text, marker);
+
+        if (_text[marker] == '.')
+        {
+          ++marker;
+          if (isDigit (_text[marker]))
+          {
+            ++marker;
+            while (isDigit (_text[marker]))
+              utf8_next_char (_text, marker);
+          }
+        }
      }
    }

@ -667,7 +687,7 @@ bool Lexer2::isURL (std::string& token, Lexer2::Type& type)

 ////////////////////////////////////////////////////////////////////////////////
 // Lexer2::Type::pair
-//   <identifier> : [ <string> | <word> ]
+//   <identifier> :|= [ <string> | <word> ]
 bool Lexer2::isPair (std::string& token, Lexer2::Type& type)
 {
  std::size_t marker = _cursor;
@ -698,11 +718,18 @@ bool Lexer2::isPair (std::string& token, Lexer2::Type& type)

 ////////////////////////////////////////////////////////////////////////////////
 // Lexer2::Type::tag
-//   [ +|- ] <isIdentifierStart> [ <isIdentifierNext> ]*
+//   ^ | <isWhiteSpace>    [ +|- ] <isIdentifierStart> [ <isIdentifierNext> ]*
 bool Lexer2::isTag (std::string& token, Lexer2::Type& type)
 {
  std::size_t marker = _cursor;

+  // This test requires a tag to have a preceding space or start a string.
+  //   bad:  'a+b' --> identifier tag
+  //   good: 'a+b' --> identifier op identifier
+  if (marker > 0 &&
+      ! isWhitespace (_text[marker - 1]))
+    return false;
+
  if (_text[marker] == '+' ||
      _text[marker] == '-')
  {
@ -926,7 +953,7 @@ bool Lexer2::isWord (std::string& token, Lexer2::Type& type)
 {
  std::size_t marker = _cursor;

-  while (! isWhitespace (_text[marker]))
+  while (_text[marker] && ! isWhitespace (_text[marker]))
    utf8_next_char (_text, marker);

  if (marker > _cursor)
--- a/src/Lexer2.h
+++ b/src/Lexer2.h
@ -40,7 +40,6 @@ public:
  // These are overridable.
  static std::string dateFormat;
  static bool isoEnabled;
-  static bool ambiguity;

  enum class Type { uuid, number, hex,
                    string,
@ -54,6 +53,7 @@ public:

  Lexer2 (const std::string&);
  ~Lexer2 ();
+  void ambiguity (bool);
  bool token (std::string&, Lexer2::Type&);
  static std::vector <std::pair <std::string, Lexer2::Type>> tokens (const std::string&);
  static std::vector <std::string> split (const std::string&);
@ -101,8 +101,9 @@ public:

 private:
  std::string _text;
-  std::size_t _cursor = 0;
-  std::size_t _eos = 0;
+  std::size_t _cursor;
+  std::size_t _eos;
+  bool        _ambiguity;
 };

 #endif
--- a/src/commands/CmdCustom.cpp
+++ b/src/commands/CmdCustom.cpp
@ -32,7 +32,7 @@
 #include <stdlib.h>
 #include <Context.h>
 #include <Filter.h>
-#include <Lexer.h>
+#include <Lexer2.h>
 #include <ViewTask.h>
 #include <i18n.h>
 #include <text.h>
@ -83,8 +83,8 @@ int CmdCustom::execute (std::string& output)

  // Prepend the argument list with those from the report filter.
  std::string lexeme;
-  Lexer::Type type;
-  Lexer lex (reportFilter);
+  Lexer2::Type type;
+  Lexer2 lex (reportFilter);
  lex.ambiguity (false);
  while (lex.token (lexeme, type))
    context.cli.add (lexeme);