Expressions

- Implemented sequence --> infix converter. - Added new Lexer code. - Added Lexer unit tests.
2025-06-26 10:54:26 +02:00 · 2011-06-06 01:46:11 -04:00 · 2011-06-06 01:46:11 -04:00 · ed8454c202
commit ed8454c202
parent 86dcec8aea
10 changed files with 1247 additions and 70 deletions
--- a/src/Arguments.cpp
+++ b/src/Arguments.cpp
@ -251,7 +251,6 @@ void Arguments::categorize ()
        arg->second = "tag";
      }

-      // 
      // <name>.<modifier>[:=]<value>
      else if (is_attmod (arg->first))
      {
@ -302,6 +301,16 @@ void Arguments::categorize ()
        arg->second = "op";
      }

+      // <expression>
+      else if (is_expression (arg->first))
+      {
+        found_non_sequence = true;
+        if (found_sequence)
+          found_something_after_sequence = true;
+
+        arg->second = "exp";
+      }
+
      // If the type is not known, it is treated as a generic word.
      else
      {
@ -557,13 +566,7 @@ bool Arguments::is_command (
 }

 ////////////////////////////////////////////////////////////////////////////////
-//                    ______________
-//                    |            |
-//                    |            v
-// start --> name --> : --> " --> value --> " --> end
-//                                   |             ^
-//                                   |_____________|
-//
+// <name>[:=]['"][<value>]['"]
 bool Arguments::is_attr (const std::string& input)
 {
  Nibbler n (input);
@ -591,13 +594,7 @@ bool Arguments::is_attr (const std::string& input)
 }

 ////////////////////////////////////////////////////////////////////////////////
-//                                  ______________
-//                                  |            |
-//                                  |            v
-// start --> name --> . --> mod --> : --> " --> value --> " --> end
-//            |                     ^              |             ^
-//            |_____________________|              |_____________|
-//
+// <name>.<mod>[:=]['"]<value>['"]
 bool Arguments::is_attmod (const std::string& input)
 {
  Nibbler n (input);
@ -733,6 +730,7 @@ bool Arguments::is_tag (const std::string& input)
 }

 ////////////////////////////////////////////////////////////////////////////////
+// "+", "-", "*", "/", "%", "~", "!~", "<" ...
 bool Arguments::is_operator (const std::string& input)
 {
  for (unsigned int i = 0; i < NUM_OPERATORS; ++i)
@ -743,13 +741,21 @@ bool Arguments::is_operator (const std::string& input)
 }

 ////////////////////////////////////////////////////////////////////////////////
-//                    ______________
-//                    |            |
-//                    |            v
-// start --> name --> : --> " --> value --> " --> end
-//                                   |             ^
-//                                   |_____________|
-//
+// An argument counts as an expression when at least one of the pieces that
+// splitq produces for it (split on ' ') is an operator.
+bool Arguments::is_expression (const std::string& input)
+{
+  std::vector <std::string> pieces;
+  splitq (pieces, input, ' ');
+
+  bool has_operator = false;
+  for (unsigned int i = 0; i < pieces.size () && !has_operator; ++i)
+    has_operator = is_operator (pieces[i]);
+
+  return has_operator;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// <name>[:=]['"]<value>['"]
 bool Arguments::extract_attr (
  const std::string& input,
  std::string& name,
@ -787,13 +793,7 @@ bool Arguments::extract_attr (
 }

 ////////////////////////////////////////////////////////////////////////////////
-//                                  ______________
-//                                  |            |
-//                                  |            v
-// start --> name --> . --> mod --> : --> " --> value --> " --> end
-//                                                 |             ^
-//                                                 |_____________|
-//
+// <name>.<mod>[:=]['"]<value>['"]
 bool Arguments::extract_attmod (
  const std::string& input,
  std::string& name,
@ -821,7 +821,7 @@ bool Arguments::extract_attmod (

      if (n.getUntilOneOf (":=", modifier))
      {
-        if (!valid_modifier (modifier))
+        if (!Arguments::valid_modifier (modifier))
          throw std::string ("The name '") + modifier + "' is not a valid modifier."; // TODO i18n
      }
      else
@ -928,19 +928,24 @@ bool Arguments::extract_pattern (const std::string& input, std::string& pattern)
 bool Arguments::extract_id (const std::string& input, std::vector <int>& sequence)
 {
  Nibbler n (input);
-  sequence.clear ();

  int id;
+
  if (n.getUnsignedInt (id))
  {
    sequence.push_back (id);

    if (n.skip ('-'))
    {
-      if (!n.getUnsignedInt (id))
+      int end;
+      if (!n.getUnsignedInt (end))
        throw std::string ("Unrecognized ID after hyphen.");

-      sequence.push_back (id);
+      if (id > end)
+        throw std::string ("Inverted range 'high-low' instead of 'low-high'");
+
+      for (int n = id + 1; n <= end; ++n)
+        sequence.push_back (n);
    }

    while (n.skip (','))
@ -951,10 +956,15 @@ bool Arguments::extract_id (const std::string& input, std::vector <int>& sequenc

        if (n.skip ('-'))
        {
-          if (!n.getUnsignedInt (id))
+          int end;
+          if (!n.getUnsignedInt (end))
            throw std::string ("Unrecognized ID after hyphen.");

-          sequence.push_back (id);
+          if (id > end)
+            throw std::string ("Inverted range 'high-low' instead of 'low-high'");
+
+          for (int n = id + 1; n <= end; ++n)
+            sequence.push_back (n);
        }
      }
      else
@ -973,7 +983,6 @@ bool Arguments::extract_uuid (
  std::vector <std::string>& sequence)
 {
  Nibbler n (input);
-  sequence.clear ();

  std::string uuid;
  if (n.getUUID (uuid))
@ -1047,6 +1056,7 @@ Arguments Arguments::extract_read_only_filter ()
             i->second == "id"        ||
             i->second == "uuid"      ||
             i->second == "op"        ||
+             i->second == "exp"       ||
             i->second == "word")
    {
      filter.push_back (*i);
@ -1092,6 +1102,7 @@ Arguments Arguments::extract_write_filter ()
             i->second == "id"        ||
             i->second == "uuid"      ||
             i->second == "op"        ||
+             i->second == "exp"       ||
             i->second == "word")
    {
      filter.push_back (*i);
@ -1153,9 +1164,14 @@ Arguments Arguments::extract_modifications ()
                + "' is not allowed when modifiying a task.";

        else if (i->second == "attmod")
-          throw std::string ("Attribute modifiers '")
+          throw std::string ("An attribute modifier '")
                + i->first
-                + "' are not allowed when modifiying a task.";
+                + "' is not allowed when modifiying a task.";
+
+        else if (i->second == "exp")
+          throw std::string ("An expression '")
+                + i->first
+                + "' is not allowed when modifiying a task.";

        else if (i->second == "id")
          throw std::string ("A task id cannot be modified.");
@ -1196,6 +1212,7 @@ void Arguments::dump (const std::string& label)
  color_map["uuid"]         = Color ("yellow on gray3");
  color_map["substitution"] = Color ("bold cyan on gray3");
  color_map["op"]           = Color ("bold blue on gray3");
+  color_map["exp"]          = Color ("bold green on gray5");
  color_map["none"]         = Color ("white on gray3");

  Color color_debug (context.config.get ("color.debug"));
--- a/src/Arguments.h
+++ b/src/Arguments.h
@ -55,31 +55,32 @@ public:

  bool find_command (std::string&);

-  bool is_command (const std::vector <std::string>&, std::string&);
-  bool is_attr (const std::string&);
-  bool is_attmod (const std::string&);
-  bool is_subst (const std::string&);
-  bool is_pattern (const std::string&);
-  bool is_id (const std::string&);
-  bool is_uuid (const std::string&);
-  bool is_tag (const std::string&);
-  bool is_operator (const std::string&);
+  static bool is_command (const std::vector <std::string>&, std::string&);
+  static bool is_attr (const std::string&);
+  static bool is_attmod (const std::string&);
+  static bool is_subst (const std::string&);
+  static bool is_pattern (const std::string&);
+  static bool is_id (const std::string&);
+  static bool is_uuid (const std::string&);
+  static bool is_tag (const std::string&);
+  static bool is_operator (const std::string&);
+  static bool is_expression (const std::string&);

  // TODO Decide if these are really useful.
-  bool extract_attr (const std::string&, std::string&, std::string&);
-  bool extract_attmod (const std::string&, std::string&, std::string&, std::string&, std::string&);
-  bool extract_subst (const std::string&, std::string&, std::string&, bool&);
-  bool extract_pattern (const std::string&, std::string&);
-  bool extract_id (const std::string&, std::vector <int>&);
-  bool extract_uuid (const std::string&, std::vector <std::string>&);
-  bool extract_tag (const std::string&, char&, std::string&);
-  bool extract_operator (const std::string&, std::string&);
+  static bool extract_attr (const std::string&, std::string&, std::string&);
+  static bool extract_attmod (const std::string&, std::string&, std::string&, std::string&, std::string&);
+  static bool extract_subst (const std::string&, std::string&, std::string&, bool&);
+  static bool extract_pattern (const std::string&, std::string&);
+  static bool extract_id (const std::string&, std::vector <int>&);
+  static bool extract_uuid (const std::string&, std::vector <std::string>&);
+  static bool extract_tag (const std::string&, char&, std::string&);
+  static bool extract_operator (const std::string&, std::string&);

  Arguments extract_read_only_filter ();
  Arguments extract_write_filter ();
  Arguments extract_modifications ();

-  bool valid_modifier (const std::string&);
+  static bool valid_modifier (const std::string&);

  void dump (const std::string&);
 };
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -20,6 +20,7 @@ set (task_SRCS API.cpp API.h
               Filter.cpp Filter.h
               Hooks.cpp Hooks.h
               JSON.cpp JSON.h
+               Lexer.cpp Lexer.h
               Location.cpp Location.h
               Nibbler.cpp Nibbler.h
               Path.cpp Path.h
--- a/src/Expression.cpp
+++ b/src/Expression.cpp
@ -25,15 +25,23 @@
 //
 ////////////////////////////////////////////////////////////////////////////////

+#include <iostream> // TODO Remove.
+#include <sstream>
 #include <Context.h>
+#include <Lexer.h>
 #include <Expression.h>

 extern Context context;

 ////////////////////////////////////////////////////////////////////////////////
+// Perform all the necessary steps prior to an eval call.
 Expression::Expression (Arguments& arguments)
 : _original (arguments)
 {
+  // The stages feed each other, so the order matters: expand_sequence fills
+  // _sequenced, to_infix reads _sequenced and fills _infix, and
+  // expand_expression then rewrites _infix.
+  expand_sequence ();
+  to_infix ();
+  expand_expression ();
+  to_postfix ();
 }

 ////////////////////////////////////////////////////////////////////////////////
@ -44,29 +52,328 @@ Expression::~Expression ()
 ////////////////////////////////////////////////////////////////////////////////
 bool Expression::eval (Task& task)
 {
+  // Placeholder: 'task' is not examined yet and true is always returned.
+  // TODO Duplicate the _postfix vector as the operating stack.
+  // TODO ...
+
   return true;
 }

+////////////////////////////////////////////////////////////////////////////////
+// Convert:  1,3-5,00000000-0000-0000-0000-000000000000
+//
+// To:       (id=1 or id=3 or id=4 or id=5 or
+//            uuid="00000000-0000-0000-0000-000000000000")
+//
+// Reads _original and writes _sequenced.  The combined expression is inserted
+// where the first id/uuid argument was; all other arguments are kept.  When
+// there are no id/uuid arguments at all, _sequenced becomes a copy of
+// _original so that the following stages still see the arguments.
+//
+// Errors thrown by Arguments::extract_id / extract_uuid are not caught here.
+void Expression::expand_sequence ()
+{
+  Arguments temp;
+  _sequenced.clear ();
+
+  // Extract all the components of a sequence.
+  std::vector <int> ids;
+  std::vector <std::string> uuids;
+  std::vector <std::pair <std::string, std::string> >::iterator arg;
+  for (arg = _original.begin (); arg != _original.end (); ++arg)
+  {
+    if (arg->second == "id")
+      Arguments::extract_id (arg->first, ids);
+
+    else if (arg->second == "uuid")
+      Arguments::extract_uuid (arg->first, uuids);
+  }
+
+  // If there is no sequence, pass the arguments through unchanged.  Leaving
+  // _sequenced empty here would make to_infix drop every argument.
+  if (ids.size () == 0 && uuids.size () == 0)
+  {
+    Arguments unchanged (_original);
+    _sequenced.swap (unchanged);
+    return;
+  }
+
+  // Construct the algebraic form.  The " or " separator goes between terms
+  // only, so a uuid-only sequence does not start with a dangling "or".
+  std::stringstream sequence;
+  sequence << "(";
+  bool first = true;
+  for (unsigned int i = 0; i < ids.size (); ++i)
+  {
+    if (!first)
+      sequence << " or ";
+
+    sequence << "id=" << ids[i];
+    first = false;
+  }
+
+  for (unsigned int i = 0; i < uuids.size (); ++i)
+  {
+    if (!first)
+      sequence << " or ";
+
+    sequence << "uuid=\"" << uuids[i] << "\"";
+    first = false;
+  }
+
+  sequence << ")";
+  std::cout << "# sequence '" << sequence.str () << "'\n";
+
+  // Copy everything up to the first id/uuid.
+  for (arg = _original.begin (); arg != _original.end (); ++arg)
+  {
+    if (arg->second == "id" || arg->second == "uuid")
+      break;
+
+    temp.push_back (*arg);
+  }
+
+  // Now insert the new sequence expression.
+  temp.push_back (std::make_pair (sequence.str (), "exp"));
+
+  // Now copy every non-id/uuid argument that follows the first id/uuid.
+  bool found_id = false;
+  for (arg = _original.begin (); arg != _original.end (); ++arg)
+  {
+    if (arg->second == "id" || arg->second == "uuid")
+      found_id = true;
+
+    else if (found_id)
+      temp.push_back (*arg);
+  }
+
+  _sequenced.swap (temp);
+  _sequenced.dump ("Expression::expand_sequence");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Convert:  +with -without
+//
+// To:       tags ~ with
+//           tags !~ without
+void Expression::expand_tag (const std::string& input)
+{
+  char sign;
+  std::string tag;
+  Arguments::extract_tag (input, sign, tag);
+
+  // '+' selects the match operator; anything else selects the no-match one.
+  std::string op = "!~";
+  if (sign == '+')
+    op = "~";
+
+  _infix.push_back (std::make_pair ("tags", "dom"));
+  _infix.push_back (std::make_pair (op, "op"));
+  _infix.push_back (std::make_pair (tag, "exp"));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Convert:  <name>[:=]<value>
+//
+// To:       <name> = lex<value>
+void Expression::expand_attr (const std::string& input)
+{
+  // TODO Should canonicalize the attribute name.
+  std::string attribute;
+  std::string operand;
+  Arguments::extract_attr (input, attribute, operand);
+
+  // Appends three entries to _infix: the name, "=", and the value, which is
+  // tagged "exp".
+  const std::string op ("=");
+  _infix.push_back (std::make_pair (attribute, "dom"));
+  _infix.push_back (std::make_pair (op, "op"));
+  _infix.push_back (std::make_pair (operand, "exp"));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Convert:  <name>.<mod>[:=]<value>
+//
+// To:       <name> <op> lex<value>
+void Expression::expand_attmod (const std::string& input)
+{
+  // TODO Should canonicalize 'name'.
+  // TODO Should canonicalize 'mod'.
+  std::string name;
+  std::string mod;
+  std::string value;
+  std::string sense;
+  Arguments::extract_attmod (input, name, mod, value, sense);
+
+  // Pick the operator and right-hand operand for the modifier first, then
+  // append the three entries to _infix in one place.
+  std::string op;
+  std::string operand = value;
+
+  if (mod == "before" || mod == "under" || mod == "below")
+    op = "<";
+
+  else if (mod == "after" || mod == "over" || mod == "above")
+    op = ">";
+
+  else if (mod == "none")
+  {
+    // Compared against an empty quoted string rather than 'value'.
+    op      = "==";
+    operand = "\"\"";
+  }
+
+  else if (mod == "any")
+  {
+    op      = "!=";
+    operand = "\"\"";
+  }
+
+  else if (mod == "is" || mod == "equals")
+    op = "=";
+
+  else if (mod == "isnt" || mod == "not")
+    op = "!=";
+
+  else if (mod == "has" || mod == "contains")
+    op = "~";
+
+  else if (mod == "hasnt")
+    op = "!~";
+
+  else
+  {
+    // TODO ?  "startswith", "left", "endswith", "right", "word" and "noword"
+    // are not converted yet.  These, and any other modifier, add nothing.
+    return;
+  }
+
+  _infix.push_back (std::make_pair (name, "dom"));
+  _infix.push_back (std::make_pair (op, "op"));
+  _infix.push_back (std::make_pair (operand, "exp"));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Convert:  <word>
+//
+// To:       description ~ <word>
+void Expression::expand_word (const std::string& input)
+{
+  // Appends "description", "~" and the word (tagged "exp") to _infix.
+  const std::string attribute ("description");
+  const std::string op ("~");
+
+  _infix.push_back (std::make_pair (attribute, "dom"));
+  _infix.push_back (std::make_pair (op, "op"));
+  _infix.push_back (std::make_pair (input, "exp"));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Convert:  /<pattern>/
+//
+// To:       description ~ <pattern>
+void Expression::expand_pattern (const std::string& input)
+{
+  // Take the pattern text out of the argument.
+  std::string pattern;
+  Arguments::extract_pattern (input, pattern);
+
+  // Appends "description", "~" and the pattern (tagged "exp") to _infix.
+  const std::string attribute ("description");
+  const std::string op ("~");
+
+  _infix.push_back (std::make_pair (attribute, "dom"));
+  _infix.push_back (std::make_pair (op, "op"));
+  _infix.push_back (std::make_pair (pattern, "exp"));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Convert:  <exp>
+//
+// To:       lex<exp>
+void Expression::expand_expression ()
+{
+  Arguments expanded;
+
+  std::vector <std::pair <std::string, std::string> >::iterator item;
+  for (item = _infix.begin (); item != _infix.end (); ++item)
+  {
+    // Anything that is not an expression is carried over as it is.
+    if (item->second != "exp")
+    {
+      expanded.push_back (*item);
+      continue;
+    }
+
+    // Lex the expression text into tokens.
+    Lexer lexer (item->first);
+    lexer.skipWhitespace (true);
+    lexer.coalesceAlpha (true);
+    lexer.coalesceDigits (true);
+    lexer.coalesceQuoted (true);
+
+    std::vector <std::string> pieces;
+    lexer.tokenize (pieces);
+
+    // Each token becomes its own entry: "op" for operators, "dom" otherwise.
+    for (unsigned int i = 0; i < pieces.size (); ++i)
+    {
+      const char* category = Arguments::is_operator (pieces[i]) ? "op" : "dom";
+      expanded.push_back (std::make_pair (pieces[i], category));
+    }
+  }
+
+  _infix.swap (expanded);
+  _infix.dump ("Expression::expand_expression");
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // Inserts the 'and' operator by default between terms that are not separated by
 // at least one operator.
 //
-// Converts:  <term1>     <term2> <op> <term3>
-// to:        <term1> and <term2> <op> <term3>
+// Converts:  <term1>     <term2> <op> <exp>
+// to:        <term1> and <term2> <op> <token> <token> <token>
 //
-void Expression::toInfix ()
+//
+//
+// Rules:
+//   1. Two adjacent non-operator arguments have an 'and' inserted between them.
+//   2. Any argument of type "exp" is lexed and replaced by tokens.
+//
+void Expression::to_infix ()
 {
  _infix.clear ();

+  bool new_style = is_new_style ();
+
+  std::string value;
  std::string previous = "op";
  std::vector <std::pair <std::string, std::string> >::iterator arg;
-  for (arg = _original.begin (); arg != _original.end (); ++arg)
+  for (arg = _sequenced.begin (); arg != _sequenced.end (); ++arg)
  {
-    if (previous    != "op" &&
+    // Old-style filters need 'and' conjunctions.
+    if (!new_style          &&
+        previous    != "op" &&
        arg->second != "op")
+    {
      _infix.push_back (std::make_pair ("and", "op"));
+    }

+    // Upgrade all arguments to new-style.
+    // ID & UUID sequence has already been converted.
+    if (arg->second == "id" ||
+        arg->second == "uuid")
+      ; // NOP.
+
+    else if (arg->second == "tag")
+      expand_tag (arg->first);
+
+    else if (arg->second == "pattern")
+      expand_pattern (arg->first);
+
+    else if (arg->second == "attribute")
+      expand_attr (arg->first);
+
+    else if (arg->second == "attmod")
+      expand_attmod (arg->first);
+
+    else if (arg->second == "word")
+      expand_word (arg->first);
+
+    // Expressions will be converted later.
+    else if (arg->second == "exp")
      _infix.push_back (*arg);
+
+    else
+      throw std::string ("Error: unrecognized argument category '") + arg->second + "'";
+
    previous = arg->second;
  }

@ -75,7 +382,41 @@ void Expression::toInfix ()

 ////////////////////////////////////////////////////////////////////////////////
 // Dijkstra Shunting Algorithm.
-void Expression::toPostfix ()
+//
+//   While there are tokens to be read:
+//     Read a token.
+//     If the token is a number, then add it to the output queue.
+//     If the token is a function token, then push it onto the stack.
+//     If the token is a function argument separator (e.g., a comma):
+//       Until the token at the top of the stack is a left parenthesis, pop
+//       operators off the stack onto the output queue. If no left parentheses
+//       are encountered, either the separator was misplaced or parentheses were
+//       mismatched.
+//     If the token is an operator, o1, then:
+//       while there is an operator token, o2, at the top of the stack, and
+//             either o1 is left-associative and its precedence is less than or
+//             equal to that of o2,
+//             or o1 is right-associative and its precedence is less than that
+//             of o2,
+//         pop o2 off the stack, onto the output queue;
+//       push o1 onto the stack.
+//     If the token is a left parenthesis, then push it onto the stack.
+//     If the token is a right parenthesis:
+//       Until the token at the top of the stack is a left parenthesis, pop
+//       operators off the stack onto the output queue.
+//       Pop the left parenthesis from the stack, but not onto the output queue.
+//       If the token at the top of the stack is a function token, pop it onto
+//       the output queue.
+//       If the stack runs out without finding a left parenthesis, then there
+//       are mismatched parentheses.
+//   When there are no more tokens to read:
+//     While there are still operator tokens in the stack:
+//       If the operator token on the top of the stack is a parenthesis, then
+//       there are mismatched parentheses.
+//       Pop the operator onto the output queue.
+//   Exit.
+//
+void Expression::to_postfix ()
 {
  _postfix.clear ();

@ -83,6 +424,25 @@ void Expression::toPostfix ()
 }

 ////////////////////////////////////////////////////////////////////////////////
+// Test whether the _original arguments are old style or new style.
+//
+// Old style:  no single argument corresponds to an operator, ie no 'and', 'or',
+//             etc.
+//
+// New style:  at least one argument that is an operator.
+//
+bool Expression::is_new_style ()
+{
+  // Scan _original and stop at the first argument whose text is an operator.
+  bool found = false;
+  std::vector <std::pair <std::string, std::string> >::iterator arg =
+    _original.begin ();
+
+  while (!found && arg != _original.end ())
+  {
+    found = Arguments::is_operator (arg->first);
+    ++arg;
+  }
+
+  return found;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// TODO Remove?
 void Expression::dump (const std::string& label)
 {
+  // Currently empty: nothing is printed and 'label' is unused.
 }
--- a/src/Expression.h
+++ b/src/Expression.h
@ -38,13 +38,24 @@ public:
  Expression (Arguments&);
  ~Expression ();
  bool eval (Task&);
-  void toInfix ();
-  void toPostfix ();

+private:
+  void expand_sequence ();
+  void expand_expression ();
+  void expand_tag (const std::string&);
+  void expand_attr (const std::string&);
+  void expand_attmod (const std::string&);
+  void expand_word (const std::string&);
+  void expand_pattern (const std::string&);
+
+  void to_infix ();
+  void to_postfix ();
+  bool is_new_style ();
  void dump (const std::string&);

 private:
  Arguments _original;
+  Arguments _sequenced;
  Arguments _infix;
  Arguments _postfix;
 };
--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@ -0,0 +1,374 @@
+////////////////////////////////////////////////////////////////////////////////
+// taskwarrior - a command line task list manager.
+//
+// Copyright 2011, Paul Beckingham, Federico Hernandez.
+// All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free Software
+// Foundation; either version 2 of the License, or (at your option) any later
+// version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+// details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the
+//
+//     Free Software Foundation, Inc.,
+//     51 Franklin Street, Fifth Floor,
+//     Boston, MA
+//     02110-1301
+//     USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////
+// This lexer works by breaking the input stream into tokens.  The essence of
+// the algorithm lies in the distinction between adjacent tokens, such that
+// between the two extremes lies a good solution.
+//
+// At one extreme, the entire input is considered one token.  Clearly this is
+// only correct for trivial input.  At the other extreme, every character of the
+// input is a token.  This is also wrong.
+//
+// If the input is as follows:
+//
+//   It is almost 11:00am.
+//
+// The desired tokenization is:
+//
+//   It
+//   <space>
+//   is
+//   <space>
+//   almost
+//   <space>
+//   11
+//   :
+//   00
+//   am
+//   .
+//   \n
+//
+// This can be achieved by allowing transitions to denote token boundaries.
+// Given the following character classes:
+//
+//   letter:     a-z A-Z
+//   digit:      0-9
+//   whitespace: <space> <tab> <newline> <cr> <form-feed>
+//   other:      Everything else
+//
+// Then a token boundary is a transition between:
+//   letter     -> !letter
+//   digit      -> !digit
+//   whitespace -> any
+//   other      -> any
+//
+// This has the effect of allowing groups of consecutive letters to be
+// considered one token, as well as groups of digits.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <iostream>
+#include <util.h>
+#include <Lexer.h>
+
+// Character class codes.  tokenize () compares the results of classify ()
+// against these values to find token boundaries.
+static const int other = -1;
+static const int alpha = -2;
+static const int digit = -3;
+static const int white = -4;
+static const int quote = -5;
+
+////////////////////////////////////////////////////////////////////////////////
+// Stores the input and sets the default character classes and options.
+// By default runs of letters and runs of digits are coalesced, quoted text
+// and runs of whitespace are not, and whitespace tokens are not skipped.
+Lexer::Lexer (const std::string& input)
+: mInput (input)
+
+, mAlpha ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
+, mDigit ("0123456789")
+, mQuote ("'\"")
+, mWhite (" \t\n\r\f")
+
+, mAlphaCoalesce (true)
+, mDigitCoalesce (true)
+, mQuotedCoalesce (false)
+, mWhiteCoalesce (false)
+, mSkipWhitespace (false)
+{
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Splits mInput into tokens and stores them in 'all'; whatever 'all' held
+// before is discarded.  At each position the special tokens are tried first.
+// Otherwise token boundaries follow character-class transitions, subject to
+// the coalesce and skip-whitespace options.
+void Lexer::tokenize (std::vector <std::string>& all)
+{
+  all.clear (); // Prevent repeated accumulation.
+
+  std::string token;
+  bool inQuote = false;
+  char quoteChar = '\0';
+  for (unsigned int i = 0; i < mInput.length (); ++i)
+  {
+    bool specialFound = false;
+    for (unsigned int s = 0; s < mSpecialTokens.size (); ++s)
+    {
+      // An empty special token would match at every position and make the
+      // index arithmetic below wrap around, so it is ignored.
+      if (mSpecialTokens[s].length () == 0)
+        continue;
+
+      std::string potential = mInput.substr (
+        i, min (mSpecialTokens[s].length (), mInput.length () - i));
+
+      if (potential == mSpecialTokens[s])
+      {
+        // Capture currently assembled token, the special token, increment over
+        // that token, and skip all remaining code in the loop.
+        if (token.length ())
+        {
+          all.push_back (token);
+          token = "";
+        }
+
+        all.push_back (potential);
+        i += potential.length () - 1;
+        specialFound = true;
+
+        // Stop at the first match.  'i' has moved, so testing the remaining
+        // special tokens here would look at the wrong position and could
+        // emit a second, bogus token.
+        break;
+      }
+    }
+
+    if (specialFound)
+      continue;
+
+    char c = mInput[i];
+    char next = '\0';
+    if (i < mInput.length () - 1)
+      next = mInput[i + 1];
+
+    // Classify current and next characters.
+    int thisChar = classify (c);
+    int nextChar = classify (next);
+
+    // Properly set inQuote, quoteChar.
+    if (!inQuote && thisChar == quote)
+    {
+      quoteChar = c;
+      inQuote = true;
+    }
+    else if (inQuote && c == quoteChar)
+    {
+      inQuote = false;
+    }
+
+    // Detect transitions.
+    bool transition = false;
+    if (thisChar != nextChar)
+      transition = true;
+
+    token += c;
+
+    // Transitions mean new token.  All 'other' characters are separate tokens.
+    if (transition || nextChar == other)
+    {
+      if (!inQuote || !mQuotedCoalesce)
+      {
+        if (!mSkipWhitespace || thisChar != white)
+          all.push_back (token);
+        token = "";
+      }
+    }
+
+    // Non-transitions - runs.
+    else
+    {
+      // Runs may be optionally coalesced.
+      if (!(mAlphaCoalesce && nextChar == alpha) &&
+          !(mDigitCoalesce && nextChar == digit) &&
+          !(mWhiteCoalesce && nextChar == white))
+      {
+        if (!inQuote || !mQuotedCoalesce)
+        {
+          if (!mSkipWhitespace || thisChar != white)
+            all.push_back (token);
+          token = "";
+        }
+      }
+    }
+  }
+
+  // Characters can still be pending here, for example after a quote that is
+  // never closed while quoted text is being coalesced.  Emit them instead of
+  // silently losing input.
+  if (token.length ())
+    all.push_back (token);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Puts 'value' in the letter class: appends it to mAlpha when absent and
+// erases its first occurrence from the digit, quote and whitespace classes.
+void Lexer::categorizeAsAlpha (char value)
+{
+  const std::string::size_type none = std::string::npos;
+
+  if (mAlpha.find (value) == none)
+    mAlpha += value;
+
+  const std::string::size_type inDigit = mDigit.find (value);
+  if (inDigit != none)
+    mDigit.erase (inDigit, 1);
+
+  const std::string::size_type inQuote = mQuote.find (value);
+  if (inQuote != none)
+    mQuote.erase (inQuote, 1);
+
+  const std::string::size_type inWhite = mWhite.find (value);
+  if (inWhite != none)
+    mWhite.erase (inWhite, 1);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Erases the first occurrence of 'value' from the letter class, if any.
+void Lexer::ignoreAsAlpha (char value)
+{
+  const std::string::size_type where = mAlpha.find (value);
+  if (where == std::string::npos)
+    return;
+
+  mAlpha.erase (where, 1);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Replaces the letter class with 'value'.  For every character of the new
+// class, one occurrence is erased from each of the digit, quote and
+// whitespace classes.
+void Lexer::setAlpha (const std::string& value)
+{
+  mAlpha = value;
+
+  const std::string::size_type none = std::string::npos;
+  for (std::string::const_iterator c = mAlpha.begin (); c != mAlpha.end (); ++c)
+  {
+    const std::string::size_type inDigit = mDigit.find (*c);
+    if (inDigit != none)
+      mDigit.erase (inDigit, 1);
+
+    const std::string::size_type inQuote = mQuote.find (*c);
+    if (inQuote != none)
+      mQuote.erase (inQuote, 1);
+
+    const std::string::size_type inWhite = mWhite.find (*c);
+    if (inWhite != none)
+      mWhite.erase (inWhite, 1);
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Puts 'value' in the digit class: appends it to mDigit when absent and
+// erases its first occurrence from the letter, quote and whitespace classes.
+void Lexer::categorizeAsDigit (char value)
+{
+  const std::string::size_type none = std::string::npos;
+
+  if (mDigit.find (value) == none)
+    mDigit += value;
+
+  const std::string::size_type inAlpha = mAlpha.find (value);
+  if (inAlpha != none)
+    mAlpha.erase (inAlpha, 1);
+
+  const std::string::size_type inQuote = mQuote.find (value);
+  if (inQuote != none)
+    mQuote.erase (inQuote, 1);
+
+  const std::string::size_type inWhite = mWhite.find (value);
+  if (inWhite != none)
+    mWhite.erase (inWhite, 1);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Erases the first occurrence of 'value' from the digit class, if any.
+void Lexer::ignoreAsDigit (char value)
+{
+  const std::string::size_type where = mDigit.find (value);
+  if (where == std::string::npos)
+    return;
+
+  mDigit.erase (where, 1);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Replaces the digit class with 'value'.  For every character of the new
+// class, one occurrence is erased from each of the letter, quote and
+// whitespace classes.
+void Lexer::setDigit (const std::string& value)
+{
+  mDigit = value;
+
+  const std::string::size_type none = std::string::npos;
+  for (std::string::const_iterator c = mDigit.begin (); c != mDigit.end (); ++c)
+  {
+    const std::string::size_type inAlpha = mAlpha.find (*c);
+    if (inAlpha != none)
+      mAlpha.erase (inAlpha, 1);
+
+    const std::string::size_type inQuote = mQuote.find (*c);
+    if (inQuote != none)
+      mQuote.erase (inQuote, 1);
+
+    const std::string::size_type inWhite = mWhite.find (*c);
+    if (inWhite != none)
+      mWhite.erase (inWhite, 1);
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Adds 'value' to the quote set (if not already present) and removes it from
+// the alpha, digit and white sets.  All occurrences are removed: the other sets
+// may contain duplicates, and classify () tests them before quote, so a single
+// leftover copy would override this call.
+void Lexer::categorizeAsQuote (char value)
+{
+  if (mQuote.find (value) == std::string::npos)
+    mQuote += value;
+
+  std::string::size_type pos;
+  while ((pos = mAlpha.find (value)) != std::string::npos) mAlpha.erase (pos, 1);
+  while ((pos = mDigit.find (value)) != std::string::npos) mDigit.erase (pos, 1);
+  while ((pos = mWhite.find (value)) != std::string::npos) mWhite.erase (pos, 1);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Stops treating 'value' as a quote character.  Every occurrence is removed,
+// not just the first: setQuote () stores its argument verbatim, so the set may
+// contain duplicates, and a leftover copy would keep the character a quote.
+void Lexer::ignoreAsQuote (char value)
+{
+  std::string::size_type pos;
+  while ((pos = mQuote.find (value)) != std::string::npos)
+    mQuote.erase (pos, 1);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Replaces the entire quote set with 'value', then strips each of those
+// characters from the alpha, digit and white sets so that no character belongs
+// to two categories.  All occurrences are stripped, because the other sets may
+// themselves hold duplicates (their set* methods store input verbatim).
+void Lexer::setQuote (const std::string& value)
+{
+  mQuote = value;
+
+  std::string::size_type pos;
+  for (std::string::size_type i = 0; i < mQuote.length (); ++i)
+  {
+    while ((pos = mAlpha.find (mQuote[i])) != std::string::npos) mAlpha.erase (pos, 1);
+    while ((pos = mDigit.find (mQuote[i])) != std::string::npos) mDigit.erase (pos, 1);
+    while ((pos = mWhite.find (mQuote[i])) != std::string::npos) mWhite.erase (pos, 1);
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Adds 'value' to the whitespace set (if not already present) and removes it
+// from the alpha, digit and quote sets.  All occurrences are removed: the other
+// sets may contain duplicates, and classify () tests alpha and digit before
+// white, so a single leftover copy there would override this call.
+void Lexer::categorizeAsWhite (char value)
+{
+  if (mWhite.find (value) == std::string::npos)
+    mWhite += value;
+
+  std::string::size_type pos;
+  while ((pos = mAlpha.find (value)) != std::string::npos) mAlpha.erase (pos, 1);
+  while ((pos = mDigit.find (value)) != std::string::npos) mDigit.erase (pos, 1);
+  while ((pos = mQuote.find (value)) != std::string::npos) mQuote.erase (pos, 1);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Stops treating 'value' as whitespace.  Every occurrence is removed, not just
+// the first: setWhite () stores its argument verbatim, so the set may contain
+// duplicates, and a leftover copy would keep the character whitespace.
+void Lexer::ignoreAsWhite (char value)
+{
+  std::string::size_type pos;
+  while ((pos = mWhite.find (value)) != std::string::npos)
+    mWhite.erase (pos, 1);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Replaces the entire whitespace set with 'value', then strips each of those
+// characters from the alpha, digit and quote sets so that no character belongs
+// to two categories.  All occurrences are stripped, because the other sets may
+// themselves hold duplicates (their set* methods store input verbatim).
+void Lexer::setWhite (const std::string& value)
+{
+  mWhite = value;
+
+  std::string::size_type pos;
+  for (std::string::size_type i = 0; i < mWhite.length (); ++i)
+  {
+    while ((pos = mAlpha.find (mWhite[i])) != std::string::npos) mAlpha.erase (pos, 1);
+    while ((pos = mDigit.find (mWhite[i])) != std::string::npos) mDigit.erase (pos, 1);
+    while ((pos = mQuote.find (mWhite[i])) != std::string::npos) mQuote.erase (pos, 1);
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Records the alpha-coalescing preference.  Per the unit tests, disabling it
+// makes "ab" tokenize as "a", "b" rather than "ab".
+void Lexer::coalesceAlpha (bool value)
+{
+  this->mAlphaCoalesce = value;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Records the digit-coalescing preference.  Per the unit tests, disabling it
+// makes "12" tokenize as "1", "2" rather than "12".
+void Lexer::coalesceDigits (bool value)
+{
+  this->mDigitCoalesce = value;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Records the quote-coalescing preference.  Per the unit tests, enabling it
+// makes 'a' (including both quotes) a single token.
+void Lexer::coalesceQuoted (bool value)
+{
+  this->mQuotedCoalesce = value;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Records the whitespace-coalescing preference.  Per the unit tests, enabling
+// it merges a run of adjacent whitespace characters into one token.
+void Lexer::coalesceWhite (bool value)
+{
+  this->mWhiteCoalesce = value;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Records whether whitespace should be skipped.  Per the unit tests, enabling
+// it omits whitespace tokens from the tokenize () output.
+void Lexer::skipWhitespace (bool value)
+{
+  this->mSkipWhitespace = value;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Appends 'special' to the list of special tokens.  Per the unit tests, a
+// registered sequence such as ":=" is emitted as a single token.
+void Lexer::specialToken (const std::string& special)
+{
+  mSpecialTokens.insert (mSpecialTokens.end (), special);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Maps a character to its category by membership in the configured sets.
+// Precedence when a character appears in several sets: alpha, digit, white,
+// quote.  Anything in none of them is 'other'.
+int Lexer::classify (char c)
+{
+  int category = other;
+
+  if (mAlpha.find (c) != std::string::npos)
+    category = alpha;
+  else if (mDigit.find (c) != std::string::npos)
+    category = digit;
+  else if (mWhite.find (c) != std::string::npos)
+    category = white;
+  else if (mQuote.find (c) != std::string::npos)
+    category = quote;
+
+  return category;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
--- a/src/Lexer.h
+++ b/src/Lexer.h
@ -0,0 +1,84 @@
+////////////////////////////////////////////////////////////////////////////////
+// taskwarrior - a command line task list manager.
+//
+// Copyright 2011, Paul Beckingham, Federico Hernandez.
+// All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free Software
+// Foundation; either version 2 of the License, or (at your option) any later
+// version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+// details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the
+//
+//     Free Software Foundation, Inc.,
+//     51 Franklin Street, Fifth Floor,
+//     Boston, MA
+//     02110-1301
+//     USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef INCLUDED_LEXER
+#define INCLUDED_LEXER
+
+#include <vector>
+#include <string>
+
+// Configurable tokenizer: splits an input string into tokens according to
+// per-character categories (alpha, digit, quote, whitespace) and a set of
+// coalescing options.
+class Lexer
+{
+public:
+  // Constructs a lexer over the given input string.
+  Lexer (const std::string&);
+
+  // Writes the resulting tokens into the supplied vector.
+  void tokenize (std::vector <std::string>&);
+
+  // Alpha category: add one character, remove one character, or replace the
+  // whole set.  Adding/replacing also removes the characters from the other
+  // categories.
+  void categorizeAsAlpha (char);
+  void ignoreAsAlpha (char);
+  void setAlpha (const std::string&);
+
+  // Digit category: same operations as for alpha.
+  void categorizeAsDigit (char);
+  void ignoreAsDigit (char);
+  void setDigit (const std::string&);
+
+  // Quote category: same operations as for alpha.
+  void categorizeAsQuote (char);
+  void ignoreAsQuote (char);
+  void setQuote (const std::string&);
+
+  // Whitespace category: same operations as for alpha.
+  void categorizeAsWhite (char);
+  void ignoreAsWhite (char);
+  void setWhite (const std::string&);
+
+  // Tokenization options; each simply stores the flag for use by tokenize ().
+  void coalesceAlpha (bool);
+  void coalesceDigits (bool);
+  void coalesceQuoted (bool);
+  void coalesceWhite (bool);
+  void skipWhitespace (bool);
+
+  // Registers a multi-character sequence (e.g. ":=") to be kept as one token.
+  void specialToken (const std::string&);
+
+private:
+  // Returns the category of a character: alpha, digit, white, quote or other.
+  int classify (char);
+
+  // NOTE(review): presumably the text to tokenize, set by the constructor —
+  // the constructor body is not visible here; confirm.
+  std::string mInput;
+
+  // Character sets, one per category; membership is tested with find ().
+  std::string mAlpha;
+  std::string mDigit;
+  std::string mQuote;
+  std::string mWhite;
+
+  // Option flags written by the coalesce* () / skipWhitespace () setters.
+  bool mAlphaCoalesce;
+  bool mDigitCoalesce;
+  bool mQuotedCoalesce;
+  bool mWhiteCoalesce;
+  bool mSkipWhitespace;
+
+  // Sequences registered through specialToken (), in registration order.
+  std::vector <std::string> mSpecialTokens;
+};
+
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
--- a/src/commands/CmdCustom.cpp
+++ b/src/commands/CmdCustom.cpp
@ -95,8 +95,6 @@ int CmdCustom::execute (std::string& output)
 ////////////////////////////////////
  Arguments f = context.args.extract_read_only_filter ();
  Expression e (f);
-  e.toInfix ();
-  e.toPostfix ();

 return 0;
  // TODO e.apply (tasks);
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@ -7,9 +7,9 @@ include_directories (${CMAKE_SOURCE_DIR}
                     ${TASK_INCLUDE_DIRS})

 set (test_SRCS arguments.t att.t autocomplete.t color.t config.t date.t
-               directory.t dom.t duration.t file.t filt.t i18n.t json.t list.t
-               nibbler.t path.t record.t rx.t seq.t subst.t t.benchmark.t t.t
-               taskmod.t tdb.t tdb2.t text.t uri.t util.t variant.t view.t
+               directory.t dom.t duration.t file.t filt.t i18n.t json.t lexer.t
+               list.t nibbler.t path.t record.t rx.t seq.t subst.t t.benchmark.t
+               t.t taskmod.t tdb.t tdb2.t text.t uri.t util.t variant.t view.t
               json_test)

 add_custom_target (test ./run_all DEPENDS ${test_SRCS}
--- a/test/lexer.t.cpp
+++ b/test/lexer.t.cpp
@ -0,0 +1,331 @@
+////////////////////////////////////////////////////////////////////////////////
+// taskwarrior - a command line task list manager.
+//
+// Copyright 2006 - 2011, Paul Beckingham.
+// All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free Software
+// Foundation; either version 2 of the License, or (at your option) any later
+// version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+// details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the
+//
+//     Free Software Foundation, Inc.,
+//     51 Franklin Street, Fifth Floor,
+//     Boston, MA
+//     02110-1301
+//     USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <Lexer.h>
+#include <Context.h>
+#include <Test.h>
+
+Context context;
+
+////////////////////////////////////////////////////////////////////////////////
+// Unit tests for Lexer.  Each case tokenizes an input, checks the token count,
+// and then either checks every token or skips those checks (so the number of
+// reported tests stays constant when the count is wrong).
+int main (int argc, char** argv)
+{
+  // NOTE(review): 80 is presumably the planned test count; it matches the
+  // number of t.is/t.skip calls executed below (9+7+10+9+10+10+7+8+6+4).
+  UnitTest t (80);
+
+  // Default configuration: words, single spaces and punctuation.
+  // NOTE(review): 'tokens' is reused by every case without being cleared here;
+  // this presumably relies on tokenize () resetting the vector - confirm.
+  std::string input = "This is a test.";
+  std::vector <std::string> tokens;
+  {
+    Lexer l (input);
+    l.tokenize (tokens);
+  }
+
+  t.is (tokens.size (), (size_t) 8, "'This is a test.' -> 'This| |is| |a| |test|.'");
+  if (tokens.size () == 8)
+  {
+    t.is (tokens[0], "This", "'This is a test.' [0] -> 'This'");
+    t.is (tokens[1], " ",    "'This is a test.' [1] -> ' '");
+    t.is (tokens[2], "is",   "'This is a test.' [2] -> 'is'");
+    t.is (tokens[3], " ",    "'This is a test.' [3] -> ' '");
+    t.is (tokens[4], "a",    "'This is a test.' [4] -> 'a'");
+    t.is (tokens[5], " ",    "'This is a test.' [5] -> ' '");
+    t.is (tokens[6], "test", "'This is a test.' [6] -> 'test'");
+    t.is (tokens[7], ".",    "'This is a test.' [7] -> '.'");
+  }
+  else
+  {
+    t.skip ("'This is a test.' [0] -> 'This'");
+    t.skip ("'This is a test.' [1] -> ' '");
+    t.skip ("'This is a test.' [2] -> 'is'");
+    t.skip ("'This is a test.' [3] -> ' '");
+    t.skip ("'This is a test.' [4] -> 'a'");
+    t.skip ("'This is a test.' [5] -> ' '");
+    t.skip ("'This is a test.' [6] -> 'test'");
+    t.skip ("'This is a test.' [7] -> '.'");
+  }
+
+  // Alternating alpha and digit runs.
+  input = "a12bcd345efgh6789";
+  {
+    Lexer l (input);
+    l.tokenize (tokens);
+  }
+
+  t.is (tokens.size (), (size_t) 6, "'a12bcd345efgh6789' -> 'a|12|bcd|345|efgh|6789'");
+  if (tokens.size () == 6)
+  {
+    t.is (tokens[0], "a",    "'a12bcd345efgh6789' [0] -> 'a'");
+    t.is (tokens[1], "12",   "'a12bcd345efgh6789' [1] -> '12'");
+    t.is (tokens[2], "bcd",  "'a12bcd345efgh6789' [2] -> 'bcd'");
+    t.is (tokens[3], "345",  "'a12bcd345efgh6789' [3] -> '345'");
+    t.is (tokens[4], "efgh", "'a12bcd345efgh6789' [4] -> 'efgh'");
+    t.is (tokens[5], "6789", "'a12bcd345efgh6789' [5] -> '6789'");
+  }
+  else
+  {
+    t.skip ("'a12bcd345efgh6789' [0] -> 'a'");
+    t.skip ("'a12bcd345efgh6789' [1] -> '12'");
+    t.skip ("'a12bcd345efgh6789' [2] -> 'bcd'");
+    t.skip ("'a12bcd345efgh6789' [3] -> '345'");
+    t.skip ("'a12bcd345efgh6789' [4] -> 'efgh'");
+    t.skip ("'a12bcd345efgh6789' [5] -> '6789'");
+  }
+
+  // Let's throw some ugly Perl at it.
+  // '_' is treated as alpha, and the quoted string is kept as one token.
+  input = "my $variable_name = 'single string';";
+  {
+    Lexer l (input);
+    l.categorizeAsAlpha ('_');
+    l.coalesceQuoted (true);
+    l.tokenize (tokens);
+  }
+
+  t.is (tokens.size (), (size_t) 9, "'my $variable_name = 'single string';' -> 'my| |$|variable_name| |=| |'single string'|;'");
+  if (tokens.size () == 9)
+  {
+    t.is (tokens[0], "my",              "'my $variable_name = 'single string';' [0] -> 'my'");
+    t.is (tokens[1], " ",               "'my $variable_name = 'single string';' [1] -> ' '");
+    t.is (tokens[2], "$",               "'my $variable_name = 'single string';' [2] -> '$'");
+    t.is (tokens[3], "variable_name",   "'my $variable_name = 'single string';' [3] -> 'variable_name'");
+    t.is (tokens[4], " ",               "'my $variable_name = 'single string';' [4] -> ' '");
+    t.is (tokens[5], "=",               "'my $variable_name = 'single string';' [5] -> '='");
+    t.is (tokens[6], " ",               "'my $variable_name = 'single string';' [6] -> ' '");
+    t.is (tokens[7], "'single string'", "'my $variable_name = 'single string';' [7] -> ''single string''");
+    t.is (tokens[8], ";",               "'my $variable_name = 'single string';' [8] -> ';'");
+  }
+  else
+  {
+    t.skip ("'my $variable_name = 'single string';' [0] -> 'my'");
+    t.skip ("'my $variable_name = 'single string';' [1] -> ' '");
+    t.skip ("'my $variable_name = 'single string';' [2] -> '$'");
+    t.skip ("'my $variable_name = 'single string';' [3] -> 'variable_name'");
+    t.skip ("'my $variable_name = 'single string';' [4] -> ' '");
+    t.skip ("'my $variable_name = 'single string';' [5] -> '='");
+    t.skip ("'my $variable_name = 'single string';' [6] -> ' '");
+    t.skip ("'my $variable_name = 'single string';' [7] -> ''single string''");
+    t.skip ("'my $variable_name = 'single string';' [8] -> ';'");
+  }
+
+  // Now exercise all the configurable coalescence.
+  // Baseline: default settings.
+  input = "ab  12 'a'";
+  {
+    Lexer l (input);
+    l.tokenize (tokens);
+  }
+
+  t.is (tokens.size (), (size_t) 8, "'ab  12 'a'' -> 'ab| | |12| |'|a|''");
+  if (tokens.size () == 8)
+  {
+    t.is (tokens[0], "ab", "'ab  12 'a'' [0] -> 'ab'");
+    t.is (tokens[1], " ",  "'ab  12 'a'' [1] -> ' '");
+    t.is (tokens[2], " ",  "'ab  12 'a'' [2] -> ' '");
+    t.is (tokens[3], "12", "'ab  12 'a'' [3] -> '12'");
+    t.is (tokens[4], " ",  "'ab  12 'a'' [4] -> ' '");
+    t.is (tokens[5], "'",  "'ab  12 'a'' [5] -> '''");
+    t.is (tokens[6], "a",  "'ab  12 'a'' [6] -> 'a'");
+    t.is (tokens[7], "'",  "'ab  12 'a'' [7] -> '''");
+  }
+  else
+  {
+    t.skip ("'ab  12 'a'' [0] -> 'ab'");
+    t.skip ("'ab  12 'a'' [1] -> ' '");
+    t.skip ("'ab  12 'a'' [2] -> ' '");
+    t.skip ("'ab  12 'a'' [3] -> '12'");
+    t.skip ("'ab  12 'a'' [4] -> ' '");
+    t.skip ("'ab  12 'a'' [5] -> '''");
+    t.skip ("'ab  12 'a'' [6] -> 'a'");
+    t.skip ("'ab  12 'a'' [7] -> '''");
+  }
+
+  // Alpha coalescing off: each letter is its own token.
+  {
+    Lexer l (input);
+    l.coalesceAlpha (false);
+    l.tokenize (tokens);
+  }
+
+  t.is (tokens.size (), (size_t) 9, "'ab  12 'a'' -> 'a|b| | |12| |'|a|''");
+  if (tokens.size () == 9)
+  {
+    t.is (tokens[0], "a",  "'ab  12 'a'' [0] -> 'a'");
+    t.is (tokens[1], "b",  "'ab  12 'a'' [1] -> 'b'");
+    t.is (tokens[2], " ",  "'ab  12 'a'' [2] -> ' '");
+    t.is (tokens[3], " ",  "'ab  12 'a'' [3] -> ' '");
+    t.is (tokens[4], "12", "'ab  12 'a'' [4] -> '12'");
+    t.is (tokens[5], " ",  "'ab  12 'a'' [5] -> ' '");
+    t.is (tokens[6], "'",  "'ab  12 'a'' [6] -> '''");
+    t.is (tokens[7], "a",  "'ab  12 'a'' [7] -> 'a'");
+    t.is (tokens[8], "'",  "'ab  12 'a'' [8] -> '''");
+  }
+  else
+  {
+    t.skip ("'ab  12 'a'' [0] -> 'a'");
+    t.skip ("'ab  12 'a'' [1] -> 'b'");
+    t.skip ("'ab  12 'a'' [2] -> ' '");
+    t.skip ("'ab  12 'a'' [3] -> ' '");
+    t.skip ("'ab  12 'a'' [4] -> '12'");
+    t.skip ("'ab  12 'a'' [5] -> ' '");
+    t.skip ("'ab  12 'a'' [6] -> '''");
+    t.skip ("'ab  12 'a'' [7] -> 'a'");
+    t.skip ("'ab  12 'a'' [8] -> '''");
+  }
+
+  // Digit coalescing off: each digit is its own token.
+  {
+    Lexer l (input);
+    l.coalesceDigits (false);
+    l.tokenize (tokens);
+  }
+
+  t.is (tokens.size (), (size_t) 9, "'ab  12 'a'' -> 'ab| | |1|2| |'|a|''");
+  if (tokens.size () == 9)
+  {
+    t.is (tokens[0], "ab", "'ab  12 'a'' [0] -> 'ab'");
+    t.is (tokens[1], " ",  "'ab  12 'a'' [1] -> ' '");
+    t.is (tokens[2], " ",  "'ab  12 'a'' [2] -> ' '");
+    t.is (tokens[3], "1",  "'ab  12 'a'' [3] -> '1'");
+    t.is (tokens[4], "2",  "'ab  12 'a'' [4] -> '2'");
+    t.is (tokens[5], " ",  "'ab  12 'a'' [5] -> ' '");
+    t.is (tokens[6], "'",  "'ab  12 'a'' [6] -> '''");
+    t.is (tokens[7], "a",  "'ab  12 'a'' [7] -> 'a'");
+    t.is (tokens[8], "'",  "'ab  12 'a'' [8] -> '''");
+  }
+  else
+  {
+    t.skip ("'ab  12 'a'' [0] -> 'ab'");
+    t.skip ("'ab  12 'a'' [1] -> ' '");
+    t.skip ("'ab  12 'a'' [2] -> ' '");
+    t.skip ("'ab  12 'a'' [3] -> '1'");
+    t.skip ("'ab  12 'a'' [4] -> '2'");
+    t.skip ("'ab  12 'a'' [5] -> ' '");
+    t.skip ("'ab  12 'a'' [6] -> '''");
+    t.skip ("'ab  12 'a'' [7] -> 'a'");
+    t.skip ("'ab  12 'a'' [8] -> '''");
+  }
+
+  // Quote coalescing on: the quoted string, quotes included, is one token.
+  {
+    Lexer l (input);
+    l.coalesceQuoted (true);
+    l.tokenize (tokens);
+  }
+
+  t.is (tokens.size (), (size_t) 6, "'ab  12 'a'' -> 'ab| | |12| |'a''");
+  if (tokens.size () == 6)
+  {
+    t.is (tokens[0], "ab",  "'ab  12 'a'' [0] -> 'ab'");
+    t.is (tokens[1], " ",   "'ab  12 'a'' [1] -> ' '");
+    t.is (tokens[2], " ",   "'ab  12 'a'' [2] -> ' '");
+    t.is (tokens[3], "12",  "'ab  12 'a'' [3] -> '12'");
+    t.is (tokens[4], " ",   "'ab  12 'a'' [4] -> ' '");
+    t.is (tokens[5], "'a'", "'ab  12 'a'' [5] -> ''a''");
+  }
+  else
+  {
+    t.skip ("'ab  12 'a'' [0] -> 'ab'");
+    t.skip ("'ab  12 'a'' [1] -> ' '");
+    t.skip ("'ab  12 'a'' [2] -> ' '");
+    t.skip ("'ab  12 'a'' [3] -> '12'");
+    t.skip ("'ab  12 'a'' [4] -> ' '");
+    t.skip ("'ab  12 'a'' [5] -> ''a''");
+  }
+
+  // Whitespace coalescing on: adjacent spaces merge into one token.
+  {
+    Lexer l (input);
+    l.coalesceWhite (true);
+    l.tokenize (tokens);
+  }
+
+  t.is (tokens.size (), (size_t) 7, "'ab  12 'a'' -> 'ab|  |12| |'|a|''");
+  if (tokens.size () == 7)
+  {
+    t.is (tokens[0], "ab", "'ab  12 'a'' [0] -> 'ab'");
+    t.is (tokens[1], "  ", "'ab  12 'a'' [1] -> '  '");
+    t.is (tokens[2], "12", "'ab  12 'a'' [2] -> '12'");
+    t.is (tokens[3], " ",  "'ab  12 'a'' [3] -> ' '");
+    t.is (tokens[4], "'",  "'ab  12 'a'' [4] -> '''");
+    t.is (tokens[5], "a",  "'ab  12 'a'' [5] -> 'a'");
+    t.is (tokens[6], "'",  "'ab  12 'a'' [6] -> '''");
+  }
+  else
+  {
+    t.skip ("'ab  12 'a'' [0] -> 'ab'");
+    t.skip ("'ab  12 'a'' [1] -> '  '");
+    t.skip ("'ab  12 'a'' [2] -> '12'");
+    t.skip ("'ab  12 'a'' [3] -> ' '");
+    t.skip ("'ab  12 'a'' [4] -> '''");
+    t.skip ("'ab  12 'a'' [5] -> 'a'");
+    t.skip ("'ab  12 'a'' [6] -> '''");
+  }
+
+  // Whitespace skipping on: no whitespace tokens are produced.
+  {
+    Lexer l (input);
+    l.skipWhitespace (true);
+    l.tokenize (tokens);
+  }
+
+  t.is (tokens.size (), (size_t) 5, "'ab  12 'a'' -> 'ab|12|'|a|''");
+  if (tokens.size () == 5)
+  {
+    t.is (tokens[0], "ab", "'ab  12 'a'' [0] -> 'ab'");
+    t.is (tokens[1], "12", "'ab  12 'a'' [1] -> '12'");
+    t.is (tokens[2], "'",  "'ab  12 'a'' [2] -> '''");
+    t.is (tokens[3], "a",  "'ab  12 'a'' [3] -> 'a'");
+    t.is (tokens[4], "'",  "'ab  12 'a'' [4] -> '''");
+  }
+  else
+  {
+    t.skip ("'ab  12 'a'' [0] -> 'ab'");
+    t.skip ("'ab  12 'a'' [1] -> '12'");
+    t.skip ("'ab  12 'a'' [2] -> '''");
+    t.skip ("'ab  12 'a'' [3] -> 'a'");
+    t.skip ("'ab  12 'a'' [4] -> '''");
+  }
+
+  // Special tokens
+  // ":=" is registered so it is emitted whole rather than as ':' and '='.
+  input = "a := 1";
+  {
+    Lexer l (input);
+    l.skipWhitespace (true);
+    l.specialToken (":=");
+    l.tokenize (tokens);
+  }
+
+  t.is (tokens.size (), (size_t) 3, "'a := 1' -> 'a|:=|1'");
+  if (tokens.size () == 3)
+  {
+    t.is (tokens[0], "a",  "'a := 1' [0] -> 'a'");
+    t.is (tokens[1], ":=", "'a := 1' [1] -> ':='");
+    t.is (tokens[2], "1",  "'a := 1' [2] -> '1'");
+  }
+  else
+  {
+    t.skip ("'a := 1' [0] -> 'a'");
+    t.skip ("'a := 1' [1] -> ':='");
+    t.skip ("'a := 1' [2] -> '1'");
+  }
+
+  return 0;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+