Build: Migrated Lexer to libshared

2025-06-26 10:54:28 +02:00 · 2016-06-25 10:11:08 -04:00 · 2016-06-25 10:11:08 -04:00 · ca57bf91e3
commit ca57bf91e3
parent fdca94085e
6 changed files with 2 additions and 1341 deletions
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -11,7 +11,6 @@ set (timew_SRCS CLI.cpp        CLI.h
                Exclusion.cpp  Exclusion.h
                Extensions.cpp Extensions.h
                Interval.cpp   Interval.h
                Lexer.cpp      Lexer.h
                Range.cpp      Range.h
                Rules.cpp      Rules.h
                data.cpp
@ -29,6 +28,7 @@ set (libshared_SRCS libshared/src/Args.cpp          libshared/src/Args.h
                    libshared/src/FS.cpp            libshared/src/FS.h
                    libshared/src/JSON.cpp          libshared/src/JSON.h
                    libshared/src/JSON2.cpp         libshared/src/JSON2.h
                    libshared/src/Lexer.cpp         libshared/src/Lexer.h
                    libshared/src/Msg.cpp           libshared/src/Msg.h
                    libshared/src/Palette.cpp       libshared/src/Palette.h
                    libshared/src/Pig.cpp           libshared/src/Pig.h
--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@ -1,912 +0,0 @@
 ////////////////////////////////////////////////////////////////////////////////
 //
 // Copyright 2013 - 2016, Paul Beckingham, Federico Hernandez.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
 // in the Software without restriction, including without limitation the rights
 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 // copies of the Software, and to permit persons to whom the Software is
 // furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included
 // in all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 // SOFTWARE.
 //
 // http://www.opensource.org/licenses/mit-license.php
 //
 ////////////////////////////////////////////////////////////////////////////////
 #include <cmake.h>
 #include <Lexer.h>
 #include <Datetime.h>
 #include <Duration.h>
 #include <algorithm>
 #include <tuple>
 #include <ctype.h>
 #include <unicode.h>
 #include <utf8.h>
 static const std::string uuid_pattern = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";
 static const unsigned int uuid_min_length = 8;
 std::string Lexer::dateFormat = "";
 ////////////////////////////////////////////////////////////////////////////////
 Lexer::Lexer (const std::string& text)
 : _text (text)
 , _eos (text.size ())
 {
 }
 ////////////////////////////////////////////////////////////////////////////////
 // When a Lexer object is constructed with a string, this method walks through
 // the stream of low-level tokens.
 bool Lexer::token (std::string& token, Lexer::Type& type)
 {
  // Eat white space.
  while (unicodeWhitespace (_text[_cursor]))
    utf8_next_char (_text, _cursor);
  // Terminate at EOS.
  if (isEOS ())
    return false;
  if (isString    (token, type, "'\"") ||
      isUUID      (token, type, true)  ||
      isDate      (token, type)        ||
      isDuration  (token, type)        ||
      isURL       (token, type)        ||
      isHexNumber (token, type)        ||
      isNumber    (token, type)        ||
      isPath      (token, type)        ||
      isPattern   (token, type)        ||
      isOperator  (token, type)        ||
      isWord      (token, type))
    return true;
  return false;
 }
 ////////////////////////////////////////////////////////////////////////////////
 std::vector <std::tuple <std::string, Lexer::Type>> Lexer::tokenize (const std::string& input)
 {
  std::vector <std::tuple <std::string, Lexer::Type>> tokens;
  std::string token;
  Lexer::Type type;
  Lexer lexer (input);
  while (lexer.token (token, type))
    tokens.push_back (std::make_tuple (token, type));
  return tokens;
 }
 ////////////////////////////////////////////////////////////////////////////////
 // No L10N - these are for internal purposes.
 const std::string Lexer::typeName (const Lexer::Type& type)
 {
  switch (type)
  {
  case Lexer::Type::uuid:         return "uuid";
  case Lexer::Type::number:       return "number";
  case Lexer::Type::hex:          return "hex";
  case Lexer::Type::string:       return "string";
  case Lexer::Type::url:          return "url";
  case Lexer::Type::path:         return "path";
  case Lexer::Type::pattern:      return "pattern";
  case Lexer::Type::op:           return "op";
  case Lexer::Type::word:         return "word";
  case Lexer::Type::date:         return "date";
  case Lexer::Type::duration:     return "duration";
  }
  return "unknown";
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Lexer::Type::number
 //   \d+
 //   [ . \d+ ]
 //   [ e|E [ +|- ] \d+ [ . \d+ ] ]
 //   not followed by non-operator.
 bool Lexer::isNumber (std::string& token, Lexer::Type& type)
 {
  std::size_t marker = _cursor;
  if (unicodeLatinDigit (_text[marker]))
  {
    ++marker;
    while (unicodeLatinDigit (_text[marker]))
      utf8_next_char (_text, marker);
    if (_text[marker] == '.')
    {
      ++marker;
      if (unicodeLatinDigit (_text[marker]))
      {
        ++marker;
        while (unicodeLatinDigit (_text[marker]))
          utf8_next_char (_text, marker);
      }
    }
    if (_text[marker] == 'e' ||
        _text[marker] == 'E')
    {
      ++marker;
      if (_text[marker] == '+' ||
          _text[marker] == '-')
        ++marker;
      if (unicodeLatinDigit (_text[marker]))
      {
        ++marker;
        while (unicodeLatinDigit (_text[marker]))
          utf8_next_char (_text, marker);
        if (_text[marker] == '.')
        {
          ++marker;
          if (unicodeLatinDigit (_text[marker]))
          {
            ++marker;
            while (unicodeLatinDigit (_text[marker]))
              utf8_next_char (_text, marker);
          }
        }
      }
    }
    // Lookahread: !<unicodeWhitespace> | !<isSingleCharOperator>
    // If there is an immediately consecutive character, that is not an operator, fail.
    if (_eos > marker &&
        ! unicodeWhitespace (_text[marker]) &&
        ! isSingleCharOperator (_text[marker]))
      return false;
    token = _text.substr (_cursor, marker - _cursor);
    type = Lexer::Type::number;
    _cursor = marker;
    return true;
  }
  return false;
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Lexer::Type::number
 //   \d+
 bool Lexer::isInteger (std::string& token, Lexer::Type& type)
 {
  std::size_t marker = _cursor;
  if (unicodeLatinDigit (_text[marker]))
  {
    ++marker;
    while (unicodeLatinDigit (_text[marker]))
      utf8_next_char (_text, marker);
    token = _text.substr (_cursor, marker - _cursor);
    type = Lexer::Type::number;
    _cursor = marker;
    return true;
  }
  return false;
 }
 ////////////////////////////////////////////////////////////////////////////////
 bool Lexer::isSingleCharOperator (int c)
 {
  return c == '+' ||  // Addition
         c == '-' ||  // Subtraction or unary minus = ambiguous
         c == '*' ||  // Multiplication
         c == '/' ||  // Diviѕion
         c == '(' ||  // Precedence open parenthesis
         c == ')' ||  // Precedence close parenthesis
         c == '<' ||  // Less than
         c == '>' ||  // Greater than
         c == '^' ||  // Exponent
         c == '!' ||  // Unary not
         c == '%' ||  // Modulus
         c == '=' ||  // Partial match
         c == '~';    // Pattern match
 }
 ////////////////////////////////////////////////////////////////////////////////
 bool Lexer::isDoubleCharOperator (int c0, int c1, int c2)
 {
  return (c0 == '=' && c1 == '=')                        ||
         (c0 == '!' && c1 == '=')                        ||
         (c0 == '<' && c1 == '=')                        ||
         (c0 == '>' && c1 == '=')                        ||
         (c0 == 'o' && c1 == 'r' && isBoundary (c1, c2)) ||
         (c0 == '|' && c1 == '|')                        ||
         (c0 == '&' && c1 == '&')                        ||
         (c0 == '!' && c1 == '~');
 }
 ////////////////////////////////////////////////////////////////////////////////
 bool Lexer::isTripleCharOperator (int c0, int c1, int c2, int c3)
 {
  return (c0 == 'a' && c1 == 'n' && c2 == 'd' && isBoundary (c2, c3)) ||
         (c0 == 'x' && c1 == 'o' && c2 == 'r' && isBoundary (c2, c3)) ||
         (c0 == '!' && c1 == '=' && c2 == '=');
 }
 ////////////////////////////////////////////////////////////////////////////////
 bool Lexer::isBoundary (int left, int right)
 {
  // EOS
  if (right == '\0')                                         return true;
  // XOR
  if (unicodeLatinAlpha (left) != unicodeLatinAlpha (right)) return true;
  if (unicodeLatinDigit (left) != unicodeLatinDigit (right)) return true;
  if (unicodeWhitespace (left) != unicodeWhitespace (right)) return true;
  // OR
  if (isPunctuation (left) || isPunctuation (right))         return true;
  return false;
 }
 ////////////////////////////////////////////////////////////////////////////////
 bool Lexer::isHardBoundary (int left, int right)
 {
  // EOS
  if (right == '\0')
    return true;
  // FILTER operators that don't need to be surrounded by whitespace.
  if (left == '(' ||
      left == ')' ||
      right == '(' ||
      right == ')')
    return true;
  return false;
 }
 ////////////////////////////////////////////////////////////////////////////////
 bool Lexer::isPunctuation (int c)
 {
  return isprint (c)   &&
         c != ' '      &&
         c != '@'      &&
         c != '#'      &&
         c != '$'      &&
         c != '_'      &&
         ! unicodeLatinDigit (c) &&
         ! unicodeLatinAlpha (c);
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Assumes that quotes is a string containing a non-trivial set of quote
 // characters.
 std::string Lexer::dequote (const std::string& input, const std::string& quotes)
 {
  if (input.length ())
  {
    int quote = input[0];
    if (quotes.find (quote) != std::string::npos)
    {
      size_t len = input.length ();
      if (quote == input[len - 1])
        return input.substr (1, len - 2);
    }
  }
  return input;
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Detects characters in an input string that indicate quotes were required, or
 // escapes, to get them past the shell.
 bool Lexer::wasQuoted (const std::string& input)
 {
  if (input.find_first_of (" \t()<>&~") != std::string::npos)
    return true;
  return false;
 }
 ////////////////////////////////////////////////////////////////////////////////
 bool Lexer::isEOS () const
 {
  return _cursor >= _eos;
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Converts '0'     -> 0
 //          '9'     -> 9
 //          'a'/'A' -> 10
 //          'f'/'F' -> 15
 int Lexer::hexToInt (int c)
 {
       if (c >= '0' && c <= '9') return (c - '0');
  else if (c >= 'a' && c <= 'f') return (c - 'a' + 10);
  else                           return (c - 'A' + 10);
 }
 ////////////////////////////////////////////////////////////////////////////////
 int Lexer::hexToInt (int c0, int c1)
 {
  return (hexToInt (c0) << 4) + hexToInt (c1);
 }
 ////////////////////////////////////////////////////////////////////////////////
 int Lexer::hexToInt (int c0, int c1, int c2, int c3)
 {
  return (hexToInt (c0) << 12) +
         (hexToInt (c1) << 8)  +
         (hexToInt (c2) << 4)  +
          hexToInt (c3);
 }
 ////////////////////////////////////////////////////////////////////////////////
 std::string Lexer::trimLeft (const std::string& in, const std::string& t /*= " "*/)
 {
  std::string::size_type ws = in.find_first_not_of (t);
  if (ws > 0)
  {
    std::string out {in};
    return out.erase (0, ws);
  }
  return in;
 }
 ////////////////////////////////////////////////////////////////////////////////
 std::string Lexer::trimRight (const std::string& in, const std::string& t /*= " "*/)
 {
  std::string out {in};
  return out.erase (in.find_last_not_of (t) + 1);
 }
 ////////////////////////////////////////////////////////////////////////////////
 std::string Lexer::trim (const std::string& in, const std::string& t /*= " "*/)
 {
  return trimLeft (trimRight (in, t), t);
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Lexer::Type::string
 //   '|"
 //   [ U+XXXX | \uXXXX | \" | \' | \\ | \/ | \b | \f | \n | \r | \t | . ]
 //   '|"
 bool Lexer::isString (std::string& token, Lexer::Type& type, const std::string& quotes)
 {
  std::size_t marker = _cursor;
  if (readWord (_text, quotes, marker, token))
  {
    type = Lexer::Type::string;
    _cursor = marker;
    return true;
  }
  return false;
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Lexer::Type::date
 //   <Datetime> (followed by eos, WS, operator)
 bool Lexer::isDate (std::string& token, Lexer::Type& type)
 {
  // Try an ISO date parse.
  std::size_t i = _cursor;
  Datetime d;
  if (d.parse (_text, i, Lexer::dateFormat) &&
      (i >= _eos ||
       unicodeWhitespace (_text[i]) ||
       isSingleCharOperator (_text[i])))
  {
    type = Lexer::Type::date;
    token = _text.substr (_cursor, i - _cursor);
    _cursor = i;
    return true;
  }
  return false;
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Lexer::Type::duration
 //   <Duration> (followed by eos, WS, operator)
 bool Lexer::isDuration (std::string& token, Lexer::Type& type)
 {
  std::size_t marker = _cursor;
  std::string extractedToken;
  Lexer::Type extractedType;
  if (isOperator(extractedToken, extractedType))
  {
    _cursor = marker;
    return false;
  }
  marker = _cursor;
  Duration dur;
  if (dur.parse (_text, marker) &&
      (marker >= _eos ||
       unicodeWhitespace (_text[marker]) ||
       isSingleCharOperator (_text[marker])))
  {
    type = Lexer::Type::duration;
    token = _text.substr (_cursor, marker - _cursor);
    _cursor = marker;
    return true;
  }
  return false;
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Lexer::Type::uuid
 //   XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
 //   XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX
 //   XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXX
 //   XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXX
 //   ...
 //   XXXXXXXX-XX
 //   XXXXXXXX-X
 //   XXXXXXXX-
 //   XXXXXXXX
 //   Followed only by EOS, whitespace, or single character operator.
 bool Lexer::isUUID (std::string& token, Lexer::Type& type, bool endBoundary)
 {
  std::size_t marker = _cursor;
  // Greedy.
  std::size_t i = 0;
  for (; i < 36 && marker + i < _eos; i++)
  {
    if (uuid_pattern[i] == 'x')
    {
      if (! unicodeHexDigit (_text[marker + i]))
        break;
    }
    else if (uuid_pattern[i] != _text[marker + i])
      break;
  }
  if (i >= uuid_min_length              &&
      (! endBoundary                    ||
       ! _text[marker + i]              ||
       unicodeWhitespace (_text[marker + i]) ||
       isSingleCharOperator (_text[marker + i])))
  {
    token = _text.substr (_cursor, i);
    type = Lexer::Type::uuid;
    _cursor += i;
    return true;
  }
  return false;
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Lexer::Type::hex
 //   0xX+
 bool Lexer::isHexNumber (std::string& token, Lexer::Type& type)
 {
  std::size_t marker = _cursor;
  if (_eos - marker >= 3 &&
      _text[marker + 0] == '0' &&
      _text[marker + 1] == 'x')
  {
    marker += 2;
    while (unicodeHexDigit (_text[marker]))
      ++marker;
    if (marker - _cursor > 2)
    {
      token = _text.substr (_cursor, marker - _cursor);
      type = Lexer::Type::hex;
      _cursor = marker;
      return true;
    }
  }
  return false;
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Lexer::Type::word
 //   [^\s]+
 bool Lexer::isWord (std::string& token, Lexer::Type& type)
 {
  std::size_t marker = _cursor;
  while (_text[marker]                  &&
         ! unicodeWhitespace (_text[marker]) &&
         ! isSingleCharOperator (_text[marker]))
    utf8_next_char (_text, marker);
  if (marker > _cursor)
  {
    token = _text.substr (_cursor, marker - _cursor);
    type = Lexer::Type::word;
    _cursor = marker;
    return true;
  }
  return false;
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Lexer::Type::url
 //   http [s] :// ...
 bool Lexer::isURL (std::string& token, Lexer::Type& type)
 {
  std::size_t marker = _cursor;
  if (_eos - _cursor > 9 &&    // length 'https://*'
      (_text[marker + 0] == 'h' || _text[marker + 0] == 'H') &&
      (_text[marker + 1] == 't' || _text[marker + 1] == 'T') &&
      (_text[marker + 2] == 't' || _text[marker + 2] == 'T') &&
      (_text[marker + 3] == 'p' || _text[marker + 3] == 'P'))
  {
    marker += 4;
    if (_text[marker + 0] == 's' || _text[marker + 0] == 'S')
      ++marker;
    if (_text[marker + 0] == ':' &&
        _text[marker + 1] == '/' &&
        _text[marker + 2] == '/')
    {
      marker += 3;
      while (marker < _eos &&
             ! unicodeWhitespace (_text[marker]))
        utf8_next_char (_text, marker);
      token = _text.substr (_cursor, marker - _cursor);
      type = Lexer::Type::url;
      _cursor = marker;
      return true;
    }
  }
  return false;
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Lexer::Type::path
 //   ( / <non-slash, non-whitespace> )+
 bool Lexer::isPath (std::string& token, Lexer::Type& type)
 {
  std::size_t marker = _cursor;
  int slashCount = 0;
  while (1)
  {
    if (_text[marker] == '/')
    {
      ++marker;
      ++slashCount;
    }
    else
      break;
    if (_text[marker] &&
        ! unicodeWhitespace (_text[marker]) &&
        _text[marker] != '/')
    {
      utf8_next_char (_text, marker);
      while (_text[marker] &&
             ! unicodeWhitespace (_text[marker]) &&
             _text[marker] != '/')
        utf8_next_char (_text, marker);
    }
    else
      break;
  }
  if (marker > _cursor &&
      slashCount > 3)
  {
    type = Lexer::Type::path;
    token = _text.substr (_cursor, marker - _cursor);
    _cursor = marker;
    return true;
  }
  return false;
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Lexer::Type::pattern
 //   / <unquoted-string> /  <EOS> | <unicodeWhitespace>
 bool Lexer::isPattern (std::string& token, Lexer::Type& type)
 {
  std::size_t marker = _cursor;
  std::string word;
  if (readWord (_text, "/", _cursor, word) &&
      (isEOS () ||
       unicodeWhitespace (_text[_cursor])))
  {
    token = _text.substr (marker, _cursor - marker);
    type = Lexer::Type::pattern;
    return true;
  }
  _cursor = marker;
  return false;
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Lexer::Type::op
 //   _hastag_ | _notag | _neg_ | _pos_ |
 //   <isTripleCharOperator> |
 //   <isDoubleCharOperator> |
 //   <isSingleCharOperator> |
 bool Lexer::isOperator (std::string& token, Lexer::Type& type)
 {
  std::size_t marker = _cursor;
  if (_eos - marker >= 8 && _text.substr (marker, 8) == "_hastag_")
  {
    marker += 8;
    type = Lexer::Type::op;
    token = _text.substr (_cursor, marker - _cursor);
    _cursor = marker;
    return true;
  }
  else if (_eos - marker >= 7 && _text.substr (marker, 7) == "_notag_")
  {
    marker += 7;
    type = Lexer::Type::op;
    token = _text.substr (_cursor, marker - _cursor);
    _cursor = marker;
    return true;
  }
  else if (_eos - marker >= 5 && _text.substr (marker, 5) == "_neg_")
  {
    marker += 5;
    type = Lexer::Type::op;
    token = _text.substr (_cursor, marker - _cursor);
    _cursor = marker;
    return true;
  }
  else if (_eos - marker >= 5 && _text.substr (marker, 5) == "_pos_")
  {
    marker += 5;
    type = Lexer::Type::op;
    token = _text.substr (_cursor, marker - _cursor);
    _cursor = marker;
    return true;
  }
  else if (_eos - marker >= 3 &&
      isTripleCharOperator (_text[marker], _text[marker + 1], _text[marker + 2], _text[marker + 3]))
  {
    marker += 3;
    type = Lexer::Type::op;
    token = _text.substr (_cursor, marker - _cursor);
    _cursor = marker;
    return true;
  }
  else if (_eos - marker >= 2 &&
      isDoubleCharOperator (_text[marker], _text[marker + 1], _text[marker + 2]))
  {
    marker += 2;
    type = Lexer::Type::op;
    token = _text.substr (_cursor, marker - _cursor);
    _cursor = marker;
    return true;
  }
  else if (isSingleCharOperator (_text[marker]))
  {
    token = _text[marker];
    type = Lexer::Type::op;
    _cursor = ++marker;
    return true;
  }
  return false;
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Static
 std::string Lexer::typeToString (Lexer::Type type)
 {
       if (type == Lexer::Type::string)       return std::string ("\033[38;5;7m\033[48;5;3m")    + "string"       + "\033[0m";
  else if (type == Lexer::Type::uuid)         return std::string ("\033[38;5;7m\033[48;5;10m")   + "uuid"         + "\033[0m";
  else if (type == Lexer::Type::hex)          return std::string ("\033[38;5;7m\033[48;5;14m")   + "hex"          + "\033[0m";
  else if (type == Lexer::Type::number)       return std::string ("\033[38;5;7m\033[48;5;6m")    + "number"       + "\033[0m";
  else if (type == Lexer::Type::url)          return std::string ("\033[38;5;7m\033[48;5;4m")    + "url"          + "\033[0m";
  else if (type == Lexer::Type::path)         return std::string ("\033[37;102m")                + "path"         + "\033[0m";
  else if (type == Lexer::Type::pattern)      return std::string ("\033[37;42m")                 + "pattern"      + "\033[0m";
  else if (type == Lexer::Type::op)           return std::string ("\033[38;5;7m\033[48;5;203m")  + "op"           + "\033[0m";
  else if (type == Lexer::Type::word)         return std::string ("\033[38;5;15m\033[48;5;236m") + "word"         + "\033[0m";
  else if (type == Lexer::Type::date)         return std::string ("\033[38;5;15m\033[48;5;34m")  + "date"         + "\033[0m";
  else if (type == Lexer::Type::duration)     return std::string ("\033[38;5;15m\033[48;5;34m")  + "duration"     + "\033[0m";
  else                                        return std::string ("\033[37;41m")                 + "unknown"      + "\033[0m";
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Full implementation of a quoted word.  Includes:
 //   '\''
 //   '"'
 //   "'"
 //   "\""
 //   'one two'
 // Result includes the quotes.
 bool Lexer::readWord (
  const std::string& text,
  const std::string& quotes,
  std::string::size_type& cursor,
  std::string& word)
 {
  if (quotes.find (text[cursor]) == std::string::npos)
    return false;
  std::string::size_type eos = text.length ();
  int quote = text[cursor++];
  word = quote;
  int c;
  while ((c = text[cursor]))
  {
    // Quoted word ends on a quote.
    if (quote && quote == c)
    {
      word += utf8_character (utf8_next_char (text, cursor));
      break;
    }
    // Unicode U+XXXX or \uXXXX codepoint.
    else if (eos - cursor >= 6 &&
             ((text[cursor + 0] == 'U'  && text[cursor + 1] == '+') ||
              (text[cursor + 0] == '\\' && text[cursor + 1] == 'u')) &&
             unicodeHexDigit (text[cursor + 2]) &&
             unicodeHexDigit (text[cursor + 3]) &&
             unicodeHexDigit (text[cursor + 4]) &&
             unicodeHexDigit (text[cursor + 5]))
    {
      word += utf8_character (
                hexToInt (
                  text[cursor + 2],
                  text[cursor + 3],
                  text[cursor + 4],
                  text[cursor + 5]));
      cursor += 6;
    }
    // An escaped thing.
    else if (c == '\\')
    {
      c = text[++cursor];
      switch (c)
      {
      case '"':  word += (char) 0x22; ++cursor; break;
      case '\'': word += (char) 0x27; ++cursor; break;
      case '\\': word += (char) 0x5C; ++cursor; break;
      case 'b':  word += (char) 0x08; ++cursor; break;
      case 'f':  word += (char) 0x0C; ++cursor; break;
      case 'n':  word += (char) 0x0A; ++cursor; break;
      case 'r':  word += (char) 0x0D; ++cursor; break;
      case 't':  word += (char) 0x09; ++cursor; break;
      case 'v':  word += (char) 0x0B; ++cursor; break;
      // This pass-through default case means that anything can be escaped
      // harmlessly. In particular 'quote' is included, if it not one of the
      // above characters.
      default:   word += (char) c;    ++cursor; break;
      }
    }
    // Ordinary character.
    else
      word += utf8_character (utf8_next_char (text, cursor));
  }
  // Verify termination.
  return word[0]                  == quote &&
         word[word.length () - 1] == quote &&
         word.length () >= 2;
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Full implementation of an unquoted word.  Includes:
 //   one\ two
 //   abcU+0020def
 //   abc\u0020def
 //   a\tb
 //
 // Ends at:
 //   Lexer::isEOS
 //   unicodeWhitespace
 //   Lexer::isHardBoundary
 bool Lexer::readWord (
  const std::string& text,
  std::string::size_type& cursor,
  std::string& word)
 {
  std::string::size_type eos = text.length ();
  word = "";
  int c;
  int prev = 0;
  while ((c = text[cursor]))  // Handles EOS.
  {
    // Unquoted word ends on white space.
    if (unicodeWhitespace (c))
      break;
    // Parentheses mostly.
    if (prev && Lexer::isHardBoundary (prev, c))
      break;
    // Unicode U+XXXX or \uXXXX codepoint.
    else if (eos - cursor >= 6 &&
             ((text[cursor + 0] == 'U'  && text[cursor + 1] == '+') ||
              (text[cursor + 0] == '\\' && text[cursor + 1] == 'u')) &&
             unicodeHexDigit (text[cursor + 2]) &&
             unicodeHexDigit (text[cursor + 3]) &&
             unicodeHexDigit (text[cursor + 4]) &&
             unicodeHexDigit (text[cursor + 5]))
    {
      word += utf8_character (
                hexToInt (
                  text[cursor + 2],
                  text[cursor + 3],
                  text[cursor + 4],
                  text[cursor + 5]));
      cursor += 6;
    }
    // An escaped thing.
    else if (c == '\\')
    {
      c = text[++cursor];
      switch (c)
      {
      case '"':  word += (char) 0x22; ++cursor; break;
      case '\'': word += (char) 0x27; ++cursor; break;
      case '\\': word += (char) 0x5C; ++cursor; break;
      case 'b':  word += (char) 0x08; ++cursor; break;
      case 'f':  word += (char) 0x0C; ++cursor; break;
      case 'n':  word += (char) 0x0A; ++cursor; break;
      case 'r':  word += (char) 0x0D; ++cursor; break;
      case 't':  word += (char) 0x09; ++cursor; break;
      case 'v':  word += (char) 0x0B; ++cursor; break;
      // This pass-through default case means that anything can be escaped
      // harmlessly. In particular 'quote' is included, if it not one of the
      // above characters.
      default:   word += (char) c;    ++cursor; break;
      }
    }
    // Ordinary character.
    else
      word += utf8_character (utf8_next_char (text, cursor));
    prev = c;
  }
  return word.length () > 0 ? true : false;
 }
 ////////////////////////////////////////////////////////////////////////////////
--- a/src/Lexer.h
+++ b/src/Lexer.h
@ -1,96 +0,0 @@
 ////////////////////////////////////////////////////////////////////////////////
 //
 // Copyright 2013 - 2016, Paul Beckingham, Federico Hernandez.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
 // in the Software without restriction, including without limitation the rights
 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 // copies of the Software, and to permit persons to whom the Software is
 // furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included
 // in all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 // SOFTWARE.
 //
 // http://www.opensource.org/licenses/mit-license.php
 //
 ////////////////////////////////////////////////////////////////////////////////
 #ifndef INCLUDED_LEXER
 #define INCLUDED_LEXER
 #include <string>
 #include <map>
 #include <vector>
 #include <tuple>
 #include <cstddef>
 class Lexer
 {
 public:
  // These are overridable.
  static std::string dateFormat;
  enum class Type { uuid, number, hex,
                    string,
                    url,
                    path,
                    pattern,
                    op,
                    word,
                    date, duration };
  explicit Lexer (const std::string&);
  bool token (std::string&, Lexer::Type&);
  static std::string typeToString (Lexer::Type);
  // Static helpers.
  static std::vector <std::tuple <std::string, Lexer::Type>> tokenize (const std::string&);
  static const std::string typeName          (const Lexer::Type&);
  static bool isSingleCharOperator           (int);
  static bool isDoubleCharOperator           (int, int, int);
  static bool isTripleCharOperator           (int, int, int, int);
  static bool isBoundary                     (int, int);
  static bool isHardBoundary                 (int, int);
  static bool isPunctuation                  (int);
  static bool wasQuoted                      (const std::string&);
  static bool readWord                       (const std::string&, const std::string&, std::string::size_type&, std::string&);
  static bool readWord                       (const std::string&, std::string::size_type&, std::string&);
  static int hexToInt                        (int);
  static int hexToInt                        (int, int);
  static int hexToInt                        (int, int, int, int);
  static std::string trimLeft                (const std::string& in, const std::string& t = " ");
  static std::string trimRight               (const std::string& in, const std::string& t = " ");
  static std::string trim                    (const std::string& in, const std::string& t = " ");
  static std::string dequote                 (const std::string&, const std::string& quotes = "'\"");
  // Stream Classifiers.
  bool isEOS          () const;
  bool isString       (std::string&, Lexer::Type&, const std::string&);
  bool isDate         (std::string&, Lexer::Type&);
  bool isDuration     (std::string&, Lexer::Type&);
  bool isUUID         (std::string&, Lexer::Type&, bool);
  bool isNumber       (std::string&, Lexer::Type&);
  bool isInteger      (std::string&, Lexer::Type&);
  bool isHexNumber    (std::string&, Lexer::Type&);
  bool isURL          (std::string&, Lexer::Type&);
  bool isPath         (std::string&, Lexer::Type&);
  bool isPattern      (std::string&, Lexer::Type&);
  bool isOperator     (std::string&, Lexer::Type&);
  bool isWord         (std::string&, Lexer::Type&);
 private:
  std::string _text   {};
  std::size_t _cursor {0};
  std::size_t _eos    {0};
 };
 #endif
--- a/test/.gitignore
+++ b/test/.gitignore
@ -4,7 +4,6 @@ data.t
 exclusion.t
 helper.t
 interval.t
 lexer.t
 range.t
 rules.t
 util.t
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@ -14,7 +14,7 @@ include_directories (${CMAKE_SOURCE_DIR}
 include_directories (${CMAKE_INSTALL_PREFIX}/include)
 link_directories(${CMAKE_INSTALL_PREFIX}/lib)
-set (test_SRCS data.t exclusion.t helper.t interval.t lexer.t range.t rules.t util.t)
+set (test_SRCS data.t exclusion.t helper.t interval.t range.t rules.t util.t)
 add_custom_target (test ./run_all --verbose
                        DEPENDS ${test_SRCS}
--- a/test/lexer.t.cpp
+++ b/test/lexer.t.cpp
@ -1,330 +0,0 @@
 ////////////////////////////////////////////////////////////////////////////////
 //
 // Copyright 2013 - 2016, Göteborg Bit Factory.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
 // in the Software without restriction, including without limitation the rights
 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 // copies of the Software, and to permit persons to whom the Software is
 // furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included
 // in all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 // SOFTWARE.
 //
 // http://www.opensource.org/licenses/mit-license.php
 //
 ////////////////////////////////////////////////////////////////////////////////
 #include <cmake.h>
 #include <Lexer.h>
 #include <iostream>
 #include <vector>
 #include <string.h>
 #include <test.h>
 ////////////////////////////////////////////////////////////////////////////////
 int main (int, char**)
 {
  UnitTest t (555);
  std::vector <std::pair <std::string, Lexer::Type>> tokens;
  std::string token;
  Lexer::Type type;
  // static bool Lexer::dequote (std::string&, const std::string& quotes = "'\"");
  t.is (Lexer::dequote ("foo"),         "foo",       "Lexer::dequote foo --> foo");
  t.is (Lexer::dequote ("'foo'"),       "foo",       "Lexer::dequote 'foo' --> foo");
  t.is (Lexer::dequote ("'o\\'clock'"), "o\\'clock", "Lexer::dequote 'o\\'clock' --> o\\'clock");
  t.is (Lexer::dequote ("abba", "a"),   "bb",        "Lexer::dequote 'abba' (a) --> bb");
  // Should result in no tokens.
  Lexer l0 ("");
  t.notok (l0.token (token, type), "'' --> no tokens");
  // Should result in no tokens.
  Lexer l1 ("       \t ");
  t.notok (l1.token (token, type), "'       \\t ' --> no tokens");
  // Test for numbers that are no longer ISO-8601 dates.
  Lexer l3 ("1 12 123 1234 12345 123456 1234567");
  tokens.clear ();
  while (l3.token (token, type))
  {
    std::cout << "# «" << token << "» " << Lexer::typeName (type) << '\n';
    tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
  }
  t.is ((int)tokens.size (),     7,                           "7 tokens");
  t.is (tokens[0].first,         "1",                         "tokens[0] == '1'");
  t.is ((int) tokens[0].second,  (int) Lexer::Type::number,   "tokens[0] == Type::number");
  t.is (tokens[1].first,         "12",                        "tokens[1] == '12'");
  t.is ((int) tokens[1].second,  (int) Lexer::Type::number,   "tokens[1] == Type::number");
  t.is (tokens[2].first,         "123",                       "tokens[2] == '123'");
  t.is ((int) tokens[2].second,  (int) Lexer::Type::number,   "tokens[2] == Type::number"); // 70
  t.is (tokens[3].first,         "1234",                      "tokens[3] == '1234'");
  t.is ((int) tokens[3].second,  (int) Lexer::Type::date,     "tokens[3] == Type::date");
  t.is (tokens[4].first,         "12345",                     "tokens[4] == '12345'");
  t.is ((int) tokens[4].second,  (int) Lexer::Type::number,   "tokens[4] == Type::number");
  t.is (tokens[5].first,         "123456",                    "tokens[5] == '123456'");
  t.is ((int) tokens[5].second,  (int) Lexer::Type::date,     "tokens[5] == Type::date");
  t.is (tokens[6].first,         "1234567",                   "tokens[6] == '1234567'");
  t.is ((int) tokens[6].second,  (int) Lexer::Type::duration, "tokens[6] == Type::duration");
  // static bool readWord (const std::string&, const std::string&, std::string::size_type&, std::string&);
  std::string::size_type cursor = 0;
  std::string word;
  t.ok (Lexer::readWord ("'one two'", "'\"", cursor, word), "readWord ''one two'' --> true");
  t.is (word, "'one two'",                                  "  word '" + word + "'");
  t.is ((int)cursor, 9,                                     "  cursor");
  // Unterminated quoted string is invalid.
  cursor = 0;
  t.notok (Lexer::readWord ("'one", "'\"", cursor, word),   "readWord ''one' --> false");
  // static bool readWord (const std::string&, std::string::size_type&, std::string&);
  cursor = 0;
  t.ok (Lexer::readWord ("input", cursor, word),            "readWord 'input' --> true");
  t.is (word, "input",                                      "  word '" + word + "'");
  t.is ((int)cursor, 5,                                     "  cursor");
  cursor = 0;
  t.ok (Lexer::readWord ("one\\ two", cursor, word),        "readWord 'one\\ two' --> true");
  t.is (word, "one two",                                    "  word '" + word + "'");
  t.is ((int)cursor, 8,                                     "  cursor");
  cursor = 0;
  t.ok (Lexer::readWord ("\\u20A43", cursor, word),         "readWord '\\u20A43' --> true");
  t.is (word, "₤3",                                         "  word '" + word + "'");
  t.is ((int)cursor, 7,                                     "  cursor");
  cursor = 0;
  t.ok (Lexer::readWord ("U+20AC4", cursor, word),          "readWord '\\u20AC4' --> true");
  t.is (word, "€4",                                         "  word '" + word + "'");
  t.is ((int)cursor, 7,                                     "  cursor");
  std::string text = "one 'two' three\\ four";
  cursor = 0;
  t.ok (Lexer::readWord (text, cursor, word),               "readWord \"one 'two' three\\ four\" --> true");
  t.is (word, "one",                                        "  word '" + word + "'");
  cursor++;
  t.ok (Lexer::readWord (text, cursor, word),               "readWord \"one 'two' three\\ four\" --> true");
  t.is (word, "'two'",                                      "  word '" + word + "'");
  cursor++;
  t.ok (Lexer::readWord (text, cursor, word),               "readWord \"one 'two' three\\ four\" --> true");
  t.is (word, "three four",                                 "  word '" + word + "'");
  text = "one     ";
  cursor = 0;
  t.ok (Lexer::readWord (text, cursor, word),               "readWord \"one     \" --> true");
  t.is (word, "one",                                        "  word '" + word + "'");
  // Test all Lexer types.
  #define NO {"",Lexer::Type::word}
  struct
  {
    const char* input;
    struct
    {
      const char* token;
      Lexer::Type type;
    } results[5];
  } lexerTests[] =
  {
    // Pattern
    { "/foo/",                                        { { "/foo/",                                        Lexer::Type::pattern      }, NO, NO, NO, NO }, },
    { "/a\\/b/",                                      { { "/a\\/b/",                                      Lexer::Type::pattern      }, NO, NO, NO, NO }, },
    { "/'/",                                          { { "/'/",                                          Lexer::Type::pattern      }, NO, NO, NO, NO }, },
    // Path
    { "/long/path/to/file.txt",                       { { "/long/path/to/file.txt",                       Lexer::Type::path         }, NO, NO, NO, NO }, },
    // Word
    { "1.foo.bar",                                    { { "1.foo.bar",                                    Lexer::Type::word         }, NO, NO, NO, NO }, },
    // URL
    { "http://tasktools.org",                         { { "http://tasktools.org",                         Lexer::Type::url          }, NO, NO, NO, NO }, },
    { "https://bug.tasktools.org",                    { { "https://bug.tasktools.org",                    Lexer::Type::url          }, NO, NO, NO, NO }, },
    // String
    { "'one two'",                                    { { "'one two'",                                    Lexer::Type::string       }, NO, NO, NO, NO }, },
    { "\"three\"",                                    { { "\"three\"",                                    Lexer::Type::string       }, NO, NO, NO, NO }, },
    { "'\\''",                                        { { "'''",                                          Lexer::Type::string       }, NO, NO, NO, NO }, },
    { "\"\\\"\"",                                     { { "\"\"\"",                                       Lexer::Type::string       }, NO, NO, NO, NO }, },
    { "\"\tfoo\t\"",                                  { { "\"\tfoo\t\"",                                  Lexer::Type::string       }, NO, NO, NO, NO }, },
    { "\"\\u20A43\"",                                 { { "\"₤3\"",                                       Lexer::Type::string       }, NO, NO, NO, NO }, },
    { "\"U+20AC4\"",                                  { { "\"€4\"",                                       Lexer::Type::string       }, NO, NO, NO, NO }, },
    // Number
    { "1",                                            { { "1",                                            Lexer::Type::number       }, NO, NO, NO, NO }, },
    { "3.14",                                         { { "3.14",                                         Lexer::Type::number       }, NO, NO, NO, NO }, },
    { "6.02217e23",                                   { { "6.02217e23",                                   Lexer::Type::number       }, NO, NO, NO, NO }, },
    { "1.2e-3.4",                                     { { "1.2e-3.4",                                     Lexer::Type::number       }, NO, NO, NO, NO }, },
    { "0x2f",                                         { { "0x2f",                                         Lexer::Type::hex          }, NO, NO, NO, NO }, },
    // Operator - complete set
    { "^",                                            { { "^",                                            Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "!",                                            { { "!",                                            Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "_neg_",                                        { { "_neg_",                                        Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "_pos_",                                        { { "_pos_",                                        Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "_hastag_",                                     { { "_hastag_",                                     Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "_notag_",                                      { { "_notag_",                                      Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "*",                                            { { "*",                                            Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "/",                                            { { "/",                                            Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "%",                                            { { "%",                                            Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "+",                                            { { "+",                                            Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "-",                                            { { "-",                                            Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "<=",                                           { { "<=",                                           Lexer::Type::op           }, NO, NO, NO, NO }, },
    { ">=",                                           { { ">=",                                           Lexer::Type::op           }, NO, NO, NO, NO }, },
    { ">",                                            { { ">",                                            Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "<",                                            { { "<",                                            Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "=",                                            { { "=",                                            Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "==",                                           { { "==",                                           Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "!=",                                           { { "!=",                                           Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "!==",                                          { { "!==",                                          Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "~",                                            { { "~",                                            Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "!~",                                           { { "!~",                                           Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "and",                                          { { "and",                                          Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "or",                                           { { "or",                                           Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "xor",                                          { { "xor",                                          Lexer::Type::op           }, NO, NO, NO, NO }, },
    { "(",                                            { { "(",                                            Lexer::Type::op           }, NO, NO, NO, NO }, },
    { ")",                                            { { ")",                                            Lexer::Type::op           }, NO, NO, NO, NO }, },
    // UUID
    { "ffffffff-ffff-ffff-ffff-ffffffffffff",         { { "ffffffff-ffff-ffff-ffff-ffffffffffff",         Lexer::Type::uuid         }, NO, NO, NO, NO }, },
    { "00000000-0000-0000-0000-0000000",              { { "00000000-0000-0000-0000-0000000",              Lexer::Type::uuid         }, NO, NO, NO, NO }, },
    { "00000000-0000-0000-0000",                      { { "00000000-0000-0000-0000",                      Lexer::Type::uuid         }, NO, NO, NO, NO }, },
    { "00000000-0000-0000",                           { { "00000000-0000-0000",                           Lexer::Type::uuid         }, NO, NO, NO, NO }, },
    { "00000000-0000",                                { { "00000000-0000",                                Lexer::Type::uuid         }, NO, NO, NO, NO }, },
    { "00000000",                                     { { "00000000",                                     Lexer::Type::uuid         }, NO, NO, NO, NO }, },
    { "a360fc44-315c-4366-b70c-ea7e7520b749",         { { "a360fc44-315c-4366-b70c-ea7e7520b749",         Lexer::Type::uuid         }, NO, NO, NO, NO }, },
    { "a360fc44-315c-4366-b70c-ea7e752",              { { "a360fc44-315c-4366-b70c-ea7e752",              Lexer::Type::uuid         }, NO, NO, NO, NO }, },
    { "a360fc44-315c-4366-b70c",                      { { "a360fc44-315c-4366-b70c",                      Lexer::Type::uuid         }, NO, NO, NO, NO }, },
    { "a360fc44-315c-4366",                           { { "a360fc44-315c-4366",                           Lexer::Type::uuid         }, NO, NO, NO, NO }, },
    { "a360fc44-315c",                                { { "a360fc44-315c",                                Lexer::Type::uuid         }, NO, NO, NO, NO }, },
    { "a360fc44",                                     { { "a360fc44",                                     Lexer::Type::uuid         }, NO, NO, NO, NO }, },
    // Date
    { "2015-W01",                                     { { "2015-W01",                                     Lexer::Type::date         }, NO, NO, NO, NO }, },
    { "2015-02-17",                                   { { "2015-02-17",                                   Lexer::Type::date         }, NO, NO, NO, NO }, },
    { "2013-11-29T22:58:00Z",                         { { "2013-11-29T22:58:00Z",                         Lexer::Type::date         }, NO, NO, NO, NO }, },
    { "20131129T225800Z",                             { { "20131129T225800Z",                             Lexer::Type::date         }, NO, NO, NO, NO }, },
    { "9th",                                          { { "9th",                                          Lexer::Type::date         }, NO, NO, NO, NO }, },
    { "10th",                                         { { "10th",                                         Lexer::Type::date         }, NO, NO, NO, NO }, },
    { "today",                                        { { "today",                                        Lexer::Type::date         }, NO, NO, NO, NO }, },
    // Duration
    { "year",                                         { { "year",                                         Lexer::Type::duration     }, NO, NO, NO, NO }, },
    { "4weeks",                                       { { "4weeks",                                       Lexer::Type::duration     }, NO, NO, NO, NO }, },
    { "PT23H",                                        { { "PT23H",                                        Lexer::Type::duration     }, NO, NO, NO, NO }, },
    { "1second",                                      { { "1second",                                      Lexer::Type::duration     }, NO, NO, NO, NO }, },
    { "1s",                                           { { "1s",                                           Lexer::Type::duration     }, NO, NO, NO, NO }, },
    { "1minute",                                      { { "1minute",                                      Lexer::Type::duration     }, NO, NO, NO, NO }, },
    { "2hour",                                        { { "2hour",                                        Lexer::Type::duration     }, NO, NO, NO, NO }, },
    { "3 days",                                       { { "3 days",                                       Lexer::Type::duration     }, NO, NO, NO, NO }, },
    { "4w",                                           { { "4w",                                           Lexer::Type::duration     }, NO, NO, NO, NO }, },
    { "5mo",                                          { { "5mo",                                          Lexer::Type::duration     }, NO, NO, NO, NO }, },
    { "6 years",                                      { { "6 years",                                      Lexer::Type::duration     }, NO, NO, NO, NO }, },
    { "P1Y",                                          { { "P1Y",                                          Lexer::Type::duration     }, NO, NO, NO, NO }, },
    { "PT1H",                                         { { "PT1H",                                         Lexer::Type::duration     }, NO, NO, NO, NO }, },
    { "P1Y1M1DT1H1M1S",                               { { "P1Y1M1DT1H1M1S",                               Lexer::Type::duration     }, NO, NO, NO, NO }, },
  };
  #define NUM_TESTS (sizeof (lexerTests) / sizeof (lexerTests[0]))
  for (unsigned int i = 0; i < NUM_TESTS; i++)
  {
    // The isolated test puts the input string directly into the Lexer.
    Lexer isolated (lexerTests[i].input);
    for (int j = 0; j < 5; j++)
    {
      if (lexerTests[i].results[j].token[0])
      {
        // Isolated: "<token>"
        t.ok (isolated.token (token, type),                  "Isolated Lexer::token(...) --> true");
        t.is (token, lexerTests[i].results[j].token,         "  token --> " + token);
        t.is ((int)type, (int)lexerTests[i].results[j].type, "  type --> Lexer::Type::" + Lexer::typeToString (type));
      }
    }
    // The embedded test surrounds the input string with a space.
    Lexer embedded (std::string (" ") + lexerTests[i].input + " ");
    for (int j = 0; j < 5; j++)
    {
      if (lexerTests[i].results[j].token[0])
      {
        // Embedded: "<token>"
        t.ok (embedded.token (token, type),                  "Embedded Lexer::token(...) --> true");
        t.is (token, lexerTests[i].results[j].token,         "  token --> " + token);
        t.is ((int)type, (int)lexerTests[i].results[j].type, "  type --> Lexer::Type::" + Lexer::typeToString (type));
      }
    }
  }
  t.is (Lexer::typeName (Lexer::Type::uuid),         "uuid",         "Lexer::typeName (Lexer::Type::uuid)");
  t.is (Lexer::typeName (Lexer::Type::number),       "number",       "Lexer::typeName (Lexer::Type::number)");
  t.is (Lexer::typeName (Lexer::Type::hex),          "hex",          "Lexer::typeName (Lexer::Type::hex)");
  t.is (Lexer::typeName (Lexer::Type::string),       "string",       "Lexer::typeName (Lexer::Type::string)");
  t.is (Lexer::typeName (Lexer::Type::url),          "url",          "Lexer::typeName (Lexer::Type::url)");
  t.is (Lexer::typeName (Lexer::Type::path),         "path",         "Lexer::typeName (Lexer::Type::path)");
  t.is (Lexer::typeName (Lexer::Type::pattern),      "pattern",      "Lexer::typeName (Lexer::Type::pattern)");
  t.is (Lexer::typeName (Lexer::Type::op),           "op",           "Lexer::typeName (Lexer::Type::op)");
  t.is (Lexer::typeName (Lexer::Type::word),         "word",         "Lexer::typeName (Lexer::Type::word)");
  t.is (Lexer::typeName (Lexer::Type::date),         "date",         "Lexer::typeName (Lexer::Type::date)");
  t.is (Lexer::typeName (Lexer::Type::duration),     "duration",     "Lexer::typeName (Lexer::Type::duration)");
  // std::string Lexer::trimLeft (const std::string& in, const std::string&)
  t.is (Lexer::trimLeft (""),                     "",            "Lexer::trimLeft '' -> ''");
  t.is (Lexer::trimLeft ("   "),                  "",            "Lexer::trimLeft '   ' -> ''");
  t.is (Lexer::trimLeft ("",              " \t"), "",            "Lexer::trimLeft '' -> ''");
  t.is (Lexer::trimLeft ("xxx"),                  "xxx",         "Lexer::trimLeft 'xxx' -> 'xxx'");
  t.is (Lexer::trimLeft ("xxx",           " \t"), "xxx",         "Lexer::trimLeft 'xxx' -> 'xxx'");
  t.is (Lexer::trimLeft ("  \t xxx \t  "),        "\t xxx \t  ", "Lexer::trimLeft '  \\t xxx \\t  ' -> '\\t xxx \\t  '");
  t.is (Lexer::trimLeft ("  \t xxx \t  ", " \t"), "xxx \t  ",    "Lexer::trimLeft '  \\t xxx \\t  ' -> 'xxx \\t  '");
  // std::string Lexer::trimRight (const std::string& in, const std::string&)
  t.is (Lexer::trimRight (""),                     "",            "Lexer::trimRight '' -> ''");
  t.is (Lexer::trimRight ("   "),                  "",            "Lexer::trimRight '   ' -> ''");
  t.is (Lexer::trimRight ("",              " \t"), "",            "Lexer::trimRight '' -> ''");
  t.is (Lexer::trimRight ("xxx"),                  "xxx",         "Lexer::trimRight 'xxx' -> 'xxx'");
  t.is (Lexer::trimRight ("xxx",           " \t"), "xxx",         "Lexer::trimRight 'xxx' -> 'xxx'");
  t.is (Lexer::trimRight ("  \t xxx \t  "),        "  \t xxx \t", "Lexer::trimRight '  \\t xxx \\t  ' -> '  \\t xxx \\t'");
  t.is (Lexer::trimRight ("  \t xxx \t  ", " \t"), "  \t xxx",    "Lexer::trimRight '  \\t xxx \\t  ' -> '  \\t xxx'");
  // std::string Lexer::trim (const std::string& in, const std::string& t)
  t.is (Lexer::trim (""),                     "",          "Lexer::trim '' -> ''");
  t.is (Lexer::trim ("   "),                  "",          "Lexer::trim '   ' -> ''");
  t.is (Lexer::trim ("",              " \t"), "",          "Lexer::trim '' -> ''");
  t.is (Lexer::trim ("xxx"),                  "xxx",       "Lexer::trim 'xxx' -> 'xxx'");
  t.is (Lexer::trim ("xxx",           " \t"), "xxx",       "Lexer::trim 'xxx' -> 'xxx'");
  t.is (Lexer::trim ("  \t xxx \t  "),        "\t xxx \t", "Lexer::trim '  \\t xxx \\t  ' -> '\\t xxx \\t'");
  t.is (Lexer::trim ("  \t xxx \t  ", " \t"), "xxx",       "Lexer::trim '  \\t xxx \\t  ' -> 'xxx'");
  // std::vector <std::tuple <std::string, Lexer::Type>> Lexer::tokenize (const std::string& input)
  auto tokenized = Lexer::tokenize (" one two   three   ");
  t.is ((int)tokenized.size (), 3, "Lexer::tokenize ' one two  three   ' --> 3");
  t.is (std::get <0> (tokenized[0]), "one",               "Lexer::tokenize ' one two  three   ' [0] --> 'one'");
  t.ok (std::get <1> (tokenized[0]) == Lexer::Type::word, "Lexer::tokenize ' one two  three   ' [0] --> word");
  t.is (std::get <0> (tokenized[1]), "two",               "Lexer::tokenize ' one two  three   ' [1] --> 'two'");
  t.ok (std::get <1> (tokenized[1]) == Lexer::Type::word, "Lexer::tokenize ' one two  three   ' [1] --> word");
  t.is (std::get <0> (tokenized[2]), "three",             "Lexer::tokenize ' one two  three   ' [2] --> 'three'");
  t.ok (std::get <1> (tokenized[2]) == Lexer::Type::word, "Lexer::tokenize ' one two  three   ' [2] --> word");
  // bool wasQuoted (const std::string& input)
  t.notok (Lexer::wasQuoted (""),        "Lexer::wasQuoted '' --> false");
  t.notok (Lexer::wasQuoted ("abc"),     "Lexer::wasQuoted 'abc' --> false");
  t.ok    (Lexer::wasQuoted ("one two"), "Lexer::wasQuoted 'one two' --> true");
  return 0;
 }
 ////////////////////////////////////////////////////////////////////////////////