Build: Migrated Lexer to libshared

This commit is contained in:
Paul Beckingham 2016-06-25 10:11:08 -04:00
parent fdca94085e
commit ca57bf91e3
6 changed files with 2 additions and 1341 deletions

View file

@ -11,7 +11,6 @@ set (timew_SRCS CLI.cpp CLI.h
Exclusion.cpp Exclusion.h
Extensions.cpp Extensions.h
Interval.cpp Interval.h
Lexer.cpp Lexer.h
Range.cpp Range.h
Rules.cpp Rules.h
data.cpp
@ -29,6 +28,7 @@ set (libshared_SRCS libshared/src/Args.cpp libshared/src/Args.h
libshared/src/FS.cpp libshared/src/FS.h
libshared/src/JSON.cpp libshared/src/JSON.h
libshared/src/JSON2.cpp libshared/src/JSON2.h
libshared/src/Lexer.cpp libshared/src/Lexer.h
libshared/src/Msg.cpp libshared/src/Msg.h
libshared/src/Palette.cpp libshared/src/Palette.h
libshared/src/Pig.cpp libshared/src/Pig.h

View file

@ -1,912 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
//
// Copyright 2013 - 2016, Paul Beckingham, Federico Hernandez.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// http://www.opensource.org/licenses/mit-license.php
//
////////////////////////////////////////////////////////////////////////////////
#include <cmake.h>
#include <Lexer.h>
#include <Datetime.h>
#include <Duration.h>
#include <algorithm>
#include <tuple>
#include <ctype.h>
#include <unicode.h>
#include <utf8.h>
// Template for a full UUID: 'x' marks a hex digit position, '-' is literal.
static const std::string uuid_pattern {"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"};

// Fewest leading pattern characters that must match for text to be a UUID.
static const unsigned int uuid_min_length {8};

// Format string handed to the Datetime parser by Lexer::isDate.  Empty unless
// overridden by the application.
std::string Lexer::dateFormat {};
////////////////////////////////////////////////////////////////////////////////
Lexer::Lexer (const std::string& text)
: _text (text)
, _eos (text.size ())
{
}
////////////////////////////////////////////////////////////////////////////////
// Extracts the next low-level token from the text this Lexer was constructed
// with.  The token text is stored in 'token' and its classification in 'type'.
// Returns false when only white space (or nothing) remains, or when no
// classifier accepts the text at the cursor.
bool Lexer::token (std::string& token, Lexer::Type& type)
{
  // Skip leading white space.
  while (unicodeWhitespace (_text[_cursor]))
    utf8_next_char (_text, _cursor);

  // Nothing left to classify.
  if (isEOS ())
    return false;

  // Classifiers are tried in this fixed order and the first match wins, so the
  // most permissive one (isWord) comes last.
  if (isString    (token, type, "'\"")) return true;
  if (isUUID      (token, type, true))  return true;
  if (isDate      (token, type))        return true;
  if (isDuration  (token, type))        return true;
  if (isURL       (token, type))        return true;
  if (isHexNumber (token, type))        return true;
  if (isNumber    (token, type))        return true;
  if (isPath      (token, type))        return true;
  if (isPattern   (token, type))        return true;
  if (isOperator  (token, type))        return true;

  return isWord (token, type);
}
////////////////////////////////////////////////////////////////////////////////
// Convenience wrapper: lexes 'input' from start to finish and returns every
// (token text, token type) pair in order of appearance.
std::vector <std::tuple <std::string, Lexer::Type>> Lexer::tokenize (const std::string& input)
{
  Lexer lexer (input);
  std::vector <std::tuple <std::string, Lexer::Type>> result;

  std::string text;
  Lexer::Type kind;
  while (lexer.token (text, kind))
    result.emplace_back (text, kind);

  return result;
}
////////////////////////////////////////////////////////////////////////////////
// Maps a token type to its plain, lower-case name.  These names are for
// internal purposes and are deliberately not localized.  A value outside the
// enumeration yields "unknown".
const std::string Lexer::typeName (const Lexer::Type& type)
{
  const char* name = "unknown";

  switch (type)
  {
  case Lexer::Type::uuid:     name = "uuid";     break;
  case Lexer::Type::number:   name = "number";   break;
  case Lexer::Type::hex:      name = "hex";      break;
  case Lexer::Type::string:   name = "string";   break;
  case Lexer::Type::url:      name = "url";      break;
  case Lexer::Type::path:     name = "path";     break;
  case Lexer::Type::pattern:  name = "pattern";  break;
  case Lexer::Type::op:       name = "op";       break;
  case Lexer::Type::word:     name = "word";     break;
  case Lexer::Type::date:     name = "date";     break;
  case Lexer::Type::duration: name = "duration"; break;
  }

  return name;
}
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::number
// \d+
// [ . \d+ ]
// [ e|E [ +|- ] \d+ [ . \d+ ] ]
// not followed by non-operator.
bool Lexer::isNumber (std::string& token, Lexer::Type& type)
{
std::size_t marker = _cursor;
if (unicodeLatinDigit (_text[marker]))
{
++marker;
while (unicodeLatinDigit (_text[marker]))
utf8_next_char (_text, marker);
if (_text[marker] == '.')
{
++marker;
if (unicodeLatinDigit (_text[marker]))
{
++marker;
while (unicodeLatinDigit (_text[marker]))
utf8_next_char (_text, marker);
}
}
if (_text[marker] == 'e' ||
_text[marker] == 'E')
{
++marker;
if (_text[marker] == '+' ||
_text[marker] == '-')
++marker;
if (unicodeLatinDigit (_text[marker]))
{
++marker;
while (unicodeLatinDigit (_text[marker]))
utf8_next_char (_text, marker);
if (_text[marker] == '.')
{
++marker;
if (unicodeLatinDigit (_text[marker]))
{
++marker;
while (unicodeLatinDigit (_text[marker]))
utf8_next_char (_text, marker);
}
}
}
}
// Lookahread: !<unicodeWhitespace> | !<isSingleCharOperator>
// If there is an immediately consecutive character, that is not an operator, fail.
if (_eos > marker &&
! unicodeWhitespace (_text[marker]) &&
! isSingleCharOperator (_text[marker]))
return false;
token = _text.substr (_cursor, marker - _cursor);
type = Lexer::Type::number;
_cursor = marker;
return true;
}
return false;
}
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::number
//   \d+
//
// Matches a run of Latin digits at the cursor.  Unlike isNumber there is no
// lookahead: whatever follows the digits is left for the next token.
bool Lexer::isInteger (std::string& token, Lexer::Type& type)
{
  if (! unicodeLatinDigit (_text[_cursor]))
    return false;

  std::size_t end = _cursor + 1;
  while (unicodeLatinDigit (_text[end]))
    utf8_next_char (_text, end);

  token   = _text.substr (_cursor, end - _cursor);
  type    = Lexer::Type::number;
  _cursor = end;
  return true;
}
////////////////////////////////////////////////////////////////////////////////
// Reports whether 'c' is one of the recognized one-character operators.
bool Lexer::isSingleCharOperator (int c)
{
  switch (c)
  {
  case '+':  // Addition
  case '-':  // Subtraction or unary minus = ambiguous
  case '*':  // Multiplication
  case '/':  // Division
  case '(':  // Precedence open parenthesis
  case ')':  // Precedence close parenthesis
  case '<':  // Less than
  case '>':  // Greater than
  case '^':  // Exponent
  case '!':  // Unary not
  case '%':  // Modulus
  case '=':  // Partial match
  case '~':  // Pattern match
    return true;

  default:
    return false;
  }
}
////////////////////////////////////////////////////////////////////////////////
// Reports whether c0,c1 form a two-character operator:
//   ==  !=  <=  >=  ||  &&  !~  or
// 'c2' is the character that follows the pair.  It is consulted only for the
// word operator "or", which must end at a boundary.
bool Lexer::isDoubleCharOperator (int c0, int c1, int c2)
{
  switch (c0)
  {
  case '=':
  case '<':
  case '>':
    return c1 == '=';

  case '!':
    return c1 == '=' || c1 == '~';

  case '|':
    return c1 == '|';

  case '&':
    return c1 == '&';

  case 'o':
    return c1 == 'r' && isBoundary (c1, c2);

  default:
    return false;
  }
}
////////////////////////////////////////////////////////////////////////////////
// Reports whether c0,c1,c2 form a three-character operator:
//   and  xor  !==
// 'c3' is the character that follows the triple.  It is consulted only for the
// word operators, which must end at a boundary.
bool Lexer::isTripleCharOperator (int c0, int c1, int c2, int c3)
{
  if (c0 == '!')
    return c1 == '=' && c2 == '=';

  const bool isAnd = c0 == 'a' && c1 == 'n' && c2 == 'd';
  const bool isXor = c0 == 'x' && c1 == 'o' && c2 == 'r';

  return (isAnd || isXor) && isBoundary (c2, c3);
}
////////////////////////////////////////////////////////////////////////////////
// Reports whether a token boundary lies between the adjacent characters 'left'
// and 'right'.
bool Lexer::isBoundary (int left, int right)
{
  // End of string always ends whatever precedes it.
  if (right == '\0')
    return true;

  // A boundary exists when the two characters differ in class (alpha, digit or
  // white space), or when either of them is punctuation.
  return unicodeLatinAlpha (left) != unicodeLatinAlpha (right) ||
         unicodeLatinDigit (left) != unicodeLatinDigit (right) ||
         unicodeWhitespace (left) != unicodeWhitespace (right) ||
         isPunctuation (left)                                  ||
         isPunctuation (right);
}
////////////////////////////////////////////////////////////////////////////////
// Reports whether 'left' and 'right' are separated by a boundary that holds
// even without surrounding white space: end of string, or a parenthesis on
// either side.
bool Lexer::isHardBoundary (int left, int right)
{
  // EOS.
  if (right == '\0')
    return true;

  // FILTER operators that don't need to be surrounded by white space.
  const auto isParen = [] (int c) { return c == '(' || c == ')'; };

  return isParen (left) || isParen (right);
}
////////////////////////////////////////////////////////////////////////////////
// Reports whether 'c' is a printable character that is neither a space, a
// Latin letter, a Latin digit, nor one of '@', '#', '$', '_'.
bool Lexer::isPunctuation (int c)
{
  // isprint() is only defined for EOF and for values representable as an
  // unsigned char.  Callers pass raw (possibly negative) bytes of UTF-8 text
  // and arbitrary ints, so anything outside that range is rejected up front
  // rather than invoking undefined behavior.
  if (c < 0 || c > 0xFF)
    return false;

  return isprint (c)           &&
         c != ' '              &&
         c != '@'              &&
         c != '#'              &&
         c != '$'              &&
         c != '_'              &&
         ! unicodeLatinDigit (c) &&
         ! unicodeLatinAlpha (c);
}
////////////////////////////////////////////////////////////////////////////////
// Strips one pair of matching quote characters from around 'input'.
//
// 'quotes' is the set of characters accepted as quotes; it is assumed to be
// non-trivial.  The input is returned unchanged unless its first character is
// in 'quotes' and its last character is that same quote.  Escapes inside the
// string are not interpreted.
//
// At least two characters are required, so that a lone quote character is not
// mistaken for both the opening and the closing quote and silently dropped.
std::string Lexer::dequote (const std::string& input, const std::string& quotes)
{
  const std::string::size_type len = input.length ();
  if (len >= 2)
  {
    const char quote = input[0];
    if (quotes.find (quote) != std::string::npos &&
        input[len - 1] == quote)
      return input.substr (1, len - 2);
  }

  return input;
}
////////////////////////////////////////////////////////////////////////////////
// Reports whether 'input' contains a character (space, tab, parenthesis, angle
// bracket, ampersand or tilde) indicating that quotes or escapes were required
// to get it past the shell.
bool Lexer::wasQuoted (const std::string& input)
{
  return input.find_first_of (" \t()<>&~") != std::string::npos;
}
////////////////////////////////////////////////////////////////////////////////
// True once the cursor has reached (or passed) the end of the text.
bool Lexer::isEOS () const
{
  return ! (_cursor < _eos);
}
////////////////////////////////////////////////////////////////////////////////
// Converts a single hex digit character to its value:
//   '0'     -> 0
//   '9'     -> 9
//   'a'/'A' -> 10
//   'f'/'F' -> 15
// No validation is performed: any character that is not a decimal digit or a
// lower-case a-f is treated as if it were an upper-case hex letter.
int Lexer::hexToInt (int c)
{
  if ('0' <= c && c <= '9')
    return c - '0';

  const int base = ('a' <= c && c <= 'f') ? 'a' : 'A';
  return c - base + 10;
}
////////////////////////////////////////////////////////////////////////////////
// Converts two hex digit characters, most significant first, to a value.
int Lexer::hexToInt (int c0, int c1)
{
  const int high = hexToInt (c0);
  const int low  = hexToInt (c1);

  return (high << 4) + low;
}
////////////////////////////////////////////////////////////////////////////////
// Converts four hex digit characters, most significant first, to a value.
int Lexer::hexToInt (int c0, int c1, int c2, int c3)
{
  const int digits[] = {c0, c1, c2, c3};

  // Accumulate one nibble at a time.
  int value = 0;
  for (const int digit : digits)
    value = (value << 4) + hexToInt (digit);

  return value;
}
////////////////////////////////////////////////////////////////////////////////
// Returns a copy of 'in' with every leading character found in 't' removed.
// If 'in' consists solely of such characters the result is empty.
std::string Lexer::trimLeft (const std::string& in, const std::string& t /*= " "*/)
{
  const std::string::size_type first = in.find_first_not_of (t);

  // Nothing to strip.
  if (first == 0)
    return in;

  // 'first' may be npos here, in which case everything is erased.
  std::string out {in};
  out.erase (0, first);
  return out;
}
////////////////////////////////////////////////////////////////////////////////
// Returns a copy of 'in' with every trailing character found in 't' removed.
// If 'in' consists solely of such characters the result is empty.
std::string Lexer::trimRight (const std::string& in, const std::string& t /*= " "*/)
{
  // find_last_not_of yields npos when nothing is kept; npos + 1 wraps to 0.
  const std::string::size_type keep = in.find_last_not_of (t) + 1;

  return in.substr (0, keep);
}
////////////////////////////////////////////////////////////////////////////////
// Returns a copy of 'in' with the characters in 't' removed from both ends.
std::string Lexer::trim (const std::string& in, const std::string& t /*= " "*/)
{
  const std::string rightTrimmed = trimRight (in, t);

  return trimLeft (rightTrimmed, t);
}
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::string
//   '|"
//   [ U+XXXX | \uXXXX | \" | \' | \\ | \/ | \b | \f | \n | \r | \t | . ]
//   '|"
//
// 'quotes' is the set of characters accepted as string delimiters.  The token
// includes the surrounding quotes.
bool Lexer::isString (std::string& token, Lexer::Type& type, const std::string& quotes)
{
  std::size_t end = _cursor;
  if (! readWord (_text, quotes, end, token))
    return false;

  type    = Lexer::Type::string;
  _cursor = end;
  return true;
}
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::date
//   <Datetime> (followed by eos, WS, operator)
//
// Parsing is delegated to Datetime::parse, which is given Lexer::dateFormat.
bool Lexer::isDate (std::string& token, Lexer::Type& type)
{
  std::size_t end = _cursor;

  Datetime d;
  if (! d.parse (_text, end, Lexer::dateFormat))
    return false;

  // The date must be followed by EOS, white space or an operator.
  const bool terminated = end >= _eos                        ||
                          unicodeWhitespace (_text[end])     ||
                          isSingleCharOperator (_text[end]);
  if (! terminated)
    return false;

  type    = Lexer::Type::date;
  token   = _text.substr (_cursor, end - _cursor);
  _cursor = end;
  return true;
}
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::duration
// <Duration> (followed by eos, WS, operator)
bool Lexer::isDuration (std::string& token, Lexer::Type& type)
{
std::size_t marker = _cursor;
std::string extractedToken;
Lexer::Type extractedType;
if (isOperator(extractedToken, extractedType))
{
_cursor = marker;
return false;
}
marker = _cursor;
Duration dur;
if (dur.parse (_text, marker) &&
(marker >= _eos ||
unicodeWhitespace (_text[marker]) ||
isSingleCharOperator (_text[marker])))
{
type = Lexer::Type::duration;
token = _text.substr (_cursor, marker - _cursor);
_cursor = marker;
return true;
}
return false;
}
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::uuid
// XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
// XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX
// XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXX
// XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXX
// ...
// XXXXXXXX-XX
// XXXXXXXX-X
// XXXXXXXX-
// XXXXXXXX
// Followed only by EOS, whitespace, or single character operator.
bool Lexer::isUUID (std::string& token, Lexer::Type& type, bool endBoundary)
{
std::size_t marker = _cursor;
// Greedy.
std::size_t i = 0;
for (; i < 36 && marker + i < _eos; i++)
{
if (uuid_pattern[i] == 'x')
{
if (! unicodeHexDigit (_text[marker + i]))
break;
}
else if (uuid_pattern[i] != _text[marker + i])
break;
}
if (i >= uuid_min_length &&
(! endBoundary ||
! _text[marker + i] ||
unicodeWhitespace (_text[marker + i]) ||
isSingleCharOperator (_text[marker + i])))
{
token = _text.substr (_cursor, i);
type = Lexer::Type::uuid;
_cursor += i;
return true;
}
return false;
}
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::hex
//   0xX+
//
// The prefix must be a lower-case "0x" and be followed by at least one hex
// digit.
bool Lexer::isHexNumber (std::string& token, Lexer::Type& type)
{
  // Need room for the prefix plus one digit.
  if (_eos - _cursor < 3       ||
      _text[_cursor + 0] != '0' ||
      _text[_cursor + 1] != 'x')
    return false;

  std::size_t end = _cursor + 2;
  while (unicodeHexDigit (_text[end]))
    ++end;

  // The prefix alone is not a hex number.
  if (end - _cursor <= 2)
    return false;

  token   = _text.substr (_cursor, end - _cursor);
  type    = Lexer::Type::hex;
  _cursor = end;
  return true;
}
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::word
//   One or more characters, ending at EOS, white space or a single-character
//   operator.
bool Lexer::isWord (std::string& token, Lexer::Type& type)
{
  std::size_t end = _cursor;
  for (;;)
  {
    const char c = _text[end];
    if (! c                      ||
        unicodeWhitespace (c)    ||
        isSingleCharOperator (c))
      break;

    utf8_next_char (_text, end);
  }

  // An empty match is not a word.
  if (end <= _cursor)
    return false;

  token   = _text.substr (_cursor, end - _cursor);
  type    = Lexer::Type::word;
  _cursor = end;
  return true;
}
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::url
//   http [s] :// ...
//
// The scheme is matched case-insensitively and the URL extends to the next
// white space or EOS.  At least one character must remain after "://".
//
// The shortest accepted input is therefore "http://*" (8 characters).  The
// length guard used to demand more than 9 characters, which wrongly rejected
// short but valid URLs such as "https://a" and "http://ab".
bool Lexer::isURL (std::string& token, Lexer::Type& type)
{
  // Also guarantees that every index inspected below lies inside the text.
  if (_eos - _cursor < 8)
    return false;

  std::size_t marker = _cursor;
  if ((_text[marker + 0] != 'h' && _text[marker + 0] != 'H') ||
      (_text[marker + 1] != 't' && _text[marker + 1] != 'T') ||
      (_text[marker + 2] != 't' && _text[marker + 2] != 'T') ||
      (_text[marker + 3] != 'p' && _text[marker + 3] != 'P'))
    return false;

  marker += 4;

  // Optional 's'.
  if (_text[marker] == 's' || _text[marker] == 'S')
    ++marker;

  if (_text[marker + 0] != ':' ||
      _text[marker + 1] != '/' ||
      _text[marker + 2] != '/')
    return false;

  marker += 3;

  // A bare scheme at the very end of the input is not a URL.
  if (marker >= _eos)
    return false;

  while (marker < _eos &&
         ! unicodeWhitespace (_text[marker]))
    utf8_next_char (_text, marker);

  token   = _text.substr (_cursor, marker - _cursor);
  type    = Lexer::Type::url;
  _cursor = marker;
  return true;
}
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::path
// ( / <non-slash, non-whitespace> )+
bool Lexer::isPath (std::string& token, Lexer::Type& type)
{
std::size_t marker = _cursor;
int slashCount = 0;
while (1)
{
if (_text[marker] == '/')
{
++marker;
++slashCount;
}
else
break;
if (_text[marker] &&
! unicodeWhitespace (_text[marker]) &&
_text[marker] != '/')
{
utf8_next_char (_text, marker);
while (_text[marker] &&
! unicodeWhitespace (_text[marker]) &&
_text[marker] != '/')
utf8_next_char (_text, marker);
}
else
break;
}
if (marker > _cursor &&
slashCount > 3)
{
type = Lexer::Type::path;
token = _text.substr (_cursor, marker - _cursor);
_cursor = marker;
return true;
}
return false;
}
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::pattern
// / <unquoted-string> / <EOS> | <unicodeWhitespace>
bool Lexer::isPattern (std::string& token, Lexer::Type& type)
{
std::size_t marker = _cursor;
std::string word;
if (readWord (_text, "/", _cursor, word) &&
(isEOS () ||
unicodeWhitespace (_text[_cursor])))
{
token = _text.substr (marker, _cursor - marker);
type = Lexer::Type::pattern;
return true;
}
_cursor = marker;
return false;
}
////////////////////////////////////////////////////////////////////////////////
// Lexer::Type::op
//   _hastag_ | _notag_ | _neg_ | _pos_ |
//   <isTripleCharOperator> |
//   <isDoubleCharOperator> |
//   <isSingleCharOperator>
//
// Candidates are tried longest-first in the order listed.  On success the
// cursor moves past the operator; on failure nothing is modified.
bool Lexer::isOperator (std::string& token, Lexer::Type& type)
{
  static const std::string named[] = {"_hastag_", "_notag_", "_neg_", "_pos_"};

  const std::size_t remaining = _eos - _cursor;
  std::size_t length = 0;

  // Named operators.
  for (const auto& name : named)
  {
    if (remaining >= name.length () &&
        _text.compare (_cursor, name.length (), name) == 0)
    {
      length = name.length ();
      break;
    }
  }

  // Symbolic and word operators.
  if (length == 0)
  {
    if (remaining >= 3 &&
        isTripleCharOperator (_text[_cursor], _text[_cursor + 1], _text[_cursor + 2], _text[_cursor + 3]))
      length = 3;

    else if (remaining >= 2 &&
             isDoubleCharOperator (_text[_cursor], _text[_cursor + 1], _text[_cursor + 2]))
      length = 2;

    else if (isSingleCharOperator (_text[_cursor]))
      length = 1;

    else
      return false;
  }

  token    = _text.substr (_cursor, length);
  type     = Lexer::Type::op;
  _cursor += length;
  return true;
}
////////////////////////////////////////////////////////////////////////////////
// Static.
// Returns the name of 'type' wrapped in ANSI color escape sequences, followed
// by a reset sequence.  A value outside the enumeration yields a colored
// "unknown".
std::string Lexer::typeToString (Lexer::Type type)
{
  const char* color = "\033[37;41m";
  const char* label = "unknown";

  switch (type)
  {
  case Lexer::Type::string:   color = "\033[38;5;7m\033[48;5;3m";    label = "string";   break;
  case Lexer::Type::uuid:     color = "\033[38;5;7m\033[48;5;10m";   label = "uuid";     break;
  case Lexer::Type::hex:      color = "\033[38;5;7m\033[48;5;14m";   label = "hex";      break;
  case Lexer::Type::number:   color = "\033[38;5;7m\033[48;5;6m";    label = "number";   break;
  case Lexer::Type::url:      color = "\033[38;5;7m\033[48;5;4m";    label = "url";      break;
  case Lexer::Type::path:     color = "\033[37;102m";                label = "path";     break;
  case Lexer::Type::pattern:  color = "\033[37;42m";                 label = "pattern";  break;
  case Lexer::Type::op:       color = "\033[38;5;7m\033[48;5;203m";  label = "op";       break;
  case Lexer::Type::word:     color = "\033[38;5;15m\033[48;5;236m"; label = "word";     break;
  case Lexer::Type::date:     color = "\033[38;5;15m\033[48;5;34m";  label = "date";     break;
  case Lexer::Type::duration: color = "\033[38;5;15m\033[48;5;34m";  label = "duration"; break;
  }

  return std::string (color) + label + "\033[0m";
}
////////////////////////////////////////////////////////////////////////////////
// Full implementation of a quoted word.  Includes:
//   '\''
//   '"'
//   "'"
//   "\""
//   'one two'
//
// Reads a quoted word from 'text' starting at 'cursor'.  The character at
// 'cursor' must be one of 'quotes'; the word runs to the next unescaped
// occurrence of that same character.  U+XXXX / \uXXXX sequences and backslash
// escapes are decoded.  The result in 'word' includes the quotes.
//
// Returns true only if the closing quote was actually found.  'cursor' and
// 'word' may be modified even when false is returned.
//
// Two defects of the earlier implementation are avoided here:
//   - a dangling backslash at the end of the input used to step the cursor
//     past the string terminator and read out of bounds;
//   - an escaped quote at the very end of the input (e.g. 'abc\') used to be
//     mistaken for the closing quote.
bool Lexer::readWord (
  const std::string& text,
  const std::string& quotes,
  std::string::size_type& cursor,
  std::string& word)
{
  const std::string::size_type eos = text.length ();

  if (cursor >= eos ||
      quotes.find (text[cursor]) == std::string::npos)
    return false;

  const int quote = text[cursor++];
  word = static_cast <char> (quote);

  // Set only when the real closing quote is consumed.
  bool terminated = false;

  int c;
  while ((c = text[cursor]))
  {
    // Quoted word ends on a quote.
    if (c == quote)
    {
      word += utf8_character (utf8_next_char (text, cursor));
      terminated = true;
      break;
    }

    // Unicode U+XXXX or \uXXXX codepoint.
    else if (eos - cursor >= 6 &&
             ((text[cursor + 0] == 'U'  && text[cursor + 1] == '+') ||
              (text[cursor + 0] == '\\' && text[cursor + 1] == 'u')) &&
             unicodeHexDigit (text[cursor + 2]) &&
             unicodeHexDigit (text[cursor + 3]) &&
             unicodeHexDigit (text[cursor + 4]) &&
             unicodeHexDigit (text[cursor + 5]))
    {
      word += utf8_character (
                hexToInt (
                  text[cursor + 2],
                  text[cursor + 3],
                  text[cursor + 4],
                  text[cursor + 5]));
      cursor += 6;
    }

    // An escaped thing.
    else if (c == '\\')
    {
      c = text[++cursor];

      // Dangling backslash at EOS: the word cannot be terminated.  Stop here,
      // with the cursor at EOS, rather than stepping past the end.
      if (! c)
        break;

      switch (c)
      {
      case 'b': word += '\b'; break;
      case 'f': word += '\f'; break;
      case 'n': word += '\n'; break;
      case 'r': word += '\r'; break;
      case 't': word += '\t'; break;
      case 'v': word += '\v'; break;

      // Anything else (including \" \' \\ and the quote itself) can be
      // escaped harmlessly and stands for itself.
      default:  word += static_cast <char> (c); break;
      }

      ++cursor;
    }

    // Ordinary character.
    else
      word += utf8_character (utf8_next_char (text, cursor));
  }

  return terminated;
}
////////////////////////////////////////////////////////////////////////////////
// Full implementation of an unquoted word.  Includes:
//   one\ two
//   abcU+0020def
//   abc\u0020def
//   a\tb
//
// Ends at:
//   Lexer::isEOS
//   unicodeWhitespace
//   Lexer::isHardBoundary
//
// Reads from 'text' starting at 'cursor', decoding U+XXXX / \uXXXX sequences
// and backslash escapes into 'word', and leaves 'cursor' at the character that
// ended the word.  Returns true if at least one character was read.
//
// A dangling backslash at the end of the input is kept as a literal backslash.
// It used to append a NUL and step the cursor past the string terminator,
// causing an out-of-bounds read on the next iteration.
bool Lexer::readWord (
  const std::string& text,
  std::string::size_type& cursor,
  std::string& word)
{
  const std::string::size_type eos = text.length ();

  word = "";
  int c;
  int prev = 0;
  while (cursor < eos && (c = text[cursor]))  // Handles EOS.
  {
    // Unquoted word ends on white space.
    if (unicodeWhitespace (c))
      break;

    // Parentheses mostly.
    if (prev && Lexer::isHardBoundary (prev, c))
      break;

    // Unicode U+XXXX or \uXXXX codepoint.
    else if (eos - cursor >= 6 &&
             ((text[cursor + 0] == 'U'  && text[cursor + 1] == '+') ||
              (text[cursor + 0] == '\\' && text[cursor + 1] == 'u')) &&
             unicodeHexDigit (text[cursor + 2]) &&
             unicodeHexDigit (text[cursor + 3]) &&
             unicodeHexDigit (text[cursor + 4]) &&
             unicodeHexDigit (text[cursor + 5]))
    {
      word += utf8_character (
                hexToInt (
                  text[cursor + 2],
                  text[cursor + 3],
                  text[cursor + 4],
                  text[cursor + 5]));
      cursor += 6;
    }

    // An escaped thing.
    else if (c == '\\')
    {
      c = text[++cursor];

      // Dangling backslash at EOS: there is nothing to escape.  Keep the
      // backslash itself and stop with the cursor at EOS.
      if (! c)
      {
        word += '\\';
        break;
      }

      switch (c)
      {
      case 'b': word += '\b'; break;
      case 'f': word += '\f'; break;
      case 'n': word += '\n'; break;
      case 'r': word += '\r'; break;
      case 't': word += '\t'; break;
      case 'v': word += '\v'; break;

      // Anything else (including \" \' \\ and a space) can be escaped
      // harmlessly and stands for itself.
      default:  word += static_cast <char> (c); break;
      }

      ++cursor;
    }

    // Ordinary character.
    else
      word += utf8_character (utf8_next_char (text, cursor));

    prev = c;
  }

  return word.length () > 0;
}
////////////////////////////////////////////////////////////////////////////////

View file

@ -1,96 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
//
// Copyright 2013 - 2016, Paul Beckingham, Federico Hernandez.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// http://www.opensource.org/licenses/mit-license.php
//
////////////////////////////////////////////////////////////////////////////////
#ifndef INCLUDED_LEXER
#define INCLUDED_LEXER
#include <string>
#include <map>
#include <vector>
#include <tuple>
#include <cstddef>
// Splits a string into typed tokens.  An instance walks its input one token at
// a time via token(); the static helpers can be used without an instance.
class Lexer
{
public:
  // These are overridable.
  static std::string dateFormat;

  // Token classifications.
  enum class Type
  {
    uuid,
    number,
    hex,
    string,
    url,
    path,
    pattern,
    op,
    word,
    date,
    duration
  };

  explicit Lexer (const std::string& text);

  // Fetches the next token and its type; false when none remain.
  bool token (std::string& token, Lexer::Type& type);

  static std::string typeToString (Lexer::Type type);

  // Static helpers.
  static std::vector <std::tuple <std::string, Lexer::Type>> tokenize (const std::string& input);
  static const std::string typeName (const Lexer::Type& type);

  static bool isSingleCharOperator (int c);
  static bool isDoubleCharOperator (int c0, int c1, int c2);
  static bool isTripleCharOperator (int c0, int c1, int c2, int c3);
  static bool isBoundary (int left, int right);
  static bool isHardBoundary (int left, int right);
  static bool isPunctuation (int c);
  static bool wasQuoted (const std::string& input);

  // Quoted and unquoted word readers, respectively.
  static bool readWord (const std::string& text, const std::string& quotes, std::string::size_type& cursor, std::string& word);
  static bool readWord (const std::string& text, std::string::size_type& cursor, std::string& word);

  static int hexToInt (int c);
  static int hexToInt (int c0, int c1);
  static int hexToInt (int c0, int c1, int c2, int c3);

  static std::string trimLeft (const std::string& in, const std::string& t = " ");
  static std::string trimRight (const std::string& in, const std::string& t = " ");
  static std::string trim (const std::string& in, const std::string& t = " ");
  static std::string dequote (const std::string& input, const std::string& quotes = "'\"");

  // Stream Classifiers.
  bool isEOS () const;
  bool isString (std::string& token, Lexer::Type& type, const std::string& quotes);
  bool isDate (std::string& token, Lexer::Type& type);
  bool isDuration (std::string& token, Lexer::Type& type);
  bool isUUID (std::string& token, Lexer::Type& type, bool endBoundary);
  bool isNumber (std::string& token, Lexer::Type& type);
  bool isInteger (std::string& token, Lexer::Type& type);
  bool isHexNumber (std::string& token, Lexer::Type& type);
  bool isURL (std::string& token, Lexer::Type& type);
  bool isPath (std::string& token, Lexer::Type& type);
  bool isPattern (std::string& token, Lexer::Type& type);
  bool isOperator (std::string& token, Lexer::Type& type);
  bool isWord (std::string& token, Lexer::Type& type);

private:
  std::string _text   {};   // The input being lexed.
  std::size_t _cursor {0};  // Offset of the next unread character in _text.
  std::size_t _eos    {0};  // Length of the input.
};
#endif

1
test/.gitignore vendored
View file

@ -4,7 +4,6 @@ data.t
exclusion.t
helper.t
interval.t
lexer.t
range.t
rules.t
util.t

View file

@ -14,7 +14,7 @@ include_directories (${CMAKE_SOURCE_DIR}
include_directories (${CMAKE_INSTALL_PREFIX}/include)
link_directories(${CMAKE_INSTALL_PREFIX}/lib)
set (test_SRCS data.t exclusion.t helper.t interval.t lexer.t range.t rules.t util.t)
set (test_SRCS data.t exclusion.t helper.t interval.t range.t rules.t util.t)
add_custom_target (test ./run_all --verbose
DEPENDS ${test_SRCS}

View file

@ -1,330 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
//
// Copyright 2013 - 2016, Göteborg Bit Factory.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// http://www.opensource.org/licenses/mit-license.php
//
////////////////////////////////////////////////////////////////////////////////
#include <cmake.h>
#include <Lexer.h>
#include <iostream>
#include <vector>
#include <string.h>
#include <test.h>
////////////////////////////////////////////////////////////////////////////////
int main (int, char**)
{
UnitTest t (555);
std::vector <std::pair <std::string, Lexer::Type>> tokens;
std::string token;
Lexer::Type type;
// static bool Lexer::dequote (std::string&, const std::string& quotes = "'\"");
t.is (Lexer::dequote ("foo"), "foo", "Lexer::dequote foo --> foo");
t.is (Lexer::dequote ("'foo'"), "foo", "Lexer::dequote 'foo' --> foo");
t.is (Lexer::dequote ("'o\\'clock'"), "o\\'clock", "Lexer::dequote 'o\\'clock' --> o\\'clock");
t.is (Lexer::dequote ("abba", "a"), "bb", "Lexer::dequote 'abba' (a) --> bb");
// Should result in no tokens.
Lexer l0 ("");
t.notok (l0.token (token, type), "'' --> no tokens");
// Should result in no tokens.
Lexer l1 (" \t ");
t.notok (l1.token (token, type), "' \\t ' --> no tokens");
// Test for numbers that are no longer ISO-8601 dates.
Lexer l3 ("1 12 123 1234 12345 123456 1234567");
tokens.clear ();
while (l3.token (token, type))
{
std::cout << "# «" << token << "» " << Lexer::typeName (type) << '\n';
tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
}
t.is ((int)tokens.size (), 7, "7 tokens");
t.is (tokens[0].first, "1", "tokens[0] == '1'");
t.is ((int) tokens[0].second, (int) Lexer::Type::number, "tokens[0] == Type::number");
t.is (tokens[1].first, "12", "tokens[1] == '12'");
t.is ((int) tokens[1].second, (int) Lexer::Type::number, "tokens[1] == Type::number");
t.is (tokens[2].first, "123", "tokens[2] == '123'");
t.is ((int) tokens[2].second, (int) Lexer::Type::number, "tokens[2] == Type::number"); // 70
t.is (tokens[3].first, "1234", "tokens[3] == '1234'");
t.is ((int) tokens[3].second, (int) Lexer::Type::date, "tokens[3] == Type::date");
t.is (tokens[4].first, "12345", "tokens[4] == '12345'");
t.is ((int) tokens[4].second, (int) Lexer::Type::number, "tokens[4] == Type::number");
t.is (tokens[5].first, "123456", "tokens[5] == '123456'");
t.is ((int) tokens[5].second, (int) Lexer::Type::date, "tokens[5] == Type::date");
t.is (tokens[6].first, "1234567", "tokens[6] == '1234567'");
t.is ((int) tokens[6].second, (int) Lexer::Type::duration, "tokens[6] == Type::duration");
// static bool readWord (const std::string&, const std::string&, std::string::size_type&, std::string&);
std::string::size_type cursor = 0;
std::string word;
t.ok (Lexer::readWord ("'one two'", "'\"", cursor, word), "readWord ''one two'' --> true");
t.is (word, "'one two'", " word '" + word + "'");
t.is ((int)cursor, 9, " cursor");
// Unterminated quoted string is invalid.
cursor = 0;
t.notok (Lexer::readWord ("'one", "'\"", cursor, word), "readWord ''one' --> false");
// static bool readWord (const std::string&, std::string::size_type&, std::string&);
cursor = 0;
t.ok (Lexer::readWord ("input", cursor, word), "readWord 'input' --> true");
t.is (word, "input", " word '" + word + "'");
t.is ((int)cursor, 5, " cursor");
cursor = 0;
t.ok (Lexer::readWord ("one\\ two", cursor, word), "readWord 'one\\ two' --> true");
t.is (word, "one two", " word '" + word + "'");
t.is ((int)cursor, 8, " cursor");
cursor = 0;
t.ok (Lexer::readWord ("\\u20A43", cursor, word), "readWord '\\u20A43' --> true");
t.is (word, "₤3", " word '" + word + "'");
t.is ((int)cursor, 7, " cursor");
cursor = 0;
t.ok (Lexer::readWord ("U+20AC4", cursor, word), "readWord '\\u20AC4' --> true");
t.is (word, "€4", " word '" + word + "'");
t.is ((int)cursor, 7, " cursor");
std::string text = "one 'two' three\\ four";
cursor = 0;
t.ok (Lexer::readWord (text, cursor, word), "readWord \"one 'two' three\\ four\" --> true");
t.is (word, "one", " word '" + word + "'");
cursor++;
t.ok (Lexer::readWord (text, cursor, word), "readWord \"one 'two' three\\ four\" --> true");
t.is (word, "'two'", " word '" + word + "'");
cursor++;
t.ok (Lexer::readWord (text, cursor, word), "readWord \"one 'two' three\\ four\" --> true");
t.is (word, "three four", " word '" + word + "'");
text = "one ";
cursor = 0;
t.ok (Lexer::readWord (text, cursor, word), "readWord \"one \" --> true");
t.is (word, "one", " word '" + word + "'");
// Test all Lexer types.
//
// Table-driven: each entry pairs an input string with up to five expected
// (token, type) results, in the order the Lexer is expected to produce them.
// The driver loop that follows lexes every input twice: once as-is, and once
// with a space added before and after it.

// NO fills an unused result slot. Its empty token string makes the driver loop
// skip the slot, so the Type::word it carries is never compared.
#define NO {"",Lexer::Type::word}
struct
{
// Text handed to the Lexer.
const char* input;
struct
{
// Expected token text; an empty string marks the slot as unused.
const char* token;
// Expected classification of that token.
Lexer::Type type;
} results[5];
} lexerTests[] =
{
// Pattern
{ "/foo/", { { "/foo/", Lexer::Type::pattern }, NO, NO, NO, NO }, },
{ "/a\\/b/", { { "/a\\/b/", Lexer::Type::pattern }, NO, NO, NO, NO }, },
{ "/'/", { { "/'/", Lexer::Type::pattern }, NO, NO, NO, NO }, },
// Path
{ "/long/path/to/file.txt", { { "/long/path/to/file.txt", Lexer::Type::path }, NO, NO, NO, NO }, },
// Word
{ "1.foo.bar", { { "1.foo.bar", Lexer::Type::word }, NO, NO, NO, NO }, },
// URL
{ "http://tasktools.org", { { "http://tasktools.org", Lexer::Type::url }, NO, NO, NO, NO }, },
{ "https://bug.tasktools.org", { { "https://bug.tasktools.org", Lexer::Type::url }, NO, NO, NO, NO }, },
// String
// The expected token keeps its surrounding quotes. Where the input contains a
// backslash-escaped quote or a Unicode code point escape, the expected token
// holds the decoded character rather than the escape sequence.
{ "'one two'", { { "'one two'", Lexer::Type::string }, NO, NO, NO, NO }, },
{ "\"three\"", { { "\"three\"", Lexer::Type::string }, NO, NO, NO, NO }, },
{ "'\\''", { { "'''", Lexer::Type::string }, NO, NO, NO, NO }, },
{ "\"\\\"\"", { { "\"\"\"", Lexer::Type::string }, NO, NO, NO, NO }, },
{ "\"\tfoo\t\"", { { "\"\tfoo\t\"", Lexer::Type::string }, NO, NO, NO, NO }, },
{ "\"\\u20A43\"", { { "\"₤3\"", Lexer::Type::string }, NO, NO, NO, NO }, },
{ "\"U+20AC4\"", { { "\"€4\"", Lexer::Type::string }, NO, NO, NO, NO }, },
// Number
// Note that "1.2e-3.4" is expected to lex as a single number token, and that
// the hexadecimal literal is expected as Type::hex rather than Type::number.
{ "1", { { "1", Lexer::Type::number }, NO, NO, NO, NO }, },
{ "3.14", { { "3.14", Lexer::Type::number }, NO, NO, NO, NO }, },
{ "6.02217e23", { { "6.02217e23", Lexer::Type::number }, NO, NO, NO, NO }, },
{ "1.2e-3.4", { { "1.2e-3.4", Lexer::Type::number }, NO, NO, NO, NO }, },
{ "0x2f", { { "0x2f", Lexer::Type::hex }, NO, NO, NO, NO }, },
// Operator - complete set
{ "^", { { "^", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "!", { { "!", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "_neg_", { { "_neg_", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "_pos_", { { "_pos_", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "_hastag_", { { "_hastag_", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "_notag_", { { "_notag_", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "*", { { "*", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "/", { { "/", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "%", { { "%", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "+", { { "+", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "-", { { "-", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "<=", { { "<=", Lexer::Type::op }, NO, NO, NO, NO }, },
{ ">=", { { ">=", Lexer::Type::op }, NO, NO, NO, NO }, },
{ ">", { { ">", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "<", { { "<", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "=", { { "=", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "==", { { "==", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "!=", { { "!=", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "!==", { { "!==", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "~", { { "~", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "!~", { { "!~", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "and", { { "and", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "or", { { "or", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "xor", { { "xor", Lexer::Type::op }, NO, NO, NO, NO }, },
{ "(", { { "(", Lexer::Type::op }, NO, NO, NO, NO }, },
{ ")", { { ")", Lexer::Type::op }, NO, NO, NO, NO }, },
// UUID
// Both the full form and progressively shorter leading portions of it are
// expected to be classified as Type::uuid.
{ "ffffffff-ffff-ffff-ffff-ffffffffffff", { { "ffffffff-ffff-ffff-ffff-ffffffffffff", Lexer::Type::uuid }, NO, NO, NO, NO }, },
{ "00000000-0000-0000-0000-0000000", { { "00000000-0000-0000-0000-0000000", Lexer::Type::uuid }, NO, NO, NO, NO }, },
{ "00000000-0000-0000-0000", { { "00000000-0000-0000-0000", Lexer::Type::uuid }, NO, NO, NO, NO }, },
{ "00000000-0000-0000", { { "00000000-0000-0000", Lexer::Type::uuid }, NO, NO, NO, NO }, },
{ "00000000-0000", { { "00000000-0000", Lexer::Type::uuid }, NO, NO, NO, NO }, },
{ "00000000", { { "00000000", Lexer::Type::uuid }, NO, NO, NO, NO }, },
{ "a360fc44-315c-4366-b70c-ea7e7520b749", { { "a360fc44-315c-4366-b70c-ea7e7520b749", Lexer::Type::uuid }, NO, NO, NO, NO }, },
{ "a360fc44-315c-4366-b70c-ea7e752", { { "a360fc44-315c-4366-b70c-ea7e752", Lexer::Type::uuid }, NO, NO, NO, NO }, },
{ "a360fc44-315c-4366-b70c", { { "a360fc44-315c-4366-b70c", Lexer::Type::uuid }, NO, NO, NO, NO }, },
{ "a360fc44-315c-4366", { { "a360fc44-315c-4366", Lexer::Type::uuid }, NO, NO, NO, NO }, },
{ "a360fc44-315c", { { "a360fc44-315c", Lexer::Type::uuid }, NO, NO, NO, NO }, },
{ "a360fc44", { { "a360fc44", Lexer::Type::uuid }, NO, NO, NO, NO }, },
// Date
{ "2015-W01", { { "2015-W01", Lexer::Type::date }, NO, NO, NO, NO }, },
{ "2015-02-17", { { "2015-02-17", Lexer::Type::date }, NO, NO, NO, NO }, },
{ "2013-11-29T22:58:00Z", { { "2013-11-29T22:58:00Z", Lexer::Type::date }, NO, NO, NO, NO }, },
{ "20131129T225800Z", { { "20131129T225800Z", Lexer::Type::date }, NO, NO, NO, NO }, },
{ "9th", { { "9th", Lexer::Type::date }, NO, NO, NO, NO }, },
{ "10th", { { "10th", Lexer::Type::date }, NO, NO, NO, NO }, },
{ "today", { { "today", Lexer::Type::date }, NO, NO, NO, NO }, },
// Duration
// "3 days" and "6 years" contain a space yet are each expected to come back
// as one duration token.
{ "year", { { "year", Lexer::Type::duration }, NO, NO, NO, NO }, },
{ "4weeks", { { "4weeks", Lexer::Type::duration }, NO, NO, NO, NO }, },
{ "PT23H", { { "PT23H", Lexer::Type::duration }, NO, NO, NO, NO }, },
{ "1second", { { "1second", Lexer::Type::duration }, NO, NO, NO, NO }, },
{ "1s", { { "1s", Lexer::Type::duration }, NO, NO, NO, NO }, },
{ "1minute", { { "1minute", Lexer::Type::duration }, NO, NO, NO, NO }, },
{ "2hour", { { "2hour", Lexer::Type::duration }, NO, NO, NO, NO }, },
{ "3 days", { { "3 days", Lexer::Type::duration }, NO, NO, NO, NO }, },
{ "4w", { { "4w", Lexer::Type::duration }, NO, NO, NO, NO }, },
{ "5mo", { { "5mo", Lexer::Type::duration }, NO, NO, NO, NO }, },
{ "6 years", { { "6 years", Lexer::Type::duration }, NO, NO, NO, NO }, },
{ "P1Y", { { "P1Y", Lexer::Type::duration }, NO, NO, NO, NO }, },
{ "PT1H", { { "PT1H", Lexer::Type::duration }, NO, NO, NO, NO }, },
{ "P1Y1M1DT1H1M1S", { { "P1Y1M1DT1H1M1S", Lexer::Type::duration }, NO, NO, NO, NO }, },
};
#define NUM_TESTS (sizeof (lexerTests) / sizeof (lexerTests[0]))
for (unsigned int i = 0; i < NUM_TESTS; i++)
{
// The isolated test puts the input string directly into the Lexer.
Lexer isolated (lexerTests[i].input);
for (int j = 0; j < 5; j++)
{
if (lexerTests[i].results[j].token[0])
{
// Isolated: "<token>"
t.ok (isolated.token (token, type), "Isolated Lexer::token(...) --> true");
t.is (token, lexerTests[i].results[j].token, " token --> " + token);
t.is ((int)type, (int)lexerTests[i].results[j].type, " type --> Lexer::Type::" + Lexer::typeToString (type));
}
}
// The embedded test surrounds the input string with a space.
Lexer embedded (std::string (" ") + lexerTests[i].input + " ");
for (int j = 0; j < 5; j++)
{
if (lexerTests[i].results[j].token[0])
{
// Embedded: "<token>"
t.ok (embedded.token (token, type), "Embedded Lexer::token(...) --> true");
t.is (token, lexerTests[i].results[j].token, " token --> " + token);
t.is ((int)type, (int)lexerTests[i].results[j].type, " type --> Lexer::Type::" + Lexer::typeToString (type));
}
}
}
t.is (Lexer::typeName (Lexer::Type::uuid), "uuid", "Lexer::typeName (Lexer::Type::uuid)");
t.is (Lexer::typeName (Lexer::Type::number), "number", "Lexer::typeName (Lexer::Type::number)");
t.is (Lexer::typeName (Lexer::Type::hex), "hex", "Lexer::typeName (Lexer::Type::hex)");
t.is (Lexer::typeName (Lexer::Type::string), "string", "Lexer::typeName (Lexer::Type::string)");
t.is (Lexer::typeName (Lexer::Type::url), "url", "Lexer::typeName (Lexer::Type::url)");
t.is (Lexer::typeName (Lexer::Type::path), "path", "Lexer::typeName (Lexer::Type::path)");
t.is (Lexer::typeName (Lexer::Type::pattern), "pattern", "Lexer::typeName (Lexer::Type::pattern)");
t.is (Lexer::typeName (Lexer::Type::op), "op", "Lexer::typeName (Lexer::Type::op)");
t.is (Lexer::typeName (Lexer::Type::word), "word", "Lexer::typeName (Lexer::Type::word)");
t.is (Lexer::typeName (Lexer::Type::date), "date", "Lexer::typeName (Lexer::Type::date)");
t.is (Lexer::typeName (Lexer::Type::duration), "duration", "Lexer::typeName (Lexer::Type::duration)");
// std::string Lexer::trimLeft (const std::string& in, const std::string&)
t.is (Lexer::trimLeft (""), "", "Lexer::trimLeft '' -> ''");
t.is (Lexer::trimLeft (" "), "", "Lexer::trimLeft ' ' -> ''");
t.is (Lexer::trimLeft ("", " \t"), "", "Lexer::trimLeft '' -> ''");
t.is (Lexer::trimLeft ("xxx"), "xxx", "Lexer::trimLeft 'xxx' -> 'xxx'");
t.is (Lexer::trimLeft ("xxx", " \t"), "xxx", "Lexer::trimLeft 'xxx' -> 'xxx'");
t.is (Lexer::trimLeft (" \t xxx \t "), "\t xxx \t ", "Lexer::trimLeft ' \\t xxx \\t ' -> '\\t xxx \\t '");
t.is (Lexer::trimLeft (" \t xxx \t ", " \t"), "xxx \t ", "Lexer::trimLeft ' \\t xxx \\t ' -> 'xxx \\t '");
// std::string Lexer::trimRight (const std::string& in, const std::string&)
t.is (Lexer::trimRight (""), "", "Lexer::trimRight '' -> ''");
t.is (Lexer::trimRight (" "), "", "Lexer::trimRight ' ' -> ''");
t.is (Lexer::trimRight ("", " \t"), "", "Lexer::trimRight '' -> ''");
t.is (Lexer::trimRight ("xxx"), "xxx", "Lexer::trimRight 'xxx' -> 'xxx'");
t.is (Lexer::trimRight ("xxx", " \t"), "xxx", "Lexer::trimRight 'xxx' -> 'xxx'");
t.is (Lexer::trimRight (" \t xxx \t "), " \t xxx \t", "Lexer::trimRight ' \\t xxx \\t ' -> ' \\t xxx \\t'");
t.is (Lexer::trimRight (" \t xxx \t ", " \t"), " \t xxx", "Lexer::trimRight ' \\t xxx \\t ' -> ' \\t xxx'");
// std::string Lexer::trim (const std::string& in, const std::string& t)
t.is (Lexer::trim (""), "", "Lexer::trim '' -> ''");
t.is (Lexer::trim (" "), "", "Lexer::trim ' ' -> ''");
t.is (Lexer::trim ("", " \t"), "", "Lexer::trim '' -> ''");
t.is (Lexer::trim ("xxx"), "xxx", "Lexer::trim 'xxx' -> 'xxx'");
t.is (Lexer::trim ("xxx", " \t"), "xxx", "Lexer::trim 'xxx' -> 'xxx'");
t.is (Lexer::trim (" \t xxx \t "), "\t xxx \t", "Lexer::trim ' \\t xxx \\t ' -> '\\t xxx \\t'");
t.is (Lexer::trim (" \t xxx \t ", " \t"), "xxx", "Lexer::trim ' \\t xxx \\t ' -> 'xxx'");
// std::vector <std::tuple <std::string, Lexer::Type>> Lexer::tokenize (const std::string& input)
auto tokenized = Lexer::tokenize (" one two three ");
t.is ((int)tokenized.size (), 3, "Lexer::tokenize ' one two three ' --> 3");
t.is (std::get <0> (tokenized[0]), "one", "Lexer::tokenize ' one two three ' [0] --> 'one'");
t.ok (std::get <1> (tokenized[0]) == Lexer::Type::word, "Lexer::tokenize ' one two three ' [0] --> word");
t.is (std::get <0> (tokenized[1]), "two", "Lexer::tokenize ' one two three ' [1] --> 'two'");
t.ok (std::get <1> (tokenized[1]) == Lexer::Type::word, "Lexer::tokenize ' one two three ' [1] --> word");
t.is (std::get <0> (tokenized[2]), "three", "Lexer::tokenize ' one two three ' [2] --> 'three'");
t.ok (std::get <1> (tokenized[2]) == Lexer::Type::word, "Lexer::tokenize ' one two three ' [2] --> word");
// bool wasQuoted (const std::string& input)
t.notok (Lexer::wasQuoted (""), "Lexer::wasQuoted '' --> false");
t.notok (Lexer::wasQuoted ("abc"), "Lexer::wasQuoted 'abc' --> false");
t.ok (Lexer::wasQuoted ("one two"), "Lexer::wasQuoted 'one two' --> true");
return 0;
}
////////////////////////////////////////////////////////////////////////////////