mirror of
https://github.com/GothenburgBitFactory/timewarrior.git
synced 2025-06-26 10:54:28 +02:00
Build: Migrated Lexer to libshared
This commit is contained in:
parent
fdca94085e
commit
ca57bf91e3
6 changed files with 2 additions and 1341 deletions
|
@ -11,7 +11,6 @@ set (timew_SRCS CLI.cpp CLI.h
|
||||||
Exclusion.cpp Exclusion.h
|
Exclusion.cpp Exclusion.h
|
||||||
Extensions.cpp Extensions.h
|
Extensions.cpp Extensions.h
|
||||||
Interval.cpp Interval.h
|
Interval.cpp Interval.h
|
||||||
Lexer.cpp Lexer.h
|
|
||||||
Range.cpp Range.h
|
Range.cpp Range.h
|
||||||
Rules.cpp Rules.h
|
Rules.cpp Rules.h
|
||||||
data.cpp
|
data.cpp
|
||||||
|
@ -29,6 +28,7 @@ set (libshared_SRCS libshared/src/Args.cpp libshared/src/Args.h
|
||||||
libshared/src/FS.cpp libshared/src/FS.h
|
libshared/src/FS.cpp libshared/src/FS.h
|
||||||
libshared/src/JSON.cpp libshared/src/JSON.h
|
libshared/src/JSON.cpp libshared/src/JSON.h
|
||||||
libshared/src/JSON2.cpp libshared/src/JSON2.h
|
libshared/src/JSON2.cpp libshared/src/JSON2.h
|
||||||
|
libshared/src/Lexer.cpp libshared/src/Lexer.h
|
||||||
libshared/src/Msg.cpp libshared/src/Msg.h
|
libshared/src/Msg.cpp libshared/src/Msg.h
|
||||||
libshared/src/Palette.cpp libshared/src/Palette.h
|
libshared/src/Palette.cpp libshared/src/Palette.h
|
||||||
libshared/src/Pig.cpp libshared/src/Pig.h
|
libshared/src/Pig.cpp libshared/src/Pig.h
|
||||||
|
|
912
src/Lexer.cpp
912
src/Lexer.cpp
|
@ -1,912 +0,0 @@
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
// Copyright 2013 - 2016, Paul Beckingham, Federico Hernandez.
|
|
||||||
//
|
|
||||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
// of this software and associated documentation files (the "Software"), to deal
|
|
||||||
// in the Software without restriction, including without limitation the rights
|
|
||||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
// copies of the Software, and to permit persons to whom the Software is
|
|
||||||
// furnished to do so, subject to the following conditions:
|
|
||||||
//
|
|
||||||
// The above copyright notice and this permission notice shall be included
|
|
||||||
// in all copies or substantial portions of the Software.
|
|
||||||
//
|
|
||||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
||||||
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
// SOFTWARE.
|
|
||||||
//
|
|
||||||
// http://www.opensource.org/licenses/mit-license.php
|
|
||||||
//
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
#include <cmake.h>
|
|
||||||
#include <Lexer.h>
|
|
||||||
#include <Datetime.h>
|
|
||||||
#include <Duration.h>
|
|
||||||
#include <algorithm>
|
|
||||||
#include <tuple>
|
|
||||||
#include <ctype.h>
|
|
||||||
#include <unicode.h>
|
|
||||||
#include <utf8.h>
|
|
||||||
|
|
||||||
static const std::string uuid_pattern = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";
|
|
||||||
static const unsigned int uuid_min_length = 8;
|
|
||||||
|
|
||||||
std::string Lexer::dateFormat = "";
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Sets up a lexer over 'text'. The end-of-stream offset is the length of the
// text, in bytes.
Lexer::Lexer (const std::string& text)
  : _text (text), _eos (text.length ())
{
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// When a Lexer object is constructed with a string, this method walks through
|
|
||||||
// the stream of low-level tokens.
|
|
||||||
bool Lexer::token (std::string& token, Lexer::Type& type)
|
|
||||||
{
|
|
||||||
// Eat white space.
|
|
||||||
while (unicodeWhitespace (_text[_cursor]))
|
|
||||||
utf8_next_char (_text, _cursor);
|
|
||||||
|
|
||||||
// Terminate at EOS.
|
|
||||||
if (isEOS ())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (isString (token, type, "'\"") ||
|
|
||||||
isUUID (token, type, true) ||
|
|
||||||
isDate (token, type) ||
|
|
||||||
isDuration (token, type) ||
|
|
||||||
isURL (token, type) ||
|
|
||||||
isHexNumber (token, type) ||
|
|
||||||
isNumber (token, type) ||
|
|
||||||
isPath (token, type) ||
|
|
||||||
isPattern (token, type) ||
|
|
||||||
isOperator (token, type) ||
|
|
||||||
isWord (token, type))
|
|
||||||
return true;
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Convenience wrapper: lexes all of 'input' and returns every (token, type)
// pair in the order produced by Lexer::token.
std::vector <std::tuple <std::string, Lexer::Type>> Lexer::tokenize (const std::string& input)
{
  std::vector <std::tuple <std::string, Lexer::Type>> result;

  std::string text;
  Lexer::Type kind;
  for (Lexer lexer (input); lexer.token (text, kind); )
    result.emplace_back (text, kind);

  return result;
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// No L10N - these are for internal purposes.
|
|
||||||
const std::string Lexer::typeName (const Lexer::Type& type)
|
|
||||||
{
|
|
||||||
switch (type)
|
|
||||||
{
|
|
||||||
case Lexer::Type::uuid: return "uuid";
|
|
||||||
case Lexer::Type::number: return "number";
|
|
||||||
case Lexer::Type::hex: return "hex";
|
|
||||||
case Lexer::Type::string: return "string";
|
|
||||||
case Lexer::Type::url: return "url";
|
|
||||||
case Lexer::Type::path: return "path";
|
|
||||||
case Lexer::Type::pattern: return "pattern";
|
|
||||||
case Lexer::Type::op: return "op";
|
|
||||||
case Lexer::Type::word: return "word";
|
|
||||||
case Lexer::Type::date: return "date";
|
|
||||||
case Lexer::Type::duration: return "duration";
|
|
||||||
}
|
|
||||||
|
|
||||||
return "unknown";
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Lexer::Type::number
|
|
||||||
// \d+
|
|
||||||
// [ . \d+ ]
|
|
||||||
// [ e|E [ +|- ] \d+ [ . \d+ ] ]
|
|
||||||
// not followed by non-operator.
|
|
||||||
bool Lexer::isNumber (std::string& token, Lexer::Type& type)
|
|
||||||
{
|
|
||||||
std::size_t marker = _cursor;
|
|
||||||
|
|
||||||
if (unicodeLatinDigit (_text[marker]))
|
|
||||||
{
|
|
||||||
++marker;
|
|
||||||
while (unicodeLatinDigit (_text[marker]))
|
|
||||||
utf8_next_char (_text, marker);
|
|
||||||
|
|
||||||
if (_text[marker] == '.')
|
|
||||||
{
|
|
||||||
++marker;
|
|
||||||
if (unicodeLatinDigit (_text[marker]))
|
|
||||||
{
|
|
||||||
++marker;
|
|
||||||
while (unicodeLatinDigit (_text[marker]))
|
|
||||||
utf8_next_char (_text, marker);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (_text[marker] == 'e' ||
|
|
||||||
_text[marker] == 'E')
|
|
||||||
{
|
|
||||||
++marker;
|
|
||||||
|
|
||||||
if (_text[marker] == '+' ||
|
|
||||||
_text[marker] == '-')
|
|
||||||
++marker;
|
|
||||||
|
|
||||||
if (unicodeLatinDigit (_text[marker]))
|
|
||||||
{
|
|
||||||
++marker;
|
|
||||||
while (unicodeLatinDigit (_text[marker]))
|
|
||||||
utf8_next_char (_text, marker);
|
|
||||||
|
|
||||||
if (_text[marker] == '.')
|
|
||||||
{
|
|
||||||
++marker;
|
|
||||||
if (unicodeLatinDigit (_text[marker]))
|
|
||||||
{
|
|
||||||
++marker;
|
|
||||||
while (unicodeLatinDigit (_text[marker]))
|
|
||||||
utf8_next_char (_text, marker);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Lookahread: !<unicodeWhitespace> | !<isSingleCharOperator>
|
|
||||||
// If there is an immediately consecutive character, that is not an operator, fail.
|
|
||||||
if (_eos > marker &&
|
|
||||||
! unicodeWhitespace (_text[marker]) &&
|
|
||||||
! isSingleCharOperator (_text[marker]))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
token = _text.substr (_cursor, marker - _cursor);
|
|
||||||
type = Lexer::Type::number;
|
|
||||||
_cursor = marker;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Lexer::Type::number
|
|
||||||
// \d+
|
|
||||||
bool Lexer::isInteger (std::string& token, Lexer::Type& type)
|
|
||||||
{
|
|
||||||
std::size_t marker = _cursor;
|
|
||||||
|
|
||||||
if (unicodeLatinDigit (_text[marker]))
|
|
||||||
{
|
|
||||||
++marker;
|
|
||||||
while (unicodeLatinDigit (_text[marker]))
|
|
||||||
utf8_next_char (_text, marker);
|
|
||||||
|
|
||||||
token = _text.substr (_cursor, marker - _cursor);
|
|
||||||
type = Lexer::Type::number;
|
|
||||||
_cursor = marker;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Returns true if 'c' is one of the single-character operators.
bool Lexer::isSingleCharOperator (int c)
{
  switch (c)
  {
  case '+':  // Addition
  case '-':  // Subtraction or unary minus (ambiguous)
  case '*':  // Multiplication
  case '/':  // Division
  case '(':  // Precedence open parenthesis
  case ')':  // Precedence close parenthesis
  case '<':  // Less than
  case '>':  // Greater than
  case '^':  // Exponent
  case '!':  // Unary not
  case '%':  // Modulus
  case '=':  // Partial match
  case '~':  // Pattern match
    return true;

  default:
    return false;
  }
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
bool Lexer::isDoubleCharOperator (int c0, int c1, int c2)
|
|
||||||
{
|
|
||||||
return (c0 == '=' && c1 == '=') ||
|
|
||||||
(c0 == '!' && c1 == '=') ||
|
|
||||||
(c0 == '<' && c1 == '=') ||
|
|
||||||
(c0 == '>' && c1 == '=') ||
|
|
||||||
(c0 == 'o' && c1 == 'r' && isBoundary (c1, c2)) ||
|
|
||||||
(c0 == '|' && c1 == '|') ||
|
|
||||||
(c0 == '&' && c1 == '&') ||
|
|
||||||
(c0 == '!' && c1 == '~');
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
bool Lexer::isTripleCharOperator (int c0, int c1, int c2, int c3)
|
|
||||||
{
|
|
||||||
return (c0 == 'a' && c1 == 'n' && c2 == 'd' && isBoundary (c2, c3)) ||
|
|
||||||
(c0 == 'x' && c1 == 'o' && c2 == 'r' && isBoundary (c2, c3)) ||
|
|
||||||
(c0 == '!' && c1 == '=' && c2 == '=');
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
bool Lexer::isBoundary (int left, int right)
|
|
||||||
{
|
|
||||||
// EOS
|
|
||||||
if (right == '\0') return true;
|
|
||||||
|
|
||||||
// XOR
|
|
||||||
if (unicodeLatinAlpha (left) != unicodeLatinAlpha (right)) return true;
|
|
||||||
if (unicodeLatinDigit (left) != unicodeLatinDigit (right)) return true;
|
|
||||||
if (unicodeWhitespace (left) != unicodeWhitespace (right)) return true;
|
|
||||||
|
|
||||||
// OR
|
|
||||||
if (isPunctuation (left) || isPunctuation (right)) return true;
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// A hard boundary separates two adjacent characters regardless of whitespace.
// It exists when 'right' is the end of the string (NUL), or when either
// character is a parenthesis - the filter operators that do not need to be
// surrounded by whitespace.
bool Lexer::isHardBoundary (int left, int right)
{
  // End of string.
  if (right == '\0')
    return true;

  auto isParenthesis = [] (int c) { return c == '(' || c == ')'; };

  return isParenthesis (left) || isParenthesis (right);
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
bool Lexer::isPunctuation (int c)
|
|
||||||
{
|
|
||||||
return isprint (c) &&
|
|
||||||
c != ' ' &&
|
|
||||||
c != '@' &&
|
|
||||||
c != '#' &&
|
|
||||||
c != '$' &&
|
|
||||||
c != '_' &&
|
|
||||||
! unicodeLatinDigit (c) &&
|
|
||||||
! unicodeLatinAlpha (c);
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Assumes that quotes is a string containing a non-trivial set of quote
|
|
||||||
// characters.
|
|
||||||
std::string Lexer::dequote (const std::string& input, const std::string& quotes)
|
|
||||||
{
|
|
||||||
if (input.length ())
|
|
||||||
{
|
|
||||||
int quote = input[0];
|
|
||||||
if (quotes.find (quote) != std::string::npos)
|
|
||||||
{
|
|
||||||
size_t len = input.length ();
|
|
||||||
if (quote == input[len - 1])
|
|
||||||
return input.substr (1, len - 2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return input;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Detects characters in an input string that indicate quotes were required, or
|
|
||||||
// escapes, to get them past the shell.
|
|
||||||
bool Lexer::wasQuoted (const std::string& input)
|
|
||||||
{
|
|
||||||
if (input.find_first_of (" \t()<>&~") != std::string::npos)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
bool Lexer::isEOS () const
|
|
||||||
{
|
|
||||||
return _cursor >= _eos;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Converts '0' -> 0
|
|
||||||
// '9' -> 9
|
|
||||||
// 'a'/'A' -> 10
|
|
||||||
// 'f'/'F' -> 15
|
|
||||||
int Lexer::hexToInt (int c)
|
|
||||||
{
|
|
||||||
if (c >= '0' && c <= '9') return (c - '0');
|
|
||||||
else if (c >= 'a' && c <= 'f') return (c - 'a' + 10);
|
|
||||||
else return (c - 'A' + 10);
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
int Lexer::hexToInt (int c0, int c1)
|
|
||||||
{
|
|
||||||
return (hexToInt (c0) << 4) + hexToInt (c1);
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
int Lexer::hexToInt (int c0, int c1, int c2, int c3)
|
|
||||||
{
|
|
||||||
return (hexToInt (c0) << 12) +
|
|
||||||
(hexToInt (c1) << 8) +
|
|
||||||
(hexToInt (c2) << 4) +
|
|
||||||
hexToInt (c3);
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Returns a copy of 'in' with every leading character that appears in 't'
// removed. If 'in' consists only of such characters the result is empty.
std::string Lexer::trimLeft (const std::string& in, const std::string& t /*= " "*/)
{
  const std::string::size_type first = in.find_first_not_of (t);

  // Nothing survives the trim.
  if (first == std::string::npos)
    return std::string ();

  return in.substr (first);
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Returns a copy of 'in' with every trailing character that appears in 't'
// removed. If 'in' consists only of such characters the result is empty.
std::string Lexer::trimRight (const std::string& in, const std::string& t /*= " "*/)
{
  const std::string::size_type last = in.find_last_not_of (t);

  // Nothing survives the trim.
  if (last == std::string::npos)
    return std::string ();

  return in.substr (0, last + 1);
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
std::string Lexer::trim (const std::string& in, const std::string& t /*= " "*/)
|
|
||||||
{
|
|
||||||
return trimLeft (trimRight (in, t), t);
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Lexer::Type::string
|
|
||||||
// '|"
|
|
||||||
// [ U+XXXX | \uXXXX | \" | \' | \\ | \/ | \b | \f | \n | \r | \t | . ]
|
|
||||||
// '|"
|
|
||||||
bool Lexer::isString (std::string& token, Lexer::Type& type, const std::string& quotes)
|
|
||||||
{
|
|
||||||
std::size_t marker = _cursor;
|
|
||||||
if (readWord (_text, quotes, marker, token))
|
|
||||||
{
|
|
||||||
type = Lexer::Type::string;
|
|
||||||
_cursor = marker;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Lexer::Type::date
|
|
||||||
// <Datetime> (followed by eos, WS, operator)
|
|
||||||
bool Lexer::isDate (std::string& token, Lexer::Type& type)
|
|
||||||
{
|
|
||||||
// Try an ISO date parse.
|
|
||||||
std::size_t i = _cursor;
|
|
||||||
Datetime d;
|
|
||||||
if (d.parse (_text, i, Lexer::dateFormat) &&
|
|
||||||
(i >= _eos ||
|
|
||||||
unicodeWhitespace (_text[i]) ||
|
|
||||||
isSingleCharOperator (_text[i])))
|
|
||||||
{
|
|
||||||
type = Lexer::Type::date;
|
|
||||||
token = _text.substr (_cursor, i - _cursor);
|
|
||||||
_cursor = i;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Lexer::Type::duration
|
|
||||||
// <Duration> (followed by eos, WS, operator)
|
|
||||||
bool Lexer::isDuration (std::string& token, Lexer::Type& type)
|
|
||||||
{
|
|
||||||
std::size_t marker = _cursor;
|
|
||||||
|
|
||||||
std::string extractedToken;
|
|
||||||
Lexer::Type extractedType;
|
|
||||||
if (isOperator(extractedToken, extractedType))
|
|
||||||
{
|
|
||||||
_cursor = marker;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
marker = _cursor;
|
|
||||||
Duration dur;
|
|
||||||
if (dur.parse (_text, marker) &&
|
|
||||||
(marker >= _eos ||
|
|
||||||
unicodeWhitespace (_text[marker]) ||
|
|
||||||
isSingleCharOperator (_text[marker])))
|
|
||||||
{
|
|
||||||
type = Lexer::Type::duration;
|
|
||||||
token = _text.substr (_cursor, marker - _cursor);
|
|
||||||
_cursor = marker;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Lexer::Type::uuid
|
|
||||||
// XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
|
|
||||||
// XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXX
|
|
||||||
// XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXX
|
|
||||||
// XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXX
|
|
||||||
// ...
|
|
||||||
// XXXXXXXX-XX
|
|
||||||
// XXXXXXXX-X
|
|
||||||
// XXXXXXXX-
|
|
||||||
// XXXXXXXX
|
|
||||||
// Followed only by EOS, whitespace, or single character operator.
|
|
||||||
bool Lexer::isUUID (std::string& token, Lexer::Type& type, bool endBoundary)
|
|
||||||
{
|
|
||||||
std::size_t marker = _cursor;
|
|
||||||
|
|
||||||
// Greedy.
|
|
||||||
std::size_t i = 0;
|
|
||||||
for (; i < 36 && marker + i < _eos; i++)
|
|
||||||
{
|
|
||||||
if (uuid_pattern[i] == 'x')
|
|
||||||
{
|
|
||||||
if (! unicodeHexDigit (_text[marker + i]))
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
else if (uuid_pattern[i] != _text[marker + i])
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (i >= uuid_min_length &&
|
|
||||||
(! endBoundary ||
|
|
||||||
! _text[marker + i] ||
|
|
||||||
unicodeWhitespace (_text[marker + i]) ||
|
|
||||||
isSingleCharOperator (_text[marker + i])))
|
|
||||||
{
|
|
||||||
token = _text.substr (_cursor, i);
|
|
||||||
type = Lexer::Type::uuid;
|
|
||||||
_cursor += i;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Lexer::Type::hex
|
|
||||||
// 0xX+
|
|
||||||
bool Lexer::isHexNumber (std::string& token, Lexer::Type& type)
|
|
||||||
{
|
|
||||||
std::size_t marker = _cursor;
|
|
||||||
|
|
||||||
if (_eos - marker >= 3 &&
|
|
||||||
_text[marker + 0] == '0' &&
|
|
||||||
_text[marker + 1] == 'x')
|
|
||||||
{
|
|
||||||
marker += 2;
|
|
||||||
|
|
||||||
while (unicodeHexDigit (_text[marker]))
|
|
||||||
++marker;
|
|
||||||
|
|
||||||
if (marker - _cursor > 2)
|
|
||||||
{
|
|
||||||
token = _text.substr (_cursor, marker - _cursor);
|
|
||||||
type = Lexer::Type::hex;
|
|
||||||
_cursor = marker;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Lexer::Type::word
|
|
||||||
// [^\s]+
|
|
||||||
bool Lexer::isWord (std::string& token, Lexer::Type& type)
|
|
||||||
{
|
|
||||||
std::size_t marker = _cursor;
|
|
||||||
|
|
||||||
while (_text[marker] &&
|
|
||||||
! unicodeWhitespace (_text[marker]) &&
|
|
||||||
! isSingleCharOperator (_text[marker]))
|
|
||||||
utf8_next_char (_text, marker);
|
|
||||||
|
|
||||||
if (marker > _cursor)
|
|
||||||
{
|
|
||||||
token = _text.substr (_cursor, marker - _cursor);
|
|
||||||
type = Lexer::Type::word;
|
|
||||||
_cursor = marker;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Lexer::Type::url
|
|
||||||
// http [s] :// ...
|
|
||||||
bool Lexer::isURL (std::string& token, Lexer::Type& type)
|
|
||||||
{
|
|
||||||
std::size_t marker = _cursor;
|
|
||||||
|
|
||||||
if (_eos - _cursor > 9 && // length 'https://*'
|
|
||||||
(_text[marker + 0] == 'h' || _text[marker + 0] == 'H') &&
|
|
||||||
(_text[marker + 1] == 't' || _text[marker + 1] == 'T') &&
|
|
||||||
(_text[marker + 2] == 't' || _text[marker + 2] == 'T') &&
|
|
||||||
(_text[marker + 3] == 'p' || _text[marker + 3] == 'P'))
|
|
||||||
{
|
|
||||||
marker += 4;
|
|
||||||
if (_text[marker + 0] == 's' || _text[marker + 0] == 'S')
|
|
||||||
++marker;
|
|
||||||
|
|
||||||
if (_text[marker + 0] == ':' &&
|
|
||||||
_text[marker + 1] == '/' &&
|
|
||||||
_text[marker + 2] == '/')
|
|
||||||
{
|
|
||||||
marker += 3;
|
|
||||||
|
|
||||||
while (marker < _eos &&
|
|
||||||
! unicodeWhitespace (_text[marker]))
|
|
||||||
utf8_next_char (_text, marker);
|
|
||||||
|
|
||||||
token = _text.substr (_cursor, marker - _cursor);
|
|
||||||
type = Lexer::Type::url;
|
|
||||||
_cursor = marker;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Lexer::Type::path
|
|
||||||
// ( / <non-slash, non-whitespace> )+
|
|
||||||
bool Lexer::isPath (std::string& token, Lexer::Type& type)
|
|
||||||
{
|
|
||||||
std::size_t marker = _cursor;
|
|
||||||
int slashCount = 0;
|
|
||||||
|
|
||||||
while (1)
|
|
||||||
{
|
|
||||||
if (_text[marker] == '/')
|
|
||||||
{
|
|
||||||
++marker;
|
|
||||||
++slashCount;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (_text[marker] &&
|
|
||||||
! unicodeWhitespace (_text[marker]) &&
|
|
||||||
_text[marker] != '/')
|
|
||||||
{
|
|
||||||
utf8_next_char (_text, marker);
|
|
||||||
while (_text[marker] &&
|
|
||||||
! unicodeWhitespace (_text[marker]) &&
|
|
||||||
_text[marker] != '/')
|
|
||||||
utf8_next_char (_text, marker);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (marker > _cursor &&
|
|
||||||
slashCount > 3)
|
|
||||||
{
|
|
||||||
type = Lexer::Type::path;
|
|
||||||
token = _text.substr (_cursor, marker - _cursor);
|
|
||||||
_cursor = marker;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Lexer::Type::pattern
|
|
||||||
// / <unquoted-string> / <EOS> | <unicodeWhitespace>
|
|
||||||
bool Lexer::isPattern (std::string& token, Lexer::Type& type)
|
|
||||||
{
|
|
||||||
std::size_t marker = _cursor;
|
|
||||||
|
|
||||||
std::string word;
|
|
||||||
if (readWord (_text, "/", _cursor, word) &&
|
|
||||||
(isEOS () ||
|
|
||||||
unicodeWhitespace (_text[_cursor])))
|
|
||||||
{
|
|
||||||
token = _text.substr (marker, _cursor - marker);
|
|
||||||
type = Lexer::Type::pattern;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
_cursor = marker;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Lexer::Type::op
|
|
||||||
// _hastag_ | _notag | _neg_ | _pos_ |
|
|
||||||
// <isTripleCharOperator> |
|
|
||||||
// <isDoubleCharOperator> |
|
|
||||||
// <isSingleCharOperator> |
|
|
||||||
bool Lexer::isOperator (std::string& token, Lexer::Type& type)
|
|
||||||
{
|
|
||||||
std::size_t marker = _cursor;
|
|
||||||
|
|
||||||
if (_eos - marker >= 8 && _text.substr (marker, 8) == "_hastag_")
|
|
||||||
{
|
|
||||||
marker += 8;
|
|
||||||
type = Lexer::Type::op;
|
|
||||||
token = _text.substr (_cursor, marker - _cursor);
|
|
||||||
_cursor = marker;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (_eos - marker >= 7 && _text.substr (marker, 7) == "_notag_")
|
|
||||||
{
|
|
||||||
marker += 7;
|
|
||||||
type = Lexer::Type::op;
|
|
||||||
token = _text.substr (_cursor, marker - _cursor);
|
|
||||||
_cursor = marker;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (_eos - marker >= 5 && _text.substr (marker, 5) == "_neg_")
|
|
||||||
{
|
|
||||||
marker += 5;
|
|
||||||
type = Lexer::Type::op;
|
|
||||||
token = _text.substr (_cursor, marker - _cursor);
|
|
||||||
_cursor = marker;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (_eos - marker >= 5 && _text.substr (marker, 5) == "_pos_")
|
|
||||||
{
|
|
||||||
marker += 5;
|
|
||||||
type = Lexer::Type::op;
|
|
||||||
token = _text.substr (_cursor, marker - _cursor);
|
|
||||||
_cursor = marker;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (_eos - marker >= 3 &&
|
|
||||||
isTripleCharOperator (_text[marker], _text[marker + 1], _text[marker + 2], _text[marker + 3]))
|
|
||||||
{
|
|
||||||
marker += 3;
|
|
||||||
type = Lexer::Type::op;
|
|
||||||
token = _text.substr (_cursor, marker - _cursor);
|
|
||||||
_cursor = marker;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (_eos - marker >= 2 &&
|
|
||||||
isDoubleCharOperator (_text[marker], _text[marker + 1], _text[marker + 2]))
|
|
||||||
{
|
|
||||||
marker += 2;
|
|
||||||
type = Lexer::Type::op;
|
|
||||||
token = _text.substr (_cursor, marker - _cursor);
|
|
||||||
_cursor = marker;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (isSingleCharOperator (_text[marker]))
|
|
||||||
{
|
|
||||||
token = _text[marker];
|
|
||||||
type = Lexer::Type::op;
|
|
||||||
_cursor = ++marker;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Static
|
|
||||||
std::string Lexer::typeToString (Lexer::Type type)
|
|
||||||
{
|
|
||||||
if (type == Lexer::Type::string) return std::string ("\033[38;5;7m\033[48;5;3m") + "string" + "\033[0m";
|
|
||||||
else if (type == Lexer::Type::uuid) return std::string ("\033[38;5;7m\033[48;5;10m") + "uuid" + "\033[0m";
|
|
||||||
else if (type == Lexer::Type::hex) return std::string ("\033[38;5;7m\033[48;5;14m") + "hex" + "\033[0m";
|
|
||||||
else if (type == Lexer::Type::number) return std::string ("\033[38;5;7m\033[48;5;6m") + "number" + "\033[0m";
|
|
||||||
else if (type == Lexer::Type::url) return std::string ("\033[38;5;7m\033[48;5;4m") + "url" + "\033[0m";
|
|
||||||
else if (type == Lexer::Type::path) return std::string ("\033[37;102m") + "path" + "\033[0m";
|
|
||||||
else if (type == Lexer::Type::pattern) return std::string ("\033[37;42m") + "pattern" + "\033[0m";
|
|
||||||
else if (type == Lexer::Type::op) return std::string ("\033[38;5;7m\033[48;5;203m") + "op" + "\033[0m";
|
|
||||||
else if (type == Lexer::Type::word) return std::string ("\033[38;5;15m\033[48;5;236m") + "word" + "\033[0m";
|
|
||||||
else if (type == Lexer::Type::date) return std::string ("\033[38;5;15m\033[48;5;34m") + "date" + "\033[0m";
|
|
||||||
else if (type == Lexer::Type::duration) return std::string ("\033[38;5;15m\033[48;5;34m") + "duration" + "\033[0m";
|
|
||||||
else return std::string ("\033[37;41m") + "unknown" + "\033[0m";
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Full implementation of a quoted word. Includes:
|
|
||||||
// '\''
|
|
||||||
// '"'
|
|
||||||
// "'"
|
|
||||||
// "\""
|
|
||||||
// 'one two'
|
|
||||||
// Result includes the quotes.
|
|
||||||
bool Lexer::readWord (
|
|
||||||
const std::string& text,
|
|
||||||
const std::string& quotes,
|
|
||||||
std::string::size_type& cursor,
|
|
||||||
std::string& word)
|
|
||||||
{
|
|
||||||
if (quotes.find (text[cursor]) == std::string::npos)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
std::string::size_type eos = text.length ();
|
|
||||||
int quote = text[cursor++];
|
|
||||||
word = quote;
|
|
||||||
|
|
||||||
int c;
|
|
||||||
while ((c = text[cursor]))
|
|
||||||
{
|
|
||||||
// Quoted word ends on a quote.
|
|
||||||
if (quote && quote == c)
|
|
||||||
{
|
|
||||||
word += utf8_character (utf8_next_char (text, cursor));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Unicode U+XXXX or \uXXXX codepoint.
|
|
||||||
else if (eos - cursor >= 6 &&
|
|
||||||
((text[cursor + 0] == 'U' && text[cursor + 1] == '+') ||
|
|
||||||
(text[cursor + 0] == '\\' && text[cursor + 1] == 'u')) &&
|
|
||||||
unicodeHexDigit (text[cursor + 2]) &&
|
|
||||||
unicodeHexDigit (text[cursor + 3]) &&
|
|
||||||
unicodeHexDigit (text[cursor + 4]) &&
|
|
||||||
unicodeHexDigit (text[cursor + 5]))
|
|
||||||
{
|
|
||||||
word += utf8_character (
|
|
||||||
hexToInt (
|
|
||||||
text[cursor + 2],
|
|
||||||
text[cursor + 3],
|
|
||||||
text[cursor + 4],
|
|
||||||
text[cursor + 5]));
|
|
||||||
cursor += 6;
|
|
||||||
}
|
|
||||||
|
|
||||||
// An escaped thing.
|
|
||||||
else if (c == '\\')
|
|
||||||
{
|
|
||||||
c = text[++cursor];
|
|
||||||
|
|
||||||
switch (c)
|
|
||||||
{
|
|
||||||
case '"': word += (char) 0x22; ++cursor; break;
|
|
||||||
case '\'': word += (char) 0x27; ++cursor; break;
|
|
||||||
case '\\': word += (char) 0x5C; ++cursor; break;
|
|
||||||
case 'b': word += (char) 0x08; ++cursor; break;
|
|
||||||
case 'f': word += (char) 0x0C; ++cursor; break;
|
|
||||||
case 'n': word += (char) 0x0A; ++cursor; break;
|
|
||||||
case 'r': word += (char) 0x0D; ++cursor; break;
|
|
||||||
case 't': word += (char) 0x09; ++cursor; break;
|
|
||||||
case 'v': word += (char) 0x0B; ++cursor; break;
|
|
||||||
|
|
||||||
// This pass-through default case means that anything can be escaped
|
|
||||||
// harmlessly. In particular 'quote' is included, if it not one of the
|
|
||||||
// above characters.
|
|
||||||
default: word += (char) c; ++cursor; break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ordinary character.
|
|
||||||
else
|
|
||||||
word += utf8_character (utf8_next_char (text, cursor));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Verify termination.
|
|
||||||
return word[0] == quote &&
|
|
||||||
word[word.length () - 1] == quote &&
|
|
||||||
word.length () >= 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Full implementation of an unquoted word. Includes:
|
|
||||||
// one\ two
|
|
||||||
// abcU+0020def
|
|
||||||
// abc\u0020def
|
|
||||||
// a\tb
|
|
||||||
//
|
|
||||||
// Ends at:
|
|
||||||
// Lexer::isEOS
|
|
||||||
// unicodeWhitespace
|
|
||||||
// Lexer::isHardBoundary
|
|
||||||
bool Lexer::readWord (
|
|
||||||
const std::string& text,
|
|
||||||
std::string::size_type& cursor,
|
|
||||||
std::string& word)
|
|
||||||
{
|
|
||||||
std::string::size_type eos = text.length ();
|
|
||||||
|
|
||||||
word = "";
|
|
||||||
int c;
|
|
||||||
int prev = 0;
|
|
||||||
while ((c = text[cursor])) // Handles EOS.
|
|
||||||
{
|
|
||||||
// Unquoted word ends on white space.
|
|
||||||
if (unicodeWhitespace (c))
|
|
||||||
break;
|
|
||||||
|
|
||||||
// Parentheses mostly.
|
|
||||||
if (prev && Lexer::isHardBoundary (prev, c))
|
|
||||||
break;
|
|
||||||
|
|
||||||
// Unicode U+XXXX or \uXXXX codepoint.
|
|
||||||
else if (eos - cursor >= 6 &&
|
|
||||||
((text[cursor + 0] == 'U' && text[cursor + 1] == '+') ||
|
|
||||||
(text[cursor + 0] == '\\' && text[cursor + 1] == 'u')) &&
|
|
||||||
unicodeHexDigit (text[cursor + 2]) &&
|
|
||||||
unicodeHexDigit (text[cursor + 3]) &&
|
|
||||||
unicodeHexDigit (text[cursor + 4]) &&
|
|
||||||
unicodeHexDigit (text[cursor + 5]))
|
|
||||||
{
|
|
||||||
word += utf8_character (
|
|
||||||
hexToInt (
|
|
||||||
text[cursor + 2],
|
|
||||||
text[cursor + 3],
|
|
||||||
text[cursor + 4],
|
|
||||||
text[cursor + 5]));
|
|
||||||
cursor += 6;
|
|
||||||
}
|
|
||||||
|
|
||||||
// An escaped thing.
|
|
||||||
else if (c == '\\')
|
|
||||||
{
|
|
||||||
c = text[++cursor];
|
|
||||||
|
|
||||||
switch (c)
|
|
||||||
{
|
|
||||||
case '"': word += (char) 0x22; ++cursor; break;
|
|
||||||
case '\'': word += (char) 0x27; ++cursor; break;
|
|
||||||
case '\\': word += (char) 0x5C; ++cursor; break;
|
|
||||||
case 'b': word += (char) 0x08; ++cursor; break;
|
|
||||||
case 'f': word += (char) 0x0C; ++cursor; break;
|
|
||||||
case 'n': word += (char) 0x0A; ++cursor; break;
|
|
||||||
case 'r': word += (char) 0x0D; ++cursor; break;
|
|
||||||
case 't': word += (char) 0x09; ++cursor; break;
|
|
||||||
case 'v': word += (char) 0x0B; ++cursor; break;
|
|
||||||
|
|
||||||
// This pass-through default case means that anything can be escaped
|
|
||||||
// harmlessly. In particular 'quote' is included, if it not one of the
|
|
||||||
// above characters.
|
|
||||||
default: word += (char) c; ++cursor; break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ordinary character.
|
|
||||||
else
|
|
||||||
word += utf8_character (utf8_next_char (text, cursor));
|
|
||||||
|
|
||||||
prev = c;
|
|
||||||
}
|
|
||||||
|
|
||||||
return word.length () > 0 ? true : false;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
96
src/Lexer.h
96
src/Lexer.h
|
@ -1,96 +0,0 @@
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
// Copyright 2013 - 2016, Paul Beckingham, Federico Hernandez.
|
|
||||||
//
|
|
||||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
// of this software and associated documentation files (the "Software"), to deal
|
|
||||||
// in the Software without restriction, including without limitation the rights
|
|
||||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
// copies of the Software, and to permit persons to whom the Software is
|
|
||||||
// furnished to do so, subject to the following conditions:
|
|
||||||
//
|
|
||||||
// The above copyright notice and this permission notice shall be included
|
|
||||||
// in all copies or substantial portions of the Software.
|
|
||||||
//
|
|
||||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
||||||
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
// SOFTWARE.
|
|
||||||
//
|
|
||||||
// http://www.opensource.org/licenses/mit-license.php
|
|
||||||
//
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
#ifndef INCLUDED_LEXER
|
|
||||||
#define INCLUDED_LEXER
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <map>
|
|
||||||
#include <vector>
|
|
||||||
#include <tuple>
|
|
||||||
#include <cstddef>
|
|
||||||
|
|
||||||
// Declaration of the Lexer: constructed from a string, it hands out tokens
// one at a time via token (), each tagged with a Lexer::Type. A number of
// static helpers are also exposed for use without a Lexer instance.
class Lexer
{
public:
  // These are overridable.
  static std::string dateFormat;

  // The kinds of token that can be reported. The enumerator order is part of
  // the interface, because callers may cast these to int.
  enum class Type
  {
    uuid,
    number,
    hex,
    string,
    url,
    path,
    pattern,
    op,
    word,
    date,
    duration
  };

  // Construction and token extraction.
  explicit Lexer (const std::string&);
  bool token (std::string&, Lexer::Type&);
  static std::string typeToString (Lexer::Type);

  // Static helpers.
  static std::vector <std::tuple <std::string, Lexer::Type>> tokenize (const std::string&);
  static const std::string typeName (const Lexer::Type&);

  // Character classification.
  static bool isSingleCharOperator (int);
  static bool isDoubleCharOperator (int, int, int);
  static bool isTripleCharOperator (int, int, int, int);
  static bool isBoundary (int, int);
  static bool isHardBoundary (int, int);
  static bool isPunctuation (int);
  static bool wasQuoted (const std::string&);

  // Word readers: the four-argument form takes an extra string argument, the
  // three-argument form reads an unquoted word.
  static bool readWord (const std::string&, const std::string&, std::string::size_type&, std::string&);
  static bool readWord (const std::string&, std::string::size_type&, std::string&);

  // Hex digit conversion for one, two or four digits.
  static int hexToInt (int);
  static int hexToInt (int, int);
  static int hexToInt (int, int, int, int);

  // Trimming and dequoting. The trim functions strip a single space unless
  // told otherwise; dequote defaults to single and double quotes.
  static std::string trimLeft (const std::string& in, const std::string& t = " ");
  static std::string trimRight (const std::string& in, const std::string& t = " ");
  static std::string trim (const std::string& in, const std::string& t = " ");
  static std::string dequote (const std::string&, const std::string& quotes = "'\"");

  // Stream Classifiers.
  bool isEOS () const;
  bool isString (std::string&, Lexer::Type&, const std::string&);
  bool isDate (std::string&, Lexer::Type&);
  bool isDuration (std::string&, Lexer::Type&);
  bool isUUID (std::string&, Lexer::Type&, bool);
  bool isNumber (std::string&, Lexer::Type&);
  bool isInteger (std::string&, Lexer::Type&);
  bool isHexNumber (std::string&, Lexer::Type&);
  bool isURL (std::string&, Lexer::Type&);
  bool isPath (std::string&, Lexer::Type&);
  bool isPattern (std::string&, Lexer::Type&);
  bool isOperator (std::string&, Lexer::Type&);
  bool isWord (std::string&, Lexer::Type&);

private:
  // State: the text, a position within it, and an end-of-string value.
  std::string _text {};
  std::size_t _cursor {0};
  std::size_t _eos {0};
};
|
|
||||||
|
|
||||||
#endif
|
|
1
test/.gitignore
vendored
1
test/.gitignore
vendored
|
@ -4,7 +4,6 @@ data.t
|
||||||
exclusion.t
|
exclusion.t
|
||||||
helper.t
|
helper.t
|
||||||
interval.t
|
interval.t
|
||||||
lexer.t
|
|
||||||
range.t
|
range.t
|
||||||
rules.t
|
rules.t
|
||||||
util.t
|
util.t
|
||||||
|
|
|
@ -14,7 +14,7 @@ include_directories (${CMAKE_SOURCE_DIR}
|
||||||
include_directories (${CMAKE_INSTALL_PREFIX}/include)
|
include_directories (${CMAKE_INSTALL_PREFIX}/include)
|
||||||
link_directories(${CMAKE_INSTALL_PREFIX}/lib)
|
link_directories(${CMAKE_INSTALL_PREFIX}/lib)
|
||||||
|
|
||||||
set (test_SRCS data.t exclusion.t helper.t interval.t lexer.t range.t rules.t util.t)
|
set (test_SRCS data.t exclusion.t helper.t interval.t range.t rules.t util.t)
|
||||||
|
|
||||||
add_custom_target (test ./run_all --verbose
|
add_custom_target (test ./run_all --verbose
|
||||||
DEPENDS ${test_SRCS}
|
DEPENDS ${test_SRCS}
|
||||||
|
|
330
test/lexer.t.cpp
330
test/lexer.t.cpp
|
@ -1,330 +0,0 @@
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
// Copyright 2013 - 2016, Göteborg Bit Factory.
|
|
||||||
//
|
|
||||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
// of this software and associated documentation files (the "Software"), to deal
|
|
||||||
// in the Software without restriction, including without limitation the rights
|
|
||||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
// copies of the Software, and to permit persons to whom the Software is
|
|
||||||
// furnished to do so, subject to the following conditions:
|
|
||||||
//
|
|
||||||
// The above copyright notice and this permission notice shall be included
|
|
||||||
// in all copies or substantial portions of the Software.
|
|
||||||
//
|
|
||||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
||||||
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
// SOFTWARE.
|
|
||||||
//
|
|
||||||
// http://www.opensource.org/licenses/mit-license.php
|
|
||||||
//
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
#include <cmake.h>
|
|
||||||
#include <Lexer.h>
|
|
||||||
#include <iostream>
|
|
||||||
#include <vector>
|
|
||||||
#include <string.h>
|
|
||||||
#include <test.h>
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
int main (int, char**)
|
|
||||||
{
|
|
||||||
UnitTest t (555);
|
|
||||||
|
|
||||||
std::vector <std::pair <std::string, Lexer::Type>> tokens;
|
|
||||||
std::string token;
|
|
||||||
Lexer::Type type;
|
|
||||||
|
|
||||||
// static bool Lexer::dequote (std::string&, const std::string& quotes = "'\"");
|
|
||||||
t.is (Lexer::dequote ("foo"), "foo", "Lexer::dequote foo --> foo");
|
|
||||||
t.is (Lexer::dequote ("'foo'"), "foo", "Lexer::dequote 'foo' --> foo");
|
|
||||||
t.is (Lexer::dequote ("'o\\'clock'"), "o\\'clock", "Lexer::dequote 'o\\'clock' --> o\\'clock");
|
|
||||||
t.is (Lexer::dequote ("abba", "a"), "bb", "Lexer::dequote 'abba' (a) --> bb");
|
|
||||||
|
|
||||||
// Should result in no tokens.
|
|
||||||
Lexer l0 ("");
|
|
||||||
t.notok (l0.token (token, type), "'' --> no tokens");
|
|
||||||
|
|
||||||
// Should result in no tokens.
|
|
||||||
Lexer l1 (" \t ");
|
|
||||||
t.notok (l1.token (token, type), "' \\t ' --> no tokens");
|
|
||||||
|
|
||||||
// Test for numbers that are no longer ISO-8601 dates.
|
|
||||||
Lexer l3 ("1 12 123 1234 12345 123456 1234567");
|
|
||||||
tokens.clear ();
|
|
||||||
while (l3.token (token, type))
|
|
||||||
{
|
|
||||||
std::cout << "# «" << token << "» " << Lexer::typeName (type) << '\n';
|
|
||||||
tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
|
|
||||||
}
|
|
||||||
|
|
||||||
t.is ((int)tokens.size (), 7, "7 tokens");
|
|
||||||
t.is (tokens[0].first, "1", "tokens[0] == '1'");
|
|
||||||
t.is ((int) tokens[0].second, (int) Lexer::Type::number, "tokens[0] == Type::number");
|
|
||||||
t.is (tokens[1].first, "12", "tokens[1] == '12'");
|
|
||||||
t.is ((int) tokens[1].second, (int) Lexer::Type::number, "tokens[1] == Type::number");
|
|
||||||
t.is (tokens[2].first, "123", "tokens[2] == '123'");
|
|
||||||
t.is ((int) tokens[2].second, (int) Lexer::Type::number, "tokens[2] == Type::number"); // 70
|
|
||||||
t.is (tokens[3].first, "1234", "tokens[3] == '1234'");
|
|
||||||
t.is ((int) tokens[3].second, (int) Lexer::Type::date, "tokens[3] == Type::date");
|
|
||||||
t.is (tokens[4].first, "12345", "tokens[4] == '12345'");
|
|
||||||
t.is ((int) tokens[4].second, (int) Lexer::Type::number, "tokens[4] == Type::number");
|
|
||||||
t.is (tokens[5].first, "123456", "tokens[5] == '123456'");
|
|
||||||
t.is ((int) tokens[5].second, (int) Lexer::Type::date, "tokens[5] == Type::date");
|
|
||||||
t.is (tokens[6].first, "1234567", "tokens[6] == '1234567'");
|
|
||||||
t.is ((int) tokens[6].second, (int) Lexer::Type::duration, "tokens[6] == Type::duration");
|
|
||||||
|
|
||||||
// static bool readWord (const std::string&, const std::string&, std::string::size_type&, std::string&);
|
|
||||||
std::string::size_type cursor = 0;
|
|
||||||
std::string word;
|
|
||||||
t.ok (Lexer::readWord ("'one two'", "'\"", cursor, word), "readWord ''one two'' --> true");
|
|
||||||
t.is (word, "'one two'", " word '" + word + "'");
|
|
||||||
t.is ((int)cursor, 9, " cursor");
|
|
||||||
|
|
||||||
// Unterminated quoted string is invalid.
|
|
||||||
cursor = 0;
|
|
||||||
t.notok (Lexer::readWord ("'one", "'\"", cursor, word), "readWord ''one' --> false");
|
|
||||||
|
|
||||||
// static bool readWord (const std::string&, std::string::size_type&, std::string&);
|
|
||||||
cursor = 0;
|
|
||||||
t.ok (Lexer::readWord ("input", cursor, word), "readWord 'input' --> true");
|
|
||||||
t.is (word, "input", " word '" + word + "'");
|
|
||||||
t.is ((int)cursor, 5, " cursor");
|
|
||||||
|
|
||||||
cursor = 0;
|
|
||||||
t.ok (Lexer::readWord ("one\\ two", cursor, word), "readWord 'one\\ two' --> true");
|
|
||||||
t.is (word, "one two", " word '" + word + "'");
|
|
||||||
t.is ((int)cursor, 8, " cursor");
|
|
||||||
|
|
||||||
cursor = 0;
|
|
||||||
t.ok (Lexer::readWord ("\\u20A43", cursor, word), "readWord '\\u20A43' --> true");
|
|
||||||
t.is (word, "₤3", " word '" + word + "'");
|
|
||||||
t.is ((int)cursor, 7, " cursor");
|
|
||||||
|
|
||||||
cursor = 0;
|
|
||||||
t.ok (Lexer::readWord ("U+20AC4", cursor, word), "readWord '\\u20AC4' --> true");
|
|
||||||
t.is (word, "€4", " word '" + word + "'");
|
|
||||||
t.is ((int)cursor, 7, " cursor");
|
|
||||||
|
|
||||||
std::string text = "one 'two' three\\ four";
|
|
||||||
cursor = 0;
|
|
||||||
t.ok (Lexer::readWord (text, cursor, word), "readWord \"one 'two' three\\ four\" --> true");
|
|
||||||
t.is (word, "one", " word '" + word + "'");
|
|
||||||
cursor++;
|
|
||||||
t.ok (Lexer::readWord (text, cursor, word), "readWord \"one 'two' three\\ four\" --> true");
|
|
||||||
t.is (word, "'two'", " word '" + word + "'");
|
|
||||||
cursor++;
|
|
||||||
t.ok (Lexer::readWord (text, cursor, word), "readWord \"one 'two' three\\ four\" --> true");
|
|
||||||
t.is (word, "three four", " word '" + word + "'");
|
|
||||||
|
|
||||||
text = "one ";
|
|
||||||
cursor = 0;
|
|
||||||
t.ok (Lexer::readWord (text, cursor, word), "readWord \"one \" --> true");
|
|
||||||
t.is (word, "one", " word '" + word + "'");
|
|
||||||
|
|
||||||
// Test all Lexer types.
|
|
||||||
#define NO {"",Lexer::Type::word}
|
|
||||||
struct
|
|
||||||
{
|
|
||||||
const char* input;
|
|
||||||
struct
|
|
||||||
{
|
|
||||||
const char* token;
|
|
||||||
Lexer::Type type;
|
|
||||||
} results[5];
|
|
||||||
} lexerTests[] =
|
|
||||||
{
|
|
||||||
// Pattern
|
|
||||||
{ "/foo/", { { "/foo/", Lexer::Type::pattern }, NO, NO, NO, NO }, },
|
|
||||||
{ "/a\\/b/", { { "/a\\/b/", Lexer::Type::pattern }, NO, NO, NO, NO }, },
|
|
||||||
{ "/'/", { { "/'/", Lexer::Type::pattern }, NO, NO, NO, NO }, },
|
|
||||||
|
|
||||||
// Path
|
|
||||||
{ "/long/path/to/file.txt", { { "/long/path/to/file.txt", Lexer::Type::path }, NO, NO, NO, NO }, },
|
|
||||||
|
|
||||||
// Word
|
|
||||||
{ "1.foo.bar", { { "1.foo.bar", Lexer::Type::word }, NO, NO, NO, NO }, },
|
|
||||||
|
|
||||||
// URL
|
|
||||||
{ "http://tasktools.org", { { "http://tasktools.org", Lexer::Type::url }, NO, NO, NO, NO }, },
|
|
||||||
{ "https://bug.tasktools.org", { { "https://bug.tasktools.org", Lexer::Type::url }, NO, NO, NO, NO }, },
|
|
||||||
|
|
||||||
// String
|
|
||||||
{ "'one two'", { { "'one two'", Lexer::Type::string }, NO, NO, NO, NO }, },
|
|
||||||
{ "\"three\"", { { "\"three\"", Lexer::Type::string }, NO, NO, NO, NO }, },
|
|
||||||
{ "'\\''", { { "'''", Lexer::Type::string }, NO, NO, NO, NO }, },
|
|
||||||
{ "\"\\\"\"", { { "\"\"\"", Lexer::Type::string }, NO, NO, NO, NO }, },
|
|
||||||
{ "\"\tfoo\t\"", { { "\"\tfoo\t\"", Lexer::Type::string }, NO, NO, NO, NO }, },
|
|
||||||
{ "\"\\u20A43\"", { { "\"₤3\"", Lexer::Type::string }, NO, NO, NO, NO }, },
|
|
||||||
{ "\"U+20AC4\"", { { "\"€4\"", Lexer::Type::string }, NO, NO, NO, NO }, },
|
|
||||||
|
|
||||||
// Number
|
|
||||||
{ "1", { { "1", Lexer::Type::number }, NO, NO, NO, NO }, },
|
|
||||||
{ "3.14", { { "3.14", Lexer::Type::number }, NO, NO, NO, NO }, },
|
|
||||||
{ "6.02217e23", { { "6.02217e23", Lexer::Type::number }, NO, NO, NO, NO }, },
|
|
||||||
{ "1.2e-3.4", { { "1.2e-3.4", Lexer::Type::number }, NO, NO, NO, NO }, },
|
|
||||||
{ "0x2f", { { "0x2f", Lexer::Type::hex }, NO, NO, NO, NO }, },
|
|
||||||
|
|
||||||
// Operator - complete set
|
|
||||||
{ "^", { { "^", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "!", { { "!", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "_neg_", { { "_neg_", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "_pos_", { { "_pos_", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "_hastag_", { { "_hastag_", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "_notag_", { { "_notag_", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "*", { { "*", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "/", { { "/", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "%", { { "%", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "+", { { "+", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "-", { { "-", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "<=", { { "<=", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ ">=", { { ">=", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ ">", { { ">", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "<", { { "<", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "=", { { "=", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "==", { { "==", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "!=", { { "!=", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "!==", { { "!==", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "~", { { "~", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "!~", { { "!~", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "and", { { "and", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "or", { { "or", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "xor", { { "xor", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ "(", { { "(", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
{ ")", { { ")", Lexer::Type::op }, NO, NO, NO, NO }, },
|
|
||||||
|
|
||||||
// UUID
|
|
||||||
{ "ffffffff-ffff-ffff-ffff-ffffffffffff", { { "ffffffff-ffff-ffff-ffff-ffffffffffff", Lexer::Type::uuid }, NO, NO, NO, NO }, },
|
|
||||||
{ "00000000-0000-0000-0000-0000000", { { "00000000-0000-0000-0000-0000000", Lexer::Type::uuid }, NO, NO, NO, NO }, },
|
|
||||||
{ "00000000-0000-0000-0000", { { "00000000-0000-0000-0000", Lexer::Type::uuid }, NO, NO, NO, NO }, },
|
|
||||||
{ "00000000-0000-0000", { { "00000000-0000-0000", Lexer::Type::uuid }, NO, NO, NO, NO }, },
|
|
||||||
{ "00000000-0000", { { "00000000-0000", Lexer::Type::uuid }, NO, NO, NO, NO }, },
|
|
||||||
{ "00000000", { { "00000000", Lexer::Type::uuid }, NO, NO, NO, NO }, },
|
|
||||||
{ "a360fc44-315c-4366-b70c-ea7e7520b749", { { "a360fc44-315c-4366-b70c-ea7e7520b749", Lexer::Type::uuid }, NO, NO, NO, NO }, },
|
|
||||||
{ "a360fc44-315c-4366-b70c-ea7e752", { { "a360fc44-315c-4366-b70c-ea7e752", Lexer::Type::uuid }, NO, NO, NO, NO }, },
|
|
||||||
{ "a360fc44-315c-4366-b70c", { { "a360fc44-315c-4366-b70c", Lexer::Type::uuid }, NO, NO, NO, NO }, },
|
|
||||||
{ "a360fc44-315c-4366", { { "a360fc44-315c-4366", Lexer::Type::uuid }, NO, NO, NO, NO }, },
|
|
||||||
{ "a360fc44-315c", { { "a360fc44-315c", Lexer::Type::uuid }, NO, NO, NO, NO }, },
|
|
||||||
{ "a360fc44", { { "a360fc44", Lexer::Type::uuid }, NO, NO, NO, NO }, },
|
|
||||||
|
|
||||||
// Date
|
|
||||||
{ "2015-W01", { { "2015-W01", Lexer::Type::date }, NO, NO, NO, NO }, },
|
|
||||||
{ "2015-02-17", { { "2015-02-17", Lexer::Type::date }, NO, NO, NO, NO }, },
|
|
||||||
{ "2013-11-29T22:58:00Z", { { "2013-11-29T22:58:00Z", Lexer::Type::date }, NO, NO, NO, NO }, },
|
|
||||||
{ "20131129T225800Z", { { "20131129T225800Z", Lexer::Type::date }, NO, NO, NO, NO }, },
|
|
||||||
{ "9th", { { "9th", Lexer::Type::date }, NO, NO, NO, NO }, },
|
|
||||||
{ "10th", { { "10th", Lexer::Type::date }, NO, NO, NO, NO }, },
|
|
||||||
{ "today", { { "today", Lexer::Type::date }, NO, NO, NO, NO }, },
|
|
||||||
|
|
||||||
// Duration
|
|
||||||
{ "year", { { "year", Lexer::Type::duration }, NO, NO, NO, NO }, },
|
|
||||||
{ "4weeks", { { "4weeks", Lexer::Type::duration }, NO, NO, NO, NO }, },
|
|
||||||
{ "PT23H", { { "PT23H", Lexer::Type::duration }, NO, NO, NO, NO }, },
|
|
||||||
{ "1second", { { "1second", Lexer::Type::duration }, NO, NO, NO, NO }, },
|
|
||||||
{ "1s", { { "1s", Lexer::Type::duration }, NO, NO, NO, NO }, },
|
|
||||||
{ "1minute", { { "1minute", Lexer::Type::duration }, NO, NO, NO, NO }, },
|
|
||||||
{ "2hour", { { "2hour", Lexer::Type::duration }, NO, NO, NO, NO }, },
|
|
||||||
{ "3 days", { { "3 days", Lexer::Type::duration }, NO, NO, NO, NO }, },
|
|
||||||
{ "4w", { { "4w", Lexer::Type::duration }, NO, NO, NO, NO }, },
|
|
||||||
{ "5mo", { { "5mo", Lexer::Type::duration }, NO, NO, NO, NO }, },
|
|
||||||
{ "6 years", { { "6 years", Lexer::Type::duration }, NO, NO, NO, NO }, },
|
|
||||||
{ "P1Y", { { "P1Y", Lexer::Type::duration }, NO, NO, NO, NO }, },
|
|
||||||
{ "PT1H", { { "PT1H", Lexer::Type::duration }, NO, NO, NO, NO }, },
|
|
||||||
{ "P1Y1M1DT1H1M1S", { { "P1Y1M1DT1H1M1S", Lexer::Type::duration }, NO, NO, NO, NO }, },
|
|
||||||
};
|
|
||||||
#define NUM_TESTS (sizeof (lexerTests) / sizeof (lexerTests[0]))
|
|
||||||
|
|
||||||
for (unsigned int i = 0; i < NUM_TESTS; i++)
|
|
||||||
{
|
|
||||||
// The isolated test puts the input string directly into the Lexer.
|
|
||||||
Lexer isolated (lexerTests[i].input);
|
|
||||||
|
|
||||||
for (int j = 0; j < 5; j++)
|
|
||||||
{
|
|
||||||
if (lexerTests[i].results[j].token[0])
|
|
||||||
{
|
|
||||||
// Isolated: "<token>"
|
|
||||||
t.ok (isolated.token (token, type), "Isolated Lexer::token(...) --> true");
|
|
||||||
t.is (token, lexerTests[i].results[j].token, " token --> " + token);
|
|
||||||
t.is ((int)type, (int)lexerTests[i].results[j].type, " type --> Lexer::Type::" + Lexer::typeToString (type));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// The embedded test surrounds the input string with a space.
|
|
||||||
Lexer embedded (std::string (" ") + lexerTests[i].input + " ");
|
|
||||||
|
|
||||||
for (int j = 0; j < 5; j++)
|
|
||||||
{
|
|
||||||
if (lexerTests[i].results[j].token[0])
|
|
||||||
{
|
|
||||||
// Embedded: "<token>"
|
|
||||||
t.ok (embedded.token (token, type), "Embedded Lexer::token(...) --> true");
|
|
||||||
t.is (token, lexerTests[i].results[j].token, " token --> " + token);
|
|
||||||
t.is ((int)type, (int)lexerTests[i].results[j].type, " type --> Lexer::Type::" + Lexer::typeToString (type));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
t.is (Lexer::typeName (Lexer::Type::uuid), "uuid", "Lexer::typeName (Lexer::Type::uuid)");
|
|
||||||
t.is (Lexer::typeName (Lexer::Type::number), "number", "Lexer::typeName (Lexer::Type::number)");
|
|
||||||
t.is (Lexer::typeName (Lexer::Type::hex), "hex", "Lexer::typeName (Lexer::Type::hex)");
|
|
||||||
t.is (Lexer::typeName (Lexer::Type::string), "string", "Lexer::typeName (Lexer::Type::string)");
|
|
||||||
t.is (Lexer::typeName (Lexer::Type::url), "url", "Lexer::typeName (Lexer::Type::url)");
|
|
||||||
t.is (Lexer::typeName (Lexer::Type::path), "path", "Lexer::typeName (Lexer::Type::path)");
|
|
||||||
t.is (Lexer::typeName (Lexer::Type::pattern), "pattern", "Lexer::typeName (Lexer::Type::pattern)");
|
|
||||||
t.is (Lexer::typeName (Lexer::Type::op), "op", "Lexer::typeName (Lexer::Type::op)");
|
|
||||||
t.is (Lexer::typeName (Lexer::Type::word), "word", "Lexer::typeName (Lexer::Type::word)");
|
|
||||||
t.is (Lexer::typeName (Lexer::Type::date), "date", "Lexer::typeName (Lexer::Type::date)");
|
|
||||||
t.is (Lexer::typeName (Lexer::Type::duration), "duration", "Lexer::typeName (Lexer::Type::duration)");
|
|
||||||
|
|
||||||
// std::string Lexer::trimLeft (const std::string& in, const std::string&)
|
|
||||||
t.is (Lexer::trimLeft (""), "", "Lexer::trimLeft '' -> ''");
|
|
||||||
t.is (Lexer::trimLeft (" "), "", "Lexer::trimLeft ' ' -> ''");
|
|
||||||
t.is (Lexer::trimLeft ("", " \t"), "", "Lexer::trimLeft '' -> ''");
|
|
||||||
t.is (Lexer::trimLeft ("xxx"), "xxx", "Lexer::trimLeft 'xxx' -> 'xxx'");
|
|
||||||
t.is (Lexer::trimLeft ("xxx", " \t"), "xxx", "Lexer::trimLeft 'xxx' -> 'xxx'");
|
|
||||||
t.is (Lexer::trimLeft (" \t xxx \t "), "\t xxx \t ", "Lexer::trimLeft ' \\t xxx \\t ' -> '\\t xxx \\t '");
|
|
||||||
t.is (Lexer::trimLeft (" \t xxx \t ", " \t"), "xxx \t ", "Lexer::trimLeft ' \\t xxx \\t ' -> 'xxx \\t '");
|
|
||||||
|
|
||||||
// std::string Lexer::trimRight (const std::string& in, const std::string&)
|
|
||||||
t.is (Lexer::trimRight (""), "", "Lexer::trimRight '' -> ''");
|
|
||||||
t.is (Lexer::trimRight (" "), "", "Lexer::trimRight ' ' -> ''");
|
|
||||||
t.is (Lexer::trimRight ("", " \t"), "", "Lexer::trimRight '' -> ''");
|
|
||||||
t.is (Lexer::trimRight ("xxx"), "xxx", "Lexer::trimRight 'xxx' -> 'xxx'");
|
|
||||||
t.is (Lexer::trimRight ("xxx", " \t"), "xxx", "Lexer::trimRight 'xxx' -> 'xxx'");
|
|
||||||
t.is (Lexer::trimRight (" \t xxx \t "), " \t xxx \t", "Lexer::trimRight ' \\t xxx \\t ' -> ' \\t xxx \\t'");
|
|
||||||
t.is (Lexer::trimRight (" \t xxx \t ", " \t"), " \t xxx", "Lexer::trimRight ' \\t xxx \\t ' -> ' \\t xxx'");
|
|
||||||
|
|
||||||
// std::string Lexer::trim (const std::string& in, const std::string& t)
|
|
||||||
t.is (Lexer::trim (""), "", "Lexer::trim '' -> ''");
|
|
||||||
t.is (Lexer::trim (" "), "", "Lexer::trim ' ' -> ''");
|
|
||||||
t.is (Lexer::trim ("", " \t"), "", "Lexer::trim '' -> ''");
|
|
||||||
t.is (Lexer::trim ("xxx"), "xxx", "Lexer::trim 'xxx' -> 'xxx'");
|
|
||||||
t.is (Lexer::trim ("xxx", " \t"), "xxx", "Lexer::trim 'xxx' -> 'xxx'");
|
|
||||||
t.is (Lexer::trim (" \t xxx \t "), "\t xxx \t", "Lexer::trim ' \\t xxx \\t ' -> '\\t xxx \\t'");
|
|
||||||
t.is (Lexer::trim (" \t xxx \t ", " \t"), "xxx", "Lexer::trim ' \\t xxx \\t ' -> 'xxx'");
|
|
||||||
|
|
||||||
// std::vector <std::tuple <std::string, Lexer::Type>> Lexer::tokenize (const std::string& input)
|
|
||||||
auto tokenized = Lexer::tokenize (" one two three ");
|
|
||||||
t.is ((int)tokenized.size (), 3, "Lexer::tokenize ' one two three ' --> 3");
|
|
||||||
t.is (std::get <0> (tokenized[0]), "one", "Lexer::tokenize ' one two three ' [0] --> 'one'");
|
|
||||||
t.ok (std::get <1> (tokenized[0]) == Lexer::Type::word, "Lexer::tokenize ' one two three ' [0] --> word");
|
|
||||||
t.is (std::get <0> (tokenized[1]), "two", "Lexer::tokenize ' one two three ' [1] --> 'two'");
|
|
||||||
t.ok (std::get <1> (tokenized[1]) == Lexer::Type::word, "Lexer::tokenize ' one two three ' [1] --> word");
|
|
||||||
t.is (std::get <0> (tokenized[2]), "three", "Lexer::tokenize ' one two three ' [2] --> 'three'");
|
|
||||||
t.ok (std::get <1> (tokenized[2]) == Lexer::Type::word, "Lexer::tokenize ' one two three ' [2] --> word");
|
|
||||||
|
|
||||||
// bool wasQuoted (const std::string& input)
|
|
||||||
t.notok (Lexer::wasQuoted (""), "Lexer::wasQuoted '' --> false");
|
|
||||||
t.notok (Lexer::wasQuoted ("abc"), "Lexer::wasQuoted 'abc' --> false");
|
|
||||||
t.ok (Lexer::wasQuoted ("one two"), "Lexer::wasQuoted 'one two' --> true");
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
Loading…
Add table
Add a link
Reference in a new issue