Lexer:: Implemented ::isHardBoundary to detect filter tokens

This commit is contained in:
Paul Beckingham 2015-07-11 13:10:15 -04:00
parent 15dea00eeb
commit 642f378462
2 changed files with 30 additions and 1 deletions

View file

@@ -294,6 +294,22 @@ bool Lexer::isBoundary (int left, int right)
return false;
}
////////////////////////////////////////////////////////////////////////////////
// Reports whether a hard token boundary lies between two adjacent characters.
//
//   left   The character preceding the candidate boundary.
//   right  The character following the candidate boundary.
//
// Returns true when 'right' is the end-of-string terminator, or when either
// character is a parenthesis; returns false otherwise.
bool Lexer::isHardBoundary (int left, int right)
{
  // Running off the end of the string always terminates the current token.
  const bool atEndOfString = (right == '\0');

  // Parentheses are FILTER operators that need no surrounding whitespace, so
  // one on either side of the gap forces a split.
  const bool parenOnLeft  = (left  == '(' || left  == ')');
  const bool parenOnRight = (right == '(' || right == ')');

  return atEndOfString || parenOnLeft || parenOnRight;
}
////////////////////////////////////////////////////////////////////////////////
bool Lexer::isPunctuation (int c)
{
@@ -1230,6 +1246,11 @@ bool Lexer::readWord (
// abcU+0020def
// abc\u0020def
// a\tb
//
// Ends at:
// Lexer::isEOS
// Lexer::isWhitespace
// Lexer::isHardBoundary
bool Lexer::readWord (
const std::string& text,
std::string::size_type& cursor,
@@ -1239,12 +1260,17 @@ bool Lexer::readWord (
word = "";
int c;
while ((c = text[cursor]))
int prev = 0;
while ((c = text[cursor])) // Handles EOS.
{
// Unquoted word ends on white space.
if (Lexer::isWhitespace (c))
break;
// Parentheses mostly.
if (prev && Lexer::isHardBoundary (prev, c))
break;
// Unicode U+XXXX or \uXXXX codepoint.
else if (eos - cursor >= 6 &&
((text[cursor + 0] == 'U' && text[cursor + 1] == '+') ||
@@ -1290,6 +1316,8 @@ bool Lexer::readWord (
// Ordinary character.
else
word += utf8_character (utf8_next_char (text, cursor));
prev = c;
}
return word.length () > 0 ? true : false;

View file

@@ -70,6 +70,7 @@ public:
static bool isDoubleCharOperator (int, int, int);
static bool isTripleCharOperator (int, int, int, int);
static bool isBoundary (int, int);
// True when the second argument is end-of-string ('\0'), or when either
// argument is '(' or ')'.
static bool isHardBoundary (int, int);
static bool isPunctuation (int);
static bool isAllDigits (const std::string&);
static bool isOneWord (const std::string&);