Lexer:: Implemented ::isHardBoundary to detect filter tokens

This commit is contained in:
Paul Beckingham 2015-07-11 13:10:15 -04:00
parent 15dea00eeb
commit 642f378462
2 changed files with 30 additions and 1 deletions

View file

@ -294,6 +294,22 @@ bool Lexer::isBoundary (int left, int right)
return false; return false;
} }
////////////////////////////////////////////////////////////////////////////////
bool Lexer::isHardBoundary (int left, int right)
{
  // Reaching the end of the string always closes the current token.
  if (right == '\0')
    return true;

  // Parentheses are FILTER operators that do not need surrounding whitespace,
  // so a parenthesis on either side of the gap between 'left' and 'right'
  // counts as a boundary.
  const bool parenOnLeft  = (left  == '(' || left  == ')');
  const bool parenOnRight = (right == '(' || right == ')');

  return parenOnLeft || parenOnRight;
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
bool Lexer::isPunctuation (int c) bool Lexer::isPunctuation (int c)
{ {
@ -1230,6 +1246,11 @@ bool Lexer::readWord (
// abcU+0020def // abcU+0020def
// abc\u0020def // abc\u0020def
// a\tb // a\tb
//
// Ends at:
// Lexer::isEOS
// Lexer::isWhitespace
// Lexer::isHardBoundary
bool Lexer::readWord ( bool Lexer::readWord (
const std::string& text, const std::string& text,
std::string::size_type& cursor, std::string::size_type& cursor,
@ -1239,12 +1260,17 @@ bool Lexer::readWord (
word = ""; word = "";
int c; int c;
while ((c = text[cursor])) int prev = 0;
while ((c = text[cursor])) // Handles EOS.
{ {
// Unquoted word ends on white space. // Unquoted word ends on white space.
if (Lexer::isWhitespace (c)) if (Lexer::isWhitespace (c))
break; break;
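// Unquoted word also ends at a hard boundary between the previous and the
// current character (a parenthesis on either side; see Lexer::isHardBoundary).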
if (prev && Lexer::isHardBoundary (prev, c))
break;
// Unicode U+XXXX or \uXXXX codepoint. // Unicode U+XXXX or \uXXXX codepoint.
else if (eos - cursor >= 6 && else if (eos - cursor >= 6 &&
((text[cursor + 0] == 'U' && text[cursor + 1] == '+') || ((text[cursor + 0] == 'U' && text[cursor + 1] == '+') ||
@ -1290,6 +1316,8 @@ bool Lexer::readWord (
// Ordinary character. // Ordinary character.
else else
word += utf8_character (utf8_next_char (text, cursor)); word += utf8_character (utf8_next_char (text, cursor));
prev = c;
} }
return word.length () > 0 ? true : false; return word.length () > 0 ? true : false;

View file

@ -70,6 +70,7 @@ public:
static bool isDoubleCharOperator (int, int, int); static bool isDoubleCharOperator (int, int, int);
static bool isTripleCharOperator (int, int, int, int); static bool isTripleCharOperator (int, int, int, int);
static bool isBoundary (int, int); static bool isBoundary (int, int);
static bool isHardBoundary (int, int);
static bool isPunctuation (int); static bool isPunctuation (int);
static bool isAllDigits (const std::string&); static bool isAllDigits (const std::string&);
static bool isOneWord (const std::string&); static bool isOneWord (const std::string&);