Lexer:: Implemented ::isHardBoundary to detect filter tokens

2025-06-26 10:54:26 +02:00 · 2015-07-11 13:10:15 -04:00 · 2015-07-11 13:10:15 -04:00 · 642f378462
commit 642f378462
parent 15dea00eeb
2 changed files with 30 additions and 1 deletions
--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@ -294,6 +294,22 @@ bool Lexer::isBoundary (int left, int right)
  return false;
 }

+////////////////////////////////////////////////////////////////////////////////
+bool Lexer::isHardBoundary (int left, int right)  // Is the left/right character pair a hard boundary?
+{
+  // EOS: a NUL character on the right is always a boundary.
+  if (right == '\0')                                               return true;
+
+  // FILTER operators that don't need to be surrounded by whitespace: a parenthesis on either side is a boundary.
+  if (left == '(' ||
+      left == ')' ||
+      right == '(' ||
+      right == ')')
+    return true;
+
+  return false;  // Any other pair of characters is not a hard boundary.
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 bool Lexer::isPunctuation (int c)
 {
@ -1230,6 +1246,11 @@ bool Lexer::readWord (
 //   abcU+0020def
 //   abc\u0020def
 //   a\tb
+//
+// Ends at:
+//   Lexer::isEOS
+//   Lexer::isWhitespace
+//   Lexer::isHardBoundary
 bool Lexer::readWord (
  const std::string& text,
  std::string::size_type& cursor,
@ -1239,12 +1260,17 @@ bool Lexer::readWord (

  word = "";
  int c;
-  while ((c = text[cursor]))
+  int prev = 0;
+  while ((c = text[cursor]))  // Handles EOS.
  {
    // Unquoted word ends on white space.
    if (Lexer::isWhitespace (c))
      break;

+    // Parentheses mostly.
+    if (prev && Lexer::isHardBoundary (prev, c))
+      break;
+
    // Unicode U+XXXX or \uXXXX codepoint.
    else if (eos - cursor >= 6 &&
             ((text[cursor + 0] == 'U'  && text[cursor + 1] == '+') ||
@ -1290,6 +1316,8 @@ bool Lexer::readWord (
    // Ordinary character.
    else
      word += utf8_character (utf8_next_char (text, cursor));
+
+    prev = c;
  }

  return word.length () > 0 ? true : false;
--- a/src/Lexer.h
+++ b/src/Lexer.h
@ -70,6 +70,7 @@ public:
  static bool isDoubleCharOperator  (int, int, int);
  static bool isTripleCharOperator  (int, int, int, int);
  static bool isBoundary            (int, int);
+  static bool isHardBoundary        (int, int);
  static bool isPunctuation         (int);
  static bool isAllDigits           (const std::string&);
  static bool isOneWord             (const std::string&);