Helpers

- Implemented isTokenEnd, an extension of isWordEnd that additionally treats each character in a run of consecutive punctuation as an individual token.
2025-08-28 22:47:20 +02:00 · 2011-09-10 13:24:54 -04:00 · 2011-09-10 13:24:54 -04:00 · 562fd8ce3c
commit 562fd8ce3c
parent 94bb98edac
2 changed files with 21 additions and 1 deletions
--- a/src/text.cpp
+++ b/src/text.cpp
@ -601,7 +601,7 @@ bool isWordStart (const std::string& input, std::string::size_type pos)
 // Result for pos: ....y......y
 bool isWordEnd (const std::string& input, std::string::size_type pos)
 {
-  // Short circuit: no input means no word start.
+  // Short circuit: no input means no word end.
  if (input.length () == 0)
    return false;

@ -618,6 +618,25 @@ bool isWordEnd (const std::string& input, std::string::size_type pos)
  return false;
 }

+////////////////////////////////////////////////////////////////////////////////
+//          Input: hello, world
+// Result for pos: ....yy.....y
+//
+//          Input: (one) two
+// Result for pos: y..yy...y
+bool isTokenEnd (const std::string& input, std::string::size_type pos)
+{
+  // Delegate: any position that isWordEnd accepts also ends a token.
+  if (isWordEnd (input, pos))
+    return true;
+
+  // Punctuation divides tokens: an in-range punctuation character at pos ends one.
+  if (pos < input.length () && isPunctuation (input[pos]))
+    return true;
+
+  return false;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // Override of ispunct, that considers #, $ and @ not to be punctuation.
 //