Lexer: Converted ::isPair to use ::readWord

- With ::isPair using ::readWord, attribute values may now contain escaped entities such as \t, \uNNNN, and U+NNNN.
- Removed distinct handling for <name><sep><value>, rc<sep><value> and rc.<name><sep><value> - all generic now.
2025-08-29 07:57:20 +02:00 · 2015-07-10 11:24:57 -04:00 · 2015-07-10 11:24:57 -04:00 · a86edaa6b2
commit a86edaa6b2
parent 45aaa530ac
1 changed files with 22 additions and 38 deletions
--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@ -382,7 +382,6 @@ bool Lexer::isString (std::string& token, Lexer::Type& type, const std::string&
  return false;
 */
  if (quotes.find (_text[marker]) != std::string::npos)
  {
    int quote = _text[marker];
@ -728,9 +727,8 @@ bool Lexer::isURL (std::string& token, Lexer::Type& type)
 ////////////////////////////////////////////////////////////////////////////////
 // Lexer::Type::pair
-//   <identifier> :  [ <string> | <word> ]
+//   <identifier> <separator> [ <string> | <word> ]
-//   <identifier> =  [ <string> | <word> ]
+//   separator '::' | ':=' | ':' | '='
 //   <identifier> := [ <string> | <word> ]
 bool Lexer::isPair (std::string& token, Lexer::Type& type)
 {
  std::size_t marker = _cursor;
@ -739,43 +737,29 @@ bool Lexer::isPair (std::string& token, Lexer::Type& type)
  Lexer::Type ignoredType;
  if (isIdentifier (ignoredToken, ignoredType))
  {
-    // Look for rc.name{:=,=,:}value first, because '=' is allowed.
+    // Look for a valid separator.
-    if (ignoredToken == "rc" ||
+    std::string separator = _text.substr (_cursor, 2);
-        ignoredToken.substr (0, 3) == "rc.")
+    if (separator == "::" || separator == ":=")
-    {
+      _cursor += 2;
-      if (_eos - _cursor > 1 &&
+    else if (separator[0] == ':' || separator[0] == '=')
          (_text[_cursor] == ':' ||
           _text[_cursor] == '='))
      {
      _cursor++;
    else
    {
      _cursor = marker;
      return false;
    }
-        if (isString     (ignoredToken, ignoredType, "'\"")  ||
+    // String, word or nothing are all valid.
-            isContiguous (ignoredToken, ignoredType))
+    if (readWord (_text, "'\"", _cursor, ignoredToken) ||
        readWord (_text,        _cursor, ignoredToken) ||
        isEOS ()                                       ||
        isWhitespace (_text[_cursor]))
    {
      token = _text.substr (marker, _cursor - marker);
      type = Lexer::Type::pair;
      return true;
    }
  }
    }
    if (_eos - _cursor >= 1    &&
        (_text[_cursor] == ':' ||
         _text[_cursor] == '='))
    {
      _cursor++;
      if (isString     (ignoredToken, ignoredType, "'\"") ||
          isContiguous (ignoredToken, ignoredType)        ||
          _eos == _cursor                                 ||
          _text[_cursor] == ' ')
      {
        token = _text.substr (marker, _cursor - marker);
        type = Lexer::Type::pair;
        return true;
      }
    }
  }
  _cursor = marker;
  return false;
@ -841,7 +825,7 @@ bool Lexer::isSet (std::string& token, Lexer::Type& type)
 ////////////////////////////////////////////////////////////////////////////////
 // Lexer::Type::tag
-//   ^ | '(' | ')' | <isWhiteSpace>
+//   ^ | '(' | ')' | <isWhitespace>
 //     [ +|- ] <isIdentifierStart> [ <isIdentifierNext> ]*
 bool Lexer::isTag (std::string& token, Lexer::Type& type)
 {
@ -922,7 +906,7 @@ bool Lexer::isPath (std::string& token, Lexer::Type& type)
 ////////////////////////////////////////////////////////////////////////////////
 // Lexer::Type::substitution
-//   / <unquoted-string> / <unquoted-string> / [g]  <EOS> | <isWhiteSpace>
+//   / <unquoted-string> / <unquoted-string> / [g]  <EOS> | <isWhitespace>
 bool Lexer::isSubstitution (std::string& token, Lexer::Type& type)
 {
  std::size_t marker = _cursor;
@ -955,7 +939,7 @@ bool Lexer::isSubstitution (std::string& token, Lexer::Type& type)
 ////////////////////////////////////////////////////////////////////////////////
 // Lexer::Type::pattern
-//   / <unquoted-string> /  <EOS> | <isWhiteSpace>
+//   / <unquoted-string> /  <EOS> | <isWhitespace>
 bool Lexer::isPattern (std::string& token, Lexer::Type& type)
 {
  std::size_t marker = _cursor;