Quoting

- Removed automatic dequoting by the Lexer. - Implemented Lexer::dequote for manual control. - Variant dequotes string values when appropriate. - Fixed some unit tests that became wrong.
2025-06-26 10:54:26 +02:00 · 2014-11-18 00:55:53 -05:00 · 2014-11-18 00:55:53 -05:00 · 06319711f1
commit 06319711f1
parent f28ccdc8b1
7 changed files with 114 additions and 12 deletions
--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@ -87,6 +87,7 @@ bool Lexer::token (std::string& result, Type& type)
      {
        type = typeString;
        quote = _n0;
+        result += utf8_character (_n0);
        shift ();
      }
      else if (_n0 == '0' &&
@ -189,6 +190,7 @@ bool Lexer::token (std::string& result, Type& type)
    case typeString:
      if (_n0 == quote)
      {
+        result += utf8_character (_n0);
        shift ();
        quote = 0;
        return true;
@ -247,6 +249,7 @@ bool Lexer::token (std::string& result, Type& type)
      else
      {
        type = quote ? typeString : typeIdentifier;
+        result += utf8_character (quote);
        result += utf8_character (_n0);
        shift ();
      }
@ -265,7 +268,8 @@ bool Lexer::token (std::string& result, Type& type)
      }
      else
      {
-        result += decode_escape (_n0);
+        result += '\\';
+        result += utf8_character (_n0);
        type = quote ? typeString : typeIdentifier;
        shift ();
      }
@ -444,6 +448,7 @@ bool Lexer::word (std::string& token, Type& type)
      {
        type = typeString;
        quote = _n0;
+        token += utf8_character (_n0);
        shift ();
      }
      else
@ -457,6 +462,7 @@ bool Lexer::word (std::string& token, Type& type)
    case typeString:
      if (_n0 == quote)
      {
+        token += utf8_character (_n0);
        shift ();
        quote = 0;
        return true;
@ -491,7 +497,8 @@ bool Lexer::word (std::string& token, Type& type)
      }
      else
      {
-        token += decode_escape (_n0);
+        token += '\\';
+        token += utf8_character (_n0);
        type = typeString;
        shift ();
      }
@ -709,6 +716,18 @@ void Lexer::token_split (std::vector <std::pair <std::string, Lexer::Type> >& le
    lexemes.push_back (std::pair <std::string, Lexer::Type>(word, type));
 }

+////////////////////////////////////////////////////////////////////////////////
+// Strips one pair of matching surrounding quotes (' or ") from 'input', in
+// place.  The first and last characters must be the same quote character;
+// otherwise 'input' is left unmodified.  Only the outermost pair is removed,
+// and characters between the quotes are copied through unchanged.
+void Lexer::dequote (std::string& input)
+{
+  // A quoted string needs both an opening and a closing quote.  Without this
+  // guard a lone quote character would match itself as both ends, 'len - 2'
+  // would wrap around as an unsigned value, and substr would reduce the input
+  // to an empty string.  It also avoids indexing into an empty string.
+  size_t len = input.length ();
+  if (len < 2)
+    return;
+
+  int quote = input[0];
+  if ((quote == '\'' || quote == '"') &&
+      quote == input[len - 1])
+  {
+    // Drop the first and last characters, keep everything in between.
+    input = input.substr (1, len - 2);
+  }
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 bool Lexer::is_date (std::string& result)
 {
@ -830,7 +849,6 @@ int Lexer::decode_escape (int c) const
  case 'v':  return 0x0B;
  case '\'': return 0x27;
  case '"':  return 0x22;
-  case '\\': return 0x5C;
  default:   return c;
  }
 }