From 06319711f1980342209486bbb40387996f891b84 Mon Sep 17 00:00:00 2001 From: Paul Beckingham Date: Tue, 18 Nov 2014 00:55:53 -0500 Subject: [PATCH] Quoting - Removed automatic dequoting by the Lexer. - Implemented Lexer::dequote for manual control. - Variant dequotes string values when appropriate. - Fixed some unit tests that became wrong. --- ChangeLog | 2 ++ src/CLI.cpp | 4 ++- src/Lexer.cpp | 24 ++++++++++++-- src/Lexer.h | 1 + src/Variant.cpp | 85 ++++++++++++++++++++++++++++++++++++++++++++++-- test/filter.t | 4 +-- test/lexer.t.cpp | 6 ++-- 7 files changed, 114 insertions(+), 12 deletions(-) diff --git a/ChangeLog b/ChangeLog index 124775d29..790069a6e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -184,6 +184,8 @@ - TW-1441 task import continues happily if filename doesn't exist. - TW-1444 Tag ordering is preserved, but should be sorted in reports. - TW-1460 Empty due dates lead to endless loop. +- TW-1463 A few more problems with special characters in filters, pluses, + question marks, and braces (thanks to Ralph Bean). - Added new holidays.xy-XY.rc definition files - Removed deprecated 'echo.command' setting, in favor of the 'header' and 'affected' verbosity tokens. diff --git a/src/CLI.cpp b/src/CLI.cpp index c9d24d5fe..8c6fffc2a 100644 --- a/src/CLI.cpp +++ b/src/CLI.cpp @@ -1637,7 +1637,9 @@ void CLI::desugarFilterPlainArgs () op.tag ("FILTER"); reconstructed.push_back (op); - A rhs ("argPattern", "'" + a->attribute ("raw") + "'"); + std::string pattern = a->attribute ("raw"); + Lexer::dequote (pattern); + A rhs ("argPattern", "'" + pattern + "'"); rhs.tag ("LITERAL"); rhs.tag ("FILTER"); reconstructed.push_back (rhs); diff --git a/src/Lexer.cpp b/src/Lexer.cpp index 2729c9415..19acd0a69 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -87,6 +87,7 @@ bool Lexer::token (std::string& result, Type& type) { type = typeString; quote = _n0; + result += utf8_character (_n0); shift (); } else if (_n0 == '0' && @@ -189,6 +190,7 @@ bool Lexer::token (std::string& result, Type& type) case typeString: if (_n0 == quote) { + result += utf8_character (_n0); shift (); quote = 0; return true; @@ -247,6 +249,7 @@ bool Lexer::token (std::string& result, Type& type) else { type = quote ? typeString : typeIdentifier; + result += utf8_character (quote); result += utf8_character (_n0); shift (); } @@ -265,7 +268,8 @@ bool Lexer::token (std::string& result, Type& type) } else { - result += decode_escape (_n0); + result += '\\'; + result += utf8_character (_n0); type = quote ? typeString : typeIdentifier; shift (); } @@ -444,6 +448,7 @@ bool Lexer::word (std::string& token, Type& type) { type = typeString; quote = _n0; + token += utf8_character (_n0); shift (); } else @@ -457,6 +462,7 @@ bool Lexer::word (std::string& token, Type& type) case typeString: if (_n0 == quote) { + token += utf8_character (_n0); shift (); quote = 0; return true; @@ -491,7 +497,8 @@ bool Lexer::word (std::string& token, Type& type) } else { - token += decode_escape (_n0); + token += '\\'; + token += utf8_character (_n0); type = typeString; shift (); } @@ -709,6 +716,18 @@ void Lexer::token_split (std::vector >& le lexemes.push_back (std::pair (word, type)); } +//////////////////////////////////////////////////////////////////////////////// +void Lexer::dequote (std::string& input) +{ + int quote = input[0]; + size_t len = input.length (); + if ((quote == '\'' || quote == '"') && + quote == input[len - 1]) + { + input = input.substr (1, len - 2); + } +} + //////////////////////////////////////////////////////////////////////////////// bool Lexer::is_date (std::string& result) { @@ -830,7 +849,6 @@ int Lexer::decode_escape (int c) const case 'v': return 0x0B; case '\'': return 0x27; case '"': return 0x22; - case '\\': return 0x5C; default: return c; } } diff --git a/src/Lexer.h b/src/Lexer.h index a7ba74bc6..ff0c56554 100644 --- a/src/Lexer.h +++ b/src/Lexer.h @@ -84,6 +84,7 @@ public: static void word_split (std::vector &, const std::string&); static void token_split (std::vector &, const std::string&); static void token_split (std::vector >&, const std::string&); + static void dequote (std::string&); private: bool is_date (std::string&); diff --git a/src/Variant.cpp b/src/Variant.cpp index 39afff8ac..a08bf6e10 100644 --- a/src/Variant.cpp +++ b/src/Variant.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -193,6 +194,12 @@ bool Variant::operator&& (const Variant& other) const Variant left (*this); Variant right (other); + if (left._type == type_string) + Lexer::dequote (left._string); + + if (right._type == type_string) + Lexer::dequote (right._string); + left.cast (type_boolean); right.cast (type_boolean); @@ -205,6 +212,12 @@ bool Variant::operator|| (const Variant& other) const Variant left (*this); Variant right (other); + if (left._type == type_string) + Lexer::dequote (left._string); + + if (right._type == type_string) + Lexer::dequote (right._string); + left.cast (type_boolean); right.cast (type_boolean); @@ -217,6 +230,12 @@ bool Variant::operator_xor (const Variant& other) const Variant left (*this); Variant right (other); + if (left._type == type_string) + Lexer::dequote (left._string); + + if (right._type == type_string) + Lexer::dequote (right._string); + left.cast (type_boolean); right.cast (type_boolean); @@ -230,6 +249,12 @@ bool Variant::operator< (const Variant& other) const Variant left (*this); Variant right (other); + if (left._type == type_string) + Lexer::dequote (left._string); + + if (right._type == type_string) + Lexer::dequote (right._string); + switch (left._type) { case type_unknown: @@ -369,6 +394,12 @@ bool Variant::operator<= (const Variant& other) const Variant left (*this); Variant right (other); + if (left._type == type_string) + Lexer::dequote (left._string); + + if (right._type == type_string) + Lexer::dequote (right._string); + switch (left._type) { case type_unknown: @@ -509,6 +540,12 @@ bool Variant::operator> (const Variant& other) const Variant left (*this); Variant right (other); + if (left._type == type_string) + Lexer::dequote (left._string); + + if (right._type == type_string) + Lexer::dequote (right._string); + switch (left._type) { case type_unknown: @@ -647,6 +684,12 @@ bool Variant::operator>= (const Variant& other) const Variant left (*this); Variant right (other); + if (left._type == type_string) + Lexer::dequote (left._string); + + if (right._type == type_string) + Lexer::dequote (right._string); + switch (left._type) { case type_unknown: @@ -787,6 +830,12 @@ bool Variant::operator== (const Variant& other) const Variant left (*this); Variant right (other); + if (left._type == type_string) + Lexer::dequote (left._string); + + if (right._type == type_string) + Lexer::dequote (right._string); + switch (left._type) { case type_unknown: @@ -911,12 +960,21 @@ bool Variant::operator_match (const Variant& other, const Task& task) const Variant left (*this); Variant right (other); + if (left._type == type_string) + Lexer::dequote (left._string); + + if (right._type == type_string) + Lexer::dequote (right._string); + left.cast (type_string); right.cast (type_string); + std::string pattern = right._string; + Lexer::dequote (pattern); + if (searchUsingRegex) { - RX r (right._string, searchCaseSensitive); + RX r (pattern, searchCaseSensitive); if (r.match (left._string)) return true; @@ -935,7 +993,7 @@ bool Variant::operator_match (const Variant& other, const Task& task) const } else { - if (find (left._string, right._string, searchCaseSensitive) != std::string::npos) + if (find (left._string, pattern, searchCaseSensitive) != std::string::npos) return true; // If the above did not match, and the left source is "description", then @@ -947,7 +1005,7 @@ bool Variant::operator_match (const Variant& other, const Task& task) const std::map ::iterator a; for (a = annotations.begin (); a != annotations.end (); ++a) - if (find (a->second, right._string, searchCaseSensitive) != std::string::npos) + if (find (a->second, pattern, searchCaseSensitive) != std::string::npos) return true; } } @@ -972,6 +1030,12 @@ bool Variant::operator_partial (const Variant& other) const Variant left (*this); Variant right (other); + if (left._type == type_string) + Lexer::dequote (left._string); + + if (right._type == type_string) + Lexer::dequote (right._string); + switch (left._type) { case type_unknown: @@ -1155,6 +1219,7 @@ bool Variant::operator_hastag (const Variant& other, const Task& task) const { Variant right (other); right.cast (type_string); + Lexer::dequote (right._string); return task.hasTag (right._string); } @@ -1168,6 +1233,10 @@ bool Variant::operator_notag (const Variant& other, const Task& task) const bool Variant::operator! () const { Variant left (*this); + + if (left._type == type_string) + Lexer::dequote (left._string); + left.cast (type_boolean); return ! left._bool; } @@ -1330,6 +1399,9 @@ Variant& Variant::operator+= (const Variant& other) { Variant right (other); + if (right._type == type_string) + Lexer::dequote (right._string); + switch (_type) { case type_unknown: @@ -1439,6 +1511,9 @@ Variant& Variant::operator*= (const Variant& other) { Variant right (other); + if (right._type == type_string) + Lexer::dequote (right._string); + switch (_type) { case type_unknown: @@ -1893,6 +1968,9 @@ Variant::operator std::string () const //////////////////////////////////////////////////////////////////////////////// void Variant::sqrt () { + if (_type == type_string) + Lexer::dequote (_string); + cast (type_real); if (_real < 0.0) throw std::string (STRING_VARIANT_SQRT_NEG); @@ -1967,6 +2045,7 @@ void Variant::cast (const enum type new_type) break; case type_string: + Lexer::dequote (_string); switch (new_type) { case type_unknown: break; diff --git a/test/filter.t b/test/filter.t index 28ba837ae..509cfacc6 100755 --- a/test/filter.t +++ b/test/filter.t @@ -248,7 +248,7 @@ unlike ($output, qr/five/, 'v5'); unlike ($output, qr/six/, 'v6'); like ($output, qr/seven/, 'v7'); -$output = qx{../src/task rc:filter.rc rc.regex:on list /^s/ 2>&1}; +$output = qx{../src/task rc:filter.rc rc.regex:on list /\\^s/ 2>&1}; unlike ($output, qr/one/, 'w1'); unlike ($output, qr/two/, 'w2'); unlike ($output, qr/three/, 'w3'); @@ -257,7 +257,7 @@ unlike ($output, qr/five/, 'w5'); like ($output, qr/six/, 'w6'); like ($output, qr/seven/, 'w7'); -$output = qx{../src/task rc:filter.rc rc.regex:on list /^.i/ 2>&1}; +$output = qx{../src/task rc:filter.rc rc.regex:on list /\\^.i/ 2>&1}; unlike ($output, qr/one/, 'x1'); unlike ($output, qr/two/, 'x2'); unlike ($output, qr/three/, 'x3'); diff --git a/test/lexer.t.cpp b/test/lexer.t.cpp index 25afa413f..d34d41974 100644 --- a/test/lexer.t.cpp +++ b/test/lexer.t.cpp @@ -101,7 +101,7 @@ int main (int argc, char** argv) t.is (tokens[0].first, "one", "tokens[0] = 'left'"); // 30 t.is (Lexer::type_name (tokens[0].second), "Identifier", "tokens[0] = Identifier"); - t.is (tokens[1].first, "two 'three'", "tokens[1] = 'two \\'three\\''"); + t.is (tokens[1].first, "'two \\'three\\''", "tokens[1] = 'two \\'three\\''"); t.is (Lexer::type_name (tokens[1].second), "String", "tokens[1] = String"); t.is (tokens[2].first, "+", "tokens[2] = '+'"); @@ -146,7 +146,7 @@ int main (int argc, char** argv) t.is (tokens[15].first, "and", "tokens[15] = 'and'"); // 60 t.is (Lexer::type_name (tokens[15].second), "Operator", "tokens[15] = Operator"); - t.is (tokens[16].first, "€", "tokens[16] = \\u20ac --> '€'"); + t.is (tokens[16].first, "'€'", "tokens[16] = \\u20ac --> '€'"); t.is (Lexer::type_name (tokens[16].second), "String", "tokens[16] = String"); // Test for ISO-8601 dates (favoring dates in ambiguous cases). @@ -366,7 +366,7 @@ int main (int argc, char** argv) t.is (items[0], "+-*", "word_split ' +-* a+b 12.3e4 'c d'' -> [0] '+-*'"); t.is (items[1], "a+b", "word_split ' +-* a+b 12.3e4 'c d'' -> [1] 'a+b'"); t.is (items[2], "12.3e4", "word_split ' +-* a+b 12.3e4 'c d'' -> [2] '12.3e4'"); - t.is (items[3], "c d", "word_split ' +-* a+b 12.3e4 'c d'' -> [3] 'c d'"); + t.is (items[3], "'c d'", "word_split ' +-* a+b 12.3e4 'c d'' -> [3] 'c d'"); // Test common expression element. unsplit = "name=value";