mirror of
https://github.com/GothenburgBitFactory/taskwarrior.git
synced 2025-08-25 12:17:20 +02:00
Lexer:: Added polymorphic ::readWord for quoted and unquoted strings
This commit is contained in:
parent
abaf326855
commit
7a6d546a0d
3 changed files with 80 additions and 15 deletions
|
@ -1222,16 +1222,12 @@ bool Lexer::isOneWord (const std::string& text)
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Full implementation of a word. Includes:
|
// Full implementation of a quoted word. Includes:
|
||||||
// one\ two
|
|
||||||
// '\''
|
// '\''
|
||||||
// '"'
|
// '"'
|
||||||
// "'"
|
// "'"
|
||||||
// "\""
|
// "\""
|
||||||
// 'one two'
|
// 'one two'
|
||||||
// abcU+0020def
|
|
||||||
// abc\u0020def
|
|
||||||
// a\tb
|
|
||||||
bool Lexer::readWord (
|
bool Lexer::readWord (
|
||||||
const std::string& text,
|
const std::string& text,
|
||||||
const std::string& quotes,
|
const std::string& quotes,
|
||||||
|
@ -1255,8 +1251,75 @@ bool Lexer::readWord (
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Unicode U+XXXX or \uXXXX codepoint.
|
||||||
|
else if (eos - cursor >= 6 &&
|
||||||
|
((text[cursor + 0] == 'U' && text[cursor + 1] == '+') ||
|
||||||
|
(text[cursor + 0] == '\\' && text[cursor + 1] == 'u')) &&
|
||||||
|
isHexDigit (text[cursor + 2]) &&
|
||||||
|
isHexDigit (text[cursor + 3]) &&
|
||||||
|
isHexDigit (text[cursor + 4]) &&
|
||||||
|
isHexDigit (text[cursor + 5]))
|
||||||
|
{
|
||||||
|
word += utf8_character (
|
||||||
|
hexToInt (
|
||||||
|
text[cursor + 2],
|
||||||
|
text[cursor + 3],
|
||||||
|
text[cursor + 4],
|
||||||
|
text[cursor + 5]));
|
||||||
|
cursor += 6;
|
||||||
|
}
|
||||||
|
|
||||||
|
// An escaped thing.
|
||||||
|
else if (c == '\\')
|
||||||
|
{
|
||||||
|
c = text[++cursor];
|
||||||
|
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
case '"': word += (char) 0x22; ++cursor; break;
|
||||||
|
case '\'': word += (char) 0x27; ++cursor; break;
|
||||||
|
case '\\': word += (char) 0x5C; ++cursor; break;
|
||||||
|
case 'b': word += (char) 0x08; ++cursor; break;
|
||||||
|
case 'f': word += (char) 0x0C; ++cursor; break;
|
||||||
|
case 'n': word += (char) 0x0A; ++cursor; break;
|
||||||
|
case 'r': word += (char) 0x0D; ++cursor; break;
|
||||||
|
case 't': word += (char) 0x09; ++cursor; break;
|
||||||
|
case 'v': word += (char) 0x0B; ++cursor; break;
|
||||||
|
|
||||||
|
// This pass-through default case means that anything can be escaped
|
||||||
|
// harmlessly. In particular 'quote' is included, if it is not one of the
|
||||||
|
// above characters.
|
||||||
|
default: word += (char) c; ++cursor; break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ordinary character.
|
||||||
|
else
|
||||||
|
word += utf8_character (utf8_next_char (text, cursor));
|
||||||
|
}
|
||||||
|
|
||||||
|
return word.length () > 0 ? true : false;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Full implementation of an unquoted word. Includes:
|
||||||
|
// one\ two
|
||||||
|
// abcU+0020def
|
||||||
|
// abc\u0020def
|
||||||
|
// a\tb
|
||||||
|
bool Lexer::readWord (
|
||||||
|
const std::string& text,
|
||||||
|
std::string::size_type& cursor,
|
||||||
|
std::string& word)
|
||||||
|
{
|
||||||
|
std::string::size_type eos = text.length ();
|
||||||
|
|
||||||
|
word = "";
|
||||||
|
int c;
|
||||||
|
while ((c = text[cursor]))
|
||||||
|
{
|
||||||
// Unquoted word ends on white space.
|
// Unquoted word ends on white space.
|
||||||
if (! quote && Lexer::isWhitespace (c))
|
if (Lexer::isWhitespace (c))
|
||||||
{
|
{
|
||||||
++cursor;
|
++cursor;
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -76,6 +76,7 @@ public:
|
||||||
static void dequote (std::string&);
|
static void dequote (std::string&);
|
||||||
static bool wasQuoted (const std::string&);
|
static bool wasQuoted (const std::string&);
|
||||||
static bool readWord (const std::string&, const std::string&, std::string::size_type&, std::string&);
|
static bool readWord (const std::string&, const std::string&, std::string::size_type&, std::string&);
|
||||||
|
static bool readWord (const std::string&, std::string::size_type&, std::string&);
|
||||||
static bool decomposePair (const std::string&, std::string&, std::string&, std::string&, std::string&);
|
static bool decomposePair (const std::string&, std::string&, std::string&, std::string&, std::string&);
|
||||||
static int hexToInt (int);
|
static int hexToInt (int);
|
||||||
static int hexToInt (int, int);
|
static int hexToInt (int, int);
|
||||||
|
|
|
@ -214,33 +214,34 @@ int main (int argc, char** argv)
|
||||||
// static bool readWord (const std::string&, const std::string&, std::string::size_type&, std::string&);
|
// static bool readWord (const std::string&, const std::string&, std::string::size_type&, std::string&);
|
||||||
std::string::size_type cursor = 0;
|
std::string::size_type cursor = 0;
|
||||||
std::string word;
|
std::string word;
|
||||||
t.ok (Lexer::readWord ("input", "'\"", cursor, word), "readWord 'input' --> true");
|
|
||||||
t.is (word, "input", " word '" + word + "'");
|
|
||||||
t.is ((int)cursor, 5, " cursor");
|
|
||||||
|
|
||||||
cursor = 0;
|
|
||||||
t.ok (Lexer::readWord ("'one two'", "'\"", cursor, word), "readWord ''one two'' --> true");
|
t.ok (Lexer::readWord ("'one two'", "'\"", cursor, word), "readWord ''one two'' --> true");
|
||||||
t.is (word, "one two", " word '" + word + "'");
|
t.is (word, "one two", " word '" + word + "'");
|
||||||
t.is ((int)cursor, 9, " cursor");
|
t.is ((int)cursor, 9, " cursor");
|
||||||
|
|
||||||
|
// static bool readWord (const std::string&, std::string::size_type&, std::string&);
|
||||||
cursor = 0;
|
cursor = 0;
|
||||||
t.ok (Lexer::readWord ("one\\ two", "'\"", cursor, word), "readWord 'one\\ two' --> true");
|
t.ok (Lexer::readWord ("input", cursor, word), "readWord 'input' --> true");
|
||||||
|
t.is (word, "input", " word '" + word + "'");
|
||||||
|
t.is ((int)cursor, 5, " cursor");
|
||||||
|
|
||||||
|
cursor = 0;
|
||||||
|
t.ok (Lexer::readWord ("one\\ two", cursor, word), "readWord 'one\\ two' --> true");
|
||||||
t.is (word, "one two", " word '" + word + "'");
|
t.is (word, "one two", " word '" + word + "'");
|
||||||
t.is ((int)cursor, 8, " cursor");
|
t.is ((int)cursor, 8, " cursor");
|
||||||
|
|
||||||
cursor = 0;
|
cursor = 0;
|
||||||
t.ok (Lexer::readWord ("\\u20A43", "'\"", cursor, word), "readWord '\\u20A43' --> true");
|
t.ok (Lexer::readWord ("\\u20A43", cursor, word), "readWord '\\u20A43' --> true");
|
||||||
t.is (word, "₤3", " word '" + word + "'");
|
t.is (word, "₤3", " word '" + word + "'");
|
||||||
t.is ((int)cursor, 7, " cursor");
|
t.is ((int)cursor, 7, " cursor");
|
||||||
|
|
||||||
cursor = 0;
|
cursor = 0;
|
||||||
t.ok (Lexer::readWord ("U+20AC4", "'\"", cursor, word), "readWord '\\u20AC4' --> true");
|
t.ok (Lexer::readWord ("U+20AC4", cursor, word), "readWord '\\u20AC4' --> true");
|
||||||
t.is (word, "€4", " word '" + word + "'");
|
t.is (word, "€4", " word '" + word + "'");
|
||||||
t.is ((int)cursor, 7, " cursor");
|
t.is ((int)cursor, 7, " cursor");
|
||||||
|
|
||||||
std::string text = "one 'two' three\\ four";
|
std::string text = "one 'two' three\\ four";
|
||||||
cursor = 0;
|
cursor = 0;
|
||||||
while (Lexer::readWord (text, "'\"", cursor, word))
|
while (Lexer::readWord (text, cursor, word))
|
||||||
{
|
{
|
||||||
t.diag ("'" + word + "'");
|
t.diag ("'" + word + "'");
|
||||||
while (Lexer::isWhitespace(text[cursor]))
|
while (Lexer::isWhitespace(text[cursor]))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue