Text Processing

- Implemented splitq, which can handle quoted and oddly-quoted string
  splits.
This commit is contained in:
Paul Beckingham 2011-05-16 00:22:14 -04:00
parent 0308ee953a
commit 05b3fa0bb6
3 changed files with 59 additions and 5 deletions

View file

@ -33,10 +33,10 @@
#include <string> #include <string>
#include <strings.h> #include <strings.h>
#include <ctype.h> #include <ctype.h>
#include "Context.h" #include <Context.h>
#include "util.h" #include <util.h>
#include "text.h" #include <text.h>
#include "utf8.h" #include <utf8.h>
extern Context context; extern Context context;
@ -59,6 +59,48 @@ void wrapText (
} }
} }
////////////////////////////////////////////////////////////////////////////////
// Splits 'input' into fields separated by 'delimiter', treating any run of
// text enclosed in single or double quotes as opaque: a delimiter found inside
// a quoted run does not end the field.
//
//   results   - cleared first, then filled with one entry per field, in input
//               order.  Each field is passed through unquoteText before it is
//               stored.
//   input     - the text to split.
//   delimiter - the single character that separates fields.
//
// A quote character opens a quoted run wherever it is met outside of one, even
// in the middle of a word, and only the same quote character closes that run.
// A run that is never closed extends to the end of 'input'.  There is always
// at least one result: the text after the last delimiter (the whole input when
// no delimiter is found) is stored as the final field.
//
// The scan is byte-wise.  The quote characters are ASCII, and every byte of a
// UTF-8 multi-byte sequence has its high bit set, so no part of a multi-byte
// character can be mistaken for a quote or for an ASCII delimiter.
void splitq (
  std::vector<std::string>& results,
  const std::string& input,
  const char delimiter)
{
  results.clear ();

  std::string::size_type start = 0;   // Offset of the first byte of the current field.
  bool in_quote = false;
  char quote = '\0';                  // Quote character that opened the current run.

  // Index from zero so that a quote or delimiter in the very first position is
  // examined like any other, and stop at the last byte rather than one past it.
  for (std::string::size_type i = 0; i < input.length (); ++i)
  {
    const char c = input[i];

    if (in_quote)
    {
      // Only the matching quote character ends the run; everything else,
      // including delimiters and the other kind of quote, is field content.
      if (c == quote)
        in_quote = false;
    }
    else if (c == delimiter)
    {
      results.push_back (unquoteText (input.substr (start, i - start)));
      start = i + 1;
    }
    else if (c == '\'' || c == '"')
    {
      quote = c;
      in_quote = true;
    }
  }

  // Whatever follows the last delimiter is the final field.
  results.push_back (unquoteText (input.substr (start)));
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void split ( void split (
std::vector<std::string>& results, std::vector<std::string>& results,

View file

@ -39,6 +39,7 @@ std::string trim (const std::string& in, const std::string& t = " ");
std::string unquoteText (const std::string&); std::string unquoteText (const std::string&);
int longestWord (const std::string&); int longestWord (const std::string&);
void extractLine (std::string&, std::string&, int); void extractLine (std::string&, std::string&, int);
// Quote-aware split: clears the vector, then fills it with the fields of the
// string separated by the given character.  Delimiters inside single- or
// double-quoted runs do not split, and each field goes through unquoteText.
void splitq (std::vector<std::string>&, const std::string&, const char);
void split (std::vector<std::string>&, const std::string&, const char); void split (std::vector<std::string>&, const std::string&, const char);
void split (std::vector<std::string>&, const std::string&, const std::string&); void split (std::vector<std::string>&, const std::string&, const std::string&);
void split_minimal (std::vector<std::string>&, const std::string&, const char); void split_minimal (std::vector<std::string>&, const std::string&, const char);

View file

@ -35,7 +35,7 @@ Context context;
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
int main (int argc, char** argv) int main (int argc, char** argv)
{ {
UnitTest t (236); UnitTest t (243);
// void wrapText (std::vector <std::string>& lines, const std::string& text, const int width) // void wrapText (std::vector <std::string>& lines, const std::string& text, const int width)
std::string text = "This is a test of the line wrapping code."; std::string text = "This is a test of the line wrapping code.";
@ -155,6 +155,17 @@ int main (int argc, char** argv)
t.is (items[2], "bc", "split '-a-bc-def' '--' -> [2] 'bc'"); t.is (items[2], "bc", "split '-a-bc-def' '--' -> [2] 'bc'");
t.is (items[3], "def", "split '-a-bc-def' '--' -> [3] 'def'"); t.is (items[3], "def", "split '-a-bc-def' '--' -> [3] 'def'");
// void splitq (std::vector<std::string>&, const std::string&, const char);
unsplit = "one 'two' '' 'three four' \"five six seven\" eight'nine ten'";
splitq (items, unsplit, ' ');
t.is (items.size () , (size_t) 6, "splitq 'one \\'two\\' \\'\\' \\'three four\\' \"five six seven\" eight'nine ten'");
t.is (items[0], "one", "splitq 'one \\'two\\' \\'\\' \\'three four\\' \"five six seven\" eight'nine ten' -> [0] 'one'");
t.is (items[1], "two", "splitq 'one \\'two\\' \\'\\' \\'three four\\' \"five six seven\" eight'nine ten' -> [1] 'two'");
t.is (items[2], "", "splitq 'one \\'two\\' \\'\\' \\'three four\\' \"five six seven\" eight'nine ten' -> [2] ''");
t.is (items[3], "three four", "splitq 'one \\'two\\' \\'\\' \\'three four\\' \"five six seven\" eight'nine ten' -> [3] 'three four'");
t.is (items[4], "five six seven", "splitq 'one \\'two\\' \\'\\' \\'three four\\' \"five six seven\" eight'nine ten' -> [4] 'five six seven'");
t.is (items[5], "eight'nine ten'", "splitq 'one \\'two\\' \\'\\' \\'three four\\' \"five six seven\" eight'nine ten' -> [4] 'eight\\'nine ten\\''");
// void join (std::string& result, const std::string& separator, const std::vector<std::string>& items) // void join (std::string& result, const std::string& separator, const std::vector<std::string>& items)
std::vector <std::string> unjoined; std::vector <std::string> unjoined;
std::string joined; std::string joined;