Text Processing

- Implemented splitq, which can handle quoted and oddly-quoted string
  splits.
This commit is contained in:
Paul Beckingham 2011-05-16 00:22:14 -04:00
parent 0308ee953a
commit 05b3fa0bb6
3 changed files with 59 additions and 5 deletions

View file

@ -33,10 +33,10 @@
#include <string>
#include <strings.h>
#include <ctype.h>
#include "Context.h"
#include "util.h"
#include "text.h"
#include "utf8.h"
#include <Context.h>
#include <util.h>
#include <text.h>
#include <utf8.h>
extern Context context;
@ -59,6 +59,48 @@ void wrapText (
}
}
////////////////////////////////////////////////////////////////////////////////
// Quote-aware split of 'input' on 'delimiter'.
//
// While scanning, a single quote (') or double quote (") seen outside of a
// quoted run opens a quoted run, which lasts until the same quote character is
// seen again.  Delimiters inside a quoted run do not split.  An unterminated
// quoted run therefore extends to the end of 'input'.
//
//   results    Output vector.  Cleared first, then filled with the pieces in
//              order of appearance.  Always receives at least one element,
//              because the trailing piece is pushed unconditionally.
//   input      The string to split.
//   delimiter  The single character that separates pieces.
//
// Every piece is passed through unquoteText before being stored.
//
// NOTE(review): each iteration inspects input[i] *after* utf8_next_char has
// been called with i.  If that helper advances i before returning (to be
// confirmed against its definition), then input[0] is never examined, and a
// delimiter or opening quote in the very first position would be missed.
void splitq (
  std::vector<std::string>& results,
  const std::string& input,
  const char delimiter)
{
  results.clear ();

  std::string::size_type start = 0;   // Offset where the current piece begins.
  std::string::size_type i = 0;       // Scan position, moved by utf8_next_char.
  bool in_quote = false;              // True while inside a quoted run.
  char quote = '\0';                  // Quote character that opened the run.

  while (utf8_next_char (input, i))
  {
    const char c = input[i];

    if (in_quote)
    {
      // Only the same quote character that opened the run can close it.
      if (c == quote)
        in_quote = false;
    }
    else if (c == delimiter)
    {
      // End of a piece: store it and start the next one after the delimiter.
      results.push_back (unquoteText (input.substr (start, i - start)));
      start = i + 1;
    }
    else if (c == '\'' || c == '"')
    {
      quote = c;
      in_quote = true;
    }
  }

  // Whatever follows the last delimiter (possibly empty) is the final piece.
  results.push_back (unquoteText (input.substr (start)));
}
////////////////////////////////////////////////////////////////////////////////
void split (
std::vector<std::string>& results,

View file

@ -39,6 +39,7 @@ std::string trim (const std::string& in, const std::string& t = " ");
std::string unquoteText (const std::string&);
int longestWord (const std::string&);
void extractLine (std::string&, std::string&, int);
// Quote-aware split on a single character: a delimiter that appears inside a
// '...' or "..." run does not split.  Each piece goes through unquoteText.
void splitq (std::vector<std::string>&, const std::string&, const char);
void split (std::vector<std::string>&, const std::string&, const char);
void split (std::vector<std::string>&, const std::string&, const std::string&);
void split_minimal (std::vector<std::string>&, const std::string&, const char);

View file

@ -35,7 +35,7 @@ Context context;
////////////////////////////////////////////////////////////////////////////////
int main (int argc, char** argv)
{
UnitTest t (236);
UnitTest t (243);
// void wrapText (std::vector <std::string>& lines, const std::string& text, const int width)
std::string text = "This is a test of the line wrapping code.";
@ -155,6 +155,17 @@ int main (int argc, char** argv)
t.is (items[2], "bc", "split '-a-bc-def' '--' -> [2] 'bc'");
t.is (items[3], "def", "split '-a-bc-def' '--' -> [3] 'def'");
// void splitq (std::vector<std::string>&, const std::string&, const char);
// Covers plain words, quoted words, an empty quoted string, quoted phrases
// containing the delimiter (both quote styles), and a quote opening mid-word.
unsplit = "one 'two' '' 'three four' \"five six seven\" eight'nine ten'";
splitq (items, unsplit, ' ');
t.is (items.size () , (size_t) 6, "splitq 'one \\'two\\' \\'\\' \\'three four\\' \"five six seven\" eight'nine ten'");
t.is (items[0], "one", "splitq 'one \\'two\\' \\'\\' \\'three four\\' \"five six seven\" eight'nine ten' -> [0] 'one'");
t.is (items[1], "two", "splitq 'one \\'two\\' \\'\\' \\'three four\\' \"five six seven\" eight'nine ten' -> [1] 'two'");
t.is (items[2], "", "splitq 'one \\'two\\' \\'\\' \\'three four\\' \"five six seven\" eight'nine ten' -> [2] ''");
t.is (items[3], "three four", "splitq 'one \\'two\\' \\'\\' \\'three four\\' \"five six seven\" eight'nine ten' -> [3] 'three four'");
t.is (items[4], "five six seven", "splitq 'one \\'two\\' \\'\\' \\'three four\\' \"five six seven\" eight'nine ten' -> [4] 'five six seven'");
t.is (items[5], "eight'nine ten'", "splitq 'one \\'two\\' \\'\\' \\'three four\\' \"five six seven\" eight'nine ten' -> [5] 'eight\\'nine ten\\''");
// void join (std::string& result, const std::string& separator, const std::vector<std::string>& items)
std::vector <std::string> unjoined;
std::string joined;