Text Handling

- Migrated the splitq implementation over to the Lexer. Now it handles all
  Unicode spaces.  Just need to obsolete the old version.
This commit is contained in:
Paul Beckingham 2014-04-23 17:34:07 -04:00
parent 45453de477
commit d099a4edfd
3 changed files with 28 additions and 1 deletions

View file

@ -34,6 +34,7 @@
#include <strings.h>
#include <ctype.h>
#include <Context.h>
#include <Lexer.h>
#include <math.h>
#include <util.h>
#include <text.h>
@ -62,6 +63,21 @@ void wrapText (
////////////////////////////////////////////////////////////////////////////////
// UTF-8
//
// Splits on Unicode whitespace, removes quotes.
void splitq (std::vector <std::string>& results, const std::string& input)
{
results.clear ();
std::string token;
Lexer::Type type;
Lexer lex (input);
while (lex.token (token, type))
results.push_back (token);
}
////////////////////////////////////////////////////////////////////////////////
// TODO Obsolete this call.
void splitq (
std::vector<std::string>& results,
const std::string& input,

View file

@ -39,6 +39,7 @@ std::string unquoteText (const std::string&);
int longestWord (const std::string&);
int longestLine (const std::string&);
bool extractLine (std::string&, const std::string&, int, bool, unsigned int&);
void splitq (std::vector<std::string>&, const std::string&);
void splitq (std::vector<std::string>&, const std::string&, const char);
void split (std::vector<std::string>&, const std::string&, const char);
void split (std::vector<std::string>&, const std::string&, const std::string&);

View file

@ -37,7 +37,7 @@ Context context;
////////////////////////////////////////////////////////////////////////////////
int main (int argc, char** argv)
{
UnitTest t (264);
UnitTest t (270);
// Ensure environment has no influence.
unsetenv ("TASKDATA");
@ -183,6 +183,16 @@ int main (int argc, char** argv)
t.is (items[4], "five six seven", "splitq 'one \\'two\\' \\'\\' \\'three four\\' \"five six seven\" eight'nine ten' -> [4] 'five six seven'");
t.is (items[5], "eight'nine ten'", "splitq 'one \\'two\\' \\'\\' \\'three four\\' \"five six seven\" eight'nine ten' -> [4] 'eight\\'nine ten\\''");
// void splitq (std::vector<std::string>&, const std::string&);
unsplit = " ( A or B ) ";
splitq (items, unsplit);
t.is (items.size (), (size_t) 5, "splitq ' ( A or B ) '");
t.is (items[0], "(", "splitq ' ( A or B ) ' -> [0] '('");
t.is (items[1], "A", "splitq ' ( A or B ) ' -> [1] 'A'");
t.is (items[2], "or", "splitq ' ( A or B ) ' -> [2] 'or'");
t.is (items[3], "B", "splitq ' ( A or B ) ' -> [3] 'B'");
t.is (items[4], ")", "splitq ' ( A or B ) ' -> [4] ')'");
// void join (std::string& result, const std::string& separator, const std::vector<std::string>& items)
std::vector <std::string> unjoined;
std::string joined;