- Implemented ::token_split, which performs a full lex and, unlike ::word_split,
  does not require whitespace between tokens.
- Added unit tests.
This commit is contained in:
Paul Beckingham 2014-05-31 13:51:10 -04:00
parent 0af9bbdc03
commit 7598997e70
3 changed files with 32 additions and 1 deletions

View file

@ -630,6 +630,19 @@ void Lexer::word_split (std::vector <std::string>& words, const std::string& inp
words.push_back (word);
}
////////////////////////////////////////////////////////////////////////////////
// Split 'input' into 'tokens'.
void Lexer::token_split (std::vector <std::string>& words, const std::string& input)
{
words.clear ();
std::string word;
Lexer::Type type;
Lexer lex (input);
while (lex.token (word, type))
words.push_back (word);
}
////////////////////////////////////////////////////////////////////////////////
bool Lexer::is_punct (int c) const
{

View file

@ -66,6 +66,7 @@ public:
static const std::string type_name (const Type&);
static bool is_ws (int);
static void word_split (std::vector <std::string>&, const std::string&);
static void token_split (std::vector <std::string>&, const std::string&);
private:
bool is_punct (int) const;

View file

@ -36,7 +36,7 @@ Context context;
////////////////////////////////////////////////////////////////////////////////
int main (int argc, char** argv)
{
UnitTest t (181);
UnitTest t (185);
std::vector <std::pair <std::string, Lexer::Type> > tokens;
std::string token;
@ -319,6 +319,23 @@ int main (int argc, char** argv)
t.is (items[2], "12.3e4", "word_split ' +-* a+b 12.3e4 'c d'' -> [2] '12.3e4'");
t.is (items[3], "c d", "word_split ' +-* a+b 12.3e4 'c d'' -> [3] 'c d'");
// Test common expression element.
unsplit = "name=value";
Lexer::token_split (items, unsplit);
t.is (items.size (), (size_t) 3, "split 'name=value'");
if (items.size () == 3)
{
t.is (items[0], "name", "token_split 'name=value' -> [0] 'name'");
t.is (items[1], "=", "token_split 'name=value' -> [1] '='");
t.is (items[2], "value", "token_split 'name=value' -> [2] 'value'");
}
else
{
t.fail ("token_split 'name=value' -> [0] 'name'");
t.fail ("token_split 'name=value' -> [1] '='");
t.fail ("token_split 'name=value' -> [2] 'value'");
}
return 0;
}