From 7598997e707778daf0917e97d4d4bdb1b4292640 Mon Sep 17 00:00:00 2001
From: Paul Beckingham
Date: Sat, 31 May 2014 13:51:10 -0400
Subject: [PATCH] Lexer

- Implemented ::token_split, which performs a full lex and, unlike
  ::word_split, does not require white space.
- Added unit tests.
---
 src/Lexer.cpp    | 13 +++++++++++++
 src/Lexer.h      |  1 +
 test/lexer.t.cpp | 19 ++++++++++++++++++-
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/Lexer.cpp b/src/Lexer.cpp
index 885378908..27a3ab55d 100644
--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@@ -630,6 +630,19 @@ void Lexer::word_split (std::vector <std::string>& words, const std::string& inp
     words.push_back (word);
 }
 
+////////////////////////////////////////////////////////////////////////////////
+// Split 'input' into 'tokens'.
+void Lexer::token_split (std::vector <std::string>& words, const std::string& input)
+{
+  words.clear ();
+
+  std::string word;
+  Lexer::Type type;
+  Lexer lex (input);
+  while (lex.token (word, type))
+    words.push_back (word);
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 bool Lexer::is_punct (int c) const
 {
diff --git a/src/Lexer.h b/src/Lexer.h
index 6194dac06..c744a9ad4 100644
--- a/src/Lexer.h
+++ b/src/Lexer.h
@@ -66,6 +66,7 @@ public:
   static const std::string type_name (const Type&);
   static bool is_ws (int);
   static void word_split (std::vector <std::string>&, const std::string&);
+  static void token_split (std::vector <std::string>&, const std::string&);
 
 private:
   bool is_punct (int) const;
diff --git a/test/lexer.t.cpp b/test/lexer.t.cpp
index 7071cfa9d..7bdf404b0 100644
--- a/test/lexer.t.cpp
+++ b/test/lexer.t.cpp
@@ -36,7 +36,7 @@ Context context;
 ////////////////////////////////////////////////////////////////////////////////
 int main (int argc, char** argv)
 {
-  UnitTest t (181);
+  UnitTest t (185);
 
   std::vector <std::pair <std::string, Lexer::Type> > tokens;
   std::string token;
@@ -319,6 +319,23 @@ int main (int argc, char** argv)
   t.is (items[2], "12.3e4", "word_split ' +-* a+b 12.3e4 'c d'' -> [2] '12.3e4'");
   t.is (items[3], "c d", "word_split ' +-* a+b 12.3e4 'c d'' -> [3] 'c d'");
 
+  // Test common expression element.
+  unsplit = "name=value";
+  Lexer::token_split (items, unsplit);
+  t.is (items.size (), (size_t) 3, "split 'name=value'");
+  if (items.size () == 3)
+  {
+    t.is (items[0], "name", "token_split 'name=value' -> [0] 'name'");
+    t.is (items[1], "=", "token_split 'name=value' -> [1] '='");
+    t.is (items[2], "value", "token_split 'name=value' -> [2] 'value'");
+  }
+  else
+  {
+    t.fail ("token_split 'name=value' -> [0] 'name'");
+    t.fail ("token_split 'name=value' -> [1] '='");
+    t.fail ("token_split 'name=value' -> [2] 'value'");
+  }
+
   return 0;
 }
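
A minimal usage sketch of the new call, outside the patch itself: it contrasts
::word_split (splits only at white space) with ::token_split (full lex), using
the same "name=value" input that the added unit test exercises. The include
paths and the global Context mirror test/lexer.t.cpp and are assumptions about
the build setup, not part of this commit.

    // Illustrative sketch only; header names and the global Context follow
    // test/lexer.t.cpp and are assumed to be available on the include path.
    #include <iostream>
    #include <string>
    #include <vector>
    #include <Context.h>
    #include <Lexer.h>

    Context context;   // lexer.t.cpp defines this global; Lexer code may link against it.

    int main ()
    {
      std::vector <std::string> words;
      std::vector <std::string> tokens;

      // ::word_split breaks only on white space, so "name=value" stays one word.
      Lexer::word_split (words, "name=value");

      // ::token_split runs the full lexer, so "name=value" becomes
      // "name", "=", "value", matching the new unit test above.
      Lexer::token_split (tokens, "name=value");

      std::cout << "word_split:  " << words.size ()  << " item(s)\n"   // expected: 1
                << "token_split: " << tokens.size () << " item(s)\n";  // expected: 3
      return 0;
    }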