Lexer

- Implemented Lexer::token_split, which performs a full lex and, unlike Lexer::word_split, doesn't require whitespace between tokens. - Added unit tests.
2025-06-26 10:54:26 +02:00 · 2014-05-31 13:51:10 -04:00 · 2014-05-31 13:51:10 -04:00 · 7598997e70
commit 7598997e70
parent 0af9bbdc03
3 changed files with 32 additions and 1 deletions
--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@ -630,6 +630,19 @@ void Lexer::word_split (std::vector <std::string>& words, const std::string& inp
    words.push_back (word);
 }

+////////////////////////////////////////////////////////////////////////////////
+// Split 'input' into tokens by running a full lex; 'words' is cleared, then receives each token's text in order.
+void Lexer::token_split (std::vector <std::string>& words, const std::string& input)
+{
+  words.clear ();
+
+  std::string word;
+  Lexer::Type type;  // Filled in by Lexer::token on each call, but not otherwise used here.
+  Lexer lex (input);
+  while (lex.token (word, type))
+    words.push_back (word);
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 bool Lexer::is_punct (int c) const
 {
--- a/src/Lexer.h
+++ b/src/Lexer.h
@ -66,6 +66,7 @@ public:
  static const std::string type_name (const Type&);
  static bool is_ws (int);
  static void word_split (std::vector <std::string>&, const std::string&);
+  static void token_split (std::vector <std::string>&, const std::string&);

 private:
  bool is_punct (int) const;
--- a/test/lexer.t.cpp
+++ b/test/lexer.t.cpp
@ -36,7 +36,7 @@ Context context;
 ////////////////////////////////////////////////////////////////////////////////
 int main (int argc, char** argv)
 {
-  UnitTest t (181);
+  UnitTest t (185);

  std::vector <std::pair <std::string, Lexer::Type> > tokens;
  std::string token;
@ -319,6 +319,23 @@ int main (int argc, char** argv)
  t.is (items[2], "12.3e4",        "word_split '  +-* a+b 12.3e4 'c d'' -> [2] '12.3e4'");
  t.is (items[3], "c d",           "word_split '  +-* a+b 12.3e4 'c d'' -> [3] 'c d'");

+  // Test common expression element.
+  unsplit = "name=value";
+  Lexer::token_split (items, unsplit);
+  t.is (items.size (), (size_t) 3, "split 'name=value'");
+  if (items.size () == 3)
+  {
+    t.is (items[0], "name",          "token_split 'name=value' -> [0] 'name'");
+    t.is (items[1], "=",             "token_split 'name=value' -> [1] '='");
+    t.is (items[2], "value",         "token_split 'name=value' -> [2] 'value'");
+  }
+  else
+  {
+    t.fail ("token_split 'name=value' -> [0] 'name'");
+    t.fail ("token_split 'name=value' -> [1] '='");
+    t.fail ("token_split 'name=value' -> [2] 'value'");
+  }
+
  return 0;
 }