From 7598997e707778daf0917e97d4d4bdb1b4292640 Mon Sep 17 00:00:00 2001
From: Paul Beckingham
Date: Sat, 31 May 2014 13:51:10 -0400
Subject: [PATCH] Lexer

- Implemented ::token_split, which performs a full lex and, unlike
  ::word_split, does not require white space.
- Added unit tests.
---
 src/Lexer.cpp    | 13 +++++++++++++
 src/Lexer.h      |  1 +
 test/lexer.t.cpp | 19 ++++++++++++++++++-
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/Lexer.cpp b/src/Lexer.cpp
index 885378908..27a3ab55d 100644
--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@@ -630,6 +630,19 @@ void Lexer::word_split (std::vector <std::string>& words, const std::string& inp
     words.push_back (word);
 }
 
+////////////////////////////////////////////////////////////////////////////////
+// Split 'input' into 'tokens'.
+void Lexer::token_split (std::vector <std::string>& words, const std::string& input)
+{
+  words.clear ();
+
+  std::string word;
+  Lexer::Type type;
+  Lexer lex (input);
+  while (lex.token (word, type))
+    words.push_back (word);
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 bool Lexer::is_punct (int c) const
 {
diff --git a/src/Lexer.h b/src/Lexer.h
index 6194dac06..c744a9ad4 100644
--- a/src/Lexer.h
+++ b/src/Lexer.h
@@ -66,6 +66,7 @@ public:
   static const std::string type_name (const Type&);
   static bool is_ws (int);
   static void word_split (std::vector <std::string>&, const std::string&);
+  static void token_split (std::vector <std::string>&, const std::string&);
 
 private:
   bool is_punct (int) const;
diff --git a/test/lexer.t.cpp b/test/lexer.t.cpp
index 7071cfa9d..7bdf404b0 100644
--- a/test/lexer.t.cpp
+++ b/test/lexer.t.cpp
@@ -36,7 +36,7 @@ Context context;
 ////////////////////////////////////////////////////////////////////////////////
 int main (int argc, char** argv)
 {
-  UnitTest t (181);
+  UnitTest t (185);
 
   std::vector <std::pair <std::string, Lexer::Type> > tokens;
   std::string token;
@@ -319,6 +319,23 @@ int main (int argc, char** argv)
   t.is (items[2], "12.3e4", "word_split ' +-* a+b 12.3e4 'c d'' -> [2] '12.3e4'");
   t.is (items[3], "c d", "word_split ' +-* a+b 12.3e4 'c d'' -> [3] 'c d'");
 
+  // Test common expression element.
+  unsplit = "name=value";
+  Lexer::token_split (items, unsplit);
+  t.is (items.size (), (size_t) 3, "split 'name=value'");
+  if (items.size () == 3)
+  {
+    t.is (items[0], "name", "token_split 'name=value' -> [0] 'name'");
+    t.is (items[1], "=", "token_split 'name=value' -> [1] '='");
+    t.is (items[2], "value", "token_split 'name=value' -> [2] 'value'");
+  }
+  else
+  {
+    t.fail ("token_split 'name=value' -> [0] 'name'");
+    t.fail ("token_split 'name=value' -> [1] '='");
+    t.fail ("token_split 'name=value' -> [2] 'value'");
+  }
+
   return 0;
 }
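
A minimal usage sketch of the new call, outside the patch itself: it contrasts
::word_split (splits only at white space) with ::token_split (full lex), using
the same "name=value" input that the added unit test exercises. The include
paths and the global Context mirror test/lexer.t.cpp and are assumptions about
the build setup, not part of this commit.

    // Illustrative sketch only; header names and the global Context follow
    // test/lexer.t.cpp and are assumed to be available on the include path.
    #include <iostream>
    #include <string>
    #include <vector>
    #include <Context.h>
    #include <Lexer.h>

    Context context;   // lexer.t.cpp defines this global; Lexer code may link against it.

    int main ()
    {
      std::vector <std::string> words;
      std::vector <std::string> tokens;

      // ::word_split breaks only on white space, so "name=value" stays one word.
      Lexer::word_split (words, "name=value");

      // ::token_split runs the full lexer, so "name=value" becomes
      // "name", "=", "value", matching the new unit test above.
      Lexer::token_split (tokens, "name=value");

      std::cout << "word_split:  " << words.size ()  << " item(s)\n"   // expected: 1
                << "token_split: " << tokens.size () << " item(s)\n";  // expected: 3
      return 0;
    }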