Lexer

- Implemented ::token_split, which performs a full lex and, unlike ::word_split, does not require whitespace between tokens. - Added unit tests.
2025-08-27 19:17:19 +02:00 · 2014-05-31 13:51:10 -04:00 · 2014-05-31 13:51:10 -04:00 · 7598997e70
commit 7598997e70
parent 0af9bbdc03
3 changed files with 32 additions and 1 deletions
--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@ -630,6 +630,19 @@ void Lexer::word_split (std::vector <std::string>& words, const std::string& inp
    words.push_back (word);
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Split 'input' into 'tokens'.
 void Lexer::token_split (std::vector <std::string>& words, const std::string& input)
 {
  words.clear ();
  std::string word;
  Lexer::Type type;
  Lexer lex (input);
  while (lex.token (word, type))
    words.push_back (word);
 }
 ////////////////////////////////////////////////////////////////////////////////
 bool Lexer::is_punct (int c) const
 {
--- a/src/Lexer.h
+++ b/src/Lexer.h
@ -66,6 +66,7 @@ public:
  static const std::string type_name (const Type&);
  static bool is_ws (int);
  static void word_split (std::vector <std::string>&, const std::string&);
  // Clears the vector, then fills it with the text of every token obtained by
  // running a full lex over the string. Unlike word_split, tokens do not need
  // to be separated by white space.
  static void token_split (std::vector <std::string>&, const std::string&);
 private:
  bool is_punct (int) const;
--- a/test/lexer.t.cpp
+++ b/test/lexer.t.cpp
@ -36,7 +36,7 @@ Context context;
 ////////////////////////////////////////////////////////////////////////////////
 int main (int argc, char** argv)
 {
-  UnitTest t (181);
+  UnitTest t (185);
  std::vector <std::pair <std::string, Lexer::Type> > tokens;
  std::string token;
@ -319,6 +319,23 @@ int main (int argc, char** argv)
  t.is (items[2], "12.3e4",        "word_split '  +-* a+b 12.3e4 'c d'' -> [2] '12.3e4'");
  t.is (items[3], "c d",           "word_split '  +-* a+b 12.3e4 'c d'' -> [3] 'c d'");
  // Test common expression element.
  // "name=value" contains no white space, so this checks that token_split
  // separates it into the three tokens "name", "=" and "value".
  unsplit = "name=value";
  Lexer::token_split (items, unsplit);
  t.is (items.size (), (size_t) 3, "split 'name=value'");
  if (items.size () == 3)
  {
    // Only index into 'items' once the size is known to be exactly 3.
    t.is (items[0], "name",          "token_split 'name=value' -> [0] 'name'");
    t.is (items[1], "=",             "token_split 'name=value' -> [1] '='");
    t.is (items[2], "value",         "token_split 'name=value' -> [2] 'value'");
  }
  else
  {
    // Report the three element checks as failures so that both branches
    // produce the same number of test results.
    t.fail ("token_split 'name=value' -> [0] 'name'");
    t.fail ("token_split 'name=value' -> [1] '='");
    t.fail ("token_split 'name=value' -> [2] 'value'");
  }
  return 0;
 }