From aab23692f1a2f2964b43c7bd9e02741e110f1033 Mon Sep 17 00:00:00 2001
From: Paul Beckingham <paul@beckingham.net>
Date: Sun, 7 Sep 2014 01:17:48 -0400
Subject: [PATCH] Lexer

- Added a new token type, Lexer::typeTag, for '+tag' and '-tag' tokens.
---
 src/Lexer.cpp    | 19 +++++++++++++++++++
 src/Lexer.h      |  1 +
 test/lexer.t.cpp | 22 +++++++++++++++++++++-
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/src/Lexer.cpp b/src/Lexer.cpp
index 6684a82ec..1ff303654 100644
--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@@ -126,6 +126,12 @@ bool Lexer::token (std::string& result, Type& type)
         result += utf8_character (_n0);
         shift ();
       }
+      else if ((_n0 == '+' || _n0 == '-') && is_ident_start (_n1))
+      {
+        type = typeTag;
+        result += utf8_character (_n0);
+        shift ();
+      }
       else if (is_triple_op (_n0, _n1, _n2))
       {
         type = typeOperator;
@@ -199,6 +205,18 @@ bool Lexer::token (std::string& result, Type& type)
       }
       break;
 
+    case typeTag:
+      if (is_ident_start (_n0))
+      {
+        result += utf8_character (_n0);
+        shift ();
+      }
+      else
+      {
+        return true;
+      }
+      break;
+
     case typeIdentifier:
       if (is_ident (_n0))
       {
@@ -552,6 +570,7 @@ const std::string Lexer::type_name (const Type& type)
   case Lexer::typeEscapeUnicode:     return "EscapeUnicode";
   case Lexer::typeDate:              return "Date";
   case Lexer::typeDuration:          return "Duration";
+  case Lexer::typeTag:               return "Tag";
   }
 }
 
diff --git a/src/Lexer.h b/src/Lexer.h
index 87ed9754c..285330647 100644
--- a/src/Lexer.h
+++ b/src/Lexer.h
@@ -52,6 +52,7 @@ public:
     typeOperator,
     typeDate,
     typeDuration,
+    typeTag,
   };
 
   Lexer (const std::string&);
diff --git a/test/lexer.t.cpp b/test/lexer.t.cpp
index d6c39a62b..25afa413f 100644
--- a/test/lexer.t.cpp
+++ b/test/lexer.t.cpp
@@ -36,7 +36,7 @@ Context context;
 ////////////////////////////////////////////////////////////////////////////////
 int main (int argc, char** argv)
 {
-  UnitTest t (203);
+  UnitTest t (212);
 
   std::vector <std::pair <std::string, Lexer::Type> > tokens;
   std::string token;
@@ -328,6 +328,26 @@ int main (int argc, char** argv)
   t.is (tokens[1].first,                      "10th",                 "tokens[1] == '10th'");
   t.is (tokens[1].second,                     Lexer::typeIdentifier,  "tokens[1] == typeIdentifier");
 
+  // Test tag recognition.
+  Lexer l9 ("+with -WITHOUT + 2");
+  l9.ambiguity (false);
+  tokens.clear ();
+  while (l9.token (token, type))
+  {
+    std::cout << "# «" << token << "» " << type  << " " << Lexer::type_name (type) << "\n";
+    tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
+  }
+
+  t.is ((int)tokens.size (),                  4,                      "4 tokens");
+  t.is (tokens[0].first,                      "+with",                "tokens[0] == '+with'");
+  t.is (tokens[0].second,                     Lexer::typeTag,         "tokens[0] == typeTag");
+  t.is (tokens[1].first,                      "-WITHOUT",             "tokens[1] == '-WITHOUT'");
+  t.is (tokens[1].second,                     Lexer::typeTag,         "tokens[1] == typeTag");
+  t.is (tokens[2].first,                      "+",                    "tokens[2] == '+'");
+  t.is (tokens[2].second,                     Lexer::typeOperator,    "tokens[2] == typeOperator");
+  t.is (tokens[3].first,                      "2",                    "tokens[3] == '2'");
+  t.is (tokens[3].second,                     Lexer::typeNumber,      "tokens[3] == typeNumber");
+
   // void word_split (std::vector<std::string>&, const std::string&);
   std::string unsplit = " ( A or B ) ";
   std::vector <std::string> items;