Lexer

- Added a new type Lexer::typeTag.
2025-06-26 10:54:26 +02:00 · 2014-09-07 01:17:48 -04:00 · 2014-09-07 01:17:48 -04:00 · aab23692f1
commit aab23692f1
parent 0b9c84511b
3 changed files with 41 additions and 1 deletions
--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@@ -126,6 +126,12 @@ bool Lexer::token (std::string& result, Type& type)
        result += utf8_character (_n0);
        shift ();
      }
+      else if ((_n0 == '+' || _n0 == '-') && is_ident_start (_n1))
+      {
+        type = typeTag;
+        result += utf8_character (_n0);
+        shift ();
+      }
      else if (is_triple_op (_n0, _n1, _n2))
      {
        type = typeOperator;
@@ -199,6 +205,18 @@ bool Lexer::token (std::string& result, Type& type)
      }
      break;

+    case typeTag:
+      if (is_ident_start (_n0))
+      {
+        result += utf8_character (_n0);
+        shift ();
+      }
+      else
+      {
+        return true;
+      }
+      break;
+
    case typeIdentifier:
      if (is_ident (_n0))
      {
@@ -552,6 +570,7 @@ const std::string Lexer::type_name (const Type& type)
  case Lexer::typeEscapeUnicode:     return "EscapeUnicode";
  case Lexer::typeDate:              return "Date";
  case Lexer::typeDuration:          return "Duration";
+  case Lexer::typeTag:               return "Tag";
  }
 }

--- a/src/Lexer.h
+++ b/src/Lexer.h
@@ -52,6 +52,7 @@ public:
    typeOperator,
    typeDate,
    typeDuration,
+    typeTag,
  };

  Lexer (const std::string&);
--- a/test/lexer.t.cpp
+++ b/test/lexer.t.cpp
@@ -36,7 +36,7 @@ Context context;
 ////////////////////////////////////////////////////////////////////////////////
 int main (int argc, char** argv)
 {
-  UnitTest t (203);
+  UnitTest t (212);

  std::vector <std::pair <std::string, Lexer::Type> > tokens;
  std::string token;
@@ -328,6 +328,26 @@ int main (int argc, char** argv)
  t.is (tokens[1].first,                      "10th",                 "tokens[1] == '10th'");
  t.is (tokens[1].second,                     Lexer::typeIdentifier,  "tokens[1] == typeIdentifier");

+  // Test tag recognition.
+  Lexer l9 ("+with -WITHOUT + 2");
+  l9.ambiguity (false);
+  tokens.clear ();
+  while (l9.token (token, type))
+  {
+    std::cout << "# «" << token << "» " << type  << " " << Lexer::type_name (type) << "\n";
+    tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
+  }
+
+  t.is ((int)tokens.size (),                  4,                      "4 tokens");
+  t.is (tokens[0].first,                      "+with",                "tokens[0] == '+with'");
+  t.is (tokens[0].second,                     Lexer::typeTag,         "tokens[0] == typeTag");
+  t.is (tokens[1].first,                      "-WITHOUT",             "tokens[1] == '-WITHOUT'");
+  t.is (tokens[1].second,                     Lexer::typeTag,         "tokens[1] == typeTag");
+  t.is (tokens[2].first,                      "+",                    "tokens[2] == '+'");
+  t.is (tokens[2].second,                     Lexer::typeOperator,    "tokens[2] == typeOperator");
+  t.is (tokens[3].first,                      "2",                    "tokens[3] == '2'");
+  t.is (tokens[3].second,                     Lexer::typeNumber,      "tokens[3] == typeNumber");
+
  // void word_split (std::vector<std::string>&, const std::string&);
  std::string unsplit = " ( A or B ) ";
  std::vector <std::string> items;