From aab23692f1a2f2964b43c7bd9e02741e110f1033 Mon Sep 17 00:00:00 2001 From: Paul Beckingham Date: Sun, 7 Sep 2014 01:17:48 -0400 Subject: [PATCH] Lexer - Added a new type Lexer::typeTag. --- src/Lexer.cpp | 19 +++++++++++++++++++ src/Lexer.h | 1 + test/lexer.t.cpp | 22 +++++++++++++++++++++- 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/src/Lexer.cpp b/src/Lexer.cpp index 6684a82ec..1ff303654 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -126,6 +126,12 @@ bool Lexer::token (std::string& result, Type& type) result += utf8_character (_n0); shift (); } + else if ((_n0 == '+' || _n0 == '-') && is_ident_start (_n1)) + { + type = typeTag; + result += utf8_character (_n0); + shift (); + } else if (is_triple_op (_n0, _n1, _n2)) { type = typeOperator; @@ -199,6 +205,18 @@ bool Lexer::token (std::string& result, Type& type) } break; + case typeTag: + if (is_ident_start (_n0)) + { + result += utf8_character (_n0); + shift (); + } + else + { + return true; + } + break; + case typeIdentifier: if (is_ident (_n0)) { @@ -552,6 +570,7 @@ const std::string Lexer::type_name (const Type& type) case Lexer::typeEscapeUnicode: return "EscapeUnicode"; case Lexer::typeDate: return "Date"; case Lexer::typeDuration: return "Duration"; + case Lexer::typeTag: return "Tag"; } } diff --git a/src/Lexer.h b/src/Lexer.h index 87ed9754c..285330647 100644 --- a/src/Lexer.h +++ b/src/Lexer.h @@ -52,6 +52,7 @@ public: typeOperator, typeDate, typeDuration, + typeTag, }; Lexer (const std::string&); diff --git a/test/lexer.t.cpp b/test/lexer.t.cpp index d6c39a62b..25afa413f 100644 --- a/test/lexer.t.cpp +++ b/test/lexer.t.cpp @@ -36,7 +36,7 @@ Context context; //////////////////////////////////////////////////////////////////////////////// int main (int argc, char** argv) { - UnitTest t (203); + UnitTest t (212); std::vector <std::pair <std::string, Lexer::Type> > tokens; std::string token; @@ -328,6 +328,26 @@ int main (int argc, char** argv) t.is (tokens[1].first, "10th", "tokens[1] == '10th'"); t.is (tokens[1].second,
Lexer::typeIdentifier, "tokens[1] == typeIdentifier"); + // Test tag recognition. + Lexer l9 ("+with -WITHOUT + 2"); + l9.ambiguity (false); + tokens.clear (); + while (l9.token (token, type)) + { + std::cout << "# «" << token << "» " << type << " " << Lexer::type_name (type) << "\n"; + tokens.push_back (std::pair <std::string, Lexer::Type> (token, type)); + } + + t.is ((int)tokens.size (), 4, "4 tokens"); + t.is (tokens[0].first, "+with", "tokens[0] == '+with'"); + t.is (tokens[0].second, Lexer::typeTag, "tokens[0] == typeTag"); + t.is (tokens[1].first, "-WITHOUT", "tokens[1] == '-WITHOUT'"); + t.is (tokens[1].second, Lexer::typeTag, "tokens[1] == typeTag"); + t.is (tokens[2].first, "+", "tokens[2] == '+'"); + t.is (tokens[2].second, Lexer::typeOperator, "tokens[2] == typeOperator"); + t.is (tokens[3].first, "2", "tokens[3] == '2'"); + t.is (tokens[3].second, Lexer::typeNumber, "tokens[3] == typeNumber"); + // void word_split (std::vector<std::string>&, const std::string&); std::string unsplit = " ( A or B ) "; std::vector <std::string> items;