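- Added a new type Lexer::typeTag, for tokens such as '+with' and '-WITHOUT' (a '+' or '-' immediately followed by an identifier-start character).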
This commit is contained in:
Paul Beckingham 2014-09-07 01:17:48 -04:00
parent 0b9c84511b
commit aab23692f1
3 changed files with 41 additions and 1 deletions

View file

@ -126,6 +126,12 @@ bool Lexer::token (std::string& result, Type& type)
result += utf8_character (_n0); result += utf8_character (_n0);
shift (); shift ();
} }
else if ((_n0 == '+' || _n0 == '-') && is_ident_start (_n1))
{
type = typeTag;
result += utf8_character (_n0);
shift ();
}
else if (is_triple_op (_n0, _n1, _n2)) else if (is_triple_op (_n0, _n1, _n2))
{ {
type = typeOperator; type = typeOperator;
@ -199,6 +205,18 @@ bool Lexer::token (std::string& result, Type& type)
} }
break; break;
case typeTag:
if (is_ident_start (_n0))
{
result += utf8_character (_n0);
shift ();
}
else
{
return true;
}
break;
case typeIdentifier: case typeIdentifier:
if (is_ident (_n0)) if (is_ident (_n0))
{ {
@ -552,6 +570,7 @@ const std::string Lexer::type_name (const Type& type)
case Lexer::typeEscapeUnicode: return "EscapeUnicode"; case Lexer::typeEscapeUnicode: return "EscapeUnicode";
case Lexer::typeDate: return "Date"; case Lexer::typeDate: return "Date";
case Lexer::typeDuration: return "Duration"; case Lexer::typeDuration: return "Duration";
case Lexer::typeTag: return "Tag";
} }
} }

View file

@ -52,6 +52,7 @@ public:
typeOperator, typeOperator,
typeDate, typeDate,
typeDuration, typeDuration,
typeTag,
}; };
Lexer (const std::string&); Lexer (const std::string&);

View file

@ -36,7 +36,7 @@ Context context;
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
int main (int argc, char** argv) int main (int argc, char** argv)
{ {
UnitTest t (203); UnitTest t (212);
std::vector <std::pair <std::string, Lexer::Type> > tokens; std::vector <std::pair <std::string, Lexer::Type> > tokens;
std::string token; std::string token;
@ -328,6 +328,26 @@ int main (int argc, char** argv)
t.is (tokens[1].first, "10th", "tokens[1] == '10th'"); t.is (tokens[1].first, "10th", "tokens[1] == '10th'");
t.is (tokens[1].second, Lexer::typeIdentifier, "tokens[1] == typeIdentifier"); t.is (tokens[1].second, Lexer::typeIdentifier, "tokens[1] == typeIdentifier");
// Test tag recognition.
Lexer l9 ("+with -WITHOUT + 2");
l9.ambiguity (false);
tokens.clear ();
while (l9.token (token, type))
{
std::cout << "# «" << token << "» " << type << " " << Lexer::type_name (type) << "\n";
tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
}
t.is ((int)tokens.size (), 4, "4 tokens");
t.is (tokens[0].first, "+with", "tokens[0] == '+with'");
t.is (tokens[0].second, Lexer::typeTag, "tokens[0] == typeTag");
t.is (tokens[1].first, "-WITHOUT", "tokens[1] == '-WITHOUT'");
t.is (tokens[1].second, Lexer::typeTag, "tokens[1] == typeTag");
t.is (tokens[2].first, "+", "tokens[2] == '+'");
t.is (tokens[2].second, Lexer::typeOperator, "tokens[2] == typeOperator");
t.is (tokens[3].first, "2", "tokens[3] == '2'");
t.is (tokens[3].second, Lexer::typeNumber, "tokens[3] == typeNumber");
// void word_split (std::vector<std::string>&, const std::string&); // void word_split (std::vector<std::string>&, const std::string&);
std::string unsplit = " ( A or B ) "; std::string unsplit = " ( A or B ) ";
std::vector <std::string> items; std::vector <std::string> items;