- Added a new type Lexer::typeTag.
This commit is contained in:
Paul Beckingham 2014-09-07 01:17:48 -04:00
parent 0b9c84511b
commit aab23692f1
3 changed files with 41 additions and 1 deletion

View file

@ -126,6 +126,12 @@ bool Lexer::token (std::string& result, Type& type)
result += utf8_character (_n0);
shift ();
}
// A '+' or '-' immediately followed by an identifier-start character begins
// a tag token. The sign itself is consumed as the first character of the
// token. This test is made before the triple-operator check that follows.
else if ((_n0 == '+' || _n0 == '-') && is_ident_start (_n1))
{
type = typeTag;
result += utf8_character (_n0);
shift ();
}
else if (is_triple_op (_n0, _n1, _n2))
{
type = typeOperator;
@ -199,6 +205,18 @@ bool Lexer::token (std::string& result, Type& type)
}
break;
// Tag state: keep appending characters while they satisfy is_ident_start.
// The first character that does not satisfy it ends the token; the function
// returns true and leaves that character unconsumed.
// NOTE(review): the typeIdentifier case below continues on is_ident, whereas
// this case continues on is_ident_start. If is_ident_start rejects digits, a
// tag such as "+tag1" would end after "+tag" - confirm this is intended.
case typeTag:
if (is_ident_start (_n0))
{
result += utf8_character (_n0);
shift ();
}
else
{
return true;
}
break;
case typeIdentifier:
if (is_ident (_n0))
{
@ -552,6 +570,7 @@ const std::string Lexer::type_name (const Type& type)
case Lexer::typeEscapeUnicode: return "EscapeUnicode";
case Lexer::typeDate: return "Date";
case Lexer::typeDuration: return "Duration";
case Lexer::typeTag: return "Tag";
}
}

View file

@ -52,6 +52,7 @@ public:
typeOperator,
typeDate,
typeDuration,
typeTag,
};
Lexer (const std::string&);

View file

@ -36,7 +36,7 @@ Context context;
////////////////////////////////////////////////////////////////////////////////
int main (int argc, char** argv)
{
UnitTest t (203);
UnitTest t (212);
std::vector <std::pair <std::string, Lexer::Type> > tokens;
std::string token;
@ -328,6 +328,26 @@ int main (int argc, char** argv)
t.is (tokens[1].first, "10th", "tokens[1] == '10th'");
t.is (tokens[1].second, Lexer::typeIdentifier, "tokens[1] == typeIdentifier");
// Test tag recognition.
// A sign directly followed by letters is expected to lex as one tag token,
// while a sign followed by a space is expected to remain an operator.
Lexer l9 ("+with -WITHOUT + 2");
l9.ambiguity (false);
tokens.clear ();
while (l9.token (token, type))
{
// Diagnostic output: each token with its numeric type and its type name.
std::cout << "# «" << token << "» " << type << " " << Lexer::type_name (type) << "\n";
tokens.push_back (std::pair <std::string, Lexer::Type> (token, type));
}
// Expected: two tags, then the bare '+' as an operator, then the number.
// These are nine checks; presumably the UnitTest count passed at the top of
// main must be kept in step when checks are added or removed - confirm.
t.is ((int)tokens.size (), 4, "4 tokens");
t.is (tokens[0].first, "+with", "tokens[0] == '+with'");
t.is (tokens[0].second, Lexer::typeTag, "tokens[0] == typeTag");
t.is (tokens[1].first, "-WITHOUT", "tokens[1] == '-WITHOUT'");
t.is (tokens[1].second, Lexer::typeTag, "tokens[1] == typeTag");
t.is (tokens[2].first, "+", "tokens[2] == '+'");
t.is (tokens[2].second, Lexer::typeOperator, "tokens[2] == typeOperator");
t.is (tokens[3].first, "2", "tokens[3] == '2'");
t.is (tokens[3].second, Lexer::typeNumber, "tokens[3] == typeNumber");
// void word_split (std::vector<std::string>&, const std::string&);
std::string unsplit = " ( A or B ) ";
std::vector <std::string> items;