Expressions

- Reordered operator table so that longer operators match first, thus
  disambiguating between ! and !=.
- Eliminated Expression::expand_expression.
- Modified Nibbler to know what a DOM reference looks like.
- Removed the alphabetic operator aliases (lt, le, gt, ge, not, eq, ne) because
  these words are common in descriptions (e.g. French: le, ne).
- Modified Arguments and Nibbler unit tests.
This commit is contained in:
Paul Beckingham 2011-06-13 00:45:06 -04:00
parent a749f83da3
commit d6670ba198
7 changed files with 140 additions and 185 deletions

View file

@ -89,6 +89,8 @@ static const char* modifierNames[] =
};
// Supported operators, borrowed from C++, particularly the precedence.
// Note: table is sorted by length of operator string, so searches match
// longest first.
static struct
{
std::string op;
@ -99,39 +101,27 @@ static struct
} operators[] =
{
// Operator Precedence Type Symbol Associativity
{ "and", 5, 'b', 0, 'l' }, // Conjunction
{ "xor", 4, 'b', 0, 'l' }, // Disjunction
{ "or", 3, 'b', 0, 'l' }, // Disjunction
{ "<=", 10, 'b', 1, 'l' }, // Less than or equal
{ ">=", 10, 'b', 1, 'l' }, // Greater than or equal
{ "!~", 9, 'b', 1, 'l' }, // Regex non-match
{ "!=", 9, 'b', 1, 'l' }, // Inequal
{ "=", 9, 'b', 1, 'l' }, // Equal
{ "^", 16, 'b', 1, 'r' }, // Exponent
{ ">", 10, 'b', 1, 'l' }, // Greater than
{ "~", 9, 'b', 1, 'l' }, // Regex match
{ "!", 15, 'u', 1, 'r' }, // Not
{ "not", 15, 'u', 0, 'r' }, // Not
{ "-", 15, 'u', 1, 'r' }, // Unary minus
{ "*", 13, 'b', 1, 'l' }, // Multiplication
{ "/", 13, 'b', 1, 'l' }, // Division
{ "%", 13, 'b', 1, 'l' }, // Modulus
{ "+", 12, 'b', 1, 'l' }, // Addition
{ "-", 12, 'b', 1, 'l' }, // Subtraction
{ "<", 10, 'b', 1, 'l' }, // Less than
{ "lt", 10, 'b', 0, 'l' }, // Less than
{ "<=", 10, 'b', 1, 'l' }, // Less than or equal
{ "le", 10, 'b', 0, 'l' }, // Less than or equal
{ ">=", 10, 'b', 1, 'l' }, // Greater than or equal
{ "ge", 10, 'b', 0, 'l' }, // Greater than or equal
{ ">", 10, 'b', 1, 'l' }, // Greater than
{ "gt", 10, 'b', 0, 'l' }, // Greater than
{ "~", 9, 'b', 1, 'l' }, // Regex match
{ "!~", 9, 'b', 1, 'l' }, // Regex non-match
{ "=", 9, 'b', 1, 'l' }, // Equal
{ "eq", 9, 'b', 0, 'l' }, // Equal
{ "!=", 9, 'b', 1, 'l' }, // Inequal
{ "ne", 9, 'b', 0, 'l' }, // Inequal
{ "and", 5, 'b', 0, 'l' }, // Conjunction
{ "or", 4, 'b', 0, 'l' }, // Disjunction
{ "(", 0, 'b', 1, 'l' }, // Precedence start
{ ")", 0, 'b', 1, 'l' }, // Precedence end
};
@ -724,8 +714,6 @@ bool Arguments::is_attmod (const std::string& input)
n.getUntilEOS (value) ||
n.depleted ())
{
return ! is_expression (value);
// Validate and canonicalize attribute and modifier names.
if (is_attribute (name, name) &&
is_modifier (modifier))

View file

@ -25,6 +25,7 @@
//
////////////////////////////////////////////////////////////////////////////////
#include <iostream> // TODO Remove.
#include <sstream>
#include <Context.h>
#include <Lexer.h>
@ -61,7 +62,7 @@ Expression::Expression (Arguments& arguments)
expand_attr ();
expand_attmod ();
expand_word ();
expand_expression ();
expand_tokens ();
postfix ();
}
}
@ -79,30 +80,35 @@ bool Expression::eval (Task& task)
std::vector <std::pair <std::string, std::string> >::iterator arg;
for (arg = _args.begin (); arg != _args.end (); ++arg)
{
// if (arg->second != "op")
// value_stack.push_back (Variant (*arg));
if (arg->second == "op")
{
char type;
int precedence;
char associativity;
Arguments::is_operator (arg->first, type, precedence, associativity);
// else
// {
// if (arg->first == "+")
// {
// pop
// pop
// add the two operands
// push result
// }
// else if (arg->first == "?")
// {
// }
// else if (arg->first == "?")
// {
// }
// else
// throw std::string ("Unsupported operator '") + arg->first + "'.";
// }
if (arg->first == "+")
{
// TODO pop
// TODO pop
// TODO add the operators
// TODO push the result
}
/*
else
throw std::string ("Unsupported operator '") + arg->first + "'.";
*/
}
/*
else
value_stack.push_back (Variant (*arg));
*/
}
return true;
// TODO Return the value that is on the stack.
return false;
}
////////////////////////////////////////////////////////////////////////////////
@ -191,6 +197,7 @@ void Expression::expand_sequence ()
void Expression::expand_tokens ()
{
Arguments temp;
bool delta = false;
// Get a list of all operators.
std::vector <std::string> operators = Arguments::operator_list ();
@ -208,24 +215,48 @@ void Expression::expand_tokens ()
std::vector <std::pair <std::string, std::string> >::iterator arg;
for (arg = _args.begin (); arg != _args.end (); ++arg)
{
Nibbler n (arg->first);
if (arg->second == "exp")
{
// Nibble each arg token by token.
Nibbler n (arg->first);
if (n.getQuoted ('"', s, true) ||
n.getQuoted ('\'', s, true))
temp.push_back (std::make_pair (s, "string"));
while (! n.depleted ())
{
if (n.getQuoted ('"', s, true) ||
n.getQuoted ('\'', s, true))
temp.push_back (std::make_pair (s, "string"));
else if (n.getNumber (d))
temp.push_back (std::make_pair (format (d), "number"));
else if (n.getOneOf (operators, s))
temp.push_back (std::make_pair (s, "op"));
else if (n.getInt (i))
temp.push_back (std::make_pair (format (i), "int"));
else if (n.getDOM (s))
temp.push_back (std::make_pair (s, "dom"));
else if (n.getDateISO (t))
temp.push_back (std::make_pair (Date (t).toISO (), "date"));
else if (n.getNumber (d))
temp.push_back (std::make_pair (format (d), "number"));
else if (n.getDate (date_format, t))
temp.push_back (std::make_pair (Date (t).toString (date_format), "date"));
else if (n.getInt (i))
temp.push_back (std::make_pair (format (i), "int"));
else if (n.getDateISO (t))
temp.push_back (std::make_pair (Date (t).toISO (), "date"));
else if (n.getDate (date_format, t))
temp.push_back (std::make_pair (Date (t).toString (date_format), "date"));
else
{
if (! n.getUntilWS (s))
n.getUntilEOS (s);
temp.push_back (std::make_pair (s, "?"));
}
n.skipWS ();
}
delta = true;
}
else
temp.push_back (*arg);
}
@ -519,109 +550,6 @@ void Expression::expand_word ()
}
}
////////////////////////////////////////////////////////////////////////////////
// Look for "exp" arguments and replace each one with a sequence of typed
// tokens.  The unquoted expression is split on spaces, and each word becomes
// one of:
//   "date"      - Date::valid accepts it under the configured dateformat
//   "duration"  - Duration::valid accepts it
//   "int"       - Arguments::is_id accepts it
//   "string"    - Arguments::is_uuid accepts it
//   "op"        - Arguments::is_operator accepts it
// A word matching none of the above is handed to Lexer::tokenize, and every
// resulting piece is classified the same way, falling back to "lvalue".
// Arguments that are not "exp" are copied through unchanged.  _args is only
// replaced (and dumped) when at least one "exp" argument was found.
void Expression::expand_expression ()
{
  Arguments temp;
  bool delta = false;

  // Read the date format once instead of once per token.
  const std::string dateformat = context.config.get ("dateformat");

  // Get a list of all operators.
  std::vector <std::string> operators = Arguments::operator_list ();

  // Look for all 'exp' args.
  std::vector <std::pair <std::string, std::string> >::iterator arg;
  for (arg = _args.begin (); arg != _args.end (); ++arg)
  {
    if (arg->second == "exp")
    {
      // Split expression into space-separated tokens.
      std::vector <std::string> tokens;
      split (tokens, unquoteText (arg->first), ' ');

      std::vector <std::string>::iterator token;
      for (token = tokens.begin (); token != tokens.end (); ++token)
      {
        if (Date::valid (*token, dateformat))
          temp.push_back (std::make_pair (*token, "date"));

        else if (Duration::valid (*token))
          temp.push_back (std::make_pair (*token, "duration"));

        else if (Arguments::is_id (*token))
          temp.push_back (std::make_pair (*token, "int"));

        else if (Arguments::is_uuid (*token))
          temp.push_back (std::make_pair (*token, "string"));

        else if (Arguments::is_operator (*token))
          temp.push_back (std::make_pair (*token, "op"));

        // The expression does not appear to be syntactic sugar, so it should be
        // lexed.
        else
        {
          Lexer lexer (*token);
          lexer.skipWhitespace (true);
          lexer.coalesceAlpha (true);
          lexer.coalesceDigits (true);
          lexer.coalesceQuoted (true);

          // Each operator of length > 1 is a special token.
          std::vector <std::string>::iterator op;
          for (op = operators.begin (); op != operators.end (); ++op)
            if (op->length () > 1)
              lexer.specialToken (*op);

          std::vector <std::string> ltokens;
          lexer.tokenize (ltokens);

          // Classify each lexed piece.  The id and uuid tests appear once
          // only: the second copy that used to follow the operator test
          // could never be reached.
          std::vector <std::string>::iterator ltoken;
          for (ltoken = ltokens.begin (); ltoken != ltokens.end (); ++ltoken)
          {
            if (Date::valid (*ltoken, dateformat))
              temp.push_back (std::make_pair (*ltoken, "date"));

            else if (Duration::valid (*ltoken))
              temp.push_back (std::make_pair (*ltoken, "duration"));

            else if (Arguments::is_id (*ltoken))
              temp.push_back (std::make_pair (*ltoken, "int"));

            else if (Arguments::is_uuid (*ltoken))
              temp.push_back (std::make_pair (*ltoken, "string"));

            else if (Arguments::is_operator (*ltoken))
              temp.push_back (std::make_pair (*ltoken, "op"));

            else
              temp.push_back (std::make_pair (*ltoken, "lvalue"));
          }
        }
      }

      delta = true;
    }
    else
      temp.push_back (*arg);
  }

  if (delta)
  {
    _args.swap (temp);
    _args.dump ("Expression::expand_expression");
  }
}
////////////////////////////////////////////////////////////////////////////////
// Dijkstra Shunting Algorithm.
// http://en.wikipedia.org/wiki/Shunting-yard_algorithm

View file

@ -48,7 +48,6 @@ private:
void expand_attr ();
void expand_attmod ();
void expand_word ();
void expand_expression ();
void expand_tokens ();
void postfix ();

View file

@ -908,6 +908,32 @@ bool Nibbler::getOneOf (
return false;
}
////////////////////////////////////////////////////////////////////////////////
// Extracts a DOM reference (for example "one.two.three", "1.project" or
// "a0-a0-a0.due") beginning at the cursor.
//
// The reference is the longest run of characters that are each either one of
// '.', '-', '_', or anything that is neither punctuation nor whitespace.
//
// On success the run is stored in 'found', the cursor is moved past it, and
// true is returned.  If no such character is at the cursor, false is returned
// and neither 'found' nor the cursor is modified.
bool Nibbler::getDOM (std::string& found)
{
  const std::string::size_type start  = mCursor;
  const std::string::size_type length = mInput.length ();
  std::string::size_type i = start;

  // Bound the scan by the input length.  The terminating NUL is neither
  // punctuation nor whitespace, so an unbounded scan would walk off the end
  // of the buffer.
  while (i < length)
  {
    // The <cctype> classifiers require a value representable as unsigned char.
    const unsigned char c = static_cast <unsigned char> (mInput[i]);

    // Digits need no separate test: they are neither punctuation nor space.
    if (c != '.' &&
        c != '-' &&
        c != '_' &&
        (ispunct (c) || isspace (c)))
      break;

    ++i;
  }

  if (i > start)
  {
    found = mInput.substr (start, i - start);
    mCursor = i;
    return true;
  }

  return false;
}
////////////////////////////////////////////////////////////////////////////////
bool Nibbler::skipN (const int quantity /* = 1 */)
{

View file

@ -65,6 +65,7 @@ public:
bool getDateISO (time_t&);
bool getDate (const std::string&, time_t&);
bool getOneOf (const std::vector <std::string>&, std::string&);
bool getDOM (std::string&);
bool skipN (const int quantity = 1);
bool skip (char);

View file

@ -34,7 +34,7 @@ Context context;
////////////////////////////////////////////////////////////////////////////////
int main (int argc, char** argv)
{
UnitTest t (103);
UnitTest t (97);
const char* fake[] =
{
@ -66,21 +66,21 @@ int main (int argc, char** argv)
"combine good");
// bool is_attr (const std::string&);
t.ok (Arguments::is_attr ("name:"), "name: -> attr");
t.ok (Arguments::is_attr ("name:\"\""), "name:\"\" -> attr");
t.ok (Arguments::is_attr ("name:one"), "name:one -> attr");
t.ok (Arguments::is_attr ("name:\"one\""), "name:\"one\" -> attr");
t.ok (Arguments::is_attr ("name:\"one two\""), "name:\"one two\" -> attr");
t.ok (Arguments::is_attr ("project:"), "name: -> attr");
t.ok (Arguments::is_attr ("project:\"\""), "name:\"\" -> attr");
t.ok (Arguments::is_attr ("project:one"), "name:one -> attr");
t.ok (Arguments::is_attr ("project:\"one\""), "name:\"one\" -> attr");
t.ok (Arguments::is_attr ("project:\"one two\""), "name:\"one two\" -> attr");
t.notok (Arguments::is_attr ("name"), "name -> not attr");
t.notok (Arguments::is_attr ("(name=val and 1<2)"), "(name=val and 1<2) -> not attr");
// bool is_attmod (const std::string&);
t.ok (Arguments::is_attmod ("name.is:"), "name.is: -> attmod");
t.ok (Arguments::is_attmod ("name.is:\"\""), "name.is:\"\" -> attmod");
t.ok (Arguments::is_attmod ("name.is:one"), "name.is:one -> attmod");
t.ok (Arguments::is_attmod ("name.is:\"one\""), "name.is:\"one\" -> attmod");
t.ok (Arguments::is_attmod ("name.is:\"one two\""), "name.is:\"one two\" -> attmod");
t.ok (Arguments::is_attmod ("project.is:"), "name.is: -> attmod");
t.ok (Arguments::is_attmod ("project.is:\"\""), "name.is:\"\" -> attmod");
t.ok (Arguments::is_attmod ("project.is:one"), "name.is:one -> attmod");
t.ok (Arguments::is_attmod ("project.is:\"one\""), "name.is:\"one\" -> attmod");
t.ok (Arguments::is_attmod ("project.is:\"one two\""), "name.is:\"one two\" -> attmod");
t.notok (Arguments::is_attmod ("name"), "name -> not attmod");
t.notok (Arguments::is_attmod ("(name=value and 1<2"), "(name=value and 1<2 -> not attmod");
@ -133,7 +133,6 @@ int main (int argc, char** argv)
// bool is_operator (const std::string&);
t.ok (Arguments::is_operator ("^"), "^ -> operator");
t.ok (Arguments::is_operator ("!"), "! -> operator");
t.ok (Arguments::is_operator ("not"), "not -> operator");
t.ok (Arguments::is_operator ("-"), "- -> operator");
t.ok (Arguments::is_operator ("*"), "* -> operator");
t.ok (Arguments::is_operator ("/"), "/ -> operator");
@ -141,20 +140,15 @@ int main (int argc, char** argv)
t.ok (Arguments::is_operator ("+"), "+ -> operator");
t.ok (Arguments::is_operator ("-"), "- -> operator");
t.ok (Arguments::is_operator ("<"), "< -> operator");
t.ok (Arguments::is_operator ("lt"), "lt -> operator");
t.ok (Arguments::is_operator ("<="), "<= -> operator");
t.ok (Arguments::is_operator ("le"), "le -> operator");
t.ok (Arguments::is_operator (">="), ">= -> operator");
t.ok (Arguments::is_operator ("ge"), "ge -> operator");
t.ok (Arguments::is_operator (">"), "> -> operator");
t.ok (Arguments::is_operator ("gt"), "gt -> operator");
t.ok (Arguments::is_operator ("~"), "~ -> operator");
t.ok (Arguments::is_operator ("!~"), "!~ -> operator");
t.ok (Arguments::is_operator ("="), "= -> operator");
t.ok (Arguments::is_operator ("eq"), "eq -> operator");
t.ok (Arguments::is_operator ("!="), "!= -> operator");
t.ok (Arguments::is_operator ("ne"), "ne -> operator");
t.ok (Arguments::is_operator ("and"), "and -> operator");
t.ok (Arguments::is_operator ("xor"), "xor -> operator");
t.ok (Arguments::is_operator ("or"), "or -> operator");
t.ok (Arguments::is_operator ("("), "( -> operator");
t.ok (Arguments::is_operator (")"), ") -> operator");
@ -165,7 +159,7 @@ int main (int argc, char** argv)
t.ok (Arguments::is_expression ("1+1"), "1+1 -> expression");
t.ok (Arguments::is_expression ("a~b"), "a~b -> expression");
t.ok (Arguments::is_expression ("(1)"), "(1) -> expression");
t.ok (Arguments::is_expression ("not a"), "not a -> expression");
t.ok (Arguments::is_expression ("!a"), "!a -> expression");
// static bool valid_modifier (const std::string&);
t.ok (Arguments::valid_modifier ("before"), "before -> modifier");

View file

@ -34,7 +34,7 @@ Context context;
////////////////////////////////////////////////////////////////////////////////
int main (int argc, char** argv)
{
UnitTest t (242);
UnitTest t (257);
try
{
@ -408,7 +408,26 @@ int main (int argc, char** argv)
t.is (s, "two", " 'twothreefour': getOneOf () -> two");
t.ok (n.getOneOf (options, s), " 'threefour': getOneOf () -> true");
t.is (s, "three", " 'threefour': getOneOf () -> three");
t.notok (n.getOneOf (options, s), " 'four': getOneOf () -> fasle");
t.notok (n.getOneOf (options, s), " 'four': getOneOf () -> false");
// bool getDOM (std::string&);
t.diag ("Nibbler::getDOM");
n = Nibbler ("one one.two one.two.three 1.project a0-a0-a0.due");
t.ok (n.getDOM (s), "'one' getDOM -> ok");
t.is (s, "one", "'one' getDOM -> 'one'");
t.ok (n.skipWS (), "skipWS");
t.ok (n.getDOM (s), "'one.two' getDOM -> ok");
t.is (s, "one.two", "'one.two' getDOM -> ok");
t.ok (n.skipWS (), "skipWS");
t.ok (n.getDOM (s), "'one.two.three' getDOM -> ok");
t.is (s, "one.two.three", "'one.two.three' getDOM -> ok");
t.ok (n.skipWS (), "skipWS");
t.ok (n.getDOM (s), "'1.project' getDOM -> ok");
t.is (s, "1.project", "'1.project' getDOM -> ok");
t.ok (n.skipWS (), "skipWS");
t.ok (n.getDOM (s), "'a0-a0-a0.due' getDOM -> ok");
t.is (s, "a0-a0-a0.due", "'a0-a0-a0.due' getDOM -> ok");
t.ok (n.depleted (), "depleted");
// bool getUntilEOL (std::string&);
t.diag ("Nibbler::getUntilEOL");