Expressions

- Reordered operator table so that longer operators match first, thus
  disambiguating between ! and !=.
- Eliminated Expression::expand_expression.
- Modified Nibbler to know what a DOM reference looks like.
- Removed the alphabetic equivalents of operators (lt, le, gt, ge, not, eq, ne)
  because these words are common in descriptions (French: le, ne).
- Modified Arguments and Nibbler unit tests.
This commit is contained in:
Paul Beckingham 2011-06-13 00:45:06 -04:00
parent a749f83da3
commit d6670ba198
7 changed files with 140 additions and 185 deletions

View file

@ -89,6 +89,8 @@ static const char* modifierNames[] =
};
// Supported operators, borrowed from C++, particularly the precedence.
// Note: table is sorted by length of operator string, so searches match
// longest first.
static struct
{
std::string op;
@ -99,39 +101,27 @@ static struct
} operators[] =
{
// Operator Precedence Type Symbol Associativity
{ "and", 5, 'b', 0, 'l' }, // Conjunction
{ "xor", 4, 'b', 0, 'l' }, // Disjunction
{ "or", 3, 'b', 0, 'l' }, // Disjunction
{ "<=", 10, 'b', 1, 'l' }, // Less than or equal
{ ">=", 10, 'b', 1, 'l' }, // Greater than or equal
{ "!~", 9, 'b', 1, 'l' }, // Regex non-match
{ "!=", 9, 'b', 1, 'l' }, // Inequal
{ "=", 9, 'b', 1, 'l' }, // Equal
{ "^", 16, 'b', 1, 'r' }, // Exponent
{ ">", 10, 'b', 1, 'l' }, // Greater than
{ "~", 9, 'b', 1, 'l' }, // Regex match
{ "!", 15, 'u', 1, 'r' }, // Not
{ "not", 15, 'u', 0, 'r' }, // Not
{ "-", 15, 'u', 1, 'r' }, // Unary minus
{ "*", 13, 'b', 1, 'l' }, // Multiplication
{ "/", 13, 'b', 1, 'l' }, // Division
{ "%", 13, 'b', 1, 'l' }, // Modulus
{ "+", 12, 'b', 1, 'l' }, // Addition
{ "-", 12, 'b', 1, 'l' }, // Subtraction
{ "<", 10, 'b', 1, 'l' }, // Less than
{ "lt", 10, 'b', 0, 'l' }, // Less than
{ "<=", 10, 'b', 1, 'l' }, // Less than or equal
{ "le", 10, 'b', 0, 'l' }, // Less than or equal
{ ">=", 10, 'b', 1, 'l' }, // Greater than or equal
{ "ge", 10, 'b', 0, 'l' }, // Greater than or equal
{ ">", 10, 'b', 1, 'l' }, // Greater than
{ "gt", 10, 'b', 0, 'l' }, // Greater than
{ "~", 9, 'b', 1, 'l' }, // Regex match
{ "!~", 9, 'b', 1, 'l' }, // Regex non-match
{ "=", 9, 'b', 1, 'l' }, // Equal
{ "eq", 9, 'b', 0, 'l' }, // Equal
{ "!=", 9, 'b', 1, 'l' }, // Inequal
{ "ne", 9, 'b', 0, 'l' }, // Inequal
{ "and", 5, 'b', 0, 'l' }, // Conjunction
{ "or", 4, 'b', 0, 'l' }, // Disjunction
{ "(", 0, 'b', 1, 'l' }, // Precedence start
{ ")", 0, 'b', 1, 'l' }, // Precedence end
};
@ -724,8 +714,6 @@ bool Arguments::is_attmod (const std::string& input)
n.getUntilEOS (value) ||
n.depleted ())
{
return ! is_expression (value);
// Validate and canonicalize attribute and modifier names.
if (is_attribute (name, name) &&
is_modifier (modifier))

View file

@ -25,6 +25,7 @@
//
////////////////////////////////////////////////////////////////////////////////
#include <iostream> // TODO Remove.
#include <sstream>
#include <Context.h>
#include <Lexer.h>
@ -61,7 +62,7 @@ Expression::Expression (Arguments& arguments)
expand_attr ();
expand_attmod ();
expand_word ();
expand_expression ();
expand_tokens ();
postfix ();
}
}
@ -79,30 +80,35 @@ bool Expression::eval (Task& task)
std::vector <std::pair <std::string, std::string> >::iterator arg;
for (arg = _args.begin (); arg != _args.end (); ++arg)
{
// if (arg->second != "op")
// value_stack.push_back (Variant (*arg));
if (arg->second == "op")
{
char type;
int precedence;
char associativity;
Arguments::is_operator (arg->first, type, precedence, associativity);
// else
// {
// if (arg->first == "+")
// {
// pop
// pop
// add the two operands
// push result
// }
// else if (arg->first == "?")
// {
// }
// else if (arg->first == "?")
// {
// }
// else
// throw std::string ("Unsupported operator '") + arg->first + "'.";
// }
if (arg->first == "+")
{
// TODO pop
// TODO pop
// TODO add the operators
// TODO push the result
}
/*
else
throw std::string ("Unsupported operator '") + arg->first + "'.";
*/
}
/*
else
value_stack.push_back (Variant (*arg));
*/
}
return true;
// TODO Return the value that is on the stack.
return false;
}
////////////////////////////////////////////////////////////////////////////////
@ -191,6 +197,7 @@ void Expression::expand_sequence ()
void Expression::expand_tokens ()
{
Arguments temp;
bool delta = false;
// Get a list of all operators.
std::vector <std::string> operators = Arguments::operator_list ();
@ -208,24 +215,48 @@ void Expression::expand_tokens ()
std::vector <std::pair <std::string, std::string> >::iterator arg;
for (arg = _args.begin (); arg != _args.end (); ++arg)
{
Nibbler n (arg->first);
if (arg->second == "exp")
{
// Nibble each arg token by token.
Nibbler n (arg->first);
if (n.getQuoted ('"', s, true) ||
n.getQuoted ('\'', s, true))
temp.push_back (std::make_pair (s, "string"));
while (! n.depleted ())
{
if (n.getQuoted ('"', s, true) ||
n.getQuoted ('\'', s, true))
temp.push_back (std::make_pair (s, "string"));
else if (n.getNumber (d))
temp.push_back (std::make_pair (format (d), "number"));
else if (n.getOneOf (operators, s))
temp.push_back (std::make_pair (s, "op"));
else if (n.getInt (i))
temp.push_back (std::make_pair (format (i), "int"));
else if (n.getDOM (s))
temp.push_back (std::make_pair (s, "dom"));
else if (n.getDateISO (t))
temp.push_back (std::make_pair (Date (t).toISO (), "date"));
else if (n.getNumber (d))
temp.push_back (std::make_pair (format (d), "number"));
else if (n.getDate (date_format, t))
temp.push_back (std::make_pair (Date (t).toString (date_format), "date"));
else if (n.getInt (i))
temp.push_back (std::make_pair (format (i), "int"));
else if (n.getDateISO (t))
temp.push_back (std::make_pair (Date (t).toISO (), "date"));
else if (n.getDate (date_format, t))
temp.push_back (std::make_pair (Date (t).toString (date_format), "date"));
else
{
if (! n.getUntilWS (s))
n.getUntilEOS (s);
temp.push_back (std::make_pair (s, "?"));
}
n.skipWS ();
}
delta = true;
}
else
temp.push_back (*arg);
}
@ -519,109 +550,6 @@ void Expression::expand_word ()
}
}
////////////////////////////////////////////////////////////////////////////////
// Look for "exp" arguments, and replace each one with the typed tokens found
// inside it.  Every other argument is copied through unchanged.
//
// An "exp" argument is unquoted and split on spaces.  Each piece is tested, in
// this order, and the first match decides its type:
//   "date"      Date::valid, using the configured "dateformat"
//   "duration"  Duration::valid
//   "int"       Arguments::is_id
//   "string"    Arguments::is_uuid
//   "op"        Arguments::is_operator
// A piece that matches none of these is handed to Lexer, and each resulting
// lexer token goes through the same tests, falling back to "lvalue".
//
// _args is only replaced (and dumped) when at least one "exp" argument was
// seen.
void Expression::expand_expression ()
{
  Arguments temp;
  bool delta = false;

  // Get a list of all operators.
  std::vector <std::string> operators = Arguments::operator_list ();

  // Look for all 'exp' args.
  std::vector <std::pair <std::string, std::string> >::iterator arg;
  for (arg = _args.begin (); arg != _args.end (); ++arg)
  {
    if (arg->second == "exp")
    {
      // Split expression into space-separated tokens.
      std::vector <std::string> tokens;
      split (tokens, unquoteText (arg->first), ' ');

      std::vector <std::string>::iterator token;
      for (token = tokens.begin (); token != tokens.end (); ++token)
      {
        if (Date::valid (*token, context.config.get ("dateformat")))
          temp.push_back (std::make_pair (*token, "date"));

        else if (Duration::valid (*token))
          temp.push_back (std::make_pair (*token, "duration"));

        else if (Arguments::is_id (*token))
          temp.push_back (std::make_pair (*token, "int"));

        else if (Arguments::is_uuid (*token))
          temp.push_back (std::make_pair (*token, "string"));

        else if (Arguments::is_operator (*token))
          temp.push_back (std::make_pair (*token, "op"));

        // The expression does not appear to be syntactic sugar, so it should be
        // lexed.
        else
        {
          Lexer lexer (*token);
          lexer.skipWhitespace (true);
          lexer.coalesceAlpha (true);
          lexer.coalesceDigits (true);
          lexer.coalesceQuoted (true);

          // Each operator of length > 1 is a special token.
          std::vector <std::string>::iterator op;
          for (op = operators.begin (); op != operators.end (); ++op)
            if (op->length () > 1)
              lexer.specialToken (*op);

          std::vector <std::string> ltokens;
          lexer.tokenize (ltokens);

          // Classify each lexer token with the same ordered tests as above.
          // The original chain repeated the is_id / is_uuid tests after
          // is_operator; those repeats could never be reached because the
          // identical tests earlier in the chain had already failed, so they
          // have been dropped.
          std::vector <std::string>::iterator ltoken;
          for (ltoken = ltokens.begin (); ltoken != ltokens.end (); ++ltoken)
          {
            if (Date::valid (*ltoken, context.config.get ("dateformat")))
              temp.push_back (std::make_pair (*ltoken, "date"));

            else if (Duration::valid (*ltoken))
              temp.push_back (std::make_pair (*ltoken, "duration"));

            else if (Arguments::is_id (*ltoken))
              temp.push_back (std::make_pair (*ltoken, "int"));

            else if (Arguments::is_uuid (*ltoken))
              temp.push_back (std::make_pair (*ltoken, "string"));

            else if (Arguments::is_operator (*ltoken))
              temp.push_back (std::make_pair (*ltoken, "op"));

            // Anything unrecognized is treated as an lvalue.
            else
              temp.push_back (std::make_pair (*ltoken, "lvalue"));
          }
        }
      }

      // At least one "exp" argument was expanded, so _args must be replaced.
      delta = true;
    }
    else
      temp.push_back (*arg);
  }

  if (delta)
  {
    _args.swap (temp);
    _args.dump ("Expression::expand_expression");
  }
}
////////////////////////////////////////////////////////////////////////////////
// Dijkstra Shunting Algorithm.
// http://en.wikipedia.org/wiki/Shunting-yard_algorithm

View file

@ -48,7 +48,6 @@ private:
void expand_attr ();
void expand_attmod ();
void expand_word ();
void expand_expression ();
void expand_tokens ();
void postfix ();

View file

@ -908,6 +908,32 @@ bool Nibbler::getOneOf (
return false;
}
////////////////////////////////////////////////////////////////////////////////
// Extracts a DOM reference beginning at the cursor.
//
// The reference is the longest run of characters that are each either '.',
// '-', '_', or any character that is neither punctuation nor whitespace
// (digits fall into that last group, so they need no separate test).
//
// On success the run is stored in 'found', the cursor is advanced past it and
// true is returned.  If the run is empty, 'found' and the cursor are left
// untouched and false is returned.
bool Nibbler::getDOM (std::string& found)
{
  const std::string::size_type start  = mCursor;
  const std::string::size_type length = mInput.length ();
  std::string::size_type i = start;

  // The scan must be bounded by the input length: the terminating '\0' is
  // neither punctuation nor whitespace, so an unbounded scan would accept it
  // and keep reading beyond the end of the string.
  while (i < length)
  {
    // The <cctype> predicates require a value representable as unsigned char;
    // passing a negative plain char is undefined behavior.
    const unsigned char c = static_cast <unsigned char> (mInput[i]);

    const bool allowed_punct = (c == '.' || c == '-' || c == '_');
    if (! allowed_punct && (ispunct (c) || isspace (c)))
      break;

    ++i;
  }

  if (i > start)
  {
    found = mInput.substr (start, i - start);
    mCursor = i;
    return true;
  }

  return false;
}
////////////////////////////////////////////////////////////////////////////////
bool Nibbler::skipN (const int quantity /* = 1 */)
{

View file

@ -65,6 +65,7 @@ public:
bool getDateISO (time_t&);
bool getDate (const std::string&, time_t&);
bool getOneOf (const std::vector <std::string>&, std::string&);
bool getDOM (std::string&);
bool skipN (const int quantity = 1);
bool skip (char);