mirror of
https://github.com/GothenburgBitFactory/taskwarrior.git
synced 2025-08-20 04:13:07 +02:00
Expressions
- Implemented sequence --> infix converter. - Added new Lexer code. - Added Lexer unit tests.
This commit is contained in:
parent
86dcec8aea
commit
ed8454c202
10 changed files with 1247 additions and 70 deletions
|
@ -251,7 +251,6 @@ void Arguments::categorize ()
|
||||||
arg->second = "tag";
|
arg->second = "tag";
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
|
||||||
// <name>.<modifier>[:=]<value>
|
// <name>.<modifier>[:=]<value>
|
||||||
else if (is_attmod (arg->first))
|
else if (is_attmod (arg->first))
|
||||||
{
|
{
|
||||||
|
@ -302,6 +301,16 @@ void Arguments::categorize ()
|
||||||
arg->second = "op";
|
arg->second = "op";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// <expression>
|
||||||
|
else if (is_expression (arg->first))
|
||||||
|
{
|
||||||
|
found_non_sequence = true;
|
||||||
|
if (found_sequence)
|
||||||
|
found_something_after_sequence = true;
|
||||||
|
|
||||||
|
arg->second = "exp";
|
||||||
|
}
|
||||||
|
|
||||||
// If the type is not known, it is treated as a generic word.
|
// If the type is not known, it is treated as a generic word.
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -557,13 +566,7 @@ bool Arguments::is_command (
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// ______________
|
// <name>[:=]['"][<value>]['"]
|
||||||
// | |
|
|
||||||
// | v
|
|
||||||
// start --> name --> : --> " --> value --> " --> end
|
|
||||||
// | ^
|
|
||||||
// |_____________|
|
|
||||||
//
|
|
||||||
bool Arguments::is_attr (const std::string& input)
|
bool Arguments::is_attr (const std::string& input)
|
||||||
{
|
{
|
||||||
Nibbler n (input);
|
Nibbler n (input);
|
||||||
|
@ -591,13 +594,7 @@ bool Arguments::is_attr (const std::string& input)
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// ______________
|
// <name>.<mod>[:=]['"]<value>['"]
|
||||||
// | |
|
|
||||||
// | v
|
|
||||||
// start --> name --> . --> mod --> : --> " --> value --> " --> end
|
|
||||||
// | ^ | ^
|
|
||||||
// |_____________________| |_____________|
|
|
||||||
//
|
|
||||||
bool Arguments::is_attmod (const std::string& input)
|
bool Arguments::is_attmod (const std::string& input)
|
||||||
{
|
{
|
||||||
Nibbler n (input);
|
Nibbler n (input);
|
||||||
|
@ -733,6 +730,7 @@ bool Arguments::is_tag (const std::string& input)
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// "+", "-", "*", "/", "%", "~", "!~", "<" ...
|
||||||
bool Arguments::is_operator (const std::string& input)
|
bool Arguments::is_operator (const std::string& input)
|
||||||
{
|
{
|
||||||
for (unsigned int i = 0; i < NUM_OPERATORS; ++i)
|
for (unsigned int i = 0; i < NUM_OPERATORS; ++i)
|
||||||
|
@ -743,13 +741,21 @@ bool Arguments::is_operator (const std::string& input)
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// ______________
|
bool Arguments::is_expression (const std::string& input)
|
||||||
// | |
|
{
|
||||||
// | v
|
std::vector <std::string> tokens;
|
||||||
// start --> name --> : --> " --> value --> " --> end
|
splitq (tokens, input, ' ');
|
||||||
// | ^
|
|
||||||
// |_____________|
|
std::vector <std::string>::iterator token;
|
||||||
//
|
for (token = tokens.begin (); token != tokens.end (); ++token)
|
||||||
|
if (is_operator (*token))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// <name>[:=]['"]<value>['"]
|
||||||
bool Arguments::extract_attr (
|
bool Arguments::extract_attr (
|
||||||
const std::string& input,
|
const std::string& input,
|
||||||
std::string& name,
|
std::string& name,
|
||||||
|
@ -787,13 +793,7 @@ bool Arguments::extract_attr (
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// ______________
|
// <name>.<mod>[:=]['"]<value>['"]
|
||||||
// | |
|
|
||||||
// | v
|
|
||||||
// start --> name --> . --> mod --> : --> " --> value --> " --> end
|
|
||||||
// | ^
|
|
||||||
// |_____________|
|
|
||||||
//
|
|
||||||
bool Arguments::extract_attmod (
|
bool Arguments::extract_attmod (
|
||||||
const std::string& input,
|
const std::string& input,
|
||||||
std::string& name,
|
std::string& name,
|
||||||
|
@ -821,7 +821,7 @@ bool Arguments::extract_attmod (
|
||||||
|
|
||||||
if (n.getUntilOneOf (":=", modifier))
|
if (n.getUntilOneOf (":=", modifier))
|
||||||
{
|
{
|
||||||
if (!valid_modifier (modifier))
|
if (!Arguments::valid_modifier (modifier))
|
||||||
throw std::string ("The name '") + modifier + "' is not a valid modifier."; // TODO i18n
|
throw std::string ("The name '") + modifier + "' is not a valid modifier."; // TODO i18n
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -928,19 +928,24 @@ bool Arguments::extract_pattern (const std::string& input, std::string& pattern)
|
||||||
bool Arguments::extract_id (const std::string& input, std::vector <int>& sequence)
|
bool Arguments::extract_id (const std::string& input, std::vector <int>& sequence)
|
||||||
{
|
{
|
||||||
Nibbler n (input);
|
Nibbler n (input);
|
||||||
sequence.clear ();
|
|
||||||
|
|
||||||
int id;
|
int id;
|
||||||
|
|
||||||
if (n.getUnsignedInt (id))
|
if (n.getUnsignedInt (id))
|
||||||
{
|
{
|
||||||
sequence.push_back (id);
|
sequence.push_back (id);
|
||||||
|
|
||||||
if (n.skip ('-'))
|
if (n.skip ('-'))
|
||||||
{
|
{
|
||||||
if (!n.getUnsignedInt (id))
|
int end;
|
||||||
|
if (!n.getUnsignedInt (end))
|
||||||
throw std::string ("Unrecognized ID after hyphen.");
|
throw std::string ("Unrecognized ID after hyphen.");
|
||||||
|
|
||||||
sequence.push_back (id);
|
if (id > end)
|
||||||
|
throw std::string ("Inverted range 'high-low' instead of 'low-high'");
|
||||||
|
|
||||||
|
for (int n = id + 1; n <= end; ++n)
|
||||||
|
sequence.push_back (n);
|
||||||
}
|
}
|
||||||
|
|
||||||
while (n.skip (','))
|
while (n.skip (','))
|
||||||
|
@ -951,10 +956,15 @@ bool Arguments::extract_id (const std::string& input, std::vector <int>& sequenc
|
||||||
|
|
||||||
if (n.skip ('-'))
|
if (n.skip ('-'))
|
||||||
{
|
{
|
||||||
if (!n.getUnsignedInt (id))
|
int end;
|
||||||
|
if (!n.getUnsignedInt (end))
|
||||||
throw std::string ("Unrecognized ID after hyphen.");
|
throw std::string ("Unrecognized ID after hyphen.");
|
||||||
|
|
||||||
sequence.push_back (id);
|
if (id > end)
|
||||||
|
throw std::string ("Inverted range 'high-low' instead of 'low-high'");
|
||||||
|
|
||||||
|
for (int n = id + 1; n <= end; ++n)
|
||||||
|
sequence.push_back (n);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -973,7 +983,6 @@ bool Arguments::extract_uuid (
|
||||||
std::vector <std::string>& sequence)
|
std::vector <std::string>& sequence)
|
||||||
{
|
{
|
||||||
Nibbler n (input);
|
Nibbler n (input);
|
||||||
sequence.clear ();
|
|
||||||
|
|
||||||
std::string uuid;
|
std::string uuid;
|
||||||
if (n.getUUID (uuid))
|
if (n.getUUID (uuid))
|
||||||
|
@ -1047,6 +1056,7 @@ Arguments Arguments::extract_read_only_filter ()
|
||||||
i->second == "id" ||
|
i->second == "id" ||
|
||||||
i->second == "uuid" ||
|
i->second == "uuid" ||
|
||||||
i->second == "op" ||
|
i->second == "op" ||
|
||||||
|
i->second == "exp" ||
|
||||||
i->second == "word")
|
i->second == "word")
|
||||||
{
|
{
|
||||||
filter.push_back (*i);
|
filter.push_back (*i);
|
||||||
|
@ -1092,6 +1102,7 @@ Arguments Arguments::extract_write_filter ()
|
||||||
i->second == "id" ||
|
i->second == "id" ||
|
||||||
i->second == "uuid" ||
|
i->second == "uuid" ||
|
||||||
i->second == "op" ||
|
i->second == "op" ||
|
||||||
|
i->second == "exp" ||
|
||||||
i->second == "word")
|
i->second == "word")
|
||||||
{
|
{
|
||||||
filter.push_back (*i);
|
filter.push_back (*i);
|
||||||
|
@ -1153,9 +1164,14 @@ Arguments Arguments::extract_modifications ()
|
||||||
+ "' is not allowed when modifiying a task.";
|
+ "' is not allowed when modifiying a task.";
|
||||||
|
|
||||||
else if (i->second == "attmod")
|
else if (i->second == "attmod")
|
||||||
throw std::string ("Attribute modifiers '")
|
throw std::string ("An attribute modifier '")
|
||||||
+ i->first
|
+ i->first
|
||||||
+ "' are not allowed when modifiying a task.";
|
+ "' is not allowed when modifiying a task.";
|
||||||
|
|
||||||
|
else if (i->second == "exp")
|
||||||
|
throw std::string ("An expression '")
|
||||||
|
+ i->first
|
||||||
|
+ "' is not allowed when modifiying a task.";
|
||||||
|
|
||||||
else if (i->second == "id")
|
else if (i->second == "id")
|
||||||
throw std::string ("A task id cannot be modified.");
|
throw std::string ("A task id cannot be modified.");
|
||||||
|
@ -1196,6 +1212,7 @@ void Arguments::dump (const std::string& label)
|
||||||
color_map["uuid"] = Color ("yellow on gray3");
|
color_map["uuid"] = Color ("yellow on gray3");
|
||||||
color_map["substitution"] = Color ("bold cyan on gray3");
|
color_map["substitution"] = Color ("bold cyan on gray3");
|
||||||
color_map["op"] = Color ("bold blue on gray3");
|
color_map["op"] = Color ("bold blue on gray3");
|
||||||
|
color_map["exp"] = Color ("bold green on gray5");
|
||||||
color_map["none"] = Color ("white on gray3");
|
color_map["none"] = Color ("white on gray3");
|
||||||
|
|
||||||
Color color_debug (context.config.get ("color.debug"));
|
Color color_debug (context.config.get ("color.debug"));
|
||||||
|
|
|
@ -55,31 +55,32 @@ public:
|
||||||
|
|
||||||
bool find_command (std::string&);
|
bool find_command (std::string&);
|
||||||
|
|
||||||
bool is_command (const std::vector <std::string>&, std::string&);
|
static bool is_command (const std::vector <std::string>&, std::string&);
|
||||||
bool is_attr (const std::string&);
|
static bool is_attr (const std::string&);
|
||||||
bool is_attmod (const std::string&);
|
static bool is_attmod (const std::string&);
|
||||||
bool is_subst (const std::string&);
|
static bool is_subst (const std::string&);
|
||||||
bool is_pattern (const std::string&);
|
static bool is_pattern (const std::string&);
|
||||||
bool is_id (const std::string&);
|
static bool is_id (const std::string&);
|
||||||
bool is_uuid (const std::string&);
|
static bool is_uuid (const std::string&);
|
||||||
bool is_tag (const std::string&);
|
static bool is_tag (const std::string&);
|
||||||
bool is_operator (const std::string&);
|
static bool is_operator (const std::string&);
|
||||||
|
static bool is_expression (const std::string&);
|
||||||
|
|
||||||
// TODO Decide if these are really useful.
|
// TODO Decide if these are really useful.
|
||||||
bool extract_attr (const std::string&, std::string&, std::string&);
|
static bool extract_attr (const std::string&, std::string&, std::string&);
|
||||||
bool extract_attmod (const std::string&, std::string&, std::string&, std::string&, std::string&);
|
static bool extract_attmod (const std::string&, std::string&, std::string&, std::string&, std::string&);
|
||||||
bool extract_subst (const std::string&, std::string&, std::string&, bool&);
|
static bool extract_subst (const std::string&, std::string&, std::string&, bool&);
|
||||||
bool extract_pattern (const std::string&, std::string&);
|
static bool extract_pattern (const std::string&, std::string&);
|
||||||
bool extract_id (const std::string&, std::vector <int>&);
|
static bool extract_id (const std::string&, std::vector <int>&);
|
||||||
bool extract_uuid (const std::string&, std::vector <std::string>&);
|
static bool extract_uuid (const std::string&, std::vector <std::string>&);
|
||||||
bool extract_tag (const std::string&, char&, std::string&);
|
static bool extract_tag (const std::string&, char&, std::string&);
|
||||||
bool extract_operator (const std::string&, std::string&);
|
static bool extract_operator (const std::string&, std::string&);
|
||||||
|
|
||||||
Arguments extract_read_only_filter ();
|
Arguments extract_read_only_filter ();
|
||||||
Arguments extract_write_filter ();
|
Arguments extract_write_filter ();
|
||||||
Arguments extract_modifications ();
|
Arguments extract_modifications ();
|
||||||
|
|
||||||
bool valid_modifier (const std::string&);
|
static bool valid_modifier (const std::string&);
|
||||||
|
|
||||||
void dump (const std::string&);
|
void dump (const std::string&);
|
||||||
};
|
};
|
||||||
|
|
|
@ -20,6 +20,7 @@ set (task_SRCS API.cpp API.h
|
||||||
Filter.cpp Filter.h
|
Filter.cpp Filter.h
|
||||||
Hooks.cpp Hooks.h
|
Hooks.cpp Hooks.h
|
||||||
JSON.cpp JSON.h
|
JSON.cpp JSON.h
|
||||||
|
Lexer.cpp Lexer.h
|
||||||
Location.cpp Location.h
|
Location.cpp Location.h
|
||||||
Nibbler.cpp Nibbler.h
|
Nibbler.cpp Nibbler.h
|
||||||
Path.cpp Path.h
|
Path.cpp Path.h
|
||||||
|
|
|
@ -25,15 +25,23 @@
|
||||||
//
|
//
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#include <iostream> // TODO Remove.
|
||||||
|
#include <sstream>
|
||||||
#include <Context.h>
|
#include <Context.h>
|
||||||
|
#include <Lexer.h>
|
||||||
#include <Expression.h>
|
#include <Expression.h>
|
||||||
|
|
||||||
extern Context context;
|
extern Context context;
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Perform all the necessary steps prior to an eval call.
|
||||||
Expression::Expression (Arguments& arguments)
|
Expression::Expression (Arguments& arguments)
|
||||||
: _original (arguments)
|
: _original (arguments)
|
||||||
{
|
{
|
||||||
|
expand_sequence ();
|
||||||
|
to_infix ();
|
||||||
|
expand_expression ();
|
||||||
|
to_postfix ();
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -44,29 +52,328 @@ Expression::~Expression ()
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
bool Expression::eval (Task& task)
|
bool Expression::eval (Task& task)
|
||||||
{
|
{
|
||||||
|
// TODO Duplicate the _postfix vector as the operating stack.
|
||||||
|
// TODO ...
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Convert: 1,3-5,00000000-0000-0000-0000-000000000000
|
||||||
|
//
|
||||||
|
// To: (id=1 or (id>=3 and id<=5) or
|
||||||
|
// uuid="00000000-0000-0000-0000-000000000000")
|
||||||
|
void Expression::expand_sequence ()
|
||||||
|
{
|
||||||
|
Arguments temp;
|
||||||
|
_sequenced.clear ();
|
||||||
|
|
||||||
|
// Extract all the components of a sequence.
|
||||||
|
std::vector <int> ids;
|
||||||
|
std::vector <std::string> uuids;
|
||||||
|
std::vector <std::pair <std::string, std::string> >::iterator arg;
|
||||||
|
for (arg = _original.begin (); arg != _original.end (); ++arg)
|
||||||
|
{
|
||||||
|
if (arg->second == "id")
|
||||||
|
Arguments::extract_id (arg->first, ids);
|
||||||
|
|
||||||
|
else if (arg->second == "uuid")
|
||||||
|
Arguments::extract_uuid (arg->first, uuids);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If there is no sequence, we're done.
|
||||||
|
if (ids.size () == 0 && uuids.size () == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
// Construct the algebraic form.
|
||||||
|
std::stringstream sequence;
|
||||||
|
sequence << "(";
|
||||||
|
for (unsigned int i = 0; i < ids.size (); ++i)
|
||||||
|
{
|
||||||
|
if (i)
|
||||||
|
sequence << " or ";
|
||||||
|
|
||||||
|
sequence << "id=" << ids[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (uuids.size ())
|
||||||
|
{
|
||||||
|
sequence << " or ";
|
||||||
|
for (unsigned int i = 0; i < uuids.size (); ++i)
|
||||||
|
{
|
||||||
|
if (i)
|
||||||
|
sequence << " or ";
|
||||||
|
|
||||||
|
sequence << "uuid=\"" << uuids[i] << "\"";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sequence << ")";
|
||||||
|
std::cout << "# sequence '" << sequence.str () << "'\n";
|
||||||
|
|
||||||
|
// Copy everything up to the first id/uuid.
|
||||||
|
for (arg = _original.begin (); arg != _original.end (); ++arg)
|
||||||
|
{
|
||||||
|
if (arg->second == "id" || arg->second == "uuid")
|
||||||
|
break;
|
||||||
|
|
||||||
|
temp.push_back (*arg);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now insert the new sequence expression.
|
||||||
|
temp.push_back (std::make_pair (sequence.str (), "exp"));
|
||||||
|
|
||||||
|
// Now copy everything after the last id/uuid.
|
||||||
|
bool found_id = false;
|
||||||
|
for (arg = _original.begin (); arg != _original.end (); ++arg)
|
||||||
|
{
|
||||||
|
if (arg->second == "id" || arg->second == "uuid")
|
||||||
|
found_id = true;
|
||||||
|
|
||||||
|
else if (found_id)
|
||||||
|
temp.push_back (*arg);
|
||||||
|
}
|
||||||
|
|
||||||
|
_sequenced.swap (temp);
|
||||||
|
_sequenced.dump ("Expression::expand_sequence");
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Convert: +with -without
|
||||||
|
//
|
||||||
|
// To: tags ~ with
|
||||||
|
// tags !~ without
|
||||||
|
void Expression::expand_tag (const std::string& input)
|
||||||
|
{
|
||||||
|
char type;
|
||||||
|
std::string value;
|
||||||
|
Arguments::extract_tag (input, type, value);
|
||||||
|
|
||||||
|
_infix.push_back (std::make_pair ("tags", "dom"));
|
||||||
|
_infix.push_back (std::make_pair (type == '+' ? "~" : "!~", "op"));
|
||||||
|
_infix.push_back (std::make_pair (value, "exp"));
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Convert: <name>[:=]<value>
|
||||||
|
//
|
||||||
|
// To: <name> = lex<value>
|
||||||
|
void Expression::expand_attr (const std::string& input)
|
||||||
|
{
|
||||||
|
// TODO Should canonicalize 'name'.
|
||||||
|
std::string name;
|
||||||
|
std::string value;
|
||||||
|
Arguments::extract_attr (input, name, value);
|
||||||
|
|
||||||
|
_infix.push_back (std::make_pair (name, "dom"));
|
||||||
|
_infix.push_back (std::make_pair ("=", "op"));
|
||||||
|
_infix.push_back (std::make_pair (value, "exp"));
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Convert: <name>.<mod>[:=]<value>
|
||||||
|
//
|
||||||
|
// To: <name> <op> lex<value>
|
||||||
|
void Expression::expand_attmod (const std::string& input)
|
||||||
|
{
|
||||||
|
// TODO Should canonicalize 'name'.
|
||||||
|
std::string name;
|
||||||
|
// TODO Should canonicalize 'mod'.
|
||||||
|
std::string mod;
|
||||||
|
std::string value;
|
||||||
|
std::string sense;
|
||||||
|
Arguments::extract_attmod (input, name, mod, value, sense);
|
||||||
|
|
||||||
|
if (mod == "before" || mod == "under" || mod == "below")
|
||||||
|
{
|
||||||
|
_infix.push_back (std::make_pair (name, "dom"));
|
||||||
|
_infix.push_back (std::make_pair ("<", "op"));
|
||||||
|
_infix.push_back (std::make_pair (value, "exp"));
|
||||||
|
}
|
||||||
|
else if (mod == "after" || mod == "over" || mod == "above")
|
||||||
|
{
|
||||||
|
_infix.push_back (std::make_pair (name, "dom"));
|
||||||
|
_infix.push_back (std::make_pair (">", "op"));
|
||||||
|
_infix.push_back (std::make_pair (value, "exp"));
|
||||||
|
}
|
||||||
|
else if (mod == "none")
|
||||||
|
{
|
||||||
|
_infix.push_back (std::make_pair (name, "dom"));
|
||||||
|
_infix.push_back (std::make_pair ("==", "op"));
|
||||||
|
_infix.push_back (std::make_pair ("\"\"", "exp"));
|
||||||
|
}
|
||||||
|
else if (mod == "any")
|
||||||
|
{
|
||||||
|
_infix.push_back (std::make_pair (name, "dom"));
|
||||||
|
_infix.push_back (std::make_pair ("!=", "op"));
|
||||||
|
_infix.push_back (std::make_pair ("\"\"", "exp"));
|
||||||
|
}
|
||||||
|
else if (mod == "is" || mod == "equals")
|
||||||
|
{
|
||||||
|
_infix.push_back (std::make_pair (name, "dom"));
|
||||||
|
_infix.push_back (std::make_pair ("=", "op"));
|
||||||
|
_infix.push_back (std::make_pair (value, "exp"));
|
||||||
|
}
|
||||||
|
else if (mod == "isnt" || mod == "not")
|
||||||
|
{
|
||||||
|
_infix.push_back (std::make_pair (name, "dom"));
|
||||||
|
_infix.push_back (std::make_pair ("!=", "op"));
|
||||||
|
_infix.push_back (std::make_pair (value, "exp"));
|
||||||
|
}
|
||||||
|
else if (mod == "has" || mod == "contains")
|
||||||
|
{
|
||||||
|
_infix.push_back (std::make_pair (name, "dom"));
|
||||||
|
_infix.push_back (std::make_pair ("~", "op"));
|
||||||
|
_infix.push_back (std::make_pair (value, "exp"));
|
||||||
|
}
|
||||||
|
else if (mod == "hasnt")
|
||||||
|
{
|
||||||
|
_infix.push_back (std::make_pair (name, "dom"));
|
||||||
|
_infix.push_back (std::make_pair ("!~", "op"));
|
||||||
|
_infix.push_back (std::make_pair (value, "exp"));
|
||||||
|
}
|
||||||
|
else if (mod == "startswith" || mod == "left")
|
||||||
|
{
|
||||||
|
// TODO ?
|
||||||
|
}
|
||||||
|
else if (mod == "endswith" || mod == "right")
|
||||||
|
{
|
||||||
|
// TODO ?
|
||||||
|
}
|
||||||
|
else if (mod == "word")
|
||||||
|
{
|
||||||
|
// TODO ?
|
||||||
|
}
|
||||||
|
else if (mod == "noword")
|
||||||
|
{
|
||||||
|
// TODO ?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Convert: <word>
|
||||||
|
//
|
||||||
|
// To: description ~ <word>
|
||||||
|
void Expression::expand_word (const std::string& input)
|
||||||
|
{
|
||||||
|
_infix.push_back (std::make_pair ("description", "dom"));
|
||||||
|
_infix.push_back (std::make_pair ("~", "op"));
|
||||||
|
_infix.push_back (std::make_pair (input, "exp"));
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Convert: /<pattern>/
|
||||||
|
//
|
||||||
|
// To: description ~ <pattern>
|
||||||
|
void Expression::expand_pattern (const std::string& input)
|
||||||
|
{
|
||||||
|
std::string value;
|
||||||
|
Arguments::extract_pattern (input, value);
|
||||||
|
|
||||||
|
_infix.push_back (std::make_pair ("description", "dom"));
|
||||||
|
_infix.push_back (std::make_pair ("~", "op"));
|
||||||
|
_infix.push_back (std::make_pair (value, "exp"));
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Convert: <exp>
|
||||||
|
//
|
||||||
|
// To: lex<exp>
|
||||||
|
void Expression::expand_expression ()
|
||||||
|
{
|
||||||
|
Arguments temp;
|
||||||
|
|
||||||
|
std::vector <std::pair <std::string, std::string> >::iterator arg;
|
||||||
|
for (arg = _infix.begin (); arg != _infix.end (); ++arg)
|
||||||
|
{
|
||||||
|
if (arg->second == "exp")
|
||||||
|
{
|
||||||
|
Lexer lexer (arg->first);
|
||||||
|
lexer.skipWhitespace (true);
|
||||||
|
lexer.coalesceAlpha (true);
|
||||||
|
lexer.coalesceDigits (true);
|
||||||
|
lexer.coalesceQuoted (true);
|
||||||
|
|
||||||
|
std::vector <std::string> tokens;
|
||||||
|
lexer.tokenize (tokens);
|
||||||
|
|
||||||
|
std::vector <std::string>::iterator token;
|
||||||
|
for (token = tokens.begin (); token != tokens.end (); ++token)
|
||||||
|
{
|
||||||
|
if (_infix.is_operator (*token))
|
||||||
|
temp.push_back (std::make_pair (*token, "op"));
|
||||||
|
else
|
||||||
|
temp.push_back (std::make_pair (*token, "dom"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
temp.push_back (*arg);
|
||||||
|
}
|
||||||
|
|
||||||
|
_infix.swap (temp);
|
||||||
|
_infix.dump ("Expression::expand_expression");
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Inserts the 'and' operator by default between terms that are not separated by
|
// Inserts the 'and' operator by default between terms that are not separated by
|
||||||
// at least one operator.
|
// at least one operator.
|
||||||
//
|
//
|
||||||
// Converts: <term1> <term2> <op> <term3>
|
// Converts: <term1> <term2> <op> <exp>
|
||||||
// to: <term1> and <term2> <op> <term3>
|
// to: <term1> and <term2> <op> <token> <token> <token>
|
||||||
//
|
//
|
||||||
void Expression::toInfix ()
|
//
|
||||||
|
//
|
||||||
|
// Rules:
|
||||||
|
// 1. Two adjacent non-operator arguments have an 'and' inserted between them.
|
||||||
|
// 2. Any argument of type "exp" is lexed and replaced by tokens.
|
||||||
|
//
|
||||||
|
void Expression::to_infix ()
|
||||||
{
|
{
|
||||||
_infix.clear ();
|
_infix.clear ();
|
||||||
|
|
||||||
|
bool new_style = is_new_style ();
|
||||||
|
|
||||||
|
std::string value;
|
||||||
std::string previous = "op";
|
std::string previous = "op";
|
||||||
std::vector <std::pair <std::string, std::string> >::iterator arg;
|
std::vector <std::pair <std::string, std::string> >::iterator arg;
|
||||||
for (arg = _original.begin (); arg != _original.end (); ++arg)
|
for (arg = _sequenced.begin (); arg != _sequenced.end (); ++arg)
|
||||||
{
|
{
|
||||||
if (previous != "op" &&
|
// Old-style filters need 'and' conjunctions.
|
||||||
|
if (!new_style &&
|
||||||
|
previous != "op" &&
|
||||||
arg->second != "op")
|
arg->second != "op")
|
||||||
|
{
|
||||||
_infix.push_back (std::make_pair ("and", "op"));
|
_infix.push_back (std::make_pair ("and", "op"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upgrade all arguments to new-style.
|
||||||
|
// ID & UUID sequence has already been converted.
|
||||||
|
if (arg->second == "id" ||
|
||||||
|
arg->second == "uuid")
|
||||||
|
; // NOP.
|
||||||
|
|
||||||
|
else if (arg->second == "tag")
|
||||||
|
expand_tag (arg->first);
|
||||||
|
|
||||||
|
else if (arg->second == "pattern")
|
||||||
|
expand_pattern (arg->first);
|
||||||
|
|
||||||
|
else if (arg->second == "attribute")
|
||||||
|
expand_attr (arg->first);
|
||||||
|
|
||||||
|
else if (arg->second == "attmod")
|
||||||
|
expand_attmod (arg->first);
|
||||||
|
|
||||||
|
else if (arg->second == "word")
|
||||||
|
expand_word (arg->first);
|
||||||
|
|
||||||
|
// Expressions will be converted later.
|
||||||
|
else if (arg->second == "exp")
|
||||||
|
_infix.push_back (*arg);
|
||||||
|
|
||||||
|
else
|
||||||
|
throw std::string ("Error: unrecognized argument category '") + arg->second + "'";
|
||||||
|
|
||||||
_infix.push_back (*arg);
|
|
||||||
previous = arg->second;
|
previous = arg->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -75,7 +382,41 @@ void Expression::toInfix ()
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Dijkstra Shunting Algorithm.
|
// Dijkstra Shunting Algorithm.
|
||||||
void Expression::toPostfix ()
|
//
|
||||||
|
// While there are tokens to be read:
|
||||||
|
// Read a token.
|
||||||
|
// If the token is a number, then add it to the output queue.
|
||||||
|
// If the token is a function token, then push it onto the stack.
|
||||||
|
// If the token is a function argument separator (e.g., a comma):
|
||||||
|
// Until the token at the top of the stack is a left parenthesis, pop
|
||||||
|
// operators off the stack onto the output queue. If no left parentheses
|
||||||
|
// are encountered, either the separator was misplaced or parentheses were
|
||||||
|
// mismatched.
|
||||||
|
// If the token is an operator, o1, then:
|
||||||
|
// while there is an operator token, o2, at the top of the stack, and
|
||||||
|
// either o1 is left-associative and its precedence is less than or
|
||||||
|
// equal to that of o2,
|
||||||
|
// or o1 is right-associative and its precedence is less than that
|
||||||
|
// of o2,
|
||||||
|
// pop o2 off the stack, onto the output queue;
|
||||||
|
// push o1 onto the stack.
|
||||||
|
// If the token is a left parenthesis, then push it onto the stack.
|
||||||
|
// If the token is a right parenthesis:
|
||||||
|
// Until the token at the top of the stack is a left parenthesis, pop
|
||||||
|
// operators off the stack onto the output queue.
|
||||||
|
// Pop the left parenthesis from the stack, but not onto the output queue.
|
||||||
|
// If the token at the top of the stack is a function token, pop it onto
|
||||||
|
// the output queue.
|
||||||
|
// If the stack runs out without finding a left parenthesis, then there
|
||||||
|
// are mismatched parentheses.
|
||||||
|
// When there are no more tokens to read:
|
||||||
|
// While there are still operator tokens in the stack:
|
||||||
|
// If the operator token on the top of the stack is a parenthesis, then
|
||||||
|
// there are mismatched parentheses.
|
||||||
|
// Pop the operator onto the output queue.
|
||||||
|
// Exit.
|
||||||
|
//
|
||||||
|
void Expression::to_postfix ()
|
||||||
{
|
{
|
||||||
_postfix.clear ();
|
_postfix.clear ();
|
||||||
|
|
||||||
|
@ -83,6 +424,25 @@ void Expression::toPostfix ()
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Test whether the _original arguments are old style or new style.
|
||||||
|
//
|
||||||
|
// Old style: no single argument corresponds to an operator, ie no 'and', 'or',
|
||||||
|
// etc.
|
||||||
|
//
|
||||||
|
// New style: at least one argument that is an operator.
|
||||||
|
//
|
||||||
|
bool Expression::is_new_style ()
|
||||||
|
{
|
||||||
|
std::vector <std::pair <std::string, std::string> >::iterator arg;
|
||||||
|
for (arg = _original.begin (); arg != _original.end (); ++arg)
|
||||||
|
if (Arguments::is_operator (arg->first))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// TODO Remove?
|
||||||
void Expression::dump (const std::string& label)
|
void Expression::dump (const std::string& label)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,13 +38,24 @@ public:
|
||||||
Expression (Arguments&);
|
Expression (Arguments&);
|
||||||
~Expression ();
|
~Expression ();
|
||||||
bool eval (Task&);
|
bool eval (Task&);
|
||||||
void toInfix ();
|
|
||||||
void toPostfix ();
|
|
||||||
|
|
||||||
|
private:
|
||||||
|
void expand_sequence ();
|
||||||
|
void expand_expression ();
|
||||||
|
void expand_tag (const std::string&);
|
||||||
|
void expand_attr (const std::string&);
|
||||||
|
void expand_attmod (const std::string&);
|
||||||
|
void expand_word (const std::string&);
|
||||||
|
void expand_pattern (const std::string&);
|
||||||
|
|
||||||
|
void to_infix ();
|
||||||
|
void to_postfix ();
|
||||||
|
bool is_new_style ();
|
||||||
void dump (const std::string&);
|
void dump (const std::string&);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Arguments _original;
|
Arguments _original;
|
||||||
|
Arguments _sequenced;
|
||||||
Arguments _infix;
|
Arguments _infix;
|
||||||
Arguments _postfix;
|
Arguments _postfix;
|
||||||
};
|
};
|
||||||
|
|
374
src/Lexer.cpp
Normal file
374
src/Lexer.cpp
Normal file
|
@ -0,0 +1,374 @@
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// taskwarrior - a command line task list manager.
|
||||||
|
//
|
||||||
|
// Copyright 2011, Paul Beckingham, Federico Hernandez.
|
||||||
|
// All rights reserved.
|
||||||
|
//
|
||||||
|
// This program is free software; you can redistribute it and/or modify it under
|
||||||
|
// the terms of the GNU General Public License as published by the Free Software
|
||||||
|
// Foundation; either version 2 of the License, or (at your option) any later
|
||||||
|
// version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||||
|
// details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License along with
|
||||||
|
// this program; if not, write to the
|
||||||
|
//
|
||||||
|
// Free Software Foundation, Inc.,
|
||||||
|
// 51 Franklin Street, Fifth Floor,
|
||||||
|
// Boston, MA
|
||||||
|
// 02110-1301
|
||||||
|
// USA
|
||||||
|
//
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// This lexer works by breaking the input stream into tokens. The essence of
|
||||||
|
// the algorithm lies in the distinction between adjacent tokens, such that
|
||||||
|
// between the two extremes lies a good solution.
|
||||||
|
//
|
||||||
|
// At one extreme, the entire input is considered one token. Clearly this is
|
||||||
|
// only correct for trivial input. At the other extreme, every character of the
|
||||||
|
// input is a token. This is also wrong.
|
||||||
|
//
|
||||||
|
// If the input is as follows:
|
||||||
|
//
|
||||||
|
// It is almost 11:00am.
|
||||||
|
//
|
||||||
|
// The desired tokenization is:
|
||||||
|
//
|
||||||
|
// It
|
||||||
|
// <space>
|
||||||
|
// is
|
||||||
|
// <space>
|
||||||
|
// almost
|
||||||
|
// <space>
|
||||||
|
// 11
|
||||||
|
// :
|
||||||
|
// 00
|
||||||
|
// am
|
||||||
|
// .
|
||||||
|
// \n
|
||||||
|
//
|
||||||
|
// This can be achieved by allowing transitions to denote token boundaries.
|
||||||
|
// Given the following character classes:
|
||||||
|
//
|
||||||
|
// letter: a-z A-Z
|
||||||
|
// digit: 0-9
|
||||||
|
// whitespace: <space> <tab> <newline> <cr> <form-feed>
|
||||||
|
// other: Everything else
|
||||||
|
//
|
||||||
|
// Then a token boundary is a transition between:
|
||||||
|
// letter -> !letter
|
||||||
|
// digit -> !digit
|
||||||
|
// whitespace -> any
|
||||||
|
// other -> any
|
||||||
|
//
|
||||||
|
// This has the effect of allowing groups of consecutive letters to be
|
||||||
|
// considered one token, as well as groups of digits.
|
||||||
|
//
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <util.h>
|
||||||
|
#include <Lexer.h>
|
||||||
|
|
||||||
|
// Character classes, as returned by Lexer::classify.  File-local.
namespace
{
  const int other = -1;   // Not a member of any class below.
  const int alpha = -2;   // Member of mAlpha.
  const int digit = -3;   // Member of mDigit.
  const int white = -4;   // Member of mWhite.
  const int quote = -5;   // Member of mQuote.
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Construct a lexer over a copy of 'input'.
//
// Defaults: ASCII letters are alpha, 0-9 are digits, single and double quotes
// are quotes, and space/tab/newline/cr/form-feed are whitespace.  Runs of
// letters and runs of digits are coalesced; quoted text and whitespace are
// not, and whitespace tokens are not skipped.  No special tokens are defined.
Lexer::Lexer (const std::string& input)
  : mInput          (input),
    mAlpha          ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"),
    mDigit          ("0123456789"),
    mQuote          ("'\""),
    mWhite          (" \t\n\r\f"),
    mAlphaCoalesce  (true),
    mDigitCoalesce  (true),
    mQuotedCoalesce (false),
    mWhiteCoalesce  (false),
    mSkipWhitespace (false)
{
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Lexer::tokenize (std::vector <std::string>& all)
|
||||||
|
{
|
||||||
|
all.clear (); // Prevent repeated accumulation.
|
||||||
|
|
||||||
|
std::string token;
|
||||||
|
bool inQuote = false;
|
||||||
|
char quoteChar = '\0';
|
||||||
|
for (unsigned int i = 0; i < mInput.length (); ++i)
|
||||||
|
{
|
||||||
|
bool specialFound = false;
|
||||||
|
for (unsigned int s = 0; s < mSpecialTokens.size (); ++s)
|
||||||
|
{
|
||||||
|
std::string potential = mInput.substr (
|
||||||
|
i, min (mSpecialTokens[s].length (), mInput.length () - i));
|
||||||
|
|
||||||
|
if (potential == mSpecialTokens[s])
|
||||||
|
{
|
||||||
|
// Capture currently assembled token, the special token, increment over
|
||||||
|
// that token, and skip all remaining code in the loop.
|
||||||
|
if (token.length ())
|
||||||
|
{
|
||||||
|
all.push_back (token);
|
||||||
|
token = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
all.push_back (potential);
|
||||||
|
i += potential.length () - 1;
|
||||||
|
specialFound = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (specialFound)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
char c = mInput[i];
|
||||||
|
char next = '\0';
|
||||||
|
if (i < mInput.length () - 1)
|
||||||
|
next = mInput[i + 1];
|
||||||
|
|
||||||
|
// Classify current and next characters.
|
||||||
|
int thisChar = classify (c);
|
||||||
|
int nextChar = classify (next);
|
||||||
|
|
||||||
|
// Properly set inQuote, quoteChar.
|
||||||
|
if (!inQuote && thisChar == quote)
|
||||||
|
{
|
||||||
|
quoteChar = c;
|
||||||
|
inQuote = true;
|
||||||
|
}
|
||||||
|
else if (inQuote && c == quoteChar)
|
||||||
|
{
|
||||||
|
inQuote = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Detect transitions.
|
||||||
|
bool transition = false;
|
||||||
|
if (thisChar != nextChar)
|
||||||
|
transition = true;
|
||||||
|
|
||||||
|
token += c;
|
||||||
|
|
||||||
|
// Transitions mean new token. All 'other' characters are separate tokens.
|
||||||
|
if (transition || nextChar == other)
|
||||||
|
{
|
||||||
|
if (!inQuote || !mQuotedCoalesce)
|
||||||
|
{
|
||||||
|
if (!mSkipWhitespace || thisChar != white)
|
||||||
|
all.push_back (token);
|
||||||
|
token = "";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Non-transitions - runs.
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Runs may be optionally coalesced.
|
||||||
|
if (!(mAlphaCoalesce && nextChar == alpha) &&
|
||||||
|
!(mDigitCoalesce && nextChar == digit) &&
|
||||||
|
!(mWhiteCoalesce && nextChar == white))
|
||||||
|
{
|
||||||
|
if (!inQuote || !mQuotedCoalesce)
|
||||||
|
{
|
||||||
|
if (!mSkipWhitespace || thisChar != white)
|
||||||
|
all.push_back (token);
|
||||||
|
token = "";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Lexer::categorizeAsAlpha (char value)
|
||||||
|
{
|
||||||
|
if (mAlpha.find (value) == std::string::npos)
|
||||||
|
mAlpha += value;
|
||||||
|
|
||||||
|
std::string::size_type pos;
|
||||||
|
if ((pos = mDigit.find (value)) != std::string::npos) mDigit.erase (pos, 1);
|
||||||
|
if ((pos = mQuote.find (value)) != std::string::npos) mQuote.erase (pos, 1);
|
||||||
|
if ((pos = mWhite.find (value)) != std::string::npos) mWhite.erase (pos, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Lexer::ignoreAsAlpha (char value)
|
||||||
|
{
|
||||||
|
std::string::size_type pos;
|
||||||
|
if ((pos = mAlpha.find (value)) != std::string::npos) mAlpha.erase (pos, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Lexer::setAlpha (const std::string& value)
|
||||||
|
{
|
||||||
|
mAlpha = value;
|
||||||
|
|
||||||
|
std::string::size_type pos;
|
||||||
|
for (unsigned int i = 0; i < mAlpha.length (); ++i)
|
||||||
|
{
|
||||||
|
if ((pos = mDigit.find (mAlpha[i])) != std::string::npos) mDigit.erase (pos, 1);
|
||||||
|
if ((pos = mQuote.find (mAlpha[i])) != std::string::npos) mQuote.erase (pos, 1);
|
||||||
|
if ((pos = mWhite.find (mAlpha[i])) != std::string::npos) mWhite.erase (pos, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Lexer::categorizeAsDigit (char value)
|
||||||
|
{
|
||||||
|
if (mDigit.find (value) == std::string::npos)
|
||||||
|
mDigit += value;
|
||||||
|
|
||||||
|
std::string::size_type pos;
|
||||||
|
if ((pos = mAlpha.find (value)) != std::string::npos) mAlpha.erase (pos, 1);
|
||||||
|
if ((pos = mQuote.find (value)) != std::string::npos) mQuote.erase (pos, 1);
|
||||||
|
if ((pos = mWhite.find (value)) != std::string::npos) mWhite.erase (pos, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Lexer::ignoreAsDigit (char value)
|
||||||
|
{
|
||||||
|
std::string::size_type pos;
|
||||||
|
if ((pos = mDigit.find (value)) != std::string::npos) mDigit.erase (pos, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Lexer::setDigit (const std::string& value)
|
||||||
|
{
|
||||||
|
mDigit = value;
|
||||||
|
|
||||||
|
std::string::size_type pos;
|
||||||
|
for (unsigned int i = 0; i < mDigit.length (); ++i)
|
||||||
|
{
|
||||||
|
if ((pos = mAlpha.find (mDigit[i])) != std::string::npos) mAlpha.erase (pos, 1);
|
||||||
|
if ((pos = mQuote.find (mDigit[i])) != std::string::npos) mQuote.erase (pos, 1);
|
||||||
|
if ((pos = mWhite.find (mDigit[i])) != std::string::npos) mWhite.erase (pos, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Lexer::categorizeAsQuote (char value)
|
||||||
|
{
|
||||||
|
if (mQuote.find (value) == std::string::npos)
|
||||||
|
mQuote += value;
|
||||||
|
|
||||||
|
std::string::size_type pos;
|
||||||
|
if ((pos = mAlpha.find (value)) != std::string::npos) mAlpha.erase (pos, 1);
|
||||||
|
if ((pos = mDigit.find (value)) != std::string::npos) mDigit.erase (pos, 1);
|
||||||
|
if ((pos = mWhite.find (value)) != std::string::npos) mWhite.erase (pos, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Lexer::ignoreAsQuote (char value)
|
||||||
|
{
|
||||||
|
std::string::size_type pos;
|
||||||
|
if ((pos = mQuote.find (value)) != std::string::npos) mQuote.erase (pos, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Lexer::setQuote (const std::string& value)
|
||||||
|
{
|
||||||
|
mQuote = value;
|
||||||
|
|
||||||
|
std::string::size_type pos;
|
||||||
|
for (unsigned int i = 0; i < mQuote.length (); ++i)
|
||||||
|
{
|
||||||
|
if ((pos = mAlpha.find (mQuote[i])) != std::string::npos) mAlpha.erase (pos, 1);
|
||||||
|
if ((pos = mDigit.find (mQuote[i])) != std::string::npos) mDigit.erase (pos, 1);
|
||||||
|
if ((pos = mWhite.find (mQuote[i])) != std::string::npos) mWhite.erase (pos, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Lexer::categorizeAsWhite (char value)
|
||||||
|
{
|
||||||
|
if (mWhite.find (value) == std::string::npos)
|
||||||
|
mWhite += value;
|
||||||
|
|
||||||
|
std::string::size_type pos;
|
||||||
|
if ((pos = mAlpha.find (value)) != std::string::npos) mAlpha.erase (pos, 1);
|
||||||
|
if ((pos = mDigit.find (value)) != std::string::npos) mDigit.erase (pos, 1);
|
||||||
|
if ((pos = mQuote.find (value)) != std::string::npos) mQuote.erase (pos, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Lexer::ignoreAsWhite (char value)
|
||||||
|
{
|
||||||
|
std::string::size_type pos;
|
||||||
|
if ((pos = mWhite.find (value)) != std::string::npos) mWhite.erase (pos, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Lexer::setWhite (const std::string& value)
|
||||||
|
{
|
||||||
|
mWhite = value;
|
||||||
|
|
||||||
|
std::string::size_type pos;
|
||||||
|
for (unsigned int i = 0; i < mWhite.length (); ++i)
|
||||||
|
{
|
||||||
|
if ((pos = mAlpha.find (mWhite[i])) != std::string::npos) mAlpha.erase (pos, 1);
|
||||||
|
if ((pos = mDigit.find (mWhite[i])) != std::string::npos) mDigit.erase (pos, 1);
|
||||||
|
if ((pos = mQuote.find (mWhite[i])) != std::string::npos) mQuote.erase (pos, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Lexer::coalesceAlpha (bool value)
|
||||||
|
{
|
||||||
|
mAlphaCoalesce = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Lexer::coalesceDigits (bool value)
|
||||||
|
{
|
||||||
|
mDigitCoalesce = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Lexer::coalesceQuoted (bool value)
|
||||||
|
{
|
||||||
|
mQuotedCoalesce = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Lexer::coalesceWhite (bool value)
|
||||||
|
{
|
||||||
|
mWhiteCoalesce = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Lexer::skipWhitespace (bool value)
|
||||||
|
{
|
||||||
|
mSkipWhitespace = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void Lexer::specialToken (const std::string& special)
|
||||||
|
{
|
||||||
|
mSpecialTokens.push_back (special);
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
int Lexer::classify (char c)
|
||||||
|
{
|
||||||
|
if (mAlpha.find (c) != std::string::npos) return alpha;
|
||||||
|
if (mDigit.find (c) != std::string::npos) return digit;
|
||||||
|
if (mWhite.find (c) != std::string::npos) return white;
|
||||||
|
if (mQuote.find (c) != std::string::npos) return quote;
|
||||||
|
|
||||||
|
return other;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
84
src/Lexer.h
Normal file
84
src/Lexer.h
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// taskwarrior - a command line task list manager.
|
||||||
|
//
|
||||||
|
// Copyright 2011, Paul Beckingham, Federico Hernandez.
|
||||||
|
// All rights reserved.
|
||||||
|
//
|
||||||
|
// This program is free software; you can redistribute it and/or modify it under
|
||||||
|
// the terms of the GNU General Public License as published by the Free Software
|
||||||
|
// Foundation; either version 2 of the License, or (at your option) any later
|
||||||
|
// version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||||
|
// details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License along with
|
||||||
|
// this program; if not, write to the
|
||||||
|
//
|
||||||
|
// Free Software Foundation, Inc.,
|
||||||
|
// 51 Franklin Street, Fifth Floor,
|
||||||
|
// Boston, MA
|
||||||
|
// 02110-1301
|
||||||
|
// USA
|
||||||
|
//
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#ifndef INCLUDED_LEXER
|
||||||
|
#define INCLUDED_LEXER
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
// Configurable lexer that splits a string into tokens according to four
// adjustable character classes (alpha, digit, quote, whitespace), per-class
// coalescing options, and a list of special multi-character tokens.
class Lexer
{
public:
  Lexer (const std::string& input);
  void tokenize (std::vector <std::string>& all);

  // Alpha class membership.
  void categorizeAsAlpha (char value);
  void ignoreAsAlpha (char value);
  void setAlpha (const std::string& value);

  // Digit class membership.
  void categorizeAsDigit (char value);
  void ignoreAsDigit (char value);
  void setDigit (const std::string& value);

  // Quote class membership.
  void categorizeAsQuote (char value);
  void ignoreAsQuote (char value);
  void setQuote (const std::string& value);

  // Whitespace class membership.
  void categorizeAsWhite (char value);
  void ignoreAsWhite (char value);
  void setWhite (const std::string& value);

  // Tokenization options.
  void coalesceAlpha (bool value);
  void coalesceDigits (bool value);
  void coalesceQuoted (bool value);
  void coalesceWhite (bool value);
  void skipWhitespace (bool value);
  void specialToken (const std::string& special);

private:
  int classify (char c);

  std::string mInput;          // Text to be tokenized.

  std::string mAlpha;          // Characters in the alpha class.
  std::string mDigit;          // Characters in the digit class.
  std::string mQuote;          // Characters in the quote class.
  std::string mWhite;          // Characters in the whitespace class.

  bool mAlphaCoalesce;
  bool mDigitCoalesce;
  bool mQuotedCoalesce;
  bool mWhiteCoalesce;
  bool mSkipWhitespace;

  std::vector <std::string> mSpecialTokens;
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
|
@ -95,8 +95,6 @@ int CmdCustom::execute (std::string& output)
|
||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
Arguments f = context.args.extract_read_only_filter ();
|
Arguments f = context.args.extract_read_only_filter ();
|
||||||
Expression e (f);
|
Expression e (f);
|
||||||
e.toInfix ();
|
|
||||||
e.toPostfix ();
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
// TODO e.apply (tasks);
|
// TODO e.apply (tasks);
|
||||||
|
|
|
@ -7,9 +7,9 @@ include_directories (${CMAKE_SOURCE_DIR}
|
||||||
${TASK_INCLUDE_DIRS})
|
${TASK_INCLUDE_DIRS})
|
||||||
|
|
||||||
set (test_SRCS arguments.t att.t autocomplete.t color.t config.t date.t
|
set (test_SRCS arguments.t att.t autocomplete.t color.t config.t date.t
|
||||||
directory.t dom.t duration.t file.t filt.t i18n.t json.t list.t
|
directory.t dom.t duration.t file.t filt.t i18n.t json.t lexer.t
|
||||||
nibbler.t path.t record.t rx.t seq.t subst.t t.benchmark.t t.t
|
list.t nibbler.t path.t record.t rx.t seq.t subst.t t.benchmark.t
|
||||||
taskmod.t tdb.t tdb2.t text.t uri.t util.t variant.t view.t
|
t.t taskmod.t tdb.t tdb2.t text.t uri.t util.t variant.t view.t
|
||||||
json_test)
|
json_test)
|
||||||
|
|
||||||
add_custom_target (test ./run_all DEPENDS ${test_SRCS}
|
add_custom_target (test ./run_all DEPENDS ${test_SRCS}
|
||||||
|
|
331
test/lexer.t.cpp
Normal file
331
test/lexer.t.cpp
Normal file
|
@ -0,0 +1,331 @@
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// taskwarrior - a command line task list manager.
|
||||||
|
//
|
||||||
|
// Copyright 2006 - 2011, Paul Beckingham.
|
||||||
|
// All rights reserved.
|
||||||
|
//
|
||||||
|
// This program is free software; you can redistribute it and/or modify it under
|
||||||
|
// the terms of the GNU General Public License as published by the Free Software
|
||||||
|
// Foundation; either version 2 of the License, or (at your option) any later
|
||||||
|
// version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||||
|
// details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License along with
|
||||||
|
// this program; if not, write to the
|
||||||
|
//
|
||||||
|
// Free Software Foundation, Inc.,
|
||||||
|
// 51 Franklin Street, Fifth Floor,
|
||||||
|
// Boston, MA
|
||||||
|
// 02110-1301
|
||||||
|
// USA
|
||||||
|
//
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#include <sstream>
#include <string>
#include <vector>
#include <Lexer.h>
#include <Context.h>
#include <Test.h>
|
||||||
|
|
||||||
|
Context context;
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
int main (int argc, char** argv)
|
||||||
|
{
|
||||||
|
UnitTest t (80);
|
||||||
|
|
||||||
|
std::string input = "This is a test.";
|
||||||
|
std::vector <std::string> tokens;
|
||||||
|
{
|
||||||
|
Lexer l (input);
|
||||||
|
l.tokenize (tokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
t.is (tokens.size (), (size_t) 8, "'This is a test.' -> 'This| |is| |a| |test|.'");
|
||||||
|
if (tokens.size () == 8)
|
||||||
|
{
|
||||||
|
t.is (tokens[0], "This", "'This is a test.' [0] -> 'This'");
|
||||||
|
t.is (tokens[1], " ", "'This is a test.' [1] -> ' '");
|
||||||
|
t.is (tokens[2], "is", "'This is a test.' [2] -> 'is'");
|
||||||
|
t.is (tokens[3], " ", "'This is a test.' [3] -> ' '");
|
||||||
|
t.is (tokens[4], "a", "'This is a test.' [4] -> 'a'");
|
||||||
|
t.is (tokens[5], " ", "'This is a test.' [5] -> ' '");
|
||||||
|
t.is (tokens[6], "test", "'This is a test.' [6] -> 'test'");
|
||||||
|
t.is (tokens[7], ".", "'This is a test.' [7] -> '.'");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
t.skip ("'This is a test.' [0] -> 'This'");
|
||||||
|
t.skip ("'This is a test.' [1] -> ' '");
|
||||||
|
t.skip ("'This is a test.' [2] -> 'is'");
|
||||||
|
t.skip ("'This is a test.' [3] -> ' '");
|
||||||
|
t.skip ("'This is a test.' [4] -> 'a'");
|
||||||
|
t.skip ("'This is a test.' [5] -> ' '");
|
||||||
|
t.skip ("'This is a test.' [6] -> 'test'");
|
||||||
|
t.skip ("'This is a test.' [7] -> '.'");
|
||||||
|
}
|
||||||
|
|
||||||
|
input = "a12bcd345efgh6789";
|
||||||
|
{
|
||||||
|
Lexer l (input);
|
||||||
|
l.tokenize (tokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
t.is (tokens.size (), (size_t) 6, "'a12bcd345efgh6789' -> 'a|12|bcd|345|efgh|6789'");
|
||||||
|
if (tokens.size () == 6)
|
||||||
|
{
|
||||||
|
t.is (tokens[0], "a", "'a12bcd345efgh6789' [0] -> 'a'");
|
||||||
|
t.is (tokens[1], "12", "'a12bcd345efgh6789' [1] -> '12'");
|
||||||
|
t.is (tokens[2], "bcd", "'a12bcd345efgh6789' [2] -> 'bcd'");
|
||||||
|
t.is (tokens[3], "345", "'a12bcd345efgh6789' [3] -> '345'");
|
||||||
|
t.is (tokens[4], "efgh", "'a12bcd345efgh6789' [4] -> 'efgh'");
|
||||||
|
t.is (tokens[5], "6789", "'a12bcd345efgh6789' [5] -> '6789'");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
t.skip ("'a12bcd345efgh6789' [0] -> 'a'");
|
||||||
|
t.skip ("'a12bcd345efgh6789' [1] -> '12'");
|
||||||
|
t.skip ("'a12bcd345efgh6789' [2] -> 'bcd'");
|
||||||
|
t.skip ("'a12bcd345efgh6789' [3] -> '345'");
|
||||||
|
t.skip ("'a12bcd345efgh6789' [4] -> 'efgh'");
|
||||||
|
t.skip ("'a12bcd345efgh6789' [5] -> '6789'");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Let's throw some ugly Perl at it.
|
||||||
|
input = "my $variable_name = 'single string';";
|
||||||
|
{
|
||||||
|
Lexer l (input);
|
||||||
|
l.categorizeAsAlpha ('_');
|
||||||
|
l.coalesceQuoted (true);
|
||||||
|
l.tokenize (tokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
t.is (tokens.size (), (size_t) 9, "'my $variable_name = 'single string';' -> 'my| |$|variable_name| |=| |'|single string|'|;'");
|
||||||
|
if (tokens.size () == 9)
|
||||||
|
{
|
||||||
|
t.is (tokens[0], "my", "'my $variable_name = 'single string';' [0] -> 'my'");
|
||||||
|
t.is (tokens[1], " ", "'my $variable_name = 'single string';' [1] -> ' '");
|
||||||
|
t.is (tokens[2], "$", "'my $variable_name = 'single string';' [2] -> '$'");
|
||||||
|
t.is (tokens[3], "variable_name", "'my $variable_name = 'single string';' [3] -> 'variable_name'");
|
||||||
|
t.is (tokens[4], " ", "'my $variable_name = 'single string';' [4] -> ' '");
|
||||||
|
t.is (tokens[5], "=", "'my $variable_name = 'single string';' [5] -> '='");
|
||||||
|
t.is (tokens[6], " ", "'my $variable_name = 'single string';' [6] -> ' '");
|
||||||
|
t.is (tokens[7], "'single string'", "'my $variable_name = 'single string';' [8] -> ''single string''");
|
||||||
|
t.is (tokens[8], ";", "'my $variable_name = 'single string';' [10] -> ';'");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
t.skip ("'my $variable_name = 'single string';' [0] -> 'my'");
|
||||||
|
t.skip ("'my $variable_name = 'single string';' [1] -> ' '");
|
||||||
|
t.skip ("'my $variable_name = 'single string';' [2] -> '$'");
|
||||||
|
t.skip ("'my $variable_name = 'single string';' [3] -> 'variable_name'");
|
||||||
|
t.skip ("'my $variable_name = 'single string';' [4] -> ' '");
|
||||||
|
t.skip ("'my $variable_name = 'single string';' [5] -> '='");
|
||||||
|
t.skip ("'my $variable_name = 'single string';' [6] -> ' '");
|
||||||
|
t.skip ("'my $variable_name = 'single string';' [8] -> ''single string''");
|
||||||
|
t.skip ("'my $variable_name = 'single string';' [10] -> ';'");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now exercise all the configurable coalescence.
|
||||||
|
input = "ab 12 'a'";
|
||||||
|
{
|
||||||
|
Lexer l (input);
|
||||||
|
l.tokenize (tokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
t.is (tokens.size (), (size_t) 8, "'ab 12 'a'' -> 'ab| | |12| |'|a|''");
|
||||||
|
if (tokens.size () == 8)
|
||||||
|
{
|
||||||
|
t.is (tokens[0], "ab", "'ab 12 'a'' [0] -> 'ab'");
|
||||||
|
t.is (tokens[1], " ", "'ab 12 'a'' [1] -> ' '");
|
||||||
|
t.is (tokens[2], " ", "'ab 12 'a'' [2] -> ' '");
|
||||||
|
t.is (tokens[3], "12", "'ab 12 'a'' [3] -> '12'");
|
||||||
|
t.is (tokens[4], " ", "'ab 12 'a'' [4] -> ' '");
|
||||||
|
t.is (tokens[5], "'", "'ab 12 'a'' [5] -> '''");
|
||||||
|
t.is (tokens[6], "a", "'ab 12 'a'' [6] -> 'a'");
|
||||||
|
t.is (tokens[7], "'", "'ab 12 'a'' [7] -> '''");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
t.skip ("'ab 12 'a'' [0] -> 'ab'");
|
||||||
|
t.skip ("'ab 12 'a'' [1] -> ' '");
|
||||||
|
t.skip ("'ab 12 'a'' [2] -> ' '");
|
||||||
|
t.skip ("'ab 12 'a'' [3] -> '12'");
|
||||||
|
t.skip ("'ab 12 'a'' [4] -> ' '");
|
||||||
|
t.skip ("'ab 12 'a'' [5] -> '''");
|
||||||
|
t.skip ("'ab 12 'a'' [6] -> 'a'");
|
||||||
|
t.skip ("'ab 12 'a'' [7] -> '''");
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
Lexer l (input);
|
||||||
|
l.coalesceAlpha (false);
|
||||||
|
l.tokenize (tokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
t.is (tokens.size (), (size_t) 9, "'ab 12 'a'' -> 'a|b| | |12| |'|a|''");
|
||||||
|
if (tokens.size () == 9)
|
||||||
|
{
|
||||||
|
t.is (tokens[0], "a", "'ab 12 'a'' [0] -> 'a'");
|
||||||
|
t.is (tokens[1], "b", "'ab 12 'a'' [1] -> 'b'");
|
||||||
|
t.is (tokens[2], " ", "'ab 12 'a'' [2] -> ' '");
|
||||||
|
t.is (tokens[3], " ", "'ab 12 'a'' [3] -> ' '");
|
||||||
|
t.is (tokens[4], "12", "'ab 12 'a'' [4] -> '12'");
|
||||||
|
t.is (tokens[5], " ", "'ab 12 'a'' [5] -> ' '");
|
||||||
|
t.is (tokens[6], "'", "'ab 12 'a'' [6] -> '''");
|
||||||
|
t.is (tokens[7], "a", "'ab 12 'a'' [7] -> 'a'");
|
||||||
|
t.is (tokens[8], "'", "'ab 12 'a'' [8] -> '''");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
t.skip ("'ab 12 'a'' [0] -> 'a'");
|
||||||
|
t.skip ("'ab 12 'a'' [1] -> 'b'");
|
||||||
|
t.skip ("'ab 12 'a'' [2] -> ' '");
|
||||||
|
t.skip ("'ab 12 'a'' [3] -> ' '");
|
||||||
|
t.skip ("'ab 12 'a'' [4] -> '12'");
|
||||||
|
t.skip ("'ab 12 'a'' [5] -> ' '");
|
||||||
|
t.skip ("'ab 12 'a'' [6] -> '''");
|
||||||
|
t.skip ("'ab 12 'a'' [7] -> 'a'");
|
||||||
|
t.skip ("'ab 12 'a'' [8] -> '''");
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
Lexer l (input);
|
||||||
|
l.coalesceDigits (false);
|
||||||
|
l.tokenize (tokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
t.is (tokens.size (), (size_t) 9, "'ab 12 'a'' -> 'ab| | |1|2| |'|a|''");
|
||||||
|
if (tokens.size () == 9)
|
||||||
|
{
|
||||||
|
t.is (tokens[0], "ab", "'ab 12 'a'' [0] -> 'ab'");
|
||||||
|
t.is (tokens[1], " ", "'ab 12 'a'' [1] -> ' '");
|
||||||
|
t.is (tokens[2], " ", "'ab 12 'a'' [2] -> ' '");
|
||||||
|
t.is (tokens[3], "1", "'ab 12 'a'' [3] -> '1'");
|
||||||
|
t.is (tokens[4], "2", "'ab 12 'a'' [4] -> '2'");
|
||||||
|
t.is (tokens[5], " ", "'ab 12 'a'' [5] -> ' '");
|
||||||
|
t.is (tokens[6], "'", "'ab 12 'a'' [6] -> '''");
|
||||||
|
t.is (tokens[7], "a", "'ab 12 'a'' [7] -> 'a'");
|
||||||
|
t.is (tokens[8], "'", "'ab 12 'a'' [8] -> '''");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
t.skip ("'ab 12 'a'' [0] -> 'ab'");
|
||||||
|
t.skip ("'ab 12 'a'' [1] -> ' '");
|
||||||
|
t.skip ("'ab 12 'a'' [2] -> ' '");
|
||||||
|
t.skip ("'ab 12 'a'' [3] -> '1'");
|
||||||
|
t.skip ("'ab 12 'a'' [4] -> '2'");
|
||||||
|
t.skip ("'ab 12 'a'' [5] -> ' '");
|
||||||
|
t.skip ("'ab 12 'a'' [6] -> '''");
|
||||||
|
t.skip ("'ab 12 'a'' [7] -> 'a'");
|
||||||
|
t.skip ("'ab 12 'a'' [8] -> '''");
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
Lexer l (input);
|
||||||
|
l.coalesceQuoted (true);
|
||||||
|
l.tokenize (tokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
t.is (tokens.size (), (size_t) 6, "'ab 12 'a'' -> 'ab| | |12| |'a''");
|
||||||
|
if (tokens.size () == 6)
|
||||||
|
{
|
||||||
|
t.is (tokens[0], "ab", "'ab 12 'a'' [0] -> 'ab'");
|
||||||
|
t.is (tokens[1], " ", "'ab 12 'a'' [1] -> ' '");
|
||||||
|
t.is (tokens[2], " ", "'ab 12 'a'' [2] -> ' '");
|
||||||
|
t.is (tokens[3], "12", "'ab 12 'a'' [3] -> '12'");
|
||||||
|
t.is (tokens[4], " ", "'ab 12 'a'' [4] -> ' '");
|
||||||
|
t.is (tokens[5], "'a'", "'ab 12 'a'' [5] -> ''a''");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
t.skip ("'ab 12 'a'' [0] -> 'ab'");
|
||||||
|
t.skip ("'ab 12 'a'' [1] -> ' '");
|
||||||
|
t.skip ("'ab 12 'a'' [2] -> ' '");
|
||||||
|
t.skip ("'ab 12 'a'' [3] -> '12'");
|
||||||
|
t.skip ("'ab 12 'a'' [4] -> ' '");
|
||||||
|
t.skip ("'ab 12 'a'' [5] -> ''a''");
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
Lexer l (input);
|
||||||
|
l.coalesceWhite (true);
|
||||||
|
l.tokenize (tokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
t.is (tokens.size (), (size_t) 7, "'ab 12 'a'' -> 'ab| |12| |'|a|''");
|
||||||
|
if (tokens.size () == 7)
|
||||||
|
{
|
||||||
|
t.is (tokens[0], "ab", "'ab 12 'a'' [0] -> 'ab'");
|
||||||
|
t.is (tokens[1], " ", "'ab 12 'a'' [1] -> ' '");
|
||||||
|
t.is (tokens[2], "12", "'ab 12 'a'' [2] -> '12'");
|
||||||
|
t.is (tokens[3], " ", "'ab 12 'a'' [3] -> ' '");
|
||||||
|
t.is (tokens[4], "'", "'ab 12 'a'' [4] -> '''");
|
||||||
|
t.is (tokens[5], "a", "'ab 12 'a'' [5] -> 'a'");
|
||||||
|
t.is (tokens[6], "'", "'ab 12 'a'' [6] -> '''");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
t.skip ("'ab 12 'a'' [0] -> 'ab'");
|
||||||
|
t.skip ("'ab 12 'a'' [1] -> ' '");
|
||||||
|
t.skip ("'ab 12 'a'' [2] -> '12'");
|
||||||
|
t.skip ("'ab 12 'a'' [3] -> ' '");
|
||||||
|
t.skip ("'ab 12 'a'' [4] -> '''");
|
||||||
|
t.skip ("'ab 12 'a'' [5] -> 'a'");
|
||||||
|
t.skip ("'ab 12 'a'' [6] -> '''");
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
Lexer l (input);
|
||||||
|
l.skipWhitespace (true);
|
||||||
|
l.tokenize (tokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
t.is (tokens.size (), (size_t) 5, "'ab 12 'a'' -> 'ab|12|'|a|''");
|
||||||
|
if (tokens.size () == 5)
|
||||||
|
{
|
||||||
|
t.is (tokens[0], "ab", "'ab 12 'a'' [0] -> 'ab'");
|
||||||
|
t.is (tokens[1], "12", "'ab 12 'a'' [1] -> '12'");
|
||||||
|
t.is (tokens[2], "'", "'ab 12 'a'' [2] -> '''");
|
||||||
|
t.is (tokens[3], "a", "'ab 12 'a'' [3] -> 'a'");
|
||||||
|
t.is (tokens[4], "'", "'ab 12 'a'' [4] -> '''");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
t.skip ("'ab 12 'a'' [0] -> 'ab'");
|
||||||
|
t.skip ("'ab 12 'a'' [1] -> '12'");
|
||||||
|
t.skip ("'ab 12 'a'' [2] -> '''");
|
||||||
|
t.skip ("'ab 12 'a'' [3] -> 'a'");
|
||||||
|
t.skip ("'ab 12 'a'' [4] -> '''");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Special tokens
|
||||||
|
input = "a := 1";
|
||||||
|
{
|
||||||
|
Lexer l (input);
|
||||||
|
l.skipWhitespace (true);
|
||||||
|
l.specialToken (":=");
|
||||||
|
l.tokenize (tokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
t.is (tokens.size (), (size_t) 3, "'a := 1' -> 'a|:=|1'");
|
||||||
|
if (tokens.size () == 3)
|
||||||
|
{
|
||||||
|
t.is (tokens[0], "a", "'a := 1' [0] -> 'a'");
|
||||||
|
t.is (tokens[1], ":=", "'a := 1' [1] -> ':='");
|
||||||
|
t.is (tokens[2], "1", "'a := 1' [2] -> '1'");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
t.skip ("'a := 1' [0] -> 'a'");
|
||||||
|
t.skip ("'a := 1' [1] -> ':='");
|
||||||
|
t.skip ("'a := 1' [2] -> '1'");
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue