diff --git a/src/Arguments.cpp b/src/Arguments.cpp index 1843051bd..53997621c 100644 --- a/src/Arguments.cpp +++ b/src/Arguments.cpp @@ -251,7 +251,6 @@ void Arguments::categorize () arg->second = "tag"; } - // // .[:=] else if (is_attmod (arg->first)) { @@ -302,6 +301,16 @@ void Arguments::categorize () arg->second = "op"; } + // + else if (is_expression (arg->first)) + { + found_non_sequence = true; + if (found_sequence) + found_something_after_sequence = true; + + arg->second = "exp"; + } + // If the type is not known, it is treated as a generic word. else { @@ -557,13 +566,7 @@ bool Arguments::is_command ( } //////////////////////////////////////////////////////////////////////////////// -// ______________ -// | | -// | v -// start --> name --> : --> " --> value --> " --> end -// | ^ -// |_____________| -// +// [:=]['"][]['"] bool Arguments::is_attr (const std::string& input) { Nibbler n (input); @@ -591,13 +594,7 @@ bool Arguments::is_attr (const std::string& input) } //////////////////////////////////////////////////////////////////////////////// -// ______________ -// | | -// | v -// start --> name --> . --> mod --> : --> " --> value --> " --> end -// | ^ | ^ -// |_____________________| |_____________| -// +// .[:=]['"]['"] bool Arguments::is_attmod (const std::string& input) { Nibbler n (input); @@ -733,6 +730,7 @@ bool Arguments::is_tag (const std::string& input) } //////////////////////////////////////////////////////////////////////////////// +// "+", "-", "*", "/", "%", "~", "!~", "<" ... 
bool Arguments::is_operator (const std::string& input) { for (unsigned int i = 0; i < NUM_OPERATORS; ++i) @@ -743,13 +741,21 @@ bool Arguments::is_operator (const std::string& input) } //////////////////////////////////////////////////////////////////////////////// -// ______________ -// | | -// | v -// start --> name --> : --> " --> value --> " --> end -// | ^ -// |_____________| -// +bool Arguments::is_expression (const std::string& input) +{ + std::vector tokens; + splitq (tokens, input, ' '); + + std::vector ::iterator token; + for (token = tokens.begin (); token != tokens.end (); ++token) + if (is_operator (*token)) + return true; + + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +// [:=]['"]['"] bool Arguments::extract_attr ( const std::string& input, std::string& name, @@ -787,13 +793,7 @@ bool Arguments::extract_attr ( } //////////////////////////////////////////////////////////////////////////////// -// ______________ -// | | -// | v -// start --> name --> . 
--> mod --> : --> " --> value --> " --> end -// | ^ -// |_____________| -// +// .[:=]['"]['"] bool Arguments::extract_attmod ( const std::string& input, std::string& name, @@ -821,7 +821,7 @@ bool Arguments::extract_attmod ( if (n.getUntilOneOf (":=", modifier)) { - if (!valid_modifier (modifier)) + if (!Arguments::valid_modifier (modifier)) throw std::string ("The name '") + modifier + "' is not a valid modifier."; // TODO i18n } else @@ -928,19 +928,24 @@ bool Arguments::extract_pattern (const std::string& input, std::string& pattern) bool Arguments::extract_id (const std::string& input, std::vector & sequence) { Nibbler n (input); - sequence.clear (); int id; + if (n.getUnsignedInt (id)) { sequence.push_back (id); if (n.skip ('-')) { - if (!n.getUnsignedInt (id)) + int end; + if (!n.getUnsignedInt (end)) throw std::string ("Unrecognized ID after hyphen."); - sequence.push_back (id); + if (id > end) + throw std::string ("Inverted range 'high-low' instead of 'low-high'"); + + for (int n = id + 1; n <= end; ++n) + sequence.push_back (n); } while (n.skip (',')) @@ -951,10 +956,15 @@ bool Arguments::extract_id (const std::string& input, std::vector & sequenc if (n.skip ('-')) { - if (!n.getUnsignedInt (id)) + int end; + if (!n.getUnsignedInt (end)) throw std::string ("Unrecognized ID after hyphen."); - sequence.push_back (id); + if (id > end) + throw std::string ("Inverted range 'high-low' instead of 'low-high'"); + + for (int n = id + 1; n <= end; ++n) + sequence.push_back (n); } } else @@ -973,7 +983,6 @@ bool Arguments::extract_uuid ( std::vector & sequence) { Nibbler n (input); - sequence.clear (); std::string uuid; if (n.getUUID (uuid)) @@ -1047,6 +1056,7 @@ Arguments Arguments::extract_read_only_filter () i->second == "id" || i->second == "uuid" || i->second == "op" || + i->second == "exp" || i->second == "word") { filter.push_back (*i); @@ -1092,6 +1102,7 @@ Arguments Arguments::extract_write_filter () i->second == "id" || i->second == "uuid" || i->second == 
"op" || + i->second == "exp" || i->second == "word") { filter.push_back (*i); @@ -1153,9 +1164,14 @@ Arguments Arguments::extract_modifications () + "' is not allowed when modifiying a task."; else if (i->second == "attmod") - throw std::string ("Attribute modifiers '") + throw std::string ("An attribute modifier '") + i->first - + "' are not allowed when modifiying a task."; + + "' is not allowed when modifiying a task."; + + else if (i->second == "exp") + throw std::string ("An expression '") + + i->first + + "' is not allowed when modifiying a task."; else if (i->second == "id") throw std::string ("A task id cannot be modified."); @@ -1196,6 +1212,7 @@ void Arguments::dump (const std::string& label) color_map["uuid"] = Color ("yellow on gray3"); color_map["substitution"] = Color ("bold cyan on gray3"); color_map["op"] = Color ("bold blue on gray3"); + color_map["exp"] = Color ("bold green on gray5"); color_map["none"] = Color ("white on gray3"); Color color_debug (context.config.get ("color.debug")); diff --git a/src/Arguments.h b/src/Arguments.h index 87b8c5641..6044e2b89 100644 --- a/src/Arguments.h +++ b/src/Arguments.h @@ -55,31 +55,32 @@ public: bool find_command (std::string&); - bool is_command (const std::vector &, std::string&); - bool is_attr (const std::string&); - bool is_attmod (const std::string&); - bool is_subst (const std::string&); - bool is_pattern (const std::string&); - bool is_id (const std::string&); - bool is_uuid (const std::string&); - bool is_tag (const std::string&); - bool is_operator (const std::string&); + static bool is_command (const std::vector &, std::string&); + static bool is_attr (const std::string&); + static bool is_attmod (const std::string&); + static bool is_subst (const std::string&); + static bool is_pattern (const std::string&); + static bool is_id (const std::string&); + static bool is_uuid (const std::string&); + static bool is_tag (const std::string&); + static bool is_operator (const std::string&); + static bool 
is_expression (const std::string&); // TODO Decide if these are really useful. - bool extract_attr (const std::string&, std::string&, std::string&); - bool extract_attmod (const std::string&, std::string&, std::string&, std::string&, std::string&); - bool extract_subst (const std::string&, std::string&, std::string&, bool&); - bool extract_pattern (const std::string&, std::string&); - bool extract_id (const std::string&, std::vector &); - bool extract_uuid (const std::string&, std::vector &); - bool extract_tag (const std::string&, char&, std::string&); - bool extract_operator (const std::string&, std::string&); + static bool extract_attr (const std::string&, std::string&, std::string&); + static bool extract_attmod (const std::string&, std::string&, std::string&, std::string&, std::string&); + static bool extract_subst (const std::string&, std::string&, std::string&, bool&); + static bool extract_pattern (const std::string&, std::string&); + static bool extract_id (const std::string&, std::vector &); + static bool extract_uuid (const std::string&, std::vector &); + static bool extract_tag (const std::string&, char&, std::string&); + static bool extract_operator (const std::string&, std::string&); Arguments extract_read_only_filter (); Arguments extract_write_filter (); Arguments extract_modifications (); - bool valid_modifier (const std::string&); + static bool valid_modifier (const std::string&); void dump (const std::string&); }; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 90c52c686..597e89840 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -20,6 +20,7 @@ set (task_SRCS API.cpp API.h Filter.cpp Filter.h Hooks.cpp Hooks.h JSON.cpp JSON.h + Lexer.cpp Lexer.h Location.cpp Location.h Nibbler.cpp Nibbler.h Path.cpp Path.h diff --git a/src/Expression.cpp b/src/Expression.cpp index a19215e2a..13e72c613 100644 --- a/src/Expression.cpp +++ b/src/Expression.cpp @@ -25,15 +25,23 @@ // 
//////////////////////////////////////////////////////////////////////////////// +#include // TODO Remove. +#include #include +#include #include extern Context context; //////////////////////////////////////////////////////////////////////////////// +// Perform all the necessary steps prior to an eval call. Expression::Expression (Arguments& arguments) : _original (arguments) { + expand_sequence (); + to_infix (); + expand_expression (); + to_postfix (); } //////////////////////////////////////////////////////////////////////////////// @@ -44,29 +52,328 @@ Expression::~Expression () //////////////////////////////////////////////////////////////////////////////// bool Expression::eval (Task& task) { + // TODO Duplicate the _postfix vector as the operating stack. + // TODO ... + return true; } +//////////////////////////////////////////////////////////////////////////////// +// Convert: 1,3-5,00000000-0000-0000-0000-000000000000 +// +// To: (id=1 or (id>=3 and id<=5) or +// uuid="00000000-0000-0000-0000-000000000000") +void Expression::expand_sequence () +{ + Arguments temp; + _sequenced.clear (); + + // Extract all the components of a sequence. + std::vector ids; + std::vector uuids; + std::vector >::iterator arg; + for (arg = _original.begin (); arg != _original.end (); ++arg) + { + if (arg->second == "id") + Arguments::extract_id (arg->first, ids); + + else if (arg->second == "uuid") + Arguments::extract_uuid (arg->first, uuids); + } + + // If there is no sequence, we're done. + if (ids.size () == 0 && uuids.size () == 0) + return; + + // Construct the algebraic form. 
+ std::stringstream sequence; + sequence << "("; + for (unsigned int i = 0; i < ids.size (); ++i) + { + if (i) + sequence << " or "; + + sequence << "id=" << ids[i]; + } + + if (uuids.size ()) + { + sequence << " or "; + for (unsigned int i = 0; i < uuids.size (); ++i) + { + if (i) + sequence << " or "; + + sequence << "uuid=\"" << uuids[i] << "\""; + } + } + + sequence << ")"; + std::cout << "# sequence '" << sequence.str () << "'\n"; + + // Copy everything up to the first id/uuid. + for (arg = _original.begin (); arg != _original.end (); ++arg) + { + if (arg->second == "id" || arg->second == "uuid") + break; + + temp.push_back (*arg); + } + + // Now insert the new sequence expression. + temp.push_back (std::make_pair (sequence.str (), "exp")); + + // Now copy everything after the last id/uuid. + bool found_id = false; + for (arg = _original.begin (); arg != _original.end (); ++arg) + { + if (arg->second == "id" || arg->second == "uuid") + found_id = true; + + else if (found_id) + temp.push_back (*arg); + } + + _sequenced.swap (temp); + _sequenced.dump ("Expression::expand_sequence"); +} + +//////////////////////////////////////////////////////////////////////////////// +// Convert: +with -without +// +// To: tags ~ with +// tags !~ without +void Expression::expand_tag (const std::string& input) +{ + char type; + std::string value; + Arguments::extract_tag (input, type, value); + + _infix.push_back (std::make_pair ("tags", "dom")); + _infix.push_back (std::make_pair (type == '+' ? "~" : "!~", "op")); + _infix.push_back (std::make_pair (value, "exp")); +} + +//////////////////////////////////////////////////////////////////////////////// +// Convert: [:=] +// +// To: = lex +void Expression::expand_attr (const std::string& input) +{ + // TODO Should canonicalize 'name'. 
+ std::string name; + std::string value; + Arguments::extract_attr (input, name, value); + + _infix.push_back (std::make_pair (name, "dom")); + _infix.push_back (std::make_pair ("=", "op")); + _infix.push_back (std::make_pair (value, "exp")); +} + +//////////////////////////////////////////////////////////////////////////////// +// Convert: .[:=] +// +// To: lex +void Expression::expand_attmod (const std::string& input) +{ + // TODO Should canonicalize 'name'. + std::string name; + // TODO Should canonicalize 'mod'. + std::string mod; + std::string value; + std::string sense; + Arguments::extract_attmod (input, name, mod, value, sense); + + if (mod == "before" || mod == "under" || mod == "below") + { + _infix.push_back (std::make_pair (name, "dom")); + _infix.push_back (std::make_pair ("<", "op")); + _infix.push_back (std::make_pair (value, "exp")); + } + else if (mod == "after" || mod == "over" || mod == "above") + { + _infix.push_back (std::make_pair (name, "dom")); + _infix.push_back (std::make_pair (">", "op")); + _infix.push_back (std::make_pair (value, "exp")); + } + else if (mod == "none") + { + _infix.push_back (std::make_pair (name, "dom")); + _infix.push_back (std::make_pair ("==", "op")); + _infix.push_back (std::make_pair ("\"\"", "exp")); + } + else if (mod == "any") + { + _infix.push_back (std::make_pair (name, "dom")); + _infix.push_back (std::make_pair ("!=", "op")); + _infix.push_back (std::make_pair ("\"\"", "exp")); + } + else if (mod == "is" || mod == "equals") + { + _infix.push_back (std::make_pair (name, "dom")); + _infix.push_back (std::make_pair ("=", "op")); + _infix.push_back (std::make_pair (value, "exp")); + } + else if (mod == "isnt" || mod == "not") + { + _infix.push_back (std::make_pair (name, "dom")); + _infix.push_back (std::make_pair ("!=", "op")); + _infix.push_back (std::make_pair (value, "exp")); + } + else if (mod == "has" || mod == "contains") + { + _infix.push_back (std::make_pair (name, "dom")); + _infix.push_back 
(std::make_pair ("~", "op")); + _infix.push_back (std::make_pair (value, "exp")); + } + else if (mod == "hasnt") + { + _infix.push_back (std::make_pair (name, "dom")); + _infix.push_back (std::make_pair ("!~", "op")); + _infix.push_back (std::make_pair (value, "exp")); + } + else if (mod == "startswith" || mod == "left") + { + // TODO ? + } + else if (mod == "endswith" || mod == "right") + { + // TODO ? + } + else if (mod == "word") + { + // TODO ? + } + else if (mod == "noword") + { + // TODO ? + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Convert: +// +// To: description ~ +void Expression::expand_word (const std::string& input) +{ + _infix.push_back (std::make_pair ("description", "dom")); + _infix.push_back (std::make_pair ("~", "op")); + _infix.push_back (std::make_pair (input, "exp")); +} + +//////////////////////////////////////////////////////////////////////////////// +// Convert: // +// +// To: description ~ +void Expression::expand_pattern (const std::string& input) +{ + std::string value; + Arguments::extract_pattern (input, value); + + _infix.push_back (std::make_pair ("description", "dom")); + _infix.push_back (std::make_pair ("~", "op")); + _infix.push_back (std::make_pair (value, "exp")); +} + +//////////////////////////////////////////////////////////////////////////////// +// Convert: +// +// To: lex +void Expression::expand_expression () +{ + Arguments temp; + + std::vector >::iterator arg; + for (arg = _infix.begin (); arg != _infix.end (); ++arg) + { + if (arg->second == "exp") + { + Lexer lexer (arg->first); + lexer.skipWhitespace (true); + lexer.coalesceAlpha (true); + lexer.coalesceDigits (true); + lexer.coalesceQuoted (true); + + std::vector tokens; + lexer.tokenize (tokens); + + std::vector ::iterator token; + for (token = tokens.begin (); token != tokens.end (); ++token) + { + if (_infix.is_operator (*token)) + temp.push_back (std::make_pair (*token, "op")); + else + temp.push_back 
(std::make_pair (*token, "dom")); + } + } + else + temp.push_back (*arg); + } + + _infix.swap (temp); + _infix.dump ("Expression::expand_expression"); +} + //////////////////////////////////////////////////////////////////////////////// // Inserts the 'and' operator by default between terms that are not separated by // at least one operator. // -// Converts: -// to: and +// Converts: +// to: and // -void Expression::toInfix () +// +// +// Rules: +// 1. Two adjacent non-operator arguments have an 'and' inserted between them. +// 2. Any argument of type "exp" is lexed and replaced by tokens. +// +void Expression::to_infix () { _infix.clear (); + bool new_style = is_new_style (); + + std::string value; std::string previous = "op"; std::vector >::iterator arg; - for (arg = _original.begin (); arg != _original.end (); ++arg) + for (arg = _sequenced.begin (); arg != _sequenced.end (); ++arg) { - if (previous != "op" && + // Old-style filters need 'and' conjunctions. + if (!new_style && + previous != "op" && arg->second != "op") + { _infix.push_back (std::make_pair ("and", "op")); + } + + // Upgrade all arguments to new-style. + // ID & UUID sequence has already been converted. + if (arg->second == "id" || + arg->second == "uuid") + ; // NOP. + + else if (arg->second == "tag") + expand_tag (arg->first); + + else if (arg->second == "pattern") + expand_pattern (arg->first); + + else if (arg->second == "attribute") + expand_attr (arg->first); + + else if (arg->second == "attmod") + expand_attmod (arg->first); + + else if (arg->second == "word") + expand_word (arg->first); + + // Expressions will be converted later. 
+ else if (arg->second == "exp") + _infix.push_back (*arg); + + else + throw std::string ("Error: unrecognized argument category '") + arg->second + "'"; - _infix.push_back (*arg); previous = arg->second; } @@ -75,7 +382,41 @@ void Expression::toInfix () //////////////////////////////////////////////////////////////////////////////// // Dijkstra Shunting Algorithm. -void Expression::toPostfix () +// +// While there are tokens to be read: +// Read a token. +// If the token is a number, then add it to the output queue. +// If the token is a function token, then push it onto the stack. +// If the token is a function argument separator (e.g., a comma): +// Until the token at the top of the stack is a left parenthesis, pop +// operators off the stack onto the output queue. If no left parentheses +// are encountered, either the separator was misplaced or parentheses were +// mismatched. +// If the token is an operator, o1, then: +// while there is an operator token, o2, at the top of the stack, and +// either o1 is left-associative and its precedence is less than or +// equal to that of o2, +// or o1 is right-associative and its precedence is less than that +// of o2, +// pop o2 off the stack, onto the output queue; +// push o1 onto the stack. +// If the token is a left parenthesis, then push it onto the stack. +// If the token is a right parenthesis: +// Until the token at the top of the stack is a left parenthesis, pop +// operators off the stack onto the output queue. +// Pop the left parenthesis from the stack, but not onto the output queue. +// If the token at the top of the stack is a function token, pop it onto +// the output queue. +// If the stack runs out without finding a left parenthesis, then there +// are mismatched parentheses. +// When there are no more tokens to read: +// While there are still operator tokens in the stack: +// If the operator token on the top of the stack is a parenthesis, then +// there are mismatched parentheses. 
+// Pop the operator onto the output queue. +// Exit. +// +void Expression::to_postfix () { _postfix.clear (); @@ -83,6 +424,25 @@ void Expression::toPostfix () } //////////////////////////////////////////////////////////////////////////////// +// Test whether the _original arguments are old style or new style. +// +// Old style: no single argument corresponds to an operator, ie no 'and', 'or', +// etc. +// +// New style: at least one argument that is an operator. +// +bool Expression::is_new_style () +{ + std::vector >::iterator arg; + for (arg = _original.begin (); arg != _original.end (); ++arg) + if (Arguments::is_operator (arg->first)) + return true; + + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +// TODO Remove? void Expression::dump (const std::string& label) { } diff --git a/src/Expression.h b/src/Expression.h index 54bd4f138..5579e3ffc 100644 --- a/src/Expression.h +++ b/src/Expression.h @@ -38,13 +38,24 @@ public: Expression (Arguments&); ~Expression (); bool eval (Task&); - void toInfix (); - void toPostfix (); +private: + void expand_sequence (); + void expand_expression (); + void expand_tag (const std::string&); + void expand_attr (const std::string&); + void expand_attmod (const std::string&); + void expand_word (const std::string&); + void expand_pattern (const std::string&); + + void to_infix (); + void to_postfix (); + bool is_new_style (); void dump (const std::string&); private: Arguments _original; + Arguments _sequenced; Arguments _infix; Arguments _postfix; }; diff --git a/src/Lexer.cpp b/src/Lexer.cpp new file mode 100644 index 000000000..b74a1640b --- /dev/null +++ b/src/Lexer.cpp @@ -0,0 +1,374 @@ +//////////////////////////////////////////////////////////////////////////////// +// taskwarrior - a command line task list manager. +// +// Copyright 2011, Paul Beckingham, Federico Hernandez. +// All rights reserved. 
+// +// This program is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free Software +// Foundation; either version 2 of the License, or (at your option) any later +// version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +// details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the +// +// Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, +// Boston, MA +// 02110-1301 +// USA +// +//////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////// +// This lexer works by breaking the input stream into tokens. The essence of +// the algorithm lies in the distinction between adjacent tokens, such that +// between the two extremes lies a good solution. +// +// At one extreme, the entire input is considered one token. Clearly this is +// only correct for trivial input. At the other extreme, every character of the +// input is a token. This is also wrong. +// +// If the input is as follows: +// +// It is almost 11:00am. +// +// The desired tokenization is: +// +// It +// +// is +// +// almost +// +// 11 +// : +// 00 +// am +// . +// \n +// +// This can be achieved by allowing transitions to denote token boundaries. +// Given the following character classes: +// +// letter: a-z A-Z +// digit: 0-9 +// whitespace: +// other: Everything else +// +// Then a token boundary is a transition between: +// letter -> !letter +// digit -> !digit +// whitespace -> any +// other -> any +// +// This has the effect of allowing groups of consecutive letters to be +// considered one token, as well as groups of digits. 
+// +//////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include + +static const int other = -1; +static const int alpha = -2; +static const int digit = -3; +static const int white = -4; +static const int quote = -5; + +//////////////////////////////////////////////////////////////////////////////// +Lexer::Lexer (const std::string& input) +: mInput (input) + +, mAlpha ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") +, mDigit ("0123456789") +, mQuote ("'\"") +, mWhite (" \t\n\r\f") + +, mAlphaCoalesce (true) +, mDigitCoalesce (true) +, mQuotedCoalesce (false) +, mWhiteCoalesce (false) +, mSkipWhitespace (false) +{ +} + +//////////////////////////////////////////////////////////////////////////////// +void Lexer::tokenize (std::vector & all) +{ + all.clear (); // Prevent repeated accumulation. + + std::string token; + bool inQuote = false; + char quoteChar = '\0'; + for (unsigned int i = 0; i < mInput.length (); ++i) + { + bool specialFound = false; + for (unsigned int s = 0; s < mSpecialTokens.size (); ++s) + { + std::string potential = mInput.substr ( + i, min (mSpecialTokens[s].length (), mInput.length () - i)); + + if (potential == mSpecialTokens[s]) + { + // Capture currently assembled token, the special token, increment over + // that token, and skip all remaining code in the loop. + if (token.length ()) + { + all.push_back (token); + token = ""; + } + + all.push_back (potential); + i += potential.length () - 1; + specialFound = true; + } + } + + if (specialFound) + continue; + + char c = mInput[i]; + char next = '\0'; + if (i < mInput.length () - 1) + next = mInput[i + 1]; + + // Classify current and next characters. + int thisChar = classify (c); + int nextChar = classify (next); + + // Properly set inQuote, quoteChar. + if (!inQuote && thisChar == quote) + { + quoteChar = c; + inQuote = true; + } + else if (inQuote && c == quoteChar) + { + inQuote = false; + } + + // Detect transitions. 
+ bool transition = false; + if (thisChar != nextChar) + transition = true; + + token += c; + + // Transitions mean new token. All 'other' characters are separate tokens. + if (transition || nextChar == other) + { + if (!inQuote || !mQuotedCoalesce) + { + if (!mSkipWhitespace || thisChar != white) + all.push_back (token); + token = ""; + } + } + + // Non-transitions - runs. + else + { + // Runs may be optionally coalesced. + if (!(mAlphaCoalesce && nextChar == alpha) && + !(mDigitCoalesce && nextChar == digit) && + !(mWhiteCoalesce && nextChar == white)) + { + if (!inQuote || !mQuotedCoalesce) + { + if (!mSkipWhitespace || thisChar != white) + all.push_back (token); + token = ""; + } + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +void Lexer::categorizeAsAlpha (char value) +{ + if (mAlpha.find (value) == std::string::npos) + mAlpha += value; + + std::string::size_type pos; + if ((pos = mDigit.find (value)) != std::string::npos) mDigit.erase (pos, 1); + if ((pos = mQuote.find (value)) != std::string::npos) mQuote.erase (pos, 1); + if ((pos = mWhite.find (value)) != std::string::npos) mWhite.erase (pos, 1); +} + +//////////////////////////////////////////////////////////////////////////////// +void Lexer::ignoreAsAlpha (char value) +{ + std::string::size_type pos; + if ((pos = mAlpha.find (value)) != std::string::npos) mAlpha.erase (pos, 1); +} + +//////////////////////////////////////////////////////////////////////////////// +void Lexer::setAlpha (const std::string& value) +{ + mAlpha = value; + + std::string::size_type pos; + for (unsigned int i = 0; i < mAlpha.length (); ++i) + { + if ((pos = mDigit.find (mAlpha[i])) != std::string::npos) mDigit.erase (pos, 1); + if ((pos = mQuote.find (mAlpha[i])) != std::string::npos) mQuote.erase (pos, 1); + if ((pos = mWhite.find (mAlpha[i])) != std::string::npos) mWhite.erase (pos, 1); + } +} + 
+//////////////////////////////////////////////////////////////////////////////// +void Lexer::categorizeAsDigit (char value) +{ + if (mDigit.find (value) == std::string::npos) + mDigit += value; + + std::string::size_type pos; + if ((pos = mAlpha.find (value)) != std::string::npos) mAlpha.erase (pos, 1); + if ((pos = mQuote.find (value)) != std::string::npos) mQuote.erase (pos, 1); + if ((pos = mWhite.find (value)) != std::string::npos) mWhite.erase (pos, 1); +} + +//////////////////////////////////////////////////////////////////////////////// +void Lexer::ignoreAsDigit (char value) +{ + std::string::size_type pos; + if ((pos = mDigit.find (value)) != std::string::npos) mDigit.erase (pos, 1); +} + +//////////////////////////////////////////////////////////////////////////////// +void Lexer::setDigit (const std::string& value) +{ + mDigit = value; + + std::string::size_type pos; + for (unsigned int i = 0; i < mDigit.length (); ++i) + { + if ((pos = mAlpha.find (mDigit[i])) != std::string::npos) mAlpha.erase (pos, 1); + if ((pos = mQuote.find (mDigit[i])) != std::string::npos) mQuote.erase (pos, 1); + if ((pos = mWhite.find (mDigit[i])) != std::string::npos) mWhite.erase (pos, 1); + } +} + +//////////////////////////////////////////////////////////////////////////////// +void Lexer::categorizeAsQuote (char value) +{ + if (mQuote.find (value) == std::string::npos) + mQuote += value; + + std::string::size_type pos; + if ((pos = mAlpha.find (value)) != std::string::npos) mAlpha.erase (pos, 1); + if ((pos = mDigit.find (value)) != std::string::npos) mDigit.erase (pos, 1); + if ((pos = mWhite.find (value)) != std::string::npos) mWhite.erase (pos, 1); +} + +//////////////////////////////////////////////////////////////////////////////// +void Lexer::ignoreAsQuote (char value) +{ + std::string::size_type pos; + if ((pos = mQuote.find (value)) != std::string::npos) mQuote.erase (pos, 1); +} + +//////////////////////////////////////////////////////////////////////////////// 
+void Lexer::setQuote (const std::string& value) +{ + mQuote = value; + + std::string::size_type pos; + for (unsigned int i = 0; i < mQuote.length (); ++i) + { + if ((pos = mAlpha.find (mQuote[i])) != std::string::npos) mAlpha.erase (pos, 1); + if ((pos = mDigit.find (mQuote[i])) != std::string::npos) mDigit.erase (pos, 1); + if ((pos = mWhite.find (mQuote[i])) != std::string::npos) mWhite.erase (pos, 1); + } +} + +//////////////////////////////////////////////////////////////////////////////// +void Lexer::categorizeAsWhite (char value) +{ + if (mWhite.find (value) == std::string::npos) + mWhite += value; + + std::string::size_type pos; + if ((pos = mAlpha.find (value)) != std::string::npos) mAlpha.erase (pos, 1); + if ((pos = mDigit.find (value)) != std::string::npos) mDigit.erase (pos, 1); + if ((pos = mQuote.find (value)) != std::string::npos) mQuote.erase (pos, 1); +} + +//////////////////////////////////////////////////////////////////////////////// +void Lexer::ignoreAsWhite (char value) +{ + std::string::size_type pos; + if ((pos = mWhite.find (value)) != std::string::npos) mWhite.erase (pos, 1); +} + +//////////////////////////////////////////////////////////////////////////////// +void Lexer::setWhite (const std::string& value) +{ + mWhite = value; + + std::string::size_type pos; + for (unsigned int i = 0; i < mWhite.length (); ++i) + { + if ((pos = mAlpha.find (mWhite[i])) != std::string::npos) mAlpha.erase (pos, 1); + if ((pos = mDigit.find (mWhite[i])) != std::string::npos) mDigit.erase (pos, 1); + if ((pos = mQuote.find (mWhite[i])) != std::string::npos) mQuote.erase (pos, 1); + } +} + +//////////////////////////////////////////////////////////////////////////////// +void Lexer::coalesceAlpha (bool value) +{ + mAlphaCoalesce = value; +} + +//////////////////////////////////////////////////////////////////////////////// +void Lexer::coalesceDigits (bool value) +{ + mDigitCoalesce = value; +} + 
+//////////////////////////////////////////////////////////////////////////////// +void Lexer::coalesceQuoted (bool value) +{ + mQuotedCoalesce = value; +} + +//////////////////////////////////////////////////////////////////////////////// +void Lexer::coalesceWhite (bool value) +{ + mWhiteCoalesce = value; +} + +//////////////////////////////////////////////////////////////////////////////// +void Lexer::skipWhitespace (bool value) +{ + mSkipWhitespace = value; +} + +//////////////////////////////////////////////////////////////////////////////// +void Lexer::specialToken (const std::string& special) +{ + mSpecialTokens.push_back (special); +} + +//////////////////////////////////////////////////////////////////////////////// +int Lexer::classify (char c) +{ + if (mAlpha.find (c) != std::string::npos) return alpha; + if (mDigit.find (c) != std::string::npos) return digit; + if (mWhite.find (c) != std::string::npos) return white; + if (mQuote.find (c) != std::string::npos) return quote; + + return other; +} + +//////////////////////////////////////////////////////////////////////////////// + diff --git a/src/Lexer.h b/src/Lexer.h new file mode 100644 index 000000000..62d529909 --- /dev/null +++ b/src/Lexer.h @@ -0,0 +1,84 @@ +//////////////////////////////////////////////////////////////////////////////// +// taskwarrior - a command line task list manager. +// +// Copyright 2011, Paul Beckingham, Federico Hernandez. +// All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free Software +// Foundation; either version 2 of the License, or (at your option) any later +// version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +// details. 
+// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the +// +// Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, +// Boston, MA +// 02110-1301 +// USA +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef INCLUDED_LEXER +#define INCLUDED_LEXER + +#include +#include + +class Lexer +{ +public: + Lexer (const std::string&); + void tokenize (std::vector &); + + void categorizeAsAlpha (char); + void ignoreAsAlpha (char); + void setAlpha (const std::string&); + + void categorizeAsDigit (char); + void ignoreAsDigit (char); + void setDigit (const std::string&); + + void categorizeAsQuote (char); + void ignoreAsQuote (char); + void setQuote (const std::string&); + + void categorizeAsWhite (char); + void ignoreAsWhite (char); + void setWhite (const std::string&); + + void coalesceAlpha (bool); + void coalesceDigits (bool); + void coalesceQuoted (bool); + void coalesceWhite (bool); + void skipWhitespace (bool); + void specialToken (const std::string&); + +private: + int classify (char); + + std::string mInput; + + std::string mAlpha; + std::string mDigit; + std::string mQuote; + std::string mWhite; + + bool mAlphaCoalesce; + bool mDigitCoalesce; + bool mQuotedCoalesce; + bool mWhiteCoalesce; + bool mSkipWhitespace; + + std::vector mSpecialTokens; +}; + +#endif + +//////////////////////////////////////////////////////////////////////////////// diff --git a/src/commands/CmdCustom.cpp b/src/commands/CmdCustom.cpp index 502a0caa7..ca573e388 100644 --- a/src/commands/CmdCustom.cpp +++ b/src/commands/CmdCustom.cpp @@ -95,8 +95,6 @@ int CmdCustom::execute (std::string& output) //////////////////////////////////// Arguments f = context.args.extract_read_only_filter (); Expression e (f); - e.toInfix (); - e.toPostfix (); return 0; // TODO e.apply (tasks); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c80059fc4..50e087a64 100644 --- 
a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -7,9 +7,9 @@ include_directories (${CMAKE_SOURCE_DIR} ${TASK_INCLUDE_DIRS}) set (test_SRCS arguments.t att.t autocomplete.t color.t config.t date.t - directory.t dom.t duration.t file.t filt.t i18n.t json.t list.t - nibbler.t path.t record.t rx.t seq.t subst.t t.benchmark.t t.t - taskmod.t tdb.t tdb2.t text.t uri.t util.t variant.t view.t + directory.t dom.t duration.t file.t filt.t i18n.t json.t lexer.t + list.t nibbler.t path.t record.t rx.t seq.t subst.t t.benchmark.t + t.t taskmod.t tdb.t tdb2.t text.t uri.t util.t variant.t view.t json_test) add_custom_target (test ./run_all DEPENDS ${test_SRCS} diff --git a/test/lexer.t.cpp b/test/lexer.t.cpp new file mode 100644 index 000000000..a0c2bcb78 --- /dev/null +++ b/test/lexer.t.cpp @@ -0,0 +1,331 @@ +//////////////////////////////////////////////////////////////////////////////// +// taskwarrior - a command line task list manager. +// +// Copyright 2006 - 2011, Paul Beckingham. +// All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free Software +// Foundation; either version 2 of the License, or (at your option) any later +// version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +// details. 
+// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the +// +// Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, +// Boston, MA +// 02110-1301 +// USA +// +//////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include + +Context context; + +//////////////////////////////////////////////////////////////////////////////// +int main (int argc, char** argv) +{ + UnitTest t (80); + + std::string input = "This is a test."; + std::vector tokens; + { + Lexer l (input); + l.tokenize (tokens); + } + + t.is (tokens.size (), (size_t) 8, "'This is a test.' -> 'This| |is| |a| |test|.'"); + if (tokens.size () == 8) + { + t.is (tokens[0], "This", "'This is a test.' [0] -> 'This'"); + t.is (tokens[1], " ", "'This is a test.' [1] -> ' '"); + t.is (tokens[2], "is", "'This is a test.' [2] -> 'is'"); + t.is (tokens[3], " ", "'This is a test.' [3] -> ' '"); + t.is (tokens[4], "a", "'This is a test.' [4] -> 'a'"); + t.is (tokens[5], " ", "'This is a test.' [5] -> ' '"); + t.is (tokens[6], "test", "'This is a test.' [6] -> 'test'"); + t.is (tokens[7], ".", "'This is a test.' [7] -> '.'"); + } + else + { + t.skip ("'This is a test.' [0] -> 'This'"); + t.skip ("'This is a test.' [1] -> ' '"); + t.skip ("'This is a test.' [2] -> 'is'"); + t.skip ("'This is a test.' [3] -> ' '"); + t.skip ("'This is a test.' [4] -> 'a'"); + t.skip ("'This is a test.' [5] -> ' '"); + t.skip ("'This is a test.' [6] -> 'test'"); + t.skip ("'This is a test.' 
 [7] -> '.'"); + } + + input = "a12bcd345efgh6789"; + { + Lexer l (input); + l.tokenize (tokens); + } + + t.is (tokens.size (), (size_t) 6, "'a12bcd345efgh6789' -> 'a|12|bcd|345|efgh|6789'"); + if (tokens.size () == 6) + { + t.is (tokens[0], "a", "'a12bcd345efgh6789' [0] -> 'a'"); + t.is (tokens[1], "12", "'a12bcd345efgh6789' [1] -> '12'"); + t.is (tokens[2], "bcd", "'a12bcd345efgh6789' [2] -> 'bcd'"); + t.is (tokens[3], "345", "'a12bcd345efgh6789' [3] -> '345'"); + t.is (tokens[4], "efgh", "'a12bcd345efgh6789' [4] -> 'efgh'"); + t.is (tokens[5], "6789", "'a12bcd345efgh6789' [5] -> '6789'"); + } + else + { + t.skip ("'a12bcd345efgh6789' [0] -> 'a'"); + t.skip ("'a12bcd345efgh6789' [1] -> '12'"); + t.skip ("'a12bcd345efgh6789' [2] -> 'bcd'"); + t.skip ("'a12bcd345efgh6789' [3] -> '345'"); + t.skip ("'a12bcd345efgh6789' [4] -> 'efgh'"); + t.skip ("'a12bcd345efgh6789' [5] -> '6789'"); + } + + // Let's throw some ugly Perl at it. + input = "my $variable_name = 'single string';"; + { + Lexer l (input); + l.categorizeAsAlpha ('_'); + l.coalesceQuoted (true); + l.tokenize (tokens); + } + + t.is (tokens.size (), (size_t) 9, "'my $variable_name = 'single string';' -> 'my| |$|variable_name| |=| |'single string'|;'"); + if (tokens.size () == 9) + { + t.is (tokens[0], "my", "'my $variable_name = 'single string';' [0] -> 'my'"); + t.is (tokens[1], " ", "'my $variable_name = 'single string';' [1] -> ' '"); + t.is (tokens[2], "$", "'my $variable_name = 'single string';' [2] -> '$'"); + t.is (tokens[3], "variable_name", "'my $variable_name = 'single string';' [3] -> 'variable_name'"); + t.is (tokens[4], " ", "'my $variable_name = 'single string';' [4] -> ' '"); + t.is (tokens[5], "=", "'my $variable_name = 'single string';' [5] -> '='"); + t.is (tokens[6], " ", "'my $variable_name = 'single string';' [6] -> ' '"); + t.is (tokens[7], "'single string'", "'my $variable_name = 'single string';' [7] -> ''single string''"); + t.is (tokens[8], ";", "'my $variable_name = 'single
string';' [8] -> ';'"); + } + else + { + t.skip ("'my $variable_name = 'single string';' [0] -> 'my'"); + t.skip ("'my $variable_name = 'single string';' [1] -> ' '"); + t.skip ("'my $variable_name = 'single string';' [2] -> '$'"); + t.skip ("'my $variable_name = 'single string';' [3] -> 'variable_name'"); + t.skip ("'my $variable_name = 'single string';' [4] -> ' '"); + t.skip ("'my $variable_name = 'single string';' [5] -> '='"); + t.skip ("'my $variable_name = 'single string';' [6] -> ' '"); + t.skip ("'my $variable_name = 'single string';' [7] -> ''single string''"); + t.skip ("'my $variable_name = 'single string';' [8] -> ';'"); + } + + // Now exercise all the configurable coalescence. + input = "ab 12 'a'"; + { + Lexer l (input); + l.tokenize (tokens); + } + + t.is (tokens.size (), (size_t) 8, "'ab 12 'a'' -> 'ab| | |12| |'|a|''"); + if (tokens.size () == 8) + { + t.is (tokens[0], "ab", "'ab 12 'a'' [0] -> 'ab'"); + t.is (tokens[1], " ", "'ab 12 'a'' [1] -> ' '"); + t.is (tokens[2], " ", "'ab 12 'a'' [2] -> ' '"); + t.is (tokens[3], "12", "'ab 12 'a'' [3] -> '12'"); + t.is (tokens[4], " ", "'ab 12 'a'' [4] -> ' '"); + t.is (tokens[5], "'", "'ab 12 'a'' [5] -> '''"); + t.is (tokens[6], "a", "'ab 12 'a'' [6] -> 'a'"); + t.is (tokens[7], "'", "'ab 12 'a'' [7] -> '''"); + } + else + { + t.skip ("'ab 12 'a'' [0] -> 'ab'"); + t.skip ("'ab 12 'a'' [1] -> ' '"); + t.skip ("'ab 12 'a'' [2] -> ' '"); + t.skip ("'ab 12 'a'' [3] -> '12'"); + t.skip ("'ab 12 'a'' [4] -> ' '"); + t.skip ("'ab 12 'a'' [5] -> '''"); + t.skip ("'ab 12 'a'' [6] -> 'a'"); + t.skip ("'ab 12 'a'' [7] -> '''"); + } + + { + Lexer l (input); + l.coalesceAlpha (false); + l.tokenize (tokens); + } + + t.is (tokens.size (), (size_t) 9, "'ab 12 'a'' -> 'a|b| | |12| |'|a|''"); + if (tokens.size () == 9) + { + t.is (tokens[0], "a", "'ab 12 'a'' [0] -> 'a'"); + t.is (tokens[1], "b", "'ab 12 'a'' [1] -> 'b'"); + t.is (tokens[2], " ", "'ab 12 'a'' [2] -> ' '"); + t.is (tokens[3], " ", "'ab 12 'a'' [3] -> '
'"); + t.is (tokens[4], "12", "'ab 12 'a'' [4] -> '12'"); + t.is (tokens[5], " ", "'ab 12 'a'' [5] -> ' '"); + t.is (tokens[6], "'", "'ab 12 'a'' [6] -> '''"); + t.is (tokens[7], "a", "'ab 12 'a'' [7] -> 'a'"); + t.is (tokens[8], "'", "'ab 12 'a'' [8] -> '''"); + } + else + { + t.skip ("'ab 12 'a'' [0] -> 'a'"); + t.skip ("'ab 12 'a'' [1] -> 'b'"); + t.skip ("'ab 12 'a'' [2] -> ' '"); + t.skip ("'ab 12 'a'' [3] -> ' '"); + t.skip ("'ab 12 'a'' [4] -> '12'"); + t.skip ("'ab 12 'a'' [5] -> ' '"); + t.skip ("'ab 12 'a'' [6] -> '''"); + t.skip ("'ab 12 'a'' [7] -> 'a'"); + t.skip ("'ab 12 'a'' [8] -> '''"); + } + + { + Lexer l (input); + l.coalesceDigits (false); + l.tokenize (tokens); + } + + t.is (tokens.size (), (size_t) 9, "'ab 12 'a'' -> 'ab| | |1|2| |'|a|''"); + if (tokens.size () == 9) + { + t.is (tokens[0], "ab", "'ab 12 'a'' [0] -> 'ab'"); + t.is (tokens[1], " ", "'ab 12 'a'' [1] -> ' '"); + t.is (tokens[2], " ", "'ab 12 'a'' [2] -> ' '"); + t.is (tokens[3], "1", "'ab 12 'a'' [3] -> '1'"); + t.is (tokens[4], "2", "'ab 12 'a'' [4] -> '2'"); + t.is (tokens[5], " ", "'ab 12 'a'' [5] -> ' '"); + t.is (tokens[6], "'", "'ab 12 'a'' [6] -> '''"); + t.is (tokens[7], "a", "'ab 12 'a'' [7] -> 'a'"); + t.is (tokens[8], "'", "'ab 12 'a'' [8] -> '''"); + } + else + { + t.skip ("'ab 12 'a'' [0] -> 'ab'"); + t.skip ("'ab 12 'a'' [1] -> ' '"); + t.skip ("'ab 12 'a'' [2] -> ' '"); + t.skip ("'ab 12 'a'' [3] -> '1'"); + t.skip ("'ab 12 'a'' [4] -> '2'"); + t.skip ("'ab 12 'a'' [5] -> ' '"); + t.skip ("'ab 12 'a'' [6] -> '''"); + t.skip ("'ab 12 'a'' [7] -> 'a'"); + t.skip ("'ab 12 'a'' [8] -> '''"); + } + + { + Lexer l (input); + l.coalesceQuoted (true); + l.tokenize (tokens); + } + + t.is (tokens.size (), (size_t) 6, "'ab 12 'a'' -> 'ab| | |12| |'a''"); + if (tokens.size () == 6) + { + t.is (tokens[0], "ab", "'ab 12 'a'' [0] -> 'ab'"); + t.is (tokens[1], " ", "'ab 12 'a'' [1] -> ' '"); + t.is (tokens[2], " ", "'ab 12 'a'' [2] -> ' '"); + t.is (tokens[3], "12", "'ab 12 'a'' [3] 
-> '12'"); + t.is (tokens[4], " ", "'ab 12 'a'' [4] -> ' '"); + t.is (tokens[5], "'a'", "'ab 12 'a'' [5] -> ''a''"); + } + else + { + t.skip ("'ab 12 'a'' [0] -> 'ab'"); + t.skip ("'ab 12 'a'' [1] -> ' '"); + t.skip ("'ab 12 'a'' [2] -> ' '"); + t.skip ("'ab 12 'a'' [3] -> '12'"); + t.skip ("'ab 12 'a'' [4] -> ' '"); + t.skip ("'ab 12 'a'' [5] -> ''a''"); + } + + { + Lexer l (input); + l.coalesceWhite (true); + l.tokenize (tokens); + } + + t.is (tokens.size (), (size_t) 7, "'ab 12 'a'' -> 'ab| |12| |'|a|''"); + if (tokens.size () == 7) + { + t.is (tokens[0], "ab", "'ab 12 'a'' [0] -> 'ab'"); + t.is (tokens[1], " ", "'ab 12 'a'' [1] -> ' '"); + t.is (tokens[2], "12", "'ab 12 'a'' [2] -> '12'"); + t.is (tokens[3], " ", "'ab 12 'a'' [3] -> ' '"); + t.is (tokens[4], "'", "'ab 12 'a'' [4] -> '''"); + t.is (tokens[5], "a", "'ab 12 'a'' [5] -> 'a'"); + t.is (tokens[6], "'", "'ab 12 'a'' [6] -> '''"); + } + else + { + t.skip ("'ab 12 'a'' [0] -> 'ab'"); + t.skip ("'ab 12 'a'' [1] -> ' '"); + t.skip ("'ab 12 'a'' [2] -> '12'"); + t.skip ("'ab 12 'a'' [3] -> ' '"); + t.skip ("'ab 12 'a'' [4] -> '''"); + t.skip ("'ab 12 'a'' [5] -> 'a'"); + t.skip ("'ab 12 'a'' [6] -> '''"); + } + + { + Lexer l (input); + l.skipWhitespace (true); + l.tokenize (tokens); + } + + t.is (tokens.size (), (size_t) 5, "'ab 12 'a'' -> 'ab|12|'|a|''"); + if (tokens.size () == 5) + { + t.is (tokens[0], "ab", "'ab 12 'a'' [0] -> 'ab'"); + t.is (tokens[1], "12", "'ab 12 'a'' [1] -> '12'"); + t.is (tokens[2], "'", "'ab 12 'a'' [2] -> '''"); + t.is (tokens[3], "a", "'ab 12 'a'' [3] -> 'a'"); + t.is (tokens[4], "'", "'ab 12 'a'' [4] -> '''"); + } + else + { + t.skip ("'ab 12 'a'' [0] -> 'ab'"); + t.skip ("'ab 12 'a'' [1] -> '12'"); + t.skip ("'ab 12 'a'' [2] -> '''"); + t.skip ("'ab 12 'a'' [3] -> 'a'"); + t.skip ("'ab 12 'a'' [4] -> '''"); + } + + // Special tokens + input = "a := 1"; + { + Lexer l (input); + l.skipWhitespace (true); + l.specialToken (":="); + l.tokenize (tokens); + } + + t.is (tokens.size 
(), (size_t) 3, "'a := 1' -> 'a|:=|1'"); + if (tokens.size () == 3) + { + t.is (tokens[0], "a", "'a := 1' [0] -> 'a'"); + t.is (tokens[1], ":=", "'a := 1' [1] -> ':='"); + t.is (tokens[2], "1", "'a := 1' [2] -> '1'"); + } + else + { + t.skip ("'a := 1' [0] -> 'a'"); + t.skip ("'a := 1' [1] -> ':='"); + t.skip ("'a := 1' [2] -> '1'"); + } + + return 0; +} + +//////////////////////////////////////////////////////////////////////////////// +