From 6ebee13fa47134f97ab69a3ba7513ccbf8d04b9f Mon Sep 17 00:00:00 2001 From: Paul Beckingham Date: Fri, 30 Aug 2013 11:21:47 -0700 Subject: [PATCH] LRParser - Added old (non-task) parsing code, as a basis for the new parser. - Added Tree object for parse trees. - Added top-level entry point for testing. --- CMakeLists.txt | 1 + src/parser/CMakeLists.txt | 24 ++ src/parser/LRParser.cpp | 465 ++++++++++++++++++++++++++++++++++++++ src/parser/LRParser.h | 53 +++++ src/parser/Parser.cpp | 389 +++++++++++++++++++++++++++++++ src/parser/Parser.h | 103 +++++++++ src/parser/Tree.cpp | 342 ++++++++++++++++++++++++++++ src/parser/Tree.h | 87 +++++++ src/parser/bnf.cpp | 126 +++++++++++ 9 files changed, 1590 insertions(+) create mode 100644 src/parser/CMakeLists.txt create mode 100644 src/parser/LRParser.cpp create mode 100644 src/parser/LRParser.h create mode 100644 src/parser/Parser.cpp create mode 100644 src/parser/Parser.h create mode 100644 src/parser/Tree.cpp create mode 100644 src/parser/Tree.h create mode 100644 src/parser/bnf.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 2c5805a3a..f5572314d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -142,6 +142,7 @@ add_subdirectory (src) add_subdirectory (src/commands) add_subdirectory (src/columns) add_subdirectory (src/shell) +add_subdirectory (src/parser) add_subdirectory (doc) add_subdirectory (i18n) add_subdirectory (scripts) diff --git a/src/parser/CMakeLists.txt b/src/parser/CMakeLists.txt new file mode 100644 index 000000000..65f4a019d --- /dev/null +++ b/src/parser/CMakeLists.txt @@ -0,0 +1,24 @@ +cmake_minimum_required (VERSION 2.8) +include_directories (${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/src + ${CMAKE_SOURCE_DIR}/src/columns + ${CMAKE_SOURCE_DIR}/src/commands + ${CMAKE_SOURCE_DIR}/src/parser + ${TASK_INCLUDE_DIRS}) + +set (parser_SRCS LRParser.cpp LRParser.h + Parser.cpp Parser.h + Tree.cpp Tree.h) + +add_library (parser STATIC ${parser_SRCS}) +add_executable (parser_executable bnf.cpp) + 
+target_link_libraries (parser_executable columns commands task parser ${TASK_LIBRARIES}) + +set_property (TARGET parser_executable PROPERTY OUTPUT_NAME "parser") + +#install (TARGETS parser_executable DESTINATION ${TASK_BINDIR}) + +set (CMAKE_BUILD_TYPE debug) +set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -Wall") +set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -Wall") diff --git a/src/parser/LRParser.cpp b/src/parser/LRParser.cpp new file mode 100644 index 000000000..09cbb43ba --- /dev/null +++ b/src/parser/LRParser.cpp @@ -0,0 +1,465 @@ +//////////////////////////////////////////////////////////////////////////////// +// taskwarrior - a command line task list manager. +// +// Copyright 2006-2013, Paul Beckingham, Federico Hernandez. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// +// http://www.opensource.org/licenses/mit-license.php +// +//////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include + +//////////////////////////////////////////////////////////////////////////////// +LRParser::LRParser () +{ +} + +//////////////////////////////////////////////////////////////////////////////// +// This is called only from external code. +Tree* LRParser::parse (const std::string& tokens) +{ + Tree* tree = new Tree ("root"); + if (! tree) + throw std::string ("Failed to allocate memory for parse tree."); + + unsigned int cursor = 0; + if (matchRule (_primary, '=', cursor, tokens, tree)) + { + if (_verbose) + std::cout << "syntax pass" << std::endl; + } + else + { + if (_verbose) + std::cout << "syntax fail" << std::endl; + + delete tree; + tree = NULL; + } + + return tree; +} + +//////////////////////////////////////////////////////////////////////////////// +// Wraps calls to matchRule, while properly handling the quantifier. +bool LRParser::matchRuleQuant ( + const std::string& rule, + char quantifier, + unsigned int& cursor, + const std::string& tokens, + Tree* tree) +{ + // Must match exactly once, so run once and return the result. + if (quantifier == '=') + { + return matchRule (rule, quantifier, cursor, tokens, tree); + } + + // May match zero or one time. If it matches, the cursor will be advanced. + // If it fails, the cursor will not be advanced, but this is still considered + // successful. Return true either way, but backtrack the cursor on failure. + + // TODO Make greedy. + else if (quantifier == '?') + { + unsigned int original_cursor = cursor; + if (! matchRule (rule, quantifier, cursor, tokens, tree)) + cursor = original_cursor; + return true; + } + + // May match 1 or more times. If it matches on the first attempt, continue + // to greedily match until it fails. If it fails on the first attempt, then + // the rule fails. + + // TODO Make greedy. 
+ else if (quantifier == '+') + { + if (! matchRule (rule, quantifier, cursor, tokens, tree)) + return false; + + while (matchRule (rule, quantifier, cursor, tokens, tree)) + ; + return true; + } + + // May match zero or more times. Keep calling while there are matches, and + // return true always. Backtrack the cursor on failure. + + // TODO Make greedy. + else if (quantifier == '*') + { + bool result; + do + { + unsigned int original_cursor = cursor; + result = matchRule (rule, quantifier, cursor, tokens, tree); + if (! result) + cursor = original_cursor; + } + while (result); + return true; + } + + throw std::string ("LRParser::matchRuleQuant - this should never happen."); + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +// Returns true, with cursor incremented, if any of the alternates match. +bool LRParser::matchRule ( + const std::string& rule, + char quantifier, + unsigned int& cursor, + const std::string& tokens, + Tree* tree) +{ + if (cursor >= tokens.length ()) return false; + unsigned int original_cursor = cursor; // Preserve + + for (unsigned int alt = 0; + alt < _rules[rule].size () && cursor < tokens.length (); + ++alt) + { + Tree* b = new Tree (rule); + if (! 
b) + throw std::string ("Failed to allocate memory for parse tree."); + + if (matchAlternate (rule, quantifier, alt, _rules[rule][alt], cursor, tokens, b)) + { + if (_verbose) + std::cout << "\033[32m" + << "matchRule " + << rule + << quantifier + << "/a" + << alt + << " tokens[" + << cursor - 1 + << "]=" + << visible (tokens[cursor - 1]) + << " SUCCEED" + << " " + << tree + << "->" + << b + << "\033[0m" + << std::endl; + + tree->addBranch (b); + return true; + } + + delete b; + } + + cursor = original_cursor; // Restore + + if (_verbose) + std::cout << "\033[31m" + << "matchRule " + << rule + << quantifier + << " FAIL" + << "\033[0m" + << std::endl; + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +// Returns true, with cursor incremented, if all of the token match. +bool LRParser::matchAlternate ( + const std::string& rule, + char quantifier, + unsigned int alt, + const Alternate& alternate, + unsigned int& cursor, + const std::string& tokens, + Tree* tree) +{ + if (cursor >= tokens.length ()) return false; + + unsigned int original_cursor = cursor; // Preserve + + for (unsigned int token = 0; + token < alternate.size () && cursor < tokens.length (); + ++token) + { + if (! 
matchToken (rule, quantifier, alt, token, alternate[token], cursor, tokens, tree)) + { + cursor = original_cursor; // Restore + if (_verbose) + std::cout << "\033[31m" + << "matchAlternate " + << rule + << quantifier + << "/a" + << alt + << "/t" + << token + << " tokens[" + << cursor + << "]=" + << visible (tokens[cursor]) + << " FAIL" + << "\033[0m" + << std::endl; + + return false; + } + + if (_verbose) + std::cout << "\033[32m" + << "matchAlternate " + << rule + << quantifier + << "/a" + << alt + << "/t" + << token + << " SUCCEED" + << "\033[0m" + << std::endl; + } + + return true; +} + +//////////////////////////////////////////////////////////////////////////////// +// Returns true, if the token, an optional quantifier, and all optional +// modifiers match. +bool LRParser::matchToken ( + const std::string& rule, + char quantifier, + unsigned int alt, + unsigned int tok, + const Token& token, + unsigned int& cursor, + const std::string& tokens, + Tree* tree) +{ + if (cursor >= tokens.length ()) return false; + + unsigned int original_cursor = cursor; // Preserve + + if (tokenMatchRule (rule, quantifier, alt, tok, token, cursor, tokens, tree) || + tokenMatchSpecialLiteral (token, cursor, tokens, tree) || + tokenMatchLiteral (token, cursor, tokens, tree) || + tokenMatchRegex (token, cursor, tokens, tree)) + { + if (_verbose) + std::cout << "\033[32m" + << "matchToken " + << rule + << quantifier + << "/a" + << alt + << "/t" + << tok + << " tokens[" + << cursor + << "]=" + << visible (tokens[cursor]) + << " token=" + << token.value + << " SUCCEED" + << "\033[0m" + << std::endl; + + return true; + } + + cursor = original_cursor; // Restore + + if (_verbose) + std::cout << "\033[31m" + << "matchToken " + << rule + << quantifier + << "/a" + << alt + << "/t" + << tok + << " tokens[" + << cursor + << "]=" + << visible (tokens[cursor]) + << " token=" + << token.value + << " FAIL" + << "\033[0m" + << std::endl; + + return false; +} + 
+//////////////////////////////////////////////////////////////////////////////// +bool LRParser::tokenMatchSpecialLiteral ( + const Token& token, + unsigned int& cursor, + const std::string& tokens, + Tree* tree) +{ + if (cursor >= tokens.length ()) return false; + + if ((tokens[cursor] == '\t' && token.value == "\"\\t\"") || + (tokens[cursor] == '\n' && token.value == "\"\\n\"") || + (tokens[cursor] == '\r' && token.value == "\"\\r\"") || + (tokens[cursor] == '\f' && token.value == "\"\\f\"") || + (tokens[cursor] == '\v' && token.value == "\"\\v\"") || + (tokens[cursor] == '"' && token.value == "\"\\\"\"")) + { + tree->tag ("literal"); + tree->tag ("special"); + tree->attribute ("token", tokens[cursor]); + + if (_verbose) + std::cout << "tokenMatchSpecialLiteral " + << token.value + << " SUCCEED" + << std::endl; + + cursor++; + return true; + } + + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +bool LRParser::tokenMatchLiteral ( + const Token& token, + unsigned int& cursor, + const std::string& tokens, + Tree* tree) +{ + int len = token.value.length () - 2; + if (cursor > tokens.length () - len) return false; + + std::string tok = token.value.substr (1, len); + + if (token.value[0] == '"' && + token.value[len + 1] == '"' && + tokens.find (tok, cursor) == cursor) + { + tree->tag ("literal"); + tree->attribute ("token", tok); + cursor += len; + + if (_verbose) + std::cout << "tokenMatchLiteral " + << token.value + << " SUCCEED" + << std::endl; + + return true; + } + + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +bool LRParser::tokenMatchRegex ( + const Token& token, + unsigned int& cursor, + const std::string& tokens, + Tree* tree) +{ + if (cursor >= tokens.length () - 1) return false; + + // If it looks like a regex. + if (token.value[0] == '/' && + token.value[token.value.length () - 1] == '/') + { + // If the regex matches at all. 
+ RX rx ("(" + token.value.substr (1, token.value.length () - 2) + ")", false); + std::vector start; + std::vector end; + if (rx.match (start, + end, + tokens.substr (cursor, std::string::npos))) + { + // If the match is at position 'cursor'. + if (start[0] == 0) + { + tree->tag ("regex"); + tree->attribute ("token", tokens.substr (cursor + start[0], end[0])); + cursor += end[0]; + + if (_verbose) + std::cout << "tokenMatchRegex \"" + << tokens.substr (cursor + start[0], end[0]) + << "\"" + << " SUCCEED" + << std::endl; + + return true; + } + } + } + + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +bool LRParser::tokenMatchRule ( + const std::string& rule, + char quantifier, + unsigned int alt, + unsigned int it, + const Token& token, + unsigned int& cursor, + const std::string& tokens, + Tree* tree) +{ + if (cursor >= tokens.length ()) return false; + + // If this is a definition, recurse. + if (_rules.find (token.value) != _rules.end ()) + { + if (_verbose) + std::cout << "tokenMatchRule " + << rule + << quantifier + << "/a" + << alt + << "/t" + << it + << " tokens[" + << cursor + << "]=" + << visible (tokens[cursor]) + << " token=" + << token.value + << " RECURSING matchRuleQuant" + << std::endl; + + return matchRuleQuant (token.value, token.quantifier, cursor, tokens, tree); + } + + return false; +} + +//////////////////////////////////////////////////////////////////////////////// diff --git a/src/parser/LRParser.h b/src/parser/LRParser.h new file mode 100644 index 000000000..489f1136a --- /dev/null +++ b/src/parser/LRParser.h @@ -0,0 +1,53 @@ +//////////////////////////////////////////////////////////////////////////////// +// taskwarrior - a command line task list manager. +// +// Copyright 2006-2013, Paul Beckingham, Federico Hernandez. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// +// http://www.opensource.org/licenses/mit-license.php +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef INCLUDED_LRPARSER +#define INCLUDED_LRPARSER + +#include "Parser.h" + +class LRParser : public Parser +{ +public: + LRParser (); + Tree* parse (const std::string&); + +private: + bool matchRuleQuant (const std::string&, char, unsigned int&, const std::string&, Tree*); + bool matchRule (const std::string&, char, unsigned int&, const std::string&, Tree*); + bool matchAlternate (const std::string&, char, unsigned int, const Alternate&, unsigned int&, const std::string&, Tree*); + bool matchToken (const std::string&, char, unsigned int, unsigned int, const Token&, unsigned int&, const std::string&, Tree*); + + bool tokenMatchSpecialLiteral (const Token&, unsigned int&, const std::string&, Tree*); + bool tokenMatchLiteral (const Token&, unsigned int&, const std::string&, Tree*); + bool tokenMatchRegex (const Token&, unsigned int&, const std::string&, Tree*); + bool tokenMatchRule (const std::string&, char, unsigned int, unsigned int, const Token&, unsigned int&, const std::string&, Tree*); +}; + +#endif + +//////////////////////////////////////////////////////////////////////////////// diff --git a/src/parser/Parser.cpp b/src/parser/Parser.cpp new file mode 100644 index 000000000..5429dd5a8 --- /dev/null +++ b/src/parser/Parser.cpp @@ -0,0 +1,389 @@ +//////////////////////////////////////////////////////////////////////////////// +// taskwarrior - a command line task list manager. +// +// Copyright 2006-2013, Paul Beckingham, Federico Hernandez. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// +// http://www.opensource.org/licenses/mit-license.php +// +//////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include + +//////////////////////////////////////////////////////////////////////////////// +Parser::Parser () +: _primary ("") +, _verbose (false) +{ +} + +//////////////////////////////////////////////////////////////////////////////// +Parser::~Parser () +{ +} + +//////////////////////////////////////////////////////////////////////////////// +void Parser::grammar (const std::string& file) +{ + // Strip comments. 
+ std::vector lines; + split (lines, file, '\n'); + + std::string stripped = ""; + std::string::size_type comment; + std::vector ::iterator it; + for (it = lines.begin (); it != lines.end (); ++it) + { + comment = it->find ("#"); + + if (comment != std::string::npos) + stripped += it->substr (0, comment); + else + stripped += *it; + + stripped += "\n"; + } + + // Now parse the grammar. + Nibbler n (stripped); + std::string rule; + Production prod; + while (bnfNibbleRule (n, rule, prod)) + { + if (_primary == "") + _primary = rule; + + _rules[rule] = prod; + } + + // Now the hard part. + checkConsistency (); +} + +//////////////////////////////////////////////////////////////////////////////// +bool Parser::bnfNibbleRule (Nibbler& n, std::string& rule, Production& prod) +{ + prod.clear (); + n.skipWS (); + if (n.getUntilOneOf (": ", rule)) + { + std::string att; + while (n.skip (':') && + n.getUntilOneOf (": ", att)) + { + prod.tag (att); + } + + // Definition. + n.skipWS (); + if (n.getLiteral ("::=")) + { + // Alternates. 
+ Alternate alt; + while (bnfNibbleAlternate (n, alt)) + { + prod.push_back (alt); + alt.clear (); + } + + if (alt.size ()) + prod.push_back (alt); + + return true; + } + } + + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +bool Parser::bnfNibbleAlternate (Nibbler& n, Alternate& alt) +{ + n.skipWS (); + + Token tok; + while (bnfNibbleToken (n, tok)) + alt.push_back (tok); + + if (n.skip ('|')) {return true;} + else if (n.skip (';')) {return false;} + else + throw std::string ("Expected | or ;"); +} + +//////////////////////////////////////////////////////////////////////////////// +bool Parser::bnfNibbleToken (Nibbler& n, Token& tok) +{ + tok.clear (); + n.skipWS (); + + if (n.next () == '|') return false; // Alternate + if (n.next () == ';') return false; // Terminator + + if (n.getQuoted ('/', tok.value, true) || // Regex + n.getQuoted ('"', tok.value, true) || // Literal + n.getUntilOneOf ("\n\t =?+*", tok.value)) // Name + { + if (n.skip ('=')) tok.quantifier = '='; // 1 + else if (n.skip ('?')) tok.quantifier = '?'; // 0,1 + else if (n.skip ('+')) tok.quantifier = '+'; // 1-> + else if (n.skip ('*')) tok.quantifier = '*'; // 0-> + + return true; + } + + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +// Check consistency of the syntax. This is where all static analysis occurs. 
+void Parser::checkConsistency () +{ + std::vector allRules; + std::vector allToken; + std::vector allLeftRecursive; + + std::map ::iterator r; + for (r = _rules.begin (); r != _rules.end (); ++r) + { + allRules.push_back (r->first); + + std::vector ::iterator a; + for (a = r->second.begin (); a != r->second.end (); ++a) + { + std::vector ::iterator i; + for (i = a->begin (); i != a->end (); ++i) + { + if (i->value[0] != '"' && + i->value[0] != '/') + allToken.push_back (i->value); + + if (i == a->begin () && r->first == i->value) + allLeftRecursive.push_back (i->value); + } + } + } + + std::vector notUsed; + std::vector notDefined; + listDiff (allRules, allToken, notUsed, notDefined); + + // Undefined value - these are definitions that appear in token, but are + // not in _rules. + for (unsigned int i = 0; i < notDefined.size (); ++i) + throw std::string ("definition '") + notDefined[i] + "' referenced, but not defined."; + + // Circular definitions - these are names in _rules that also appear as + // token 0 in any of the alternates for that definition. + for (unsigned int i = 0; i < allLeftRecursive.size (); ++i) + throw std::string ("definition '") + allLeftRecursive[i] + "' is left recursive."; + + for (unsigned int i = 0; i < allRules.size (); ++i) + if (allRules[i][0] == '"') + throw std::string ("definition '") + allRules[i] + "' must not be a literal."; + + // Unused definitions - these are names in _rules that are never + // referenced as token. + for (unsigned int i = 0; i < notUsed.size (); ++i) + if (notUsed[i] != _primary) + throw std::string ("definition '") + notUsed[i] + "' defined, but not referenced."; +} + +//////////////////////////////////////////////////////////////////////////////// +void Parser::verbose () +{ + _verbose = true; +} + +//////////////////////////////////////////////////////////////////////////////// +// Display the entire parsed tree. Highlight the primary definition. 
+void Parser::dump () const +{ + std::map ::const_iterator def; + for (def = _rules.begin (); def != _rules.end (); ++def) + { + if (def->first == _primary) + std::cout << "\033[1m" << def->first << "\033[0m"; + else + std::cout << def->first; + + std::cout << " ::=" << std::endl; + + std::vector ::const_iterator alt; + for (alt = def->second.begin (); alt != def->second.end (); ++alt) + { + if (alt != def->second.begin ()) + std::cout << " | "; + else + std::cout << " "; + + std::vector ::const_iterator tok; + for (tok = alt->begin (); tok != alt->end (); ++tok) + { + std::cout << tok->value; + + if (tok->quantifier != '=') + std::cout << tok->quantifier; + + std::cout << " "; + } + + std::cout << std::endl; + } + + std::cout << " ;" << std::endl; + } +} + +//////////////////////////////////////////////////////////////////////////////// +Parser::Token::Token () +: value ("") +, quantifier ('=') +{ +} + +//////////////////////////////////////////////////////////////////////////////// +Parser::Token::Token (const Parser::Token& other) +: value (other.value) +, quantifier (other.quantifier) +{ +} + +//////////////////////////////////////////////////////////////////////////////// +Parser::Token& Parser::Token::operator= (const Parser::Token& other) +{ + if (this != &other) + { + value = other.value; + quantifier = other.quantifier; + } + + return *this; +} + +//////////////////////////////////////////////////////////////////////////////// +void Parser::Token::clear () +{ + value = ""; + quantifier = '='; +} + +//////////////////////////////////////////////////////////////////////////////// +std::string Parser::Token::dump () +{ + return value + quantifier; +} + +//////////////////////////////////////////////////////////////////////////////// +Parser::Alternate::Alternate () +: std::vector () +{ +} + +//////////////////////////////////////////////////////////////////////////////// +Parser::Alternate::Alternate (const Parser::Alternate& other) +: std::vector (other) +{ 
+} + +//////////////////////////////////////////////////////////////////////////////// +Parser::Alternate& Parser::Alternate::operator= (const Parser::Alternate& other) +{ + if (this != &other) + std::vector ::operator= (other); + + return *this; +} + +//////////////////////////////////////////////////////////////////////////////// +std::string Parser::Alternate::dump () +{ + std::string result; + std::vector ::iterator i; + for (i = this->begin (); i != this->end (); ++i) + result += i->dump () + ' '; + return result; +} + +//////////////////////////////////////////////////////////////////////////////// +Parser::Production::Production () +: std::vector () +{ +} + +//////////////////////////////////////////////////////////////////////////////// +Parser::Production::Production (const Parser::Production& other) +: std::vector (other) +, mTags (other.mTags) +{ +} + +//////////////////////////////////////////////////////////////////////////////// +Parser::Production& Parser::Production::operator= (const Parser::Production& other) +{ + if (this != &other) + { + std::vector ::operator= (other); + mTags = other.mTags; + } + + return *this; +} + +//////////////////////////////////////////////////////////////////////////////// +void Parser::Production::tag (const std::string& t) +{ + mTags.push_back (t); +} + +//////////////////////////////////////////////////////////////////////////////// +bool Parser::Production::hasTag (const std::string& t) const +{ + return std::find (mTags.begin (), mTags.end (), t) != mTags.end () + ? 
true : false; +} + +//////////////////////////////////////////////////////////////////////////////// +void Parser::Production::clear () +{ + std::vector ::clear (); + mTags.clear (); +} + +//////////////////////////////////////////////////////////////////////////////// +std::string Parser::Production::dump () +{ + std::string result; + std::vector ::iterator i; + for (i = this->begin (); i != this->end (); ++i) + { + if (i != this->begin ()) + result += " | "; + result += i->dump (); + } + + result += "\n"; + return result; +} + +//////////////////////////////////////////////////////////////////////////////// diff --git a/src/parser/Parser.h b/src/parser/Parser.h new file mode 100644 index 000000000..224dae580 --- /dev/null +++ b/src/parser/Parser.h @@ -0,0 +1,103 @@ +//////////////////////////////////////////////////////////////////////////////// +// taskwarrior - a command line task list manager. +// +// Copyright 2006-2013, Paul Beckingham, Federico Hernandez. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// +// http://www.opensource.org/licenses/mit-license.php +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef INCLUDED_PARSER +#define INCLUDED_PARSER + +#include +#include +#include +#include +#include +#include + +class Parser +{ +public: + Parser (); + virtual ~Parser (); + void grammar (const std::string&); + virtual Tree* parse (const std::string&) = 0; + + void verbose (); + void dump () const; + +protected: + class Token + { + public: + Token (); + Token (const Token&); + Token& operator= (const Token&); + void clear (); + std::string dump (); + + public: + std::string value; + char quantifier; + }; + + class Alternate : public std::vector + { + public: + Alternate (); + Alternate (const Alternate&); + Alternate& operator= (const Alternate&); + std::string dump (); + }; + + class Production : public std::vector + { + public: + Production (); + Production (const Production&); + Production& operator= (const Production&); + void tag (const std::string&); + bool hasTag (const std::string&) const; + void clear (); + std::string dump (); + + private: + std::vector mTags; + }; + +private: + bool bnfNibbleRule (Nibbler&, std::string&, Production&); + bool bnfNibbleAlternate (Nibbler&, Alternate&); + bool bnfNibbleToken (Nibbler&, Token&); + + void checkConsistency (); + +protected: + std::string _primary; + std::map _rules; + bool _verbose; +}; + +#endif + +//////////////////////////////////////////////////////////////////////////////// diff --git a/src/parser/Tree.cpp b/src/parser/Tree.cpp new file mode 100644 index 000000000..a6e77490f --- /dev/null +++ b/src/parser/Tree.cpp @@ -0,0 +1,342 @@ 
+//////////////////////////////////////////////////////////////////////////////// +// taskwarrior - a command line task list manager. +// +// Copyright 2006-2013, Paul Beckingham, Federico Hernandez. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// +// http://www.opensource.org/licenses/mit-license.php +// +//////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include + +//////////////////////////////////////////////////////////////////////////////// +// - Tree, Branch and Node are synonymous. +// - A Tree may contain any number of branches. +// - A Branch may contain any number of name/value pairs, unique by name. +// - The destructor will delete all branches recursively. +// - Tree::enumerate is a snapshot, and is invalidated by modification. +// - Branch sequence is preserved. 
+Tree::Tree (const std::string& name)
+: _trunk (NULL)
+, _name (name)
+{
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Deletes every branch.  Each branch's own destructor does the same, so the
+// whole subtree below this node is released.
+Tree::~Tree ()
+{
+  for (std::vector <Tree*>::iterator i = _branches.begin ();
+       i != _branches.end ();
+       ++i)
+    delete *i;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Copy construction is not supported and always throws.
+//
+// The error is thrown as a std::string rather than a string literal: a literal
+// is a const char*, which a 'catch (const std::string&)' handler (the only kind
+// used by bnf.cpp's main) does not match.
+Tree::Tree (const Tree&)
+{
+  throw std::string ("Unimplemented Tree::Tree (Tree&)");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Assignment is not supported and always throws a std::string.
+Tree& Tree::operator= (const Tree&)
+{
+  throw std::string ("Unimplemented Tree::operator= ()");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Returns the branch at the given zero-based position.
+// Throws a std::string if the position is negative or past the last branch.
+Tree* Tree::operator[] (const int branch)
+{
+  if (branch < 0 ||
+      branch >= static_cast <int> (_branches.size ()))
+    throw std::string ("Tree::operator[] out of range");
+
+  return _branches[branch];
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Appends 'branch' as the last child and makes this node its parent.  The
+// destructor deletes all branches, so this node takes ownership.
+// 'branch' is dereferenced unconditionally and must not be NULL.
+void Tree::addBranch (Tree* branch)
+{
+  branch->_trunk = this;
+  _branches.push_back (branch);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Detaches the first child equal to 'branch', if there is one.  The detached
+// node is not deleted and its parent pointer is left as it was.
+void Tree::removeBranch (Tree* branch)
+{
+  std::vector <Tree*>::iterator match =
+    std::find (_branches.begin (), _branches.end (), branch);
+
+  if (match != _branches.end ())
+    _branches.erase (match);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Puts 'to' in the position of the first child equal to 'from' and makes this
+// node the parent of 'to'.  Does nothing if 'from' is not a child.  'from' is
+// neither deleted nor modified.  'to' must not be NULL when 'from' is found.
+void Tree::replaceBranch (Tree* from, Tree* to)
+{
+  std::vector <Tree*>::iterator match =
+    std::find (_branches.begin (), _branches.end (), from);
+
+  if (match != _branches.end ())
+  {
+    to->_trunk = this;
+    *match = to;
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Returns the number of direct children.
+int Tree::branches ()
+{
+  return static_cast <int> (_branches.size ());
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Renames this node.
+void Tree::name (const std::string& name)
+{
+  _name = name;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Returns this node's name.
+std::string Tree::name () const
+{
+  return _name;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Sets the attribute 'name' to 'value', replacing any existing value.
+void Tree::attribute (const std::string& name, const std::string& value)
+{
+  _attributes[name] = value;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Sets the attribute 'name' to the text produced by format () for an integer.
+void Tree::attribute (const std::string& name, const int value)
+{
+  _attributes[name] = format (value);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Sets the attribute 'name' to the text produced by format (value, 1, 8).
+// NOTE(review): the meaning of the 1 and 8 arguments is defined by format (),
+// which is declared elsewhere.
+void Tree::attribute (const std::string& name, const double value)
+{
+  _attributes[name] = format (value, 1, 8);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Returns the value of the attribute 'name', or an empty string if there is no
+// such attribute.
+std::string Tree::attribute (const std::string& name)
+{
+  // Look up with find () rather than operator[], which would create an empty
+  // attribute as a side effect of reading a missing one.
+  std::map <std::string, std::string>::iterator found = _attributes.find (name);
+  if (found == _attributes.end ())
+    return "";
+
+  return found->second;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Removes the attribute 'name'.  Does nothing if it is not present.
+void Tree::removeAttribute (const std::string& name)
+{
+  _attributes.erase (name);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Returns the number of attributes on this node.
+int Tree::attributes () const
+{
+  return static_cast <int> (_attributes.size ());
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Returns the names of all attributes on this node, in the map's key order.
+std::vector <std::string> Tree::allAttributes () const
+{
+  std::vector <std::string> names;
+  names.reserve (_attributes.size ());
+
+  std::map <std::string, std::string>::const_iterator it;
+  for (it = _attributes.begin (); it != _attributes.end (); ++it)
+    names.push_back (it->first);
+
+  return names;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Recursively completes a list of Tree* objects, left to right, depth first.
+// The reason for the depth-first enumeration is that a client may wish to +// traverse the tree and delete nodes. With a depth-first iteration, this is a +// safe mechanism, and a node pointer will never be dereferenced after it has +// been deleted. +void Tree::enumerate (std::vector & all) const +{ + for (std::vector ::const_iterator i = _branches.begin (); + i != _branches.end (); + ++i) + { + (*i)->enumerate (all); + all.push_back (*i); + } +} + +//////////////////////////////////////////////////////////////////////////////// +Tree* Tree::parent () const +{ + return _trunk; +} + +//////////////////////////////////////////////////////////////////////////////// +bool Tree::hasTag (const std::string& tag) const +{ + if (std::find (_tags.begin (), _tags.end (), tag) != _tags.end ()) + return true; + + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +void Tree::tag (const std::string& tag) +{ + if (! hasTag (tag)) + _tags.push_back (tag); +} + +//////////////////////////////////////////////////////////////////////////////// +int Tree::tags () const +{ + return _tags.size (); +} + +//////////////////////////////////////////////////////////////////////////////// +std::vector Tree::allTags () const +{ + return _tags; +} + +//////////////////////////////////////////////////////////////////////////////// +int Tree::count () const +{ + int total = 1; // this one. + + for (std::vector ::const_iterator i = _branches.begin (); + i != _branches.end (); + ++i) + { + // Recurse and count the branches. + total += (*i)->count (); + } + + return total; +} + +//////////////////////////////////////////////////////////////////////////////// +Tree* Tree::find (const std::string& path) +{ + std::vector elements; + split (elements, path, '/'); + + // Must start at the trunk. + Tree* cursor = this; + std::vector ::iterator it = elements.begin (); + if (cursor->name () != *it) + return NULL; + + // Perhaps the trunk is what is needed? 
+ if (elements.size () == 1) + return this; + + // Now look for the next branch. + for (++it; it != elements.end (); ++it) + { + bool found = false; + + // If the cursor has a branch that matches *it, proceed. + for (int i = 0; i < cursor->branches (); ++i) + { + if ((*cursor)[i]->name () == *it) + { + cursor = (*cursor)[i]; + found = true; + break; + } + } + + if (!found) + return NULL; + } + + return cursor; +} + +//////////////////////////////////////////////////////////////////////////////// +void Tree::dumpNode (Tree* t, int depth) +{ + // Dump node + for (int i = 0; i < depth; ++i) + std::cout << " "; + + std::cout << t << " \033[1m" << t->name () << "\033[0m"; + + // Dump attributes. + std::string atts; + std::vector attributes = t->allAttributes (); + std::vector ::iterator it; + for (it = attributes.begin (); it != attributes.end (); ++it) + { + if (it != attributes.begin ()) + atts += " "; + + atts += *it + "='\033[33m" + t->attribute (*it) + "\033[0m'"; + } + + if (atts.length ()) + std::cout << " " << atts; + + // Dump tags. + std::string tags; + std::vector allTags = t->allTags (); + for (it = allTags.begin (); it != allTags.end (); ++it) + tags += (tags.length () ? " " : "") + *it; + + if (tags.length ()) + std::cout << " \033[32m" << tags << "\033[0m"; + + std::cout << "\n"; + + // Recurse for branches. + for (int i = 0; i < t->branches (); ++i) + dumpNode ((*t)[i], depth + 1); +} + +//////////////////////////////////////////////////////////////////////////////// +void Tree::dump () +{ + std::cout << "Tree (" << count () << " nodes)\n"; + dumpNode (this, 1); +} + +//////////////////////////////////////////////////////////////////////////////// + diff --git a/src/parser/Tree.h b/src/parser/Tree.h new file mode 100644 index 000000000..6ef10b36e --- /dev/null +++ b/src/parser/Tree.h @@ -0,0 +1,87 @@ +//////////////////////////////////////////////////////////////////////////////// +// taskwarrior - a command line task list manager. 
+// +// Copyright 2006-2013, Paul Beckingham, Federico Hernandez. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// +// http://www.opensource.org/licenses/mit-license.php +// +//////////////////////////////////////////////////////////////////////////////// +#ifndef INCLUDED_TREE +#define INCLUDED_TREE + +#include +#include +#include + +class Tree; + +class Tree +{ +public: + Tree (const std::string&); + ~Tree (); + Tree (const Tree&); + Tree& operator= (const Tree&); + Tree* operator[] (const int); + + void addBranch (Tree*); + void removeBranch (Tree*); + void replaceBranch (Tree*, Tree*); + int branches (); + + void name (const std::string&); + std::string name () const; + void attribute (const std::string&, const std::string&); + void attribute (const std::string&, const int); + void attribute (const std::string&, const double); + std::string attribute (const std::string&); + void removeAttribute (const std::string&); + int attributes () const; + std::vector allAttributes () const; + + bool hasTag (const std::string&) const; + void tag (const std::string&); + int tags () const; + std::vector allTags () const; + + void enumerate (std::vector & all) const; + Tree* parent () const; + + int count () const; + + Tree* find (const std::string&); + + void dump (); + +private: + void dumpNode (Tree*, int); + +private: + Tree* _trunk; // Parent. + std::string _name; // Name. + std::vector _branches; // Children. + std::map _attributes; // Attributes (name->value). + std::vector _tags; // Tags (tag, tag ...). +}; + +#endif + +//////////////////////////////////////////////////////////////////////////////// diff --git a/src/parser/bnf.cpp b/src/parser/bnf.cpp new file mode 100644 index 000000000..2d4f81ea1 --- /dev/null +++ b/src/parser/bnf.cpp @@ -0,0 +1,126 @@ +//////////////////////////////////////////////////////////////////////////////// +// taskwarrior - a command line task list manager. +// +// Copyright 2006-2013, Paul Beckingham, Federico Hernandez. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// +// http://www.opensource.org/licenses/mit-license.php +// +//////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +Context context; + +//////////////////////////////////////////////////////////////////////////////// +void usage () +{ + std::cout << std::endl + << "Usage: parser [options] " + << std::endl + << std::endl + << "Options are:" + << std::endl + << " -v/--verbose Increased verbosity" + << std::endl + << std::endl; + + exit (-1); +} + +//////////////////////////////////////////////////////////////////////////////// +int main (int argc, char** argv) +{ + // Process command line arguments + std::string grammarFile = ""; + std::string commandLine = ""; + std::vector args; + bool verbose = false; + + for (int i = 1; i < argc; ++i) + { + if (argv[i][0] == '-') + { + if (!strcmp (argv[i], "-h")) usage (); + else if (!strcmp (argv[i], "--help")) usage (); + else if (!strcmp (argv[i], "-v")) verbose = true; + else if (!strcmp (argv[i], "--verbose")) verbose = true; + else + { + std::cout << "Unrecognized option '" << argv[i] << "'" << std::endl; + usage (); + } + } + else if (grammarFile == "") + { + grammarFile = argv[i]; + } + else + { + if (commandLine != "") + commandLine += " "; + + commandLine += "'" + std::string (argv[i]) + "'"; + } + } + + // Display usage for incorrect command line. + if (grammarFile == "" || commandLine == "") + usage (); + + try + { + std::string grammar; + if (File::read (grammarFile, grammar)) + { + // Parse the tokens. + LRParser p; + if (verbose) p.verbose (); + + p.grammar (grammar); + + if (verbose) p.dump (); + Tree* t = p.parse (commandLine); + if (t) + { + t->dump (); + delete t; + } + } + } + + catch (const std::string& error) + { + std::cout << "Error: " << error << std::endl; + } + + return 0; +} + +////////////////////////////////////////////////////////////////////////////////