- Added old (non-task) parsing code, as a basis for the new parser.
- Added Tree object for parse trees.
- Added top-level entry point for testing.
This commit is contained in:
Paul Beckingham 2013-08-30 11:21:47 -07:00
parent 7e5b43bb9d
commit 6ebee13fa4
9 changed files with 1590 additions and 0 deletions

24
src/parser/CMakeLists.txt Normal file
View file

@ -0,0 +1,24 @@
# Build script for the parser library and the executable built from bnf.cpp.
cmake_minimum_required (VERSION 2.8)
# Header search path: the project root, src and its columns/commands/parser
# subdirectories, plus whatever TASK_INCLUDE_DIRS holds.
include_directories (${CMAKE_SOURCE_DIR}
${CMAKE_SOURCE_DIR}/src
${CMAKE_SOURCE_DIR}/src/columns
${CMAKE_SOURCE_DIR}/src/commands
${CMAKE_SOURCE_DIR}/src/parser
${TASK_INCLUDE_DIRS})
# Sources that make up the static 'parser' library.
set (parser_SRCS LRParser.cpp LRParser.h
Parser.cpp Parser.h
Tree.cpp Tree.h)
add_library (parser STATIC ${parser_SRCS})
# The executable target is called parser_executable (the target name 'parser'
# is already used by the library above); OUTPUT_NAME makes the produced binary
# 'parser'.
add_executable (parser_executable bnf.cpp)
target_link_libraries (parser_executable columns commands task parser ${TASK_LIBRARIES})
set_property (TARGET parser_executable PROPERTY OUTPUT_NAME "parser")
# Installation of the executable is currently commented out.
#install (TARGETS parser_executable DESTINATION ${TASK_BINDIR})
# The build type is forced to 'debug' here; -Wall is appended to both the debug
# and release flag sets.
set (CMAKE_BUILD_TYPE debug)
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -Wall")
set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -Wall")

465
src/parser/LRParser.cpp Normal file
View file

@ -0,0 +1,465 @@
////////////////////////////////////////////////////////////////////////////////
// taskwarrior - a command line task list manager.
//
// Copyright 2006-2013, Paul Beckingham, Federico Hernandez.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// http://www.opensource.org/licenses/mit-license.php
//
////////////////////////////////////////////////////////////////////////////////
#include <iostream>
#include <LRParser.h>
#include <RX.h>
#include <text.h>
#include <main.h>
////////////////////////////////////////////////////////////////////////////////
// Default constructor. Has no work of its own; the body is empty.
LRParser::LRParser ()
{
}
////////////////////////////////////////////////////////////////////////////////
// Parses 'tokens' against the grammar, starting at the primary rule.
//
// Returns a newly allocated tree whose top node is named "root"; the caller
// receives the raw pointer and is responsible for deleting it.  Returns NULL
// when the input does not match.  If matching throws, the partially built tree
// is released before the exception propagates.
//
// NOTE(review): success is reported as soon as the primary rule matches; the
// cursor is not compared against the end of the input here - confirm that
// accepting trailing unparsed input is intended.
//
// This is called only from external code.
Tree* LRParser::parse (const std::string& tokens)
{
  // 'new' signals failure by throwing std::bad_alloc, it never yields NULL, so
  // no null check is required.
  Tree* tree = new Tree ("root");

  bool matched = false;
  try
  {
    unsigned int cursor = 0;
    matched = matchRule (_primary, '=', cursor, tokens, tree);
  }
  catch (...)
  {
    // Do not leak the partial tree when a matcher throws.
    delete tree;
    throw;
  }

  if (_verbose)
    std::cout << (matched ? "syntax pass" : "syntax fail") << std::endl;

  if (! matched)
  {
    delete tree;
    tree = NULL;
  }

  return tree;
}
////////////////////////////////////////////////////////////////////////////////
// Wraps calls to matchRule, while properly handling the quantifier.
//
//   '='  must match exactly once; the result of the single attempt is returned.
//   '?'  may match zero or one time; always succeeds.
//   '+'  must match once, then matches repeatedly until an attempt fails.
//   '*'  matches repeatedly until an attempt fails; always succeeds.
//
// On every failed attempt the cursor is left where it was before that attempt.
// Repetition stops as soon as a successful attempt consumes no input, because
// such a rule would otherwise succeed forever at the same position (and add a
// branch to 'tree' each time).
//
// Throws std::string for an unrecognized quantifier.
bool LRParser::matchRuleQuant (
  const std::string& rule,
  char quantifier,
  unsigned int& cursor,
  const std::string& tokens,
  Tree* tree)
{
  switch (quantifier)
  {
  case '=':
    return matchRule (rule, quantifier, cursor, tokens, tree);

  case '?':
    {
      const unsigned int before = cursor;
      if (! matchRule (rule, quantifier, cursor, tokens, tree))
        cursor = before;

      return true;
    }

  case '+':
  case '*':
    {
      unsigned int before = cursor;
      if (! matchRule (rule, quantifier, cursor, tokens, tree))
      {
        cursor = before;

        // Zero matches satisfies '*' but not '+'.
        return quantifier == '*';
      }

      // Keep going only while the previous match made progress.
      while (cursor > before)
      {
        before = cursor;
        if (! matchRule (rule, quantifier, cursor, tokens, tree))
        {
          cursor = before;
          break;
        }
      }

      return true;
    }
  }

  throw std::string ("LRParser::matchRuleQuant - this should never happen.");
}
////////////////////////////////////////////////////////////////////////////////
// Returns true, with cursor incremented, if any of the alternates match.
bool LRParser::matchRule (
const std::string& rule,
char quantifier,
unsigned int& cursor,
const std::string& tokens,
Tree* tree)
{
if (cursor >= tokens.length ()) return false;
unsigned int original_cursor = cursor; // Preserve
for (unsigned int alt = 0;
alt < _rules[rule].size () && cursor < tokens.length ();
++alt)
{
Tree* b = new Tree (rule);
if (! b)
throw std::string ("Failed to allocate memory for parse tree.");
if (matchAlternate (rule, quantifier, alt, _rules[rule][alt], cursor, tokens, b))
{
if (_verbose)
std::cout << "\033[32m"
<< "matchRule "
<< rule
<< quantifier
<< "/a"
<< alt
<< " tokens["
<< cursor - 1
<< "]="
<< visible (tokens[cursor - 1])
<< " SUCCEED"
<< " "
<< tree
<< "->"
<< b
<< "\033[0m"
<< std::endl;
tree->addBranch (b);
return true;
}
delete b;
}
cursor = original_cursor; // Restore
if (_verbose)
std::cout << "\033[31m"
<< "matchRule "
<< rule
<< quantifier
<< " FAIL"
<< "\033[0m"
<< std::endl;
return false;
}
////////////////////////////////////////////////////////////////////////////////
// Returns true, with cursor incremented, if all of the token match.
bool LRParser::matchAlternate (
const std::string& rule,
char quantifier,
unsigned int alt,
const Alternate& alternate,
unsigned int& cursor,
const std::string& tokens,
Tree* tree)
{
if (cursor >= tokens.length ()) return false;
unsigned int original_cursor = cursor; // Preserve
for (unsigned int token = 0;
token < alternate.size () && cursor < tokens.length ();
++token)
{
if (! matchToken (rule, quantifier, alt, token, alternate[token], cursor, tokens, tree))
{
cursor = original_cursor; // Restore
if (_verbose)
std::cout << "\033[31m"
<< "matchAlternate "
<< rule
<< quantifier
<< "/a"
<< alt
<< "/t"
<< token
<< " tokens["
<< cursor
<< "]="
<< visible (tokens[cursor])
<< " FAIL"
<< "\033[0m"
<< std::endl;
return false;
}
if (_verbose)
std::cout << "\033[32m"
<< "matchAlternate "
<< rule
<< quantifier
<< "/a"
<< alt
<< "/t"
<< token
<< " SUCCEED"
<< "\033[0m"
<< std::endl;
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
// Matches a single grammar token at the cursor.
//
// The token is tried, in this order, as a rule reference, a special (escaped)
// literal, a quoted literal and a regex; the first matcher that succeeds wins.
// When none succeed the cursor is restored to its value on entry and false is
// returned.  'rule', 'quantifier', 'alt' and 'tok' identify the position in
// the grammar for the verbose trace.
bool LRParser::matchToken (
  const std::string& rule,
  char quantifier,
  unsigned int alt,
  unsigned int tok,
  const Token& token,
  unsigned int& cursor,
  const std::string& tokens,
  Tree* tree)
{
  if (cursor >= tokens.length ())
    return false;

  const unsigned int entry = cursor;

  const bool matched =
    tokenMatchRule           (rule, quantifier, alt, tok, token, cursor, tokens, tree) ||
    tokenMatchSpecialLiteral (token, cursor, tokens, tree)                             ||
    tokenMatchLiteral        (token, cursor, tokens, tree)                             ||
    tokenMatchRegex          (token, cursor, tokens, tree);

  if (! matched)
    cursor = entry;

  // Both outcomes share one trace format; only the colour and verdict differ.
  if (_verbose)
    std::cout << (matched ? "\033[32m" : "\033[31m")
              << "matchToken "
              << rule
              << quantifier
              << "/a"
              << alt
              << "/t"
              << tok
              << " tokens["
              << cursor
              << "]="
              << visible (tokens[cursor])
              << " token="
              << token.value
              << (matched ? " SUCCEED" : " FAIL")
              << "\033[0m"
              << std::endl;

  return matched;
}
////////////////////////////////////////////////////////////////////////////////
// Matches one of the escaped single-character literals of the grammar:
// "\t", "\n", "\r", "\f", "\v" and "\"" (written in the grammar with the
// backslash spelled out) against the actual character at the cursor.
//
// On success the node is tagged "literal" and "special", its "token"
// attribute is set to the matched character, and the cursor advances by one.
bool LRParser::tokenMatchSpecialLiteral (
  const Token& token,
  unsigned int& cursor,
  const std::string& tokens,
  Tree* tree)
{
  if (cursor >= tokens.length ()) return false;

  const char c = tokens[cursor];
  const std::string& spec = token.value;

  const bool matched = (c == '\t' && spec == "\"\\t\"") ||
                       (c == '\n' && spec == "\"\\n\"") ||
                       (c == '\r' && spec == "\"\\r\"") ||
                       (c == '\f' && spec == "\"\\f\"") ||
                       (c == '\v' && spec == "\"\\v\"") ||
                       (c == '"'  && spec == "\"\\\"\"");
  if (! matched)
    return false;

  tree->tag ("literal");
  tree->tag ("special");

  // Pass a std::string: a bare char would select the attribute (name, int)
  // overload and store the character code instead of the matched text, unlike
  // the other matchers, which store the text itself.
  tree->attribute ("token", std::string (1, c));

  if (_verbose)
    std::cout << "tokenMatchSpecialLiteral "
              << token.value
              << " SUCCEED"
              << std::endl;

  cursor++;
  return true;
}
////////////////////////////////////////////////////////////////////////////////
// Matches a double-quoted grammar literal against the input at the cursor.
//
// The token value must be at least two characters long and be enclosed in
// double quotes; the text between the quotes must appear in 'tokens' exactly
// at 'cursor'.  On success the node is tagged "literal", its "token"
// attribute is set to the unquoted text and the cursor advances past it.
bool LRParser::tokenMatchLiteral (
  const Token& token,
  unsigned int& cursor,
  const std::string& tokens,
  Tree* tree)
{
  const std::string& quoted = token.value;
  const std::string::size_type size = quoted.length ();

  // Needs both quotes.  This also rejects an empty value and a lone '"', for
  // which "length - 2" would otherwise go negative.
  if (size < 2 ||
      quoted[0] != '"' ||
      quoted[size - 1] != '"')
    return false;

  // Not enough input left to hold the literal.
  const std::string::size_type len = size - 2;
  if (len > tokens.length () ||
      cursor > tokens.length () - len)
    return false;

  // Compare in place at the cursor only, rather than searching the rest of
  // the input for the literal.
  if (tokens.compare (cursor, len, quoted, 1, len) != 0)
    return false;

  tree->tag ("literal");
  tree->attribute ("token", quoted.substr (1, len));
  cursor += static_cast <unsigned int> (len);

  if (_verbose)
    std::cout << "tokenMatchLiteral "
              << token.value
              << " SUCCEED"
              << std::endl;

  return true;
}
////////////////////////////////////////////////////////////////////////////////
// Matches a grammar regex token, written /.../, against the input at the
// cursor.
//
// The pattern between the slashes is wrapped in a capture group and applied
// to the remaining input; it only counts when the match begins exactly at the
// cursor.  On success the node is tagged "regex", its "token" attribute is
// set to the matched text and the cursor advances past it.
bool LRParser::tokenMatchRegex (
  const Token& token,
  unsigned int& cursor,
  const std::string& tokens,
  Tree* tree)
{
  // Same end-of-input guard as the other matchers, so a regex can still match
  // when only one character of input remains.
  if (cursor >= tokens.length ()) return false;

  // If it looks like a regex.
  const std::string& pattern = token.value;
  const std::string::size_type size = pattern.length ();
  if (size < 2 ||
      pattern[0] != '/' ||
      pattern[size - 1] != '/')
    return false;

  // If the regex matches at all.
  RX rx ("(" + pattern.substr (1, size - 2) + ")", false);
  std::vector <int> start;
  std::vector <int> end;
  if (! rx.match (start, end, tokens.substr (cursor, std::string::npos)))
    return false;

  // If the match is at position 'cursor'.
  if (start.empty () || end.empty () || start[0] != 0)
    return false;

  // Capture the text before moving the cursor, so the attribute and the
  // trace both show what was actually matched.
  const std::string matched = tokens.substr (cursor, end[0]);

  tree->tag ("regex");
  tree->attribute ("token", matched);
  cursor += end[0];

  if (_verbose)
    std::cout << "tokenMatchRegex \""
              << matched
              << "\""
              << " SUCCEED"
              << std::endl;

  return true;
}
////////////////////////////////////////////////////////////////////////////////
// Treats the token as a reference to another rule.
//
// When the token's value names a rule of the grammar, matching recurses into
// that rule through matchRuleQuant, using the token's own quantifier, and the
// result of that call is returned.  Any other token yields false.  'rule',
// 'quantifier', 'alt' and 'it' are used only for the verbose trace.
bool LRParser::tokenMatchRule (
  const std::string& rule,
  char quantifier,
  unsigned int alt,
  unsigned int it,
  const Token& token,
  unsigned int& cursor,
  const std::string& tokens,
  Tree* tree)
{
  if (cursor >= tokens.length ())
    return false;

  // Not a definition: nothing to recurse into.
  if (_rules.find (token.value) == _rules.end ())
    return false;

  if (_verbose)
    std::cout << "tokenMatchRule "
              << rule
              << quantifier
              << "/a"
              << alt
              << "/t"
              << it
              << " tokens["
              << cursor
              << "]="
              << visible (tokens[cursor])
              << " token="
              << token.value
              << " RECURSING matchRuleQuant"
              << std::endl;

  return matchRuleQuant (token.value, token.quantifier, cursor, tokens, tree);
}
////////////////////////////////////////////////////////////////////////////////

53
src/parser/LRParser.h Normal file
View file

@ -0,0 +1,53 @@
////////////////////////////////////////////////////////////////////////////////
// taskwarrior - a command line task list manager.
//
// Copyright 2006-2013, Paul Beckingham, Federico Hernandez.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// http://www.opensource.org/licenses/mit-license.php
//
////////////////////////////////////////////////////////////////////////////////
#ifndef INCLUDED_LRPARSER
#define INCLUDED_LRPARSER
#include "Parser.h"
// Parser implementation that matches input by recursing through rules,
// alternates and tokens of the grammar held by Parser.
class LRParser : public Parser
{
public:
  LRParser ();

  // Parses 'tokens'; returns a tree on success, NULL on a syntax failure.
  Tree* parse (const std::string& tokens);

private:
  // Rule level: apply a quantifier, then try each alternate of a rule.
  bool matchRuleQuant (const std::string& rule, char quantifier, unsigned int& cursor, const std::string& tokens, Tree* tree);
  bool matchRule      (const std::string& rule, char quantifier, unsigned int& cursor, const std::string& tokens, Tree* tree);

  // Alternate level: every token of one alternate, in sequence.
  bool matchAlternate (const std::string& rule, char quantifier, unsigned int alt, const Alternate& alternate, unsigned int& cursor, const std::string& tokens, Tree* tree);

  // Token level: dispatch to the four kinds of token matcher below.
  bool matchToken (const std::string& rule, char quantifier, unsigned int alt, unsigned int tok, const Token& token, unsigned int& cursor, const std::string& tokens, Tree* tree);

  bool tokenMatchSpecialLiteral (const Token& token, unsigned int& cursor, const std::string& tokens, Tree* tree);
  bool tokenMatchLiteral        (const Token& token, unsigned int& cursor, const std::string& tokens, Tree* tree);
  bool tokenMatchRegex          (const Token& token, unsigned int& cursor, const std::string& tokens, Tree* tree);
  bool tokenMatchRule           (const std::string& rule, char quantifier, unsigned int alt, unsigned int it, const Token& token, unsigned int& cursor, const std::string& tokens, Tree* tree);
};
#endif
////////////////////////////////////////////////////////////////////////////////

389
src/parser/Parser.cpp Normal file
View file

@ -0,0 +1,389 @@
////////////////////////////////////////////////////////////////////////////////
// taskwarrior - a command line task list manager.
//
// Copyright 2006-2013, Paul Beckingham, Federico Hernandez.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// http://www.opensource.org/licenses/mit-license.php
//
////////////////////////////////////////////////////////////////////////////////
#include <iostream>
#include <Parser.h>
#include <text.h>
#include <main.h>
////////////////////////////////////////////////////////////////////////////////
// Starts with no primary rule (empty name) and verbose tracing switched off.
Parser::Parser ()
: _primary ()
, _verbose (false)
{
}
////////////////////////////////////////////////////////////////////////////////
// Destructor; the body is empty.
Parser::~Parser ()
{
}
////////////////////////////////////////////////////////////////////////////////
// Loads a grammar from its text.
//
// Everything from a '#' to the end of its line is discarded first (the search
// for '#' does not look at quoting, so a '#' inside a literal or regex is cut
// as well).  Rules are then read one after another; the first rule read while
// no primary rule is set becomes the primary rule, and a rule read twice
// replaces its earlier definition.  Finally the grammar is checked for
// consistency, which throws std::string on a problem.
void Parser::grammar (const std::string& file)
{
  // Strip comments.
  std::vector <std::string> lines;
  split (lines, file, '\n');

  std::string stripped;
  for (std::vector <std::string>::const_iterator line = lines.begin ();
       line != lines.end ();
       ++line)
  {
    // When there is no '#', find yields npos and substr keeps the whole line.
    stripped += line->substr (0, line->find ("#"));
    stripped += "\n";
  }

  // Now parse the grammar.
  Nibbler n (stripped);
  std::string rule;
  Production prod;
  while (bnfNibbleRule (n, rule, prod))
  {
    if (_primary.empty ())
      _primary = rule;

    _rules[rule] = prod;
  }

  // Now the hard part.
  checkConsistency ();
}
////////////////////////////////////////////////////////////////////////////////
// Reads one rule definition from 'n' into 'rule' (its name) and 'prod' (its
// tags and alternates).  'prod' is cleared first.
//
// Shape read: a name ending at ':' or ' ', any number of ':tag' suffixes, the
// literal "::=", then alternates as delivered by bnfNibbleAlternate.
// Returns false when no name or no "::=" can be read.
bool Parser::bnfNibbleRule (Nibbler& n, std::string& rule, Production& prod)
{
  prod.clear ();
  n.skipWS ();

  if (! n.getUntilOneOf (": ", rule))
    return false;

  // Tags attached to the rule name.
  std::string att;
  while (n.skip (':') &&
         n.getUntilOneOf (": ", att))
    prod.tag (att);

  // Definition.
  n.skipWS ();
  if (! n.getLiteral ("::="))
    return false;

  // Alternates.  Each 'true' from bnfNibbleAlternate delivers one complete
  // alternate with more to follow.
  Alternate alt;
  for (;;)
  {
    if (! bnfNibbleAlternate (n, alt))
      break;

    prod.push_back (alt);
    alt.clear ();
  }

  // The final alternate, if it has any tokens, is still pending in 'alt'.
  if (! alt.empty ())
    prod.push_back (alt);

  return true;
}
////////////////////////////////////////////////////////////////////////////////
// Appends the tokens of one alternate to 'alt'.
//
// Returns true when the alternate ended with '|' (another one follows) and
// false when it ended with ';' (end of the rule).  Throws std::string when
// neither separator is found.
bool Parser::bnfNibbleAlternate (Nibbler& n, Alternate& alt)
{
  n.skipWS ();

  for (Token tok; bnfNibbleToken (n, tok); )
    alt.push_back (tok);

  if (n.skip ('|'))
    return true;

  if (n.skip (';'))
    return false;

  throw std::string ("Expected | or ;");
}
////////////////////////////////////////////////////////////////////////////////
// Reads one token of an alternate into 'tok', which is cleared first.
//
// Nothing is read, and false is returned, when the next character is the
// alternate separator '|' or the rule terminator ';'.  Otherwise the token is
// read as a /regex/, a "literal" or a bare name, followed by an optional
// quantifier character (one of = ? + *); without one the token keeps the
// default quantifier set by clear.
bool Parser::bnfNibbleToken (Nibbler& n, Token& tok)
{
  tok.clear ();
  n.skipWS ();

  if (n.next () == '|' ||   // Alternate
      n.next () == ';')     // Terminator
    return false;

  const bool found = n.getQuoted ('/', tok.value, true)         ||  // Regex
                     n.getQuoted ('"', tok.value, true)         ||  // Literal
                     n.getUntilOneOf ("\n\t =?+*", tok.value);      // Name
  if (! found)
    return false;

  // Quantifiers, tried in this order: '=' 1, '?' 0,1, '+' 1->, '*' 0->.
  static const char quantifiers[] = {'=', '?', '+', '*'};
  for (unsigned int q = 0; q < sizeof (quantifiers); ++q)
  {
    if (n.skip (quantifiers[q]))
    {
      tok.quantifier = quantifiers[q];
      break;
    }
  }

  return true;
}
////////////////////////////////////////////////////////////////////////////////
// Check consistency of the syntax. This is where all static analysis occurs.
void Parser::checkConsistency ()
{
std::vector <std::string> allRules;
std::vector <std::string> allToken;
std::vector <std::string> allLeftRecursive;
std::map <std::string, Production>::iterator r;
for (r = _rules.begin (); r != _rules.end (); ++r)
{
allRules.push_back (r->first);
std::vector <Alternate>::iterator a;
for (a = r->second.begin (); a != r->second.end (); ++a)
{
std::vector <Token>::iterator i;
for (i = a->begin (); i != a->end (); ++i)
{
if (i->value[0] != '"' &&
i->value[0] != '/')
allToken.push_back (i->value);
if (i == a->begin () && r->first == i->value)
allLeftRecursive.push_back (i->value);
}
}
}
std::vector <std::string> notUsed;
std::vector <std::string> notDefined;
listDiff (allRules, allToken, notUsed, notDefined);
// Undefined value - these are definitions that appear in token, but are
// not in _rules.
for (unsigned int i = 0; i < notDefined.size (); ++i)
throw std::string ("definition '") + notDefined[i] + "' referenced, but not defined.";
// Circular definitions - these are names in _rules that also appear as
// token 0 in any of the alternates for that definition.
for (unsigned int i = 0; i < allLeftRecursive.size (); ++i)
throw std::string ("definition '") + allLeftRecursive[i] + "' is left recursive.";
for (unsigned int i = 0; i < allRules.size (); ++i)
if (allRules[i][0] == '"')
throw std::string ("definition '") + allRules[i] + "' must not be a literal.";
// Unused definitions - these are names in _rules that are never
// referenced as token.
for (unsigned int i = 0; i < notUsed.size (); ++i)
if (notUsed[i] != _primary)
throw std::string ("definition '") + notUsed[i] + "' defined, but not referenced.";
}
////////////////////////////////////////////////////////////////////////////////
// Switches verbose tracing on. Nothing in this file switches it off again.
void Parser::verbose ()
{
_verbose = true;
}
////////////////////////////////////////////////////////////////////////////////
// Display the entire parsed tree. Highlight the primary definition.
void Parser::dump () const
{
std::map <std::string, Production>::const_iterator def;
for (def = _rules.begin (); def != _rules.end (); ++def)
{
if (def->first == _primary)
std::cout << "\033[1m" << def->first << "\033[0m";
else
std::cout << def->first;
std::cout << " ::=" << std::endl;
std::vector <Alternate>::const_iterator alt;
for (alt = def->second.begin (); alt != def->second.end (); ++alt)
{
if (alt != def->second.begin ())
std::cout << " | ";
else
std::cout << " ";
std::vector <Token>::const_iterator tok;
for (tok = alt->begin (); tok != alt->end (); ++tok)
{
std::cout << tok->value;
if (tok->quantifier != '=')
std::cout << tok->quantifier;
std::cout << " ";
}
std::cout << std::endl;
}
std::cout << " ;" << std::endl;
}
}
////////////////////////////////////////////////////////////////////////////////
// A new token has an empty value and the "exactly once" quantifier '='.
Parser::Token::Token ()
: value ()
, quantifier ('=')
{
}
////////////////////////////////////////////////////////////////////////////////
// Copy constructor: copies the value and the quantifier of 'other'.
Parser::Token::Token (const Parser::Token& other)
: value (other.value)
, quantifier (other.quantifier)
{
}
////////////////////////////////////////////////////////////////////////////////
// Assignment: copies the value and the quantifier of 'other'; assigning a
// token to itself changes nothing.
Parser::Token& Parser::Token::operator= (const Parser::Token& other)
{
  if (this == &other)
    return *this;

  value      = other.value;
  quantifier = other.quantifier;
  return *this;
}
////////////////////////////////////////////////////////////////////////////////
// Returns the token to its freshly constructed state: empty value, '='.
void Parser::Token::clear ()
{
  value.clear ();
  quantifier = '=';
}
////////////////////////////////////////////////////////////////////////////////
// Returns the token's value immediately followed by its quantifier character.
std::string Parser::Token::dump ()
{
  std::string text (value);
  text += quantifier;
  return text;
}
////////////////////////////////////////////////////////////////////////////////
// Default constructor: an alternate with no tokens.
Parser::Alternate::Alternate ()
: std::vector <Parser::Token> ()
{
}
////////////////////////////////////////////////////////////////////////////////
// Copy constructor: copies the tokens of 'other'.
Parser::Alternate::Alternate (const Parser::Alternate& other)
: std::vector <Parser::Token> (other)
{
}
////////////////////////////////////////////////////////////////////////////////
// Assignment: replaces the tokens with those of 'other'; assigning an
// alternate to itself changes nothing.
Parser::Alternate& Parser::Alternate::operator= (const Parser::Alternate& other)
{
  if (this == &other)
    return *this;

  std::vector <Parser::Token>::operator= (other);
  return *this;
}
////////////////////////////////////////////////////////////////////////////////
std::string Parser::Alternate::dump ()
{
std::string result;
std::vector <Parser::Token>::iterator i;
for (i = this->begin (); i != this->end (); ++i)
result += i->dump () + ' ';
return result;
}
////////////////////////////////////////////////////////////////////////////////
// Default constructor: a production with no alternates (and no tags).
Parser::Production::Production ()
: std::vector <Parser::Alternate> ()
{
}
////////////////////////////////////////////////////////////////////////////////
// Copy constructor: copies both the alternates and the tags of 'other'.
Parser::Production::Production (const Parser::Production& other)
: std::vector <Parser::Alternate> (other)
, mTags (other.mTags)
{
}
////////////////////////////////////////////////////////////////////////////////
// Assignment: replaces the alternates and the tags with those of 'other';
// assigning a production to itself changes nothing.
Parser::Production& Parser::Production::operator= (const Parser::Production& other)
{
  if (this == &other)
    return *this;

  std::vector <Parser::Alternate>::operator= (other);
  mTags = other.mTags;
  return *this;
}
////////////////////////////////////////////////////////////////////////////////
// Appends 't' to the list of tags. No check is made for duplicates.
void Parser::Production::tag (const std::string& t)
{
mTags.push_back (t);
}
////////////////////////////////////////////////////////////////////////////////
// Returns true when 't' is one of the tags of this production.
bool Parser::Production::hasTag (const std::string& t) const
{
  std::vector <std::string>::const_iterator found =
    std::find (mTags.begin (), mTags.end (), t);

  return found != mTags.end ();
}
////////////////////////////////////////////////////////////////////////////////
// Removes all tags and all alternates.  This hides std::vector::clear, which
// would leave the tags in place.
void Parser::Production::clear ()
{
  mTags.clear ();
  std::vector <Parser::Alternate>::clear ();
}
////////////////////////////////////////////////////////////////////////////////
std::string Parser::Production::dump ()
{
std::string result;
std::vector <Parser::Alternate>::iterator i;
for (i = this->begin (); i != this->end (); ++i)
{
if (i != this->begin ())
result += " | ";
result += i->dump ();
}
result += "\n";
return result;
}
////////////////////////////////////////////////////////////////////////////////

103
src/parser/Parser.h Normal file
View file

@ -0,0 +1,103 @@
////////////////////////////////////////////////////////////////////////////////
// taskwarrior - a command line task list manager.
//
// Copyright 2006-2013, Paul Beckingham, Federico Hernandez.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// http://www.opensource.org/licenses/mit-license.php
//
////////////////////////////////////////////////////////////////////////////////
#ifndef INCLUDED_PARSER
#define INCLUDED_PARSER
#include <algorithm>
#include <map>
#include <vector>
#include <string>
#include <Nibbler.h>
#include <Tree.h>
// Base class for grammar-driven parsers.  It reads a grammar text into a set
// of named rules and leaves the actual matching of input (parse) to derived
// classes.
class Parser
{
public:
  Parser ();
  virtual ~Parser ();

  // Reads the grammar text 'file' into the rule set and checks it.
  void grammar (const std::string& file);

  // Implemented by derived classes.
  virtual Tree* parse (const std::string& tokens) = 0;

  // Switches verbose tracing on.
  void verbose ();

  // Writes the grammar to std::cout.
  void dump () const;

protected:
  // One element of an alternate: its text plus a quantifier character.
  class Token
  {
  public:
    Token ();
    Token (const Token& other);
    Token& operator= (const Token& other);
    void clear ();
    std::string dump ();

    std::string value;
    char quantifier;
  };

  // A sequence of tokens.
  class Alternate : public std::vector <Token>
  {
  public:
    Alternate ();
    Alternate (const Alternate& other);
    Alternate& operator= (const Alternate& other);
    std::string dump ();
  };

  // The alternates of one rule, together with the rule's tags.
  class Production : public std::vector <Alternate>
  {
  public:
    Production ();
    Production (const Production& other);
    Production& operator= (const Production& other);
    void tag (const std::string& t);
    bool hasTag (const std::string& t) const;
    void clear ();
    std::string dump ();

  private:
    std::vector <std::string> mTags;
  };

private:
  // Grammar reading helpers and the consistency check run after reading.
  bool bnfNibbleRule (Nibbler& n, std::string& rule, Production& prod);
  bool bnfNibbleAlternate (Nibbler& n, Alternate& alt);
  bool bnfNibbleToken (Nibbler& n, Token& tok);
  void checkConsistency ();

protected:
  std::string _primary;                        // Name of the first rule read.
  std::map <std::string, Production> _rules;   // Rule name -> definition.
  bool _verbose;                               // Verbose tracing flag.
};
#endif
////////////////////////////////////////////////////////////////////////////////

342
src/parser/Tree.cpp Normal file
View file

@ -0,0 +1,342 @@
////////////////////////////////////////////////////////////////////////////////
// taskwarrior - a command line task list manager.
//
// Copyright 2006-2013, Paul Beckingham, Federico Hernandez.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// http://www.opensource.org/licenses/mit-license.php
//
////////////////////////////////////////////////////////////////////////////////
#include <algorithm>
#include <iostream>
#include <text.h>
#include <Tree.h>
////////////////////////////////////////////////////////////////////////////////
// - Tree, Branch and Node are synonymous.
// - A Tree may contain any number of branches.
// - A Branch may contain any number of name/value pairs, unique by name.
// - The destructor will delete all branches recursively.
// - Tree::enumerate is a snapshot, and is invalidated by modification.
// - Branch sequence is preserved.
//
// Constructs a node called 'name' that has no parent (trunk) yet.
Tree::Tree (const std::string& name)
: _trunk (NULL)
, _name (name)
{
}
////////////////////////////////////////////////////////////////////////////////
// Deletes every branch; each branch in turn deletes its own branches.
Tree::~Tree ()
{
  for (unsigned int i = 0; i < _branches.size (); ++i)
    delete _branches[i];
}
////////////////////////////////////////////////////////////////////////////////
// Copy construction is not implemented: this always throws a C string
// (const char*), not a std::string.
Tree::Tree (const Tree& other)
{
throw "Unimplemented Tree::Tree (Tree&)";
}
////////////////////////////////////////////////////////////////////////////////
// Assignment is not implemented: this always throws a C string (const char*),
// not a std::string.
Tree& Tree::operator= (const Tree& other)
{
throw "Unimplemented Tree::operator= ()";
return *this; // Never reached, because of the throw above.
}
////////////////////////////////////////////////////////////////////////////////
// Returns the branch at position 'branch' (zero-based).  Throws a C string
// when the position is negative or not below the number of branches.
Tree* Tree::operator[] (const int branch)
{
  const int count = (int) _branches.size ();

  if (branch < 0 || branch >= count)
    throw "Tree::operator[] out of range";

  return _branches[branch];
}
////////////////////////////////////////////////////////////////////////////////
// Appends 'branch' as the last branch of this node and records this node as
// its parent. The destructor of this node will delete it. 'branch' is
// dereferenced without a check, so it must not be NULL.
void Tree::addBranch (Tree* branch)
{
branch->_trunk = this;
_branches.push_back (branch);
}
////////////////////////////////////////////////////////////////////////////////
// Detaches the first occurrence of 'branch' from this node's branch list; does
// nothing when it is not a branch of this node.  The detached branch is not
// deleted, and its parent pointer is left as it was.
void Tree::removeBranch (Tree* branch)
{
  std::vector <Tree*>::iterator found =
    std::find (_branches.begin (), _branches.end (), branch);

  if (found != _branches.end ())
    _branches.erase (found);
}
////////////////////////////////////////////////////////////////////////////////
// Puts 'to' in the position of the first occurrence of 'from' and makes this
// node the parent of 'to'.  Does nothing when 'from' is not a branch of this
// node.  'from' itself is not deleted.
void Tree::replaceBranch (Tree* from, Tree* to)
{
  std::vector <Tree*>::iterator slot =
    std::find (_branches.begin (), _branches.end (), from);

  if (slot == _branches.end ())
    return;

  to->_trunk = this;
  *slot = to;
}
////////////////////////////////////////////////////////////////////////////////
// Returns the number of direct branches of this node.
int Tree::branches ()
{
  return static_cast <int> (_branches.size ());
}
////////////////////////////////////////////////////////////////////////////////
// Sets the name of this node.
void Tree::name (const std::string& name)
{
_name = name;
}
////////////////////////////////////////////////////////////////////////////////
// Returns a copy of the name of this node.
std::string Tree::name () const
{
return _name;
}
////////////////////////////////////////////////////////////////////////////////
// Accessor for attributes.
// Sets attribute 'name' to the string 'value', replacing any earlier value.
void Tree::attribute (const std::string& name, const std::string& value)
{
_attributes[name] = value;
}
////////////////////////////////////////////////////////////////////////////////
// Accessor for attributes.
// Sets attribute 'name' to the text produced by format (value), replacing any
// earlier value. Note that a char argument selects this overload as well.
void Tree::attribute (const std::string& name, const int value)
{
_attributes[name] = format (value);
}
////////////////////////////////////////////////////////////////////////////////
// Accessor for attributes.
// Sets attribute 'name' to the text produced by format (value, 1, 8),
// replacing any earlier value.
// NOTE(review): the meaning of the 1 and 8 arguments is defined by format,
// which is declared elsewhere - presumably width and precision; confirm.
void Tree::attribute (const std::string& name, const double value)
{
_attributes[name] = format (value, 1, 8);
}
////////////////////////////////////////////////////////////////////////////////
// Accessor for attributes.
// Returns the value of attribute 'name', or an empty string when there is no
// such attribute.  The lookup uses find so that asking for a missing
// attribute does not create it.
std::string Tree::attribute (const std::string& name)
{
  std::map<std::string, std::string>::iterator found = _attributes.find (name);

  return found == _attributes.end () ? std::string ("") : found->second;
}
////////////////////////////////////////////////////////////////////////////////
// Deletes the attribute 'name', if present. Unknown names are ignored.
void Tree::removeAttribute (const std::string& name)
{
  std::map <std::string, std::string>::iterator entry = _attributes.find (name);

  if (entry != _attributes.end ())
    _attributes.erase (entry);
}
////////////////////////////////////////////////////////////////////////////////
// Returns the number of attributes stored on this node.
int Tree::attributes () const
{
  return static_cast <int> (_attributes.size ());
}
////////////////////////////////////////////////////////////////////////////////
// Returns the names (not the values) of every attribute on this node, in the
// iteration order of the underlying std::map.
std::vector <std::string> Tree::allAttributes () const
{
  std::vector <std::string> keys;

  std::map <std::string, std::string>::const_iterator entry = _attributes.begin ();
  while (entry != _attributes.end ())
  {
    keys.push_back (entry->first);
    ++entry;
  }

  return keys;
}
////////////////////////////////////////////////////////////////////////////////
// Appends every descendant of this node to 'all', visiting branches left to
// right. A branch's own descendants are appended before the branch itself, so
// a client that walks 'all' in order and deletes nodes as it goes never
// dereferences a node pointer after that node has been deleted.
//
// This node itself is not appended; only the nodes below it are.
void Tree::enumerate (std::vector <Tree*>& all) const
{
  for (std::vector <Tree*>::size_type n = 0; n < _branches.size (); ++n)
  {
    Tree* branch = _branches[n];

    branch->enumerate (all);
    all.push_back (branch);
  }
}
////////////////////////////////////////////////////////////////////////////////
// Returns the parent pointer (_trunk) recorded for this node.
Tree* Tree::parent () const
{
  Tree* const owner = _trunk;
  return owner;
}
////////////////////////////////////////////////////////////////////////////////
// Returns true when 'tag' is one of this node's tags (exact string match).
bool Tree::hasTag (const std::string& tag) const
{
  return std::find (_tags.begin (), _tags.end (), tag) != _tags.end ();
}
////////////////////////////////////////////////////////////////////////////////
// Adds 'tag' to this node. A tag that is already present is not added again,
// so the tag list never holds duplicates.
void Tree::tag (const std::string& tag)
{
  if (hasTag (tag))
    return;

  _tags.push_back (tag);
}
////////////////////////////////////////////////////////////////////////////////
// Returns the number of tags on this node.
int Tree::tags () const
{
  return static_cast <int> (_tags.size ());
}
////////////////////////////////////////////////////////////////////////////////
// Returns a copy of this node's tags, in the order they were added.
std::vector <std::string> Tree::allTags () const
{
  std::vector <std::string> copy (_tags);
  return copy;
}
////////////////////////////////////////////////////////////////////////////////
int Tree::count () const
{
int total = 1; // this one.
for (std::vector <Tree*>::const_iterator i = _branches.begin ();
i != _branches.end ();
++i)
{
// Recurse and count the branches.
total += (*i)->count ();
}
return total;
}
////////////////////////////////////////////////////////////////////////////////
// Locates a node by a '/'-separated path of node names, for example
// "root/child/grandchild".
//
// The first path element must equal the name of this node. Each following
// element selects, among the branches of the node reached so far, the first
// branch with that name.
//
// Returns the node the path leads to, or NULL when the path yields no elements
// or when any element fails to match.
Tree* Tree::find (const std::string& path)
{
  std::vector <std::string> elements;
  split (elements, path, '/');

  // Nothing to match against. Without this check the first element would be
  // read through an iterator equal to elements.end ().
  // NOTE(review): split () is defined elsewhere; presumably it yields no
  // elements for an empty path - confirm.
  if (elements.empty ())
    return NULL;

  // Must start at the trunk.
  Tree* cursor = this;
  std::vector <std::string>::iterator it = elements.begin ();
  if (cursor->name () != *it)
    return NULL;

  // Perhaps the trunk is what is needed?
  if (elements.size () == 1)
    return this;

  // Now look for the next branch.
  for (++it; it != elements.end (); ++it)
  {
    bool found = false;

    // If the cursor has a branch that matches *it, proceed.
    for (int i = 0; i < cursor->branches (); ++i)
    {
      if ((*cursor)[i]->name () == *it)
      {
        cursor = (*cursor)[i];
        found = true;
        break;
      }
    }

    if (!found)
      return NULL;
  }

  return cursor;
}
////////////////////////////////////////////////////////////////////////////////
// Writes one line describing node 't' to std::cout, then recurses into each of
// its branches with depth + 1.
//
// The line consists of: one space per level of 'depth', the node's address,
// its name wrapped in the escape sequences \033[1m ... \033[0m, its attributes
// as name='value' pairs (value wrapped in \033[33m ... \033[0m), and its tags
// (wrapped in \033[32m ... \033[0m). Attributes and tags are omitted when
// there is nothing to show.
void Tree::dumpNode (Tree* t, int depth)
{
  // Indentation.
  for (int level = depth; level > 0; --level)
    std::cout << " ";

  // Address and name.
  std::cout << t << " \033[1m" << t->name () << "\033[0m";

  // Attributes, separated by single spaces.
  const std::vector <std::string> names = t->allAttributes ();
  std::string attributeText;
  for (std::vector <std::string>::size_type n = 0; n < names.size (); ++n)
  {
    if (n > 0)
      attributeText += " ";

    attributeText += names[n] + "='\033[33m" + t->attribute (names[n]) + "\033[0m'";
  }

  if (! attributeText.empty ())
    std::cout << " " << attributeText;

  // Tags. A separator is only inserted once some text has been accumulated.
  const std::vector <std::string> labels = t->allTags ();
  std::string tagText;
  for (std::vector <std::string>::size_type n = 0; n < labels.size (); ++n)
  {
    if (! tagText.empty ())
      tagText += " ";

    tagText += labels[n];
  }

  if (! tagText.empty ())
    std::cout << " \033[32m" << tagText << "\033[0m";

  std::cout << "\n";

  // Branches, one level deeper.
  const int childCount = t->branches ();
  for (int child = 0; child < childCount; ++child)
    dumpNode ((*t)[child], depth + 1);
}
////////////////////////////////////////////////////////////////////////////////
// Writes the whole tree to std::cout: a header line with the total node count,
// followed by one line per node, starting with this node at depth 1.
void Tree::dump ()
{
  const int nodes = count ();
  std::cout << "Tree (" << nodes << " nodes)\n";

  dumpNode (this, 1);
}
////////////////////////////////////////////////////////////////////////////////

87
src/parser/Tree.h Normal file
View file

@ -0,0 +1,87 @@
////////////////////////////////////////////////////////////////////////////////
// taskwarrior - a command line task list manager.
//
// Copyright 2006-2013, Paul Beckingham, Federico Hernandez.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// http://www.opensource.org/licenses/mit-license.php
//
////////////////////////////////////////////////////////////////////////////////
#ifndef INCLUDED_TREE
#define INCLUDED_TREE
#include <map>
#include <vector>
#include <string>
class Tree;
// A named node of a parse tree. Every node records its parent (_trunk), an
// ordered list of child nodes (_branches), a set of name->value attributes and
// a list of tags.
class Tree
{
public:
// Construction, destruction and copying; defined in Tree.cpp.
// NOTE(review): the string argument is presumably the node name - confirm
// against the constructor definition.
Tree (const std::string&);
~Tree ();
Tree (const Tree&);
Tree& operator= (const Tree&);
// Returns the branch at the given index. An index beyond the last branch
// throws the C string "Tree::operator[] out of range".
Tree* operator[] (const int);
// Branch management. addBranch appends a child and sets its parent to this
// node; removeBranch detaches the first matching child; replaceBranch swaps
// the first matching child for another node, in place; branches returns the
// number of direct children.
void addBranch (Tree*);
void removeBranch (Tree*);
void replaceBranch (Tree*, Tree*);
int branches ();
// Node name: setter and getter.
void name (const std::string&);
std::string name () const;
// Attributes (name->value). The int and double setters store the value as
// text produced by format (). The getter returns "" for an unknown name and
// never creates an entry.
void attribute (const std::string&, const std::string&);
void attribute (const std::string&, const int);
void attribute (const std::string&, const double);
std::string attribute (const std::string&);
void removeAttribute (const std::string&);
// Number of attributes, and the list of attribute names.
int attributes () const;
std::vector <std::string> allAttributes () const;
// Tags. tag () adds a tag only if it is not already present.
bool hasTag (const std::string&) const;
void tag (const std::string&);
int tags () const;
std::vector <std::string> allTags () const;
// Appends all descendants of this node (not the node itself) to 'all'; the
// descendants of a branch are appended before the branch itself.
void enumerate (std::vector <Tree*>& all) const;
// Returns the parent pointer (_trunk).
Tree* parent () const;
// Number of nodes in the subtree rooted here, including this node.
int count () const;
// Looks up a node by a '/'-separated path of names. The first element must
// match this node's name. Returns NULL when nothing matches.
Tree* find (const std::string&);
// Writes the tree to std::cout.
void dump ();
private:
// Writes a single node at the given depth, then recurses into its branches.
void dumpNode (Tree*, int);
private:
Tree* _trunk; // Parent; set by addBranch/replaceBranch on the parent node.
std::string _name; // Name.
std::vector <Tree*> _branches; // Children, in insertion order.
std::map <std::string, std::string> _attributes; // Attributes (name->value).
std::vector <std::string> _tags; // Tags (tag, tag ...), without duplicates.
};
#endif
////////////////////////////////////////////////////////////////////////////////

126
src/parser/bnf.cpp Normal file
View file

@ -0,0 +1,126 @@
////////////////////////////////////////////////////////////////////////////////
// taskwarrior - a command line task list manager.
//
// Copyright 2006-2013, Paul Beckingham, Federico Hernandez.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// http://www.opensource.org/licenses/mit-license.php
//
////////////////////////////////////////////////////////////////////////////////
#include <iostream>
#include <fstream>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <Context.h>
#include <File.h>
#include <Tree.h>
#include <LRParser.h>
#include <text.h>
// Global Context instance.
// NOTE(review): neither usage () nor main () below refers to this object;
// presumably the libraries this program links against expect a global named
// 'context' - confirm before removing.
Context context;
////////////////////////////////////////////////////////////////////////////////
// Prints the command line synopsis to std::cout and terminates the program
// with exit (-1). This function does not return.
void usage ()
{
  std::cout << std::endl;
  std::cout << "Usage: parser [options] <grammar file> <args>" << std::endl;
  std::cout << std::endl;
  std::cout << "Options are:" << std::endl;
  std::cout << " -v/--verbose Increased verbosity" << std::endl;
  std::cout << std::endl;

  exit (-1);
}
////////////////////////////////////////////////////////////////////////////////
// Entry point of the parser test program.
//
// Command line: parser [options] <grammar file> <args>
//   - Arguments starting with '-' are options: -h/--help print the usage text
//     and exit, -v/--verbose enable verbose output. Any other option is
//     reported and the usage text is shown.
//   - The first remaining argument is the grammar file.
//   - All further arguments are wrapped in single quotes and joined with
//     spaces to form the command line handed to the parser.
//
// The grammar file is read, loaded into an LRParser, and the command line is
// parsed; a resulting tree is dumped to std::cout.
//
// Returns 0 on success and 1 when the grammar file cannot be read or an error
// is thrown while parsing.
int main (int argc, char** argv)
{
  // Process command line arguments
  std::string grammarFile = "";
  std::string commandLine = "";
  bool verbose = false;

  for (int i = 1; i < argc; ++i)
  {
    if (argv[i][0] == '-')
    {
      if (!strcmp (argv[i], "-h")) usage ();
      else if (!strcmp (argv[i], "--help")) usage ();
      else if (!strcmp (argv[i], "-v")) verbose = true;
      else if (!strcmp (argv[i], "--verbose")) verbose = true;
      else
      {
        std::cout << "Unrecognized option '" << argv[i] << "'" << std::endl;
        usage ();
      }
    }
    else if (grammarFile == "")
    {
      grammarFile = argv[i];
    }
    else
    {
      if (commandLine != "")
        commandLine += " ";

      commandLine += "'" + std::string (argv[i]) + "'";
    }
  }

  // Display usage for incorrect command line.
  if (grammarFile == "" || commandLine == "")
    usage ();

  int status = 0;

  try
  {
    std::string grammar;
    if (File::read (grammarFile, grammar))
    {
      // Parse the tokens.
      LRParser p;
      if (verbose) p.verbose ();
      p.grammar (grammar);
      if (verbose) p.dump ();

      Tree* t = p.parse (commandLine);
      if (t)
      {
        t->dump ();
        delete t;
      }
    }
    else
    {
      // Report the failure rather than exiting silently with success.
      std::cout << "Error: unable to read grammar file '" << grammarFile << "'" << std::endl;
      status = 1;
    }
  }

  catch (const std::string& error)
  {
    std::cout << "Error: " << error << std::endl;
    status = 1;
  }

  // Tree::operator[] throws a string literal, which has type const char* and
  // is not matched by the std::string handler above.
  catch (const char* error)
  {
    std::cout << "Error: " << error << std::endl;
    status = 1;
  }

  return status;
}
////////////////////////////////////////////////////////////////////////////////