mirror of
https://github.com/GothenburgBitFactory/taskwarrior.git
synced 2025-09-09 00:30:36 +02:00
LRParser
- Added old (non-task) parsing code, as a basis for the new parser. - Added Tree object for parse trees. - Added top-level entry point for testing.
This commit is contained in:
parent
7e5b43bb9d
commit
6ebee13fa4
9 changed files with 1590 additions and 0 deletions
465
src/parser/LRParser.cpp
Normal file
465
src/parser/LRParser.cpp
Normal file
|
@ -0,0 +1,465 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
// taskwarrior - a command line task list manager.
|
||||
//
|
||||
// Copyright 2006-2013, Paul Beckingham, Federico Hernandez.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included
|
||||
// in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
//
|
||||
// http://www.opensource.org/licenses/mit-license.php
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <iostream>
|
||||
#include <LRParser.h>
|
||||
#include <RX.h>
|
||||
#include <text.h>
|
||||
#include <main.h>
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Default constructor. The body is empty.
// NOTE(review): no member (for example _verbose) is assigned here; confirm
// that every member read by the matching functions is initialized elsewhere
// before parse () is called.
LRParser::LRParser () {}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// This is called only from external code.
|
||||
Tree* LRParser::parse (const std::string& tokens)
|
||||
{
|
||||
Tree* tree = new Tree ("root");
|
||||
if (! tree)
|
||||
throw std::string ("Failed to allocate memory for parse tree.");
|
||||
|
||||
unsigned int cursor = 0;
|
||||
if (matchRule (_primary, '=', cursor, tokens, tree))
|
||||
{
|
||||
if (_verbose)
|
||||
std::cout << "syntax pass" << std::endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (_verbose)
|
||||
std::cout << "syntax fail" << std::endl;
|
||||
|
||||
delete tree;
|
||||
tree = NULL;
|
||||
}
|
||||
|
||||
return tree;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Wraps calls to matchRule, while properly handling the quantifier.
|
||||
bool LRParser::matchRuleQuant (
|
||||
const std::string& rule,
|
||||
char quantifier,
|
||||
unsigned int& cursor,
|
||||
const std::string& tokens,
|
||||
Tree* tree)
|
||||
{
|
||||
// Must match exactly once, so run once and return the result.
|
||||
if (quantifier == '=')
|
||||
{
|
||||
return matchRule (rule, quantifier, cursor, tokens, tree);
|
||||
}
|
||||
|
||||
// May match zero or one time. If it matches, the cursor will be advanced.
|
||||
// If it fails, the cursor will not be advanced, but this is still considered
|
||||
// successful. Return true either way, but backtrack the cursor on failure.
|
||||
|
||||
// TODO Make greedy.
|
||||
else if (quantifier == '?')
|
||||
{
|
||||
unsigned int original_cursor = cursor;
|
||||
if (! matchRule (rule, quantifier, cursor, tokens, tree))
|
||||
cursor = original_cursor;
|
||||
return true;
|
||||
}
|
||||
|
||||
// May match 1 or more times. If it matches on the first attempt, continue
|
||||
// to greedily match until it fails. If it fails on the first attempt, then
|
||||
// the rule fails.
|
||||
|
||||
// TODO Make greedy.
|
||||
else if (quantifier == '+')
|
||||
{
|
||||
if (! matchRule (rule, quantifier, cursor, tokens, tree))
|
||||
return false;
|
||||
|
||||
while (matchRule (rule, quantifier, cursor, tokens, tree))
|
||||
;
|
||||
return true;
|
||||
}
|
||||
|
||||
// May match zero or more times. Keep calling while there are matches, and
|
||||
// return true always. Backtrack the cursor on failure.
|
||||
|
||||
// TODO Make greedy.
|
||||
else if (quantifier == '*')
|
||||
{
|
||||
bool result;
|
||||
do
|
||||
{
|
||||
unsigned int original_cursor = cursor;
|
||||
result = matchRule (rule, quantifier, cursor, tokens, tree);
|
||||
if (! result)
|
||||
cursor = original_cursor;
|
||||
}
|
||||
while (result);
|
||||
return true;
|
||||
}
|
||||
|
||||
throw std::string ("LRParser::matchRuleQuant - this should never happen.");
|
||||
return false;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Returns true, with cursor incremented, if any of the alternates match.
|
||||
bool LRParser::matchRule (
|
||||
const std::string& rule,
|
||||
char quantifier,
|
||||
unsigned int& cursor,
|
||||
const std::string& tokens,
|
||||
Tree* tree)
|
||||
{
|
||||
if (cursor >= tokens.length ()) return false;
|
||||
unsigned int original_cursor = cursor; // Preserve
|
||||
|
||||
for (unsigned int alt = 0;
|
||||
alt < _rules[rule].size () && cursor < tokens.length ();
|
||||
++alt)
|
||||
{
|
||||
Tree* b = new Tree (rule);
|
||||
if (! b)
|
||||
throw std::string ("Failed to allocate memory for parse tree.");
|
||||
|
||||
if (matchAlternate (rule, quantifier, alt, _rules[rule][alt], cursor, tokens, b))
|
||||
{
|
||||
if (_verbose)
|
||||
std::cout << "\033[32m"
|
||||
<< "matchRule "
|
||||
<< rule
|
||||
<< quantifier
|
||||
<< "/a"
|
||||
<< alt
|
||||
<< " tokens["
|
||||
<< cursor - 1
|
||||
<< "]="
|
||||
<< visible (tokens[cursor - 1])
|
||||
<< " SUCCEED"
|
||||
<< " "
|
||||
<< tree
|
||||
<< "->"
|
||||
<< b
|
||||
<< "\033[0m"
|
||||
<< std::endl;
|
||||
|
||||
tree->addBranch (b);
|
||||
return true;
|
||||
}
|
||||
|
||||
delete b;
|
||||
}
|
||||
|
||||
cursor = original_cursor; // Restore
|
||||
|
||||
if (_verbose)
|
||||
std::cout << "\033[31m"
|
||||
<< "matchRule "
|
||||
<< rule
|
||||
<< quantifier
|
||||
<< " FAIL"
|
||||
<< "\033[0m"
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Returns true, with cursor incremented, if all of the token match.
|
||||
bool LRParser::matchAlternate (
|
||||
const std::string& rule,
|
||||
char quantifier,
|
||||
unsigned int alt,
|
||||
const Alternate& alternate,
|
||||
unsigned int& cursor,
|
||||
const std::string& tokens,
|
||||
Tree* tree)
|
||||
{
|
||||
if (cursor >= tokens.length ()) return false;
|
||||
|
||||
unsigned int original_cursor = cursor; // Preserve
|
||||
|
||||
for (unsigned int token = 0;
|
||||
token < alternate.size () && cursor < tokens.length ();
|
||||
++token)
|
||||
{
|
||||
if (! matchToken (rule, quantifier, alt, token, alternate[token], cursor, tokens, tree))
|
||||
{
|
||||
cursor = original_cursor; // Restore
|
||||
if (_verbose)
|
||||
std::cout << "\033[31m"
|
||||
<< "matchAlternate "
|
||||
<< rule
|
||||
<< quantifier
|
||||
<< "/a"
|
||||
<< alt
|
||||
<< "/t"
|
||||
<< token
|
||||
<< " tokens["
|
||||
<< cursor
|
||||
<< "]="
|
||||
<< visible (tokens[cursor])
|
||||
<< " FAIL"
|
||||
<< "\033[0m"
|
||||
<< std::endl;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
if (_verbose)
|
||||
std::cout << "\033[32m"
|
||||
<< "matchAlternate "
|
||||
<< rule
|
||||
<< quantifier
|
||||
<< "/a"
|
||||
<< alt
|
||||
<< "/t"
|
||||
<< token
|
||||
<< " SUCCEED"
|
||||
<< "\033[0m"
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Returns true, if the token, an optional quantifier, and all optional
|
||||
// modifiers match.
|
||||
bool LRParser::matchToken (
|
||||
const std::string& rule,
|
||||
char quantifier,
|
||||
unsigned int alt,
|
||||
unsigned int tok,
|
||||
const Token& token,
|
||||
unsigned int& cursor,
|
||||
const std::string& tokens,
|
||||
Tree* tree)
|
||||
{
|
||||
if (cursor >= tokens.length ()) return false;
|
||||
|
||||
unsigned int original_cursor = cursor; // Preserve
|
||||
|
||||
if (tokenMatchRule (rule, quantifier, alt, tok, token, cursor, tokens, tree) ||
|
||||
tokenMatchSpecialLiteral (token, cursor, tokens, tree) ||
|
||||
tokenMatchLiteral (token, cursor, tokens, tree) ||
|
||||
tokenMatchRegex (token, cursor, tokens, tree))
|
||||
{
|
||||
if (_verbose)
|
||||
std::cout << "\033[32m"
|
||||
<< "matchToken "
|
||||
<< rule
|
||||
<< quantifier
|
||||
<< "/a"
|
||||
<< alt
|
||||
<< "/t"
|
||||
<< tok
|
||||
<< " tokens["
|
||||
<< cursor
|
||||
<< "]="
|
||||
<< visible (tokens[cursor])
|
||||
<< " token="
|
||||
<< token.value
|
||||
<< " SUCCEED"
|
||||
<< "\033[0m"
|
||||
<< std::endl;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
cursor = original_cursor; // Restore
|
||||
|
||||
if (_verbose)
|
||||
std::cout << "\033[31m"
|
||||
<< "matchToken "
|
||||
<< rule
|
||||
<< quantifier
|
||||
<< "/a"
|
||||
<< alt
|
||||
<< "/t"
|
||||
<< tok
|
||||
<< " tokens["
|
||||
<< cursor
|
||||
<< "]="
|
||||
<< visible (tokens[cursor])
|
||||
<< " token="
|
||||
<< token.value
|
||||
<< " FAIL"
|
||||
<< "\033[0m"
|
||||
<< std::endl;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool LRParser::tokenMatchSpecialLiteral (
|
||||
const Token& token,
|
||||
unsigned int& cursor,
|
||||
const std::string& tokens,
|
||||
Tree* tree)
|
||||
{
|
||||
if (cursor >= tokens.length ()) return false;
|
||||
|
||||
if ((tokens[cursor] == '\t' && token.value == "\"\\t\"") ||
|
||||
(tokens[cursor] == '\n' && token.value == "\"\\n\"") ||
|
||||
(tokens[cursor] == '\r' && token.value == "\"\\r\"") ||
|
||||
(tokens[cursor] == '\f' && token.value == "\"\\f\"") ||
|
||||
(tokens[cursor] == '\v' && token.value == "\"\\v\"") ||
|
||||
(tokens[cursor] == '"' && token.value == "\"\\\"\""))
|
||||
{
|
||||
tree->tag ("literal");
|
||||
tree->tag ("special");
|
||||
tree->attribute ("token", tokens[cursor]);
|
||||
|
||||
if (_verbose)
|
||||
std::cout << "tokenMatchSpecialLiteral "
|
||||
<< token.value
|
||||
<< " SUCCEED"
|
||||
<< std::endl;
|
||||
|
||||
cursor++;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool LRParser::tokenMatchLiteral (
|
||||
const Token& token,
|
||||
unsigned int& cursor,
|
||||
const std::string& tokens,
|
||||
Tree* tree)
|
||||
{
|
||||
int len = token.value.length () - 2;
|
||||
if (cursor > tokens.length () - len) return false;
|
||||
|
||||
std::string tok = token.value.substr (1, len);
|
||||
|
||||
if (token.value[0] == '"' &&
|
||||
token.value[len + 1] == '"' &&
|
||||
tokens.find (tok, cursor) == cursor)
|
||||
{
|
||||
tree->tag ("literal");
|
||||
tree->attribute ("token", tok);
|
||||
cursor += len;
|
||||
|
||||
if (_verbose)
|
||||
std::cout << "tokenMatchLiteral "
|
||||
<< token.value
|
||||
<< " SUCCEED"
|
||||
<< std::endl;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool LRParser::tokenMatchRegex (
|
||||
const Token& token,
|
||||
unsigned int& cursor,
|
||||
const std::string& tokens,
|
||||
Tree* tree)
|
||||
{
|
||||
if (cursor >= tokens.length () - 1) return false;
|
||||
|
||||
// If it looks like a regex.
|
||||
if (token.value[0] == '/' &&
|
||||
token.value[token.value.length () - 1] == '/')
|
||||
{
|
||||
// If the regex matches at all.
|
||||
RX rx ("(" + token.value.substr (1, token.value.length () - 2) + ")", false);
|
||||
std::vector <int> start;
|
||||
std::vector <int> end;
|
||||
if (rx.match (start,
|
||||
end,
|
||||
tokens.substr (cursor, std::string::npos)))
|
||||
{
|
||||
// If the match is at position 'cursor'.
|
||||
if (start[0] == 0)
|
||||
{
|
||||
tree->tag ("regex");
|
||||
tree->attribute ("token", tokens.substr (cursor + start[0], end[0]));
|
||||
cursor += end[0];
|
||||
|
||||
if (_verbose)
|
||||
std::cout << "tokenMatchRegex \""
|
||||
<< tokens.substr (cursor + start[0], end[0])
|
||||
<< "\""
|
||||
<< " SUCCEED"
|
||||
<< std::endl;
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool LRParser::tokenMatchRule (
|
||||
const std::string& rule,
|
||||
char quantifier,
|
||||
unsigned int alt,
|
||||
unsigned int it,
|
||||
const Token& token,
|
||||
unsigned int& cursor,
|
||||
const std::string& tokens,
|
||||
Tree* tree)
|
||||
{
|
||||
if (cursor >= tokens.length ()) return false;
|
||||
|
||||
// If this is a definition, recurse.
|
||||
if (_rules.find (token.value) != _rules.end ())
|
||||
{
|
||||
if (_verbose)
|
||||
std::cout << "tokenMatchRule "
|
||||
<< rule
|
||||
<< quantifier
|
||||
<< "/a"
|
||||
<< alt
|
||||
<< "/t"
|
||||
<< it
|
||||
<< " tokens["
|
||||
<< cursor
|
||||
<< "]="
|
||||
<< visible (tokens[cursor])
|
||||
<< " token="
|
||||
<< token.value
|
||||
<< " RECURSING matchRuleQuant"
|
||||
<< std::endl;
|
||||
|
||||
return matchRuleQuant (token.value, token.quantifier, cursor, tokens, tree);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
Loading…
Add table
Add a link
Reference in a new issue