mirror of
https://github.com/GothenburgBitFactory/timewarrior.git
synced 2025-07-07 20:06:39 +02:00
488 lines
13 KiB
C++
488 lines
13 KiB
C++
////////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Copyright 2015 - 2016, Paul Beckingham, Federico Hernandez.
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be included
|
|
// in all copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
// SOFTWARE.
|
|
//
|
|
// http://www.opensource.org/licenses/mit-license.php
|
|
//
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include <cmake.h>
|
|
#include <LR0.h>
|
|
#include <Lexer.h>
|
|
#include <Table.h>
|
|
#include <Color.h>
|
|
#include <text.h>
|
|
#include <iostream>
|
|
#include <sstream>
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// Given a grammar:
|
|
// - Obtain the augmented grammar
|
|
// - Create the first state from the first augmented grammar rule
|
|
// - Expand the first state
|
|
// - Recursively create and expand all the other states
|
|
void LR0::initialize (const Grammar& grammar)
|
|
{
|
|
// Save important grammar information.
|
|
_augmented = grammar.augmented ();
|
|
_rules = grammar.rules ();
|
|
_terminals = grammar.terminals ();
|
|
|
|
if (_debug)
|
|
{
|
|
std::cout << "Augmented Grammar\n";
|
|
auto count = 0;
|
|
for (const auto& item : _augmented)
|
|
{
|
|
std::cout << " [" << count++ << "]";
|
|
for (const auto& term : item)
|
|
std::cout << " " << term;
|
|
std::cout << "\n";
|
|
}
|
|
|
|
std::cout << "\n";
|
|
std::cout << "Rules\n";
|
|
for (const auto& r : _rules)
|
|
std::cout << " " << r << "\n";
|
|
|
|
std::cout << "\n";
|
|
std::cout << "Terminals\n";
|
|
for (const auto& t : _terminals)
|
|
std::cout << " " << t << "\n";
|
|
|
|
std::cout << "\n";
|
|
}
|
|
|
|
// Determine the parser states.
|
|
States states;
|
|
initializeFirstState (states);
|
|
closeState (states, 0);
|
|
if (_debug)
|
|
std::cout << states.dump () << "\n";
|
|
|
|
// Construct the parser tables.
|
|
createParseTable (states);
|
|
if (_debug)
|
|
std::cout << dump () << "\n";;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// Collect a unique set of expected symbols from the closure. This is the set
|
|
// of symbols where '● X' appears in an item.
|
|
std::set <std::string> LR0::getExpectedSymbols (const Closure& closure) const
|
|
{
|
|
std::set <std::string> expected;
|
|
for (auto& item : closure)
|
|
if (! item.done ())
|
|
expected.insert (item.next ());
|
|
|
|
return expected;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// The first state is just the augmented grammar with the expected symbols all
|
|
// at the first symbol of each rule, i.e. exactly what happens when an Item is
|
|
// instantiated from a grammar rule.
|
|
void LR0::initializeFirstState (States& states) const
{
  // One item per augmented grammar rule, each tagged with the index of the
  // rule it was built from.
  Closure initial;
  unsigned int index = 0;
  for (const auto& rule : _augmented)
  {
    Item entry (rule);
    entry.setGrammarRuleIndex (index);
    initial.push_back (entry);
    ++index;
  }

  // The finished closure is appended to the caller's list of states.
  states.push_back (initial);
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// Expand states[state]: for every symbol expected by one of its items, build
// the closure reached by advancing over that symbol. If no existing state
// starts with the same first item, append the closure as a new state and
// recurse into it.
void LR0::closeState (States& states, const int state) const
{
  // The expected symbols are collected once, before any state is added.
  for (const auto& symbol : getExpectedSymbols (states[state]))
  {
    // Items of the prospective new state.
    Closure candidate;

    // Symbols whose grammar rules were already pulled into 'candidate'.
    std::set <std::string> expanded;

    // states[state] is re-indexed on every pass because 'states' may have
    // grown (and reallocated) during the recursion below.
    for (auto& item : states[state])
    {
      // Only unfinished items waiting for 'symbol' contribute.
      if (item.done () || item.next () != symbol)
        continue;

      Item shifted (item);
      shifted.advance ();
      candidate.push_back (shifted);

      // Nothing follows the cursor, so there is nothing to pull in.
      if (shifted.done ())
        continue;

      const auto upcoming = shifted.next ();
      if (expanded.count (upcoming) != 0)
        continue;

      // Add a fresh item for every augmented rule whose first element is the
      // symbol now under the cursor.
      for (unsigned int r = 0; r < _augmented.size (); ++r)
      {
        if (_augmented[r][0] != upcoming)
          continue;

        Item predicted (_augmented[r]);
        predicted.setGrammarRuleIndex (r);
        candidate.push_back (predicted);
        expanded.insert (upcoming);
      }
    }

    // Two states are treated as the same state when their first items match.
    bool known = false;
    for (auto& existing : states)
    {
      if (existing[0] == candidate[0])
      {
        known = true;
        break;
      }
    }

    if (known)
      continue;

    // Record the new state, then expand it in turn.
    states.push_back (candidate);
    closeState (states, states.size () - 1);
  }
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// Build the _actions and _goto tables from the completed set of states.
//
// Each table gets one row per state. _actions has a column per terminal
// (including the artificial "$"), _goto has a column per rule name. Cells are
// strings: "" (empty), "acc", "r<N>" for a reduction by grammar rule N, or
// "s<N>" for a transition to state N.
void LR0::createParseTable (States& states)
{
  // Start from empty tables. The rows below are appended with push_back but
  // addressed by state number, so leftovers from an earlier call would be
  // overwritten in place while the newly appended rows stayed empty, which
  // makes the later .at () lookups in dump () throw.
  _actions.clear ();
  _goto.clear ();

  // Add artificial end-point.
  _terminals.push_back ("$");

  // First size the tables:
  // - Create a row for every state
  // - Create a column for every terminal in the _actions table
  // - Create a column for every rule in the _goto table
  for (unsigned int state = 0; state < states.size (); ++state)
  {
    _actions.push_back ({});
    for (const auto& terminal : _terminals)
      _actions[state][terminal] = "";

    _goto.push_back ({});
    for (const auto& rule : _rules)
      _goto[state][rule] = "";
  }

  // Look for states with items that are done, and mark them as reductions, but
  // a reduction of grammar rule 0 is marked "acc".
  for (unsigned int state = 0; state < states.size (); ++state)
  {
    for (unsigned int item = 0; item < states[state].size (); ++item)
    {
      const auto& current = states[state][item];
      if (! current.done ())
        continue;

      if (current._grammarRule == 0)
      {
        _actions[state]["$"] = "acc";
      }
      else
      {
        // The reduction is written into every terminal column of the row.
        for (const auto& terminal : _terminals)
          _actions[state][terminal] = format ("r{1}", current._grammarRule);
      }
    }
  }

  // Look at all items in all states, and if the item is not done, find the
  // state with a matching, advanced, first item. These entries are written
  // after the reductions, so they replace a reduction in the same cell.
  for (unsigned int state = 0; state < states.size (); ++state)
  {
    for (unsigned int item = 0; item < states[state].size (); ++item)
    {
      if (! states[state][item].done ())
      {
        auto nextSymbol = states[state][item].next ();

        Item advanced (states[state][item]);
        advanced.advance ();

        // Whether the symbol is a terminal does not depend on the target
        // state, so decide it once instead of once per matching state.
        const bool isTerminal =
          std::find (_terminals.begin (), _terminals.end (), nextSymbol) != _terminals.end ();

        for (unsigned int s = 0; s < states.size (); ++s)
        {
          if (states[s][0] == advanced)
          {
            if (isTerminal)
              _actions[state][nextSymbol] = format ("s{1}", s);
            else
              _goto[state][nextSymbol] = format ("s{1}", s);
          }
        }
      }
    }
  }
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// "Compilers. Principles, Techniques, and Tools", Aho/Sethi/Ullman. p219.
|
|
//
|
|
// set ip to point to the first symbol of w$
|
|
// repeat forever begin
|
|
// let s be the state on top of the stack
|
|
// let a be the symbol pointed to by ip
|
|
//
|
|
// if action[s,a] = shift s' then begin
|
|
// push a then s' on top of the stack
|
|
// advance ip to the next input symbol
|
|
// end
|
|
//
|
|
// else if action[s,a] = reduce A --> B then begin
|
|
// pop 2 * [B] symbols off the stack
|
|
// let s' be the state now at the top of the stack
|
|
// push A, then goto[s',A] on top of the stack
|
|
// output the production A --> B
|
|
// end
|
|
//
|
|
// else if then begin
|
|
// if action[s,a] = accept then begin
|
|
// return
|
|
// end
|
|
// end
|
|
//
|
|
// else
|
|
// error
|
|
// end
|
|
// end
|
|
//
|
|
void LR0::parse (const std::string& input)
|
|
{
|
|
// set ip to point to the first symbol of w$
|
|
Lexer l (input);
|
|
std::string token;
|
|
Lexer::Type type;
|
|
if (! l.token (token, type))
|
|
{
|
|
// TODO Error: Nothing to parse.
|
|
std::cout << "# LR0::parse EOS\n";
|
|
return;
|
|
}
|
|
|
|
std::cout << "# LR0::parse token '" << token << "' " << Lexer::typeToString (type) << "\n";
|
|
|
|
// repeat forever begin
|
|
while (1)
|
|
{
|
|
// let s be the state on top of the stack
|
|
// let token be the symbol pointed to by ip
|
|
|
|
// TODO Shift
|
|
// if action[s,token] == shift s'
|
|
// push token then s' on top of the stack
|
|
// advance ip to the next input symbol
|
|
// end
|
|
if (0)
|
|
{
|
|
std::cout << "# LR0::parse shift\n";
|
|
|
|
if (! l.token (token, type))
|
|
{
|
|
// TODO Error: No more to parse.
|
|
std::cout << "# LR0::parse unexpected EOS\n";
|
|
return;
|
|
}
|
|
}
|
|
|
|
// TODO Reduce
|
|
// else if action[s,token] == reduce A --> B
|
|
// pop 2 * [B] symbols off the stack
|
|
// let s' be the state now at the top of the stack
|
|
// push A, then goto[s',A] on top of the stack
|
|
// output the production A --> B
|
|
else if (0)
|
|
{
|
|
std::cout << "# LR0::parse reduce\n";
|
|
}
|
|
|
|
// TODO Accept
|
|
// else if then begin
|
|
// if action[s,token] == accept
|
|
// return
|
|
else if (0)
|
|
{
|
|
std::cout << "# LR0::parse accept\n";
|
|
return;
|
|
}
|
|
|
|
// TODO Error
|
|
else
|
|
{
|
|
std::cout << "# LR0::parse error\n";
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
void LR0::debug (bool value)
|
|
{
|
|
_debug = value;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// +-------+---------------+---------------+
|
|
// | state | terminals $ | non-terminals |
|
|
// +-------+--+--+--+--+---+----+-----+----+
|
|
// | | | | | | | | | |
|
|
// +-------+--+--+--+--+---+----+-----+----+
|
|
// | | | | | | | | | |
|
|
// +-------+--+--+--+--+---+----+-----+----+
|
|
std::string LR0::dump () const
|
|
{
|
|
std::stringstream out;
|
|
out << "Parser Tables\n";
|
|
|
|
Table t;
|
|
t.leftMargin (2);
|
|
t.colorHeader (Color ("underline"));
|
|
|
|
// Add columns.
|
|
t.add ("State", true);
|
|
for (const auto& terminal : _terminals)
|
|
t.add (terminal, true);
|
|
|
|
for (const auto& rule : _rules)
|
|
t.add (rule, true);
|
|
|
|
// Add cells.
|
|
for (unsigned int state = 0; state < _actions.size (); ++state)
|
|
{
|
|
int row = t.addRow ();
|
|
int col = 0;
|
|
t.set (row, col++, state);
|
|
|
|
for (const auto& terminal : _terminals)
|
|
{
|
|
auto data = _actions[state].at (terminal);
|
|
Color color (data[0] == 'r' ? "rgb535 on rgb412" :
|
|
data[0] == 's' ? "rgb045 on rgb015" :
|
|
data[0] == 'a' ? "rgb154 on rgb031" :
|
|
"");
|
|
t.set (row, col++, data, color);
|
|
}
|
|
|
|
for (const auto& rule : _rules)
|
|
{
|
|
auto data = _goto[state].at (rule);
|
|
Color color (data != "" ? "rgb045 on rgb015" : "");
|
|
t.set (row, col++, data, color);
|
|
}
|
|
}
|
|
|
|
out << t.render ();
|
|
|
|
return out.str ();
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// Build an item from a grammar rule. The cursor starts at index 2 and the
// grammar rule index starts out unset (-1).
// NOTE(review): index 2 presumably skips the rule name and the arrow token -
// confirm against the layout produced by Grammar::augmented ().
LR0::Item::Item (const std::vector <std::string>& rule)
: _rule (rule)
, _cursor (2)
, _grammarRule (-1)
{
  // A three-element rule whose last element is "є" loses that element, which
  // leaves the cursor at the end of the rule, i.e. the item is already done.
  const bool endsInMarker = (_rule.size () == 3) && (_rule[2] == "є");
  if (endsInMarker)
    _rule.pop_back ();
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
bool LR0::Item::operator== (const Item& other)
|
|
{
|
|
if (_cursor != other._cursor ||
|
|
_grammarRule != other._grammarRule ||
|
|
_rule.size () != other._rule.size ())
|
|
return false;
|
|
|
|
for (unsigned int i = 0; i < _rule.size () - 1; ++i)
|
|
if (_rule[i] != other._rule[i])
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
void LR0::Item::setGrammarRuleIndex (const int rule)
|
|
{
|
|
_grammarRule = rule;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
bool LR0::Item::advance ()
|
|
{
|
|
if (_cursor >= _rule.size ())
|
|
return false;
|
|
|
|
++_cursor;
|
|
return true;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
bool LR0::Item::done () const
|
|
{
|
|
return _cursor >= _rule.size ();
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
std::string LR0::Item::next () const
|
|
{
|
|
return _rule[_cursor];
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
std::string LR0::Item::dump () const
|
|
{
|
|
std::stringstream out;
|
|
|
|
for (unsigned int i = 0; i < _rule.size (); ++i)
|
|
{
|
|
if (i)
|
|
out << " ";
|
|
|
|
if (i == _cursor)
|
|
out << "● ";
|
|
|
|
out << _rule[i];
|
|
}
|
|
|
|
if (_cursor >= _rule.size ())
|
|
out << " ●";
|
|
|
|
if (_grammarRule != -1)
|
|
out << " [g" << _grammarRule << "]";
|
|
|
|
return out.str ();
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
std::string LR0::States::dump () const
|
|
{
|
|
std::stringstream out;
|
|
out << "States\n";
|
|
|
|
for (unsigned int state = 0; state < this->size (); ++state)
|
|
{
|
|
out << " State " << state << "\n";
|
|
for (const auto& item : (*this)[state])
|
|
out << " " << item.dump () << "\n";
|
|
}
|
|
|
|
return out.str ();
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|