timewarrior/src/LR0.cpp
2016-01-19 00:48:27 -05:00

488 lines
13 KiB
C++

////////////////////////////////////////////////////////////////////////////////
//
// Copyright 2015 - 2016, Paul Beckingham, Federico Hernandez.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// http://www.opensource.org/licenses/mit-license.php
//
////////////////////////////////////////////////////////////////////////////////
#include <cmake.h>
#include <LR0.h>
#include <Lexer.h>
#include <Table.h>
#include <Color.h>
#include <text.h>
#include <iostream>
#include <sstream>
////////////////////////////////////////////////////////////////////////////////
// Given a grammar:
// - Obtain the augmented grammar
// - Create the first state from the first augmented grammar rule
// - Expand the first state
// - Recursively create and expand all the other states
void LR0::initialize (const Grammar& grammar)
{
// Save important grammar information.
_augmented = grammar.augmented ();
_rules = grammar.rules ();
_terminals = grammar.terminals ();
if (_debug)
{
std::cout << "Augmented Grammar\n";
auto count = 0;
for (const auto& item : _augmented)
{
std::cout << " [" << count++ << "]";
for (const auto& term : item)
std::cout << " " << term;
std::cout << "\n";
}
std::cout << "\n";
std::cout << "Rules\n";
for (const auto& r : _rules)
std::cout << " " << r << "\n";
std::cout << "\n";
std::cout << "Terminals\n";
for (const auto& t : _terminals)
std::cout << " " << t << "\n";
std::cout << "\n";
}
// Determine the parser states.
States states;
initializeFirstState (states);
closeState (states, 0);
if (_debug)
std::cout << states.dump () << "\n";
// Construct the parser tables.
createParseTable (states);
if (_debug)
std::cout << dump () << "\n";;
}
////////////////////////////////////////////////////////////////////////////////
// Collect a unique set of expected symbols from the closure. This is the set
// of symbols where '● X' appears in an item.
std::set <std::string> LR0::getExpectedSymbols (const Closure& closure) const
{
std::set <std::string> expected;
for (auto& item : closure)
if (! item.done ())
expected.insert (item.next ());
return expected;
}
////////////////////////////////////////////////////////////////////////////////
// The first state is юust the augmented grammar with the expected symbols all
// at the first symbol of each rule, i.e. exactly what happens when an Item is
// instantiated from a grammar rule.
void LR0::initializeFirstState (States& states) const
{
LR0::Closure result;
for (unsigned int r = 0; r < _augmented.size (); ++r)
{
Item item (_augmented[r]);
item.setGrammarRuleIndex (r);
result.push_back (item);
}
states.push_back (result);
}
////////////////////////////////////////////////////////////////////////////////
void LR0::closeState (States& states, const int state) const
{
// Obtain all the expected symbols for this state.
auto expectedSymbols = getExpectedSymbols (states[state]);
for (auto& expected : expectedSymbols)
{
// This will be the new state.
Closure closure;
// Track additional symbols.
std::set <std::string> seen;
// Find all the rules in this state that are expecting 'expected'.
for (auto& item : states[state])
{
if (! item.done () &&
item.next () == expected)
{
Item advanced (item);
advanced.advance ();
closure.push_back (advanced);
if (! advanced.done ())
{
auto nextSymbol = advanced.next ();
if (seen.find (nextSymbol) == seen.end ())
{
for (unsigned int r = 0; r < _augmented.size (); ++r)
{
if (_augmented[r][0] == nextSymbol)
{
Item additional (_augmented[r]);
additional.setGrammarRuleIndex (r);
closure.push_back (additional);
seen.insert (nextSymbol);
}
}
}
}
}
}
// Check that the new state is not already created.
bool skip = false;
for (auto& state : states)
if (state[0] == closure[0])
skip = true;
// Create the new state, and recurse to close it.
if (! skip)
{
states.push_back (closure);
closeState (states, states.size () - 1);
}
}
}
////////////////////////////////////////////////////////////////////////////////
void LR0::createParseTable (States& states)
{
// Add artificial end-point.
_terminals.push_back ("$");
// First size the tables:
// - Create a row for every state
// - Create a column for every terminal in the _actions table
// - Create a column for every rule in the _goto table
for (unsigned int state = 0; state < states.size (); ++state)
{
// Create a column for every terminal.
_actions.push_back ({});
for (const auto& terminal : _terminals)
_actions[state][terminal] = "";
_goto.push_back ({});
for (const auto& rule : _rules)
_goto[state][rule] = "";
}
// Look for states with items that are done, and mark them as reductions, but
// a reduction of grammar rule 0 is marked "acc".
for (unsigned int state = 0; state < states.size (); ++state)
for (unsigned int item = 0; item < states[state].size (); ++item)
if (states[state][item].done ())
{
if (states[state][item]._grammarRule == 0)
_actions[state]["$"] = "acc";
else
for (const auto& terminal : _terminals)
_actions[state][terminal] = format ("r{1}", states[state][item]._grammarRule);
}
// Look at all items in all states, and if the item is not done, find the
// state with a matching, advanced, first item.
for (unsigned int state = 0; state < states.size (); ++state)
for (unsigned int item = 0; item < states[state].size (); ++item)
if (! states[state][item].done ())
{
auto nextSymbol = states[state][item].next ();
Item advanced (states[state][item]);
advanced.advance ();
for (unsigned int s = 0; s < states.size (); ++s)
{
if (states[s][0] == advanced)
{
if (std::find (_terminals.begin (), _terminals.end (), nextSymbol) != _terminals.end ())
_actions[state][nextSymbol] = format ("s{1}", s);
else
_goto[state][nextSymbol] = format ("s{1}", s);
}
}
}
}
////////////////////////////////////////////////////////////////////////////////
// "Compilers. Principles, Techniques, and Tools", Aho/Sethi/Ullman. p219.
//
// set ip to point to the first symbol of w$
// repeat forever begin
// let s be the state on top of the stack
// let a be the symbol pointed to by ip
//
// if action[s,a] = shift s' then begin
// push a then s' on top of the stack
// advance ip to the next input symbol
// end
//
// else if action[s,a] = reduce A --> B then begin
// pop 2 * [B] symbols off the stack
// let s' be the state now at the top of the stack
// push A, then goto[s',A] on top of the stack
// output the production A --> B
// end
//
// else if then begin
// if action[s,a] = accept then begin
// return
// end
// end
//
// else
// error
// end
// end
//
void LR0::parse (const std::string& input)
{
// set ip to point to the first symbol of w$
Lexer l (input);
std::string token;
Lexer::Type type;
if (! l.token (token, type))
{
// TODO Error: Nothing to parse.
std::cout << "# LR0::parse EOS\n";
return;
}
std::cout << "# LR0::parse token '" << token << "' " << Lexer::typeToString (type) << "\n";
// repeat forever begin
while (1)
{
// let s be the state on top of the stack
// let token be the symbol pointed to by ip
// TODO Shift
// if action[s,token] == shift s'
// push token then s' on top of the stack
// advance ip to the next input symbol
// end
if (0)
{
std::cout << "# LR0::parse shift\n";
if (! l.token (token, type))
{
// TODO Error: No more to parse.
std::cout << "# LR0::parse unexpected EOS\n";
return;
}
}
// TODO Reduce
// else if action[s,token] == reduce A --> B
// pop 2 * [B] symbols off the stack
// let s' be the state now at the top of the stack
// push A, then goto[s',A] on top of the stack
// output the production A --> B
else if (0)
{
std::cout << "# LR0::parse reduce\n";
}
// TODO Accept
// else if then begin
// if action[s,token] == accept
// return
else if (0)
{
std::cout << "# LR0::parse accept\n";
return;
}
// TODO Error
else
{
std::cout << "# LR0::parse error\n";
return;
}
}
}
////////////////////////////////////////////////////////////////////////////////
void LR0::debug (bool value)
{
_debug = value;
}
////////////////////////////////////////////////////////////////////////////////
// +-------+---------------+---------------+
// | state | terminals $ | non-terminals |
// +-------+--+--+--+--+---+----+-----+----+
// | | | | | | | | | |
// +-------+--+--+--+--+---+----+-----+----+
// | | | | | | | | | |
// +-------+--+--+--+--+---+----+-----+----+
std::string LR0::dump () const
{
std::stringstream out;
out << "Parser Tables\n";
Table t;
t.leftMargin (2);
t.colorHeader (Color ("underline"));
// Add columns.
t.add ("State", true);
for (const auto& terminal : _terminals)
t.add (terminal, true);
for (const auto& rule : _rules)
t.add (rule, true);
// Add cells.
for (unsigned int state = 0; state < _actions.size (); ++state)
{
int row = t.addRow ();
int col = 0;
t.set (row, col++, state);
for (const auto& terminal : _terminals)
{
auto data = _actions[state].at (terminal);
Color color (data[0] == 'r' ? "rgb535 on rgb412" :
data[0] == 's' ? "rgb045 on rgb015" :
data[0] == 'a' ? "rgb154 on rgb031" :
"");
t.set (row, col++, data, color);
}
for (const auto& rule : _rules)
{
auto data = _goto[state].at (rule);
Color color (data != "" ? "rgb045 on rgb015" : "");
t.set (row, col++, data, color);
}
}
out << t.render ();
return out.str ();
}
////////////////////////////////////////////////////////////////////////////////
LR0::Item::Item (const std::vector <std::string>& rule)
: _rule (rule)
, _cursor (2)
, _grammarRule (-1)
{
if (_rule.size () == 3 && _rule[2] == "є")
_rule.pop_back ();
}
////////////////////////////////////////////////////////////////////////////////
bool LR0::Item::operator== (const Item& other)
{
if (_cursor != other._cursor ||
_grammarRule != other._grammarRule ||
_rule.size () != other._rule.size ())
return false;
for (unsigned int i = 0; i < _rule.size () - 1; ++i)
if (_rule[i] != other._rule[i])
return false;
return true;
}
////////////////////////////////////////////////////////////////////////////////
void LR0::Item::setGrammarRuleIndex (const int rule)
{
_grammarRule = rule;
}
////////////////////////////////////////////////////////////////////////////////
bool LR0::Item::advance ()
{
if (_cursor >= _rule.size ())
return false;
++_cursor;
return true;
}
////////////////////////////////////////////////////////////////////////////////
bool LR0::Item::done () const
{
return _cursor >= _rule.size ();
}
////////////////////////////////////////////////////////////////////////////////
std::string LR0::Item::next () const
{
return _rule[_cursor];
}
////////////////////////////////////////////////////////////////////////////////
std::string LR0::Item::dump () const
{
std::stringstream out;
for (unsigned int i = 0; i < _rule.size (); ++i)
{
if (i)
out << " ";
if (i == _cursor)
out << "";
out << _rule[i];
}
if (_cursor >= _rule.size ())
out << "";
if (_grammarRule != -1)
out << " [g" << _grammarRule << "]";
return out.str ();
}
////////////////////////////////////////////////////////////////////////////////
std::string LR0::States::dump () const
{
std::stringstream out;
out << "States\n";
for (unsigned int state = 0; state < this->size (); ++state)
{
out << " State " << state << "\n";
for (const auto& item : (*this)[state])
out << " " << item.dump () << "\n";
}
return out.str ();
}
////////////////////////////////////////////////////////////////////////////////