From 8991a3c87bfec9482883edcec1fd50493c3b1a2c Mon Sep 17 00:00:00 2001 From: Paul Beckingham Date: Mon, 21 Dec 2015 00:08:26 -0500 Subject: [PATCH] Grammar: Implemented ::loadFromFile and ::loadFromString --- src/Grammar.cpp | 102 +++++++++++++++++++++++++++++++++++++++++++++++- src/Grammar.h | 16 ++++++++ 2 files changed, 117 insertions(+), 1 deletion(-) diff --git a/src/Grammar.cpp b/src/Grammar.cpp index 87c300c8..f3037cf3 100644 --- a/src/Grammar.cpp +++ b/src/Grammar.cpp @@ -26,7 +26,9 @@ #include #include +#include #include +#include //////////////////////////////////////////////////////////////////////////////// Grammar::Grammar () @@ -47,9 +49,107 @@ void Grammar::loadFromFile (File& file) } //////////////////////////////////////////////////////////////////////////////// +// Load and parse BNF. +// +// Syntax: +// rule-name: alternate1-token1 alternate1-token2 +// alternate2-token1 +// +// - Rules are aligned at left margin only, followed by a colon. +// - Productions are indented and never at left margin. +// - Blank line between rules. +// +// Details: +// - Literals are always double-quoted. +// - "*", "+" and "?" suffixes have POSIX semantics. +// - "є" means empty set. +// - Literal modifiers: +// - :a Accept abbreviations +// - :i Accept caseless match +// void Grammar::loadFromString (const std::string& input) { - // TODO Load and parse BNF. + std::string rule_name = ""; + int token_count = 0; + + for (auto& line : split (input, '\n')) + { + // Skip whole-line comments. + if (line[0] == '#') + continue; + + // Eliminate inline comments. + std::string::size_type hash = line.find ('#'); + if (hash != std::string::npos) + line.resize (hash); + + line = Lexer::trim (line); + + // Skip blank lines with no semantics. 
+ if (line == "" and rule_name == "") + continue; + + if (line != "") + { + token_count = 0; + + Lexer l (line); + Lexer::Type type; + std::string token; + while (l.token (token, type)) + { + ++token_count; + + if (token.back () == ':') + { + rule_name = token.substr (0, token.size () - 1); + if (_start == "") + _start = rule_name; + + _rules[rule_name] = Grammar::Rule (); + token_count = 0; + } + else if (token.front () == ':') + { + } + else + { + if (token_count <= 1) + _rules[rule_name].push_back (Grammar::Production ()); + + _rules[rule_name].back ().push_back (token); + } + } + } + else + rule_name = ""; + } +} + +//////////////////////////////////////////////////////////////////////////////// +std::string Grammar::dump () const +{ + std::stringstream out; + for (auto& rule : _rules) + { + if (rule.first == _start) + out << "▶ "; + + out << rule.first << ":\n"; + + for (auto& production : rule.second) + { + out << " "; + for (auto& term : production) + out << term << " "; + + out << "\n"; + } + + out << "\n"; + } + + return out.str (); } //////////////////////////////////////////////////////////////////////////////// diff --git a/src/Grammar.h b/src/Grammar.h index ab8cb6dd..27f8ca06 100644 --- a/src/Grammar.h +++ b/src/Grammar.h @@ -29,6 +29,8 @@ #include #include +#include +#include class Grammar { @@ -36,6 +38,20 @@ public: Grammar (); void loadFromFile (File&); void loadFromString (const std::string&); + std::string dump () const; + +protected: + class Production : public std::vector + { + }; + + class Rule : public std::vector + { + }; + +private: + std::string _start; + std::map _rules; }; #endif