From 8991a3c87bfec9482883edcec1fd50493c3b1a2c Mon Sep 17 00:00:00 2001
From: Paul Beckingham <paul@beckingham.net>
Date: Mon, 21 Dec 2015 00:08:26 -0500
Subject: [PATCH] Grammar: Implemented ::loadFromFile and ::loadFromString

---
 src/Grammar.cpp | 102 +++++++++++++++++++++++++++++++++++++++++++++++-
 src/Grammar.h   |  16 ++++++++
 2 files changed, 117 insertions(+), 1 deletion(-)
diff --git a/src/Grammar.cpp b/src/Grammar.cpp
index 87c300c8..f3037cf3 100644
--- a/src/Grammar.cpp
+++ b/src/Grammar.cpp
@@ -26,7 +26,9 @@
 
 #include <cmake.h>
 #include <Grammar.h>
+#include <Lexer.h>
 #include <text.h>
+#include <sstream>
 
 ////////////////////////////////////////////////////////////////////////////////
 Grammar::Grammar ()
@@ -47,9 +49,107 @@ void Grammar::loadFromFile (File& file)
 }
 
 ////////////////////////////////////////////////////////////////////////////////
+// Load and parse BNF.
+//
+// Syntax:
+//   rule-name:  alternate1-token1 alternate1-token2
+//               alternate2-token1
+//
+// - Rules are aligned at left margin only, followed by a colon.
+// - Productions are indented and never at left margin.
+// - Blank line between rules.
+//
+// Details:
+// - Literals are always double-quoted.
+// - "*", "+" and "?" suffixes have POSIX semantics.
+// - "є" means empty set.
+// - Literal modifiers:
+//   - :a  Accept abbreviations
+//   - :i  Accept caseless match
+//
 void Grammar::loadFromString (const std::string& input)
 {
-  // TODO Load and parse BNF.
+  // Name of the rule currently being defined; empty while between rules.
+  std::string rule_name = "";
+
+  for (auto& line : split (input, '\n'))
+  {
+    // Strip comments.  Note that a '#' inside a quoted literal also counts.
+    std::string::size_type hash = line.find ('#');
+    if (hash != std::string::npos)
+      line.resize (hash);
+
+    line = Lexer::trim (line);
+
+    // Comment-only lines, indented or not, must not end the current rule.
+    if (line == "" and hash != std::string::npos)
+      continue;
+
+    // A truly blank line ends the current rule, if there is one.
+    if (line == "")
+    {
+      rule_name = "";
+      continue;
+    }
+
+    // The first term on a line, or after a rule name, opens a new production.
+    bool new_production = true;
+    Lexer l (line);
+    Lexer::Type type;
+    std::string token;
+    while (l.token (token, type))
+    {
+      if (token.back () == ':')
+      {
+        // "name:" opens a rule; it becomes the start rule if none is set.
+        rule_name = token.substr (0, token.size () - 1);
+        if (_start == "")
+          _start = rule_name;
+
+        // Reusing a rule name discards the productions stored under it.
+        _rules[rule_name] = Grammar::Rule ();
+        new_production = true;
+      }
+      else if (token.front () == ':')
+      {
+        // Tokens with a leading ':' (presumably modifiers) are not stored.
+      }
+      else
+      {
+        // A flag (not a token count) so ignored tokens cannot skip this.
+        if (new_production)
+          _rules[rule_name].push_back (Grammar::Production ());
+        new_production = false;
+        _rules[rule_name].back ().push_back (token);
+      }
+    }
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+std::string Grammar::dump () const
+{
+  // One paragraph per rule; the start rule is flagged with a marker.
+  std::ostringstream text;
+  for (const auto& entry : _rules)
+  {
+    const bool is_start = (entry.first == _start);
+    text << (is_start ? "▶ " : "") << entry.first << ":\n";
+
+    // Each production is indented on its own line, terms space-separated.
+    for (const auto& alternative : entry.second)
+    {
+      text << "    ";
+      for (const auto& symbol : alternative)
+        text << symbol << ' ';
+
+      text << '\n';
+    }
+
+    text << '\n';
+  }
+
+  return text.str ();
 }
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/Grammar.h b/src/Grammar.h
index ab8cb6dd..27f8ca06 100644
--- a/src/Grammar.h
+++ b/src/Grammar.h
@@ -29,6 +29,8 @@
 
 #include <FS.h>
 #include <string>
+#include <vector>
+#include <map>
 
 class Grammar
 {
@@ -36,6 +38,20 @@ public:
   Grammar ();
   void loadFromFile (File&);
   void loadFromString (const std::string&);
+  std::string dump () const;
+
+protected:
+  class Production : public std::vector <std::string>  // Terms of one alternative.
+  {
+  };
+
+  class Rule : public std::vector <Production>  // Alternatives of one rule.
+  {
+  };
+
+private:
+  std::string _start;
+  std::map <std::string, Grammar::Rule> _rules;
 };
 
 #endif