From b55f47ec9a518999cdcd91fce01d631df9951a03 Mon Sep 17 00:00:00 2001 From: Paul Beckingham Date: Sun, 24 Jul 2011 10:57:30 -0400 Subject: [PATCH] Expression reboot - Implemented A3::is_modifier which canonicalized modifiers. - Implemtned A3::is_attmod. --- src/A3.cpp | 211 ++++++++++++++++++++++++++++++++++------------------- src/A3.h | 3 +- 2 files changed, 139 insertions(+), 75 deletions(-) diff --git a/src/A3.cpp b/src/A3.cpp index f73a2c11a..c1d1f0030 100644 --- a/src/A3.cpp +++ b/src/A3.cpp @@ -42,7 +42,6 @@ extern Context context; // Supported modifiers, synonyms on the same line. -/* static const char* modifierNames[] = { "before", "under", "below", @@ -58,7 +57,7 @@ static const char* modifierNames[] = "word", "noword" }; -*/ + // Supported operators, borrowed from C++, particularly the precedence. // Note: table is sorted by length of operator string, so searches match // longest first. @@ -97,7 +96,7 @@ static struct { ")", 0, 'b', 1, 'l' }, // Precedence end }; -//#define NUM_MODIFIER_NAMES (sizeof (modifierNames) / sizeof (modifierNames[0])) +#define NUM_MODIFIER_NAMES (sizeof (modifierNames) / sizeof (modifierNames[0])) #define NUM_OPERATORS (sizeof (operators) / sizeof (operators[0])) //static const char* non_word_chars = " +-*/%()=<>!~"; @@ -611,7 +610,6 @@ const A3 A3::tokenize (const A3& input) const combined += arg->_raw; } - std::cout << "# A3::tokenize combined '" << combined << "'\n"; // List of operators for recognition. std::vector operators = A3::operator_list (); @@ -625,55 +623,92 @@ const A3 A3::tokenize (const A3& input) const n.skipWS (); std::string s; -// int i; -// double d; + int i; + double d; time_t t; + std::cout << "# " << n.dump () << "\n"; while (! n.depleted ()) { if (n.getQuoted ('"', s, true) || n.getQuoted ('\'', s, true)) - output.push_back (Arg (s, "literal string")); + { + std::cout << "# string '" << s << "'\n"; + output.push_back (Arg (s, "string")); + } else if (n.getQuoted ('/', s, true)) + { + std::cout << "# pattern '" << s << "'\n"; output.push_back (Arg (s, "pattern")); + } else if (n.getOneOf (operators, s)) + { + std::cout << "# operator '" << s << "'\n"; output.push_back (Arg (s, "op")); + } else if (is_attr (n, s)) + { + std::cout << "# attr '" << s << "'\n"; output.push_back (Arg (s, "attr")); + } + + else if (is_attmod (n, s)) + { + std::cout << "# attmod '" << s << "'\n"; + output.push_back (Arg (s, "attmod")); + } -/* else if (n.getDOM (s)) + { + std::cout << "# dom '" << s << "'\n"; output.push_back (Arg (s, "dom")); - - else if (n.getNumber (d)) - output.push_back (Arg (format (d), "literal number")); + } else if (n.getDateISO (t)) - output.push_back (Arg (Date (t).toISO (), "literal date")); -*/ + { + std::cout << "# date '" << t << "'\n"; + output.push_back (Arg (Date (t).toISO (), "date")); + } else if (n.getDate (date_format, t)) - output.push_back (Arg (Date (t).toString (date_format), "literal date")); + { + std::cout << "# date '" << t << "'\n"; + output.push_back (Arg (Date (t).toString (date_format), "date")); + } + + else if (n.getNumber (d)) + { + std::cout << "# num '" << d << "'\n"; + output.push_back (Arg (format (d), "num")); + } -/* else if (n.getInt (i)) - output.push_back (Arg (format (i), "literal int")); + { + std::cout << "# int '" << i << "'\n"; + output.push_back (Arg (format (i), "int")); + } - else if (n.getWord (s)) + else if (n.getName (s) || + n.getWord (s)) // After DOM + { + std::cout << "# word '" << s << "'\n"; output.push_back (Arg (s, "word")); -*/ + } else { if (! n.getUntilWS (s)) n.getUntilEOS (s); + std::cout << "# word '" << s << "'\n"; output.push_back (Arg (s, "word")); } + std::cout << "# " << n.dump () << "\n"; n.skipWS (); + std::cout << "# " << n.dump () << "\n"; } return output; @@ -697,7 +732,7 @@ bool A3::is_attr (Nibbler& n, std::string& result) // Consider removing this for a stricter parse. if (n.getQuoted ('"', value) || n.getQuoted ('\'', value) || - n.getUntil (' ', value) || + n.getName (value) || n.getUntilEOS (value) || n.depleted ()) { @@ -724,6 +759,60 @@ bool A3::is_attr (Nibbler& n, std::string& result) return false; } +//////////////////////////////////////////////////////////////////////////////// +// .:['"]['"] +bool A3::is_attmod (Nibbler& n, std::string& result) +{ + n.save (); + std::string name; + std::string modifier; + std::string value; + + if (n.getName (name) && + name.length ()) + { + if (n.skip ('.')) + { + // Skip the negation character. + n.skip ('~'); + + if (n.getName (modifier) && + modifier.length ()) + { + if (n.skip (':')) + { + // Both quoted and unquoted Att's are accepted. + // Consider removing this for a stricter parse. + if (n.getQuoted ('"', value) || + n.getQuoted ('\'', value) || + n.getName (value) || + n.getUntilEOS (value) || + n.depleted ()) + { +/* + TODO Eliminate anything that looks like a URL. + // Exclude certain URLs, that look like attrs. + if (value.find ('@') <= n.cursor () || + value.find ('/') <= n.cursor ()) + return false; +*/ + + // Validate and canonicalize attribute name. + if (is_attribute (name, name) && + is_modifier (modifier, modifier)) + { + result = name + '.' + modifier + ':' + value; + return true; + } + } + } + } + } + } + + return false; +} + //////////////////////////////////////////////////////////////////////////////// // Canonicalize attribute names. bool A3::is_attribute (const std::string& input, std::string& canonical) @@ -744,6 +833,31 @@ bool A3::is_attribute (const std::string& input, std::string& canonical) return false; } +//////////////////////////////////////////////////////////////////////////////// +// Canonicalize modifier names. +bool A3::is_modifier (const std::string& input, std::string& canonical) +{ + std::vector candidates; + for (unsigned int i = 0; i < NUM_MODIFIER_NAMES; ++i) + candidates.push_back (modifierNames[i]); + + std::vector matches; + autoComplete (input, + candidates, + matches, + context.config.getInteger ("abbreviation.minimum")); + + if (matches.size () == 1) + { + canonical = matches[0]; + return true; + } + + return false; +} + + + @@ -780,58 +894,6 @@ bool A3::is_multipart ( return parts.size () > 1 ? true : false; } -//////////////////////////////////////////////////////////////////////////////// -// .:['"]['"] -bool A3::is_attmod (const std::string& input) -{ - Nibbler n (input); - std::string name; - std::string modifier; - std::string value; - - if (n.getUntilOneOf (".", name)) - { - if (name.length () == 0) - return false; - - if (name.find_first_of (non_word_chars) != std::string::npos) - return false; - - if (n.skip ('.')) - { - n.skip ('~'); - n.getUntil (':', modifier); - - if (modifier.length () == 0) - return false; - } - - if (n.skip (':')) - { - // Exclude certain URLs, that look like attrs. - if (input.find ('@') <= n.cursor () || - input.find ('/') <= n.cursor ()) - return false; - - // Both quoted and unquoted Att's are accepted. - // Consider removing this for a stricter parse. - if (n.getQuoted ('"', value) || - n.getQuoted ('\'', value) || - n.getUntil (' ', value) || - n.getUntilEOS (value) || - n.depleted ()) - { - // Validate and canonicalize attribute and modifier names. - if (is_attribute (name, name) && - is_modifier (modifier)) - return true; - } - } - } - - return false; -} - //////////////////////////////////////////////////////////////////////////////// // ///[g] // @@ -1582,11 +1644,13 @@ void A3::dump (const std::string& label) // Filter colors. color_map["attr"] = Color ("bold red on gray4"); + color_map["attmod"] = Color ("bold red on gray4"); color_map["pattern"] = Color ("cyan on gray4"); - color_map["op"] = Color ("white on rgb010"); + color_map["op"] = Color ("green on gray4"); + color_map["string"] = Color ("bold yellow on gray4"); + color_map["date"] = Color ("bold yellow on gray4"); /* color_map["tag"] = Color ("green on gray2"); - color_map["attmod"] = Color ("bold red on gray2"); color_map["id"] = Color ("yellow on gray2"); color_map["uuid"] = Color ("yellow on gray2"); color_map["subst"] = Color ("bold cyan on gray2"); @@ -1598,7 +1662,6 @@ void A3::dump (const std::string& label) color_map["lvalue"] = Color ("bold green on rgb010"); color_map["int"] = Color ("bold yellow on rgb010"); color_map["number"] = Color ("bold yellow on rgb010"); - color_map["string"] = Color ("bold yellow on rgb010"); color_map["rx"] = Color ("bold red on rgb010"); */ diff --git a/src/A3.h b/src/A3.h index 607ffa0de..d9983104b 100644 --- a/src/A3.h +++ b/src/A3.h @@ -106,13 +106,14 @@ public: const A3 tokenize (const A3&) const; static bool is_attr (Nibbler&, std::string&); + static bool is_attmod (Nibbler&, std::string&); static bool is_attribute (const std::string&, std::string&); + static bool is_modifier (const std::string&, std::string&); /* static bool is_multipart (const std::string&, std::vector &); - static bool is_attmod (const std::string&); static bool is_subst (const std::string&); static bool is_pattern (const std::string&); static bool is_id (const std::string&);