From 94dfc68cca07440c6c8282b835cfa87d035f0ef5 Mon Sep 17 00:00:00 2001 From: Paul Beckingham Date: Sun, 24 Jul 2011 01:06:26 -0400 Subject: [PATCH] Expression reboot - A3::tokenize recognizes strings, patterns, attr, ops, dates. - A3::tokenize canonicalizes attributes. --- src/A3.cpp | 261 ++++++++++++++++++++++++--------------- src/A3.h | 9 +- src/commands/Command.cpp | 6 - 3 files changed, 168 insertions(+), 108 deletions(-) diff --git a/src/A3.cpp b/src/A3.cpp index 25db06da3..f73a2c11a 100644 --- a/src/A3.cpp +++ b/src/A3.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -522,6 +523,16 @@ const std::string A3::find_limit () const return ""; } +//////////////////////////////////////////////////////////////////////////////// +const std::vector A3::operator_list () +{ + std::vector all; + for (unsigned int i = 0; i < NUM_OPERATORS; ++i) + all.push_back (operators[i].op); + + return all; +} + //////////////////////////////////////////////////////////////////////////////// const A3 A3::extract_filter () const { @@ -544,6 +555,7 @@ const A3 A3::extract_filter () const filter.push_back (*arg); } + filter = tokenize (filter); return filter; } @@ -586,6 +598,152 @@ const A3 A3::extract_words () const return words; } +//////////////////////////////////////////////////////////////////////////////// +const A3 A3::tokenize (const A3& input) const +{ + // Join all the arguments together. + std::string combined; + std::vector ::const_iterator arg; + for (arg = input.begin (); arg != input.end (); ++arg) + { + if (arg != input.begin ()) + combined += " "; + + combined += arg->_raw; + } + std::cout << "# A3::tokenize combined '" << combined << "'\n"; + + // List of operators for recognition. + std::vector operators = A3::operator_list (); + + // Date format, for both parsing and rendering. + std::string date_format = context.config.get ("dateformat"); + + // Nibble them apart. + A3 output; + Nibbler n (combined); + n.skipWS (); + + std::string s; +// int i; +// double d; + time_t t; + while (! n.depleted ()) + { + if (n.getQuoted ('"', s, true) || + n.getQuoted ('\'', s, true)) + output.push_back (Arg (s, "literal string")); + + else if (n.getQuoted ('/', s, true)) + output.push_back (Arg (s, "pattern")); + + else if (n.getOneOf (operators, s)) + output.push_back (Arg (s, "op")); + + else if (is_attr (n, s)) + output.push_back (Arg (s, "attr")); + +/* + else if (n.getDOM (s)) + output.push_back (Arg (s, "dom")); + + else if (n.getNumber (d)) + output.push_back (Arg (format (d), "literal number")); + + else if (n.getDateISO (t)) + output.push_back (Arg (Date (t).toISO (), "literal date")); +*/ + + else if (n.getDate (date_format, t)) + output.push_back (Arg (Date (t).toString (date_format), "literal date")); + +/* + else if (n.getInt (i)) + output.push_back (Arg (format (i), "literal int")); + + else if (n.getWord (s)) + output.push_back (Arg (s, "word")); +*/ + + else + { + if (! n.getUntilWS (s)) + n.getUntilEOS (s); + + output.push_back (Arg (s, "word")); + } + + n.skipWS (); + } + + return output; +} + +//////////////////////////////////////////////////////////////////////////////// +// :['"][]['"] +bool A3::is_attr (Nibbler& n, std::string& result) +{ + n.save (); + std::string name; + std::string value; + + if (n.getName (name)) + { + if (name.length ()) + { + if (n.skip (':')) + { + // Both quoted and unquoted Att's are accepted. + // Consider removing this for a stricter parse. + if (n.getQuoted ('"', value) || + n.getQuoted ('\'', value) || + n.getUntil (' ', value) || + n.getUntilEOS (value) || + n.depleted ()) + { +/* + TODO Eliminate anything that looks like a URL. + // Exclude certain URLs, that look like attrs. + if (value.find ('@') <= n.cursor () || + value.find ('/') <= n.cursor ()) + return false; +*/ + + // Validate and canonicalize attribute name. + if (is_attribute (name, name)) + { + result = name + ':' + value; + return true; + } + } + } + } + } + + n.restore (); + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +// Canonicalize attribute names. +bool A3::is_attribute (const std::string& input, std::string& canonical) +{ + std::vector columns = context.getColumns (); + std::vector matches; + autoComplete (input, + columns, + matches, + context.config.getInteger ("abbreviation.minimum")); + + if (matches.size () == 1) + { + canonical = matches[0]; + return true; + } + + return false; +} + @@ -602,16 +760,6 @@ const A3 A3::extract_words () const #ifdef NOPE -//////////////////////////////////////////////////////////////////////////////// -std::vector A3::operator_list () -{ - std::vector all; - for (unsigned int i = 0; i < NUM_OPERATORS; ++i) - all.push_back (operators[i].op); - - return all; -} - //////////////////////////////////////////////////////////////////////////////// bool A3::is_multipart ( const std::string& input, @@ -632,47 +780,6 @@ bool A3::is_multipart ( return parts.size () > 1 ? true : false; } -//////////////////////////////////////////////////////////////////////////////// -// :['"][]['"] -bool A3::is_attr (const std::string& input) -{ - Nibbler n (input); - std::string name; - std::string value; - - if (n.getUntilOneOf ("=:", name)) - { - if (name.length () == 0) - return false; - - if (name.find_first_of (non_word_chars) != std::string::npos) - return false; - - if (n.skip (':')) - { - // Exclude certain URLs, that look like attrs. - if (input.find ('@') <= n.cursor () || - input.find ('/') <= n.cursor ()) - return false; - - // Both quoted and unquoted Att's are accepted. - // Consider removing this for a stricter parse. - if (n.getQuoted ('"', value) || - n.getQuoted ('\'', value) || - n.getUntil (' ', value) || - n.getUntilEOS (value) || - n.depleted ()) - { - // Validate and canonicalize attribute name. - if (is_attribute (name, name)) - return true; - } - } - } - - return false; -} - //////////////////////////////////////////////////////////////////////////////// // .:['"]['"] bool A3::is_attmod (const std::string& input) @@ -884,51 +991,6 @@ bool A3::is_symbol_operator (const std::string& input) return false; } -//////////////////////////////////////////////////////////////////////////////// -// Canonicalize attribute names. -bool A3::is_attribute (const std::string& input, std::string& canonical) -{ - // Guess at the full attribute name. - std::vector candidates; - for (unsigned i = 0; i < NUM_ATT_NAMES; ++i) - { - // Short-circuit: exact matches cause immediate return. - if (attributeNames[i] == input) - { - canonical = input; - return true; - } - - candidates.push_back (attributeNames[i]); - } - - for (unsigned i = 0; i < NUM_MODIFIABLE_ATT_NAMES; ++i) - { - // Short-circuit: exact matches cause immediate return. - if (modifiableAttributeNames[i] == input) - { - canonical = input; - return true; - } - - candidates.push_back (modifiableAttributeNames[i]); - } - - std::vector matches; - autoComplete (input, - candidates, - matches, - context.config.getInteger ("abbreviation.minimum")); - - if (matches.size () == 1) - { - canonical = matches[0]; - return true; - } - - return false; -} - //////////////////////////////////////////////////////////////////////////////// bool A3::is_modifier (const std::string& input) { @@ -1518,10 +1580,12 @@ void A3::dump (const std::string& label) color_map["word"] = Color ("white on gray4"); color_map["none"] = Color ("black on white"); + // Filter colors. + color_map["attr"] = Color ("bold red on gray4"); + color_map["pattern"] = Color ("cyan on gray4"); + color_map["op"] = Color ("white on rgb010"); /* color_map["tag"] = Color ("green on gray2"); - color_map["pattern"] = Color ("cyan on gray2"); - color_map["attr"] = Color ("bold red on gray2"); color_map["attmod"] = Color ("bold red on gray2"); color_map["id"] = Color ("yellow on gray2"); color_map["uuid"] = Color ("yellow on gray2"); @@ -1532,7 +1596,6 @@ void A3::dump (const std::string& label) // Fundamentals. /* color_map["lvalue"] = Color ("bold green on rgb010"); - color_map["op"] = Color ("white on rgb010"); color_map["int"] = Color ("bold yellow on rgb010"); color_map["number"] = Color ("bold yellow on rgb010"); color_map["string"] = Color ("bold yellow on rgb010"); diff --git a/src/A3.h b/src/A3.h index b727d2cf4..607ffa0de 100644 --- a/src/A3.h +++ b/src/A3.h @@ -85,6 +85,7 @@ public: void categorize (); static bool is_command (const std::vector &, std::string&); + static const std::vector operator_list (); void append_stdin (); void rc_override (std::string&, File&); @@ -102,12 +103,15 @@ public: const A3 extract_modifications () const; const A3 extract_words () const; + const A3 tokenize (const A3&) const; + + static bool is_attr (Nibbler&, std::string&); + static bool is_attribute (const std::string&, std::string&); + /* - static std::vector operator_list (); static bool is_multipart (const std::string&, std::vector &); - static bool is_attr (const std::string&); static bool is_attmod (const std::string&); static bool is_subst (const std::string&); static bool is_pattern (const std::string&); @@ -117,7 +121,6 @@ public: static bool is_operator (const std::string&); static bool is_operator (const std::string&, char&, int&, char&); static bool is_symbol_operator (const std::string&); - static bool is_attribute (const std::string&, std::string&); static bool is_modifier (const std::string&); static bool is_expression (const std::string&); diff --git a/src/commands/Command.cpp b/src/commands/Command.cpp index fe14300cd..6c76e6e16 100644 --- a/src/commands/Command.cpp +++ b/src/commands/Command.cpp @@ -271,12 +271,6 @@ void Command::filter (std::vector & input, std::vector & output) /**/ A3 filt = context.a3.extract_filter (); filt.dump ("extract_filter"); - - A3 mods = context.a3.extract_modifications (); - mods.dump ("extract_modifications"); - - A3 words = context.a3.extract_words (); - words.dump ("extract_words"); /**/ Arguments f;