- Implemented RegX class with separate compile and match methods,
  thereby allowing efficient re-use of a compiled regex.  This is
  critical to Expression::eval, where an identical regex might be
  applied to every task.
- Obsoleted rx.{h,cpp}, which combined the compile and match steps
  into a single call, and is therefore not efficient when used in
  the context of filtering.
- Fixed some unit tests that weren't building.  Now they do.  They
  don't work of course (don't be silly) but that's a problem for
  another day.
- Modified all code that relies on rx.h to use RegX.h.
This commit is contained in:
Paul Beckingham 2011-06-21 01:43:57 -04:00
parent aa8d872466
commit b49523c06d
14 changed files with 249 additions and 490 deletions

View file

@ -30,7 +30,7 @@
#include <stdlib.h>
#include <string.h>
#include <text.h>
#include <rx.h>
#include <RegX.h>
#include <Color.h>
#include <util.h>
#include <Date.h>
@ -634,7 +634,8 @@ bool Att::match (const Att& other) const
if (regex)
{
std::string pattern = "^" + mValue + "$";
if (!regexMatch (other.mValue, pattern, case_sensitive))
RegX r (pattern, case_sensitive);
if (!r.match (other.mValue))
return false;
}
else if (!compare (mValue, other.mValue, (bool) case_sensitive))
@ -652,7 +653,8 @@ bool Att::match (const Att& other) const
#ifdef FEATURE_REGEX
if (regex)
{
if (!regexMatch (other.mValue, mValue, case_sensitive))
RegX r (mValue, case_sensitive);
if (!r.match (other.mValue))
return false;
}
else if (find (other.mValue, mValue, (bool) case_sensitive) == std::string::npos)
@ -670,7 +672,8 @@ bool Att::match (const Att& other) const
if (regex)
{
std::string pattern = "^" + mValue + "$";
if (!regexMatch (other.mValue, pattern, case_sensitive))
RegX r (pattern, case_sensitive);
if (!r.match (other.mValue))
return false;
}
else if (!compare (mValue, other.mValue, (bool) case_sensitive))
@ -688,7 +691,8 @@ bool Att::match (const Att& other) const
if (regex)
{
std::string pattern = "^" + mValue + "$";
if (regexMatch (other.mValue, pattern, case_sensitive))
RegX r (pattern, case_sensitive);
if (r.match (other.mValue))
return false;
}
else if (compare (mValue, other.mValue, (bool) case_sensitive))
@ -720,7 +724,8 @@ bool Att::match (const Att& other) const
if (regex)
{
std::string pattern = "^" + mValue;
if (!regexMatch (other.mValue, pattern, case_sensitive))
RegX r (pattern, case_sensitive);
if (!r.match (other.mValue))
return false;
}
else
@ -743,7 +748,8 @@ bool Att::match (const Att& other) const
if (regex)
{
std::string pattern = mValue + "$";
if (!regexMatch (other.mValue, pattern, case_sensitive))
RegX r (pattern, case_sensitive);
if (!r.match (other.mValue))
return false;
}
else
@ -767,7 +773,8 @@ bool Att::match (const Att& other) const
#ifdef FEATURE_REGEX
if (regex)
{
if (regexMatch (other.mValue, mValue, case_sensitive))
RegX r (mValue, case_sensitive);
if (r.match (other.mValue))
return false;
}
else if (find (other.mValue, mValue, (bool) case_sensitive) != std::string::npos)
@ -862,7 +869,8 @@ bool Att::match (const Att& other) const
{
std::vector <int> start;
std::vector <int> end;
if (!regexMatch (start, end, other.mValue, mValue, case_sensitive))
RegX r (mValue, case_sensitive);
if (!r.match (start, end, other.mValue))
return false;
if (!isWordStart (other.mValue, start[0]))
@ -898,8 +906,9 @@ bool Att::match (const Att& other) const
{
std::vector <int> start;
std::vector <int> end;
if (regexMatch (start, end, other.mValue, mValue, case_sensitive) &&
isWordStart (other.mValue, start[0]) &&
RegX r (mValue, case_sensitive);
if (r.match (start, end, other.mValue) &&
isWordStart (other.mValue, start[0]) &&
isWordEnd (other.mValue, end[0]))
return false;
}

View file

@ -25,6 +25,7 @@ set (task_SRCS API.cpp API.h
Path.cpp Path.h
Permission.cpp Permission.h
Record.cpp Record.h
RegX.cpp RegX.h
TDB.cpp TDB.h
TDB2.cpp TDB2.h
Task.cpp Task.h
@ -45,7 +46,6 @@ set (task_SRCS API.cpp API.h
interactive.cpp
recur.cpp
rules.cpp
rx.cpp rx.h
sort.cpp
text.cpp text.h
utf8.cpp utf8.h

View file

@ -33,7 +33,7 @@
#include <inttypes.h>
#include <Nibbler.h>
#include <Date.h>
#include <rx.h>
#include <RegX.h>
const char* c_digits = "0123456789";
@ -146,9 +146,10 @@ bool Nibbler::getUntilRx (const std::string& regex, std::string& result)
else
modified_regex = regex;
RegX r (modified_regex, true);
std::vector <int> start;
std::vector <int> end;
if (regexMatch (start, end, mInput.substr (mCursor), modified_regex, true))
if (r.match (start, end, mInput.substr (mCursor)))
{
result = mInput.substr (mCursor, start[0]);
mCursor += start[0];
@ -450,8 +451,9 @@ bool Nibbler::getRx (const std::string& regex, std::string& result)
else
modified_regex = regex;
RegX r (modified_regex, true);
std::vector <std::string> results;
if (regexMatch (results, mInput.substr (mCursor), modified_regex, true))
if (r.match (results, mInput.substr (mCursor)))
{
result = results[0];
mCursor += result.length ();
@ -1010,8 +1012,9 @@ bool Nibbler::skipRx (const std::string& regex)
else
modified_regex = regex;
RegX r (modified_regex, true);
std::vector <std::string> results;
if (regexMatch (results, mInput.substr (mCursor), modified_regex, true))
if (r.match (results, mInput.substr (mCursor)))
{
mCursor += results[0].length ();
return true;

157
src/RegX.cpp Normal file
View file

@ -0,0 +1,157 @@
////////////////////////////////////////////////////////////////////////////////
// taskwarrior - a command line task list manager.
//
// Copyright 2010 - 2011, Paul Beckingham, Federico Hernandez.
// All rights reserved.
//
// This program is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free Software
// Foundation; either version 2 of the License, or (at your option) any later
// version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
// details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the
//
// Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor,
// Boston, MA
// 02110-1301
// USA
//
////////////////////////////////////////////////////////////////////////////////
#include <stdlib.h>
#include <string.h>
#include <RegX.h>
#define L10N // Localization complete.
//#define _POSIX_C_SOURCE 1
#define MAX_MATCHES 64
////////////////////////////////////////////////////////////////////////////////
// Stores the pattern and case-sensitivity flag, then compiles the pattern
// right away.  Any exception raised by compile () therefore propagates out of
// the constructor.
RegX::RegX (const std::string& pattern, bool case_sensitive /* = true */)
: _compiled (false), _pattern (pattern), _case_sensitive (case_sensitive)
{
  compile ();
}
////////////////////////////////////////////////////////////////////////////////
// Copy constructor.  Only the pattern text and the case-sensitivity flag are
// copied; the compiled regex_t of 'other' is not touched.  The new object is
// marked as not compiled, so the match () methods compile it on first use.
RegX::RegX (const RegX& other)
: _compiled (false), _pattern (other._pattern), _case_sensitive (other._case_sensitive)
{
}
////////////////////////////////////////////////////////////////////////////////
// Copy assignment.  Takes over the pattern and case-sensitivity flag of
// 'other' and leaves this object uncompiled, so the next match () call
// recompiles against the new pattern.
//
// Returns a reference to this object.  Self-assignment is a no-op.
RegX& RegX::operator= (const RegX& other)
{
  if (this != &other)
  {
    // A regex compiled for the old pattern owns memory inside _regex.  It must
    // be released here: once _compiled is false neither compile () (which
    // zeroes _regex) nor the destructor would ever free it.
    if (_compiled)
    {
      regfree (&_regex);
      _compiled = false;
    }

    _pattern        = other._pattern;
    _case_sensitive = other._case_sensitive;
  }

  return *this;
}
////////////////////////////////////////////////////////////////////////////////
// Two RegX objects are equal when they have the same pattern text and the
// same case-sensitivity setting.  Whether either one has been compiled yet is
// not part of the comparison.
bool RegX::operator== (const RegX& other) const
{
  if (_pattern != other._pattern)
    return false;

  return _case_sensitive == other._case_sensitive;
}
////////////////////////////////////////////////////////////////////////////////
// Frees the compiled regex, but only if compile () actually succeeded for
// this object; an uncompiled _regex holds nothing to release.
RegX::~RegX ()
{
  if (!_compiled)
    return;

  regfree (&_regex);
}
////////////////////////////////////////////////////////////////////////////////
// Compiles _pattern into _regex unless that has already been done.
//
// The pattern is compiled as an extended regex with REG_NEWLINE, plus
// REG_ICASE when the object is case-insensitive.  REG_NOSUB is not set.
//
// Throws a std::string holding the regerror () text if regcomp () fails; in
// that case the object stays marked as not compiled.
void RegX::compile ()
{
  if (_compiled)
    return;

  memset (&_regex, 0, sizeof (regex_t));

  int flags = REG_EXTENDED | /*REG_NOSUB |*/ REG_NEWLINE;
  if (!_case_sensitive)
    flags |= REG_ICASE;

  const int status = regcomp (&_regex, _pattern.c_str (), flags);
  if (status != 0)
  {
    char message[256];
    regerror (status, &_regex, message, sizeof (message));
    throw std::string (message);
  }

  _compiled = true;
}
////////////////////////////////////////////////////////////////////////////////
// Reports whether the pattern matches anywhere in 'in'.  No match positions
// are requested from regexec ().  Any non-zero regexec () result, not just
// REG_NOMATCH, is reported as false.
bool RegX::match (const std::string& in)
{
  // compile () returns immediately when the regex is already compiled.
  compile ();

  const int status = regexec (&_regex, in.c_str (), 0, NULL, 0);
  return status == 0;
}
////////////////////////////////////////////////////////////////////////////////
// Matches the pattern against 'in' and, on success, appends the text captured
// by each parenthesised sub-expression to 'matches' (the whole-match text in
// rm[0] is not appended, so matches[0] is the first sub-expression).
//
//   matches  receives one string per reported sub-expression; existing
//            contents are kept and new entries are appended.
//   in       the text to search.
//
// Returns true on a match, false otherwise.  May throw std::string from
// compile () if the object was not yet compiled and the pattern is invalid.
bool RegX::match (
  std::vector<std::string>& matches,
  const std::string& in)
{
  if (!_compiled)
    compile ();

  regmatch_t rm[MAX_MATCHES];
  if (regexec (&_regex, in.c_str (), MAX_MATCHES, rm, 0) != 0)
    return false;

  // regexec () only fills MAX_MATCHES slots (slot 0 being the whole match), so
  // sub-expressions beyond that limit cannot be reported.  Clamp the loop
  // rather than read past the end of rm.
  const size_t limit = MAX_MATCHES - 1;
  const size_t last  = _regex.re_nsub < limit ? _regex.re_nsub : limit;

  for (size_t i = 1; i <= last; ++i)
  {
    // A sub-expression that took no part in the match has rm_so == -1.  Record
    // it as an empty string so later captures keep their positions; passing -1
    // to substr () would throw std::out_of_range.
    if (rm[i].rm_so < 0)
      matches.push_back (std::string ());
    else
      matches.push_back (in.substr (rm[i].rm_so, rm[i].rm_eo - rm[i].rm_so));
  }

  return true;
}
////////////////////////////////////////////////////////////////////////////////
// Matches the pattern against 'in' and, on success, appends the start and end
// offsets of each parenthesised sub-expression to 'start' and 'end' (the
// whole-match offsets in rm[0] are not appended, so start[0]/end[0] describe
// the first sub-expression).
//
//   start, end  receive one offset pair per reported sub-expression; existing
//               contents are kept.  A sub-expression that took no part in the
//               match is reported with the -1 offsets regexec () gives it.
//   in          the text to search.
//
// Returns true on a match, false otherwise.  May throw std::string from
// compile () if the object was not yet compiled and the pattern is invalid.
bool RegX::match (
  std::vector <int>& start,
  std::vector <int>& end,
  const std::string& in)
{
  if (!_compiled)
    compile ();

  regmatch_t rm[MAX_MATCHES];
  if (regexec (&_regex, in.c_str (), MAX_MATCHES, rm, 0) != 0)
    return false;

  // regexec () only fills MAX_MATCHES slots (slot 0 being the whole match), so
  // sub-expressions beyond that limit cannot be reported.  Clamp the loop
  // rather than read past the end of rm.
  const size_t limit = MAX_MATCHES - 1;
  const size_t last  = _regex.re_nsub < limit ? _regex.re_nsub : limit;

  for (size_t i = 1; i <= last; ++i)
  {
    start.push_back (rm[i].rm_so);
    end.push_back (rm[i].rm_eo);
  }

  return true;
}
////////////////////////////////////////////////////////////////////////////////

View file

@ -25,16 +25,36 @@
//
////////////////////////////////////////////////////////////////////////////////
#ifndef INCLUDED_RX
#define INCLuDED_RX
#ifndef INCLUDED_REGX
#define INCLUDED_REGX
#define L10N // Localization complete.
#include <string>
#include <vector>
#include <regex.h>
bool regexMatch (const std::string&, const std::string&, bool caseSensitive = true);
bool regexMatch (std::vector<std::string>&, const std::string&, const std::string&, bool caseSensitive = true);
bool regexMatch (std::vector<int>&, std::vector<int>&, const std::string&, const std::string&, bool caseSensitive = true);
// Wraps a POSIX regex_t together with the pattern it was built from, keeping
// compilation separate from matching so one compiled regex can be applied to
// many input strings.
class RegX
{
public:
  // Construction, copying and comparison.
  RegX (const std::string& pattern, bool caseSensitive = true);
  RegX (const RegX& other);
  RegX& operator= (const RegX& other);
  bool operator== (const RegX& other) const;
  ~RegX ();

  // Matching: plain yes/no, captured sub-expression text, or captured
  // sub-expression start/end offsets.
  bool match (const std::string& in);
  bool match (std::vector<std::string>& matches, const std::string& in);
  bool match (std::vector <int>& start, std::vector <int>& end, const std::string& in);

private:
  // Builds _regex from _pattern if that has not happened yet.
  void compile ();

  bool        _compiled;        // True once _regex holds a compiled pattern.
  std::string _pattern;         // Pattern text as supplied by the caller.
  bool        _case_sensitive;  // False selects case-insensitive matching.
  regex_t     _regex;           // Compiled form; meaningful only when _compiled.
};
#endif

View file

@ -29,7 +29,7 @@
#include <sstream>
#include <algorithm>
#include <stdlib.h>
#include <rx.h>
#include <RegX.h>
#include <Context.h>
#include <util.h>
#include <cmake.h>
@ -223,9 +223,10 @@ int CmdDiagnostics::execute (std::string& output)
char* p = fgets (buffer, 1023, fp);
pclose (fp);
RegX r ("usage", false);
if (p)
out << " scp: "
<< (regexMatch (buffer, "usage") ? "found" : "n/a")
<< (r.match (buffer) ? "found" : "n/a")
<< "\n";
}
@ -237,8 +238,9 @@ int CmdDiagnostics::execute (std::string& output)
// rsync version 2.6.9 protocol version 29
if (p)
{
RegX r ("version ([0-9]+\\.[0-9]+\\.[0-9]+)", false);
matches.clear ();
regexMatch (matches, buffer, "version ([0-9]+\\.[0-9]+\\.[0-9]+)");
r.match (matches, buffer);
out << " rsync: "
<< (matches.size () ? matches[0] : "n/a")
<< "\n";
@ -253,8 +255,9 @@ int CmdDiagnostics::execute (std::string& output)
// curl 7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3
if (p)
{
RegX r ("curl ([0-9]+\\.[0-9]+\\.[0-9]+)", false);
matches.clear ();
regexMatch (matches, buffer, "curl ([0-9]+\\.[0-9]+\\.[0-9]+)");
r.match (matches, buffer);
out << " curl: "
<< (matches.size () ? matches[0] : "n/a")
<< "\n";

View file

@ -1,138 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
// taskwarrior - a command line task list manager.
//
// Copyright 2010 - 2011, Paul Beckingham, Federico Hernandez.
// All rights reserved.
//
// This program is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free Software
// Foundation; either version 2 of the License, or (at your option) any later
// version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
// details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the
//
// Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor,
// Boston, MA
// 02110-1301
// USA
//
////////////////////////////////////////////////////////////////////////////////
#include <stdlib.h>
#include <regex.h>
#include <rx.h>
#define L10N // Localization complete.
//#define _POSIX_C_SOURCE 1
#define MAX_MATCHES 8
////////////////////////////////////////////////////////////////////////////////
// Compiles 'pattern' as an extended regex and reports whether it matches
// anywhere in 'in'.  No sub-expression information is collected (REG_NOSUB).
//
//   in             the text to search.
//   pattern        the regular expression.
//   caseSensitive  false adds REG_ICASE.
//
// Returns true on a match, false on REG_NOMATCH.  Throws a std::string holding
// the regerror () text if the pattern fails to compile or regexec () reports
// any other error.
bool regexMatch (
  const std::string& in,
  const std::string& pattern,
  bool caseSensitive /* = true */)
{
  regex_t r = {};

  int result = regcomp (&r, pattern.c_str (),
                        REG_EXTENDED | REG_NOSUB | REG_NEWLINE |
                        (caseSensitive ? 0 : REG_ICASE));
  if (result != 0)
  {
    // Nothing to free here: after a failed regcomp () the contents of r are
    // not a valid compiled regex.
    char message[256];
    regerror (result, &r, message, sizeof (message));
    throw std::string (message);
  }

  result = regexec (&r, in.c_str (), 0, NULL, 0);
  if (result != 0 && result != REG_NOMATCH)
  {
    char message[256];
    regerror (result, &r, message, sizeof (message));
    regfree (&r);
    throw std::string (message);
  }

  // The compiled regex is released on every path, including "no match".
  regfree (&r);
  return result == 0;
}
////////////////////////////////////////////////////////////////////////////////
// Compiles 'pattern' as an extended regex, matches it against 'in' and, on
// success, appends the text captured by each parenthesised sub-expression to
// 'out' (the whole-match text is not appended, so out[0] is the first
// sub-expression).
//
//   out            receives one string per sub-expression; existing contents
//                  are kept.  A sub-expression that took no part in the match
//                  yields an empty string.
//   in             the text to search.
//   pattern        the regular expression.
//   caseSensitive  false adds REG_ICASE.
//
// Returns true on a match, false on REG_NOMATCH.  Throws a std::string holding
// the regerror () text if the pattern fails to compile or regexec () reports
// any other error.
bool regexMatch (
  std::vector<std::string>& out,
  const std::string& in,
  const std::string& pattern,
  bool caseSensitive /* = true */)
{
  regex_t r = {};

  int result = regcomp (&r, pattern.c_str (),
                        REG_EXTENDED | REG_NEWLINE |
                        (caseSensitive ? 0 : REG_ICASE));
  if (result != 0)
  {
    // Nothing to free here: after a failed regcomp () the contents of r are
    // not a valid compiled regex.
    char message[256];
    regerror (result, &r, message, sizeof (message));
    throw std::string (message);
  }

  // One slot for the whole match plus one per sub-expression, so the loop
  // below can never index past what regexec () filled in, however many groups
  // the pattern contains.
  std::vector<regmatch_t> rm (r.re_nsub + 1);

  result = regexec (&r, in.c_str (), rm.size (), &rm[0], 0);
  if (result != 0 && result != REG_NOMATCH)
  {
    char message[256];
    regerror (result, &r, message, sizeof (message));
    regfree (&r);
    throw std::string (message);
  }

  // The compiled regex is released on every path, including "no match".
  regfree (&r);
  if (result == REG_NOMATCH)
    return false;

  for (size_t i = 1; i < rm.size (); ++i)
  {
    // An unused sub-expression has rm_so == -1; substr (-1, ...) would throw.
    if (rm[i].rm_so < 0)
      out.push_back (std::string ());
    else
      out.push_back (in.substr (rm[i].rm_so, rm[i].rm_eo - rm[i].rm_so));
  }

  return true;
}
////////////////////////////////////////////////////////////////////////////////
// Compiles 'pattern' as an extended regex, matches it against 'in' and, on
// success, appends the start and end offsets of each parenthesised
// sub-expression to 'start' and 'end' (the whole-match offsets are not
// appended, so start[0]/end[0] describe the first sub-expression).
//
//   start, end     receive one offset pair per sub-expression; existing
//                  contents are kept.  A sub-expression that took no part in
//                  the match is reported with the -1 offsets regexec () gives
//                  it.
//   in             the text to search.
//   pattern        the regular expression.
//   caseSensitive  false adds REG_ICASE.
//
// Returns true on a match, false on REG_NOMATCH.  Throws a std::string holding
// the regerror () text if the pattern fails to compile or regexec () reports
// any other error.
bool regexMatch (
  std::vector <int>& start,
  std::vector <int>& end,
  const std::string& in,
  const std::string& pattern,
  bool caseSensitive /* = true */)
{
  regex_t r = {};

  int result = regcomp (&r, pattern.c_str (),
                        REG_EXTENDED | REG_NEWLINE |
                        (caseSensitive ? 0 : REG_ICASE));
  if (result != 0)
  {
    // Nothing to free here: after a failed regcomp () the contents of r are
    // not a valid compiled regex.
    char message[256];
    regerror (result, &r, message, sizeof (message));
    throw std::string (message);
  }

  // One slot for the whole match plus one per sub-expression, so the loop
  // below can never index past what regexec () filled in, however many groups
  // the pattern contains.
  std::vector<regmatch_t> rm (r.re_nsub + 1);

  result = regexec (&r, in.c_str (), rm.size (), &rm[0], 0);
  if (result != 0 && result != REG_NOMATCH)
  {
    char message[256];
    regerror (result, &r, message, sizeof (message));
    regfree (&r);
    throw std::string (message);
  }

  // The compiled regex is released on every path, including "no match".
  regfree (&r);
  if (result == REG_NOMATCH)
    return false;

  for (size_t i = 1; i < rm.size (); ++i)
  {
    start.push_back (rm[i].rm_so);
    end.push_back (rm[i].rm_eo);
  }

  return true;
}
////////////////////////////////////////////////////////////////////////////////