mirror of
https://github.com/GothenburgBitFactory/taskwarrior.git
synced 2025-07-07 20:06:36 +02:00
Regex
- Implemented the RegX class, which keeps compile and match as separate methods, thereby allowing efficient re-use of a compiled regex. This is critical to Expression::eval, where an identical regex might be applied to every task. - Obsoleted rx.{h,cpp}, which combined the compile and match steps into a single call, and is therefore not efficient when used in the context of filtering. - Fixed some unit tests that weren't building. Now they do. They don't work of course (don't be silly) but that's a problem for another day. - Modified all code that relies on rx.h to use RegX.h.
This commit is contained in:
parent
aa8d872466
commit
b49523c06d
14 changed files with 249 additions and 490 deletions
31
src/Att.cpp
31
src/Att.cpp
|
@ -30,7 +30,7 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <text.h>
|
||||
#include <rx.h>
|
||||
#include <RegX.h>
|
||||
#include <Color.h>
|
||||
#include <util.h>
|
||||
#include <Date.h>
|
||||
|
@ -634,7 +634,8 @@ bool Att::match (const Att& other) const
|
|||
if (regex)
|
||||
{
|
||||
std::string pattern = "^" + mValue + "$";
|
||||
if (!regexMatch (other.mValue, pattern, case_sensitive))
|
||||
RegX r (pattern, case_sensitive);
|
||||
if (!r.match (other.mValue))
|
||||
return false;
|
||||
}
|
||||
else if (!compare (mValue, other.mValue, (bool) case_sensitive))
|
||||
|
@ -652,7 +653,8 @@ bool Att::match (const Att& other) const
|
|||
#ifdef FEATURE_REGEX
|
||||
if (regex)
|
||||
{
|
||||
if (!regexMatch (other.mValue, mValue, case_sensitive))
|
||||
RegX r (mValue, case_sensitive);
|
||||
if (!r.match (other.mValue))
|
||||
return false;
|
||||
}
|
||||
else if (find (other.mValue, mValue, (bool) case_sensitive) == std::string::npos)
|
||||
|
@ -670,7 +672,8 @@ bool Att::match (const Att& other) const
|
|||
if (regex)
|
||||
{
|
||||
std::string pattern = "^" + mValue + "$";
|
||||
if (!regexMatch (other.mValue, pattern, case_sensitive))
|
||||
RegX r (pattern, case_sensitive);
|
||||
if (!r.match (other.mValue))
|
||||
return false;
|
||||
}
|
||||
else if (!compare (mValue, other.mValue, (bool) case_sensitive))
|
||||
|
@ -688,7 +691,8 @@ bool Att::match (const Att& other) const
|
|||
if (regex)
|
||||
{
|
||||
std::string pattern = "^" + mValue + "$";
|
||||
if (regexMatch (other.mValue, pattern, case_sensitive))
|
||||
RegX r (pattern, case_sensitive);
|
||||
if (r.match (other.mValue))
|
||||
return false;
|
||||
}
|
||||
else if (compare (mValue, other.mValue, (bool) case_sensitive))
|
||||
|
@ -720,7 +724,8 @@ bool Att::match (const Att& other) const
|
|||
if (regex)
|
||||
{
|
||||
std::string pattern = "^" + mValue;
|
||||
if (!regexMatch (other.mValue, pattern, case_sensitive))
|
||||
RegX r (pattern, case_sensitive);
|
||||
if (!r.match (other.mValue))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
|
@ -743,7 +748,8 @@ bool Att::match (const Att& other) const
|
|||
if (regex)
|
||||
{
|
||||
std::string pattern = mValue + "$";
|
||||
if (!regexMatch (other.mValue, pattern, case_sensitive))
|
||||
RegX r (pattern, case_sensitive);
|
||||
if (!r.match (other.mValue))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
|
@ -767,7 +773,8 @@ bool Att::match (const Att& other) const
|
|||
#ifdef FEATURE_REGEX
|
||||
if (regex)
|
||||
{
|
||||
if (regexMatch (other.mValue, mValue, case_sensitive))
|
||||
RegX r (mValue, case_sensitive);
|
||||
if (r.match (other.mValue))
|
||||
return false;
|
||||
}
|
||||
else if (find (other.mValue, mValue, (bool) case_sensitive) != std::string::npos)
|
||||
|
@ -862,7 +869,8 @@ bool Att::match (const Att& other) const
|
|||
{
|
||||
std::vector <int> start;
|
||||
std::vector <int> end;
|
||||
if (!regexMatch (start, end, other.mValue, mValue, case_sensitive))
|
||||
RegX r (mValue, case_sensitive);
|
||||
if (!r.match (start, end, other.mValue))
|
||||
return false;
|
||||
|
||||
if (!isWordStart (other.mValue, start[0]))
|
||||
|
@ -898,8 +906,9 @@ bool Att::match (const Att& other) const
|
|||
{
|
||||
std::vector <int> start;
|
||||
std::vector <int> end;
|
||||
if (regexMatch (start, end, other.mValue, mValue, case_sensitive) &&
|
||||
isWordStart (other.mValue, start[0]) &&
|
||||
RegX r (mValue, case_sensitive);
|
||||
if (r.match (start, end, other.mValue) &&
|
||||
isWordStart (other.mValue, start[0]) &&
|
||||
isWordEnd (other.mValue, end[0]))
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@ set (task_SRCS API.cpp API.h
|
|||
Path.cpp Path.h
|
||||
Permission.cpp Permission.h
|
||||
Record.cpp Record.h
|
||||
RegX.cpp RegX.h
|
||||
TDB.cpp TDB.h
|
||||
TDB2.cpp TDB2.h
|
||||
Task.cpp Task.h
|
||||
|
@ -45,7 +46,6 @@ set (task_SRCS API.cpp API.h
|
|||
interactive.cpp
|
||||
recur.cpp
|
||||
rules.cpp
|
||||
rx.cpp rx.h
|
||||
sort.cpp
|
||||
text.cpp text.h
|
||||
utf8.cpp utf8.h
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
#include <inttypes.h>
|
||||
#include <Nibbler.h>
|
||||
#include <Date.h>
|
||||
#include <rx.h>
|
||||
#include <RegX.h>
|
||||
|
||||
const char* c_digits = "0123456789";
|
||||
|
||||
|
@ -146,9 +146,10 @@ bool Nibbler::getUntilRx (const std::string& regex, std::string& result)
|
|||
else
|
||||
modified_regex = regex;
|
||||
|
||||
RegX r (modified_regex, true);
|
||||
std::vector <int> start;
|
||||
std::vector <int> end;
|
||||
if (regexMatch (start, end, mInput.substr (mCursor), modified_regex, true))
|
||||
if (r.match (start, end, mInput.substr (mCursor)))
|
||||
{
|
||||
result = mInput.substr (mCursor, start[0]);
|
||||
mCursor += start[0];
|
||||
|
@ -450,8 +451,9 @@ bool Nibbler::getRx (const std::string& regex, std::string& result)
|
|||
else
|
||||
modified_regex = regex;
|
||||
|
||||
RegX r (modified_regex, true);
|
||||
std::vector <std::string> results;
|
||||
if (regexMatch (results, mInput.substr (mCursor), modified_regex, true))
|
||||
if (r.match (results, mInput.substr (mCursor)))
|
||||
{
|
||||
result = results[0];
|
||||
mCursor += result.length ();
|
||||
|
@ -1010,8 +1012,9 @@ bool Nibbler::skipRx (const std::string& regex)
|
|||
else
|
||||
modified_regex = regex;
|
||||
|
||||
RegX r (modified_regex, true);
|
||||
std::vector <std::string> results;
|
||||
if (regexMatch (results, mInput.substr (mCursor), modified_regex, true))
|
||||
if (r.match (results, mInput.substr (mCursor)))
|
||||
{
|
||||
mCursor += results[0].length ();
|
||||
return true;
|
||||
|
|
157
src/RegX.cpp
Normal file
157
src/RegX.cpp
Normal file
|
@ -0,0 +1,157 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
// taskwarrior - a command line task list manager.
|
||||
//
|
||||
// Copyright 2010 - 2011, Paul Beckingham, Federico Hernandez.
|
||||
// All rights reserved.
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify it under
|
||||
// the terms of the GNU General Public License as published by the Free Software
|
||||
// Foundation; either version 2 of the License, or (at your option) any later
|
||||
// version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
// details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along with
|
||||
// this program; if not, write to the
|
||||
//
|
||||
// Free Software Foundation, Inc.,
|
||||
// 51 Franklin Street, Fifth Floor,
|
||||
// Boston, MA
|
||||
// 02110-1301
|
||||
// USA
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <RegX.h>
|
||||
|
||||
#define L10N // Localization complete.
|
||||
|
||||
//#define _POSIX_C_SOURCE 1
|
||||
#define MAX_MATCHES 64
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Builds a matcher for 'pattern' and compiles it straight away, so a bad
// pattern is reported at construction time: compile () throws a std::string
// carrying the regerror () text.
//
//   pattern         POSIX extended regular expression
//   case_sensitive  when false, matching ignores case (REG_ICASE)
RegX::RegX (const std::string& pattern, bool case_sensitive /* = true */)
: _compiled (false),
  _pattern (pattern),
  _case_sensitive (case_sensitive)
{
  compile ();
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Copy constructor.  Only the pattern and the case flag are duplicated; the
// compiled regex_t is not copied.  The new object starts out uncompiled and
// the match () overloads call compile () on first use.
RegX::RegX (const RegX& other)
: _compiled (false),
  _pattern (other._pattern),
  _case_sensitive (other._case_sensitive)
{
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Copy assignment.  Takes over the other object's pattern and case flag and
// drops any regex this object had already compiled, so the next match () call
// recompiles against the new pattern.  Self-assignment is a no-op.
RegX& RegX::operator= (const RegX& other)
{
  if (this != &other)
  {
    // Release the regex compiled for the old pattern before forgetting about
    // it; merely clearing the flag would leak the memory regcomp () allocated.
    if (_compiled)
    {
      regfree (&_regex);
      _compiled = false;
    }

    _pattern        = other._pattern;
    _case_sensitive = other._case_sensitive;
  }

  return *this;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool RegX::operator== (const RegX& other) const
|
||||
{
|
||||
return _pattern == other._pattern &&
|
||||
_case_sensitive == other._case_sensitive;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Frees the compiled regex, if there is one.
RegX::~RegX ()
{
  // Nothing was handed to regcomp () successfully, so there is nothing to free.
  if (!_compiled)
    return;

  regfree (&_regex);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void RegX::compile ()
|
||||
{
|
||||
if (!_compiled)
|
||||
{
|
||||
memset (&_regex, 0, sizeof (regex_t));
|
||||
|
||||
int result;
|
||||
if ((result = regcomp (&_regex, _pattern.c_str (),
|
||||
REG_EXTENDED | /*REG_NOSUB |*/ REG_NEWLINE |
|
||||
(_case_sensitive ? 0 : REG_ICASE))) != 0)
|
||||
{
|
||||
char message[256];
|
||||
regerror (result, &_regex, message, 256);
|
||||
throw std::string (message);
|
||||
}
|
||||
|
||||
_compiled = true;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool RegX::match (const std::string& in)
|
||||
{
|
||||
if (!_compiled)
|
||||
compile ();
|
||||
|
||||
return regexec (&_regex, in.c_str (), 0, NULL, 0) == 0 ? true : false;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool RegX::match (
|
||||
std::vector<std::string>& matches,
|
||||
const std::string& in)
|
||||
{
|
||||
if (!_compiled)
|
||||
compile ();
|
||||
|
||||
regmatch_t rm[MAX_MATCHES];
|
||||
if (regexec (&_regex, in.c_str (), MAX_MATCHES, rm, 0) == 0)
|
||||
{
|
||||
for (unsigned int i = 1; i < 1 + _regex.re_nsub; ++i)
|
||||
matches.push_back (in.substr (rm[i].rm_so, rm[i].rm_eo - rm[i].rm_so));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool RegX::match (
|
||||
std::vector <int>& start,
|
||||
std::vector <int>& end,
|
||||
const std::string& in)
|
||||
{
|
||||
if (!_compiled)
|
||||
compile ();
|
||||
|
||||
regmatch_t rm[MAX_MATCHES];
|
||||
if (regexec (&_regex, in.c_str (), MAX_MATCHES, rm, 0) == 0)
|
||||
{
|
||||
for (unsigned int i = 1; i < 1 + _regex.re_nsub; ++i)
|
||||
{
|
||||
start.push_back (rm[i].rm_so);
|
||||
end.push_back (rm[i].rm_eo);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
|
@ -25,16 +25,36 @@
|
|||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef INCLUDED_RX
|
||||
#define INCLuDED_RX
|
||||
#ifndef INCLUDED_REGX
|
||||
#define INCLUDED_REGX
|
||||
#define L10N // Localization complete.
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <regex.h>
|
||||
|
||||
bool regexMatch (const std::string&, const std::string&, bool caseSensitive = true);
|
||||
bool regexMatch (std::vector<std::string>&, const std::string&, const std::string&, bool caseSensitive = true);
|
||||
bool regexMatch (std::vector<int>&, std::vector<int>&, const std::string&, const std::string&, bool caseSensitive = true);
|
||||
// A POSIX extended regular expression that is compiled once and can then be
// matched against any number of strings.
class RegX
{
public:
  // Stores the pattern and compiles it immediately.
  RegX (const std::string&, bool caseSensitive = true);

  // Copies the pattern and case flag only; the copy compiles on first match.
  RegX (const RegX&);
  RegX& operator= (const RegX&);

  // Compares pattern text and case sensitivity.
  bool operator== (const RegX&) const;

  ~RegX ();

  // True when the pattern matches somewhere in the given string.
  bool match (const std::string&);

  // As above, and appends the text of each parenthesised sub-expression
  // (group 1 onward) to the vector.
  bool match (std::vector<std::string>&, const std::string&);

  // As above, but appends the start and end offsets of each parenthesised
  // sub-expression (group 1 onward) to the two vectors.
  bool match (std::vector <int>&, std::vector <int>&, const std::string&);

private:
  // Compiles _pattern into _regex if that has not been done yet.
  void compile ();

  bool        _compiled;        // true once _regex holds a compiled pattern
  std::string _pattern;         // pattern source text
  bool        _case_sensitive;  // false adds REG_ICASE when compiling
  regex_t     _regex;           // compiled form, valid only while _compiled
};
|
||||
|
||||
#endif
|
||||
|
|
@ -29,7 +29,7 @@
|
|||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <stdlib.h>
|
||||
#include <rx.h>
|
||||
#include <RegX.h>
|
||||
#include <Context.h>
|
||||
#include <util.h>
|
||||
#include <cmake.h>
|
||||
|
@ -223,9 +223,10 @@ int CmdDiagnostics::execute (std::string& output)
|
|||
char* p = fgets (buffer, 1023, fp);
|
||||
pclose (fp);
|
||||
|
||||
RegX r ("usage", false);
|
||||
if (p)
|
||||
out << " scp: "
|
||||
<< (regexMatch (buffer, "usage") ? "found" : "n/a")
|
||||
<< (r.match (buffer) ? "found" : "n/a")
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
|
@ -237,8 +238,9 @@ int CmdDiagnostics::execute (std::string& output)
|
|||
// rsync version 2.6.9 protocol version 29
|
||||
if (p)
|
||||
{
|
||||
RegX r ("version ([0-9]+\\.[0-9]+\\.[0-9]+)", false);
|
||||
matches.clear ();
|
||||
regexMatch (matches, buffer, "version ([0-9]+\\.[0-9]+\\.[0-9]+)");
|
||||
r.match (matches, buffer);
|
||||
out << " rsync: "
|
||||
<< (matches.size () ? matches[0] : "n/a")
|
||||
<< "\n";
|
||||
|
@ -253,8 +255,9 @@ int CmdDiagnostics::execute (std::string& output)
|
|||
// curl 7.19.7 (universal-apple-darwin10.0) libcurl/7.19.7 OpenSSL/0.9.8l zlib/1.2.3
|
||||
if (p)
|
||||
{
|
||||
RegX r ("curl ([0-9]+\\.[0-9]+\\.[0-9]+)", false);
|
||||
matches.clear ();
|
||||
regexMatch (matches, buffer, "curl ([0-9]+\\.[0-9]+\\.[0-9]+)");
|
||||
r.match (matches, buffer);
|
||||
out << " curl: "
|
||||
<< (matches.size () ? matches[0] : "n/a")
|
||||
<< "\n";
|
||||
|
|
138
src/rx.cpp
138
src/rx.cpp
|
@ -1,138 +0,0 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
// taskwarrior - a command line task list manager.
|
||||
//
|
||||
// Copyright 2010 - 2011, Paul Beckingham, Federico Hernandez.
|
||||
// All rights reserved.
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify it under
|
||||
// the terms of the GNU General Public License as published by the Free Software
|
||||
// Foundation; either version 2 of the License, or (at your option) any later
|
||||
// version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
// details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along with
|
||||
// this program; if not, write to the
|
||||
//
|
||||
// Free Software Foundation, Inc.,
|
||||
// 51 Franklin Street, Fifth Floor,
|
||||
// Boston, MA
|
||||
// 02110-1301
|
||||
// USA
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <regex.h>
|
||||
#include <rx.h>
|
||||
|
||||
#define L10N // Localization complete.
|
||||
|
||||
//#define _POSIX_C_SOURCE 1
|
||||
#define MAX_MATCHES 8
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Compiles 'pattern' as a POSIX extended regex and reports whether it matches
// anywhere in 'in'.
//
//   in             text to search
//   pattern        POSIX extended regular expression
//   caseSensitive  when false, REG_ICASE is added to the compile flags
//
// Returns true on a match and false on REG_NOMATCH.  Throws a std::string
// holding the regerror () text when the pattern does not compile or when
// regexec () fails for any other reason.
bool regexMatch (
  const std::string& in,
  const std::string& pattern,
  bool caseSensitive /* = true */)
{
  regex_t r = regex_t ();

  int flags = REG_EXTENDED | REG_NOSUB | REG_NEWLINE;
  if (!caseSensitive)
    flags |= REG_ICASE;

  int result = regcomp (&r, pattern.c_str (), flags);
  if (result != 0)
  {
    char message[256];
    regerror (result, &r, message, sizeof (message));
    throw std::string (message);
  }

  // From here on 'r' owns memory.  Release it on every exit path, including
  // the REG_NOMATCH return and the error throw below.
  struct Releaser
  {
    regex_t* compiled;
    ~Releaser () { regfree (compiled); }
  } releaser = {&r};

  result = regexec (&r, in.c_str (), 0, NULL, 0);
  if (result == 0)
    return true;

  if (result == REG_NOMATCH)
    return false;

  char message[256];
  regerror (result, &r, message, sizeof (message));
  throw std::string (message);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Compiles 'pattern' as a POSIX extended regex, matches it against 'in' and,
// on success, appends the text captured by each parenthesised sub-expression
// to 'out' (group 1 first; the overall match itself is not included).
//
//   out            receives one entry per sub-expression, in pattern order; a
//                  group that took no part in the match contributes an empty
//                  string so positions stay aligned with group numbers
//   in             text to search
//   pattern        POSIX extended regular expression
//   caseSensitive  when false, REG_ICASE is added to the compile flags
//
// Returns true on a match and false on REG_NOMATCH ('out' is then untouched).
// Throws a std::string holding the regerror () text when the pattern does not
// compile or when regexec () fails for any other reason.
bool regexMatch (
  std::vector<std::string>& out,
  const std::string& in,
  const std::string& pattern,
  bool caseSensitive /* = true */)
{
  regex_t r = regex_t ();

  int flags = REG_EXTENDED | REG_NEWLINE;
  if (!caseSensitive)
    flags |= REG_ICASE;

  int result = regcomp (&r, pattern.c_str (), flags);
  if (result != 0)
  {
    char message[256];
    regerror (result, &r, message, sizeof (message));
    throw std::string (message);
  }

  // From here on 'r' owns memory.  Release it on every exit path, including
  // the REG_NOMATCH return and any exception.
  struct Releaser
  {
    regex_t* compiled;
    ~Releaser () { regfree (compiled); }
  } releaser = {&r};

  // Slot 0 is the whole match, slots 1..re_nsub the sub-expressions.  Sizing
  // the buffer from re_nsub keeps the loop below inside it for any pattern.
  std::vector <regmatch_t> rm (r.re_nsub + 1);
  result = regexec (&r, in.c_str (), rm.size (), &rm[0], 0);
  if (result == REG_NOMATCH)
    return false;

  if (result != 0)
  {
    char message[256];
    regerror (result, &r, message, sizeof (message));
    throw std::string (message);
  }

  for (std::vector <regmatch_t>::size_type i = 1; i < rm.size (); ++i)
  {
    // regexec () reports -1 offsets for a group that did not participate
    // (e.g. the unused side of an alternation); substr () would throw on it.
    if (rm[i].rm_so == -1)
      out.push_back (std::string ());
    else
      out.push_back (in.substr (rm[i].rm_so, rm[i].rm_eo - rm[i].rm_so));
  }

  return true;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Compiles 'pattern' as a POSIX extended regex, matches it against 'in' and,
// on success, appends the byte offsets of each parenthesised sub-expression
// (group 1 first; the overall match itself is not included).
//
//   start          receives the offset of the first character of each group
//   end            receives the offset one past the last character of each
//                  group
//   in             text to search
//   pattern        POSIX extended regular expression
//   caseSensitive  when false, REG_ICASE is added to the compile flags
//
// A group that took no part in the match is recorded as -1 / -1, exactly as
// regexec () reports it.  Returns true on a match and false on REG_NOMATCH
// (the vectors are then untouched).  Throws a std::string holding the
// regerror () text when the pattern does not compile or when regexec () fails
// for any other reason.
bool regexMatch (
  std::vector <int>& start,
  std::vector <int>& end,
  const std::string& in,
  const std::string& pattern,
  bool caseSensitive /* = true */)
{
  regex_t r = regex_t ();

  int flags = REG_EXTENDED | REG_NEWLINE;
  if (!caseSensitive)
    flags |= REG_ICASE;

  int result = regcomp (&r, pattern.c_str (), flags);
  if (result != 0)
  {
    char message[256];
    regerror (result, &r, message, sizeof (message));
    throw std::string (message);
  }

  // From here on 'r' owns memory.  Release it on every exit path, including
  // the REG_NOMATCH return and any exception.
  struct Releaser
  {
    regex_t* compiled;
    ~Releaser () { regfree (compiled); }
  } releaser = {&r};

  // Slot 0 is the whole match, slots 1..re_nsub the sub-expressions.  Sizing
  // the buffer from re_nsub keeps the loop below inside it for any pattern.
  std::vector <regmatch_t> rm (r.re_nsub + 1);
  result = regexec (&r, in.c_str (), rm.size (), &rm[0], 0);
  if (result == REG_NOMATCH)
    return false;

  if (result != 0)
  {
    char message[256];
    regerror (result, &r, message, sizeof (message));
    throw std::string (message);
  }

  for (std::vector <regmatch_t>::size_type i = 1; i < rm.size (); ++i)
  {
    start.push_back (static_cast <int> (rm[i].rm_so));
    end.push_back   (static_cast <int> (rm[i].rm_eo));
  }

  return true;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
Loading…
Add table
Add a link
Reference in a new issue