mirror of
https://github.com/GothenburgBitFactory/taskwarrior.git
synced 2025-08-26 15:47:19 +02:00
Bug - regular expressions
- Fixed bug where regexec was not being properly called.
- Fixed bug where multiple subexpressions were not being properly handled.
- Fixed bug that allowed regex matching to run off the end of a string.
- Fixed bug that caused regex captures to not be properly offset into the input string.
- Removed capture-group parentheses () from the unit test patterns.
This commit is contained in:
parent
bfca3766b6
commit
2ab11655db
3 changed files with 48 additions and 32 deletions
40
src/RX.cpp
40
src/RX.cpp
|
@ -96,7 +96,7 @@ void RX::compile ()
|
||||||
|
|
||||||
int result;
|
int result;
|
||||||
if ((result = regcomp (&_regex, _pattern.c_str (),
|
if ((result = regcomp (&_regex, _pattern.c_str (),
|
||||||
REG_EXTENDED | /*REG_NOSUB |*/ REG_NEWLINE |
|
REG_EXTENDED | REG_NEWLINE |
|
||||||
(_case_sensitive ? 0 : REG_ICASE))) != 0)
|
(_case_sensitive ? 0 : REG_ICASE))) != 0)
|
||||||
{
|
{
|
||||||
char message[256];
|
char message[256];
|
||||||
|
@ -125,16 +125,21 @@ bool RX::match (
|
||||||
if (!_compiled)
|
if (!_compiled)
|
||||||
compile ();
|
compile ();
|
||||||
|
|
||||||
regmatch_t rm[RX_MAX_MATCHES];
|
regmatch_t rm[2];
|
||||||
if (regexec (&_regex, in.c_str (), RX_MAX_MATCHES, rm, 0) == 0)
|
int offset = 0;
|
||||||
|
int length = in.length ();
|
||||||
|
while (regexec (&_regex, in.c_str () + offset, 2, &rm[0], 0) == 0 &&
|
||||||
|
offset < length)
|
||||||
{
|
{
|
||||||
for (unsigned int i = 1; i < 1 + _regex.re_nsub; ++i)
|
matches.push_back (in.substr (rm[0].rm_so + offset, rm[0].rm_eo - rm[0].rm_so));
|
||||||
matches.push_back (in.substr (rm[i].rm_so, rm[i].rm_eo - rm[i].rm_so));
|
offset += rm[0].rm_eo;
|
||||||
|
|
||||||
return true;
|
// Protection against zero-width patterns causing infinite loops.
|
||||||
|
if (rm[0].rm_so == rm[0].rm_eo)
|
||||||
|
++offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return matches.size () ? true : false;
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -146,19 +151,22 @@ bool RX::match (
|
||||||
if (!_compiled)
|
if (!_compiled)
|
||||||
compile ();
|
compile ();
|
||||||
|
|
||||||
regmatch_t rm[RX_MAX_MATCHES];
|
regmatch_t rm[2];
|
||||||
if (regexec (&_regex, in.c_str (), RX_MAX_MATCHES, rm, 0) == 0)
|
int offset = 0;
|
||||||
|
int length = in.length ();
|
||||||
|
while (regexec (&_regex, in.c_str () + offset, 2, &rm[0], 0) == 0 &&
|
||||||
|
offset < length)
|
||||||
{
|
{
|
||||||
for (unsigned int i = 1; i < 1 + _regex.re_nsub; ++i)
|
start.push_back (rm[0].rm_so + offset);
|
||||||
{
|
end.push_back (rm[0].rm_eo + offset);
|
||||||
start.push_back (rm[i].rm_so);
|
offset += rm[0].rm_eo;
|
||||||
end.push_back (rm[i].rm_eo);
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
// Protection against zero-width patterns causing infinite loops.
|
||||||
|
if (rm[0].rm_so == rm[0].rm_eo)
|
||||||
|
++offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return start.size () ? true : false;
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
2
src/RX.h
2
src/RX.h
|
@ -33,8 +33,6 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <regex.h>
|
#include <regex.h>
|
||||||
|
|
||||||
#define RX_MAX_MATCHES 64
|
|
||||||
|
|
||||||
class RX
|
class RX
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -32,7 +32,7 @@ Context context;
|
||||||
|
|
||||||
int main (int argc, char** argv)
|
int main (int argc, char** argv)
|
||||||
{
|
{
|
||||||
UnitTest ut (16);
|
UnitTest ut (21);
|
||||||
|
|
||||||
std::string text = "This is a test.";
|
std::string text = "This is a test.";
|
||||||
|
|
||||||
|
@ -40,10 +40,10 @@ int main (int argc, char** argv)
|
||||||
ut.ok (r1.match (text), text + " =~ /i. /");
|
ut.ok (r1.match (text), text + " =~ /i. /");
|
||||||
|
|
||||||
std::vector <std::string> matches;
|
std::vector <std::string> matches;
|
||||||
RX r2 ("(i.) ", false);
|
ut.ok (r1.match (matches, text), text + " =~ /i. /");
|
||||||
ut.ok (r2.match (matches, text), text + " =~ /(i.) /");
|
ut.ok (matches.size () == 2, "2 match");
|
||||||
ut.ok (matches.size () == 1, "1 match");
|
ut.is (matches[0], "is ", "$1 == is\\s");
|
||||||
ut.is (matches[0], "is", "$1 == is");
|
ut.is (matches[1], "is ", "$1 == is\\s");
|
||||||
|
|
||||||
text = "abcdefghijklmnopqrstuvwxyz";
|
text = "abcdefghijklmnopqrstuvwxyz";
|
||||||
|
|
||||||
|
@ -68,17 +68,27 @@ int main (int argc, char** argv)
|
||||||
std::vector <std::string> results;
|
std::vector <std::string> results;
|
||||||
std::vector <int> start;
|
std::vector <int> start;
|
||||||
std::vector <int> end;
|
std::vector <int> end;
|
||||||
RX r8 ("(e..)", true);
|
RX r8 ("e..", true);
|
||||||
ut.ok (r8.match (results, text), "(e..) there are matches");
|
ut.ok (r8.match (results, text), "e.. there are matches");
|
||||||
ut.ok (r8.match (start, end, text), "(e..) there are matches");
|
ut.ok (r8.match (start, end, text), "e.. there are matches");
|
||||||
ut.is (results.size (), (size_t) 1, "(e..) == 1 match");
|
ut.is (results.size (), (size_t) 4, "e.. == 4 matches");
|
||||||
ut.is (results[0], "est", "(e..)[0] == 'est'");
|
ut.is (results[0], "est", "e..[0] == 'est'");
|
||||||
ut.is (start[0], 11, "(e..)[0] == 11->");
|
ut.is (start[0], 11, "e..[0] == 11->");
|
||||||
ut.is (end[0], 14, "(e..)[0] == ->14");
|
ut.is (end[0], 14, "e..[0] == ->14");
|
||||||
|
|
||||||
RX r9 ("\\bthe\\b");
|
results.clear ();
|
||||||
|
RX r9 ("e", true);
|
||||||
|
ut.ok (r9.match (results, text), "e there are matches");
|
||||||
|
ut.is (results.size (), (size_t) 6, "e == 6 matches");
|
||||||
|
|
||||||
|
start.clear ();
|
||||||
|
end.clear ();
|
||||||
|
ut.ok (r9.match (start, end, text), "e there are matches");
|
||||||
|
ut.is (start.size (), (size_t) 6, "e == 6 matches");
|
||||||
|
|
||||||
|
RX r10 ("\\bthe\\b");
|
||||||
text = "this is the end.";
|
text = "this is the end.";
|
||||||
ut.ok (r9.match (text), text + " =~ /\\bthe\\b/");
|
ut.ok (r10.match (text), text + " =~ /\\bthe\\b/");
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue