mirror of
https://github.com/GothenburgBitFactory/taskwarrior.git
synced 2025-06-26 10:54:26 +02:00
Lexer
- Implemented boundary detection hints.
This commit is contained in:
parent
cbb6decf93
commit
008ba6ecab
2 changed files with 27 additions and 0 deletions
|
@ -24,6 +24,7 @@
|
||||||
//
|
//
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#include <ctype.h>
|
||||||
#include <utf8.h>
|
#include <utf8.h>
|
||||||
#include <ISO8601.h>
|
#include <ISO8601.h>
|
||||||
#include <Date.h>
|
#include <Date.h>
|
||||||
|
@ -41,6 +42,9 @@ Lexer::Lexer (const std::string& input)
|
||||||
, _n1 (32)
|
, _n1 (32)
|
||||||
, _n2 (32)
|
, _n2 (32)
|
||||||
, _n3 (32)
|
, _n3 (32)
|
||||||
|
, _boundary01 (false)
|
||||||
|
, _boundary12 (false)
|
||||||
|
, _boundary23 (false)
|
||||||
, _ambiguity (true)
|
, _ambiguity (true)
|
||||||
{
|
{
|
||||||
// Read 4 chars in preparation. Even if there are < 4. Take a deep breath.
|
// Read 4 chars in preparation. Even if there are < 4. Take a deep breath.
|
||||||
|
@ -636,6 +640,20 @@ bool Lexer::is_ws (int c)
|
||||||
c == 0x3000); // ideographic space Common Separator, space
|
c == 0x3000); // ideographic space Common Separator, space
|
||||||
}
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
bool Lexer::boundary (int left, int right)
{
  // Reports whether there is a character-class boundary between two adjacent
  // characters 'left' and 'right'.
  //
  // Returns true when exactly one of the two is a digit, exactly one is
  // alphabetic, exactly one is whitespace, or when either is punctuation.
  // Returns false otherwise.
  //
  // The <ctype.h> classifiers are only defined for EOF and for values that fit
  // in an unsigned char.  The lexer passes in values read from UTF-8 input,
  // which can be larger than that, so anything outside 0..0xFF is treated as
  // belonging to none of the four classes instead of being handed to ctype.
  // Consequently two such out-of-range characters never form a boundary here.
  const bool left_ok  = (left  >= 0 && left  <= 0xFF);
  const bool right_ok = (right >= 0 && right <= 0xFF);

  const bool left_digit  = left_ok  && isdigit (left);
  const bool right_digit = right_ok && isdigit (right);

  const bool left_alpha  = left_ok  && isalpha (left);
  const bool right_alpha = right_ok && isalpha (right);

  const bool left_space  = left_ok  && isspace (left);
  const bool right_space = right_ok && isspace (right);

  const bool left_punct  = left_ok  && ispunct (left);
  const bool right_punct = right_ok && ispunct (right);

  // XOR: the two characters disagree on membership of a class.
  if (left_digit != right_digit) return true;
  if (left_alpha != right_alpha) return true;
  if (left_space != right_space) return true;

  // OR: punctuation always delimits, even next to other punctuation.
  if (left_punct || right_punct) return true;

  return false;
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Split 'input' into 'words' on Lexer::is_ws boundaries, observing quotes.
|
// Split 'input' into 'words' on Lexer::is_ws boundaries, observing quotes.
|
||||||
void Lexer::word_split (std::vector <std::string>& words, const std::string& input)
|
void Lexer::word_split (std::vector <std::string>& words, const std::string& input)
|
||||||
|
@ -814,6 +832,11 @@ void Lexer::shift ()
|
||||||
_n2 = _n3;
|
_n2 = _n3;
|
||||||
_n3 = utf8_next_char (_input, _i);
|
_n3 = utf8_next_char (_input, _i);
|
||||||
++_shift_counter;
|
++_shift_counter;
|
||||||
|
|
||||||
|
// Detect type boundaries between characters.
|
||||||
|
_boundary01 = boundary (_n0, _n1);
|
||||||
|
_boundary12 = boundary (_n1, _n2);
|
||||||
|
_boundary23 = boundary (_n2, _n3);
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
|
@ -65,6 +65,7 @@ public:
|
||||||
|
|
||||||
static const std::string type_name (const Type&);
|
static const std::string type_name (const Type&);
|
||||||
static bool is_ws (int);
|
static bool is_ws (int);
|
||||||
|
static bool boundary (int, int);
|
||||||
static void word_split (std::vector <std::string>&, const std::string&);
|
static void word_split (std::vector <std::string>&, const std::string&);
|
||||||
static void token_split (std::vector <std::string>&, const std::string&);
|
static void token_split (std::vector <std::string>&, const std::string&);
|
||||||
static void token_split (std::vector <std::pair <std::string, Lexer::Type> >&, const std::string&);
|
static void token_split (std::vector <std::pair <std::string, Lexer::Type> >&, const std::string&);
|
||||||
|
@ -93,6 +94,9 @@ private:
|
||||||
int _n1;
|
int _n1;
|
||||||
int _n2;
|
int _n2;
|
||||||
int _n3;
|
int _n3;
|
||||||
|
bool _boundary01;
|
||||||
|
bool _boundary12;
|
||||||
|
bool _boundary23;
|
||||||
bool _ambiguity;
|
bool _ambiguity;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue