mirror of
https://github.com/GothenburgBitFactory/taskwarrior.git
synced 2025-07-07 20:06:36 +02:00
Lexer
- Refactored (step 1) the ISO and Legacy date/duration parsing for lexer state machine breakout.
This commit is contained in:
parent
d54afc844c
commit
65f979cb4f
2 changed files with 75 additions and 60 deletions
133
src/Lexer.cpp
133
src/Lexer.cpp
|
@ -102,56 +102,18 @@ bool Lexer::token (std::string& result, Type& type)
|
||||||
else if (is_dec_digit (_n0))
|
else if (is_dec_digit (_n0))
|
||||||
{
|
{
|
||||||
// Speculatively try a date and duration parse. Longest wins.
|
// Speculatively try a date and duration parse. Longest wins.
|
||||||
std::string::size_type iso_i = 0;
|
if (is_date (result))
|
||||||
std::string iso_result;
|
|
||||||
ISO8601d iso;
|
|
||||||
iso.ambiguity (_ambiguity);
|
|
||||||
if (iso.parse (_input.substr (_shift_counter), iso_i))
|
|
||||||
iso_result = _input.substr (_shift_counter, iso_i);
|
|
||||||
|
|
||||||
std::string::size_type dur_i = 0;
|
|
||||||
std::string dur_result;
|
|
||||||
Duration dur;
|
|
||||||
if (dur.parse (_input.substr (_shift_counter), dur_i))
|
|
||||||
dur_result = _input.substr (_shift_counter, dur_i);
|
|
||||||
|
|
||||||
if (iso_result.length () > dur_result.length ())
|
|
||||||
{
|
{
|
||||||
while (iso_i--) shift ();
|
|
||||||
result = iso_result;
|
|
||||||
type = typeDate;
|
type = typeDate;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
else if (dur_result.length () > iso_result.length ())
|
|
||||||
|
if (is_duration (result))
|
||||||
{
|
{
|
||||||
while (dur_i--) shift ();
|
|
||||||
result = dur_result;
|
|
||||||
type = typeDuration;
|
type = typeDuration;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try a legacy rc.dateformat parse here.
|
|
||||||
try
|
|
||||||
{
|
|
||||||
if (Lexer::dateFormat != "")
|
|
||||||
{
|
|
||||||
std::string::size_type space = _input.find (' ', _i);
|
|
||||||
if (space == std::string::npos)
|
|
||||||
space = _input.length ();
|
|
||||||
|
|
||||||
std::string legacy = _input.substr (_shift_counter, space - _shift_counter);
|
|
||||||
Date legacyDate (legacy, Lexer::dateFormat, true, false);
|
|
||||||
|
|
||||||
space -= _shift_counter;
|
|
||||||
while (space--) shift ();
|
|
||||||
result = legacy;
|
|
||||||
type = typeDate;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
catch (...) { /* Never mind. */ }
|
|
||||||
|
|
||||||
type = typeNumber;
|
type = typeNumber;
|
||||||
result += utf8_character (_n0);
|
result += utf8_character (_n0);
|
||||||
shift ();
|
shift ();
|
||||||
|
@ -196,29 +158,14 @@ bool Lexer::token (std::string& result, Type& type)
|
||||||
}
|
}
|
||||||
else if (is_ident_start (_n0))
|
else if (is_ident_start (_n0))
|
||||||
{
|
{
|
||||||
std::string::size_type iso_i = 0;
|
if (is_date (result))
|
||||||
std::string iso_result;
|
|
||||||
ISO8601p iso;
|
|
||||||
if (iso.parse (_input.substr (_shift_counter), iso_i))
|
|
||||||
iso_result = _input.substr (_shift_counter, iso_i);
|
|
||||||
|
|
||||||
std::string::size_type dur_i = 0;
|
|
||||||
std::string dur_result;
|
|
||||||
Duration dur;
|
|
||||||
if (dur.parse (_input.substr (_shift_counter), dur_i))
|
|
||||||
dur_result = _input.substr (_shift_counter, dur_i);
|
|
||||||
|
|
||||||
if (iso_result.length () > dur_result.length ())
|
|
||||||
{
|
{
|
||||||
while (iso_i--) shift ();
|
type = typeDate;
|
||||||
result = iso_result;
|
|
||||||
type = typeDuration;
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
else if (dur_result.length () > iso_result.length ())
|
|
||||||
|
if (is_duration (result))
|
||||||
{
|
{
|
||||||
while (dur_i--) shift ();
|
|
||||||
result = dur_result;
|
|
||||||
type = typeDuration;
|
type = typeDuration;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -693,6 +640,72 @@ void Lexer::token_split (std::vector <std::pair <std::string, Lexer::Type> >& le
|
||||||
lexemes.push_back (std::pair <std::string, Lexer::Type>(word, type));
|
lexemes.push_back (std::pair <std::string, Lexer::Type>(word, type));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
bool Lexer::is_date (std::string& result)
|
||||||
|
{
|
||||||
|
// Try an ISO date parse.
|
||||||
|
std::string::size_type iso_i = 0;
|
||||||
|
std::string iso_result;
|
||||||
|
ISO8601d iso;
|
||||||
|
iso.ambiguity (_ambiguity);
|
||||||
|
if (iso.parse (_input.substr (_shift_counter), iso_i))
|
||||||
|
{
|
||||||
|
result = _input.substr (_shift_counter, iso_i);
|
||||||
|
while (iso_i--) shift ();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try a legacy rc.dateformat parse here.
|
||||||
|
if (Lexer::dateFormat != "")
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
// TODO Why stop at the space? This seems wrong.
|
||||||
|
std::string::size_type legacy_i = _input.find (' ', _i);
|
||||||
|
if (legacy_i == std::string::npos)
|
||||||
|
legacy_i = _input.length ();
|
||||||
|
|
||||||
|
std::string legacy_result = _input.substr (_shift_counter, legacy_i - _shift_counter);
|
||||||
|
Date legacyDate (legacy_result, Lexer::dateFormat, true, false);
|
||||||
|
|
||||||
|
legacy_i -= _shift_counter;
|
||||||
|
while (legacy_i--) shift ();
|
||||||
|
result = legacy_result;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
catch (...) { /* Never mind. */ }
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
bool Lexer::is_duration (std::string& result)
|
||||||
|
{
|
||||||
|
std::string::size_type iso_i = 0;
|
||||||
|
std::string iso_result;
|
||||||
|
ISO8601p iso;
|
||||||
|
if (iso.parse (_input.substr (_shift_counter), iso_i))
|
||||||
|
{
|
||||||
|
result = _input.substr (_shift_counter, iso_i);
|
||||||
|
while (iso_i--) shift ();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string::size_type dur_i = 0;
|
||||||
|
std::string dur_result;
|
||||||
|
Duration dur;
|
||||||
|
if (dur.parse (_input.substr (_shift_counter), dur_i))
|
||||||
|
{
|
||||||
|
result = _input.substr (_shift_counter, dur_i);
|
||||||
|
while (dur_i--) shift ();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
bool Lexer::is_punct (int c) const
|
bool Lexer::is_punct (int c) const
|
||||||
{
|
{
|
||||||
|
|
|
@ -71,6 +71,8 @@ public:
|
||||||
static void token_split (std::vector <std::pair <std::string, Lexer::Type> >&, const std::string&);
|
static void token_split (std::vector <std::pair <std::string, Lexer::Type> >&, const std::string&);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
bool is_date (std::string&);
|
||||||
|
bool is_duration (std::string&);
|
||||||
bool is_punct (int) const;
|
bool is_punct (int) const;
|
||||||
bool is_num (int) const;
|
bool is_num (int) const;
|
||||||
bool is_ident_start (int) const;
|
bool is_ident_start (int) const;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue