- Refactored (step 1) the ISO and Legacy date/duration parsing for lexer state
  machine breakout.
This commit is contained in:
Paul Beckingham 2014-06-29 09:36:27 -04:00
parent d54afc844c
commit 65f979cb4f
2 changed files with 75 additions and 60 deletions

View file

@ -102,56 +102,18 @@ bool Lexer::token (std::string& result, Type& type)
else if (is_dec_digit (_n0))
{
// Speculatively try a date and duration parse. Longest wins.
std::string::size_type iso_i = 0;
std::string iso_result;
ISO8601d iso;
iso.ambiguity (_ambiguity);
if (iso.parse (_input.substr (_shift_counter), iso_i))
iso_result = _input.substr (_shift_counter, iso_i);
std::string::size_type dur_i = 0;
std::string dur_result;
Duration dur;
if (dur.parse (_input.substr (_shift_counter), dur_i))
dur_result = _input.substr (_shift_counter, dur_i);
if (iso_result.length () > dur_result.length ())
if (is_date (result))
{
while (iso_i--) shift ();
result = iso_result;
type = typeDate;
return true;
}
else if (dur_result.length () > iso_result.length ())
if (is_duration (result))
{
while (dur_i--) shift ();
result = dur_result;
type = typeDuration;
return true;
}
// Try a legacy rc.dateformat parse here.
try
{
if (Lexer::dateFormat != "")
{
std::string::size_type space = _input.find (' ', _i);
if (space == std::string::npos)
space = _input.length ();
std::string legacy = _input.substr (_shift_counter, space - _shift_counter);
Date legacyDate (legacy, Lexer::dateFormat, true, false);
space -= _shift_counter;
while (space--) shift ();
result = legacy;
type = typeDate;
return true;
}
}
catch (...) { /* Never mind. */ }
type = typeNumber;
result += utf8_character (_n0);
shift ();
@ -196,29 +158,14 @@ bool Lexer::token (std::string& result, Type& type)
}
else if (is_ident_start (_n0))
{
std::string::size_type iso_i = 0;
std::string iso_result;
ISO8601p iso;
if (iso.parse (_input.substr (_shift_counter), iso_i))
iso_result = _input.substr (_shift_counter, iso_i);
std::string::size_type dur_i = 0;
std::string dur_result;
Duration dur;
if (dur.parse (_input.substr (_shift_counter), dur_i))
dur_result = _input.substr (_shift_counter, dur_i);
if (iso_result.length () > dur_result.length ())
if (is_date (result))
{
while (iso_i--) shift ();
result = iso_result;
type = typeDuration;
type = typeDate;
return true;
}
else if (dur_result.length () > iso_result.length ())
if (is_duration (result))
{
while (dur_i--) shift ();
result = dur_result;
type = typeDuration;
return true;
}
@ -693,6 +640,72 @@ void Lexer::token_split (std::vector <std::pair <std::string, Lexer::Type> >& le
lexemes.push_back (std::pair <std::string, Lexer::Type>(word, type));
}
////////////////////////////////////////////////////////////////////////////////
bool Lexer::is_date (std::string& result)
{
// Try an ISO date parse.
std::string::size_type iso_i = 0;
std::string iso_result;
ISO8601d iso;
iso.ambiguity (_ambiguity);
if (iso.parse (_input.substr (_shift_counter), iso_i))
{
result = _input.substr (_shift_counter, iso_i);
while (iso_i--) shift ();
return true;
}
// Try a legacy rc.dateformat parse here.
if (Lexer::dateFormat != "")
{
try
{
// TODO Why stop at the space? This seems wrong.
std::string::size_type legacy_i = _input.find (' ', _i);
if (legacy_i == std::string::npos)
legacy_i = _input.length ();
std::string legacy_result = _input.substr (_shift_counter, legacy_i - _shift_counter);
Date legacyDate (legacy_result, Lexer::dateFormat, true, false);
legacy_i -= _shift_counter;
while (legacy_i--) shift ();
result = legacy_result;
return true;
}
catch (...) { /* Never mind. */ }
}
return false;
}
////////////////////////////////////////////////////////////////////////////////
bool Lexer::is_duration (std::string& result)
{
std::string::size_type iso_i = 0;
std::string iso_result;
ISO8601p iso;
if (iso.parse (_input.substr (_shift_counter), iso_i))
{
result = _input.substr (_shift_counter, iso_i);
while (iso_i--) shift ();
return true;
}
std::string::size_type dur_i = 0;
std::string dur_result;
Duration dur;
if (dur.parse (_input.substr (_shift_counter), dur_i))
{
result = _input.substr (_shift_counter, dur_i);
while (dur_i--) shift ();
return true;
}
return false;
}
////////////////////////////////////////////////////////////////////////////////
bool Lexer::is_punct (int c) const
{

View file

@ -71,6 +71,8 @@ public:
static void token_split (std::vector <std::pair <std::string, Lexer::Type> >&, const std::string&);
private:
bool is_date (std::string&);
bool is_duration (std::string&);
bool is_punct (int) const;
bool is_num (int) const;
bool is_ident_start (int) const;