diff --git a/src/CLI.cpp b/src/CLI.cpp index 4ead2d8f..020c8538 100644 --- a/src/CLI.cpp +++ b/src/CLI.cpp @@ -36,6 +36,7 @@ #include #include #include +#include "DatetimeParser.h" //////////////////////////////////////////////////////////////////////////////// A2::A2 (const std::string& raw, Lexer::Type lextype) @@ -724,7 +725,9 @@ Interval CLI::getFilter (const Range& default_range) const else if (args.size () == 1 && args[0] == "") { - filter.setRange ({Datetime (start), 0}); + DatetimeParser dtp; + Range range = dtp.parse_range(start); + filter.setRange (range); } // from @@ -734,7 +737,6 @@ Interval CLI::getFilter (const Range& default_range) const { filter.setRange ({Datetime (start), 0}); } - // to/- else if (args.size () == 3 && args[0] == "" && diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 268b7e5f..f0df1cfb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -11,6 +11,7 @@ set (timew_SRCS AtomicFile.cpp AtomicFile.h ChartConfig.h Database.cpp Database.h Datafile.cpp Datafile.h + DatetimeParser.cpp DatetimeParser.h Exclusion.cpp Exclusion.h Extensions.cpp Extensions.h Interval.cpp Interval.h diff --git a/src/DatetimeParser.cpp b/src/DatetimeParser.cpp new file mode 100644 index 00000000..c715f652 --- /dev/null +++ b/src/DatetimeParser.cpp @@ -0,0 +1,2924 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright 2020, Thomas Lauf, Paul Beckingham, Federico Hernandez +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be 
included
+// in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+// http://www.opensource.org/licenses/mit-license.php
+//
+////////////////////////////////////////////////////////////////////////////////
+
+// NOTE(review): the header names of the #include directives below are missing
+// from this copy of the patch - restore them before building.
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+// Lower-case English day names, Sunday first.
+static std::vector <std::string> dayNames {
+  "sunday",
+  "monday",
+  "tuesday",
+  "wednesday",
+  "thursday",
+  "friday",
+  "saturday"};
+
+// Lower-case English month names, January first.
+static std::vector <std::string> monthNames {
+  "january",
+  "february",
+  "march",
+  "april",
+  "may",
+  "june",
+  "july",
+  "august",
+  "september",
+  "october",
+  "november",
+  "december"};
+
+////////////////////////////////////////////////////////////////////////////////
+// Returns the first day of the month that follows year/month. December rolls
+// over to January of the following year, so month 13 is never constructed.
+static Datetime firstOfNextMonth (int year, int month)
+{
+  if (month == 12)
+    return Datetime (year + 1, 1, 1);
+
+  return Datetime (year, month + 1, 1);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Parses 'input' and returns the Range it denotes.
+//
+// The parsers are tried in the order below; the first one that accepts the
+// input determines the result:
+//
+//   epoch                      Range { instant, 0 }
+//   ISO date and time          Range { instant, 0 }
+//   ISO date                   Range { start of period, start of next period }
+//   ISO time                   Range { instant, 0 }
+//   informal time              Range { instant, 0 }
+//   named day                  Range { day, day + 1d }
+//   named month                Range { 1st of month, 1st of following month }
+//   any other named date       Range { instant, 0 }
+//
+// An end of 0 is presumably how Range represents "no end" - confirm against
+// the Range class.
+//
+// Throws the message built by format () when no parser accepts the input.
+Range DatetimeParser::parse_range (const std::string& input)
+{
+  clear ();
+
+  Pig pig (input);
+  auto checkpoint = pig.cursor ();
+
+  // Parse epoch first, as it's the most common scenario.
+  // ::validate and ::resolve are not needed in this case, because parse_epoch
+  // stores the value directly in _date.
+  if (parse_epoch (pig))
+  {
+    return Range { Datetime (_date), 0 };
+  }
+
+  // Allow parse_date_time and parse_date_time_ext regardless of
+  // Datetime::isoEnabled setting, because these formats are relied upon by
+  // the 'import' command, JSON parser and hook system.
+  if (parse_date_time_ext (pig) || // Strictest first.
+      parse_date_time (pig))
+  {
+    // Check the values and determine time_t.
+    if (validate ())
+    {
+      resolve ();
+      return Range { Datetime (_date), 0 };
+    }
+  }
+
+  // A date without a time is only accepted when Datetime::isoEnabled is set.
+  // The basic (undelimited) form additionally requires
+  // Datetime::standaloneDateEnabled.
+  if (Datetime::isoEnabled &&
+      (parse_date_ext (pig) ||
+       (Datetime::standaloneDateEnabled && parse_date (pig))))
+  {
+    // Check the values and determine time_t.
+    if (validate ())
+    {
+      resolve ();
+
+      // The most specific field that was set decides the length of the range.
+      // NOTE(review): parse_date_ext and parse_date set _day = 1 for
+      // month-only input (YYYY-MM / YYYYMM), so such input takes the one-day
+      // branch - confirm that this is intended.
+      if (_day != 0)
+      {
+        auto start_date = Datetime (_date);
+        auto end_date = start_date + Duration ("1d").toTime_t ();
+        return Range { start_date, end_date };
+      }
+      else if (_month != 0)
+      {
+        auto start_date = Datetime (_date);
+        auto end_date = firstOfNextMonth (start_date.year (), start_date.month ());
+        return Range { start_date, end_date };
+      }
+      else if (_year != 0)
+      {
+        auto start_date = Datetime (_date);
+        auto end_date = Datetime (start_date.year () + 1, 1, 1);
+        return Range { start_date, end_date };
+      }
+
+      return Range {};
+    }
+  }
+
+  // A time without a date is only accepted when Datetime::isoEnabled is set.
+  // The basic (undelimited) form additionally requires
+  // Datetime::standaloneTimeEnabled.
+  if (Datetime::isoEnabled &&
+      (parse_time_utc_ext (pig) ||
+       parse_time_utc     (pig) ||
+       parse_time_off_ext (pig) ||
+       parse_time_off     (pig) ||
+       parse_time_ext     (pig) ||
+       (Datetime::standaloneTimeEnabled && parse_time (pig)))) // Time last, as it is the most permissive.
+  {
+    // Check the values and determine time_t.
+    if (validate ())
+    {
+      resolve ();
+      return Range { Datetime (_date), 0 };
+    }
+  }
+
+  // None of the ISO forms matched; start over for the informal and named
+  // forms.
+  pig.restoreTo (checkpoint);
+
+  if (parse_informal_time (pig))
+  {
+    return Range { Datetime (_date), 0 };
+  }
+
+  // ::validate and ::resolve are not needed for the named forms below.
+  if (parse_named_day (pig))
+  {
+    return Range { Datetime (_date), Datetime (_date) + Duration ("1d").toTime_t () };
+  }
+
+  if (parse_named_month (pig))
+  {
+    auto begin = Datetime (_date);
+    auto end = firstOfNextMonth (begin.year (), begin.month ());
+    return Range { begin, end };
+  }
+
+  if (parse_named (pig))
+  {
+    return Range { Datetime (_date), 0 };
+  }
+
+  throw format ("'{1}' is not a valid range.", input);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Resets every parsed field to zero/false, so that a new parse does not see
+// values left over from a previous one.
+void DatetimeParser::clear ()
+{
+  _year    = 0;
+  _month   = 0;
+  _week    = 0;
+  _weekday = 0;
+  _julian  = 0;
+  _day     = 0;
+  _seconds = 0;
+  _offset  = 0;
+  _utc     = false;
+  _date    = 0;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Note how these are all single words.
+//
+// Examples and descriptions, assuming now == 2017-03-05T12:34:56.
+//
+//                  Example              Notes
+//                  -------------------  ------------------
+//   yesterday      2017-03-04T00:00:00  Unaffected
+//   today          2017-03-05T00:00:00  Unaffected
+//   tomorrow       2017-03-06T00:00:00  Unaffected
+//   12th           2017-03-12T00:00:00
+//   monday         2017-03-06T00:00:00
+//   easter         2017-04-16T00:00:00
+//   eastermonday   2017-04-17T00:00:00
+//   ascension      2017-05-25T00:00:00
+//   pentecost      2017-06-04T00:00:00
+//   goodfriday     2017-04-14T00:00:00
+//   midsommar      2017-06-24T00:00:00  midnight, 1st Saturday after 20th June
+//   midsommarafton 2017-06-23T00:00:00  midnight, 1st Friday after 19th June
+//   juhannus       2017-06-23T00:00:00  midnight, 1st Friday after 19th June
+//
+// Returns true if one of the day initializers accepted the input. Otherwise
+// the cursor of 'pig' is put back where it was on entry and false is returned.
+bool DatetimeParser::parse_named_day (Pig& pig)
+{
+  auto rewind = pig.cursor ();
+
+  // Short-circuit evaluation stops at the first initializer that matches.
+  bool matched = initializeYesterday      (pig) ||
+                 initializeToday          (pig) ||
+                 initializeTomorrow       (pig) ||
+                 initializeOrdinal        (pig) ||
+                 initializeDayName        (pig) ||
+                 initializeEaster         (pig) ||
+                 initializeMidsommar      (pig) ||
+                 initializeMidsommarafton (pig);
+
+  if (! matched)
+    pig.restoreTo (rewind);
+
+  return matched;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Returns true if initializeMonthName accepted the input. Otherwise the cursor
+// of 'pig' is put back where it was on entry and false is returned.
+bool DatetimeParser::parse_named_month (Pig& pig)
+{
+  auto rewind = pig.cursor ();
+
+  bool matched = initializeMonthName (pig);
+  if (! matched)
+    pig.restoreTo (rewind);
+
+  return matched;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Returns true if initializeInformalTime accepted the input. Otherwise the
+// cursor of 'pig' is put back where it was on entry and false is returned.
+bool DatetimeParser::parse_informal_time (Pig& pig)
+{
+  auto rewind = pig.cursor ();
+
+  bool matched = initializeInformalTime (pig);
+  if (! matched)
+    pig.restoreTo (rewind);
+
+  return matched;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Note how these are all single words.
+//
+// Examples and descriptions, assuming now == 2017-03-05T12:34:56.
+//
+//                  Example              Notes
+//                  -------------------  ------------------
+//   now            2017-03-05T12:34:56  Unaffected
+//   yesterday      2017-03-04T00:00:00  Unaffected
+//   today          2017-03-05T00:00:00  Unaffected
+//   tomorrow       2017-03-06T00:00:00  Unaffected
+//   12th           2017-03-12T00:00:00
+//   monday         2017-03-06T00:00:00
+//   april          2017-04-01T00:00:00
+//   later          2038-01-18T00:00:00  Unaffected
+//   someday        2038-01-18T00:00:00  Unaffected
+//   sopd           2017-03-04T00:00:00  Unaffected
+//   sod            2017-03-05T00:00:00  Unaffected
+//   sond           2017-03-06T00:00:00  Unaffected
+//   eopd           2017-03-05T00:00:00  Unaffected
+//   eod            2017-03-06T00:00:00  Unaffected
+//   eond           2017-03-07T00:00:00  Unaffected
+//   sopw           2017-02-26T00:00:00  Unaffected
+//   sow            2017-03-05T00:00:00  Unaffected
+//   sonw           2017-03-12T00:00:00  Unaffected
+//   eopw           2017-03-05T00:00:00  Unaffected
+//   eow            2017-03-12T00:00:00  Unaffected
+//   eonw           2017-03-19T00:00:00  Unaffected
+//   sopww          2017-02-27T00:00:00  Unaffected
+//   soww           2017-03-06T00:00:00
+//   sonww          2017-03-06T00:00:00  Unaffected
+//   eopww          2017-03-03T00:00:00  Unaffected
+//   eoww           2017-03-10T00:00:00
+//   eonww          2017-03-17T00:00:00  Unaffected
+//   sopm           2017-02-01T00:00:00  Unaffected
+//   som            2017-03-01T00:00:00  Unaffected
+//   sonm           2017-04-01T00:00:00  Unaffected
+//   eopm           2017-03-01T00:00:00  Unaffected
+//   eom            2017-04-01T00:00:00  Unaffected
+//   eonm           2017-05-01T00:00:00  Unaffected
+//   sopq           2016-10-01T00:00:00  Unaffected
+//   soq            2017-01-01T00:00:00  Unaffected
+//   sonq           2017-04-01T00:00:00  Unaffected
+//   eopq           2017-01-01T00:00:00  Unaffected
+//   eoq            2017-04-01T00:00:00  Unaffected
+//   eonq           2017-07-01T00:00:00  Unaffected
+//   sopy           2016-01-01T00:00:00  Unaffected
+//   soy            2017-01-01T00:00:00  Unaffected
+//   sony           2018-01-01T00:00:00  Unaffected
+//   eopy           2017-01-01T00:00:00  Unaffected
+//   eoy            2018-01-01T00:00:00  Unaffected
+//   eony           2019-01-01T00:00:00  Unaffected
+//   easter         2017-04-16T00:00:00
+//   eastermonday   2017-04-17T00:00:00
+//   ascension      2017-05-25T00:00:00
+//   pentecost      2017-06-04T00:00:00
+//   goodfriday     2017-04-14T00:00:00
+//
midsommar      2017-06-24T00:00:00  midnight, 1st Saturday after 20th June
+//   midsommarafton 2017-06-23T00:00:00  midnight, 1st Friday after 19th June
+//   juhannus       2017-06-23T00:00:00  midnight, 1st Friday after 19th June
+//
+bool DatetimeParser::parse_named (Pig& pig)
+{
+  auto checkpoint = pig.cursor ();
+
+  // Date phrases such as "first monday in march" are not handled here: the
+  // experimental initializeNthDayInMonth step, which worked on tokens delimited
+  // by EOS or WS, is disabled. Should it be re-enabled, tokenize the input at
+  // this point and restore the cursor to 'checkpoint' afterwards. According to
+  // the original note it has to be tried before initializeOrdinal, because it
+  // also covers "1st monday ...".
+
+  // Short-circuit evaluation stops at the first initializer that matches.
+  if (initializeNow          (pig) ||
+      initializeLater        (pig) ||
+      initializeSopd         (pig) ||
+      initializeSod          (pig) ||
+      initializeSond         (pig) ||
+      initializeEopd         (pig) ||
+      initializeEod          (pig) ||
+      initializeEond         (pig) ||
+      initializeSopw         (pig) ||
+      initializeSow          (pig) ||
+      initializeSonw         (pig) ||
+      initializeEopw         (pig) ||
+      initializeEow          (pig) ||
+      initializeEonw         (pig) ||
+      initializeSopww        (pig) ||  // Must appear after sopw
+      initializeSonww        (pig) ||  // Must appear after sonw
+      initializeSoww         (pig) ||  // Must appear after sow
+      initializeEopww        (pig) ||  // Must appear after eopw
+      initializeEonww        (pig) ||  // Must appear after eonw
+      initializeEoww         (pig) ||  // Must appear after eow
+      initializeSopm         (pig) ||
+      initializeSom          (pig) ||
+      initializeSonm         (pig) ||
+      initializeEopm         (pig) ||
+      initializeEom          (pig) ||
+      initializeEonm         (pig) ||
+      initializeSopq         (pig) ||
+      initializeSoq          (pig) ||
+      initializeSonq         (pig) ||
+      initializeEopq         (pig) ||
+      initializeEoq          (pig) ||
+      initializeEonq         (pig) ||
+      initializeSopy         (pig) ||
+      initializeSoy          (pig) ||
+      initializeSony         (pig) ||
+      initializeEopy         (pig) ||
+      initializeEoy          (pig) ||
+      initializeEony         (pig) ||
+      initializeInformalTime (pig))
+  {
+    return true;
+  }
+
+  pig.restoreTo (checkpoint);
+  return false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Valid epoch values are unsigned integers at or after 1980-01-01T00:00:00Z
+// (315532800). This restriction means that '12' will not be identified as an
+// epoch date.
+//
+// On success the value is stored in _date. On failure the cursor is restored.
+bool DatetimeParser::parse_epoch (Pig& pig)
+{
+  auto checkpoint = pig.cursor ();
+
+  int epoch {};
+  if (pig.getDigits (epoch) &&
+      ! unicodeLatinAlpha (pig.peek ()) &&
+      epoch >= 315532800)
+  {
+    _date = static_cast <time_t> (epoch);
+    return true;
+  }
+
+  pig.restoreTo (checkpoint);
+  return false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// date_ext 'T' time_utc_ext 'Z'
+// date_ext 'T' time_off_ext
+// date_ext 'T' time_ext
+bool DatetimeParser::parse_date_time_ext (Pig& pig)
+{
+  auto checkpoint = pig.cursor ();
+
+  if (parse_date_ext (pig) &&
+      pig.skip ('T') &&
+      (parse_time_utc_ext (pig) ||
+       parse_time_off_ext (pig) ||
+       parse_time_ext (pig)))
+  {
+    return true;
+  }
+
+  pig.restoreTo (checkpoint);
+  return false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// YYYY-MM-DD
+// YYYY-MM
+// YYYY-DDD
+// YYYY-Www-D
+// YYYY-Www
+//
+// All values are parsed into locals and only copied to the members once a form
+// has been accepted, so a rejected parse leaves _year, _month, _day, _julian,
+// _week and _weekday untouched.
+bool DatetimeParser::parse_date_ext (Pig& pig)
+{
+  auto checkpoint = pig.cursor ();
+
+  int year {};
+  if (parse_year (pig, year) &&
+      pig.skip ('-'))
+  {
+    auto checkpointYear = pig.cursor ();
+
+    int month {};
+    int day {};
+    int julian {};
+    int week {};
+    int weekday {};
+
+    // YYYY-Www[-D]
+    if (pig.skip ('W') &&
+        parse_week (pig, week))
+    {
+      // The weekday is optional.
+      bool hasWeekday = pig.skip ('-') &&
+                        pig.getDigit (weekday);
+
+      if (! unicodeLatinDigit (pig.peek ()))
+      {
+        _year = year;
+        _week = week;
+        if (hasWeekday)
+          _weekday = weekday;
+        return true;
+      }
+    }
+
+    pig.restoreTo (checkpointYear);
+
+    // YYYY-MM-DD
+    if (parse_month (pig, month) &&
+        pig.skip ('-') &&
+        parse_day (pig, day) &&
+        ! unicodeLatinDigit (pig.peek ()))
+    {
+      _year = year;
+      _month = month;
+      _day = day;
+      return true;
+    }
+
+    pig.restoreTo (checkpointYear);
+
+    // YYYY-DDD
+    if (parse_julian (pig, julian) &&
+        ! unicodeLatinDigit (pig.peek ()))
+    {
+      _year = year;
+      _julian = julian;
+      return true;
+    }
+
+    pig.restoreTo (checkpointYear);
+
+    // YYYY-MM, which is recorded as the first day of that month.
+    if (parse_month (pig, month) &&
+        pig.peek () != '-' &&
+        ! unicodeLatinDigit (pig.peek ()))
+    {
+      _year = year;
+      _month = month;
+      _day = 1;
+      return true;
+    }
+  }
+
+  pig.restoreTo (checkpoint);
+  return false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// ±hh[:mm]
+//
+// On success _offset holds the signed offset in seconds. _offset is only
+// written once the whole offset has been accepted.
+bool DatetimeParser::parse_off_ext (Pig& pig)
+{
+  auto checkpoint = pig.cursor ();
+
+  int sign = pig.peek ();
+  if (sign == '+' || sign == '-')
+  {
+    pig.skipN (1);
+
+    int hour {0};
+    int minute {0};
+
+    if (parse_off_hour (pig, hour))
+    {
+      // A ':' must be followed by valid minutes.
+      if (pig.skip (':') &&
+          ! parse_off_minute (pig, minute))
+      {
+        pig.restoreTo (checkpoint);
+        return false;
+      }
+
+      if (! unicodeLatinDigit (pig.peek ()))
+      {
+        int offset = (hour * 3600) + (minute * 60);
+        _offset = (sign == '-') ? - offset : offset;
+        return true;
+      }
+    }
+  }
+
+  pig.restoreTo (checkpoint);
+  return false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// hh:mm[:ss]
+//
+// When 'terminated' is true, the time must not be followed by '+' or '-'.
+// On success _seconds holds the number of seconds since midnight.
+bool DatetimeParser::parse_time_ext (Pig& pig, bool terminated)
+{
+  auto checkpoint = pig.cursor ();
+
+  int hour {};
+  int minute {};
+  if (parse_hour (pig, hour) &&
+      pig.skip (':') &&
+      parse_minute (pig, minute))
+  {
+    // hh:mm:ss - after the second ':' the seconds are mandatory.
+    if (pig.skip (':'))
+    {
+      int second {};
+      if (parse_second (pig, second) &&
+          ! unicodeLatinDigit (pig.peek ()) &&
+          (! terminated || (pig.peek () != '-' && pig.peek () != '+')))
+      {
+        _seconds = (hour * 3600) + (minute * 60) + second;
+        return true;
+      }
+
+      pig.restoreTo (checkpoint);
+      return false;
+    }
+
+    // hh:mm - rejected when followed by a digit, by 'A', 'a', 'P' or 'p', or,
+    // if 'terminated', by a sign.
+    auto following = pig.peek ();
+    if (! unicodeLatinDigit (following) &&
+        (! terminated || (following != '+' && following != '-')) &&
+        following != 'A' &&
+        following != 'a' &&
+        following != 'P' &&
+        following != 'p')
+    {
+      _seconds = (hour * 3600) + (minute * 60);
+      return true;
+    }
+  }
+
+  pig.restoreTo (checkpoint);
+  return false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// time-ext 'Z'
+bool DatetimeParser::parse_time_utc_ext (Pig& pig)
+{
+  auto checkpoint = pig.cursor ();
+
+  if (parse_time_ext (pig, false) &&
+      pig.skip ('Z'))
+  {
+    if (! unicodeLatinDigit (pig.peek ()))
+    {
+      _utc = true;
+      return true;
+    }
+  }
+
+  pig.restoreTo (checkpoint);
+  return false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// time-ext off-ext
+bool DatetimeParser::parse_time_off_ext (Pig& pig)
+{
+  auto checkpoint = pig.cursor ();
+
+  if (parse_time_ext (pig, false) &&
+      parse_off_ext (pig))
+  {
+    return true;
+  }
+
+  pig.restoreTo (checkpoint);
+  return false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// YYYYMMDDTHHMMSSZ
+// YYYYMMDDTHHMMSS
+bool DatetimeParser::parse_date_time (Pig& pig)
+{
+  auto checkpoint = pig.cursor ();
+
+  if (parse_date (pig) &&
+      pig.skip ('T') &&
+      (parse_time_utc (pig) ||
+       parse_time_off (pig) ||
+       parse_time (pig)))
+  {
+    return true;
+  }
+
+  pig.restoreTo (checkpoint);
+  return false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// YYYYWww
+// YYYYDDD
+// YYYYMMDD
+// YYYYMM
+//
+// All values are parsed into locals and only copied to the members once a form
+// has been accepted.
+bool DatetimeParser::parse_date (Pig& pig)
+{
+  auto checkpoint = pig.cursor ();
+
+  int year {};
+  int month {};
+  int julian {};
+  int week {};
+  int weekday {};
+  int day {};
+  if (parse_year (pig, year))
+  {
+    auto checkpointYear = pig.cursor ();
+
+    // YYYYWww[D]
+    if (pig.skip ('W') &&
+        parse_week (pig, week))
+    {
+      // The weekday is optional.
+      bool hasWeekday = pig.getDigit (weekday);
+
+      if (! unicodeLatinDigit (pig.peek ()))
+      {
+        _year = year;
+        _week = week;
+        if (hasWeekday)
+          _weekday = weekday;
+        return true;
+      }
+    }
+
+    pig.restoreTo (checkpointYear);
+
+    // YYYYDDD
+    if (parse_julian (pig, julian) &&
+        ! unicodeLatinDigit (pig.peek ()))
+    {
+      _year = year;
+      _julian = julian;
+      return true;
+    }
+
+    pig.restoreTo (checkpointYear);
+
+    if (parse_month (pig, month))
+    {
+      // YYYYMMDD
+      if (parse_day (pig, day))
+      {
+        if (! unicodeLatinDigit (pig.peek ()))
+        {
+          _year = year;
+          _month = month;
+          _day = day;
+          return true;
+        }
+      }
+      // YYYYMM, which is recorded as the first day of that month.
+      else
+      {
+        if (! unicodeLatinDigit (pig.peek ()))
+        {
+          _year = year;
+          _month = month;
+          _day = 1;
+          return true;
+        }
+      }
+    }
+  }
+
+  pig.restoreTo (checkpoint);
+  return false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+//