The parser now accepts the basic format for combined date and time representations, which omits the date and time separators, "-" and ":".
See bug 23767 for GNU coreutils, <https://savannah.gnu.org/bugs/?23767>. * lib/parse-datetime.y: Parse combined date and time representations in ISO 8601 basic format. (set_hhmmss_iso_8601_basic_time) New function. (digits_iso_8601_basic_to_date) New function. * tests/test-parse-datetime.c: Add tests for combined date and time representations in ISO 8601 basic format. * doc/parse-datetime.texi: Document support for combined date and time representations in ISO 8601 basic format. Signed-off-by: Mihai Capotă <mi...@mihaic.ro> --- On Thu, Apr 25, 2013 at 12:02 AM, Paul Eggert <egg...@cs.ucla.edu> wrote: > The most important thing is that the patch also needs > to update doc/parse-datetime.texi. Done. > On 03/30/13 12:18, Mihai Capotă wrote: >> + /* not ISO 8601 time, forcing mktime error */ >> + pc->hour = 90; > > How does this force a mktime error? mktime allows tm_hour == 90. I meant to say mktime_ok. I changed the code to reject input by incrementing times_seen, like time_zone_hhmm. >> datetime: >> iso_8601_datetime >> + | iso_8601_basic_datetime >> ; >> >> iso_8601_datetime: >> iso_8601_date 'T' iso_8601_time >> ; >> >> +iso_8601_basic_datetime: >> + number 'T' iso_8601_basic_time >> + { pc->dates_seen--; } /* already incremented in digits_to_date_time */ > > This doesn't look right. 'number' accepts all sort of things that we > would rather not accept here. I was trying to make use of the existing digits_to_date_time function. I replaced it with tUNUMBER and a new function. > Conversely, why require ":" in times to > correlate with "-" in dates? Shouldn't we accept a "-"less date along > with a ":"ful time, and vice versa? No, that is not allowed by the standard. > And that "dates_seen--" business > is a hack; can't we arrange things so that dates_seen is incremented > just once? The hack is gone. 
>> +iso_8601_basic_time: >> + tUNUMBER o_zone_offset >> + { >> + set_hhmmss_iso_8601_basic_time (pc, $1.value, 0); >> + pc->meridian = MER24; >> + } >> + | tUDECIMAL_NUMBER o_zone_offset >> + { >> + /* FIXME avoid time_t to long int cast */ > > Why is the cast needed? Also, can't the grammar be simplified > here, by using unsigned_seconds instead of using both > tUDECIMAL_NUMBER and tUNUMBER? I switched to using unsigned_seconds. doc/parse-datetime.texi | 9 +++++- lib/parse-datetime.y | 68 +++++++++++++++++++++++++++++++++++++++++-- tests/test-parse-datetime.c | 61 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 135 insertions(+), 3 deletions(-) diff --git a/doc/parse-datetime.texi b/doc/parse-datetime.texi index 6b3e973..9aa87ed 100644 --- a/doc/parse-datetime.texi +++ b/doc/parse-datetime.texi @@ -327,7 +327,12 @@ The ISO 8601 date and time of day extended format consists of an ISO day. This format is also recognized if the @samp{T} is replaced by a space. -In this format, the time of day should use 24-hour notation. +The ISO 8601 basic format is also recognized. It is identical to the ISO 8601 +extended format, except for omitting the @samp{-} separator in the date and the +@samp{:} separator in the time. Only the HHMMSS format is supported for the +time of day, the reduced accuracy HHMM and HH formats are not supported. + +In these formats, the time of day should use 24-hour notation. Fractional seconds are allowed, with either comma or period preceding the fraction. ISO 8601 fractional minutes and hours are not supported. 
Typically, hosts support nanosecond timestamp resolution; @@ -339,6 +344,8 @@ Here are some examples: 2012-09-24T20:02:00.052-0500 2012-12-31T23:59:59,999999999+1100 1970-01-01 00:00Z +20120924T200200.052-0500 +20121231T235959,999999999+1100 @end example @node Day of week items diff --git a/lib/parse-datetime.y b/lib/parse-datetime.y index 4dce7fa..fa9719d 100644 --- a/lib/parse-datetime.y +++ b/lib/parse-datetime.y @@ -257,6 +257,38 @@ digits_to_date_time (parser_control *pc, textint text_int) } } +/* Extract into *PC the date info from a string of digits in ISO 8601 basic + format, i.e., YYYYMMDD, YYYY, or YY meaning century. Note that YYYYMM is not + allowed to avoid confusion with YYMMDD */ +static void +digits_iso_8601_basic_to_date (parser_control *pc, textint text_int) +{ + switch (text_int.digits) + { + case 8: + pc->day = text_int.value % 100; + pc->month = (text_int.value / 100) % 100; + pc->year.value = text_int.value / 10000; + pc->year.digits = 4; + return; + case 4: + pc->day = 1; + pc->month = 1; + pc->year.value = text_int.value; + pc->year.digits = 4; + return; + case 2: + pc->day = 1; + pc->month = 1; + pc->year.value = text_int.value * 100; + pc->year.digits = 4; + return; + default: + pc->dates_seen++; + return; + } +} + /* Increment PC->rel by FACTOR * REL (FACTOR is 1 or -1). */ static void apply_relative_time (parser_control *pc, relative_time rel, int factor) @@ -282,6 +314,28 @@ set_hhmmss (parser_control *pc, long int hour, long int minutes, pc->seconds.tv_nsec = nsec; } +/* Set PC-> hour, minutes, seconds and nanoseconds members from ISO 8601 basic + time. */ +static void +set_hhmmss_iso_8601_basic_time (parser_control *pc, time_t integer_part, + long int fractional_part) +{ + if (integer_part / 1000000 > 0) + { + /* Not ISO 8601 time, arrange to reject it by incrementing + pc->times_seen.*/ + pc->times_seen++; + } + else + { + /* FIXME support reduced accuracy times, i.e. 
HHMM and HH */ + pc->hour = integer_part / 10000; + pc->minutes = (integer_part % 10000) / 100; + pc->seconds.tv_sec = integer_part % 100; + pc->seconds.tv_nsec = fractional_part; + } +} + %} /* We want a reentrant parser, even if the TZ manipulation and the calls to @@ -290,8 +344,8 @@ set_hhmmss (parser_control *pc, long int hour, long int minutes, %parse-param { parser_control *pc } %lex-param { parser_control *pc } -/* This grammar has 31 shift/reduce conflicts. */ -%expect 31 +/* This grammar has 33 shift/reduce conflicts. */ +%expect 33 %union { @@ -358,12 +412,22 @@ item: datetime: iso_8601_datetime + | iso_8601_basic_datetime ; iso_8601_datetime: iso_8601_date 'T' iso_8601_time ; +iso_8601_basic_datetime: + tUNUMBER 'T' unsigned_seconds o_zone_offset + { + digits_iso_8601_basic_to_date (pc, $1); + set_hhmmss_iso_8601_basic_time (pc, $3.tv_sec, $3.tv_nsec); + pc->meridian = MER24; + } + ; + time: tUNUMBER tMERIDIAN { diff --git a/tests/test-parse-datetime.c b/tests/test-parse-datetime.c index 7eba9ad..c620009 100644 --- a/tests/test-parse-datetime.c +++ b/tests/test-parse-datetime.c @@ -216,6 +216,67 @@ main (int argc _GL_UNUSED, char **argv) && expected.tv_nsec == result.tv_nsec); + /* ISO 8601 basic date and time of day representation, + 'T' separator, local time zone */ + p = "20110501T115518"; + expected.tv_sec = ref_time - gmtoff; + expected.tv_nsec = 0; + ASSERT (parse_datetime (&result, p, 0)); + LOG (p, expected, result); + ASSERT (expected.tv_sec == result.tv_sec + && expected.tv_nsec == result.tv_nsec); + + + /* ISO 8601 basic date and time of day representation, + 'T' separator, UTC */ + p = "20110501T115518Z"; + expected.tv_sec = ref_time; + expected.tv_nsec = 0; + ASSERT (parse_datetime (&result, p, 0)); + LOG (p, expected, result); + ASSERT (expected.tv_sec == result.tv_sec + && expected.tv_nsec == result.tv_nsec); + + + /* ISO 8601 basic date and time of day representation, + 'T' separator, w/UTC offset */ + p = "20110501T115518-0700"; + 
expected.tv_sec = 1304276118; + expected.tv_nsec = 0; + ASSERT (parse_datetime (&result, p, 0)); + LOG (p, expected, result); + ASSERT (expected.tv_sec == result.tv_sec + && expected.tv_nsec == result.tv_nsec); + + + /* ISO 8601 basic date and time of day representation, + 'T' separator, w/hour only UTC offset */ + p = "20110501T115518-07"; + expected.tv_sec = 1304276118; + expected.tv_nsec = 0; + ASSERT (parse_datetime (&result, p, 0)); + LOG (p, expected, result); + ASSERT (expected.tv_sec == result.tv_sec + && expected.tv_nsec == result.tv_nsec); + + + /* ISO 8601 basic date and time of day representation, + 'T' separator, w/hour only UTC offset, with ns */ + p = "20110501T115518,123456789-07"; + expected.tv_sec = 1304276118; + expected.tv_nsec = 123456789; + ASSERT (parse_datetime (&result, p, 0)); + LOG (p, expected, result); + ASSERT (expected.tv_sec == result.tv_sec + && expected.tv_nsec == result.tv_nsec); + + + /* Invalid ISO 8601 basic date and time of day representation, + too many digits for time */ + p = "20110501T11551800"; + ASSERT (!parse_datetime (&result, p, 0)); + + now.tv_sec = 4711; now.tv_nsec = 1267; p = "now"; -- 1.7.9.5