This is an automated email from the ASF dual-hosted git repository. xiangfu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 15a8f16b38 Support ISO8601 timestamp parser (#13667) 15a8f16b38 is described below commit 15a8f16b38130e2ea48287382b68aa0095dd626a Author: Xiang Fu <xiangfu.1...@gmail.com> AuthorDate: Tue Jul 23 01:45:48 2024 +0800 Support ISO8601 timestamp parser (#13667) --- .../org/apache/pinot/spi/utils/TimestampUtils.java | 61 +++++++-- .../apache/pinot/spi/utils/TimestampUtilsTest.java | 149 +++++++++++++++------ 2 files changed, 159 insertions(+), 51 deletions(-) diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/TimestampUtils.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/TimestampUtils.java index c0d23e95b7..3157dfbd11 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/TimestampUtils.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/TimestampUtils.java @@ -21,15 +21,43 @@ package org.apache.pinot.spi.utils; import java.sql.Timestamp; import java.time.LocalDateTime; import java.time.ZoneId; +import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatterBuilder; -import java.time.format.DateTimeParseException; import java.time.temporal.ChronoField; public class TimestampUtils { - private static final DateTimeFormatter DATE_TIME_FORMATTER = new DateTimeFormatterBuilder() - .appendPattern("yyyy-MM-dd[ HH:mm[:ss]]") + private static final DateTimeFormatter UNIVERSAL_DATE_TIME_FORMATTER = new DateTimeFormatterBuilder() + // Date part + .appendPattern("yyyy-MM-dd") + // Optional time part starting with 'T' + .optionalStart() + .appendLiteral('T') + .appendPattern("HH:mm") + .optionalStart() + .appendLiteral(':') + .appendPattern("ss") + .optionalEnd() + .optionalStart() + .appendFraction(ChronoField.NANO_OF_SECOND, 1, 9, true) + .optionalEnd() + .optionalEnd() + // Optional space-separated time part + .optionalStart() + .appendLiteral(' ') + .appendPattern("HH:mm") + .optionalStart() + .appendLiteral(':') + .appendPattern("ss") + .optionalEnd() + .optionalStart() + .appendFraction(ChronoField.NANO_OF_SECOND, 1, 9, true) + .optionalEnd() + .optionalEnd() + // Time zone handling, allows parsing of 'Z', '+hh:mm', '-hh:mm' + .appendOptional(DateTimeFormatter.ofPattern("XXX")) + // Default values for missing time components .parseDefaulting(ChronoField.HOUR_OF_DAY, 0) .parseDefaulting(ChronoField.MINUTE_OF_HOUR, 0) .parseDefaulting(ChronoField.SECOND_OF_MINUTE, 0) @@ -40,11 +68,12 @@ public class TimestampUtils { /** * Parses the given timestamp string into {@link Timestamp}. - * <p>Two formats of timestamp are supported: + * <p>Below formats of timestamp are supported: * <ul> * <li>'yyyy-mm-dd hh:mm:ss[.fffffffff]'</li> * <li>'yyyy-MM-dd[ HH:mm[:ss]]'</li> * <li>Millis since epoch</li> + * <li>ISO8601 format</li> * </ul> */ public static Timestamp toTimestamp(String timestampString) { @@ -55,24 +84,29 @@ public class TimestampUtils { } try { return new Timestamp(Long.parseLong(timestampString)); - } catch (Exception e1) { + } catch (Exception e) { + } + try { + return Timestamp.from(ZonedDateTime.parse(timestampString, UNIVERSAL_DATE_TIME_FORMATTER).toInstant()); + } catch (Exception e) { // Try the next format } try { - LocalDateTime dateTime = LocalDateTime.parse(timestampString, DATE_TIME_FORMATTER); + LocalDateTime dateTime = LocalDateTime.parse(timestampString, UNIVERSAL_DATE_TIME_FORMATTER); return Timestamp.valueOf(dateTime); - } catch (DateTimeParseException e) { + } catch (Exception e) { throw new IllegalArgumentException(String.format("Invalid timestamp: '%s'", timestampString)); } } /** * Parses the given timestamp string into millis since epoch. - * <p>Two formats of timestamp are supported: + * <p>Below formats of timestamp are supported: * <ul> * <li>'yyyy-mm-dd hh:mm:ss[.fffffffff]'</li> * <li>'yyyy-MM-dd[ HH:mm[:ss]]'</li> * <li>Millis since epoch</li> + * <li>ISO8601 format</li> * </ul> */ public static long toMillisSinceEpoch(String timestampString) { @@ -83,13 +117,18 @@ public class TimestampUtils { } try { return Long.parseLong(timestampString); - } catch (Exception e1) { + } catch (Exception e) { // Try the next format } try { - LocalDateTime dateTime = LocalDateTime.parse(timestampString, DATE_TIME_FORMATTER); + return ZonedDateTime.parse(timestampString, UNIVERSAL_DATE_TIME_FORMATTER).toInstant().toEpochMilli(); + } catch (Exception e) { + // Try the next format + } + try { + LocalDateTime dateTime = LocalDateTime.parse(timestampString, UNIVERSAL_DATE_TIME_FORMATTER); return dateTime.atZone(ZoneId.systemDefault()).toInstant().toEpochMilli(); - } catch (DateTimeParseException e) { + } catch (Exception e) { throw new IllegalArgumentException(String.format("Invalid timestamp: '%s'", timestampString)); } } diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/utils/TimestampUtilsTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/utils/TimestampUtilsTest.java index 142d99256a..b6e447374b 100644 --- a/pinot-spi/src/test/java/org/apache/pinot/spi/utils/TimestampUtilsTest.java +++ b/pinot-spi/src/test/java/org/apache/pinot/spi/utils/TimestampUtilsTest.java @@ -19,53 +19,122 @@ package org.apache.pinot.spi.utils; import java.sql.Timestamp; -import java.time.LocalDate; import java.time.LocalDateTime; -import org.testng.Assert; +import java.time.ZoneId; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; import org.testng.annotations.Test; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertThrows; + + public class TimestampUtilsTest { @Test - public void testToTimestamp() { - Assert.assertEquals( - TimestampUtils.toTimestamp("2024-07-12 15:32:36.111"), - Timestamp.valueOf("2024-07-12 15:32:36.111") - ); - Assert.assertEquals( - TimestampUtils.toTimestamp("2024-07-12 15:32:36"), - Timestamp.valueOf(LocalDateTime.of(2024, 7, 12, 15, 32, 36)) - ); - Assert.assertEquals( - TimestampUtils.toTimestamp("2024-07-12 15:32"), - Timestamp.valueOf(LocalDateTime.of(2024, 7, 12, 15, 32)) - ); - Assert.assertEquals( - TimestampUtils.toTimestamp("2024-07-12"), - Timestamp.valueOf(LocalDate.of(2024, 7, 12).atStartOfDay()) - ); - Assert.assertEquals(TimestampUtils.toTimestamp("1720798356111"), new Timestamp(1720798356111L)); - Assert.assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toTimestamp("July 12, 2024")); + public void testValidTimestampFormats() { + // Test ISO8601 variations with and without milliseconds and timezones + assertEquals( + TimestampUtils.toTimestamp("2024-07-12T15:32:36Z"), + Timestamp.from(LocalDateTime.of(2024, 7, 12, 15, 32, 36).atZone(ZoneOffset.UTC).toInstant())); + assertEquals( + TimestampUtils.toTimestamp("2024-07-12 15:32:36.111Z"), + Timestamp.from(LocalDateTime.of(2024, 7, 12, 15, 32, 36, 111000000).atZone(ZoneOffset.UTC).toInstant())); + for (int i = 1; i < 7; i++) { + int fraction = Integer.parseInt("1".repeat(i) + "0".repeat(9 - i)); + assertEquals( + TimestampUtils.toTimestamp("2024-07-12T15:32:36." + fraction), + Timestamp.valueOf("2024-07-12 15:32:36." + fraction)); + assertEquals( + TimestampUtils.toTimestamp("2024-07-12T15:32:36." + fraction + "Z"), + Timestamp.from(LocalDateTime.of(2024, 7, 12, 15, 32, 36, fraction).atZone(ZoneOffset.UTC).toInstant())); + } + + // Test date and time variations without 'T' + assertEquals(TimestampUtils.toTimestamp("2024-07-12 15:32:36.111"), Timestamp.valueOf("2024-07-12 15:32:36.111")); + assertEquals(TimestampUtils.toTimestamp("2024-07-12 15:32:36"), Timestamp.valueOf("2024-07-12 15:32:36")); + assertEquals(TimestampUtils.toTimestamp("2024-07-12 15:32"), Timestamp.valueOf("2024-07-12 15:32:00")); + assertEquals(TimestampUtils.toTimestamp("2024-07-12"), Timestamp.valueOf("2024-07-12 00:00:00")); + assertEquals(TimestampUtils.toTimestamp("1720798356111"), new Timestamp(1720798356111L)); + } + + @Test + public void testValidMillisSinceEpochFormats() { + // Test ISO8601 variations with and without milliseconds and timezones + assertEquals( + TimestampUtils.toMillisSinceEpoch("2024-07-12T15:32:36Z"), + Timestamp.valueOf("2024-07-12 15:32:36").toLocalDateTime().atZone(ZoneOffset.UTC).toInstant().toEpochMilli()); + assertEquals( + TimestampUtils.toMillisSinceEpoch("2024-07-12 15:32:36.111Z"), + Timestamp.valueOf("2024-07-12 15:32:36.111").toLocalDateTime().atZone(ZoneOffset.UTC).toInstant() + .toEpochMilli()); + for (int i = 1; i < 7; i++) { + String fraction = "1".repeat(i); + assertEquals(TimestampUtils.toMillisSinceEpoch("2024-07-12 15:32:36." + fraction), + Timestamp.valueOf("2024-07-12 15:32:36." + fraction).getTime()); + assertEquals( + TimestampUtils.toMillisSinceEpoch("2024-07-12T15:32:36." + fraction + "Z"), + Timestamp.valueOf("2024-07-12 15:32:36." + fraction).toLocalDateTime().atZone(ZoneOffset.UTC).toInstant() + .toEpochMilli()); + } + + // Test date and time variations without 'T' + assertEquals(TimestampUtils.toMillisSinceEpoch("2024-07-12 15:32:36.111"), + Timestamp.valueOf("2024-07-12 15:32:36.111").getTime()); + assertEquals(TimestampUtils.toMillisSinceEpoch("2024-07-12 15:32:36"), + Timestamp.valueOf("2024-07-12 15:32:36").getTime()); + assertEquals(TimestampUtils.toMillisSinceEpoch("2024-07-12 15:32"), + Timestamp.valueOf("2024-07-12 15:32:00").getTime()); + assertEquals(TimestampUtils.toMillisSinceEpoch("2024-07-12"), + Timestamp.valueOf("2024-07-12 00:00:00").getTime()); + assertEquals(TimestampUtils.toMillisSinceEpoch("1720798356111"), 1720798356111L); + } + + @Test + public void testTimestampFormatsWithZone() { + // ISO8601 with various timezone offsets + assertEquals(TimestampUtils.toTimestamp("2024-07-12T15:32:36+02:00"), + Timestamp.from(ZonedDateTime.of(2024, 7, 12, 15, 32, 36, 0, ZoneId.of("+02:00")).toInstant())); + assertEquals(TimestampUtils.toTimestamp("2024-07-12T15:32:36-05:00"), + Timestamp.from(ZonedDateTime.of(2024, 7, 12, 15, 32, 36, 0, ZoneId.of("-05:00")).toInstant())); + + // ISO8601 with milliseconds and various timezones + assertEquals(TimestampUtils.toTimestamp("2024-07-12T15:32:36.123Z"), + Timestamp.from(ZonedDateTime.of(2024, 7, 12, 15, 32, 36, 123000000, ZoneId.of("Z")).toInstant())); + assertEquals(TimestampUtils.toTimestamp("2024-07-12T15:32:36.123+01:30"), + Timestamp.from(ZonedDateTime.of(2024, 7, 12, 15, 32, 36, 123000000, ZoneId.of("+01:30")).toInstant())); + assertEquals(TimestampUtils.toTimestamp("2024-07-12T15:32:36.123-08:00"), + Timestamp.from(ZonedDateTime.of(2024, 7, 12, 15, 32, 36, 123000000, ZoneId.of("-08:00")).toInstant())); + + // Testing edge cases like half-hour and quarter-hour time zones + assertEquals(TimestampUtils.toTimestamp("2024-07-12T15:32:36+05:45"), + Timestamp.from( + ZonedDateTime.of(2024, 7, 12, 15, 32, 36, 0, ZoneId.of("+05:45")).toInstant())); // Nepal Time Zone + assertEquals(TimestampUtils.toTimestamp("2024-07-12T15:32:36+08:45"), + Timestamp.from(ZonedDateTime.of(2024, 7, 12, 15, 32, 36, 0, ZoneId.of("+08:45")) + .toInstant())); // Australian Central Western Standard Time } @Test - public void testToMillisSinceEpoch() { - Assert.assertEquals( - TimestampUtils.toMillisSinceEpoch("2024-07-12 15:32:36.111"), - Timestamp.valueOf("2024-07-12 15:32:36.111").getTime() - ); - Assert.assertEquals( - TimestampUtils.toMillisSinceEpoch("2024-07-12 15:32:36"), - Timestamp.valueOf(LocalDateTime.of(2024, 7, 12, 15, 32, 36)).getTime() - ); - Assert.assertEquals( - TimestampUtils.toMillisSinceEpoch("2024-07-12 15:32"), - Timestamp.valueOf(LocalDateTime.of(2024, 7, 12, 15, 32)).getTime() - ); - Assert.assertEquals( - TimestampUtils.toMillisSinceEpoch("2024-07-12"), - Timestamp.valueOf(LocalDate.of(2024, 7, 12).atStartOfDay()).getTime() - ); - Assert.assertEquals(TimestampUtils.toMillisSinceEpoch("1720798356111"), 1720798356111L); - Assert.assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toMillisSinceEpoch("July 12, 2024")); + public void testInvalidFormatHandling() { + // Test incorrect date and time formats + assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toTimestamp("July 12, 2024")); + assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toTimestamp("2024-07-12T25:32:36")); + assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toTimestamp("2024-07-12T15:32:36+25:00")); + assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toTimestamp("This is not a date")); + + // Test incorrect date and time formats for millisecond conversion + assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toMillisSinceEpoch("July 12, 2024")); + assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toMillisSinceEpoch("2024-07-12T25:32:36")); + assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toMillisSinceEpoch("2024-07-12T15:32:36+25:00")); + assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toMillisSinceEpoch("This is not a date")); + + // Incorrect time zone formats + assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toTimestamp("2024-07-12T15:32:36+25:00")); + + // Invalid minute in time zone + assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toTimestamp("2024-07-12T15:32:36+02:60")); + + //Too many digits in fractional seconds + assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toTimestamp("2024-07-12T15:32:36.12345678910Z")); } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org