This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 15a8f16b38 Support ISO8601 timestamp parser (#13667)
15a8f16b38 is described below

commit 15a8f16b38130e2ea48287382b68aa0095dd626a
Author: Xiang Fu <xiangfu.1...@gmail.com>
AuthorDate: Tue Jul 23 01:45:48 2024 +0800

    Support ISO8601 timestamp parser (#13667)
---
 .../org/apache/pinot/spi/utils/TimestampUtils.java |  61 +++++++--
 .../apache/pinot/spi/utils/TimestampUtilsTest.java | 149 +++++++++++++++------
 2 files changed, 159 insertions(+), 51 deletions(-)

diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/TimestampUtils.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/TimestampUtils.java
index c0d23e95b7..3157dfbd11 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/TimestampUtils.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/TimestampUtils.java
@@ -21,15 +21,43 @@ package org.apache.pinot.spi.utils;
 import java.sql.Timestamp;
 import java.time.LocalDateTime;
 import java.time.ZoneId;
+import java.time.ZonedDateTime;
 import java.time.format.DateTimeFormatter;
 import java.time.format.DateTimeFormatterBuilder;
-import java.time.format.DateTimeParseException;
 import java.time.temporal.ChronoField;
 
 
 public class TimestampUtils {
-  private static final DateTimeFormatter DATE_TIME_FORMATTER = new DateTimeFormatterBuilder()
-      .appendPattern("yyyy-MM-dd[ HH:mm[:ss]]")
+  private static final DateTimeFormatter UNIVERSAL_DATE_TIME_FORMATTER = new DateTimeFormatterBuilder()
+      // Date part
+      .appendPattern("yyyy-MM-dd")
+      // Optional time part starting with 'T'
+      .optionalStart()
+      .appendLiteral('T')
+      .appendPattern("HH:mm")
+      .optionalStart()
+      .appendLiteral(':')
+      .appendPattern("ss")
+      .optionalEnd()
+      .optionalStart()
+      .appendFraction(ChronoField.NANO_OF_SECOND, 1, 9, true)
+      .optionalEnd()
+      .optionalEnd()
+      // Optional space-separated time part
+      .optionalStart()
+      .appendLiteral(' ')
+      .appendPattern("HH:mm")
+      .optionalStart()
+      .appendLiteral(':')
+      .appendPattern("ss")
+      .optionalEnd()
+      .optionalStart()
+      .appendFraction(ChronoField.NANO_OF_SECOND, 1, 9, true)
+      .optionalEnd()
+      .optionalEnd()
+      // Time zone handling, allows parsing of 'Z', '+hh:mm', '-hh:mm'
+      .appendOptional(DateTimeFormatter.ofPattern("XXX"))
+      // Default values for missing time components
       .parseDefaulting(ChronoField.HOUR_OF_DAY, 0)
       .parseDefaulting(ChronoField.MINUTE_OF_HOUR, 0)
       .parseDefaulting(ChronoField.SECOND_OF_MINUTE, 0)
@@ -40,11 +68,12 @@ public class TimestampUtils {
 
   /**
    * Parses the given timestamp string into {@link Timestamp}.
-   * <p>Two formats of timestamp are supported:
+   * <p>Below formats of timestamp are supported:
    * <ul>
    *   <li>'yyyy-mm-dd hh:mm:ss[.fffffffff]'</li>
    *   <li>'yyyy-MM-dd[ HH:mm[:ss]]'</li>
    *   <li>Millis since epoch</li>
+   *   <li>ISO8601 format</li>
    * </ul>
    */
   public static Timestamp toTimestamp(String timestampString) {
@@ -55,24 +84,29 @@ public class TimestampUtils {
     }
     try {
       return new Timestamp(Long.parseLong(timestampString));
-    } catch (Exception e1) {
+    } catch (Exception e) {
+    }
+    try {
+      return Timestamp.from(ZonedDateTime.parse(timestampString, UNIVERSAL_DATE_TIME_FORMATTER).toInstant());
+    } catch (Exception e) {
       // Try the next format
     }
     try {
-      LocalDateTime dateTime = LocalDateTime.parse(timestampString, DATE_TIME_FORMATTER);
+      LocalDateTime dateTime = LocalDateTime.parse(timestampString, UNIVERSAL_DATE_TIME_FORMATTER);
       return Timestamp.valueOf(dateTime);
-    } catch (DateTimeParseException e) {
+    } catch (Exception e) {
       throw new IllegalArgumentException(String.format("Invalid timestamp: '%s'", timestampString));
     }
   }
 
   /**
    * Parses the given timestamp string into millis since epoch.
-   * <p>Two formats of timestamp are supported:
+   * <p>Below formats of timestamp are supported:
    * <ul>
    *   <li>'yyyy-mm-dd hh:mm:ss[.fffffffff]'</li>
    *   <li>'yyyy-MM-dd[ HH:mm[:ss]]'</li>
    *   <li>Millis since epoch</li>
+   *   <li>ISO8601 format</li>
    * </ul>
    */
   public static long toMillisSinceEpoch(String timestampString) {
@@ -83,13 +117,18 @@ public class TimestampUtils {
     }
     try {
       return Long.parseLong(timestampString);
-    } catch (Exception e1) {
+    } catch (Exception e) {
       // Try the next format
     }
     try {
-      LocalDateTime dateTime = LocalDateTime.parse(timestampString, DATE_TIME_FORMATTER);
+      return ZonedDateTime.parse(timestampString, UNIVERSAL_DATE_TIME_FORMATTER).toInstant().toEpochMilli();
+    } catch (Exception e) {
+      // Try the next format
+    }
+    try {
+      LocalDateTime dateTime = LocalDateTime.parse(timestampString, UNIVERSAL_DATE_TIME_FORMATTER);
       return dateTime.atZone(ZoneId.systemDefault()).toInstant().toEpochMilli();
-    } catch (DateTimeParseException e) {
+    } catch (Exception e) {
       throw new IllegalArgumentException(String.format("Invalid timestamp: '%s'", timestampString));
     }
   }
diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/utils/TimestampUtilsTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/utils/TimestampUtilsTest.java
index 142d99256a..b6e447374b 100644
--- a/pinot-spi/src/test/java/org/apache/pinot/spi/utils/TimestampUtilsTest.java
+++ b/pinot-spi/src/test/java/org/apache/pinot/spi/utils/TimestampUtilsTest.java
@@ -19,53 +19,122 @@
 package org.apache.pinot.spi.utils;
 
 import java.sql.Timestamp;
-import java.time.LocalDate;
 import java.time.LocalDateTime;
-import org.testng.Assert;
+import java.time.ZoneId;
+import java.time.ZoneOffset;
+import java.time.ZonedDateTime;
 import org.testng.annotations.Test;
 
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertThrows;
+
+
 public class TimestampUtilsTest {
   @Test
-  public void testToTimestamp() {
-    Assert.assertEquals(
-        TimestampUtils.toTimestamp("2024-07-12 15:32:36.111"),
-        Timestamp.valueOf("2024-07-12 15:32:36.111")
-    );
-    Assert.assertEquals(
-        TimestampUtils.toTimestamp("2024-07-12 15:32:36"),
-        Timestamp.valueOf(LocalDateTime.of(2024, 7, 12, 15, 32, 36))
-    );
-    Assert.assertEquals(
-        TimestampUtils.toTimestamp("2024-07-12 15:32"),
-        Timestamp.valueOf(LocalDateTime.of(2024, 7, 12, 15, 32))
-    );
-    Assert.assertEquals(
-        TimestampUtils.toTimestamp("2024-07-12"),
-        Timestamp.valueOf(LocalDate.of(2024, 7, 12).atStartOfDay())
-    );
-    Assert.assertEquals(TimestampUtils.toTimestamp("1720798356111"), new Timestamp(1720798356111L));
-    Assert.assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toTimestamp("July 12, 2024"));
+  public void testValidTimestampFormats() {
+    // Test ISO8601 variations with and without milliseconds and timezones
+    assertEquals(
+        TimestampUtils.toTimestamp("2024-07-12T15:32:36Z"),
+        Timestamp.from(LocalDateTime.of(2024, 7, 12, 15, 32, 36).atZone(ZoneOffset.UTC).toInstant()));
+    assertEquals(
+        TimestampUtils.toTimestamp("2024-07-12 15:32:36.111Z"),
+        Timestamp.from(LocalDateTime.of(2024, 7, 12, 15, 32, 36, 111000000).atZone(ZoneOffset.UTC).toInstant()));
+    for (int i = 1; i < 7; i++) {
+      int fraction = Integer.parseInt("1".repeat(i) + "0".repeat(9 - i));
+      assertEquals(
+          TimestampUtils.toTimestamp("2024-07-12T15:32:36." + fraction),
+          Timestamp.valueOf("2024-07-12 15:32:36." + fraction));
+      assertEquals(
+          TimestampUtils.toTimestamp("2024-07-12T15:32:36." + fraction + "Z"),
+          Timestamp.from(LocalDateTime.of(2024, 7, 12, 15, 32, 36, fraction).atZone(ZoneOffset.UTC).toInstant()));
+    }
+
+    // Test date and time variations without 'T'
+    assertEquals(TimestampUtils.toTimestamp("2024-07-12 15:32:36.111"), Timestamp.valueOf("2024-07-12 15:32:36.111"));
+    assertEquals(TimestampUtils.toTimestamp("2024-07-12 15:32:36"), Timestamp.valueOf("2024-07-12 15:32:36"));
+    assertEquals(TimestampUtils.toTimestamp("2024-07-12 15:32"), Timestamp.valueOf("2024-07-12 15:32:00"));
+    assertEquals(TimestampUtils.toTimestamp("2024-07-12"), Timestamp.valueOf("2024-07-12 00:00:00"));
+    assertEquals(TimestampUtils.toTimestamp("1720798356111"), new Timestamp(1720798356111L));
+  }
+
+  @Test
+  public void testValidMillisSinceEpochFormats() {
+    // Test ISO8601 variations with and without milliseconds and timezones
+    assertEquals(
+        TimestampUtils.toMillisSinceEpoch("2024-07-12T15:32:36Z"),
+        Timestamp.valueOf("2024-07-12 15:32:36").toLocalDateTime().atZone(ZoneOffset.UTC).toInstant().toEpochMilli());
+    assertEquals(
+        TimestampUtils.toMillisSinceEpoch("2024-07-12 15:32:36.111Z"),
+        Timestamp.valueOf("2024-07-12 15:32:36.111").toLocalDateTime().atZone(ZoneOffset.UTC).toInstant()
+            .toEpochMilli());
+    for (int i = 1; i < 7; i++) {
+      String fraction = "1".repeat(i);
+      assertEquals(TimestampUtils.toMillisSinceEpoch("2024-07-12 15:32:36." + fraction),
+          Timestamp.valueOf("2024-07-12 15:32:36." + fraction).getTime());
+      assertEquals(
+          TimestampUtils.toMillisSinceEpoch("2024-07-12T15:32:36." + fraction + "Z"),
+          Timestamp.valueOf("2024-07-12 15:32:36." + fraction).toLocalDateTime().atZone(ZoneOffset.UTC).toInstant()
+              .toEpochMilli());
+    }
+
+    // Test date and time variations without 'T'
+    assertEquals(TimestampUtils.toMillisSinceEpoch("2024-07-12 15:32:36.111"),
+        Timestamp.valueOf("2024-07-12 15:32:36.111").getTime());
+    assertEquals(TimestampUtils.toMillisSinceEpoch("2024-07-12 15:32:36"),
+        Timestamp.valueOf("2024-07-12 15:32:36").getTime());
+    assertEquals(TimestampUtils.toMillisSinceEpoch("2024-07-12 15:32"),
+        Timestamp.valueOf("2024-07-12 15:32:00").getTime());
+    assertEquals(TimestampUtils.toMillisSinceEpoch("2024-07-12"),
+        Timestamp.valueOf("2024-07-12 00:00:00").getTime());
+    assertEquals(TimestampUtils.toMillisSinceEpoch("1720798356111"), 1720798356111L);
+  }
+
+  @Test
+  public void testTimestampFormatsWithZone() {
+    // ISO8601 with various timezone offsets
+    assertEquals(TimestampUtils.toTimestamp("2024-07-12T15:32:36+02:00"),
+        Timestamp.from(ZonedDateTime.of(2024, 7, 12, 15, 32, 36, 0, ZoneId.of("+02:00")).toInstant()));
+    assertEquals(TimestampUtils.toTimestamp("2024-07-12T15:32:36-05:00"),
+        Timestamp.from(ZonedDateTime.of(2024, 7, 12, 15, 32, 36, 0, ZoneId.of("-05:00")).toInstant()));
+
+    // ISO8601 with milliseconds and various timezones
+    assertEquals(TimestampUtils.toTimestamp("2024-07-12T15:32:36.123Z"),
+        Timestamp.from(ZonedDateTime.of(2024, 7, 12, 15, 32, 36, 123000000, ZoneId.of("Z")).toInstant()));
+    assertEquals(TimestampUtils.toTimestamp("2024-07-12T15:32:36.123+01:30"),
+        Timestamp.from(ZonedDateTime.of(2024, 7, 12, 15, 32, 36, 123000000, ZoneId.of("+01:30")).toInstant()));
+    assertEquals(TimestampUtils.toTimestamp("2024-07-12T15:32:36.123-08:00"),
+        Timestamp.from(ZonedDateTime.of(2024, 7, 12, 15, 32, 36, 123000000, ZoneId.of("-08:00")).toInstant()));
+
+    // Testing edge cases like half-hour and quarter-hour time zones
+    assertEquals(TimestampUtils.toTimestamp("2024-07-12T15:32:36+05:45"),
+        Timestamp.from(
+            ZonedDateTime.of(2024, 7, 12, 15, 32, 36, 0, ZoneId.of("+05:45")).toInstant())); // Nepal Time Zone
+    assertEquals(TimestampUtils.toTimestamp("2024-07-12T15:32:36+08:45"),
+        Timestamp.from(ZonedDateTime.of(2024, 7, 12, 15, 32, 36, 0, ZoneId.of("+08:45"))
+            .toInstant())); // Australian Central Western Standard Time
   }
 
   @Test
-  public void testToMillisSinceEpoch() {
-    Assert.assertEquals(
-        TimestampUtils.toMillisSinceEpoch("2024-07-12 15:32:36.111"),
-        Timestamp.valueOf("2024-07-12 15:32:36.111").getTime()
-    );
-    Assert.assertEquals(
-        TimestampUtils.toMillisSinceEpoch("2024-07-12 15:32:36"),
-        Timestamp.valueOf(LocalDateTime.of(2024, 7, 12, 15, 32, 36)).getTime()
-    );
-    Assert.assertEquals(
-        TimestampUtils.toMillisSinceEpoch("2024-07-12 15:32"),
-        Timestamp.valueOf(LocalDateTime.of(2024, 7, 12, 15, 32)).getTime()
-    );
-    Assert.assertEquals(
-        TimestampUtils.toMillisSinceEpoch("2024-07-12"),
-        Timestamp.valueOf(LocalDate.of(2024, 7, 12).atStartOfDay()).getTime()
-    );
-    Assert.assertEquals(TimestampUtils.toMillisSinceEpoch("1720798356111"), 1720798356111L);
-    Assert.assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toMillisSinceEpoch("July 12, 2024"));
+  public void testInvalidFormatHandling() {
+    // Test incorrect date and time formats
+    assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toTimestamp("July 12, 2024"));
+    assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toTimestamp("2024-07-12T25:32:36"));
+    assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toTimestamp("2024-07-12T15:32:36+25:00"));
+    assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toTimestamp("This is not a date"));
+
+    // Test incorrect date and time formats for millisecond conversion
+    assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toMillisSinceEpoch("July 12, 2024"));
+    assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toMillisSinceEpoch("2024-07-12T25:32:36"));
+    assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toMillisSinceEpoch("2024-07-12T15:32:36+25:00"));
+    assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toMillisSinceEpoch("This is not a date"));
+
+    // Incorrect time zone formats
+    assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toTimestamp("2024-07-12T15:32:36+25:00"));
+
+    // Invalid minute in time zone
+    assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toTimestamp("2024-07-12T15:32:36+02:60"));
+
+    //Too many digits in fractional seconds
+    assertThrows(IllegalArgumentException.class, () -> TimestampUtils.toTimestamp("2024-07-12T15:32:36.12345678910Z"));
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to