Jackie-Jiang commented on code in PR #8779:
URL: https://github.com/apache/pinot/pull/8779#discussion_r883155571


##########
pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java:
##########
@@ -47,28 +50,75 @@ public class DateTimeFormatSpec {
   public static final int MIN_FORMAT_TOKENS = 3;
   public static final int MAX_FORMAT_TOKENS = 4;
 
+  public static final int FORMAT_TIMEFORMAT_POSITION_PIPE = 0;
+  public static final int FORMAT_PATTERN_POSITION_PIPE = 1;
+  public static final int FORMAT_UNIT_POSITION_PIPE = 1;
+  public static final int FORMAT_SIZE_POSITION_PIPE = 2;
+  public static final int FORMAT_TIMEZONE_POSITION_PIPE = 2;
+  public static final int MIN_FORMAT_TOKENS_PIPE = 1;
+  public static final int MAX_FORMAT_TOKENS_PIPE = 3;
+
   private final String _format;
   private final int _size;
   private final DateTimeFormatUnitSpec _unitSpec;
   private final DateTimeFormatPatternSpec _patternSpec;
 
   public DateTimeFormatSpec(String format) {
     _format = format;
-    validateFormat(format);
-    String[] formatTokens = StringUtils.split(format, COLON_SEPARATOR, 
MAX_FORMAT_TOKENS);
-    if (formatTokens.length == MAX_FORMAT_TOKENS) {
-      _patternSpec = new 
DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION],
-          formatTokens[FORMAT_PATTERN_POSITION]);
-    } else {
-      _patternSpec = new 
DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION]);
-    }
-    if (_patternSpec.getTimeFormat() == TimeFormat.TIMESTAMP) {
-      // TIMESTAMP type stores millis since epoch
-      _size = 1;
-      _unitSpec = new DateTimeFormatUnitSpec("MILLISECONDS");
+    if (_format.matches(COLON_REGEX)) {
+      validateFormat(format);

Review Comment:
   Not introduced in this PR, but I suggest doing the split first and then 
validating the split parts, to avoid splitting twice. Or, even better, perform 
the validation along with the value processing to avoid all unnecessary overhead.



##########
pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java:
##########
@@ -47,28 +50,75 @@ public class DateTimeFormatSpec {
   public static final int MIN_FORMAT_TOKENS = 3;
   public static final int MAX_FORMAT_TOKENS = 4;
 
+  public static final int FORMAT_TIMEFORMAT_POSITION_PIPE = 0;
+  public static final int FORMAT_PATTERN_POSITION_PIPE = 1;
+  public static final int FORMAT_UNIT_POSITION_PIPE = 1;
+  public static final int FORMAT_SIZE_POSITION_PIPE = 2;
+  public static final int FORMAT_TIMEZONE_POSITION_PIPE = 2;
+  public static final int MIN_FORMAT_TOKENS_PIPE = 1;
+  public static final int MAX_FORMAT_TOKENS_PIPE = 3;
+
   private final String _format;
   private final int _size;
   private final DateTimeFormatUnitSpec _unitSpec;
   private final DateTimeFormatPatternSpec _patternSpec;
 
   public DateTimeFormatSpec(String format) {
     _format = format;
-    validateFormat(format);
-    String[] formatTokens = StringUtils.split(format, COLON_SEPARATOR, 
MAX_FORMAT_TOKENS);
-    if (formatTokens.length == MAX_FORMAT_TOKENS) {
-      _patternSpec = new 
DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION],
-          formatTokens[FORMAT_PATTERN_POSITION]);
-    } else {
-      _patternSpec = new 
DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION]);
-    }
-    if (_patternSpec.getTimeFormat() == TimeFormat.TIMESTAMP) {
-      // TIMESTAMP type stores millis since epoch
-      _size = 1;
-      _unitSpec = new DateTimeFormatUnitSpec("MILLISECONDS");
+    if (_format.matches(COLON_REGEX)) {
+      validateFormat(format);
+      String[] formatTokens = StringUtils.split(format, COLON_SEPARATOR, 
MAX_FORMAT_TOKENS);
+      if (formatTokens.length == MAX_FORMAT_TOKENS) {
+        _patternSpec = new 
DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION],
+            formatTokens[FORMAT_PATTERN_POSITION]);
+      } else {
+        _patternSpec = new 
DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION]);
+      }
+      if (_patternSpec.getTimeFormat() == TimeFormat.TIMESTAMP) {
+        // TIMESTAMP type stores millis since epoch
+        _size = 1;
+        _unitSpec = new DateTimeFormatUnitSpec("MILLISECONDS");
+      } else {
+        _size = Integer.parseInt(formatTokens[FORMAT_SIZE_POSITION]);
+        _unitSpec = new 
DateTimeFormatUnitSpec(formatTokens[FORMAT_UNIT_POSITION]);
+      }
     } else {
-      _size = Integer.parseInt(formatTokens[FORMAT_SIZE_POSITION]);
-      _unitSpec = new 
DateTimeFormatUnitSpec(formatTokens[FORMAT_UNIT_POSITION]);
+      validatePipeFormat(format);
+      String[] formatTokens = StringUtils.split(format, PIPE_SEPARATOR, 
MAX_FORMAT_TOKENS_PIPE);
+      if (formatTokens.length == MAX_FORMAT_TOKENS_PIPE) { //date with tz or 
epoch with time-size

Review Comment:
   I'd suggest branching based on the first token, instead of the length. The 
logic would be much easier to understand that way.



##########
pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java:
##########
@@ -47,28 +50,75 @@ public class DateTimeFormatSpec {
   public static final int MIN_FORMAT_TOKENS = 3;
   public static final int MAX_FORMAT_TOKENS = 4;
 
+  public static final int FORMAT_TIMEFORMAT_POSITION_PIPE = 0;
+  public static final int FORMAT_PATTERN_POSITION_PIPE = 1;
+  public static final int FORMAT_UNIT_POSITION_PIPE = 1;
+  public static final int FORMAT_SIZE_POSITION_PIPE = 2;
+  public static final int FORMAT_TIMEZONE_POSITION_PIPE = 2;
+  public static final int MIN_FORMAT_TOKENS_PIPE = 1;
+  public static final int MAX_FORMAT_TOKENS_PIPE = 3;
+
   private final String _format;
   private final int _size;
   private final DateTimeFormatUnitSpec _unitSpec;
   private final DateTimeFormatPatternSpec _patternSpec;
 
   public DateTimeFormatSpec(String format) {
     _format = format;
-    validateFormat(format);
-    String[] formatTokens = StringUtils.split(format, COLON_SEPARATOR, 
MAX_FORMAT_TOKENS);
-    if (formatTokens.length == MAX_FORMAT_TOKENS) {
-      _patternSpec = new 
DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION],
-          formatTokens[FORMAT_PATTERN_POSITION]);
-    } else {
-      _patternSpec = new 
DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION]);
-    }
-    if (_patternSpec.getTimeFormat() == TimeFormat.TIMESTAMP) {
-      // TIMESTAMP type stores millis since epoch
-      _size = 1;
-      _unitSpec = new DateTimeFormatUnitSpec("MILLISECONDS");
+    if (_format.matches(COLON_REGEX)) {

Review Comment:
   Let's not use a regex to determine the format, because it is too expensive. 
The simplest way I can think of is to check whether the first character is a digit.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to