Jackie-Jiang commented on code in PR #8779: URL: https://github.com/apache/pinot/pull/8779#discussion_r883155571
########## pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java: ########## @@ -47,28 +50,75 @@ public class DateTimeFormatSpec { public static final int MIN_FORMAT_TOKENS = 3; public static final int MAX_FORMAT_TOKENS = 4; + public static final int FORMAT_TIMEFORMAT_POSITION_PIPE = 0; + public static final int FORMAT_PATTERN_POSITION_PIPE = 1; + public static final int FORMAT_UNIT_POSITION_PIPE = 1; + public static final int FORMAT_SIZE_POSITION_PIPE = 2; + public static final int FORMAT_TIMEZONE_POSITION_PIPE = 2; + public static final int MIN_FORMAT_TOKENS_PIPE = 1; + public static final int MAX_FORMAT_TOKENS_PIPE = 3; + private final String _format; private final int _size; private final DateTimeFormatUnitSpec _unitSpec; private final DateTimeFormatPatternSpec _patternSpec; public DateTimeFormatSpec(String format) { _format = format; - validateFormat(format); - String[] formatTokens = StringUtils.split(format, COLON_SEPARATOR, MAX_FORMAT_TOKENS); - if (formatTokens.length == MAX_FORMAT_TOKENS) { - _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION], - formatTokens[FORMAT_PATTERN_POSITION]); - } else { - _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION]); - } - if (_patternSpec.getTimeFormat() == TimeFormat.TIMESTAMP) { - // TIMESTAMP type stores millis since epoch - _size = 1; - _unitSpec = new DateTimeFormatUnitSpec("MILLISECONDS"); + if (_format.matches(COLON_REGEX)) { + validateFormat(format); Review Comment: Not introduced in this PR, but suggest to first do the split, then validate on the split parts to avoid splitting twice. 
Or even better, perform the validation along with the value processing to avoid all unnecessary overhead. ########## pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java: ########## @@ -47,28 +50,75 @@ public class DateTimeFormatSpec { public static final int MIN_FORMAT_TOKENS = 3; public static final int MAX_FORMAT_TOKENS = 4; + public static final int FORMAT_TIMEFORMAT_POSITION_PIPE = 0; + public static final int FORMAT_PATTERN_POSITION_PIPE = 1; + public static final int FORMAT_UNIT_POSITION_PIPE = 1; + public static final int FORMAT_SIZE_POSITION_PIPE = 2; + public static final int FORMAT_TIMEZONE_POSITION_PIPE = 2; + public static final int MIN_FORMAT_TOKENS_PIPE = 1; + public static final int MAX_FORMAT_TOKENS_PIPE = 3; + private final String _format; private final int _size; private final DateTimeFormatUnitSpec _unitSpec; private final DateTimeFormatPatternSpec _patternSpec; public DateTimeFormatSpec(String format) { _format = format; - validateFormat(format); - String[] formatTokens = StringUtils.split(format, COLON_SEPARATOR, MAX_FORMAT_TOKENS); - if (formatTokens.length == MAX_FORMAT_TOKENS) { - _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION], - formatTokens[FORMAT_PATTERN_POSITION]); - } else { - _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION]); - } - if (_patternSpec.getTimeFormat() == TimeFormat.TIMESTAMP) { - // TIMESTAMP type stores millis since epoch - _size = 1; - _unitSpec = new DateTimeFormatUnitSpec("MILLISECONDS"); + if (_format.matches(COLON_REGEX)) { + validateFormat(format); + String[] formatTokens = StringUtils.split(format, COLON_SEPARATOR, MAX_FORMAT_TOKENS); + if (formatTokens.length == MAX_FORMAT_TOKENS) { + _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION], + formatTokens[FORMAT_PATTERN_POSITION]); + } else { + _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION]); + } + if
(_patternSpec.getTimeFormat() == TimeFormat.TIMESTAMP) { + // TIMESTAMP type stores millis since epoch + _size = 1; + _unitSpec = new DateTimeFormatUnitSpec("MILLISECONDS"); + } else { + _size = Integer.parseInt(formatTokens[FORMAT_SIZE_POSITION]); + _unitSpec = new DateTimeFormatUnitSpec(formatTokens[FORMAT_UNIT_POSITION]); + } } else { - _size = Integer.parseInt(formatTokens[FORMAT_SIZE_POSITION]); - _unitSpec = new DateTimeFormatUnitSpec(formatTokens[FORMAT_UNIT_POSITION]); + validatePipeFormat(format); + String[] formatTokens = StringUtils.split(format, PIPE_SEPARATOR, MAX_FORMAT_TOKENS_PIPE); + if (formatTokens.length == MAX_FORMAT_TOKENS_PIPE) { //date with tz or epoch with time-size Review Comment: I'd suggest branching based on the first token, instead of the length. The logic would be much easier to understand that way. ########## pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java: ########## @@ -47,28 +50,75 @@ public class DateTimeFormatSpec { public static final int MIN_FORMAT_TOKENS = 3; public static final int MAX_FORMAT_TOKENS = 4; + public static final int FORMAT_TIMEFORMAT_POSITION_PIPE = 0; + public static final int FORMAT_PATTERN_POSITION_PIPE = 1; + public static final int FORMAT_UNIT_POSITION_PIPE = 1; + public static final int FORMAT_SIZE_POSITION_PIPE = 2; + public static final int FORMAT_TIMEZONE_POSITION_PIPE = 2; + public static final int MIN_FORMAT_TOKENS_PIPE = 1; + public static final int MAX_FORMAT_TOKENS_PIPE = 3; + private final String _format; private final int _size; private final DateTimeFormatUnitSpec _unitSpec; private final DateTimeFormatPatternSpec _patternSpec; public DateTimeFormatSpec(String format) { _format = format; - validateFormat(format); - String[] formatTokens = StringUtils.split(format, COLON_SEPARATOR, MAX_FORMAT_TOKENS); - if (formatTokens.length == MAX_FORMAT_TOKENS) { - _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION], - 
formatTokens[FORMAT_PATTERN_POSITION]); - } else { - _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION]); - } - if (_patternSpec.getTimeFormat() == TimeFormat.TIMESTAMP) { - // TIMESTAMP type stores millis since epoch - _size = 1; - _unitSpec = new DateTimeFormatUnitSpec("MILLISECONDS"); + if (_format.matches(COLON_REGEX)) { Review Comment: Let's not use regex to determine the format because it is too expensive. The simplest way I can think of is to check if the first character is a digit -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org