jadami10 commented on code in PR #14385: URL: https://github.com/apache/pinot/pull/14385#discussion_r1831979437
########## pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java: ########## @@ -411,6 +428,80 @@ && isStringLiteral(dateTimeConvertOperands.get(3)), } } + private void optimizeDateTrunc(Function filterFunction, FilterKind filterKind) { + List<Expression> filterOperands = filterFunction.getOperands(); + List<Expression> dateTruncOperands = filterOperands.get(0).getFunctionCall().getOperands(); + + // Check if date trunc function is being applied on a literal value + if (dateTruncOperands.get(1).isSetLiteral()) { + return; + } + + Long lowerMillis = null; + Long upperMillis = null; + DateTimeFormatSpec inputFormat = new DateTimeFormatSpec("TIMESTAMP"); + String inputTimeUnit = (dateTruncOperands.size() >= 3) ? dateTruncOperands.get(2).getLiteral().getStringValue() + : TimeUnit.MILLISECONDS.name(); Review Comment: is the default milliseconds if you just do `DATETRUNC(unit, timeValue)`? ########## pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java: ########## @@ -411,6 +428,80 @@ && isStringLiteral(dateTimeConvertOperands.get(3)), } } + private void optimizeDateTrunc(Function filterFunction, FilterKind filterKind) { + List<Expression> filterOperands = filterFunction.getOperands(); + List<Expression> dateTruncOperands = filterOperands.get(0).getFunctionCall().getOperands(); + + // Check if date trunc function is being applied on a literal value + if (dateTruncOperands.get(1).isSetLiteral()) { + return; + } + + Long lowerMillis = null; + Long upperMillis = null; + DateTimeFormatSpec inputFormat = new DateTimeFormatSpec("TIMESTAMP"); + String inputTimeUnit = (dateTruncOperands.size() >= 3) ? dateTruncOperands.get(2).getLiteral().getStringValue() + : TimeUnit.MILLISECONDS.name(); + String outputTimeUnit = (dateTruncOperands.size() == 5) ? dateTruncOperands.get(4).getLiteral().getStringValue() + : TimeUnit.MILLISECONDS.name(); + boolean lowerInclusive = true; + boolean upperInclusive = true; + List<Expression> operands = new ArrayList<>(dateTruncOperands); + switch (filterKind) { + case EQUALS: + operands.set(1, getExpression(filterOperands.get(1), inputFormat, inputTimeUnit, outputTimeUnit)); + lowerMillis = dateTruncFloor(operands); + upperMillis = dateTruncCeil(operands); + // Check if it is impossible to obtain literal equality + if (lowerMillis != TimeUnit.valueOf(inputTimeUnit).convert(getLongValue(filterOperands.get(1)), + TimeUnit.valueOf(outputTimeUnit))) { + lowerMillis = Long.MAX_VALUE; + upperMillis = Long.MIN_VALUE; + } + break; + case GREATER_THAN: + lowerInclusive = false; + operands.set(1, getExpression(filterOperands.get(1), inputFormat, inputTimeUnit, outputTimeUnit)); + lowerMillis = dateTruncCeil(operands); + break; + case GREATER_THAN_OR_EQUAL: + operands.set(1, getExpression(filterOperands.get(1), inputFormat, inputTimeUnit, outputTimeUnit)); + lowerInclusive = false; + lowerMillis = dateTruncCeil(operands); + if (dateTruncFloor(operands) + == inputFormat.fromFormatToMillis(getLongValue(filterOperands.get(1)))) { + lowerInclusive = true; + lowerMillis = dateTruncFloor(operands); + } + break; + case LESS_THAN: + upperInclusive = false; + operands.set(1, getExpression(filterOperands.get(1), inputFormat, inputTimeUnit, outputTimeUnit)); + upperMillis = dateTruncFloor(operands); + if (upperMillis != inputFormat.fromFormatToMillis(getLongValue(filterOperands.get(1)))) { + upperInclusive = true; + upperMillis = dateTruncCeil(operands); + } + break; + case LESS_THAN_OR_EQUAL: + operands.set(1, filterOperands.get(1)); + upperMillis = dateTruncCeil(operands); + break; + case BETWEEN: + operands.set(1, getExpression(filterOperands.get(1), inputFormat, inputTimeUnit, outputTimeUnit)); + operands.set(1, getExpression(filterOperands.get(2), inputFormat, inputTimeUnit, outputTimeUnit)); + upperMillis = dateTruncCeil(operands); + break; + default: + throw new IllegalStateException(); Review Comment: shouldn't we just return here? ########## pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java: ########## @@ -411,6 +428,80 @@ && isStringLiteral(dateTimeConvertOperands.get(3)), } } + private void optimizeDateTrunc(Function filterFunction, FilterKind filterKind) { + List<Expression> filterOperands = filterFunction.getOperands(); + List<Expression> dateTruncOperands = filterOperands.get(0).getFunctionCall().getOperands(); + + // Check if date trunc function is being applied on a literal value + if (dateTruncOperands.get(1).isSetLiteral()) { + return; + } + + Long lowerMillis = null; + Long upperMillis = null; + DateTimeFormatSpec inputFormat = new DateTimeFormatSpec("TIMESTAMP"); + String inputTimeUnit = (dateTruncOperands.size() >= 3) ? dateTruncOperands.get(2).getLiteral().getStringValue() + : TimeUnit.MILLISECONDS.name(); + String outputTimeUnit = (dateTruncOperands.size() == 5) ? dateTruncOperands.get(4).getLiteral().getStringValue() + : TimeUnit.MILLISECONDS.name(); + boolean lowerInclusive = true; + boolean upperInclusive = true; + List<Expression> operands = new ArrayList<>(dateTruncOperands); + switch (filterKind) { + case EQUALS: + operands.set(1, getExpression(filterOperands.get(1), inputFormat, inputTimeUnit, outputTimeUnit)); + lowerMillis = dateTruncFloor(operands); + upperMillis = dateTruncCeil(operands); + // Check if it is impossible to obtain literal equality + if (lowerMillis != TimeUnit.valueOf(inputTimeUnit).convert(getLongValue(filterOperands.get(1)), Review Comment: I don't quite follow this line. mind explaining it ########## pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java: ########## @@ -411,6 +428,80 @@ && isStringLiteral(dateTimeConvertOperands.get(3)), } } + private void optimizeDateTrunc(Function filterFunction, FilterKind filterKind) { + List<Expression> filterOperands = filterFunction.getOperands(); + List<Expression> dateTruncOperands = filterOperands.get(0).getFunctionCall().getOperands(); + + // Check if date trunc function is being applied on a literal value + if (dateTruncOperands.get(1).isSetLiteral()) { + return; + } + + Long lowerMillis = null; + Long upperMillis = null; + DateTimeFormatSpec inputFormat = new DateTimeFormatSpec("TIMESTAMP"); + String inputTimeUnit = (dateTruncOperands.size() >= 3) ? dateTruncOperands.get(2).getLiteral().getStringValue() + : TimeUnit.MILLISECONDS.name(); + String outputTimeUnit = (dateTruncOperands.size() == 5) ? dateTruncOperands.get(4).getLiteral().getStringValue() + : TimeUnit.MILLISECONDS.name(); + boolean lowerInclusive = true; + boolean upperInclusive = true; + List<Expression> operands = new ArrayList<>(dateTruncOperands); + switch (filterKind) { + case EQUALS: + operands.set(1, getExpression(filterOperands.get(1), inputFormat, inputTimeUnit, outputTimeUnit)); + lowerMillis = dateTruncFloor(operands); + upperMillis = dateTruncCeil(operands); Review Comment: these should also be equal? ########## pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java: ########## @@ -411,6 +428,80 @@ && isStringLiteral(dateTimeConvertOperands.get(3)), } } + private void optimizeDateTrunc(Function filterFunction, FilterKind filterKind) { + List<Expression> filterOperands = filterFunction.getOperands(); + List<Expression> dateTruncOperands = filterOperands.get(0).getFunctionCall().getOperands(); + + // Check if date trunc function is being applied on a literal value + if (dateTruncOperands.get(1).isSetLiteral()) { Review Comment: 1. I think you want a `testInvalidFilterOptimizer` unit test for this. 2. nit: this comment is identical to the code below it. a better comment would say why we can't/don't optimize literal values. ########## pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizerTest.java: ########## @@ -121,38 +114,74 @@ public void testEpochToEpochDateTimeConvert() { new Range(1620833400L, true, null, false)); testTimeConvert("dateTimeConvert(col, '1:MINUTES:EPOCH', '1:HOURS:EPOCH', '30:MINUTES') < 450255", new Range(null, false, 27015300L, false)); - testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH', '30:MINUTES') BETWEEN 18759 AND 18760", + testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH', '30:MINUTES') " + + "BETWEEN 18759 AND 18760", new Range(18759L, true, 18761L, false)); testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH', '30:MINUTES') = 18759", new Range(18759L, true, 18760L, false)); // Invalid time - testInvalidTimeConvert("dateTimeConvert(col, '1:SECONDS:EPOCH', '1:MINUTES:EPOCH', '30:MINUTES') > 27013846.5"); - testInvalidTimeConvert("dateTimeConvert(col, '1:SECONDS:EPOCH', '30:MINUTES:EPOCH', '30:MINUTES') > 27013846"); + testInvalidFilterOptimizer("dateTimeConvert(col, '1:SECONDS:EPOCH', '1:MINUTES:EPOCH', '30:MINUTES') > 27013846.5"); + testInvalidFilterOptimizer("dateTimeConvert(col, '1:SECONDS:EPOCH', '30:MINUTES:EPOCH', '30:MINUTES') > 27013846"); } @Test public void testSDFToEpochDateTimeConvert() { - testTimeConvert( - "dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:MILLISECONDS:EPOCH', " - + "'30:MINUTES') > 1620830760000", new Range("2021-05-12 15:00:00.000", true, null, false)); - testTimeConvert("dateTimeConvert(col, '1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:MILLISECONDS:EPOCH', " - + "'30:MINUTES') < 1620917160000", new Range(null, false, "2021-05-13 15:00:00", false)); - testTimeConvert( - "dateTimeConvert(col, '1:MINUTES:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm', '1:MILLISECONDS:EPOCH', '30:MINUTES') " - + "BETWEEN 1620830760000 AND 1620917160000", - new Range("2021-05-12 15:00", true, "2021-05-13 15:00", false)); - testTimeConvert( - "dateTimeConvert(col, '1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd', '1:MILLISECONDS:EPOCH', '30:MINUTES') = " - + "1620830760000", new Range("2021-05-12", false, "2021-05-12", true)); + testTimeConvert("dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:MILLISECONDS:" + + "EPOCH', '30:MINUTES') > 1620830760000", new Range("2021-05-12 15:00:00.000", true, null, false)); + testTimeConvert("dateTimeConvert(col, '1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:MILLISECONDS:EPOCH'," + + " '30:MINUTES') < 1620917160000", new Range(null, false, "2021-05-13 15:00:00", false)); + testTimeConvert("dateTimeConvert(col, '1:MINUTES:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm', '1:MILLISECONDS:EPOCH', " + + "'30:MINUTES') BETWEEN 1620830760000 AND 1620917160000", new Range("2021-05-12 15:00", true, "2021-05-13 " + + "15:00", false)); + testTimeConvert("dateTimeConvert(col, '1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd', '1:MILLISECONDS:EPOCH', '30:MINUTES')" + + " = 1620830760000", new Range("2021-05-12", false, "2021-05-12", true)); // Invalid time - testInvalidTimeConvert( - "dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:MILLISECONDS:EPOCH', " - + "'30:MINUTES') > 1620830760000.5"); - testInvalidTimeConvert( - "dateTimeConvert(col, '1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:MILLISECONDS:EPOCH', " - + "'30:MINUTES') < 1620917160"); + testInvalidFilterOptimizer("dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', " + + "'1:MILLISECONDS:EPOCH', '30:MINUTES') > 1620830760000.5"); + testInvalidFilterOptimizer("dateTimeConvert(col, '1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', " + + "'1:MILLISECONDS:EPOCH', '30:MINUTES') < 1620917160"); + } + + + @Test + public void testDateTruncOptimizer() { + testDateTrunc("datetrunc('DAY', col) < 1620777600000", new Range("0", true, "1620777600000", false)); Review Comment: 1. do we need a test with an INT instead of long? I believe that's also a valid time type in pinot 2. can we have test with week or month truncation 3. can we have a test case where it's not a support function. as in `IN` to make sure nothing is erroring 4. also a test case where the time granularity is unsupported? I'm not sure if calcite will catch that before the optimizer, but we do use `DAY` as an input ########## pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizerTest.java: ########## @@ -121,38 +114,74 @@ public void testEpochToEpochDateTimeConvert() { new Range(1620833400L, true, null, false)); testTimeConvert("dateTimeConvert(col, '1:MINUTES:EPOCH', '1:HOURS:EPOCH', '30:MINUTES') < 450255", new Range(null, false, 27015300L, false)); - testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH', '30:MINUTES') BETWEEN 18759 AND 18760", + testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH', '30:MINUTES') " + + "BETWEEN 18759 AND 18760", new Range(18759L, true, 18761L, false)); testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH', '30:MINUTES') = 18759", new Range(18759L, true, 18760L, false)); // Invalid time - testInvalidTimeConvert("dateTimeConvert(col, '1:SECONDS:EPOCH', '1:MINUTES:EPOCH', '30:MINUTES') > 27013846.5"); - testInvalidTimeConvert("dateTimeConvert(col, '1:SECONDS:EPOCH', '30:MINUTES:EPOCH', '30:MINUTES') > 27013846"); + testInvalidFilterOptimizer("dateTimeConvert(col, '1:SECONDS:EPOCH', '1:MINUTES:EPOCH', '30:MINUTES') > 27013846.5"); + testInvalidFilterOptimizer("dateTimeConvert(col, '1:SECONDS:EPOCH', '30:MINUTES:EPOCH', '30:MINUTES') > 27013846"); } @Test public void testSDFToEpochDateTimeConvert() { - testTimeConvert( - "dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:MILLISECONDS:EPOCH', " - + "'30:MINUTES') > 1620830760000", new Range("2021-05-12 15:00:00.000", true, null, false)); - testTimeConvert("dateTimeConvert(col, '1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:MILLISECONDS:EPOCH', " - + "'30:MINUTES') < 1620917160000", new Range(null, false, "2021-05-13 15:00:00", false)); - testTimeConvert( - "dateTimeConvert(col, '1:MINUTES:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm', '1:MILLISECONDS:EPOCH', '30:MINUTES') " - + "BETWEEN 1620830760000 AND 1620917160000", - new Range("2021-05-12 15:00", true, "2021-05-13 15:00", false)); - testTimeConvert( - "dateTimeConvert(col, '1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd', '1:MILLISECONDS:EPOCH', '30:MINUTES') = " - + "1620830760000", new Range("2021-05-12", false, "2021-05-12", true)); + testTimeConvert("dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:MILLISECONDS:" + + "EPOCH', '30:MINUTES') > 1620830760000", new Range("2021-05-12 15:00:00.000", true, null, false)); + testTimeConvert("dateTimeConvert(col, '1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:MILLISECONDS:EPOCH'," + + " '30:MINUTES') < 1620917160000", new Range(null, false, "2021-05-13 15:00:00", false)); + testTimeConvert("dateTimeConvert(col, '1:MINUTES:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm', '1:MILLISECONDS:EPOCH', " + + "'30:MINUTES') BETWEEN 1620830760000 AND 1620917160000", new Range("2021-05-12 15:00", true, "2021-05-13 " + + "15:00", false)); + testTimeConvert("dateTimeConvert(col, '1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd', '1:MILLISECONDS:EPOCH', '30:MINUTES')" + + " = 1620830760000", new Range("2021-05-12", false, "2021-05-12", true)); // Invalid time - testInvalidTimeConvert( - "dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:MILLISECONDS:EPOCH', " - + "'30:MINUTES') > 1620830760000.5"); - testInvalidTimeConvert( - "dateTimeConvert(col, '1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:MILLISECONDS:EPOCH', " - + "'30:MINUTES') < 1620917160"); + testInvalidFilterOptimizer("dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', " + + "'1:MILLISECONDS:EPOCH', '30:MINUTES') > 1620830760000.5"); + testInvalidFilterOptimizer("dateTimeConvert(col, '1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', " + + "'1:MILLISECONDS:EPOCH', '30:MINUTES') < 1620917160"); + } + + + @Test + public void testDateTruncOptimizer() { + testDateTrunc("datetrunc('DAY', col) < 1620777600000", new Range("0", true, "1620777600000", false)); + testDateTrunc("dateTrunc('DAY', col) < 1620777600000", new Range("0", true, "1620777600000", false)); + testDateTrunc("DATETRUNC('DAY', col) < 1620777600010", new Range("0", true, "1620863999999", true)); + testDateTrunc("DATE_TRUNC('DAY', col) < 1620863999999", new Range("0", true, "1620863999999", true)); + + testDateTrunc("datetrunc('DAY', col) <= 1620777600000", new Range("0", true, "1620863999999", true)); + testDateTrunc("datetrunc('DAY', col) <= 1620777600010", new Range("0", true, "1620863999999", true)); + + testDateTrunc("datetrunc('DAY', col) > 1620777600000", new Range("1620863999999", false, Long.MAX_VALUE, true)); + testDateTrunc("dateTrunc('DAY', col) > 1620863999999", new Range("1620863999999", false, Long.MAX_VALUE, true)); + testDateTrunc("DATETRUNC('DAY', col) > 1620864000000", new Range("1620950399999", false, Long.MAX_VALUE, true)); + + testDateTrunc("datetrunc('DAY', col) >= 1620863999909", new Range("1620863999999", false, Long.MAX_VALUE, true)); + testDateTrunc("datetrunc('DAY', col) >= 1620777600000", new Range("1620777600000", true, Long.MAX_VALUE, true)); + + testDateTrunc("datetrunc('DAY', col) = 1620777600000", new Range("1620777600000", true, "1620863999999", true)); + testDateTrunc("dateTrunc('DAY', col) = 1620777600001", new Range(Long.MAX_VALUE, true, Long.MIN_VALUE, true)); + + testDateTrunc("datetrunc('DAY', col, 'DAYS', 'UTC', 'DAYS') = 453631", new Range("453631", true, "453631", true)); Review Comment: can we add 1 non-utc test -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org