jadami10 commented on code in PR #14385: URL: https://github.com/apache/pinot/pull/14385#discussion_r1962401280
########## pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizerTest.java: ########## @@ -121,38 +114,79 @@ public void testEpochToEpochDateTimeConvert() { new Range(1620833400L, true, null, false)); testTimeConvert("dateTimeConvert(col, '1:MINUTES:EPOCH', '1:HOURS:EPOCH', '30:MINUTES') < 450255", new Range(null, false, 27015300L, false)); - testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH', '30:MINUTES') BETWEEN 18759 AND 18760", + testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH', '30:MINUTES') " + + "BETWEEN 18759 AND 18760", new Range(18759L, true, 18761L, false)); testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH', '30:MINUTES') = 18759", new Range(18759L, true, 18760L, false)); // Invalid time - testInvalidTimeConvert("dateTimeConvert(col, '1:SECONDS:EPOCH', '1:MINUTES:EPOCH', '30:MINUTES') > 27013846.5"); - testInvalidTimeConvert("dateTimeConvert(col, '1:SECONDS:EPOCH', '30:MINUTES:EPOCH', '30:MINUTES') > 27013846"); + testInvalidFilterOptimizer("dateTimeConvert(col, '1:SECONDS:EPOCH', '1:MINUTES:EPOCH', '30:MINUTES') > 27013846.5"); + testInvalidFilterOptimizer("dateTimeConvert(col, '1:SECONDS:EPOCH', '30:MINUTES:EPOCH', '30:MINUTES') > 27013846"); } @Test public void testSDFToEpochDateTimeConvert() { - testTimeConvert( - "dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:MILLISECONDS:EPOCH', " - + "'30:MINUTES') > 1620830760000", new Range("2021-05-12 15:00:00.000", true, null, false)); - testTimeConvert("dateTimeConvert(col, '1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:MILLISECONDS:EPOCH', " - + "'30:MINUTES') < 1620917160000", new Range(null, false, "2021-05-13 15:00:00", false)); - testTimeConvert( - "dateTimeConvert(col, '1:MINUTES:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm', '1:MILLISECONDS:EPOCH', '30:MINUTES') " - + "BETWEEN 1620830760000 AND 1620917160000", - new Range("2021-05-12 15:00", true, "2021-05-13 15:00", false)); - testTimeConvert( - "dateTimeConvert(col, '1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd', '1:MILLISECONDS:EPOCH', '30:MINUTES') = " - + "1620830760000", new Range("2021-05-12", false, "2021-05-12", true)); + testTimeConvert("dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:MILLISECONDS:" + + "EPOCH', '30:MINUTES') > 1620830760000", new Range("2021-05-12 15:00:00.000", true, null, false)); + testTimeConvert("dateTimeConvert(col, '1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:MILLISECONDS:EPOCH'," + + " '30:MINUTES') < 1620917160000", new Range(null, false, "2021-05-13 15:00:00", false)); + testTimeConvert("dateTimeConvert(col, '1:MINUTES:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm', '1:MILLISECONDS:EPOCH', " + + "'30:MINUTES') BETWEEN 1620830760000 AND 1620917160000", new Range("2021-05-12 15:00", true, "2021-05-13 " + + "15:00", false)); + testTimeConvert("dateTimeConvert(col, '1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd', '1:MILLISECONDS:EPOCH', '30:MINUTES')" + + " = 1620830760000", new Range("2021-05-12", false, "2021-05-12", true)); // Invalid time - testInvalidTimeConvert( - "dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:MILLISECONDS:EPOCH', " - + "'30:MINUTES') > 1620830760000.5"); - testInvalidTimeConvert( - "dateTimeConvert(col, '1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:MILLISECONDS:EPOCH', " - + "'30:MINUTES') < 1620917160"); + testInvalidFilterOptimizer("dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', " + + "'1:MILLISECONDS:EPOCH', '30:MINUTES') > 1620830760000.5"); + testInvalidFilterOptimizer("dateTimeConvert(col, '1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', " + + "'1:MILLISECONDS:EPOCH', '30:MINUTES') < 1620917160"); + } + + + @Test + public void testDateTruncOptimizer() { + testDateTrunc("datetrunc('DAY', col) = 1620777600000", new Range("1620777600000", true, "1620863999999", true)); + testDateTrunc("dateTrunc('DAY', col) = 1620777600001", new Range(Long.MAX_VALUE, true, Long.MIN_VALUE, true)); + + testDateTrunc("datetrunc('DAY', col) < 1620777600000", new Range(Long.MIN_VALUE, true, "1620777600000", false)); + testDateTrunc("DATETRUNC('DAY', col) < 1620777600010", new Range(Long.MIN_VALUE, true, "1620863999999", true)); + testDateTrunc("DATE_TRUNC('DAY', col) < 1620863999999", new Range(Long.MIN_VALUE, true, "1620863999999", true)); + + testDateTrunc("datetrunc('DAY', col) <= 1620777600000", new Range(Long.MIN_VALUE, true, "1620863999999", true)); + testDateTrunc("datetrunc('DAY', col) <= 1620777600010", new Range(Long.MIN_VALUE, true, "1620863999999", true)); + + testDateTrunc("datetrunc('DAY', col) > 1620777600000", new Range("1620863999999", false, Long.MAX_VALUE, true)); + testDateTrunc("dateTrunc('DAY', col) > 1620863999999", new Range("1620863999999", false, Long.MAX_VALUE, true)); + testDateTrunc("DATETRUNC('DAY', col) > 1620864000000", new Range("1620950399999", false, Long.MAX_VALUE, true)); + + testDateTrunc("datetrunc('DAY', col) >= 1620863999909", new Range("1620863999999", false, Long.MAX_VALUE, true)); + testDateTrunc("datetrunc('DAY', col) >= 1620777600000", new Range("1620777600000", true, Long.MAX_VALUE, true)); + + testInvalidFilterOptimizer("datetrunc('DAY', col, 'MILLISECONDS', 'CET', 'MILLISECONDS') = 1620770400000"); + testDateTrunc("datetrunc('DAY', col, 'DAYS', 'UTC', 'DAYS') = 453631", new Range("453631", true, "453631", true)); + testInvalidFilterOptimizer("datetrunc('DAY', col, 'DAYS', 'CET', 'MILLISECONDS') = 39193714800000"); + testDateTrunc("datetrunc('DAY', col, 'MILLISECONDS', 'UTC', 'DAYS') = 453630", + new Range("39193632000000", true, "39193718399999", true)); + testInvalidFilterOptimizer("datetrunc('DAY', col, 'MILLISECONDS', 'CET', 'DAYS') = 453630"); + // TODO: add between predicate test Review Comment: This is a pretty important code path since it rewrites queries. I would either add tests, or not handle the BETWEEN case at all. ########## pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizerTest.java: ########## @@ -121,38 +114,79 @@ public void testEpochToEpochDateTimeConvert() { new Range(1620833400L, true, null, false)); testTimeConvert("dateTimeConvert(col, '1:MINUTES:EPOCH', '1:HOURS:EPOCH', '30:MINUTES') < 450255", new Range(null, false, 27015300L, false)); - testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH', '30:MINUTES') BETWEEN 18759 AND 18760", + testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH', '30:MINUTES') " + + "BETWEEN 18759 AND 18760", new Range(18759L, true, 18761L, false)); testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH', '30:MINUTES') = 18759", new Range(18759L, true, 18760L, false)); // Invalid time - testInvalidTimeConvert("dateTimeConvert(col, '1:SECONDS:EPOCH', '1:MINUTES:EPOCH', '30:MINUTES') > 27013846.5"); - testInvalidTimeConvert("dateTimeConvert(col, '1:SECONDS:EPOCH', '30:MINUTES:EPOCH', '30:MINUTES') > 27013846"); + testInvalidFilterOptimizer("dateTimeConvert(col, '1:SECONDS:EPOCH', '1:MINUTES:EPOCH', '30:MINUTES') > 27013846.5"); + testInvalidFilterOptimizer("dateTimeConvert(col, '1:SECONDS:EPOCH', '30:MINUTES:EPOCH', '30:MINUTES') > 27013846"); } @Test public void testSDFToEpochDateTimeConvert() { - testTimeConvert( - "dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:MILLISECONDS:EPOCH', " - + "'30:MINUTES') > 1620830760000", new Range("2021-05-12 15:00:00.000", true, null, false)); - testTimeConvert("dateTimeConvert(col, '1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:MILLISECONDS:EPOCH', " - + "'30:MINUTES') < 1620917160000", new Range(null, false, "2021-05-13 15:00:00", false)); - testTimeConvert( - "dateTimeConvert(col, '1:MINUTES:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm', '1:MILLISECONDS:EPOCH', '30:MINUTES') " - + "BETWEEN 1620830760000 AND 1620917160000", - new Range("2021-05-12 15:00", true, "2021-05-13 15:00", false)); - testTimeConvert( - "dateTimeConvert(col, '1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd', '1:MILLISECONDS:EPOCH', '30:MINUTES') = " - + "1620830760000", new Range("2021-05-12", false, "2021-05-12", true)); + testTimeConvert("dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:MILLISECONDS:" + + "EPOCH', '30:MINUTES') > 1620830760000", new Range("2021-05-12 15:00:00.000", true, null, false)); + testTimeConvert("dateTimeConvert(col, '1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:MILLISECONDS:EPOCH'," + + " '30:MINUTES') < 1620917160000", new Range(null, false, "2021-05-13 15:00:00", false)); + testTimeConvert("dateTimeConvert(col, '1:MINUTES:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm', '1:MILLISECONDS:EPOCH', " + + "'30:MINUTES') BETWEEN 1620830760000 AND 1620917160000", new Range("2021-05-12 15:00", true, "2021-05-13 " + + "15:00", false)); + testTimeConvert("dateTimeConvert(col, '1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd', '1:MILLISECONDS:EPOCH', '30:MINUTES')" + + " = 1620830760000", new Range("2021-05-12", false, "2021-05-12", true)); // Invalid time - testInvalidTimeConvert( - "dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:MILLISECONDS:EPOCH', " - + "'30:MINUTES') > 1620830760000.5"); - testInvalidTimeConvert( - "dateTimeConvert(col, '1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:MILLISECONDS:EPOCH', " - + "'30:MINUTES') < 1620917160"); + testInvalidFilterOptimizer("dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', " + + "'1:MILLISECONDS:EPOCH', '30:MINUTES') > 1620830760000.5"); + testInvalidFilterOptimizer("dateTimeConvert(col, '1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', " + + "'1:MILLISECONDS:EPOCH', '30:MINUTES') < 1620917160"); + } + + + @Test + public void testDateTruncOptimizer() { + testDateTrunc("datetrunc('DAY', col) = 1620777600000", new Range("1620777600000", true, "1620863999999", true)); Review Comment: can you add a test where it's flipped like ``` testDateTrunc("1620777600000 = datetrunc('DAY', col)", new Range("1620777600000", true, "1620863999999", true)); ``` ########## pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java: ########## @@ -411,6 +427,94 @@ && isStringLiteral(dateTimeConvertOperands.get(3)), } } + private void optimizeDateTrunc(Function filterFunction, FilterKind filterKind) { + List<Expression> filterOperands = filterFunction.getOperands(); + List<Expression> dateTruncOperands = filterOperands.get(0).getFunctionCall().getOperands(); + + if (dateTruncOperands.get(1).isSetLiteral()) { + return; + } + + Long lowerMillis = null; + Long upperMillis = null; + boolean lowerInclusive = true; + boolean upperInclusive = true; + List<Expression> operands = new ArrayList<>(dateTruncOperands); + String unit = operands.get(0).getLiteral().getStringValue(); + String inputTimeUnit = (operands.size() >= 3) ? operands.get(2).getLiteral().getStringValue() + : TimeUnit.MILLISECONDS.name(); + if (operands.size() >= 4) { + if (!operands.get(3).getLiteral().getStringValue().equals("UTC")) { + // Leave query unoptimized if working with non-UTC time zones + return; + } + } + ISOChronology chronology = ISOChronology.getInstanceUTC(); + String outputTimeUnit = (operands.size() == 5) ? operands.get(4).getLiteral().getStringValue() + : TimeUnit.MILLISECONDS.name(); + switch (filterKind) { + case EQUALS: + operands.set(1, getExpression(getLongValue(filterOperands.get(1)), new DateTimeFormatSpec("TIMESTAMP"))); + upperMillis = dateTruncCeil(operands); + lowerMillis = dateTruncFloor(operands); + if (lowerMillis != DateTimeUtils.getTimestampField(chronology, unit).roundFloor(lowerMillis)) { Review Comment: I forgot what this case is handling: - what is the `if` statement checking? - why are we returning early at minimum, it needs a code comment for future readers ########## pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java: ########## @@ -411,6 +427,94 @@ && isStringLiteral(dateTimeConvertOperands.get(3)), } } + private void optimizeDateTrunc(Function filterFunction, FilterKind filterKind) { + List<Expression> filterOperands = filterFunction.getOperands(); + List<Expression> dateTruncOperands = filterOperands.get(0).getFunctionCall().getOperands(); Review Comment: similar to my test comment below. is this assuming it's always `dateTrunc(...) = X`, or will it work for `X = dateTrunc(...)` ########## pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/TimestampIndexMseTest.java: ########## @@ -85,23 +85,6 @@ public void timestampIndexSubstitutedInProjections() { + " FilterMatchEntireSegment(numDocs=[115545])\n"); } - @Test Review Comment: why are these removed? ########## pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java: ########## @@ -411,6 +427,94 @@ && isStringLiteral(dateTimeConvertOperands.get(3)), } } + private void optimizeDateTrunc(Function filterFunction, FilterKind filterKind) { + List<Expression> filterOperands = filterFunction.getOperands(); + List<Expression> dateTruncOperands = filterOperands.get(0).getFunctionCall().getOperands(); + + if (dateTruncOperands.get(1).isSetLiteral()) { + return; + } + + Long lowerMillis = null; + Long upperMillis = null; + boolean lowerInclusive = true; + boolean upperInclusive = true; + List<Expression> operands = new ArrayList<>(dateTruncOperands); + String unit = operands.get(0).getLiteral().getStringValue(); + String inputTimeUnit = (operands.size() >= 3) ? operands.get(2).getLiteral().getStringValue() + : TimeUnit.MILLISECONDS.name(); + if (operands.size() >= 4) { + if (!operands.get(3).getLiteral().getStringValue().equals("UTC")) { + // Leave query unoptimized if working with non-UTC time zones + return; + } + } + ISOChronology chronology = ISOChronology.getInstanceUTC(); + String outputTimeUnit = (operands.size() == 5) ? operands.get(4).getLiteral().getStringValue() + : TimeUnit.MILLISECONDS.name(); + switch (filterKind) { + case EQUALS: + operands.set(1, getExpression(getLongValue(filterOperands.get(1)), new DateTimeFormatSpec("TIMESTAMP"))); + upperMillis = dateTruncCeil(operands); + lowerMillis = dateTruncFloor(operands); + if (lowerMillis != DateTimeUtils.getTimestampField(chronology, unit).roundFloor(lowerMillis)) { + lowerMillis = Long.MAX_VALUE; + upperMillis = Long.MIN_VALUE; + String rangeString = new Range(lowerMillis, lowerInclusive, upperMillis, upperInclusive).getRangeString(); + rewriteToRange(filterFunction, dateTruncOperands.get(1), rangeString); + return; + } + break; + case GREATER_THAN: + operands.set(1, getExpression(getLongValue(filterOperands.get(1)), new DateTimeFormatSpec("TIMESTAMP"))); + lowerMillis = dateTruncCeil(operands); + lowerInclusive = false; + upperMillis = Long.MAX_VALUE; + break; + case GREATER_THAN_OR_EQUAL: + operands.set(1, getExpression(getLongValue(filterOperands.get(1)), new DateTimeFormatSpec("TIMESTAMP"))); + lowerMillis = dateTruncFloor(operands); + upperMillis = Long.MAX_VALUE; + if (lowerMillis != DateTimeUtils.getTimestampField(chronology, unit).roundFloor(lowerMillis)) { Review Comment: 1. what is this check doing 2. why don't we have it in less_that_or_equal? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org