jadami10 commented on code in PR #14385:
URL: https://github.com/apache/pinot/pull/14385#discussion_r1962401280
##########
pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizerTest.java:
##########
@@ -121,38 +114,79 @@ public void testEpochToEpochDateTimeConvert() {
new Range(1620833400L, true, null, false));
testTimeConvert("dateTimeConvert(col, '1:MINUTES:EPOCH', '1:HOURS:EPOCH',
'30:MINUTES') < 450255",
new Range(null, false, 27015300L, false));
- testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH',
'30:MINUTES') BETWEEN 18759 AND 18760",
+ testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH',
'30:MINUTES') "
+ + "BETWEEN 18759 AND 18760",
new Range(18759L, true, 18761L, false));
testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH',
'30:MINUTES') = 18759",
new Range(18759L, true, 18760L, false));
// Invalid time
- testInvalidTimeConvert("dateTimeConvert(col, '1:SECONDS:EPOCH',
'1:MINUTES:EPOCH', '30:MINUTES') > 27013846.5");
- testInvalidTimeConvert("dateTimeConvert(col, '1:SECONDS:EPOCH',
'30:MINUTES:EPOCH', '30:MINUTES') > 27013846");
+ testInvalidFilterOptimizer("dateTimeConvert(col, '1:SECONDS:EPOCH',
'1:MINUTES:EPOCH', '30:MINUTES') > 27013846.5");
+ testInvalidFilterOptimizer("dateTimeConvert(col, '1:SECONDS:EPOCH',
'30:MINUTES:EPOCH', '30:MINUTES') > 27013846");
}
@Test
public void testSDFToEpochDateTimeConvert() {
- testTimeConvert(
- "dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd
HH:mm:ss.SSS', '1:MILLISECONDS:EPOCH', "
- + "'30:MINUTES') > 1620830760000", new Range("2021-05-12
15:00:00.000", true, null, false));
- testTimeConvert("dateTimeConvert(col,
'1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:MILLISECONDS:EPOCH', "
- + "'30:MINUTES') < 1620917160000", new Range(null, false, "2021-05-13
15:00:00", false));
- testTimeConvert(
- "dateTimeConvert(col, '1:MINUTES:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm',
'1:MILLISECONDS:EPOCH', '30:MINUTES') "
- + "BETWEEN 1620830760000 AND 1620917160000",
- new Range("2021-05-12 15:00", true, "2021-05-13 15:00", false));
- testTimeConvert(
- "dateTimeConvert(col, '1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd',
'1:MILLISECONDS:EPOCH', '30:MINUTES') = "
- + "1620830760000", new Range("2021-05-12", false, "2021-05-12",
true));
+ testTimeConvert("dateTimeConvert(col,
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:MILLISECONDS:"
+ + "EPOCH', '30:MINUTES') > 1620830760000", new Range("2021-05-12
15:00:00.000", true, null, false));
+ testTimeConvert("dateTimeConvert(col,
'1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:MILLISECONDS:EPOCH',"
+ + " '30:MINUTES') < 1620917160000", new Range(null, false, "2021-05-13
15:00:00", false));
+ testTimeConvert("dateTimeConvert(col,
'1:MINUTES:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm', '1:MILLISECONDS:EPOCH', "
+ + "'30:MINUTES') BETWEEN 1620830760000 AND 1620917160000", new
Range("2021-05-12 15:00", true, "2021-05-13 "
+ + "15:00", false));
+ testTimeConvert("dateTimeConvert(col,
'1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd', '1:MILLISECONDS:EPOCH', '30:MINUTES')"
+ + " = 1620830760000", new Range("2021-05-12", false, "2021-05-12",
true));
// Invalid time
- testInvalidTimeConvert(
- "dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd
HH:mm:ss.SSS', '1:MILLISECONDS:EPOCH', "
- + "'30:MINUTES') > 1620830760000.5");
- testInvalidTimeConvert(
- "dateTimeConvert(col, '1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd
HH:mm:ss', '1:MILLISECONDS:EPOCH', "
- + "'30:MINUTES') < 1620917160");
+ testInvalidFilterOptimizer("dateTimeConvert(col,
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', "
+ + "'1:MILLISECONDS:EPOCH', '30:MINUTES') > 1620830760000.5");
+ testInvalidFilterOptimizer("dateTimeConvert(col,
'1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', "
+ + "'1:MILLISECONDS:EPOCH', '30:MINUTES') < 1620917160");
+ }
+
+
+ @Test
+ public void testDateTruncOptimizer() {
+ testDateTrunc("datetrunc('DAY', col) = 1620777600000", new
Range("1620777600000", true, "1620863999999", true));
+ testDateTrunc("dateTrunc('DAY', col) = 1620777600001", new
Range(Long.MAX_VALUE, true, Long.MIN_VALUE, true));
+
+ testDateTrunc("datetrunc('DAY', col) < 1620777600000", new
Range(Long.MIN_VALUE, true, "1620777600000", false));
+ testDateTrunc("DATETRUNC('DAY', col) < 1620777600010", new
Range(Long.MIN_VALUE, true, "1620863999999", true));
+ testDateTrunc("DATE_TRUNC('DAY', col) < 1620863999999", new
Range(Long.MIN_VALUE, true, "1620863999999", true));
+
+ testDateTrunc("datetrunc('DAY', col) <= 1620777600000", new
Range(Long.MIN_VALUE, true, "1620863999999", true));
+ testDateTrunc("datetrunc('DAY', col) <= 1620777600010", new
Range(Long.MIN_VALUE, true, "1620863999999", true));
+
+ testDateTrunc("datetrunc('DAY', col) > 1620777600000", new
Range("1620863999999", false, Long.MAX_VALUE, true));
+ testDateTrunc("dateTrunc('DAY', col) > 1620863999999", new
Range("1620863999999", false, Long.MAX_VALUE, true));
+ testDateTrunc("DATETRUNC('DAY', col) > 1620864000000", new
Range("1620950399999", false, Long.MAX_VALUE, true));
+
+ testDateTrunc("datetrunc('DAY', col) >= 1620863999909", new
Range("1620863999999", false, Long.MAX_VALUE, true));
+ testDateTrunc("datetrunc('DAY', col) >= 1620777600000", new
Range("1620777600000", true, Long.MAX_VALUE, true));
+
+ testInvalidFilterOptimizer("datetrunc('DAY', col, 'MILLISECONDS', 'CET',
'MILLISECONDS') = 1620770400000");
+ testDateTrunc("datetrunc('DAY', col, 'DAYS', 'UTC', 'DAYS') = 453631", new
Range("453631", true, "453631", true));
+ testInvalidFilterOptimizer("datetrunc('DAY', col, 'DAYS', 'CET',
'MILLISECONDS') = 39193714800000");
+ testDateTrunc("datetrunc('DAY', col, 'MILLISECONDS', 'UTC', 'DAYS') =
453630",
+ new Range("39193632000000", true, "39193718399999", true));
+ testInvalidFilterOptimizer("datetrunc('DAY', col, 'MILLISECONDS', 'CET',
'DAYS') = 453630");
+ // TODO: add between predicate test
Review Comment:
This is a pretty important code path since it rewrites queries. I would
either add tests, or not handle the BETWEEN case at all.
##########
pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizerTest.java:
##########
@@ -121,38 +114,79 @@ public void testEpochToEpochDateTimeConvert() {
new Range(1620833400L, true, null, false));
testTimeConvert("dateTimeConvert(col, '1:MINUTES:EPOCH', '1:HOURS:EPOCH',
'30:MINUTES') < 450255",
new Range(null, false, 27015300L, false));
- testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH',
'30:MINUTES') BETWEEN 18759 AND 18760",
+ testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH',
'30:MINUTES') "
+ + "BETWEEN 18759 AND 18760",
new Range(18759L, true, 18761L, false));
testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH',
'30:MINUTES') = 18759",
new Range(18759L, true, 18760L, false));
// Invalid time
- testInvalidTimeConvert("dateTimeConvert(col, '1:SECONDS:EPOCH',
'1:MINUTES:EPOCH', '30:MINUTES') > 27013846.5");
- testInvalidTimeConvert("dateTimeConvert(col, '1:SECONDS:EPOCH',
'30:MINUTES:EPOCH', '30:MINUTES') > 27013846");
+ testInvalidFilterOptimizer("dateTimeConvert(col, '1:SECONDS:EPOCH',
'1:MINUTES:EPOCH', '30:MINUTES') > 27013846.5");
+ testInvalidFilterOptimizer("dateTimeConvert(col, '1:SECONDS:EPOCH',
'30:MINUTES:EPOCH', '30:MINUTES') > 27013846");
}
@Test
public void testSDFToEpochDateTimeConvert() {
- testTimeConvert(
- "dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd
HH:mm:ss.SSS', '1:MILLISECONDS:EPOCH', "
- + "'30:MINUTES') > 1620830760000", new Range("2021-05-12
15:00:00.000", true, null, false));
- testTimeConvert("dateTimeConvert(col,
'1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:MILLISECONDS:EPOCH', "
- + "'30:MINUTES') < 1620917160000", new Range(null, false, "2021-05-13
15:00:00", false));
- testTimeConvert(
- "dateTimeConvert(col, '1:MINUTES:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm',
'1:MILLISECONDS:EPOCH', '30:MINUTES') "
- + "BETWEEN 1620830760000 AND 1620917160000",
- new Range("2021-05-12 15:00", true, "2021-05-13 15:00", false));
- testTimeConvert(
- "dateTimeConvert(col, '1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd',
'1:MILLISECONDS:EPOCH', '30:MINUTES') = "
- + "1620830760000", new Range("2021-05-12", false, "2021-05-12",
true));
+ testTimeConvert("dateTimeConvert(col,
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:MILLISECONDS:"
+ + "EPOCH', '30:MINUTES') > 1620830760000", new Range("2021-05-12
15:00:00.000", true, null, false));
+ testTimeConvert("dateTimeConvert(col,
'1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:MILLISECONDS:EPOCH',"
+ + " '30:MINUTES') < 1620917160000", new Range(null, false, "2021-05-13
15:00:00", false));
+ testTimeConvert("dateTimeConvert(col,
'1:MINUTES:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm', '1:MILLISECONDS:EPOCH', "
+ + "'30:MINUTES') BETWEEN 1620830760000 AND 1620917160000", new
Range("2021-05-12 15:00", true, "2021-05-13 "
+ + "15:00", false));
+ testTimeConvert("dateTimeConvert(col,
'1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd', '1:MILLISECONDS:EPOCH', '30:MINUTES')"
+ + " = 1620830760000", new Range("2021-05-12", false, "2021-05-12",
true));
// Invalid time
- testInvalidTimeConvert(
- "dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd
HH:mm:ss.SSS', '1:MILLISECONDS:EPOCH', "
- + "'30:MINUTES') > 1620830760000.5");
- testInvalidTimeConvert(
- "dateTimeConvert(col, '1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd
HH:mm:ss', '1:MILLISECONDS:EPOCH', "
- + "'30:MINUTES') < 1620917160");
+ testInvalidFilterOptimizer("dateTimeConvert(col,
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', "
+ + "'1:MILLISECONDS:EPOCH', '30:MINUTES') > 1620830760000.5");
+ testInvalidFilterOptimizer("dateTimeConvert(col,
'1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', "
+ + "'1:MILLISECONDS:EPOCH', '30:MINUTES') < 1620917160");
+ }
+
+
+ @Test
+ public void testDateTruncOptimizer() {
+ testDateTrunc("datetrunc('DAY', col) = 1620777600000", new
Range("1620777600000", true, "1620863999999", true));
Review Comment:
can you add a test where it's flipped like
```
testDateTrunc("1620777600000 = datetrunc('DAY', col)", new
Range("1620777600000", true, "1620863999999", true));
```
##########
pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java:
##########
@@ -411,6 +427,94 @@ && isStringLiteral(dateTimeConvertOperands.get(3)),
}
}
+ private void optimizeDateTrunc(Function filterFunction, FilterKind
filterKind) {
+ List<Expression> filterOperands = filterFunction.getOperands();
+ List<Expression> dateTruncOperands =
filterOperands.get(0).getFunctionCall().getOperands();
+
+ if (dateTruncOperands.get(1).isSetLiteral()) {
+ return;
+ }
+
+ Long lowerMillis = null;
+ Long upperMillis = null;
+ boolean lowerInclusive = true;
+ boolean upperInclusive = true;
+ List<Expression> operands = new ArrayList<>(dateTruncOperands);
+ String unit = operands.get(0).getLiteral().getStringValue();
+ String inputTimeUnit = (operands.size() >= 3) ?
operands.get(2).getLiteral().getStringValue()
+ : TimeUnit.MILLISECONDS.name();
+ if (operands.size() >= 4) {
+ if (!operands.get(3).getLiteral().getStringValue().equals("UTC")) {
+ // Leave query unoptimized if working with non-UTC time zones
+ return;
+ }
+ }
+ ISOChronology chronology = ISOChronology.getInstanceUTC();
+ String outputTimeUnit = (operands.size() == 5) ?
operands.get(4).getLiteral().getStringValue()
+ : TimeUnit.MILLISECONDS.name();
+ switch (filterKind) {
+ case EQUALS:
+ operands.set(1, getExpression(getLongValue(filterOperands.get(1)), new
DateTimeFormatSpec("TIMESTAMP")));
+ upperMillis = dateTruncCeil(operands);
+ lowerMillis = dateTruncFloor(operands);
+ if (lowerMillis != DateTimeUtils.getTimestampField(chronology,
unit).roundFloor(lowerMillis)) {
Review Comment:
I forgot what this case is handling:
- what is the `if` statement checking?
- why are we returning early
at minimum, it needs a code comment for future readers
##########
pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java:
##########
@@ -411,6 +427,94 @@ && isStringLiteral(dateTimeConvertOperands.get(3)),
}
}
+ private void optimizeDateTrunc(Function filterFunction, FilterKind
filterKind) {
+ List<Expression> filterOperands = filterFunction.getOperands();
+ List<Expression> dateTruncOperands =
filterOperands.get(0).getFunctionCall().getOperands();
Review Comment:
similar to my test comment below. is this assuming it's always
`dateTrunc(...) = X`, or will it work for `X = dateTrunc(...)`
##########
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/TimestampIndexMseTest.java:
##########
@@ -85,23 +85,6 @@ public void timestampIndexSubstitutedInProjections() {
+ " FilterMatchEntireSegment(numDocs=[115545])\n");
}
- @Test
Review Comment:
why are these removed?
##########
pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java:
##########
@@ -411,6 +427,94 @@ && isStringLiteral(dateTimeConvertOperands.get(3)),
}
}
+ private void optimizeDateTrunc(Function filterFunction, FilterKind
filterKind) {
+ List<Expression> filterOperands = filterFunction.getOperands();
+ List<Expression> dateTruncOperands =
filterOperands.get(0).getFunctionCall().getOperands();
+
+ if (dateTruncOperands.get(1).isSetLiteral()) {
+ return;
+ }
+
+ Long lowerMillis = null;
+ Long upperMillis = null;
+ boolean lowerInclusive = true;
+ boolean upperInclusive = true;
+ List<Expression> operands = new ArrayList<>(dateTruncOperands);
+ String unit = operands.get(0).getLiteral().getStringValue();
+ String inputTimeUnit = (operands.size() >= 3) ?
operands.get(2).getLiteral().getStringValue()
+ : TimeUnit.MILLISECONDS.name();
+ if (operands.size() >= 4) {
+ if (!operands.get(3).getLiteral().getStringValue().equals("UTC")) {
+ // Leave query unoptimized if working with non-UTC time zones
+ return;
+ }
+ }
+ ISOChronology chronology = ISOChronology.getInstanceUTC();
+ String outputTimeUnit = (operands.size() == 5) ?
operands.get(4).getLiteral().getStringValue()
+ : TimeUnit.MILLISECONDS.name();
+ switch (filterKind) {
+ case EQUALS:
+ operands.set(1, getExpression(getLongValue(filterOperands.get(1)), new
DateTimeFormatSpec("TIMESTAMP")));
+ upperMillis = dateTruncCeil(operands);
+ lowerMillis = dateTruncFloor(operands);
+ if (lowerMillis != DateTimeUtils.getTimestampField(chronology,
unit).roundFloor(lowerMillis)) {
+ lowerMillis = Long.MAX_VALUE;
+ upperMillis = Long.MIN_VALUE;
+ String rangeString = new Range(lowerMillis, lowerInclusive,
upperMillis, upperInclusive).getRangeString();
+ rewriteToRange(filterFunction, dateTruncOperands.get(1),
rangeString);
+ return;
+ }
+ break;
+ case GREATER_THAN:
+ operands.set(1, getExpression(getLongValue(filterOperands.get(1)), new
DateTimeFormatSpec("TIMESTAMP")));
+ lowerMillis = dateTruncCeil(operands);
+ lowerInclusive = false;
+ upperMillis = Long.MAX_VALUE;
+ break;
+ case GREATER_THAN_OR_EQUAL:
+ operands.set(1, getExpression(getLongValue(filterOperands.get(1)), new
DateTimeFormatSpec("TIMESTAMP")));
+ lowerMillis = dateTruncFloor(operands);
+ upperMillis = Long.MAX_VALUE;
+ if (lowerMillis != DateTimeUtils.getTimestampField(chronology,
unit).roundFloor(lowerMillis)) {
Review Comment:
1. what is this check doing
2. why don't we have it in less_that_or_equal?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]