jadami10 commented on code in PR #14385:
URL: https://github.com/apache/pinot/pull/14385#discussion_r1962401280


##########
pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizerTest.java:
##########
@@ -121,38 +114,79 @@ public void testEpochToEpochDateTimeConvert() {
         new Range(1620833400L, true, null, false));
     testTimeConvert("dateTimeConvert(col, '1:MINUTES:EPOCH', '1:HOURS:EPOCH', 
'30:MINUTES') < 450255",
         new Range(null, false, 27015300L, false));
-    testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH', 
'30:MINUTES') BETWEEN 18759 AND 18760",
+    testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH', 
'30:MINUTES') "
+            + "BETWEEN 18759 AND 18760",
         new Range(18759L, true, 18761L, false));
     testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH', 
'30:MINUTES') = 18759",
         new Range(18759L, true, 18760L, false));
 
     // Invalid time
-    testInvalidTimeConvert("dateTimeConvert(col, '1:SECONDS:EPOCH', 
'1:MINUTES:EPOCH', '30:MINUTES') > 27013846.5");
-    testInvalidTimeConvert("dateTimeConvert(col, '1:SECONDS:EPOCH', 
'30:MINUTES:EPOCH', '30:MINUTES') > 27013846");
+    testInvalidFilterOptimizer("dateTimeConvert(col, '1:SECONDS:EPOCH', 
'1:MINUTES:EPOCH', '30:MINUTES') > 27013846.5");
+    testInvalidFilterOptimizer("dateTimeConvert(col, '1:SECONDS:EPOCH', 
'30:MINUTES:EPOCH', '30:MINUTES') > 27013846");
   }
 
   @Test
   public void testSDFToEpochDateTimeConvert() {
-    testTimeConvert(
-        "dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd 
HH:mm:ss.SSS', '1:MILLISECONDS:EPOCH', "
-            + "'30:MINUTES') > 1620830760000", new Range("2021-05-12 
15:00:00.000", true, null, false));
-    testTimeConvert("dateTimeConvert(col, 
'1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:MILLISECONDS:EPOCH', "
-        + "'30:MINUTES') < 1620917160000", new Range(null, false, "2021-05-13 
15:00:00", false));
-    testTimeConvert(
-        "dateTimeConvert(col, '1:MINUTES:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm', 
'1:MILLISECONDS:EPOCH', '30:MINUTES') "
-            + "BETWEEN 1620830760000 AND 1620917160000",
-        new Range("2021-05-12 15:00", true, "2021-05-13 15:00", false));
-    testTimeConvert(
-        "dateTimeConvert(col, '1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd', 
'1:MILLISECONDS:EPOCH', '30:MINUTES') = "
-            + "1620830760000", new Range("2021-05-12", false, "2021-05-12", 
true));
+    testTimeConvert("dateTimeConvert(col, 
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:MILLISECONDS:"
+        + "EPOCH', '30:MINUTES') > 1620830760000", new Range("2021-05-12 
15:00:00.000", true, null, false));
+    testTimeConvert("dateTimeConvert(col, 
'1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:MILLISECONDS:EPOCH',"
+        + " '30:MINUTES') < 1620917160000", new Range(null, false, "2021-05-13 
15:00:00", false));
+    testTimeConvert("dateTimeConvert(col, 
'1:MINUTES:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm', '1:MILLISECONDS:EPOCH', "
+        + "'30:MINUTES') BETWEEN 1620830760000 AND 1620917160000", new 
Range("2021-05-12 15:00", true, "2021-05-13 "
+        + "15:00", false));
+    testTimeConvert("dateTimeConvert(col, 
'1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd', '1:MILLISECONDS:EPOCH', '30:MINUTES')"
+        + " = 1620830760000", new Range("2021-05-12", false, "2021-05-12", 
true));
 
     // Invalid time
-    testInvalidTimeConvert(
-        "dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd 
HH:mm:ss.SSS', '1:MILLISECONDS:EPOCH', "
-            + "'30:MINUTES') > 1620830760000.5");
-    testInvalidTimeConvert(
-        "dateTimeConvert(col, '1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd 
HH:mm:ss', '1:MILLISECONDS:EPOCH', "
-            + "'30:MINUTES') < 1620917160");
+    testInvalidFilterOptimizer("dateTimeConvert(col, 
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', "
+        + "'1:MILLISECONDS:EPOCH', '30:MINUTES') > 1620830760000.5");
+    testInvalidFilterOptimizer("dateTimeConvert(col, 
'1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', "
+        + "'1:MILLISECONDS:EPOCH', '30:MINUTES') < 1620917160");
+  }
+
+
+  @Test
+  public void testDateTruncOptimizer() {
+    testDateTrunc("datetrunc('DAY', col) = 1620777600000", new 
Range("1620777600000", true, "1620863999999", true));
+    testDateTrunc("dateTrunc('DAY', col) = 1620777600001", new 
Range(Long.MAX_VALUE, true, Long.MIN_VALUE, true));
+
+    testDateTrunc("datetrunc('DAY', col) < 1620777600000", new 
Range(Long.MIN_VALUE, true, "1620777600000", false));
+    testDateTrunc("DATETRUNC('DAY', col) < 1620777600010", new 
Range(Long.MIN_VALUE, true, "1620863999999", true));
+    testDateTrunc("DATE_TRUNC('DAY', col) < 1620863999999", new 
Range(Long.MIN_VALUE, true, "1620863999999", true));
+
+    testDateTrunc("datetrunc('DAY', col) <= 1620777600000", new 
Range(Long.MIN_VALUE, true, "1620863999999", true));
+    testDateTrunc("datetrunc('DAY', col) <= 1620777600010", new 
Range(Long.MIN_VALUE, true, "1620863999999", true));
+
+    testDateTrunc("datetrunc('DAY', col) > 1620777600000", new 
Range("1620863999999", false, Long.MAX_VALUE, true));
+    testDateTrunc("dateTrunc('DAY', col) > 1620863999999", new 
Range("1620863999999", false, Long.MAX_VALUE, true));
+    testDateTrunc("DATETRUNC('DAY', col) > 1620864000000", new 
Range("1620950399999", false, Long.MAX_VALUE, true));
+
+    testDateTrunc("datetrunc('DAY', col) >= 1620863999909", new 
Range("1620863999999", false, Long.MAX_VALUE, true));
+    testDateTrunc("datetrunc('DAY', col) >= 1620777600000", new 
Range("1620777600000", true, Long.MAX_VALUE, true));
+
+    testInvalidFilterOptimizer("datetrunc('DAY', col, 'MILLISECONDS', 'CET', 
'MILLISECONDS') = 1620770400000");
+    testDateTrunc("datetrunc('DAY', col, 'DAYS', 'UTC', 'DAYS') = 453631", new 
Range("453631", true, "453631", true));
+    testInvalidFilterOptimizer("datetrunc('DAY', col, 'DAYS', 'CET', 
'MILLISECONDS') = 39193714800000");
+    testDateTrunc("datetrunc('DAY', col, 'MILLISECONDS', 'UTC', 'DAYS') = 
453630",
+        new Range("39193632000000", true, "39193718399999", true));
+    testInvalidFilterOptimizer("datetrunc('DAY', col, 'MILLISECONDS', 'CET', 
'DAYS') = 453630");
+    // TODO: add between predicate test

Review Comment:
   This is a pretty important code path since it rewrites queries. I would 
either add tests, or not handle the BETWEEN case at all.



##########
pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizerTest.java:
##########
@@ -121,38 +114,79 @@ public void testEpochToEpochDateTimeConvert() {
         new Range(1620833400L, true, null, false));
     testTimeConvert("dateTimeConvert(col, '1:MINUTES:EPOCH', '1:HOURS:EPOCH', 
'30:MINUTES') < 450255",
         new Range(null, false, 27015300L, false));
-    testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH', 
'30:MINUTES') BETWEEN 18759 AND 18760",
+    testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH', 
'30:MINUTES') "
+            + "BETWEEN 18759 AND 18760",
         new Range(18759L, true, 18761L, false));
     testTimeConvert("dateTimeConvert(col, '1:DAYS:EPOCH', '1:DAYS:EPOCH', 
'30:MINUTES') = 18759",
         new Range(18759L, true, 18760L, false));
 
     // Invalid time
-    testInvalidTimeConvert("dateTimeConvert(col, '1:SECONDS:EPOCH', 
'1:MINUTES:EPOCH', '30:MINUTES') > 27013846.5");
-    testInvalidTimeConvert("dateTimeConvert(col, '1:SECONDS:EPOCH', 
'30:MINUTES:EPOCH', '30:MINUTES') > 27013846");
+    testInvalidFilterOptimizer("dateTimeConvert(col, '1:SECONDS:EPOCH', 
'1:MINUTES:EPOCH', '30:MINUTES') > 27013846.5");
+    testInvalidFilterOptimizer("dateTimeConvert(col, '1:SECONDS:EPOCH', 
'30:MINUTES:EPOCH', '30:MINUTES') > 27013846");
   }
 
   @Test
   public void testSDFToEpochDateTimeConvert() {
-    testTimeConvert(
-        "dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd 
HH:mm:ss.SSS', '1:MILLISECONDS:EPOCH', "
-            + "'30:MINUTES') > 1620830760000", new Range("2021-05-12 
15:00:00.000", true, null, false));
-    testTimeConvert("dateTimeConvert(col, 
'1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:MILLISECONDS:EPOCH', "
-        + "'30:MINUTES') < 1620917160000", new Range(null, false, "2021-05-13 
15:00:00", false));
-    testTimeConvert(
-        "dateTimeConvert(col, '1:MINUTES:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm', 
'1:MILLISECONDS:EPOCH', '30:MINUTES') "
-            + "BETWEEN 1620830760000 AND 1620917160000",
-        new Range("2021-05-12 15:00", true, "2021-05-13 15:00", false));
-    testTimeConvert(
-        "dateTimeConvert(col, '1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd', 
'1:MILLISECONDS:EPOCH', '30:MINUTES') = "
-            + "1620830760000", new Range("2021-05-12", false, "2021-05-12", 
true));
+    testTimeConvert("dateTimeConvert(col, 
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:MILLISECONDS:"
+        + "EPOCH', '30:MINUTES') > 1620830760000", new Range("2021-05-12 
15:00:00.000", true, null, false));
+    testTimeConvert("dateTimeConvert(col, 
'1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:MILLISECONDS:EPOCH',"
+        + " '30:MINUTES') < 1620917160000", new Range(null, false, "2021-05-13 
15:00:00", false));
+    testTimeConvert("dateTimeConvert(col, 
'1:MINUTES:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm', '1:MILLISECONDS:EPOCH', "
+        + "'30:MINUTES') BETWEEN 1620830760000 AND 1620917160000", new 
Range("2021-05-12 15:00", true, "2021-05-13 "
+        + "15:00", false));
+    testTimeConvert("dateTimeConvert(col, 
'1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd', '1:MILLISECONDS:EPOCH', '30:MINUTES')"
+        + " = 1620830760000", new Range("2021-05-12", false, "2021-05-12", 
true));
 
     // Invalid time
-    testInvalidTimeConvert(
-        "dateTimeConvert(col, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd 
HH:mm:ss.SSS', '1:MILLISECONDS:EPOCH', "
-            + "'30:MINUTES') > 1620830760000.5");
-    testInvalidTimeConvert(
-        "dateTimeConvert(col, '1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd 
HH:mm:ss', '1:MILLISECONDS:EPOCH', "
-            + "'30:MINUTES') < 1620917160");
+    testInvalidFilterOptimizer("dateTimeConvert(col, 
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', "
+        + "'1:MILLISECONDS:EPOCH', '30:MINUTES') > 1620830760000.5");
+    testInvalidFilterOptimizer("dateTimeConvert(col, 
'1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', "
+        + "'1:MILLISECONDS:EPOCH', '30:MINUTES') < 1620917160");
+  }
+
+
+  @Test
+  public void testDateTruncOptimizer() {
+    testDateTrunc("datetrunc('DAY', col) = 1620777600000", new 
Range("1620777600000", true, "1620863999999", true));

Review Comment:
   can you add a test where it's flipped like
   ```
   testDateTrunc("1620777600000 = datetrunc('DAY', col)", new 
Range("1620777600000", true, "1620863999999", true));
   ```



##########
pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java:
##########
@@ -411,6 +427,94 @@ && isStringLiteral(dateTimeConvertOperands.get(3)),
     }
   }
 
+  private void optimizeDateTrunc(Function filterFunction, FilterKind 
filterKind) {
+    List<Expression> filterOperands = filterFunction.getOperands();
+    List<Expression> dateTruncOperands = 
filterOperands.get(0).getFunctionCall().getOperands();
+
+    if (dateTruncOperands.get(1).isSetLiteral()) {
+      return;
+    }
+
+    Long lowerMillis = null;
+    Long upperMillis = null;
+    boolean lowerInclusive = true;
+    boolean upperInclusive = true;
+    List<Expression> operands = new ArrayList<>(dateTruncOperands);
+    String unit = operands.get(0).getLiteral().getStringValue();
+    String inputTimeUnit = (operands.size() >= 3) ? 
operands.get(2).getLiteral().getStringValue()
+        : TimeUnit.MILLISECONDS.name();
+    if (operands.size() >= 4) {
+      if (!operands.get(3).getLiteral().getStringValue().equals("UTC")) {
+        // Leave query unoptimized if working with non-UTC time zones
+        return;
+      }
+    }
+    ISOChronology chronology = ISOChronology.getInstanceUTC();
+    String outputTimeUnit = (operands.size() == 5) ? 
operands.get(4).getLiteral().getStringValue()
+        : TimeUnit.MILLISECONDS.name();
+    switch (filterKind) {
+      case EQUALS:
+        operands.set(1, getExpression(getLongValue(filterOperands.get(1)), new 
DateTimeFormatSpec("TIMESTAMP")));
+        upperMillis = dateTruncCeil(operands);
+        lowerMillis = dateTruncFloor(operands);
+        if (lowerMillis != DateTimeUtils.getTimestampField(chronology, 
unit).roundFloor(lowerMillis)) {

Review Comment:
   I forgot what this case is handling:
   - what is the `if` statement checking?
   - why are we returning early
   
   at minimum, it needs a code comment for future readers



##########
pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java:
##########
@@ -411,6 +427,94 @@ && isStringLiteral(dateTimeConvertOperands.get(3)),
     }
   }
 
+  private void optimizeDateTrunc(Function filterFunction, FilterKind 
filterKind) {
+    List<Expression> filterOperands = filterFunction.getOperands();
+    List<Expression> dateTruncOperands = 
filterOperands.get(0).getFunctionCall().getOperands();

Review Comment:
   similar to my test comment below. is this assuming it's always 
`dateTrunc(...) = X`, or will it work for `X = dateTrunc(...)`



##########
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/TimestampIndexMseTest.java:
##########
@@ -85,23 +85,6 @@ public void timestampIndexSubstitutedInProjections() {
             + "              FilterMatchEntireSegment(numDocs=[115545])\n");
   }
 
-  @Test

Review Comment:
   why are these removed?



##########
pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java:
##########
@@ -411,6 +427,94 @@ && isStringLiteral(dateTimeConvertOperands.get(3)),
     }
   }
 
+  private void optimizeDateTrunc(Function filterFunction, FilterKind 
filterKind) {
+    List<Expression> filterOperands = filterFunction.getOperands();
+    List<Expression> dateTruncOperands = 
filterOperands.get(0).getFunctionCall().getOperands();
+
+    if (dateTruncOperands.get(1).isSetLiteral()) {
+      return;
+    }
+
+    Long lowerMillis = null;
+    Long upperMillis = null;
+    boolean lowerInclusive = true;
+    boolean upperInclusive = true;
+    List<Expression> operands = new ArrayList<>(dateTruncOperands);
+    String unit = operands.get(0).getLiteral().getStringValue();
+    String inputTimeUnit = (operands.size() >= 3) ? 
operands.get(2).getLiteral().getStringValue()
+        : TimeUnit.MILLISECONDS.name();
+    if (operands.size() >= 4) {
+      if (!operands.get(3).getLiteral().getStringValue().equals("UTC")) {
+        // Leave query unoptimized if working with non-UTC time zones
+        return;
+      }
+    }
+    ISOChronology chronology = ISOChronology.getInstanceUTC();
+    String outputTimeUnit = (operands.size() == 5) ? 
operands.get(4).getLiteral().getStringValue()
+        : TimeUnit.MILLISECONDS.name();
+    switch (filterKind) {
+      case EQUALS:
+        operands.set(1, getExpression(getLongValue(filterOperands.get(1)), new 
DateTimeFormatSpec("TIMESTAMP")));
+        upperMillis = dateTruncCeil(operands);
+        lowerMillis = dateTruncFloor(operands);
+        if (lowerMillis != DateTimeUtils.getTimestampField(chronology, 
unit).roundFloor(lowerMillis)) {
+          lowerMillis = Long.MAX_VALUE;
+          upperMillis = Long.MIN_VALUE;
+          String rangeString = new Range(lowerMillis, lowerInclusive, 
upperMillis, upperInclusive).getRangeString();
+          rewriteToRange(filterFunction, dateTruncOperands.get(1), 
rangeString);
+          return;
+        }
+        break;
+      case GREATER_THAN:
+        operands.set(1, getExpression(getLongValue(filterOperands.get(1)), new 
DateTimeFormatSpec("TIMESTAMP")));
+        lowerMillis = dateTruncCeil(operands);
+        lowerInclusive = false;
+        upperMillis = Long.MAX_VALUE;
+        break;
+      case GREATER_THAN_OR_EQUAL:
+        operands.set(1, getExpression(getLongValue(filterOperands.get(1)), new 
DateTimeFormatSpec("TIMESTAMP")));
+        lowerMillis = dateTruncFloor(operands);
+        upperMillis = Long.MAX_VALUE;
+        if (lowerMillis != DateTimeUtils.getTimestampField(chronology, 
unit).roundFloor(lowerMillis)) {

Review Comment:
   1. what is this check doing
   2. why don't we have it in less_that_or_equal?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to