Jackie-Jiang commented on a change in pull request #6957: URL: https://github.com/apache/incubator-pinot/pull/6957#discussion_r637207805
########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java ########## @@ -0,0 +1,330 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.query.optimizer.filter; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.TimeUnit; +import javax.annotation.Nullable; +import org.apache.commons.lang3.StringUtils; +import org.apache.pinot.common.request.Expression; +import org.apache.pinot.common.request.ExpressionType; +import org.apache.pinot.common.request.Function; +import org.apache.pinot.common.utils.request.FilterQueryTree; +import org.apache.pinot.common.utils.request.RequestUtils; +import org.apache.pinot.core.operator.transform.function.DateTimeConversionTransformFunction; +import org.apache.pinot.core.operator.transform.function.TimeConversionTransformFunction; +import org.apache.pinot.core.query.optimizer.filter.utils.Range; +import org.apache.pinot.pql.parsers.pql2.ast.FilterKind; +import org.apache.pinot.spi.data.DateTimeFieldSpec.TimeFormat; +import org.apache.pinot.spi.data.DateTimeFormatSpec; +import org.apache.pinot.spi.data.DateTimeGranularitySpec; +import org.apache.pinot.spi.data.Schema; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * The {@code TimePredicateFilterOptimizer} optimizes the time related predicates: + * <ul> + * <li> + * Optimizes TIME_CONVERT/DATE_TIME_CONVERT function with range/equality predicate to directly apply the predicate + * to the inner expression. + * </li> + * </ul> + * + * NOTE: This optimizer is followed by the {@link MergeRangeFilterOptimizer}, which can merge the generated ranges. + */ +public class TimePredicateFilterOptimizer implements FilterOptimizer { + private static final Logger LOGGER = LoggerFactory.getLogger(TimePredicateFilterOptimizer.class); + + @Override + public FilterQueryTree optimize(FilterQueryTree filterQueryTree, @Nullable Schema schema) { + // Do not rewrite PQL queries because PQL is deprecated + return filterQueryTree; + } + + @Override + public Expression optimize(Expression filterExpression, @Nullable Schema schema) { + return optimize(filterExpression); + } + + @VisibleForTesting + Expression optimize(Expression filterExpression) { + Function filterFunction = filterExpression.getFunctionCall(); + FilterKind filterKind = FilterKind.valueOf(filterFunction.getOperator()); + List<Expression> operands = filterFunction.getOperands(); + if (filterKind == FilterKind.AND || filterKind == FilterKind.OR) { + operands.replaceAll(this::optimize); + } else if (filterKind.isRange() || filterKind == FilterKind.EQUALS) { + Expression expression = operands.get(0); + if (expression.getType() == ExpressionType.FUNCTION) { + Function expressionFunction = expression.getFunctionCall(); + String functionName = StringUtils.remove(expressionFunction.getOperator(), '_'); + if (functionName.equalsIgnoreCase(TimeConversionTransformFunction.FUNCTION_NAME)) { + optimizeTimeConvert(filterFunction, filterKind); + } else if (functionName.equalsIgnoreCase(DateTimeConversionTransformFunction.FUNCTION_NAME)) { + optimizeDateTimeConvert(filterFunction, filterKind); + } + } + } + return filterExpression; + } + + /** + * Helper method to optimize TIME_CONVERT function with range/equality predicate to directly apply the predicate to + * the inner expression. Changes are applied in-place of the filter function. + */ + private void optimizeTimeConvert(Function filterFunction, FilterKind filterKind) { + List<Expression> filterOperands = filterFunction.getOperands(); + List<Expression> timeConvertOperands = filterOperands.get(0).getFunctionCall().getOperands(); + Preconditions.checkArgument(timeConvertOperands.size() == 3, + "Exactly 3 arguments are required for TIME_CONVERT transform function"); + Preconditions + .checkArgument(isStringLiteral(timeConvertOperands.get(1)) && isStringLiteral(timeConvertOperands.get(2)), + "The 2nd and 3rd argument for TIME_CONVERT transform function must be string literal"); + + try { + TimeUnit inputTimeUnit = TimeUnit.valueOf(timeConvertOperands.get(1).getLiteral().getStringValue().toUpperCase()); + TimeUnit outputTimeUnit = + TimeUnit.valueOf(timeConvertOperands.get(2).getLiteral().getStringValue().toUpperCase()); + + // Step 1: Convert output range to millis range + Long lowerMillis = null; + Long upperMillis = null; + switch (filterKind) { + case GREATER_THAN: { + // millisToFormat(millis) > n + // -> millisToFormat(millis) >= n + 1 + // -> millis >= formatToMillis(n + 1) + long lowerValue = Long.parseLong(filterOperands.get(1).getLiteral().getFieldValue().toString()); + lowerMillis = outputTimeUnit.toMillis(lowerValue + 1); + break; + } + case GREATER_THAN_OR_EQUAL: { + // millisToFormat(millis) >= n + // -> millis >= formatToMillis(n) + long lowerValue = Long.parseLong(filterOperands.get(1).getLiteral().getFieldValue().toString()); + lowerMillis = outputTimeUnit.toMillis(lowerValue); + break; + } + case LESS_THAN: { + // millisToFormat(millis) < n + // -> millis < formatToMillis(n) + long upperValue = Long.parseLong(filterOperands.get(1).getLiteral().getFieldValue().toString()); + upperMillis = outputTimeUnit.toMillis(upperValue); + break; + } + case LESS_THAN_OR_EQUAL: { + // millisToFormat(millis) <= n + // -> millisToFormat(millis) < n + 1 + // -> millis < formatToMillis(n + 1) + long upperValue = Long.parseLong(filterOperands.get(1).getLiteral().getFieldValue().toString()); + upperMillis = outputTimeUnit.toMillis(upperValue + 1); + break; + } + case BETWEEN: { + // Combine GREATER_THAN_OR_EQUAL and LESS_THAN_OR_EQUAL + long lowerValue = Long.parseLong(filterOperands.get(1).getLiteral().getFieldValue().toString()); + lowerMillis = outputTimeUnit.toMillis(lowerValue); + long upperValue = Long.parseLong(filterOperands.get(2).getLiteral().getFieldValue().toString()); + upperMillis = outputTimeUnit.toMillis(upperValue + 1); + break; + } + case EQUALS: { + // Combine GREATER_THAN_OR_EQUAL and LESS_THAN_OR_EQUAL + long value = Long.parseLong(filterOperands.get(1).getLiteral().getFieldValue().toString()); + lowerMillis = outputTimeUnit.toMillis(value); + upperMillis = outputTimeUnit.toMillis(value + 1); + break; + } + default: + throw new IllegalStateException(); + } + + // Step 2: Convert millis range to input range + Long lowerValue = null; + boolean lowerInclusive = false; + if (lowerMillis != null) { + // formatToMillis(col) >= millis + // - if (formatToMillis(millisToFormat(millis)) == millis) + // -> col >= millisToFormat(millis) + // - else (formatToMillis(millisToFormat(millis)) < millis) + // -> col > millisToFormat(millis) + lowerValue = inputTimeUnit.convert(lowerMillis, TimeUnit.MILLISECONDS); + lowerInclusive = inputTimeUnit.toMillis(lowerValue) == lowerMillis; + } + Long upperValue = null; + boolean upperInclusive = false; + if (upperMillis != null) { + // formatToMillis(col) < millis + // - if (formatToMillis(millisToFormat(millis)) == millis) + // -> col < millisToFormat(millis) + // - else (formatToMillis(millisToFormat(millis)) < millis) + // -> col <= millisToFormat(millis) + upperValue = inputTimeUnit.convert(upperMillis, TimeUnit.MILLISECONDS); + upperInclusive = inputTimeUnit.toMillis(upperValue) != upperMillis; + } + + // Step 3: Rewrite the filter function + String rangeString = new Range(lowerValue, lowerInclusive, upperValue, upperInclusive).getRangeString(); + filterFunction.setOperator(FilterKind.RANGE.name()); + filterFunction + .setOperands(Arrays.asList(timeConvertOperands.get(0), RequestUtils.getLiteralExpression(rangeString))); + } catch (Exception e) { + LOGGER.warn("Caught exception while optimizing TIME_CONVERT predicate: {}, skipping the optimization", + filterFunction, e); + } + } + + /** + * Helper method to optimize DATE_TIME_CONVERT function with range/equality predicate to directly apply the predicate + * to the inner expression. Changes are applied in-place of the filter function. + */ + private void optimizeDateTimeConvert(Function filterFunction, FilterKind filterKind) { + List<Expression> filterOperands = filterFunction.getOperands(); + List<Expression> dateTimeConvertOperands = filterOperands.get(0).getFunctionCall().getOperands(); + Preconditions.checkArgument(dateTimeConvertOperands.size() == 4, + "Exactly 4 arguments are required for DATE_TIME_CONVERT transform function"); + Preconditions.checkArgument( + isStringLiteral(dateTimeConvertOperands.get(1)) && isStringLiteral(dateTimeConvertOperands.get(2)) + && isStringLiteral(dateTimeConvertOperands.get(3)), + "The 2nd to 4th arguments for DATE_TIME_CONVERT transform function must be string literal"); + + try { + DateTimeFormatSpec inputFormat = + new DateTimeFormatSpec(dateTimeConvertOperands.get(1).getLiteral().getStringValue()); + DateTimeFormatSpec outputFormat = + new DateTimeFormatSpec(dateTimeConvertOperands.get(2).getLiteral().getStringValue()); + // SDF output format is not supported because: + // 1. No easy way to get the next time value (instead of simply +1 for EPOCH format) + // 2. Hard to calculate the bucket boundary (need to consider time zone) + // TODO: Support SDF output format + if (outputFormat.getTimeFormat() == TimeFormat.SIMPLE_DATE_FORMAT) { + return; + } + long granularityMillis = new DateTimeGranularitySpec(dateTimeConvertOperands.get(3).getLiteral().getStringValue()) + .granularityToMillis(); + + // Step 1: Convert output range to millis range Review comment: I considered using the same code for both `time_convert` and `date_time_convert`, but that would introduce unnecessary overhead for `time_convert` because it does not require SDF and bucketing support. Basically it's a trade-off between performance and code complexity -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org