This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch kylin5 in repository https://gitbox.apache.org/repos/asf/kylin.git
commit 08dc9f9b35c314e5a97f4b23b3e383d12417e2bb Author: Jiale He <35652389+jial...@users.noreply.github.com> AuthorDate: Fri Feb 3 16:00:30 2023 +0800 KYLIN-5499 fix partition col format mismatch with type --- .../kylin/query/routing/RealizationPrunerTest.java | 86 ++++++++++++++++++++++ .../kylin/query/routing/RealizationPruner.java | 64 ++++++++++++++-- 2 files changed, 142 insertions(+), 8 deletions(-) diff --git a/src/kylin-it/src/test/java/org/apache/kylin/query/routing/RealizationPrunerTest.java b/src/kylin-it/src/test/java/org/apache/kylin/query/routing/RealizationPrunerTest.java new file mode 100644 index 0000000000..b22b90a244 --- /dev/null +++ b/src/kylin-it/src/test/java/org/apache/kylin/query/routing/RealizationPrunerTest.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.query.routing; + +import org.apache.kylin.metadata.datatype.DataType; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.springframework.test.util.ReflectionTestUtils; + +class RealizationPrunerTest { + + @Test + void testCheckAndReformatDateType() { + long segmentTs = 1675396800000L; + { + String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class, + "checkAndReformatDateType", "2023-02-03", segmentTs, new DataType("date", 0, 0)); + Assertions.assertEquals("2023-02-03", formattedValue); + } + + { + String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class, + "checkAndReformatDateType", "2023-02-03 12:00:00", segmentTs, new DataType("date", 0, 0)); + Assertions.assertEquals("2023-02-03", formattedValue); + } + + { + String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class, + "checkAndReformatDateType", "2023-02-03", segmentTs, new DataType("timestamp", 0, 0)); + Assertions.assertEquals("2023-02-03 12:00:00", formattedValue); + } + + { + String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class, + "checkAndReformatDateType", "2023-02-03 12:00:00", segmentTs, new DataType("timestamp", 0, 0)); + Assertions.assertEquals("2023-02-03 12:00:00", formattedValue); + } + + { + String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class, + "checkAndReformatDateType", "2023-02-03 12:00:00", segmentTs, new DataType("varchar", 0, 0)); + Assertions.assertEquals("2023-02-03 12:00:00", formattedValue); + } + + { + String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class, + "checkAndReformatDateType", "2023-02-03 12:00:00", segmentTs, new DataType("string", 0, 0)); + Assertions.assertEquals("2023-02-03 12:00:00", formattedValue); + } + + { + String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class, + "checkAndReformatDateType", "2023-02-03 12:00:00", segmentTs, new DataType("integer", 0, 0)); + Assertions.assertEquals("2023-02-03 12:00:00", formattedValue); + } + + { + String formattedValue = ReflectionTestUtils.invokeMethod(RealizationPruner.class, + "checkAndReformatDateType", "2023-02-03 12:00:00", segmentTs, new DataType("bigint", 0, 0)); + Assertions.assertEquals("2023-02-03 12:00:00", formattedValue); + } + + { + DataType errorType = new DataType("error_type", 0, 0); + Assertions.assertThrows(IllegalArgumentException.class, + () -> ReflectionTestUtils.invokeMethod(RealizationPruner.class, "checkAndReformatDateType", + "2023-02-03 12:00:00", segmentTs, errorType)); + } + } +} diff --git a/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationPruner.java b/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationPruner.java index dc54aae001..5c56de2fb3 100644 --- a/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationPruner.java +++ b/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationPruner.java @@ -18,6 +18,9 @@ package org.apache.kylin.query.routing; +import static org.apache.kylin.common.util.DateFormat.DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS; +import static org.apache.kylin.common.util.DateFormat.DEFAULT_DATE_PATTERN; + import java.util.ArrayList; import java.util.Calendar; import java.util.Collection; @@ -27,6 +30,7 @@ import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.TimeZone; +import java.util.regex.Pattern; import java.util.stream.Collectors; import org.apache.calcite.plan.RelOptPredicateList; @@ -86,6 +90,9 @@ public class RealizationPruner { private static final String INTEGER = "integer"; private static final String BIGINT = "bigint"; private static final TimeZone UTC_ZONE = TimeZone.getTimeZone("UTC"); + private static final Pattern DATE_PATTERN = Pattern.compile("[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]"); + private static final Pattern TIMESTAMP_PATTERN = Pattern.compile( + "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" + " " + "[0-9][0-9]:[0-9][0-9]:[0-9][0-9](\\.[0-9]*[1-9])?"); private static final Set<SqlKind> COMPARISON_OP_KIND_SET = ImmutableSet.of(SqlKind.GREATER_THAN, SqlKind.GREATER_THAN_OR_EQUAL, // SqlKind.LESS_THAN, SqlKind.LESS_THAN_OR_EQUAL, // @@ -134,12 +141,18 @@ public class RealizationPruner { val partitionColInputRef = transformColumn2RexInputRef(partitionColumn, olapContext.allTableScans); if (allReadySegments.size() > 0 && dateFormat != null) { - val firstSegmentRanges = transformSegment2RexCall(allReadySegments.get(0), dateFormat, rexBuilder, - partitionColInputRef, partitionColumn.getType(), dataflow.isStreaming()); - RelDataTypeFamily segmentLiteralTypeFamily = getSegmentLiteralTypeFamily(firstSegmentRanges.getFirst()); - filterConditions = filterConditions.stream().map(filterCondition -> rewriteRexCall(filterCondition, - rexBuilder, segmentLiteralTypeFamily, partitionColInputRef, dateFormat)) - .collect(Collectors.toList()); + try { + val firstSegmentRanges = transformSegment2RexCall(allReadySegments.get(0), dateFormat, rexBuilder, + partitionColInputRef, partitionColumn.getType(), dataflow.isStreaming()); + RelDataTypeFamily segmentLiteralTypeFamily = getSegmentLiteralTypeFamily(firstSegmentRanges.getFirst()); + filterConditions = filterConditions.stream()// + .map(filterCondition -> rewriteRexCall(filterCondition, rexBuilder, segmentLiteralTypeFamily, + partitionColInputRef, dateFormat)) + .collect(Collectors.toList()); + } catch (Exception ex) { + log.warn("Segment pruning error: ", ex); + return allReadySegments; + } } var simplifiedSqlFilter = rexSimplify.simplifyAnds(filterConditions); @@ -307,8 +320,9 @@ public class RealizationPruner { start = DateFormat.formatToDateStr(dataSegment.getKSRange().getStart(), dateFormat); end = DateFormat.formatToDateStr(dataSegment.getKSRange().getEnd(), dateFormat); } else { - start = DateFormat.formatToDateStr(dataSegment.getTSRange().getStart(), dateFormat); - end = DateFormat.formatToDateStr(dataSegment.getTSRange().getEnd(), dateFormat); + Pair<String, String> pair = transformDateType(dataSegment, partitionColType, dateFormat); + start = pair.getFirst(); + end = pair.getSecond(); } val startRexLiteral = transformValue2RexLiteral(rexBuilder, start, partitionColType); @@ -322,6 +336,40 @@ public class RealizationPruner { return Pair.newPair(greaterThanOrEqualCall, lessCall); } + private static Pair<String, String> transformDateType(NDataSegment dataSegment, DataType colType, + String dateFormat) { + long segmentStartTs = dataSegment.getTSRange().getStart(); + long segmentEndTs = dataSegment.getTSRange().getEnd(); + String formattedStart = DateFormat.formatToDateStr(segmentStartTs, dateFormat); + String formattedEnd = DateFormat.formatToDateStr(segmentEndTs, dateFormat); + String start = checkAndReformatDateType(formattedStart, segmentStartTs, colType); + String end = checkAndReformatDateType(formattedEnd, segmentEndTs, colType); + return Pair.newPair(start, end); + } + + private static String checkAndReformatDateType(String formattedValue, long segmentTs, DataType colType) { + switch (colType.getName()) { + case DATE: + if (DATE_PATTERN.matcher(formattedValue).matches()) { + return formattedValue; + } + return DateFormat.formatToDateStr(segmentTs, DEFAULT_DATE_PATTERN); + case TIMESTAMP: + if (TIMESTAMP_PATTERN.matcher(formattedValue).matches()) { + return formattedValue; + } + return DateFormat.formatToDateStr(segmentTs, DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS); + case VARCHAR: + case STRING: + case INTEGER: + case BIGINT: + return formattedValue; + default: + throw new IllegalArgumentException( + String.format(Locale.ROOT, "%s data type is not supported for partition column", colType)); + } + } + private static RexNode transformValue2RexLiteral(RexBuilder rexBuilder, String value, DataType colType) { switch (colType.getName()) { case DATE: