This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch kylin5 in repository https://gitbox.apache.org/repos/asf/kylin.git
commit b51378390b1a113928127ed7f4aa53d91eb0bc1b Author: Yaguang Jia <jiayagu...@foxmail.com> AuthorDate: Tue Mar 14 14:44:31 2023 +0800 KYLIN-5561 Fix sumLC null val and optimize sumLC model build --- .../common/exception/code/ErrorCodeServer.java | 2 + .../resources/kylin_error_msg_conf_cn.properties | 2 + .../resources/kylin_error_msg_conf_en.properties | 2 + .../kylin_error_suggestion_conf_cn.properties | 4 +- .../kylin_error_suggestion_conf_en.properties | 4 +- .../main/resources/kylin_errorcode_conf.properties | 2 + .../apache/kylin/measure/sumlc/SumLCCounter.java | 18 ++ .../apache/kylin/metadata/model/FunctionDesc.java | 33 ++- .../kylin/measure/sumlc/SumLCCounterTest.java | 54 +++++ .../localmeta/data/SSB.SUMLC_CC_TEST.csv | 6 + .../localmeta/data/SSB.SUMLC_EXTEND_4X.csv | 12 ++ .../localmeta/data/SSB.SUM_LC_NULL_TBL.csv | 8 + .../localmeta/data/SSB.SUM_LC_TB.csv | 10 + ...n => 4120b88e-6a3b-aba2-f86e-c692f6588f22.json} | 8 +- .../4120b88e-6a3b-aba2-f86e-c692f6588f22.json | 63 ++++++ .../4120b88e-6a3b-aba2-f86e-c692f6588f22.json | 135 +++++++++++++ .../metadata/sum_lc/table/SSB.SUM_LC_NULL_TBL.json | 63 ++++++ .../org/apache/kylin/newten/SumLCResultTest.java | 214 ++++++++++++++++++++ .../sum_lc/metadata/_global.project/sum_lc.json | 6 + .../4120b88e-6a3b-aba2-f86e-c692f6588f22.json} | 8 +- .../648098d6-3009-5b26-3e20-82e494cfdb0c.json | 17 ++ .../c2f81b79-2c10-dce2-4206-588cab0e68ec.json | 17 ++ .../f35f2937-9e4d-347a-7465-d64df939e7d6.json | 0 .../4120b88e-6a3b-aba2-f86e-c692f6588f22.json | 63 ++++++ .../648098d6-3009-5b26-3e20-82e494cfdb0c.json | 63 ++++++ .../c2f81b79-2c10-dce2-4206-588cab0e68ec.json} | 22 +- .../f35f2937-9e4d-347a-7465-d64df939e7d6.json | 0 .../4120b88e-6a3b-aba2-f86e-c692f6588f22.json | 135 +++++++++++++ .../648098d6-3009-5b26-3e20-82e494cfdb0c.json | 110 ++++++++++ .../c2f81b79-2c10-dce2-4206-588cab0e68ec.json | 221 +++++++++++++++++++++ .../f35f2937-9e4d-347a-7465-d64df939e7d6.json | 0 .../metadata/sum_lc/table/SSB.SUMLC_CC_TEST.json | 58 ++++++ .../metadata/sum_lc/table/SSB.SUMLC_EXTEND_4X.json | 0 .../metadata/sum_lc/table/SSB.SUM_LC_NULL_TBL.json | 63 ++++++ .../metadata/sum_lc/table/SSB.SUM_LC_TB.json | 53 +++++ .../apache/kylin/rest/service/ModelService.java | 4 + .../kylin/rest/service/ModelServiceTest.java | 16 ++ .../kylin/query/util/DefaultQueryTransformer.java | 2 +- .../kylin/query/engine/QueryRoutingEngine.java | 28 +-- .../kylin/query/engine/QueryRoutingEngineTest.java | 18 +- .../sql/catalyst/expressions/ExpressionUtils.scala | 6 +- .../sql/catalyst/expressions/KapExpresssions.scala | 2 + .../scala/org/apache/spark/sql/udaf/SumLC.scala | 100 ++++++---- 43 files changed, 1565 insertions(+), 87 deletions(-) diff --git a/src/core-common/src/main/java/org/apache/kylin/common/exception/code/ErrorCodeServer.java b/src/core-common/src/main/java/org/apache/kylin/common/exception/code/ErrorCodeServer.java index 5fab5c855c..c2d6c51c0c 100644 --- a/src/core-common/src/main/java/org/apache/kylin/common/exception/code/ErrorCodeServer.java +++ b/src/core-common/src/main/java/org/apache/kylin/common/exception/code/ErrorCodeServer.java @@ -34,6 +34,8 @@ public enum ErrorCodeServer implements ErrorCodeProducer { MODEL_NOT_EXIST_SEGMENTS("KE-010002208"), MODEL_TDS_EXPORT_DIM_COL_AND_MEASURE_NAME_CONFLICT("KE-010002301"), MODEL_TDS_EXPORT_COLUMN_AND_MEASURE_NAME_CONFLICT("KE-010002302"), + MODEL_SUM_LC_INVALID_DATA_TYPE("KE-010002303"), + MODEL_SUM_LC_INVALID_TIMESTAMP_TYPE("KE-010002304"), // 100252XX Cube CUBE_NOT_EXIST("KE-010025201"), diff --git a/src/core-common/src/main/resources/kylin_error_msg_conf_cn.properties b/src/core-common/src/main/resources/kylin_error_msg_conf_cn.properties index 0314410ab1..3b4eb00a04 100644 --- a/src/core-common/src/main/resources/kylin_error_msg_conf_cn.properties +++ b/src/core-common/src/main/resources/kylin_error_msg_conf_cn.properties @@ -34,6 +34,8 @@ KE-010002207=修改模型时,simplified_measures 参数中的每个度量必 KE-010002208=模型上线必须存在 Segment。请重新输入。 KE-010002301=维度的列名 %s 与度量名 %s 重复,无法导出 TDS。请去除重名后再重试。 KE-010002302=模型中的列名 %s 与度量名 %s 重复,无法导出 TDS。请去除重名后再重试。 +KE-010002303=SUM_LC度量的返回类型 '%s' 不合法。返回类型必须是这其中的一个:%s。 +KE-010002304=SUM_LC度量的时间类型 '%s' 不合法。 ## 100252XX Cube KE-010025201=无法找到相关 Cube。 diff --git a/src/core-common/src/main/resources/kylin_error_msg_conf_en.properties b/src/core-common/src/main/resources/kylin_error_msg_conf_en.properties index df7ed31b2a..e5dd9f4fb6 100644 --- a/src/core-common/src/main/resources/kylin_error_msg_conf_en.properties +++ b/src/core-common/src/main/resources/kylin_error_msg_conf_en.properties @@ -34,6 +34,8 @@ KE-010002207=When modifying model, each measure id is required in simplified_mea KE-010002208=The online model must have a segment. Please re-enter. KE-010002301=There are duplicated names among dimension column %s and measure name %s. Cannot export a valid TDS file. Please correct the duplicated names and try again. KE-010002302=There are duplicated names among model column %s and measure name %s. Cannot export a valid TDS file. Please correct the duplicated names and try again. +KE-010002303=SUM_LC Measure's return type '%s' is illegal. It must be one of %s. +KE-010002304=SUM_LC Measure's time column type '%s' is illegal. ## 100252XX Cube KE-010025201=Can't find the cube. diff --git a/src/core-common/src/main/resources/kylin_error_suggestion_conf_cn.properties b/src/core-common/src/main/resources/kylin_error_suggestion_conf_cn.properties index 059fa231e7..b9b1865c7b 100644 --- a/src/core-common/src/main/resources/kylin_error_suggestion_conf_cn.properties +++ b/src/core-common/src/main/resources/kylin_error_suggestion_conf_cn.properties @@ -32,6 +32,8 @@ KE-010002205= KE-010002206= KE-010002207= KE-010002208= +KE-010002303= +KE-010002304= ## 100252XX Cube KE-010025201= @@ -167,4 +169,4 @@ KE-050041202= # Common ## KE-060100201 -KE-060100201=请检查其他系统、组件等外部环境是否正确。 \ No newline at end of file +KE-060100201=请检查其他系统、组件等外部环境是否正确。 diff --git a/src/core-common/src/main/resources/kylin_error_suggestion_conf_en.properties b/src/core-common/src/main/resources/kylin_error_suggestion_conf_en.properties index 05ce078ac1..53dc857806 100644 --- a/src/core-common/src/main/resources/kylin_error_suggestion_conf_en.properties +++ b/src/core-common/src/main/resources/kylin_error_suggestion_conf_en.properties @@ -32,6 +32,8 @@ KE-010002205= KE-010002206= KE-010002207= KE-010002208= +KE-010002303= +KE-010002304= ## 100252XX Cube KE-010025201= @@ -169,4 +171,4 @@ KE-050041202= # Common ## KE-060100201 -KE-060100201=Please check whether the external environment(other systems, components, etc.) is normal. \ No newline at end of file +KE-060100201=Please check whether the external environment(other systems, components, etc.) is normal. diff --git a/src/core-common/src/main/resources/kylin_errorcode_conf.properties b/src/core-common/src/main/resources/kylin_errorcode_conf.properties index 170a66723f..3b891fbf05 100644 --- a/src/core-common/src/main/resources/kylin_errorcode_conf.properties +++ b/src/core-common/src/main/resources/kylin_errorcode_conf.properties @@ -35,6 +35,8 @@ KE-010002207 KE-010002208 KE-010002301 KE-010002302 +KE-010002303 +KE-010002304 ## 100252XX Cube KE-010025201 diff --git a/src/core-metadata/src/main/java/org/apache/kylin/measure/sumlc/SumLCCounter.java b/src/core-metadata/src/main/java/org/apache/kylin/measure/sumlc/SumLCCounter.java index dfca82230b..06ed1074bd 100644 --- a/src/core-metadata/src/main/java/org/apache/kylin/measure/sumlc/SumLCCounter.java +++ b/src/core-metadata/src/main/java/org/apache/kylin/measure/sumlc/SumLCCounter.java @@ -85,6 +85,24 @@ public class SumLCCounter implements Serializable { } } + public void update(Number sumLC, Long timestamp) { + if (timestamp == null) { + return; + } + Number typeConvertInput = numericTypeConversion(sumLC); + if (this.timestamp == null || this.timestamp < timestamp) { + this.sumLC = typeConvertInput; + this.timestamp = timestamp; + } else if (this.timestamp.equals(timestamp)) { + if (this.sumLC == null) { + this.sumLC = typeConvertInput; + } else if (typeConvertInput != null) { + String sumLCTypeName = this.sumLC.getClass().getSimpleName(); + this.sumLC = MERGE_FUNC_MAP.get(sumLCTypeName).apply(this.sumLC, typeConvertInput); + } + } + } + public Number getSumLC() { return sumLC; } diff --git a/src/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java b/src/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java index 28e3603d28..13ec783634 100644 --- a/src/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java +++ b/src/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java @@ -19,6 +19,8 @@ package org.apache.kylin.metadata.model; import static org.apache.kylin.common.exception.ServerErrorCode.INVALID_MEASURE_DATA_TYPE; +import static org.apache.kylin.common.exception.code.ErrorCodeServer.MODEL_SUM_LC_INVALID_DATA_TYPE; +import static org.apache.kylin.common.exception.code.ErrorCodeServer.MODEL_SUM_LC_INVALID_TIMESTAMP_TYPE; import static org.apache.kylin.metadata.datatype.DataType.ANY; import static org.apache.kylin.metadata.datatype.DataType.BIGINT; import static org.apache.kylin.metadata.datatype.DataType.DECIMAL; @@ -102,16 +104,15 @@ public class FunctionDesc implements Serializable { } break; } - case FunctionDesc.FUNC_SUM_LC: { - Preconditions.checkArgument(StringUtils.isNotEmpty(colDataType), - "SUM_LC Measure's input type shouldn't be null or empty"); - checkSumLCDataType(colDataType); - break; - } default: break; } } + if (FunctionDesc.FUNC_SUM_LC.equals(expression)) { + Preconditions.checkArgument(StringUtils.isNotEmpty(colDataType), + "SUM_LC Measure's input type shouldn't be null or empty"); + checkSumLCDataType(colDataType); + } String returnType = override.getOrDefault(expression, EXPRESSION_DEFAULT_TYPE_MAP.getOrDefault(expression, colDataType)); @@ -137,9 +138,14 @@ public class FunctionDesc implements Serializable { private static void checkSumLCDataType(String dataTypeName) { DataType dataType = DataType.getType(dataTypeName); if (!dataType.isNumberFamily()) { - throw new KylinException(INVALID_MEASURE_DATA_TYPE, - String.format(Locale.ROOT, "SUM_LC Measure's return type '%s' is illegal. It must be one of %s", - dataType, DataType.NUMBER_FAMILY)); + throw new KylinException(MODEL_SUM_LC_INVALID_DATA_TYPE, dataType, DataType.NUMBER_FAMILY); + } + } + + private static void checkSumLCTimeColDataType(String dataTypeName) { + DataType dataType = DataType.getType(dataTypeName); + if (dataType.isTinyInt() || dataType.isFloat() || dataType.isDouble() || dataType.isDecimal() || dataType.isBoolean()) { + throw new KylinException(MODEL_SUM_LC_INVALID_TIMESTAMP_TYPE, dataType); } } @@ -210,17 +216,22 @@ public class FunctionDesc implements Serializable { expression = PercentileMeasureType.FUNC_PERCENTILE_APPROX; // for backward compatibility } - for (ParameterDesc p : getParameters()) { + List<ParameterDesc> paramList = getParameters(); + for (int i = 0; i < paramList.size(); i++) { + ParameterDesc p = paramList.get(i); if (p.isColumnType()) { TblColRef colRef = model.findColumn(p.getValue()); p.setValue(colRef.getIdentity()); p.setColRef(colRef); if (expression.equals(FUNC_SUM_LC)) { - if (Objects.isNull(returnDataType)) { + if (i == 0) { // use the first column to init returnType and returnDataType, ignore the second timestamp column returnType = proposeReturnType(expression, colRef.getDatatype(), Maps.newHashMap(), model.isSaveCheck()); returnDataType = DataType.getType(returnType); + } else { + // check sum_lc time column type + checkSumLCTimeColDataType(colRef.getDatatype()); } } else { returnDataType = DataType.getType(proposeReturnType(expression, colRef.getDatatype(), diff --git a/src/core-metadata/src/test/java/org/apache/kylin/measure/sumlc/SumLCCounterTest.java b/src/core-metadata/src/test/java/org/apache/kylin/measure/sumlc/SumLCCounterTest.java new file mode 100644 index 0000000000..800af9757e --- /dev/null +++ b/src/core-metadata/src/test/java/org/apache/kylin/measure/sumlc/SumLCCounterTest.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kylin.measure.sumlc; + +import org.junit.Assert; +import org.junit.Test; + +public class SumLCCounterTest { + private static final Long PAST_VAL = 1L; + private static final Long PAST_TS = 915120000000L; + private static final Long LATER_VAL = 2L; + private static final Long LATER_TS = 1640966400000L; + + @Test + public void testSumLCUpdate() { + SumLCCounter target = new SumLCCounter(); + + target.update(PAST_VAL, PAST_TS); + Assert.assertEquals(PAST_VAL, target.getSumLC()); + Assert.assertEquals(PAST_TS, target.getTimestamp()); + + target.update(null, LATER_TS); + Assert.assertEquals(null, target.getSumLC()); + Assert.assertEquals(LATER_TS, target.getTimestamp()); + + target.update(null, null); + Assert.assertEquals(null, target.getSumLC()); + Assert.assertEquals(LATER_TS, target.getTimestamp()); + + target.update(LATER_VAL, LATER_TS); + Assert.assertEquals(LATER_VAL, target.getSumLC()); + Assert.assertEquals(LATER_TS, target.getTimestamp()); + + target.update(PAST_VAL + LATER_VAL, LATER_TS); + Assert.assertEquals(PAST_VAL + LATER_VAL + LATER_VAL, target.getSumLC()); + Assert.assertEquals(LATER_TS, target.getTimestamp()); + } + +} diff --git a/src/examples/test_case_data/localmeta/data/SSB.SUMLC_CC_TEST.csv b/src/examples/test_case_data/localmeta/data/SSB.SUMLC_CC_TEST.csv new file mode 100644 index 0000000000..691965e41c --- /dev/null +++ b/src/examples/test_case_data/localmeta/data/SSB.SUMLC_CC_TEST.csv @@ -0,0 +1,6 @@ +20220901,2022,9,1,A,1 +20220902,2022,9,2,A,2 +20220903,2022,9,3,A,3 +20220901,2022,9,1,B,4 +20220902,2022,9,2,B,5 +20220903,2022,9,3,B,6 \ No newline at end of file diff --git a/src/examples/test_case_data/localmeta/data/SSB.SUMLC_EXTEND_4X.csv b/src/examples/test_case_data/localmeta/data/SSB.SUMLC_EXTEND_4X.csv new file mode 100644 index 0000000000..8c6fd3ed83 --- /dev/null +++ b/src/examples/test_case_data/localmeta/data/SSB.SUMLC_EXTEND_4X.csv @@ -0,0 +1,12 @@ +2022-09-01,1661961600000,B,121,32758,2147483640,2147483640,3.1415926,3.1415926,41.5343424 +2022-09-02,1662048000000,B,122,32759,2147483641,2147483641,3.1415926,3.1415926,42.5343424 +2022-09-03,1662134400000,B,123,32760,2147483642,2147483642,3.1415926,3.1415926,43.5343424 +2022-09-01,1661961600000,A,124,32761,2147483643,2147483643,3.1415926,3.1415926,44.5343424 +2022-09-02,1662048000000,A,125,32762,2147483644,2147483644,3.1415926,3.1415926,45.5343424 +2022-09-03,1662134400000,A,126,32763,2147483645,2147483645,3.1415926,3.1415926,46.5343424 +2022-10-03,1664726400000,B,121,32758,2147483640,2147483640,3.1415926,3.1415926,41.5343424 +2022-10-04,1664812800000,B,122,32759,2147483641,2147483641,3.1415926,3.1415926,42.5343424 +2022-10-05,1664899200000,B,123,32760,2147483642,2147483642,3.1415926,3.1415926,43.5343424 +2022-10-03,1664726400000,A,124,32761,2147483643,2147483643,3.1415926,3.1415926,44.5343424 +2022-10-04,1664812800000,A,125,32762,2147483644,2147483644,3.1415926,3.1415926,45.5343424 +2022-10-05,1664899200000,A,126,32763,2147483645,2147483645,3.1415926,3.1415926,46.5343424 \ No newline at end of file diff --git a/src/examples/test_case_data/localmeta/data/SSB.SUM_LC_NULL_TBL.csv b/src/examples/test_case_data/localmeta/data/SSB.SUM_LC_NULL_TBL.csv new file mode 100644 index 0000000000..904b6f6dfb --- /dev/null +++ b/src/examples/test_case_data/localmeta/data/SSB.SUM_LC_NULL_TBL.csv @@ -0,0 +1,8 @@ +A,CMB,2022-11-07,100.0,2022/11/07,100.000001,10.000001 +B,CMB,2022-11-07,110.0,2022/11/07,NULL,NULL +C,CMB,2022-11-07,120.0,2022/11/07,NULL,NULL +D,CMB,2022-11-07,130.0,NULL,NULL,13.000001 +A,BCM,2022-11-08,200.0,2022/11/08,200.000001,20.000001 +B,BCM,2022-11-08,210.0,2022/11/08,210.000001,21.000001 +C,BCM,2022-11-08,220.0,2022/11/08,NULL,NULL +D,BCM,2022-11-08,230.0,2022/11/08,230.000001,23.000001 diff --git a/src/examples/test_case_data/localmeta/data/SSB.SUM_LC_TB.csv b/src/examples/test_case_data/localmeta/data/SSB.SUM_LC_TB.csv new file mode 100644 index 0000000000..78b697a9f8 --- /dev/null +++ b/src/examples/test_case_data/localmeta/data/SSB.SUM_LC_TB.csv @@ -0,0 +1,10 @@ +A,CMB,2022-11-07,100,2022-11-07 00:00:00 +B,CMB,2022-11-07,110,2022-11-07 00:00:00 +A,BCM,2022-11-08,200,2022-11-08 00:00:00 +B,BCM,2022-11-08,210,2022-11-08 00:00:00 +A,ICBC,2022-11-09,300,2022-11-09 00:00:00 +B,ICBC,2022-11-09,310,2022-11-09 00:00:00 +A,ABC,2022-11-10,400,2022-11-10 00:00:00 +B,ABC,2022-11-10,410,2022-11-10 00:00:00 +A,BOC,2022-11-11,500,2022-11-11 00:00:00 +B,BOC,2022-11-11,510,2022-11-11 00:00:00 \ No newline at end of file diff --git a/src/examples/test_case_data/localmeta/metadata/sum_lc/dataflow/f35f2937-9e4d-347a-7465-d64df939e7d6.json b/src/examples/test_case_data/localmeta/metadata/sum_lc/dataflow/4120b88e-6a3b-aba2-f86e-c692f6588f22.json similarity index 61% copy from src/examples/test_case_data/localmeta/metadata/sum_lc/dataflow/f35f2937-9e4d-347a-7465-d64df939e7d6.json copy to src/examples/test_case_data/localmeta/metadata/sum_lc/dataflow/4120b88e-6a3b-aba2-f86e-c692f6588f22.json index f5f39f4889..647faf41f8 100644 --- a/src/examples/test_case_data/localmeta/metadata/sum_lc/dataflow/f35f2937-9e4d-347a-7465-d64df939e7d6.json +++ b/src/examples/test_case_data/localmeta/metadata/sum_lc/dataflow/4120b88e-6a3b-aba2-f86e-c692f6588f22.json @@ -1,7 +1,7 @@ { - "uuid" : "f35f2937-9e4d-347a-7465-d64df939e7d6", - "last_modified" : 1667467391143, - "create_time" : 1667465241650, + "uuid" : "4120b88e-6a3b-aba2-f86e-c692f6588f22", + "last_modified" : 1667836716050, + "create_time" : 1667828178652, "version" : "4.0.0.0", "status" : "ONLINE", "last_status" : null, @@ -10,4 +10,4 @@ "last_query_time" : 0, "layout_query_hit_count" : { }, "segments" : [ ] -} \ No newline at end of file +} diff --git a/src/examples/test_case_data/localmeta/metadata/sum_lc/index_plan/4120b88e-6a3b-aba2-f86e-c692f6588f22.json b/src/examples/test_case_data/localmeta/metadata/sum_lc/index_plan/4120b88e-6a3b-aba2-f86e-c692f6588f22.json new file mode 100644 index 0000000000..6b187c2f82 --- /dev/null +++ b/src/examples/test_case_data/localmeta/metadata/sum_lc/index_plan/4120b88e-6a3b-aba2-f86e-c692f6588f22.json @@ -0,0 +1,63 @@ +{ + "uuid" : "4120b88e-6a3b-aba2-f86e-c692f6588f22", + "last_modified" : 1668138510879, + "create_time" : 1668138510851, + "version" : "4.0.0.0", + "description" : null, + "rule_based_index" : null, + "indexes" : [ { + "id" : 0, + "dimensions" : [ 0, 2, 6 ], + "measures" : [ 100000, 100001, 100002 ], + "layouts" : [ { + "id" : 1, + "name" : null, + "owner" : null, + "col_order" : [ 0, 2, 6, 100000, 100001, 100002 ], + "shard_by_columns" : [ ], + "partition_by_columns" : [ ], + "sort_by_columns" : [ ], + "storage_type" : 20, + "update_time" : 1668138510858, + "manual" : false, + "auto" : false, + "base" : true, + "draft_version" : null, + "index_range" : null + } ], + "next_layout_offset" : 2 + }, { + "id" : 20000000000, + "dimensions" : [ 0, 1, 2, 4, 5, 6 ], + "measures" : [ ], + "layouts" : [ { + "id" : 20000000001, + "name" : null, + "owner" : null, + "col_order" : [ 0, 1, 2, 4, 5, 6 ], + "shard_by_columns" : [ ], + "partition_by_columns" : [ ], + "sort_by_columns" : [ ], + "storage_type" : 20, + "update_time" : 1668138510859, + "manual" : false, + "auto" : false, + "base" : true, + "draft_version" : null, + "index_range" : null + } ], + "next_layout_offset" : 2 + } ], + "override_properties" : { }, + "to_be_deleted_indexes" : [ ], + "auto_merge_time_ranges" : null, + "retention_range" : 0, + "engine_type" : 80, + "next_aggregation_index_id" : 10000, + "next_table_index_id" : 20000010000, + "agg_shard_by_columns" : [ ], + "extend_partition_columns" : [ ], + "layout_bucket_num" : { }, + "approved_additional_recs" : 0, + "approved_removal_recs" : 0 +} diff --git a/src/examples/test_case_data/localmeta/metadata/sum_lc/model_desc/4120b88e-6a3b-aba2-f86e-c692f6588f22.json b/src/examples/test_case_data/localmeta/metadata/sum_lc/model_desc/4120b88e-6a3b-aba2-f86e-c692f6588f22.json new file mode 100644 index 0000000000..943dbb203e --- /dev/null +++ b/src/examples/test_case_data/localmeta/metadata/sum_lc/model_desc/4120b88e-6a3b-aba2-f86e-c692f6588f22.json @@ -0,0 +1,135 @@ +{ + "uuid" : "4120b88e-6a3b-aba2-f86e-c692f6588f22", + "last_modified" : 1668138510884, + "create_time" : 1668138510184, + "version" : "4.0.0.0", + "alias" : "sum_lc_null_val_test", + "owner" : "ADMIN", + "config_last_modifier" : null, + "config_last_modified" : 0, + "description" : null, + "fact_table" : "SSB.SUM_LC_NULL_TBL", + "fact_table_alias" : null, + "management_type" : "MODEL_BASED", + "join_tables" : [ ], + "filter_condition" : "", + "partition_desc" : { + "partition_date_column" : "SUM_LC_NULL_TBL.PART_COL", + "partition_date_start" : 0, + "partition_date_format" : "yyyy-MM-dd", + "partition_type" : "APPEND", + "partition_condition_builder" : "org.apache.kylin.metadata.model.PartitionDesc$DefaultPartitionConditionBuilder" + }, + "capacity" : "MEDIUM", + "segment_config" : { + "auto_merge_enabled" : null, + "auto_merge_time_ranges" : null, + "volatile_range" : null, + "retention_range" : null, + "create_empty_segment_enabled" : false + }, + "data_check_desc" : null, + "semantic_version" : 0, + "storage_type" : 0, + "model_type" : "BATCH", + "all_named_columns" : [ { + "id" : 0, + "name" : "PART_COL", + "column" : "SUM_LC_NULL_TBL.PART_COL", + "status" : "DIMENSION" + }, { + "id" : 1, + "name" : "SUM_DATE1", + "column" : "SUM_LC_NULL_TBL.SUM_DATE1" + }, { + "id" : 2, + "name" : "ACCOUNT1", + "column" : "SUM_LC_NULL_TBL.ACCOUNT1", + "status" : "DIMENSION" + }, { + "id" : 3, + "name" : "BALANCE1", + "column" : "SUM_LC_NULL_TBL.BALANCE1" + }, { + "id" : 4, + "name" : "DATA_NULL", + "column" : "SUM_LC_NULL_TBL.DATA_NULL" + }, { + "id" : 5, + "name" : "DATA_DECIMAL", + "column" : "SUM_LC_NULL_TBL.DATA_DECIMAL" + }, { + "id" : 6, + "name" : "ACCOUNT2", + "column" : "SUM_LC_NULL_TBL.ACCOUNT2", + "status" : "DIMENSION" + } ], + "all_measures" : [ { + "name" : "COUNT_ALL", + "function" : { + "expression" : "COUNT", + "parameters" : [ { + "type" : "constant", + "value" : "1" + } ], + "returntype" : "bigint" + }, + "column" : null, + "comment" : null, + "id" : 100000, + "type" : "NORMAL", + "internal_ids" : [ ] + }, { + "name" : "sumlc_double_null", + "function" : { + "expression" : "SUM_LC", + "parameters" : [ { + "type" : "column", + "value" : "SUM_LC_NULL_TBL.DATA_NULL" + }, { + "type" : "column", + "value" : "SUM_LC_NULL_TBL.SUM_DATE1" + } ], + "returntype" : "double" + }, + "column" : null, + "comment" : "", + "id" : 100001, + "type" : "NORMAL", + "internal_ids" : [ ] + }, { + "name" : "sumlc_decimal_null", + "function" : { + "expression" : "SUM_LC", + "parameters" : [ { + "type" : "column", + "value" : "SUM_LC_NULL_TBL.DATA_DECIMAL" + }, { + "type" : "column", + "value" : "SUM_LC_NULL_TBL.SUM_DATE1" + } ], + "returntype" : "decimal(20,6)" + }, + "column" : null, + "comment" : "", + "id" : 100002, + "type" : "NORMAL", + "internal_ids" : [ ] + } ], + "recommendations_count" : 0, + "computed_columns" : [ ], + "canvas" : { + "coordinate" : { + "SUM_LC_NULL_TBL" : { + "x" : 462.44444105360253, + "y" : 108.66667005750864, + "width" : 200.0, + "height" : 230.0 + } + }, + "zoom" : 9.0 + }, + "multi_partition_desc" : null, + "multi_partition_key_mapping" : null, + "fusion_id" : null +} diff --git a/src/examples/test_case_data/localmeta/metadata/sum_lc/table/SSB.SUM_LC_NULL_TBL.json b/src/examples/test_case_data/localmeta/metadata/sum_lc/table/SSB.SUM_LC_NULL_TBL.json new file mode 100644 index 0000000000..d843b9239b --- /dev/null +++ b/src/examples/test_case_data/localmeta/metadata/sum_lc/table/SSB.SUM_LC_NULL_TBL.json @@ -0,0 +1,63 @@ +{ + "uuid" : "6d164a7b-de25-e326-32f3-92103e53cf7e", + "last_modified" : 0, + "create_time" : 1668136653291, + "version" : "4.0.0.0", + "name" : "SUM_LC_NULL_TBL", + "columns" : [ { + "id" : "1", + "name" : "ACCOUNT1", + "datatype" : "char(1)", + "case_sensitive_name" : "account1" + }, { + "id" : "2", + "name" : "ACCOUNT2", + "datatype" : "varchar(52)", + "case_sensitive_name" : "account2" + }, { + "id" : "3", + "name" : "PART_COL", + "datatype" : "date", + "case_sensitive_name" : "part_col" + }, { + "id" : "4", + "name" : "BALANCE1", + "datatype" : "double", + "case_sensitive_name" : "balance1" + }, { + "id" : "5", + "name" : "SUM_DATE1", + "datatype" : "varchar(1024)", + "case_sensitive_name" : "sum_date1" + }, { + "id" : "6", + "name" : "DATA_NULL", + "datatype" : "double", + "case_sensitive_name" : "data_null" + }, { + "id" : "7", + "name" : "DATA_DECIMAL", + "datatype" : "decimal(10,6)", + "case_sensitive_name" : "data_decimal" + } ], + "source_type" : 9, + "table_type" : "MANAGED", + "top" : false, + "increment_loading" : false, + "last_snapshot_path" : null, + "last_snapshot_size" : 0, + "snapshot_last_modified" : 0, + "query_hit_count" : 0, + "partition_column" : null, + "snapshot_partitions" : { }, + "snapshot_partitions_info" : { }, + "snapshot_total_rows" : 0, + "snapshot_partition_col" : null, + "selected_snapshot_partition_col" : null, + "temp_snapshot_path" : null, + "snapshot_has_broken" : false, + "database" : "SSB", + "transactional" : false, + "rangePartition" : false, + "partition_desc" : null +} diff --git a/src/kylin-it/src/test/java/org/apache/kylin/newten/SumLCResultTest.java b/src/kylin-it/src/test/java/org/apache/kylin/newten/SumLCResultTest.java new file mode 100644 index 0000000000..2bf0c69b13 --- /dev/null +++ b/src/kylin-it/src/test/java/org/apache/kylin/newten/SumLCResultTest.java @@ -0,0 +1,214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.newten; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.kylin.common.KylinConfig; +import org.apache.kylin.common.util.Pair; +import org.apache.kylin.engine.spark.NLocalWithSparkSessionTest; +import org.apache.kylin.job.engine.JobEngineConfig; +import org.apache.kylin.job.impl.threadpool.NDefaultScheduler; +import org.apache.kylin.metadata.cube.model.LayoutEntity; +import org.apache.kylin.metadata.cube.model.NDataflow; +import org.apache.kylin.metadata.cube.model.NDataflowManager; +import org.apache.kylin.metadata.model.SegmentRange; +import org.apache.kylin.util.ExecAndComp; +import org.apache.spark.sql.SparderEnv; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import com.google.common.collect.Sets; + +public class SumLCResultTest extends NLocalWithSparkSessionTest { + + private NDataflowManager dfMgr = null; + + @Before + public void setup() throws Exception { + overwriteSystemProp("kylin.job.scheduler.poll-interval-second", "1"); + this.createTestMetadata("src/test/resources/ut_meta/sum_lc"); + dfMgr = NDataflowManager.getInstance(getTestConfig(), getProject()); + NDefaultScheduler scheduler = NDefaultScheduler.getInstance(getProject()); + scheduler.init(new JobEngineConfig(KylinConfig.getInstanceFromEnv())); + if (!scheduler.hasStarted()) { + throw new RuntimeException("scheduler has not been started"); + } + } + + @After + public void after() throws Exception { + NDefaultScheduler.destroyInstance(); + cleanupTestMetadata(); + } + + @Override + public String getProject() { + return "sum_lc"; + } + + @Test + public void testSumLCWithDifferentDataType() throws Exception { + String dfID = "f35f2937-9e4d-347a-7465-d64df939e7d6"; + NDataflow dataflow = dfMgr.getDataflow(dfID); + LayoutEntity layout = dataflow.getIndexPlan().getLayoutEntity(30001L); + Assert.assertNotNull(layout); + + indexDataConstructor.buildIndex(dfID, + new SegmentRange.TimePartitionedSegmentRange(1661961600000L, 1664553600000L), Sets.newHashSet(layout), + true); + List<Pair<String, String>> query = new ArrayList<>(); + String sql1 = "select TX_DATE, sum_lc(TINYINT_DATA, TX_DATE) from SSB.SUMLC_EXTEND_4X group by TX_DATE"; + query.add(Pair.newPair("sum_lc_tinyint_data_query", sql1)); + + String sql2 = "select TX_DATE, sum_lc(SMALLINT_DATA, TX_DATE) from SSB.SUMLC_EXTEND_4X group by TX_DATE"; + query.add(Pair.newPair("sum_lc_smallint_data_query", sql2)); + + String sql3 = "select TX_DATE, sum_lc(INT_DATA, TX_DATE) from SSB.SUMLC_EXTEND_4X group by TX_DATE"; + query.add(Pair.newPair("sum_lc_int_data_query", sql3)); + + String sql4 = "select TX_DATE, sum_lc(BIGINT_DATA, TX_DATE) from SSB.SUMLC_EXTEND_4X group by TX_DATE"; + query.add(Pair.newPair("sum_lc_bigint_data_query", sql4)); + + String sql5 = "select TX_DATE, sum_lc(FLOAT_DATA, TX_DATE) from SSB.SUMLC_EXTEND_4X group by TX_DATE"; + query.add(Pair.newPair("sum_lc_float_data_query", sql5)); + + String sql6 = "select TX_DATE, sum_lc(DOUBLE_DATA, TX_DATE) from SSB.SUMLC_EXTEND_4X group by TX_DATE"; + query.add(Pair.newPair("sum_lc_double_data_query", sql6)); + + String sql7 = "select TX_DATE, sum_lc(DECIMAL_DATA, TX_DATE) from SSB.SUMLC_EXTEND_4X group by TX_DATE"; + query.add(Pair.newPair("sum_lc_decimal_data_query", sql7)); + + ExecAndComp.execAndCompare(query, getProject(), ExecAndComp.CompareLevel.NONE, "left"); + } + + @Test + public void testPostAggregate() throws Exception { + String dfID = "f35f2937-9e4d-347a-7465-d64df939e7d6"; + NDataflow dataflow = dfMgr.getDataflow(dfID); + LayoutEntity layout = dataflow.getIndexPlan().getLayoutEntity(30001L); + Assert.assertNotNull(layout); + + indexDataConstructor.buildIndex(dfID, + new SegmentRange.TimePartitionedSegmentRange(1661961600000L, 1664553600000L), Sets.newHashSet(layout), + true); + + populateSSWithCSVData(getTestConfig(), getProject(), SparderEnv.getSparkSession()); + List<Pair<String, String>> query = new ArrayList<>(); + + String sql = "select sum_lc(TINYINT_DATA, TX_DATE) from SSB.SUMLC_EXTEND_4X"; + query.add(Pair.newPair("sum_lc_post_aggregate_query", sql)); + + ExecAndComp.execAndCompare(query, getProject(), ExecAndComp.CompareLevel.NONE, "left"); + } + + @Test + public void testEmptySegment() throws Exception { + String dfID = "f35f2937-9e4d-347a-7465-d64df939e7d6"; + NDataflow dataflow = dfMgr.getDataflow(dfID); + LayoutEntity layout = dataflow.getIndexPlan().getLayoutEntity(50001L); + Assert.assertNotNull(layout); + + indexDataConstructor.buildIndex(dfID, + new SegmentRange.TimePartitionedSegmentRange(1667232000000L, 1669824000000L), Sets.newHashSet(layout), + true); + populateSSWithCSVData(getTestConfig(), getProject(), SparderEnv.getSparkSession()); + List<Pair<String, String>> query = new ArrayList<>(); + + String sql1 = "select account, sum_lc(TINYINT_DATA, TX_DATE) from SSB.SUMLC_EXTEND_4X group by account"; + query.add(Pair.newPair("sum_lc_empty_seg_exactly_match_query", sql1)); + + String sql2 = "select sum_lc(TINYINT_DATA, TX_DATE) from SSB.SUMLC_EXTEND_4X"; + query.add(Pair.newPair("sum_lc_empty_seg_post_agg_query", sql2)); + + ExecAndComp.execAndCompare(query, getProject(), ExecAndComp.CompareLevel.NONE, "left"); + } + + @Test + public void testCCQuery() throws Exception { + String dfID = "c2f81b79-2c10-dce2-4206-588cab0e68ec"; + NDataflow dataflow = dfMgr.getDataflow(dfID); + LayoutEntity layout = dataflow.getIndexPlan().getLayoutEntity(190001L); + Assert.assertNotNull(layout); + + indexDataConstructor.buildIndex(dfID, SegmentRange.TimePartitionedSegmentRange.createInfinite(), + Sets.newHashSet(layout), true); + + populateSSWithCSVData(getTestConfig(), getProject(), SparderEnv.getSparkSession()); + List<Pair<String, String>> query = new ArrayList<>(); + + String sql1 = "select account, sum_lc(INT_DATA, TO_TIME_COMPOSE_CC) from SSB.SUMLC_CC_TEST group by account"; + query.add(Pair.newPair("sum_lc_cc_time", sql1)); + + String sql2 = "select account, sum_lc(INT_DATA * 2, TO_TIME_COMPOSE_CC) from SSB.SUMLC_CC_TEST group by account"; + query.add(Pair.newPair("sum_lc_cc_int", sql2)); + + String sql3 = "select account, sum_lc(INT_DATA_CC, TO_TIME_COMPOSE_CC) from SSB.SUMLC_CC_TEST group by account"; + query.add(Pair.newPair("sum_lc_cc_int_cc_time", sql3)); + + ExecAndComp.execAndCompare(query, getProject(), ExecAndComp.CompareLevel.NONE, "left"); + } + + @Test + public void testTimestampTimeCol() throws Exception { + String dfID = "648098d6-3009-5b26-3e20-82e494cfdb0c"; + NDataflow dataflow = dfMgr.getDataflow(dfID); + LayoutEntity layout = dataflow.getIndexPlan().getLayoutEntity(1L); + Assert.assertNotNull(layout); + + indexDataConstructor.buildIndex(dfID, SegmentRange.TimePartitionedSegmentRange.createInfinite(), + Sets.newHashSet(layout), true); + + populateSSWithCSVData(getTestConfig(), getProject(), SparderEnv.getSparkSession()); + List<Pair<String, String>> query = new ArrayList<>(); + + String sql = "select sum_lc(BALANCE, SUM_DATE) from SSB.SUM_LC_TB"; + query.add(Pair.newPair("query", sql)); + ExecAndComp.execAndCompare(query, getProject(), ExecAndComp.CompareLevel.NONE, "left"); + } + + @Test + public void testNullVal() throws Exception { + String dfID = "4120b88e-6a3b-aba2-f86e-c692f6588f22"; + NDataflow dataflow = dfMgr.getDataflow(dfID); + LayoutEntity layout = dataflow.getIndexPlan().getLayoutEntity(1L); + Assert.assertNotNull(layout); + + indexDataConstructor.buildIndex(dfID, + new SegmentRange.TimePartitionedSegmentRange(1667750400000L, 1667836800000L), Sets.newHashSet(layout), + true); + populateSSWithCSVData(getTestConfig(), getProject(), SparderEnv.getSparkSession()); + List<Pair<String, String>> query = new ArrayList<>(); + + String sql1 = "select account1, account2, part_col, sum_lc(data_null, sum_date1) from ssb.sum_lc_null_tbl group by account1, account2, part_col"; + query.add(Pair.newPair("exact_match_null_query", sql1)); + + String sql2 = "select account1, sum_lc(data_null, sum_date1) from ssb.sum_lc_null_tbl group by account1"; + query.add(Pair.newPair("double_null_query", sql2)); + + String sql3 = "select account1, sum_lc(data_decimal, sum_date1) from ssb.sum_lc_null_tbl group by account1"; + query.add(Pair.newPair("decimal_null_query", sql3)); + + ExecAndComp.execAndCompare(query, getProject(), ExecAndComp.CompareLevel.NONE, "left"); + } + +} diff --git a/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/_global.project/sum_lc.json b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/_global.project/sum_lc.json new file mode 100644 index 0000000000..7b831ae057 --- /dev/null +++ b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/_global.project/sum_lc.json @@ -0,0 +1,6 @@ +{ + "uuid": "d1ceb3a4-4d2c-27af-5b7d-92c8a4055776", + "override_kylin_properties": { + "kylin.query.slowquery-detect-interval": "4" + } +} diff --git a/src/examples/test_case_data/localmeta/metadata/sum_lc/dataflow/f35f2937-9e4d-347a-7465-d64df939e7d6.json b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/dataflow/4120b88e-6a3b-aba2-f86e-c692f6588f22.json similarity index 61% copy from src/examples/test_case_data/localmeta/metadata/sum_lc/dataflow/f35f2937-9e4d-347a-7465-d64df939e7d6.json copy to src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/dataflow/4120b88e-6a3b-aba2-f86e-c692f6588f22.json index f5f39f4889..647faf41f8 100644 --- a/src/examples/test_case_data/localmeta/metadata/sum_lc/dataflow/f35f2937-9e4d-347a-7465-d64df939e7d6.json +++ b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/dataflow/4120b88e-6a3b-aba2-f86e-c692f6588f22.json @@ -1,7 +1,7 @@ { - "uuid" : "f35f2937-9e4d-347a-7465-d64df939e7d6", - "last_modified" : 1667467391143, - "create_time" : 1667465241650, + "uuid" : "4120b88e-6a3b-aba2-f86e-c692f6588f22", + "last_modified" : 1667836716050, + "create_time" : 1667828178652, "version" : "4.0.0.0", "status" : "ONLINE", "last_status" : null, @@ -10,4 +10,4 @@ "last_query_time" : 0, "layout_query_hit_count" : { }, "segments" : [ ] -} \ No newline at end of file +} diff --git a/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/dataflow/648098d6-3009-5b26-3e20-82e494cfdb0c.json b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/dataflow/648098d6-3009-5b26-3e20-82e494cfdb0c.json new file mode 100644 index 0000000000..e8dce17945 --- /dev/null +++ b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/dataflow/648098d6-3009-5b26-3e20-82e494cfdb0c.json @@ -0,0 +1,17 @@ +{ + "uuid" : "648098d6-3009-5b26-3e20-82e494cfdb0c", + "last_modified" : 1667836716050, + "create_time" : 1667828178652, + "version" : "4.0.0.0", + "status" : "ONLINE", + "last_status" : null, + "cost" : 50, + "query_hit_count" : 6, + "last_query_time" : 1667835303109, + "layout_query_hit_count" : { + "1" : { + "1667750400000" : 6 + } + }, + "segments" : [ ] +} \ No newline at end of file diff --git a/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/dataflow/c2f81b79-2c10-dce2-4206-588cab0e68ec.json b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/dataflow/c2f81b79-2c10-dce2-4206-588cab0e68ec.json new file mode 100644 index 0000000000..2966d75878 --- /dev/null +++ b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/dataflow/c2f81b79-2c10-dce2-4206-588cab0e68ec.json @@ -0,0 +1,17 @@ +{ + "uuid" : "c2f81b79-2c10-dce2-4206-588cab0e68ec", + "last_modified" : 1667549287312, + "create_time" : 1667544082554, + "version" : "4.0.0.0", + "status" : "ONLINE", + "last_status" : null, + "cost" : 50, + "query_hit_count" : 3, + "last_query_time" : 1667549256856, + "layout_query_hit_count" : { + "190001" : { + "1667491200000" : 3 + } + }, + "segments" : [ ] +} \ No newline at end of file diff --git a/src/examples/test_case_data/localmeta/metadata/sum_lc/dataflow/f35f2937-9e4d-347a-7465-d64df939e7d6.json b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/dataflow/f35f2937-9e4d-347a-7465-d64df939e7d6.json similarity index 100% rename from src/examples/test_case_data/localmeta/metadata/sum_lc/dataflow/f35f2937-9e4d-347a-7465-d64df939e7d6.json rename to src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/dataflow/f35f2937-9e4d-347a-7465-d64df939e7d6.json diff --git a/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/index_plan/4120b88e-6a3b-aba2-f86e-c692f6588f22.json b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/index_plan/4120b88e-6a3b-aba2-f86e-c692f6588f22.json new file mode 100644 index 0000000000..6b187c2f82 --- /dev/null +++ b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/index_plan/4120b88e-6a3b-aba2-f86e-c692f6588f22.json @@ -0,0 +1,63 @@ +{ + "uuid" : "4120b88e-6a3b-aba2-f86e-c692f6588f22", + "last_modified" : 1668138510879, + "create_time" : 1668138510851, + "version" : "4.0.0.0", + "description" : null, + "rule_based_index" : null, + "indexes" : [ { + "id" : 0, + "dimensions" : [ 0, 2, 6 ], + "measures" : [ 100000, 100001, 100002 ], + "layouts" : [ { + "id" : 1, + "name" : null, + "owner" : null, + "col_order" : [ 0, 2, 6, 100000, 100001, 100002 ], + "shard_by_columns" : [ ], + "partition_by_columns" : [ ], + "sort_by_columns" : [ ], + "storage_type" : 20, + "update_time" : 1668138510858, + "manual" : false, + "auto" : false, + "base" : true, + "draft_version" : null, + "index_range" : null + } ], + "next_layout_offset" : 2 + }, { + "id" : 20000000000, + "dimensions" : [ 0, 1, 2, 4, 5, 6 ], + "measures" : [ ], + "layouts" : [ { + "id" : 20000000001, + "name" : null, + "owner" : null, + "col_order" : [ 0, 1, 2, 4, 5, 6 ], + "shard_by_columns" : [ ], + "partition_by_columns" : [ ], + "sort_by_columns" : [ ], + "storage_type" : 20, + "update_time" : 1668138510859, + "manual" : false, + "auto" : false, + "base" : true, + "draft_version" : null, + "index_range" : null + } ], + "next_layout_offset" : 2 + } ], + "override_properties" : { }, + "to_be_deleted_indexes" : [ ], + "auto_merge_time_ranges" : null, + "retention_range" : 0, + "engine_type" : 80, + "next_aggregation_index_id" : 10000, + "next_table_index_id" : 20000010000, + "agg_shard_by_columns" : [ ], + "extend_partition_columns" : [ ], + "layout_bucket_num" : { }, + "approved_additional_recs" : 0, + "approved_removal_recs" : 0 +} diff --git a/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/index_plan/648098d6-3009-5b26-3e20-82e494cfdb0c.json b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/index_plan/648098d6-3009-5b26-3e20-82e494cfdb0c.json new file mode 100644 index 0000000000..b28d6b4543 --- /dev/null +++ b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/index_plan/648098d6-3009-5b26-3e20-82e494cfdb0c.json @@ -0,0 +1,63 @@ +{ + "uuid" : "648098d6-3009-5b26-3e20-82e494cfdb0c", + "last_modified" : 1667835213989, + "create_time" : 1667828178631, + "version" : "4.0.0.0", + "description" : null, + "rule_based_index" : null, + "indexes" : [ { + "id" : 0, + "dimensions" : [ 0, 1, 2, 4 ], + "measures" : [ 100000, 100001 ], + "layouts" : [ { + "id" : 1, + "name" : null, + "owner" : null, + "col_order" : [ 0, 1, 2, 4, 100000, 100001 ], + "shard_by_columns" : [ ], + "partition_by_columns" : [ ], + "sort_by_columns" : [ ], + "storage_type" : 20, + "update_time" : 1667828178631, + "manual" : false, + "auto" : false, + "base" : true, + "draft_version" : null, + "index_range" : null + } ], + "next_layout_offset" : 2 + }, { + "id" : 20000000000, + "dimensions" : [ 0, 1, 2, 3, 4 ], + "measures" : [ ], + "layouts" : [ { + "id" : 20000000001, + "name" : null, + "owner" : null, + "col_order" : [ 0, 1, 2, 3, 4 ], + "shard_by_columns" : [ ], + "partition_by_columns" : [ ], + "sort_by_columns" : [ ], + "storage_type" : 20, + "update_time" : 1667828178632, + "manual" : false, + "auto" : false, + "base" : true, + "draft_version" : null, + "index_range" : null + } ], + "next_layout_offset" : 2 + } ], + "override_properties" : { }, + "to_be_deleted_indexes" : [ ], + "auto_merge_time_ranges" : null, + "retention_range" : 0, + "engine_type" : 80, + "next_aggregation_index_id" : 10000, + "next_table_index_id" : 20000010000, + "agg_shard_by_columns" : [ ], + "extend_partition_columns" : [ ], + "layout_bucket_num" : { }, + "approved_additional_recs" : 0, + "approved_removal_recs" : 0 +} \ No newline at end of file diff --git a/src/examples/test_case_data/localmeta/metadata/sum_lc/index_plan/f35f2937-9e4d-347a-7465-d64df939e7d6.json b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/index_plan/c2f81b79-2c10-dce2-4206-588cab0e68ec.json similarity index 61% copy from src/examples/test_case_data/localmeta/metadata/sum_lc/index_plan/f35f2937-9e4d-347a-7465-d64df939e7d6.json copy to src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/index_plan/c2f81b79-2c10-dce2-4206-588cab0e68ec.json index 9b29b1fd06..af9ed2c9d5 100644 --- a/src/examples/test_case_data/localmeta/metadata/sum_lc/index_plan/f35f2937-9e4d-347a-7465-d64df939e7d6.json +++ b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/index_plan/c2f81b79-2c10-dce2-4206-588cab0e68ec.json @@ -1,16 +1,16 @@ { - "uuid" : "f35f2937-9e4d-347a-7465-d64df939e7d6", - "last_modified" : 1667467391117, - "create_time" : 1667293061109, + "uuid" : "c2f81b79-2c10-dce2-4206-588cab0e68ec", + "last_modified" : 1667548832263, + "create_time" : 1667544082554, "version" : "4.0.0.0", "description" : null, "rule_based_index" : { - "dimensions" : [ 1, 6 ], - "measures" : [ 100000, 100001, 100002, 100003, 100004, 100005, 100006, 100007, 100008 ], + "dimensions" : [ 0, 5, 7 ], + "measures" : [ 100000, 100002, 100004, 100005 ], "global_dim_cap" : null, "aggregation_groups" : [ { - "includes" : [ 1, 6 ], - "measures" : [ 100000, 100001, 100002, 100003, 100004, 100005, 100006, 100007, 100008 ], + "includes" : [ 0, 5, 7 ], + "measures" : [ 100000, 100002, 100004, 100005 ], "select_rule" : { "hierarchy_dims" : [ ], "mandatory_dims" : [ ], @@ -18,10 +18,10 @@ }, "index_range" : "EMPTY" } ], - "layout_id_mapping" : [ 30001, 40001, 50001 ], + "layout_id_mapping" : [ 150001, 160001, 170001, 180001, 190001, 200001, 210001 ], "parent_forward" : 3, - "index_start_id" : 30000, - "last_modify_time" : 1667370467484, + "index_start_id" : 150000, + "last_modify_time" : 1667548832258, "layout_black_list" : [ ], "scheduler_version" : 2, "index_update_enabled" : true, @@ -33,7 +33,7 @@ "auto_merge_time_ranges" : null, "retention_range" : 0, "engine_type" : 80, - "next_aggregation_index_id" : 60000, + "next_aggregation_index_id" : 220000, "next_table_index_id" : 20000000000, "agg_shard_by_columns" : [ ], "extend_partition_columns" : [ ], diff --git a/src/examples/test_case_data/localmeta/metadata/sum_lc/index_plan/f35f2937-9e4d-347a-7465-d64df939e7d6.json b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/index_plan/f35f2937-9e4d-347a-7465-d64df939e7d6.json similarity index 100% rename from src/examples/test_case_data/localmeta/metadata/sum_lc/index_plan/f35f2937-9e4d-347a-7465-d64df939e7d6.json rename to src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/index_plan/f35f2937-9e4d-347a-7465-d64df939e7d6.json diff --git a/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/model_desc/4120b88e-6a3b-aba2-f86e-c692f6588f22.json b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/model_desc/4120b88e-6a3b-aba2-f86e-c692f6588f22.json new file mode 100644 index 0000000000..943dbb203e --- /dev/null +++ b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/model_desc/4120b88e-6a3b-aba2-f86e-c692f6588f22.json @@ -0,0 +1,135 @@ +{ + "uuid" : "4120b88e-6a3b-aba2-f86e-c692f6588f22", + "last_modified" : 1668138510884, + "create_time" : 1668138510184, + "version" : "4.0.0.0", + "alias" : "sum_lc_null_val_test", + "owner" : "ADMIN", + "config_last_modifier" : null, + "config_last_modified" : 0, + "description" : null, + "fact_table" : "SSB.SUM_LC_NULL_TBL", + "fact_table_alias" : null, + "management_type" : "MODEL_BASED", + "join_tables" : [ ], + "filter_condition" : "", + "partition_desc" : { + "partition_date_column" : "SUM_LC_NULL_TBL.PART_COL", + "partition_date_start" : 0, + "partition_date_format" : "yyyy-MM-dd", + "partition_type" : "APPEND", + "partition_condition_builder" : "org.apache.kylin.metadata.model.PartitionDesc$DefaultPartitionConditionBuilder" + }, + "capacity" : "MEDIUM", + "segment_config" : { + "auto_merge_enabled" : null, + "auto_merge_time_ranges" : null, + "volatile_range" : null, + "retention_range" : null, + "create_empty_segment_enabled" : false + }, + "data_check_desc" : null, + "semantic_version" : 0, + "storage_type" : 0, + "model_type" : "BATCH", + "all_named_columns" : [ { + "id" : 0, + "name" : "PART_COL", + "column" : "SUM_LC_NULL_TBL.PART_COL", + "status" : "DIMENSION" + }, { + "id" : 1, + "name" : "SUM_DATE1", + "column" : "SUM_LC_NULL_TBL.SUM_DATE1" + }, { + "id" : 2, + "name" : "ACCOUNT1", + "column" : "SUM_LC_NULL_TBL.ACCOUNT1", + "status" : "DIMENSION" + }, { + "id" : 3, + "name" : "BALANCE1", + "column" : "SUM_LC_NULL_TBL.BALANCE1" + }, { + "id" : 4, + "name" : "DATA_NULL", + "column" : "SUM_LC_NULL_TBL.DATA_NULL" + }, { + "id" : 5, + "name" : "DATA_DECIMAL", + "column" : "SUM_LC_NULL_TBL.DATA_DECIMAL" + }, { + "id" : 6, + "name" : "ACCOUNT2", + "column" : "SUM_LC_NULL_TBL.ACCOUNT2", + "status" : "DIMENSION" + } ], + "all_measures" : [ { + "name" : "COUNT_ALL", + "function" : { + "expression" : "COUNT", + "parameters" : [ { + "type" : "constant", + "value" : "1" + } ], + "returntype" : "bigint" + }, + "column" : null, + "comment" : null, + "id" : 100000, + "type" : "NORMAL", + "internal_ids" : [ ] + }, { + "name" : "sumlc_double_null", + "function" : { + "expression" : "SUM_LC", + "parameters" : [ { + "type" : "column", + "value" : "SUM_LC_NULL_TBL.DATA_NULL" + }, { + "type" : "column", + "value" : "SUM_LC_NULL_TBL.SUM_DATE1" + } ], + "returntype" : "double" + }, + "column" : null, + "comment" : "", + "id" : 100001, + "type" : "NORMAL", + "internal_ids" : [ ] + }, { + "name" : "sumlc_decimal_null", + "function" : { + "expression" : "SUM_LC", + "parameters" : [ { + "type" : "column", + "value" : "SUM_LC_NULL_TBL.DATA_DECIMAL" + }, { + "type" : "column", + "value" : "SUM_LC_NULL_TBL.SUM_DATE1" + } ], + "returntype" : "decimal(20,6)" + }, + "column" : null, + "comment" : "", + "id" : 100002, + "type" : "NORMAL", + "internal_ids" : [ ] + } ], + "recommendations_count" : 0, + "computed_columns" : [ ], + "canvas" : { + "coordinate" : { + "SUM_LC_NULL_TBL" : { + "x" : 462.44444105360253, + "y" : 108.66667005750864, + "width" : 200.0, + "height" : 230.0 + } + }, + "zoom" : 9.0 + }, + "multi_partition_desc" : null, + "multi_partition_key_mapping" : null, + "fusion_id" : null +} diff --git a/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/model_desc/648098d6-3009-5b26-3e20-82e494cfdb0c.json b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/model_desc/648098d6-3009-5b26-3e20-82e494cfdb0c.json new file mode 100644 index 0000000000..660c1420fe --- /dev/null +++ b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/model_desc/648098d6-3009-5b26-3e20-82e494cfdb0c.json @@ -0,0 +1,110 @@ +{ + "uuid" : "648098d6-3009-5b26-3e20-82e494cfdb0c", + "last_modified" : 1667828178657, + "create_time" : 1667828178249, + "version" : "4.0.0.0", + "alias" : "sum_lc_timestamp_test", + "owner" : "ADMIN", + "config_last_modifier" : null, + "config_last_modified" : 0, + "description" : null, + "fact_table" : "SSB.SUM_LC_TB", + "fact_table_alias" : null, + "management_type" : "MODEL_BASED", + "join_tables" : [ ], + "filter_condition" : "", + "partition_desc" : { + "partition_date_column" : "SUM_LC_TB.PART_COL", + "partition_date_start" : 0, + "partition_date_format" : "yyyy-MM-dd", + "partition_type" : "APPEND", + "partition_condition_builder" : "org.apache.kylin.metadata.model.PartitionDesc$DefaultPartitionConditionBuilder" + }, + "capacity" : "MEDIUM", + "segment_config" : { + "auto_merge_enabled" : null, + "auto_merge_time_ranges" : null, + "volatile_range" : null, + "retention_range" : null, + "create_empty_segment_enabled" : false + }, + "data_check_desc" : null, + "semantic_version" : 0, + "storage_type" : 0, + "model_type" : "BATCH", + "all_named_columns" : [ { + "id" : 0, + "name" : "ACCOUNT", + "column" : "SUM_LC_TB.ACCOUNT", + "status" : "DIMENSION" + }, { + "id" : 1, + "name" : "BANK_NAME", + "column" : "SUM_LC_TB.BANK_NAME", + "status" : "DIMENSION" + }, { + "id" : 2, + "name" : "PART_COL", + "column" : "SUM_LC_TB.PART_COL", + "status" : "DIMENSION" + }, { + "id" : 3, + "name" : "BALANCE", + "column" : "SUM_LC_TB.BALANCE" + }, { + "id" : 4, + "name" : "SUM_DATE", + "column" : "SUM_LC_TB.SUM_DATE", + "status" : "DIMENSION" + } ], + "all_measures" : [ { + "name" : "COUNT_ALL", + "function" : { + "expression" : "COUNT", + "parameters" : [ { + "type" : "constant", + "value" : "1" + } ], + "returntype" : "bigint" + }, + "column" : null, + "comment" : null, + "id" : 100000, + "type" : "NORMAL", + "internal_ids" : [ ] + }, { + "name" : "sum_lc_balance", + "function" : { + "expression" : "SUM_LC", + "parameters" : [ { + "type" : "column", + "value" : "SUM_LC_TB.BALANCE" + }, { + "type" : "column", + "value" : "SUM_LC_TB.SUM_DATE" + } ], + "returntype" : "bigint" + }, + "column" : null, + "comment" : "", + "id" : 100001, + "type" : "NORMAL", + "internal_ids" : [ ] + } ], + "recommendations_count" : 0, + "computed_columns" : [ ], + "canvas" : { + "coordinate" : { + "SUM_LC_TB" : { + "x" : 585.7777913411459, + "y" : 87.44444105360242, + "width" : 200.0, + "height" : 483.33333333333337 + } + }, + "zoom" : 9.0 + }, + "multi_partition_desc" : null, + "multi_partition_key_mapping" : null, + "fusion_id" : null +} \ No newline at end of file diff --git a/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/model_desc/c2f81b79-2c10-dce2-4206-588cab0e68ec.json b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/model_desc/c2f81b79-2c10-dce2-4206-588cab0e68ec.json new file mode 100644 index 0000000000..000b6bcce7 --- /dev/null +++ b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/model_desc/c2f81b79-2c10-dce2-4206-588cab0e68ec.json @@ -0,0 +1,221 @@ +{ + "uuid" : "c2f81b79-2c10-dce2-4206-588cab0e68ec", + "last_modified" : 1667548802841, + "create_time" : 1667544081868, + "version" : "4.0.0.0", + "alias" : "sum_lc_cc_test", + "owner" : "ADMIN", + "config_last_modifier" : null, + "config_last_modified" : 0, + "description" : null, + "fact_table" : "SSB.SUMLC_CC_TEST", + "fact_table_alias" : null, + "management_type" : "MODEL_BASED", + "join_tables" : [ ], + "filter_condition" : "", + "partition_desc" : null, + "capacity" : "MEDIUM", + "segment_config" : { + "auto_merge_enabled" : null, + "auto_merge_time_ranges" : null, + "volatile_range" : null, + "retention_range" : null, + "create_empty_segment_enabled" : false + }, + "data_check_desc" : null, + "semantic_version" : 0, + "storage_type" : 0, + "model_type" : "BATCH", + "all_named_columns" : [ { + "id" : 0, + "name" : "ACCOUNT", + "column" : "SUMLC_CC_TEST.ACCOUNT", + "status" : "DIMENSION" + }, { + "id" : 1, + "name" : "MONTH_DATA", + "column" : "SUMLC_CC_TEST.MONTH_DATA" + }, { + "id" : 2, + "name" : "INT_DATA", + "column" : "SUMLC_CC_TEST.INT_DATA" + }, { + "id" : 3, + "name" : "YEAR_DATA", + "column" : "SUMLC_CC_TEST.YEAR_DATA" + }, { + "id" : 4, + "name" : "DAY_DATA", + "column" : "SUMLC_CC_TEST.DAY_DATA" + }, { + "id" : 5, + "name" : "DATE_DATA", + "column" : "SUMLC_CC_TEST.DATE_DATA", + "status" : "DIMENSION" + }, { + "id" : 6, + "name" : "TIME_COMPOSE_CC", + "column" : "SUMLC_CC_TEST.TIME_COMPOSE_CC" + }, { + "id" : 7, + "name" : "INT_DATA_CC", + "column" : "SUMLC_CC_TEST.INT_DATA_CC", + "status" : "DIMENSION" + }, { + "id" : 8, + "name" : "TO_TIME_COMPOSE_CC", + "column" : "SUMLC_CC_TEST.TO_TIME_COMPOSE_CC", + "status" : "DIMENSION" + } ], + "all_measures" : [ { + "name" : "COUNT_ALL", + "function" : { + "expression" : "COUNT", + "parameters" : [ { + "type" : "constant", + "value" : "1" + } ], + "returntype" : "bigint" + }, + "column" : null, + "comment" : null, + "id" : 100000, + "type" : "NORMAL", + "internal_ids" : [ ] + }, { + "name" : "sum_lc_int_data_time_cc", + "function" : { + "expression" : "SUM_LC", + "parameters" : [ { + "type" : "column", + "value" : "SUMLC_CC_TEST.INT_DATA" + }, { + "type" : "column", + "value" : "SUMLC_CC_TEST.TIME_COMPOSE_CC" + } ], + "returntype" : "bigint" + }, + "column" : null, + "comment" : "", + "id" : 100001, + "tomb" : true, + "type" : "NORMAL", + "internal_ids" : [ ] + }, { + "name" : "sum_lc_int_data_cc_date_data", + "function" : { + "expression" : "SUM_LC", + "parameters" : [ { + "type" : "column", + "value" : "SUMLC_CC_TEST.INT_DATA_CC" + }, { + "type" : "column", + "value" : "SUMLC_CC_TEST.DATE_DATA" + } ], + "returntype" : "bigint" + }, + "column" : null, + "comment" : "", + "id" : 100002, + "type" : "NORMAL", + "internal_ids" : [ ] + }, { + "name" : "sum_lc_int_cc_time_cc", + "function" : { + "expression" : "SUM_LC", + "parameters" : [ { + "type" : "column", + "value" : "SUMLC_CC_TEST.INT_DATA_CC" + }, { + "type" : "column", + "value" : "SUMLC_CC_TEST.TIME_COMPOSE_CC" + } ], + "returntype" : "bigint" + }, + "column" : null, + "comment" : "", + "id" : 100003, + "tomb" : true, + "type" : "NORMAL", + "internal_ids" : [ ] + }, { + "name" : "sum_lc_int_data_time_cc", + "function" : { + "expression" : "SUM_LC", + "parameters" : [ { + "type" : "column", + "value" : "SUMLC_CC_TEST.INT_DATA" + }, { + "type" : "column", + "value" : "SUMLC_CC_TEST.TO_TIME_COMPOSE_CC" + } ], + "returntype" : "bigint" + }, + "column" : null, + "comment" : "", + "id" : 100004, + "type" : "NORMAL", + "internal_ids" : [ ] + }, { + "name" : "sum_lc_int_cc_time_cc", + "function" : { + "expression" : "SUM_LC", + "parameters" : [ { + "type" : "column", + "value" : "SUMLC_CC_TEST.INT_DATA_CC" + }, { + "type" : "column", + "value" : "SUMLC_CC_TEST.TO_TIME_COMPOSE_CC" + } ], + "returntype" : "bigint" + }, + "column" : null, + "comment" : "", + "id" : 100005, + "type" : "NORMAL", + "internal_ids" : [ ] + } ], + "recommendations_count" : 0, + "computed_columns" : [ { + "tableIdentity" : "SSB.SUMLC_CC_TEST", + "tableAlias" : "SUMLC_CC_TEST", + "columnName" : "TIME_COMPOSE_CC", + "expression" : "concat(SUMLC_CC_TEST.YEAR_DATA,'-',SUMLC_CC_TEST.MONTH_DATA,'-',SUMLC_CC_TEST.DAY_DATA)", + "innerExpression" : "concat(`SUMLC_CC_TEST`.`YEAR_DATA`, '-', `SUMLC_CC_TEST`.`MONTH_DATA`, '-', `SUMLC_CC_TEST`.`DAY_DATA`)", + "datatype" : "VARCHAR", + "comment" : null, + "rec_uuid" : null + }, { + "tableIdentity" : "SSB.SUMLC_CC_TEST", + "tableAlias" : "SUMLC_CC_TEST", + "columnName" : "INT_DATA_CC", + "expression" : "SUMLC_CC_TEST.INT_DATA * 2", + "innerExpression" : "`SUMLC_CC_TEST`.`INT_DATA` * 2", + "datatype" : "INTEGER", + "comment" : null, + "rec_uuid" : null + }, { + "tableIdentity" : "SSB.SUMLC_CC_TEST", + "tableAlias" : "SUMLC_CC_TEST", + "columnName" : "TO_TIME_COMPOSE_CC", + "expression" : "concat(\ncast(SUMLC_CC_TEST.year_data as string),'-',\nconcat((case when SUMLC_CC_TEST.month_data<10 then '0' else '' end),cast(SUMLC_CC_TEST.month_data as string)),'-',\nconcat((case when SUMLC_CC_TEST.day_data<10 then '0' else '' end),cast(SUMLC_CC_TEST.day_data as string))\n)", + "innerExpression" : "concat(\ncast(`SUMLC_CC_TEST`.`YEAR_DATA` as string), '-',\nconcat((case when `SUMLC_CC_TEST`.`MONTH_DATA`<10 then '0' else '' end), cast(`SUMLC_CC_TEST`.`MONTH_DATA` as string)), '-',\nconcat((case when `SUMLC_CC_TEST`.`DAY_DATA`<10 then '0' else '' end), cast(`SUMLC_CC_TEST`.`DAY_DATA` as string))\n)", + "datatype" : "VARCHAR", + "comment" : null, + "rec_uuid" : null + } ], + "canvas" : { + "coordinate" : { + "SUMLC_CC_TEST" : { + "x" : 463.5555691189238, + "y" : 122.99999660915798, + "width" : 200.0, + "height" : 466.66666666666663 + } + }, + "zoom" : 9.0 + }, + "multi_partition_desc" : null, + "multi_partition_key_mapping" : null, + "fusion_id" : null +} \ No newline at end of file diff --git a/src/examples/test_case_data/localmeta/metadata/sum_lc/model_desc/f35f2937-9e4d-347a-7465-d64df939e7d6.json b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/model_desc/f35f2937-9e4d-347a-7465-d64df939e7d6.json similarity index 100% rename from src/examples/test_case_data/localmeta/metadata/sum_lc/model_desc/f35f2937-9e4d-347a-7465-d64df939e7d6.json rename to src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/model_desc/f35f2937-9e4d-347a-7465-d64df939e7d6.json diff --git a/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/table/SSB.SUMLC_CC_TEST.json b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/table/SSB.SUMLC_CC_TEST.json new file mode 100644 index 0000000000..3c2944ac01 --- /dev/null +++ b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/table/SSB.SUMLC_CC_TEST.json @@ -0,0 +1,58 @@ +{ + "uuid" : "ff1e6d9d-fb31-0269-2547-a6b79cfbe798", + "last_modified" : 0, + "create_time" : 1667543492928, + "version" : "4.0.0.0", + "name" : "SUMLC_CC_TEST", + "columns" : [ { + "id" : "1", + "name" : "DATE_DATA", + "datatype" : "integer", + "case_sensitive_name" : "date_data" + }, { + "id" : "2", + "name" : "YEAR_DATA", + "datatype" : "integer", + "case_sensitive_name" : "year_data" + }, { + "id" : "3", + "name" : "MONTH_DATA", + "datatype" : "integer", + "case_sensitive_name" : "month_data" + }, { + "id" : "4", + "name" : "DAY_DATA", + "datatype" : "integer", + "case_sensitive_name" : "day_data" + }, { + "id" : "5", + "name" : "ACCOUNT", + "datatype" : "varchar(4096)", + "case_sensitive_name" : "account" + }, { + "id" : "6", + "name" : "INT_DATA", + "datatype" : "integer", + "case_sensitive_name" : "int_data" + } ], + "source_type" : 9, + "table_type" : "MANAGED", + "top" : false, + "increment_loading" : false, + "last_snapshot_path" : null, + "last_snapshot_size" : 0, + "snapshot_last_modified" : 0, + "query_hit_count" : 0, + "partition_column" : null, + "snapshot_partitions" : { }, + "snapshot_partitions_info" : { }, + "snapshot_total_rows" : 0, + "snapshot_partition_col" : null, + "selected_snapshot_partition_col" : null, + "temp_snapshot_path" : null, + "snapshot_has_broken" : false, + "database" : "SSB", + "transactional" : false, + "rangePartition" : false, + "partition_desc" : null +} \ No newline at end of file diff --git a/src/examples/test_case_data/localmeta/metadata/sum_lc/table/SSB.SUMLC_EXTEND_4X.json b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/table/SSB.SUMLC_EXTEND_4X.json similarity index 100% rename from src/examples/test_case_data/localmeta/metadata/sum_lc/table/SSB.SUMLC_EXTEND_4X.json rename to src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/table/SSB.SUMLC_EXTEND_4X.json diff --git a/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/table/SSB.SUM_LC_NULL_TBL.json b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/table/SSB.SUM_LC_NULL_TBL.json new file mode 100644 index 0000000000..d843b9239b --- /dev/null +++ b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/table/SSB.SUM_LC_NULL_TBL.json @@ -0,0 +1,63 @@ +{ + "uuid" : "6d164a7b-de25-e326-32f3-92103e53cf7e", + "last_modified" : 0, + "create_time" : 1668136653291, + "version" : "4.0.0.0", + "name" : "SUM_LC_NULL_TBL", + "columns" : [ { + "id" : "1", + "name" : "ACCOUNT1", + "datatype" : "char(1)", + "case_sensitive_name" : "account1" + }, { + "id" : "2", + "name" : "ACCOUNT2", + "datatype" : "varchar(52)", + "case_sensitive_name" : "account2" + }, { + "id" : "3", + "name" : "PART_COL", + "datatype" : "date", + "case_sensitive_name" : "part_col" + }, { + "id" : "4", + "name" : "BALANCE1", + "datatype" : "double", + "case_sensitive_name" : "balance1" + }, { + "id" : "5", + "name" : "SUM_DATE1", + "datatype" : "varchar(1024)", + "case_sensitive_name" : "sum_date1" + }, { + "id" : "6", + "name" : "DATA_NULL", + "datatype" : "double", + "case_sensitive_name" : "data_null" + }, { + "id" : "7", + "name" : "DATA_DECIMAL", + "datatype" : "decimal(10,6)", + "case_sensitive_name" : "data_decimal" + } ], + "source_type" : 9, + "table_type" : "MANAGED", + "top" : false, + "increment_loading" : false, + "last_snapshot_path" : null, + "last_snapshot_size" : 0, + "snapshot_last_modified" : 0, + "query_hit_count" : 0, + "partition_column" : null, + "snapshot_partitions" : { }, + "snapshot_partitions_info" : { }, + "snapshot_total_rows" : 0, + "snapshot_partition_col" : null, + "selected_snapshot_partition_col" : null, + "temp_snapshot_path" : null, + "snapshot_has_broken" : false, + "database" : "SSB", + "transactional" : false, + "rangePartition" : false, + "partition_desc" : null +} diff --git a/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/table/SSB.SUM_LC_TB.json b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/table/SSB.SUM_LC_TB.json new file mode 100644 index 0000000000..4148c52c21 --- /dev/null +++ b/src/kylin-it/src/test/resources/ut_meta/sum_lc/metadata/sum_lc/table/SSB.SUM_LC_TB.json @@ -0,0 +1,53 @@ +{ + "uuid" : "6c979409-290a-2deb-dc53-c100ac070ab9", + "last_modified" : 0, + "create_time" : 1667828032179, + "version" : "4.0.0.0", + "name" : "SUM_LC_TB", + "columns" : [ { + "id" : "1", + "name" : "ACCOUNT", + "datatype" : "char(1)", + "case_sensitive_name" : "account" + }, { + "id" : "2", + "name" : "BANK_NAME", + "datatype" : "varchar(52)", + "case_sensitive_name" : "bank_name" + }, { + "id" : "3", + "name" : "PART_COL", + "datatype" : "date", + "case_sensitive_name" : "part_col" + }, { + "id" : "4", + "name" : "BALANCE", + "datatype" : "integer", + "case_sensitive_name" : "balance" + }, { + "id" : "5", + "name" : "SUM_DATE", + "datatype" : "timestamp", + "case_sensitive_name" : "sum_date" + } ], + "source_type" : 9, + "table_type" : "MANAGED", + "top" : false, + "increment_loading" : false, + "last_snapshot_path" : null, + "last_snapshot_size" : 0, + "snapshot_last_modified" : 0, + "query_hit_count" : 0, + "partition_column" : null, + "snapshot_partitions" : { }, + "snapshot_partitions_info" : { }, + "snapshot_total_rows" : 0, + "snapshot_partition_col" : null, + "selected_snapshot_partition_col" : null, + "temp_snapshot_path" : null, + "snapshot_has_broken" : false, + "database" : "SSB", + "transactional" : false, + "rangePartition" : false, + "partition_desc" : null +} \ No newline at end of file diff --git a/src/modeling-service/src/main/java/org/apache/kylin/rest/service/ModelService.java b/src/modeling-service/src/main/java/org/apache/kylin/rest/service/ModelService.java index 7c077551db..13f2f0d307 100644 --- a/src/modeling-service/src/main/java/org/apache/kylin/rest/service/ModelService.java +++ b/src/modeling-service/src/main/java/org/apache/kylin/rest/service/ModelService.java @@ -3575,6 +3575,10 @@ public class ModelService extends AbstractModelService implements TableModelSupp validateFusionModelDimension(modelRequest); NDataModel model = semanticUpdater.convertToDataModel(modelRequest); + for(NDataModel.Measure measure : model.getAllMeasures()) { + measure.getFunction().init(model); + } + if (modelRequest.getPartitionDesc() != null && !KylinConfig.getInstanceFromEnv().isUseBigIntAsTimestampForPartitionColumn()) { PartitionDesc partitionDesc = modelRequest.getPartitionDesc(); diff --git a/src/modeling-service/src/test/java/org/apache/kylin/rest/service/ModelServiceTest.java b/src/modeling-service/src/test/java/org/apache/kylin/rest/service/ModelServiceTest.java index 1e3e316bd8..7c24afb5c1 100644 --- a/src/modeling-service/src/test/java/org/apache/kylin/rest/service/ModelServiceTest.java +++ b/src/modeling-service/src/test/java/org/apache/kylin/rest/service/ModelServiceTest.java @@ -5716,4 +5716,20 @@ public class ModelServiceTest extends SourceTestCase { Assert.assertEquals("DEFAULT.TEST_ORDER.ORDER_ID", colRef1.getCanonicalName()); } } + + @Test + public void testInitModel() throws IOException { + String modelRequest = + "{\"uuid\":null,\"name\":\"sum_lc_null_val_test_clone\",\"owner\":\"ADMIN\",\"project\":\"sum_lc\",\"description\":null," + + "\"alias\":\"sum_lc_null_val_test_clone\",\"fact_table\":\"SSB.SUM_LC_NULL_TBL\",\"join_tables\":[]," + + "\"simplified_dimensions\":[{\"id\":0,\"name\":\"PART_COL\",\"column\":\"SUM_LC_NULL_TBL.PART_COL\",\"status\":\"DIMENSION\",\"excluded\":false,\"cardinality\":null,\"min_value\":null,\"max_value\":null,\"max_length_value\":null,\"min_length_value\":null,\"null_count\":null,\"comment\":null,\"type\":\"date\",\"simple\":null,\"datatype\":\"date\"}," + + "{\"id\":1,\"name\":\"SUM_DATE1\",\"column\":\"SUM_LC_NULL_TBL.SUM_DATE1\",\"status\":\"DIMENSION\",\"excluded\":false,\"cardinality\":null,\"min_value\":null,\"max_value\":null,\"max_length_value\":null,\"min_length_value\":null,\"null_count\":null,\"comment\":null,\"type\":\"varchar(1024)\",\"simple\":null,\"datatype\":\"varchar(1024)\"},{\"id\":2,\"name\":\"ACCOUNT1\",\"column\":\"SUM_LC_NULL_TBL.ACCOUNT1\",\"status\":\"DIMENSION\",\"excluded\":false,\"cardin [...] + + "{\"id\":6,\"name\":\"ACCOUNT2\",\"column\":\"SUM_LC_NULL_TBL.ACCOUNT2\",\"status\":\"DIMENSION\",\"excluded\":false,\"cardinality\":null,\"min_value\":null,\"max_value\":null,\"max_length_value\":null,\"min_length_value\":null,\"null_count\":null,\"comment\":null,\"type\":\"varchar(52)\",\"simple\":null,\"datatype\":\"varchar(52)\"}]," + + "\"simplified_measures\":[{\"id\":100000,\"expression\":\"COUNT\",\"name\":\"COUNT_ALL\",\"return_type\":\"bigint\",\"parameter_value\":[{\"type\":\"constant\",\"value\":\"1\"}],\"converted_columns\":[],\"column\":null,\"comment\":null},{\"id\":100001,\"expression\":\"SUM_LC\",\"name\":\"sumlc_double_null\",\"return_type\":\"double\",\"parameter_value\":[{\"type\":\"column\",\"value\":\"SUM_LC_NULL_TBL.DATA_NULL\"},{\"type\":\"column\",\"value\":\"SUM_LC_NULL_TB [...] + + "\"converted_columns\":[],\"column\":null,\"comment\":\"\"},{\"id\":100002,\"expression\":\"SUM_LC\",\"name\":\"sumlc_decimal_null\",\"return_type\":\"decimal(20,6)\",\"parameter_value\":[{\"type\":\"column\",\"value\":\"SUM_LC_NULL_TBL.DATA_DECIMAL\"},{\"type\":\"column\",\"value\":\"SUM_LC_NULL_TBL.SUM_DATE1\"}],\"converted_columns\":[],\"column\":null,\"comment\":\"\"},{\"name\":\"abc\",\"expression\":\"SUM_LC\",\"return_type\":\"\",\"comment\":\"\",\"paramet [...] + + "\"computed_columns\":[],\"last_modified\":1668402813791,\"filter_condition\":\"\",\"partition_desc\":null,\"multi_partition_desc\":null,\"management_type\":\"MODEL_BASED\",\"with_second_storage\":false,\"second_storage_size\":0,\"canvas\":{\"coordinate\":{\"SUM_LC_NULL_TBL\":{\"x\":462.44444105360253,\"y\":108.66667005750864,\"width\":200,\"height\":486.66666666666663,\"isSpread\":true}},\"zoom\":9,\"marginClient\":{\"left\":0,\"top\":0}},\"available_indexes_co [...] + + "{\"column\":\"SUM_LC_NULL_TBL.DATA_NULL\",\"name\":\"DATA_NULL\",\"datatype\":\"double\"},{\"column\":\"SUM_LC_NULL_TBL.DATA_DECIMAL\",\"name\":\"DATA_DECIMAL\",\"datatype\":\"decimal(10,6)\"}]}"; + ModelRequest request = JsonUtil.readValue(modelRequest, ModelRequest.class); + Assert.assertThrows(KylinException.class, () -> modelService.checkBeforeModelSave(request)); + } } diff --git a/src/query-common/src/main/java/org/apache/kylin/query/util/DefaultQueryTransformer.java b/src/query-common/src/main/java/org/apache/kylin/query/util/DefaultQueryTransformer.java index 18ec915c90..b8fa24d009 100644 --- a/src/query-common/src/main/java/org/apache/kylin/query/util/DefaultQueryTransformer.java +++ b/src/query-common/src/main/java/org/apache/kylin/query/util/DefaultQueryTransformer.java @@ -29,7 +29,7 @@ import org.apache.kylin.query.IQueryTransformer; */ public class DefaultQueryTransformer implements IQueryTransformer { - private static final String S0 = "\\s*"; + public static final String S0 = "\\s*"; private static final String SM = "\\s+"; private static final String ONE = "1"; diff --git a/src/query/src/main/java/org/apache/kylin/query/engine/QueryRoutingEngine.java b/src/query/src/main/java/org/apache/kylin/query/engine/QueryRoutingEngine.java index e4ed2992d0..fa8571fd82 100644 --- a/src/query/src/main/java/org/apache/kylin/query/engine/QueryRoutingEngine.java +++ b/src/query/src/main/java/org/apache/kylin/query/engine/QueryRoutingEngine.java @@ -19,6 +19,7 @@ package org.apache.kylin.query.engine; import static org.apache.kylin.query.relnode.OLAPContext.clearThreadLocalContexts; +import static org.apache.kylin.query.util.DefaultQueryTransformer.S0; import java.sql.PreparedStatement; import java.sql.SQLException; @@ -30,6 +31,7 @@ import java.util.List; import java.util.Map; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; +import java.util.regex.Pattern; import org.apache.calcite.avatica.ColumnMetaData; import org.apache.calcite.prepare.CalcitePrepareImpl; @@ -79,6 +81,10 @@ public class QueryRoutingEngine { public static final String SPARK_MEM_LIMIT_EXCEEDED = "Container killed by YARN for exceeding memory limits"; public static final String SPARK_JOB_FAILED = "Job aborted due to stage failure"; + private static final Pattern PTN_SUM_LC = Pattern.compile( + S0 + "\\bSUM_LC" + S0 + "[(]" + S0 + ".*\\.?.*" + S0 + "[,]" + S0 + ".*\\.?.*" + S0 + "[)]" + S0, + Pattern.CASE_INSENSITIVE); + public QueryResult queryWithSqlMassage(QueryParams queryParams) throws Exception { QueryContext.current().setAclInfo(queryParams.getAclInfo()); KylinConfig projectKylinConfig = NProjectManager.getProjectConfig(queryParams.getProject()); @@ -86,7 +92,7 @@ public class QueryRoutingEngine { queryParams.setDefaultSchema(queryExec.getDefaultSchemaName()); if (queryParams.isForcedToPushDown()) { - checkContainsSumLC(queryParams, null); + checkContainsSumLC(queryParams); return pushDownQuery(null, queryParams); } @@ -134,7 +140,6 @@ public class QueryRoutingEngine { if (cause instanceof SQLException && cause.getCause() instanceof KylinException) { throw (SQLException) cause; } - checkContainsSumLC(queryParams, e); if (shouldPushdown(cause, queryParams)) { return pushDownQuery((SQLException) cause, queryParams); } else { @@ -153,7 +158,6 @@ public class QueryRoutingEngine { } } } - checkContainsSumLC(queryParams, e); if (shouldPushdown(e, queryParams)) { return pushDownQuery(e, queryParams); } else { @@ -175,18 +179,14 @@ public class QueryRoutingEngine { return false; } - private void checkContainsSumLC(QueryParams queryParams, Throwable t) { - if (queryParams.getSql().contains("sum_lc")) { - String message = "There is no aggregate index to answer this query, sum_lc() function now is not supported by other query engine"; - if (t != null) { - throw new NotSupportedSQLException(message, t); - } else { - throw new NotSupportedSQLException(message); - } + private void checkContainsSumLC(QueryParams queryParams) { + if (PTN_SUM_LC.matcher(queryParams.getSql()).find()) { + String message = "sum_lc() function now is not supported by other query engine"; + throw new NotSupportedSQLException(message); } } - private boolean shouldPushdown(Throwable e, QueryParams queryParams) { + protected boolean shouldPushdown(Throwable e, QueryParams queryParams) { if (queryParams.isForcedToIndex()) { return false; } @@ -203,6 +203,10 @@ public class QueryRoutingEngine { return checkBigQueryPushDown(queryParams); } + if (PTN_SUM_LC.matcher(queryParams.getSql()).find()) { + return false; + } + return e instanceof SQLException && !e.getMessage().contains(SPARK_MEM_LIMIT_EXCEEDED); } diff --git a/src/query/src/test/java/org/apache/kylin/query/engine/QueryRoutingEngineTest.java b/src/query/src/test/java/org/apache/kylin/query/engine/QueryRoutingEngineTest.java index 44ab952e70..4d535bbea0 100644 --- a/src/query/src/test/java/org/apache/kylin/query/engine/QueryRoutingEngineTest.java +++ b/src/query/src/test/java/org/apache/kylin/query/engine/QueryRoutingEngineTest.java @@ -320,7 +320,7 @@ public class QueryRoutingEngineTest extends NLocalFileMetadataTestCase { @Test public void testQueryPushDownWithSumLC() { - final String sql = "select sum_lc(column, dateColumn) from success_table_2"; + final String sql = "select sUm_Lc \r\n ( \r\n \"success_table_2\".\"column\", \r\n dateColumn) from success_table_2"; final String project = "default"; KylinConfig kylinconfig = KylinConfig.getInstanceFromEnv(); QueryParams queryParams = new QueryParams(); @@ -333,6 +333,22 @@ public class QueryRoutingEngineTest extends NLocalFileMetadataTestCase { Assert.assertThrows(NotSupportedSQLException.class, () -> queryRoutingEngine.queryWithSqlMassage(queryParams)); } + @Test + public void testShouldPushDown() { + final String sql = "select sUm_Lc \r\n ( \r\n \"success_table_2\".\"column\", \r\n dateColumn) from success_table_2"; + final String project = "default"; + KylinConfig kylinconfig = KylinConfig.getInstanceFromEnv(); + QueryParams queryParams = new QueryParams(); + queryParams.setProject(project); + queryParams.setSql(sql); + queryParams.setKylinConfig(kylinconfig); + queryParams.setSelect(true); + queryParams.setForcedToPushDown(true); + + boolean shouldPushDown = queryRoutingEngine.shouldPushdown(new RuntimeException(), queryParams); + Assert.assertEquals(false, shouldPushDown); + } + @Test public void testQueryPushDownFail() { final String sql = "SELECT 1"; diff --git a/src/spark-project/sparder/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionUtils.scala b/src/spark-project/sparder/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionUtils.scala index 321bece62c..ebf68446ad 100644 --- a/src/spark-project/sparder/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionUtils.scala +++ b/src/spark-project/sparder/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionUtils.scala @@ -139,7 +139,11 @@ object ExpressionUtils { val arrayBytes = bytes.asInstanceOf[Array[Byte]] val codec = SumLCUtil.getNumericNullSafeSerializerByDataType(DataType.fromJson(wrapDataType.toString)) val counter = SumLCUtil.decodeToSumLCCounter(arrayBytes, codec) - counter.getSumLC + if (counter == null) { + null + } else { + counter.getSumLC + } } } diff --git a/src/spark-project/sparder/src/main/scala/org/apache/spark/sql/catalyst/expressions/KapExpresssions.scala b/src/spark-project/sparder/src/main/scala/org/apache/spark/sql/catalyst/expressions/KapExpresssions.scala index 08a9fa3cc0..8df821ed8a 100644 --- a/src/spark-project/sparder/src/main/scala/org/apache/spark/sql/catalyst/expressions/KapExpresssions.scala +++ b/src/spark-project/sparder/src/main/scala/org/apache/spark/sql/catalyst/expressions/KapExpresssions.scala @@ -828,6 +828,8 @@ case class SumLCDecode(bytes: Expression, wrapDataTypeExpr: Expression) extends override def prettyName: String = "sum_lc_decode" + override def nullable: Boolean = true + override protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): Expression = { val newChildren = Seq(newLeft, newRight) super.legacyWithNewChildren(newChildren) diff --git a/src/spark-project/spark-common/src/main/scala/org/apache/spark/sql/udaf/SumLC.scala b/src/spark-project/spark-common/src/main/scala/org/apache/spark/sql/udaf/SumLC.scala index bb68841e01..6472b742dd 100644 --- a/src/spark-project/spark-common/src/main/scala/org/apache/spark/sql/udaf/SumLC.scala +++ b/src/spark-project/spark-common/src/main/scala/org/apache/spark/sql/udaf/SumLC.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.udaf -import com.esotericsoftware.kryo.KryoException import com.esotericsoftware.kryo.io.{Input, KryoDataInput, KryoDataOutput, Output} import org.apache.kylin.common.util.DateFormat import org.apache.kylin.measure.sumlc.SumLCCounter @@ -43,6 +42,9 @@ sealed abstract class BaseSumLC(wrapDataType: DataType, mutableAggBufferOffset: Int = 0, inputAggBufferOffset: Int = 0) extends TypedImperativeAggregate[SumLCCounter] with Serializable with Logging { + private val INIT_ARRAY_BUFFER_LENGTH = 32; + private val MAX_ARRAY_BUFFER_LENGTH = 256; + lazy val serializer: NullSafeValueSerializer = SumLCUtil.getNumericNullSafeSerializerByDataType(wrapDataType) override def prettyName: String = "sum_lc" @@ -52,49 +54,38 @@ sealed abstract class BaseSumLC(wrapDataType: DataType, case BinaryType => serialize(buffer) case DecimalType() => - if (buffer.getSumLC != null) { - Decimal.fromDecimal(buffer.getSumLC.asInstanceOf[java.math.BigDecimal]) - } else { - Decimal.ZERO - } + if (buffer == null || buffer.getSumLC == null) return null; + Decimal.fromDecimal(buffer.getSumLC.asInstanceOf[java.math.BigDecimal]) case _ => + if (buffer == null) return null; buffer.getSumLC } } - override def createAggregationBuffer(): SumLCCounter = new SumLCCounter() + override def createAggregationBuffer(): SumLCCounter = null override def merge(buffer: SumLCCounter, input: SumLCCounter): SumLCCounter = { SumLCCounter.merge(buffer, input) } override def serialize(buffer: SumLCCounter): Array[Byte] = { - val array: Array[Byte] = new Array[Byte](1024 * 1024) - val output: Output = new Output(array) - serialize(buffer, array, output) - } - - private def serialize(buffer: SumLCCounter, array: Array[Byte], output: Output): Array[Byte] = { - try { - if (buffer == null) { - Array.empty[Byte] - } else { - output.clear() - val out = new KryoDataOutput(output) - serializer.serialize(out, buffer.getSumLC) - out.writeLong(buffer.getTimestamp) + if (buffer == null) { + Array.empty[Byte] + } else { + val output: Output = new Output(INIT_ARRAY_BUFFER_LENGTH, MAX_ARRAY_BUFFER_LENGTH) + try { + val dataOutput = new KryoDataOutput(output) + serializer.serialize(dataOutput, buffer.getSumLC) + dataOutput.writeLong(buffer.getTimestamp) val mark = output.position() + val bufferArray = output.getBuffer + bufferArray.slice(0, mark) + } catch { + case th: Throwable => + throw th + } finally if (output != null) { output.close() - array.slice(0, mark) } - } catch { - case th: KryoException if th.getMessage.contains("Buffer overflow") => - logWarning(s"Resize buffer size to ${array.length * 2}") - val updateArray = new Array[Byte](array.length * 2) - output.setBuffer(updateArray) - serialize(buffer, updateArray, output) - case th => - throw th } } @@ -102,9 +93,18 @@ sealed abstract class BaseSumLC(wrapDataType: DataType, SumLCUtil.decodeToSumLCCounter(bytes, serializer) } - override def nullable: Boolean = false + override def nullable: Boolean = true override def dataType: DataType = outputDataType + + protected def sumLCUpdateInternal(buffer: SumLCCounter, columnVal: Number, timestampVal: Long): SumLCCounter = { + if (buffer == null) { + new SumLCCounter(columnVal, timestampVal) + } else { + buffer.update(columnVal, timestampVal) + buffer + } + } } case class EncodeSumLC( @@ -124,9 +124,14 @@ case class EncodeSumLC( case _ => columnEvalVal.asInstanceOf[Number] } - val dateValStr = String.valueOf(dateCol.eval(input)).trim - val timestampVal = DateFormat.stringToMillis(dateValStr) - SumLCCounter.merge(buffer, columnVal, timestampVal) + val dateEvalVal = dateCol.eval(input) + if (dateEvalVal == null || dateEvalVal.toString.toUpperCase().equals("NULL")) { + buffer + } else { + val dateValStr = String.valueOf(dateEvalVal).trim + val timestampVal = DateFormat.stringToMillis(dateValStr) + sumLCUpdateInternal(buffer, columnVal, timestampVal) + } } override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate = @@ -149,8 +154,14 @@ case class ReuseSumLC(measure: Expression, extends BaseSumLC(wrapDataType, outputDataType, mutableAggBufferOffset, inputAggBufferOffset) { override def update(buffer: SumLCCounter, input: InternalRow): SumLCCounter = { - val evalCounter = deserialize(measure.eval(input).asInstanceOf[Array[Byte]]) - SumLCCounter.merge(buffer, evalCounter) + val valAndTsPair = SumLCUtil.decodeToValAndTs(measure.eval(input).asInstanceOf[Array[Byte]], serializer) + if (valAndTsPair == null) { + buffer + } else { + val columnVal = valAndTsPair._1 + val timestampVal = valAndTsPair._2 + sumLCUpdateInternal(buffer, columnVal, timestampVal) + } } override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate = @@ -168,14 +179,23 @@ case class ReuseSumLC(measure: Expression, object SumLCUtil extends Logging { - def decodeToSumLCCounter(bytes: Array[Byte], codec: NullSafeValueSerializer): SumLCCounter = { - if (bytes.nonEmpty) { + def decodeToValAndTs(bytes: Array[Byte], codec: NullSafeValueSerializer): (Number, Long) = { + if (bytes.isEmpty) { + null + } else { val in = new KryoDataInput(new Input(bytes)) val sumLC = codec.deserialize(in).asInstanceOf[Number] val timestamp = in.readLong() - new SumLCCounter(sumLC, timestamp) + (sumLC, timestamp) + } + } + + def decodeToSumLCCounter(bytes: Array[Byte], codec: NullSafeValueSerializer): SumLCCounter = { + val valAndTsPair = decodeToValAndTs(bytes, codec) + if (valAndTsPair == null) { + null } else { - new SumLCCounter() + new SumLCCounter(valAndTsPair._1, valAndTsPair._2) } }