This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch kylin-on-parquet-v2 in repository https://gitbox.apache.org/repos/asf/kylin.git
commit de95ab977e700178efd70705ead4edaca272d41f Author: zhengshengjun <shengjun_zh...@sina.com> AuthorDate: Mon Mar 1 11:43:52 2021 +0800 add test case for interger type partition pruner --- examples/test_case_data/parquet_test/cube/ssb.json | 17 +++ .../test_case_data/parquet_test/cube_desc/ssb.json | 115 +++++++++++++++++++++ .../parquet_test/data/SSB.P_LINEORDER.csv | 18 ++++ .../parquet_test/model_desc/ssb.json | 25 +++++ .../parquet_test/project/default.json | 2 +- .../parquet_test/table/SSB.P_LINEORDER.json | 82 +++++++++++++++ .../resources/query/sql_prune_segment/query00.sql | 20 ++++ .../resources/query/sql_prune_segment/query01.sql | 20 ++++ .../kylin/engine/spark2/NBuildAndQueryTest.java | 37 +++++-- .../apache/kylin/engine/spark2/NExecAndComp.java | 5 +- 10 files changed, 328 insertions(+), 13 deletions(-) diff --git a/examples/test_case_data/parquet_test/cube/ssb.json b/examples/test_case_data/parquet_test/cube/ssb.json new file mode 100644 index 0000000..8e4f825 --- /dev/null +++ b/examples/test_case_data/parquet_test/cube/ssb.json @@ -0,0 +1,17 @@ +{ + "uuid" : "70a9f288-3c01-4745-a04b-5641e82d6c69", + "last_modified" : 1594722761733, + "version" : "1.5.3", + "name" : "ssb", + "owner" : "ADMIN", + "descriptor" : "ssb", + "display_name" : "ssb", + "cost" : 50, + "status" : "DISABLED", + "segments" : [ ], + "create_time_utc" : 1457444500888, + "cuboid_bytes" : null, + "cuboid_bytes_recommend" : null, + "cuboid_last_optimized" : 0, + "snapshots" : { } +} \ No newline at end of file diff --git a/examples/test_case_data/parquet_test/cube_desc/ssb.json b/examples/test_case_data/parquet_test/cube_desc/ssb.json new file mode 100644 index 0000000..62ca74a --- /dev/null +++ b/examples/test_case_data/parquet_test/cube_desc/ssb.json @@ -0,0 +1,115 @@ +{ + "uuid" : "5c44df30-daec-486e-af90-927bf7851057", + "last_modified" : 1491925122527, + "version" : "1.5.3", + "name" : "ssb", + "model_name" : "ssb", + "description" : "", + "null_string" : null, + "dimensions" : [ { + "name" : "LO_QUANTITY", + "table" : "P_LINEORDER", + "column" : "LO_QUANTITY", + "derived" : null + }, { + "name" : "LO_DISCOUNT", + "table" : "P_LINEORDER", + "column" : "LO_DISCOUNT", + "derived" : null + }, { + "name" : "LO_ORDERDATE", + "table" : "P_LINEORDER", + "column" : "LO_ORDERDATE", + "derived" : null + } ], + "measures" : [ { + "name" : "_COUNT_", + "function" : { + "expression" : "COUNT", + "parameter" : { + "type" : "constant", + "value" : "1" + }, + "returntype" : "bigint" + } + }, { + "name" : "P_LINEORDER.V_REVENUE_SUM", + "function" : { + "expression" : "SUM", + "parameter" : { + "type" : "column", + "value" : "P_LINEORDER.V_REVENUE" + }, + "returntype" : "bigint" + } + }, { + "name" : "P_LINEORDER.LO_SUPPLYCOST_SUM", + "function" : { + "expression" : "SUM", + "parameter" : { + "type" : "column", + "value" : "P_LINEORDER.LO_SUPPLYCOST" + }, + "returntype" : "bigint" + } + }, { + "name" : "P_LINEORDER.LO_REVENUE_SUM", + "function" : { + "expression" : "SUM", + "parameter" : { + "type" : "column", + "value" : "P_LINEORDER.LO_REVENUE" + }, + "returntype" : "bigint" + } + } ], + "rowkey" : { + "rowkey_columns" : [ { + "column" : "P_LINEORDER.LO_ORDERDATE", + "encoding" : "integer:8", + "isShardBy" : false, + "index" : "eq" + }, { + "column" : "P_LINEORDER.LO_QUANTITY", + "encoding" : "integer:8", + "isShardBy" : false, + "index" : "eq" + }, { + "column" : "P_LINEORDER.LO_DISCOUNT", + "encoding" : "integer:8", + "isShardBy" : false, + "index" : "eq" + } ] + }, + "hbase_mapping" : { + "column_family" : [ { + "name" : "F1", + "columns" : [ { + "qualifier" : "M", + "measure_refs" : [ "_COUNT_", "P_LINEORDER.V_REVENUE_SUM", "P_LINEORDER.LO_SUPPLYCOST_SUM", "P_LINEORDER.LO_REVENUE_SUM" ] + } ] + } ] + }, + "aggregation_groups" : [ { + "includes" : [ "P_LINEORDER.LO_QUANTITY", "P_LINEORDER.LO_DISCOUNT" ], + "select_rule" : { + "hierarchy_dims" : [ ], + "mandatory_dims" : [ ], + "joint_dims" : [ + [ "P_LINEORDER.LO_QUANTITY", "P_LINEORDER.LO_DISCOUNT" ] + ] + } + } ], + "notify_list" : [ ], + "status_need_notify" : [ ], + "partition_date_start" : 694224000000, + "partition_date_end" : 3153600000000, + "auto_merge_time_ranges" : [ ], + "retention_range" : 0, + "engine_type" : 6, + "storage_type" : 4, + "override_kylin_properties" : { + "kylin.cube.aggrgroup.is-mandatory-only-valid" : "true", + "kylin.storage.hbase.min-region-count" : "4" + } +} diff --git a/examples/test_case_data/parquet_test/data/SSB.P_LINEORDER.csv b/examples/test_case_data/parquet_test/data/SSB.P_LINEORDER.csv new file mode 100644 index 0000000..24ada80 --- /dev/null +++ b/examples/test_case_data/parquet_test/data/SSB.P_LINEORDER.csv @@ -0,0 +1,18 @@ +4581,1,16,165,1,19920904,4-NOT SPECI,0,37,3941092,8959211,1,3901681,63909,4,19921105,MAIL,3941092 +4581,2,16,50,2,19920904,4-NOT SPECI,0,7,665035,8959211,1,658384,57003,2,19921020,MAIL,665035 +4581,3,16,21,2,19920904,4-NOT SPECI,0,46,4236692,8959211,4,4067224,55261,4,19921127,REG AIR,16946768 +2560,1,28,169,1,19920905,1-URGENT,0,41,4383556,15342679,7,4076707,64149,1,19921111,SHIP,30684892 +2560,2,28,4,2,19920905,1-URGENT,0,27,2440800,15342679,0,2440800,54240,1,19921116,MAIL,0 +2560,3,28,46,2,19920905,1-URGENT,0,31,2932724,15342679,1,2903396,56762,5,19921014,AIR,2932724 +2560,4,28,72,2,19920905,1-URGENT,0,36,3499452,15342679,1,3464457,58324,2,19921030,MAIL,3499452 +2560,5,28,42,2,19920905,1-URGENT,0,9,847836,15342679,4,813922,56522,2,19921029,REG AIR,3391344 +2560,6,28,108,1,19920905,1-URGENT,0,13,1310530,15342679,3,1271214,60486,6,19921021,FOB,3931590 +2147,1,20,29,2,19920906,4-NOT SPECI,0,50,4645100,9151379,4,4459296,55741,6,19921130,RAIL,18580400 +2147,2,20,101,1,19920906,4-NOT SPECI,0,4,400440,9151379,1,396435,60066,4,19921115,AIR,400440 +2147,3,20,44,2,19920906,4-NOT SPECI,0,34,3209736,9151379,10,2888762,56642,4,19921108,REG AIR,32097360 +2147,4,20,11,2,19920906,4-NOT SPECI,0,11,1002111,9151379,6,941984,54660,7,19921116,AIR,6012666 +1991,1,4,110,1,19920907,4-NOT SPECI,0,39,3939429,13985441,6,3703063,60606,2,19921129,TRUCK,23636574 +1991,2,4,53,1,19920907,4-NOT SPECI,0,49,4669945,13985441,8,4296349,57183,6,19921129,SHIP,37359560 +1991,3,4,174,1,19920907,4-NOT SPECI,0,6,644502,13985441,2,631611,64450,1,19921008,REG AIR,1289004 +1991,4,4,138,2,19920907,4-NOT SPECI,0,6,622878,13985441,10,560590,62287,6,19921103,RAIL,6228780 +1991,5,4,60,1,19920907,4-NOT SPECI,0,49,4704294,13985441,6,4422036,57603,0,19921130,AIR,28225764 \ No newline at end of file diff --git a/examples/test_case_data/parquet_test/model_desc/ssb.json b/examples/test_case_data/parquet_test/model_desc/ssb.json new file mode 100644 index 0000000..f896765 --- /dev/null +++ b/examples/test_case_data/parquet_test/model_desc/ssb.json @@ -0,0 +1,25 @@ +{ + "uuid" : "cd92588f-b987-4a12-b90f-e32c44345c64", + "version" : "1.5.3", + "name" : "ssb", + "description" : "", + "lookups" : [ ], + "dimensions" : [ { + "table" : "SSB.P_LINEORDER", + "columns" : [ "LO_ORDERDATE" ] + } ], + "metrics" : [ "LO_REVENUE", "LO_SUPPLYCOST", "V_REVENUE" ], + "capacity" : "MEDIUM", + "last_modified" : 1464441928669, + "fact_table" : "SSB.P_LINEORDER", + "filter_condition" : "", + "partition_desc" : { + "partition_date_column" : "SSB.P_LINEORDER.LO_ORDERDATE", + "partition_time_column" : null, + "partition_date_start" : 0, + "partition_date_format" : "yyyyMMdd", + "partition_time_format" : "HH:mm:ss", + "partition_type" : "APPEND", + "partition_condition_builder" : "org.apache.kylin.metadata.model.PartitionDesc$DefaultPartitionConditionBuilder" + } +} \ No newline at end of file diff --git a/examples/test_case_data/parquet_test/project/default.json b/examples/test_case_data/parquet_test/project/default.json index 7db3136..3af3911 100644 --- a/examples/test_case_data/parquet_test/project/default.json +++ b/examples/test_case_data/parquet_test/project/default.json @@ -3,7 +3,7 @@ "last_modified" : 1585736623334, "version" : "3.0.0.20500", "name" : "default", - "tables" : [ "DEFAULT.TEST_COUNTRY", "EDW.TEST_SITES", "SSB.SUPPLIER", "SSB.CUSTOMER", "SSB.PART", "DEFAULT.TEST_KYLIN_FACT", "DEFAULT.TEST_CATEGORY_GROUPINGS", "DEFAULT.TEST_ORDER", "SSB.DATES", "EDW.TEST_SELLER_TYPE_DIM", "SSB.V_LINEORDER", "EDW.TEST_CAL_DT", "DEFAULT.TEST_ACCOUNT" ], + "tables" : [ "DEFAULT.TEST_COUNTRY", "EDW.TEST_SITES", "SSB.SUPPLIER", "SSB.CUSTOMER", "SSB.PART", "DEFAULT.TEST_KYLIN_FACT", "DEFAULT.TEST_CATEGORY_GROUPINGS", "DEFAULT.TEST_ORDER", "SSB.DATES", "EDW.TEST_SELLER_TYPE_DIM", "SSB.V_LINEORDER", "EDW.TEST_CAL_DT", "DEFAULT.TEST_ACCOUNT", "SSB.P_LINEORDER" ], "owner" : null, "status" : null, "create_time_utc" : 0, diff --git a/examples/test_case_data/parquet_test/table/SSB.P_LINEORDER.json b/examples/test_case_data/parquet_test/table/SSB.P_LINEORDER.json new file mode 100644 index 0000000..03d91fa --- /dev/null +++ b/examples/test_case_data/parquet_test/table/SSB.P_LINEORDER.json @@ -0,0 +1,82 @@ +{ + "uuid" : "b017d54b-e7b7-465b-a4db-b47f68baf1ad", + "version" : "2.1", + "name" : "P_LINEORDER", + "columns" : [ { + "id" : "1", + "name" : "LO_ORDERKEY", + "datatype" : "bigint" + }, { + "id" : "2", + "name" : "LO_LINENUMBER", + "datatype" : "bigint" + }, { + "id" : "3", + "name" : "LO_CUSTKEY", + "datatype" : "integer" + }, { + "id" : "4", + "name" : "LO_PARTKEY", + "datatype" : "integer" + }, { + "id" : "5", + "name" : "LO_SUPPKEY", + "datatype" : "integer" + }, { + "id" : "6", + "name" : "LO_ORDERDATE", + "datatype" : "integer" + }, { + "id" : "7", + "name" : "LO_ORDERPRIOTITY", + "datatype" : "varchar(15)" + }, { + "id" : "8", + "name" : "LO_SHIPPRIOTITY", + "datatype" : "integer" + }, { + "id" : "9", + "name" : "LO_QUANTITY", + "datatype" : "bigint" + }, { + "id" : "10", + "name" : "LO_EXTENDEDPRICE", + "datatype" : "bigint" + }, { + "id" : "11", + "name" : "LO_ORDTOTALPRICE", + "datatype" : "bigint" + }, { + "id" : "12", + "name" : "LO_DISCOUNT", + "datatype" : "bigint" + }, { + "id" : "13", + "name" : "LO_REVENUE", + "datatype" : "bigint" + }, { + "id" : "14", + "name" : "LO_SUPPLYCOST", + "datatype" : "bigint" + }, { + "id" : "15", + "name" : "LO_TAX", + "datatype" : "bigint" + }, { + "id" : "16", + "name" : "LO_COMMITDATE", + "datatype" : "integer" + }, { + "id" : "17", + "name" : "LO_SHIPMODE", + "datatype" : "varchar(10)" + }, { + "id" : "18", + "name" : "V_REVENUE", + "datatype" : "bigint" + } ], + "database" : "SSB", + "last_modified" : 1457444145578, + "source_type" : 9, + "table_type" : "VIRTUAL_VIEW" +} diff --git a/kylin-it/src/test/resources/query/sql_prune_segment/query00.sql b/kylin-it/src/test/resources/query/sql_prune_segment/query00.sql new file mode 100644 index 0000000..aaf8bdb --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_prune_segment/query00.sql @@ -0,0 +1,20 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select sum(LO_REVENUE) from SSB.P_LINEORDER where LO_ORDERDATE = 19920906 +;{"scanRowCount":4,"scanBytes":0,"scanFiles":1,"cuboidId":[7]} \ No newline at end of file diff --git a/kylin-it/src/test/resources/query/sql_prune_segment/query01.sql b/kylin-it/src/test/resources/query/sql_prune_segment/query01.sql new file mode 100644 index 0000000..c93432a --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_prune_segment/query01.sql @@ -0,0 +1,20 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select sum(LO_REVENUE) from SSB.P_LINEORDER where LO_ORDERDATE = '19920906' +;{"scanRowCount":4,"scanBytes":0,"scanFiles":1,"cuboidId":[7]} \ No newline at end of file diff --git a/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NBuildAndQueryTest.java b/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NBuildAndQueryTest.java index dc84886..58570c9 100644 --- a/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NBuildAndQueryTest.java +++ b/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NBuildAndQueryTest.java @@ -200,7 +200,9 @@ public class NBuildAndQueryTest extends LocalWithSparkSessionTest { tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_unionall")); tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_values")); tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_window")); + tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_limit")); + tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_prune_segment")); } logger.info("Total {} tasks.", tasks.size()); return tasks; @@ -213,6 +215,10 @@ public class NBuildAndQueryTest extends LocalWithSparkSessionTest { } else if (Boolean.parseBoolean(System.getProperty("isDeveloperMode", "false"))) { //fullBuildCube("ci_inner_join_cube"); fullBuildCube("ci_left_join_cube"); + buildSegments("ssb", new SegmentRange.TSRange(dateToLong("1992-09-04"), dateToLong("1992-09-05")), + new SegmentRange.TSRange(dateToLong("1992-09-05"), dateToLong("1992-09-06")), + new SegmentRange.TSRange(dateToLong("1992-09-06"), dateToLong("1992-09-07")), + new SegmentRange.TSRange(dateToLong("1992-09-07"), dateToLong("1992-09-08"))); } else { //buildAndMergeCube("ci_inner_join_cube"); buildAndMergeCube("ci_left_join_cube"); @@ -223,6 +229,9 @@ public class NBuildAndQueryTest extends LocalWithSparkSessionTest { if (cubeName.equals("ci_inner_join_cube")) { buildFourSegmentAndMerge(cubeName); } + if (cubeName.equals("ssb")) { + buildSegments(cubeName, new SegmentRange.TSRange(dateToLong("1992-0-01"), dateToLong("2015-01-01"))); + } if (cubeName.equals("ci_left_join_cube")) { buildTwoSegmentAndMerge(cubeName); } @@ -287,17 +296,10 @@ public class NBuildAndQueryTest extends LocalWithSparkSessionTest { // Round 1: Build 4 segment ExecutableState state; - state = buildCuboid(cubeName, new SegmentRange.TSRange(dateToLong("2010-01-01"), dateToLong("2012-06-01"))); - Assert.assertEquals(ExecutableState.SUCCEED, state); - - state = buildCuboid(cubeName, new SegmentRange.TSRange(dateToLong("2012-06-01"), dateToLong("2013-01-01"))); - Assert.assertEquals(ExecutableState.SUCCEED, state); - - state = buildCuboid(cubeName, new SegmentRange.TSRange(dateToLong("2013-01-01"), dateToLong("2013-06-01"))); - Assert.assertEquals(ExecutableState.SUCCEED, state); - - state = buildCuboid(cubeName, new SegmentRange.TSRange(dateToLong("2013-06-01"), dateToLong("2015-01-01"))); - Assert.assertEquals(ExecutableState.SUCCEED, state); + buildSegments(cubeName, new SegmentRange.TSRange(dateToLong("2010-01-01"), dateToLong("2012-06-01")), + new SegmentRange.TSRange(dateToLong("2012-06-01"), dateToLong("2013-01-01")), + new SegmentRange.TSRange(dateToLong("2013-01-01"), dateToLong("2013-06-01")), + new SegmentRange.TSRange(dateToLong("2013-06-01"), dateToLong("2015-01-01"))); // Round 2: Merge two segments state = mergeSegments(cubeName, dateToLong("2010-01-01"), dateToLong("2013-01-01"), false); @@ -316,6 +318,19 @@ public class NBuildAndQueryTest extends LocalWithSparkSessionTest { secondSegment.getSegRange()); } + public void buildSegments(String cubeName, SegmentRange.TSRange ... toBuildRanges) throws Exception{ + Assert.assertTrue(config.getHdfsWorkingDirectory().startsWith("file:")); + + // cleanup all segments first + cleanupSegments(cubeName); + + ExecutableState state; + for (SegmentRange.TSRange toBuildRange : toBuildRanges) { + state = buildCuboid(cubeName, toBuildRange); + Assert.assertEquals(ExecutableState.SUCCEED, state); + } + } + class QueryCallable implements Callable<Pair<String, Throwable>> { private NExecAndComp.CompareLevel compareLevel; diff --git a/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NExecAndComp.java b/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NExecAndComp.java index b570965..5a742a1 100644 --- a/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NExecAndComp.java +++ b/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NExecAndComp.java @@ -395,7 +395,10 @@ public class NExecAndComp { .replaceAll("`TDVT`\\.", "") // .replaceAll("\"POPHEALTH_ANALYTICS\"\\.", "") // .replaceAll("`POPHEALTH_ANALYTICS`\\.", "") // - .replaceAll("(?i)ISSUES\\.", ""); + .replaceAll("(?i)ISSUES\\.", "") + .replaceAll("SSB\\.", "") + .replaceAll("\"SSB\"\\.", "") + .replaceAll("`SSB`\\.", ""); } public static List<Pair<String, String>> fetchQueries(String folder) throws IOException {