This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch kylin-on-parquet-v2 in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/kylin-on-parquet-v2 by this push: new df42555 KYLIN-4843 Support INTERSECT_COUNT/INTERSECT_VALUE function for Kylin 4 df42555 is described below commit df425556757d0654800d96aa0b11728e8adcc4a6 Author: Zhichao Zhang <441586...@qq.com> AuthorDate: Wed Dec 16 23:45:11 2020 +0800 KYLIN-4843 Support INTERSECT_COUNT/INTERSECT_VALUE function for Kylin 4 --- .../org/apache/kylin/common/KylinConfigBase.java | 7 +++ .../apache/kylin/metadata/model/FunctionDesc.java | 1 + .../{query08.sql.disabled => query08.sql} | 3 +- ...e31c51-c35a-4d88-9eea-e3ef825967d8-c000.csv.crc | Bin 0 -> 12 bytes ...01bae9-c5b0-4314-b056-3545887f035b-c000.csv.crc | Bin 12 -> 0 bytes ...0-24e31c51-c35a-4d88-9eea-e3ef825967d8-c000.csv | 2 + ...0-a101bae9-c5b0-4314-b056-3545887f035b-c000.csv | 2 - ...c77c16-39dd-489d-9078-2a12bb25ef3f-c000.csv.crc | Bin 0 -> 12 bytes ...c2c54f-b702-47d8-a246-d3eebe73eb20-c000.csv.crc | Bin 12 -> 0 bytes ...0-09c77c16-39dd-489d-9078-2a12bb25ef3f-c000.csv | 3 ++ ...0-5cc2c54f-b702-47d8-a246-d3eebe73eb20-c000.csv | 3 -- ...535c4a-649e-4a27-8ee3-f839e858e96b-c000.csv.crc | Bin 0 -> 12 bytes ...b552df-f11a-41ed-8467-4713a29d2dd2-c000.csv.crc | Bin 12 -> 0 bytes ...0-0c535c4a-649e-4a27-8ee3-f839e858e96b-c000.csv | 28 +++++++++++ ...0-6eb552df-f11a-41ed-8467-4713a29d2dd2-c000.csv | 28 ----------- ...a75ff1-920d-4161-a3ad-080d3e71ce25-c000.csv.crc | Bin 0 -> 12 bytes ...fdae83-1182-4f41-b9ca-3431c034bdc4-c000.csv.crc | Bin 12 -> 0 bytes ...0-0ba75ff1-920d-4161-a3ad-080d3e71ce25-c000.csv | 2 + ...0-87fdae83-1182-4f41-b9ca-3431c034bdc4-c000.csv | 2 - ...ed9048-d2e2-414a-8588-9ed588204a97-c000.csv.crc | Bin 0 -> 12 bytes ...a313d2-0743-46d8-bcee-b36afdeeaf29-c000.csv.crc | Bin 12 -> 0 bytes ...0-a3ed9048-d2e2-414a-8588-9ed588204a97-c000.csv | 1 + ...0-fba313d2-0743-46d8-bcee-b36afdeeaf29-c000.csv | 1 - .../query/sql_intersect_value/query00.sql | 32 +++++++++++++ .../query00.sql.expected/._SUCCESS.crc | Bin 0 -> 8 bytes ...6bc9a2-0d5a-4839-94e1-d2ec55db7c1b-c000.csv.crc | Bin 0 -> 12 bytes .../query00.sql.expected/_SUCCESS | 0 ...0-3a6bc9a2-0d5a-4839-94e1-d2ec55db7c1b-c000.csv | 2 + .../query01.sql} | 12 ++++- .../query01.sql.expected/._SUCCESS.crc | Bin 0 -> 8 bytes ...3c8db3-f109-43f8-b8bc-c02ff5f0405c-c000.csv.crc | Bin 0 -> 12 bytes .../query01.sql.expected/_SUCCESS | 0 ...0-513c8db3-f109-43f8-b8bc-c02ff5f0405c-c000.csv | 3 ++ .../query03.sql} | 53 +++++++++------------ .../query03.sql.expected/._SUCCESS.crc | Bin 0 -> 8 bytes ...c6dcd8-9969-4209-bd1e-b5906fe175c6-c000.csv.crc | Bin 0 -> 12 bytes .../query03.sql.expected/_SUCCESS | 0 ...0-77c6dcd8-9969-4209-bd1e-b5906fe175c6-c000.csv | 28 +++++++++++ .../query/sql_intersect_value/query04.sql | 33 +++++++++++++ .../query04.sql.expected/._SUCCESS.crc | Bin 0 -> 8 bytes ...2bf65a-af47-44ab-84b2-56abc974a2b0-c000.csv.crc | Bin 0 -> 12 bytes .../query04.sql.expected/_SUCCESS | 0 ...0-892bf65a-af47-44ab-84b2-56abc974a2b0-c000.csv | 2 + .../query05.sql} | 8 +++- .../query05.sql.expected/._SUCCESS.crc | Bin 0 -> 8 bytes ...3de84f-1d22-4777-8876-908c30a97290-c000.csv.crc | Bin 0 -> 12 bytes .../query05.sql.expected/_SUCCESS | 0 ...0-bf3de84f-1d22-4777-8876-908c30a97290-c000.csv | 1 + .../query06.sql} | 12 ++++- .../query06.sql.expected/._SUCCESS.crc | Bin 0 -> 8 bytes ...e47126-09a8-4b4c-b022-a3c537ab5c04-c000.csv.crc | Bin 0 -> 12 bytes .../query06.sql.expected/_SUCCESS | 0 ...0-49e47126-09a8-4b4c-b022-a3c537ab5c04-c000.csv | 2 + .../org/apache/spark/sql/KylinFunctions.scala | 15 ++++-- .../spark/sql/udaf/IntersectBitmapCounter.scala | 6 +-- .../org/apache/spark/sql/udaf/IntersectCount.scala | 46 ++++++++++++++++-- .../kylin/query/runtime/plans/AggregatePlan.scala | 32 +++++++++---- .../org/apache/spark/sql/SparkOperation.scala | 1 - .../kylin/engine/spark2/NBuildAndQueryTest.java | 5 +- .../apache/kylin/engine/spark2/NExecAndComp.java | 11 +++-- 60 files changed, 287 insertions(+), 100 deletions(-) diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index 8f2bf8e..000ad80 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -2874,6 +2874,13 @@ public abstract class KylinConfigBase implements Serializable { } /** + * the maximum number of returned values for intersect_value function + */ + public int getBitmapValuesUpperBound() { + return Integer.parseInt(getOptional("kylin.query.bitmap-upper-bound", "10000000")); + } + + /** * Used to upload user-defined log4j configuration */ public String sparkUploadFiles() { diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java index c35ad2f..3f8496b 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java @@ -63,6 +63,7 @@ public class FunctionDesc implements Serializable { public static final String FUNC_COUNT = "COUNT"; public static final String FUNC_COUNT_DISTINCT = "COUNT_DISTINCT"; public static final String FUNC_INTERSECT_COUNT = "INTERSECT_COUNT"; + public static final String FUNC_INTERSECT_VALUE = "INTERSECT_VALUE"; public static final String FUNC_GROUPING = "GROUPING"; public static final String FUNC_PERCENTILE = "PERCENTILE_APPROX"; public static final Set<String> BUILT_IN_AGGREGATIONS = Sets.newHashSet(); diff --git a/kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled b/kylin-it/src/test/resources/query/sql_derived/query08.sql similarity index 92% copy from kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled copy to kylin-it/src/test/resources/query/sql_derived/query08.sql index c4e7d0c..d975b1e 100644 --- a/kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled +++ b/kylin-it/src/test/resources/query/sql_derived/query08.sql @@ -16,4 +16,5 @@ -- limitations under the License. -- -select distinct leaf_categ_id, lstg_site_id from test_kylin_fact +select distinct leaf_categ_id, lstg_site_id from test_kylin_fact +;{"scanRowCount":10000,"scanBytes":0,"scanFiles":1,"cuboidId":[2097151]} \ No newline at end of file diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/.part-00000-24e31c51-c35a-4d88-9eea-e3ef825967d8-c000.csv.crc b/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/.part-00000-24e31c51-c35a-4d88-9eea-e3ef825967d8-c000.csv.crc new file mode 100644 index 0000000..5ea0498 Binary files /dev/null and b/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/.part-00000-24e31c51-c35a-4d88-9eea-e3ef825967d8-c000.csv.crc differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/.part-00000-a101bae9-c5b0-4314-b056-3545887f035b-c000.csv.crc b/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/.part-00000-a101bae9-c5b0-4314-b056-3545887f035b-c000.csv.crc deleted file mode 100644 index 9500b67..0000000 Binary files a/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/.part-00000-a101bae9-c5b0-4314-b056-3545887f035b-c000.csv.crc and /dev/null differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/part-00000-24e31c51-c35a-4d88-9eea-e3ef825967d8-c000.csv b/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/part-00000-24e31c51-c35a-4d88-9eea-e3ef825967d8-c000.csv new file mode 100644 index 0000000..8549d01 --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/part-00000-24e31c51-c35a-4d88-9eea-e3ef825967d8-c000.csv @@ -0,0 +1,2 @@ +2012-06-23,16,25,17,1,0,0,92,98 +2013-12-22,13,16,21,0,0,0,96,100 diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/part-00000-a101bae9-c5b0-4314-b056-3545887f035b-c000.csv b/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/part-00000-a101bae9-c5b0-4314-b056-3545887f035b-c000.csv deleted file mode 100644 index d688146..0000000 --- a/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/part-00000-a101bae9-c5b0-4314-b056-3545887f035b-c000.csv +++ /dev/null @@ -1,2 +0,0 @@ -2013-12-22,96,96,96,96,96,96,96,100 -2012-06-23,92,92,92,92,92,92,92,98 diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/.part-00000-09c77c16-39dd-489d-9078-2a12bb25ef3f-c000.csv.crc b/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/.part-00000-09c77c16-39dd-489d-9078-2a12bb25ef3f-c000.csv.crc new file mode 100644 index 0000000..0c55ec2 Binary files /dev/null and b/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/.part-00000-09c77c16-39dd-489d-9078-2a12bb25ef3f-c000.csv.crc differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/.part-00000-5cc2c54f-b702-47d8-a246-d3eebe73eb20-c000.csv.crc b/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/.part-00000-5cc2c54f-b702-47d8-a246-d3eebe73eb20-c000.csv.crc deleted file mode 100644 index b63189b..0000000 Binary files a/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/.part-00000-5cc2c54f-b702-47d8-a246-d3eebe73eb20-c000.csv.crc and /dev/null differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/part-00000-09c77c16-39dd-489d-9078-2a12bb25ef3f-c000.csv b/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/part-00000-09c77c16-39dd-489d-9078-2a12bb25ef3f-c000.csv new file mode 100644 index 0000000..b43f3fe --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/part-00000-09c77c16-39dd-489d-9078-2a12bb25ef3f-c000.csv @@ -0,0 +1,3 @@ +2012-01-01,10,0,0,0,0 +2012-01-02,0,11,0,0,0 +2012-01-03,0,0,12,0,0 diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/part-00000-5cc2c54f-b702-47d8-a246-d3eebe73eb20-c000.csv b/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/part-00000-5cc2c54f-b702-47d8-a246-d3eebe73eb20-c000.csv deleted file mode 100644 index 8514dd2..0000000 --- a/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/part-00000-5cc2c54f-b702-47d8-a246-d3eebe73eb20-c000.csv +++ /dev/null @@ -1,3 +0,0 @@ -2012-01-03,12,12,12,12,12 -2012-01-01,10,10,10,10,10 -2012-01-02,11,11,11,11,11 diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/.part-00000-0c535c4a-649e-4a27-8ee3-f839e858e96b-c000.csv.crc b/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/.part-00000-0c535c4a-649e-4a27-8ee3-f839e858e96b-c000.csv.crc new file mode 100644 index 0000000..8e80af5 Binary files /dev/null and b/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/.part-00000-0c535c4a-649e-4a27-8ee3-f839e858e96b-c000.csv.crc differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/.part-00000-6eb552df-f11a-41ed-8467-4713a29d2dd2-c000.csv.crc b/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/.part-00000-6eb552df-f11a-41ed-8467-4713a29d2dd2-c000.csv.crc deleted file mode 100644 index 9b3f176..0000000 Binary files a/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/.part-00000-6eb552df-f11a-41ed-8467-4713a29d2dd2-c000.csv.crc and /dev/null differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/part-00000-0c535c4a-649e-4a27-8ee3-f839e858e96b-c000.csv b/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/part-00000-0c535c4a-649e-4a27-8ee3-f839e858e96b-c000.csv new file mode 100644 index 0000000..7866262 --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/part-00000-0c535c4a-649e-4a27-8ee3-f839e858e96b-c000.csv @@ -0,0 +1,28 @@ +9426,1 +10866,1 +13987,1 +26262,1 +32996,1 +62179,1 +67698,1 +95672,1 +152801,1 +164261,1 +11554,0 +20865,0 +24541,0 +43479,0 +44079,0 +156614,0 +161567,0 +1161,0 +1504,0 +15115,0 +61323,0 +66767,0 +95173,0 +99985,0 +106246,0 +139973,0 +148324,0 +166013,0 diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/part-00000-6eb552df-f11a-41ed-8467-4713a29d2dd2-c000.csv b/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/part-00000-6eb552df-f11a-41ed-8467-4713a29d2dd2-c000.csv deleted file mode 100644 index f3e2ce2..0000000 --- a/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/part-00000-6eb552df-f11a-41ed-8467-4713a29d2dd2-c000.csv +++ /dev/null @@ -1,28 +0,0 @@ -24541,1 -67698,1 -32996,1 -44079,1 -1504,1 -13987,2 -10866,1 -62179,1 -43479,1 -148324,1 -11554,2 -156614,1 -166013,1 -139973,1 -26262,1 -1161,1 -20865,1 -66767,1 -9426,1 -164261,1 -106246,1 -152801,3 -15115,1 -99985,1 -61323,1 -95672,1 -95173,2 -161567,1 diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/.part-00000-0ba75ff1-920d-4161-a3ad-080d3e71ce25-c000.csv.crc b/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/.part-00000-0ba75ff1-920d-4161-a3ad-080d3e71ce25-c000.csv.crc new file mode 100644 index 0000000..2294938 Binary files /dev/null and b/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/.part-00000-0ba75ff1-920d-4161-a3ad-080d3e71ce25-c000.csv.crc differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/.part-00000-87fdae83-1182-4f41-b9ca-3431c034bdc4-c000.csv.crc b/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/.part-00000-87fdae83-1182-4f41-b9ca-3431c034bdc4-c000.csv.crc deleted file mode 100644 index 0645ce5..0000000 Binary files a/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/.part-00000-87fdae83-1182-4f41-b9ca-3431c034bdc4-c000.csv.crc and /dev/null differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/part-00000-0ba75ff1-920d-4161-a3ad-080d3e71ce25-c000.csv b/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/part-00000-0ba75ff1-920d-4161-a3ad-080d3e71ce25-c000.csv new file mode 100644 index 0000000..9351364 --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/part-00000-0ba75ff1-920d-4161-a3ad-080d3e71ce25-c000.csv @@ -0,0 +1,2 @@ +2012-06-23,16,25,17,1,0,0,0,92,98 +2013-12-22,13,16,21,0,0,0,0,96,100 diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/part-00000-87fdae83-1182-4f41-b9ca-3431c034bdc4-c000.csv b/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/part-00000-87fdae83-1182-4f41-b9ca-3431c034bdc4-c000.csv deleted file mode 100644 index ba9e47b..0000000 --- a/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/part-00000-87fdae83-1182-4f41-b9ca-3431c034bdc4-c000.csv +++ /dev/null @@ -1,2 +0,0 @@ -2013-12-22,96,96,96,96,96,96,96,96,100 -2012-06-23,92,92,92,92,92,92,92,92,98 diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/.part-00000-a3ed9048-d2e2-414a-8588-9ed588204a97-c000.csv.crc b/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/.part-00000-a3ed9048-d2e2-414a-8588-9ed588204a97-c000.csv.crc new file mode 100644 index 0000000..c738131 Binary files /dev/null and b/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/.part-00000-a3ed9048-d2e2-414a-8588-9ed588204a97-c000.csv.crc differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/.part-00000-fba313d2-0743-46d8-bcee-b36afdeeaf29-c000.csv.crc b/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/.part-00000-fba313d2-0743-46d8-bcee-b36afdeeaf29-c000.csv.crc deleted file mode 100644 index c73e8c9..0000000 Binary files a/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/.part-00000-fba313d2-0743-46d8-bcee-b36afdeeaf29-c000.csv.crc and /dev/null differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/part-00000-a3ed9048-d2e2-414a-8588-9ed588204a97-c000.csv b/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/part-00000-a3ed9048-d2e2-414a-8588-9ed588204a97-c000.csv new file mode 100644 index 0000000..2d51ee2 --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/part-00000-a3ed9048-d2e2-414a-8588-9ed588204a97-c000.csv @@ -0,0 +1 @@ +10,0 diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/part-00000-fba313d2-0743-46d8-bcee-b36afdeeaf29-c000.csv b/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/part-00000-fba313d2-0743-46d8-bcee-b36afdeeaf29-c000.csv deleted file mode 100644 index 3ff9442..0000000 --- a/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/part-00000-fba313d2-0743-46d8-bcee-b36afdeeaf29-c000.csv +++ /dev/null @@ -1 +0,0 @@ -33,33 diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql b/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql new file mode 100644 index 0000000..51ecd7e --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql @@ -0,0 +1,32 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select +week_beg_dt as week, +intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC']) as a, +intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['Auction']) as b, +intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['Others']) as c, +intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC', 'Auction']) as ab, +intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC', 'Others']) as ac, +intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC', 'Auction', 'Others']) as abc, +count(distinct TEST_COUNT_DISTINCT_BITMAP) as sellers, +count(*) as cnt +from test_kylin_fact left join edw.test_cal_dt on test_kylin_fact.cal_dt = edw.test_cal_dt.CAL_DT +where week_beg_dt in (DATE '2013-12-22', DATE '2012-06-23') +group by week_beg_dt +;{"scanRowCount":10018,"scanBytes":0,"scanFiles":2,"cuboidId":[276480]} \ No newline at end of file diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/._SUCCESS.crc b/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/._SUCCESS.crc new file mode 100644 index 0000000..3b7b044 Binary files /dev/null and b/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/._SUCCESS.crc differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/.part-00000-3a6bc9a2-0d5a-4839-94e1-d2ec55db7c1b-c000.csv.crc b/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/.part-00000-3a6bc9a2-0d5a-4839-94e1-d2ec55db7c1b-c000.csv.crc new file mode 100644 index 0000000..040ab65 Binary files /dev/null and b/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/.part-00000-3a6bc9a2-0d5a-4839-94e1-d2ec55db7c1b-c000.csv.crc differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/_SUCCESS b/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/_SUCCESS new file mode 100644 index 0000000..e69de29 diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/part-00000-3a6bc9a2-0d5a-4839-94e1-d2ec55db7c1b-c000.csv b/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/part-00000-3a6bc9a2-0d5a-4839-94e1-d2ec55db7c1b-c000.csv new file mode 100644 index 0000000..0751172 --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/part-00000-3a6bc9a2-0d5a-4839-94e1-d2ec55db7c1b-c000.csv @@ -0,0 +1,2 @@ +2012-06-23,"[44,235,257,284,341,363,419,719,780,854,856,862,892,927,957,989]","[4,19,34,55,151,153,182,210,278,288,293,317,360,424,461,513,524,550,675,678,679,760,791,854,972]","[15,28,34,128,161,174,217,270,441,532,589,630,770,876,925,931,961]",[854],"","",92,98 +2013-12-22,"[72,115,188,237,417,483,486,555,638,741,746,891,959]","[58,68,155,246,293,296,307,309,341,470,505,604,627,655,811,919]","[55,101,261,281,298,310,354,399,405,541,553,559,571,606,623,632,689,700,758,767,820]","","","",96,100 diff --git a/kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled b/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql similarity index 54% copy from kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled copy to kylin-it/src/test/resources/query/sql_intersect_value/query01.sql index c4e7d0c..f9af7ab 100644 --- a/kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled +++ b/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql @@ -15,5 +15,13 @@ -- See the License for the specific language governing permissions and -- limitations under the License. -- - -select distinct leaf_categ_id, lstg_site_id from test_kylin_fact +select CAL_DT, +intersect_value(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-01']) as first_day, +intersect_value(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-02']) as second_day, +intersect_value(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-03']) as third_day, +intersect_value(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-01','2012-01-02']) as retention_oneday, +intersect_value(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-01','2012-01-02','2012-01-03']) as retention_twoday +from test_kylin_fact +where CAL_DT in ('2012-01-01','2012-01-02','2012-01-03') +group by CAL_DT +;{"scanRowCount":731,"scanBytes":0,"scanFiles":1,"cuboidId":[262144]} \ No newline at end of file diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/._SUCCESS.crc b/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/._SUCCESS.crc new file mode 100644 index 0000000..3b7b044 Binary files /dev/null and b/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/._SUCCESS.crc differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/.part-00000-513c8db3-f109-43f8-b8bc-c02ff5f0405c-c000.csv.crc b/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/.part-00000-513c8db3-f109-43f8-b8bc-c02ff5f0405c-c000.csv.crc new file mode 100644 index 0000000..2aba606 Binary files /dev/null and b/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/.part-00000-513c8db3-f109-43f8-b8bc-c02ff5f0405c-c000.csv.crc differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/_SUCCESS b/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/_SUCCESS new file mode 100644 index 0000000..e69de29 diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/part-00000-513c8db3-f109-43f8-b8bc-c02ff5f0405c-c000.csv b/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/part-00000-513c8db3-f109-43f8-b8bc-c02ff5f0405c-c000.csv new file mode 100644 index 0000000..767187a --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/part-00000-513c8db3-f109-43f8-b8bc-c02ff5f0405c-c000.csv @@ -0,0 +1,3 @@ +2012-01-01,"[245,246,247,326,430,520,610,709,809,810]","","","","" +2012-01-02,"","[1,2,121,327,328,329,431,611,612,811,898]","","","" +2012-01-03,"","","[122,123,124,248,249,330,432,521,613,614,710,711]","","" diff --git a/kylin-it/src/test/resources/query/sql_extended_column/query00.sql b/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql similarity index 63% rename from kylin-it/src/test/resources/query/sql_extended_column/query00.sql rename to kylin-it/src/test/resources/query/sql_intersect_value/query03.sql index 530572e..cd5efdd 100644 --- a/kylin-it/src/test/resources/query/sql_extended_column/query00.sql +++ b/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql @@ -1,30 +1,23 @@ --- --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, software --- distributed under the License is distributed on an "AS IS" BASIS, --- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --- See the License for the specific language governing permissions and --- limitations under the License. --- - -SELECT - -TEST_ORDER.ORDER_ID -,TEST_EXTENDED_COLUMN - -FROM TEST_KYLIN_FACT as TEST_KYLIN_FACT -INNER JOIN TEST_ORDER as TEST_ORDER -ON TEST_KYLIN_FACT.ORDER_ID = TEST_ORDER.ORDER_ID -INNER JOIN TEST_CATEGORY_GROUPINGS as TEST_CATEGORY_GROUPINGS -ON TEST_KYLIN_FACT.LEAF_CATEG_ID = TEST_CATEGORY_GROUPINGS.LEAF_CATEG_ID AND TEST_KYLIN_FACT.LSTG_SITE_ID = TEST_CATEGORY_GROUPINGS.SITE_ID - -group by TEST_ORDER.ORDER_ID,TEST_EXTENDED_COLUMN +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- +select LEAF_CATEG_ID, +intersect_value(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array[date'2012-01-01']) as first_day +from test_kylin_fact +where CAL_DT in ('2012-01-01','2012-01-02','2012-01-03') +group by LEAF_CATEG_ID +;{"scanRowCount":9562,"scanBytes":0,"scanFiles":1,"cuboidId":[507904]} \ No newline at end of file diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/._SUCCESS.crc b/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/._SUCCESS.crc new file mode 100644 index 0000000..3b7b044 Binary files /dev/null and b/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/._SUCCESS.crc differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/.part-00000-77c6dcd8-9969-4209-bd1e-b5906fe175c6-c000.csv.crc b/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/.part-00000-77c6dcd8-9969-4209-bd1e-b5906fe175c6-c000.csv.crc new file mode 100644 index 0000000..ccffb72 Binary files /dev/null and b/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/.part-00000-77c6dcd8-9969-4209-bd1e-b5906fe175c6-c000.csv.crc differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/_SUCCESS b/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/_SUCCESS new file mode 100644 index 0000000..e69de29 diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/part-00000-77c6dcd8-9969-4209-bd1e-b5906fe175c6-c000.csv b/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/part-00000-77c6dcd8-9969-4209-bd1e-b5906fe175c6-c000.csv new file mode 100644 index 0000000..888cd05 --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/part-00000-77c6dcd8-9969-4209-bd1e-b5906fe175c6-c000.csv @@ -0,0 +1,28 @@ +9426,[610] +10866,[326] +13987,[247] +26262,[246] +32996,[430] +62179,[810] +67698,[709] +95672,[520] +152801,[809] +164261,[245] +11554,"" +20865,"" +24541,"" +43479,"" +44079,"" +156614,"" +161567,"" +1161,"" +1504,"" +15115,"" +61323,"" +66767,"" +95173,"" +99985,"" +106246,"" +139973,"" +148324,"" +166013,"" diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql b/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql new file mode 100644 index 0000000..c9429fa --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql @@ -0,0 +1,33 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select +week_beg_dt as week, +intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC']) as a, +intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['Auction']) as b, +intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['Others']) as c, +intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC', 'Auction']) as ab, +intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC', 'Others']) as ac, +intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC', 'Auction', 'Others']) as abc, +intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC|Auction', 'Others']) as a_or_b_and_c, +count(distinct TEST_COUNT_DISTINCT_BITMAP) as sellers, +count(*) as cnt +from test_kylin_fact left join edw.test_cal_dt on test_kylin_fact.cal_dt = edw.test_cal_dt.CAL_DT +where week_beg_dt in (DATE '2013-12-22', DATE '2012-06-23') +group by week_beg_dt +;{"scanRowCount":10018,"scanBytes":0,"scanFiles":2,"cuboidId":[276480]} \ No newline at end of file diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/._SUCCESS.crc b/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/._SUCCESS.crc new file mode 100644 index 0000000..3b7b044 Binary files /dev/null and b/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/._SUCCESS.crc differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/.part-00000-892bf65a-af47-44ab-84b2-56abc974a2b0-c000.csv.crc b/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/.part-00000-892bf65a-af47-44ab-84b2-56abc974a2b0-c000.csv.crc new file mode 100644 index 0000000..e101351 Binary files /dev/null and b/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/.part-00000-892bf65a-af47-44ab-84b2-56abc974a2b0-c000.csv.crc differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/_SUCCESS b/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/_SUCCESS new file mode 100644 index 0000000..e69de29 diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/part-00000-892bf65a-af47-44ab-84b2-56abc974a2b0-c000.csv b/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/part-00000-892bf65a-af47-44ab-84b2-56abc974a2b0-c000.csv new file mode 100644 index 0000000..ddf4299 --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/part-00000-892bf65a-af47-44ab-84b2-56abc974a2b0-c000.csv @@ -0,0 +1,2 @@ +2012-06-23,"[44,235,257,284,341,363,419,719,780,854,856,862,892,927,957,989]","[4,19,34,55,151,153,182,210,278,288,293,317,360,424,461,513,524,550,675,678,679,760,791,854,972]","[15,28,34,128,161,174,217,270,441,532,589,630,770,876,925,931,961]",[854],"","","",92,98 +2013-12-22,"[72,115,188,237,417,483,486,555,638,741,746,891,959]","[58,68,155,246,293,296,307,309,341,470,505,604,627,655,811,919]","[55,101,261,281,298,310,354,399,405,541,553,559,571,606,623,632,689,700,758,767,820]","","","","",96,100 diff --git a/kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled b/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql similarity index 69% copy from kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled copy to kylin-it/src/test/resources/query/sql_intersect_value/query05.sql index c4e7d0c..d887272 100644 --- a/kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled +++ b/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql @@ -15,5 +15,9 @@ -- See the License for the specific language governing permissions and -- limitations under the License. -- - -select distinct leaf_categ_id, lstg_site_id from test_kylin_fact +select +intersect_value(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-01']) as first_day, +intersect_value(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-01', '2012-01-02']) as first_and_second_day +from test_kylin_fact +where CAL_DT in ('2012-01-01','2012-01-02','2012-01-03') +;{"scanRowCount":731,"scanBytes":0,"scanFiles":1,"cuboidId":[262144]} \ No newline at end of file diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/._SUCCESS.crc b/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/._SUCCESS.crc new file mode 100644 index 0000000..3b7b044 Binary files /dev/null and b/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/._SUCCESS.crc differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/.part-00000-bf3de84f-1d22-4777-8876-908c30a97290-c000.csv.crc b/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/.part-00000-bf3de84f-1d22-4777-8876-908c30a97290-c000.csv.crc new file mode 100644 index 0000000..2b3fd41 Binary files /dev/null and b/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/.part-00000-bf3de84f-1d22-4777-8876-908c30a97290-c000.csv.crc differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/_SUCCESS b/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/_SUCCESS new file mode 100644 index 0000000..e69de29 diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/part-00000-bf3de84f-1d22-4777-8876-908c30a97290-c000.csv b/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/part-00000-bf3de84f-1d22-4777-8876-908c30a97290-c000.csv new file mode 100644 index 0000000..9286fbf --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/part-00000-bf3de84f-1d22-4777-8876-908c30a97290-c000.csv @@ -0,0 +1 @@ +"[245,246,247,326,430,520,610,709,809,810]","" diff --git a/kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled b/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql similarity index 52% rename from kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled rename to kylin-it/src/test/resources/query/sql_intersect_value/query06.sql index c4e7d0c..689d980 100644 --- a/kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled +++ b/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql @@ -16,4 +16,14 @@ -- limitations under the License. -- -select distinct leaf_categ_id, lstg_site_id from test_kylin_fact +select +week_beg_dt as week, +intersect_count(TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC']) as a_cnt, +intersect_value(TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC']) as a_value, +intersect_value(TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC', 'Auction']) as ab, +intersect_value(TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC|Auction', 'Others']) as a_or_b_and_c, +count(distinct TEST_COUNT_DISTINCT_BITMAP) as sellers +from test_kylin_fact left join edw.test_cal_dt on test_kylin_fact.cal_dt = edw.test_cal_dt.CAL_DT +where week_beg_dt in (DATE '2013-12-22', DATE '2012-06-23') +group by week_beg_dt +;{"scanRowCount":10018,"scanBytes":0,"scanFiles":2,"cuboidId":[276480]} \ No newline at end of file diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/._SUCCESS.crc b/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/._SUCCESS.crc new file mode 100644 index 0000000..3b7b044 Binary files /dev/null and b/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/._SUCCESS.crc differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/.part-00000-49e47126-09a8-4b4c-b022-a3c537ab5c04-c000.csv.crc b/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/.part-00000-49e47126-09a8-4b4c-b022-a3c537ab5c04-c000.csv.crc new file mode 100644 index 0000000..5e55dfd Binary files /dev/null and b/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/.part-00000-49e47126-09a8-4b4c-b022-a3c537ab5c04-c000.csv.crc differ diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/_SUCCESS b/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/_SUCCESS new file mode 100644 index 0000000..e69de29 diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/part-00000-49e47126-09a8-4b4c-b022-a3c537ab5c04-c000.csv b/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/part-00000-49e47126-09a8-4b4c-b022-a3c537ab5c04-c000.csv new file mode 100644 index 0000000..7881113 --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/part-00000-49e47126-09a8-4b4c-b022-a3c537ab5c04-c000.csv @@ -0,0 +1,2 @@ +2012-06-23,16,"[44,235,257,284,341,363,419,719,780,854,856,862,892,927,957,989]",[854],"",92 +2013-12-22,13,"[72,115,188,237,417,483,486,555,638,741,746,891,959]","","",96 diff --git a/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/KylinFunctions.scala b/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/KylinFunctions.scala index 330ded2..7b645fd 100644 --- a/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/KylinFunctions.scala +++ b/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/KylinFunctions.scala @@ -21,7 +21,7 @@ import org.apache.kylin.engine.spark.common.util.KylinDateTimeUtils import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} -import org.apache.spark.sql.types.{AbstractDataType, DataType, DateType, IntegerType} +import org.apache.spark.sql.types._ import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, DictEncode, Expression, ExpressionInfo, ExpressionUtils, ImplicitCastInputTypes, In, KylinAddMonths, Like, Literal, RoundBase, SplitPart, Sum0, TimestampAdd, TimestampDiff, Truncate, UnaryExpression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction import org.apache.spark.sql.udaf.{ApproxCountDistinct, IntersectCount, PreciseCountDistinct} @@ -68,11 +68,18 @@ object KylinFunctions { def approx_count_distinct(column: Column, precision: Int): Column = Column(ApproxCountDistinct(column.expr, precision).toAggregateExpression()) - def intersect_count(columns: Column*): Column = { + def intersect_count(upperBound: Int, columns: Column*): Column = { require(columns.size == 3, s"Input columns size ${columns.size} don't equal to 3.") val expressions = columns.map(_.expr) - Column(IntersectCount(expressions.apply(0), expressions.apply(1), expressions.apply(2)) - .toAggregateExpression()) + Column(IntersectCount(expressions.apply(0), expressions.apply(1), expressions.apply(2), + LongType, upperBound).toAggregateExpression()) + } + + def intersect_value(upperBound: Int, columns: Column*): Column = { + require(columns.size == 3, s"Input columns size ${columns.size} don't equal to 3.") + val expressions = columns.map(_.expr) + Column(IntersectCount(expressions.apply(0), expressions.apply(1), expressions.apply(2), + StringType, upperBound).toAggregateExpression()) } def sum0(e: Column): Column = withAggregateFunction { diff --git a/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/udaf/IntersectBitmapCounter.scala b/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/udaf/IntersectBitmapCounter.scala index 53436ad..0ed2551 100644 --- a/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/udaf/IntersectBitmapCounter.scala +++ b/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/udaf/IntersectBitmapCounter.scala @@ -59,15 +59,15 @@ class IntersectBitmapCounter() { } } - def result(filterSize: Int): Long = { + def result(filterSize: Int): Roaring64NavigableMap = { if (_map.size() != filterSize || _map.size() == 0) { - 0 + new Roaring64NavigableMap() } else { val bitmap = _map.asScala.values.reduce { (a, b) => a.and(b) a } - bitmap.getLongCardinality + bitmap } } } diff --git a/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/udaf/IntersectCount.scala b/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/udaf/IntersectCount.scala index 36e27f6..d05ab3e 100644 --- a/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/udaf/IntersectCount.scala +++ b/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/udaf/IntersectCount.scala @@ -20,18 +20,22 @@ package org.apache.spark.sql.udaf import com.esotericsoftware.kryo.KryoException import com.esotericsoftware.kryo.io.{Input, KryoDataInput, KryoDataOutput, Output} +import org.apache.commons.lang3.StringUtils import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate} import org.apache.spark.sql.catalyst.expressions.{Expression, Literal} import org.apache.spark.sql.catalyst.util.GenericArrayData -import org.apache.spark.sql.types.{DataType, LongType} +import org.apache.spark.sql.types.{ArrayType, DataType, LongType, StringType} +import org.apache.spark.unsafe.types.UTF8String import org.roaringbitmap.longlong.Roaring64NavigableMap import scala.collection.JavaConverters._ @SerialVersionUID(1) -case class IntersectCount(child1: Expression, child2: Expression, child3: Expression, mutableAggBufferOffset: Int = 0, +case class IntersectCount(child1: Expression, child2: Expression, child3: Expression, + returnDataType: DataType, upperBound: Int = 10000000, + mutableAggBufferOffset: Int = 0, inputAggBufferOffset: Int = 0) extends TypedImperativeAggregate[IntersectBitmapCounter] with Serializable with Logging { @@ -41,7 +45,8 @@ case class IntersectCount(child1: Expression, child2: Expression, child3: Expres override def update(counter: IntersectBitmapCounter, input: InternalRow): IntersectBitmapCounter = { if (filters == null) { - filters = child3.eval(input).asInstanceOf[GenericArrayData].array.map(filter => filter -> filter.toString).toMap + filters = child3.eval(input).asInstanceOf[GenericArrayData] + .array.map(filter => filter -> filter.toString).toMap } val bitmap = child1.eval(input).asInstanceOf[Array[Byte]] val key = child2.eval(input) @@ -59,7 +64,38 @@ case class IntersectCount(child1: Expression, child2: Expression, child3: Expres } override def eval(counter: IntersectBitmapCounter): Any = { - counter.result(child3.asInstanceOf[Literal].value.asInstanceOf[GenericArrayData].array.distinct.length) + val map = counter.result( + child3.asInstanceOf[Literal].value.asInstanceOf[GenericArrayData].array.distinct.length) + dataType match { + // for intersect_count + case LongType => map.getLongCardinality + // for intersect_value + case StringType => + val intCardinality = map.getIntCardinality + if (intCardinality > upperBound) { + throw new UnsupportedOperationException(s"Cardinality of the bitmap is greater than " + + s"configured upper bound(${upperBound})") + } + val result = new StringBuffer("") + if (intCardinality > 0) { + result.append("[").append(StringUtils.join(map.iterator(), ",")).append("]"); + } + UTF8String.fromString(result.toString) + case ArrayType(LongType, false) => + val cardinality = map.getIntCardinality + if (cardinality > upperBound) { + throw new UnsupportedOperationException(s"Cardinality of the bitmap is greater than " + + s"configured upper bound(${upperBound})") + } + val longs = new Array[Long](cardinality) + var id = 0 + val iterator = map.iterator() + while (iterator.hasNext) { + longs(id) = iterator.next() + id += 1 + } + new GenericArrayData(longs) + } } var array: Array[Byte] = _ @@ -126,7 +162,7 @@ case class IntersectCount(child1: Expression, child2: Expression, child3: Expres override def nullable: Boolean = false - override def dataType: DataType = LongType + override def dataType: DataType = returnDataType override def children: Seq[Expression] = child1 :: child2 :: child3 :: Nil } diff --git a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/query/runtime/plans/AggregatePlan.scala b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/query/runtime/plans/AggregatePlan.scala index 4907af3..ab36cfe 100644 --- a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/query/runtime/plans/AggregatePlan.scala +++ b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/query/runtime/plans/AggregatePlan.scala @@ -21,6 +21,7 @@ import org.apache.calcite.DataContext import org.apache.calcite.rel.core.Aggregate import org.apache.calcite.rel.core.AggregateCall import org.apache.calcite.sql.SqlKind +import org.apache.kylin.common.KylinConfig import org.apache.kylin.metadata.model.FunctionDesc import org.apache.kylin.query.relnode.{KylinAggregateCall, OLAPAggregateRel} import org.apache.kylin.query.runtime.RuntimeHelper @@ -112,21 +113,34 @@ object AggregatePlan extends LogEx { val columnName = argNames.map(col) val registeredFuncName = RuntimeHelper.registerSingleByColName(funcName, dataType) val aggName = SchemaProcessor.replaceToAggravateSchemaName(index, funcName, hash, argNames: _*) - if (funcName == "COUNT_DISTINCT") { + if (funcName == FunctionDesc.FUNC_COUNT_DISTINCT) { if (dataType.getName == "hllc") { org.apache.spark.sql.KylinFunctions .approx_count_distinct(columnName.head, dataType.getPrecision) .alias(aggName) - } else { + } else if (call.getAggregation().getName.equalsIgnoreCase(FunctionDesc.FUNC_COUNT_DISTINCT)) { + // execute count distinct precisely KylinFunctions.precise_count_distinct(columnName.head).alias(aggName) + } else { + // for intersect_count and intersect_value function + require(columnName.size == 3, s"Input columns size ${columnName.size} don't equal to 3.") + val columns = columnName.zipWithIndex.map { + case (column: Column, 2) => column.cast(ArrayType.apply(schema.fields.apply(call.getArgList.get(1)).dataType)) + case (column: Column, _) => column + } + val upperBound = KylinConfig.getInstanceFromEnv.getBitmapValuesUpperBound + if (call.getAggregation().getName.equalsIgnoreCase(FunctionDesc.FUNC_INTERSECT_COUNT)) { + KylinFunctions.intersect_count(upperBound, columns.toList: _*) + .alias(SchemaProcessor + .replaceToAggravateSchemaName(index, FunctionDesc.FUNC_INTERSECT_COUNT, hash, + argNames: _*)) + } else { + KylinFunctions.intersect_value(upperBound, columns.toList: _*) + .alias(SchemaProcessor + .replaceToAggravateSchemaName(index, FunctionDesc.FUNC_INTERSECT_VALUE, hash, + argNames: _*)) + } } - } else if (funcName.equalsIgnoreCase(FunctionDesc.FUNC_INTERSECT_COUNT)) { - require(columnName.size == 3, s"Input columns size ${columnName.size} don't equal to 3.") - val columns = columnName.zipWithIndex.map { - case (column: Column, 2) => column.cast(ArrayType.apply(schema.fields.apply(call.getArgList.get(1)).dataType)) - case (column: Column, _) => column - } - KylinFunctions.intersect_count(columns.toList: _*).alias(aggName) } else { callUDF(registeredFuncName, columnName.toList: _*).alias(aggName) } diff --git a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparkOperation.scala b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparkOperation.scala index 44791df..800caae 100644 --- a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparkOperation.scala +++ b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparkOperation.scala @@ -21,7 +21,6 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.plans.logical.GroupingSets import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.{Column, DataFrame, Row, SparderContext} object SparkOperation { diff --git a/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NBuildAndQueryTest.java b/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NBuildAndQueryTest.java index ce27ff2..c061135 100644 --- a/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NBuildAndQueryTest.java +++ b/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NBuildAndQueryTest.java @@ -169,13 +169,13 @@ public class NBuildAndQueryTest extends LocalWithSparkSessionTest { // Not support yet //tasks.add(new QueryCallable(CompareLevel.NONE, joinType, "sql_expression")); - //tasks.add(new QueryCallable(CompareLevel.NONE, joinType, "sql_extended_column")); tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_function")); tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_grouping")); tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_h2")); tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_hive")); tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_intersect_count")); + tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_intersect_value")); tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_join")); tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_like")); tasks.add(new QueryCallable(CompareLevel.SAME, joinType, "sql_lookup")); @@ -338,7 +338,8 @@ public class NBuildAndQueryTest extends LocalWithSparkSessionTest { } else { List<Quadruple<String, String, NExecAndComp.ITQueryMetrics, List<String>>> queries = NExecAndComp.fetchQueries2(KYLIN_SQL_BASE_DIR + File.separator + sqlFolder); - NExecAndComp.execAndCompareNew2(queries, getProject(), compareLevel, joinType, null); + NExecAndComp.execAndCompareNew2(queries, getProject(), compareLevel, joinType, + null); } } catch (Throwable th) { logger.error("Query fail on: {}", identity); diff --git a/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NExecAndComp.java b/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NExecAndComp.java index 707a906..b570965 100644 --- a/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NExecAndComp.java +++ b/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NExecAndComp.java @@ -228,8 +228,10 @@ public class NExecAndComp { } // generate results and save them into csv file try { - queryResult.getFirst().repartition(1) - .write().option("header", false).csv(genResultsCSVFile(query.getFirst())); + queryResult.getFirst().repartition(1).write() + .option("header", false) + .option("nullValue", "\"-\"") + .csv(genResultsFiles(query.getFirst())); } catch (JsonProcessingException e) { logger.error("Write results as csv file error: ", e); } @@ -249,7 +251,8 @@ public class NExecAndComp { String csvDataPathStr = query.getFirst() + ".expected"; if(new File(csvDataPathStr).exists()) { logger.debug("Use expected dataset for {}", sql); - sparkResult = KylinSparkEnv.getSparkSession().read().csv(csvDataPathStr); + sparkResult = KylinSparkEnv.getSparkSession().read() + .option("nullValue", "\"-\"").csv(csvDataPathStr); } else { sparkResult = queryWithSpark(prj, sql, query.getFirst(), query.getFourth()); } @@ -512,7 +515,7 @@ public class NExecAndComp { return parameters; } - public static String genResultsCSVFile(String sqlFileName) throws IOException { + public static String genResultsFiles(String sqlFileName) throws IOException { String resultsFielName = sqlFileName + ".expected"; File resultsFile = new File(resultsFielName); if (resultsFile.exists()) {