This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/kylin.git


The following commit(s) were added to refs/heads/main by this push:
     new 6fa14239c4 KYLIN-5182, support to ignore only one null count in check duplicate key
6fa14239c4 is described below

commit 6fa14239c49c800f69369321eafdc4ef9727ba33
Author: Mukvin <boyboys...@163.com>
AuthorDate: Fri May 6 18:37:29 2022 +0800

    KYLIN-5182, support to ignore only one null count in check duplicate key
---
 .../src/main/java/org/apache/kylin/common/KylinConfigBase.java     | 5 +++++
 .../apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala    | 7 ++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 31a600857f..ba06df7345 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -2321,6 +2321,11 @@ public abstract class KylinConfigBase implements Serializable {
         return 
Boolean.valueOf(getOptional("kylin.engine.build-base-cuboid-enabled", TRUE));
     }
 
+    @ConfigTag(ConfigTag.Tag.CUBE_LEVEL)
+    public boolean isIgnoringNullInCheckDupKeyEnabled() {
+        return 
Boolean.valueOf(this.getOptional("kylin.job.ignoring-null-in-check-dup-key-enabled",
 FALSE));
+    }
+
     // 
============================================================================
     // Kylin 4.X Spark resources automatic adjustment strategy configuration
     // 
============================================================================
diff --git a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala
index 7fcfbd0a57..468f1228a2 100644
--- a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala
+++ b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala
@@ -197,7 +197,12 @@ class CubeSnapshotBuilder extends Logging {
           val lookupTablePKS = joinDesc.PKS.map(lookupTablePK => 
lookupTablePK.columnName)
           val countDistinctColumn = df.agg(countDistinct(lookupTablePKS.head, 
lookupTablePKS.tail: _*)).collect().map(_.getLong(0)).head
           if (countColumn != countDistinctColumn) {
-            throw new IllegalStateException(s"Failed to build lookup table 
${lookupTableName} snapshot for Dup key found, key= 
${lookupTablePKS.mkString(",")}")
+            if (seg.kylinconf.isIgnoringNullInCheckDupKeyEnabled && 
countDistinctColumn + 1 == countColumn) {
+              // if only one row with null value, then countDistinctColumn + 1 
will equals to countColumn
+              logInfo("Using config: 
kylin.job.ignoring-null-in-check-dup-key-enabled=true to ignore only one null 
count.")
+            } else {
+              throw new IllegalStateException(s"Failed to build lookup table 
${lookupTableName} snapshot for Dup key found, key= 
${lookupTablePKS.mkString(",")}")
+            }
           }
         } else {
           logInfo("Skip check duplicate primary key on table : " + 
tableInfo.identity)

Reply via email to