This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/main by this push: new 6fa14239c4 KYLIN-5182, support to ignore only one null count in check duplicate key 6fa14239c4 is described below commit 6fa14239c49c800f69369321eafdc4ef9727ba33 Author: Mukvin <boyboys...@163.com> AuthorDate: Fri May 6 18:37:29 2022 +0800 KYLIN-5182, support to ignore only one null count in check duplicate key --- .../src/main/java/org/apache/kylin/common/KylinConfigBase.java | 5 +++++ .../apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala | 7 ++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index 31a600857f..ba06df7345 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -2321,6 +2321,11 @@ public abstract class KylinConfigBase implements Serializable { return Boolean.valueOf(getOptional("kylin.engine.build-base-cuboid-enabled", TRUE)); } + @ConfigTag(ConfigTag.Tag.CUBE_LEVEL) + public boolean isIgnoringNullInCheckDupKeyEnabled() { + return Boolean.valueOf(this.getOptional("kylin.job.ignoring-null-in-check-dup-key-enabled", FALSE)); + } + // ============================================================================ // Kylin 4.X Spark resources automatic adjustment strategy configuration // ============================================================================ diff --git a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala index 7fcfbd0a57..468f1228a2 100644 --- a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala +++ b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala @@ -197,7 +197,12 @@ class CubeSnapshotBuilder extends Logging { val lookupTablePKS = joinDesc.PKS.map(lookupTablePK => lookupTablePK.columnName) val countDistinctColumn = df.agg(countDistinct(lookupTablePKS.head, lookupTablePKS.tail: _*)).collect().map(_.getLong(0)).head if (countColumn != countDistinctColumn) { - throw new IllegalStateException(s"Failed to build lookup table ${lookupTableName} snapshot for Dup key found, key= ${lookupTablePKS.mkString(",")}") + if (seg.kylinconf.isIgnoringNullInCheckDupKeyEnabled && countDistinctColumn + 1 == countColumn) { + // if only one row with null value, then countDistinctColumn + 1 will equals to countColumn + logInfo("Using config: kylin.job.ignoring-null-in-check-dup-key-enabled=true to ignore only one null count.") + } else { + throw new IllegalStateException(s"Failed to build lookup table ${lookupTableName} snapshot for Dup key found, key= ${lookupTablePKS.mkString(",")}") + } } } else { logInfo("Skip check duplicate primary key on table : " + tableInfo.identity)