[kylin] branch main updated: KYLIN-5067 Skip snapshot build when dimension table's kind is LOOKUP

xxyu Sat, 18 Sep 2021 00:10:33 -0700

This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/kylin.git



The following commit(s) were added to refs/heads/main by this push:
     new df6dd47  KYLIN-5067 Skip snapshot build when dimension table's kind is LOOKUP
df6dd47 is described below

commit df6dd4726a90fdabb4dc4e04daacbee8f206b782
Author: XiaoxiangYu <x...@apache.org>
AuthorDate: Sat Sep 18 15:02:10 2021 +0800

    KYLIN-5067 Skip snapshot build when dimension table's kind is LOOKUP
---
 .../kylin/engine/spark/builder/CreateFlatTable.scala  |  2 +-
 .../engine/spark/builder/CubeSnapshotBuilder.scala    | 19 ++++++++++++-------
 .../kylin/engine/spark/job/ParentSourceChooser.scala  |  2 +-
 .../kylin/storage/spark/HadoopFileStorageQuery.java   |  1 +
 4 files changed, 15 insertions(+), 9 deletions(-)

diff --git 
a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala
 
b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala
index d266b9a..8154bd2 100644
--- 
a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala
+++ 
b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala
@@ -46,7 +46,7 @@ class CreateFlatTable(val seg: SegmentInfo,
     val ccCols = 
seg.allColumns.filter(_.isInstanceOf[ComputedColumnDesc]).toSet
     var rootFactDataset = generateTableDataset(seg.factTable, ccCols.toSeq, 
ss, seg.project)
 
-    logInfo(s"Create flattable need join lookup tables $needJoin, need encode 
cols $needEncode")
+    logInfo(s"Create flat table need join lookup tables $needJoin, need encode 
cols $needEncode")
     rootFactDataset = applyPartitionCondition(seg, rootFactDataset)
 
     (needJoin, needEncode) match {
diff --git 
a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala
 
b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala
index 0248fb5..7fcfbd0 100644
--- 
a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala
+++ 
b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala
@@ -189,13 +189,18 @@ class CubeSnapshotBuilder extends Logging {
     joinDescs.foreach {
       joinDesc =>
         val tableInfo = joinDesc.lookupTable
-        val lookupTableName = tableInfo.tableName
-        val df = ss.table(tableInfo)
-        val countColumn = df.count()
-        val lookupTablePKS = joinDesc.PKS.map(lookupTablePK => 
lookupTablePK.columnName)
-        val countDistinctColumn = df.agg(countDistinct(lookupTablePKS.head, 
lookupTablePKS.tail: _*)).collect().map(_.getLong(0)).head
-        if (countColumn != countDistinctColumn) {
-          throw new IllegalStateException(s"Failed to build lookup table 
${lookupTableName} snapshot for Dup key found, key= 
${lookupTablePKS.mkString(",")}")
+        // Build snapshot when DataModelDesc.JoinTableDesc.TableKind is 
TableKind.LOOKUP
+        if (seg.snapshotTables.exists(t => 
t.identity.equals(tableInfo.identity))) {
+          val lookupTableName = tableInfo.tableName
+          val df = ss.table(tableInfo)
+          val countColumn = df.count()
+          val lookupTablePKS = joinDesc.PKS.map(lookupTablePK => 
lookupTablePK.columnName)
+          val countDistinctColumn = df.agg(countDistinct(lookupTablePKS.head, 
lookupTablePKS.tail: _*)).collect().map(_.getLong(0)).head
+          if (countColumn != countDistinctColumn) {
+            throw new IllegalStateException(s"Failed to build lookup table 
${lookupTableName} snapshot for Dup key found, key= 
${lookupTablePKS.mkString(",")}")
+          }
+        } else {
+          logInfo("Skip check duplicate primary key on table : " + 
tableInfo.identity)
         }
     }
   }
diff --git 
a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/job/ParentSourceChooser.scala
 
b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/job/ParentSourceChooser.scala
index fdb04ab..0f77c76 100644
--- 
a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/job/ParentSourceChooser.scala
+++ 
b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/job/ParentSourceChooser.scala
@@ -75,7 +75,7 @@ class ParentSourceChooser(
 
   def decideFlatTableSource(entity: LayoutEntity): Unit = {
     if (flatTableSource == null) {
-      if (needEncoding) {
+      if (segInfo.snapshotTables.nonEmpty && needEncoding) {
         // hacked, for some case, you do not want to trigger buildSnapshot
         // eg: resource detect
         // Move this to a more suitable place
diff --git 
a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/storage/spark/HadoopFileStorageQuery.java
 
b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/storage/spark/HadoopFileStorageQuery.java
index a209a43..5ab9fc5 100644
--- 
a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/storage/spark/HadoopFileStorageQuery.java
+++ 
b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/storage/spark/HadoopFileStorageQuery.java
@@ -84,6 +84,7 @@ public class HadoopFileStorageQuery extends 
GTCubeStorageQueryBase {
         dimensionsD.addAll(groupsD);
         dimensionsD.addAll(otherDimsD);
         Cuboid cuboid = findCuboid(cubeInstance, dimensionsD, metrics);
+        log.info("For OLAPContext {}, need cuboid {}, hit cuboid {}, level 
diff is {}.", olapContext.id, cuboid.getInputID() , cuboid.getId(), 
Long.bitCount(cuboid.getInputID() ^ cuboid.getId()));
         context.setCuboid(cuboid);
         return new GTCubeStorageQueryRequest(cuboid, dimensionsD, groupsD, 
null, null, null,
                 metrics, null, null, null, context);

[kylin] branch main updated: KYLIN-5067 Skip snapshot build when dimension table's kind is LOOKUP

Reply via email to