This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/main by this push:
     new df6dd47  KYLIN-5067 Skip snapshot build when dimension table's kind is LOOKUP
df6dd47 is described below

commit df6dd4726a90fdabb4dc4e04daacbee8f206b782
Author: XiaoxiangYu <x...@apache.org>
AuthorDate: Sat Sep 18 15:02:10 2021 +0800

    KYLIN-5067 Skip snapshot build when dimension table's kind is LOOKUP
---
 .../kylin/engine/spark/builder/CreateFlatTable.scala | 2 +-
 .../engine/spark/builder/CubeSnapshotBuilder.scala | 19 ++++++++++++-------
 .../kylin/engine/spark/job/ParentSourceChooser.scala | 2 +-
 .../kylin/storage/spark/HadoopFileStorageQuery.java | 1 +
 4 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala
index d266b9a..8154bd2 100644
--- a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala
+++ b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala
@@ -46,7 +46,7 @@ class CreateFlatTable(val seg: SegmentInfo,
     val ccCols = seg.allColumns.filter(_.isInstanceOf[ComputedColumnDesc]).toSet
     var rootFactDataset = generateTableDataset(seg.factTable, ccCols.toSeq, ss, seg.project)
 
-    logInfo(s"Create flattable need join lookup tables $needJoin, need encode cols $needEncode")
+    logInfo(s"Create flat table need join lookup tables $needJoin, need encode cols $needEncode")
     rootFactDataset = applyPartitionCondition(seg, rootFactDataset)
 
     (needJoin, needEncode) match {
diff --git a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala
index 0248fb5..7fcfbd0 100644
--- a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala
+++ b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CubeSnapshotBuilder.scala
@@ -189,13 +189,18 @@ class CubeSnapshotBuilder extends Logging {
     joinDescs.foreach {
       joinDesc =>
         val tableInfo = joinDesc.lookupTable
-        val lookupTableName = tableInfo.tableName
-        val df = ss.table(tableInfo)
-        val countColumn = df.count()
-        val lookupTablePKS = joinDesc.PKS.map(lookupTablePK => lookupTablePK.columnName)
-        val countDistinctColumn = df.agg(countDistinct(lookupTablePKS.head, lookupTablePKS.tail: _*)).collect().map(_.getLong(0)).head
-        if (countColumn != countDistinctColumn) {
-          throw new IllegalStateException(s"Failed to build lookup table ${lookupTableName} snapshot for Dup key found, key= ${lookupTablePKS.mkString(",")}")
+        // Build snapshot when DataModelDesc.JoinTableDesc.TableKind is TableKind.LOOKUP
+        if (seg.snapshotTables.exists(t => t.identity.equals(tableInfo.identity))) {
+          val lookupTableName = tableInfo.tableName
+          val df = ss.table(tableInfo)
+          val countColumn = df.count()
+          val lookupTablePKS = joinDesc.PKS.map(lookupTablePK => lookupTablePK.columnName)
+          val countDistinctColumn = df.agg(countDistinct(lookupTablePKS.head, lookupTablePKS.tail: _*)).collect().map(_.getLong(0)).head
+          if (countColumn != countDistinctColumn) {
+            throw new IllegalStateException(s"Failed to build lookup table ${lookupTableName} snapshot for Dup key found, key= ${lookupTablePKS.mkString(",")}")
+          }
+        } else {
+          logInfo("Skip check duplicate primary key on table : " + tableInfo.identity)
         }
     }
   }
diff --git a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/job/ParentSourceChooser.scala b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/job/ParentSourceChooser.scala
index fdb04ab..0f77c76 100644
--- a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/job/ParentSourceChooser.scala
+++ b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/job/ParentSourceChooser.scala
@@ -75,7 +75,7 @@ class ParentSourceChooser(
 
   def decideFlatTableSource(entity: LayoutEntity): Unit = {
     if (flatTableSource == null) {
-      if (needEncoding) {
+      if (segInfo.snapshotTables.nonEmpty && needEncoding) {
         // hacked, for some case, you do not want to trigger buildSnapshot
         // eg: resource detect
         // Move this to a more suitable place
diff --git a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/storage/spark/HadoopFileStorageQuery.java b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/storage/spark/HadoopFileStorageQuery.java
index a209a43..5ab9fc5 100644
--- a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/storage/spark/HadoopFileStorageQuery.java
+++ b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/storage/spark/HadoopFileStorageQuery.java
@@ -84,6 +84,7 @@ public class HadoopFileStorageQuery extends GTCubeStorageQueryBase {
         dimensionsD.addAll(groupsD);
         dimensionsD.addAll(otherDimsD);
         Cuboid cuboid = findCuboid(cubeInstance, dimensionsD, metrics);
+        log.info("For OLAPContext {}, need cuboid {}, hit cuboid {}, level diff is {}.", olapContext.id, cuboid.getInputID() , cuboid.getId(), Long.bitCount(cuboid.getInputID() ^ cuboid.getId()));
         context.setCuboid(cuboid);
 
         return new GTCubeStorageQueryRequest(cuboid, dimensionsD, groupsD, null, null, null, metrics, null, null, null, context);