This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch 2.6.x
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/2.6.x by this push:
     new d783e1d  KYLIN-4841 Spark RDD cache is invalid when building with spark engine
d783e1d is described below

commit d783e1d141830b6e12cb8648d494a4533fec36cf
Author: Zhichao Zhang <441586...@qq.com>
AuthorDate: Mon Dec 14 20:14:19 2020 +0800

    KYLIN-4841 Spark RDD cache is invalid when building with spark engine
---
 .../main/java/org/apache/kylin/engine/spark/SparkCubingByLayer.java | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubingByLayer.java b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubingByLayer.java
index cb1a33f..efdda18 100644
--- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubingByLayer.java
+++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubingByLayer.java
@@ -197,11 +197,13 @@ public class SparkCubingByLayer extends AbstractApplication implements Serializa
             allRDDs[level] = allRDDs[level - 1].flatMapToPair(flatMapFunction).reduceByKey(reducerFunction2, partition)
                     .persist(storageLevel);
-            allRDDs[level - 1].unpersist(false);
             if (envConfig.isSparkSanityCheckEnabled() == true) {
                 sanityCheck(allRDDs[level], totalCount, level, cubeStatsReader, countMeasureIndex);
             }
             saveToHDFS(allRDDs[level], metaUrl, cubeName, cubeSegment, outputPath, level, job, envConfig);
+            // must do 'unpersist' after allRDDs[level] is created,
+            // otherwise the parent RDD 'allRDDs[level - 1]' will be recomputed
+            allRDDs[level - 1].unpersist(false);
         }
         allRDDs[totalLevels].unpersist(false);
         logger.info("Finished on calculating all level cuboids.");
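For context, a minimal standalone sketch (not part of the commit; the class and
variable names here are hypothetical) of why the ordering in the fix matters:
Spark RDDs are lazy, so persist() only marks an RDD for caching, and the cache
is not populated or consulted until an action materializes the RDD. Unpersisting
the parent before any action has run on the child evicts the very cache the
child's computation would have read, forcing the parent's full lineage to be
recomputed:

    import java.util.Arrays;

    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.storage.StorageLevel;

    public class UnpersistOrderingSketch {

        static void build(JavaSparkContext sc) {
            JavaRDD<Integer> parent = sc.parallelize(Arrays.asList(1, 2, 3))
                    .persist(StorageLevel.MEMORY_AND_DISK_SER());
            parent.count(); // action: parent is now actually cached

            JavaRDD<Integer> child = parent.map(x -> x * 2)
                    .persist(StorageLevel.MEMORY_AND_DISK_SER());
            // child is only *marked* for caching at this point; nothing has run yet

            // BUG (pre-fix ordering): unpersisting here would evict parent's
            // cache before any action has materialized child, so the action
            // below would re-run parent's whole lineage from scratch:
            // parent.unpersist(false);

            child.count(); // action: child is computed, reading parent from cache

            // FIX (post-fix ordering): release the parent only after the child
            // has been materialized by an action (here count(); in the commit,
            // sanityCheck and saveToHDFS play that role for allRDDs[level])
            parent.unpersist(false);
        }
    }

The same reasoning applies per level in SparkCubingByLayer: allRDDs[level - 1]
may be unpersisted only after the actions on allRDDs[level] have run, which is
exactly what moving the unpersist() call below saveToHDFS() achieves.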