This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new badbec6a15 [fix](spark load) not setting the file format cause null pointer exception (#16202)
badbec6a15 is described below

commit badbec6a15bfab0d2b76c5d6f6a6856bf61209c6
Author: gnehil <adamlee...@gmail.com>
AuthorDate: Tue Sep 5 12:14:07 2023 +0800

    [fix](spark load) not setting the file format cause null pointer exception (#16202)
---
 .../src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java | 2 +-
 .../main/java/org/apache/doris/statistics/StatisticsJobScheduler.java | 2 +-
 .../src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java
index b900085ca7..952ea5fa06 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java
@@ -115,7 +115,7 @@ public class MetaStatisticsTask extends StatisticsTask {
                 break;
             case PARTITION:
                 Partition partition = getNotNullPartition(granularity, table);
-                long partitionSize = partition.getBaseIndex().getDataSize();
+                long partitionSize = partition.getBaseIndex().getDataSize(false);
                 result.getStatsTypeToValue().put(StatsType.DATA_SIZE, String.valueOf(partitionSize));
                 break;
             case TABLET:
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobScheduler.java
index 4e492d6e30..87e8c09678 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobScheduler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobScheduler.java
@@ -330,7 +330,7 @@ public class StatisticsJobScheduler extends MasterDaemon {
                     LOG.info("Partition {} not found in the table {}", partitionName, table.getName());
                     continue;
                 }
-                if (partition.getDataSize() == 0) {
+                if (partition.getDataSize(false) == 0) {
                     LOG.info("Do not collect statistics for empty partition {} in the table {}",
                             partitionName, table.getName());
                     continue;
diff --git a/fe/spark-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java b/fe/spark-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java
index e6c9bf5528..33ca13cb0e 100644
--- a/fe/spark-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java
+++ b/fe/spark-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java
@@ -627,7 +627,7 @@ public final class SparkDpp implements java.io.Serializable {
             srcColumnsWithColumnsFromPath.addAll(fileGroup.columnsFromPath);
         }
 
-        if (fileGroup.fileFormat.equalsIgnoreCase("parquet")) {
+        if ("parquet".equalsIgnoreCase(fileGroup.fileFormat)) {
             // parquet had its own schema, just use it; perhaps we could add some validation in future.
             Dataset<Row> dataFrame = spark.read().parquet(fileUrl);
             if (!CollectionUtils.isEmpty(columnValueFromPath)) {
@@ -639,7 +639,7 @@ public final class SparkDpp implements java.io.Serializable {
             return dataFrame;
         }
 
-        if (fileGroup.fileFormat.equalsIgnoreCase("orc")) {
+        if ("orc".equalsIgnoreCase(fileGroup.fileFormat)) {
             Dataset<Row> dataFrame = spark.read().orc(fileUrl);
             if (!CollectionUtils.isEmpty(columnValueFromPath)) {
                 for (int k = 0; k < columnValueFromPath.size(); k++) {

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org