This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new a9da4deb318 [opt](paimon/iceberg)upgrade paimon to 1.0.1, iceberg to 1.6.1 (#49280) a9da4deb318 is described below commit a9da4deb31821cd799637315c230244a4fbcfe6d Author: wuwenchi <wuwen...@selectdb.com> AuthorDate: Fri Apr 25 10:19:11 2025 +0800 [opt](paimon/iceberg)upgrade paimon to 1.0.1, iceberg to 1.6.1 (#49280) bp #46990 #47768 #47117 --------- Co-authored-by: Socrates <suyit...@selectdb.com> Co-authored-by: Mingyu Chen (Rayner) <morning...@163.com> --- fe/fe-core/pom.xml | 16 ++++----- .../datasource/iceberg/source/IcebergScanNode.java | 42 +++++++++++++++++++++- .../datasource/paimon/PaimonExternalCatalog.java | 11 +++++- .../datasource/paimon/PaimonExternalTable.java | 2 +- .../datasource/paimon/PaimonMetadataCache.java | 2 +- .../apache/doris/datasource/paimon/PaimonUtil.java | 2 +- .../datasource/paimon/source/PaimonScanNode.java | 7 +++- fe/pom.xml | 12 +++---- 8 files changed, 72 insertions(+), 22 deletions(-) diff --git a/fe/fe-core/pom.xml b/fe/fe-core/pom.xml index 07caa16802e..4a48bc34258 100644 --- a/fe/fe-core/pom.xml +++ b/fe/fe-core/pom.xml @@ -32,7 +32,6 @@ under the License. <doris.home>${basedir}/../../</doris.home> <doris.thirdparty>${basedir}/../../thirdparty</doris.thirdparty> <fe_ut_parallel>1</fe_ut_parallel> - <awssdk.version>2.20.131</awssdk.version> <huaweiobs.version>3.1.1-hw-46</huaweiobs.version> <tencentcos.version>8.2.7</tencentcos.version> </properties> @@ -502,10 +501,10 @@ under the License. <artifactId>s3</artifactId> <version>${awssdk.version}</version> <exclusions> - <exclusion> + <!--exclusion> <groupId>software.amazon.awssdk</groupId> <artifactId>apache-client</artifactId> - </exclusion> + </exclusion--> </exclusions> </dependency> <!-- tencent cloud sts --> @@ -660,11 +659,6 @@ under the License. <groupId>org.apache.iceberg</groupId> <artifactId>iceberg-aws</artifactId> </dependency> - <dependency> - <groupId>org.apache.iceberg</groupId> - <artifactId>iceberg-aws-bundle</artifactId> - <scope>runtime</scope> - </dependency> <dependency> <groupId>org.apache.paimon</groupId> <artifactId>paimon-core</artifactId> @@ -942,6 +936,12 @@ under the License. </dependency> </dependencies> <repositories> + <!-- for hive-catalog-shade --> + <repository> + <id>central</id> + <name>central maven repo https</name> + <url>https://repo.maven.apache.org/maven2</url> + </repository> <!-- for huawei obs sdk --> <repository> <id>huawei-obs-sdk</id> diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java index 29c07be8192..a9e9104ec85 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java @@ -186,7 +186,12 @@ public class IcebergScanNode extends FileQueryScanNode { try { return source.getCatalog().getPreExecutionAuthenticator().execute(() -> doGetSplits(numBackends)); } catch (Exception e) { - throw new RuntimeException(ExceptionUtils.getRootCauseMessage(e), e); + Optional<NotSupportedException> opt = checkNotSupportedException(e); + if (opt.isPresent()) { + throw opt.get(); + } else { + throw new RuntimeException(ExceptionUtils.getRootCauseMessage(e), e); + } } } @@ -460,4 +465,39 @@ public class IcebergScanNode extends FileQueryScanNode { } ((IcebergSplit) splits.get(size - 1)).setTableLevelRowCount(countPerSplit + totalCount % size); } + + private Optional<NotSupportedException> checkNotSupportedException(Exception e) { + if (e instanceof NullPointerException) { + /* + Caused by: java.lang.NullPointerException: Type cannot be null + at org.apache.iceberg.relocated.com.google.common.base.Preconditions.checkNotNull + (Preconditions.java:921) ~[iceberg-bundled-guava-1.4.3.jar:?] + at org.apache.iceberg.types.Types$NestedField.<init>(Types.java:447) ~[iceberg-api-1.4.3.jar:?] + at org.apache.iceberg.types.Types$NestedField.optional(Types.java:416) ~[iceberg-api-1.4.3.jar:?] + at org.apache.iceberg.PartitionSpec.partitionType(PartitionSpec.java:132) ~[iceberg-api-1.4.3.jar:?] + at org.apache.iceberg.DeleteFileIndex.lambda$new$0(DeleteFileIndex.java:97) ~[iceberg-core-1.4.3.jar:?] + at org.apache.iceberg.relocated.com.google.common.collect.RegularImmutableMap.forEach + (RegularImmutableMap.java:297) ~[iceberg-bundled-guava-1.4.3.jar:?] + at org.apache.iceberg.DeleteFileIndex.<init>(DeleteFileIndex.java:97) ~[iceberg-core-1.4.3.jar:?] + at org.apache.iceberg.DeleteFileIndex.<init>(DeleteFileIndex.java:71) ~[iceberg-core-1.4.3.jar:?] + at org.apache.iceberg.DeleteFileIndex$Builder.build(DeleteFileIndex.java:578) ~[iceberg-core-1.4.3.jar:?] + at org.apache.iceberg.ManifestGroup.plan(ManifestGroup.java:183) ~[iceberg-core-1.4.3.jar:?] + at org.apache.iceberg.ManifestGroup.planFiles(ManifestGroup.java:170) ~[iceberg-core-1.4.3.jar:?] + at org.apache.iceberg.DataTableScan.doPlanFiles(DataTableScan.java:89) ~[iceberg-core-1.4.3.jar:?] + at org.apache.iceberg.SnapshotScan.planFiles(SnapshotScan.java:139) ~[iceberg-core-1.4.3.jar:?] + at org.apache.doris.datasource.iceberg.source.IcebergScanNode.doGetSplits + (IcebergScanNode.java:209) ~[doris-fe.jar:1.2-SNAPSHOT] + EXAMPLE: + CREATE TABLE iceberg_tb(col1 INT,col2 STRING) USING ICEBERG PARTITIONED BY (bucket(10,col2)); + INSERT INTO iceberg_tb VALUES( ... ); + ALTER TABLE iceberg_tb DROP PARTITION FIELD bucket(10,col2); + ALTER TABLE iceberg_tb DROP COLUMNS col2 STRING; + Link: https://github.com/apache/iceberg/pull/10755 + */ + LOG.warn("Iceberg TableScanUtil.splitFiles throw NullPointerException. Cause : ", e); + return Optional.of( + new NotSupportedException("Unable to read Iceberg table with dropped old partition column.")); + } + return Optional.empty(); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalCatalog.java index 46ee79166b8..e87994ecdd3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalCatalog.java @@ -35,6 +35,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.paimon.catalog.Catalog; +import org.apache.paimon.catalog.Catalog.TableNotExistException; import org.apache.paimon.catalog.CatalogContext; import org.apache.paimon.catalog.CatalogFactory; import org.apache.paimon.catalog.Identifier; @@ -94,7 +95,15 @@ public abstract class PaimonExternalCatalog extends ExternalCatalog { public boolean tableExist(SessionContext ctx, String dbName, String tblName) { makeSureInitialized(); try { - return hadoopAuthenticator.doAs(() -> catalog.tableExists(Identifier.create(dbName, tblName))); + return hadoopAuthenticator.doAs(() -> { + try { + catalog.getTable(Identifier.create(dbName, tblName)); + return true; + } catch (TableNotExistException e) { + return false; + } + }); + } catch (IOException e) { throw new RuntimeException("Failed to check table existence, catalog name: " + getName(), e); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java index d234a4e081e..ffe71bc30af 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java @@ -207,7 +207,7 @@ public class PaimonExternalTable extends ExternalTable implements MvccTable { PredicateBuilder builder = new PredicateBuilder(table.rowType()); Predicate predicate = builder.equal(0, key.getSchemaId()); // Adding predicates will also return excess data - List<InternalRow> rows = PaimonUtil.read(table, new int[][] {{0}, {1}, {2}}, predicate); + List<InternalRow> rows = PaimonUtil.read(table, new int[] {0, 1, 2}, predicate); for (InternalRow row : rows) { PaimonSchema schema = PaimonUtil.rowToSchema(row); if (schema.getSchemaId() == key.getSchemaId()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonMetadataCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonMetadataCache.java index 5b711e07066..109394fabde 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonMetadataCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonMetadataCache.java @@ -97,7 +97,7 @@ public class PaimonMetadataCache { Table table = ((PaimonExternalCatalog) key.getCatalog()).getPaimonTable(key.getDbName(), key.getTableName() + Catalog.SYSTEM_TABLE_SPLITTER + SnapshotsTable.SNAPSHOTS); // snapshotId and schemaId - List<InternalRow> rows = PaimonUtil.read(table, new int[][] {{0}, {1}}, null); + List<InternalRow> rows = PaimonUtil.read(table, new int[] {0, 1}, null); long latestSnapshotId = 0L; long latestSchemaId = 0L; for (InternalRow row : rows) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java index b3df41bc5ce..bbb1eaf5096 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java @@ -62,7 +62,7 @@ public class PaimonUtil { private static final Logger LOG = LogManager.getLogger(PaimonUtil.class); public static List<InternalRow> read( - Table table, @Nullable int[][] projection, @Nullable Predicate predicate, + Table table, @Nullable int[] projection, @Nullable Predicate predicate, Pair<ConfigOption<?>, String>... dynamicOptions) throws IOException { Map<String, String> options = new HashMap<>(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java index 5afef56f085..07a49a7aaea 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java @@ -212,7 +212,12 @@ public class PaimonScanNode extends FileQueryScanNode { .valueOf(sessionVariable.getIgnoreSplitType()); List<Split> splits = new ArrayList<>(); int[] projected = desc.getSlots().stream().mapToInt( - slot -> (source.getPaimonTable().rowType().getFieldNames().indexOf(slot.getColumn().getName()))) + slot -> source.getPaimonTable().rowType() + .getFieldNames() + .stream() + .map(String::toLowerCase) + .collect(Collectors.toList()) + .indexOf(slot.getColumn().getName())) .toArray(); ReadBuilder readBuilder = source.getPaimonTable().newReadBuilder(); List<org.apache.paimon.table.source.Split> paimonSplits = readBuilder.withFilter(predicates) diff --git a/fe/pom.xml b/fe/pom.xml index 72b62f18c50..6ef09c36a33 100644 --- a/fe/pom.xml +++ b/fe/pom.xml @@ -222,7 +222,7 @@ under the License. <module>be-java-extensions</module> </modules> <properties> - <doris.hive.catalog.shade.version>2.1.1</doris.hive.catalog.shade.version> + <doris.hive.catalog.shade.version>2.1.4</doris.hive.catalog.shade.version> <avro.version>1.11.4</avro.version> <parquet.version>1.13.1</parquet.version> <spark.version>3.4.3</spark.version> @@ -318,7 +318,7 @@ under the License. <!-- ATTN: avro version must be consistent with Iceberg version --> <!-- Please modify iceberg.version and avro.version together, you can find avro version info in iceberg mvn repository --> - <iceberg.version>1.4.3</iceberg.version> + <iceberg.version>1.6.1</iceberg.version> <maxcompute.version>0.49.0-public</maxcompute.version> <arrow.version>17.0.0</arrow.version> <presto.hadoop.version>2.7.4-11</presto.hadoop.version> @@ -365,7 +365,7 @@ under the License. <quartz.version>2.3.2</quartz.version> <aircompressor.version>0.27</aircompressor.version> <!-- paimon --> - <paimon.version>0.8.1</paimon.version> + <paimon.version>1.0.1</paimon.version> <disruptor.version>3.4.4</disruptor.version> <!-- arrow flight sql --> <arrow.vector.classifier>shade-format-flatbuffers</arrow.vector.classifier> @@ -380,6 +380,7 @@ under the License. <azure.sdk.batch.version>12.22.0</azure.sdk.batch.version> <semver4j.version>5.3.0</semver4j.version> <aliyun-sdk-oss.version>3.15.0</aliyun-sdk-oss.version> + <awssdk.version>2.29.26</awssdk.version> </properties> <profiles> <profile> @@ -1328,11 +1329,6 @@ under the License. <artifactId>iceberg-aws</artifactId> <version>${iceberg.version}</version> </dependency> - <dependency> - <groupId>org.apache.iceberg</groupId> - <artifactId>iceberg-aws-bundle</artifactId> - <version>${iceberg.version}</version> - </dependency> <dependency> <groupId>com.aliyun.odps</groupId> <artifactId>odps-sdk-core</artifactId> --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org