This is an automated email from the ASF dual-hosted git repository.

ashingau pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
     new dde04934efc [update](hudi) update hudi-spark bundle to 3.4.3 (#35013)
dde04934efc is described below

commit dde04934efc62218219add2514993966d0a02dea
Author: Ashin Gau <ashin...@users.noreply.github.com>
AuthorDate: Tue May 21 17:13:06 2024 +0800

    [update](hudi) update hudi-spark bundle to 3.4.3 (#35013)

    1. For security reasons, upgrade Spark to version 3.4.3.
    2. Resolve the Spark jar conflicts; the Spark dependencies differ between fe and be-java-extensions.
    3. The HBase version is 2.4.9, which is not secure and conflicts with hadoop3. A later PR will
       update HBase to 2.5.x; for now, `hbase-hadoop-compat:2.5.2-hadoop3` resolves the hadoop3 conflict.
---
 fe/be-java-extensions/hudi-scanner/pom.xml         | 41 ++++---------
 .../org/apache/doris/hudi/BaseSplitReader.scala    |  8 ++--
 .../apache/doris/hudi/MORSnapshotSplitReader.scala |  3 +-
 fe/be-java-extensions/preload-extensions/pom.xml   | 46 ++++++----------
 fe/fe-core/pom.xml                                 | 35 +++++++++++-----
 .../hudi/source/HudiCachedPartitionProcessor.java  |  1 +
 fe/pom.xml                                         | 45 +++++++++++++++++++--
 7 files changed, 94 insertions(+), 85 deletions(-)
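Two of the points above meet in the Scala scanner: the Hudi bundle artifact is now resolved through `${hudi-spark.version}` (set to `hudi-spark3.4.x` in fe/pom.xml), and the reader's adapter moves from `Spark3_2Adapter` to `Spark3_4Adapter`. A minimal sketch of that pairing, lifted from the patch below; only the comments are ours:

    // The adapter is the single point where hudi-scanner binds to a concrete Spark
    // release; its superclass must match the spark.version the module is built
    // against (Spark3_4Adapter ships with the hudi-spark3.4.x bundle).
    import org.apache.spark.sql.adapter.Spark3_4Adapter
    import org.apache.spark.sql.avro.{HoodieAvroSchemaConverters, HoodieSparkAvroSchemaConverters}

    class DorisSparkAdapter extends Spark3_4Adapter {
      // Route Avro <-> Catalyst schema conversion through Hudi's Spark converters.
      override def getAvroSchemaConverters: HoodieAvroSchemaConverters = HoodieSparkAvroSchemaConverters
    }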
diff --git a/fe/be-java-extensions/hudi-scanner/pom.xml b/fe/be-java-extensions/hudi-scanner/pom.xml
index d4f7a458612..465a9393b0f 100644
--- a/fe/be-java-extensions/hudi-scanner/pom.xml
+++ b/fe/be-java-extensions/hudi-scanner/pom.xml
@@ -32,10 +32,7 @@ under the License.
         <fe_ut_parallel>1</fe_ut_parallel>
         <scala.version>2.12.15</scala.version>
         <scala.binary.version>2.12</scala.binary.version>
-        <spark.version>3.2.0</spark.version>
-        <sparkbundle.version>3.2</sparkbundle.version>
-        <janino.version>3.0.16</janino.version>
-        <avro.version>1.11.2</avro.version>
+        <avro.version>1.11.3</avro.version>
     </properties>
 
     <dependencyManagement>
@@ -91,7 +88,7 @@ under the License.
         </dependency>
         <dependency>
             <groupId>org.apache.hudi</groupId>
-            <artifactId>hudi-spark3.2.x_${scala.binary.version}</artifactId>
+            <artifactId>${hudi-spark.version}_${scala.binary.version}</artifactId>
             <version>${hudi.version}</version>
             <scope>provided</scope>
             <exclusions>
@@ -119,6 +116,11 @@ under the License.
             <version>1.10.1</version>
             <scope>provided</scope>
         </dependency>
+        <dependency>
+            <groupId>org.antlr</groupId>
+            <artifactId>antlr4-runtime</artifactId>
+            <version>${antlr4.version}</version>
+        </dependency>
         <dependency>
             <groupId>org.apache.spark</groupId>
             <artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -160,35 +162,6 @@ under the License.
             <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
             <version>${spark.version}</version>
             <scope>provided</scope>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.codehaus.janino</groupId>
-                    <artifactId>janino</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.codehaus.janino</groupId>
-                    <artifactId>commons-compiler</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-        <dependency>
-            <!-- version of spark's janino is error -->
-            <groupId>org.codehaus.janino</groupId>
-            <artifactId>janino</artifactId>
-            <version>${janino.version}</version>
-            <scope>provided</scope>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.codehaus.janino</groupId>
-                    <artifactId>commons-compiler</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-        <dependency>
-            <groupId>org.codehaus.janino</groupId>
-            <artifactId>commons-compiler</artifactId>
-            <version>${janino.version}</version>
-            <scope>provided</scope>
         </dependency>
         <dependency>
             <!-- version of spark's jackson module is error -->
diff --git a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala
index 8229064163d..dcc068ad700 100644
--- a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala
+++ b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala
@@ -44,7 +44,7 @@ import org.apache.hudi.io.storage.HoodieAvroHFileReader
 import org.apache.hudi.metadata.HoodieTableMetadataUtil
 import org.apache.hudi.{AvroConversionUtils, DataSourceReadOptions, DataSourceWriteOptions, HoodieSparkConfUtils, HoodieTableSchema, HoodieTableState}
 import org.apache.log4j.Logger
-import org.apache.spark.sql.adapter.Spark3_2Adapter
+import org.apache.spark.sql.adapter.Spark3_4Adapter
 import org.apache.spark.sql.avro.{HoodieAvroSchemaConverters, HoodieSparkAvroSchemaConverters}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
@@ -66,7 +66,7 @@ import scala.collection.JavaConverters._
 import scala.util.control.NonFatal
 import scala.util.{Failure, Success, Try}
 
-class DorisSparkAdapter extends Spark3_2Adapter {
+class DorisSparkAdapter extends Spark3_4Adapter {
   override def getAvroSchemaConverters: HoodieAvroSchemaConverters = HoodieSparkAvroSchemaConverters
 }
 
@@ -498,7 +498,7 @@ abstract class BaseSplitReader(val split: HoodieSplit) {
                              hadoopConf: Configuration): PartitionedFile => Iterator[InternalRow] = {
     partitionedFile => {
       val reader = new HoodieAvroHFileReader(
-        hadoopConf, new Path(partitionedFile.filePath), new CacheConfig(hadoopConf))
+        hadoopConf, partitionedFile.filePath.toPath, new CacheConfig(hadoopConf))
 
       val requiredRowSchema = requiredDataSchema.structTypeSchema
       // NOTE: Schema has to be parsed at this point, since Avro's [[Schema]] aren't serializable
@@ -573,7 +573,7 @@ abstract class BaseSplitReader(val split: HoodieSplit) {
 
     BaseFileReader(
       read = partitionedFile => {
-        val extension = FSUtils.getFileExtension(partitionedFile.filePath)
+        val extension = FSUtils.getFileExtension(partitionedFile.filePath.toString())
         if (tableBaseFileFormat.getFileExtension.equals(extension)) {
           read(partitionedFile)
         } else {
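Both `filePath` edits in BaseSplitReader stem from the same upstream change: in Spark 3.4, `PartitionedFile.filePath` is an `org.apache.spark.paths.SparkPath` rather than the plain `String` of Spark 3.2, so each call site converts to whichever representation its consumer expects. A minimal sketch of the two directions, assuming a Spark 3.4.x classpath; the helper names are ours, not the patch's:

    import org.apache.hadoop.fs.{Path => HadoopPath}
    import org.apache.spark.sql.execution.datasources.PartitionedFile

    // Hadoop-facing consumers (e.g. the HFile reader) want a hadoop.fs.Path ...
    def asHadoopPath(pf: PartitionedFile): HadoopPath = pf.filePath.toPath

    // ... while string-based helpers (e.g. FSUtils.getFileExtension) want the raw string.
    def asPathString(pf: PartitionedFile): String = pf.filePath.toString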
diff --git a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/MORSnapshotSplitReader.scala b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/MORSnapshotSplitReader.scala
index 07e236082ce..02a4fa40045 100644
--- a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/MORSnapshotSplitReader.scala
+++ b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/MORSnapshotSplitReader.scala
@@ -21,6 +21,7 @@ import org.apache.hudi.HoodieBaseRelation.convertToAvroSchema
 import org.apache.hudi.avro.HoodieAvroUtils
 import org.apache.hudi.common.model.HoodieLogFile
 import org.apache.hudi.{DataSourceReadOptions, HoodieMergeOnReadFileSplit, HoodieTableSchema}
+import org.apache.spark.paths.SparkPath
 import org.apache.spark.sql.SQLContext
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.execution.datasources.PartitionedFile
@@ -80,7 +81,7 @@ class MORSnapshotSplitReader(override val split: HoodieSplit) extends BaseSplitR
     val partitionedBaseFile = if (split.dataFilePath.isEmpty) {
       None
     } else {
-      Some(PartitionedFile(getPartitionColumnsAsInternalRow(), split.dataFilePath, 0, split.dataFileLength))
+      Some(PartitionedFile(getPartitionColumnsAsInternalRow(), SparkPath.fromPathString(split.dataFilePath), 0, split.dataFileLength))
     }
     HoodieMergeOnReadFileSplit(partitionedBaseFile, logFiles)
   }
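Construction goes the opposite way: a raw `String` has to be wrapped into a `SparkPath` before building a `PartitionedFile`, which is what the `SparkPath.fromPathString` call above does. A small usage sketch under the same Spark 3.4.x assumption; the path and length literals are invented for illustration:

    import org.apache.spark.paths.SparkPath
    import org.apache.spark.sql.catalyst.InternalRow
    import org.apache.spark.sql.execution.datasources.PartitionedFile

    // start/length delimit the byte range of the base file to scan; the remaining
    // PartitionedFile fields (locations, modificationTime, fileSize) keep their defaults.
    val file = PartitionedFile(
      InternalRow.empty,                                                   // no partition columns in this sketch
      SparkPath.fromPathString("hdfs://nn:8020/warehouse/tbl/f1.parquet"), // hypothetical file
      0L,
      1024L)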
diff --git a/fe/be-java-extensions/preload-extensions/pom.xml b/fe/be-java-extensions/preload-extensions/pom.xml
index 31ce08db1b9..06e7b0a231f 100644
--- a/fe/be-java-extensions/preload-extensions/pom.xml
+++ b/fe/be-java-extensions/preload-extensions/pom.xml
@@ -33,8 +33,6 @@ under the License.
         <maven.compiler.source>8</maven.compiler.source>
         <maven.compiler.target>8</maven.compiler.target>
         <scala.binary.version>2.12</scala.binary.version>
-        <spark.version>3.2.0</spark.version>
-        <janino.version>3.0.16</janino.version>
     </properties>
 
     <dependencies>
@@ -63,6 +61,12 @@ under the License.
            <!-- Must be provided, we use hadoop_libs in BE's 3rd party instead -->
            <scope>provided</scope>
        </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-annotations</artifactId>
+            <version>${hadoop.version}</version>
+            <scope>provided</scope>
+        </dependency>
        <dependency>
            <groupId>org.apache.hudi</groupId>
            <artifactId>hudi-spark-client</artifactId>
@@ -83,6 +87,11 @@ under the License.
                 </exclusion>
             </exclusions>
         </dependency>
+        <dependency>
+            <groupId>org.antlr</groupId>
+            <artifactId>antlr4-runtime</artifactId>
+            <version>${antlr4.version}</version>
+        </dependency>
         <dependency>
             <groupId>org.apache.hudi</groupId>
             <artifactId>hudi-spark3-common</artifactId>
@@ -90,7 +99,7 @@ under the License.
         </dependency>
         <dependency>
             <groupId>org.apache.hudi</groupId>
-            <artifactId>hudi-spark3.2.x_${scala.binary.version}</artifactId>
+            <artifactId>${hudi-spark.version}_${scala.binary.version}</artifactId>
             <version>${hudi.version}</version>
             <exclusions>
                 <exclusion>
@@ -158,37 +167,6 @@ under the License.
             <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
             <version>${spark.version}</version>
             <scope>compile</scope>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.codehaus.janino</groupId>
-                    <artifactId>janino</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.codehaus.janino</groupId>
-                    <artifactId>commons-compiler</artifactId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>antlr4-runtime</artifactId>
-                    <groupId>org.antlr</groupId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-        <dependency>
-            <!-- version of spark's janino is error -->
-            <groupId>org.codehaus.janino</groupId>
-            <artifactId>janino</artifactId>
-            <version>${janino.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.codehaus.janino</groupId>
-                    <artifactId>commons-compiler</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-        <dependency>
-            <groupId>org.codehaus.janino</groupId>
-            <artifactId>commons-compiler</artifactId>
-            <version>${janino.version}</version>
         </dependency>
         <dependency>
             <!-- version of spark's jackson module is error -->
diff --git a/fe/fe-core/pom.xml b/fe/fe-core/pom.xml
index 82b4bd15de5..459318193b3 100644
--- a/fe/fe-core/pom.xml
+++ b/fe/fe-core/pom.xml
@@ -32,7 +32,6 @@ under the License.
         <doris.home>${basedir}/../../</doris.home>
         <doris.thirdparty>${basedir}/../../thirdparty</doris.thirdparty>
         <fe_ut_parallel>1</fe_ut_parallel>
-        <antlr4.version>4.13.1</antlr4.version>
         <awssdk.version>2.20.131</awssdk.version>
         <huaweiobs.version>3.1.1-hw-46</huaweiobs.version>
         <tencentcos.version>8.2.7</tencentcos.version>
@@ -433,9 +432,26 @@ under the License.
                 </exclusion>
             </exclusions>
         </dependency>
+        <!-- antl4 The version of antlr-runtime in trino parser is need to be consistent with doris,
+         when upgrade doris antlr-runtime version, should take care of trino-parser.-->
+        <dependency>
+            <groupId>org.antlr</groupId>
+            <artifactId>antlr4-runtime</artifactId>
+            <version>${antlr4.version}</version>
+        </dependency>
         <dependency>
             <groupId>com.aliyun.odps</groupId>
             <artifactId>odps-sdk-core</artifactId>
+            <exclusions>
+                <exclusion>
+                    <artifactId>antlr-runtime</artifactId>
+                    <groupId>org.antlr</groupId>
+                </exclusion>
+                <exclusion>
+                    <artifactId>antlr4</artifactId>
+                    <groupId>org.antlr</groupId>
+                </exclusion>
+            </exclusions>
         </dependency>
         <!-- https://mvnrepository.com/artifact/org.springframework.boot/spring-boot-starter-web -->
         <dependency>
@@ -655,14 +671,6 @@ under the License.
             <artifactId>mariadb-java-client</artifactId>
         </dependency>
 
-        <!-- antl4 The version of antlr-runtime in trino parser is need to be consistent with doris,
-         when upgrade doris antlr-runtime version, should take care of trino-parser.-->
-        <dependency>
-            <groupId>org.antlr</groupId>
-            <artifactId>antlr4-runtime</artifactId>
-            <version>${antlr4.version}</version>
-        </dependency>
-
         <dependency>
             <groupId>com.zaxxer</groupId>
             <artifactId>HikariCP</artifactId>
@@ -808,6 +816,15 @@ under the License.
             <artifactId>ap-loader-all</artifactId>
             <version>3.0-8</version>
         </dependency>
+        <dependency>
+            <groupId>org.apache.hbase</groupId>
+            <artifactId>hbase-server</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hbase</groupId>
+            <artifactId>hbase-hadoop-compat</artifactId>
+            <version>2.5.2-hadoop3</version>
+        </dependency>
     </dependencies>
     <repositories>
         <!-- for huawei obs sdk -->
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
index 4543303db6c..c8220349019 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
@@ -162,6 +162,7 @@ public class HudiCachedPartitionProcessor extends HudiPartitionProcessor {
                 partitionValues.writeLock().unlock();
             }
         } catch (Exception e) {
+            LOG.warn("Failed to get hudi partitions", e);
             throw new CacheException("Failed to get hudi partitions", e);
         }
     }
diff --git a/fe/pom.xml b/fe/pom.xml
index cd90827f217..c81091cac81 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -274,7 +274,7 @@ under the License.
         <!-- NOTE: Using grpc-java whose version is newer than 1.34.0 will break the build on CentOS 6 due to the obsolete GLIBC -->
         <grpc-java.version>1.34.0</grpc-java.version>
         <grpc.version>1.60.1</grpc.version>
-        <check.freamework.version>3.42.0</check.freamework.version>
+        <check.freamework.version>3.43.0</check.freamework.version>
         <protobuf.version>3.24.3</protobuf.version>
         <!-- we use protoc-jar-maven-plugin to generate protobuf generated code -->
         <!-- see https://repo.maven.apache.org/maven2/com/google/protobuf/protoc/ to get correct version -->
@@ -294,12 +294,13 @@ under the License.
         <zjsonpatch.version>0.2.3</zjsonpatch.version>
         <kafka-clients.version>3.4.0</kafka-clients.version>
         <oshi-core.version>6.4.5</oshi-core.version>
-        <xnio-nio.version>3.8.9.Final</xnio-nio.version>
+        <xnio-nio.version>3.8.14.Final</xnio-nio.version>
         <javax.annotation-api.version>1.3.2</javax.annotation-api.version>
         <javax.activation.version>1.2.0</javax.activation.version>
         <jaxws-api.version>2.3.0</jaxws-api.version>
         <RoaringBitmap.version>0.8.13</RoaringBitmap.version>
-        <spark.version>3.4.1</spark.version>
+        <spark.version>3.4.3</spark.version>
+        <hudi-spark.version>hudi-spark3.4.x</hudi-spark.version>
         <hive.version>3.1.3</hive.version>
         <hive.common.version>2.3.9</hive.common.version>
         <nimbusds.version>9.35</nimbusds.version>
@@ -331,6 +332,8 @@ under the License.
         <aws-java-sdk.version>1.12.669</aws-java-sdk.version>
         <mariadb-java-client.version>3.0.9</mariadb-java-client.version>
         <hadoop.version>3.3.6</hadoop.version>
+        <hbase.version>2.4.9</hbase.version>
+        <antlr4.version>4.13.1</antlr4.version>
         <joda.version>2.8.1</joda.version>
         <project.scm.id>github</project.scm.id>
         <spring.version>2.7.13</spring.version>
@@ -528,6 +531,14 @@ under the License.
                 <exclusion>
                     <groupId>javax.servlet</groupId>
                     <artifactId>servlet-api</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-yarn-common</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-yarn-api</artifactId>
                 </exclusion>
             </exclusions>
         </dependency>
@@ -558,6 +569,29 @@ under the License.
                 <artifactId>kerb-simplekdc</artifactId>
                 <version>${kerby.version}</version>
             </dependency>
+            <dependency>
+                <groupId>org.apache.hbase</groupId>
+                <artifactId>hbase-server</artifactId>
+                <version>${hbase.version}</version>
+                <exclusions>
+                    <exclusion>
+                        <groupId>org.apache.hadoop</groupId>
+                        <artifactId>hadoop-yarn-api</artifactId>
+                    </exclusion>
+                    <exclusion>
+                        <groupId>org.apache.hadoop</groupId>
+                        <artifactId>hadoop-yarn-common</artifactId>
+                    </exclusion>
+                    <exclusion>
+                        <groupId>org.apache.hbase</groupId>
+                        <artifactId>hbase-hadoop2-compat</artifactId>
+                    </exclusion>
+                    <exclusion>
+                        <groupId>org.apache.hadoop</groupId>
+                        <artifactId>hadoop-annotations</artifactId>
+                    </exclusion>
+                </exclusions>
+            </dependency>
             <dependency>
                 <groupId>org.apache.kerby</groupId>
                 <artifactId>kerb-core</artifactId>
@@ -1145,6 +1179,11 @@ under the License.
                 <artifactId>xnio-nio</artifactId>
                 <version>${xnio-nio.version}</version>
             </dependency>
+            <dependency>
+                <groupId>org.jboss.xnio</groupId>
+                <artifactId>xnio-api</artifactId>
+                <version>${xnio-nio.version}</version>
+            </dependency>
             <!-- support jdk9 -->
             <dependency>
                 <groupId>javax.annotation</groupId>

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org