This is an automated email from the ASF dual-hosted git repository.

ashingau pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new dde04934efc [update](hudi) update hudi-spark bundle to 3.4.3 (#35013)
dde04934efc is described below

commit dde04934efc62218219add2514993966d0a02dea
Author: Ashin Gau <ashin...@users.noreply.github.com>
AuthorDate: Tue May 21 17:13:06 2024 +0800

    [update](hudi) update hudi-spark bundle to 3.4.3 (#35013)
    
    1. For security reasons, upgrade Spark to version 3.4.3
    2. Resolve the Spark jar conflicts, which differ between fe and be-java-extensions
    3. The HBase version is 2.4.9, which is not secure and conflicts with hadoop3. A later PR will update HBase to 2.5.x; for now, use `hbase-hadoop-compat:2.5.2-hadoop3` to resolve the conflict with hadoop3.
---
 fe/be-java-extensions/hudi-scanner/pom.xml         | 41 ++++---------------
 .../org/apache/doris/hudi/BaseSplitReader.scala    |  8 ++--
 .../apache/doris/hudi/MORSnapshotSplitReader.scala |  3 +-
 fe/be-java-extensions/preload-extensions/pom.xml   | 46 ++++++----------------
 fe/fe-core/pom.xml                                 | 35 +++++++++++-----
 .../hudi/source/HudiCachedPartitionProcessor.java  |  1 +
 fe/pom.xml                                         | 45 +++++++++++++++++++--
 7 files changed, 94 insertions(+), 85 deletions(-)

diff --git a/fe/be-java-extensions/hudi-scanner/pom.xml b/fe/be-java-extensions/hudi-scanner/pom.xml
index d4f7a458612..465a9393b0f 100644
--- a/fe/be-java-extensions/hudi-scanner/pom.xml
+++ b/fe/be-java-extensions/hudi-scanner/pom.xml
@@ -32,10 +32,7 @@ under the License.
         <fe_ut_parallel>1</fe_ut_parallel>
         <scala.version>2.12.15</scala.version>
         <scala.binary.version>2.12</scala.binary.version>
-        <spark.version>3.2.0</spark.version>
-        <sparkbundle.version>3.2</sparkbundle.version>
-        <janino.version>3.0.16</janino.version>
-        <avro.version>1.11.2</avro.version>
+        <avro.version>1.11.3</avro.version>
     </properties>
     
     <dependencyManagement>
@@ -91,7 +88,7 @@ under the License.
         </dependency>
         <dependency>
             <groupId>org.apache.hudi</groupId>
-            <artifactId>hudi-spark3.2.x_${scala.binary.version}</artifactId>
+            <artifactId>${hudi-spark.version}_${scala.binary.version}</artifactId>
             <version>${hudi.version}</version>
             <scope>provided</scope>
             <exclusions>
@@ -119,6 +116,11 @@ under the License.
             <version>1.10.1</version>
             <scope>provided</scope>
         </dependency>
+        <dependency>
+            <groupId>org.antlr</groupId>
+            <artifactId>antlr4-runtime</artifactId>
+            <version>${antlr4.version}</version>
+        </dependency>
         <dependency>
             <groupId>org.apache.spark</groupId>
             <artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -160,35 +162,6 @@ under the License.
             <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
             <version>${spark.version}</version>
             <scope>provided</scope>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.codehaus.janino</groupId>
-                    <artifactId>janino</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.codehaus.janino</groupId>
-                    <artifactId>commons-compiler</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-        <dependency>
-            <!-- version of spark's janino is error -->
-            <groupId>org.codehaus.janino</groupId>
-            <artifactId>janino</artifactId>
-            <version>${janino.version}</version>
-            <scope>provided</scope>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.codehaus.janino</groupId>
-                    <artifactId>commons-compiler</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-        <dependency>
-            <groupId>org.codehaus.janino</groupId>
-            <artifactId>commons-compiler</artifactId>
-            <version>${janino.version}</version>
-            <scope>provided</scope>
         </dependency>
         <dependency>
             <!-- version of spark's jackson module is error -->
diff --git a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala
index 8229064163d..dcc068ad700 100644
--- a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala
+++ b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala
@@ -44,7 +44,7 @@ import org.apache.hudi.io.storage.HoodieAvroHFileReader
 import org.apache.hudi.metadata.HoodieTableMetadataUtil
 import org.apache.hudi.{AvroConversionUtils, DataSourceReadOptions, DataSourceWriteOptions, HoodieSparkConfUtils, HoodieTableSchema, HoodieTableState}
 import org.apache.log4j.Logger
-import org.apache.spark.sql.adapter.Spark3_2Adapter
+import org.apache.spark.sql.adapter.Spark3_4Adapter
 import org.apache.spark.sql.avro.{HoodieAvroSchemaConverters, HoodieSparkAvroSchemaConverters}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
@@ -66,7 +66,7 @@ import scala.collection.JavaConverters._
 import scala.util.control.NonFatal
 import scala.util.{Failure, Success, Try}
 
-class DorisSparkAdapter extends Spark3_2Adapter {
+class DorisSparkAdapter extends Spark3_4Adapter {
   override def getAvroSchemaConverters: HoodieAvroSchemaConverters = HoodieSparkAvroSchemaConverters
 }
 
@@ -498,7 +498,7 @@ abstract class BaseSplitReader(val split: HoodieSplit) {
                                 hadoopConf: Configuration): PartitionedFile => Iterator[InternalRow] = {
     partitionedFile => {
       val reader = new HoodieAvroHFileReader(
-        hadoopConf, new Path(partitionedFile.filePath), new CacheConfig(hadoopConf))
+        hadoopConf, partitionedFile.filePath.toPath, new CacheConfig(hadoopConf))
 
       val requiredRowSchema = requiredDataSchema.structTypeSchema
       // NOTE: Schema has to be parsed at this point, since Avro's [[Schema]] aren't serializable
@@ -573,7 +573,7 @@ abstract class BaseSplitReader(val split: HoodieSplit) {
 
     BaseFileReader(
       read = partitionedFile => {
-        val extension = FSUtils.getFileExtension(partitionedFile.filePath)
+        val extension = FSUtils.getFileExtension(partitionedFile.filePath.toString())
         if (tableBaseFileFormat.getFileExtension.equals(extension)) {
           read(partitionedFile)
         } else {
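
Note on the filePath changes in this file: in Spark 3.4, PartitionedFile.filePath is an org.apache.spark.paths.SparkPath wrapper rather than the plain String it was in Spark 3.2, so call sites must convert explicitly. A minimal sketch of the two conversions used above, with an illustrative path:

```scala
import org.apache.hadoop.fs.Path
import org.apache.spark.paths.SparkPath

// Spark 3.4: PartitionedFile.filePath is a SparkPath, not a String.
val sp: SparkPath = SparkPath.fromPathString("hdfs://nn/warehouse/tbl/f1.parquet")
val hadoopPath: Path = sp.toPath // Hadoop Path, as needed by HoodieAvroHFileReader
val raw: String = sp.toString    // string form, as passed to FSUtils.getFileExtension
```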
diff --git a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/MORSnapshotSplitReader.scala b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/MORSnapshotSplitReader.scala
index 07e236082ce..02a4fa40045 100644
--- a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/MORSnapshotSplitReader.scala
+++ b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/MORSnapshotSplitReader.scala
@@ -21,6 +21,7 @@ import org.apache.hudi.HoodieBaseRelation.convertToAvroSchema
 import org.apache.hudi.avro.HoodieAvroUtils
 import org.apache.hudi.common.model.HoodieLogFile
 import org.apache.hudi.{DataSourceReadOptions, HoodieMergeOnReadFileSplit, HoodieTableSchema}
+import org.apache.spark.paths.SparkPath
 import org.apache.spark.sql.SQLContext
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.execution.datasources.PartitionedFile
@@ -80,7 +81,7 @@ class MORSnapshotSplitReader(override val split: HoodieSplit) extends BaseSplitR
     val partitionedBaseFile = if (split.dataFilePath.isEmpty) {
       None
     } else {
-      Some(PartitionedFile(getPartitionColumnsAsInternalRow(), split.dataFilePath, 0, split.dataFileLength))
+      Some(PartitionedFile(getPartitionColumnsAsInternalRow(), SparkPath.fromPathString(split.dataFilePath), 0, split.dataFileLength))
     }
     HoodieMergeOnReadFileSplit(partitionedBaseFile, logFiles)
   }
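
The construction side of the same API change: Spark 3.4's PartitionedFile takes a SparkPath as its second parameter, hence the SparkPath.fromPathString wrapper added above. A hedged sketch of the call (values are illustrative only):

```scala
import org.apache.spark.paths.SparkPath
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.datasources.PartitionedFile

// Spark 3.4: PartitionedFile(partitionValues, filePath: SparkPath, start, length, ...)
val pf = PartitionedFile(
  InternalRow.empty,                                        // partition column values
  SparkPath.fromPathString("s3://bucket/tbl/base.parquet"), // plain String in Spark 3.2
  0L,                                                       // start offset in the file
  2048L)                                                    // number of bytes to read
```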
diff --git a/fe/be-java-extensions/preload-extensions/pom.xml b/fe/be-java-extensions/preload-extensions/pom.xml
index 31ce08db1b9..06e7b0a231f 100644
--- a/fe/be-java-extensions/preload-extensions/pom.xml
+++ b/fe/be-java-extensions/preload-extensions/pom.xml
@@ -33,8 +33,6 @@ under the License.
         <maven.compiler.source>8</maven.compiler.source>
         <maven.compiler.target>8</maven.compiler.target>
         <scala.binary.version>2.12</scala.binary.version>
-        <spark.version>3.2.0</spark.version>
-        <janino.version>3.0.16</janino.version>
     </properties>
 
     <dependencies>
@@ -63,6 +61,12 @@ under the License.
             <!-- Must be provided, we use hadoop_libs in BE's 3rd party instead -->
             <scope>provided</scope>
         </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-annotations</artifactId>
+            <version>${hadoop.version}</version>
+            <scope>provided</scope>
+        </dependency>
         <dependency>
             <groupId>org.apache.hudi</groupId>
             <artifactId>hudi-spark-client</artifactId>
@@ -83,6 +87,11 @@ under the License.
                 </exclusion>
             </exclusions>
         </dependency>
+        <dependency>
+            <groupId>org.antlr</groupId>
+            <artifactId>antlr4-runtime</artifactId>
+            <version>${antlr4.version}</version>
+        </dependency>
         <dependency>
             <groupId>org.apache.hudi</groupId>
             <artifactId>hudi-spark3-common</artifactId>
@@ -90,7 +99,7 @@ under the License.
         </dependency>
         <dependency>
             <groupId>org.apache.hudi</groupId>
-            <artifactId>hudi-spark3.2.x_${scala.binary.version}</artifactId>
+            <artifactId>${hudi-spark.version}_${scala.binary.version}</artifactId>
             <version>${hudi.version}</version>
             <exclusions>
                 <exclusion>
@@ -158,37 +167,6 @@ under the License.
             <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
             <version>${spark.version}</version>
             <scope>compile</scope>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.codehaus.janino</groupId>
-                    <artifactId>janino</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.codehaus.janino</groupId>
-                    <artifactId>commons-compiler</artifactId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>antlr4-runtime</artifactId>
-                    <groupId>org.antlr</groupId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-        <dependency>
-            <!-- version of spark's janino is error -->
-            <groupId>org.codehaus.janino</groupId>
-            <artifactId>janino</artifactId>
-            <version>${janino.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.codehaus.janino</groupId>
-                    <artifactId>commons-compiler</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-        <dependency>
-            <groupId>org.codehaus.janino</groupId>
-            <artifactId>commons-compiler</artifactId>
-            <version>${janino.version}</version>
         </dependency>
         <dependency>
             <!-- version of spark's jackson module is error -->
diff --git a/fe/fe-core/pom.xml b/fe/fe-core/pom.xml
index 82b4bd15de5..459318193b3 100644
--- a/fe/fe-core/pom.xml
+++ b/fe/fe-core/pom.xml
@@ -32,7 +32,6 @@ under the License.
         <doris.home>${basedir}/../../</doris.home>
         <doris.thirdparty>${basedir}/../../thirdparty</doris.thirdparty>
         <fe_ut_parallel>1</fe_ut_parallel>
-        <antlr4.version>4.13.1</antlr4.version>
         <awssdk.version>2.20.131</awssdk.version>
         <huaweiobs.version>3.1.1-hw-46</huaweiobs.version>
         <tencentcos.version>8.2.7</tencentcos.version>
@@ -433,9 +432,26 @@ under the License.
                 </exclusion>
             </exclusions>
         </dependency>
+        <!-- antl4 The version of antlr-runtime in trino parser is need to be consistent with doris,
+            when upgrade doris antlr-runtime version, should take care of trino-parser.-->
+        <dependency>
+            <groupId>org.antlr</groupId>
+            <artifactId>antlr4-runtime</artifactId>
+            <version>${antlr4.version}</version>
+        </dependency>
         <dependency>
             <groupId>com.aliyun.odps</groupId>
             <artifactId>odps-sdk-core</artifactId>
+            <exclusions>
+                <exclusion>
+                    <artifactId>antlr-runtime</artifactId>
+                    <groupId>org.antlr</groupId>
+                </exclusion>
+                <exclusion>
+                    <artifactId>antlr4</artifactId>
+                    <groupId>org.antlr</groupId>
+                </exclusion>
+            </exclusions>
         </dependency>
        <!-- https://mvnrepository.com/artifact/org.springframework.boot/spring-boot-starter-web -->
         <dependency>
@@ -655,14 +671,6 @@ under the License.
             <artifactId>mariadb-java-client</artifactId>
         </dependency>
 
-        <!-- antl4 The version of antlr-runtime in trino parser is need to be consistent with doris,
-            when upgrade doris antlr-runtime version, should take care of trino-parser.-->
-        <dependency>
-            <groupId>org.antlr</groupId>
-            <artifactId>antlr4-runtime</artifactId>
-            <version>${antlr4.version}</version>
-        </dependency>
-
         <dependency>
             <groupId>com.zaxxer</groupId>
             <artifactId>HikariCP</artifactId>
@@ -808,6 +816,15 @@ under the License.
             <artifactId>ap-loader-all</artifactId>
             <version>3.0-8</version>
         </dependency>
+        <dependency>
+            <groupId>org.apache.hbase</groupId>
+            <artifactId>hbase-server</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hbase</groupId>
+            <artifactId>hbase-hadoop-compat</artifactId>
+            <version>2.5.2-hadoop3</version>
+        </dependency>
     </dependencies>
     <repositories>
         <!-- for huawei obs sdk -->
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
index 4543303db6c..c8220349019 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java
@@ -162,6 +162,7 @@ public class HudiCachedPartitionProcessor extends HudiPartitionProcessor {
                 partitionValues.writeLock().unlock();
             }
         } catch (Exception e) {
+            LOG.warn("Failed to get hudi partitions", e);
             throw new CacheException("Failed to get hudi partitions", e);
         }
     }
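
The one-line addition above logs the root cause before the exception is rewrapped, so the original stack trace lands in fe.log even if the CacheException is handled upstream. A small sketch of this log-and-rethrow pattern; the object and method here are hypothetical, not part of the commit:

```scala
import org.apache.log4j.Logger

// Hypothetical helper illustrating the log-and-rethrow pattern used above.
object HudiPartitionHelper {
  private val LOG = Logger.getLogger(getClass)

  def getPartitions(fetch: () => java.util.List[String]): java.util.List[String] =
    try fetch()
    catch {
      case e: Exception =>
        LOG.warn("Failed to get hudi partitions", e) // keep the root cause in fe.log
        throw new RuntimeException("Failed to get hudi partitions", e)
    }
}
```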
diff --git a/fe/pom.xml b/fe/pom.xml
index cd90827f217..c81091cac81 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -274,7 +274,7 @@ under the License.
        <!-- NOTE: Using grpc-java whose version is newer than 1.34.0 will break the build on CentOS 6 due to the obsolete GLIBC -->
         <grpc-java.version>1.34.0</grpc-java.version>
         <grpc.version>1.60.1</grpc.version>
-        <check.freamework.version>3.42.0</check.freamework.version>
+        <check.freamework.version>3.43.0</check.freamework.version>
         <protobuf.version>3.24.3</protobuf.version>
        <!-- we use protoc-jar-maven-plugin to generate protobuf generated code -->
        <!-- see https://repo.maven.apache.org/maven2/com/google/protobuf/protoc/ to get correct version -->
@@ -294,12 +294,13 @@ under the License.
         <zjsonpatch.version>0.2.3</zjsonpatch.version>
         <kafka-clients.version>3.4.0</kafka-clients.version>
         <oshi-core.version>6.4.5</oshi-core.version>
-        <xnio-nio.version>3.8.9.Final</xnio-nio.version>
+        <xnio-nio.version>3.8.14.Final</xnio-nio.version>
         <javax.annotation-api.version>1.3.2</javax.annotation-api.version>
         <javax.activation.version>1.2.0</javax.activation.version>
         <jaxws-api.version>2.3.0</jaxws-api.version>
         <RoaringBitmap.version>0.8.13</RoaringBitmap.version>
-        <spark.version>3.4.1</spark.version>
+        <spark.version>3.4.3</spark.version>
+        <hudi-spark.version>hudi-spark3.4.x</hudi-spark.version>
         <hive.version>3.1.3</hive.version>
         <hive.common.version>2.3.9</hive.common.version>
         <nimbusds.version>9.35</nimbusds.version>
@@ -331,6 +332,8 @@ under the License.
         <aws-java-sdk.version>1.12.669</aws-java-sdk.version>
         <mariadb-java-client.version>3.0.9</mariadb-java-client.version>
         <hadoop.version>3.3.6</hadoop.version>
+        <hbase.version>2.4.9</hbase.version>
+        <antlr4.version>4.13.1</antlr4.version>
         <joda.version>2.8.1</joda.version>
         <project.scm.id>github</project.scm.id>
         <spring.version>2.7.13</spring.version>
@@ -528,6 +531,14 @@ under the License.
                     <exclusion>
                         <groupId>javax.servlet</groupId>
                         <artifactId>servlet-api</artifactId>
+                    </exclusion>        
+                    <exclusion>
+                        <groupId>org.apache.hadoop</groupId>
+                        <artifactId>hadoop-yarn-common</artifactId>
+                    </exclusion>
+                    <exclusion>
+                        <groupId>org.apache.hadoop</groupId>
+                        <artifactId>hadoop-yarn-api</artifactId>
                     </exclusion>
                 </exclusions>
             </dependency>
@@ -558,6 +569,29 @@ under the License.
                 <artifactId>kerb-simplekdc</artifactId>
                 <version>${kerby.version}</version>
             </dependency>
+            <dependency>
+                <groupId>org.apache.hbase</groupId>
+                <artifactId>hbase-server</artifactId>
+                <version>${hbase.version}</version>
+                <exclusions>
+                    <exclusion>
+                        <groupId>org.apache.hadoop</groupId>
+                        <artifactId>hadoop-yarn-api</artifactId>
+                    </exclusion>
+                    <exclusion>
+                        <groupId>org.apache.hadoop</groupId>
+                        <artifactId>hadoop-yarn-common</artifactId>
+                    </exclusion>
+                    <exclusion>
+                        <groupId>org.apache.hbase</groupId>
+                        <artifactId>hbase-hadoop2-compat</artifactId>
+                    </exclusion>
+                    <exclusion>
+                        <groupId>org.apache.hadoop</groupId>
+                        <artifactId>hadoop-annotations</artifactId>
+                    </exclusion>
+                </exclusions>
+            </dependency>
             <dependency>
                 <groupId>org.apache.kerby</groupId>
                 <artifactId>kerb-core</artifactId>
@@ -1145,6 +1179,11 @@ under the License.
                 <artifactId>xnio-nio</artifactId>
                 <version>${xnio-nio.version}</version>
             </dependency>
+            <dependency>
+                <groupId>org.jboss.xnio</groupId>
+                <artifactId>xnio-api</artifactId>
+                <version>${xnio-nio.version}</version>
+            </dependency>
             <!-- support jdk9 -->
             <dependency>
                 <groupId>javax.annotation</groupId>

