This is an automated email from the ASF dual-hosted git repository.

diwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-spark-connector.git


The following commit(s) were added to refs/heads/master by this push:
     new d1e07a7  [improve](spark load) support load hadoop conf from resource (#280)
d1e07a7 is described below

commit d1e07a718acc61d4f828b92528a632292626eb5f
Author: gnehil <adamlee...@gmail.com>
AuthorDate: Mon Mar 17 20:36:05 2025 +0800

    [improve](spark load) support load hadoop conf from resource (#280)
---
 .../java/org/apache/doris/SparkLoadRunner.java     | 34 ++++++++++++++++-
 .../java/org/apache/doris/SparkLoadRunnerTest.java | 44 ++++++++++++++++++++++
 .../src/test/resources/core-site.xml               | 25 ++++++++++++
 .../src/test/resources/hdfs-site.xml               | 25 ++++++++++++
 .../src/test/resources/yarn-site.xml               | 25 ++++++++++++
 5 files changed, 152 insertions(+), 1 deletion(-)

diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/SparkLoadRunner.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/SparkLoadRunner.java
index 5c7329f..9998bab 100644
--- a/spark-load/spark-load-core/src/main/java/org/apache/doris/SparkLoadRunner.java
+++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/SparkLoadRunner.java
@@ -37,6 +37,8 @@ import org.apache.commons.cli.DefaultParser;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 
@@ -74,7 +76,7 @@ public class SparkLoadRunner {
 
         JobConfig jobConfig = readConfig(cmdOptions.getConfigPath());
         try {
-            handleS3Config(jobConfig);
+            preprocessConfig(jobConfig);
             checkConfig(jobConfig);
         } catch (IllegalArgumentException e) {
             System.err.println("check config failed, msg: " + ExceptionUtils.getStackTrace(e));
@@ -156,6 +158,36 @@ public class SparkLoadRunner {
         jobConfig.checkHadoopProperties();
     }
 
+    private static void preprocessConfig(JobConfig jobConfig) {
+        loadHadoopConfig(jobConfig);
+        handleS3Config(jobConfig);
+    }
+
+    protected static void loadHadoopConfig(JobConfig jobConfig) {
+        if (jobConfig.getEnv().containsKey("HADOOP_CONF_DIR")) {
+            Configuration conf = new Configuration();
+            String hadoopConfDir = jobConfig.getEnv().get("HADOOP_CONF_DIR");
+            if (new File(hadoopConfDir + "/core-site.xml").exists()) {
+                System.out.println("core-site.xml was found at " + hadoopConfDir + "/core-site.xml");
+                conf.addResource(new Path(hadoopConfDir, "core-site.xml"));
+            }
+            if (new File(hadoopConfDir + "/hdfs-site.xml").exists()) {
+                System.out.println("hdfs-site.xml was found at " + hadoopConfDir + "/hdfs-site.xml");
+                conf.addResource(new Path(hadoopConfDir, "hdfs-site.xml"));
+            }
+            if (new File(hadoopConfDir + "/yarn-site.xml").exists()) {
+                System.out.println("yarn-site.xml was found at " + hadoopConfDir + "/yarn-site.xml");
+                conf.addResource(new Path(hadoopConfDir, "yarn-site.xml"));
+            }
+            Map<String, String> newHadoopProps = new HashMap<>();
+            for (Map.Entry<String, String> confEntry : conf) {
+                newHadoopProps.put(confEntry.getKey(), confEntry.getValue());
+            }
+            newHadoopProps.putAll(jobConfig.getHadoopProperties());
+            jobConfig.setHadoopProperties(newHadoopProps);
+        }
+    }
+
     private static void handleS3Config(JobConfig jobConfig) {
         URI uri = URI.create(jobConfig.getWorkingDir());
         if (uri.getScheme().equalsIgnoreCase("s3")) {
diff --git a/spark-load/spark-load-core/src/test/java/org/apache/doris/SparkLoadRunnerTest.java b/spark-load/spark-load-core/src/test/java/org/apache/doris/SparkLoadRunnerTest.java
new file mode 100644
index 0000000..81010f9
--- /dev/null
+++ b/spark-load/spark-load-core/src/test/java/org/apache/doris/SparkLoadRunnerTest.java
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris;
+
+import org.apache.doris.config.JobConfig;
+
+import org.junit.jupiter.api.Assertions;
+import static org.junit.jupiter.api.Assertions.*;
+import org.junit.jupiter.api.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+class SparkLoadRunnerTest {
+
+    @Test
+    void loadHadoopConfig() {
+
+        JobConfig jobConfig = new JobConfig();
+        Map<String, String> envMap = new HashMap<>();
+        envMap.put("HADOOP_CONF_DIR", this.getClass().getResource("/").getPath());
+        jobConfig.setEnv(envMap);
+        SparkLoadRunner.loadHadoopConfig(jobConfig);
+        Assertions.assertEquals("60000", jobConfig.getHadoopProperties().get("hadoop.http.idle_timeout.ms"));
+        Assertions.assertEquals("1", jobConfig.getHadoopProperties().get("dfs.replication"));
+        Assertions.assertEquals("my.hadoop.com", jobConfig.getHadoopProperties().get("yarn.resourcemanager.address"));
+
+    }
+}
\ No newline at end of file
diff --git a/spark-load/spark-load-core/src/test/resources/core-site.xml b/spark-load/spark-load-core/src/test/resources/core-site.xml
new file mode 100644
index 0000000..304b272
--- /dev/null
+++ b/spark-load/spark-load-core/src/test/resources/core-site.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+    <property>
+        <name>hadoop.http.idle_timeout.ms</name>
+        <value>60000</value>
+    </property>
+</configuration>
\ No newline at end of file
diff --git a/spark-load/spark-load-core/src/test/resources/hdfs-site.xml b/spark-load/spark-load-core/src/test/resources/hdfs-site.xml
new file mode 100644
index 0000000..869686c
--- /dev/null
+++ b/spark-load/spark-load-core/src/test/resources/hdfs-site.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+    <property>
+        <name>dfs.replication</name>
+        <value>1</value>
+    </property>
+</configuration>
\ No newline at end of file
diff --git a/spark-load/spark-load-core/src/test/resources/yarn-site.xml b/spark-load/spark-load-core/src/test/resources/yarn-site.xml
new file mode 100644
index 0000000..e8279d2
--- /dev/null
+++ b/spark-load/spark-load-core/src/test/resources/yarn-site.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+    <property>
+        <name>yarn.resourcemanager.address</name>
+        <value>my.hadoop.com</value>
+    </property>
+</configuration>
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to