This is an automated email from the ASF dual-hosted git repository. diwu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris-spark-connector.git
The following commit(s) were added to refs/heads/master by this push: new d1e07a7 [improve](spark load) support load hadoop conf from resource (#280) d1e07a7 is described below commit d1e07a718acc61d4f828b92528a632292626eb5f Author: gnehil <adamlee...@gmail.com> AuthorDate: Mon Mar 17 20:36:05 2025 +0800 [improve](spark load) support load hadoop conf from resource (#280) --- .../java/org/apache/doris/SparkLoadRunner.java | 34 ++++++++++++++++- .../java/org/apache/doris/SparkLoadRunnerTest.java | 44 ++++++++++++++++++++++ .../src/test/resources/core-site.xml | 25 ++++++++++++ .../src/test/resources/hdfs-site.xml | 25 ++++++++++++ .../src/test/resources/yarn-site.xml | 25 ++++++++++++ 5 files changed, 152 insertions(+), 1 deletion(-) diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/SparkLoadRunner.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/SparkLoadRunner.java index 5c7329f..9998bab 100644 --- a/spark-load/spark-load-core/src/main/java/org/apache/doris/SparkLoadRunner.java +++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/SparkLoadRunner.java @@ -37,6 +37,8 @@ import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -74,7 +76,7 @@ public class SparkLoadRunner { JobConfig jobConfig = readConfig(cmdOptions.getConfigPath()); try { - handleS3Config(jobConfig); + preprocessConfig(jobConfig); checkConfig(jobConfig); } catch (IllegalArgumentException e) { System.err.println("check config failed, msg: " + ExceptionUtils.getStackTrace(e)); @@ -156,6 +158,36 @@ public class SparkLoadRunner { jobConfig.checkHadoopProperties(); } + private static void preprocessConfig(JobConfig jobConfig) { + loadHadoopConfig(jobConfig); + handleS3Config(jobConfig); + } + + protected static void loadHadoopConfig(JobConfig jobConfig) { + if (jobConfig.getEnv().containsKey("HADOOP_CONF_DIR")) { + Configuration conf = new Configuration(); + String hadoopConfDir = jobConfig.getEnv().get("HADOOP_CONF_DIR"); + if (new File(hadoopConfDir + "/core-site.xml").exists()) { + System.out.println("core-site.xml was found at " + hadoopConfDir + "/core-site.xml"); + conf.addResource(new Path(hadoopConfDir, "core-site.xml")); + } + if (new File(hadoopConfDir + "/hdfs-site.xml").exists()) { + System.out.println("hdfs-site.xml was found at " + hadoopConfDir + "/hdfs-site.xml"); + conf.addResource(new Path(hadoopConfDir, "hdfs-site.xml")); + } + if (new File(hadoopConfDir + "/yarn-site.xml").exists()) { + System.out.println("yarn-site.xml was found at " + hadoopConfDir + "/yarn-site.xml"); + conf.addResource(new Path(hadoopConfDir, "yarn-site.xml")); + } + Map<String, String> newHadoopProps = new HashMap<>(); + for (Map.Entry<String, String> confEntry : conf) { + newHadoopProps.put(confEntry.getKey(), confEntry.getValue()); + } + newHadoopProps.putAll(jobConfig.getHadoopProperties()); + jobConfig.setHadoopProperties(newHadoopProps); + } + } + private static void handleS3Config(JobConfig jobConfig) { URI uri = URI.create(jobConfig.getWorkingDir()); if (uri.getScheme().equalsIgnoreCase("s3")) { diff --git a/spark-load/spark-load-core/src/test/java/org/apache/doris/SparkLoadRunnerTest.java b/spark-load/spark-load-core/src/test/java/org/apache/doris/SparkLoadRunnerTest.java new file mode 100644 index 0000000..81010f9 --- /dev/null +++ b/spark-load/spark-load-core/src/test/java/org/apache/doris/SparkLoadRunnerTest.java @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris; + +import org.apache.doris.config.JobConfig; + +import org.junit.jupiter.api.Assertions; +import static org.junit.jupiter.api.Assertions.*; +import org.junit.jupiter.api.Test; + +import java.util.HashMap; +import java.util.Map; + +class SparkLoadRunnerTest { + + @Test + void loadHadoopConfig() { + + JobConfig jobConfig = new JobConfig(); + Map<String, String> envMap = new HashMap<>(); + envMap.put("HADOOP_CONF_DIR", this.getClass().getResource("/").getPath()); + jobConfig.setEnv(envMap); + SparkLoadRunner.loadHadoopConfig(jobConfig); + Assertions.assertEquals("60000", jobConfig.getHadoopProperties().get("hadoop.http.idle_timeout.ms")); + Assertions.assertEquals("1", jobConfig.getHadoopProperties().get("dfs.replication")); + Assertions.assertEquals("my.hadoop.com", jobConfig.getHadoopProperties().get("yarn.resourcemanager.address")); + + } +} \ No newline at end of file diff --git a/spark-load/spark-load-core/src/test/resources/core-site.xml b/spark-load/spark-load-core/src/test/resources/core-site.xml new file mode 100644 index 0000000..304b272 --- /dev/null +++ b/spark-load/spark-load-core/src/test/resources/core-site.xml @@ -0,0 +1,25 @@ +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> + +<configuration> + <property> + <name>hadoop.http.idle_timeout.ms</name> + <value>60000</value> + </property> +</configuration> \ No newline at end of file diff --git a/spark-load/spark-load-core/src/test/resources/hdfs-site.xml b/spark-load/spark-load-core/src/test/resources/hdfs-site.xml new file mode 100644 index 0000000..869686c --- /dev/null +++ b/spark-load/spark-load-core/src/test/resources/hdfs-site.xml @@ -0,0 +1,25 @@ +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> + +<configuration> + <property> + <name>dfs.replication</name> + <value>1</value> + </property> +</configuration> \ No newline at end of file diff --git a/spark-load/spark-load-core/src/test/resources/yarn-site.xml b/spark-load/spark-load-core/src/test/resources/yarn-site.xml new file mode 100644 index 0000000..e8279d2 --- /dev/null +++ b/spark-load/spark-load-core/src/test/resources/yarn-site.xml @@ -0,0 +1,25 @@ +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> + +<configuration> + <property> + <name>yarn.resourcemanager.address</name> + <value>my.hadoop.com</value> + </property> +</configuration> \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org