Repository: kylin Updated Branches: refs/heads/2.0.x f982a27c9 -> 6dd5e37ff
KYLIN-2577, improve hive configurations during cubing steps Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/da90122c Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/da90122c Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/da90122c Branch: refs/heads/2.0.x Commit: da90122c9f18b2ca0361234db8ab513b28cc4c15 Parents: f982a27 Author: Cheng Wang <cheng.w...@kyligence.io> Authored: Wed May 10 18:54:22 2017 +0800 Committer: Roger Shi <rogershijich...@hotmail.com> Committed: Wed May 10 18:55:46 2017 +0800 ---------------------------------------------------------------------- build/bin/load-hive-conf.sh | 15 ++++ build/bin/sample.sh | 9 ++- .../kylin/common/util/HiveCmdBuilder.java | 79 +++++++++++++++++++ .../kylin/common/util/HiveCmdBuilderTest.java | 11 ++- .../org/apache/kylin/job/JoinedFlatTable.java | 37 ++------- .../kylin/job/engine/JobEngineConfig.java | 15 +--- .../source/hive/CreateFlatHiveTableStep.java | 1 + .../apache/kylin/source/hive/HiveMRInput.java | 6 +- .../org/apache/kylin/tool/HiveConfigCLI.java | 83 ++++++++++++++++++++ 9 files changed, 200 insertions(+), 56 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/da90122c/build/bin/load-hive-conf.sh ---------------------------------------------------------------------- diff --git a/build/bin/load-hive-conf.sh b/build/bin/load-hive-conf.sh new file mode 100644 index 0000000..a5046f0 --- /dev/null +++ b/build/bin/load-hive-conf.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# Kyligence Inc. License + +source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh + +# source me + +hive_conf_dir="${KYLIN_HOME}/conf/kylin_hive_conf.xml" +hive_conf_prop="${KYLIN_HOME}/logs/hive_props" +rm -rf ${hive_conf_prop} +export ENABLE_CHECK_ENV=false +${dir}/kylin.sh org.apache.kylin.tool.HiveConfigCLI ${hive_conf_dir} ${hive_conf_prop} +[[ 0 == $? ]] || quit "Error, can not parse ${hive_conf_dir} and can not apply it to hive relevant check." +hive_conf_properties=`cat ${hive_conf_prop}` \ No newline at end of file http://git-wip-us.apache.org/repos/asf/kylin/blob/da90122c/build/bin/sample.sh ---------------------------------------------------------------------- diff --git a/build/bin/sample.sh b/build/bin/sample.sh index 617b671..2e13805 100644 --- a/build/bin/sample.sh +++ b/build/bin/sample.sh @@ -19,6 +19,7 @@ source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/find-hadoop-conf-dir.sh +source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/load-hive-conf.sh source ${dir}/check-env.sh "if-not-yet" job_jar=`find -L ${KYLIN_HOME}/lib/ -name kylin-job*.jar` @@ -43,7 +44,7 @@ echo "Going to create sample tables in hive to database "$sample_database" by "$ if [ "${hive_client_mode}" == "beeline" ] then beeline_params=`bash ${KYLIN_HOME}/bin/get-properties.sh kylin.source.hive.beeline-params` - beeline ${beeline_params} -e "CREATE DATABASE IF NOT EXISTS "$sample_database + beeline ${hive_conf_properties} ${beeline_params} -e "CREATE DATABASE IF NOT EXISTS "$sample_database hive2_url=`expr match "${beeline_params}" '.*\(hive2:.*:[0-9]\{4,6\}\/\)'` if [ -z ${hive2_url} ]; then hive2_url=`expr match "${beeline_params}" '.*\(hive2:.*:[0-9]\{4,6\}\)'` @@ -51,10 +52,10 @@ then else beeline_params=${beeline_params/${hive2_url}/${hive2_url}${sample_database}} fi - beeline ${beeline_params} -f ${KYLIN_HOME}/sample_cube/create_sample_tables.sql || { exit 1; } + beeline ${hive_conf_properties} ${beeline_params} -f ${KYLIN_HOME}/sample_cube/create_sample_tables.sql || { exit 1; } else - hive -e "CREATE DATABASE IF NOT EXISTS "$sample_database - hive --database $sample_database -f ${KYLIN_HOME}/sample_cube/create_sample_tables.sql || { exit 1; } + hive ${hive_conf_properties} -e "CREATE DATABASE IF NOT EXISTS "$sample_database + hive ${hive_conf_properties} --database $sample_database -f ${KYLIN_HOME}/sample_cube/create_sample_tables.sql || { exit 1; } fi echo "Sample hive tables are created successfully; Going to create sample cube..." http://git-wip-us.apache.org/repos/asf/kylin/blob/da90122c/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java ---------------------------------------------------------------------- diff --git a/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java b/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java index 855ee48..b7e6c15 100644 --- a/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java +++ b/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java @@ -24,17 +24,26 @@ import java.io.FileWriter; import java.io.IOException; import java.nio.charset.Charset; import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang.StringUtils; import org.apache.kylin.common.KylinConfig; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.w3c.dom.Document; +import org.w3c.dom.NodeList; import com.google.common.collect.Lists; public class HiveCmdBuilder { private static final Logger logger = LoggerFactory.getLogger(HiveCmdBuilder.class); + public static final String HIVE_CONF_FILENAME = "kylin_hive_conf"; public enum HiveClientMode { CLI, BEELINE @@ -42,11 +51,13 @@ public class HiveCmdBuilder { private HiveClientMode clientMode; private KylinConfig kylinConfig; + final private Map<String, String> hiveConfProps = new HashMap<>(); final private ArrayList<String> statements = Lists.newArrayList(); public HiveCmdBuilder() { kylinConfig = KylinConfig.getInstanceFromEnv(); clientMode = HiveClientMode.valueOf(kylinConfig.getHiveClientMode().toUpperCase()); + loadHiveConfiguration(); } public String build() { @@ -59,6 +70,7 @@ public class HiveCmdBuilder { buf.append(statement).append("\n"); } buf.append("\""); + buf.append(parseProps()); break; case BEELINE: BufferedWriter bw = null; @@ -71,6 +83,7 @@ public class HiveCmdBuilder { bw.newLine(); } buf.append("beeline "); + buf.append(parseProps()); buf.append(kylinConfig.getHiveBeelineParams()); buf.append(" -f "); buf.append(tmpHql.getAbsolutePath()); @@ -101,8 +114,29 @@ public class HiveCmdBuilder { return buf.toString(); } + private String parseProps() { + StringBuilder s = new StringBuilder(); + for (Map.Entry<String, String> prop : hiveConfProps.entrySet()) { + s.append(" --hiveconf "); + s.append(prop.getKey()); + s.append("="); + s.append(prop.getValue()); + } + return s.toString(); + } + public void reset() { statements.clear(); + hiveConfProps.clear(); + } + + public void setHiveConfProps(Map<String, String> hiveConfProps) { + this.hiveConfProps.clear(); + this.hiveConfProps.putAll(hiveConfProps); + } + + public void overwriteHiveProps(Map<String, String> overwrites) { + this.hiveConfProps.putAll(overwrites); } public void addStatement(String statement) { @@ -119,4 +153,49 @@ public class HiveCmdBuilder { public String toString() { return build(); } + + private void loadHiveConfiguration() { + + File hiveConfFile; + String hiveConfFileName = (HIVE_CONF_FILENAME + ".xml"); + String path = System.getProperty(KylinConfig.KYLIN_CONF); + + if (StringUtils.isNotEmpty(path)) { + hiveConfFile = new File(path, hiveConfFileName); + } else { + path = KylinConfig.getKylinHome(); + if (StringUtils.isEmpty(path)) { + logger.error("KYLIN_HOME is not set, can not locate hive conf: {}.xml", HIVE_CONF_FILENAME); + return; + } + hiveConfFile = new File(path + File.separator + "conf", hiveConfFileName); + } + + if (hiveConfFile == null || !hiveConfFile.exists()) { + throw new RuntimeException("Failed to read " + HIVE_CONF_FILENAME + ".xml"); + } + + String fileUrl = OptionsHelper.convertToFileURL(hiveConfFile.getAbsolutePath()); + + try { + File file = new File(fileUrl); + if (file.exists()) { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + DocumentBuilder builder = factory.newDocumentBuilder(); + Document doc = builder.parse(file); + NodeList nl = doc.getElementsByTagName("property"); + hiveConfProps.clear(); + for (int i = 0; i < nl.getLength(); i++) { + String key = doc.getElementsByTagName("name").item(i).getFirstChild().getNodeValue(); + String value = doc.getElementsByTagName("value").item(i).getFirstChild().getNodeValue(); + if (!key.equals("tmpjars")) { + hiveConfProps.put(key, value); + } + } + } + } catch (Exception e) { + throw new RuntimeException("Failed to parse hive conf file ", e); + } + } + } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/kylin/blob/da90122c/core-common/src/test/java/org/apache/kylin/common/util/HiveCmdBuilderTest.java ---------------------------------------------------------------------- diff --git a/core-common/src/test/java/org/apache/kylin/common/util/HiveCmdBuilderTest.java b/core-common/src/test/java/org/apache/kylin/common/util/HiveCmdBuilderTest.java index 3bc34b4..d847575 100644 --- a/core-common/src/test/java/org/apache/kylin/common/util/HiveCmdBuilderTest.java +++ b/core-common/src/test/java/org/apache/kylin/common/util/HiveCmdBuilderTest.java @@ -24,6 +24,8 @@ import static org.junit.Assert.assertTrue; import java.io.File; import java.io.IOException; import java.nio.charset.Charset; +import java.util.HashMap; +import java.util.Map; import org.apache.commons.io.FileUtils; import org.junit.After; @@ -48,12 +50,17 @@ public class HiveCmdBuilderTest { public void testHiveCLI() { System.setProperty("kylin.source.hive.client", "cli"); + Map<String, String> hiveProps = new HashMap<>(); + hiveProps.put("hive.execution.engine", "mr"); + Map<String, String> hivePropsOverwrite = new HashMap<>(); + hivePropsOverwrite.put("hive.execution.engine", "tez"); HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder(); hiveCmdBuilder.addStatement("USE default;"); hiveCmdBuilder.addStatement("DROP TABLE test;"); hiveCmdBuilder.addStatement("SHOW\n TABLES;"); - - assertEquals("hive -e \"USE default;\nDROP TABLE test;\nSHOW\n TABLES;\n\"", hiveCmdBuilder.build()); + hiveCmdBuilder.setHiveConfProps(hiveProps); + hiveCmdBuilder.overwriteHiveProps(hivePropsOverwrite); + assertEquals("hive -e \"USE default;\nDROP TABLE test;\nSHOW\n TABLES;\n\" --hiveconf hive.execution.engine=tez", hiveCmdBuilder.build()); } @Test http://git-wip-us.apache.org/repos/asf/kylin/blob/da90122c/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java ---------------------------------------------------------------------- diff --git a/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java b/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java index 5553d34..cc64774 100644 --- a/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java +++ b/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java @@ -18,14 +18,9 @@ package org.apache.kylin.job; -import java.io.File; import java.util.HashSet; -import java.util.Map; import java.util.Set; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; - import org.apache.kylin.metadata.model.DataModelDesc; import org.apache.kylin.metadata.model.IJoinedFlatTableDesc; import org.apache.kylin.metadata.model.JoinDesc; @@ -33,8 +28,6 @@ import org.apache.kylin.metadata.model.JoinTableDesc; import org.apache.kylin.metadata.model.PartitionDesc; import org.apache.kylin.metadata.model.TableRef; import org.apache.kylin.metadata.model.TblColRef; -import org.w3c.dom.Document; -import org.w3c.dom.NodeList; /** * @@ -46,35 +39,11 @@ public class JoinedFlatTable { return storageDfsDir + "/" + flatDesc.getTableName(); } - public static String generateHiveInitStatements( - String flatTableDatabase, String kylinHiveFile, Map<String, String> cubeOverrides) { + public static String generateHiveInitStatements(String flatTableDatabase) { StringBuilder buffer = new StringBuilder(); buffer.append("USE ").append(flatTableDatabase).append(";\n"); - try { - File file = new File(kylinHiveFile); - if (file.exists()) { - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - DocumentBuilder builder = factory.newDocumentBuilder(); - Document doc = builder.parse(file); - NodeList nl = doc.getElementsByTagName("property"); - for (int i = 0; i < nl.getLength(); i++) { - String name = doc.getElementsByTagName("name").item(i).getFirstChild().getNodeValue(); - String value = doc.getElementsByTagName("value").item(i).getFirstChild().getNodeValue(); - if (!name.equals("tmpjars")) { - buffer.append("SET ").append(name).append("=").append(value).append(";\n"); - } - } - } - } catch (Exception e) { - throw new RuntimeException("Failed to parse hive conf file ", e); - } - - for (Map.Entry<String, String> entry : cubeOverrides.entrySet()) { - buffer.append("SET ").append(entry.getKey()).append("=").append(entry.getValue()).append(";\n"); - } - return buffer.toString(); } @@ -107,6 +76,10 @@ public class JoinedFlatTable { return "INSERT OVERWRITE TABLE " + flatDesc.getTableName() + " " + generateSelectDataStatement(flatDesc) + ";\n"; } + public static String generateInsertPartialDataStatement(IJoinedFlatTableDesc flatDesc, String statement) { + return "INSERT OVERWRITE TABLE " + flatDesc.getTableName() + " " + generateSelectDataStatement(flatDesc) + statement + ";\n"; + } + public static String generateSelectDataStatement(IJoinedFlatTableDesc flatDesc) { StringBuilder sql = new StringBuilder(); sql.append("SELECT" + "\n"); http://git-wip-us.apache.org/repos/asf/kylin/blob/da90122c/core-job/src/main/java/org/apache/kylin/job/engine/JobEngineConfig.java ---------------------------------------------------------------------- diff --git a/core-job/src/main/java/org/apache/kylin/job/engine/JobEngineConfig.java b/core-job/src/main/java/org/apache/kylin/job/engine/JobEngineConfig.java index 8859527..c9ac583 100644 --- a/core-job/src/main/java/org/apache/kylin/job/engine/JobEngineConfig.java +++ b/core-job/src/main/java/org/apache/kylin/job/engine/JobEngineConfig.java @@ -33,7 +33,6 @@ import org.slf4j.LoggerFactory; public class JobEngineConfig { private static final Logger logger = LoggerFactory.getLogger(JobEngineConfig.class); public static final String HADOOP_JOB_CONF_FILENAME = "kylin_job_conf"; - public static final String HIVE_CONF_FILENAME = "kylin_hive_conf"; public static final String DEFAUL_JOB_CONF_SUFFIX = ""; public static final String IN_MEM_JOB_CONF_SUFFIX = "inmem"; @@ -72,7 +71,7 @@ public class JobEngineConfig { /** * - * @param suffix job config file suffix name; if be null, will use the default job conf + * @param jobType job config file suffix name; if be null, will use the default job conf * @return the job config file path * @throws IOException */ @@ -95,18 +94,6 @@ public class JobEngineConfig { return path; } - public String getHiveConfFilePath() { - String hiveConfFile = (HIVE_CONF_FILENAME + ".xml"); - - File jobConfig = getJobConfig(hiveConfFile); - if (jobConfig == null || !jobConfig.exists()) { - - logger.error("fail to locate " + HIVE_CONF_FILENAME + ".xml"); - throw new RuntimeException("fail to locate " + HIVE_CONF_FILENAME + ".xml"); - } - return OptionsHelper.convertToFileURL(jobConfig.getAbsolutePath()); - } - // there should be no setters private final KylinConfig config; http://git-wip-us.apache.org/repos/asf/kylin/blob/da90122c/source-hive/src/main/java/org/apache/kylin/source/hive/CreateFlatHiveTableStep.java ---------------------------------------------------------------------- diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/CreateFlatHiveTableStep.java b/source-hive/src/main/java/org/apache/kylin/source/hive/CreateFlatHiveTableStep.java index 8b241d2..f28f3c7 100644 --- a/source-hive/src/main/java/org/apache/kylin/source/hive/CreateFlatHiveTableStep.java +++ b/source-hive/src/main/java/org/apache/kylin/source/hive/CreateFlatHiveTableStep.java @@ -42,6 +42,7 @@ public class CreateFlatHiveTableStep extends AbstractExecutable { protected void createFlatHiveTable(KylinConfig config) throws IOException { final HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder(); + hiveCmdBuilder.overwriteHiveProps(config.getHiveConfigOverride()); hiveCmdBuilder.addStatement(getInitStatement()); hiveCmdBuilder.addStatement(getCreateTableStatement()); final String cmd = hiveCmdBuilder.toString(); http://git-wip-us.apache.org/repos/asf/kylin/blob/da90122c/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java ---------------------------------------------------------------------- diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java index 418fcfc..a97468f 100644 --- a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java +++ b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java @@ -107,7 +107,7 @@ public class HiveMRInput implements IMRInput { public void configureJob(Job job) { try { job.getConfiguration().addResource("hive-site.xml"); - + HCatInputFormat.setInput(job, dbName, tableName); job.setInputFormatClass(HCatInputFormat.class); @@ -145,9 +145,7 @@ public class HiveMRInput implements IMRInput { final KylinConfig cubeConfig = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(cubeName).getConfig(); JobEngineConfig conf = new JobEngineConfig(cubeConfig); - final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements( - flatTableDatabase, conf.getHiveConfFilePath(), cubeConfig.getHiveConfigOverride() - ) ; + final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase); final String jobWorkingDir = getJobWorkingDir(jobFlow); // create flat table first, then count and redistribute http://git-wip-us.apache.org/repos/asf/kylin/blob/da90122c/tool/src/main/java/org/apache/kylin/tool/HiveConfigCLI.java ---------------------------------------------------------------------- diff --git a/tool/src/main/java/org/apache/kylin/tool/HiveConfigCLI.java b/tool/src/main/java/org/apache/kylin/tool/HiveConfigCLI.java new file mode 100644 index 0000000..751a468 --- /dev/null +++ b/tool/src/main/java/org/apache/kylin/tool/HiveConfigCLI.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.tool; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.Charset; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +import org.apache.commons.io.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.dom.Document; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +public class HiveConfigCLI { + + private static final Logger logger = LoggerFactory.getLogger(HiveConfigCLI.class); + + private String inputFileName; + private String outputFileName; + + public HiveConfigCLI(String inputFileName, String outputFileName) { + this.inputFileName = inputFileName; + this.outputFileName = outputFileName; + logger.info("{} will be parsed to {}", inputFileName, outputFileName); + } + + public void parse() throws ParserConfigurationException, IOException, SAXException { + File input = new File(inputFileName); + File output = new File(outputFileName); + StringBuilder buffer = new StringBuilder(); + if (input.exists()) { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + DocumentBuilder builder = factory.newDocumentBuilder(); + Document doc = builder.parse(input); + NodeList nl = doc.getElementsByTagName("property"); + for (int i = 0; i < nl.getLength(); i++) { + String name = doc.getElementsByTagName("name").item(i).getFirstChild().getNodeValue(); + String value = doc.getElementsByTagName("value").item(i).getFirstChild().getNodeValue(); + buffer.append("--hiveconf "); + buffer.append(name); + buffer.append("="); + buffer.append(value); + buffer.append(" "); + logger.info("Parsing key: {}, value: {}", name, value); + } + FileUtils.writeStringToFile(output, buffer.toString(), Charset.defaultCharset()); + } else { + logger.error("input file {} doesn't exist.", input); + } + } + + public static void main(String[] args) throws IOException, SAXException, ParserConfigurationException { + if (args.length != 2) { + System.out.println("Usage: HiveConfigCLI <inputFileName> <outputFileName>"); + System.exit(1); + } + + HiveConfigCLI cli = new HiveConfigCLI(args[0], args[1]); + cli.parse(); + } +}