KYLIN-2577, improve hive configurations during cubing steps
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/ea5fcff8 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/ea5fcff8 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/ea5fcff8 Branch: refs/heads/master Commit: ea5fcff8397026de315324bf1797b62944591908 Parents: 4c18526 Author: Cheng Wang <cheng.w...@kyligence.io> Authored: Fri Apr 28 17:52:58 2017 +0800 Committer: nichunen <zjsy...@sjtu.org> Committed: Fri Apr 28 18:41:52 2017 +0800 ---------------------------------------------------------------------- build/bin/load-hive-conf.sh | 15 ++++ build/bin/sample.sh | 9 ++- .../kylin/common/util/HiveCmdBuilder.java | 29 ++++++- .../org/apache/kylin/job/JoinedFlatTable.java | 37 ++------- .../kylin/job/engine/JobEngineConfig.java | 32 +++++++- .../source/hive/CreateFlatHiveTableStep.java | 4 + .../apache/kylin/source/hive/HiveMRInput.java | 8 +- .../org/apache/kylin/tool/HiveConfigCLI.java | 81 ++++++++++++++++++++ 8 files changed, 173 insertions(+), 42 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/ea5fcff8/build/bin/load-hive-conf.sh ---------------------------------------------------------------------- diff --git a/build/bin/load-hive-conf.sh b/build/bin/load-hive-conf.sh new file mode 100644 index 0000000..a5046f0 --- /dev/null +++ b/build/bin/load-hive-conf.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# Kyligence Inc. License + +source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh + +# source me + +hive_conf_dir="${KYLIN_HOME}/conf/kylin_hive_conf.xml" +hive_conf_prop="${KYLIN_HOME}/logs/hive_props" +rm -rf ${hive_conf_prop} +export ENABLE_CHECK_ENV=false +${dir}/kylin.sh org.apache.kylin.tool.HiveConfigCLI ${hive_conf_dir} ${hive_conf_prop} +[[ 0 == $? ]] || quit "Error, can not parse ${hive_conf_dir} and can not apply it to hive relevant check." +hive_conf_properties=`cat ${hive_conf_prop}` \ No newline at end of file http://git-wip-us.apache.org/repos/asf/kylin/blob/ea5fcff8/build/bin/sample.sh ---------------------------------------------------------------------- diff --git a/build/bin/sample.sh b/build/bin/sample.sh index 617b671..2e13805 100644 --- a/build/bin/sample.sh +++ b/build/bin/sample.sh @@ -19,6 +19,7 @@ source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/find-hadoop-conf-dir.sh +source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/load-hive-conf.sh source ${dir}/check-env.sh "if-not-yet" job_jar=`find -L ${KYLIN_HOME}/lib/ -name kylin-job*.jar` @@ -43,7 +44,7 @@ echo "Going to create sample tables in hive to database "$sample_database" by "$ if [ "${hive_client_mode}" == "beeline" ] then beeline_params=`bash ${KYLIN_HOME}/bin/get-properties.sh kylin.source.hive.beeline-params` - beeline ${beeline_params} -e "CREATE DATABASE IF NOT EXISTS "$sample_database + beeline ${hive_conf_properties} ${beeline_params} -e "CREATE DATABASE IF NOT EXISTS "$sample_database hive2_url=`expr match "${beeline_params}" '.*\(hive2:.*:[0-9]\{4,6\}\/\)'` if [ -z ${hive2_url} ]; then hive2_url=`expr match "${beeline_params}" '.*\(hive2:.*:[0-9]\{4,6\}\)'` @@ -51,10 +52,10 @@ then else beeline_params=${beeline_params/${hive2_url}/${hive2_url}${sample_database}} fi - beeline ${beeline_params} -f ${KYLIN_HOME}/sample_cube/create_sample_tables.sql || { exit 1; } + beeline ${hive_conf_properties} ${beeline_params} -f ${KYLIN_HOME}/sample_cube/create_sample_tables.sql || { exit 1; } else - hive -e "CREATE DATABASE IF NOT EXISTS "$sample_database - hive --database $sample_database -f ${KYLIN_HOME}/sample_cube/create_sample_tables.sql || { exit 1; } + hive ${hive_conf_properties} -e "CREATE DATABASE IF NOT EXISTS "$sample_database + hive ${hive_conf_properties} --database $sample_database -f ${KYLIN_HOME}/sample_cube/create_sample_tables.sql || { exit 1; } fi echo "Sample hive tables are created successfully; Going to create sample cube..." http://git-wip-us.apache.org/repos/asf/kylin/blob/ea5fcff8/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java ---------------------------------------------------------------------- diff --git a/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java b/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java index 855ee48..9279973 100644 --- a/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java +++ b/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java @@ -24,6 +24,8 @@ import java.io.FileWriter; import java.io.IOException; import java.nio.charset.Charset; import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; @@ -42,6 +44,7 @@ public class HiveCmdBuilder { private HiveClientMode clientMode; private KylinConfig kylinConfig; + final private Map<String, String> hiveConfProps = new HashMap<>(); final private ArrayList<String> statements = Lists.newArrayList(); public HiveCmdBuilder() { @@ -58,7 +61,9 @@ public class HiveCmdBuilder { for (String statement : statements) { buf.append(statement).append("\n"); } - buf.append("\""); + buf.append("\"").append(" \\").append("\n"); + buf.append(parseProps()); + buf.append("\n"); break; case BEELINE: BufferedWriter bw = null; @@ -71,6 +76,7 @@ public class HiveCmdBuilder { bw.newLine(); } buf.append("beeline "); + buf.append(parseProps()); buf.append(kylinConfig.getHiveBeelineParams()); buf.append(" -f "); buf.append(tmpHql.getAbsolutePath()); @@ -101,8 +107,29 @@ public class HiveCmdBuilder { return buf.toString(); } + private String parseProps() { + StringBuilder s = new StringBuilder(); + for (Map.Entry<String, String> prop : hiveConfProps.entrySet()) { + s.append("--hiveconf "); + s.append(prop.getKey()); + s.append("="); + s.append(prop.getValue()); + s.append(" \\").append("\n"); + } + return s.toString(); + } + public void reset() { statements.clear(); + hiveConfProps.clear(); + } + + public void setHiveConfProps(Map<String, String> hiveConfProps) { + this.hiveConfProps.putAll(hiveConfProps); + } + + public void overwriteHiveProps(Map<String, String> overwrites) { + this.hiveConfProps.putAll(overwrites); } public void addStatement(String statement) { http://git-wip-us.apache.org/repos/asf/kylin/blob/ea5fcff8/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java ---------------------------------------------------------------------- diff --git a/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java b/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java index b8d18f8..16086e3 100644 --- a/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java +++ b/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java @@ -18,15 +18,10 @@ package org.apache.kylin.job; -import java.io.File; import java.lang.reflect.Method; import java.util.HashSet; -import java.util.Map; import java.util.Set; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; - import org.apache.kylin.common.KylinConfig; import org.apache.kylin.cube.CubeSegment; import org.apache.kylin.job.engine.JobEngineConfig; @@ -37,8 +32,6 @@ import org.apache.kylin.metadata.model.JoinTableDesc; import org.apache.kylin.metadata.model.PartitionDesc; import org.apache.kylin.metadata.model.TableRef; import org.apache.kylin.metadata.model.TblColRef; -import org.w3c.dom.Document; -import org.w3c.dom.NodeList; /** * @@ -50,34 +43,10 @@ public class JoinedFlatTable { return storageDfsDir + "/" + flatDesc.getTableName(); } - public static String generateHiveInitStatements(String flatTableDatabase, String kylinHiveFile, Map<String, String> cubeOverrides) { + public static String generateHiveInitStatements(String flatTableDatabase) { StringBuilder buffer = new StringBuilder(); - buffer.append("USE ").append(flatTableDatabase).append(";\n"); - try { - File file = new File(kylinHiveFile); - if (file.exists()) { - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - DocumentBuilder builder = factory.newDocumentBuilder(); - Document doc = builder.parse(file); - NodeList nl = doc.getElementsByTagName("property"); - for (int i = 0; i < nl.getLength(); i++) { - String name = doc.getElementsByTagName("name").item(i).getFirstChild().getNodeValue(); - String value = doc.getElementsByTagName("value").item(i).getFirstChild().getNodeValue(); - if (!name.equals("tmpjars")) { - buffer.append("SET ").append(name).append("=").append(value).append(";\n"); - } - } - } - } catch (Exception e) { - throw new RuntimeException("Failed to parse hive conf file ", e); - } - - for (Map.Entry<String, String> entry : cubeOverrides.entrySet()) { - buffer.append("SET ").append(entry.getKey()).append("=").append(entry.getValue()).append(";\n"); - } - return buffer.toString(); } @@ -128,6 +97,10 @@ public class JoinedFlatTable { return "INSERT OVERWRITE TABLE " + flatDesc.getTableName() + " " + generateSelectDataStatement(flatDesc) + ";\n"; } + public static String generateInsertPartialDataStatement(IJoinedFlatTableDesc flatDesc, String statement) { + return "INSERT OVERWRITE TABLE " + flatDesc.getTableName() + " " + generateSelectDataStatement(flatDesc) + statement + ";\n"; + } + public static String generateSelectDataStatement(IJoinedFlatTableDesc flatDesc) { StringBuilder sql = new StringBuilder(); sql.append("SELECT" + "\n"); http://git-wip-us.apache.org/repos/asf/kylin/blob/ea5fcff8/core-job/src/main/java/org/apache/kylin/job/engine/JobEngineConfig.java ---------------------------------------------------------------------- diff --git a/core-job/src/main/java/org/apache/kylin/job/engine/JobEngineConfig.java b/core-job/src/main/java/org/apache/kylin/job/engine/JobEngineConfig.java index 8859527..260f817 100644 --- a/core-job/src/main/java/org/apache/kylin/job/engine/JobEngineConfig.java +++ b/core-job/src/main/java/org/apache/kylin/job/engine/JobEngineConfig.java @@ -20,12 +20,19 @@ package org.apache.kylin.job.engine; import java.io.File; import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; import org.apache.commons.lang.StringUtils; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.util.OptionsHelper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.w3c.dom.Document; +import org.w3c.dom.NodeList; /** * @author ysong1 @@ -72,7 +79,7 @@ public class JobEngineConfig { /** * - * @param suffix job config file suffix name; if be null, will use the default job conf + * @param jobType job config file suffix name; if be null, will use the default job conf * @return the job config file path * @throws IOException */ @@ -107,6 +114,29 @@ public class JobEngineConfig { return OptionsHelper.convertToFileURL(jobConfig.getAbsolutePath()); } + public Map<String, String> getHivePropsFromFile() { + Map<String, String> props = new HashMap<>(); + try { + File file = new File(getHiveConfFilePath()); + if (file.exists()) { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + DocumentBuilder builder = factory.newDocumentBuilder(); + Document doc = builder.parse(file); + NodeList nl = doc.getElementsByTagName("property"); + for (int i = 0; i < nl.getLength(); i++) { + String key = doc.getElementsByTagName("name").item(i).getFirstChild().getNodeValue(); + String value = doc.getElementsByTagName("value").item(i).getFirstChild().getNodeValue(); + if (!key.equals("tmpjars")) { + props.put(key, value); + } + } + } + } catch (Exception e) { + throw new RuntimeException("Failed to parse hive conf file ", e); + } + return props; + } + // there should be no setters private final KylinConfig config; http://git-wip-us.apache.org/repos/asf/kylin/blob/ea5fcff8/source-hive/src/main/java/org/apache/kylin/source/hive/CreateFlatHiveTableStep.java ---------------------------------------------------------------------- diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/CreateFlatHiveTableStep.java b/source-hive/src/main/java/org/apache/kylin/source/hive/CreateFlatHiveTableStep.java index 8b241d2..9e13df8 100644 --- a/source-hive/src/main/java/org/apache/kylin/source/hive/CreateFlatHiveTableStep.java +++ b/source-hive/src/main/java/org/apache/kylin/source/hive/CreateFlatHiveTableStep.java @@ -26,6 +26,7 @@ import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.cube.CubeManager; import org.apache.kylin.engine.mr.steps.CubingExecutableUtil; import org.apache.kylin.job.common.PatternedLogger; +import org.apache.kylin.job.engine.JobEngineConfig; import org.apache.kylin.job.exception.ExecuteException; import org.apache.kylin.job.execution.AbstractExecutable; import org.apache.kylin.job.execution.ExecutableContext; @@ -42,6 +43,9 @@ public class CreateFlatHiveTableStep extends AbstractExecutable { protected void createFlatHiveTable(KylinConfig config) throws IOException { final HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder(); + final JobEngineConfig jobEngineConfig = new JobEngineConfig(config); + hiveCmdBuilder.setHiveConfProps(jobEngineConfig.getHivePropsFromFile()); + hiveCmdBuilder.overwriteHiveProps(config.getHiveConfigOverride()); hiveCmdBuilder.addStatement(getInitStatement()); hiveCmdBuilder.addStatement(getCreateTableStatement()); final String cmd = hiveCmdBuilder.toString(); http://git-wip-us.apache.org/repos/asf/kylin/blob/ea5fcff8/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java ---------------------------------------------------------------------- diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java index 418fcfc..2f348a0 100644 --- a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java +++ b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java @@ -107,7 +107,7 @@ public class HiveMRInput implements IMRInput { public void configureJob(Job job) { try { job.getConfiguration().addResource("hive-site.xml"); - + HCatInputFormat.setInput(job, dbName, tableName); job.setInputFormatClass(HCatInputFormat.class); @@ -145,9 +145,7 @@ public class HiveMRInput implements IMRInput { final KylinConfig cubeConfig = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(cubeName).getConfig(); JobEngineConfig conf = new JobEngineConfig(cubeConfig); - final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements( - flatTableDatabase, conf.getHiveConfFilePath(), cubeConfig.getHiveConfigOverride() - ) ; + final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase); final String jobWorkingDir = getJobWorkingDir(jobFlow); // create flat table first, then count and redistribute @@ -259,6 +257,7 @@ public class HiveMRInput implements IMRInput { private void redistributeTable(KylinConfig config, int numReducers) throws IOException { final HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder(); + hiveCmdBuilder.setHiveConfProps(new JobEngineConfig(config).getHivePropsFromFile()); hiveCmdBuilder.addStatement(getInitStatement()); hiveCmdBuilder.addStatement("set mapreduce.job.reduces=" + numReducers + ";\n"); hiveCmdBuilder.addStatement("set hive.merge.mapredfiles=false;\n"); @@ -376,6 +375,7 @@ public class HiveMRInput implements IMRInput { final String hiveTable = this.getIntermediateTableIdentity(); if (config.isHiveKeepFlatTable() == false && StringUtils.isNotEmpty(hiveTable)) { final HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder(); + hiveCmdBuilder.setHiveConfProps(new JobEngineConfig(config).getHivePropsFromFile()); hiveCmdBuilder.addStatement("USE " + config.getHiveDatabaseForIntermediateTable() + ";"); hiveCmdBuilder.addStatement("DROP TABLE IF EXISTS " + hiveTable + ";"); http://git-wip-us.apache.org/repos/asf/kylin/blob/ea5fcff8/tool/src/main/java/org/apache/kylin/tool/HiveConfigCLI.java ---------------------------------------------------------------------- diff --git a/tool/src/main/java/org/apache/kylin/tool/HiveConfigCLI.java b/tool/src/main/java/org/apache/kylin/tool/HiveConfigCLI.java new file mode 100644 index 0000000..e5b300a --- /dev/null +++ b/tool/src/main/java/org/apache/kylin/tool/HiveConfigCLI.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.tool; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.Charset; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +import org.apache.commons.io.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.dom.Document; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +public class HiveConfigCLI { + + private static final Logger logger = LoggerFactory.getLogger(HiveConfigCLI.class); + + private String inputFileName; + private String outputFileName; + + public HiveConfigCLI(String inputFileName, String outputFileName) { + this.inputFileName = inputFileName; + this.outputFileName = outputFileName; + logger.info("{} will be parsed to {}", inputFileName, outputFileName); + } + + public void parse() throws ParserConfigurationException, IOException, SAXException { + File input = new File(inputFileName); + File output = new File(outputFileName); + StringBuilder buffer = new StringBuilder(); + if (input.exists()) { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + DocumentBuilder builder = factory.newDocumentBuilder(); + Document doc = builder.parse(input); + NodeList nl = doc.getElementsByTagName("property"); + for (int i = 0; i < nl.getLength(); i++) { + String name = doc.getElementsByTagName("name").item(i).getFirstChild().getNodeValue(); + String value = doc.getElementsByTagName("value").item(i).getFirstChild().getNodeValue(); + buffer.append("--hiveconf "); + buffer.append(name); + buffer.append("="); + buffer.append(value); + buffer.append(" "); + logger.info("Parsing key: {}, value: {}", name, value); + } + FileUtils.writeStringToFile(output, buffer.toString(), Charset.defaultCharset()); + } + } + + public static void main(String[] args) throws IOException, SAXException, ParserConfigurationException { + if (args.length != 2) { + System.out.println("Usage: HiveConfigCLI <inputFileName> <outputFileName>"); + System.exit(1); + } + + HiveConfigCLI cli = new HiveConfigCLI(args[0], args[1]); + cli.parse(); + } +}