KYLIN-2577, improve hive configurations during cubing steps

Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/ea5fcff8
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/ea5fcff8
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/ea5fcff8

Branch: refs/heads/master
Commit: ea5fcff8397026de315324bf1797b62944591908
Parents: 4c18526
Author: Cheng Wang <cheng.w...@kyligence.io>
Authored: Fri Apr 28 17:52:58 2017 +0800
Committer: nichunen <zjsy...@sjtu.org>
Committed: Fri Apr 28 18:41:52 2017 +0800

----------------------------------------------------------------------
 build/bin/load-hive-conf.sh                     | 15 ++++
 build/bin/sample.sh                             |  9 ++-
 .../kylin/common/util/HiveCmdBuilder.java       | 29 ++++++-
 .../org/apache/kylin/job/JoinedFlatTable.java   | 37 ++-------
 .../kylin/job/engine/JobEngineConfig.java       | 32 +++++++-
 .../source/hive/CreateFlatHiveTableStep.java    |  4 +
 .../apache/kylin/source/hive/HiveMRInput.java   |  8 +-
 .../org/apache/kylin/tool/HiveConfigCLI.java    | 81 ++++++++++++++++++++
 8 files changed, 173 insertions(+), 42 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/ea5fcff8/build/bin/load-hive-conf.sh
----------------------------------------------------------------------
diff --git a/build/bin/load-hive-conf.sh b/build/bin/load-hive-conf.sh
new file mode 100644
index 0000000..a5046f0
--- /dev/null
+++ b/build/bin/load-hive-conf.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+# Kyligence Inc. License
+
+source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh
+
+# source me
+
+hive_conf_dir="${KYLIN_HOME}/conf/kylin_hive_conf.xml"
+hive_conf_prop="${KYLIN_HOME}/logs/hive_props"
+rm -rf ${hive_conf_prop}
+export ENABLE_CHECK_ENV=false
+${dir}/kylin.sh org.apache.kylin.tool.HiveConfigCLI ${hive_conf_dir} ${hive_conf_prop}
+[[ 0 == $? ]] || quit "Error, can not parse ${hive_conf_dir} and can not apply it to hive relevant check."
+hive_conf_properties=`cat ${hive_conf_prop}`
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/kylin/blob/ea5fcff8/build/bin/sample.sh
----------------------------------------------------------------------
diff --git a/build/bin/sample.sh b/build/bin/sample.sh
index 617b671..2e13805 100644
--- a/build/bin/sample.sh
+++ b/build/bin/sample.sh
@@ -19,6 +19,7 @@
 
 source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh
 source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/find-hadoop-conf-dir.sh
+source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/load-hive-conf.sh
 
 source ${dir}/check-env.sh "if-not-yet"
 job_jar=`find -L ${KYLIN_HOME}/lib/ -name kylin-job*.jar`
@@ -43,7 +44,7 @@ echo "Going to create sample tables in hive to database "$sample_database" by "$
 if [ "${hive_client_mode}" == "beeline" ]
 then
     beeline_params=`bash ${KYLIN_HOME}/bin/get-properties.sh kylin.source.hive.beeline-params`
-    beeline ${beeline_params} -e "CREATE DATABASE IF NOT EXISTS "$sample_database
+    beeline ${hive_conf_properties} ${beeline_params} -e "CREATE DATABASE IF NOT EXISTS "$sample_database
     hive2_url=`expr match "${beeline_params}" '.*\(hive2:.*:[0-9]\{4,6\}\/\)'`
     if [ -z ${hive2_url} ]; then
         hive2_url=`expr match "${beeline_params}" '.*\(hive2:.*:[0-9]\{4,6\}\)'`
@@ -51,10 +52,10 @@ then
     else
         beeline_params=${beeline_params/${hive2_url}/${hive2_url}${sample_database}}
     fi
-    beeline ${beeline_params} -f ${KYLIN_HOME}/sample_cube/create_sample_tables.sql  || { exit 1; }
+    beeline ${hive_conf_properties} ${beeline_params} -f ${KYLIN_HOME}/sample_cube/create_sample_tables.sql  || { exit 1; }
 else
-    hive -e "CREATE DATABASE IF NOT EXISTS "$sample_database
-    hive --database $sample_database -f ${KYLIN_HOME}/sample_cube/create_sample_tables.sql  || { exit 1; }
+    hive ${hive_conf_properties} -e "CREATE DATABASE IF NOT EXISTS "$sample_database
+    hive ${hive_conf_properties} --database $sample_database -f ${KYLIN_HOME}/sample_cube/create_sample_tables.sql  || { exit 1; }
 fi
 
 echo "Sample hive tables are created successfully; Going to create sample cube..."

http://git-wip-us.apache.org/repos/asf/kylin/blob/ea5fcff8/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java b/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java
index 855ee48..9279973 100644
--- a/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java
+++ b/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java
@@ -24,6 +24,8 @@ import java.io.FileWriter;
 import java.io.IOException;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
 
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.IOUtils;
@@ -42,6 +44,7 @@ public class HiveCmdBuilder {
 
     private HiveClientMode clientMode;
     private KylinConfig kylinConfig;
+    final private Map<String, String> hiveConfProps = new HashMap<>();
     final private ArrayList<String> statements = Lists.newArrayList();
 
     public HiveCmdBuilder() {
@@ -58,7 +61,9 @@ public class HiveCmdBuilder {
             for (String statement : statements) {
                 buf.append(statement).append("\n");
             }
-            buf.append("\"");
+            buf.append("\"").append(" \\").append("\n");
+            buf.append(parseProps());
+            buf.append("\n");
             break;
         case BEELINE:
             BufferedWriter bw = null;
@@ -71,6 +76,7 @@ public class HiveCmdBuilder {
                     bw.newLine();
                 }
                 buf.append("beeline ");
+                buf.append(parseProps());
                 buf.append(kylinConfig.getHiveBeelineParams());
                 buf.append(" -f ");
                 buf.append(tmpHql.getAbsolutePath());
@@ -101,8 +107,29 @@ public class HiveCmdBuilder {
         return buf.toString();
     }
 
+    private String parseProps() {
+        StringBuilder s = new StringBuilder();
+        for (Map.Entry<String, String> prop : hiveConfProps.entrySet()) {
+            s.append("--hiveconf ");
+            s.append(prop.getKey());
+            s.append("=");
+            s.append(prop.getValue());
+            s.append(" \\").append("\n");
+        }
+        return s.toString();
+    }
+
     public void reset() {
         statements.clear();
+        hiveConfProps.clear();
+    }
+
+    public void setHiveConfProps(Map<String, String> hiveConfProps) {
+        this.hiveConfProps.putAll(hiveConfProps);
+    }
+
+    public void overwriteHiveProps(Map<String, String> overwrites) {
+        this.hiveConfProps.putAll(overwrites);
     }
 
     public void addStatement(String statement) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/ea5fcff8/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java
----------------------------------------------------------------------
diff --git a/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java b/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java
index b8d18f8..16086e3 100644
--- a/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java
+++ b/core-job/src/main/java/org/apache/kylin/job/JoinedFlatTable.java
@@ -18,15 +18,10 @@
 
 package org.apache.kylin.job;
 
-import java.io.File;
 import java.lang.reflect.Method;
 import java.util.HashSet;
-import java.util.Map;
 import java.util.Set;
 
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.cube.CubeSegment;
 import org.apache.kylin.job.engine.JobEngineConfig;
@@ -37,8 +32,6 @@ import org.apache.kylin.metadata.model.JoinTableDesc;
 import org.apache.kylin.metadata.model.PartitionDesc;
 import org.apache.kylin.metadata.model.TableRef;
 import org.apache.kylin.metadata.model.TblColRef;
-import org.w3c.dom.Document;
-import org.w3c.dom.NodeList;
 
 /**
  *
@@ -50,34 +43,10 @@ public class JoinedFlatTable {
         return storageDfsDir + "/" + flatDesc.getTableName();
     }
 
-    public static String generateHiveInitStatements(String flatTableDatabase, String kylinHiveFile, Map<String, String> cubeOverrides) {
+    public static String generateHiveInitStatements(String flatTableDatabase) {
 
         StringBuilder buffer = new StringBuilder();
-
         buffer.append("USE ").append(flatTableDatabase).append(";\n");
-        try {
-            File file = new File(kylinHiveFile);
-            if (file.exists()) {
-                DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
-                DocumentBuilder builder = factory.newDocumentBuilder();
-                Document doc = builder.parse(file);
-                NodeList nl = doc.getElementsByTagName("property");
-                for (int i = 0; i < nl.getLength(); i++) {
-                    String name = doc.getElementsByTagName("name").item(i).getFirstChild().getNodeValue();
-                    String value = doc.getElementsByTagName("value").item(i).getFirstChild().getNodeValue();
-                    if (!name.equals("tmpjars")) {
-                        buffer.append("SET ").append(name).append("=").append(value).append(";\n");
-                    }
-                }
-            }
-        } catch (Exception e) {
-            throw new RuntimeException("Failed to parse hive conf file ", e);
-        }
-
-        for (Map.Entry<String, String> entry : cubeOverrides.entrySet()) {
-            buffer.append("SET ").append(entry.getKey()).append("=").append(entry.getValue()).append(";\n");
-        }
-
         return buffer.toString();
     }
 
@@ -128,6 +97,10 @@ public class JoinedFlatTable {
         return "INSERT OVERWRITE TABLE " + flatDesc.getTableName() + " " + generateSelectDataStatement(flatDesc) + ";\n";
     }
 
+    public static String generateInsertPartialDataStatement(IJoinedFlatTableDesc flatDesc, String statement) {
+        return "INSERT OVERWRITE TABLE " + flatDesc.getTableName() + " " + generateSelectDataStatement(flatDesc) + statement + ";\n";
+    }
+
     public static String generateSelectDataStatement(IJoinedFlatTableDesc flatDesc) {
         StringBuilder sql = new StringBuilder();
         sql.append("SELECT" + "\n");

http://git-wip-us.apache.org/repos/asf/kylin/blob/ea5fcff8/core-job/src/main/java/org/apache/kylin/job/engine/JobEngineConfig.java
----------------------------------------------------------------------
diff --git a/core-job/src/main/java/org/apache/kylin/job/engine/JobEngineConfig.java b/core-job/src/main/java/org/apache/kylin/job/engine/JobEngineConfig.java
index 8859527..260f817 100644
--- a/core-job/src/main/java/org/apache/kylin/job/engine/JobEngineConfig.java
+++ b/core-job/src/main/java/org/apache/kylin/job/engine/JobEngineConfig.java
@@ -20,12 +20,19 @@ package org.apache.kylin.job.engine;
 
 import java.io.File;
 import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
 
 import org.apache.commons.lang.StringUtils;
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.util.OptionsHelper;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+import org.w3c.dom.NodeList;
 
 /**
  * @author ysong1
@@ -72,7 +79,7 @@ public class JobEngineConfig {
 
     /**
      *
-     * @param suffix job config file suffix name; if be null, will use the default job conf
+     * @param jobType job config file suffix name; if be null, will use the default job conf
      * @return the job config file path
      * @throws IOException
      */
@@ -107,6 +114,29 @@ public class JobEngineConfig {
         return OptionsHelper.convertToFileURL(jobConfig.getAbsolutePath());
     }
 
+    public Map<String, String> getHivePropsFromFile() {
+        Map<String, String> props = new HashMap<>();
+        try {
+            File file = new File(getHiveConfFilePath());
+            if (file.exists()) {
+                DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+                DocumentBuilder builder = factory.newDocumentBuilder();
+                Document doc = builder.parse(file);
+                NodeList nl = doc.getElementsByTagName("property");
+                for (int i = 0; i < nl.getLength(); i++) {
+                    String key = doc.getElementsByTagName("name").item(i).getFirstChild().getNodeValue();
+                    String value = doc.getElementsByTagName("value").item(i).getFirstChild().getNodeValue();
+                    if (!key.equals("tmpjars")) {
+                        props.put(key, value);
+                    }
+                }
+            }
+        } catch (Exception e) {
+            throw new RuntimeException("Failed to parse hive conf file ", e);
+        }
+        return props;
+    }
+
     // there should be no setters
     private final KylinConfig config;
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/ea5fcff8/source-hive/src/main/java/org/apache/kylin/source/hive/CreateFlatHiveTableStep.java
----------------------------------------------------------------------
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/CreateFlatHiveTableStep.java b/source-hive/src/main/java/org/apache/kylin/source/hive/CreateFlatHiveTableStep.java
index 8b241d2..9e13df8 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/CreateFlatHiveTableStep.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/CreateFlatHiveTableStep.java
@@ -26,6 +26,7 @@ import org.apache.kylin.cube.CubeInstance;
 import org.apache.kylin.cube.CubeManager;
 import org.apache.kylin.engine.mr.steps.CubingExecutableUtil;
 import org.apache.kylin.job.common.PatternedLogger;
+import org.apache.kylin.job.engine.JobEngineConfig;
 import org.apache.kylin.job.exception.ExecuteException;
 import org.apache.kylin.job.execution.AbstractExecutable;
 import org.apache.kylin.job.execution.ExecutableContext;
@@ -42,6 +43,9 @@ public class CreateFlatHiveTableStep extends AbstractExecutable {
 
     protected void createFlatHiveTable(KylinConfig config) throws IOException {
         final HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder();
+        final JobEngineConfig jobEngineConfig = new JobEngineConfig(config);
+        hiveCmdBuilder.setHiveConfProps(jobEngineConfig.getHivePropsFromFile());
+        hiveCmdBuilder.overwriteHiveProps(config.getHiveConfigOverride());
         hiveCmdBuilder.addStatement(getInitStatement());
         hiveCmdBuilder.addStatement(getCreateTableStatement());
         final String cmd = hiveCmdBuilder.toString();

http://git-wip-us.apache.org/repos/asf/kylin/blob/ea5fcff8/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java
----------------------------------------------------------------------
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java
index 418fcfc..2f348a0 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java
@@ -107,7 +107,7 @@ public class HiveMRInput implements IMRInput {
         public void configureJob(Job job) {
             try {
                 job.getConfiguration().addResource("hive-site.xml");
-                
+
                 HCatInputFormat.setInput(job, dbName, tableName);
                 job.setInputFormatClass(HCatInputFormat.class);
 
@@ -145,9 +145,7 @@ public class HiveMRInput implements IMRInput {
             final KylinConfig cubeConfig = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(cubeName).getConfig();
             JobEngineConfig conf = new JobEngineConfig(cubeConfig);
 
-            final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(
-                    flatTableDatabase, conf.getHiveConfFilePath(), cubeConfig.getHiveConfigOverride()
-            ) ;
+            final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);
             final String jobWorkingDir = getJobWorkingDir(jobFlow);
 
             // create flat table first, then count and redistribute
@@ -259,6 +257,7 @@ public class HiveMRInput implements IMRInput {
 
         private void redistributeTable(KylinConfig config, int numReducers) throws IOException {
             final HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder();
+            hiveCmdBuilder.setHiveConfProps(new JobEngineConfig(config).getHivePropsFromFile());
             hiveCmdBuilder.addStatement(getInitStatement());
             hiveCmdBuilder.addStatement("set mapreduce.job.reduces=" + numReducers + ";\n");
             hiveCmdBuilder.addStatement("set hive.merge.mapredfiles=false;\n");
@@ -376,6 +375,7 @@ public class HiveMRInput implements IMRInput {
             final String hiveTable = this.getIntermediateTableIdentity();
             if (config.isHiveKeepFlatTable() == false && StringUtils.isNotEmpty(hiveTable)) {
                 final HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder();
+                hiveCmdBuilder.setHiveConfProps(new JobEngineConfig(config).getHivePropsFromFile());
                 hiveCmdBuilder.addStatement("USE " + config.getHiveDatabaseForIntermediateTable() + ";");
                 hiveCmdBuilder.addStatement("DROP TABLE IF EXISTS  " + hiveTable + ";");
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/ea5fcff8/tool/src/main/java/org/apache/kylin/tool/HiveConfigCLI.java
----------------------------------------------------------------------
diff --git a/tool/src/main/java/org/apache/kylin/tool/HiveConfigCLI.java b/tool/src/main/java/org/apache/kylin/tool/HiveConfigCLI.java
new file mode 100644
index 0000000..e5b300a
--- /dev/null
+++ b/tool/src/main/java/org/apache/kylin/tool/HiveConfigCLI.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.tool;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.Charset;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.apache.commons.io.FileUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+public class HiveConfigCLI {
+
+    private static final Logger logger = LoggerFactory.getLogger(HiveConfigCLI.class);
+
+    private String inputFileName;
+    private String outputFileName;
+
+    public HiveConfigCLI(String inputFileName, String outputFileName) {
+        this.inputFileName = inputFileName;
+        this.outputFileName = outputFileName;
+        logger.info("{} will be parsed to {}", inputFileName, outputFileName);
+    }
+
+    public void parse() throws ParserConfigurationException, IOException, SAXException {
+        File input = new File(inputFileName);
+        File output = new File(outputFileName);
+        StringBuilder buffer = new StringBuilder();
+        if (input.exists()) {
+            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+            DocumentBuilder builder = factory.newDocumentBuilder();
+            Document doc = builder.parse(input);
+            NodeList nl = doc.getElementsByTagName("property");
+            for (int i = 0; i < nl.getLength(); i++) {
+                String name = doc.getElementsByTagName("name").item(i).getFirstChild().getNodeValue();
+                String value = doc.getElementsByTagName("value").item(i).getFirstChild().getNodeValue();
+                buffer.append("--hiveconf ");
+                buffer.append(name);
+                buffer.append("=");
+                buffer.append(value);
+                buffer.append(" ");
+                logger.info("Parsing key: {}, value: {}", name, value);
+            }
+            FileUtils.writeStringToFile(output, buffer.toString(), Charset.defaultCharset());
+        }
+    }
+
+    public static void main(String[] args) throws IOException, SAXException, ParserConfigurationException {
+        if (args.length != 2) {
+            System.out.println("Usage: HiveConfigCLI <inputFileName> <outputFileName>");
+            System.exit(1);
+        }
+
+        HiveConfigCLI cli = new HiveConfigCLI(args[0], args[1]);
+        cli.parse();
+    }
+}

Reply via email to