KYLIN-2515 add IAdhocConverter to allow customized adhoc query converte
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/6ce9983f Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/6ce9983f Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/6ce9983f Branch: refs/heads/master Commit: 6ce9983f70fb497a50b660289cd11a8714cc731e Parents: 9fafd27 Author: Hongbin Ma <mahong...@apache.org> Authored: Fri May 26 16:50:49 2017 +0800 Committer: Roger Shi <rogershijich...@gmail.com> Committed: Sat May 27 21:11:03 2017 +0800 ---------------------------------------------------------------------- .../apache/kylin/common/KylinConfigBase.java | 4 + .../kylin/storage/adhoc/AdHocRunnerBase.java | 6 +- .../kylin/storage/adhoc/HiveAdhocConverter.java | 180 +++++++++++++++++++ .../kylin/storage/adhoc/IAdhocConverter.java | 25 +++ .../storage/adhoc/HiveAdhocConverterTest.java | 62 +++++++ .../org/apache/kylin/rest/util/AdHocUtil.java | 35 ++-- 6 files changed, 297 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/6ce9983f/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java ---------------------------------------------------------------------- diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index 05be701..7f366d8 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -968,6 +968,10 @@ abstract public class KylinConfigBase implements Serializable { public String getAdHocRunnerClassName() { return getOptional("kylin.query.ad-hoc.runner.class-name", ""); } + + public String getAdHocConverterClassName() { + return getOptional("kylin.query.ad-hoc.converter.class-name", "org.apache.kylin.storage.adhoc.HiveAdhocConverter"); + } public String getJdbcUrl() { return getOptional("kylin.query.ad-hoc.jdbc.url", ""); http://git-wip-us.apache.org/repos/asf/kylin/blob/6ce9983f/core-storage/src/main/java/org/apache/kylin/storage/adhoc/AdHocRunnerBase.java ---------------------------------------------------------------------- diff --git a/core-storage/src/main/java/org/apache/kylin/storage/adhoc/AdHocRunnerBase.java b/core-storage/src/main/java/org/apache/kylin/storage/adhoc/AdHocRunnerBase.java index 7e811a4..7b870c6 100644 --- a/core-storage/src/main/java/org/apache/kylin/storage/adhoc/AdHocRunnerBase.java +++ b/core-storage/src/main/java/org/apache/kylin/storage/adhoc/AdHocRunnerBase.java @@ -18,13 +18,13 @@ package org.apache.kylin.storage.adhoc; +import java.util.List; + import org.apache.kylin.common.KylinConfig; import org.apache.kylin.metadata.querymeta.SelectedColumnMeta; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.List; - public abstract class AdHocRunnerBase { private static final Logger logger = LoggerFactory.getLogger(AdHocRunnerBase.class); @@ -45,4 +45,4 @@ public abstract class AdHocRunnerBase { public abstract void init(); public abstract void executeQuery(String query, List<List<String>> results, List<SelectedColumnMeta> columnMetas) throws Exception; -} \ No newline at end of file +} http://git-wip-us.apache.org/repos/asf/kylin/blob/6ce9983f/core-storage/src/main/java/org/apache/kylin/storage/adhoc/HiveAdhocConverter.java ---------------------------------------------------------------------- diff --git a/core-storage/src/main/java/org/apache/kylin/storage/adhoc/HiveAdhocConverter.java b/core-storage/src/main/java/org/apache/kylin/storage/adhoc/HiveAdhocConverter.java new file mode 100644 index 0000000..1a43557 --- /dev/null +++ b/core-storage/src/main/java/org/apache/kylin/storage/adhoc/HiveAdhocConverter.java @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ +package org.apache.kylin.storage.adhoc; + +import java.util.HashMap; +import java.util.Map; +import java.util.Stack; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +//TODO: Some workaround ways to make sql readable by hive parser, should replaced it with a more well-designed way +public class HiveAdhocConverter implements IAdhocConverter { + + private static final Logger logger = LoggerFactory.getLogger(HiveAdhocConverter.class); + + private static final Pattern EXTRACT_PATTERN = Pattern.compile("\\s+extract\\s*(\\()\\s*(.*?)\\s*from(\\s+)", Pattern.CASE_INSENSITIVE); + private static final Pattern FROM_PATTERN = Pattern.compile("\\s+from\\s+(\\()\\s*select\\s", Pattern.CASE_INSENSITIVE); + private static final Pattern CAST_PATTERN = Pattern.compile("CAST\\((.*?) (?i)AS\\s*(.*?)\\s*\\)", Pattern.CASE_INSENSITIVE); + private static final Pattern CONCAT_PATTERN = Pattern.compile("(['_a-z0-9A-Z]+)\\|\\|(['_a-z0-9A-Z]+)", Pattern.CASE_INSENSITIVE); + + public static String replaceString(String originString, String fromString, String toString) { + return originString.replace(fromString, toString); + } + + public static String extractReplace(String originString) { + Matcher extractMatcher = EXTRACT_PATTERN.matcher(originString); + String replacedString = originString; + Map<Integer, Integer> parenthesesPairs = null; + + while (extractMatcher.find()) { + if (parenthesesPairs == null) { + parenthesesPairs = findParenthesesPairs(originString); + } + + String functionStr = extractMatcher.group(2); + int startIdx = extractMatcher.end(3); + int endIdx = parenthesesPairs.get(extractMatcher.start(1)); + String extractInner = originString.substring(startIdx, endIdx); + int originStart = extractMatcher.start(0) + 1; + int originEnd = endIdx + 1; + + replacedString = replaceString(replacedString, originString.substring(originStart, originEnd), functionStr + "(" + extractInner + ")"); + } + + return replacedString; + } + + public static String castRepalce(String originString) { + Matcher castMatcher = CAST_PATTERN.matcher(originString); + String replacedString = originString; + + while (castMatcher.find()) { + String castStr = castMatcher.group(); + String type = castMatcher.group(2); + String supportedType = ""; + switch (type.toUpperCase()) { + case "INTEGER": + supportedType = "int"; + break; + case "SHORT": + supportedType = "smallint"; + break; + case "LONG": + supportedType = "bigint"; + break; + default: + supportedType = type; + } + + if (!supportedType.equals(type)) { + String replacedCastStr = castStr.replace(type, supportedType); + replacedString = replaceString(replacedString, castStr, replacedCastStr); + } + } + + return replacedString; + } + + public static String subqueryRepalce(String originString) { + Matcher subqueryMatcher = FROM_PATTERN.matcher(originString); + String replacedString = originString; + Map<Integer, Integer> parenthesesPairs = null; + + while (subqueryMatcher.find()) { + if (parenthesesPairs == null) { + parenthesesPairs = findParenthesesPairs(originString); + } + + int startIdx = subqueryMatcher.start(1); + int endIdx = parenthesesPairs.get(startIdx) + 1; + + replacedString = replaceString(replacedString, originString.substring(startIdx, endIdx), originString.substring(startIdx, endIdx) + " as alias"); + } + + return replacedString; + } + + public static String concatReplace(String originString) { + Matcher concatMatcher = CONCAT_PATTERN.matcher(originString); + String replacedString = originString; + + while (concatMatcher.find()) { + String leftString = concatMatcher.group(1); + String rightString = concatMatcher.group(2); + replacedString = replaceString(replacedString, leftString + "||" + rightString, "concat(" + leftString + "," + rightString + ")"); + } + + return replacedString; + } + + public static String doConvert(String originStr) { + // Step1.Replace " with ` + String convertedSql = replaceString(originStr, "\"", "`"); + + // Step2.Replace extract functions + convertedSql = extractReplace(convertedSql); + + // Step3.Replace cast type string + convertedSql = castRepalce(convertedSql); + + // Step4.Replace sub query + convertedSql = subqueryRepalce(convertedSql); + + // Step5.Replace char_length with length + convertedSql = replaceString(convertedSql, "char_length", "length"); + + // Step6.Replace "||" with concat + convertedSql = concatReplace(convertedSql); + + return convertedSql; + } + + private static Map<Integer, Integer> findParenthesesPairs(String sql) { + Map<Integer, Integer> result = new HashMap<>(); + if (sql.length() > 1) { + Stack<Integer> lStack = new Stack<>(); + boolean inStrVal = false; + for (int i = 0; i < sql.length(); i++) { + switch (sql.charAt(i)) { + case '(': + if (!inStrVal) { + lStack.push(i); + } + break; + case ')': + if (!inStrVal && !lStack.empty()) { + result.put(lStack.pop(), i); + } + break; + default: + break; + } + } + } + return result; + } + + @Override + public String convert(String originSql) { + return doConvert(originSql); + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/6ce9983f/core-storage/src/main/java/org/apache/kylin/storage/adhoc/IAdhocConverter.java ---------------------------------------------------------------------- diff --git a/core-storage/src/main/java/org/apache/kylin/storage/adhoc/IAdhocConverter.java b/core-storage/src/main/java/org/apache/kylin/storage/adhoc/IAdhocConverter.java new file mode 100644 index 0000000..d5815bb --- /dev/null +++ b/core-storage/src/main/java/org/apache/kylin/storage/adhoc/IAdhocConverter.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ +package org.apache.kylin.storage.adhoc; + +/** + * convert the query to satisfy the parser of adhoc query engine + */ +public interface IAdhocConverter { + String convert(String originSql); +} http://git-wip-us.apache.org/repos/asf/kylin/blob/6ce9983f/core-storage/src/test/java/org/apache/kylin/storage/adhoc/HiveAdhocConverterTest.java ---------------------------------------------------------------------- diff --git a/core-storage/src/test/java/org/apache/kylin/storage/adhoc/HiveAdhocConverterTest.java b/core-storage/src/test/java/org/apache/kylin/storage/adhoc/HiveAdhocConverterTest.java new file mode 100644 index 0000000..62f6792 --- /dev/null +++ b/core-storage/src/test/java/org/apache/kylin/storage/adhoc/HiveAdhocConverterTest.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.storage.adhoc; + +import junit.framework.TestCase; +import org.junit.Test; + + +public class HiveAdhocConverterTest extends TestCase { + @Test + public void testSringReplace() { + String originString = "select count(*) as cnt from test_kylin_fact where char_length(lstg_format_name) < 10"; + String replacedString = HiveAdhocConverter + .replaceString(originString, "char_length", "length"); + assertEquals(replacedString, "select count(*) as cnt from test_kylin_fact where length(lstg_format_name) < 10"); + } + + @Test + public void testExtractReplace() { + String originString = "ignore EXTRACT(YEAR FROM KYLIN_CAL_DT.CAL_DT) ignore"; + String replacedString = HiveAdhocConverter.extractReplace(originString); + assertEquals(replacedString, "ignore YEAR(KYLIN_CAL_DT.CAL_DT) ignore"); + } + + @Test + public void testCastReplace() { + String originString = "ignore EXTRACT(YEAR FROM CAST(KYLIN_CAL_DT.CAL_DT AS INTEGER)) ignore"; + String replacedString = HiveAdhocConverter.castRepalce(originString); + assertEquals(replacedString, "ignore EXTRACT(YEAR FROM CAST(KYLIN_CAL_DT.CAL_DT AS int)) ignore"); + } + + @Test + public void testSubqueryReplace() { + String originString = "select seller_id,lstg_format_name,sum(price) from (select * from test_kylin_fact where (lstg_format_name='FP-GTC') limit 20) group by seller_id,lstg_format_name"; + String replacedString = HiveAdhocConverter.subqueryRepalce(originString); + assertEquals(replacedString, "select seller_id,lstg_format_name,sum(price) from (select * from test_kylin_fact where (lstg_format_name='FP-GTC') limit 20) as alias group by seller_id,lstg_format_name"); + } + + @Test + public void testConcatReplace() { + String originString = "select count(*) as cnt from test_kylin_fact where lstg_format_name||'a'='ABINa'"; + String replacedString = HiveAdhocConverter.concatReplace(originString); + assertEquals(replacedString, "select count(*) as cnt from test_kylin_fact where concat(lstg_format_name,'a')='ABINa'"); + } + +} http://git-wip-us.apache.org/repos/asf/kylin/blob/6ce9983f/server-base/src/main/java/org/apache/kylin/rest/util/AdHocUtil.java ---------------------------------------------------------------------- diff --git a/server-base/src/main/java/org/apache/kylin/rest/util/AdHocUtil.java b/server-base/src/main/java/org/apache/kylin/rest/util/AdHocUtil.java index f6b3496..648ef91 100644 --- a/server-base/src/main/java/org/apache/kylin/rest/util/AdHocUtil.java +++ b/server-base/src/main/java/org/apache/kylin/rest/util/AdHocUtil.java @@ -18,18 +18,19 @@ package org.apache.kylin.rest.util; +import java.sql.SQLException; +import java.util.List; + import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.kylin.common.KylinConfig; +import org.apache.kylin.metadata.querymeta.SelectedColumnMeta; import org.apache.kylin.query.routing.NoRealizationFoundException; -import org.apache.kylin.storage.adhoc.AdHocRunnerBase; import org.apache.kylin.rest.exception.InternalErrorException; -import org.apache.kylin.metadata.querymeta.SelectedColumnMeta; +import org.apache.kylin.storage.adhoc.AdHocRunnerBase; +import org.apache.kylin.storage.adhoc.IAdhocConverter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.sql.SQLException; -import java.util.List; - public class AdHocUtil { private static final Logger logger = LoggerFactory.getLogger(AdHocUtil.class); @@ -40,22 +41,33 @@ public class AdHocUtil { if (isExpectedCause && kylinConfig.isAdhocEnabled()) { Class runnerClass = Class.forName(kylinConfig.getAdHocRunnerClassName()); - Object instance = runnerClass.newInstance(); + Class converterClass = Class.forName(kylinConfig.getAdHocConverterClassName()); + Object runnerObj = runnerClass.newInstance(); + Object converterObj = converterClass.newInstance(); - if (!(instance instanceof AdHocRunnerBase)) { - throw new InternalErrorException("Ad-hoc runner class should be sub-class of AdHocRunnerBase."); + if (!(runnerObj instanceof AdHocRunnerBase)) { + throw new InternalErrorException("Ad-hoc runner class should be sub-class of AdHocRunnerBase"); } - AdHocRunnerBase runner = (AdHocRunnerBase) instance; + if (!(converterObj instanceof IAdhocConverter)) { + throw new InternalErrorException("Ad-hoc converter class should implement of IAdhocConverter"); + } + + AdHocRunnerBase runner = (AdHocRunnerBase) runnerObj; + IAdhocConverter converter = (IAdhocConverter) converterObj; runner.setConfig(kylinConfig); logger.debug("Ad-hoc query enabled for Kylin"); - // running query to ad-hoc jdbc runner.init(); try { - runner.executeQuery(sql, results, columnMetas); + String adhocSql = converter.convert(sql); + if (!sql.equals(adhocSql)) { + logger.info("the original query is converted to {} before delegating to ", adhocSql); + } + + runner.executeQuery(adhocSql, results, columnMetas); isAdHoc = true; } catch (Exception exception) { throw exception; @@ -67,4 +79,3 @@ public class AdHocUtil { return isAdHoc; } } -