This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch kylin3
in repository https://gitbox.apache.org/repos/asf/kylin.git

The following commit(s) were added to refs/heads/kylin3 by this push:
     new 80404c0 configurable hive dict table format
80404c0 is described below

commit 80404c0e9cdce896219a89040ffd4fc8ec73a6f7
Author: fengpod <feng...@gmail.com>
AuthorDate: Wed Dec 8 20:23:14 2021 +0800

    configurable hive dict table format
---
 .../org/apache/kylin/common/KylinConfigBase.java   |  4 ++
 .../apache/kylin/source/hive/HiveInputBase.java    |  4 +-
 .../apache/kylin/source/hive/MRHiveDictUtil.java   | 50 ++++++++++++++--------
 3 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 4d24fd0..707848f 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -708,6 +708,10 @@ public abstract class KylinConfigBase implements Serializable {
         return getOptional("kylin.dictionary.mr-hive.intermediate.table.suffix", "_distinct_value");
     }
 
+    public String getMrHiveDictTableFormat() {
+        return getOptional("kylin.dictionary.mr-hive.table.format", "TEXTFILE");
+    }
+
     // ============================================================================
     // CUBE
     // ============================================================================
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveInputBase.java b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveInputBase.java
index 193990d..90239d9 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveInputBase.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveInputBase.java
@@ -168,11 +168,11 @@ public class HiveInputBase {
             final String distinctValueTable = MRHiveDictUtil.distinctValueTable(flatDesc);
             final String segmentLevelDictTableName = MRHiveDictUtil.segmentLevelDictTableName(flatDesc);
 
-            final String createGlobalDictTableHql = MRHiveDictUtil.generateDictionaryDdl(globalDictDatabase, globalDictTable);
+            final String createGlobalDictTableHql = MRHiveDictUtil.generateDictionaryDdl(flatDesc, globalDictDatabase, globalDictTable);
             final String dropDistinctValueTableHql = MRHiveDictUtil.generateDropTableStatement(distinctValueTable);
             final String createDistinctValueTableHql = MRHiveDictUtil.generateDistinctValueTableStatement(flatDesc);
             final String dropSegmentLevelDictTableHql = MRHiveDictUtil.generateDropTableStatement(segmentLevelDictTableName);
-            final String createSegmentLevelDictTableHql = MRHiveDictUtil.generateDictTableStatement(segmentLevelDictTableName);
+            final String createSegmentLevelDictTableHql = MRHiveDictUtil.generateDictTableStatement(flatDesc, segmentLevelDictTableName);
 
             String maxAndDistinctCountSql = MRHiveDictUtil.generateDictStatisticsSql(distinctValueTable, globalDictTable, globalDictDatabase);
 
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/MRHiveDictUtil.java b/source-hive/src/main/java/org/apache/kylin/source/hive/MRHiveDictUtil.java
index 5fe8a97..62bf03d 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/MRHiveDictUtil.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/MRHiveDictUtil.java
@@ -96,15 +96,23 @@ public class MRHiveDictUtil {
         return cubeName + flatDesc.getSegment().getConfig().getMrHiveDictTableSuffix();
     }
 
-    public static String generateDictionaryDdl(String db, String tbl) {
-        return "CREATE TABLE IF NOT EXISTS " + db + "." + tbl + "\n"
-                + " ( dict_key STRING COMMENT '', \n"
-                + " dict_val INT COMMENT '' \n"
-                + ") \n"
-                + "COMMENT 'Hive Global Dictionary' \n"
-                + "PARTITIONED BY (dict_column string) \n"
-                + "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' \n"
-                + "STORED AS TEXTFILE; \n";
+    public static String generateDictionaryDdl(IJoinedFlatTableDesc flatDesc, String db, String tbl) {
+        KylinConfig config = flatDesc.getSegment().getConfig();
+        String tableFormat = config.getMrHiveDictTableFormat();
+        StringBuilder ddl = new StringBuilder();
+        ddl.append("CREATE TABLE IF NOT EXISTS " + db + "." + tbl + "\n");
+        ddl.append(" ( dict_key STRING COMMENT '', \n");
+        ddl.append(" dict_val INT COMMENT '' \n");
+        ddl.append(") \n");
+        ddl.append("COMMENT 'Hive Global Dictionary' \n");
+        ddl.append("PARTITIONED BY (dict_column string) \n");
+        if ("TEXTFILE".equalsIgnoreCase(tableFormat)) {
+            ddl.append("ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' \n");
+            ddl.append("STORED AS TEXTFILE; \n");
+        } else {
+            ddl.append("STORED AS " +tableFormat+ "; \n");
+        }
+        return ddl.toString();
     }
 
     public static String generateDropTableStatement(String tableName) {
@@ -114,24 +122,26 @@ public class MRHiveDictUtil {
     }
 
     public static String generateDistinctValueTableStatement(IJoinedFlatTableDesc flatDesc) {
-        StringBuilder ddl = new StringBuilder();
-        String table = flatDesc.getTableName()
-                + flatDesc.getSegment().getConfig().getMrHiveDistinctValueTableSuffix();
+        KylinConfig config = flatDesc.getSegment().getConfig();
+        String table = config.getMrHiveDistinctValueTableSuffix();
+        String tableFormat = config.getMrHiveDictTableFormat();
+        StringBuilder ddl = new StringBuilder();
 
         ddl.append("CREATE TABLE IF NOT EXISTS " + table + " \n");
         ddl.append("( \n ");
         ddl.append(" dict_key" + " " + "STRING" + " COMMENT '' \n");
         ddl.append(") \n");
         ddl.append("COMMENT '' \n");
         ddl.append("PARTITIONED BY (dict_column string) \n");
-        ddl.append("STORED AS TEXTFILE \n");
-        ddl.append(";").append("\n");
+        ddl.append("STORED AS ").append(tableFormat).append(";\n");
 
         return ddl.toString();
     }
 
-    public static String generateDictTableStatement(String globalTableName) {
-        StringBuilder ddl = new StringBuilder();
+    public static String generateDictTableStatement(IJoinedFlatTableDesc flatDesc, String globalTableName) {
+        KylinConfig config = flatDesc.getSegment().getConfig();
+        String tableFormat = config.getMrHiveDictTableFormat();
+        StringBuilder ddl = new StringBuilder();
         ddl.append("CREATE TABLE IF NOT EXISTS " + globalTableName + " \n");
         ddl.append("( \n ");
         ddl.append(" dict_key" + " " + "STRING" + " COMMENT '' , \n");
@@ -139,8 +149,12 @@ public class MRHiveDictUtil {
         ddl.append(") \n");
         ddl.append("COMMENT '' \n");
         ddl.append("PARTITIONED BY (dict_column string) \n");
-        ddl.append("ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' \n");
-        ddl.append("STORED AS TEXTFILE \n");
+        if ("TEXTFILE".equalsIgnoreCase(tableFormat)) {
+            ddl.append("ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' \n");
+            ddl.append("STORED AS TEXTFILE \n");
+        } else {
+            ddl.append("STORED AS ").append(tableFormat).append("\n");
+        }
         ddl.append(";").append("\n");
         return ddl.toString();
     }