Minor, get metadata of query columns
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/25bb46e0
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/25bb46e0
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/25bb46e0

Branch: refs/heads/master
Commit: 25bb46e0cea270cae2c5b042a5766880fc2eea02
Parents: d30d2a3
Author: Yifan Zhang <event.dim...@gmail.com>
Authored: Thu Dec 28 21:02:16 2017 +0800
Committer: Li Yang <liy...@apache.org>
Committed: Fri Jan 26 22:54:58 2018 +0800

----------------------------------------------------------------------
 .../kylin/source/ISourceMetadataExplorer.java   |  3 +
 .../kylin/source/hive/HiveMetadataExplorer.java | 74 ++++++++++++-----
 .../apache/kylin/source/jdbc/JdbcExplorer.java  | 83 ++++++++++++++------
 .../apache/kylin/source/kafka/KafkaSource.java  |  6 ++
 4 files changed, 125 insertions(+), 41 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/kylin/blob/25bb46e0/core-metadata/src/main/java/org/apache/kylin/source/ISourceMetadataExplorer.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/source/ISourceMetadataExplorer.java b/core-metadata/src/main/java/org/apache/kylin/source/ISourceMetadataExplorer.java
index 4072846..0a6747c 100644
--- a/core-metadata/src/main/java/org/apache/kylin/source/ISourceMetadataExplorer.java
+++ b/core-metadata/src/main/java/org/apache/kylin/source/ISourceMetadataExplorer.java
@@ -21,6 +21,7 @@ package org.apache.kylin.source;
 import java.util.List;

 import org.apache.kylin.common.util.Pair;
+import org.apache.kylin.metadata.model.ColumnDesc;
 import org.apache.kylin.metadata.model.TableDesc;
 import org.apache.kylin.metadata.model.TableExtDesc;

@@ -33,4 +34,6 @@ public interface ISourceMetadataExplorer {
     Pair<TableDesc, TableExtDesc> loadTableMetadata(String database, String table, String prj) throws Exception;
     List<String> getRelatedKylinResources(TableDesc table);
+
+    ColumnDesc[] evalQueryMetadata(String query);
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/25bb46e0/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMetadataExplorer.java
----------------------------------------------------------------------
diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMetadataExplorer.java b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMetadataExplorer.java
index 55e6267..d80c546 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMetadataExplorer.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMetadataExplorer.java
@@ -23,6 +23,7 @@ import java.util.Collections;
 import java.util.List;
 import java.util.UUID;

+import org.apache.commons.lang3.StringUtils;
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.util.Pair;
 import org.apache.kylin.metadata.TableMetadataManager;
@@ -59,7 +60,7 @@ public class HiveMetadataExplorer implements ISourceMetadataExplorer, ISampleDat
         }

         TableDesc tableDesc = metaMgr.getTableDesc(database + "."
+ tableName, prj);
-        
+
         // make a new TableDesc instance, don't modify the one in use
         if (tableDesc == null) {
             tableDesc = new TableDesc();
@@ -70,28 +71,12 @@ public class HiveMetadataExplorer implements ISourceMetadataExplorer, ISampleDat
         } else {
             tableDesc = new TableDesc(tableDesc);
         }
-        
+
         if (hiveTableMeta.tableType != null) {
             tableDesc.setTableType(hiveTableMeta.tableType);
         }

-        int columnNumber = hiveTableMeta.allColumns.size();
-        List<ColumnDesc> columns = new ArrayList<ColumnDesc>(columnNumber);
-        for (int i = 0; i < columnNumber; i++) {
-            HiveTableMeta.HiveTableColumnMeta field = hiveTableMeta.allColumns.get(i);
-            ColumnDesc cdesc = new ColumnDesc();
-            cdesc.setName(field.name.toUpperCase());
-            // use "double" in kylin for "float"
-            if ("float".equalsIgnoreCase(field.dataType)) {
-                cdesc.setDatatype("double");
-            } else {
-                cdesc.setDatatype(field.dataType);
-            }
-            cdesc.setId(String.valueOf(i + 1));
-            cdesc.setComment(field.comment);
-            columns.add(cdesc);
-        }
-        tableDesc.setColumns(columns.toArray(new ColumnDesc[columnNumber]));
+        tableDesc.setColumns(extractColumnFromMeta(hiveTableMeta));

         StringBuffer partitionColumnString = new StringBuffer();
         for (int i = 0, n = hiveTableMeta.partitionColumns.size(); i < n; i++) {
@@ -193,4 +178,55 @@ public class HiveMetadataExplorer implements ISourceMetadataExplorer, ISampleDat
         return hiveDataType.toLowerCase();
     }

+    @Override
+    public ColumnDesc[] evalQueryMetadata(String query) {
+        if (StringUtils.isEmpty(query)) {
+            throw new RuntimeException("Evalutate query shall not be empty.");
+        }
+
+        KylinConfig config = KylinConfig.getInstanceFromEnv();
+        String tmpDatabase = config.getHiveDatabaseForIntermediateTable();
+        String tmpView = "kylin_eval_query_" + UUID.nameUUIDFromBytes(query.getBytes()).toString().replace("-", "");
+
+        String dropViewSql = "DROP VIEW IF EXISTS " + tmpDatabase + "." + tmpView;
+        String evalViewSql = "CREATE VIEW " + tmpDatabase + "."
+ tmpView + " as " + query;
+
+        try {
+            hiveClient.executeHQL(new String[] { dropViewSql, evalViewSql });
+            HiveTableMeta hiveTableMeta = hiveClient.getHiveTableMeta(tmpDatabase, tmpView);
+            return extractColumnFromMeta(hiveTableMeta);
+        } catch (Exception e) {
+            throw new RuntimeException("Cannot evalutate metadata of query: " + query, e);
+        } finally {
+            try {
+                hiveClient.executeHQL(dropViewSql);
+            } catch (Exception e) {
+                throw new RuntimeException("Cannot temp view of query: " + query, e);
+            }
+        }
+    }
+
+    private ColumnDesc[] extractColumnFromMeta(HiveTableMeta hiveTableMeta) {
+        int columnNumber = hiveTableMeta.allColumns.size();
+        List<ColumnDesc> columns = new ArrayList<ColumnDesc>(columnNumber);
+
+        for (int i = 0; i < columnNumber; i++) {
+            HiveTableMeta.HiveTableColumnMeta field = hiveTableMeta.allColumns.get(i);
+            ColumnDesc cdesc = new ColumnDesc();
+            cdesc.setName(field.name.toUpperCase());
+
+            // use "double" in kylin for "float"
+            if ("float".equalsIgnoreCase(field.dataType)) {
+                cdesc.setDatatype("double");
+            } else {
+                cdesc.setDatatype(field.dataType);
+            }
+
+            cdesc.setId(String.valueOf(i + 1));
+            cdesc.setComment(field.comment);
+            columns.add(cdesc);
+        }
+
+        return columns.toArray(new ColumnDesc[columnNumber]);
+    }
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/25bb46e0/source-hive/src/main/java/org/apache/kylin/source/jdbc/JdbcExplorer.java
----------------------------------------------------------------------
diff --git a/source-hive/src/main/java/org/apache/kylin/source/jdbc/JdbcExplorer.java b/source-hive/src/main/java/org/apache/kylin/source/jdbc/JdbcExplorer.java
index 2827b3d..81e42bb 100644
--- a/source-hive/src/main/java/org/apache/kylin/source/jdbc/JdbcExplorer.java
+++ b/source-hive/src/main/java/org/apache/kylin/source/jdbc/JdbcExplorer.java
@@ -28,6 +28,7 @@ import java.util.Collections;
 import java.util.List;
 import java.util.UUID;

+import org.apache.commons.lang3.StringUtils;
 import
org.apache.hadoop.hive.ql.CommandNeedRetryException;
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.util.DBUtils;
@@ -99,33 +100,12 @@ public class JdbcExplorer implements ISourceMetadataExplorer, ISampleDataDeploye
             }
         }

-        List<ColumnDesc> columns = new ArrayList<>();
         try (ResultSet rs = jdbcMetadataDialect.listColumns(dbmd, database, table)) {
-            while (rs.next()) {
-                String cname = rs.getString("COLUMN_NAME");
-                int type = rs.getInt("DATA_TYPE");
-                int csize = rs.getInt("COLUMN_SIZE");
-                int digits = rs.getInt("DECIMAL_DIGITS");
-                int pos = rs.getInt("ORDINAL_POSITION");
-                String remarks = rs.getString("REMARKS");
-
-                ColumnDesc cdesc = new ColumnDesc();
-                cdesc.setName(cname.toUpperCase());
-
-                String kylinType = SqlUtil.jdbcTypeToKylinDataType(type);
-                int precision = (SqlUtil.isPrecisionApplicable(kylinType) && csize > 0) ? csize : -1;
-                int scale = (SqlUtil.isScaleApplicable(kylinType) && digits > 0) ? digits : -1;
-
-                cdesc.setDatatype(new DataType(kylinType, precision, scale).toString());
-                cdesc.setId(String.valueOf(pos));
-                cdesc.setComment(remarks);
-                columns.add(cdesc);
-            }
+            tableDesc.setColumns(extractColumnFromMeta(rs));
         } finally {
             DBUtils.closeQuietly(con);
         }

-        tableDesc.setColumns(columns.toArray(new ColumnDesc[columns.size()]));

         TableExtDesc tableExtDesc = new TableExtDesc();
         tableExtDesc.setIdentity(tableDesc.getIdentity());
@@ -249,4 +229,63 @@ public class JdbcExplorer implements ISourceMetadataExplorer, ISampleDataDeploye
         return Collections.emptyList();
     }

+    @Override
+    public ColumnDesc[] evalQueryMetadata(String query) {
+        if (StringUtils.isEmpty(query)) {
+            throw new RuntimeException("Evalutate query shall not be empty.");
+        }
+
+        KylinConfig config = KylinConfig.getInstanceFromEnv();
+        String tmpDatabase = config.getHiveDatabaseForIntermediateTable();
+        String tmpView = tmpDatabase + ".kylin_eval_query_" + UUID.nameUUIDFromBytes(query.getBytes()).toString().replaceAll("-", "");
+
+        String dropViewSql = "DROP VIEW
IF EXISTS " + tmpView;
+        String evalViewSql = "CREATE VIEW " + tmpView + " as " + query;
+
+        try {
+            executeSQL(new String[] { dropViewSql, evalViewSql });
+            Connection con = SqlUtil.getConnection(dbconf);
+            DatabaseMetaData dbmd = con.getMetaData();
+            ResultSet rs = dbmd.getColumns(null, tmpDatabase, tmpView, null);
+            ColumnDesc[] result = extractColumnFromMeta(rs);
+            DBUtils.closeQuietly(rs);
+            DBUtils.closeQuietly(con);
+            return result;
+        } catch (Exception e) {
+            throw new RuntimeException("Cannot evalutate metadata of query: " + query, e);
+        } finally {
+            try {
+                executeSQL(dropViewSql);
+            } catch (Exception e) {
+                throw new RuntimeException("Cannot temp view of query: " + query, e);
+            }
+        }
+    }
+
+    private ColumnDesc[] extractColumnFromMeta(ResultSet meta) throws SQLException {
+        List<ColumnDesc> columns = new ArrayList<>();
+
+        while (meta.next()) {
+            String cname = meta.getString("COLUMN_NAME");
+            int type = meta.getInt("DATA_TYPE");
+            int csize = meta.getInt("COLUMN_SIZE");
+            int digits = meta.getInt("DECIMAL_DIGITS");
+            int pos = meta.getInt("ORDINAL_POSITION");
+            String remarks = meta.getString("REMARKS");
+
+            ColumnDesc cdesc = new ColumnDesc();
+            cdesc.setName(cname.toUpperCase());
+
+            String kylinType = SqlUtil.jdbcTypeToKylinDataType(type);
+            int precision = (SqlUtil.isPrecisionApplicable(kylinType) && csize > 0) ? csize : -1;
+            int scale = (SqlUtil.isScaleApplicable(kylinType) && digits > 0) ?
digits : -1;
+
+            cdesc.setDatatype(new DataType(kylinType, precision, scale).toString());
+            cdesc.setId(String.valueOf(pos));
+            cdesc.setComment(remarks);
+            columns.add(cdesc);
+        }
+
+        return columns.toArray(new ColumnDesc[columns.size()]);
+    }
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/25bb46e0/source-kafka/src/main/java/org/apache/kylin/source/kafka/KafkaSource.java
----------------------------------------------------------------------
diff --git a/source-kafka/src/main/java/org/apache/kylin/source/kafka/KafkaSource.java b/source-kafka/src/main/java/org/apache/kylin/source/kafka/KafkaSource.java
index 396c0e1..1142243 100644
--- a/source-kafka/src/main/java/org/apache/kylin/source/kafka/KafkaSource.java
+++ b/source-kafka/src/main/java/org/apache/kylin/source/kafka/KafkaSource.java
@@ -28,6 +28,7 @@ import org.apache.kylin.common.util.Pair;
 import org.apache.kylin.cube.CubeInstance;
 import org.apache.kylin.cube.CubeSegment;
 import org.apache.kylin.engine.mr.IMRInput;
+import org.apache.kylin.metadata.model.ColumnDesc;
 import org.apache.kylin.metadata.model.IBuildable;
 import org.apache.kylin.metadata.model.SegmentRange;
 import org.apache.kylin.metadata.model.TableDesc;
@@ -209,6 +210,11 @@ public class KafkaSource implements ISource {
                 dependentResources.add(StreamingConfig.concatResourcePath(table.getIdentity()));
                 return dependentResources;
             }
+
+            @Override
+            public ColumnDesc[] evalQueryMetadata(String query) {
+                throw new UnsupportedOperationException();
+            }
         };
     }