This is an automated email from the ASF dual-hosted git repository. nic pushed a commit to branch 2.6.x in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/2.6.x by this push: new a61fec6 KYLIN-4298 KYLIN-4299 a61fec6 is described below commit a61fec60f94383d1ea4b38c9e64922b2f2c5221e Author: Rongnengwei <wrn19851...@163.com> AuthorDate: Sun Jan 5 18:35:41 2020 +0800 KYLIN-4298 KYLIN-4299 --- .../org/apache/kylin/common/KylinConfigBase.java | 4 ++ .../src/main/resources/kylin-defaults.properties | 4 +- metrics-reporter-hive/pom.xml | 5 +- .../kylin/metrics/lib/impl/hive/HiveProducer.java | 39 ++++++++-------- .../apache/kylin/source/hive/CLIHiveClient.java | 37 ++++++++------- .../source/hive/HiveMetaStoreClientFactory.java | 53 ++++++++++++++++++++++ 6 files changed, 101 insertions(+), 41 deletions(-) diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index 3e6bd5f..88218f3 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -977,6 +977,10 @@ abstract public class KylinConfigBase implements Serializable { } } + public String getHiveMetaDataType() { + return getOptional("kylin.source.hive.metadata-type", "hcatalog"); + } + // ============================================================================ // SOURCE.KAFKA // ============================================================================ diff --git a/core-common/src/main/resources/kylin-defaults.properties b/core-common/src/main/resources/kylin-defaults.properties index 3491c15..6af19d4 100644 --- a/core-common/src/main/resources/kylin-defaults.properties +++ b/core-common/src/main/resources/kylin-defaults.properties @@ -101,7 +101,9 @@ kylin.source.hive.database-for-flat-table=default # Whether redistribute the intermediate flat table before building kylin.source.hive.redistribute-flat-table=true - +# Define how to access to hive metadata +# When user deploy kylin on AWS EMR and Glue is used as 
external metadata, use gluecatalog instead +kylin.source.hive.metadata-type=hcatalog ### STORAGE ### diff --git a/metrics-reporter-hive/pom.xml b/metrics-reporter-hive/pom.xml index 9ca49ff..aa0f0c1 100644 --- a/metrics-reporter-hive/pom.xml +++ b/metrics-reporter-hive/pom.xml @@ -36,7 +36,10 @@ <groupId>org.apache.kylin</groupId> <artifactId>kylin-core-metrics</artifactId> </dependency> - + <dependency> + <groupId>org.apache.kylin</groupId> + <artifactId>kylin-source-hive</artifactId> + </dependency> <dependency> <groupId>org.apache.hive.hcatalog</groupId> <artifactId>hive-hcatalog-core</artifactId> diff --git a/metrics-reporter-hive/src/main/java/org/apache/kylin/metrics/lib/impl/hive/HiveProducer.java b/metrics-reporter-hive/src/main/java/org/apache/kylin/metrics/lib/impl/hive/HiveProducer.java index 72121a9..b833ee1 100644 --- a/metrics-reporter-hive/src/main/java/org/apache/kylin/metrics/lib/impl/hive/HiveProducer.java +++ b/metrics-reporter-hive/src/main/java/org/apache/kylin/metrics/lib/impl/hive/HiveProducer.java @@ -18,20 +18,19 @@ package org.apache.kylin.metrics.lib.impl.hive; -import java.io.IOException; -import java.net.InetAddress; -import java.net.UnknownHostException; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Properties; - +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; +import com.google.common.cache.RemovalListener; +import com.google.common.cache.RemovalNotification; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.cli.CliSessionState; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; import 
org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; @@ -41,16 +40,17 @@ import org.apache.kylin.metrics.lib.ActiveReservoirReporter; import org.apache.kylin.metrics.lib.Record; import org.apache.kylin.metrics.lib.impl.TimePropertyEnum; import org.apache.kylin.metrics.lib.impl.hive.HiveProducerRecord.RecordKey; +import org.apache.kylin.source.hive.HiveMetaStoreClientFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; -import com.google.common.cache.RemovalListener; -import com.google.common.cache.RemovalNotification; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; +import java.io.IOException; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Properties; public class HiveProducer { @@ -62,13 +62,12 @@ public class HiveProducer { private final FileSystem fileSystem; private final LoadingCache<Pair<String, String>, Pair<String, List<FieldSchema>>> tableFieldSchemaCache; private final String CONTENT_FILE_NAME; - private FSDataOutputStream fout; - private long partId = 0; - /** * Some cloud file system, like AWS S3, didn't support append action to exist file. 
*/ private final boolean supportAppend; + private FSDataOutputStream fout; + private long partId = 0; public HiveProducer(Properties props) throws Exception { this(props, new HiveConf()); @@ -90,7 +89,7 @@ public class HiveProducer { }).maximumSize(CACHE_MAX_SIZE).build(new CacheLoader<Pair<String, String>, Pair<String, List<FieldSchema>>>() { @Override public Pair<String, List<FieldSchema>> load(Pair<String, String> tableName) throws Exception { - HiveMetaStoreClient metaStoreClient = new HiveMetaStoreClient(hiveConf); + IMetaStoreClient metaStoreClient = HiveMetaStoreClientFactory.getHiveMetaStoreClient(hiveConf); String tableLocation = metaStoreClient.getTable(tableName.getFirst(), tableName.getSecond()).getSd().getLocation(); logger.debug("Find table location for {} at {}", tableName.getSecond(), tableLocation); List<FieldSchema> fields = metaStoreClient.getFields(tableName.getFirst(), tableName.getSecond()); diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/CLIHiveClient.java b/source-hive/src/main/java/org/apache/kylin/source/hive/CLIHiveClient.java index bc9f17e..2491cc4 100644 --- a/source-hive/src/main/java/org/apache/kylin/source/hive/CLIHiveClient.java +++ b/source-hive/src/main/java/org/apache/kylin/source/hive/CLIHiveClient.java @@ -6,25 +6,22 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
-*/ + */ package org.apache.kylin.source.hive; -import java.io.IOException; -import java.util.List; -import java.util.Map; - +import com.google.common.collect.Lists; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Table; @@ -32,16 +29,18 @@ import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.util.HiveCmdBuilder; import org.apache.kylin.common.util.Pair; -import com.google.common.collect.Lists; +import java.io.IOException; +import java.util.List; +import java.util.Map; /** * Hive meta API client for Kylin - * @author shaoshi * + * @author shaoshi */ public class CLIHiveClient implements IHiveClient { protected HiveConf hiveConf = null; - protected HiveMetaStoreClient metaStoreClient = null; + protected IMetaStoreClient metaStoreClient = null; public CLIHiveClient() { hiveConf = new HiveConf(CLIHiveClient.class); @@ -49,7 +48,8 @@ public class CLIHiveClient implements IHiveClient { /** * only used by Deploy Util - * @throws IOException + * + * @throws IOException */ @Override public void executeHQL(String hql) throws IOException { @@ -129,21 +129,20 @@ public class CLIHiveClient implements IHiveClient { return getBasicStatForTable(new org.apache.hadoop.hive.ql.metadata.Table(table), StatsSetupConst.ROW_COUNT); } - private HiveMetaStoreClient getMetaStoreClient() throws Exception { + private IMetaStoreClient getMetaStoreClient() throws Exception { if (metaStoreClient == null) { - metaStoreClient = new HiveMetaStoreClient(hiveConf); + metaStoreClient = HiveMetaStoreClientFactory.getHiveMetaStoreClient(hiveConf); } return metaStoreClient; } /** * COPIED FROM org.apache.hadoop.hive.ql.stats.StatsUtil for backward 
compatibility - * + * <p> * Get basic stats of table - * @param table - * - table - * @param statType - * - type of stats + * + * @param table - table + * @param statType - type of stats * @return value of stats */ private long getBasicStatForTable(org.apache.hadoop.hive.ql.metadata.Table table, String statType) { diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMetaStoreClientFactory.java b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMetaStoreClientFactory.java new file mode 100644 index 0000000..984623f --- /dev/null +++ b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveMetaStoreClientFactory.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.source.hive; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.kylin.common.KylinConfig; + +import java.io.IOException; + +public class HiveMetaStoreClientFactory { + + /** + * Get hivemetastoreclient. 
Two metadata types are currently supported: hcatalog and gluecatalog. When hcatalog is configured, + * the client is created directly with new HiveMetaStoreClient(hiveConf), which is more efficient. + * When gluecatalog is configured, the client is obtained through HCatUtil.getHiveMetastoreClient(hiveConf), + * which can load com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory according to the configuration file. + * + * @param hiveConf + * @return metaStoreClient, or null if the configured metadata type is neither hcatalog nor gluecatalog + * @throws MetaException + * @throws IOException + */ + public static IMetaStoreClient getHiveMetaStoreClient(HiveConf hiveConf) throws MetaException, IOException { + IMetaStoreClient metaStoreClient = null; + if ("hcatalog".equals(KylinConfig.getInstanceFromEnv().getHiveMetaDataType())) { + metaStoreClient = new HiveMetaStoreClient(hiveConf); + } else if ("gluecatalog".equals(KylinConfig.getInstanceFromEnv().getHiveMetaDataType())) { + metaStoreClient = HCatUtil.getHiveMetastoreClient(hiveConf); + } + return metaStoreClient; + } + +}