This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
commit d052138985ed7127e1edeae398bb7bca7a8430c6
Author: wxy <dut.xian...@gmail.com>
AuthorDate: Tue Dec 13 16:48:46 2022 +0800

    [feature](multi-catalog) support connecting to hive metastore with ke… (#15026)

    Support kerberos authentication on hive external catalog
---
 .../docs/ecosystem/external-table/multi-catalog.md | 28 ++++++++++++++++---
 .../docs/ecosystem/external-table/multi-catalog.md | 28 ++++++++++++++++---
 .../org/apache/doris/catalog/HdfsResource.java     |  3 ++
 .../doris/catalog/external/HMSExternalTable.java   |  4 +--
 .../apache/doris/datasource/CatalogProperty.java   | 10 ++++---
 .../doris/datasource/HMSExternalCatalog.java       | 32 ++++++++++++++++++++--
 .../doris/planner/external/HiveScanProvider.java   |  2 +-
 7 files changed, 90 insertions(+), 17 deletions(-)

diff --git a/docs/en/docs/ecosystem/external-table/multi-catalog.md b/docs/en/docs/ecosystem/external-table/multi-catalog.md
index dabfcf4fb9..3be2f3bba0 100644
--- a/docs/en/docs/ecosystem/external-table/multi-catalog.md
+++ b/docs/en/docs/ecosystem/external-table/multi-catalog.md
@@ -85,11 +85,31 @@ CREATE CATALOG hive PROPERTIES (
     "type"="hms",
     'hive.metastore.uris' = 'thrift://172.21.0.1:7004',
     'hadoop.username' = 'hive'
-    'dfs.nameservices'='service1',
+    'dfs.nameservices'='your-nameservice',
+    'dfs.ha.namenodes.your-nameservice'='nn1,nn2',
+    'dfs.namenode.rpc-address.your-nameservice.nn1'='172.21.0.2:4007',
+    'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:4007',
+    'dfs.client.failover.proxy.provider.your-nameservice'='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'
+);
+```
+
+If you want to connect to a Hive MetaStore with Kerberos authentication, you can do it like this:
+
+```
+CREATE CATALOG hive PROPERTIES (
+    "type"="hms",
+    'hive.metastore.uris' = 'thrift://172.21.0.1:7004',
+    'hive.metastore.sasl.enabled' = 'true',
+    'dfs.nameservices'='your-nameservice',
-    'dfs.ha.namenodes. service1'='nn1,nn2',
+    'dfs.ha.namenodes.your-nameservice'='nn1,nn2',
-    'dfs.namenode.rpc-address.HDFS8000871.nn1'='172.21.0.2:4007',
-    'dfs.namenode.rpc-address.HDFS8000871.nn2'='172.21.0.3:4007',
-    'dfs.client.failover.proxy.provider.HDFS8000871'='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'
+    'dfs.namenode.rpc-address.your-nameservice.nn1'='172.21.0.2:4007',
+    'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:4007',
+    'dfs.client.failover.proxy.provider.your-nameservice'='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider',
+    'hadoop.security.authentication' = 'kerberos',
+    'hadoop.kerberos.keytab' = '/your-keytab-filepath/your.keytab',
+    'hadoop.kerberos.principal' = 'your-princi...@your.com',
+    'yarn.resourcemanager.address' = 'your-rm-address:your-rm-port',
+    'yarn.resourcemanager.principal' = 'your-rm-principal/_h...@your.com'
 );
 ```
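For orientation, here is a hedged sketch of what the documented properties amount to once they reach the FE: they are copied onto a HiveConf and used to open a SASL-secured metastore connection. The class name and the hive.metastore.kerberos.principal value are illustrative assumptions, not code or settings from this commit.

```java
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;

public class HmsKerberosConnectSketch {
    public static void main(String[] args) throws Exception {
        HiveConf hiveConf = new HiveConf();
        // CREATE CATALOG properties are plain key/value pairs; Doris forwards
        // them onto a HiveConf much like this.
        hiveConf.set("hive.metastore.uris", "thrift://172.21.0.1:7004");
        hiveConf.set("hive.metastore.sasl.enabled", "true");
        // Placeholder service principal (assumption; not part of the doc example).
        hiveConf.set("hive.metastore.kerberos.principal", "hive/_HOST@YOUR.COM");

        // With SASL enabled, this thrift connection authenticates using the
        // process's current Kerberos identity (e.g. from a prior UGI keytab login).
        HiveMetaStoreClient client = new HiveMetaStoreClient(hiveConf);
        try {
            client.getAllDatabases().forEach(System.out::println);
        } finally {
            client.close();
        }
    }
}
```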
diff --git a/docs/zh-CN/docs/ecosystem/external-table/multi-catalog.md b/docs/zh-CN/docs/ecosystem/external-table/multi-catalog.md
index 841c6b6766..3627bb9221 100644
--- a/docs/zh-CN/docs/ecosystem/external-table/multi-catalog.md
+++ b/docs/zh-CN/docs/ecosystem/external-table/multi-catalog.md
@@ -85,14 +85,34 @@ CREATE CATALOG hive PROPERTIES (
     "type"="hms",
     'hive.metastore.uris' = 'thrift://172.21.0.1:7004',
     'hadoop.username' = 'hive'
-    'dfs.nameservices'='service1',
-    'dfs.ha.namenodes. service1'='nn1,nn2',
-    'dfs.namenode.rpc-address.HDFS8000871.nn1'='172.21.0.2:4007',
-    'dfs.namenode.rpc-address.HDFS8000871.nn2'='172.21.0.3:4007',
+    'dfs.nameservices'='your-nameservice',
+    'dfs.ha.namenodes.your-nameservice'='nn1,nn2',
+    'dfs.namenode.rpc-address.your-nameservice.nn1'='172.21.0.2:4007',
+    'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:4007',
-    'dfs.client.failover.proxy.provider.HDFS8000871'='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'
+    'dfs.client.failover.proxy.provider.your-nameservice'='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'
 );
 ```
 
+If you need to connect to a Hive MetaStore with Kerberos authentication enabled, here is an example:
+
+```
+CREATE CATALOG hive PROPERTIES (
+    "type"="hms",
+    'hive.metastore.uris' = 'thrift://172.21.0.1:7004',
+    'hive.metastore.sasl.enabled' = 'true',
+    'dfs.nameservices'='your-nameservice',
+    'dfs.ha.namenodes.your-nameservice'='nn1,nn2',
+    'dfs.namenode.rpc-address.your-nameservice.nn1'='172.21.0.2:4007',
+    'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:4007',
+    'dfs.client.failover.proxy.provider.your-nameservice'='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider',
+    'hadoop.security.authentication' = 'kerberos',
+    'hadoop.kerberos.keytab' = '/your-keytab-filepath/your.keytab',
+    'hadoop.kerberos.principal' = 'your-princi...@your.com',
+    'yarn.resourcemanager.address' = 'your-rm-address:your-rm-port',
+    'yarn.resourcemanager.principal' = 'your-rm-principal/_h...@your.com'
+);
+```
+
 After creation, you can view the catalog with the `SHOW CATALOGS` command:
 
 ```
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsResource.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsResource.java
index 467cc1b6ec..868f032a44 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsResource.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsResource.java
@@ -44,7 +44,10 @@ import java.util.Map;
  * );
  */
 public class HdfsResource extends Resource {
+    public static final String HADOOP_PREFIX = "hadoop.";
     public static final String HADOOP_FS_PREFIX = "dfs.";
+    public static final String HIVE_PREFIX = "hive.";
+    public static final String YARN_PREFIX = "yarn.";
     public static String HADOOP_FS_NAME = "fs.defaultFS";
     // simple or kerberos
     public static String HADOOP_USER_NAME = "hadoop.username";
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
index bfefe1f829..d732c52206 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
@@ -284,8 +284,8 @@ public class HMSExternalTable extends ExternalTable {
         return ((HMSExternalCatalog) catalog).getHiveMetastoreUris();
     }
 
-    public Map<String, String> getDfsProperties() {
-        return catalog.getCatalogProperty().getDfsProperties();
+    public Map<String, String> getHdfsProperties() {
+        return catalog.getCatalogProperty().getHdfsProperties();
     }
 
     public Map<String, String> getS3Properties() {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogProperty.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogProperty.java
index e20afc33f5..27928e3ffe 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogProperty.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogProperty.java
@@ -44,13 +44,15 @@ public class CatalogProperty implements Writable {
         return properties.getOrDefault(key, defaultVal);
     }
 
-    // todo: remove and use HdfsResource
-    public Map<String, String> getDfsProperties() {
+    // get all properties with the prefixes dfs.*, hadoop.*, yarn.*, hive.*
+    // besides dfs.* and hadoop.username, we also need the other properties when Kerberos is enabled
+    public Map<String, String> getHdfsProperties() {
         Map<String, String> dfsProperties = Maps.newHashMap();
         for (Map.Entry<String, String> entry : properties.entrySet()) {
             if (entry.getKey().startsWith(HdfsResource.HADOOP_FS_PREFIX)
-                    || entry.getKey().equals(HdfsResource.HADOOP_USER_NAME)) {
-                // todo: still missing properties like hadoop.xxx
+                    || entry.getKey().startsWith(HdfsResource.HADOOP_PREFIX)
+                    || entry.getKey().startsWith(HdfsResource.HIVE_PREFIX)
+                    || entry.getKey().startsWith(HdfsResource.YARN_PREFIX)) {
                 dfsProperties.put(entry.getKey(), entry.getValue());
             }
         }
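The rewritten getDfsProperties -> getHdfsProperties above is a plain prefix filter. A minimal self-contained sketch of the same logic (hypothetical class name, sample values invented for illustration):

```java
import java.util.HashMap;
import java.util.Map;

public class PrefixFilterSketch {
    // Mirrors the constants added to HdfsResource: "dfs.", "hadoop.", "hive.", "yarn."
    private static final String[] PREFIXES = {"dfs.", "hadoop.", "hive.", "yarn."};

    static Map<String, String> getHdfsProperties(Map<String, String> properties) {
        Map<String, String> result = new HashMap<>();
        for (Map.Entry<String, String> entry : properties.entrySet()) {
            for (String prefix : PREFIXES) {
                if (entry.getKey().startsWith(prefix)) {
                    result.put(entry.getKey(), entry.getValue());
                    break;
                }
            }
        }
        return result;
    }

    public static void main(String[] args) {
        Map<String, String> props = new HashMap<>();
        props.put("type", "hms");                                   // filtered out
        props.put("dfs.nameservices", "your-nameservice");          // kept: dfs.
        props.put("hadoop.security.authentication", "kerberos");    // kept: hadoop.
        props.put("yarn.resourcemanager.address", "rm-host:8032");  // kept: yarn.
        System.out.println(getHdfsProperties(props));               // everything but "type"
    }
}
```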
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java
index 15cb40fe5c..0dfffe19fb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java
@@ -17,19 +17,25 @@
 
 package org.apache.doris.datasource;
 
+import org.apache.doris.catalog.AuthType;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.HMSResource;
+import org.apache.doris.catalog.HdfsResource;
 import org.apache.doris.catalog.HiveMetaStoreClientHelper;
 import org.apache.doris.catalog.external.ExternalDatabase;
 import org.apache.doris.catalog.external.HMSExternalDatabase;
 
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
+import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 
@@ -54,7 +60,7 @@ public class HMSExternalCatalog extends ExternalCatalog {
     }
 
     public String getHiveMetastoreUris() {
-        return catalogProperty.getOrDefault("hive.metastore.uris", "");
+        return catalogProperty.getOrDefault(HMSResource.HIVE_METASTORE_URIS, "");
     }
 
     @Override
@@ -91,8 +97,30 @@ public class HMSExternalCatalog extends ExternalCatalog {
     @Override
     protected void initLocalObjectsImpl() {
         HiveConf hiveConf = new HiveConf();
-        hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, getHiveMetastoreUris());
+        for (String key : catalogProperty.getHdfsProperties().keySet()) {
+            String val = catalogProperty.getOrDefault(key, "");
+            hiveConf.set(key, val);
+        }
+        String authentication = catalogProperty.getOrDefault(
+                HdfsResource.HADOOP_SECURITY_AUTHENTICATION, "");
+        if (AuthType.KERBEROS.getDesc().equals(authentication)) {
+            Configuration conf = new Configuration();
+            conf.set(HdfsResource.HADOOP_SECURITY_AUTHENTICATION, authentication);
+            UserGroupInformation.setConfiguration(conf);
+            try {
+                /**
+                 * Because the metastore client is created via
+                 * {@link org.apache.hadoop.hive.metastore.RetryingMetaStoreClient#getProxy},
+                 * it will re-login when the TGT expires, so we don't need to re-login manually.
+                 */
+                UserGroupInformation.loginUserFromKeytab(
+                        catalogProperty.getOrDefault(HdfsResource.HADOOP_KERBEROS_PRINCIPAL, ""),
+                        catalogProperty.getOrDefault(HdfsResource.HADOOP_KERBEROS_KEYTAB, ""));
+            } catch (IOException e) {
+                throw new HMSClientException("login with kerberos auth failed for catalog %s", e, this.getName());
+            }
+        }
         // 1. read properties from hive-site.xml.
         // and then use properties in CatalogProperty to override properties got from hive-site.xml
         Map<String, String> properties = HiveMetaStoreClientHelper.getPropertiesForDLF(name, hiveConf);
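The Kerberos branch added to initLocalObjectsImpl is the standard Hadoop UGI keytab-login pattern. A minimal standalone sketch of that pattern; the principal and keytab path are placeholders for the values the commit reads from hadoop.kerberos.principal and hadoop.kerberos.keytab:

```java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;

public class UgiKeytabLoginSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Switch Hadoop security from the default "simple" mode to Kerberos
        // before attempting the login.
        conf.set("hadoop.security.authentication", "kerberos");
        UserGroupInformation.setConfiguration(conf);

        // Placeholder principal and keytab path.
        UserGroupInformation.loginUserFromKeytab(
                "your-principal@YOUR.COM", "/your-keytab-filepath/your.keytab");

        // The process-wide login user is now the keytab identity; subsequent
        // HDFS/metastore calls authenticate as this user.
        System.out.println(UserGroupInformation.getLoginUser());
    }
}
```

As the in-code comment notes, periodic re-login is handled by RetryingMetaStoreClient, so the sketch does no TGT renewal of its own.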
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java
index 0ee3db195c..ace3693907 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java
@@ -244,7 +244,7 @@ public class HiveScanProvider extends HMSTableScanProvider {
         if (locationType == TFileType.FILE_S3) {
             return hmsTable.getS3Properties();
         } else if (locationType == TFileType.FILE_HDFS) {
-            return hmsTable.getDfsProperties();
+            return hmsTable.getHdfsProperties();
         } else {
             return Maps.newHashMap();
         }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org