This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 9df72a96f3 [Feature](multi-catalog) Support hadoop viewfs. (#24168)
9df72a96f3 is described below

commit 9df72a96f39261b45edd15fd12db9546d7e7f321
Author: Qi Chen <[email protected]>
AuthorDate: Wed Sep 13 00:20:12 2023 +0800

    [Feature](multi-catalog) Support hadoop viewfs. (#24168)
    
    ### Feature
    
    Support hadoop viewfs.
    
    ### Test
    
    - Regression tests:
      - hive viewfs test.
      - tvf viewfs test.
    
    - Manually tested broker load, both via broker and via HDFS.
---
 be/src/io/fs/hdfs_file_system.cpp                  |  8 +--
 docs/en/docs/lakehouse/multi-catalog/hive.md       | 22 ++++++++
 docs/zh-CN/docs/lakehouse/multi-catalog/hive.md    | 22 ++++++++
 .../org/apache/doris/analysis/StorageBackend.java  |  6 ++-
 .../java/org/apache/doris/common/FeConstants.java  |  1 +
 .../org/apache/doris/fs/FileSystemFactory.java     |  3 +-
 .../doris/planner/external/FileQueryScanNode.java  |  2 +
 .../doris/broker/hdfs/FileSystemManager.java       |  3 +-
 .../external_table_p2/hive/test_viewfs_hive.out    | 35 +++++++++++++
 .../data/external_table_p2/tvf/test_tvf_p2.out     |  3 ++
 .../external_table_p2/hive/test_viewfs_hive.groovy | 59 ++++++++++++++++++++++
 .../external_table_p2/tvf/test_tvf_p2.groovy       |  9 ++++
 12 files changed, 163 insertions(+), 10 deletions(-)

diff --git a/be/src/io/fs/hdfs_file_system.cpp b/be/src/io/fs/hdfs_file_system.cpp
index 5294890324..1e71ade934 100644
--- a/be/src/io/fs/hdfs_file_system.cpp
+++ b/be/src/io/fs/hdfs_file_system.cpp
@@ -138,7 +138,7 @@ HdfsFileSystem::HdfsFileSystem(const THdfsParams& hdfs_params, const std::string
           _hdfs_params(hdfs_params),
           _fs_handle(nullptr),
           _profile(profile) {
-    if (_hdfs_params.__isset.fs_name) {
+    if (fs_name.empty() && _hdfs_params.__isset.fs_name) {
         _fs_name = _hdfs_params.fs_name;
     } else {
         _fs_name = fs_name;
@@ -509,11 +509,7 @@ Status HdfsFileSystemCache::get_connection(const THdfsParams& hdfs_params,
 uint64 HdfsFileSystemCache::_hdfs_hash_code(const THdfsParams& hdfs_params,
                                             const std::string& fs_name) {
     uint64 hash_code = 0;
-    if (hdfs_params.__isset.fs_name) {
-        hash_code += Fingerprint(hdfs_params.fs_name);
-    } else {
-        hash_code += Fingerprint(fs_name);
-    }
+    hash_code += Fingerprint(fs_name);
     if (hdfs_params.__isset.user) {
         hash_code += Fingerprint(hdfs_params.user);
     }
diff --git a/docs/en/docs/lakehouse/multi-catalog/hive.md b/docs/en/docs/lakehouse/multi-catalog/hive.md
index 87e562ee88..afa388321c 100644
--- a/docs/en/docs/lakehouse/multi-catalog/hive.md
+++ b/docs/en/docs/lakehouse/multi-catalog/hive.md
@@ -95,6 +95,28 @@ Please place the `krb5.conf` file and `keytab` authentication file under all `BE
 
 The value of `hive.metastore.kerberos.principal` needs to be consistent with the property of the same name of the connected hive metastore, which can be obtained from `hive-site.xml`.
 
+### Hive On ViewFs
+
+```sql
+CREATE CATALOG hive PROPERTIES (
+    'type'='hms',
+    'hive.metastore.uris' = 'thrift://172.0.0.1:9083',
+    'hadoop.username' = 'hive',
+    'dfs.nameservices'='your-nameservice',
+    'dfs.ha.namenodes.your-nameservice'='nn1,nn2',
+    'dfs.namenode.rpc-address.your-nameservice.nn1'='172.21.0.2:8088',
+    'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:8088',
+    'dfs.client.failover.proxy.provider.your-nameservice'='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider',
+    'fs.defaultFS' = 'viewfs://your-cluster',
+    'fs.viewfs.mounttable.your-cluster.link./ns1' = 'hdfs://your-nameservice/',
+    'fs.viewfs.mounttable.your-cluster.homedir' = '/ns1'
+);
+```
+
+ViewFs-related parameters can be added to the catalog configuration as above, or added to `conf/core-site.xml`. In the example above, `fs.viewfs.mounttable.your-cluster.link./ns1` mounts `hdfs://your-nameservice/` at the `/ns1` path of `viewfs://your-cluster`.
+
+For details on how ViewFs works and how to configure it, please refer to the Hadoop documentation, for example: https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/ViewFs.html
+
 ### Hive On JuiceFS
 
 Data is stored in JuiceFS, examples are as follows:
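Once a ViewFs-backed catalog like the one in the docs above is created, tables resolve through the mount table transparently. A minimal usage sketch, where the catalog name `hive` matches the example and the database and table names are hypothetical:

```sql
-- Switch to the ViewFs-backed catalog created above.
SWITCH hive;
-- db1 and tbl_on_viewfs are hypothetical names.
USE db1;
-- The table location is resolved via viewfs://your-cluster and the mount table.
SELECT * FROM tbl_on_viewfs LIMIT 10;
```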
diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md
index 3242426184..e582d5e907 100644
--- a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md
+++ b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md
@@ -94,6 +94,28 @@ CREATE CATALOG hive PROPERTIES (
 Please place the `krb5.conf` file and the `keytab` authentication file on all `BE` and `FE` nodes; keep the `keytab` file path consistent with the configuration. The `krb5.conf` file is placed at `/etc/krb5.conf` by default.
 The value of `hive.metastore.kerberos.principal` needs to be consistent with the property of the same name of the connected hive metastore, which can be obtained from `hive-site.xml`.
 
+### Hive On ViewFs
+
+```sql
+CREATE CATALOG hive PROPERTIES (
+    'type'='hms',
+    'hive.metastore.uris' = 'thrift://172.0.0.1:9083',
+    'hadoop.username' = 'hive',
+    'dfs.nameservices'='your-nameservice',
+    'dfs.ha.namenodes.your-nameservice'='nn1,nn2',
+    'dfs.namenode.rpc-address.your-nameservice.nn1'='172.21.0.2:8088',
+    'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:8088',
+    'dfs.client.failover.proxy.provider.your-nameservice'='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider',
+    'fs.defaultFS' = 'viewfs://your-cluster',
+    'fs.viewfs.mounttable.your-cluster.link./ns1' = 'hdfs://your-nameservice/',
+    'fs.viewfs.mounttable.your-cluster.homedir' = '/ns1'
+);
+```
+
+ViewFs-related parameters can be added to the catalog configuration as above, or added to `conf/core-site.xml`.
+
+For details on how ViewFs works and how to configure it, please refer to the Hadoop documentation, for example: https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/ViewFs.html
+
 ### Hive On JuiceFS
 
 Data is stored in JuiceFS, examples are as follows:
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java
index ada22ad301..f3d7f7e49f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java
@@ -50,6 +50,7 @@ public class StorageBackend implements ParseNode {
             if (!schema.equalsIgnoreCase("bos")
                     && !schema.equalsIgnoreCase("afs")
                     && !schema.equalsIgnoreCase("hdfs")
+                    && !schema.equalsIgnoreCase("viewfs")
                     && !schema.equalsIgnoreCase("ofs")
                     && !schema.equalsIgnoreCase("obs")
                     && !schema.equalsIgnoreCase("oss")
@@ -58,8 +59,9 @@ public class StorageBackend implements ParseNode {
                     && !schema.equalsIgnoreCase("gfs")
                     && !schema.equalsIgnoreCase("jfs")
                     && !schema.equalsIgnoreCase("gs")) {
-                throw new AnalysisException("Invalid broker path. please use valid 'hdfs://', 'afs://' , 'bos://',"
-                        + " 'ofs://', 'obs://', 'oss://', 's3a://', 'cosn://', 'gfs://', 'gs://' or 'jfs://' path.");
+                throw new AnalysisException("Invalid broker path. please use valid 'hdfs://', 'viewfs://', 'afs://',"
+                        + " 'bos://', 'ofs://', 'obs://', 'oss://', 's3a://', 'cosn://', 'gfs://', 'gs://'"
+                        + " or 'jfs://' path.");
             }
        } else if (type == StorageBackend.StorageType.S3 && !schema.equalsIgnoreCase("s3")) {
            throw new AnalysisException("Invalid export path. please use valid 's3://' path.");
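The `StorageBackend` change above adds `viewfs` to the schemes accepted in broker paths, which is what the manually run broker load tests mentioned in the commit message exercise. A hedged sketch of such a load, where the label, target table, broker name, credentials, and file path are all hypothetical:

```sql
LOAD LABEL example_db.label_viewfs_load
(
    -- The path is resolved through the ViewFs mount table to the backing HDFS.
    DATA INFILE("viewfs://my-cluster/ns1/path/to/data.csv")
    INTO TABLE example_table
    COLUMNS TERMINATED BY ","
)
WITH BROKER "broker_name"
(
    "username" = "hdfs_user",
    "password" = ""
);
```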
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/FeConstants.java b/fe/fe-core/src/main/java/org/apache/doris/common/FeConstants.java
index f7fb0348dd..e9e853eb9f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/FeConstants.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/FeConstants.java
@@ -88,6 +88,7 @@ public class FeConstants {
     public static String FS_PREFIX_GFS = "gfs";
     public static String FS_PREFIX_JFS = "jfs";
     public static String FS_PREFIX_HDFS = "hdfs";
+    public static String FS_PREFIX_VIEWFS = "viewfs";
     public static String FS_PREFIX_FILE = "file";
     public static final String INTERNAL_DB_NAME = "__internal_schema";
    public static String TEMP_MATERIZLIZE_DVIEW_PREFIX = "internal_tmp_materialized_view_";
diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemFactory.java b/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemFactory.java
index c2a070cdfa..3837a7eb95 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemFactory.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemFactory.java
@@ -65,7 +65,8 @@ public class FileSystemFactory {
             } else {
                 fsType = FileSystemType.S3;
             }
-        } else if (location.startsWith(FeConstants.FS_PREFIX_HDFS) || location.startsWith(FeConstants.FS_PREFIX_GFS)) {
+        } else if (location.startsWith(FeConstants.FS_PREFIX_HDFS) || location.startsWith(FeConstants.FS_PREFIX_GFS)
+                 || location.startsWith(FeConstants.FS_PREFIX_VIEWFS)) {
             fsType = FileSystemType.DFS;
        } else if (location.startsWith(FeConstants.FS_PREFIX_OFS) || location.startsWith(FeConstants.FS_PREFIX_COSN)) {
            // ofs:// and cosn:// use the same underlying file system: Tencent Cloud HDFS, aka CHDFS)) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java
index d59c6bec7d..00188dc50b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java
@@ -484,6 +484,8 @@ public abstract class FileQueryScanNode extends FileScanNode {
                 return Optional.of(TFileType.FILE_S3);
             } else if (location.startsWith(FeConstants.FS_PREFIX_HDFS)) {
                 return Optional.of(TFileType.FILE_HDFS);
+            } else if (location.startsWith(FeConstants.FS_PREFIX_VIEWFS)) {
+                return Optional.of(TFileType.FILE_HDFS);
             } else if (location.startsWith(FeConstants.FS_PREFIX_COSN)) {
                 return Optional.of(TFileType.FILE_HDFS);
             } else if (location.startsWith(FeConstants.FS_PREFIX_FILE)) {
diff --git a/fs_brokers/apache_hdfs_broker/src/main/java/org/apache/doris/broker/hdfs/FileSystemManager.java b/fs_brokers/apache_hdfs_broker/src/main/java/org/apache/doris/broker/hdfs/FileSystemManager.java
index d25947e33b..997cf0cb2c 100644
--- a/fs_brokers/apache_hdfs_broker/src/main/java/org/apache/doris/broker/hdfs/FileSystemManager.java
+++ b/fs_brokers/apache_hdfs_broker/src/main/java/org/apache/doris/broker/hdfs/FileSystemManager.java
@@ -66,6 +66,7 @@ public class FileSystemManager {
             .getLogger(FileSystemManager.class.getName());
     // supported scheme
     private static final String HDFS_SCHEME = "hdfs";
+    private static final String VIEWFS_SCHEME = "viewfs";
     private static final String S3A_SCHEME = "s3a";
     private static final String KS3_SCHEME = "ks3";
     private static final String CHDFS_SCHEME = "ofs";
@@ -210,7 +211,7 @@ public class FileSystemManager {
                 "invalid path. scheme is null");
         }
         BrokerFileSystem brokerFileSystem = null;
-        if (scheme.equals(HDFS_SCHEME)) {
+        if (scheme.equals(HDFS_SCHEME) || scheme.equals(VIEWFS_SCHEME)) {
             brokerFileSystem = getDistributedFileSystem(path, properties);
         } else if (scheme.equals(S3A_SCHEME)) {
             brokerFileSystem = getS3AFileSystem(path, properties);
diff --git a/regression-test/data/external_table_p2/hive/test_viewfs_hive.out b/regression-test/data/external_table_p2/hive/test_viewfs_hive.out
new file mode 100644
index 0000000000..37e1df0ba0
--- /dev/null
+++ b/regression-test/data/external_table_p2/hive/test_viewfs_hive.out
@@ -0,0 +1,35 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !viewfs --
+1      Tom     48      \N      male
+2      Jerry   35      \N      male
+3      Frank   25      \N      male
+4      Ada     22      \N      female
+
+-- !viewfs_partition1 --
+1      Tom     48      \N      male    20230101
+2      Jerry   35      \N      male    20230101
+3      Frank   25      \N      male    20230201
+4      Ada     22      \N      female  20230201
+
+-- !viewfs_partition2 --
+1      Tom     48      \N      male    20230101
+2      Jerry   35      \N      male    20230101
+
+-- !viewfs_partition3 --
+3      Frank   25      \N      male    20230201
+4      Ada     22      \N      female  20230201
+
+-- !viewfs_mixed_partition1 --
+1      Tom     48      \N      male    20230101
+2      Jerry   35      \N      male    20230101
+3      Frank   25      \N      male    20230201
+4      Ada     22      \N      female  20230201
+
+-- !viewfs_mixed_partition2 --
+1      Tom     48      \N      male    20230101
+2      Jerry   35      \N      male    20230101
+
+-- !viewfs_mixed_partition3 --
+3      Frank   25      \N      male    20230201
+4      Ada     22      \N      female  20230201
+
diff --git a/regression-test/data/external_table_p2/tvf/test_tvf_p2.out b/regression-test/data/external_table_p2/tvf/test_tvf_p2.out
index cb7239f2ff..86f3f43f2d 100644
--- a/regression-test/data/external_table_p2/tvf/test_tvf_p2.out
+++ b/regression-test/data/external_table_p2/tvf/test_tvf_p2.out
@@ -42,3 +42,6 @@
 -- !row_cross_pages --
 25001  25001   25001
 
+-- !viewfs --
+25001  25001   25001
+
diff --git a/regression-test/suites/external_table_p2/hive/test_viewfs_hive.groovy b/regression-test/suites/external_table_p2/hive/test_viewfs_hive.groovy
new file mode 100644
index 0000000000..9ccea773ed
--- /dev/null
+++ b/regression-test/suites/external_table_p2/hive/test_viewfs_hive.groovy
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_viewfs_hive", 
"p2,external,hive,external_remote,external_remote_hive") {
+
+    String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost")
+        String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort")
+        String nameNodeHost = context.config.otherConfigs.get("extHiveHmsHost")
+        String hdfsPort = context.config.otherConfigs.get("extHdfsPort")
+        String catalog_name = "test_viewfs_hive"
+
+        sql """drop catalog if exists ${catalog_name};"""
+
+        sql """
+            create catalog if not exists ${catalog_name} properties (
+                'type'='hms',
+                'hadoop.username' = 'hadoop',
+                'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}',
+                'fs.viewfs.mounttable.my-cluster.link./ns1' = 'hdfs://${nameNodeHost}:${hdfsPort}/',
+                'fs.viewfs.mounttable.my-cluster.homedir' = '/ns1',
+                'fs.defaultFS' = 'viewfs://my-cluster'
+            );
+        """
+        logger.info("catalog " + catalog_name + " created")
+        sql """switch ${catalog_name};"""
+        logger.info("switched to catalog " + catalog_name)
+        
+        sql """ use viewfs """ 
+
+        // The location of table is on viewfs.
+        qt_viewfs """ select * from test_viewfs order by id"""
+
+        // The location of partition table is on viewfs.
+        qt_viewfs_partition1 """ select * from test_viewfs_partition order by id"""
+        qt_viewfs_partition2 """ select * from test_viewfs_partition where part_col = 20230101 order by id"""
+        qt_viewfs_partition3 """ select * from test_viewfs_partition where part_col = 20230201 order by id"""
+
+        // The partitioned table contains both hdfs and viewfs partition locations.
+        qt_viewfs_mixed_partition1 """ select * from test_viewfs_mixed_partition order by id"""
+        qt_viewfs_mixed_partition2 """ select * from test_viewfs_mixed_partition where part_col = 20230101 order by id"""
+        qt_viewfs_mixed_partition3 """ select * from test_viewfs_mixed_partition where part_col = 20230201 order by id"""
+    }
+}
diff --git a/regression-test/suites/external_table_p2/tvf/test_tvf_p2.groovy b/regression-test/suites/external_table_p2/tvf/test_tvf_p2.groovy
index ec3fb41d95..853b5d2f4d 100644
--- a/regression-test/suites/external_table_p2/tvf/test_tvf_p2.groovy
+++ b/regression-test/suites/external_table_p2/tvf/test_tvf_p2.groovy
@@ -53,5 +53,14 @@ suite("test_tvf_p2", "p2,external,tvf,external_remote,external_remote_tvf") {
             "uri" = 
"hdfs://${nameNodeHost}:${hdfsPort}/catalog/tvf/parquet/row_cross_pages.parquet",
             "format" = "parquet",
             "fs.defaultFS" = "hdfs://${nameNodeHost}:${hdfsPort}")"""
+
+        // viewfs
+        qt_viewfs """select count(id), count(m1), count(m2)
+            from hdfs(
+            "uri" = 
"viewfs://my-cluster/ns1/catalog/tvf/parquet/row_cross_pages.parquet",
+            "format" = "parquet",
+            "fs.defaultFS" = "viewfs://my-cluster",
+            "fs.viewfs.mounttable.my-cluster.link./ns1" = 
"hdfs://${nameNodeHost}:${hdfsPort}/",
+            "fs.viewfs.mounttable.my-cluster.homedir" = "/ns1")"""
     }
 }

