This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 127ac8d93ec [Enhancement](ExternalTable)Optimize the performance of 
getCachedRowCount when reading ExternalTable (#41659)
127ac8d93ec is described below

commit 127ac8d93ece5310afe20eb5521f6ec883d7282a
Author: daidai <2017501...@qq.com>
AuthorDate: Sun Oct 13 20:21:02 2024 +0800

    [Enhancement](ExternalTable)Optimize the performance of getCachedRowCount 
when reading ExternalTable (#41659)
    
    ## Proposed changes
    Because ExternalTable will initialize the previously uninitialized table
    when `getCachedRowCount()`, which is unnecessary. So for the
    uninitialized table, we directly return -1.
    This will increase the speed of our query `information_schema.tables`.
---
 .../org/apache/doris/datasource/ExternalTable.java | 13 +++----
 .../doris/datasource/hive/HMSExternalCatalog.java  | 10 ++++++
 .../iceberg/IcebergHMSExternalCatalog.java         | 13 +------
 .../datasource/iceberg/IcebergMetadataCache.java   | 41 ++--------------------
 .../doris/datasource/iceberg/IcebergUtils.java     | 17 ++++++++-
 .../iceberg/test_iceberg_table_stats.groovy        |  1 +
 6 files changed, 38 insertions(+), 57 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
index eedbe4e20da..baa25c991fc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
@@ -202,14 +202,15 @@ public class ExternalTable implements TableIf, Writable, 
GsonPostProcessable {
 
     @Override
     public long getCachedRowCount() {
-        // Return -1 if makeSureInitialized throw exception.
-        // For example, init hive table may throw NotSupportedException.
-        try {
-            makeSureInitialized();
-        } catch (Exception e) {
-            LOG.warn("Failed to initialize table {}.{}.{}", catalog.getName(), 
dbName, name, e);
+        // Return -1 if uninitialized.
+        // Before this, for uninitialized tables, we would call 
makeSureInitialized(), just like the implementation of
+        // ExternalTable.getRowCount(), but this is not very meaningful and 
time-consuming.
+        // The getCachedRowCount() function is only used when `show table` and 
querying `information_schema.tables`.
+        if (!isObjectCreated()) {
             return -1;
         }
+        // getExtMetaCacheMgr().getRowCountCache().getCachedRowCount() is an 
asynchronous non-blocking operation.
+        // For tables that are not in the cache, it will load asynchronously 
and return -1.
         return 
Env.getCurrentEnv().getExtMetaCacheMgr().getRowCountCache().getCachedRowCount(catalog.getId(),
 dbId, id);
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalCatalog.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalCatalog.java
index 99fd96f65db..20b9482041d 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalCatalog.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalCatalog.java
@@ -34,6 +34,7 @@ import org.apache.doris.datasource.ExternalDatabase;
 import org.apache.doris.datasource.ExternalTable;
 import org.apache.doris.datasource.InitCatalogLog;
 import org.apache.doris.datasource.SessionContext;
+import org.apache.doris.datasource.iceberg.IcebergUtils;
 import org.apache.doris.datasource.jdbc.client.JdbcClientConfig;
 import org.apache.doris.datasource.operations.ExternalMetadataOperations;
 import org.apache.doris.datasource.property.PropertyConverter;
@@ -53,6 +54,7 @@ import com.google.common.collect.Maps;
 import lombok.Getter;
 import org.apache.commons.lang3.math.NumberUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.iceberg.hive.HiveCatalog;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
@@ -85,6 +87,8 @@ public class HMSExternalCatalog extends ExternalCatalog {
     private int hmsEventsBatchSizePerRpc = -1;
     private boolean enableHmsEventsIncrementalSync = false;
 
+    //for "type" = "hms" , but is iceberg table.
+    private HiveCatalog icebergHiveCatalog;
 
     @VisibleForTesting
     public HMSExternalCatalog() {
@@ -195,6 +199,8 @@ public class HMSExternalCatalog extends ExternalCatalog {
         transactionManager = 
TransactionManagerFactory.createHiveTransactionManager(hiveOps, 
fileSystemProvider,
                 fileSystemExecutor);
         metadataOps = hiveOps;
+
+        icebergHiveCatalog = IcebergUtils.createIcebergHiveCatalog(this, 
getName());
     }
 
     @Override
@@ -331,6 +337,10 @@ public class HMSExternalCatalog extends ExternalCatalog {
         return enableHmsEventsIncrementalSync;
     }
 
+    public HiveCatalog getIcebergHiveCatalog() {
+        return icebergHiveCatalog;
+    }
+
     /**
      * Enum for meta tables in hive catalog.
      * eg: tbl$partitions
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergHMSExternalCatalog.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergHMSExternalCatalog.java
index c1475064934..51d39357b81 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergHMSExternalCatalog.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergHMSExternalCatalog.java
@@ -19,10 +19,6 @@ package org.apache.doris.datasource.iceberg;
 
 import org.apache.doris.datasource.CatalogProperty;
 import org.apache.doris.datasource.property.PropertyConverter;
-import org.apache.doris.datasource.property.constants.HMSProperties;
-
-import org.apache.iceberg.CatalogProperties;
-import org.apache.iceberg.hive.HiveCatalog;
 
 import java.util.Map;
 
@@ -38,14 +34,7 @@ public class IcebergHMSExternalCatalog extends 
IcebergExternalCatalog {
     @Override
     protected void initCatalog() {
         icebergCatalogType = ICEBERG_HMS;
-        HiveCatalog hiveCatalog = new org.apache.iceberg.hive.HiveCatalog();
-        hiveCatalog.setConf(getConfiguration());
-        // initialize hive catalog
-        Map<String, String> catalogProperties = 
catalogProperty.getProperties();
-        String metastoreUris = 
catalogProperty.getOrDefault(HMSProperties.HIVE_METASTORE_URIS, "");
-        catalogProperties.put(CatalogProperties.URI, metastoreUris);
-        hiveCatalog.initialize(getName(), catalogProperties);
-        catalog = hiveCatalog;
+        catalog = IcebergUtils.createIcebergHiveCatalog(this, getName());
     }
 }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java
index a35c73dc296..c1ac2a79754 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java
@@ -22,25 +22,22 @@ import org.apache.doris.common.CacheFactory;
 import org.apache.doris.common.Config;
 import org.apache.doris.common.UserException;
 import org.apache.doris.datasource.CatalogIf;
+import org.apache.doris.datasource.ExternalCatalog;
 import org.apache.doris.datasource.ExternalMetaCacheMgr;
 import org.apache.doris.datasource.hive.HMSExternalCatalog;
 import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper;
-import org.apache.doris.datasource.property.constants.HMSProperties;
-import org.apache.doris.fs.remote.dfs.DFSFileSystem;
 import org.apache.doris.thrift.TIcebergMetadataParams;
 
 import com.github.benmanes.caffeine.cache.LoadingCache;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.iceberg.ManifestFiles;
 import org.apache.iceberg.SerializableTable;
 import org.apache.iceberg.Snapshot;
 import org.apache.iceberg.Table;
 import org.apache.iceberg.catalog.Catalog;
 import org.apache.iceberg.catalog.TableIdentifier;
-import org.apache.iceberg.hive.HiveCatalog;
 import org.jetbrains.annotations.NotNull;
 
 import java.util.HashMap;
@@ -97,11 +94,6 @@ public class IcebergMetadataCache {
         return restTable;
     }
 
-    public Table getRemoteTable(CatalogIf catalog, String dbName, String 
tbName) {
-        IcebergMetadataCacheKey key = IcebergMetadataCacheKey.of(catalog, 
dbName, tbName);
-        return loadTable(key);
-    }
-
     @NotNull
     private List<Snapshot> loadSnapshots(IcebergMetadataCacheKey key) {
         Table icebergTable = getIcebergTable(key.catalog, key.dbName, 
key.tableName);
@@ -114,17 +106,13 @@ public class IcebergMetadataCache {
     private Table loadTable(IcebergMetadataCacheKey key) {
         Catalog icebergCatalog;
         if (key.catalog instanceof HMSExternalCatalog) {
-            HMSExternalCatalog ctg = (HMSExternalCatalog) key.catalog;
-            icebergCatalog = createIcebergHiveCatalog(
-                    ctg.getHiveMetastoreUris(),
-                    ctg.getCatalogProperty().getHadoopProperties(),
-                    ctg.getProperties());
+            icebergCatalog = ((HMSExternalCatalog) 
key.catalog).getIcebergHiveCatalog();
         } else if (key.catalog instanceof IcebergExternalCatalog) {
             icebergCatalog = ((IcebergExternalCatalog) 
key.catalog).getCatalog();
         } else {
             throw new RuntimeException("Only support 'hms' and 'iceberg' type 
for iceberg table");
         }
-        Table icebergTable = 
HiveMetaStoreClientHelper.ugiDoAs(key.catalog.getId(),
+        Table icebergTable = 
HiveMetaStoreClientHelper.ugiDoAs(((ExternalCatalog) 
key.catalog).getConfiguration(),
                 () -> icebergCatalog.loadTable(TableIdentifier.of(key.dbName, 
key.tableName)));
         initIcebergTableFileIO(icebergTable, key.catalog.getProperties());
         return icebergTable;
@@ -177,29 +165,6 @@ public class IcebergMetadataCache {
                 });
     }
 
-    private Catalog createIcebergHiveCatalog(String uri, Map<String, String> 
hdfsConf, Map<String, String> props) {
-        // set hdfs configure
-        Configuration conf = DFSFileSystem.getHdfsConf(
-                
hdfsConf.getOrDefault(DFSFileSystem.PROP_ALLOW_FALLBACK_TO_SIMPLE_AUTH, 
"").isEmpty());
-        for (Map.Entry<String, String> entry : hdfsConf.entrySet()) {
-            conf.set(entry.getKey(), entry.getValue());
-        }
-        HiveCatalog hiveCatalog = new HiveCatalog();
-        hiveCatalog.setConf(conf);
-
-        if (props.containsKey(HMSExternalCatalog.BIND_BROKER_NAME)) {
-            props.put(HMSProperties.HIVE_METASTORE_URIS, uri);
-            props.put("uri", uri);
-            hiveCatalog.initialize("hive", props);
-        } else {
-            Map<String, String> catalogProperties = new HashMap<>();
-            catalogProperties.put(HMSProperties.HIVE_METASTORE_URIS, uri);
-            catalogProperties.put("uri", uri);
-            hiveCatalog.initialize("hive", catalogProperties);
-        }
-        return hiveCatalog;
-    }
-
     private static void initIcebergTableFileIO(Table table, Map<String, 
String> props) {
         Map<String, String> ioConf = new HashMap<>();
         table.properties().forEach((key, value) -> {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
index f7280f5721f..893ee7bc93b 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
@@ -47,10 +47,12 @@ import org.apache.doris.common.info.SimpleTableInfo;
 import org.apache.doris.common.util.TimeUtils;
 import org.apache.doris.datasource.ExternalCatalog;
 import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper;
+import org.apache.doris.datasource.property.constants.HMSProperties;
 import org.apache.doris.nereids.exceptions.NotSupportedException;
 import org.apache.doris.thrift.TExprOpcode;
 
 import com.google.common.collect.Lists;
+import org.apache.iceberg.CatalogProperties;
 import org.apache.iceberg.FileFormat;
 import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.Schema;
@@ -63,6 +65,7 @@ import org.apache.iceberg.expressions.Expressions;
 import org.apache.iceberg.expressions.Not;
 import org.apache.iceberg.expressions.Or;
 import org.apache.iceberg.expressions.Unbound;
+import org.apache.iceberg.hive.HiveCatalog;
 import org.apache.iceberg.types.Type.TypeID;
 import org.apache.iceberg.types.Types;
 import org.apache.iceberg.util.LocationUtil;
@@ -553,7 +556,7 @@ public class IcebergUtils {
         return Env.getCurrentEnv()
                 .getExtMetaCacheMgr()
                 .getIcebergMetadataCache()
-                .getRemoteTable(catalog, tableInfo.getDbName(), 
tableInfo.getTbName());
+                .getIcebergTable(catalog, tableInfo.getDbName(), 
tableInfo.getTbName());
     }
 
     private static org.apache.iceberg.Table 
getIcebergTableInternal(ExternalCatalog catalog, String dbName,
@@ -665,4 +668,16 @@ public class IcebergUtils {
         }
         return dataLocation;
     }
+
+    public static HiveCatalog createIcebergHiveCatalog(ExternalCatalog 
externalCatalog, String name) {
+        HiveCatalog hiveCatalog = new org.apache.iceberg.hive.HiveCatalog();
+        hiveCatalog.setConf(externalCatalog.getConfiguration());
+
+        Map<String, String> catalogProperties = 
externalCatalog.getProperties();
+        String metastoreUris = 
catalogProperties.getOrDefault(HMSProperties.HIVE_METASTORE_URIS, "");
+        catalogProperties.put(CatalogProperties.URI, metastoreUris);
+
+        hiveCatalog.initialize(name, catalogProperties);
+        return hiveCatalog;
+    }
 }
diff --git 
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_table_stats.groovy
 
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_table_stats.groovy
index a950f93909e..710a0bc953f 100644
--- 
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_table_stats.groovy
+++ 
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_table_stats.groovy
@@ -40,6 +40,7 @@ suite("test_iceberg_table_stats", 
"p0,external,doris,external_docker,external_do
                 def retry = 0
                 def act = ""
                 while (retry < 10) {
+                    sql """ select * from ${table_name} """
                     def result = sql """ show table stats ${table_name} """
                     act = result[0][2]
                     if (act != "-1") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to