This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 913df76c2e [Fix](statistics)Handle external table in statistics 
cleaner. (#23843) (#24281)
913df76c2e is described below

commit 913df76c2e05342c8ac135ffb87e9a190d87c0dd
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Wed Sep 13 20:35:26 2023 +0800

    [Fix](statistics)Handle external table in statistics cleaner. (#23843) 
(#24281)
    
    Before, Statistics Cleaner only handles olap db and table.
    External db and tables would be removed without verification. So that 
external stats could stored no more than 2 days, which is the interval of Stats 
cleaner thread.
    This pr is to add verification for external db and tables.
---
 .../java/org/apache/doris/catalog/Database.java    |   3 +-
 .../java/org/apache/doris/catalog/DatabaseIf.java  |   3 +
 .../doris/catalog/external/ExternalDatabase.java   |   7 ++
 .../org/apache/doris/datasource/CatalogIf.java     |   4 +-
 .../apache/doris/datasource/ExternalCatalog.java   |   6 ++
 .../apache/doris/datasource/InternalCatalog.java   |   3 +-
 .../apache/doris/statistics/StatisticsCleaner.java |  33 +++---
 .../hive/test_hive_statistic_clean.groovy          | 120 +++++++++++++++++++++
 8 files changed, 162 insertions(+), 17 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Database.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Database.java
index 0fc63253d9..9728115dbf 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Database.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Database.java
@@ -860,7 +860,8 @@ public class Database extends MetaObject implements 
Writable, DatabaseIf<Table>
         return null;
     }
 
-    public Map<Long, Table> getIdToTable() {
+    @Override
+    public Map<Long, TableIf> getIdToTable() {
         return new HashMap<>(idToTable);
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/DatabaseIf.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/DatabaseIf.java
index 7dfceabbde..46ed88e72f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/DatabaseIf.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/DatabaseIf.java
@@ -30,6 +30,7 @@ import org.apache.logging.log4j.Logger;
 
 import java.util.Arrays;
 import java.util.List;
+import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
@@ -257,4 +258,6 @@ public interface DatabaseIf<T extends TableIf> {
     default long getLastUpdateTime() {
         return -1L;
     }
+
+    public Map<Long, TableIf> getIdToTable();
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalDatabase.java
 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalDatabase.java
index fa2ecd4011..3559637b8f 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalDatabase.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalDatabase.java
@@ -20,6 +20,7 @@ package org.apache.doris.catalog.external;
 import org.apache.doris.catalog.DatabaseIf;
 import org.apache.doris.catalog.DatabaseProperty;
 import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.TableIf;
 import org.apache.doris.common.DdlException;
 import org.apache.doris.common.MetaNotFoundException;
 import org.apache.doris.common.io.Text;
@@ -44,6 +45,7 @@ import org.apache.logging.log4j.Logger;
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -360,4 +362,9 @@ public abstract class ExternalDatabase<T extends 
ExternalTable>
     public void createTableForReplay(String tableName, long tableId) {
         throw new NotImplementedException("createTable() is not implemented");
     }
+
+    @Override
+    public Map<Long, TableIf> getIdToTable() {
+        return new HashMap<>(idToTbl);
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java
index b84c769eac..69e51b6326 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java
@@ -17,7 +17,6 @@
 
 package org.apache.doris.datasource;
 
-
 import org.apache.doris.catalog.DatabaseIf;
 import org.apache.doris.catalog.Env;
 import org.apache.doris.common.AnalysisException;
@@ -34,6 +33,7 @@ import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.function.Function;
 import javax.annotation.Nullable;
 
@@ -170,4 +170,6 @@ public interface CatalogIf<T extends DatabaseIf> {
     // Return a copy of all db collection.
     @SuppressWarnings({"rawtypes", "unchecked"})
     public Collection<DatabaseIf> getAllDbs();
+
+    public ConcurrentHashMap<Long, DatabaseIf> getIdToDb();
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java
index c202ec6f56..093c591d92 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java
@@ -62,6 +62,7 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Optional;
+import java.util.concurrent.ConcurrentHashMap;
 
 /**
  * The abstract class for all types of external catalogs.
@@ -587,4 +588,9 @@ public abstract class ExternalCatalog
         makeSureInitialized();
         return new HashSet<>(idToDb.values());
     }
+
+    @Override
+    public ConcurrentHashMap<Long, DatabaseIf> getIdToDb() {
+        return new ConcurrentHashMap<>(idToDb);
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
index 02e737d26c..0720e52207 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
@@ -3096,7 +3096,8 @@ public class InternalCatalog implements 
CatalogIf<Database> {
         return newChecksum;
     }
 
-    public ConcurrentHashMap<Long, Database> getIdToDb() {
+    @Override
+    public ConcurrentHashMap<Long, DatabaseIf> getIdToDb() {
         return new ConcurrentHashMap<>(idToDb);
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java
index c023b9a335..9aaee6bf1d 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java
@@ -17,11 +17,11 @@
 
 package org.apache.doris.statistics;
 
-import org.apache.doris.catalog.Database;
+import org.apache.doris.catalog.DatabaseIf;
 import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.MaterializedIndexMeta;
 import org.apache.doris.catalog.OlapTable;
-import org.apache.doris.catalog.Table;
+import org.apache.doris.catalog.TableIf;
 import org.apache.doris.common.Config;
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.util.MasterDaemon;
@@ -31,6 +31,7 @@ import 
org.apache.doris.statistics.util.InternalQueryResult.ResultRow;
 import org.apache.doris.statistics.util.StatisticsUtil;
 import org.apache.doris.system.SystemInfoService;
 
+import com.google.common.collect.Maps;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.text.StringSubstitutor;
 import org.apache.logging.log4j.LogManager;
@@ -56,12 +57,8 @@ public class StatisticsCleaner extends MasterDaemon {
     private OlapTable histStatsTbl;
 
     private Map<Long, CatalogIf> idToCatalog;
-
-    /* Internal DB only */
-    private Map<Long, Database> idToDb;
-
-    /* Internal tbl only */
-    private Map<Long, Table> idToTbl;
+    private Map<Long, DatabaseIf> idToDb;
+    private Map<Long, TableIf> idToTbl;
 
     private Map<Long, MaterializedIndexMeta> idToMVIdx;
 
@@ -115,15 +112,23 @@ public class StatisticsCleaner extends MasterDaemon {
         }
 
         idToCatalog = Env.getCurrentEnv().getCatalogMgr().getIdToCatalog();
-        idToDb = Env.getCurrentEnv().getInternalCatalog().getIdToDb();
+        idToDb = constructDbMap();
         idToTbl = constructTblMap();
         idToMVIdx = constructIdxMap();
         return true;
     }
 
-    private Map<Long, Table> constructTblMap() {
-        Map<Long, Table> idToTbl = new HashMap<>();
-        for (Database db : idToDb.values()) {
+    private Map<Long, DatabaseIf> constructDbMap() {
+        Map<Long, DatabaseIf> idToDb = Maps.newHashMap();
+        for (CatalogIf ctl : idToCatalog.values()) {
+            idToDb.putAll(ctl.getIdToDb());
+        }
+        return idToDb;
+    }
+
+    private Map<Long, TableIf> constructTblMap() {
+        Map<Long, TableIf> idToTbl = new HashMap<>();
+        for (DatabaseIf db : idToDb.values()) {
             idToTbl.putAll(db.getIdToTable());
         }
         return idToTbl;
@@ -131,7 +136,7 @@ public class StatisticsCleaner extends MasterDaemon {
 
     private Map<Long, MaterializedIndexMeta> constructIdxMap() {
         Map<Long, MaterializedIndexMeta> idToMVIdx = new HashMap<>();
-        for (Table t : idToTbl.values()) {
+        for (TableIf t : idToTbl.values()) {
             if (t instanceof OlapTable) {
                 OlapTable olapTable = (OlapTable) t;
                 olapTable.getCopyOfIndexIdToMeta()
@@ -213,7 +218,7 @@ public class StatisticsCleaner extends MasterDaemon {
                         continue;
                     }
 
-                    Table t = idToTbl.get(tblId);
+                    TableIf t = idToTbl.get(tblId);
                     String colId = r.getColumnValue("col_id");
                     if (t.getColumn(colId) == null) {
                         expiredStats.ids.add(id);
diff --git 
a/regression-test/suites/external_table_p2/hive/test_hive_statistic_clean.groovy
 
b/regression-test/suites/external_table_p2/hive/test_hive_statistic_clean.groovy
new file mode 100644
index 0000000000..3ceab60afc
--- /dev/null
+++ 
b/regression-test/suites/external_table_p2/hive/test_hive_statistic_clean.groovy
@@ -0,0 +1,120 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_statistic_clean", 
"p2,external,hive,external_remote,external_remote_hive") {
+    String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String extHiveHmsHost = 
context.config.otherConfigs.get("extHiveHmsHost")
+        String extHiveHmsPort = 
context.config.otherConfigs.get("extHiveHmsPort")
+        String catalog_name = "test_hive_statistic_clean"
+        sql """drop catalog if exists ${catalog_name};"""
+        sql """
+            create catalog if not exists ${catalog_name} properties (
+                'type'='hms',
+                'hadoop.username' = 'hadoop',
+                'hive.metastore.uris' = 
'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+            );
+        """
+        logger.info("catalog " + catalog_name + " created")
+
+        sql """analyze database ${catalog_name}.statistics with sync"""
+        sql """use ${catalog_name}.statistics"""
+
+        def result = sql """show column stats `statistics` (lo_quantity)"""
+        assertTrue(result.size() == 1)
+        assertTrue(result[0][0] == "lo_quantity")
+        assertTrue(result[0][1] == "100.0")
+        assertTrue(result[0][2] == "46.0")
+        assertTrue(result[0][3] == "0.0")
+        assertTrue(result[0][4] == "404.0")
+        assertTrue(result[0][5] == "4.0")
+        assertTrue(result[0][6] == "1")
+        assertTrue(result[0][7] == "50")
+
+        result = sql """show column stats `statistics` (lo_orderkey)"""
+        assertTrue(result.size() == 1)
+        assertTrue(result[0][0] == "lo_orderkey")
+        assertTrue(result[0][1] == "100.0")
+        assertTrue(result[0][2] == "26.0")
+        assertTrue(result[0][3] == "0.0")
+        assertTrue(result[0][4] == "404.0")
+        assertTrue(result[0][5] == "4.0")
+        assertTrue(result[0][6] == "1")
+        assertTrue(result[0][7] == "98")
+
+        result = sql """show column stats `statistics` (lo_linenumber)"""
+        assertTrue(result.size() == 1)
+        assertTrue(result[0][0] == "lo_linenumber")
+        assertTrue(result[0][1] == "100.0")
+        assertTrue(result[0][2] == "7.0")
+        assertTrue(result[0][3] == "0.0")
+        assertTrue(result[0][4] == "404.0")
+        assertTrue(result[0][5] == "4.0")
+        assertTrue(result[0][6] == "1")
+        assertTrue(result[0][7] == "7")
+
+        sql """drop expired stats"""
+        result = sql """show column stats `statistics` (lo_quantity)"""
+        assertTrue(result.size() == 1)
+        assertTrue(result[0][0] == "lo_quantity")
+        assertTrue(result[0][1] == "100.0")
+        assertTrue(result[0][2] == "46.0")
+        assertTrue(result[0][3] == "0.0")
+        assertTrue(result[0][4] == "404.0")
+        assertTrue(result[0][5] == "4.0")
+        assertTrue(result[0][6] == "1")
+        assertTrue(result[0][7] == "50")
+
+        result = sql """show column stats `statistics` (lo_orderkey)"""
+        assertTrue(result.size() == 1)
+        assertTrue(result[0][0] == "lo_orderkey")
+        assertTrue(result[0][1] == "100.0")
+        assertTrue(result[0][2] == "26.0")
+        assertTrue(result[0][3] == "0.0")
+        assertTrue(result[0][4] == "404.0")
+        assertTrue(result[0][5] == "4.0")
+        assertTrue(result[0][6] == "1")
+        assertTrue(result[0][7] == "98")
+
+        result = sql """show column stats `statistics` (lo_linenumber)"""
+        assertTrue(result.size() == 1)
+        assertTrue(result[0][0] == "lo_linenumber")
+        assertTrue(result[0][1] == "100.0")
+        assertTrue(result[0][2] == "7.0")
+        assertTrue(result[0][3] == "0.0")
+        assertTrue(result[0][4] == "404.0")
+        assertTrue(result[0][5] == "4.0")
+        assertTrue(result[0][6] == "1")
+        assertTrue(result[0][7] == "7")
+
+        def ctlId
+        result = sql """show proc '/catalogs'"""
+
+        for (int i = 0; i < result.size(); i++) {
+            if (result[i][1] == catalog_name) {
+                ctlId = result[i][0]
+            }
+        }
+
+        sql """drop catalog ${catalog_name}"""
+        sql """drop expired stats"""
+        result = sql """select * from 
internal.__internal_schema.column_statistics where catalog_id=${ctlId}"""
+        assertTrue(result.size() == 0)
+
+    }
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to