This is an automated email from the ASF dual-hosted git repository.

lijibing pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 5fd860642f9 [improvement](statistics)Drop expired external stats only when the catalog is dropped. (#42244) (#42412)
5fd860642f9 is described below

commit 5fd860642f95e32fee937cfbcedaca3950859b25
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Thu Oct 24 19:40:56 2024 +0800

    [improvement](statistics)Drop expired external stats only when the catalog is dropped. (#42244) (#42412)
    
    backport: https://github.com/apache/doris/pull/42244
---
 .../apache/doris/statistics/StatisticsCleaner.java | 18 +++--
 .../statistics/test_drop_expired_stats.groovy      | 76 ++++++++++++++++++++++
 2 files changed, 90 insertions(+), 4 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java
index 9775b6ecb73..15be395e590 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java
@@ -37,6 +37,7 @@ import org.apache.commons.text.StringSubstitutor;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -176,10 +177,9 @@ public class StatisticsCleaner extends MasterDaemon {
 
     private Map<Long, DatabaseIf<? extends TableIf>> constructDbMap() {
         Map<Long, DatabaseIf<? extends TableIf>> idToDb = Maps.newHashMap();
-        for (CatalogIf<? extends DatabaseIf<? extends TableIf>> ctl : idToCatalog.values()) {
-            for (DatabaseIf<? extends TableIf> db : ctl.getAllDbs()) {
-                idToDb.put(db.getId(), db);
-            }
+        Collection<DatabaseIf<? extends TableIf>> internalDBs = Env.getCurrentEnv().getInternalCatalog().getAllDbs();
+        for (DatabaseIf<? extends TableIf> db : internalDBs) {
+            idToDb.put(db.getId(), db);
         }
         return idToDb;
     }
@@ -268,6 +268,16 @@ public class StatisticsCleaner extends MasterDaemon {
                         expiredStats.expiredCatalog.add(catalogId);
                         continue;
                     }
+                    // Skip check external DBs and tables to avoid fetch too much metadata.
+                    // Remove expired external table stats only when the external catalog is dropped.
+                    // TODO: Need to check external database and table exist or not. But for now, we only check catalog.
+                    // Because column_statistics table only keep table id and db id.
+                    // But meta data doesn't always cache all external tables' ids.
+                    // So we may fail to find the external table only by id. Need to use db name and table name instead.
+                    // Have to store db name and table name in column_statistics in the future.
+                    if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID) {
+                        continue;
+                    }
                     long dbId = statsId.dbId;
                     if (!idToDb.containsKey(dbId)) {
                         expiredStats.expiredDatabase.add(dbId);
diff --git a/regression-test/suites/statistics/test_drop_expired_stats.groovy b/regression-test/suites/statistics/test_drop_expired_stats.groovy
new file mode 100644
index 00000000000..23067f670b5
--- /dev/null
+++ b/regression-test/suites/statistics/test_drop_expired_stats.groovy
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_drop_expired_stats") {
+
+    sql """drop database if exists test_drop_expired_stats"""
+    sql """create database test_drop_expired_stats"""
+    sql """use test_drop_expired_stats"""
+    sql """set global enable_auto_analyze=false"""
+
+    sql """CREATE TABLE table1 (
+            key1 bigint NOT NULL,
+            key2 bigint NOT NULL,
+            value1 int NOT NULL,
+            value2 int NOT NULL,
+            value3 int NOT NULL
+        )ENGINE=OLAP
+        DUPLICATE KEY(`key1`, `key2`)
+        COMMENT "OLAP"
+        DISTRIBUTED BY HASH(`key1`) BUCKETS 1
+        PROPERTIES (
+            "replication_num" = "1"
+        )
+    """
+
+    sql """CREATE TABLE table2 (
+            key1 bigint NOT NULL,
+            key2 bigint NOT NULL,
+            value1 int NOT NULL
+        )ENGINE=OLAP
+        DUPLICATE KEY(`key1`, `key2`)
+        COMMENT "OLAP"
+        DISTRIBUTED BY HASH(`key1`) BUCKETS 1
+        PROPERTIES (
+            "replication_num" = "1"
+        )
+    """
+
+    def id1 = getTableId("test_drop_expired_stats", "table1")
+    def id2 = getTableId("test_drop_expired_stats", "table2")
+
+    sql """analyze table table1 with sync"""
+    sql """analyze table table2 with sync"""
+    def result = sql """select * from __internal_schema.column_statistics where tbl_id = ${id1}"""
+    assertEquals(5, result.size())
+    result = sql """select * from __internal_schema.column_statistics where tbl_id = ${id2}"""
+    assertEquals(3, result.size())
+    sql """drop table table1"""
+    sql """drop expired stats"""
+    result = sql """select * from __internal_schema.column_statistics where tbl_id = ${id1}"""
+    assertEquals(0, result.size())
+    result = sql """select * from __internal_schema.column_statistics where tbl_id = ${id2}"""
+    assertEquals(3, result.size())
+
+    sql """drop database if exists test_drop_expired_stats"""
+    sql """drop expired stats"""
+    result = sql """select * from __internal_schema.column_statistics where tbl_id = ${id1}"""
+    assertEquals(0, result.size())
+    result = sql """select * from __internal_schema.column_statistics where tbl_id = ${id2}"""
+    assertEquals(0, result.size())
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to