This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new fb5a77b726 [Fix](statistics)Handle external table in statistics 
cleaner. (#23843)
fb5a77b726 is described below

commit fb5a77b72685cb67accc2881077eab4d48eece6f
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Fri Sep 8 09:43:46 2023 +0800

    [Fix](statistics)Handle external table in statistics cleaner. (#23843)
    
    Before, Statistics Cleaner only handles olap db and table.
    External db and tables would be removed without verification. So that 
external stats could stored no more than 2 days, which is the interval of Stats 
cleaner thread.
    This pr is to add verification for external db and tables.
---
 .../java/org/apache/doris/catalog/Database.java    |   3 +-
 .../java/org/apache/doris/catalog/DatabaseIf.java  |   3 +
 .../doris/catalog/external/ExternalDatabase.java   |   7 ++
 .../org/apache/doris/datasource/CatalogIf.java     |   4 +-
 .../apache/doris/datasource/ExternalCatalog.java   |   6 ++
 .../apache/doris/datasource/InternalCatalog.java   |   3 +-
 .../apache/doris/statistics/StatisticsCleaner.java |  33 +++---
 .../hive/test_hive_statistic_clean.groovy          | 120 +++++++++++++++++++++
 8 files changed, 162 insertions(+), 17 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Database.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Database.java
index 82f6f39662..92100a9898 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Database.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Database.java
@@ -851,7 +851,8 @@ public class Database extends MetaObject implements 
Writable, DatabaseIf<Table>
         return null;
     }
 
-    public Map<Long, Table> getIdToTable() {
+    @Override
+    public Map<Long, TableIf> getIdToTable() {
         return new HashMap<>(idToTable);
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/DatabaseIf.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/DatabaseIf.java
index 7dfceabbde..46ed88e72f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/DatabaseIf.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/DatabaseIf.java
@@ -30,6 +30,7 @@ import org.apache.logging.log4j.Logger;
 
 import java.util.Arrays;
 import java.util.List;
+import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
@@ -257,4 +258,6 @@ public interface DatabaseIf<T extends TableIf> {
     default long getLastUpdateTime() {
         return -1L;
     }
+
+    public Map<Long, TableIf> getIdToTable();
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalDatabase.java
 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalDatabase.java
index fa2ecd4011..3559637b8f 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalDatabase.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalDatabase.java
@@ -20,6 +20,7 @@ package org.apache.doris.catalog.external;
 import org.apache.doris.catalog.DatabaseIf;
 import org.apache.doris.catalog.DatabaseProperty;
 import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.TableIf;
 import org.apache.doris.common.DdlException;
 import org.apache.doris.common.MetaNotFoundException;
 import org.apache.doris.common.io.Text;
@@ -44,6 +45,7 @@ import org.apache.logging.log4j.Logger;
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -360,4 +362,9 @@ public abstract class ExternalDatabase<T extends 
ExternalTable>
     public void createTableForReplay(String tableName, long tableId) {
         throw new NotImplementedException("createTable() is not implemented");
     }
+
+    @Override
+    public Map<Long, TableIf> getIdToTable() {
+        return new HashMap<>(idToTbl);
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java
index d135018e75..22d7cf2b15 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java
@@ -17,7 +17,6 @@
 
 package org.apache.doris.datasource;
 
-
 import org.apache.doris.catalog.DatabaseIf;
 import org.apache.doris.catalog.Env;
 import org.apache.doris.common.AnalysisException;
@@ -34,6 +33,7 @@ import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.function.Function;
 import javax.annotation.Nullable;
 
@@ -172,4 +172,6 @@ public interface CatalogIf<T extends DatabaseIf> {
     public Collection<DatabaseIf> getAllDbs();
 
     public boolean enableAutoAnalyze();
+
+    public ConcurrentHashMap<Long, DatabaseIf> getIdToDb();
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java
index 35d03dfabc..21617b2f05 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java
@@ -63,6 +63,7 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Optional;
+import java.util.concurrent.ConcurrentHashMap;
 
 /**
  * The abstract class for all types of external catalogs.
@@ -605,4 +606,9 @@ public abstract class ExternalCatalog
         }
         return ret;
     }
+
+    @Override
+    public ConcurrentHashMap<Long, DatabaseIf> getIdToDb() {
+        return new ConcurrentHashMap<>(idToDb);
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
index 4114386680..082542fb34 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
@@ -3133,7 +3133,8 @@ public class InternalCatalog implements 
CatalogIf<Database> {
         return newChecksum;
     }
 
-    public ConcurrentHashMap<Long, Database> getIdToDb() {
+    @Override
+    public ConcurrentHashMap<Long, DatabaseIf> getIdToDb() {
         return new ConcurrentHashMap<>(idToDb);
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java
index 93807bb1e0..849b68fe94 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java
@@ -17,11 +17,11 @@
 
 package org.apache.doris.statistics;
 
-import org.apache.doris.catalog.Database;
+import org.apache.doris.catalog.DatabaseIf;
 import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.MaterializedIndexMeta;
 import org.apache.doris.catalog.OlapTable;
-import org.apache.doris.catalog.Table;
+import org.apache.doris.catalog.TableIf;
 import org.apache.doris.common.Config;
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.util.MasterDaemon;
@@ -30,6 +30,7 @@ import org.apache.doris.datasource.InternalCatalog;
 import org.apache.doris.statistics.util.StatisticsUtil;
 import org.apache.doris.system.SystemInfoService;
 
+import com.google.common.collect.Maps;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.text.StringSubstitutor;
 import org.apache.logging.log4j.LogManager;
@@ -55,12 +56,8 @@ public class StatisticsCleaner extends MasterDaemon {
     private OlapTable histStatsTbl;
 
     private Map<Long, CatalogIf> idToCatalog;
-
-    /* Internal DB only */
-    private Map<Long, Database> idToDb;
-
-    /* Internal tbl only */
-    private Map<Long, Table> idToTbl;
+    private Map<Long, DatabaseIf> idToDb;
+    private Map<Long, TableIf> idToTbl;
 
     private Map<Long, MaterializedIndexMeta> idToMVIdx;
 
@@ -114,15 +111,23 @@ public class StatisticsCleaner extends MasterDaemon {
         }
 
         idToCatalog = Env.getCurrentEnv().getCatalogMgr().getIdToCatalog();
-        idToDb = Env.getCurrentEnv().getInternalCatalog().getIdToDb();
+        idToDb = constructDbMap();
         idToTbl = constructTblMap();
         idToMVIdx = constructIdxMap();
         return true;
     }
 
-    private Map<Long, Table> constructTblMap() {
-        Map<Long, Table> idToTbl = new HashMap<>();
-        for (Database db : idToDb.values()) {
+    private Map<Long, DatabaseIf> constructDbMap() {
+        Map<Long, DatabaseIf> idToDb = Maps.newHashMap();
+        for (CatalogIf ctl : idToCatalog.values()) {
+            idToDb.putAll(ctl.getIdToDb());
+        }
+        return idToDb;
+    }
+
+    private Map<Long, TableIf> constructTblMap() {
+        Map<Long, TableIf> idToTbl = new HashMap<>();
+        for (DatabaseIf db : idToDb.values()) {
             idToTbl.putAll(db.getIdToTable());
         }
         return idToTbl;
@@ -130,7 +135,7 @@ public class StatisticsCleaner extends MasterDaemon {
 
     private Map<Long, MaterializedIndexMeta> constructIdxMap() {
         Map<Long, MaterializedIndexMeta> idToMVIdx = new HashMap<>();
-        for (Table t : idToTbl.values()) {
+        for (TableIf t : idToTbl.values()) {
             if (t instanceof OlapTable) {
                 OlapTable olapTable = (OlapTable) t;
                 olapTable.getCopyOfIndexIdToMeta()
@@ -213,7 +218,7 @@ public class StatisticsCleaner extends MasterDaemon {
                         continue;
                     }
 
-                    Table t = idToTbl.get(tblId);
+                    TableIf t = idToTbl.get(tblId);
                     String colId = statsId.colId;
                     if (t.getColumn(colId) == null) {
                         expiredStats.ids.add(id);
diff --git 
a/regression-test/suites/external_table_p2/hive/test_hive_statistic_clean.groovy
 
b/regression-test/suites/external_table_p2/hive/test_hive_statistic_clean.groovy
new file mode 100644
index 0000000000..3ceab60afc
--- /dev/null
+++ 
b/regression-test/suites/external_table_p2/hive/test_hive_statistic_clean.groovy
@@ -0,0 +1,120 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_statistic_clean", 
"p2,external,hive,external_remote,external_remote_hive") {
+    String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String extHiveHmsHost = 
context.config.otherConfigs.get("extHiveHmsHost")
+        String extHiveHmsPort = 
context.config.otherConfigs.get("extHiveHmsPort")
+        String catalog_name = "test_hive_statistic_clean"
+        sql """drop catalog if exists ${catalog_name};"""
+        sql """
+            create catalog if not exists ${catalog_name} properties (
+                'type'='hms',
+                'hadoop.username' = 'hadoop',
+                'hive.metastore.uris' = 
'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+            );
+        """
+        logger.info("catalog " + catalog_name + " created")
+
+        sql """analyze database ${catalog_name}.statistics with sync"""
+        sql """use ${catalog_name}.statistics"""
+
+        def result = sql """show column stats `statistics` (lo_quantity)"""
+        assertTrue(result.size() == 1)
+        assertTrue(result[0][0] == "lo_quantity")
+        assertTrue(result[0][1] == "100.0")
+        assertTrue(result[0][2] == "46.0")
+        assertTrue(result[0][3] == "0.0")
+        assertTrue(result[0][4] == "404.0")
+        assertTrue(result[0][5] == "4.0")
+        assertTrue(result[0][6] == "1")
+        assertTrue(result[0][7] == "50")
+
+        result = sql """show column stats `statistics` (lo_orderkey)"""
+        assertTrue(result.size() == 1)
+        assertTrue(result[0][0] == "lo_orderkey")
+        assertTrue(result[0][1] == "100.0")
+        assertTrue(result[0][2] == "26.0")
+        assertTrue(result[0][3] == "0.0")
+        assertTrue(result[0][4] == "404.0")
+        assertTrue(result[0][5] == "4.0")
+        assertTrue(result[0][6] == "1")
+        assertTrue(result[0][7] == "98")
+
+        result = sql """show column stats `statistics` (lo_linenumber)"""
+        assertTrue(result.size() == 1)
+        assertTrue(result[0][0] == "lo_linenumber")
+        assertTrue(result[0][1] == "100.0")
+        assertTrue(result[0][2] == "7.0")
+        assertTrue(result[0][3] == "0.0")
+        assertTrue(result[0][4] == "404.0")
+        assertTrue(result[0][5] == "4.0")
+        assertTrue(result[0][6] == "1")
+        assertTrue(result[0][7] == "7")
+
+        sql """drop expired stats"""
+        result = sql """show column stats `statistics` (lo_quantity)"""
+        assertTrue(result.size() == 1)
+        assertTrue(result[0][0] == "lo_quantity")
+        assertTrue(result[0][1] == "100.0")
+        assertTrue(result[0][2] == "46.0")
+        assertTrue(result[0][3] == "0.0")
+        assertTrue(result[0][4] == "404.0")
+        assertTrue(result[0][5] == "4.0")
+        assertTrue(result[0][6] == "1")
+        assertTrue(result[0][7] == "50")
+
+        result = sql """show column stats `statistics` (lo_orderkey)"""
+        assertTrue(result.size() == 1)
+        assertTrue(result[0][0] == "lo_orderkey")
+        assertTrue(result[0][1] == "100.0")
+        assertTrue(result[0][2] == "26.0")
+        assertTrue(result[0][3] == "0.0")
+        assertTrue(result[0][4] == "404.0")
+        assertTrue(result[0][5] == "4.0")
+        assertTrue(result[0][6] == "1")
+        assertTrue(result[0][7] == "98")
+
+        result = sql """show column stats `statistics` (lo_linenumber)"""
+        assertTrue(result.size() == 1)
+        assertTrue(result[0][0] == "lo_linenumber")
+        assertTrue(result[0][1] == "100.0")
+        assertTrue(result[0][2] == "7.0")
+        assertTrue(result[0][3] == "0.0")
+        assertTrue(result[0][4] == "404.0")
+        assertTrue(result[0][5] == "4.0")
+        assertTrue(result[0][6] == "1")
+        assertTrue(result[0][7] == "7")
+
+        def ctlId
+        result = sql """show proc '/catalogs'"""
+
+        for (int i = 0; i < result.size(); i++) {
+            if (result[i][1] == catalog_name) {
+                ctlId = result[i][0]
+            }
+        }
+
+        sql """drop catalog ${catalog_name}"""
+        sql """drop expired stats"""
+        result = sql """select * from 
internal.__internal_schema.column_statistics where catalog_id=${ctlId}"""
+        assertTrue(result.size() == 0)
+
+    }
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to