This is an automated email from the ASF dual-hosted git repository.

lijibing pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 1e925d922f5 [improve](statistics)Clean expired TableStatsMeta. (#39779) (#40252)
1e925d922f5 is described below

commit 1e925d922f5b832873d90f1348a35f8befec59a1
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Mon Sep 2 22:13:19 2024 +0800

    [improve](statistics)Clean expired TableStatsMeta. (#39779) (#40252)
    
    backport: https://github.com/apache/doris/pull/39779
---
 fe/fe-core/src/main/cup/sql_parser.cup             | 11 ++-
 .../apache/doris/analysis/ShowTableStatsStmt.java  | 39 ++++++++--
 .../java/org/apache/doris/qe/ShowExecutor.java     | 11 +++
 .../apache/doris/statistics/AnalysisManager.java   |  4 +
 .../apache/doris/statistics/StatisticsCleaner.java | 56 ++++++++++++++
 .../apache/doris/statistics/TableStatsMeta.java    | 25 ++++++
 .../doris/statistics/AnalysisManagerTest.java      |  8 ++
 .../statistics/StatisticsAutoCollectorTest.java    |  8 ++
 .../hive/test_drop_expired_table_stats.groovy      | 88 ++++++++++++++++++++++
 9 files changed, 240 insertions(+), 10 deletions(-)

diff --git a/fe/fe-core/src/main/cup/sql_parser.cup 
b/fe/fe-core/src/main/cup/sql_parser.cup
index 1c2d8347398..51d62cd116e 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -4168,14 +4168,19 @@ show_param ::=
         RESULT = new ShowSyncJobStmt(dbName);
     :}
     /* show table stats */
-    | KW_TABLE opt_cached:cached KW_STATS table_name:tbl 
opt_partition_names:partitionNames
+    | KW_TABLE KW_STATS table_name:tbl opt_partition_names:partitionNames
     {:
-        RESULT = new ShowTableStatsStmt(tbl, partitionNames, cached, null);
+        RESULT = new ShowTableStatsStmt(tbl, partitionNames, null);
+    :}
+    /* show table id stats */
+    | KW_TABLE KW_STATS INTEGER_LITERAL:tableId
+    {:
+        RESULT = new ShowTableStatsStmt(tableId);
     :}
     /* show index stats */
     | KW_INDEX KW_STATS table_name:tbl ident:id
     {:
-        RESULT = new ShowTableStatsStmt(tbl, null, false, id);
+        RESULT = new ShowTableStatsStmt(tbl, null, id);
     :}
     /* show column stats */
     | KW_COLUMN opt_cached:cached KW_STATS table_name:tbl opt_col_list:cols 
opt_partition_names:partitionNames
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
index 2328581238a..8e9800fc410 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
@@ -68,16 +68,26 @@ public class ShowTableStatsStmt extends ShowStmt {
 
     private final TableName tableName;
     private final PartitionNames partitionNames;
-    private final boolean cached;
     private final String indexName;
+    private final long tableId;
+    private final boolean useTableId;
 
     private TableIf table;
 
-    public ShowTableStatsStmt(TableName tableName, PartitionNames 
partitionNames, boolean cached, String indexName) {
+    /**
+     * Creates a statement that identifies the table by its numeric id instead of by name.
+     * The table name, partition names and index name are all left null.
+     *
+     * @param tableId id of the table whose stats should be shown
+     */
+    public ShowTableStatsStmt(long tableId) {
+        this.tableId = tableId;
+        this.useTableId = true;
+        this.tableName = null;
+        this.partitionNames = null;
+        this.indexName = null;
+    }
+
+    public ShowTableStatsStmt(TableName tableName, PartitionNames 
partitionNames, String indexName) {
         this.tableName = tableName;
         this.partitionNames = partitionNames;
-        this.cached = cached;
         this.indexName = indexName;
+        this.tableId = -1;
+        this.useTableId = false;
     }
 
     public TableName getTableName() {
@@ -87,6 +97,13 @@ public class ShowTableStatsStmt extends ShowStmt {
     @Override
     public void analyze(Analyzer analyzer) throws UserException {
         super.analyze(analyzer);
+        if (useTableId) {
+            if 
(!Env.getCurrentEnv().getAccessManager().checkGlobalPriv(ConnectContext.get(), 
PrivPredicate.SHOW)) {
+                
ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, 
"Permission denied",
+                        ConnectContext.get().getQualifiedUser(), 
ConnectContext.get().getRemoteIP());
+            }
+            return;
+        }
         tableName.analyze(analyzer);
         if (partitionNames != null) {
             partitionNames.analyze(analyzer);
@@ -141,6 +158,14 @@ public class ShowTableStatsStmt extends ShowStmt {
         return table;
     }
 
+    public boolean isUseTableId() {
+        return useTableId; // true only when the statement was built by the table-id constructor
+    }
+
+    public long getTableId() {
+        return tableId; // -1 when the statement was built from a table name
+    }
+
     public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) {
         if (indexName != null) {
             return constructIndexResultSet(tableStatistic);
@@ -148,6 +173,10 @@ public class ShowTableStatsStmt extends ShowStmt {
         return constructTableResultSet(tableStatistic);
     }
 
+    /**
+     * Builds a result set that carries this statement's metadata and no rows.
+     *
+     * @return a ShowResultSet with an empty row list
+     */
+    public ShowResultSet constructEmptyResultSet() {
+        List<List<String>> noRows = new ArrayList<>();
+        return new ShowResultSet(getMetaData(), noRows);
+    }
+
     public ShowResultSet constructResultSet(long rowCount) {
         List<List<String>> result = Lists.newArrayList();
         List<String> row = Lists.newArrayList();
@@ -207,8 +236,4 @@ public class ShowTableStatsStmt extends ShowStmt {
         result.add(row);
         return new ShowResultSet(getMetaData(), result);
     }
-
-    public boolean isCached() {
-        return cached;
-    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
index 7a1c3f0d0ab..d8494e392a1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
@@ -2481,6 +2481,17 @@ public class ShowExecutor {
     private void handleShowTableStats() {
         ShowTableStatsStmt showTableStatsStmt = (ShowTableStatsStmt) stmt;
         TableIf tableIf = showTableStatsStmt.getTable();
+        // Handle use table id to show table stats. Mainly for online debug.
+        if (showTableStatsStmt.isUseTableId()) {
+            long tableId = showTableStatsStmt.getTableId();
+            TableStatsMeta tableStats = 
Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(tableId);
+            if (tableStats == null) {
+                resultSet = showTableStatsStmt.constructEmptyResultSet();
+            } else {
+                resultSet = showTableStatsStmt.constructResultSet(tableStats);
+            }
+            return;
+        }
         TableStatsMeta tableStats = 
Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(tableIf.getId());
         /*
            HMSExternalTable table will fetch row count from HMS
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index e7634af1357..5d3debb8ddd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -1146,6 +1146,10 @@ public class AnalysisManager implements Writable {
         idToTblStats.remove(log.id);
     }
 
+    /**
+     * Returns a copy of the table ids that currently have a table stats record.
+     * The result is a new HashSet, so callers may modify it without touching idToTblStats.
+     *
+     * @return a new set holding the current keys of idToTblStats
+     */
+    public Set<Long> getIdToTblStatsKeys() {
+        Set<Long> tableIdSnapshot = new HashSet<>(idToTblStats.keySet());
+        return tableIdSnapshot;
+    }
+
     public ColStatsMeta findColStatsMeta(long tblId, String colName) {
         TableStatsMeta tableStats = findTableStatsStatus(tblId);
         if (tableStats == null) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java
index efa13349e3a..fc2022ef681 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java
@@ -17,6 +17,7 @@
 
 package org.apache.doris.statistics;
 
+import org.apache.doris.catalog.Database;
 import org.apache.doris.catalog.DatabaseIf;
 import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.MaterializedIndexMeta;
@@ -27,6 +28,7 @@ import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.util.MasterDaemon;
 import org.apache.doris.datasource.CatalogIf;
 import org.apache.doris.datasource.InternalCatalog;
+import org.apache.doris.persist.TableStatsDeletionLog;
 import org.apache.doris.statistics.util.StatisticsUtil;
 import org.apache.doris.system.SystemInfoService;
 
@@ -75,6 +77,7 @@ public class StatisticsCleaner extends MasterDaemon {
     }
 
     public synchronized void clear() {
+        clearTableStats();
         try {
             if (!init()) {
                 return;
@@ -101,6 +104,59 @@ public class StatisticsCleaner extends MasterDaemon {
         } while (!expiredStats.isEmpty());
     }
 
+    /**
+     * Removes TableStatsMeta records whose table can no longer be found, first by the catalog, database
+     * and table names stored in the record and then by table id in the internal catalog. Every removal
+     * is also written to the edit log as a TableStatsDeletionLog.
+     *
+     * A failure while handling one table id is logged and does not stop the remaining ids from being processed.
+     */
+    private void clearTableStats() {
+        AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager();
+        Set<Long> tableIds = analysisManager.getIdToTblStatsKeys();
+        InternalCatalog internalCatalog = Env.getCurrentInternalCatalog();
+        for (long id : tableIds) {
+            try {
+                TableStatsMeta stats = analysisManager.findTableStatsStatus(id);
+                if (stats == null) {
+                    continue;
+                }
+                // If ctlName, dbName and tblName exist, the table stats record was created under the new version.
+                // First try to find the table by the given names. If the table exists, the record is valid
+                // and should be kept in memory.
+                try {
+                    StatisticsUtil.findTable(stats.ctlName, stats.dbName, stats.tblName);
+                    continue;
+                } catch (Exception e) {
+                    LOG.debug("Table {}.{}.{} not found.", stats.ctlName, stats.dbName, stats.tblName);
+                }
+                // If we couldn't find the table by names, try to find it in the internal catalog. This supports
+                // older versions in which the tableStats object doesn't store the names but only the table id.
+                // We may remove an external table's tableStats here, but it's not a big problem,
+                // because the stats in the column_statistics table are still available;
+                // the only disadvantage is that auto analyze may be triggered for this table.
+                // That only happens once: the new table stats object will have the catalog, db and table names.
+                if (tableExistInInternalCatalog(internalCatalog, id)) {
+                    continue;
+                }
+                LOG.info("Table {}.{}.{} with id {} not exist, remove its table stats record.",
+                        stats.ctlName, stats.dbName, stats.tblName, id);
+                analysisManager.removeTableStats(id);
+                Env.getCurrentEnv().getEditLog().logDeleteTableStats(new TableStatsDeletionLog(id));
+            } catch (Exception e) {
+                // Keep going with the next id, but record which table failed and why.
+                LOG.warn("Failed to clean table stats for table id {}.", id, e);
+            }
+        }
+    }
+
+    /**
+     * Checks every database of the given internal catalog for a table with the given id.
+     * Database ids that no longer resolve to a database are skipped.
+     *
+     * @param internalCatalog catalog whose databases are searched
+     * @param tableId id of the table to look for
+     * @return true as soon as one database returns a table for the id, false if none does
+     */
+    private boolean tableExistInInternalCatalog(InternalCatalog internalCatalog, long tableId) {
+        for (long candidateDbId : internalCatalog.getDbIds()) {
+            Database db = internalCatalog.getDbNullable(candidateDbId);
+            if (db != null && db.getTableNullable(tableId) != null) {
+                return true;
+            }
+        }
+        return false;
+    }
+
     private boolean init() {
         try {
             String dbName = SystemInfoService.DEFAULT_CLUSTER + ":" + 
FeConstants.INTERNAL_DB_NAME;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
index b403082d92b..5cad3dbaec1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java
@@ -46,9 +46,24 @@ import java.util.stream.Collectors;
 
 public class TableStatsMeta implements Writable, GsonPostProcessable {
 
+    @SerializedName("ctlId")
+    public final long ctlId;
+
+    @SerializedName("ctln")
+    public final String ctlName;
+
+    @SerializedName("dbId")
+    public final long dbId;
+
+    @SerializedName("dbn")
+    public final String dbName;
+
     @SerializedName("tblId")
     public final long tblId;
 
+    @SerializedName("tbln")
+    public final String tblName;
+
     @SerializedName("idxId")
     public final long idxId;
     @SerializedName("updatedRows")
@@ -82,14 +97,24 @@ public class TableStatsMeta implements Writable, 
GsonPostProcessable {
 
     @VisibleForTesting
     public TableStatsMeta() { // sets every id field assigned here to 0 and every name field to null
+        ctlId = 0;
+        ctlName = null;
+        dbId = 0;
+        dbName = null;
         tblId = 0;
+        tblName = null;
         idxId = 0;
     }
 
     // It's necessary to store these fields separately from AnalysisInfo, 
since the lifecycle between AnalysisInfo
     // and TableStats is quite different.
     public TableStatsMeta(long rowCount, AnalysisInfo analyzedJob, TableIf 
table) {
+        this.ctlId = table.getDatabase().getCatalog().getId();
+        this.ctlName = table.getDatabase().getCatalog().getName();
+        this.dbId = table.getDatabase().getId();
+        this.dbName = table.getDatabase().getFullName();
         this.tblId = table.getId();
+        this.tblName = table.getName();
         this.idxId = -1;
         this.rowCount = rowCount;
         update(analyzedJob, table);
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java
index 4993884f02a..b8178fa5a58 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java
@@ -23,6 +23,8 @@ import org.apache.doris.analysis.PartitionNames;
 import org.apache.doris.analysis.ShowAnalyzeStmt;
 import org.apache.doris.analysis.TableName;
 import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.Database;
+import org.apache.doris.catalog.DatabaseIf;
 import org.apache.doris.catalog.OlapTable;
 import org.apache.doris.catalog.PrimitiveType;
 import org.apache.doris.common.Config;
@@ -273,6 +275,7 @@ public class AnalysisManagerTest {
 
     @Test
     public void testReAnalyze() {
+        Database db = new Database();
         new MockUp<OlapTable>() {
 
             int count = 0;
@@ -294,6 +297,11 @@ public class AnalysisManagerTest {
                 return Lists.newArrayList(c);
             }
 
+            @Mock
+            public DatabaseIf getDatabase() {
+                return db;
+            }
+
         };
         OlapTable olapTable = new OlapTable();
         TableStatsMeta stats0 = new TableStatsMeta(
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
index 334da91198c..1ab5178c3b7 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java
@@ -185,6 +185,14 @@ public class StatisticsAutoCollectorTest {
         AnalysisInfo analysisInfo = new 
AnalysisInfoBuilder().setAnalysisMethod(AnalysisMethod.FULL)
                 .setColToPartitions(new HashMap<>()).setAnalysisType(
                 
AnalysisType.FUNDAMENTALS).setColName("col1").setJobType(JobType.SYSTEM).build();
+        Database db = new Database();
+        new MockUp<OlapTable>() {
+            @Mock
+            public DatabaseIf getDatabase() {
+                return db;
+            }
+
+        };
         new MockUp<AnalysisManager>() {
 
             int count = 0;
diff --git 
a/regression-test/suites/external_table_p0/hive/test_drop_expired_table_stats.groovy
 
b/regression-test/suites/external_table_p0/hive/test_drop_expired_table_stats.groovy
new file mode 100644
index 00000000000..af5348732e8
--- /dev/null
+++ 
b/regression-test/suites/external_table_p0/hive/test_drop_expired_table_stats.groovy
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_drop_expired_table_stats", 
"p0,external,hive,external_docker,external_docker_hive") {
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) { // skip the whole suite unless enableHiveTest is "true"
+        logger.info("disable Hive test.")
+        return
+    }
+
+    String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp")
+    String extHiveHmsPort = context.config.otherConfigs.get("hms_port")
+    String catalog_name = "test_drop_expired_table_stats"
+    sql """drop catalog if exists ${catalog_name};""" // start from a clean slate so the create below always runs
+    sql """
+        create catalog if not exists ${catalog_name} properties (
+            'type'='hms',
+            'hadoop.username' = 'hadoop',
+            'hive.metastore.uris' = 
'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+        );
+    """
+    logger.info("catalog " + catalog_name + " created")
+    sql """switch ${catalog_name};"""
+
+
+    sql """use stats_test"""
+    sql """analyze table employee_gz with sync""" // stats must exist before they can expire
+    def result = sql """show table stats employee_gz"""
+    assertEquals(1, result.size())
+
+    def ctlId
+    def dbId
+    def tblId
+    result = sql """show catalogs""" // column 0 is read as the catalog id, column 1 as its name
+
+    for (int i = 0; i < result.size(); i++) {
+        if (result[i][1] == catalog_name) {
+            ctlId = result[i][0]
+        }
+    }
+    logger.info("catalog id is " + ctlId)
+    result = sql """show proc '/catalogs/$ctlId'""" // column 0 is read as the db id, column 1 as its name
+    for (int i = 0; i < result.size(); i++) {
+        if (result[i][1] == 'stats_test') {
+            dbId = result[i][0]
+        }
+    }
+    logger.info("db id is " + dbId)
+    result = sql """show proc '/catalogs/$ctlId/$dbId'""" // column 0 is read as the table id, column 1 as its name
+    for (int i = 0; i < result.size(); i++) {
+        if (result[i][1] == 'employee_gz') {
+            tblId = result[i][0]
+        }
+    }
+    logger.info("table id is " + tblId)
+    result = sql """show table stats $tblId""" // look the stats up by numeric table id instead of by name
+    logger.info("Table stats " + result)
+    assertEquals(1, result.size())
+
+    sql """drop catalog ${catalog_name}"""
+    result = sql """show table stats $tblId"""
+    logger.info("Table stats " + result)
+    assertEquals(1, result.size()) // the stats record is expected to outlive the catalog drop itself
+
+    try {
+        sql """drop expired stats""" // a failure here is only logged; the final assert decides pass/fail
+    } catch (Exception e) {
+        logger.info("Drop expired stats exception. " + e.getMessage())
+    }
+    result = sql """show table stats $tblId"""
+    logger.info("Table stats " + result)
+    assertEquals(0, result.size()) // the record is expected to be gone after the cleanup
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to