This is an automated email from the ASF dual-hosted git repository. lijibing pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 1e925d922f5 [improve](statistics)Clean expired TableStatsMeta. (#39779) (#40252) 1e925d922f5 is described below commit 1e925d922f5b832873d90f1348a35f8befec59a1 Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Mon Sep 2 22:13:19 2024 +0800 [improve](statistics)Clean expired TableStatsMeta. (#39779) (#40252) backport: https://github.com/apache/doris/pull/39779 --- fe/fe-core/src/main/cup/sql_parser.cup | 11 ++- .../apache/doris/analysis/ShowTableStatsStmt.java | 39 ++++++++-- .../java/org/apache/doris/qe/ShowExecutor.java | 11 +++ .../apache/doris/statistics/AnalysisManager.java | 4 + .../apache/doris/statistics/StatisticsCleaner.java | 56 ++++++++++++++ .../apache/doris/statistics/TableStatsMeta.java | 25 ++++++ .../doris/statistics/AnalysisManagerTest.java | 8 ++ .../statistics/StatisticsAutoCollectorTest.java | 8 ++ .../hive/test_drop_expired_table_stats.groovy | 88 ++++++++++++++++++++++ 9 files changed, 240 insertions(+), 10 deletions(-) diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 1c2d8347398..51d62cd116e 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -4168,14 +4168,19 @@ show_param ::= RESULT = new ShowSyncJobStmt(dbName); :} /* show table stats */ - | KW_TABLE opt_cached:cached KW_STATS table_name:tbl opt_partition_names:partitionNames + | KW_TABLE KW_STATS table_name:tbl opt_partition_names:partitionNames {: - RESULT = new ShowTableStatsStmt(tbl, partitionNames, cached, null); + RESULT = new ShowTableStatsStmt(tbl, partitionNames, null); + :} + /* show table id stats */ + | KW_TABLE KW_STATS INTEGER_LITERAL:tableId + {: + RESULT = new ShowTableStatsStmt(tableId); :} /* show index stats */ | KW_INDEX KW_STATS table_name:tbl ident:id {: - RESULT = new ShowTableStatsStmt(tbl, null, false, id); + RESULT = new ShowTableStatsStmt(tbl, null, id); :} /* show column stats */ | KW_COLUMN opt_cached:cached KW_STATS table_name:tbl opt_col_list:cols opt_partition_names:partitionNames diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java index 2328581238a..8e9800fc410 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java @@ -68,16 +68,26 @@ public class ShowTableStatsStmt extends ShowStmt { private final TableName tableName; private final PartitionNames partitionNames; - private final boolean cached; private final String indexName; + private final long tableId; + private final boolean useTableId; private TableIf table; - public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, boolean cached, String indexName) { + public ShowTableStatsStmt(long tableId) { + this.tableName = null; + this.partitionNames = null; + this.indexName = null; + this.tableId = tableId; + this.useTableId = true; + } + + public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, String indexName) { this.tableName = tableName; this.partitionNames = partitionNames; - this.cached = cached; this.indexName = indexName; + this.tableId = -1; + this.useTableId = false; } public TableName getTableName() { @@ -87,6 +97,13 @@ public class ShowTableStatsStmt extends ShowStmt { @Override public void analyze(Analyzer analyzer) throws UserException { super.analyze(analyzer); + if (useTableId) { + if (!Env.getCurrentEnv().getAccessManager().checkGlobalPriv(ConnectContext.get(), PrivPredicate.SHOW)) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, "Permission denied", + ConnectContext.get().getQualifiedUser(), ConnectContext.get().getRemoteIP()); + } + return; + } tableName.analyze(analyzer); if (partitionNames != null) { partitionNames.analyze(analyzer); @@ -141,6 +158,14 @@ public class ShowTableStatsStmt extends ShowStmt { return table; } + public boolean isUseTableId() { + return useTableId; + } + + public long getTableId() { + return tableId; + } + public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) { if (indexName != null) { return constructIndexResultSet(tableStatistic); @@ -148,6 +173,10 @@ public class ShowTableStatsStmt extends ShowStmt { return constructTableResultSet(tableStatistic); } + public ShowResultSet constructEmptyResultSet() { + return new ShowResultSet(getMetaData(), new ArrayList<>()); + } + public ShowResultSet constructResultSet(long rowCount) { List<List<String>> result = Lists.newArrayList(); List<String> row = Lists.newArrayList(); @@ -207,8 +236,4 @@ public class ShowTableStatsStmt extends ShowStmt { result.add(row); return new ShowResultSet(getMetaData(), result); } - - public boolean isCached() { - return cached; - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index 7a1c3f0d0ab..d8494e392a1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -2481,6 +2481,17 @@ public class ShowExecutor { private void handleShowTableStats() { ShowTableStatsStmt showTableStatsStmt = (ShowTableStatsStmt) stmt; TableIf tableIf = showTableStatsStmt.getTable(); + // Handle use table id to show table stats. Mainly for online debug. + if (showTableStatsStmt.isUseTableId()) { + long tableId = showTableStatsStmt.getTableId(); + TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(tableId); + if (tableStats == null) { + resultSet = showTableStatsStmt.constructEmptyResultSet(); + } else { + resultSet = showTableStatsStmt.constructResultSet(tableStats); + } + return; + } TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(tableIf.getId()); /* HMSExternalTable table will fetch row count from HMS diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index e7634af1357..5d3debb8ddd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -1146,6 +1146,10 @@ public class AnalysisManager implements Writable { idToTblStats.remove(log.id); } + public Set<Long> getIdToTblStatsKeys() { + return new HashSet<>(idToTblStats.keySet()); + } + public ColStatsMeta findColStatsMeta(long tblId, String colName) { TableStatsMeta tableStats = findTableStatsStatus(tblId); if (tableStats == null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java index efa13349e3a..fc2022ef681 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java @@ -17,6 +17,7 @@ package org.apache.doris.statistics; +import org.apache.doris.catalog.Database; import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.MaterializedIndexMeta; @@ -27,6 +28,7 @@ import org.apache.doris.common.FeConstants; import org.apache.doris.common.util.MasterDaemon; import org.apache.doris.datasource.CatalogIf; import org.apache.doris.datasource.InternalCatalog; +import org.apache.doris.persist.TableStatsDeletionLog; import org.apache.doris.statistics.util.StatisticsUtil; import org.apache.doris.system.SystemInfoService; @@ -75,6 +77,7 @@ public class StatisticsCleaner extends MasterDaemon { } public synchronized void clear() { + clearTableStats(); try { if (!init()) { return; @@ -101,6 +104,59 @@ public class StatisticsCleaner extends MasterDaemon { } while (!expiredStats.isEmpty()); } + private void clearTableStats() { + AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); + Set<Long> tableIds = analysisManager.getIdToTblStatsKeys(); + InternalCatalog internalCatalog = Env.getCurrentInternalCatalog(); + for (long id : tableIds) { + try { + TableStatsMeta stats = analysisManager.findTableStatsStatus(id); + if (stats == null) { + continue; + } + // If ctlName, dbName and tblName exist, it means the table stats is created under new version. + // First try to find the table by the given names. If table exists, means the tableMeta is valid, + // it should be kept in memory. + try { + StatisticsUtil.findTable(stats.ctlName, stats.dbName, stats.tblName); + continue; + } catch (Exception e) { + LOG.debug("Table {}.{}.{} not found.", stats.ctlName, stats.dbName, stats.tblName); + } + // If we couldn't find table by names, try to find it in internal catalog. This is to support older + // version which the tableStats object doesn't store the names but only table id. + // We may remove external table's tableStats here, but it's not a big problem. + // Because the stats in column_statistics table is still available, + // the only disadvantage is auto analyze may be triggered for this table. + // But it only happens once, the new table stats object will have all the catalog, db and table names. + if (tableExistInInternalCatalog(internalCatalog, id)) { + continue; + } + LOG.info("Table {}.{}.{} with id {} not exist, remove its table stats record.", + stats.ctlName, stats.dbName, stats.tblName, id); + analysisManager.removeTableStats(id); + Env.getCurrentEnv().getEditLog().logDeleteTableStats(new TableStatsDeletionLog(id)); + } catch (Exception e) { + LOG.info(e); + } + } + } + + private boolean tableExistInInternalCatalog(InternalCatalog internalCatalog, long tableId) { + List<Long> dbIds = internalCatalog.getDbIds(); + for (long dbId : dbIds) { + Database database = internalCatalog.getDbNullable(dbId); + if (database == null) { + continue; + } + TableIf table = database.getTableNullable(tableId); + if (table != null) { + return true; + } + } + return false; + } + private boolean init() { try { String dbName = SystemInfoService.DEFAULT_CLUSTER + ":" + FeConstants.INTERNAL_DB_NAME; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index b403082d92b..5cad3dbaec1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -46,9 +46,24 @@ import java.util.stream.Collectors; public class TableStatsMeta implements Writable, GsonPostProcessable { + @SerializedName("ctlId") + public final long ctlId; + + @SerializedName("ctln") + public final String ctlName; + + @SerializedName("dbId") + public final long dbId; + + @SerializedName("dbn") + public final String dbName; + @SerializedName("tblId") public final long tblId; + @SerializedName("tbln") + public final String tblName; + @SerializedName("idxId") public final long idxId; @SerializedName("updatedRows") @@ -82,14 +97,24 @@ public class TableStatsMeta implements Writable, GsonPostProcessable { @VisibleForTesting public TableStatsMeta() { + ctlId = 0; + ctlName = null; + dbId = 0; + dbName = null; tblId = 0; + tblName = null; idxId = 0; } // It's necessary to store these fields separately from AnalysisInfo, since the lifecycle between AnalysisInfo // and TableStats is quite different. public TableStatsMeta(long rowCount, AnalysisInfo analyzedJob, TableIf table) { + this.ctlId = table.getDatabase().getCatalog().getId(); + this.ctlName = table.getDatabase().getCatalog().getName(); + this.dbId = table.getDatabase().getId(); + this.dbName = table.getDatabase().getFullName(); this.tblId = table.getId(); + this.tblName = table.getName(); this.idxId = -1; this.rowCount = rowCount; update(analyzedJob, table); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java index 4993884f02a..b8178fa5a58 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java @@ -23,6 +23,8 @@ import org.apache.doris.analysis.PartitionNames; import org.apache.doris.analysis.ShowAnalyzeStmt; import org.apache.doris.analysis.TableName; import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.common.Config; @@ -273,6 +275,7 @@ public class AnalysisManagerTest { @Test public void testReAnalyze() { + Database db = new Database(); new MockUp<OlapTable>() { int count = 0; @@ -294,6 +297,11 @@ public class AnalysisManagerTest { return Lists.newArrayList(c); } + @Mock + public DatabaseIf getDatabase() { + return db; + } + }; OlapTable olapTable = new OlapTable(); TableStatsMeta stats0 = new TableStatsMeta( diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java index 334da91198c..1ab5178c3b7 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java @@ -185,6 +185,14 @@ public class StatisticsAutoCollectorTest { AnalysisInfo analysisInfo = new AnalysisInfoBuilder().setAnalysisMethod(AnalysisMethod.FULL) .setColToPartitions(new HashMap<>()).setAnalysisType( AnalysisType.FUNDAMENTALS).setColName("col1").setJobType(JobType.SYSTEM).build(); + Database db = new Database(); + new MockUp<OlapTable>() { + @Mock + public DatabaseIf getDatabase() { + return db; + } + + }; new MockUp<AnalysisManager>() { int count = 0; diff --git a/regression-test/suites/external_table_p0/hive/test_drop_expired_table_stats.groovy b/regression-test/suites/external_table_p0/hive/test_drop_expired_table_stats.groovy new file mode 100644 index 00000000000..af5348732e8 --- /dev/null +++ b/regression-test/suites/external_table_p0/hive/test_drop_expired_table_stats.groovy @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_drop_expired_table_stats", "p0,external,hive,external_docker,external_docker_hive") { + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable Hive test.") + return + } + + String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp") + String extHiveHmsPort = context.config.otherConfigs.get("hms_port") + String catalog_name = "test_drop_expired_table_stats" + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hadoop.username' = 'hadoop', + 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}' + ); + """ + logger.info("catalog " + catalog_name + " created") + sql """switch ${catalog_name};""" + + + sql """use stats_test""" + sql """analyze table employee_gz with sync""" + def result = sql """show table stats employee_gz""" + assertEquals(1, result.size()) + + def ctlId + def dbId + def tblId + result = sql """show catalogs""" + + for (int i = 0; i < result.size(); i++) { + if (result[i][1] == catalog_name) { + ctlId = result[i][0] + } + } + logger.info("catalog id is " + ctlId) + result = sql """show proc '/catalogs/$ctlId'""" + for (int i = 0; i < result.size(); i++) { + if (result[i][1] == 'stats_test') { + dbId = result[i][0] + } + } + logger.info("db id is " + dbId) + result = sql """show proc '/catalogs/$ctlId/$dbId'""" + for (int i = 0; i < result.size(); i++) { + if (result[i][1] == 'employee_gz') { + tblId = result[i][0] + } + } + logger.info("table id is " + tblId) + result = sql """show table stats $tblId""" + logger.info("Table stats " + result) + assertEquals(1, result.size()) + + sql """drop catalog ${catalog_name}""" + result = sql """show table stats $tblId""" + logger.info("Table stats " + result) + assertEquals(1, result.size()) + + try { + sql """drop expired stats""" + } catch (Exception e) { + logger.info("Drop expired stats exception. " + e.getMessage()) + } + result = sql """show table stats $tblId""" + logger.info("Table stats " + result) + assertEquals(0, result.size()) +} + --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org