This is an automated email from the ASF dual-hosted git repository. lijibing pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new db9c74c38f8 [improvement](statistics)Drop expired external stats only when the catalog is dropped. (#42244) (#42410) db9c74c38f8 is described below commit db9c74c38f883362bc64715a1eb606b7d10c25be Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Thu Oct 24 23:47:53 2024 +0800 [improvement](statistics)Drop expired external stats only when the catalog is dropped. (#42244) (#42410) backport: https://github.com/apache/doris/pull/42244 --- .../apache/doris/statistics/StatisticsCleaner.java | 16 +++-- .../statistics/test_drop_expired_stats.groovy | 76 ++++++++++++++++++++++ 2 files changed, 88 insertions(+), 4 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java index 2dbdca39b58..a61d08f7d8f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java @@ -183,10 +183,8 @@ public class StatisticsCleaner extends MasterDaemon { private Map<Long, DatabaseIf> constructDbMap() { Map<Long, DatabaseIf> idToDb = Maps.newHashMap(); - for (CatalogIf<? extends DatabaseIf> ctl : idToCatalog.values()) { - for (DatabaseIf db : ctl.getAllDbs()) { - idToDb.put(db.getId(), db); - } + for (DatabaseIf db : Env.getCurrentEnv().getInternalCatalog().getAllDbs()) { + idToDb.put(db.getId(), db); } return idToDb; } @@ -272,6 +270,16 @@ public class StatisticsCleaner extends MasterDaemon { expiredStats.expiredCatalog.add(catalogId); continue; } + // Skip check external DBs and tables to avoid fetch too much metadata. + // Remove expired external table stats only when the external catalog is dropped. + // TODO: Need to check external database and table exist or not. But for now, we only check catalog. 
+ // Because column_statistics table only keep table id and db id. + // But meta data doesn't always cache all external tables' ids. + // So we may fail to find the external table only by id. Need to use db name and table name instead. + // Have to store db name and table name in column_statistics in the future. + if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID) { + continue; + } long dbId = statsId.dbId; if (!idToDb.containsKey(dbId)) { expiredStats.expiredDatabase.add(dbId); diff --git a/regression-test/suites/statistics/test_drop_expired_stats.groovy b/regression-test/suites/statistics/test_drop_expired_stats.groovy new file mode 100644 index 00000000000..23067f670b5 --- /dev/null +++ b/regression-test/suites/statistics/test_drop_expired_stats.groovy @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_drop_expired_stats") { + + sql """drop database if exists test_drop_expired_stats""" + sql """create database test_drop_expired_stats""" + sql """use test_drop_expired_stats""" + sql """set global enable_auto_analyze=false""" + + sql """CREATE TABLE table1 ( + key1 bigint NOT NULL, + key2 bigint NOT NULL, + value1 int NOT NULL, + value2 int NOT NULL, + value3 int NOT NULL + )ENGINE=OLAP + DUPLICATE KEY(`key1`, `key2`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`key1`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + sql """CREATE TABLE table2 ( + key1 bigint NOT NULL, + key2 bigint NOT NULL, + value1 int NOT NULL + )ENGINE=OLAP + DUPLICATE KEY(`key1`, `key2`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`key1`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + def id1 = getTableId("test_drop_expired_stats", "table1") + def id2 = getTableId("test_drop_expired_stats", "table2") + + sql """analyze table table1 with sync""" + sql """analyze table table2 with sync""" + def result = sql """select * from __internal_schema.column_statistics where tbl_id = ${id1}""" + assertEquals(5, result.size()) + result = sql """select * from __internal_schema.column_statistics where tbl_id = ${id2}""" + assertEquals(3, result.size()) + sql """drop table table1""" + sql """drop expired stats""" + result = sql """select * from __internal_schema.column_statistics where tbl_id = ${id1}""" + assertEquals(0, result.size()) + result = sql """select * from __internal_schema.column_statistics where tbl_id = ${id2}""" + assertEquals(3, result.size()) + + sql """drop database if exists test_drop_expired_stats""" + sql """drop expired stats""" + result = sql """select * from __internal_schema.column_statistics where tbl_id = ${id1}""" + assertEquals(0, result.size()) + result = sql """select * from __internal_schema.column_statistics where tbl_id = ${id2}""" + assertEquals(0, result.size()) +} + 
--------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For additional commands, e-mail: commits-help@doris.apache.org