This is an automated email from the ASF dual-hosted git repository. lijibing pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 6976682862c [fix](statistics)Fix empty table keep auto analyze bug. (#40811) (#41919) 6976682862c is described below commit 6976682862c2b3a2f3d7261aa004a66b0d795d7f Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Wed Oct 16 10:16:55 2024 +0800 [fix](statistics)Fix empty table keep auto analyze bug. (#40811) (#41919) backport: https://github.com/apache/doris/pull/40811 --- .../doris/statistics/AnalysisInfoBuilder.java | 2 +- .../apache/doris/statistics/TableStatsMeta.java | 11 ++--- .../doris/statistics/util/StatisticsUtil.java | 28 ++++++------- .../statistics/test_drop_stats_and_truncate.groovy | 48 ++++++++++++++++++++++ 4 files changed, 68 insertions(+), 21 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java index 43f592629bd..73817363ef1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java @@ -61,7 +61,7 @@ public class AnalysisInfoBuilder { private boolean usingSqlForExternalTable; private long tblUpdateTime; private long rowCount; - private boolean userInject; + private boolean userInject = false; private long updateRows; private JobPriority priority; private Map<Long, Long> partitionUpdateRows; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index de0f0eed18d..61a5a9b1f88 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -25,7 +25,6 @@ import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; import org.apache.doris.persist.gson.GsonPostProcessable; import org.apache.doris.persist.gson.GsonUtils; -import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; import org.apache.doris.statistics.AnalysisInfo.JobType; import org.apache.doris.statistics.util.StatisticsUtil; @@ -165,7 +164,9 @@ public class TableStatsMeta implements Writable, GsonPostProcessable { public void update(AnalysisInfo analyzedJob, TableIf tableIf) { updatedTime = analyzedJob.tblUpdateTime; - userInjected = analyzedJob.userInject; + if (analyzedJob.userInject) { + userInjected = true; + } for (Pair<String, String> colPair : analyzedJob.jobColumns) { ColStatsMeta colStatsMeta = colToColStatsMeta.get(colPair); if (colStatsMeta == null) { @@ -194,15 +195,15 @@ public class TableStatsMeta implements Writable, GsonPostProcessable { clearStaleIndexRowCount((OlapTable) tableIf); } rowCount = analyzedJob.rowCount; - if (rowCount == 0 && AnalysisMethod.SAMPLE.equals(analyzedJob.analysisMethod)) { - return; - } if (analyzedJob.jobColumns.containsAll( tableIf.getColumnIndexPairs( tableIf.getSchemaAllIndexes(false).stream() .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) .map(Column::getName).collect(Collectors.toSet())))) { partitionChanged.set(false); + } + // Set userInject back to false after manual analyze. + if (JobType.MANUAL.equals(jobType) && !analyzedJob.userInject) { userInjected = false; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 065667eb061..66f6bda6819 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -43,7 +43,6 @@ import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.StructType; -import org.apache.doris.catalog.TableAttributes; import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.Type; import org.apache.doris.catalog.VariantType; @@ -966,27 +965,26 @@ public class StatisticsUtil { } public static boolean isEmptyTable(TableIf table, AnalysisInfo.AnalysisMethod method) { - int waitRowCountReportedTime = 75; + int waitRowCountReportedTime = 120; if (!(table instanceof OlapTable) || method.equals(AnalysisInfo.AnalysisMethod.FULL)) { return false; } OlapTable olapTable = (OlapTable) table; + long rowCount = 0; for (int i = 0; i < waitRowCountReportedTime; i++) { - if (olapTable.getRowCount() > 0) { - return false; - } - // If visible version is 2, table is probably not empty. So we wait row count to be reported. - // If visible version is not 2 and getRowCount return 0, we assume it is an empty table. - if (olapTable.getVisibleVersion() != TableAttributes.TABLE_INIT_VERSION + 1) { - return true; - } - try { - Thread.sleep(1000); - } catch (InterruptedException e) { - LOG.info("Sleep interrupted.", e); + rowCount = olapTable.getRowCountForIndex(olapTable.getBaseIndexId(), true); + // rowCount == -1 means new table or first load row count not fully reported, need to wait. + if (rowCount == -1) { + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + LOG.info("Sleep interrupted."); + } + continue; } + break; } - return true; + return rowCount == 0; } public static boolean needAnalyzeColumn(TableIf table, Pair<String, String> column) { diff --git a/regression-test/suites/statistics/test_drop_stats_and_truncate.groovy b/regression-test/suites/statistics/test_drop_stats_and_truncate.groovy index d4447d12f50..6dc3c6d1797 100644 --- a/regression-test/suites/statistics/test_drop_stats_and_truncate.groovy +++ b/regression-test/suites/statistics/test_drop_stats_and_truncate.groovy @@ -175,6 +175,54 @@ suite("test_drop_stats_and_truncate") { columns = all_columns.split(","); assertEquals(9, columns.size()) + sql """drop table part""" + sql """CREATE TABLE `part` ( + `id` INT NULL, + `colint` INT NULL, + `coltinyint` tinyint NULL, + `colsmallint` smallINT NULL, + `colbigint` bigINT NULL, + `collargeint` largeINT NULL, + `colfloat` float NULL, + `coldouble` double NULL, + `coldecimal` decimal(27, 9) NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + PARTITION BY RANGE(`id`) + ( + PARTITION p1 VALUES [("-2147483648"), ("10000")), + PARTITION p2 VALUES [("10000"), ("20000")), + PARTITION p3 VALUES [("20000"), ("30000")) + ) + DISTRIBUTED BY HASH(`id`) BUCKETS 3 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ) + """ + sql """analyze table part with sync""" + sql """Insert into part values (1, 1, 1, 1, 1, 1, 1.1, 1.1, 1.1)""" + result = sql """show table stats part""" + assertEquals("true", result[0][6]) + sql """truncate table part partition(p1)""" + result = sql """show table stats part""" + assertEquals("true", result[0][6]) + sql """analyze table part with sample rows 100 with sync""" + result = sql """show table stats part""" + if (result[0][6].equals("true")) { + result = """show index stats part part""" + logger.info("Report not ready. index stats: " + result) + sql """analyze table part with sample rows 100 with sync""" + result = sql """show table stats part""" + } + if (result[0][6].equals("true")) { + result = """show index stats part part""" + logger.info("Report not ready. index stats: " + result) + sql """analyze table part with sample rows 100 with sync""" + result = sql """show table stats part""" + } + assertEquals("false", result[0][6]) + sql """drop database if exists test_drop_stats_and_truncate""" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org