This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 383e9900c6 [improvement](external statistics)Fix external stats 
collection bugs (#22788) (#23122)
383e9900c6 is described below

commit 383e9900c688edd19e115465bb4796fa88e5a166
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Thu Aug 17 21:05:15 2023 +0800

    [improvement](external statistics)Fix external stats collection bugs 
(#22788) (#23122)
---
 fe/fe-core/src/main/cup/sql_parser.cup             |  4 ++--
 .../doris/analysis/AlterColumnStatsStmt.java       | 11 +++--------
 .../org/apache/doris/analysis/AnalyzeTblStmt.java  |  7 +++++--
 .../apache/doris/analysis/ShowTableStatsStmt.java  |  8 +++++++-
 .../java/org/apache/doris/qe/ShowExecutor.java     | 15 +++++++++++++-
 .../apache/doris/statistics/AnalysisManager.java   |  4 ++++
 .../apache/doris/statistics/HMSAnalysisTask.java   | 10 ++++++++++
 .../apache/doris/statistics/StatisticsCache.java   |  8 ++++++++
 .../statistics/TableStatisticsCacheLoader.java     |  4 +++-
 .../hive/test_hive_statistic.groovy                | 23 +++++++++++++++++-----
 10 files changed, 74 insertions(+), 20 deletions(-)

diff --git a/fe/fe-core/src/main/cup/sql_parser.cup 
b/fe/fe-core/src/main/cup/sql_parser.cup
index 7c96ca0cb4..02c6743665 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -4083,9 +4083,9 @@ show_param ::=
         RESULT = new ShowSyncJobStmt(dbName);
     :}
     /* show table stats */
-    | KW_TABLE KW_STATS table_name:tbl opt_partition_names:partitionNames
+    | KW_TABLE opt_cached:cached KW_STATS table_name:tbl 
opt_partition_names:partitionNames
     {:
-        RESULT = new ShowTableStatsStmt(tbl, partitionNames);
+        RESULT = new ShowTableStatsStmt(tbl, partitionNames, cached);
     :}
     /* show column stats */
     | KW_COLUMN opt_cached:cached KW_STATS table_name:tbl opt_col_list:cols 
opt_partition_names:partitionNames
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
index 0e7892dcd1..58b8121267 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
@@ -22,7 +22,6 @@ import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.OlapTable;
 import org.apache.doris.catalog.Partition;
 import org.apache.doris.catalog.PartitionType;
-import org.apache.doris.catalog.Table;
 import org.apache.doris.catalog.TableIf;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.Config;
@@ -148,17 +147,13 @@ public class AlterColumnStatsStmt extends DdlStmt {
         DatabaseIf db = catalog.getDbOrAnalysisException(tableName.getDb());
         TableIf table = db.getTableOrAnalysisException(tableName.getTbl());
 
-        if (table.getType() != Table.TableType.OLAP) {
-            throw new AnalysisException("Only OLAP table statistics are 
supported");
-        }
-
-        OlapTable olapTable = (OlapTable) table;
-        if (olapTable.getColumn(columnName) == null) {
+        if (table.getColumn(columnName) == null) {
             
ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME,
                     columnName, FeNameFormat.getColumnNameRegex());
         }
 
-        if (optPartitionNames != null) {
+        if (optPartitionNames != null && table instanceof OlapTable) {
+            OlapTable olapTable = (OlapTable) table;
             if 
(olapTable.getPartitionInfo().getType().equals(PartitionType.UNPARTITIONED)) {
                 throw new AnalysisException("Not a partitioned table: " + 
olapTable.getName());
             }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java
index 2460d37942..920b60627a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java
@@ -142,8 +142,11 @@ public class AnalyzeTblStmt extends AnalyzeStmt {
         }
         checkAnalyzePriv(tableName.getDb(), tableName.getTbl());
         if (columnNames == null) {
-            columnNames = table.getBaseSchema(false)
-                    
.stream().map(Column::getName).collect(Collectors.toList());
+            // Filter unsupported type columns.
+            columnNames = table.getBaseSchema(false).stream()
+                .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
+                .map(Column::getName)
+                .collect(Collectors.toList());
         }
         table.readLock();
         try {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
index e462c8585c..da10d5c492 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
@@ -53,12 +53,14 @@ public class ShowTableStatsStmt extends ShowStmt {
     private final TableName tableName;
 
     private final PartitionNames partitionNames;
+    private final boolean cached;
 
     private TableIf table;
 
-    public ShowTableStatsStmt(TableName tableName, PartitionNames 
partitionNames) {
+    public ShowTableStatsStmt(TableName tableName, PartitionNames 
partitionNames, boolean cached) {
         this.tableName = tableName;
         this.partitionNames = partitionNames;
+        this.cached = cached;
     }
 
     public TableName getTableName() {
@@ -133,4 +135,8 @@ public class ShowTableStatsStmt extends ShowStmt {
         result.add(row);
         return new ShowResultSet(getMetaData(), result);
     }
+
+    public boolean isCached() {
+        return cached;
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
index b25b24e982..448098f394 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
@@ -136,6 +136,7 @@ import org.apache.doris.catalog.Tablet;
 import org.apache.doris.catalog.TabletInvertedIndex;
 import org.apache.doris.catalog.TabletMeta;
 import org.apache.doris.catalog.View;
+import org.apache.doris.catalog.external.ExternalTable;
 import org.apache.doris.catalog.external.HMSExternalTable;
 import org.apache.doris.clone.DynamicPartitionScheduler;
 import org.apache.doris.cluster.ClusterNamespace;
@@ -234,6 +235,7 @@ import java.util.Comparator;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Objects;
+import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.function.Predicate;
@@ -2345,8 +2347,19 @@ public class ShowExecutor {
         ShowTableStatsStmt showTableStatsStmt = (ShowTableStatsStmt) stmt;
         TableIf tableIf = showTableStatsStmt.getTable();
         long partitionId = showTableStatsStmt.getPartitionId();
+        boolean showCache = showTableStatsStmt.isCached();
         try {
-            if (partitionId > 0) {
+            if (tableIf instanceof ExternalTable && showCache) {
+                Optional<TableStatistic> tableStatistics = 
Env.getCurrentEnv().getStatisticsCache().getTableStatistics(
+                        tableIf.getDatabase().getCatalog().getId(),
+                        tableIf.getDatabase().getId(),
+                        tableIf.getId());
+                if (tableStatistics.isPresent()) {
+                    resultSet = 
showTableStatsStmt.constructResultSet(tableStatistics.get());
+                } else {
+                    resultSet = 
showTableStatsStmt.constructResultSet(TableStatistic.UNKNOWN);
+                }
+            } else if (partitionId > 0) {
                 TableStatistic partStats = 
StatisticsRepository.fetchTableLevelOfPartStats(partitionId);
                 resultSet = showTableStatsStmt.constructResultSet(partStats);
             } else {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index e549bd6b0b..cde111af4b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -180,6 +180,8 @@ public class AnalysisManager extends Daemon implements 
Writable {
                 }
                 TableName tableName = new 
TableName(analyzeDBStmt.getCtlIf().getName(), db.getFullName(),
                         table.getName());
+                // columnNames null means to add all visitable columns.
+                // Will get all the visible columns in analyzeTblStmt.check()
                 AnalyzeTblStmt analyzeTblStmt = new 
AnalyzeTblStmt(analyzeDBStmt.getAnalyzeProperties(), tableName,
                         null, db.getId(), table);
                 try {
@@ -772,6 +774,8 @@ public class AnalysisManager extends Daemon implements 
Writable {
         }
         if (dropStatsStmt.dropTableRowCount()) {
             StatisticsRepository.dropExternalTableStatistics(tblId);
+            // Table cache key doesn't care about catalog id and db id, 
because the table id is globally unique.
+            Env.getCurrentEnv().getStatisticsCache().invalidateTableStats(-1, 
-1, tblId);
         }
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
index 119368d91d..d569cd79bd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
@@ -17,6 +17,7 @@
 
 package org.apache.doris.statistics;
 
+import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.external.HMSExternalTable;
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.util.TimeUtils;
@@ -291,4 +292,13 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
                 
LocalDateTime.ofInstant(Instant.ofEpochMilli(Long.parseLong(timestamp) * 1000),
                         ZoneId.systemDefault())));
     }
+
+    @Override
+    protected void afterExecution() {
+        if (isTableLevelTask) {
+            
Env.getCurrentEnv().getStatisticsCache().refreshTableStatsSync(catalog.getId(), 
db.getId(), tbl.getId());
+        } else {
+            
Env.getCurrentEnv().getStatisticsCache().syncLoadColStats(tbl.getId(), -1, 
col.getName());
+        }
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
index 27756dcc3f..cb9e4ca322 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
@@ -174,6 +174,14 @@ public class StatisticsCache {
         columnStatisticsCache.synchronous().refresh(new 
StatisticsCacheKey(catalogId, dbId, tblId, idxId, colName));
     }
 
+    public void invalidateTableStats(long catalogId, long dbId, long tblId) {
+        tableStatisticsCache.synchronous().invalidate(new 
StatisticsCacheKey(catalogId, dbId, tblId));
+    }
+
+    public void refreshTableStatsSync(long catalogId, long dbId, long tblId) {
+        tableStatisticsCache.synchronous().refresh(new 
StatisticsCacheKey(catalogId, dbId, tblId));
+    }
+
     public void refreshHistogramSync(long tblId, long idxId, String colName) {
         histogramCache.synchronous().refresh(new StatisticsCacheKey(tblId, 
idxId, colName));
     }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatisticsCacheLoader.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatisticsCacheLoader.java
index 817e74540f..953bc9a427 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatisticsCacheLoader.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatisticsCacheLoader.java
@@ -36,7 +36,9 @@ public class TableStatisticsCacheLoader extends 
StatisticsCacheLoader<Optional<T
     protected Optional<TableStatistic> doLoad(StatisticsCacheKey key) {
         try {
             TableStatistic tableStatistic = 
StatisticsRepository.fetchTableLevelStats(key.tableId);
-            return Optional.of(tableStatistic);
+            if (tableStatistic != TableStatistic.UNKNOWN) {
+                return Optional.of(tableStatistic);
+            }
         } catch (DdlException e) {
             LOG.debug("Fail to get table line number from table_statistics 
table. "
                     + "Will try to get from data source.", e);
diff --git 
a/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy 
b/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy
index c6df0b0eaa..97745ce1fe 100644
--- a/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy
+++ b/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy
@@ -221,15 +221,28 @@ suite("test_hive_statistic", "p2") {
         assertTrue(result[0][6] == "'AIR'")
         assertTrue(result[0][7] == "'TRUCK'")
 
-        // sql """ALTER TABLE statistics MODIFY COLUMN lo_shipmode SET STATS 
('row_count'='6001215')"""
-        // result = sql "show column stats `statistics` (lo_shipmode)"
-        // assertTrue(result.size() == 1)
-        // assertTrue(result[0][0] == "lo_shipmode")
-        // assertTrue(result[0][1] == "6001215.0")
+        sql """ALTER TABLE statistics MODIFY COLUMN lo_shipmode SET STATS 
('row_count'='6001215')"""
+        result = sql "show column stats `statistics` (lo_shipmode)"
+        assertTrue(result.size() == 1)
+        assertTrue(result[0][0] == "lo_shipmode")
+        assertTrue(result[0][1] == "6001215.0")
 
         sql """drop stats statistics"""
         result = sql """show column stats statistics"""
         assertTrue(result.size() == 0)
+
+        sql """analyze database `statistics` with sync"""
+        result = sql """show table stats statistics"""
+        assertTrue(result.size() == 1)
+        assertTrue(result[0][0] == "100")
+
+        result = sql """show table cached stats statistics"""
+        assertTrue(result.size() == 1)
+        assertTrue(result[0][0] == "100")
+
+        sql """drop stats statistics"""
+        result = sql """show column cached stats statistics"""
+        assertTrue(result.size() == 0)
     }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to