This is an automated email from the ASF dual-hosted git repository.

lijibing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new dff9f497f21 [fix](statistics)Fix column cached stats size bug. (#37545)
dff9f497f21 is described below

commit dff9f497f21f6e86adf50236cac7e54986a8526c
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Wed Jul 10 14:28:31 2024 +0800

    [fix](statistics)Fix column cached stats size bug. (#37545)
    
    Fix the average size calculation for cached column stats. The average
    size is a double computed as totalSize / rowCount, but the division
    was performed in integer arithmetic, so the fractional part was
    dropped. Cast totalSize to double before dividing to keep the result
    accurate.
---
 .../org/apache/doris/statistics/ColStatsData.java  |  2 +-
 .../suites/statistics/analyze_stats.groovy         | 73 ++++++++++++++++++----
 2 files changed, 63 insertions(+), 12 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
index 6bbafdbe5b5..7cf75462fee 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
@@ -142,7 +142,7 @@ public class ColStatsData {
             columnStatisticBuilder.setNdv(ndv);
             columnStatisticBuilder.setNumNulls(nullCount);
             columnStatisticBuilder.setDataSize(dataSizeInBytes);
-            columnStatisticBuilder.setAvgSizeByte(count == 0 ? 0 : 
dataSizeInBytes / count);
+            columnStatisticBuilder.setAvgSizeByte(count == 0 ? 0 : ((double) 
dataSizeInBytes) / count);
             if (statsId == null) {
                 return ColumnStatistic.UNKNOWN;
             }
diff --git a/regression-test/suites/statistics/analyze_stats.groovy 
b/regression-test/suites/statistics/analyze_stats.groovy
index f5339b9f684..de49b8b5104 100644
--- a/regression-test/suites/statistics/analyze_stats.groovy
+++ b/regression-test/suites/statistics/analyze_stats.groovy
@@ -181,7 +181,7 @@ suite("test_analyze") {
     """
 
     def contains_expected_table = { r ->
-        for (int i = 0; i < r.size; i++) {
+        for (int i = 0; i < r.size(); i++) {
             if (r[i][3] == "${tbl}") {
                 return true
             }
@@ -190,7 +190,7 @@ suite("test_analyze") {
     }
 
     def stats_job_removed = { r, id ->
-        for (int i = 0; i < r.size; i++) {
+        for (int i = 0; i < r.size(); i++) {
             if (r[i][0] == id) {
                 return false
             }
@@ -250,7 +250,7 @@ suite("test_analyze") {
     """
 
     def expected_result = { r->
-        for(int i = 0; i < r.size; i++) {
+        for(int i = 0; i < r.size(); i++) {
             if ((int) Double.parseDouble(r[i][2]) == 6) {
                 return true
             } else {
@@ -1150,9 +1150,9 @@ PARTITION `p599` VALUES IN (599)
     sql """ INSERT INTO test_updated_rows VALUES('1',1,1); """
     def cnt1 = sql """ SHOW TABLE STATS test_updated_rows """
     for (int i = 0; i < 10; ++i) {
-      if (Integer.valueOf(cnt1[0][0]) == 8) break;
-      Thread.sleep(1000) // rows updated report is async
-      cnt1 = sql """ SHOW TABLE STATS test_updated_rows """
+        if (Integer.valueOf(cnt1[0][0]) == 8) break;
+        Thread.sleep(1000) // rows updated report is async
+        cnt1 = sql """ SHOW TABLE STATS test_updated_rows """
     }
     assertEquals(Integer.valueOf(cnt1[0][0]), 1)
     sql """ANALYZE TABLE test_updated_rows WITH SYNC"""
@@ -1162,9 +1162,9 @@ PARTITION `p599` VALUES IN (599)
     sql """ANALYZE TABLE test_updated_rows WITH SYNC"""
     def cnt2 = sql """ SHOW TABLE STATS test_updated_rows """
     for (int i = 0; i < 10; ++i) {
-      if (Integer.valueOf(cnt2[0][0]) == 8) break;
-      Thread.sleep(1000) // rows updated report is async
-      cnt2 = sql """ SHOW TABLE STATS test_updated_rows """
+        if (Integer.valueOf(cnt2[0][0]) == 8) break;
+        Thread.sleep(1000) // rows updated report is async
+        cnt2 = sql """ SHOW TABLE STATS test_updated_rows """
     }
     assertTrue(Integer.valueOf(cnt2[0][0]) == 0 || Integer.valueOf(cnt2[0][0]) 
== 8)
 
@@ -1214,7 +1214,7 @@ PARTITION `p599` VALUES IN (599)
     """
 
     def tbl_name_as_expetected = { r,name ->
-        for (int i = 0; i < r.size; i++) {
+        for (int i = 0; i < r.size(); i++) {
             if (r[i][3] != name) {
                 return false
             }
@@ -1232,7 +1232,7 @@ PARTITION `p599` VALUES IN (599)
     assert show_result.size() > 0
 
     def all_finished = { r ->
-        for (int i = 0; i < r.size; i++) {
+        for (int i = 0; i < r.size(); i++) {
             if (r[i][9] != "FINISHED") {
                 return  false
             }
@@ -2810,6 +2810,57 @@ PARTITION `p599` VALUES IN (599)
     result_sample = sql """show analyze task status ${jobId}"""
     assertEquals(2, result_sample.size())
 
+    // Test inject stats avg_size.
+    sql """CREATE TABLE `date_dim` (
+          `d_date_sk` BIGINT NOT NULL,
+          `d_date_id` CHAR(16) NOT NULL,
+          `d_date` DATE NULL,
+          `d_month_seq` INT NULL,
+          `d_week_seq` INT NULL,
+          `d_quarter_seq` INT NULL,
+          `d_year` INT NULL,
+          `d_dow` INT NULL,
+          `d_moy` INT NULL,
+          `d_dom` INT NULL,
+          `d_qoy` INT NULL,
+          `d_fy_year` INT NULL,
+          `d_fy_quarter_seq` INT NULL,
+          `d_fy_week_seq` INT NULL,
+          `d_day_name` CHAR(9) NULL,
+          `d_quarter_name` CHAR(6) NULL,
+          `d_holiday` CHAR(1) NULL,
+          `d_weekend` CHAR(1) NULL,
+          `d_following_holiday` CHAR(1) NULL,
+          `d_first_dom` INT NULL,
+          `d_last_dom` INT NULL,
+          `d_same_day_ly` INT NULL,
+          `d_same_day_lq` INT NULL,
+          `d_current_day` CHAR(1) NULL,
+          `d_current_week` CHAR(1) NULL,
+          `d_current_month` CHAR(1) NULL,
+          `d_current_quarter` CHAR(1) NULL,
+          `d_current_year` CHAR(1) NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`d_date_sk`)
+        DISTRIBUTED BY HASH(`d_date_sk`) BUCKETS 12
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1")
+    """
+
+    sql """
+        alter table date_dim modify column d_day_name set stats 
('row_count'='73049', 'ndv'='7', 'num_nulls'='0', 'min_value'='Friday', 
'max_value'='Wednesday', 'data_size'='521779')
+    """
+
+    alter_result = sql """show column cached stats date_dim"""
+    assertEquals("d_day_name", alter_result[0][0])
+    assertEquals("date_dim", alter_result[0][1])
+    assertEquals("73049.0", alter_result[0][2])
+    assertEquals("7.0", alter_result[0][3])
+    assertEquals("0.0", alter_result[0][4])
+    assertEquals("521779.0", alter_result[0][5])
+    assertEquals("7.142863009760572", alter_result[0][6])
+
+
     sql """DROP DATABASE IF EXISTS trigger"""
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to