This is an automated email from the ASF dual-hosted git repository.

lijibing pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 31603089082 Limit the max string length to 1024 while collecting 
column stats to control BE memory usage. (#32470) (#33084)
31603089082 is described below

commit 31603089082ca2a511227473727a6756f9b85e1d
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Mon Apr 1 12:01:23 2024 +0800

    Limit the max string length to 1024 while collecting column stats to 
control BE memory usage. (#32470) (#33084)
---
 .../apache/doris/statistics/BaseAnalysisTask.java  | 24 ++++++++++++++++++++++
 .../apache/doris/statistics/HMSAnalysisTask.java   |  6 +++++-
 .../apache/doris/statistics/OlapAnalysisTask.java  |  6 +++++-
 .../doris/statistics/OlapAnalysisTaskTest.java     | 12 +++++------
 4 files changed, 40 insertions(+), 8 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
index 68767843507..f871e8761a5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
@@ -82,6 +82,30 @@ public abstract class BaseAnalysisTask {
             + "NOW() "
             + "FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} 
${sampleHints} ${limit}";
 
+    protected static final String DUJ1_ANALYZE_STRING_TEMPLATE = "SELECT "
+            + "CONCAT('${tblId}', '-', '${idxId}', '-', '${colId}') AS `id`, "
+            + "${catalogId} AS `catalog_id`, "
+            + "${dbId} AS `db_id`, "
+            + "${tblId} AS `tbl_id`, "
+            + "${idxId} AS `idx_id`, "
+            + "'${colId}' AS `col_id`, "
+            + "NULL AS `part_id`, "
+            + "${rowCount} AS `row_count`, "
+            + "${ndvFunction} as `ndv`, "
+            + "IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) 
* ${scaleFactor} as `null_count`, "
+            + "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, "
+            + "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, "
+            + "${dataSizeFunction} * ${scaleFactor} AS `data_size`, "
+            + "NOW() "
+            + "FROM ( "
+            + "    SELECT t0.`colValue` as `column_key`, COUNT(1) as `count` "
+            + "    FROM "
+            + "    (SELECT SUBSTRING(CAST(`${colName}` AS STRING), 1, 1024) AS 
`colValue` "
+            + "         FROM `${catalogName}`.`${dbName}`.`${tblName}` 
${index} "
+            + "    ${sampleHints} ${limit}) as `t0` "
+            + "    GROUP BY `t0`.`colValue` "
+            + ") as `t1` ";
+
     protected static final String DUJ1_ANALYZE_TEMPLATE = "SELECT "
             + "CONCAT('${tblId}', '-', '${idxId}', '-', '${colId}') AS `id`, "
             + "${catalogId} AS `catalog_id`, "
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
index 1fe827420c9..a90308a8000 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
@@ -148,7 +148,11 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
                 params.put("ndvFunction", "ROUND(NDV(`${colName}`) * 
${scaleFactor})");
                 params.put("rowCount", "ROUND(count(1) * ${scaleFactor})");
             } else {
-                sb.append(DUJ1_ANALYZE_TEMPLATE);
+                if (col.getType().isStringType()) {
+                    sb.append(DUJ1_ANALYZE_STRING_TEMPLATE);
+                } else {
+                    sb.append(DUJ1_ANALYZE_TEMPLATE);
+                }
                 params.put("dataSizeFunction", getDataSizeFunction(col, true));
                 params.put("ndvFunction", getNdvFunction("ROUND(SUM(t1.count) 
* ${scaleFactor})"));
                 params.put("rowCount", "ROUND(SUM(t1.count) * 
${scaleFactor})");
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index e6dd46e9fc7..60a3528afc8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -163,7 +163,11 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
                 sql = stringSubstitutor.replace(LINEAR_ANALYZE_TEMPLATE);
             } else {
                 params.put("dataSizeFunction", getDataSizeFunction(col, true));
-                sql = stringSubstitutor.replace(DUJ1_ANALYZE_TEMPLATE);
+                if (col.getType().isStringType()) {
+                    sql = 
stringSubstitutor.replace(DUJ1_ANALYZE_STRING_TEMPLATE);
+                } else {
+                    sql = stringSubstitutor.replace(DUJ1_ANALYZE_TEMPLATE);
+                }
             }
             LOG.info("Sample for column [{}]. Total rows [{}], rows to sample 
[{}], scale factor [{}], "
                     + "limited [{}], distribute column [{}], partition column 
[{}], key column [{}], "
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
index 5d9d57406a3..75506b1c85a 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
@@ -159,11 +159,10 @@ public class OlapAnalysisTaskTest {
                         + " IS NULL, `t1`.`count`, 0)), 0) * 5.0 as 
`null_count`, "
                         + "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`,"
                         + " SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, "
-                        + "SUM(LENGTH(`column_key`) * count) * 5.0 AS 
`data_size`, NOW() "
+                        + "SUM(t1.count) * 4 * 5.0 AS `data_size`, NOW() "
                         + "FROM (     SELECT t0.`${colName}` as `column_key`, 
COUNT(1) "
-                        + "as `count`     FROM     (SELECT `${colName}` FROM "
-                        + "`catalogName`.`${dbName}`.`${tblName}`      "
-                        + " limit 100) as `t0`     GROUP BY `t0`.`${colName}` 
) as `t1` ", sql);
+                        + "as `count`     FROM     (SELECT `${colName}` FROM 
`catalogName`.`${dbName}`.`${tblName}`"
+                        + "       limit 100) as `t0`     GROUP BY 
`t0`.`${colName}` ) as `t1` ", sql);
                 return;
             }
         };
@@ -183,7 +182,7 @@ public class OlapAnalysisTaskTest {
         };
 
         OlapAnalysisTask olapAnalysisTask = new OlapAnalysisTask();
-        olapAnalysisTask.col = new Column("test", PrimitiveType.STRING);
+        olapAnalysisTask.col = new Column("test", PrimitiveType.INT);
         olapAnalysisTask.tbl = tableIf;
         AnalysisInfoBuilder analysisInfoBuilder = new AnalysisInfoBuilder();
         analysisInfoBuilder.setJobType(AnalysisInfo.JobType.MANUAL);
@@ -322,7 +321,8 @@ public class OlapAnalysisTaskTest {
                         + "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, "
                         + "SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, "
                         + "SUM(LENGTH(`column_key`) * count) * 5.0 AS 
`data_size`, NOW() "
-                        + "FROM (     SELECT t0.`${colName}` as `column_key`, 
COUNT(1) as `count`     FROM     (SELECT `${colName}` FROM 
`catalogName`.`${dbName}`.`${tblName}`       limit 100) as `t0`     GROUP BY 
`t0`.`${colName}` ) as `t1` ", sql);
+                        + "FROM (     SELECT t0.`colValue` as `column_key`, 
COUNT(1) as `count`     FROM     "
+                        + "(SELECT SUBSTRING(CAST(`${colName}` AS STRING), 1, 
1024) AS `colValue`          FROM `catalogName`.`${dbName}`.`${tblName}`       
limit 100) as `t0`     GROUP BY `t0`.`colValue` ) as `t1` ", sql);
                 return;
             }
         };


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to