This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new c3d81cb3a0d [opt](stats) Use escape rather than base64 for min/max 
value #27746 (#27748)
c3d81cb3a0d is described below

commit c3d81cb3a0d84db1b006c7499df4909bcc8277a5
Author: AKIRA <33112463+kikyou1...@users.noreply.github.com>
AuthorDate: Wed Nov 29 20:59:02 2023 +0800

    [opt](stats) Use escape rather than base64 for min/max value #27746 (#27748)
---
 .../org/apache/doris/statistics/BaseAnalysisTask.java    |  6 +++---
 .../java/org/apache/doris/statistics/ColStatsData.java   | 16 +++-------------
 .../org/apache/doris/statistics/ColumnStatistic.java     |  4 ----
 .../org/apache/doris/statistics/OlapAnalysisTask.java    |  4 ++--
 .../apache/doris/statistics/StatisticsRepository.java    |  4 ++--
 .../org/apache/doris/statistics/util/StatisticsUtil.java |  4 +++-
 .../apache/doris/statistics/BaseAnalysisTaskTest.java    |  4 ++--
 .../apache/doris/statistics/OlapAnalysisTaskTest.java    |  6 +++---
 .../java/org/apache/doris/statistics/StatsMockUtil.java  |  2 +-
 .../apache/doris/statistics/util/StatisticsUtilTest.java |  8 ++++++++
 10 files changed, 27 insertions(+), 31 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
index 824e3f74abd..4c0e07ce6cb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
@@ -252,7 +252,7 @@ public abstract class BaseAnalysisTask {
 
     protected String getMinFunction() {
         if (tableSample == null) {
-            return "to_base64(CAST(MIN(`${colName}`) as ${type})) ";
+            return "CAST(MIN(`${colName}`) as ${type}) ";
         } else {
             // Min value is not accurate while sample, so set it to NULL to 
avoid optimizer generate bad plan.
             return "NULL";
@@ -276,7 +276,7 @@ public abstract class BaseAnalysisTask {
     // Max value is not accurate while sample, so set it to NULL to avoid 
optimizer generate bad plan.
     protected String getMaxFunction() {
         if (tableSample == null) {
-            return "to_base64(CAST(MAX(`${colName}`) as ${type})) ";
+            return "CAST(MAX(`${colName}`) as ${type}) ";
         } else {
             return "NULL";
         }
@@ -315,7 +315,7 @@ public abstract class BaseAnalysisTask {
         long startTime = System.currentTimeMillis();
         try (AutoCloseConnectContext a  = 
StatisticsUtil.buildConnectContext()) {
             stmtExecutor = new StmtExecutor(a.connectContext, sql);
-            ColStatsData colStatsData = new 
ColStatsData(stmtExecutor.executeInternalQuery().get(0), needEncode);
+            ColStatsData colStatsData = new 
ColStatsData(stmtExecutor.executeInternalQuery().get(0));
             job.appendBuf(this, Collections.singletonList(colStatsData));
         } finally {
             LOG.debug("End cost time in secs: " + (System.currentTimeMillis() 
- startTime) / 1000);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
index fa7546abd5b..7878a065488 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
@@ -21,8 +21,6 @@ import org.apache.doris.statistics.util.StatisticsUtil;
 
 import com.google.common.annotations.VisibleForTesting;
 
-import java.nio.charset.StandardCharsets;
-import java.util.Base64;
 import java.util.StringJoiner;
 
 /**
@@ -56,8 +54,6 @@ public class ColStatsData {
 
     public final String updateTime;
 
-    public final boolean needEncode;
-
     @VisibleForTesting
     public ColStatsData() {
         statsId = new StatsId();
@@ -68,10 +64,9 @@ public class ColStatsData {
         maxLit = null;
         dataSizeInBytes = 0;
         updateTime = null;
-        needEncode = true;
     }
 
-    public ColStatsData(ResultRow row, boolean needEncode) {
+    public ColStatsData(ResultRow row) {
         this.statsId = new StatsId(row);
         this.count = (long) Double.parseDouble(row.get(7));
         this.ndv = (long) Double.parseDouble(row.getWithDefault(8, "0"));
@@ -80,7 +75,6 @@ public class ColStatsData {
         this.maxLit = row.get(11);
         this.dataSizeInBytes = (long) 
Double.parseDouble(row.getWithDefault(12, "0"));
         this.updateTime = row.get(13);
-        this.needEncode = needEncode;
     }
 
     public String toSQL(boolean roundByParentheses) {
@@ -93,12 +87,8 @@ public class ColStatsData {
         sj.add(String.valueOf(count));
         sj.add(String.valueOf(ndv));
         sj.add(String.valueOf(nullCount));
-        sj.add(minLit == null ? "NULL" : needEncode
-                ? "'" + 
Base64.getEncoder().encodeToString(minLit.getBytes(StandardCharsets.UTF_8)) + 
"'"
-                : "'" + minLit + "'");
-        sj.add(maxLit == null ? "NULL" : needEncode
-                ? "'" + 
Base64.getEncoder().encodeToString(maxLit.getBytes(StandardCharsets.UTF_8)) + 
"'"
-                : "'" + maxLit + "'");
+        sj.add(minLit == null ? "NULL" : "'" + 
StatisticsUtil.escapeSQL(minLit) + "'");
+        sj.add(maxLit == null ? "NULL" : "'" + 
StatisticsUtil.escapeSQL(maxLit) + "'");
         sj.add(String.valueOf(dataSizeInBytes));
         sj.add(StatisticsUtil.quote(updateTime));
         return sj.toString();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
index eefaf7badeb..5ea1f9097bb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
@@ -31,8 +31,6 @@ import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.json.JSONObject;
 
-import java.nio.charset.StandardCharsets;
-import java.util.Base64;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -175,7 +173,6 @@ public class ColumnStatistic {
             String min = row.get(10);
             String max = row.get(11);
             if (min != null && !min.equalsIgnoreCase("NULL")) {
-                min = new String(Base64.getDecoder().decode(min), 
StandardCharsets.UTF_8);
                 // Internal catalog get the min/max value using a separate SQL,
                 // and the value is already encoded by base64. Need to handle 
internal and external catalog separately.
                 if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && 
min.equalsIgnoreCase("NULL")) {
@@ -193,7 +190,6 @@ public class ColumnStatistic {
                 columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
             }
             if (max != null && !max.equalsIgnoreCase("NULL")) {
-                max = new String(Base64.getDecoder().decode(max), 
StandardCharsets.UTF_8);
                 if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && 
max.equalsIgnoreCase("NULL")) {
                     
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
                 } else {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index 04a763b6884..45ccdbb0d68 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -92,8 +92,8 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
             // Get basic stats, including min and max.
             ResultRow basicStats = collectBasicStat(r);
             long rowCount = tbl.getRowCount();
-            String min = StatisticsUtil.encodeValue(basicStats, 0);
-            String max = StatisticsUtil.encodeValue(basicStats, 1);
+            String min = StatisticsUtil.escapeSQL(basicStats.get(0));
+            String max = StatisticsUtil.escapeSQL(basicStats.get(1));
 
             boolean limitFlag = false;
             long rowsToSample = pair.second;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index 12ca6b4aa1f..29e11ac75ad 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -269,8 +269,8 @@ public class StatisticsRepository {
         params.put("count", String.valueOf(columnStatistic.count));
         params.put("ndv", String.valueOf(columnStatistic.ndv));
         params.put("nullCount", String.valueOf(columnStatistic.numNulls));
-        params.put("min", StatisticsUtil.encodeString(min));
-        params.put("max", StatisticsUtil.encodeString(max));
+        params.put("min", StatisticsUtil.escapeSQL(min));
+        params.put("max", StatisticsUtil.escapeSQL(max));
         params.put("dataSize", String.valueOf(columnStatistic.dataSize));
 
         if (partitionIds.isEmpty()) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index 66b13ff87f1..3f9abcad5f9 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -786,7 +786,9 @@ public class StatisticsUtil {
         if (str == null) {
             return null;
         }
-        return org.apache.commons.lang3.StringUtils.replace(str, "'", "''");
+        return str.replace("'", "''")
+                .replace("\\", "\\\\")
+                .replace("\"", "\"\"");
     }
 
     public static boolean isExternalTable(String catalogName, String dbName, 
String tblName) {
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/BaseAnalysisTaskTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/BaseAnalysisTaskTest.java
index fe81c055e0d..187c4d207df 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/BaseAnalysisTaskTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/BaseAnalysisTaskTest.java
@@ -42,14 +42,14 @@ public class BaseAnalysisTaskTest {
         Assertions.assertEquals("SUM(t1.count) * 4", dataSizeFunction);
 
         String minFunction = olapAnalysisTask.getMinFunction();
-        Assertions.assertEquals("to_base64(CAST(MIN(`${colName}`) as ${type})) 
", minFunction);
+        Assertions.assertEquals("CAST(MIN(`${colName}`) as ${type}) ", 
minFunction);
         olapAnalysisTask.tableSample = new TableSample(true, 20L);
         minFunction = olapAnalysisTask.getMinFunction();
         Assertions.assertEquals("NULL", minFunction);
 
         olapAnalysisTask.tableSample = null;
         String maxFunction = olapAnalysisTask.getMaxFunction();
-        Assertions.assertEquals("to_base64(CAST(MAX(`${colName}`) as ${type})) 
", maxFunction);
+        Assertions.assertEquals("CAST(MAX(`${colName}`) as ${type}) ", 
maxFunction);
         olapAnalysisTask.tableSample = new TableSample(true, 20L);
         maxFunction = olapAnalysisTask.getMaxFunction();
         Assertions.assertEquals("NULL", maxFunction);
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
index c174795b36b..7b7894e54b4 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java
@@ -151,7 +151,7 @@ public class OlapAnalysisTaskTest {
             @Mock
             public void runQuery(String sql, boolean needEncode) {
                 Assertions.assertFalse(needEncode);
-                Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', 
'-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS 
`tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS 
`row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - 
SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * 
SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, 
`t1`.`count`, 0)), 0) * 5.0 as `null_count`, 'MQ==' AS `min`, 'M [...]
+                Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', 
'-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS 
`tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS 
`row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - 
SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * 
SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, 
`t1`.`count`, 0)), 0) * 5.0 as `null_count`, '1' AS `min`, '2' A [...]
                 return;
             }
         };
@@ -218,7 +218,7 @@ public class OlapAnalysisTaskTest {
             @Mock
             public void runQuery(String sql, boolean needEncode) {
                 Assertions.assertFalse(needEncode);
-                Assertions.assertEquals(" SELECT CONCAT(30001, '-', -1, '-', 
'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 
AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, 
ROUND(NDV(`${colName}`) * 5.0) as `ndv`, ROUND(SUM(CASE WHEN `${colName}` IS 
NULL THEN 1 ELSE 0 END) * 5.0) AS `null_count`, 'MQ==' AS `min`, 'Mg==' AS 
`max`, SUM(LENGTH(`${colName}`)) * 5.0 AS `data_size`, NOW() FROM 
`catalogName`.`${dbName}`.`${tblName}`  limit [...]
+                Assertions.assertEquals(" SELECT CONCAT(30001, '-', -1, '-', 
'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 
AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, 
ROUND(NDV(`${colName}`) * 5.0) as `ndv`, ROUND(SUM(CASE WHEN `${colName}` IS 
NULL THEN 1 ELSE 0 END) * 5.0) AS `null_count`, '1' AS `min`, '2' AS `max`, 
SUM(LENGTH(`${colName}`)) * 5.0 AS `data_size`, NOW() FROM 
`catalogName`.`${dbName}`.`${tblName}`  limit 100", sql);
                 return;
             }
         };
@@ -292,7 +292,7 @@ public class OlapAnalysisTaskTest {
             @Mock
             public void runQuery(String sql, boolean needEncode) {
                 Assertions.assertFalse(needEncode);
-                Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', 
'-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS 
`tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS 
`row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - 
SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * 
SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, 
`t1`.`count`, 0)), 0) * 5.0 as `null_count`, 'MQ==' AS `min`, 'M [...]
+                Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', 
'-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS 
`tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS 
`row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - 
SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * 
SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, 
`t1`.`count`, 0)), 0) * 5.0 as `null_count`, '1' AS `min`, '2' A [...]
                 return;
             }
         };
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsMockUtil.java 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsMockUtil.java
index 84e1112d216..45c78f665b0 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsMockUtil.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsMockUtil.java
@@ -40,7 +40,7 @@ public class StatsMockUtil {
                 add("0");
                 add("10");
                 // 11
-                add("MTE=");
+                add("11");
                 add("12");
                 add(String.valueOf(System.currentTimeMillis()));
             }};
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java
index 2c0854dcf21..c827a7d1690 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java
@@ -142,4 +142,12 @@ public class StatisticsUtilTest {
                 .encodeToString("a".getBytes(StandardCharsets.UTF_8)), 
StatisticsUtil.encodeValue(row, 1));
         Assertions.assertEquals("NULL", StatisticsUtil.encodeValue(row, 2));
     }
+
+    @Test
+    public void testEscape() {
+        // \'"
+        String origin = "\\'\"";
+        // \\''""
+        Assertions.assertEquals("\\\\''\"\"", 
StatisticsUtil.escapeSQL(origin));
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to