morningman commented on code in PR #59123:
URL: https://github.com/apache/doris/pull/59123#discussion_r2638473221


##########
gensrc/thrift/Exprs.thrift:
##########
@@ -322,4 +322,13 @@ struct TExprList {
   1: required list<TExpr> exprs
 }
 
+struct TExprMinMaxValue {
+  1: required Types.TPrimitiveType type

Review Comment:
   all fields need to use `optional`



##########
fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java:
##########
@@ -2173,6 +2175,9 @@ public boolean isEnableHboNonStrictMatchingMode() {
     @VariableMgr.VarAttr(name = FILE_SPLIT_SIZE, needForward = true)
     public long fileSplitSize = 0;
 
+    @VariableMgr.VarAttr(name = ENABLE_ICEBERG_MIN_MAX_OPTIMIZATION, 
needForward = true)

Review Comment:
   Need to add description



##########
gensrc/thrift/Exprs.thrift:
##########
@@ -322,4 +322,13 @@ struct TExprList {
   1: required list<TExpr> exprs
 }
 
+struct TExprMinMaxValue {
+  1: required Types.TPrimitiveType type
+  2: required bool has_null
+  3: optional i64 min_int_value
+  4: optional i64 max_int_value
+  5: optional double min_float_value
+  6: optional double max_float_value

Review Comment:
   what about string type?



##########
fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java:
##########
@@ -330,6 +337,9 @@ public TableScan createTableScan() throws UserException {
 
         TableScan scan = icebergTable.newScan().metricsReporter(new 
IcebergMetricsReporter());
 
+        if (getPushDownAggNoGroupingOp() == TPushAggOp.MINMAX) {
+            scan = scan.includeColumnStats();

Review Comment:
   What is this for? What is the impact of this operation?



##########
fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java:
##########
@@ -612,10 +631,14 @@ protected void toThrift(TPlanNode planNode) {
 
     @Override
     public String getNodeExplainString(String prefix, TExplainLevel 
detailLevel) {
+        StringBuilder sb = new StringBuilder();
+        if (getPushDownAggNoGroupingOp() == TPushAggOp.MINMAX) {
+            sb.append(prefix).append(String.format("MINMAX: 
opt/total_splits(%d/%d)",
+                    minMaxOptimizationSplitsCount, 
selectedSplitNum)).append("\n");

Review Comment:
   Could you show me an example of explain's result?



##########
be/src/vec/exec/scan/file_scanner.cpp:
##########
@@ -1644,6 +1646,9 @@ Status FileScanner::_init_expr_ctxes() {
             return Status::InternalError(
                     fmt::format("Unknown source slot descriptor, slot_id={}", 
slot_id));
         }
+        if (!_current_range.min_max_values.contains(slot_id)) {

Review Comment:
   In what scenarios would this happen?



##########
regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_min_max.groovy:
##########
@@ -0,0 +1,221 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_iceberg_optimize_min_max", 
"p0,external,doris,external_docker,external_docker_doris") {
+    String enabled = context.config.otherConfigs.get("enableIcebergTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        return
+    }
+
+    String rest_port = context.config.otherConfigs.get("iceberg_rest_uri_port")
+    String minio_port = context.config.otherConfigs.get("iceberg_minio_port")
+    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+    String catalog_name = "test_iceberg_optimize_count"
+
+    try {
+
+        sql """drop catalog if exists ${catalog_name}"""
+        sql """CREATE CATALOG ${catalog_name} PROPERTIES (
+                'type'='iceberg',
+                'iceberg.catalog.type'='rest',
+                'uri' = 'http://${externalEnvIp}:${rest_port}',
+                "s3.access_key" = "admin",
+                "s3.secret_key" = "password",
+                "s3.endpoint" = "http://${externalEnvIp}:${minio_port}",
+                "s3.region" = "us-east-1"
+            );"""
+
+        sql """ switch ${catalog_name} """
+        sql """ use format_v2 """
+
+        sql """ set enable_iceberg_min_max_optimization=false; """
+        String SQLSTR = """
+                MIN(id) AS id_min,
+                MAX(id) AS id_max,
+                MIN(col_boolean) AS col_boolean_min,
+                MAX(col_boolean) AS col_boolean_max,
+                MIN(col_short) AS col_short_min,
+                MAX(col_short) AS col_short_max,
+                MIN(col_byte) AS col_byte_min,
+                MAX(col_byte) AS col_byte_max,
+                MIN(col_integer) AS col_integer_min,
+                MAX(col_integer) AS col_integer_max,
+                MIN(col_long) AS col_long_min,
+                MAX(col_long) AS col_long_max,
+                MIN(col_float) AS col_float_min,
+                MAX(col_float) AS col_float_max,
+                MIN(col_double) AS col_double_min,
+                MAX(col_double) AS col_double_max,
+                MIN(col_date) AS col_date_min,
+                MAX(col_date) AS col_date_max
+        """;
+        def sqlstr1 = """ SELECT ${SQLSTR} FROM sample_mor_parquet; """
+        def sqlstr2 = """ select ${SQLSTR} from sample_cow_parquet; """
+        def sqlstr3 = """ select ${SQLSTR} from sample_mor_orc; """
+        def sqlstr4 = """ select ${SQLSTR} from sample_mor_parquet; """
+
+        // don't use push down count
+        sql """ set enable_iceberg_min_max_optimization=false; """
+        qt_false01 """${sqlstr1}""" 
+        qt_false02 """${sqlstr2}""" 
+        qt_false03 """${sqlstr3}""" 
+        qt_false04 """${sqlstr4}""" 
+
+        // use push down count
+        sql """ set enable_iceberg_min_max_optimization=true; """
+        for (String val: ["1K", "0"]) {
+            sql "set file_split_size=${val}"
+            qt_q01 """${sqlstr1}""" 
+            qt_q02 """${sqlstr2}""" 
+            qt_q03 """${sqlstr3}""" 
+            qt_q04 """${sqlstr4}""" 
+        }
+        sql "unset variable file_split_size;"
+
+        // traditional mode
+        sql """set num_files_in_batch_mode=100000"""
+        explain {
+            sql("""select * from sample_cow_orc""")
+            notContains "approximate"
+        }
+        explain {
+            sql("""${sqlstr1}""")
+            contains """pushdown agg=MINMAX"""

Review Comment:
   We should also check the `MINMAX: opt/total_splits` line in the explain output —
   is the number of optimized splits the value we expect?



##########
regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_min_max.groovy:
##########
@@ -0,0 +1,221 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_iceberg_optimize_min_max", 
"p0,external,doris,external_docker,external_docker_doris") {
+    String enabled = context.config.otherConfigs.get("enableIcebergTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        return
+    }
+
+    String rest_port = context.config.otherConfigs.get("iceberg_rest_uri_port")
+    String minio_port = context.config.otherConfigs.get("iceberg_minio_port")
+    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+    String catalog_name = "test_iceberg_optimize_count"
+
+    try {
+
+        sql """drop catalog if exists ${catalog_name}"""
+        sql """CREATE CATALOG ${catalog_name} PROPERTIES (
+                'type'='iceberg',
+                'iceberg.catalog.type'='rest',
+                'uri' = 'http://${externalEnvIp}:${rest_port}',
+                "s3.access_key" = "admin",
+                "s3.secret_key" = "password",
+                "s3.endpoint" = "http://${externalEnvIp}:${minio_port}",
+                "s3.region" = "us-east-1"
+            );"""
+
+        sql """ switch ${catalog_name} """
+        sql """ use format_v2 """
+
+        sql """ set enable_iceberg_min_max_optimization=false; """
+        String SQLSTR = """
+                MIN(id) AS id_min,
+                MAX(id) AS id_max,
+                MIN(col_boolean) AS col_boolean_min,
+                MAX(col_boolean) AS col_boolean_max,
+                MIN(col_short) AS col_short_min,
+                MAX(col_short) AS col_short_max,
+                MIN(col_byte) AS col_byte_min,
+                MAX(col_byte) AS col_byte_max,
+                MIN(col_integer) AS col_integer_min,
+                MAX(col_integer) AS col_integer_max,
+                MIN(col_long) AS col_long_min,
+                MAX(col_long) AS col_long_max,
+                MIN(col_float) AS col_float_min,
+                MAX(col_float) AS col_float_max,
+                MIN(col_double) AS col_double_min,
+                MAX(col_double) AS col_double_max,
+                MIN(col_date) AS col_date_min,
+                MAX(col_date) AS col_date_max
+        """;
+        def sqlstr1 = """ SELECT ${SQLSTR} FROM sample_mor_parquet; """
+        def sqlstr2 = """ select ${SQLSTR} from sample_cow_parquet; """
+        def sqlstr3 = """ select ${SQLSTR} from sample_mor_orc; """
+        def sqlstr4 = """ select ${SQLSTR} from sample_mor_parquet; """
+
+        // don't use push down count
+        sql """ set enable_iceberg_min_max_optimization=false; """
+        qt_false01 """${sqlstr1}""" 
+        qt_false02 """${sqlstr2}""" 
+        qt_false03 """${sqlstr3}""" 
+        qt_false04 """${sqlstr4}""" 
+
+        // use push down count
+        sql """ set enable_iceberg_min_max_optimization=true; """
+        for (String val: ["1K", "0"]) {
+            sql "set file_split_size=${val}"
+            qt_q01 """${sqlstr1}""" 
+            qt_q02 """${sqlstr2}""" 
+            qt_q03 """${sqlstr3}""" 
+            qt_q04 """${sqlstr4}""" 
+        }
+        sql "unset variable file_split_size;"
+
+        // traditional mode

Review Comment:
   batch mode?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to