This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new e4af142e9b7 [fix](inverted index) fix wrong read data for primary key 
(#47841)
e4af142e9b7 is described below

commit e4af142e9b73e56174458e2f5f42ea15a94b9325
Author: airborne12 <jiang...@selectdb.com>
AuthorDate: Fri Feb 21 15:08:24 2025 +0800

    [fix](inverted index) fix wrong read data for primary key (#47841)
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
    Problem Summary:
    Previously, queries like SELECT COUNT(*) FROM table WHERE
    date='2017-10-01' required reading the date column in the first read
    phase, even though it was only used for filtering and not in the
    aggregation. This PR optimizes the execution plan to eliminate
    unnecessary column reads, improving performance.
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp         | 14 +++++++++++++-
 .../inverted_index_p0/test_pk_no_need_read_data.groovy     | 13 ++++++++-----
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index f1174d3ff26..970ab3c2645 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -2507,7 +2507,19 @@ bool SegmentIterator::_no_need_read_key_data(ColumnId 
cid, vectorized::MutableCo
         return false;
     }
 
-    if (!_check_all_conditions_passed_inverted_index_for_column(cid)) {
+    // _seek_schema is set in get_row_ranges_by_keys; it is null when there is no
+    // primary key range, in which case we need to read the data.
+    if (!_seek_schema) {
+        return false;
+    }
+    // check if the column is in the seek_schema
+    if (std::none_of(_seek_schema->columns().begin(), 
_seek_schema->columns().end(),
+                     [&](const Field* col) {
+                         return (col && 
_opts.tablet_schema->field_index(col->unique_id()) == cid);
+                     })) {
+        return false;
+    }
+    if (!_check_all_conditions_passed_inverted_index_for_column(cid, true)) {
         return false;
     }
 
diff --git 
a/regression-test/suites/inverted_index_p0/test_pk_no_need_read_data.groovy 
b/regression-test/suites/inverted_index_p0/test_pk_no_need_read_data.groovy
index 1641d0e3e16..976ef3623b9 100644
--- a/regression-test/suites/inverted_index_p0/test_pk_no_need_read_data.groovy
+++ b/regression-test/suites/inverted_index_p0/test_pk_no_need_read_data.groovy
@@ -14,7 +14,7 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-suite("test_pk_no_need_read_data", "p0"){
+suite("test_pk_no_need_read_data", "nonConcurrent"){
     def table1 = "test_pk_no_need_read_data"
 
     sql "drop table if exists ${table1}"
@@ -56,12 +56,15 @@ suite("test_pk_no_need_read_data", "p0"){
     sql "set enable_count_on_index_pushdown = true"
     sql """ set enable_common_expr_pushdown = true """
 
-    qt_select_0 "SELECT COUNT() FROM ${table1} WHERE date='2017-10-01'"
+    try {
+        
GetDebugPoint().enableDebugPointForAllBEs("segment_iterator._read_columns_by_index")
+        qt_select_0 "SELECT COUNT() FROM ${table1} WHERE date='2017-10-01'"
+    } finally {
+        
GetDebugPoint().disableDebugPointForAllBEs("segment_iterator._read_columns_by_index")
+    }
     qt_select_1 "SELECT COUNT() FROM ${table1} WHERE year(date)='2017'"
-
-    // case1: disable count on index
+    // case2: disable count on index
     sql "set enable_count_on_index_pushdown = false"
-
     qt_select_2 "SELECT COUNT() FROM ${table1} WHERE date='2017-10-01'"
     qt_select_3 "SELECT COUNT() FROM ${table1} WHERE year(date)='2017'"
 }
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to