This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new a6155a517d7 [fix] (topn) fix uncleared block in topn_next() (#39119) 
(#39224)
a6155a517d7 is described below

commit a6155a517d737a46aa08f8772655f8ade4002012
Author: Sun Chenyang <csun5...@gmail.com>
AuthorDate: Tue Aug 13 10:34:17 2024 +0800

    [fix] (topn) fix uncleared block in topn_next() (#39119) (#39224)
    
    ## Proposed changes
    
    pick from master #39119
---
 be/src/vec/olap/vcollect_iterator.cpp              | 32 ++++-----
 .../data/inverted_index_p0/topn_clear_block.out    |  5 ++
 .../inverted_index_p0/topn_clear_block.groovy      | 84 ++++++++++++++++++++++
 3 files changed, 105 insertions(+), 16 deletions(-)

diff --git a/be/src/vec/olap/vcollect_iterator.cpp 
b/be/src/vec/olap/vcollect_iterator.cpp
index 3ce1869546c..10ac1e236b2 100644
--- a/be/src/vec/olap/vcollect_iterator.cpp
+++ b/be/src/vec/olap/vcollect_iterator.cpp
@@ -256,18 +256,21 @@ Status VCollectIterator::_topn_next(Block* block) {
         return Status::Error<END_OF_FILE>("");
     }
 
+    // clear TEMP columns to avoid column align problem
+    auto clear_temp_columns = [](Block* block) {
+        auto all_column_names = block->get_names();
+        for (auto& name : all_column_names) {
+            if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) {
+                // clear TEMP columns from block to prevent the storage 
engine from merging with this
+                // fake column
+                block->erase(name);
+            }
+        }
+    };
+
+    clear_temp_columns(block);
     auto clone_block = block->clone_empty();
     MutableBlock mutable_block = 
vectorized::MutableBlock::build_mutable_block(&clone_block);
-    // clear TEMP columns to avoid column align problem in 
mutable_block.add_rows bellow
-    auto all_column_names = mutable_block.get_names();
-    for (auto& name : all_column_names) {
-        if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) {
-            mutable_block.erase(name);
-            // clear TEMP columns from block to prevent from storage engine 
merge with this
-            // fake column
-            block->erase(name);
-        }
-    }
 
     if (!_reader->_reader_context.read_orderby_key_columns) {
         return Status::Error<ErrorCode::INTERNAL_ERROR>(
@@ -301,6 +304,8 @@ Status VCollectIterator::_topn_next(Block* block) {
                 if (status.is<END_OF_FILE>()) {
                     eof = true;
                     if (block->rows() == 0) {
+                        // clear TEMP columns to avoid column align problem in 
segment iterator
+                        clear_temp_columns(block);
                         break;
                     }
                 } else {
@@ -312,12 +317,7 @@ Status VCollectIterator::_topn_next(Block* block) {
             RETURN_IF_ERROR(VExprContext::filter_block(
                     _reader->_reader_context.filter_block_conjuncts, block, 
block->columns()));
            // clear TEMP columns to avoid column align problem in 
mutable_block.add_rows below
-            auto all_column_names = block->get_names();
-            for (auto& name : all_column_names) {
-                if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) {
-                    block->erase(name);
-                }
-            }
+            clear_temp_columns(block);
 
             // update read rows
             read_rows += block->rows();
diff --git a/regression-test/data/inverted_index_p0/topn_clear_block.out 
b/regression-test/data/inverted_index_p0/topn_clear_block.out
new file mode 100644
index 00000000000..6f6227298ab
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/topn_clear_block.out
@@ -0,0 +1,5 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+17.0.0.0
+17.0.0.0
+
diff --git a/regression-test/suites/inverted_index_p0/topn_clear_block.groovy 
b/regression-test/suites/inverted_index_p0/topn_clear_block.groovy
new file mode 100644
index 00000000000..7486a658d60
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/topn_clear_block.groovy
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_clear_block") { 
+
+    // load data
+    def load_data = { loadTableName, fileName ->
+        streamLoad {
+            table loadTableName
+            set 'read_json_by_line', 'true'
+            set 'format', 'json'
+            file fileName
+            time 10000
+
+            check { result, exception, startTime, endTime ->
+                if (exception != null) {
+                    throw exception
+                }
+                log.info("Stream load result: ${result}".toString())
+                def json = parseJson(result)
+                assertEquals("success", json.Status.toLowerCase())
+                assertEquals(json.NumberTotalRows, json.NumberLoadedRows)
+                assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
+            }
+        }
+    }
+
+    sql """ set enable_match_without_inverted_index = false; """
+    // sql """ set 
+    def dupTableName = "dup_httplogs"
+    sql """ drop table if exists ${dupTableName} """
+    // create table
+    sql """
+        CREATE TABLE IF NOT EXISTS dup_httplogs
+        (   
+            `id`          bigint NOT NULL AUTO_INCREMENT(100),
+            `@timestamp` int(11) NULL,
+            `clientip`   varchar(20) NULL,
+            `request`    text NULL,
+            `status`     int(11) NULL,
+            `size`       int(11) NULL,
+            INDEX        clientip_idx (`clientip`) USING INVERTED COMMENT '',
+            INDEX        request_idx (`request`) USING INVERTED 
PROPERTIES("parser" = "unicode", "support_phrase" = "true") COMMENT '',
+            INDEX        status_idx (`status`) USING INVERTED COMMENT '',
+            INDEX        size_idx (`size`) USING INVERTED COMMENT ''
+        ) DUPLICATE KEY(`id`)
+        DISTRIBUTED BY HASH (`id`) BUCKETS 32
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1",
+        "compaction_policy" = "time_series",
+        "inverted_index_storage_format" = "v2",
+        "compression" = "ZSTD",
+        "disable_auto_compaction" = "true"
+        );
+    """
+
+    load_data.call(dupTableName, 'documents-1000.json');
+    load_data.call(dupTableName, 'documents-1000.json');
+    load_data.call(dupTableName, 'documents-1000.json');
+    load_data.call(dupTableName, 'documents-1000.json');
+    load_data.call(dupTableName, 'documents-1000.json');
+    sql """ delete from dup_httplogs where clientip = '40.135.0.0'; """
+    sql """ delete from dup_httplogs where status = 304; """
+    sql """ delete from dup_httplogs where size = 24736; """
+    sql """ delete from dup_httplogs where request = 'GET /images/hm_bg.jpg 
HTTP/1.0'; """
+
+    sql """ sync """
+
+    qt_sql """ SELECT clientip from ${dupTableName} WHERE clientip NOT IN 
(NULL, '') or clientip IN ('17.0.0.0') ORDER BY id LIMIT 2 """
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to