This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 6f9eed180e0 [fix](iceberg) fix read parquet page index coredump when 
reading iceberg with equal delete. (#55190)
6f9eed180e0 is described below

commit 6f9eed180e0cd9c46253f7a778e2c237df10df6b
Author: daidai <[email protected]>
AuthorDate: Sun Sep 7 12:22:50 2025 +0800

    [fix](iceberg) fix read parquet page index coredump when reading iceberg 
with equal delete. (#55190)
    
    ### What problem does this PR solve?
    Related PR: #54240
    
    Problem Summary:
    
    In PR #54240, some logic for reading the Parquet page index was removed,
    which causes a coredump when reading an Iceberg table with equal
    deletes.
    
    ```
    terminate called after throwing an instance of 'std::out_of_range'
      what():  unordered_map::at
     0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, 
siginfo_t*, void*) at 
/mnt/disk2/chenqi/doris-master/be/src/common/signal_handler.h:420
     1# 0x00007F84D27531D0 in /lib64/libpthread.so.0
     2# __GI_raise in /lib64/libc.so.6
     3# abort in /lib64/libc.so.6
     4# 0x000055F08125DBB2 in 
/mnt/disk2/chenqi/doris-master-output/be/lib/doris_be
     5# __cxxabiv1::__terminate(void (*)()) in 
/mnt/disk2/chenqi/doris-master-output/be/lib/doris_be
     6# 0x000055F08125C0B1 in 
/mnt/disk2/chenqi/doris-master-output/be/lib/doris_be
     7# 0x000055F08125C204 in 
/mnt/disk2/chenqi/doris-master-output/be/lib/doris_be
     8# std::__throw_out_of_range(char const*) in 
/mnt/disk2/chenqi/doris-master-output/be/lib/doris_be
     9# 
doris::vectorized::ParquetReader::_process_page_index(tparquet::RowGroup 
const&, doris::vectorized::RowGroupReader::RowGroupIndex const&, 
std::vector<doris::vectorized::RowRange, 
std::allocator<doris::vectorized::RowRange> >&) at 
/mnt/disk2/chenqi/doris-master/be/src/vec/exec/format/parquet/vparquet_reader.cpp:1037
    10# doris::vectorized::ParquetReader::_next_row_group_reader() at 
/mnt/disk2/chenqi/doris-master/be/src/vec/exec/format/parquet/vparquet_reader.cpp:846
    11# 
doris::vectorized::ParquetReader::get_next_block(doris::vectorized::Block*, 
unsigned long*, bool*) at 
/mnt/disk2/chenqi/doris-master/be/src/vec/exec/format/parquet/vparquet_reader.cpp:752
    12# 
doris::vectorized::IcebergTableReader::get_next_block_inner(doris::vectorized::Block*,
 unsigned long*, bool*) at 
/mnt/disk2/chenqi/doris-master/be/src/vec/exec/format/table/iceberg_reader.cpp:99
    13# 
doris::vectorized::TableFormatReader::get_next_block(doris::vectorized::Block*, 
unsigned long*, bool*) in /mnt/disk2/chenqi/doris-master-output/be/lib/doris_be
    ...
    ```
---
 be/src/vec/exec/format/parquet/vparquet_reader.cpp |   9 ++--
 .../iceberg/test_iceberg_equal_delete.out          | Bin 0 -> 217 bytes
 .../iceberg/test_iceberg_equal_delete.groovy       |  50 +++++++++++++++++++++
 3 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index 09b1883f1a6..82b7bcf9c42 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -1036,7 +1036,8 @@ Status ParquetReader::_process_page_index(const 
tparquet::RowGroup& row_group,
         return Status::OK();
     }
     PageIndex page_index;
-    if (!config::enable_parquet_page_index || 
!_has_page_index(row_group.columns, page_index)) {
+    if (!config::enable_parquet_page_index || 
!_has_page_index(row_group.columns, page_index) ||
+        _colname_to_slot_id == nullptr) {
         read_whole_row_group();
         return Status::OK();
     }
@@ -1065,8 +1066,10 @@ Status ParquetReader::_process_page_index(const 
tparquet::RowGroup& row_group,
     for (size_t idx = 0; idx < _read_table_columns.size(); idx++) {
         const auto& read_table_col = _read_table_columns[idx];
         const auto& read_file_col = _read_file_columns[idx];
-
-        DCHECK(_colname_to_slot_id != nullptr && 
_colname_to_slot_id->contains(read_table_col));
+        if (!_colname_to_slot_id->contains(read_table_col)) {
+            // equal delete may add a column to read_table_col, but this column has no slot_id.
+            continue;
+        }
         auto slot_id = _colname_to_slot_id->at(read_table_col);
         if (!_push_down_simple_expr.contains(slot_id)) {
             continue;
diff --git 
a/regression-test/data/external_table_p2/iceberg/test_iceberg_equal_delete.out 
b/regression-test/data/external_table_p2/iceberg/test_iceberg_equal_delete.out
new file mode 100644
index 00000000000..d25b832fe97
Binary files /dev/null and 
b/regression-test/data/external_table_p2/iceberg/test_iceberg_equal_delete.out 
differ
diff --git 
a/regression-test/suites/external_table_p2/iceberg/test_iceberg_equal_delete.groovy
 
b/regression-test/suites/external_table_p2/iceberg/test_iceberg_equal_delete.groovy
new file mode 100644
index 00000000000..97761bdeb60
--- /dev/null
+++ 
b/regression-test/suites/external_table_p2/iceberg/test_iceberg_equal_delete.groovy
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_iceberg_equal_delete", 
"p2,external,iceberg,external_remote,external_remote_iceberg") {
+    String enabled = context.config.otherConfigs.get("enableIcebergTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        return
+    }
+
+    String catalog = "test_iceberg_equal_delete"
+    String access_key = context.config.otherConfigs.get("dlf_access_key")
+    String secret_key = context.config.otherConfigs.get("dlf_secret_key")
+
+
+    sql """drop catalog if exists ${catalog};"""
+    sql  """
+        create catalog if not exists ${catalog} properties (
+            "warehouse" = 
"oss://selectdb-qa-datalake-test/iceberg_temp/warehouse",
+            "type" = "iceberg",
+            "oss.secret_key" = "${secret_key}",
+            "oss.endpoint" = "oss-cn-beijing-internal.aliyuncs.com",
+            "oss.access_key" = "${access_key}",
+            "iceberg.catalog.type" = "hadoop"
+            ); 
+    """
+
+
+    sql """ use ${catalog}.flink_db """
+    String tb = """ sample """
+
+    qt_q1  """ select * from ${tb} order by id """
+    qt_q2  """ select data from ${tb} where data = "sample data 8"; """
+    qt_q3  """ select data from ${tb} where data = "sample data 3" """
+    qt_q4  """ select * from ${tb} where id = 10 """
+
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to