(doris) branch master updated: [fix] remove useless const_cast and explain const_cast(3) (#56441)

yiguolei Sun, 26 Oct 2025 02:50:19 -0700

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/master by this push:
     new 955d04d13a6 [fix] remove useless const_cast and explain const_cast(3) 
(#56441)
955d04d13a6 is described below

commit 955d04d13a678e0c109f77d07e2e175f74c789e4
Author: admiring_xm <[email protected]>
AuthorDate: Sun Oct 26 17:50:06 2025 +0800

    [fix] remove useless const_cast and explain const_cast(3) (#56441)
    
    ### What problem does this PR solve?
    Go through the whole be/ directory and find all uses of const_cast.
    
    Issue Number: #55057
    
    Problem Summary:
    1. Remove useless const_cast.
    2. Explain why the remaining uses of const_cast do not result in undefined behavior.
    3. Leave some const_cast usages unmodified:
        (1) code inside DBUG_EXECUTE_IF or in test files
        (2) underlying data structures, such as cow
        (3) casts that only add const, i.e. const_cast<const T*>
    
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [x] No need to test or manual test. Explain why:
            - [ ] This is a refactor/code format and no logic has been changed.
            - [x] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [x] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [x] No.
        - [ ] Yes. <!-- Add document PR link here. eg:
        https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 be/src/olap/rowset/segment_v2/bloom_filter.h       |  2 +-
 .../segment_v2/inverted_index_fs_directory.cpp     | 22 +++++++++++-----------
 .../segment_v2/inverted_index_fs_directory.h       |  8 ++++----
 .../rowset/segment_v2/inverted_index_writer.cpp    |  4 ++--
 be/src/olap/rowset/segment_v2/ngram_bloom_filter.h |  4 +---
 be/src/olap/rowset/segment_v2/page_io.cpp          |  1 +
 be/src/olap/rowset/segment_v2/segment_writer.cpp   |  4 ++--
 be/src/olap/rowset/segment_v2/segment_writer.h     |  2 +-
 .../rowset/segment_v2/vertical_segment_writer.cpp  |  4 ++--
 .../rowset/segment_v2/vertical_segment_writer.h    |  2 +-
 .../rowset/segment_v2/block_bloom_filter_test.cpp  |  2 +-
 11 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/bloom_filter.h 
b/be/src/olap/rowset/segment_v2/bloom_filter.h
index ab33bd4a51b..d9df2e323fd 100644
--- a/be/src/olap/rowset/segment_v2/bloom_filter.h
+++ b/be/src/olap/rowset/segment_v2/bloom_filter.h
@@ -194,7 +194,7 @@ public:
     /// Bloom filters must have equal size and seed.
     virtual bool contains(const BloomFilter& bf_) const { return true; };
 
-    virtual char* data() const { return _data; }
+    virtual const char* data() const { return _data; }
 
     size_t num_bytes() const { return _num_bytes; }
 
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
index f7412ca5a3e..936dda66e85 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
@@ -738,12 +738,12 @@ bool DorisRAMFSDirectory::list(std::vector<std::string>* 
names) const {
 
 bool DorisRAMFSDirectory::fileExists(const char* name) const {
     std::lock_guard<std::mutex> wlock(_this_lock);
-    return filesMap->exists((char*)name);
+    return filesMap->exists(name);
 }
 
 int64_t DorisRAMFSDirectory::fileModified(const char* name) const {
     std::lock_guard<std::mutex> wlock(_this_lock);
-    auto* f = filesMap->get((char*)name);
+    auto* f = filesMap->get(name);
     DBUG_EXECUTE_IF("DorisRAMFSDirectory::fileModified_file_not_found", { f = 
nullptr; })
     if (f == nullptr) {
         _CLTHROWA(CL_ERR_IO, fmt::format("NOT FOUND File {}.", name).c_str());
@@ -755,7 +755,7 @@ void DorisRAMFSDirectory::touchFile(const char* name) {
     lucene::store::RAMFile* file = nullptr;
     {
         std::lock_guard<std::mutex> wlock(_this_lock);
-        file = filesMap->get((char*)name);
+        file = filesMap->get(name);
         DBUG_EXECUTE_IF("DorisRAMFSDirectory::touchFile_file_not_found", { 
file = nullptr; })
         if (file == nullptr) {
             _CLTHROWA(CL_ERR_IO, fmt::format("NOT FOUND File {}.", 
name).c_str());
@@ -775,7 +775,7 @@ void DorisRAMFSDirectory::touchFile(const char* name) {
 
 int64_t DorisRAMFSDirectory::fileLength(const char* name) const {
     std::lock_guard<std::mutex> wlock(_this_lock);
-    auto* f = filesMap->get((char*)name);
+    auto* f = filesMap->get(name);
     DBUG_EXECUTE_IF("DorisRAMFSDirectory::fileLength_file_not_found", { f = 
nullptr; })
     if (f == nullptr) {
         _CLTHROWA(CL_ERR_IO, fmt::format("NOT FOUND File {}.", name).c_str());
@@ -786,7 +786,7 @@ int64_t DorisRAMFSDirectory::fileLength(const char* name) 
const {
 bool DorisRAMFSDirectory::openInput(const char* name, 
lucene::store::IndexInput*& ret,
                                     CLuceneError& error, int32_t bufferSize) {
     std::lock_guard<std::mutex> wlock(_this_lock);
-    auto* file = filesMap->get((char*)name);
+    auto* file = filesMap->get(name);
     DBUG_EXECUTE_IF("DorisRAMFSDirectory::openInput_file_not_found", { file = 
nullptr; })
     if (file == nullptr) {
         error.set(CL_ERR_IO,
@@ -805,7 +805,7 @@ void DorisRAMFSDirectory::close() {
 
 bool DorisRAMFSDirectory::doDeleteFile(const char* name) {
     std::lock_guard<std::mutex> wlock(_this_lock);
-    auto itr = filesMap->find((char*)name);
+    auto itr = filesMap->find(name);
     if (itr != filesMap->end()) {
         SCOPED_LOCK_MUTEX(this->THIS_LOCK);
         sizeInBytes -= itr->second->sizeInBytes;
@@ -821,15 +821,15 @@ bool DorisRAMFSDirectory::deleteDirectory() {
 
 void DorisRAMFSDirectory::renameFile(const char* from, const char* to) {
     std::lock_guard<std::mutex> wlock(_this_lock);
-    auto itr = filesMap->find((char*)from);
+    auto itr = filesMap->find(from);
 
     /* DSR:CL_BUG_LEAK:
     ** If a file named $to already existed, its old value was leaked.
     ** My inclination would be to prevent this implicit deletion with an
     ** exception, but it happens routinely in CLucene's internals (e.g., during
     ** IndexWriter.addIndexes with the file named 'segments'). */
-    if (filesMap->exists((char*)to)) {
-        auto itr1 = filesMap->find((char*)to);
+    if (filesMap->exists(to)) {
+        auto itr1 = filesMap->find(to);
         SCOPED_LOCK_MUTEX(this->THIS_LOCK);
         sizeInBytes -= itr1->second->sizeInBytes;
         filesMap->removeitr(itr1);
@@ -855,8 +855,8 @@ lucene::store::IndexOutput* 
DorisRAMFSDirectory::createOutput(const char* name)
     std::lock_guard<std::mutex> wlock(_this_lock);
 
     // get the actual pointer to the output name
-    char* n = nullptr;
-    auto itr = filesMap->find(const_cast<char*>(name));
+    const char* n = nullptr;
+    auto itr = filesMap->find(name);
     DBUG_EXECUTE_IF("DorisRAMFSDirectory::createOutput_itr_filesMap_end",
                     { itr = filesMap->end(); })
     if (itr != filesMap->end()) {
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h 
b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h
index 7f799807a6e..ec606e52911 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h
@@ -102,10 +102,10 @@ private:
 
 class CLUCENE_EXPORT DorisRAMFSDirectory : public DorisFSDirectory {
 protected:
-    using FileMap =
-            lucene::util::CLHashMap<char*, lucene::store::RAMFile*, 
lucene::util::Compare::Char,
-                                    lucene::util::Equals::Char, 
lucene::util::Deletor::acArray,
-                                    
lucene::util::Deletor::Object<lucene::store::RAMFile>>;
+    using FileMap = lucene::util::CLHashMap<const char*, 
lucene::store::RAMFile*,
+                                            lucene::util::Compare::Char, 
lucene::util::Equals::Char,
+                                            lucene::util::Deletor::cacArray,
+                                            
lucene::util::Deletor::Object<lucene::store::RAMFile>>;
 
     // unlike the java Hashtable, FileMap is not synchronized, and all access 
must be protected by a lock
     FileMap* filesMap;
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
index 13aa3356f2d..1f5d8ab561d 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
@@ -542,7 +542,7 @@ Status 
InvertedIndexColumnWriter<field_type>::add_array_values(size_t field_size
             return Status::InternalError("field or index writer is null in 
inverted index writer");
         }
         for (int i = 0; i < count; ++i) {
-            auto* item_data_ptr = 
const_cast<CollectionValue*>(values)->mutable_data();
+            const auto* item_data_ptr = values->data();
             std::vector<std::string> strings;
 
             for (size_t j = 0; j < values->length(); ++j) {
@@ -561,7 +561,7 @@ Status 
InvertedIndexColumnWriter<field_type>::add_array_values(size_t field_size
         }
     } else if constexpr (field_is_numeric_type(field_type)) {
         for (int i = 0; i < count; ++i) {
-            auto* item_data_ptr = 
const_cast<CollectionValue*>(values)->mutable_data();
+            const auto* item_data_ptr = values->data();
 
             for (size_t j = 0; j < values->length(); ++j) {
                 const auto* p = reinterpret_cast<const 
CppType*>(item_data_ptr);
diff --git a/be/src/olap/rowset/segment_v2/ngram_bloom_filter.h 
b/be/src/olap/rowset/segment_v2/ngram_bloom_filter.h
index f2ba94c5629..f2be8feae98 100644
--- a/be/src/olap/rowset/segment_v2/ngram_bloom_filter.h
+++ b/be/src/olap/rowset/segment_v2/ngram_bloom_filter.h
@@ -41,9 +41,7 @@ public:
     void add_bytes(const char* data, size_t len) override;
     bool contains(const BloomFilter& bf_) const override;
     Status init(const char* buf, size_t size, HashStrategyPB strategy) 
override;
-    char* data() const override {
-        return reinterpret_cast<char*>(const_cast<uint64_t*>(filter.data()));
-    }
+    const char* data() const override { return reinterpret_cast<const 
char*>(filter.data()); }
     size_t size() const override { return _size; }
     void add_hash(uint64_t) override {}
     bool test_hash(uint64_t hash) const override { return true; }
diff --git a/be/src/olap/rowset/segment_v2/page_io.cpp 
b/be/src/olap/rowset/segment_v2/page_io.cpp
index b1874f565a2..2af8dd756f1 100644
--- a/be/src/olap/rowset/segment_v2/page_io.cpp
+++ b/be/src/olap/rowset/segment_v2/page_io.cpp
@@ -176,6 +176,7 @@ Status PageIO::read_and_decompress_page_(const 
PageReadOptions& opts, PageHandle
     if (opts.verify_checksum) {
         uint32_t expect = decode_fixed32_le((uint8_t*)page_slice.data + 
page_slice.size - 4);
         uint32_t actual = crc32c::Value(page_slice.data, page_slice.size - 4);
+        // here const_cast is used for testing.
         InjectionContext ctx = {&actual, const_cast<PageReadOptions*>(&opts)};
         (void)ctx;
         
TEST_INJECTION_POINT_CALLBACK("PageIO::read_and_decompress_page:crc_failure_inj",
 &ctx);
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 0dbe2153778..2bb38d7f40e 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -375,7 +375,7 @@ void SegmentWriter::_maybe_invalid_row_cache(const 
std::string& key) {
     }
 }
 
-void SegmentWriter::_serialize_block_to_row_column(vectorized::Block& block) {
+void SegmentWriter::_serialize_block_to_row_column(const vectorized::Block& 
block) {
     if (block.rows() == 0) {
         return;
     }
@@ -697,7 +697,7 @@ Status SegmentWriter::append_block(const vectorized::Block* 
block, size_t row_po
     // or it's schema change write(since column data type maybe changed, so we 
should reubild)
     if (_opts.write_type == DataWriteType::TYPE_DIRECT ||
         _opts.write_type == DataWriteType::TYPE_SCHEMA_CHANGE) {
-        _serialize_block_to_row_column(*const_cast<vectorized::Block*>(block));
+        _serialize_block_to_row_column(*block);
     }
 
     _olap_data_convertor->set_source_content(block, row_pos, num_rows);
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h 
b/be/src/olap/rowset/segment_v2/segment_writer.h
index b5c99bbafa9..d7f58dfe5dd 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/segment_writer.h
@@ -192,7 +192,7 @@ private:
     void set_min_max_key(const Slice& key);
     void set_min_key(const Slice& key);
     void set_max_key(const Slice& key);
-    void _serialize_block_to_row_column(vectorized::Block& block);
+    void _serialize_block_to_row_column(const vectorized::Block& block);
     Status _generate_primary_key_index(
             const std::vector<const KeyCoder*>& primary_key_coders,
             const std::vector<vectorized::IOlapColumnDataAccessor*>& 
primary_key_columns,
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
index 66eaf5a9c4b..8fde31b9135 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
@@ -355,7 +355,7 @@ void VerticalSegmentWriter::_maybe_invalid_row_cache(const 
std::string& key) con
     }
 }
 
-void VerticalSegmentWriter::_serialize_block_to_row_column(vectorized::Block& 
block) {
+void VerticalSegmentWriter::_serialize_block_to_row_column(const 
vectorized::Block& block) {
     if (block.rows() == 0) {
         return;
     }
@@ -948,7 +948,7 @@ Status VerticalSegmentWriter::write_batch() {
         _opts.write_type == DataWriteType::TYPE_SCHEMA_CHANGE) {
         for (auto& data : _batched_blocks) {
             // TODO: maybe we should pass range to this method
-            
_serialize_block_to_row_column(*const_cast<vectorized::Block*>(data.block));
+            _serialize_block_to_row_column(*data.block);
         }
     }
 
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.h 
b/be/src/olap/rowset/segment_v2/vertical_segment_writer.h
index 294756788c4..bc2b770ce30 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.h
@@ -162,7 +162,7 @@ private:
     void _set_min_max_key(const Slice& key);
     void _set_min_key(const Slice& key);
     void _set_max_key(const Slice& key);
-    void _serialize_block_to_row_column(vectorized::Block& block);
+    void _serialize_block_to_row_column(const vectorized::Block& block);
     Status _probe_key_for_mow(std::string key, std::size_t segment_pos, bool 
have_input_seq_column,
                               bool have_delete_sign,
                               const std::vector<RowsetSharedPtr>& 
specified_rowsets,
diff --git a/be/test/olap/rowset/segment_v2/block_bloom_filter_test.cpp 
b/be/test/olap/rowset/segment_v2/block_bloom_filter_test.cpp
index e4db2828666..d1a27e4259b 100644
--- a/be/test/olap/rowset/segment_v2/block_bloom_filter_test.cpp
+++ b/be/test/olap/rowset/segment_v2/block_bloom_filter_test.cpp
@@ -87,7 +87,7 @@ TEST_F(BlockBloomFilterTest, Normal) {
     EXPECT_TRUE(bf2->test_bytes(nullptr, 1));
 
     bf->reset();
-    char* data = bf->data();
+    const char* data = bf->data();
     // data is reset to 0
     for (int i = 0; i < bf->size(); ++i) {
         EXPECT_EQ(*data, 0);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(doris) branch master updated: [fix] remove useless const_cast and explain const_cast(3) (#56441)

Reply via email to