This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 7d4e855631b [Bug](coredump) fix regresstion test coredump in multi 
thread access map (#31664)
7d4e855631b is described below

commit 7d4e855631b72ebdca0906da0c81e749acf482cc
Author: HappenLee <happen...@hotmail.com>
AuthorDate: Sat Mar 2 23:38:57 2024 +0800

    [Bug](coredump) fix regresstion test coredump in multi thread access map 
(#31664)
---
 be/src/olap/comparison_predicate.h | 44 ++++++++++++++++++++++----------------
 be/src/olap/data_dir.cpp           |  2 +-
 be/src/olap/olap_common.h          | 28 +++++++++++++-----------
 be/src/olap/snapshot_manager.cpp   |  2 +-
 be/src/olap/storage_engine.h       |  4 ++--
 be/src/olap/tablet.cpp             |  4 ++--
 6 files changed, 48 insertions(+), 36 deletions(-)

diff --git a/be/src/olap/comparison_predicate.h 
b/be/src/olap/comparison_predicate.h
index 826b1414b2a..3673b89a1d0 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -578,24 +578,26 @@ private:
 
     __attribute__((flatten)) int32_t _find_code_from_dictionary_column(
             const vectorized::ColumnDictI32& column) const {
-        if 
(!_segment_id_to_cached_code.contains(column.get_rowset_segment_id())) {
-            int32_t code = _is_range() ? column.find_code_by_bound(_value, 
_is_greater(), _is_eq())
-                                       : column.find_code(_value);
-
-            // Sometimes the dict is not initialized when run comparison 
predicate here, for example,
-            // the full page is null, then the reader will skip read, so that 
the dictionary is not
-            // inited. The cached code is wrong during this case, because the 
following page maybe not
-            // null, and the dict should have items in the future.
-            //
-            // Cached code may have problems, so that add a config here, if 
not opened, then
-            // we will return the code and not cache it.
-            if (column.is_dict_empty() || 
!config::enable_low_cardinality_cache_code) {
-                return code;
-            }
-            // If the dict is not empty, then the dict is inited and we could 
cache the value.
-            _segment_id_to_cached_code[column.get_rowset_segment_id()] = code;
+        int32_t code = 0;
+        if (_segment_id_to_cached_code.if_contains(
+                    column.get_rowset_segment_id(),
+                    [&code](const auto& pair) { code = pair.second; })) {
+            return code;
+        }
+        code = _is_range() ? column.find_code_by_bound(_value, _is_greater(), 
_is_eq())
+                           : column.find_code(_value);
+        // Sometimes the dict is not initialized when run comparison predicate 
here, for example,
+        // the full page is null, then the reader will skip read, so that the 
dictionary is not
+        // inited. The cached code is wrong during this case, because the 
following page maybe not
+        // null, and the dict should have items in the future.
+        //
+        // Cached code may have problems, so that add a config here, if not 
opened, then
+        // we will return the code and not cache it.
+        if (!column.is_dict_empty() && 
config::enable_low_cardinality_cache_code) {
+            _segment_id_to_cached_code.emplace(std::pair 
{column.get_rowset_segment_id(), code});
         }
-        return _segment_id_to_cached_code[column.get_rowset_segment_id()];
+
+        return code;
     }
 
     std::string _debug_string() const override {
@@ -604,7 +606,13 @@ private:
         return info;
     }
 
-    mutable std::map<std::pair<RowsetId, uint32_t>, int32_t> 
_segment_id_to_cached_code;
+    mutable phmap::parallel_flat_hash_map<
+            std::pair<RowsetId, uint32_t>, int32_t,
+            phmap::priv::hash_default_hash<std::pair<RowsetId, uint32_t>>,
+            phmap::priv::hash_default_eq<std::pair<RowsetId, uint32_t>>,
+            std::allocator<std::pair<const std::pair<RowsetId, uint32_t>, 
int32_t>>, 4,
+            std::shared_mutex>
+            _segment_id_to_cached_code;
     T _value;
 };
 
diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp
index 9a7d1ad5cdc..443cf1b12a3 100644
--- a/be/src/olap/data_dir.cpp
+++ b/be/src/olap/data_dir.cpp
@@ -776,7 +776,7 @@ void DataDir::_perform_path_gc_by_rowset(const 
std::vector<std::string>& tablet_
         };
 
         // rowset_id -> is_garbage
-        std::unordered_map<RowsetId, bool, HashOfRowsetId> checked_rowsets;
+        std::unordered_map<RowsetId, bool> checked_rowsets;
         for (auto&& [rowset_id, filename] : rowsets_not_pending) {
             if (auto it = checked_rowsets.find(rowset_id); it != 
checked_rowsets.end()) {
                 if (it->second) { // Is checked garbage rowset
diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index 42bad24dfed..c08705861df 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -458,18 +458,7 @@ struct RowsetId {
     }
 };
 
-// used for hash-struct of hash_map<RowsetId, Rowset*>.
-struct HashOfRowsetId {
-    size_t operator()(const RowsetId& rowset_id) const {
-        size_t seed = 0;
-        seed = HashUtil::hash64(&rowset_id.hi, sizeof(rowset_id.hi), seed);
-        seed = HashUtil::hash64(&rowset_id.mi, sizeof(rowset_id.mi), seed);
-        seed = HashUtil::hash64(&rowset_id.lo, sizeof(rowset_id.lo), seed);
-        return seed;
-    }
-};
-
-using RowsetIdUnorderedSet = std::unordered_set<RowsetId, HashOfRowsetId>;
+using RowsetIdUnorderedSet = std::unordered_set<RowsetId>;
 
 // Extract rowset id from filename, return uninitialized rowset id if filename 
is invalid
 inline RowsetId extract_rowset_id(std::string_view filename) {
@@ -517,3 +506,18 @@ struct RidAndPos {
 using PartialUpdateReadPlan = std::map<RowsetId, std::map<uint32_t, 
std::vector<RidAndPos>>>;
 
 } // namespace doris
+
+// This intended to be a "good" hash function.  It may change from time to 
time.
+template <>
+struct std::hash<doris::RowsetId> {
+    size_t operator()(const doris::RowsetId& rowset_id) const {
+        size_t seed = 0;
+        seed = doris::HashUtil::xxHash64WithSeed((const char*)&rowset_id.hi, 
sizeof(rowset_id.hi),
+                                                 seed);
+        seed = doris::HashUtil::xxHash64WithSeed((const char*)&rowset_id.mi, 
sizeof(rowset_id.mi),
+                                                 seed);
+        seed = doris::HashUtil::xxHash64WithSeed((const char*)&rowset_id.lo, 
sizeof(rowset_id.lo),
+                                                 seed);
+        return seed;
+    }
+};
diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp
index c5bf5a785db..71e39e60d0f 100644
--- a/be/src/olap/snapshot_manager.cpp
+++ b/be/src/olap/snapshot_manager.cpp
@@ -156,7 +156,7 @@ Result<std::vector<PendingRowsetGuard>> 
SnapshotManager::convert_rowset_ids(
     tablet_schema->init_from_pb(new_tablet_meta_pb.schema());
 
     std::unordered_map<Version, RowsetMetaPB*, HashOfVersion> rs_version_map;
-    std::unordered_map<RowsetId, RowsetId, HashOfRowsetId> rowset_id_mapping;
+    std::unordered_map<RowsetId, RowsetId> rowset_id_mapping;
     guards.reserve(cloned_tablet_meta_pb.rs_metas_size() +
                    cloned_tablet_meta_pb.stale_rs_metas_size());
     for (auto&& visible_rowset : cloned_tablet_meta_pb.rs_metas()) {
diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h
index 6ebfb9479f4..b0e9ed6000e 100644
--- a/be/src/olap/storage_engine.h
+++ b/be/src/olap/storage_engine.h
@@ -158,7 +158,7 @@ protected:
 
     // Hold reference of quering rowsets
     std::mutex _quering_rowsets_mutex;
-    std::unordered_map<RowsetId, RowsetSharedPtr, HashOfRowsetId> 
_querying_rowsets;
+    std::unordered_map<RowsetId, RowsetSharedPtr> _querying_rowsets;
     scoped_refptr<Thread> _evict_quering_rowset_thread;
 };
 
@@ -409,7 +409,7 @@ private:
     std::atomic_bool _stopped {false};
 
     std::mutex _gc_mutex;
-    std::unordered_map<RowsetId, RowsetSharedPtr, HashOfRowsetId> 
_unused_rowsets;
+    std::unordered_map<RowsetId, RowsetSharedPtr> _unused_rowsets;
     PendingRowsetSet _pending_local_rowsets;
     PendingRowsetSet _pending_remote_rowsets;
 
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 7f72af7f86c..c217505eaaa 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -2432,10 +2432,10 @@ Status Tablet::check_rowid_conversion(
         return Status::OK();
     }
     std::vector<segment_v2::SegmentSharedPtr> dst_segments;
+
     RETURN_IF_ERROR(
             
std::dynamic_pointer_cast<BetaRowset>(dst_rowset)->load_segments(&dst_segments));
-    std::unordered_map<RowsetId, std::vector<segment_v2::SegmentSharedPtr>, 
HashOfRowsetId>
-            input_rowsets_segment;
+    std::unordered_map<RowsetId, std::vector<segment_v2::SegmentSharedPtr>> 
input_rowsets_segment;
 
     VLOG_DEBUG << "check_rowid_conversion, dst_segments size: " << 
dst_segments.size();
     for (auto [src_rowset, locations] : location_map) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to