This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 23b964492d2e6d23b34231c07d4bda22a8625333
Author: Jerry Hu <mrh...@gmail.com>
AuthorDate: Tue Jul 11 18:34:20 2023 +0800

    [improvement](bitmap) Use shared_ptr in BitmapValue to avoid deep copying 
(#19101) (#21271)
    
    Currently bitmapvalue type is copied between columns, it cost a lot of 
memory. Use a shared ptr in bitmap value to avoid copy data.
---
 be/src/util/bitmap_value.h         | 227 +++++++++++++++++++++++++------------
 be/test/util/bitmap_value_test.cpp |  22 ++++
 2 files changed, 178 insertions(+), 71 deletions(-)

diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h
index 21fa5f217e..f2c869f862 100644
--- a/be/src/util/bitmap_value.h
+++ b/be/src/util/bitmap_value.h
@@ -1144,10 +1144,10 @@ class BitmapValueIterator;
 class BitmapValue {
 public:
     // Construct an empty bitmap.
-    BitmapValue() : _type(EMPTY) {}
+    BitmapValue() : _type(EMPTY), _is_shared(false) {}
 
     // Construct a bitmap with one element.
-    explicit BitmapValue(uint64_t value) : _sv(value), _type(SINGLE) {}
+    explicit BitmapValue(uint64_t value) : _sv(value), _type(SINGLE), 
_is_shared(false) {}
 
     // Construct a bitmap from serialized data.
     explicit BitmapValue(const char* src) {
@@ -1155,8 +1155,48 @@ public:
         DCHECK(res);
     }
 
+    BitmapValue(const BitmapValue& other) {
+        _type = other._type;
+        _sv = other._sv;
+        _bitmap = other._bitmap;
+        _is_shared = true;
+        // should also set other's state to shared, so that other bitmap value 
will
+        // create a new bitmap when it wants to modify it.
+        const_cast<BitmapValue&>(other)._is_shared = true;
+    }
+
+    BitmapValue(BitmapValue&& other) {
+        _type = other._type;
+        _sv = other._sv;
+        _is_shared = other._is_shared;
+        _bitmap = std::move(other._bitmap);
+    }
+
+    BitmapValue& operator=(const BitmapValue& other) {
+        _type = other._type;
+        _sv = other._sv;
+        _bitmap = other._bitmap;
+        _is_shared = true;
+        // should also set other's state to shared, so that other bitmap value 
will
+        // create a new bitmap when it wants to modify it.
+        const_cast<BitmapValue&>(other)._is_shared = true;
+        return *this;
+    }
+
+    BitmapValue& operator=(BitmapValue&& other) {
+        if (this == &other) {
+            return *this;
+        }
+
+        _type = other._type;
+        _sv = other._sv;
+        _bitmap = std::move(other._bitmap);
+        _is_shared = other._is_shared;
+        return *this;
+    }
+
     // Construct a bitmap from given elements.
-    explicit BitmapValue(const std::vector<uint64_t>& bits) {
+    explicit BitmapValue(const std::vector<uint64_t>& bits) : 
_is_shared(false) {
         switch (bits.size()) {
         case 0:
             _type = EMPTY;
@@ -1167,7 +1207,8 @@ public:
             break;
         default:
             _type = BITMAP;
-            _bitmap.addMany(bits.size(), &bits[0]);
+            _prepare_bitmap_for_write();
+            _bitmap->addMany(bits.size(), &bits[0]);
         }
     }
 
@@ -1182,12 +1223,15 @@ public:
             if (_sv == value) {
                 break;
             }
-            _bitmap.add(_sv);
-            _bitmap.add(value);
+            _prepare_bitmap_for_write();
+            _bitmap->add(_sv);
+            _bitmap->add(value);
             _type = BITMAP;
             break;
         case BITMAP:
-            _bitmap.add(value);
+            _prepare_bitmap_for_write();
+            _bitmap->add(value);
+            break;
         }
     }
 
@@ -1202,7 +1246,8 @@ public:
             }
             break;
         case BITMAP:
-            _bitmap.remove(value);
+            _prepare_bitmap_for_write();
+            _bitmap->remove(value);
             _convert_to_smaller_type();
         }
     }
@@ -1220,12 +1265,13 @@ public:
             case EMPTY:
                 break;
             case SINGLE:
-                if (rhs._bitmap.contains(_sv)) {
+                if (rhs._bitmap->contains(_sv)) {
                     _type = EMPTY;
                 }
                 break;
             case BITMAP:
-                _bitmap -= rhs._bitmap;
+                _prepare_bitmap_for_write();
+                *_bitmap -= *rhs._bitmap;
                 _convert_to_smaller_type();
                 break;
             }
@@ -1250,15 +1296,21 @@ public:
             switch (_type) {
             case EMPTY:
                 _bitmap = rhs._bitmap;
+                const_cast<BitmapValue&>(rhs)._is_shared = true;
+                _is_shared = true;
                 _type = BITMAP;
                 break;
             case SINGLE:
                 _bitmap = rhs._bitmap;
-                _bitmap.add(_sv);
+                const_cast<BitmapValue&>(rhs)._is_shared = true;
+                _is_shared = true;
+                _prepare_bitmap_for_write();
+                _bitmap->add(_sv);
                 _type = BITMAP;
                 break;
             case BITMAP:
-                _bitmap |= rhs._bitmap;
+                _prepare_bitmap_for_write();
+                *_bitmap |= *rhs._bitmap;
             }
             break;
         }
@@ -1277,24 +1329,25 @@ public:
                 single_values.push_back(value->_sv);
                 break;
             case BITMAP:
-                bitmaps.push_back(&value->_bitmap);
+                bitmaps.push_back(value->_bitmap.get());
                 break;
             }
         }
 
         if (!bitmaps.empty()) {
+            _prepare_bitmap_for_write();
             switch (_type) {
             case EMPTY:
-                _bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), 
bitmaps.data());
+                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), 
bitmaps.data());
                 _type = BITMAP;
                 break;
             case SINGLE:
-                _bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), 
bitmaps.data());
-                _bitmap.add(_sv);
+                *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(), 
bitmaps.data());
+                _bitmap->add(_sv);
                 _type = BITMAP;
                 break;
             case BITMAP:
-                _bitmap |= detail::Roaring64Map::fastunion(bitmaps.size(), 
bitmaps.data());
+                *_bitmap |= detail::Roaring64Map::fastunion(bitmaps.size(), 
bitmaps.data());
                 break;
             }
         }
@@ -1303,9 +1356,10 @@ public:
             _sv = single_values[0];
             _type = SINGLE;
         } else if (!single_values.empty()) {
-            _bitmap.addMany(single_values.size(), single_values.data());
+            _prepare_bitmap_for_write();
+            _bitmap->addMany(single_values.size(), single_values.data());
             if (_type == SINGLE) {
-                _bitmap.add(_sv);
+                _bitmap->add(_sv);
             }
             _type = BITMAP;
         }
@@ -1322,7 +1376,7 @@ public:
         switch (rhs._type) {
         case EMPTY:
             _type = EMPTY;
-            _bitmap.clear();
+            _bitmap.reset();
             break;
         case SINGLE:
             switch (_type) {
@@ -1334,13 +1388,13 @@ public:
                 }
                 break;
             case BITMAP:
-                if (!_bitmap.contains(rhs._sv)) {
+                if (!_bitmap->contains(rhs._sv)) {
                     _type = EMPTY;
                 } else {
                     _type = SINGLE;
                     _sv = rhs._sv;
                 }
-                _bitmap.clear();
+                _bitmap.reset();
                 break;
             }
             break;
@@ -1349,12 +1403,13 @@ public:
             case EMPTY:
                 break;
             case SINGLE:
-                if (!rhs._bitmap.contains(_sv)) {
+                if (!rhs._bitmap->contains(_sv)) {
                     _type = EMPTY;
                 }
                 break;
             case BITMAP:
-                _bitmap &= rhs._bitmap;
+                _prepare_bitmap_for_write();
+                *_bitmap &= *rhs._bitmap;
                 _convert_to_smaller_type();
                 break;
             }
@@ -1380,16 +1435,17 @@ public:
             case SINGLE:
                 if (_sv == rhs._sv) {
                     _type = EMPTY;
-                    _bitmap.clear();
+                    _bitmap.reset();
                 } else {
                     add(rhs._sv);
                 }
                 break;
             case BITMAP:
-                if (!_bitmap.contains(rhs._sv)) {
+                if (!_bitmap->contains(rhs._sv)) {
                     add(rhs._sv);
                 } else {
-                    _bitmap.remove(rhs._sv);
+                    _prepare_bitmap_for_write();
+                    _bitmap->remove(rhs._sv);
                 }
                 break;
             }
@@ -1398,19 +1454,25 @@ public:
             switch (_type) {
             case EMPTY:
                 _bitmap = rhs._bitmap;
+                const_cast<BitmapValue&>(rhs)._is_shared = true;
+                _is_shared = true;
                 _type = BITMAP;
                 break;
             case SINGLE:
                 _bitmap = rhs._bitmap;
+                const_cast<BitmapValue&>(rhs)._is_shared = true;
+                _is_shared = true;
                 _type = BITMAP;
-                if (!rhs._bitmap.contains(_sv)) {
-                    _bitmap.add(_sv);
+                _prepare_bitmap_for_write();
+                if (!rhs._bitmap->contains(_sv)) {
+                    _bitmap->add(_sv);
                 } else {
-                    _bitmap.remove(_sv);
+                    _bitmap->remove(_sv);
                 }
                 break;
             case BITMAP:
-                _bitmap ^= rhs._bitmap;
+                _prepare_bitmap_for_write();
+                *_bitmap ^= *rhs._bitmap;
                 _convert_to_smaller_type();
                 break;
             }
@@ -1427,7 +1489,7 @@ public:
         case SINGLE:
             return _sv == x;
         case BITMAP:
-            return _bitmap.contains(x);
+            return _bitmap->contains(x);
         }
         return false;
     }
@@ -1442,7 +1504,7 @@ public:
         case SINGLE:
             return 1;
         case BITMAP:
-            return _bitmap.cardinality();
+            return _bitmap->cardinality();
         }
         return 0;
     }
@@ -1458,16 +1520,16 @@ public:
             case SINGLE:
                 return _sv == rhs._sv;
             case BITMAP:
-                return _bitmap.contains(rhs._sv);
+                return _bitmap->contains(rhs._sv);
             }
         case BITMAP:
             switch (_type) {
             case EMPTY:
                 return 0;
             case SINGLE:
-                return rhs._bitmap.contains(_sv);
+                return rhs._bitmap->contains(_sv);
             case BITMAP:
-                return _bitmap.andCardinality(rhs._bitmap);
+                return _bitmap->andCardinality(*rhs._bitmap);
             }
         }
         return 0;
@@ -1484,16 +1546,16 @@ public:
             case SINGLE:
                 return 1 + (_sv != rhs._sv);
             case BITMAP:
-                return cardinality() + !_bitmap.contains(rhs._sv);
+                return cardinality() + !_bitmap->contains(rhs._sv);
             }
         case BITMAP:
             switch (_type) {
             case EMPTY:
                 return rhs.cardinality();
             case SINGLE:
-                return rhs.cardinality() + !rhs._bitmap.contains(_sv);
+                return rhs.cardinality() + !rhs._bitmap->contains(_sv);
             case BITMAP:
-                return _bitmap.orCardinality(rhs._bitmap);
+                return _bitmap->orCardinality(*rhs._bitmap);
             }
         }
         return 0;
@@ -1510,16 +1572,16 @@ public:
             case SINGLE:
                 return 2 - 2 * (_sv == rhs._sv);
             case BITMAP:
-                return cardinality() + 1 - 2 * (_bitmap.contains(rhs._sv));
+                return cardinality() + 1 - 2 * (_bitmap->contains(rhs._sv));
             }
         case BITMAP:
             switch (_type) {
             case EMPTY:
                 return rhs.cardinality();
             case SINGLE:
-                return rhs.cardinality() + 1 - 2 * (rhs._bitmap.contains(_sv));
+                return rhs.cardinality() + 1 - 2 * 
(rhs._bitmap->contains(_sv));
             case BITMAP:
-                return _bitmap.xorCardinality(rhs._bitmap);
+                return _bitmap->xorCardinality(*rhs._bitmap);
             }
         }
         return 0;
@@ -1536,16 +1598,16 @@ public:
             case SINGLE:
                 return 1 - _sv == rhs._sv;
             case BITMAP:
-                return cardinality() - _bitmap.contains(rhs._sv);
+                return cardinality() - _bitmap->contains(rhs._sv);
             }
         case BITMAP:
             switch (_type) {
             case EMPTY:
                 return 0;
             case SINGLE:
-                return !rhs._bitmap.contains(_sv);
+                return !rhs._bitmap->contains(_sv);
             case BITMAP:
-                return _bitmap.andnotCardinality(rhs._bitmap);
+                return _bitmap->andnotCardinality(*rhs._bitmap);
             }
         }
         return 0;
@@ -1567,9 +1629,9 @@ public:
             }
             break;
         case BITMAP:
-            _bitmap.runOptimize();
-            _bitmap.shrinkToFit();
-            res = _bitmap.getSizeInBytes();
+            _bitmap->runOptimize();
+            _bitmap->shrinkToFit();
+            res = _bitmap->getSizeInBytes();
             break;
         }
         return res;
@@ -1592,7 +1654,7 @@ public:
             }
             break;
         case BITMAP:
-            _bitmap.write(dst);
+            _bitmap->write(dst);
             break;
         }
     }
@@ -1615,7 +1677,8 @@ public:
         case BitmapTypeCode::BITMAP32:
         case BitmapTypeCode::BITMAP64:
             _type = BITMAP;
-            _bitmap = detail::Roaring64Map::read(src);
+            _prepare_bitmap_for_write();
+            *_bitmap = detail::Roaring64Map::read(src);
             break;
         default:
             LOG(ERROR) << "BitmapTypeCode invalid, should between: " << 
BitmapTypeCode::EMPTY
@@ -1631,7 +1694,7 @@ public:
         case SINGLE:
             return doris_udf::BigIntVal(_sv);
         case BITMAP:
-            return doris_udf::BigIntVal(_bitmap.minimum());
+            return doris_udf::BigIntVal(_bitmap->minimum());
         default:
             return doris_udf::BigIntVal::null();
         }
@@ -1653,7 +1716,7 @@ public:
             } iter_ctx;
             iter_ctx.ss = &ss;
 
-            _bitmap.iterate(
+            _bitmap->iterate(
                     [](uint64_t value, void* c) -> bool {
                         auto ctx = reinterpret_cast<IterCtx*>(c);
                         if (ctx->first) {
@@ -1676,18 +1739,18 @@ public:
         case SINGLE:
             return doris_udf::BigIntVal(_sv);
         case BITMAP:
-            return doris_udf::BigIntVal(_bitmap.maximum());
+            return doris_udf::BigIntVal(_bitmap->maximum());
         default:
             return doris_udf::BigIntVal::null();
         }
     }
 
     uint64_t max(bool* empty) const {
-        return min_or_max(empty, [&]() { return _bitmap.maximum(); });
+        return min_or_max(empty, [&]() { return _bitmap->maximum(); });
     }
 
     uint64_t min(bool* empty) const {
-        return min_or_max(empty, [&]() { return _bitmap.minimum(); });
+        return min_or_max(empty, [&]() { return _bitmap->minimum(); });
     }
 
     bool empty() const { return _type == EMPTY; }
@@ -1711,7 +1774,7 @@ public:
         }
         case BITMAP: {
             int64_t count = 0;
-            for (auto it = _bitmap.begin(); it != _bitmap.end(); ++it) {
+            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
                 if (*it < range_start) {
                     continue;
                 }
@@ -1750,7 +1813,7 @@ public:
         }
         case BITMAP: {
             int64_t count = 0;
-            for (auto it = _bitmap.begin(); it != _bitmap.end(); ++it) {
+            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
                 if (*it < range_start) {
                     continue;
                 }
@@ -1786,23 +1849,23 @@ public:
             }
         }
         case BITMAP: {
-            if (std::abs(offset) >= _bitmap.cardinality()) {
+            if (std::abs(offset) >= _bitmap->cardinality()) {
                 return 0;
             }
         }
         }
         int64_t abs_offset = offset;
         if (offset < 0) {
-            abs_offset = _bitmap.cardinality() + offset;
+            abs_offset = _bitmap->cardinality() + offset;
         }
 
         int64_t count = 0;
         int64_t offset_count = 0;
-        auto it = _bitmap.begin();
-        for (; it != _bitmap.end() && offset_count < abs_offset; ++it) {
+        auto it = _bitmap->begin();
+        for (; it != _bitmap->end() && offset_count < abs_offset; ++it) {
             ++offset_count;
         }
-        for (; it != _bitmap.end() && count < limit; ++it, ++count) {
+        for (; it != _bitmap->end() && count < limit; ++it, ++count) {
             ret_bitmap->add(*it);
         }
         return count;
@@ -1818,7 +1881,7 @@ public:
             break;
         }
         case BITMAP: {
-            for (auto it = _bitmap.begin(); it != _bitmap.end(); ++it) {
+            for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
                 data.emplace_back(*it);
             }
             break;
@@ -1828,7 +1891,7 @@ public:
 
     void clear() {
         _type = EMPTY;
-        _bitmap.clear();
+        _bitmap.reset();
         _sv = 0;
     }
 
@@ -1843,15 +1906,15 @@ public:
 private:
     void _convert_to_smaller_type() {
         if (_type == BITMAP) {
-            uint64_t c = _bitmap.cardinality();
+            uint64_t c = _bitmap->cardinality();
             if (c > 1) return;
             if (c == 0) {
                 _type = EMPTY;
             } else {
                 _type = SINGLE;
-                _sv = _bitmap.minimum();
+                _sv = _bitmap->minimum();
             }
-            _bitmap.clear();
+            _bitmap.reset();
         }
     }
 
@@ -1874,14 +1937,36 @@ private:
         return result;
     }
 
+    void _prepare_bitmap_for_write() {
+        if (!_bitmap) {
+            _bitmap = std::make_shared<detail::Roaring64Map>();
+            _is_shared = false;
+            return;
+        }
+
+        if (!_is_shared) {
+            // the state is not shared, not need to check use count any more
+            return;
+        }
+
+        if (_bitmap.use_count() > 1) {
+            auto new_one = std::make_shared<detail::Roaring64Map>();
+            *new_one = *_bitmap;
+            _bitmap = new_one;
+        }
+        _is_shared = false;
+    }
+
     enum BitmapDataType {
         EMPTY = 0,
         SINGLE = 1, // single element
         BITMAP = 2  // more than one elements
     };
-    uint64_t _sv = 0;             // store the single value when _type == 
SINGLE
-    detail::Roaring64Map _bitmap; // used when _type == BITMAP
-    BitmapDataType _type;
+    uint64_t _sv = 0;                              // store the single value 
when _type == SINGLE
+    std::shared_ptr<detail::Roaring64Map> _bitmap; // used when _type == BITMAP
+    BitmapDataType _type {EMPTY};
+    // Indicate whether the state is shared among multi BitmapValue object
+    bool _is_shared = true;
 };
 
 // A simple implement of bitmap value iterator(Read only)
@@ -1903,7 +1988,7 @@ public:
             _sv = _bitmap._sv;
             break;
         case BitmapValue::BitmapDataType::BITMAP:
-            _iter = new 
detail::Roaring64MapSetBitForwardIterator(_bitmap._bitmap, _end);
+            _iter = new 
detail::Roaring64MapSetBitForwardIterator(*_bitmap._bitmap, _end);
             break;
         default:
             CHECK(false) << _bitmap._type;
diff --git a/be/test/util/bitmap_value_test.cpp 
b/be/test/util/bitmap_value_test.cpp
index ee950744db..5964927d4d 100644
--- a/be/test/util/bitmap_value_test.cpp
+++ b/be/test/util/bitmap_value_test.cpp
@@ -27,6 +27,28 @@
 namespace doris {
 using roaring::Roaring;
 
+TEST(BitmapValueTest, copy) {
+    BitmapValue empty;
+    BitmapValue single(1024);
+    BitmapValue bitmap({1024, 1025, 1026});
+
+    BitmapValue copied = bitmap;
+    EXPECT_TRUE(copied.contains(1024));
+    EXPECT_TRUE(copied.contains(1025));
+    EXPECT_TRUE(copied.contains(1026));
+    copied &= single;
+
+    // value of copied changed.
+    EXPECT_TRUE(copied.contains(1024));
+    EXPECT_FALSE(copied.contains(1025));
+    EXPECT_FALSE(copied.contains(1026));
+
+    // value of bitmap not changed.
+    EXPECT_TRUE(bitmap.contains(1024));
+    EXPECT_TRUE(bitmap.contains(1025));
+    EXPECT_TRUE(bitmap.contains(1026));
+}
+
 TEST(BitmapValueTest, bitmap_union) {
     BitmapValue empty;
     BitmapValue single(1024);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to