This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 68d21272bed branch-3.0: [opt](rowset meta) truncate segments key
bounds if too large to avoid `RowsetMetaCloudPB` exceeding fdb's 100KB limit
(#45287) (#51595)
68d21272bed is described below
commit 68d21272bed653998e9377279b6ff90fab6ac4d8
Author: bobhan1 <[email protected]>
AuthorDate: Wed Jun 11 11:07:35 2025 +0800
branch-3.0: [opt](rowset meta) truncate segments key bounds if too large to
avoid `RowsetMetaCloudPB` exceeding fdb's 100KB limit (#45287) (#51595)
pick https://github.com/apache/doris/pull/45287
---
be/src/cloud/pb_convert.cpp | 4 +
be/src/common/config.cpp | 10 +
be/src/common/config.h | 6 +
be/src/olap/base_tablet.cpp | 16 +-
be/src/olap/base_tablet.h | 1 -
be/src/olap/compaction.cpp | 17 +-
be/src/olap/rowset/beta_rowset_writer.cpp | 7 +-
be/src/olap/rowset/beta_rowset_writer.h | 2 +
be/src/olap/rowset/rowset.h | 4 +
be/src/olap/rowset/rowset_meta.cpp | 31 +
be/src/olap/rowset/rowset_meta.h | 17 +-
be/src/olap/task/index_builder.cpp | 2 +
be/src/util/{slice.cpp => key_util.cpp} | 22 +-
be/src/util/key_util.h | 11 +-
be/src/util/slice.cpp | 19 +
be/src/util/slice.h | 7 +
be/src/vec/olap/block_reader.cpp | 17 +-
be/src/vec/olap/block_reader.h | 4 +-
be/test/olap/ordered_data_compaction_test.cpp | 1 +
.../olap/segments_key_bounds_truncation_test.cpp | 788 +++++++++++++++++++++
gensrc/proto/olap_file.proto | 10 +-
.../test_key_bounds_truncation_read_scenarios.out | Bin 0 -> 719 bytes
.../test_key_bounds_truncation_write_scenarios.out | Bin 0 -> 111 bytes
.../org/apache/doris/regression/util/Http.groovy | 8 +-
.../test_key_bounds_truncation_basic.groovy | 122 ++++
...est_key_bounds_truncation_read_scenarios.groovy | 100 +++
...st_key_bounds_truncation_write_scenarios.groovy | 284 ++++++++
27 files changed, 1462 insertions(+), 48 deletions(-)
diff --git a/be/src/cloud/pb_convert.cpp b/be/src/cloud/pb_convert.cpp
index ec483ba682c..0d24192a758 100644
--- a/be/src/cloud/pb_convert.cpp
+++ b/be/src/cloud/pb_convert.cpp
@@ -75,6 +75,7 @@ void doris_rowset_meta_to_cloud(RowsetMetaCloudPB* out, const
RowsetMetaPB& in)
}
out->set_txn_expiration(in.txn_expiration());
out->set_segments_overlap_pb(in.segments_overlap_pb());
+ out->set_segments_key_bounds_truncated(in.segments_key_bounds_truncated());
out->mutable_segments_file_size()->CopyFrom(in.segments_file_size());
out->set_index_id(in.index_id());
if (in.has_schema_version()) {
@@ -129,6 +130,7 @@ void doris_rowset_meta_to_cloud(RowsetMetaCloudPB* out,
RowsetMetaPB&& in) {
}
out->set_txn_expiration(in.txn_expiration());
out->set_segments_overlap_pb(in.segments_overlap_pb());
+ out->set_segments_key_bounds_truncated(in.segments_key_bounds_truncated());
out->mutable_segments_file_size()->Swap(in.mutable_segments_file_size());
out->set_index_id(in.index_id());
if (in.has_schema_version()) {
@@ -230,6 +232,7 @@ void cloud_rowset_meta_to_doris(RowsetMetaPB* out, const
RowsetMetaCloudPB& in,
}
out->set_txn_expiration(in.txn_expiration());
out->set_segments_overlap_pb(in.segments_overlap_pb());
+ out->set_segments_key_bounds_truncated(in.segments_key_bounds_truncated());
out->mutable_segments_file_size()->CopyFrom(in.segments_file_size());
out->set_index_id(in.index_id());
if (in.has_schema_version()) {
@@ -285,6 +288,7 @@ void cloud_rowset_meta_to_doris(RowsetMetaPB* out,
RowsetMetaCloudPB&& in,
}
out->set_txn_expiration(in.txn_expiration());
out->set_segments_overlap_pb(in.segments_overlap_pb());
+ out->set_segments_key_bounds_truncated(in.segments_key_bounds_truncated());
out->mutable_segments_file_size()->Swap(in.mutable_segments_file_size());
out->set_index_id(in.index_id());
if (in.has_schema_version()) {
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 61758a0b4fe..2fc927dcbfd 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1495,6 +1495,12 @@ DEFINE_mBool(enable_compaction_pause_on_high_memory,
"true");
DEFINE_mBool(enable_calc_delete_bitmap_between_segments_concurrently, "false");
+// the max length of segments key bounds, in bytes
+// ATTENTION: as long as this conf has ever been enabled, cluster downgrade
and backup recovery will no longer be supported.
+DEFINE_mInt32(segments_key_bounds_truncation_threshold, "-1");
+// ATTENTION: for test only, use random segments key bounds truncation
threshold every time
+DEFINE_mBool(random_segments_key_bounds_truncation, "false");
+
// clang-format off
#ifdef BE_TEST
// test s3
@@ -1943,6 +1949,10 @@ Status set_fuzzy_configs() {
fuzzy_field_and_value["string_overflow_size"] =
((distribution(*generator) % 2) == 0) ? "10" : "4294967295";
+ std::uniform_int_distribution<int64_t> distribution2(-2, 10);
+ fuzzy_field_and_value["segments_key_bounds_truncation_threshold"] =
+ std::to_string(distribution2(*generator));
+
fmt::memory_buffer buf;
for (auto& it : fuzzy_field_and_value) {
const auto& field = it.first;
diff --git a/be/src/common/config.h b/be/src/common/config.h
index a7e16c53d5e..c342b260288 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1573,6 +1573,12 @@ DECLARE_mBool(enable_compaction_pause_on_high_memory);
DECLARE_mBool(enable_calc_delete_bitmap_between_segments_concurrently);
+// the max length of segments key bounds, in bytes
+// ATTENTION: as long as this conf has ever been enabled, cluster downgrade
and backup recovery will no longer be supported.
+DECLARE_mInt32(segments_key_bounds_truncation_threshold);
+// ATTENTION: for test only, use random segments key bounds truncation
threshold every time
+DECLARE_mBool(random_segments_key_bounds_truncation);
+
#ifdef BE_TEST
// test s3
DECLARE_String(test_s3_resource);
diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp
index 54fcb0edd1f..8c8e52be30f 100644
--- a/be/src/olap/base_tablet.cpp
+++ b/be/src/olap/base_tablet.cpp
@@ -46,6 +46,7 @@
#include "util/crc32c.h"
#include "util/debug_points.h"
#include "util/doris_metrics.h"
+#include "util/key_util.h"
#include "vec/common/assert_cast.h"
#include "vec/common/schema_util.h"
#include "vec/data_types/data_type_factory.hpp"
@@ -476,21 +477,22 @@ Status BaseTablet::lookup_row_key(const Slice&
encoded_key, TabletSchema* latest
RowLocation loc;
for (size_t i = 0; i < specified_rowsets.size(); i++) {
- auto& rs = specified_rowsets[i];
- auto& segments_key_bounds =
rs->rowset_meta()->get_segments_key_bounds();
- int num_segments = rs->num_segments();
+ const auto& rs = specified_rowsets[i];
+ std::vector<KeyBoundsPB> segments_key_bounds;
+ rs->rowset_meta()->get_segments_key_bounds(&segments_key_bounds);
+ int num_segments = static_cast<int>(rs->num_segments());
DCHECK_EQ(segments_key_bounds.size(), num_segments);
std::vector<uint32_t> picked_segments;
- for (int i = num_segments - 1; i >= 0; i--) {
+ for (int j = num_segments - 1; j >= 0; j--) {
// If mow table has cluster keys, the key bounds is short keys,
not primary keys
// use PrimaryKeyIndexMetaPB in primary key index?
if (schema->cluster_key_idxes().empty()) {
- if (key_without_seq.compare(segments_key_bounds[i].max_key())
> 0 ||
- key_without_seq.compare(segments_key_bounds[i].min_key())
< 0) {
+ if (key_is_not_in_segment(key_without_seq,
segments_key_bounds[j],
+
rs->rowset_meta()->is_segments_key_bounds_truncated())) {
continue;
}
}
- picked_segments.emplace_back(i);
+ picked_segments.emplace_back(j);
}
if (picked_segments.empty()) {
continue;
diff --git a/be/src/olap/base_tablet.h b/be/src/olap/base_tablet.h
index 4f4b41f11ab..4e9ddaac1b5 100644
--- a/be/src/olap/base_tablet.h
+++ b/be/src/olap/base_tablet.h
@@ -145,7 +145,6 @@ public:
RowsetSharedPtr rowset, const TupleDescriptor* desc,
OlapReaderStatistics& stats, std::string& values,
bool write_to_cache = false);
-
// Lookup the row location of `encoded_key`, the function sets
`row_location` on success.
// NOTE: the method only works in unique key model with primary key index,
you will got a
// not supported error in other data model.
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 3d6d6cad558..9b173db26fb 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -86,7 +86,8 @@ using namespace ErrorCode;
namespace {
-bool is_rowset_tidy(std::string& pre_max_key, const RowsetSharedPtr& rhs) {
+bool is_rowset_tidy(std::string& pre_max_key, bool&
pre_rs_key_bounds_truncated,
+ const RowsetSharedPtr& rhs) {
size_t min_tidy_size = config::ordered_data_compaction_min_segment_size;
if (rhs->num_segments() == 0) {
return true;
@@ -109,11 +110,13 @@ bool is_rowset_tidy(std::string& pre_max_key, const
RowsetSharedPtr& rhs) {
if (!ret) {
return false;
}
- if (min_key <= pre_max_key) {
+ bool cur_rs_key_bounds_truncated {rhs->is_segments_key_bounds_truncated()};
+ if (!Slice::lhs_is_strictly_less_than_rhs(Slice {pre_max_key},
pre_rs_key_bounds_truncated,
+ Slice {min_key},
cur_rs_key_bounds_truncated)) {
return false;
}
CHECK(rhs->last_key(&pre_max_key));
-
+ pre_rs_key_bounds_truncated = cur_rs_key_bounds_truncated;
return true;
}
@@ -302,12 +305,13 @@ Status CompactionMixin::do_compact_ordered_rowsets() {
<< ", output_version=" << _output_version;
// link data to new rowset
auto seg_id = 0;
+ bool segments_key_bounds_truncated {false};
std::vector<KeyBoundsPB> segment_key_bounds;
for (auto rowset : _input_rowsets) {
RETURN_IF_ERROR(rowset->link_files_to(tablet()->tablet_path(),
_output_rs_writer->rowset_id(),
seg_id));
seg_id += rowset->num_segments();
-
+ segments_key_bounds_truncated |=
rowset->is_segments_key_bounds_truncated();
std::vector<KeyBoundsPB> key_bounds;
RETURN_IF_ERROR(rowset->get_segments_key_bounds(&key_bounds));
segment_key_bounds.insert(segment_key_bounds.end(),
key_bounds.begin(), key_bounds.end());
@@ -322,7 +326,7 @@ Status CompactionMixin::do_compact_ordered_rowsets() {
rowset_meta->set_num_segments(_input_num_segments);
rowset_meta->set_segments_overlap(NONOVERLAPPING);
rowset_meta->set_rowset_state(VISIBLE);
-
+
rowset_meta->set_segments_key_bounds_truncated(segments_key_bounds_truncated);
rowset_meta->set_segments_key_bounds(segment_key_bounds);
_output_rowset = _output_rs_writer->manual_build(rowset_meta);
return Status::OK();
@@ -404,8 +408,9 @@ bool CompactionMixin::handle_ordered_data_compaction() {
// files to handle compaction
auto input_size = _input_rowsets.size();
std::string pre_max_key;
+ bool pre_rs_key_bounds_truncated {false};
for (auto i = 0; i < input_size; ++i) {
- if (!is_rowset_tidy(pre_max_key, _input_rowsets[i])) {
+ if (!is_rowset_tidy(pre_max_key, pre_rs_key_bounds_truncated,
_input_rowsets[i])) {
if (i <= input_size / 2) {
return false;
} else {
diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp
b/be/src/olap/rowset/beta_rowset_writer.cpp
index eb8a6a3bfb6..6f14f45138d 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -90,7 +90,8 @@ void build_rowset_meta_with_spec_field(RowsetMeta&
rowset_meta,
rowset_meta.set_num_segments(spec_rowset_meta.num_segments());
rowset_meta.set_segments_overlap(spec_rowset_meta.segments_overlap());
rowset_meta.set_rowset_state(spec_rowset_meta.rowset_state());
-
+ rowset_meta.set_segments_key_bounds_truncated(
+ spec_rowset_meta.is_segments_key_bounds_truncated());
std::vector<KeyBoundsPB> segments_key_bounds;
spec_rowset_meta.get_segments_key_bounds(&segments_key_bounds);
rowset_meta.set_segments_key_bounds(segments_key_bounds);
@@ -679,6 +680,7 @@ Status BaseBetaRowsetWriter::add_rowset(RowsetSharedPtr
rowset) {
_num_segment += static_cast<int32_t>(rowset->num_segments());
// append key_bounds to current rowset
RETURN_IF_ERROR(rowset->get_segments_key_bounds(&_segments_encoded_key_bounds));
+ _segments_key_bounds_truncated =
rowset->rowset_meta()->is_segments_key_bounds_truncated();
// TODO update zonemap
if (rowset->rowset_meta()->has_delete_predicate()) {
@@ -888,6 +890,9 @@ Status BaseBetaRowsetWriter::_build_rowset_meta(RowsetMeta*
rowset_meta, bool ch
for (auto& key_bound : _segments_encoded_key_bounds) {
segments_encoded_key_bounds.push_back(key_bound);
}
+ if (_segments_key_bounds_truncated.has_value()) {
+
rowset_meta->set_segments_key_bounds_truncated(_segments_key_bounds_truncated.value());
+ }
// segment key bounds are empty in old version(before version 1.2.x). So
we should not modify
// the overlap property when key bounds are empty.
if (!segments_encoded_key_bounds.empty() &&
diff --git a/be/src/olap/rowset/beta_rowset_writer.h
b/be/src/olap/rowset/beta_rowset_writer.h
index 2ae999eae20..19e3c4da31d 100644
--- a/be/src/olap/rowset/beta_rowset_writer.h
+++ b/be/src/olap/rowset/beta_rowset_writer.h
@@ -236,8 +236,10 @@ protected:
// record rows number of every segment already written, using for rowid
// conversion when compaction in unique key with MoW model
std::vector<uint32_t> _segment_num_rows;
+
// for unique key table with merge-on-write
std::vector<KeyBoundsPB> _segments_encoded_key_bounds;
+ std::optional<bool> _segments_key_bounds_truncated;
// counters and statistics maintained during add_rowset
std::atomic<int64_t> _num_rows_written;
diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h
index db6872875a5..3d2a3d965f0 100644
--- a/be/src/olap/rowset/rowset.h
+++ b/be/src/olap/rowset/rowset.h
@@ -294,6 +294,10 @@ public:
return true;
}
+ bool is_segments_key_bounds_truncated() const {
+ return _rowset_meta->is_segments_key_bounds_truncated();
+ }
+
bool check_rowset_segment();
[[nodiscard]] virtual Status add_to_binlog() { return Status::OK(); }
diff --git a/be/src/olap/rowset/rowset_meta.cpp
b/be/src/olap/rowset/rowset_meta.cpp
index 6bed5e800ed..c9851cdc5fc 100644
--- a/be/src/olap/rowset/rowset_meta.cpp
+++ b/be/src/olap/rowset/rowset_meta.cpp
@@ -20,6 +20,7 @@
#include <gen_cpp/olap_file.pb.h>
#include <memory>
+#include <random>
#include "common/logging.h"
#include "google/protobuf/util/message_differencer.h"
@@ -220,6 +221,34 @@ int64_t RowsetMeta::segment_file_size(int seg_id) {
: -1;
}
+void RowsetMeta::set_segments_key_bounds(const std::vector<KeyBoundsPB>&
segments_key_bounds) {
+ for (const KeyBoundsPB& key_bounds : segments_key_bounds) {
+ KeyBoundsPB* new_key_bounds =
_rowset_meta_pb.add_segments_key_bounds();
+ *new_key_bounds = key_bounds;
+ }
+
+ int32_t truncation_threshold =
config::segments_key_bounds_truncation_threshold;
+ if (config::random_segments_key_bounds_truncation) {
+ static thread_local std::mt19937 generator(std::random_device {}());
+ std::uniform_int_distribution<int> distribution(-10, 40);
+ truncation_threshold = distribution(generator);
+ }
+ bool really_do_truncation {false};
+ if (truncation_threshold > 0) {
+ for (auto& segment_key_bounds :
*_rowset_meta_pb.mutable_segments_key_bounds()) {
+ if (segment_key_bounds.min_key().size() > truncation_threshold) {
+ really_do_truncation = true;
+
segment_key_bounds.mutable_min_key()->resize(truncation_threshold);
+ }
+ if (segment_key_bounds.max_key().size() > truncation_threshold) {
+ really_do_truncation = true;
+
segment_key_bounds.mutable_max_key()->resize(truncation_threshold);
+ }
+ }
+ }
+ set_segments_key_bounds_truncated(really_do_truncation ||
is_segments_key_bounds_truncated());
+}
+
void RowsetMeta::merge_rowset_meta(const RowsetMeta& other) {
set_num_segments(num_segments() + other.num_segments());
set_num_rows(num_rows() + other.num_rows());
@@ -227,6 +256,8 @@ void RowsetMeta::merge_rowset_meta(const RowsetMeta& other)
{
set_total_disk_size(total_disk_size() + other.total_disk_size());
set_index_disk_size(index_disk_size() + other.index_disk_size());
set_total_disk_size(data_disk_size() + index_disk_size());
+ set_segments_key_bounds_truncated(is_segments_key_bounds_truncated() ||
+
other.is_segments_key_bounds_truncated());
for (auto&& key_bound : other.get_segments_key_bounds()) {
add_segment_key_bounds(key_bound);
}
diff --git a/be/src/olap/rowset/rowset_meta.h b/be/src/olap/rowset/rowset_meta.h
index 40e55e07452..4421b6dda1f 100644
--- a/be/src/olap/rowset/rowset_meta.h
+++ b/be/src/olap/rowset/rowset_meta.h
@@ -24,6 +24,7 @@
#include <string>
#include <vector>
+#include "common/config.h"
#include "io/fs/file_system.h"
#include "olap/metadata_adder.h"
#include "olap/olap_common.h"
@@ -299,6 +300,15 @@ public:
auto& get_segments_key_bounds() const { return
_rowset_meta_pb.segments_key_bounds(); }
+ bool is_segments_key_bounds_truncated() const {
+ return _rowset_meta_pb.has_segments_key_bounds_truncated() &&
+ _rowset_meta_pb.segments_key_bounds_truncated();
+ }
+
+ void set_segments_key_bounds_truncated(bool truncated) {
+ _rowset_meta_pb.set_segments_key_bounds_truncated(truncated);
+ }
+
bool get_first_segment_key_bound(KeyBoundsPB* key_bounds) {
// for compatibility, old version has not segment key bounds
if (_rowset_meta_pb.segments_key_bounds_size() == 0) {
@@ -316,12 +326,7 @@ public:
return true;
}
- void set_segments_key_bounds(const std::vector<KeyBoundsPB>&
segments_key_bounds) {
- for (const KeyBoundsPB& key_bounds : segments_key_bounds) {
- KeyBoundsPB* new_key_bounds =
_rowset_meta_pb.add_segments_key_bounds();
- *new_key_bounds = key_bounds;
- }
- }
+ void set_segments_key_bounds(const std::vector<KeyBoundsPB>&
segments_key_bounds);
void add_segment_key_bounds(KeyBoundsPB segments_key_bounds) {
*_rowset_meta_pb.add_segments_key_bounds() =
std::move(segments_key_bounds);
diff --git a/be/src/olap/task/index_builder.cpp
b/be/src/olap/task/index_builder.cpp
index a8851a46e82..9f72056af8d 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -258,6 +258,8 @@ Status IndexBuilder::update_inverted_index_info() {
rowset_meta->set_rowset_state(input_rowset_meta->rowset_state());
std::vector<KeyBoundsPB> key_bounds;
RETURN_IF_ERROR(input_rowset->get_segments_key_bounds(&key_bounds));
+ rowset_meta->set_segments_key_bounds_truncated(
+ input_rowset_meta->is_segments_key_bounds_truncated());
rowset_meta->set_segments_key_bounds(key_bounds);
auto output_rowset = output_rs_writer->manual_build(rowset_meta);
if (input_rowset_meta->has_delete_predicate()) {
diff --git a/be/src/util/slice.cpp b/be/src/util/key_util.cpp
similarity index 53%
copy from be/src/util/slice.cpp
copy to be/src/util/key_util.cpp
index a681b5bba7d..b49639d7075 100644
--- a/be/src/util/slice.cpp
+++ b/be/src/util/key_util.cpp
@@ -15,16 +15,18 @@
// specific language governing permissions and limitations
// under the License.
-#include "util/slice.h"
-
-#include "util/faststring.h"
+#include "util/key_util.h"
namespace doris {
-// NOTE(zc): we define this function here to make compile work.
-Slice::Slice(const faststring& s)
- : // NOLINT(runtime/explicit)
- data((char*)(s.data())),
- size(s.size()) {}
-
-} // namespace doris
+bool key_is_not_in_segment(Slice key, const KeyBoundsPB& segment_key_bounds,
+ bool is_segments_key_bounds_truncated) {
+ Slice maybe_truncated_min_key {segment_key_bounds.min_key()};
+ Slice maybe_truncated_max_key {segment_key_bounds.max_key()};
+ bool res1 = Slice::lhs_is_strictly_less_than_rhs(key, false,
maybe_truncated_min_key,
+
is_segments_key_bounds_truncated);
+ bool res2 = Slice::lhs_is_strictly_less_than_rhs(maybe_truncated_max_key,
+
is_segments_key_bounds_truncated, key, false);
+ return res1 || res2;
+}
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/util/key_util.h b/be/src/util/key_util.h
index fd57566fa4f..01094905cf5 100644
--- a/be/src/util/key_util.h
+++ b/be/src/util/key_util.h
@@ -17,16 +17,12 @@
#pragma once
+#include <gen_cpp/olap_file.pb.h>
#include <gen_cpp/segment_v2.pb.h>
#include <cstdint>
-#include <iterator>
#include <string>
-#include <vector>
-#include "common/status.h"
-#include "util/debug_util.h"
-#include "util/faststring.h"
#include "util/slice.h"
namespace doris {
@@ -111,4 +107,9 @@ void encode_key(std::string* buf, const RowType& row,
size_t num_keys) {
}
}
+// we can only know if a key is excluded from the segment
+// based on strictly order compare result with segments key bounds
+bool key_is_not_in_segment(Slice key, const KeyBoundsPB& segment_key_bounds,
+ bool is_segments_key_bounds_truncated);
+
} // namespace doris
diff --git a/be/src/util/slice.cpp b/be/src/util/slice.cpp
index a681b5bba7d..9c15f901f25 100644
--- a/be/src/util/slice.cpp
+++ b/be/src/util/slice.cpp
@@ -27,4 +27,23 @@ Slice::Slice(const faststring& s)
data((char*)(s.data())),
size(s.size()) {}
+bool Slice::lhs_is_strictly_less_than_rhs(Slice X, bool X_is_truncated, Slice
Y,
+ [[maybe_unused]] bool
Y_is_truncated) {
+ // suppose X is a prefix of X', Y is a prefix of Y'
+ if (!X_is_truncated) {
+ // (X_is_truncated == false) means X' == X
+ // we have Y <= Y',
+ // so X < Y => X < Y',
+ // so X' = X < Y'
+ return X.compare(Y) < 0;
+ }
+
+ // let m = min(|X|,|Y|),
+ // we have Y[1..m] = Y'[1..m] <= Y'
+ // so X'[1..m] < Y[1..m] => X' < Y'
+ std::size_t m {std::min(X.get_size(), Y.get_size())};
+ Slice Y_to_cmp {Y.get_data(), m};
+ return X.compare(Y_to_cmp) < 0;
+}
+
} // namespace doris
diff --git a/be/src/util/slice.h b/be/src/util/slice.h
index fd6bcf0adfb..e46546a03f3 100644
--- a/be/src/util/slice.h
+++ b/be/src/util/slice.h
@@ -273,6 +273,13 @@ public:
}
return buf;
}
+
+ // X is (maybe) a truncated prefix of string X'
+ // Y is (maybe) a truncated prefix of string Y'
+ // return true only if we can determine that X' is strictly less than Y'
+ // based on these maybe truncated prefixes
+ static bool lhs_is_strictly_less_than_rhs(Slice X, bool X_is_truncated,
Slice Y,
+ bool Y_is_truncated);
};
inline std::ostream& operator<<(std::ostream& os, const Slice& slice) {
diff --git a/be/src/vec/olap/block_reader.cpp b/be/src/vec/olap/block_reader.cpp
index 07befd47d88..5374a2c2d73 100644
--- a/be/src/vec/olap/block_reader.cpp
+++ b/be/src/vec/olap/block_reader.cpp
@@ -72,8 +72,9 @@ Status BlockReader::next_block_with_aggregation(Block* block,
bool* eof) {
return res;
}
-bool BlockReader::_rowsets_mono_asc_disjoint(const ReaderParams& read_params) {
- std::string cur_rs_last_key;
+bool BlockReader::_rowsets_not_mono_asc_disjoint(const ReaderParams&
read_params) {
+ std::string pre_rs_last_key;
+ bool pre_rs_key_bounds_truncated {false};
const std::vector<RowSetSplits>& rs_splits = read_params.rs_splits;
for (const auto& rs_split : rs_splits) {
if (rs_split.rs_reader->rowset()->num_rows() == 0) {
@@ -87,13 +88,17 @@ bool BlockReader::_rowsets_mono_asc_disjoint(const
ReaderParams& read_params) {
if (!has_first_key) {
return true;
}
- if (rs_first_key <= cur_rs_last_key) {
+ bool cur_rs_key_bounds_truncated {
+
rs_split.rs_reader->rowset()->is_segments_key_bounds_truncated()};
+ if (!Slice::lhs_is_strictly_less_than_rhs(Slice {pre_rs_last_key},
+ pre_rs_key_bounds_truncated,
Slice {rs_first_key},
+
cur_rs_key_bounds_truncated)) {
return true;
}
- bool has_last_key =
rs_split.rs_reader->rowset()->last_key(&cur_rs_last_key);
+ bool has_last_key =
rs_split.rs_reader->rowset()->last_key(&pre_rs_last_key);
+ pre_rs_key_bounds_truncated = cur_rs_key_bounds_truncated;
CHECK(has_last_key);
}
-
return false;
}
@@ -110,7 +115,7 @@ Status BlockReader::_init_collect_iter(const ReaderParams&
read_params) {
// check if rowsets are noneoverlapping
{
SCOPED_RAW_TIMER(&_stats.block_reader_vcollect_iter_init_timer_ns);
- _is_rowsets_overlapping = _rowsets_mono_asc_disjoint(read_params);
+ _is_rowsets_overlapping = _rowsets_not_mono_asc_disjoint(read_params);
_vcollect_iter.init(this, _is_rowsets_overlapping,
read_params.read_orderby_key,
read_params.read_orderby_key_reverse);
}
diff --git a/be/src/vec/olap/block_reader.h b/be/src/vec/olap/block_reader.h
index f33fe743109..b665150cbcb 100644
--- a/be/src/vec/olap/block_reader.h
+++ b/be/src/vec/olap/block_reader.h
@@ -86,8 +86,8 @@ private:
bool _get_next_row_same();
- // return true if keys of rowsets are mono ascending and disjoint
- bool _rowsets_mono_asc_disjoint(const ReaderParams& read_params);
+ // return false if keys of rowsets are mono ascending and disjoint
+ bool _rowsets_not_mono_asc_disjoint(const ReaderParams& read_params);
VCollectIterator _vcollect_iter;
IteratorRowRef _next_row {{}, -1, false};
diff --git a/be/test/olap/ordered_data_compaction_test.cpp
b/be/test/olap/ordered_data_compaction_test.cpp
index 934dfbef3ea..dc53b57ed92 100644
--- a/be/test/olap/ordered_data_compaction_test.cpp
+++ b/be/test/olap/ordered_data_compaction_test.cpp
@@ -107,6 +107,7 @@ protected:
ExecEnv::GetInstance()->set_storage_engine(std::move(engine));
config::enable_ordered_data_compaction = true;
config::ordered_data_compaction_min_segment_size = 10;
+ config::segments_key_bounds_truncation_threshold = -1;
}
void TearDown() override {
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(absolute_dir).ok());
diff --git a/be/test/olap/segments_key_bounds_truncation_test.cpp
b/be/test/olap/segments_key_bounds_truncation_test.cpp
new file mode 100644
index 00000000000..8530d9d52ea
--- /dev/null
+++ b/be/test/olap/segments_key_bounds_truncation_test.cpp
@@ -0,0 +1,788 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gen_cpp/olap_file.pb.h>
+#include <gen_cpp/segment_v2.pb.h>
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <memory>
+#include <random>
+#include <string>
+#include <vector>
+
+#include "common/config.h"
+#include "io/fs/local_file_system.h"
+#include "olap/cumulative_compaction.h"
+#include "olap/rowset/rowset_factory.h"
+#include "olap/rowset/segment_v2/segment.h"
+#include "olap/rowset/segment_v2/segment_writer.h"
+#include "olap/storage_engine.h"
+#include "olap/tablet_meta.h"
+#include "olap/tablet_reader.h"
+#include "olap/tablet_schema.h"
+#include "runtime/exec_env.h"
+#include "util/key_util.h"
+#include "vec/olap/block_reader.h"
+
+namespace doris {
+static std::string kSegmentDir =
"./ut_dir/segments_key_bounds_truncation_test";
+
+class SegmentsKeyBoundsTruncationTest : public testing::Test {
+private:
+ StorageEngine* engine_ref = nullptr;
+ string absolute_dir;
+ std::unique_ptr<DataDir> data_dir;
+ int cur_version {2};
+
+public:
+ void SetUp() override {
+ auto st = io::global_local_filesystem()->delete_directory(kSegmentDir);
+ ASSERT_TRUE(st.ok()) << st;
+ st = io::global_local_filesystem()->create_directory(kSegmentDir);
+ ASSERT_TRUE(st.ok()) << st;
+ doris::EngineOptions options;
+ auto engine = std::make_unique<StorageEngine>(options);
+ engine_ref = engine.get();
+ data_dir = std::make_unique<DataDir>(*engine_ref, kSegmentDir);
+ ASSERT_TRUE(data_dir->update_capacity().ok());
+ ExecEnv::GetInstance()->set_storage_engine(std::move(engine));
+ }
+
+ void TearDown() override {
+
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kSegmentDir).ok());
+ engine_ref = nullptr;
+ ExecEnv::GetInstance()->set_storage_engine(nullptr);
+ }
+
+ void disable_segments_key_bounds_truncation() {
+ config::segments_key_bounds_truncation_threshold = -1;
+ }
+
+ TabletSchemaSPtr create_schema(int varchar_length) {
+ TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
+ TabletSchemaPB tablet_schema_pb;
+ tablet_schema_pb.set_keys_type(DUP_KEYS);
+ tablet_schema_pb.set_num_short_key_columns(1);
+ tablet_schema_pb.set_num_rows_per_row_block(1024);
+ tablet_schema_pb.set_compress_kind(COMPRESS_NONE);
+ tablet_schema_pb.set_next_column_unique_id(4);
+
+ ColumnPB* column_1 = tablet_schema_pb.add_column();
+ column_1->set_unique_id(1);
+ column_1->set_name("k1");
+ column_1->set_type("VARCHAR");
+ column_1->set_is_key(true);
+ column_1->set_length(varchar_length);
+ column_1->set_index_length(36);
+ column_1->set_is_nullable(false);
+ column_1->set_is_bf_column(false);
+
+ ColumnPB* column_2 = tablet_schema_pb.add_column();
+ column_2->set_unique_id(2);
+ column_2->set_name("c1");
+ column_2->set_type("INT");
+ column_2->set_length(4);
+ column_2->set_index_length(4);
+ column_2->set_is_nullable(true);
+ column_2->set_is_key(false);
+ column_2->set_is_nullable(true);
+ column_2->set_is_bf_column(false);
+
+ tablet_schema->init_from_pb(tablet_schema_pb);
+ return tablet_schema;
+ }
+
+ TabletSharedPtr create_tablet(const TabletSchema& tablet_schema,
+ bool enable_unique_key_merge_on_write) {
+ std::vector<TColumn> cols;
+ std::unordered_map<uint32_t, uint32_t> col_ordinal_to_unique_id;
+ for (auto i = 0; i < tablet_schema.num_columns(); i++) {
+ const TabletColumn& column = tablet_schema.column(i);
+ TColumn col;
+ col.column_type.type = TPrimitiveType::INT;
+ col.__set_column_name(column.name());
+ col.__set_is_key(column.is_key());
+ cols.push_back(col);
+ col_ordinal_to_unique_id[i] = column.unique_id();
+ }
+
+ TTabletSchema t_tablet_schema;
+
t_tablet_schema.__set_short_key_column_count(tablet_schema.num_short_key_columns());
+ t_tablet_schema.__set_schema_hash(3333);
+ if (tablet_schema.keys_type() == UNIQUE_KEYS) {
+ t_tablet_schema.__set_keys_type(TKeysType::UNIQUE_KEYS);
+ } else if (tablet_schema.keys_type() == DUP_KEYS) {
+ t_tablet_schema.__set_keys_type(TKeysType::DUP_KEYS);
+ } else if (tablet_schema.keys_type() == AGG_KEYS) {
+ t_tablet_schema.__set_keys_type(TKeysType::AGG_KEYS);
+ }
+ t_tablet_schema.__set_storage_type(TStorageType::COLUMN);
+ t_tablet_schema.__set_columns(cols);
+ TabletMetaSharedPtr tablet_meta {std::make_shared<TabletMeta>(
+ 2, 2, 2, 2, 2, 2, t_tablet_schema, 2,
col_ordinal_to_unique_id, UniqueId(1, 2),
+ TTabletType::TABLET_TYPE_DISK, TCompressionType::LZ4F, 0,
+ enable_unique_key_merge_on_write)};
+
+ TabletSharedPtr tablet {std::make_shared<Tablet>(*engine_ref,
tablet_meta, data_dir.get())};
+ EXPECT_TRUE(tablet->init().ok());
+ return tablet;
+ }
+
+ RowsetWriterContext create_rowset_writer_context(TabletSchemaSPtr
tablet_schema,
+ const SegmentsOverlapPB&
overlap,
+ uint32_t
max_rows_per_segment,
+ Version version) {
+ RowsetWriterContext rowset_writer_context;
+ rowset_writer_context.rowset_id = engine_ref->next_rowset_id();
+ rowset_writer_context.rowset_type = BETA_ROWSET;
+ rowset_writer_context.rowset_state = VISIBLE;
+ rowset_writer_context.tablet_schema = tablet_schema;
+ rowset_writer_context.tablet_path = kSegmentDir;
+ rowset_writer_context.version = version;
+ rowset_writer_context.segments_overlap = overlap;
+ rowset_writer_context.max_rows_per_segment = max_rows_per_segment;
+ return rowset_writer_context;
+ }
+
+ void create_and_init_rowset_reader(Rowset* rowset, RowsetReaderContext&
context,
+ RowsetReaderSharedPtr* result) {
+ auto s = rowset->create_reader(result);
+ EXPECT_TRUE(s.ok());
+ EXPECT_TRUE(*result != nullptr);
+
+ s = (*result)->init(&context);
+ EXPECT_TRUE(s.ok());
+ }
+
+ std::vector<vectorized::Block> generate_blocks(
+ TabletSchemaSPtr tablet_schema, const
std::vector<std::vector<std::string>>& data) {
+ std::vector<vectorized::Block> ret;
+ int const_value = 999;
+ for (const auto& segment_rows : data) {
+ vectorized::Block block = tablet_schema->create_block();
+ auto columns = block.mutate_columns();
+ for (const auto& row : segment_rows) {
+ columns[0]->insert_data(row.data(), row.size());
+ columns[1]->insert_data(reinterpret_cast<const
char*>(&const_value),
+ sizeof(const_value));
+ }
+ ret.emplace_back(std::move(block));
+ }
+ return ret;
+ }
+
+ std::vector<std::vector<std::string>> get_expected_key_bounds(
+ const std::vector<std::vector<std::string>>& data) {
+ std::vector<std::vector<std::string>> ret;
+ for (const auto& rows : data) {
+ auto& cur = ret.emplace_back();
+ auto min_key = rows.front();
+ auto max_key = rows.front();
+ for (const auto& row : rows) {
+ if (row < min_key) {
+ min_key = row;
+ }
+ if (row > max_key) {
+ max_key = row;
+ }
+ }
+
+ // segments key bounds have marker
+ min_key = std::string {KEY_NORMAL_MARKER} + min_key;
+ max_key = std::string {KEY_NORMAL_MARKER} + max_key;
+
+ cur.emplace_back(do_trunacte(min_key));
+ cur.emplace_back(do_trunacte(max_key));
+ }
+ return ret;
+ }
+
+ RowsetSharedPtr create_rowset(TabletSchemaSPtr tablet_schema,
SegmentsOverlapPB overlap,
+ const std::vector<vectorized::Block> blocks,
int64_t version,
+ bool is_vertical) {
+ auto writer_context = create_rowset_writer_context(tablet_schema,
overlap, UINT32_MAX,
+ {version, version});
+ auto res = RowsetFactory::create_rowset_writer(*engine_ref,
writer_context, is_vertical);
+ EXPECT_TRUE(res.has_value()) << res.error();
+ auto rowset_writer = std::move(res).value();
+
+ uint32_t num_rows = 0;
+ for (const auto& block : blocks) {
+ num_rows += block.rows();
+ EXPECT_TRUE(rowset_writer->add_block(&block).ok());
+ EXPECT_TRUE(rowset_writer->flush().ok());
+ }
+
+ RowsetSharedPtr rowset;
+ EXPECT_EQ(Status::OK(), rowset_writer->build(rowset));
+ EXPECT_EQ(blocks.size(), rowset->rowset_meta()->num_segments());
+ EXPECT_EQ(num_rows, rowset->rowset_meta()->num_rows());
+ return rowset;
+ }
+
+ std::string do_trunacte(std::string key) {
+ if (segments_key_bounds_truncation_enabled()) {
+ auto threshold = config::segments_key_bounds_truncation_threshold;
+ if (key.size() > threshold) {
+ key.resize(threshold);
+ }
+ }
+ return key;
+ }
+
+ bool segments_key_bounds_truncation_enabled() {
+ return (config::segments_key_bounds_truncation_threshold > 0);
+ }
+
+ void check_key_bounds(const std::vector<std::vector<std::string>>& data,
+ const std::vector<KeyBoundsPB>& segments_key_bounds)
{
+ // 1. check size
+ for (const auto& segments_key_bound : segments_key_bounds) {
+ const auto& min_key = segments_key_bound.min_key();
+ const auto& max_key = segments_key_bound.max_key();
+
+ if (segments_key_bounds_truncation_enabled()) {
+ EXPECT_LE(min_key.size(),
config::segments_key_bounds_truncation_threshold);
+ EXPECT_LE(max_key.size(),
config::segments_key_bounds_truncation_threshold);
+ }
+ }
+
+ // 2. check content
+ auto expected_key_bounds = get_expected_key_bounds(data);
+ for (std::size_t i = 0; i < expected_key_bounds.size(); i++) {
+ const auto& min_key = segments_key_bounds[i].min_key();
+ const auto& max_key = segments_key_bounds[i].max_key();
+
+ EXPECT_EQ(min_key, expected_key_bounds[i][0]);
+ EXPECT_EQ(max_key, expected_key_bounds[i][1]);
+ std::cout << fmt::format("min_key={}, size={}\nmax_key={},
size={}\n",
+ hexdump(min_key.data(), min_key.size()),
min_key.size(),
+ hexdump(max_key.data(), max_key.size()),
max_key.size());
+ }
+ }
+
+ std::vector<RowsetSharedPtr> create_rowsets(TabletSchemaSPtr tablet_schema,
+ const
std::vector<std::vector<std::string>>& data,
+ const std::vector<int64_t>&
truncate_lengths = {}) {
+ std::vector<RowsetSharedPtr> rowsets;
+ for (size_t i {0}; i < data.size(); i++) {
+ const auto rows = data[i];
+ if (!truncate_lengths.empty()) {
+ config::segments_key_bounds_truncation_threshold =
truncate_lengths[i];
+ }
+ std::vector<std::vector<std::string>> rowset_data {rows};
+ auto blocks = generate_blocks(tablet_schema, rowset_data);
+ RowsetSharedPtr rowset =
+ create_rowset(tablet_schema, NONOVERLAPPING, blocks,
cur_version++, false);
+
+ std::vector<KeyBoundsPB> segments_key_bounds;
+
rowset->rowset_meta()->get_segments_key_bounds(&segments_key_bounds);
+ for (const auto& segments_key_bound : segments_key_bounds) {
+ const auto& min_key = segments_key_bound.min_key();
+ const auto& max_key = segments_key_bound.max_key();
+
+ LOG(INFO) << fmt::format(
+ "\n==== rowset_id={}, segment_key_bounds_truncated={}
====\nmin_key={}, "
+ "size={}\nmax_key={}, size={}\n",
+ rowset->rowset_id().to_string(),
rowset->is_segments_key_bounds_truncated(),
+ min_key, min_key.size(), max_key, max_key.size());
+ }
+
+ rowsets.push_back(rowset);
+ RowsetReaderSharedPtr rs_reader;
+ EXPECT_TRUE(rowset->create_reader(&rs_reader));
+ }
+ for (std::size_t i {0}; i < truncate_lengths.size(); i++) {
+ EXPECT_EQ((truncate_lengths[i] > 0),
rowsets[i]->is_segments_key_bounds_truncated());
+ }
+ return rowsets;
+ }
+
+ TabletReader::ReaderParams create_reader_params(
+ TabletSchemaSPtr tablet_schema, const
std::vector<std::vector<std::string>>& data,
+ const std::vector<int64_t>& truncate_lengths = {}) {
+ TabletReader::ReaderParams reader_params;
+ std::vector<RowsetSharedPtr> rowsets =
+ create_rowsets(tablet_schema, data, truncate_lengths);
+ std::vector<RowSetSplits> rs_splits;
+ for (size_t i {0}; i < rowsets.size(); i++) {
+ RowsetReaderSharedPtr rs_reader;
+ EXPECT_TRUE(rowsets[i]->create_reader(&rs_reader));
+ RowSetSplits rs_split;
+ rs_split.rs_reader = rs_reader;
+ rs_splits.emplace_back(rs_split);
+ }
+ reader_params.rs_splits = std::move(rs_splits);
+ return reader_params;
+ }
+};
+
+TEST_F(SegmentsKeyBoundsTruncationTest, CompareFuncTest) {
+ // test `Slice::lhs_is_strictly_less_than_rhs`
+ // enumerating all possible combinations
+    // this test is redundant, n = 3 is enough
+ constexpr int n = 8;
+ std::vector<std::string> datas;
+ for (int l = 1; l <= n; l++) {
+ for (int x = 0; x < (1 << l); x++) {
+ datas.emplace_back(fmt::format("{:0{width}b}", x,
fmt::arg("width", l)));
+ }
+ }
+ std::cout << "datas.size()=" << datas.size() << "\n";
+
+ int count1 {0}, count2 {0}, total {0};
+ for (size_t i = 0; i < datas.size(); i++) {
+ for (size_t j = 0; j < datas.size(); j++) {
+ Slice X {datas[i]};
+ Slice Y {datas[j]};
+ for (int l1 = 0; l1 <= n; l1++) {
+ bool X_is_truncated = (l1 != 0);
+ Slice a {X};
+ if (X_is_truncated && X.get_size() >= l1) {
+ a.truncate(l1);
+ }
+ for (int l2 = 0; l2 <= n; l2++) {
+ bool Y_is_truncated = (l2 != 0);
+ Slice b {Y};
+ if (Y_is_truncated && Y.get_size() >= l2) {
+ b.truncate(l2);
+ }
+
+ bool res1 = Slice::lhs_is_strictly_less_than_rhs(a,
X_is_truncated, b,
+
Y_is_truncated);
+ bool res2 = (X.compare(Y) < 0);
+ ++total;
+ if (res1 && res2) {
+ ++count1;
+ }
+ if (res2) {
+ ++count2;
+ }
+ EXPECT_FALSE(res1 && !res2) << fmt::format(
+ "X={}, a={}, l1={}, Y={}, b={}, l2={}, res1={},
res2={}", X.to_string(),
+ a.to_string(), l1, Y.to_string(), b.to_string(),
l2, res1, res2);
+ }
+ }
+ }
+ }
+ std::cout << fmt::format("count1={}, count2={}, count1/count2={},
total={}\n", count1, count2,
+ double(count1) / count2, total);
+}
+
+TEST_F(SegmentsKeyBoundsTruncationTest, BasicTruncationTest) {
+ {
+ // 1. don't do segments key bounds truncation when the config is off
+ config::segments_key_bounds_truncation_threshold = -1;
+
+ auto tablet_schema = create_schema(100);
+ std::vector<std::vector<std::string>> data {{std::string(2, 'x'),
std::string(3, 'y')},
+ {std::string(4, 'a'),
std::string(15, 'b')},
+ {std::string(18, 'c'),
std::string(5, 'z')},
+ {std::string(20, '0'),
std::string(22, '1')}};
+ auto blocks = generate_blocks(tablet_schema, data);
+ RowsetSharedPtr rowset = create_rowset(tablet_schema, NONOVERLAPPING,
blocks, 2, false);
+
+ auto rowset_meta = rowset->rowset_meta();
+ EXPECT_EQ(false, rowset_meta->is_segments_key_bounds_truncated());
+ std::vector<KeyBoundsPB> segments_key_bounds;
+ rowset_meta->get_segments_key_bounds(&segments_key_bounds);
+ EXPECT_EQ(segments_key_bounds.size(), data.size());
+ check_key_bounds(data, segments_key_bounds);
+ }
+
+ {
+ // 2. do segments key bounds truncation when the config is on
+ config::segments_key_bounds_truncation_threshold = 10;
+
+ auto tablet_schema = create_schema(100);
+ std::vector<std::vector<std::string>> data {{std::string(2, 'x'),
std::string(3, 'y')},
+ {std::string(4, 'a'),
std::string(15, 'b')},
+ {std::string(18, 'c'),
std::string(5, 'z')},
+ {std::string(20, '0'),
std::string(22, '1')}};
+ auto blocks = generate_blocks(tablet_schema, data);
+ RowsetSharedPtr rowset = create_rowset(tablet_schema, NONOVERLAPPING,
blocks, 2, false);
+
+ auto rowset_meta = rowset->rowset_meta();
+ EXPECT_EQ(true, rowset_meta->is_segments_key_bounds_truncated());
+ std::vector<KeyBoundsPB> segments_key_bounds;
+ rowset_meta->get_segments_key_bounds(&segments_key_bounds);
+ EXPECT_EQ(segments_key_bounds.size(), data.size());
+ check_key_bounds(data, segments_key_bounds);
+ }
+
+ {
+        // 3. segments_key_bounds_truncated should be set to false if no
actual truncation happened
+ config::segments_key_bounds_truncation_threshold = 100;
+
+ auto tablet_schema = create_schema(100);
+ std::vector<std::vector<std::string>> data {{std::string(2, 'x'),
std::string(3, 'y')},
+ {std::string(4, 'a'),
std::string(15, 'b')},
+ {std::string(18, 'c'),
std::string(5, 'z')},
+ {std::string(20, '0'),
std::string(22, '1')}};
+ auto blocks = generate_blocks(tablet_schema, data);
+ RowsetSharedPtr rowset = create_rowset(tablet_schema, NONOVERLAPPING,
blocks, 2, false);
+
+ auto rowset_meta = rowset->rowset_meta();
+ EXPECT_EQ(false, rowset_meta->is_segments_key_bounds_truncated());
+ }
+}
+
+TEST_F(SegmentsKeyBoundsTruncationTest, BlockReaderJudgeFuncTest) {
+ auto tablet_schema = create_schema(100);
+
+ {
+ // all rowsets are truncated to same size
+        // keys are distinguishable from any index
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa", "bbbbb"},
+ {"cccccc", "dddddd"},
+ {"eeeeeee", "fffffff"},
+ {"xxxxxxx", "yyyyyyyy"}};
+ {
+ disable_segments_key_bounds_truncation();
+ TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data);
+ vectorized::BlockReader block_reader;
+
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
+ }
+
+ {
+ config::segments_key_bounds_truncation_threshold = 3;
+ TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data);
+ vectorized::BlockReader block_reader;
+            // can still determine that segments are mono ascending after
truncation
+
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
+ }
+ }
+
+ {
+ // all rowsets are truncated to same size
+        // keys are distinguishable from any index before truncation
+ // some keys are not comparable after truncation
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa", "bbbbb"},
+ {"cccccccccccc",
"ccdddddddd"},
+ {"cceeeeeeee", "fffffff"},
+ {"xxxxxxx", "yyyyyyyy"}};
+ {
+ disable_segments_key_bounds_truncation();
+ TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data);
+ vectorized::BlockReader block_reader;
+
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
+ }
+
+ {
+ config::segments_key_bounds_truncation_threshold = 6;
+ TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data);
+ vectorized::BlockReader block_reader;
+
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
+ }
+
+ {
+ config::segments_key_bounds_truncation_threshold = 3;
+ TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data);
+ vectorized::BlockReader block_reader;
+            // cannot determine whether rowset 2 and rowset 3 are mono
ascending
+
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
+ }
+ }
+
+ {
+ // all rowsets are truncated to same size
+ // keys are not mono ascending before truncation
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa", "bbbbb"},
+ {"bbbbb", "cccccccc"},
+ {"cccccccc", "xxxxxxx"},
+ {"xxxxxxx", "yyyyyyyy"}};
+ {
+ disable_segments_key_bounds_truncation();
+ TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data);
+ vectorized::BlockReader block_reader;
+
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
+ }
+
+ {
+ config::segments_key_bounds_truncation_threshold = 3;
+ TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data);
+ vectorized::BlockReader block_reader;
+
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
+ }
+ }
+
+ {
+ // some rowsets are truncated, some are not
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa",
"bbbbbbccccccc"},
+ {"bbbbbbddddddd",
"dddddd"}};
+ {
+ TabletReader::ReaderParams read_params =
+ create_reader_params(tablet_schema, data, {-1, 9});
+ vectorized::BlockReader block_reader;
+
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
+ }
+
+ {
+ TabletReader::ReaderParams read_params =
+ create_reader_params(tablet_schema, data, {-1, 4});
+ vectorized::BlockReader block_reader;
+
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
+ }
+
+ {
+ TabletReader::ReaderParams read_params =
+ create_reader_params(tablet_schema, data, {9, -1});
+ vectorized::BlockReader block_reader;
+
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
+ }
+
+ {
+ TabletReader::ReaderParams read_params =
+ create_reader_params(tablet_schema, data, {4, -1});
+ vectorized::BlockReader block_reader;
+
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
+ }
+ }
+
+ {
+ // some rowsets are truncated, some are not, truncated lengths may be
different
+ {
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa",
"bbbbbbbb"},
+ {"ccccccccc",
"dddddd"},
+ {"eeeeeee",
"ffffffggggg"},
+ {"ffffffhhhhhh",
"hhhhhhh"},
+ {"iiiiiiii",
"jjjjjjjjj"}};
+ TabletReader::ReaderParams read_params =
+ create_reader_params(tablet_schema, data, {4, 5, 4, -1,
6});
+ vectorized::BlockReader block_reader;
+
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
+ }
+ {
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa",
"bbbbbbbb"},
+ {"ccccccccc",
"dddddd"},
+ {"eeeeeee",
"ffffffggggg"},
+ {"ffffffhhhhhh",
"hhhhhhh"},
+ {"iiiiiiii",
"jjjjjjjjj"}};
+ TabletReader::ReaderParams read_params =
+ create_reader_params(tablet_schema, data, {4, 5, 8, -1,
6});
+ vectorized::BlockReader block_reader;
+
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
+ }
+
+ {
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa",
"bbbbbbbb"},
+ {"ccccccccc",
"dddddd"},
+ {"eeeeeee",
"ffffffggggg"},
+ {"ffffffhhhhhh",
"hhhhhhh"},
+ {"iiiiiiii",
"jjjjjjjjj"}};
+ TabletReader::ReaderParams read_params =
+ create_reader_params(tablet_schema, data, {4, 5, -1, 4,
6});
+ vectorized::BlockReader block_reader;
+
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
+ }
+ {
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa",
"bbbbbbbb"},
+ {"ccccccccc",
"dddddd"},
+ {"eeeeeee",
"ffffffggggg"},
+ {"ffffffhhhhhh",
"hhhhhhh"},
+ {"iiiiiiii",
"jjjjjjjjj"}};
+ TabletReader::ReaderParams read_params =
+ create_reader_params(tablet_schema, data, {4, 5, -1, 8,
6});
+ vectorized::BlockReader block_reader;
+
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
+ }
+
+ {
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa",
"bbbbbbbb"},
+ {"ccccccccc",
"dddddd"},
+ {"eeeeeee",
"ffffffggggg"},
+ {"ffffffhhhhhh",
"hhhhhhh"},
+ {"iiiiiiii",
"jjjjjjjjj"}};
+ TabletReader::ReaderParams read_params =
+ create_reader_params(tablet_schema, data, {4, 5, 8, 4, 6});
+ vectorized::BlockReader block_reader;
+
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
+ }
+ {
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa",
"bbbbbbbb"},
+ {"ccccccccc",
"dddddd"},
+ {"eeeeeee",
"ffffffggggg"},
+ {"ffffffhhhhhh",
"hhhhhhh"},
+ {"iiiiiiii",
"jjjjjjjjj"}};
+ TabletReader::ReaderParams read_params =
+ create_reader_params(tablet_schema, data, {4, 5, 4, 8, 6});
+ vectorized::BlockReader block_reader;
+
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
+ }
+ {
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa",
"bbbbbbbb"},
+ {"ccccccccc",
"dddddd"},
+ {"eeeeeee",
"ffffffggggg"},
+ {"ffffffhhhhhh",
"hhhhhhh"},
+ {"iiiiiiii",
"jjjjjjjjj"}};
+ TabletReader::ReaderParams read_params =
+ create_reader_params(tablet_schema, data, {4, 5, 8, 9, 6});
+ vectorized::BlockReader block_reader;
+
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
+ }
+ {
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa",
"bbbbbbbb"},
+ {"ccccccccc",
"dddddd"},
+ {"eeeeeee",
"ffffffggggg"},
+ {"ffffffhhhhhh",
"hhhhhhh"},
+ {"iiiiiiii",
"jjjjjjjjj"}};
+ TabletReader::ReaderParams read_params =
+ create_reader_params(tablet_schema, data, {4, 5, 3, 4, 6});
+ vectorized::BlockReader block_reader;
+
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
+ }
+ }
+}
+
+TEST_F(SegmentsKeyBoundsTruncationTest, OrderedCompactionTest) {
+ auto tablet_schema = create_schema(100);
+ config::enable_ordered_data_compaction = true;
+ config::ordered_data_compaction_min_segment_size = 1;
+
+ {
+ disable_segments_key_bounds_truncation();
+ TabletSharedPtr tablet = create_tablet(*tablet_schema, false);
+
EXPECT_TRUE(io::global_local_filesystem()->create_directory(tablet->tablet_path()).ok());
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa",
"bbbbbcccccc"},
+ {"bbbbbddddddd", "dddddd"},
+ {"eeeeeee", "fffffffff"},
+ {"gggggggg", "hhhhhhh"},
+ {"iiiiiiii", "jjjjjjjjj"}};
+ auto input_rowsets = create_rowsets(tablet_schema, data);
+ CumulativeCompaction cu_compaction(*engine_ref, tablet);
+ cu_compaction._input_rowsets = std::move(input_rowsets);
+ EXPECT_TRUE(cu_compaction.handle_ordered_data_compaction());
+ EXPECT_EQ(cu_compaction._input_rowsets.size(), data.size());
+ }
+
+ {
+ TabletSharedPtr tablet = create_tablet(*tablet_schema, false);
+
EXPECT_TRUE(io::global_local_filesystem()->create_directory(tablet->tablet_path()).ok());
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa",
"bbbbbcccccc"},
+ {"bbbbbddddddd", "dddddd"},
+ {"eeeeeee", "fffffffff"},
+ {"gggggggg", "hhhhhhh"},
+ {"iiiiiiii", "jjjjjjjjj"}};
+ auto input_rowsets = create_rowsets(tablet_schema, data, {4, 4, 4, 4,
4});
+ CumulativeCompaction cu_compaction(*engine_ref, tablet);
+ cu_compaction._input_rowsets = std::move(input_rowsets);
+ EXPECT_FALSE(cu_compaction.handle_ordered_data_compaction());
+ }
+
+ {
+ TabletSharedPtr tablet = create_tablet(*tablet_schema, false);
+
EXPECT_TRUE(io::global_local_filesystem()->create_directory(tablet->tablet_path()).ok());
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa",
"bbbbbcccccc"},
+ {"bbbbbddddddd", "dddddd"},
+ {"eeeeeee", "fffffffff"},
+ {"gggggggg", "hhhhhhh"},
+ {"iiiiiiii", "jjjjjjjjj"}};
+ auto input_rowsets = create_rowsets(tablet_schema, data, {4, 8, 4, 4,
4});
+ CumulativeCompaction cu_compaction(*engine_ref, tablet);
+ cu_compaction._input_rowsets = std::move(input_rowsets);
+ EXPECT_FALSE(cu_compaction.handle_ordered_data_compaction());
+ }
+
+ {
+ TabletSharedPtr tablet = create_tablet(*tablet_schema, false);
+
EXPECT_TRUE(io::global_local_filesystem()->create_directory(tablet->tablet_path()).ok());
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa",
"bbbbbcccccc"},
+ {"bbbbbddddddd", "dddddd"},
+ {"eeeeeee", "fffffffff"},
+ {"gggggggg", "hhhhhhh"},
+ {"iiiiiiii", "jjjjjjjjj"}};
+ auto input_rowsets = create_rowsets(tablet_schema, data, {8, 4, 4, 4,
4});
+ CumulativeCompaction cu_compaction(*engine_ref, tablet);
+ cu_compaction._input_rowsets = std::move(input_rowsets);
+ EXPECT_FALSE(cu_compaction.handle_ordered_data_compaction());
+ }
+
+ {
+ TabletSharedPtr tablet = create_tablet(*tablet_schema, false);
+
EXPECT_TRUE(io::global_local_filesystem()->create_directory(tablet->tablet_path()).ok());
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa",
"bbbbbcccccc"},
+ {"bbbbbddddddd", "dddddd"},
+ {"eeeeeee", "fffffffff"},
+ {"gggggggg", "hhhhhhh"},
+ {"iiiiiiii", "jjjjjjjjj"}};
+ auto input_rowsets = create_rowsets(tablet_schema, data, {8, 9, 4, 4,
4});
+ CumulativeCompaction cu_compaction(*engine_ref, tablet);
+ cu_compaction._input_rowsets = std::move(input_rowsets);
+ EXPECT_TRUE(cu_compaction.handle_ordered_data_compaction());
+ EXPECT_EQ(cu_compaction._input_rowsets.size(), data.size());
+ }
+
+ {
+ TabletSharedPtr tablet = create_tablet(*tablet_schema, false);
+
EXPECT_TRUE(io::global_local_filesystem()->create_directory(tablet->tablet_path()).ok());
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa",
"bbbbbcccccc"},
+ {"bbbbbddddddd", "dddddd"},
+ {"eeeeeee", "fffffffff"},
+ {"gggggggg", "hhhhhhh"},
+ {"iiiiiiii", "jjjjjjjjj"}};
+ auto input_rowsets = create_rowsets(tablet_schema, data, {8, -1, 4, 4,
4});
+ CumulativeCompaction cu_compaction(*engine_ref, tablet);
+ cu_compaction._input_rowsets = std::move(input_rowsets);
+ EXPECT_TRUE(cu_compaction.handle_ordered_data_compaction());
+ EXPECT_EQ(cu_compaction._input_rowsets.size(), data.size());
+ }
+
+ {
+ TabletSharedPtr tablet = create_tablet(*tablet_schema, false);
+
EXPECT_TRUE(io::global_local_filesystem()->create_directory(tablet->tablet_path()).ok());
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa",
"bbbbbcccccc"},
+ {"bbbbbddddddd", "dddddd"},
+ {"eeeeeee", "fffffffff"},
+ {"gggggggg", "hhhhhhh"},
+ {"iiiiiiii", "jjjjjjjjj"}};
+ auto input_rowsets = create_rowsets(tablet_schema, data, {-1, 9, 4, 4,
4});
+ CumulativeCompaction cu_compaction(*engine_ref, tablet);
+ cu_compaction._input_rowsets = std::move(input_rowsets);
+ EXPECT_TRUE(cu_compaction.handle_ordered_data_compaction());
+ EXPECT_EQ(cu_compaction._input_rowsets.size(), data.size());
+ }
+
+ {
+ TabletSharedPtr tablet = create_tablet(*tablet_schema, false);
+
EXPECT_TRUE(io::global_local_filesystem()->create_directory(tablet->tablet_path()).ok());
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa",
"bbbbbcccccc"},
+ {"bbbbbddddddd", "dddddd"},
+ {"eeeeeee", "fffffffff"},
+ {"gggggggg", "hhhhhhh"},
+ {"iiiiiiii", "jjjjjjjjj"}};
+ auto input_rowsets = create_rowsets(tablet_schema, data, {-1, 4, 4, 4,
4});
+ CumulativeCompaction cu_compaction(*engine_ref, tablet);
+ cu_compaction._input_rowsets = std::move(input_rowsets);
+ EXPECT_FALSE(cu_compaction.handle_ordered_data_compaction());
+ }
+
+ {
+ TabletSharedPtr tablet = create_tablet(*tablet_schema, false);
+
EXPECT_TRUE(io::global_local_filesystem()->create_directory(tablet->tablet_path()).ok());
+ std::vector<std::vector<std::string>> data {{"aaaaaaaaa",
"bbbbbcccccc"},
+ {"bbbbbddddddd", "dddddd"},
+ {"eeeeeee", "fffffffff"},
+ {"gggggggg", "hhhhhhh"},
+ {"iiiiiiii", "jjjjjjjjj"}};
+ auto input_rowsets = create_rowsets(tablet_schema, data, {4, -1, 4, 4,
4});
+ CumulativeCompaction cu_compaction(*engine_ref, tablet);
+ cu_compaction._input_rowsets = std::move(input_rowsets);
+ EXPECT_FALSE(cu_compaction.handle_ordered_data_compaction());
+ }
+}
+} // namespace doris
diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto
index 8904ffc74e4..92d6a9fe3b8 100644
--- a/gensrc/proto/olap_file.proto
+++ b/gensrc/proto/olap_file.proto
@@ -109,7 +109,8 @@ message RowsetMetaPB {
// latest write time
optional int64 newest_write_timestamp = 26 [default = -1];
// the encoded segment min/max key of segments in this rowset,
- // only used in unique key data model with primary_key_index support.
+ // ATTN: segments_key_bounds may be truncated! please refer to field
`segments_key_bounds_truncated`
+ // to check if these are truncated segments key bounds
repeated KeyBoundsPB segments_key_bounds = 27;
// tablet meta pb, for compaction
optional TabletSchemaPB tablet_schema = 28;
@@ -121,6 +122,8 @@ message RowsetMetaPB {
// For backup/restore, record the tablet id and rowset id of the source
cluster.
optional int64 source_tablet_id = 53;
optional string source_rowset_id = 54;
+    // indicate whether the segments key bounds are truncated
+    optional bool segments_key_bounds_truncated = 55;
// For cloud
// for data recycling
@@ -195,7 +198,8 @@ message RowsetMetaCloudPB {
// latest write time
optional int64 newest_write_timestamp = 26 [default = -1];
// the encoded segment min/max key of segments in this rowset,
- // only used in unique key data model with primary_key_index support.
+ // ATTN: segments_key_bounds may be truncated! please refer to field
`segments_key_bounds_truncated`
+ // to check if these are truncated segments key bounds
repeated KeyBoundsPB segments_key_bounds = 27;
// tablet meta pb, for compaction
optional TabletSchemaCloudPB tablet_schema = 28;
@@ -209,6 +213,8 @@ message RowsetMetaCloudPB {
// For backup/restore, record the tablet id and rowset id of the source
cluster.
optional int64 source_tablet_id = 53;
optional string source_rowset_id = 54;
+    // indicate whether the segments key bounds are truncated
+    optional bool segments_key_bounds_truncated = 55;
// cloud
// the field is a vector, rename it
diff --git
a/regression-test/data/unique_with_mow_p0/key_bounds/test_key_bounds_truncation_read_scenarios.out
b/regression-test/data/unique_with_mow_p0/key_bounds/test_key_bounds_truncation_read_scenarios.out
new file mode 100644
index 00000000000..07b0edba4b0
Binary files /dev/null and
b/regression-test/data/unique_with_mow_p0/key_bounds/test_key_bounds_truncation_read_scenarios.out
differ
diff --git
a/regression-test/data/unique_with_mow_p0/key_bounds/test_key_bounds_truncation_write_scenarios.out
b/regression-test/data/unique_with_mow_p0/key_bounds/test_key_bounds_truncation_write_scenarios.out
new file mode 100644
index 00000000000..0fdc0f6bd9e
Binary files /dev/null and
b/regression-test/data/unique_with_mow_p0/key_bounds/test_key_bounds_truncation_write_scenarios.out
differ
diff --git
a/regression-test/framework/src/main/groovy/org/apache/doris/regression/util/Http.groovy
b/regression-test/framework/src/main/groovy/org/apache/doris/regression/util/Http.groovy
index 2a63f8763df..e8535cae917 100644
---
a/regression-test/framework/src/main/groovy/org/apache/doris/regression/util/Http.groovy
+++
b/regression-test/framework/src/main/groovy/org/apache/doris/regression/util/Http.groovy
@@ -47,13 +47,17 @@ class Http {
}
}
- static Object GET(url, isJson = false) {
+ static Object GET(url, isJson = false, printText = true) {
def conn = new URL(url).openConnection()
conn.setRequestMethod('GET')
conn.setRequestProperty('Authorization', 'Basic cm9vdDo=') //token for
root
def code = conn.responseCode
def text = conn.content.text
- logger.info("http get url=${url}, isJson=${isJson}, response
code=${code}, text=${text}")
+ if (printText) {
+ logger.info("http get url=${url}, isJson=${isJson}, response
code=${code}, text=${text}")
+ } else {
+ logger.info("http get url=${url}, isJson=${isJson}, response
code=${code}")
+ }
Assert.assertEquals(200, code)
if (isJson) {
def json = new JsonSlurper()
diff --git
a/regression-test/suites/unique_with_mow_p0/key_bounds/test_key_bounds_truncation_basic.groovy
b/regression-test/suites/unique_with_mow_p0/key_bounds/test_key_bounds_truncation_basic.groovy
new file mode 100644
index 00000000000..d9fad5970d0
--- /dev/null
+++
b/regression-test/suites/unique_with_mow_p0/key_bounds/test_key_bounds_truncation_basic.groovy
@@ -0,0 +1,122 @@
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import com.google.common.collect.Maps
+import org.apache.doris.regression.util.Http
+
+suite("test_key_bounds_truncation_basic", "nonConcurrent") {
+
+ // see be/src/util/key_util.h:50
+ def keyNormalMarker = new String(new Byte[]{2})
+
+ def tableName = "test_key_bounds_truncation_basic"
+ sql """ DROP TABLE IF EXISTS ${tableName} force;"""
+ sql """ CREATE TABLE ${tableName} (
+ `k` varchar(65533) NOT NULL,
+ `v` int)
+ UNIQUE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 1
+ PROPERTIES("replication_num" = "1",
+ "enable_unique_key_merge_on_write" = "true",
+ "disable_auto_compaction" = "true"); """
+
+ def getRowsetMetas = { int version ->
+ def metaUrl = sql_return_maparray("show tablets from
${tableName};").get(0).MetaUrl
+ def jsonMeta = Http.GET(metaUrl, true, false)
+ for (def meta : jsonMeta.rs_metas) {
+ int end_version = meta.end_version
+ if (end_version == version) {
+ return meta
+ }
+ }
+ }
+
+ def truncateString = { String s, int l ->
+ if (s.size() > l) {
+ return s.substring(0, l)
+ }
+ return s
+ }
+
+ def checkKeyBounds = { String k1, String k2, int version, boolean
doTruncation, int length, boolean expected ->
+ def rowsetMeta = getRowsetMetas(version)
+ def keyBounds = rowsetMeta.segments_key_bounds
+ assertEquals(keyBounds.size(), 1)
+ def bounds = keyBounds.get(0)
+
+ String min_key = bounds.min_key
+ String max_key = bounds.max_key
+
+ String expected_min_key = keyNormalMarker + k1
+ String expected_max_key = keyNormalMarker + k2
+ if (doTruncation) {
+ expected_min_key = truncateString(expected_min_key, length)
+ expected_max_key = truncateString(expected_max_key, length)
+ }
+
+ logger.info("\nk1=${k1}, size=${k1.size()}, k2=${k2},
size=${k2.size()}")
+ logger.info("\nexpected_min_key=${expected_min_key},
size=${expected_min_key.size()}, expected_max_key=${expected_max_key},
size=${expected_max_key.size()}")
+ logger.info("\nmin_key=${min_key},
size=${min_key.size()}\nmax_key=${max_key}, size=${max_key.size()}")
+
logger.info("\nsegments_key_bounds_truncated=${rowsetMeta.segments_key_bounds_truncated},
expected=${expected}")
+ assertEquals(min_key, expected_min_key)
+ assertEquals(max_key, expected_max_key)
+
+ assertEquals(expected, rowsetMeta.segments_key_bounds_truncated)
+ }
+
+ int curVersion = 1
+
+ // 1. turn off enable_segments_key_bounds_truncation, should not do
truncation
+ def customBeConfig = [
+ segments_key_bounds_truncation_threshold : -1
+ ]
+
+ setBeConfigTemporary(customBeConfig) {
+ String key1 = "aaaaaaaaaaaa"
+ String key2 = "bbbbbbzzzzzzzzzzz"
+ sql """insert into ${tableName} values("$key1", 1), ("$key2", 2);"""
+ checkKeyBounds(key1, key2, ++curVersion, false, -1, false)
+ }
+
+ // 2. turn on enable_segments_key_bounds_truncation, should do truncation
+ customBeConfig = [
+ segments_key_bounds_truncation_threshold : 6
+ ]
+
+ setBeConfigTemporary(customBeConfig) {
+ String key1 = "aa"
+ String key2 = "bbbb"
+ sql """insert into ${tableName} values("$key1", 1), ("$key2", 2);"""
+ checkKeyBounds(key1, key2, ++curVersion, true, 6, false)
+
+ key1 = "000000000000000"
+ key2 = "1111111111111111111"
+ sql """insert into ${tableName} values("$key1", 1), ("$key2", 2);"""
+ checkKeyBounds(key1, key2, ++curVersion, true, 6, true)
+
+ key1 = "xxx"
+ key2 = "yyyyyyyyyyyyyyyyyyyyy"
+ sql """insert into ${tableName} values("$key1", 1), ("$key2", 2);"""
+ checkKeyBounds(key1, key2, ++curVersion, true, 6, true)
+
+ key1 = "cccccccccccccccccccc"
+ key2 = "dddd"
+ sql """insert into ${tableName} values("$key1", 1), ("$key2", 2);"""
+ checkKeyBounds(key1, key2, ++curVersion, true, 6, true)
+ }
+
+}
diff --git
a/regression-test/suites/unique_with_mow_p0/key_bounds/test_key_bounds_truncation_read_scenarios.groovy
b/regression-test/suites/unique_with_mow_p0/key_bounds/test_key_bounds_truncation_read_scenarios.groovy
new file mode 100644
index 00000000000..2620e302faa
--- /dev/null
+++
b/regression-test/suites/unique_with_mow_p0/key_bounds/test_key_bounds_truncation_read_scenarios.groovy
@@ -0,0 +1,100 @@
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import com.google.common.collect.Maps
+import org.apache.commons.lang.RandomStringUtils
+import org.apache.doris.regression.util.Http
+import java.util.concurrent.TimeUnit
+import org.awaitility.Awaitility
+
suite("test_key_bounds_truncation_read_scenarios", "nonConcurrent") {

    def tableName = "test_key_bounds_truncation_read_scenarios"

    sql "DROP TABLE IF EXISTS ${tableName}"
    sql """
        CREATE TABLE `${tableName}` (
            `k1` int NOT NULL,
            `k2` int NOT NULL,
            `k3` int NOT NULL,
            `c1` int NOT NULL )
        ENGINE=OLAP UNIQUE KEY(k1,k2,k3)
        DISTRIBUTED BY HASH(k1,k2,k3) BUCKETS 1
        PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "disable_auto_compaction"="true",
        "store_row_column" = "true", "enable_mow_light_delete" = "false" );
    """

    // Fetch the rowset meta whose end_version equals `version` from the
    // tablet's meta HTTP endpoint. Fails with a clear assertion when no
    // such rowset exists, instead of implicitly returning null (which
    // previously surfaced as an NPE on the caller's property access).
    def getRowsetMetas = { int version ->
        def metaUrl = sql_return_maparray("show tablets from ${tableName};").get(0).MetaUrl
        def jsonMeta = Http.GET(metaUrl, true, false)
        def meta = jsonMeta.rs_metas.find { (it.end_version as int) == version }
        assertTrue(meta != null)
        return meta
    }

    // Verify that the rowset of `version` carries the expected truncation
    // flag and that every segment's min/max key is at most `length` bytes.
    def checkKeyBounds = { int version, int length, boolean turnedOn ->
        def rowsetMeta = getRowsetMetas(version)
        def keyBounds = rowsetMeta.segments_key_bounds

        logger.info("\nversion=${version}, segments_key_bounds_truncated=${rowsetMeta.segments_key_bounds_truncated}, turnedOn=${turnedOn}")
        assertEquals(turnedOn, rowsetMeta.segments_key_bounds_truncated)

        for (def bounds : keyBounds) {
            String min_key = bounds.min_key
            String max_key = bounds.max_key
            logger.info("\nmin_key=${min_key}, size=${min_key.size()}\nmax_key=${max_key}, size=${max_key.size()}")
            assertTrue(min_key.size() <= length)
            assertTrue(max_key.size() <= length)
        }
    }


    def customBeConfig = [
        segments_key_bounds_truncation_threshold : 2
    ]

    setBeConfigTemporary(customBeConfig) {
        // 1. mow load: each insert produces one rowset (versions 2..91); with
        // a threshold of 2 every rowset's key bounds must be truncated.
        int k1 = 3757202
        for (int j = 1; j <= 10; j++) {
            for (int i = 1; i <= 9; i++) {
                sql """insert into ${tableName} values
                    (${k1},${i},1,9),
                    (${k1},${i},2,8),
                    (${k1},${i},3,7)"""
            }
        }
        (2..91).each { idx ->
            checkKeyBounds(idx, 2, true)
        }
        qt_sql "select * from ${tableName} order by k1,k2,k3;"


        // 2. point lookup on mow table: short-circuit reads must still locate
        // the correct rows even though the stored key bounds were truncated.
        for (int i = 1; i <= 9; i++) {
            explain {
                sql """ select * from ${tableName} where k1=${k1} and k2=${i} and k3=1; """
                contains "SHORT-CIRCUIT"
            }
            qt_sql """ select * from ${tableName} where k1=${k1} and k2=${i} and k3=1; """
        }
    }
}
diff --git
a/regression-test/suites/unique_with_mow_p0/key_bounds/test_key_bounds_truncation_write_scenarios.groovy
b/regression-test/suites/unique_with_mow_p0/key_bounds/test_key_bounds_truncation_write_scenarios.groovy
new file mode 100644
index 00000000000..bbb479a7bd3
--- /dev/null
+++
b/regression-test/suites/unique_with_mow_p0/key_bounds/test_key_bounds_truncation_write_scenarios.groovy
@@ -0,0 +1,284 @@
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import com.google.common.collect.Maps
+import org.apache.commons.lang.RandomStringUtils
+import org.apache.doris.regression.util.Http
+import java.util.concurrent.TimeUnit
+import org.awaitility.Awaitility
+
suite("test_key_bounds_truncation_write_scenarios", "nonConcurrent") {

    def tableName = "test_key_bounds_truncation_write_scenarios"
    sql """ DROP TABLE IF EXISTS ${tableName} force;"""
    sql """ CREATE TABLE ${tableName} (
        `k` varchar(65533) NOT NULL,
        `v1` int,
        v2 int,
        v3 int not null default '99')
        UNIQUE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 1
        PROPERTIES("replication_num" = "1",
        "enable_unique_key_merge_on_write" = "true",
        "disable_auto_compaction" = "true"); """

    // Debug helper: dump each tablet's rowset list via its compaction
    // status HTTP endpoint.
    def printCompactionStatus = { tblName ->
        def tablets = sql_return_maparray("show tablets from ${tblName};")
        for (def tabletStat : tablets) {
            def compactionStatusUrl = tabletStat.CompactionStatus
            def jsonMeta = Http.GET(compactionStatusUrl, true, false)
            logger.info("${jsonMeta.rowsets}")
        }
    }

    // Check that rowset metas (all of them, or only the one whose
    // end_version equals `version` when version != -1) are marked as
    // truncated and that each segment's min/max key is at most `length`
    // bytes long.
    def checkKeyBounds = { int length, int version = -1 ->
        def tablets = sql_return_maparray("show tablets from ${tableName};")
        for (def tabletStat : tablets) {
            def metaUrl = tabletStat.MetaUrl
            def tabletId = tabletStat.TabletId
            logger.info("begin curl ${metaUrl}")
            def jsonMeta = Http.GET(metaUrl, true, false)
            for (def meta : jsonMeta.rs_metas) {
                int end_version = meta.end_version
                if (version != -1 && version != end_version) {
                    continue
                }

                logger.info("version=[${meta.start_version}-${meta.end_version}], meta.segments_key_bounds_truncated=${meta.segments_key_bounds_truncated}")
                // empty rowsets (and the initial [0-1] version) carry no key
                // bounds, so only non-empty data rowsets must be marked
                if (end_version >= 2 && meta.num_rows > 0) {
                    assert meta.segments_key_bounds_truncated
                }
                for (def bounds : meta.segments_key_bounds) {
                    String min_key = bounds.min_key
                    String max_key = bounds.max_key
                    // only check length here
                    logger.info("tablet_id=${tabletId}, version=[${meta.start_version}-${meta.end_version}]\nmin_key=${min_key}, size=${min_key.size()}\nmax_key=${max_key}, size=${max_key.size()}")
                    assert min_key.size() <= length
                    assert max_key.size() <= length
                }
            }
        }
    }

    // The publish-phase debug points differ between cloud and local mode;
    // these four helpers hide that distinction.
    def enable_publish_spin_wait = {
        if (isCloudMode()) {
            GetDebugPoint().enableDebugPointForAllFEs("CloudGlobalTransactionMgr.getDeleteBitmapUpdateLock.enable_spin_wait")
        } else {
            GetDebugPoint().enableDebugPointForAllBEs("EnginePublishVersionTask::execute.enable_spin_wait")
        }
    }

    def disable_publish_spin_wait = {
        if (isCloudMode()) {
            GetDebugPoint().disableDebugPointForAllFEs("CloudGlobalTransactionMgr.getDeleteBitmapUpdateLock.enable_spin_wait")
        } else {
            GetDebugPoint().disableDebugPointForAllBEs("EnginePublishVersionTask::execute.enable_spin_wait")
        }
    }

    def enable_block_in_publish = {
        if (isCloudMode()) {
            GetDebugPoint().enableDebugPointForAllFEs("CloudGlobalTransactionMgr.getDeleteBitmapUpdateLock.block")
        } else {
            GetDebugPoint().enableDebugPointForAllBEs("EnginePublishVersionTask::execute.block")
        }
    }

    def disable_block_in_publish = {
        if (isCloudMode()) {
            GetDebugPoint().disableDebugPointForAllFEs("CloudGlobalTransactionMgr.getDeleteBitmapUpdateLock.block")
        } else {
            GetDebugPoint().disableDebugPointForAllBEs("EnginePublishVersionTask::execute.block")
        }
    }


    Random random = new Random()
    // Random alphabetic key of length 20..219 — always longer than every
    // truncation threshold used below, so truncation must kick in.
    def randomString = { ->
        int count = random.nextInt(200) + 20
        return RandomStringUtils.randomAlphabetic(count);
    }

    def customBeConfig = [
        segments_key_bounds_truncation_threshold : 20
    ]

    setBeConfigTemporary(customBeConfig) {

        // 1. load
        logger.info("============= load ==============")
        int m = 10, n = 20
        for (int i = 0; i < m; i++) {
            String sqlStr = "insert into ${tableName} values"
            for (int j = 1; j <= n; j++) {
                sqlStr += """("${randomString()}", 1, 1, 1)"""
                if (j < n) {
                    sqlStr += ","
                }
            }
            sqlStr += ";"
            sql sqlStr
        }
        printCompactionStatus(tableName)
        checkKeyBounds(20)


        // 2. partial update with publish conflict, will generate new segment and update rowset in publish phase
        logger.info("============= partial update ==============")
        set_be_param("segments_key_bounds_truncation_threshold", 16)
        Thread.sleep(2000)
        try {
            GetDebugPoint().clearDebugPointsForAllFEs()
            GetDebugPoint().clearDebugPointsForAllBEs()

            enable_publish_spin_wait()
            enable_block_in_publish()

            String values = ""
            for (int i = 1; i <= m; i++) {
                values += """("${randomString()}", 2)"""
                if (i < m) {
                    values += ","
                }
            }

            Thread.sleep(200)

            def t1 = Thread.start {
                sql "set enable_insert_strict = false;"
                sql "set enable_unique_key_partial_update = true;"
                sql "sync;"
                sql """ insert into ${tableName}(k,v1) values ${values};"""
            }

            def t2 = Thread.start {
                sql "set enable_insert_strict = false;"
                sql "set enable_unique_key_partial_update = true;"
                sql "sync;"
                sql """ insert into ${tableName}(k,v2) values ${values};"""
            }

            Thread.sleep(1500)
            disable_publish_spin_wait()
            disable_block_in_publish()

            t1.join()
            t2.join()

            sql "set enable_unique_key_partial_update = false;"
            sql "set enable_insert_strict = true;"
            sql "sync;"

            Thread.sleep(1000)
            printCompactionStatus(tableName)
            checkKeyBounds(16, 12)
            checkKeyBounds(16, 13)

        } finally {
            disable_publish_spin_wait()
            disable_block_in_publish()
        }


        // 3. schema change
        def doSchemaChange = { cmd ->
            sql cmd
            waitForSchemaChangeDone {
                sql """SHOW ALTER TABLE COLUMN WHERE TableName='${tableName}' ORDER BY createtime DESC LIMIT 1"""
                time 20000
            }
        }

        // direct schema change
        logger.info("============= schema change 1 ==============")
        set_be_param("segments_key_bounds_truncation_threshold", 12)
        Thread.sleep(1000)
        doSchemaChange " ALTER table ${tableName} modify column v2 varchar(100)"
        printCompactionStatus(tableName)
        checkKeyBounds(12)
        sql "insert into ${tableName} select * from ${tableName};"
        def res1 = sql "select k from ${tableName} group by k having count(*)>1;"
        assert res1.size() == 0

        // linked schema change
        logger.info("============= schema change 2 ==============")
        set_be_param("segments_key_bounds_truncation_threshold", 20)
        Thread.sleep(1000)
        doSchemaChange " ALTER table ${tableName} modify column v3 int null default '99'"
        printCompactionStatus(tableName)
        // will use previous rowsets' segment key bounds
        // so the length is still 12
        checkKeyBounds(12)
        sql "insert into ${tableName} select * from ${tableName};"
        def res2 = sql "select k from ${tableName} group by k having count(*)>1;"
        assert res2.size() == 0

        // sort schema change
        logger.info("============= schema change 3 ==============")
        set_be_param("segments_key_bounds_truncation_threshold", 15)
        Thread.sleep(2000)
        doSchemaChange " ALTER table ${tableName} add column k2 int key after k;"
        doSchemaChange " ALTER table ${tableName} order by (k2,k,v1,v2,v3);"
        printCompactionStatus(tableName)
        checkKeyBounds(15)
        sql "insert into ${tableName} select * from ${tableName};"
        def res3 = sql "select k from ${tableName} group by k having count(*)>1;"
        assert res3.size() == 0

        // 4. compaction
        logger.info("============= compaction ==============")
        set_be_param("segments_key_bounds_truncation_threshold", 8)
        Thread.sleep(2000)
        def triggerFullCompaction = {
            def beNodes = sql_return_maparray("show backends;")
            def tabletStat = sql_return_maparray("show tablets from ${tableName};").get(0)
            def tabletBackendId = tabletStat.BackendId
            def tabletId = tabletStat.TabletId
            def tabletBackend
            for (def be : beNodes) {
                if (be.BackendId == tabletBackendId) {
                    tabletBackend = be
                    break;
                }
            }
            // fail fast with a clear message instead of NPE-ing below when
            // the tablet's hosting BE is not in the backends list
            assert tabletBackend != null

            logger.info("trigger compaction on another BE ${tabletBackend.Host} with backendId=${tabletBackend.BackendId}")
            def (code, out, err) = be_run_full_compaction(tabletBackend.Host, tabletBackend.HttpPort, tabletId)
            logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
            // JUnit convention: expected value first (was assertEquals(code, 0))
            assertEquals(0, code)
            def compactJson = parseJson(out.trim())
            assertEquals("success", compactJson.status.toLowerCase())

            // wait for full compaction to complete
            Awaitility.await().atMost(3, TimeUnit.SECONDS).pollDelay(200, TimeUnit.MILLISECONDS).pollInterval(100, TimeUnit.MILLISECONDS).until(
                {
                    (code, out, err) = be_get_compaction_status(tabletBackend.Host, tabletBackend.HttpPort, tabletId)
                    logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err)
                    assertEquals(0, code)
                    def compactionStatus = parseJson(out.trim())
                    assertEquals("success", compactionStatus.status.toLowerCase())
                    return !compactionStatus.run_status
                }
            )
        }

        // trigger full compaction on tablet
        triggerFullCompaction()
        checkKeyBounds(8)

        qt_sql "select count(*) from ${tableName};"
    }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]