This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new f466668d48 [improvement] each tuple starting at aligned address to build with ubsan enabled (#8831) f466668d48 is described below commit f466668d4892c00f0c02cee35a6765b5bd575d28 Author: Yongqiang YANG <98214048+dataroar...@users.noreply.github.com> AuthorDate: Thu Jun 23 14:03:01 2022 +0800 [improvement] each tuple starting at aligned address to build with ubsan enabled (#8831) When I builded doris be with ubsan enabled and enabled vectorization, be core dump at doris::DecimalV2Value::operator long(). It cored because accessing on a non-aligned address by sse. With ubsan enabled, compile generates different assemble code including sse instruction. A sender serializes tuples to a contiguous memory area, while a receiver just copy it. So we should align each tuple offset to 16 bytes. For compatibility, we should use a config to control it. BTW: with tools like ubsan, asan, tsan we can find bugs more easily, e.g. #8815. It is difficult to find the bug without ubsan. Anyway, we should use modern tools to be more productive. --- be/src/common/config.h | 1 + be/src/runtime/row_batch.cpp | 20 +++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/be/src/common/config.h b/be/src/common/config.h index fd7ebdd395..d3c189231b 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -144,6 +144,7 @@ CONF_String(doris_cgroups, ""); CONF_Int32(num_threads_per_core, "3"); // if true, compresses tuple data in Serialize CONF_mBool(compress_rowbatches, "true"); +CONF_mBool(rowbatch_align_tuple_offset, "false"); // interval between profile reports; in seconds CONF_mInt32(status_report_interval, "5"); // if true, each disk will have a separate thread pool for scanner diff --git a/be/src/runtime/row_batch.cpp b/be/src/runtime/row_batch.cpp index 8627542a85..74a79a6db6 100644 --- a/be/src/runtime/row_batch.cpp +++ b/be/src/runtime/row_batch.cpp @@ -156,6 +156,10 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const PRowBatch& input_batch) for (auto slot : desc->string_slots()) { DCHECK(slot->type().is_string_type()); + if (tuple->is_null(slot->null_indicator_offset())) { + continue; + } + StringValue* string_val = tuple->get_string_slot(slot->tuple_offset()); int64_t offset = convert_to<int64_t>(string_val->ptr); string_val->ptr = tuple_data + offset; @@ -209,6 +213,14 @@ RowBatch::~RowBatch() { clear(); } +static inline size_t align_tuple_offset(size_t offset) { + if (config::rowbatch_align_tuple_offset) { + return (offset + alignof(std::max_align_t) - 1) & (~(alignof(std::max_align_t) - 1)); + } + + return offset; +} + Status RowBatch::serialize(PRowBatch* output_batch, size_t* uncompressed_size, size_t* compressed_size, bool allow_transfer_large_data) { // num_rows @@ -247,11 +259,16 @@ Status RowBatch::serialize(PRowBatch* output_batch, size_t* uncompressed_size, mutable_new_tuple_offsets->Add(-1); continue; } + + int64_t old_offset = offset; + offset = align_tuple_offset(offset); + tuple_data += offset - old_offset; + // Record offset before creating copy (which increments offset and tuple_data) mutable_tuple_offsets->Add((int32_t)offset); mutable_new_tuple_offsets->Add(offset); row->get_tuple(j)->deep_copy(*desc, &tuple_data, &offset, /* convert_ptrs */ true); - CHECK_LE(offset, tuple_byte_size); + CHECK_GE(offset, 0); } } CHECK_EQ(offset, tuple_byte_size) @@ -541,6 +558,7 @@ size_t RowBatch::total_byte_size() const { if (tuple == nullptr) { continue; } + result = align_tuple_offset(result); result += desc->byte_size(); for (auto slot : desc->string_slots()) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org