This is an automated email from the ASF dual-hosted git repository. panxiaolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new c9519d50755 [fix] Fixed length error in compress.cpp (#48210) c9519d50755 is described below commit c9519d507550abf95ee96604e955f8844f3e7e7a Author: lzy <2972013...@qq.com> AuthorDate: Wed Feb 26 14:10:15 2025 +0800 [fix] Fixed length error in compress.cpp (#48210) ### What problem does this PR solve? Fixed length error in compress.cpp Issue Number: close #xxx Related PR: #47307 Problem Summary: The length prefix stored in front of a compressed string occupies 4 bytes, but the column offset was being advanced by 10. This change uses 4 bytes consistently and replaces the magic value with the constant COMPRESS_STR_LENGTH. It also adds regression-test examples of multi-line queries that read from a table. ### Release note None ### Check List (For Author) - Test <!-- At least one of them must be included. --> - [x] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason <!-- Add your reason? --> - Behavior changed: - [x] No. - [ ] Yes. <!-- Explain the behavior change --> - Does this need documentation? - [x] No. - [ ] Yes. <!-- Add document PR link here. 
eg: https://github.com/apache/doris-website/pull/1214 --> ### Check List (For Reviewer who merge this PR) - [x] Confirm the release note - [x] Confirm test cases - [x] Confirm document - [x] Add branch pick label <!-- Add branch pick label that this PR should merge into --> --- be/src/common/kerberos/kerberos_ticket_mgr.cpp | 1 + be/src/vec/functions/function_compress.cpp | 14 ++++++++------ .../string_functions/test_compress_uncompress.out | Bin 741 -> 1321 bytes .../test_compress_uncompress.groovy | 20 ++++++++++++++++++++ 4 files changed, 29 insertions(+), 6 deletions(-) diff --git a/be/src/common/kerberos/kerberos_ticket_mgr.cpp b/be/src/common/kerberos/kerberos_ticket_mgr.cpp index 9020a692f3b..45dca7eca4b 100644 --- a/be/src/common/kerberos/kerberos_ticket_mgr.cpp +++ b/be/src/common/kerberos/kerberos_ticket_mgr.cpp @@ -17,6 +17,7 @@ #include "common/kerberos/kerberos_ticket_mgr.h" +#include <chrono> #include <iomanip> #include <sstream> diff --git a/be/src/vec/functions/function_compress.cpp b/be/src/vec/functions/function_compress.cpp index b645e944bfe..0a50cfce01d 100644 --- a/be/src/vec/functions/function_compress.cpp +++ b/be/src/vec/functions/function_compress.cpp @@ -52,6 +52,8 @@ class FunctionContext; namespace doris::vectorized { +static constexpr size_t COMPRESS_STR_LENGTH = 4; + class FunctionCompress : public IFunction { public: static constexpr auto name = "compress"; @@ -103,17 +105,17 @@ public: // Z_MEM_ERROR and Z_BUF_ERROR are already handled in compress, making sure st is always Z_OK RETURN_IF_ERROR(compression_codec->compress(data, &compressed_str)); - col_data.resize(col_data.size() + 4 + compressed_str.size()); + col_data.resize(col_data.size() + COMPRESS_STR_LENGTH + compressed_str.size()); std::memcpy(col_data.data() + idx, &length, sizeof(length)); - idx += 4; + idx += COMPRESS_STR_LENGTH; // The length of compress_str is not known in advance, so it cannot be compressed directly into col_data unsigned char* src = 
compressed_str.data(); for (size_t i = 0; i < compressed_str.size(); idx++, i++, src++) { col_data[idx] = *src; } - col_offset[row] = col_offset[row - 1] + 10 + compressed_str.size(); + col_offset[row] = col_offset[row - 1] + COMPRESS_STR_LENGTH + compressed_str.size(); } block.replace_by_position(result, std::move(result_column)); @@ -174,16 +176,16 @@ public: } union { - char bytes[4]; + char bytes[COMPRESS_STR_LENGTH]; uint32_t value; } length; - std::memcpy(length.bytes, data.data, 4); + std::memcpy(length.bytes, data.data, COMPRESS_STR_LENGTH); size_t idx = col_data.size(); col_data.resize(col_data.size() + length.value); uncompressed_slice = Slice(col_data.data() + idx, length.value); - Slice compressed_data(data.data + 4, data.size - 4); + Slice compressed_data(data.data + COMPRESS_STR_LENGTH, data.size - COMPRESS_STR_LENGTH); auto st = compression_codec->decompress(compressed_data, &uncompressed_slice); if (!st.ok()) { // is not a legal compressed string diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_compress_uncompress.out b/regression-test/data/query_p0/sql_functions/string_functions/test_compress_uncompress.out index be60951c955..b54c3b71d63 100644 Binary files a/regression-test/data/query_p0/sql_functions/string_functions/test_compress_uncompress.out and b/regression-test/data/query_p0/sql_functions/string_functions/test_compress_uncompress.out differ diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_compress_uncompress.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_compress_uncompress.groovy index 9c4df7b1ec9..7aa753891c9 100644 --- a/regression-test/suites/query_p0/sql_functions/string_functions/test_compress_uncompress.groovy +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_compress_uncompress.groovy @@ -136,4 +136,24 @@ suite("test_compress_uncompress") { UNCOMPRESS(COMPRESS('12345')) AS decompressed_data LIMIT 1; """ + + // Test 12: 
Multiple COMPRESS calls that COMPRESS the text_col field multiple times directly from the table + order_qt_compress_multiple_calls_from_table """ + SELECT + k0, + COMPRESS(text_col) AS comp1, + binary_col AS comp2 + FROM test_compression + ORDER BY k0; + """ + + // Test 13: multiple COMPRESS and UNCOMPRESS calls + order_qt_compress_uncompress_multiple_calls_from_table """ + SELECT + k0, + text_col AS result1, + UNCOMPRESS(binary_col) AS result2 + FROM test_compression + ORDER BY k0; + """ } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org