This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c9519d50755 [fix] Fixed length error in compress.cpp (#48210)
c9519d50755 is described below

commit c9519d507550abf95ee96604e955f8844f3e7e7a
Author: lzy <2972013...@qq.com>
AuthorDate: Wed Feb 26 14:10:15 2025 +0800

    [fix] Fixed length error in compress.cpp (#48210)
    
    ### What problem does this PR solve?
    Fixed length error in compress.cpp
    
    Issue Number: close #xxx
    
    Related PR: #47307
    
    Problem Summary:
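    The length prefix stored in front of the compressed string is 4 bytes,
    but the column offset calculation added 10 instead of 4. This commit
    fixes the offset and replaces the magic values with a named constant
    (COMPRESS_STR_LENGTH). It also adds regression test cases that run
    COMPRESS and UNCOMPRESS over columns read from a table.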
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [x] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
    - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [x] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [x] No.
    - [ ] Yes. <!-- Add document PR link here. eg:
    https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [x] Confirm the release note
    - [x] Confirm test cases
    - [x] Confirm document
    - [x] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 be/src/common/kerberos/kerberos_ticket_mgr.cpp      |   1 +
 be/src/vec/functions/function_compress.cpp          |  14 ++++++++------
 .../string_functions/test_compress_uncompress.out   | Bin 741 -> 1321 bytes
 .../test_compress_uncompress.groovy                 |  20 ++++++++++++++++++++
 4 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/be/src/common/kerberos/kerberos_ticket_mgr.cpp 
b/be/src/common/kerberos/kerberos_ticket_mgr.cpp
index 9020a692f3b..45dca7eca4b 100644
--- a/be/src/common/kerberos/kerberos_ticket_mgr.cpp
+++ b/be/src/common/kerberos/kerberos_ticket_mgr.cpp
@@ -17,6 +17,7 @@
 
 #include "common/kerberos/kerberos_ticket_mgr.h"
 
+#include <chrono>
 #include <iomanip>
 #include <sstream>
 
diff --git a/be/src/vec/functions/function_compress.cpp 
b/be/src/vec/functions/function_compress.cpp
index b645e944bfe..0a50cfce01d 100644
--- a/be/src/vec/functions/function_compress.cpp
+++ b/be/src/vec/functions/function_compress.cpp
@@ -52,6 +52,8 @@ class FunctionContext;
 
 namespace doris::vectorized {
 
+static constexpr size_t COMPRESS_STR_LENGTH = 4;
+
 class FunctionCompress : public IFunction {
 public:
     static constexpr auto name = "compress";
@@ -103,17 +105,17 @@ public:
 
             // Z_MEM_ERROR and Z_BUF_ERROR are already handled in compress, 
making sure st is always Z_OK
             RETURN_IF_ERROR(compression_codec->compress(data, 
&compressed_str));
-            col_data.resize(col_data.size() + 4 + compressed_str.size());
+            col_data.resize(col_data.size() + COMPRESS_STR_LENGTH + 
compressed_str.size());
 
             std::memcpy(col_data.data() + idx, &length, sizeof(length));
-            idx += 4;
+            idx += COMPRESS_STR_LENGTH;
 
             // The length of compress_str is not known in advance, so it 
cannot be compressed directly into col_data
             unsigned char* src = compressed_str.data();
             for (size_t i = 0; i < compressed_str.size(); idx++, i++, src++) {
                 col_data[idx] = *src;
             }
-            col_offset[row] = col_offset[row - 1] + 10 + compressed_str.size();
+            col_offset[row] = col_offset[row - 1] + COMPRESS_STR_LENGTH + 
compressed_str.size();
         }
 
         block.replace_by_position(result, std::move(result_column));
@@ -174,16 +176,16 @@ public:
             }
 
             union {
-                char bytes[4];
+                char bytes[COMPRESS_STR_LENGTH];
                 uint32_t value;
             } length;
-            std::memcpy(length.bytes, data.data, 4);
+            std::memcpy(length.bytes, data.data, COMPRESS_STR_LENGTH);
 
             size_t idx = col_data.size();
             col_data.resize(col_data.size() + length.value);
             uncompressed_slice = Slice(col_data.data() + idx, length.value);
 
-            Slice compressed_data(data.data + 4, data.size - 4);
+            Slice compressed_data(data.data + COMPRESS_STR_LENGTH, data.size - 
COMPRESS_STR_LENGTH);
             auto st = compression_codec->decompress(compressed_data, 
&uncompressed_slice);
 
             if (!st.ok()) {                                      // is not a 
legal compressed string
diff --git 
a/regression-test/data/query_p0/sql_functions/string_functions/test_compress_uncompress.out
 
b/regression-test/data/query_p0/sql_functions/string_functions/test_compress_uncompress.out
index be60951c955..b54c3b71d63 100644
Binary files 
a/regression-test/data/query_p0/sql_functions/string_functions/test_compress_uncompress.out
 and 
b/regression-test/data/query_p0/sql_functions/string_functions/test_compress_uncompress.out
 differ
diff --git 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_compress_uncompress.groovy
 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_compress_uncompress.groovy
index 9c4df7b1ec9..7aa753891c9 100644
--- 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_compress_uncompress.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_compress_uncompress.groovy
@@ -136,4 +136,24 @@ suite("test_compress_uncompress") {
             UNCOMPRESS(COMPRESS('12345')) AS decompressed_data
         LIMIT 1;
     """
+
+       // Test 12: Multiple COMPRESS calls that COMPRESS the text_col field 
multiple times directly from the table
+    order_qt_compress_multiple_calls_from_table """
+        SELECT
+            k0,
+            COMPRESS(text_col) AS comp1,
+            binary_col AS comp2
+        FROM test_compression
+        ORDER BY k0;
+    """
+
+       // Test 13: multiple COMPRESS and UNCOMPRESS calls
+    order_qt_compress_uncompress_multiple_calls_from_table """
+        SELECT
+            k0,
+            text_col AS result1,
+            UNCOMPRESS(binary_col) AS result2
+        FROM test_compression
+        ORDER BY k0;
+    """
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to