This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit b031c95324e815c25496156597db0a3b871d57fd
Author: HappenLee <happen...@hotmail.com>
AuthorDate: Wed Mar 13 10:23:39 2024 +0800

    [Opt](exec) use libbase64 to replace base64 code in doris (#32078)
    
    * [Opt](exec) use libbase64 to replace base64 code in doris
---
 be/cmake/thirdparty.cmake                          |   1 +
 be/src/exec/olap_utils.h                           |  36 -----
 be/src/util/url_coding.cpp                         | 175 +++------------------
 be/src/vec/functions/function_bitmap.cpp           |   6 +-
 be/src/vec/functions/function_string.cpp           |   6 +-
 .../load_p0/stream_load/test_stream_load.groovy    |   2 +-
 .../test_stream_load_move_memtable.groovy          |   8 +-
 7 files changed, 35 insertions(+), 199 deletions(-)

diff --git a/be/cmake/thirdparty.cmake b/be/cmake/thirdparty.cmake
index 0d485b2466e..e9fbdabee8b 100644
--- a/be/cmake/thirdparty.cmake
+++ b/be/cmake/thirdparty.cmake
@@ -117,6 +117,7 @@ add_thirdparty(bitshuffle)
 add_thirdparty(roaring)
 add_thirdparty(fmt)
 add_thirdparty(cctz)
+add_thirdparty(base64)
 
 add_thirdparty(aws-cpp-sdk-core LIB64)
 add_thirdparty(aws-cpp-sdk-s3 LIB64)
diff --git a/be/src/exec/olap_utils.h b/be/src/exec/olap_utils.h
index 4024337c462..2e101b1270f 100644
--- a/be/src/exec/olap_utils.h
+++ b/be/src/exec/olap_utils.h
@@ -61,42 +61,6 @@ public:
     }
 };
 
-static char encoding_table[] = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
-                                'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
-                                'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
-                                'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
-                                '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'};
-
-static int mod_table[] = {0, 2, 1};
-static const char base64_pad = '=';
-
-inline size_t base64_encode(const char* data, size_t length, char* encoded_data) {
-    size_t output_length = (size_t)(4.0 * ceil((double)length / 3.0));
-
-    if (encoded_data == nullptr) {
-        return 0;
-    }
-
-    for (uint32_t i = 0, j = 0; i < length;) {
-        uint32_t octet_a = i < length ? (unsigned char)data[i++] : 0;
-        uint32_t octet_b = i < length ? (unsigned char)data[i++] : 0;
-        uint32_t octet_c = i < length ? (unsigned char)data[i++] : 0;
-
-        uint32_t triple = (octet_a << 0x10) + (octet_b << 0x08) + octet_c;
-
-        encoded_data[j++] = encoding_table[(triple >> 3 * 6) & 0x3F];
-        encoded_data[j++] = encoding_table[(triple >> 2 * 6) & 0x3F];
-        encoded_data[j++] = encoding_table[(triple >> 1 * 6) & 0x3F];
-        encoded_data[j++] = encoding_table[(triple >> 0 * 6) & 0x3F];
-    }
-
-    for (int i = 0; i < mod_table[length % 3]; i++) {
-        encoded_data[output_length - 1 - i] = base64_pad;
-    }
-
-    return output_length;
-}
-
 enum SQLFilterOp {
     FILTER_LARGER = 0,
     FILTER_LARGER_OR_EQUAL = 1,
diff --git a/be/src/util/url_coding.cpp b/be/src/util/url_coding.cpp
index 7d6e264d5e9..6ddd4c05401 100644
--- a/be/src/util/url_coding.cpp
+++ b/be/src/util/url_coding.cpp
@@ -17,6 +17,7 @@
 
 #include "util/url_coding.h"
 
+#include <libbase64.h>
 #include <math.h>
 
 #include <memory>
@@ -86,165 +87,35 @@ bool url_decode(const std::string& in, std::string* out) {
     return true;
 }
 
-static void encode_base64_internal(const std::string& in, std::string* out,
-                                   const unsigned char* basis, bool padding) {
-    size_t len = in.size();
-    // Every 3 source bytes will be encoded into 4 bytes.
-    out->resize((len + 2) / 3 * 4);
-    unsigned char* d = (unsigned char*)out->data();
-    const auto* s = reinterpret_cast<const unsigned char*>(in.data());
-    while (len > 2) {
-        *d++ = basis[(s[0] >> 2) & 0x3f];
-        *d++ = basis[((s[0] & 3) << 4) | (s[1] >> 4)];
-        *d++ = basis[((s[1] & 0x0f) << 2) | (s[2] >> 6)];
-        *d++ = basis[s[2] & 0x3f];
-
-        s += 3;
-        len -= 3;
-    }
-    if (len) {
-        *d++ = basis[(s[0] >> 2) & 0x3f];
-        if (len == 1) {
-            *d++ = basis[(s[0] & 3) << 4];
-            if (padding) {
-                *d++ = '=';
-            }
-        } else {
-            *d++ = basis[((s[0] & 3) << 4) | (s[1] >> 4)];
-            *d++ = basis[(s[1] & 0x0f) << 2];
-        }
-        if (padding) {
-            *d++ = '=';
-        }
-    }
-    out->resize((char*)d - out->data());
-}
-
 void base64_encode(const std::string& in, std::string* out) {
-    static unsigned char basis64[] =
-            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
-    encode_base64_internal(in, out, basis64, true);
+    out->resize(in.length() * (4.0 / 3) + 1);
+    auto len = base64_encode(reinterpret_cast<const unsigned char*>(in.c_str()), in.length(),
+                             (unsigned char*)out->c_str());
+    out->resize(len);
 }
 
-static char encoding_table[] = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
-                                'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
-                                'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
-                                'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
-                                '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'};
-
-static const char base64_pad = '=';
-
-static short decoding_table[256] = {
-        -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -2, -2, -1, -2, -2, -2, -2, -2, -2, -2, -2,
-        -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 62,
-        -2, -2, -2, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -2, -2, -2, -2, -2, -2, -2, 0,
-        1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
-        23, 24, 25, -2, -2, -2, -2, -2, -2, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
-        39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-        -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-        -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-        -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-        -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-        -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-        -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2};
-
-static int mod_table[] = {0, 2, 1};
-
 size_t base64_encode(const unsigned char* data, size_t length, unsigned char* encoded_data) {
-    auto output_length = (size_t)(4.0 * ceil((double)length / 3.0));
-
-    if (encoded_data == nullptr) {
-        return 0;
-    }
-
-    for (uint32_t i = 0, j = 0; i < length;) {
-        uint32_t octet_a = i < length ? data[i++] : 0;
-        uint32_t octet_b = i < length ? data[i++] : 0;
-        uint32_t octet_c = i < length ? data[i++] : 0;
-        uint32_t triple = (octet_a << 0x10) + (octet_b << 0x08) + octet_c;
-
-        encoded_data[j++] = encoding_table[(triple >> 3 * 6) & 0x3F];
-        encoded_data[j++] = encoding_table[(triple >> 2 * 6) & 0x3F];
-        encoded_data[j++] = encoding_table[(triple >> 1 * 6) & 0x3F];
-        encoded_data[j++] = encoding_table[(triple >> 0 * 6) & 0x3F];
-    }
-
-    for (int i = 0; i < mod_table[length % 3]; i++) {
-        encoded_data[output_length - 1 - i] = '=';
-    }
-
-    return output_length;
+    size_t encode_len = 0;
+#if defined(__aarch64__) || defined(_M_ARM64)
+    do_base64_encode(reinterpret_cast<const char*>(data), length,
+                     reinterpret_cast<char*>(encoded_data), &encode_len, BASE64_FORCE_NEON64);
+#else
+    do_base64_encode(reinterpret_cast<const char*>(data), length,
+                     reinterpret_cast<char*>(encoded_data), &encode_len, 0);
+#endif
+    return encode_len;
 }
 
 int64_t base64_decode(const char* data, size_t length, char* decoded_data) {
-    const char* current = data;
-    int ch = 0;
-    int i = 0;
-    int j = 0;
-    int k = 0;
-
-    // run through the whole string, converting as we go
-    while ((ch = *current++) != '\0' && length-- > 0) {
-        if (ch >= 256 || ch < 0) {
-            return -1;
-        }
-
-        if (ch == base64_pad) {
-            if (*current != '=' && (i % 4) == 1) {
-                return -1;
-            }
-            continue;
-        }
-
-        ch = decoding_table[ch];
-        // a space or some other separator character, we simply skip over
-        if (ch == -1) {
-            continue;
-        } else if (ch == -2) {
-            return -1;
-        }
-
-        switch (i % 4) {
-        case 0:
-            decoded_data[j] = ch << 2;
-            break;
-        case 1:
-            decoded_data[j++] |= ch >> 4;
-            decoded_data[j] = (ch & 0x0f) << 4;
-            break;
-        case 2:
-            decoded_data[j++] |= ch >> 2;
-            decoded_data[j] = (ch & 0x03) << 6;
-            break;
-        case 3:
-            decoded_data[j++] |= ch;
-            break;
-        default:
-            break;
-        }
-
-        i++;
-    }
-
-    k = j;
-    /* mop things up if we ended on a boundary */
-    if (ch == base64_pad) {
-        switch (i % 4) {
-        case 1:
-            return 0;
-        case 2:
-            k++;
-            [[fallthrough]];
-        case 3:
-            decoded_data[k] = 0;
-        default:
-            break;
-        }
-    }
-
-    decoded_data[j] = '\0';
-
-    return j;
+    size_t decode_len = 0;
+#if defined(__aarch64__) || defined(_M_ARM64)
+    auto ret = do_base64_decode(reinterpret_cast<const char*>(data), length, decoded_data,
+                                &decode_len, BASE64_FORCE_NEON64);
+#else
+    auto ret = do_base64_decode(reinterpret_cast<const char*>(data), length, decoded_data,
+                                &decode_len, 0);
+#endif
+    return ret > 0 ? decode_len : -1;
 }
 
 bool base64_decode(const std::string& in, std::string* out) {
diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp
index d1f6cf432ee..4d77b85259f 100644
--- a/be/src/vec/functions/function_bitmap.cpp
+++ b/be/src/vec/functions/function_bitmap.cpp
@@ -282,7 +282,7 @@ struct BitmapFromBase64 {
                 decode_buff.resize(curr_decode_buff_len);
                 last_decode_buff_len = curr_decode_buff_len;
             }
-            int outlen = base64_decode(src_str, src_size, decode_buff.data());
+            auto outlen = base64_decode(src_str, src_size, decode_buff.data());
             if (outlen < 0) {
                 res.emplace_back();
                 null_map[i] = 1;
@@ -1012,8 +1012,8 @@ struct BitmapToBase64 {
             }
             bitmap_val.write_to(ser_buff.data());
 
-            int outlen = base64_encode((const unsigned char*)ser_buff.data(), cur_ser_size,
-                                       chars_data + encoded_offset);
+            auto outlen = base64_encode((const unsigned char*)ser_buff.data(), cur_ser_size,
+                                        chars_data + encoded_offset);
             DCHECK(outlen > 0);
 
             encoded_offset += (int)(4.0 * ceil((double)cur_ser_size / 3.0));
diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp
index ce2c94b937b..4da6d11a5e1 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -817,7 +817,7 @@ struct ToBase64Impl {
                 dst = dst_uptr.get();
             }
 
-            int outlen = base64_encode((const unsigned char*)source, srclen, (unsigned char*)dst);
+            auto outlen = base64_encode((const unsigned char*)source, srclen, (unsigned char*)dst);
 
             StringOP::push_value_string(std::string_view(dst, outlen), i, dst_data, dst_offsets);
         }
@@ -860,7 +860,7 @@ struct ToBase64OldImpl {
                 dst = dst_uptr.get();
             }
 
-            int outlen = base64_encode((const unsigned char*)source, srclen, (unsigned char*)dst);
+            auto outlen = base64_encode((const unsigned char*)source, srclen, (unsigned char*)dst);
 
             StringOP::push_value_string(std::string_view(dst, outlen), i, dst_data, dst_offsets);
         }
@@ -902,7 +902,7 @@ struct FromBase64Impl {
                 dst_uptr.reset(new char[cipher_len]);
                 dst = dst_uptr.get();
             }
-            int outlen = base64_decode(source, srclen, dst);
+            auto outlen = base64_decode(source, srclen, dst);
 
             if (outlen < 0) {
                 StringOP::push_null_string(i, dst_data, dst_offsets, null_map);
diff --git a/regression-test/suites/load_p0/stream_load/test_stream_load.groovy b/regression-test/suites/load_p0/stream_load/test_stream_load.groovy
index ee69cc47779..6c002b2d29b 100644
--- a/regression-test/suites/load_p0/stream_load/test_stream_load.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_stream_load.groovy
@@ -1030,7 +1030,7 @@ suite("test_stream_load", "p0") {
         set 'column_separator', '|'
         set 'columns', 'k1, k2, v1, v2, v3'
         set 'strict_mode', 'true'
-        set 'Authorization', 'Basic  Y29tbW9uX3VzZXI6MTIzNDU2dGVzdCE='
+        set 'Authorization', 'Basic Y29tbW9uX3VzZXJAJyUnOjEyMzQ1NnRlc3Qh'
 
         file 'test_auth.csv'
         time 10000 // limit inflight 10s
diff --git a/regression-test/suites/load_p0/stream_load/test_stream_load_move_memtable.groovy b/regression-test/suites/load_p0/stream_load/test_stream_load_move_memtable.groovy
index e0d00120552..09d9e57bf6e 100644
--- a/regression-test/suites/load_p0/stream_load/test_stream_load_move_memtable.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_stream_load_move_memtable.groovy
@@ -877,8 +877,8 @@ suite("test_stream_load_move_memtable", "p0") {
         PROPERTIES ("replication_allocation" = "tag.location.default: 1");
     """
 
-    sql """create USER common_user1@'%' IDENTIFIED BY '123456test!'"""
-    sql """GRANT LOAD_PRIV ON *.* TO 'common_user1'@'%';"""
+    sql """create USER ddd IDENTIFIED BY '123456test!'"""
+    sql """GRANT LOAD_PRIV ON *.* TO 'ddd';"""
 
     streamLoad {
         table "${tableName13}"
@@ -886,7 +886,7 @@ suite("test_stream_load_move_memtable", "p0") {
         set 'column_separator', '|'
         set 'columns', 'k1, k2, v1, v2, v3'
         set 'strict_mode', 'true'
-        set 'Authorization', 'Basic  Y29tbW9uX3VzZXIxOjEyMzQ1NnRlc3Qh'
+        set 'Authorization', 'Basic ZGRkOjEyMzQ1NnRlc3Qh'
         set 'memtable_on_sink_node', 'true'
 
         file 'test_auth.csv'
@@ -906,7 +906,7 @@ suite("test_stream_load_move_memtable", "p0") {
     }
 
     sql "sync"
-    sql """DROP USER 'common_user1'@'%'"""
+    sql """DROP USER 'ddd'"""
 
     // test default value
     def tableName14 = "test_default_value_mm"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to