This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new a3a093efca0 [Opt](compression) Opt gzip decompress by libdeflate on 
X86 and X86_64 platforms: 2. Opt gzip decompression by libdeflate lib. (#27669) 
(#27801)
a3a093efca0 is described below

commit a3a093efca051af7c0ab20e8d367d171b1a4e14b
Author: Qi Chen <kaka11.c...@gmail.com>
AuthorDate: Thu Nov 30 22:09:19 2023 +0800

    [Opt](compression) Opt gzip decompress by libdeflate on X86 and X86_64 
platforms: 2. Opt gzip decompression by libdeflate lib. (#27669) (#27801)
    
    Backport from #27669.
---
 be/CMakeLists.txt                 |  1 +
 be/cmake/thirdparty.cmake         |  6 +++++
 be/src/util/block_compression.cpp | 46 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index e10da917b5b..58e059ca6ec 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -559,6 +559,7 @@ set(COMMON_THIRDPARTY
     xml2
     lzma
     simdjson
+    deflate
 )
 
 if ((ARCH_AMD64 OR ARCH_AARCH64) AND OS_LINUX)
diff --git a/be/cmake/thirdparty.cmake b/be/cmake/thirdparty.cmake
index e8ea8c33308..493cbb87555 100644
--- a/be/cmake/thirdparty.cmake
+++ b/be/cmake/thirdparty.cmake
@@ -299,3 +299,9 @@ if (OS_MACOSX)
     add_library(intl STATIC IMPORTED)
     set_target_properties(intl PROPERTIES IMPORTED_LOCATION 
"${THIRDPARTY_DIR}/lib/libintl.a")
 endif()
+
+# libdeflate is imported as a static library only when the build targets x86 or x86_64.
+if ("${CMAKE_BUILD_TARGET_ARCH}" STREQUAL "x86" OR "${CMAKE_BUILD_TARGET_ARCH}" STREQUAL "x86_64")
+    add_library(deflate STATIC IMPORTED)
+    # Quote the path so a THIRDPARTY_DIR containing spaces or semicolons stays a single argument.
+    set_target_properties(deflate PROPERTIES IMPORTED_LOCATION "${THIRDPARTY_DIR}/lib/libdeflate.a")
+endif()
diff --git a/be/src/util/block_compression.cpp 
b/be/src/util/block_compression.cpp
index fb4c963c11e..3c051c240ef 100644
--- a/be/src/util/block_compression.cpp
+++ b/be/src/util/block_compression.cpp
@@ -20,6 +20,11 @@
 #include <gen_cpp/parquet_types.h>
 #include <gen_cpp/segment_v2.pb.h>
 #include <glog/logging.h>
+// Only used on x86 or x86_64
+#if defined(__x86_64__) || defined(_M_X64) || defined(i386) || 
defined(__i386__) || \
+        defined(__i386) || defined(_M_IX86)
+#include <libdeflate.h>
+#endif
 #include <limits.h>
 #include <lz4/lz4.h>
 #include <lz4/lz4frame.h>
@@ -929,7 +934,7 @@ private:
     mutable std::vector<DContext*> _ctx_d_pool;
 };
 
-class GzipBlockCompression final : public ZlibBlockCompression {
+class GzipBlockCompression : public ZlibBlockCompression {
 public:
     static GzipBlockCompression* instance() {
         static GzipBlockCompression s_instance;
@@ -1006,6 +1011,39 @@ private:
     const static int MEM_LEVEL = 8;
 };
 
+// Only used on x86 or x86_64
+#if defined(__x86_64__) || defined(_M_X64) || defined(i386) || 
defined(__i386__) || \
+        defined(__i386) || defined(_M_IX86)
+// Gzip codec that overrides decompress() with a libdeflate-based implementation.
+// Everything else is inherited unchanged from GzipBlockCompression.
+class GzipBlockCompressionByLibdeflate final : public GzipBlockCompression {
+public:
+    GzipBlockCompressionByLibdeflate() : GzipBlockCompression() {}
+    // Returns the process-wide singleton instance of this codec.
+    static GzipBlockCompressionByLibdeflate* instance() {
+        static GzipBlockCompressionByLibdeflate s_instance;
+        return &s_instance;
+    }
+    ~GzipBlockCompressionByLibdeflate() override = default;
+
+    // Decompresses the gzip stream in `input` into the buffer described by `output`.
+    //
+    // On entry `output->data` / `output->size` are passed to libdeflate as the
+    // destination buffer and its capacity. On success `output->size` is set to the
+    // number of bytes actually written. An empty `input` yields an empty output.
+    //
+    // Returns Status::InternalError if the decompressor cannot be allocated or if
+    // libdeflate reports anything other than LIBDEFLATE_SUCCESS.
+    Status decompress(const Slice& input, Slice* output) override {
+        if (input.empty()) {
+            output->size = 0;
+            return Status::OK();
+        }
+        // One decompressor per thread, created on first use and released through
+        // libdeflate_free_decompressor by the unique_ptr deleter.
+        thread_local std::unique_ptr<libdeflate_decompressor, void (*)(libdeflate_decompressor*)>
+                decompressor {libdeflate_alloc_decompressor(), libdeflate_free_decompressor};
+        if (!decompressor) {
+            return Status::InternalError("libdeflate_alloc_decompressor error.");
+        }
+        std::size_t out_len = 0;
+        auto result = libdeflate_gzip_decompress(decompressor.get(), input.data, input.size,
+                                                 output->data, output->size, &out_len);
+        if (result != LIBDEFLATE_SUCCESS) {
+            return Status::InternalError("libdeflate_gzip_decompress error, res={}", result);
+        }
+        // Because a non-null length pointer is passed, libdeflate accepts streams that
+        // produce fewer bytes than the buffer holds; report the real length to the caller.
+        output->size = out_len;
+        return Status::OK();
+    }
+};
+#endif
+
 Status get_block_compression_codec(segment_v2::CompressionTypePB type,
                                    BlockCompressionCodec** codec) {
     switch (type) {
@@ -1054,7 +1092,13 @@ Status 
get_block_compression_codec(tparquet::CompressionCodec::type parquet_code
         *codec = ZstdBlockCompression::instance();
         break;
     case tparquet::CompressionCodec::GZIP:
+// Only used on x86 or x86_64
+#if defined(__x86_64__) || defined(_M_X64) || defined(i386) || 
defined(__i386__) || \
+        defined(__i386) || defined(_M_IX86)
+        *codec = GzipBlockCompressionByLibdeflate::instance();
+#else
         *codec = GzipBlockCompression::instance();
+#endif
         break;
     default:
         return Status::InternalError("unknown compression type({})", 
parquet_codec);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to