This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 40be6a0b05 [fix](hive) do not split compress data file and support 
lz4/snappy block codec (#23245)
40be6a0b05 is described below

commit 40be6a0b059ede9bda5b29b66ea4ab955503aeaf
Author: Mingyu Chen <morning...@163.com>
AuthorDate: Sat Aug 26 12:59:05 2023 +0800

    [fix](hive) do not split compress data file and support lz4/snappy block 
codec (#23245)
    
    1. Do not split compressed data files
    Some data files in Hive are compressed with gzip, deflate, etc.
    These kinds of files cannot be split.
    
    2. Support lz4 block codec
    For the Hive scan node, use the lz4 block codec instead of the lz4 frame codec
    
    3. Support snappy block codec
    For Hadoop snappy
    
    4. Optimize the `count(*)` query of csv file
    For queries like `select count(*) from tbl`, we only need to split the lines; there is no
need to split the columns.
    
    Need to pick to branch-2.0 after this PR: #22304
---
 be/src/exec/decompressor.cpp                       | 185 ++++++++++++++++++++-
 be/src/exec/decompressor.h                         |  39 ++++-
 be/src/service/internal_service.cpp                |   2 +
 be/src/util/load_util.cpp                          |   9 +-
 be/src/vec/exec/format/csv/csv_reader.cpp          |  76 +++++++--
 be/src/vec/exec/format/csv/csv_reader.h            |   6 +
 .../file_reader/new_plain_text_line_reader.cpp     |  10 +-
 .../file_reader/new_plain_text_line_reader.h       |   1 -
 be/src/vec/exec/scan/vfile_scanner.cpp             |  36 ++--
 .../java/org/apache/doris/common/util/Util.java    |   6 +
 .../doris/datasource/hive/HiveMetaStoreCache.java  |   2 +
 .../apache/doris/external/hive/util/HiveUtil.java  |   3 +-
 .../doris/planner/external/FileScanNode.java       |  17 +-
 .../doris/planner/external/HiveScanNode.java       |  11 ++
 .../apache/doris/planner/external/HiveSplit.java   |   5 -
 gensrc/thrift/PlanNodes.thrift                     |   4 +
 .../external_table_p2/hive/test_compress_type.out  |  47 ++++++
 .../hive/test_compress_type.groovy                 |  61 +++++++
 18 files changed, 464 insertions(+), 56 deletions(-)

diff --git a/be/src/exec/decompressor.cpp b/be/src/exec/decompressor.cpp
index af69a896a2..964654132a 100644
--- a/be/src/exec/decompressor.cpp
+++ b/be/src/exec/decompressor.cpp
@@ -42,6 +42,12 @@ Status Decompressor::create_decompressor(CompressType type, 
Decompressor** decom
     case CompressType::LZ4FRAME:
         *decompressor = new Lz4FrameDecompressor();
         break;
+    case CompressType::LZ4BLOCK:
+        *decompressor = new Lz4BlockDecompressor();
+        break;
+    case CompressType::SNAPPYBLOCK:
+        *decompressor = new SnappyBlockDecompressor();
+        break;
 #ifdef DORIS_WITH_LZO
     case CompressType::LZOP:
         *decompressor = new LzopDecompressor();
@@ -59,6 +65,10 @@ Status Decompressor::create_decompressor(CompressType type, 
Decompressor** decom
     return st;
 }
 
+uint32_t Decompressor::_read_int32(uint8_t* buf) {
+    return (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3];
+}
+
 std::string Decompressor::debug_info() {
     return "Decompressor";
 }
@@ -239,7 +249,7 @@ Status Lz4FrameDecompressor::decompress(uint8_t* input, 
size_t input_len, size_t
                                         size_t* decompressed_len, bool* 
stream_end,
                                         size_t* more_input_bytes, size_t* 
more_output_bytes) {
     uint8_t* src = input;
-    size_t src_size = input_len;
+    size_t remaining_input_size = input_len;
     size_t ret = 1;
     *input_bytes_read = 0;
 
@@ -257,7 +267,7 @@ Status Lz4FrameDecompressor::decompress(uint8_t* input, 
size_t input_len, size_t
         }
 
         LZ4F_frameInfo_t info;
-        ret = LZ4F_getFrameInfo(_dctx, &info, (void*)src, &src_size);
+        ret = LZ4F_getFrameInfo(_dctx, &info, (void*)src, 
&remaining_input_size);
         if (LZ4F_isError(ret)) {
             return Status::InternalError("LZ4F_getFrameInfo error: {}",
                                          std::string(LZ4F_getErrorName(ret)));
@@ -270,17 +280,17 @@ Status Lz4FrameDecompressor::decompress(uint8_t* input, 
size_t input_len, size_t
                     std::string(LZ4F_getErrorName(ret)));
         }
 
-        *input_bytes_read = src_size;
+        *input_bytes_read = remaining_input_size;
 
-        src += src_size;
-        src_size = input_len - src_size;
+        src += remaining_input_size;
+        remaining_input_size = input_len - remaining_input_size;
 
         LOG(INFO) << "lz4 block size: " << _expect_dec_buf_size;
     }
 
     // decompress
     size_t output_len = output_max_len;
-    ret = LZ4F_decompress(_dctx, (void*)output, &output_len, (void*)src, 
&src_size,
+    ret = LZ4F_decompress(_dctx, (void*)output, &output_len, (void*)src, 
&remaining_input_size,
                           /* LZ4F_decompressOptions_t */ nullptr);
     if (LZ4F_isError(ret)) {
         return Status::InternalError("Decompression error: {}",
@@ -288,7 +298,7 @@ Status Lz4FrameDecompressor::decompress(uint8_t* input, 
size_t input_len, size_t
     }
 
     // update
-    *input_bytes_read += src_size;
+    *input_bytes_read += remaining_input_size;
     *decompressed_len = output_len;
     if (ret == 0) {
         *stream_end = true;
@@ -324,4 +334,165 @@ size_t Lz4FrameDecompressor::get_block_size(const 
LZ4F_frameInfo_t* info) {
     }
 }
 
+/// Lz4BlockDecompressor
+Status Lz4BlockDecompressor::init() {
+    return Status::OK();
+}
+
+Status Lz4BlockDecompressor::decompress(uint8_t* input, size_t input_len, 
size_t* input_bytes_read,
+                                        uint8_t* output, size_t output_max_len,
+                                        size_t* decompressed_len, bool* 
stream_end,
+                                        size_t* more_input_bytes, size_t* 
more_output_bytes) {
+    uint8_t* src = input;
+    size_t remaining_input_size = input_len;
+    int64_t uncompressed_total_len = 0;
+    *input_bytes_read = 0;
+
+    // The hadoop lz4 codec is as:
+    // <4 byte big endian uncompressed size>
+    // <4 byte big endian compressed size>
+    // <lz4 compressed block>
+    // ....
+    // <4 byte big endian uncompressed size>
+    // <4 byte big endian compressed size>
+    // <lz4 compressed block>
+    //
+    // See:
+    // 
https://github.com/apache/hadoop/blob/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/src/codec/Lz4Codec.cc
+    while (remaining_input_size > 0) {
+        // Read uncompressed size
+        uint32_t uncompressed_block_len = Decompressor::_read_int32(src);
+        int64_t remaining_output_size = output_max_len - 
uncompressed_total_len;
+        if (remaining_output_size < uncompressed_block_len) {
+            // Need more output buffer
+            *more_output_bytes = uncompressed_block_len - 
remaining_output_size;
+            break;
+        }
+
+        // Read compressed size
+        size_t tmp_src_size = remaining_input_size - sizeof(uint32_t);
+        size_t compressed_len = Decompressor::_read_int32(src + 
sizeof(uint32_t));
+        if (compressed_len == 0 || compressed_len > tmp_src_size) {
+            // Need more input data
+            *more_input_bytes = compressed_len - tmp_src_size;
+            break;
+        }
+
+        src += 2 * sizeof(uint32_t);
+        remaining_input_size -= 2 * sizeof(uint32_t);
+
+        // Decompress
+        int uncompressed_len = LZ4_decompress_safe(reinterpret_cast<const 
char*>(src),
+                                                   
reinterpret_cast<char*>(output), compressed_len,
+                                                   remaining_output_size);
+        if (uncompressed_len < 0 || uncompressed_len != 
uncompressed_block_len) {
+            return Status::InternalError(
+                    "lz4 block decompress failed. uncompressed_len: {}, 
expected: {}",
+                    uncompressed_len, uncompressed_block_len);
+        }
+
+        output += uncompressed_len;
+        src += compressed_len;
+        remaining_input_size -= compressed_len;
+        uncompressed_total_len += uncompressed_len;
+    }
+
+    *input_bytes_read += (input_len - remaining_input_size);
+    *decompressed_len = uncompressed_total_len;
+    // If no more input and output need, means this is the end of a compressed 
block
+    *stream_end = (*more_input_bytes == 0 && *more_output_bytes == 0);
+
+    return Status::OK();
+}
+
+std::string Lz4BlockDecompressor::debug_info() {
+    std::stringstream ss;
+    ss << "Lz4BlockDecompressor.";
+    return ss.str();
+}
+
+/// SnappyBlockDecompressor
+Status SnappyBlockDecompressor::init() {
+    return Status::OK();
+}
+
+Status SnappyBlockDecompressor::decompress(uint8_t* input, size_t input_len,
+                                           size_t* input_bytes_read, uint8_t* 
output,
+                                           size_t output_max_len, size_t* 
decompressed_len,
+                                           bool* stream_end, size_t* 
more_input_bytes,
+                                           size_t* more_output_bytes) {
+    uint8_t* src = input;
+    size_t remaining_input_size = input_len;
+    int64_t uncompressed_total_len = 0;
+    *input_bytes_read = 0;
+
+    // The hadoop snappy codec is as:
+    // <4 byte big endian uncompressed size>
+    // <4 byte big endian compressed size>
+    // <snappy compressed block>
+    // ....
+    // <4 byte big endian uncompressed size>
+    // <4 byte big endian compressed size>
+    // <snappy compressed block>
+    //
+    // See:
+    // 
https://github.com/apache/hadoop/blob/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/src/codec/SnappyCodec.cc
+    while (remaining_input_size > 0) {
+        // Read uncompressed size
+        uint32_t uncompressed_block_len = Decompressor::_read_int32(src);
+        int64_t remaining_output_size = output_max_len - 
uncompressed_total_len;
+        if (remaining_output_size < uncompressed_block_len) {
+            // Need more output buffer
+            *more_output_bytes = uncompressed_block_len - 
remaining_output_size;
+            break;
+        }
+
+        // Read compressed size
+        size_t tmp_src_size = remaining_input_size - sizeof(uint32_t);
+        size_t compressed_len = _read_int32(src + sizeof(uint32_t));
+        if (compressed_len == 0 || compressed_len > tmp_src_size) {
+            // Need more input data
+            *more_input_bytes = compressed_len - tmp_src_size;
+            break;
+        }
+
+        src += 2 * sizeof(uint32_t);
+        remaining_input_size -= 2 * sizeof(uint32_t);
+
+        // ATTN: the uncompressed len from GetUncompressedLength() is same as
+        // uncompressed_block_len, so I think it is unnecessary to get it 
again.
+        // Get uncompressed len from snappy
+        // size_t uncompressed_len;
+        // if (!snappy::GetUncompressedLength(reinterpret_cast<const 
char*>(src),
+        //             compressed_len, &uncompressed_len)) {
+        //     return Status::InternalError("snappy block decompress failed to 
get uncompressed len");
+        // }
+
+        // Decompress
+        if (!snappy::RawUncompress(reinterpret_cast<const char*>(src), 
compressed_len,
+                                   reinterpret_cast<char*>(output))) {
+            return Status::InternalError("snappy block decompress failed. 
uncompressed_len: {}",
+                                         uncompressed_block_len);
+        }
+
+        output += uncompressed_block_len;
+        src += compressed_len;
+        remaining_input_size -= compressed_len;
+        uncompressed_total_len += uncompressed_block_len;
+    }
+
+    *input_bytes_read += (input_len - remaining_input_size);
+    *decompressed_len = uncompressed_total_len;
+    // If no more input and output need, means this is the end of a compressed 
block
+    *stream_end = (*more_input_bytes == 0 && *more_output_bytes == 0);
+
+    return Status::OK();
+}
+
+std::string SnappyBlockDecompressor::debug_info() {
+    std::stringstream ss;
+    ss << "SnappyBlockDecompressor.";
+    return ss.str();
+}
+
 } // namespace doris
diff --git a/be/src/exec/decompressor.h b/be/src/exec/decompressor.h
index af37335f1f..2b07e71139 100644
--- a/be/src/exec/decompressor.h
+++ b/be/src/exec/decompressor.h
@@ -18,7 +18,10 @@
 #pragma once
 
 #include <bzlib.h>
+#include <lz4/lz4.h>
 #include <lz4/lz4frame.h>
+#include <lz4/lz4hc.h>
+#include <snappy.h>
 #include <stddef.h>
 #include <stdint.h>
 #include <zlib.h>
@@ -34,7 +37,7 @@
 
 namespace doris {
 
-enum CompressType { UNCOMPRESSED, GZIP, DEFLATE, BZIP2, LZ4FRAME, LZOP };
+enum CompressType { UNCOMPRESSED, GZIP, DEFLATE, BZIP2, LZ4FRAME, LZOP, 
LZ4BLOCK, SNAPPYBLOCK };
 
 class Decompressor {
 public:
@@ -68,6 +71,8 @@ public:
 protected:
     virtual Status init() = 0;
 
+    static uint32_t _read_int32(uint8_t* buf);
+
     Decompressor(CompressType ctype) : _ctype(ctype) {}
 
     CompressType _ctype;
@@ -140,6 +145,38 @@ private:
     const static unsigned DORIS_LZ4F_VERSION;
 };
 
+class Lz4BlockDecompressor : public Decompressor {
+public:
+    ~Lz4BlockDecompressor() override {}
+
+    Status decompress(uint8_t* input, size_t input_len, size_t* 
input_bytes_read, uint8_t* output,
+                      size_t output_max_len, size_t* decompressed_len, bool* 
stream_end,
+                      size_t* more_input_bytes, size_t* more_output_bytes) 
override;
+
+    std::string debug_info() override;
+
+private:
+    friend class Decompressor;
+    Lz4BlockDecompressor() : Decompressor(CompressType::LZ4FRAME) {}
+    Status init() override;
+};
+
+class SnappyBlockDecompressor : public Decompressor {
+public:
+    ~SnappyBlockDecompressor() override {}
+
+    Status decompress(uint8_t* input, size_t input_len, size_t* 
input_bytes_read, uint8_t* output,
+                      size_t output_max_len, size_t* decompressed_len, bool* 
stream_end,
+                      size_t* more_input_bytes, size_t* more_output_bytes) 
override;
+
+    std::string debug_info() override;
+
+private:
+    friend class Decompressor;
+    SnappyBlockDecompressor() : Decompressor(CompressType::SNAPPYBLOCK) {}
+    Status init() override;
+};
+
 #ifdef DORIS_WITH_LZO
 class LzopDecompressor : public Decompressor {
 public:
diff --git a/be/src/service/internal_service.cpp 
b/be/src/service/internal_service.cpp
index 3838668577..b458af6336 100644
--- a/be/src/service/internal_service.cpp
+++ b/be/src/service/internal_service.cpp
@@ -632,6 +632,8 @@ void 
PInternalServiceImpl::fetch_table_schema(google::protobuf::RpcController* c
         case TFileFormatType::FORMAT_CSV_GZ:
         case TFileFormatType::FORMAT_CSV_BZ2:
         case TFileFormatType::FORMAT_CSV_LZ4FRAME:
+        case TFileFormatType::FORMAT_CSV_LZ4BLOCK:
+        case TFileFormatType::FORMAT_CSV_SNAPPYBLOCK:
         case TFileFormatType::FORMAT_CSV_LZOP:
         case TFileFormatType::FORMAT_CSV_DEFLATE: {
             // file_slots is no use
diff --git a/be/src/util/load_util.cpp b/be/src/util/load_util.cpp
index 8736561db4..1277132378 100644
--- a/be/src/util/load_util.cpp
+++ b/be/src/util/load_util.cpp
@@ -46,9 +46,15 @@ void LoadUtil::parse_format(const std::string& format_str, 
const std::string& co
         } else if (iequal(compress_type_str, "LZ4")) {
             *format_type = TFileFormatType::FORMAT_CSV_LZ4FRAME;
             *compress_type = TFileCompressType::LZ4FRAME;
+        } else if (iequal(compress_type_str, "LZ4_BLOCK")) {
+            *format_type = TFileFormatType::FORMAT_CSV_LZ4BLOCK;
+            *compress_type = TFileCompressType::LZ4BLOCK;
         } else if (iequal(compress_type_str, "LZOP")) {
             *format_type = TFileFormatType::FORMAT_CSV_LZOP;
             *compress_type = TFileCompressType::LZO;
+        } else if (iequal(compress_type_str, "SNAPPY_BLOCK")) {
+            *format_type = TFileFormatType::FORMAT_CSV_SNAPPYBLOCK;
+            *compress_type = TFileCompressType::SNAPPYBLOCK;
         } else if (iequal(compress_type_str, "DEFLATE")) {
             *format_type = TFileFormatType::FORMAT_CSV_DEFLATE;
             *compress_type = TFileCompressType::DEFLATE;
@@ -72,6 +78,7 @@ bool 
LoadUtil::is_format_support_streaming(TFileFormatType::type format) {
     case TFileFormatType::FORMAT_CSV_DEFLATE:
     case TFileFormatType::FORMAT_CSV_GZ:
     case TFileFormatType::FORMAT_CSV_LZ4FRAME:
+    case TFileFormatType::FORMAT_CSV_LZ4BLOCK:
     case TFileFormatType::FORMAT_CSV_LZO:
     case TFileFormatType::FORMAT_CSV_LZOP:
     case TFileFormatType::FORMAT_JSON:
@@ -81,4 +88,4 @@ bool 
LoadUtil::is_format_support_streaming(TFileFormatType::type format) {
     }
     return false;
 }
-} // namespace  doris
\ No newline at end of file
+} // namespace  doris
diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp 
b/be/src/vec/exec/format/csv/csv_reader.cpp
index 28d6f484f7..bc41b66298 100644
--- a/be/src/vec/exec/format/csv/csv_reader.cpp
+++ b/be/src/vec/exec/format/csv/csv_reader.cpp
@@ -343,8 +343,12 @@ Status CsvReader::init_reader(bool is_load) {
         [[fallthrough]];
     case TFileFormatType::FORMAT_CSV_LZ4FRAME:
         [[fallthrough]];
+    case TFileFormatType::FORMAT_CSV_LZ4BLOCK:
+        [[fallthrough]];
     case TFileFormatType::FORMAT_CSV_LZOP:
         [[fallthrough]];
+    case TFileFormatType::FORMAT_CSV_SNAPPYBLOCK:
+        [[fallthrough]];
     case TFileFormatType::FORMAT_CSV_DEFLATE:
         _line_reader =
                 NewPlainTextLineReader::create_unique(_profile, _file_reader, 
_decompressor.get(),
@@ -400,21 +404,51 @@ Status CsvReader::get_next_block(Block* block, size_t* 
read_rows, bool* eof) {
 
     const int batch_size = std::max(_state->batch_size(), 
(int)_MIN_BATCH_SIZE);
     size_t rows = 0;
-    auto columns = block->mutate_columns();
-    while (rows < batch_size && !_line_reader_eof) {
-        const uint8_t* ptr = nullptr;
-        size_t size = 0;
-        RETURN_IF_ERROR(_line_reader->read_line(&ptr, &size, 
&_line_reader_eof, _io_ctx));
-        if (_skip_lines > 0) {
-            _skip_lines--;
-            continue;
+
+    bool success = false;
+    if (_push_down_agg_type == TPushAggOp::type::COUNT) {
+        while (rows < batch_size && !_line_reader_eof) {
+            const uint8_t* ptr = nullptr;
+            size_t size = 0;
+            RETURN_IF_ERROR(_line_reader->read_line(&ptr, &size, 
&_line_reader_eof, _io_ctx));
+            if (_skip_lines > 0) {
+                _skip_lines--;
+                continue;
+            }
+            if (size == 0) {
+                // Read empty row, just continue
+                continue;
+            }
+
+            RETURN_IF_ERROR(_validate_line(Slice(ptr, size), &success));
+            ++rows;
         }
-        if (size == 0) {
-            // Read empty row, just continue
-            continue;
+
+        for (auto& col : block->mutate_columns()) {
+            col->resize(rows);
         }
 
-        RETURN_IF_ERROR(_fill_dest_columns(Slice(ptr, size), block, columns, 
&rows));
+    } else {
+        auto columns = block->mutate_columns();
+        while (rows < batch_size && !_line_reader_eof) {
+            const uint8_t* ptr = nullptr;
+            size_t size = 0;
+            RETURN_IF_ERROR(_line_reader->read_line(&ptr, &size, 
&_line_reader_eof, _io_ctx));
+            if (_skip_lines > 0) {
+                _skip_lines--;
+                continue;
+            }
+            if (size == 0) {
+                // Read empty row, just continue
+                continue;
+            }
+
+            RETURN_IF_ERROR(_validate_line(Slice(ptr, size), &success));
+            if (!success) {
+                continue;
+            }
+            RETURN_IF_ERROR(_fill_dest_columns(Slice(ptr, size), block, 
columns, &rows));
+        }
     }
 
     *eof = (rows == 0);
@@ -476,9 +510,15 @@ Status CsvReader::_create_decompressor() {
         case TFileCompressType::LZ4FRAME:
             compress_type = CompressType::LZ4FRAME;
             break;
+        case TFileCompressType::LZ4BLOCK:
+            compress_type = CompressType::LZ4BLOCK;
+            break;
         case TFileCompressType::DEFLATE:
             compress_type = CompressType::DEFLATE;
             break;
+        case TFileCompressType::SNAPPYBLOCK:
+            compress_type = CompressType::SNAPPYBLOCK;
+            break;
         default:
             return Status::InternalError("unknown compress type: {}", 
_file_compress_type);
         }
@@ -498,12 +538,18 @@ Status CsvReader::_create_decompressor() {
         case TFileFormatType::FORMAT_CSV_LZ4FRAME:
             compress_type = CompressType::LZ4FRAME;
             break;
+        case TFileFormatType::FORMAT_CSV_LZ4BLOCK:
+            compress_type = CompressType::LZ4BLOCK;
+            break;
         case TFileFormatType::FORMAT_CSV_LZOP:
             compress_type = CompressType::LZOP;
             break;
         case TFileFormatType::FORMAT_CSV_DEFLATE:
             compress_type = CompressType::DEFLATE;
             break;
+        case TFileFormatType::FORMAT_CSV_SNAPPYBLOCK:
+            compress_type = CompressType::SNAPPYBLOCK;
+            break;
         default:
             return Status::InternalError("unknown format type: {}", 
_file_format_type);
         }
@@ -557,7 +603,7 @@ Status CsvReader::_fill_dest_columns(const Slice& line, 
Block* block,
     return Status::OK();
 }
 
-Status CsvReader::_line_split_to_values(const Slice& line, bool* success) {
+Status CsvReader::_validate_line(const Slice& line, bool* success) {
     if (!_is_proto_format && !validate_utf8(line.data, line.size)) {
         if (!_is_load) {
             return Status::InternalError("Only support csv data in utf8 
codec");
@@ -575,7 +621,11 @@ Status CsvReader::_line_split_to_values(const Slice& line, 
bool* success) {
             return Status::OK();
         }
     }
+    *success = true;
+    return Status::OK();
+}
 
+Status CsvReader::_line_split_to_values(const Slice& line, bool* success) {
     _split_line(line);
 
     if (_is_load) {
diff --git a/be/src/vec/exec/format/csv/csv_reader.h 
b/be/src/vec/exec/format/csv/csv_reader.h
index 5721bbd929..2659703f8d 100644
--- a/be/src/vec/exec/format/csv/csv_reader.h
+++ b/be/src/vec/exec/format/csv/csv_reader.h
@@ -221,6 +221,12 @@ private:
     // TODO(ftw): parse type
     Status _parse_col_types(size_t col_nums, std::vector<TypeDescriptor>* 
col_types);
 
+    // check the utf8 encoding of a line.
+    // return error status to stop processing.
+    // If return Status::OK but "success" is false, which means this is load 
request
+    // and the line is skipped as unqualified row, and the process should 
continue.
+    Status _validate_line(const Slice& line, bool* success);
+
     RuntimeState* _state;
     RuntimeProfile* _profile;
     ScannerCounter* _counter;
diff --git a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp 
b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp
index b59bbef1f1..c27aba354f 100644
--- a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp
+++ b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp
@@ -201,7 +201,6 @@ 
NewPlainTextLineReader::NewPlainTextLineReader(RuntimeProfile* profile,
           _output_buf_limit(0),
           _file_eof(false),
           _eof(false),
-          _stream_end(true),
           _more_input_bytes(0),
           _more_output_bytes(0),
           _current_offset(current_offset),
@@ -324,6 +323,7 @@ Status NewPlainTextLineReader::read_line(const uint8_t** 
ptr, size_t* size, bool
     _line_reader_ctx->refresh();
     int found_line_delimiter = 0;
     size_t offset = 0;
+    bool stream_end = true;
     while (!done()) {
         // find line delimiter in current decompressed data
         uint8_t* cur_ptr = _output_buf + _output_buf_pos;
@@ -379,7 +379,7 @@ Status NewPlainTextLineReader::read_line(const uint8_t** 
ptr, size_t* size, bool
                     COUNTER_UPDATE(_bytes_read_counter, read_len);
                 }
                 if (_file_eof || read_len == 0) {
-                    if (!_stream_end) {
+                    if (!stream_end) {
                         return Status::InternalError(
                                 "Compressed file has been truncated, which is 
not allowed");
                     } else {
@@ -392,7 +392,7 @@ Status NewPlainTextLineReader::read_line(const uint8_t** 
ptr, size_t* size, bool
 
                 if (_decompressor == nullptr) {
                     _output_buf_limit += read_len;
-                    _stream_end = true;
+                    stream_end = true;
                 } else {
                     // only update input limit.
                     // input pos is set at MARK step
@@ -418,10 +418,10 @@ Status NewPlainTextLineReader::read_line(const uint8_t** 
ptr, size_t* size, bool
                         _input_buf_limit - _input_buf_pos,                  /* 
input_len */
                         &input_read_bytes, _output_buf + _output_buf_limit, /* 
output */
                         _output_buf_size - _output_buf_limit,               /* 
output_max_len */
-                        &decompressed_len, &_stream_end, &_more_input_bytes, 
&_more_output_bytes));
+                        &decompressed_len, &stream_end, &_more_input_bytes, 
&_more_output_bytes));
 
                 // LOG(INFO) << "after decompress:"
-                //           << " stream_end: " << _stream_end
+                //           << " stream_end: " << stream_end
                 //           << " input_read_bytes: " << input_read_bytes
                 //           << " decompressed_len: " << decompressed_len
                 //           << " more_input_bytes: " << _more_input_bytes
diff --git a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h 
b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h
index 7326812b92..9947259300 100644
--- a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h
+++ b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h
@@ -235,7 +235,6 @@ private:
 
     bool _file_eof;
     bool _eof;
-    bool _stream_end;
     size_t _more_input_bytes;
     size_t _more_output_bytes;
 
diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp 
b/be/src/vec/exec/scan/vfile_scanner.cpp
index 055ec224a3..505b6807b4 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -261,22 +261,26 @@ Status VFileScanner::_get_block_impl(RuntimeState* state, 
Block* block, bool* eo
         // use read_rows instead of _src_block_ptr->rows(), because the first 
column of _src_block_ptr
         // may not be filled after calling `get_next_block()`, so 
_src_block_ptr->rows() may return wrong result.
         if (read_rows > 0) {
-            // Convert the src block columns type to string in-place.
-            RETURN_IF_ERROR(_cast_to_input_block(block));
-            // FileReader can fill partition and missing columns itself
-            if (!_cur_reader->fill_all_columns()) {
-                // Fill rows in src block with partition columns from path. 
(e.g. Hive partition columns)
-                RETURN_IF_ERROR(_fill_columns_from_path(read_rows));
-                // Fill columns not exist in file with null or default value
-                RETURN_IF_ERROR(_fill_missing_columns(read_rows));
+            // If the push_down_agg_type is COUNT, no need to do the rest,
+            // because we only save a number in block.
+            if (_parent->get_push_down_agg_type() != TPushAggOp::type::COUNT) {
+                // Convert the src block columns type to string in-place.
+                RETURN_IF_ERROR(_cast_to_input_block(block));
+                // FileReader can fill partition and missing columns itself
+                if (!_cur_reader->fill_all_columns()) {
+                    // Fill rows in src block with partition columns from 
path. (e.g. Hive partition columns)
+                    RETURN_IF_ERROR(_fill_columns_from_path(read_rows));
+                    // Fill columns not exist in file with null or default 
value
+                    RETURN_IF_ERROR(_fill_missing_columns(read_rows));
+                }
+                // Apply _pre_conjunct_ctxs to filter src block.
+                RETURN_IF_ERROR(_pre_filter_src_block());
+                // Convert src block to output block (dest block), string to 
dest data type and apply filters.
+                RETURN_IF_ERROR(_convert_to_output_block(block));
+                // Truncate char columns or varchar columns if size is smaller 
than file columns
+                // or not found in the file column schema.
+                RETURN_IF_ERROR(_truncate_char_or_varchar_columns(block));
             }
-            // Apply _pre_conjunct_ctxs to filter src block.
-            RETURN_IF_ERROR(_pre_filter_src_block());
-            // Convert src block to output block (dest block), string to dest 
data type and apply filters.
-            RETURN_IF_ERROR(_convert_to_output_block(block));
-            // Truncate char columns or varchar columns if size is smaller 
than file columns
-            // or not found in the file column schema.
-            RETURN_IF_ERROR(_truncate_char_or_varchar_columns(block));
             break;
         }
     } while (true);
@@ -755,8 +759,10 @@ Status VFileScanner::_get_next_reader() {
         case TFileFormatType::FORMAT_CSV_GZ:
         case TFileFormatType::FORMAT_CSV_BZ2:
         case TFileFormatType::FORMAT_CSV_LZ4FRAME:
+        case TFileFormatType::FORMAT_CSV_LZ4BLOCK:
         case TFileFormatType::FORMAT_CSV_LZOP:
         case TFileFormatType::FORMAT_CSV_DEFLATE:
+        case TFileFormatType::FORMAT_CSV_SNAPPYBLOCK:
         case TFileFormatType::FORMAT_PROTO: {
             _cur_reader = CsvReader::create_unique(_state, _profile, 
&_counter, *_params, range,
                                                    _file_slot_descs, 
_io_ctx.get());
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java
index 8d9c2c6b0c..0dff4b6caa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java
@@ -550,6 +550,8 @@ public class Util {
             return TFileFormatType.FORMAT_CSV_LZO;
         } else if (lowerCasePath.endsWith(".deflate")) {
             return TFileFormatType.FORMAT_CSV_DEFLATE;
+        } else if (lowerCasePath.endsWith(".snappy")) {
+            return TFileFormatType.FORMAT_CSV_SNAPPYBLOCK;
         } else {
             return TFileFormatType.FORMAT_CSV_PLAIN;
         }
@@ -575,6 +577,8 @@ public class Util {
             return TFileCompressType.LZO;
         } else if (lowerCasePath.endsWith(".deflate")) {
             return TFileCompressType.DEFLATE;
+        } else if (lowerCasePath.endsWith(".snappy")) {
+            return TFileCompressType.SNAPPYBLOCK;
         } else {
             return TFileCompressType.PLAIN;
         }
@@ -599,6 +603,8 @@ public class Util {
                 || fileFormatType == TFileFormatType.FORMAT_CSV_DEFLATE
                 || fileFormatType == TFileFormatType.FORMAT_CSV_GZ
                 || fileFormatType == TFileFormatType.FORMAT_CSV_LZ4FRAME
+                || fileFormatType == TFileFormatType.FORMAT_CSV_LZ4BLOCK
+                || fileFormatType == TFileFormatType.FORMAT_CSV_SNAPPYBLOCK
                 || fileFormatType == TFileFormatType.FORMAT_CSV_LZO
                 || fileFormatType == TFileFormatType.FORMAT_CSV_LZOP
                 || fileFormatType == TFileFormatType.FORMAT_CSV_PLAIN;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index 1ad1b12047..e54466ad86 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -1000,6 +1000,7 @@ public class HiveMetaStoreCache {
         // File Cache for self splitter.
         private final List<HiveFileStatus> files = Lists.newArrayList();
         // File split cache for old splitter. This is a temp variable.
+        @Deprecated
         private final List<FileSplit> splits = Lists.newArrayList();
         private boolean isSplittable;
         // The values of partitions.
@@ -1021,6 +1022,7 @@ public class HiveMetaStoreCache {
             }
         }
 
+        @Deprecated
         public void addSplit(FileSplit split) {
             if (isFileVisible(split.getPath())) {
                 splits.add(split);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java 
b/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java
index 704d0fadf8..e1baea3652 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java
@@ -190,7 +190,8 @@ public final class HiveUtil {
             return true;
         }
 
-        // use reflection to get isSplittable method on FileInputFormat
+        // use reflection to get isSplitable method on FileInputFormat
+        // ATTN: the method name is actually "isSplitable", although the 
correct spelling is "isSplittable"
         Method method = null;
         for (Class<?> clazz = inputFormat.getClass(); clazz != null; clazz = 
clazz.getSuperclass()) {
             try {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileScanNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileScanNode.java
index 662aa939ee..8e2c8ed3a4 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileScanNode.java
@@ -25,6 +25,7 @@ import org.apache.doris.analysis.TupleDescriptor;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.TableIf;
 import org.apache.doris.common.UserException;
+import org.apache.doris.common.util.Util;
 import org.apache.doris.planner.PlanNodeId;
 import org.apache.doris.planner.external.FileSplit.FileSplitCreator;
 import org.apache.doris.qe.ConnectContext;
@@ -32,6 +33,7 @@ import org.apache.doris.spi.Split;
 import org.apache.doris.statistics.StatisticalType;
 import org.apache.doris.thrift.TExplainLevel;
 import org.apache.doris.thrift.TExpr;
+import org.apache.doris.thrift.TFileCompressType;
 import org.apache.doris.thrift.TFileRangeDesc;
 import org.apache.doris.thrift.TFileScanNode;
 import org.apache.doris.thrift.TFileScanRangeParams;
@@ -221,19 +223,20 @@ public abstract class FileScanNode extends 
ExternalScanNode {
         if (blockLocations == null) {
             blockLocations = new BlockLocation[0];
         }
-        long splitSize = 
ConnectContext.get().getSessionVariable().getFileSplitSize();
-        if (splitSize <= 0) {
-            splitSize = blockSize;
-        }
-        // Min split size is DEFAULT_SPLIT_SIZE(128MB).
-        splitSize = Math.max(splitSize, DEFAULT_SPLIT_SIZE);
         List<Split> result = Lists.newArrayList();
-        if (!splittable) {
+        TFileCompressType compressType = 
Util.inferFileCompressTypeByPath(path.toString());
+        if (!splittable || compressType != TFileCompressType.PLAIN) {
             LOG.debug("Path {} is not splittable.", path);
             String[] hosts = blockLocations.length == 0 ? null : 
blockLocations[0].getHosts();
             result.add(splitCreator.create(path, 0, length, length, 
modificationTime, hosts, partitionValues));
             return result;
         }
+        long splitSize = 
ConnectContext.get().getSessionVariable().getFileSplitSize();
+        if (splitSize <= 0) {
+            splitSize = blockSize;
+        }
+        // Min split size is DEFAULT_SPLIT_SIZE(128MB).
+        splitSize = Math.max(splitSize, DEFAULT_SPLIT_SIZE);
         long bytesRemaining;
         for (bytesRemaining = length; (double) bytesRemaining / (double) 
splitSize > 1.1D;
                 bytesRemaining -= splitSize) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java
index 8e71e02fac..1209be1cb9 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java
@@ -51,6 +51,7 @@ import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.spi.Split;
 import org.apache.doris.statistics.StatisticalType;
 import org.apache.doris.thrift.TFileAttributes;
+import org.apache.doris.thrift.TFileCompressType;
 import org.apache.doris.thrift.TFileFormatType;
 import org.apache.doris.thrift.TFileTextScanRangeParams;
 import org.apache.doris.thrift.TFileType;
@@ -391,4 +392,14 @@ public class HiveScanNode extends FileQueryScanNode {
     public boolean pushDownAggNoGroupingCheckCol(FunctionCallExpr aggExpr, 
Column col) {
         return !col.isAllowNull();
     }
+
+    @Override
+    protected TFileCompressType getFileCompressType(FileSplit fileSplit) 
throws UserException {
+        TFileCompressType compressType = super.getFileCompressType(fileSplit);
+        // Hadoop uses the LZ4 block codec, so remap LZ4FRAME to LZ4BLOCK
+        if (compressType == TFileCompressType.LZ4FRAME) {
+            compressType = TFileCompressType.LZ4BLOCK;
+        }
+        return compressType;
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java
index 0f230c85f4..0bc8442760 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java
@@ -32,11 +32,6 @@ public class HiveSplit extends FileSplit {
         this.acidInfo = acidInfo;
     }
 
-    public HiveSplit(Path path, long start, long length, long fileLength, 
String[] hosts, AcidInfo acidInfo) {
-        super(path, start, length, fileLength, hosts, null);
-        this.acidInfo = acidInfo;
-    }
-
     @Override
     public Object getInfo() {
         return acidInfo;
diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift
index e3c9bd9aa4..e75754c322 100644
--- a/gensrc/thrift/PlanNodes.thrift
+++ b/gensrc/thrift/PlanNodes.thrift
@@ -116,6 +116,8 @@ enum TFileFormatType {
     FORMAT_PROTO,
     FORMAT_JNI,
     FORMAT_AVRO,
+    FORMAT_CSV_LZ4BLOCK,
+    FORMAT_CSV_SNAPPYBLOCK,
 }
 
 // In previous versions, the data compression format and file format were 
stored together, as TFileFormatType,
@@ -132,6 +134,8 @@ enum TFileCompressType {
     LZ4FRAME,
     DEFLATE,
     LZOP,
+    LZ4BLOCK,
+    SNAPPYBLOCK
 }
 
 struct THdfsConf {
diff --git a/regression-test/data/external_table_p2/hive/test_compress_type.out 
b/regression-test/data/external_table_p2/hive/test_compress_type.out
new file mode 100644
index 0000000000..a95bf1f0dd
--- /dev/null
+++ b/regression-test/data/external_table_p2/hive/test_compress_type.out
@@ -0,0 +1,47 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !q21 --
+600005
+
+-- !q22 --
+1510010
+
+-- !q23 --
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+
+-- !q31 --
+600005
+
+-- !q32 --
+1510010
+
+-- !q33 --
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+4611870011201662970    0       HD Tube 5*      1       2014-03-22T05:11:29     
2014-03-22      598875  4243808759      92f6fe1be9b9773206d6b63e50feb470        
                196     2314158381335918424     0       3       3       
http://public_search            yandex.ru.livemaster            0       0       
[]      [4,15,333,3912,14512,12818]     [18,348,1010]   []      1846    952     
29      10      1       0.77    0       0       24      73d7    1       1       
0       0                       3238011 0       0               0       0       
1119    641     157     2014-03-22T19:51:48     0       0       0       0       
utf-8   330     0       0       0       7774109565808082252     11274076        
0       0       0       0       0       E       2014-03-22T11:54:54     55      
2       3       4       6       [105,11,9,88,45,14,98,72,3,925,2193,6,2 [...]
+
diff --git 
a/regression-test/suites/external_table_p2/hive/test_compress_type.groovy 
b/regression-test/suites/external_table_p2/hive/test_compress_type.groovy
new file mode 100644
index 0000000000..d02ff3fbd0
--- /dev/null
+++ b/regression-test/suites/external_table_p2/hive/test_compress_type.groovy
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_compress_type", 
"p2,external,hive,external_remote,external_remote_hive") { // checks EXPLAIN split counts and query results for test_compress_partitioned
+    String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) { // whole suite is a no-op unless enableExternalHiveTest is "true" (case-insensitive)
+        String extHiveHmsHost = 
context.config.otherConfigs.get("extHiveHmsHost")
+        String extHiveHmsPort = 
context.config.otherConfigs.get("extHiveHmsPort")
+        String catalog_name = "test_compress_type"
+        sql """drop catalog if exists ${catalog_name};""" // drop any existing catalog of this name before re-creating it
+        sql """
+            create catalog if not exists ${catalog_name} properties (
+                'type'='hms',
+                'hadoop.username' = 'hadoop',
+                'hive.metastore.uris' = 
'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+            );
+        """
+        logger.info("catalog " + catalog_name + " created")
+        sql """switch ${catalog_name};"""
+        logger.info("switched to catalog " + catalog_name)
+        
+        sql """ use multi_catalog """
+
+        // table test_compress_partitioned has 6 partitions with different 
compressed file: plain, gzip, bzip2, deflate
+        sql """set file_split_size=0""" // first round: run the checks with file_split_size=0
+        explain {
+            sql("select count(*) from test_compress_partitioned")
+            contains "inputSplitNum=16, totalFileSize=734675596, scanRanges=16" // plan text expected while file_split_size=0
+            contains "partition=8/8" // NOTE(review): the comment above says 6 partitions, but 8/8 is asserted here - confirm which is right
+        }
+        qt_q21 """select count(*) from test_compress_partitioned where 
dt="gzip" or dt="mix""""
+        qt_q22 """select count(*) from test_compress_partitioned"""
+        order_qt_q23 """select * from test_compress_partitioned where 
watchid=4611870011201662970"""
+
+        sql """set file_split_size=8388608""" // second round: 8388608 = 8 * 1024 * 1024; presumably bytes (8 MiB) - TODO confirm unit
+        explain {
+            sql("select count(*) from test_compress_partitioned")
+            contains "inputSplitNum=82, totalFileSize=734675596, scanRanges=82" // same totalFileSize as above, but 82 splits instead of 16
+            contains "partition=8/8"
+        }
+
+        qt_q31 """select count(*) from test_compress_partitioned where 
dt="gzip" or dt="mix""""
+        qt_q32 """select count(*) from test_compress_partitioned"""
+        order_qt_q33 """select * from test_compress_partitioned where 
watchid=4611870011201662970"""
+        sql """set file_split_size=0""" // set file_split_size back to 0, the value used for the first round
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to