This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new f1b7acd098c branch-2.1: [fix](tvf) support compressed json file for tvf and refactor code (#51983) (#53063)
f1b7acd098c is described below

commit f1b7acd098c8df04c14b2abc836495036a88f0ef
Author: Socrates <[email protected]>
AuthorDate: Fri Jul 11 06:36:46 2025 +0800

    branch-2.1: [fix](tvf) support compressed json file for tvf and refactor code (#51983) (#53063)
    
    bp: #51983
---
 be/src/vec/exec/format/json/new_json_reader.cpp     |   4 +++-
 .../json_format_test/simple_object_json.json.gz     | Bin 0 -> 211 bytes
 .../data/external_table_p0/tvf/test_hdfs_tvf.out    | Bin 40945 -> 41176 bytes
 .../external_table_p0/tvf/test_hdfs_tvf.groovy      |  10 ++++++++++
 4 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp 
b/be/src/vec/exec/format/json/new_json_reader.cpp
index 58d9d177d1c..5953ec1319c 100644
--- a/be/src/vec/exec/format/json/new_json_reader.cpp
+++ b/be/src/vec/exec/format/json/new_json_reader.cpp
@@ -248,7 +248,9 @@ Status 
NewJsonReader::get_columns(std::unordered_map<std::string, TypeDescriptor
 Status NewJsonReader::get_parsed_schema(std::vector<std::string>* col_names,
                                         std::vector<TypeDescriptor>* 
col_types) {
     RETURN_IF_ERROR(_get_range_params());
-
+    // create decompressor.
+    // _decompressor may be nullptr if this is not a compressed file
+    RETURN_IF_ERROR(Decompressor::create_decompressor(_file_compress_type, 
&_decompressor));
     RETURN_IF_ERROR(_open_file_reader(true));
     if (_read_json_by_line) {
         RETURN_IF_ERROR(_open_line_reader());
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json_format_test/simple_object_json.json.gz
 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json_format_test/simple_object_json.json.gz
new file mode 100644
index 00000000000..8a6db90241f
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json_format_test/simple_object_json.json.gz
 differ
diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out 
b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out
index a8f5dcf5396..04ec58cdbae 100644
Binary files a/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out and 
b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out differ
diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy 
b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy
index 74cb1e320aa..8bc8194843d 100644
--- a/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy
@@ -143,6 +143,16 @@ suite("test_hdfs_tvf","external,hive,tvf,external_docker") 
{
                         "strip_outer_array" = "false",
                         "read_json_by_line" = "true") order by id; """
 
+            uri = "${defaultFS}" + 
"/user/doris/preinstalled_data/json_format_test/simple_object_json.json.gz"
+            format = "json"
+            qt_json_compressed """ select * from HDFS(
+                        "uri" = "${uri}",
+                        "hadoop.username" = "${hdfsUserName}",
+                        "format" = "${format}",
+                        "compress_type" = "GZ",
+                        "strip_outer_array" = "false",
+                        "read_json_by_line" = "true") order by id; """
+
 
            uri = "${defaultFS}" + 
"/user/doris/preinstalled_data/json_format_test/simple_object_json.json"
             format = "json"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to