This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new a1d02f36ac [feature](table-valued-function) support `hdfs()` tvf (#14213) a1d02f36ac is described below commit a1d02f36acdabd947911194260ab5f2ef8feda79 Author: Tiewei Fang <43782773+bepppo...@users.noreply.github.com> AuthorDate: Fri Nov 18 14:17:02 2022 +0800 [feature](table-valued-function) support `hdfs()` tvf (#14213) This pr does two things: 1. support `hdfs()` table valued function. 2. add regression test --- .licenserc.yaml | 1 + .../hive/scripts/csv_format_test/all_types.csv | 120 +++++++++ .../scripts/csv_format_test/array_malformat.csv | 5 + .../hive/scripts/csv_format_test/array_normal.csv | 9 + .../hive/scripts/csv_format_test/student.csv | 10 + .../scripts/csv_format_test/student_with_names.csv | 11 + .../student_with_names_and_types.csv | 12 + .../docker-compose/hive/scripts/hive-metastore.sh | 4 +- .../scripts/json_format_test/one_array_json.json | 52 ++++ .../json_format_test/simple_object_json.json | 4 +- .../java/org/apache/doris/backup/S3Storage.java | 4 +- .../doris/planner/external/QueryScanProvider.java | 17 +- .../doris/planner/external/TVFScanProvider.java | 4 + .../ExternalFileTableValuedFunction.java | 72 +++-- .../tablefunction/HdfsTableValuedFunction.java | 116 +++++++++ .../doris/tablefunction/S3TableValuedFunction.java | 58 ++++- .../doris/tablefunction/TableValuedFunctionIf.java | 2 + .../table_valued_function/test_hdfs_tvf.out | 289 +++++++++++++++++++++ .../load_p0/stream_load/test_hdfs_json_load.out | 28 +- .../table_valued_function/test_hdfs_tvf.groovy | 199 ++++++++++++++ 20 files changed, 962 insertions(+), 55 deletions(-) diff --git a/.licenserc.yaml b/.licenserc.yaml index 1d1d35a65a..d6554af369 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -27,6 +27,7 @@ header: - "**/test_data/**" - "**/jmockit/**" - "**/*.json" + - "**/*.csv" - "**/*.dat" - "**/*.svg" - "**/*.md5" diff --git a/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/all_types.csv b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/all_types.csv new file mode 100644 index 0000000000..9a5e34b270 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/all_types.csv @@ -0,0 +1,120 @@ +0,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +1,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +2,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +3,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +4,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +5,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +6,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +7,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +8,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +9,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +10,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +11,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +12,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +13,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +14,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +15,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +16,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +17,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 
11:12:59 +18,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +19,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +20,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +21,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +22,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +23,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +24,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +25,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +26,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +27,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +28,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +29,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +30,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +31,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +32,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +33,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +34,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +35,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +36,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +37,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +38,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +39,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +40,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +41,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +42,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +43,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +44,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +45,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +46,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +47,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +48,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +49,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +50,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +51,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +52,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +53,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +54,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +55,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +56,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +57,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +58,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +59,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +60,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +61,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +62,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +63,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +64,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +65,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 
+66,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +67,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +68,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +69,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +70,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +71,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +72,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +73,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +74,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +75,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +76,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +77,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +78,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +79,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +80,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +81,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +82,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +83,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +84,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +85,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +86,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +87,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +88,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +89,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +90,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +91,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +92,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +93,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +94,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +95,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +96,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +97,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +98,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +99,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +100,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +101,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +102,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +103,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +104,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +105,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +106,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +107,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +108,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +109,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +110,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +111,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +112,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +113,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 
+114,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +115,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +116,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +117,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +118,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 +119,2,3,4,5,6.6,7.7,8.8,abc,def,ghiaaaaaa,2020-10-10,2020-10-10 11:12:59 \ No newline at end of file diff --git a/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/array_malformat.csv b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/array_malformat.csv new file mode 100644 index 0000000000..3fbc5a50f5 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/array_malformat.csv @@ -0,0 +1,5 @@ +1|[1,2,3,4,5]|[32767,32768,32769]|[65534,65535,65536]|["a","b","c","d","e"]|["hello","world"]|["1991-01-01", "1992-02-02", "1993-03-03"]|["1991-01-01 00:00:00"]|[0.33,0.67]|[3.1415926,0.878787878]|[1,1.2,1.3] +2|[1,2,3,4,5]|[32767,32768,32769]|[65534,65535,65536]|["a","b","c","d","e"]|["hello","world"]|['1991-01-01', '1992-02-02', '1993-03-03']|\N|\N|\N|[1,\N,1.3] +3|\N|\N|\N|\N|\N|\N|\N|\N|\N|\N +4|1,2,3,4,5|\N|\N|\N|\N|\N|\N|\N|\N|\N +5|[1,2,3,4,5|\N|\N|\N|\N|\N|\N|\N|\N|\N diff --git a/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/array_normal.csv b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/array_normal.csv new file mode 100644 index 0000000000..b4b3a716a7 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/array_normal.csv @@ -0,0 +1,9 @@ +1|[1,2,3,4,5]|[32767,32768,32769]|[65534,65535,65536]|["a","b","c","d","e"]|["hello","world"]|["1991-01-01", "1992-02-02", "1993-03-03"]|["1991-01-01 00:00:00"]|[0.33,0.67]|[3.1415926,0.878787878]|[1,1.2,1.3] +2|[1,2,3,4,5]|[32767,32768,32769]|[65534,65535,65536]|["a","b","c","d","e"]|["hello","world"]|['1991-01-01', '1992-02-02', '1993-03-03']|\N|\N|\N|[1,\N,1.3] +3|\N|\N|\N|\N|\N|\N|\N|\N|\N|\N +4|[]|[]|[]|[]|[]|[]|[]|[]|[]|[] +5|[null]|[null]|[null]|[null]|[null]|[null]|[null]|[null]|[null]|[null] +6|[null,null]|[null,null]|[null,null]|[null,null]|[null,null]|[null,null]|[null,null]|[null,null]|[null,null]|[null,null,null,null,null,null] +6|[null,null]|[null,null]|[null,null]|[null,null]|[null,"null"]|[null,null]|[null,null]|[null,null]|[null,null]|[null,null,null,null,null,null] +7|[1,2,3,4,5]|\N|\N|\N|\N|\N|\N|\N|\N|\N +8|[1,2,3,4,5]|\N|\N|\N|\N|\N|[]]|]]|[[]|[[ diff --git a/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/student.csv b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/student.csv new file mode 100644 index 0000000000..3a7d6c5d6f --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/student.csv @@ -0,0 +1,10 @@ +1,alice,18 +2,bob,20 +3,jack,24 +4,jackson,19 +5,liming,18 +6,luffy,20 +7,zoro,22 +8,sanzi,26 +9,wusuopu,21 +10,nami,18 \ No newline at end of file diff --git a/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/student_with_names.csv b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/student_with_names.csv new file mode 100644 index 0000000000..62d32e39f4 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/student_with_names.csv @@ -0,0 +1,11 @@ +id,name,age +1,alice,18 +2,bob,20 +3,jack,24 +4,jackson,19 +5,liming,18 +6,luffy,20 +7,zoro,22 +8,sanzi,26 +9,wusuopu,21 +10,nami,18 \ No newline at end of file diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/student_with_names_and_types.csv b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/student_with_names_and_types.csv new file mode 100644 index 0000000000..4e88aef6d8 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/csv_format_test/student_with_names_and_types.csv @@ -0,0 +1,12 @@ +id,name,age +INT,STRING,INT +1,alice,18 +2,bob,20 +3,jack,24 +4,jackson,19 +5,liming,18 +6,luffy,20 +7,zoro,22 +8,sanzi,26 +9,wusuopu,21 +10,nami,18 \ No newline at end of file diff --git a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh index 6d97471cc5..884684f2ad 100755 --- a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh +++ b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh @@ -27,12 +27,14 @@ echo "hadoop fs -mkdir /user/doris/" hadoop fs -mkdir -p /user/doris/ echo "hadoop fs -put /mnt/scripts/tpch1.db /user/doris/" hadoop fs -put /mnt/scripts/tpch1.db /user/doris/ -echo "hadoop fs -put /mnt/scripts/json_format_test.db /user/doris/" +echo "hadoop fs -put /mnt/scripts/json_format_test /user/doris/" hadoop fs -put /mnt/scripts/json_format_test /user/doris/ echo "hadoop fs -put /mnt/scripts/parquet /user/doris/" hadoop fs -put /mnt/scripts/parquet /user/doris/ echo "hadoop fs -put /mnt/scripts/orc /user/doris/" hadoop fs -put /mnt/scripts/orc /user/doris/ +echo "hadoop fs -put /mnt/scripts/csv_format_test /user/doris/" +hadoop fs -put /mnt/scripts/csv_format_test /user/doris/ echo "hive -f /mnt/scripts/create.hql" hive -f /mnt/scripts/create.hql diff --git a/docker/thirdparties/docker-compose/hive/scripts/json_format_test/one_array_json.json b/docker/thirdparties/docker-compose/hive/scripts/json_format_test/one_array_json.json new file mode 100644 index 0000000000..042db8c884 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/json_format_test/one_array_json.json @@ -0,0 +1,52 @@ +[ + { + "id": 1, + "city": "beijing", + "code": 1454547 + }, + { + "id": 2, + "city": "shanghai", + "code": 1244264 + }, + { + "id": 3, + "city": "guangzhou", + "code": 528369 + }, + { + "id": 4, + "city": "shenzhen", + "code": 594201 + }, + { + "id": 5, + "city": "hangzhou", + "code": 594201 + }, + { + "id": 6, + "city": "nanjing", + "code": 2345672 + }, + { + "id": 7, + "city": "wuhan", + "code": 2345673 + }, + { + "id": 8, + "city": "chengdu", + "code": 2345674 + }, + { + "id": 9, + "city": "xian", + "code": 2345675 + }, + { + "id": 10, + "city": "hefei", + "code": 2345676 + } +] \ No newline at end of file diff --git a/docker/thirdparties/docker-compose/hive/scripts/json_format_test/simple_object_json.json b/docker/thirdparties/docker-compose/hive/scripts/json_format_test/simple_object_json.json index a7912466fd..5c3a9c07e9 100644 --- a/docker/thirdparties/docker-compose/hive/scripts/json_format_test/simple_object_json.json +++ b/docker/thirdparties/docker-compose/hive/scripts/json_format_test/simple_object_json.json @@ -8,5 +8,5 @@ {"id": 8, "city": "chengdu", "code": 2345678} {"id": 9, "city": "xian", "code": 2345679} {"id": 10, "city": "hefei", "code": 23456710} -{"id": 10, "city": null, "code": 23456711} -{"id": 10, "city": "hefei", "code": null} +{"id": 11, "city": null, "code": 23456711} +{"id": 12, "city": "hefei", "code": null} diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/S3Storage.java b/fe/fe-core/src/main/java/org/apache/doris/backup/S3Storage.java index 
ae89175ebd..d1e250e779 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/S3Storage.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/S3Storage.java @@ -97,14 +97,14 @@ public class S3Storage extends BlobStorage { public void setProperties(Map<String, String> properties) { super.setProperties(properties); caseInsensitiveProperties.putAll(properties); - // Virtual hosted-sytle is recommended in the s3 protocol. + // Virtual hosted-style is recommended in the s3 protocol. // The path-style has been abandoned, but for some unexplainable reasons, // the s3 client will determine whether the endpiont starts with `s3` // when generating a virtual hosted-sytle request. // If not, it will not be converted ( https://github.com/aws/aws-sdk-java-v2/pull/763), // but the endpoints of many cloud service providers for object storage do not start with s3, // so they cannot be converted to virtual hosted-sytle. - // Some of them, such as aliyun's oss, only support virtual hosted-sytle, + // Some of them, such as aliyun's oss, only support virtual hosted-style, // and some of them(ceph) may only support // path-style, so we need to do some additional conversion. // diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java index c38d1b967a..9cf2255f64 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java @@ -64,12 +64,6 @@ public abstract class QueryScanProvider implements FileScanProviderIf { return; } InputSplit inputSplit = inputSplits.get(0); - String fullPath = ((FileSplit) inputSplit).getPath().toUri().toString(); - String filePath = ((FileSplit) inputSplit).getPath().toUri().getPath(); - // eg: - // hdfs://namenode - // s3://buckets - String fsName = fullPath.replace(filePath, ""); TFileType locationType = getLocationType(); context.params.setFileType(locationType); TFileFormatType fileFormatType = getFileFormatType(); @@ -84,6 +78,17 @@ public abstract class QueryScanProvider implements FileScanProviderIf { // set hdfs params for hdfs file type. 
Map<String, String> locationProperties = getLocationProperties(); if (locationType == TFileType.FILE_HDFS) { + String fsName = ""; + if (this instanceof TVFScanProvider) { + fsName = ((TVFScanProvider) this).getFsName(); + } else { + String fullPath = ((FileSplit) inputSplit).getPath().toUri().toString(); + String filePath = ((FileSplit) inputSplit).getPath().toUri().getPath(); + // eg: + // hdfs://namenode + // s3://buckets + fsName = fullPath.replace(filePath, ""); + } THdfsParams tHdfsParams = BrokerUtil.generateHdfsParam(locationProperties); tHdfsParams.setFsName(fsName); context.params.setHdfsParams(tHdfsParams); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/TVFScanProvider.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/TVFScanProvider.java index 8c8bdf9d30..954d271a94 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/TVFScanProvider.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/TVFScanProvider.java @@ -58,6 +58,10 @@ public class TVFScanProvider extends QueryScanProvider { this.tableValuedFunction = tableValuedFunction; } + public String getFsName() { + return tableValuedFunction.getFsName(); + } + // =========== implement abstract methods of QueryScanProvider ================= @Override public TFileAttributes getFileAttributes() throws UserException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java index ac69cad8de..17b3364294 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java @@ -45,10 +45,12 @@ import org.apache.doris.thrift.TFileScanRange; import org.apache.doris.thrift.TFileScanRangeParams; import org.apache.doris.thrift.TFileTextScanRangeParams; import org.apache.doris.thrift.TFileType; +import org.apache.doris.thrift.THdfsParams; import org.apache.doris.thrift.TNetworkAddress; import org.apache.doris.thrift.TPrimitiveType; import org.apache.doris.thrift.TStatusCode; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import com.google.protobuf.ByteString; import org.apache.log4j.LogManager; @@ -75,20 +77,37 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio protected static final String JSON_PATHS = "jsonpaths"; protected static final String STRIP_OUTER_ARRAY = "strip_outer_array"; protected static final String READ_JSON_BY_LINE = "read_json_by_line"; + protected static final String NUM_AS_STRING = "num_as_string"; + protected static final String FUZZY_PARSE = "fuzzy_parse"; + + protected static final ImmutableSet<String> FILE_FORMAT_PROPERTIES = new ImmutableSet.Builder<String>() + .add(FORMAT) + .add(JSON_ROOT) + .add(JSON_PATHS) + .add(STRIP_OUTER_ARRAY) + .add(READ_JSON_BY_LINE) + .add(NUM_AS_STRING) + .add(FUZZY_PARSE) + .add(COLUMN_SEPARATOR) + .add(LINE_DELIMITER) + .build(); + protected List<Column> columns = null; protected List<TBrokerFileStatus> fileStatuses = Lists.newArrayList(); protected Map<String, String> locationProperties; - protected TFileFormatType fileFormatType; - protected String headerType = ""; + private TFileFormatType fileFormatType; + private String headerType = ""; - protected String columnSeparator = DEFAULT_COLUMN_SEPARATOR; - protected String lineDelimiter = DEFAULT_LINE_DELIMITER; - 
protected String jsonRoot = ""; - protected String jsonPaths = ""; - protected String stripOuterArray = ""; - protected String readJsonByLine = ""; + private String columnSeparator = DEFAULT_COLUMN_SEPARATOR; + private String lineDelimiter = DEFAULT_LINE_DELIMITER; + private String jsonRoot = ""; + private String jsonPaths = ""; + private boolean stripOuterArray; + private boolean readJsonByLine; + private boolean numAsString; + private boolean fuzzyParse; public abstract TFileType getTFileType(); @@ -105,6 +124,16 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio return locationProperties; } + public String getFsName() { + TFileType fileType = getTFileType(); + if (fileType == TFileType.FILE_HDFS) { + return locationProperties.get(HdfsTableValuedFunction.HADOOP_FS_NAME); + } else if (fileType == TFileType.FILE_S3) { + return locationProperties.get(S3TableValuedFunction.S3_ENDPOINT); + } + return ""; + } + protected void parseFile() throws UserException { String path = getFilePath(); BrokerDesc brokerDesc = getBrokerDesc(); @@ -142,8 +171,10 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio lineDelimiter = validParams.getOrDefault(LINE_DELIMITER, DEFAULT_LINE_DELIMITER); jsonRoot = validParams.getOrDefault(JSON_ROOT, ""); jsonPaths = validParams.getOrDefault(JSON_PATHS, ""); - stripOuterArray = validParams.getOrDefault(STRIP_OUTER_ARRAY, "false").toLowerCase(); - readJsonByLine = validParams.getOrDefault(READ_JSON_BY_LINE, "true").toLowerCase(); + readJsonByLine = Boolean.valueOf(validParams.get(READ_JSON_BY_LINE)).booleanValue(); + stripOuterArray = Boolean.valueOf(validParams.get(STRIP_OUTER_ARRAY)).booleanValue(); + numAsString = Boolean.valueOf(validParams.get(NUM_AS_STRING)).booleanValue(); + fuzzyParse = Boolean.valueOf(validParams.get(FUZZY_PARSE)).booleanValue(); } public List<TBrokerFileStatus> getFileStatuses() { @@ -161,17 +192,10 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio } else if (this.fileFormatType == TFileFormatType.FORMAT_JSON) { fileAttributes.setJsonRoot(jsonRoot); fileAttributes.setJsonpaths(jsonPaths); - if (readJsonByLine.equalsIgnoreCase("true")) { - fileAttributes.setReadJsonByLine(true); - } else { - fileAttributes.setReadJsonByLine(false); - } - if (stripOuterArray.equalsIgnoreCase("true")) { - fileAttributes.setStripOuterArray(true); - } else { - fileAttributes.setStripOuterArray(false); - } - // TODO(ftw): num_as_string/fuzzy_parser? 
+ fileAttributes.setReadJsonByLine(readJsonByLine); + fileAttributes.setStripOuterArray(stripOuterArray); + fileAttributes.setNumAsString(numAsString); + fileAttributes.setFuzzyParse(fuzzyParse); } return fileAttributes; } @@ -254,6 +278,12 @@ public abstract class ExternalFileTableValuedFunctio fileScanRangeParams.setFormatType(fileFormatType); fileScanRangeParams.setProperties(locationProperties); fileScanRangeParams.setFileAttributes(getFileAttributes()); + if (getTFileType() == TFileType.FILE_HDFS) { + THdfsParams tHdfsParams = BrokerUtil.generateHdfsParam(locationProperties); + String fsName = getLocationProperties().get(HdfsTableValuedFunction.HADOOP_FS_NAME); + tHdfsParams.setFsName(fsName); + fileScanRangeParams.setHdfsParams(tHdfsParams); + } // get first file, used to parse table schema TBrokerFileStatus firstFile = null; diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HdfsTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HdfsTableValuedFunction.java new file mode 100644 index 0000000000..175c9e501a --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HdfsTableValuedFunction.java @@ -0,0 +1,116 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.tablefunction; + +import org.apache.doris.analysis.BrokerDesc; +import org.apache.doris.analysis.ExportStmt; +import org.apache.doris.analysis.StorageBackend.StorageType; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.UserException; +import org.apache.doris.common.util.URI; +import org.apache.doris.thrift.TFileType; + +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Maps; +import org.apache.commons.collections.map.CaseInsensitiveMap; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +import java.util.Map; + +/** + * The implementation of the table valued function + * hdfs("uri" = "xxx", "hadoop.username" = "xx", "FORMAT" = "csv").
+ */ +public class HdfsTableValuedFunction extends ExternalFileTableValuedFunction { + public static final Logger LOG = LogManager.getLogger(HdfsTableValuedFunction.class); + + public static final String NAME = "hdfs"; + public static final String HDFS_URI = "uri"; + public static String HADOOP_FS_NAME = "fs.defaultFS"; + // simple or kerberos + public static String HADOOP_SECURITY_AUTHENTICATION = "hadoop.security.authentication"; + public static String HADOOP_USER_NAME = "hadoop.username"; + public static String HADOOP_KERBEROS_PRINCIPAL = "hadoop.kerberos.principal"; + public static String HADOOP_KERBEROS_KEYTAB = "hadoop.kerberos.keytab"; + public static String HADOOP_SHORT_CIRCUIT = "dfs.client.read.shortcircuit"; + public static String HADOOP_SOCKET_PATH = "dfs.domain.socket.path"; + + private static final ImmutableSet<String> LOCATION_PROPERTIES = new ImmutableSet.Builder<String>() + .add(HDFS_URI) + .add(HADOOP_SECURITY_AUTHENTICATION) + .add(HADOOP_FS_NAME) + .add(HADOOP_USER_NAME) + .add(HADOOP_KERBEROS_PRINCIPAL) + .add(HADOOP_KERBEROS_KEYTAB) + .add(HADOOP_SHORT_CIRCUIT) + .add(HADOOP_SOCKET_PATH) + .build(); + + private URI hdfsUri; + private String filePath; + + public HdfsTableValuedFunction(Map<String, String> params) throws UserException { + Map<String, String> fileFormatParams = new CaseInsensitiveMap(); + locationProperties = Maps.newHashMap(); + for (String key : params.keySet()) { + if (FILE_FORMAT_PROPERTIES.contains(key.toLowerCase())) { + fileFormatParams.put(key, params.get(key)); + } else if (LOCATION_PROPERTIES.contains(key.toLowerCase()) || HADOOP_FS_NAME.equalsIgnoreCase(key)) { + // because HADOOP_FS_NAME contains upper and lower case + if (HADOOP_FS_NAME.equalsIgnoreCase(key)) { + locationProperties.put(HADOOP_FS_NAME, params.get(key)); + } else { + locationProperties.put(key.toLowerCase(), params.get(key)); + } + } else { + throw new AnalysisException(key + " is invalid property"); + } + } + + ExportStmt.checkPath(locationProperties.get(HDFS_URI), StorageType.HDFS); + hdfsUri = URI.create(locationProperties.get(HDFS_URI)); + filePath = locationProperties.get(HADOOP_FS_NAME) + hdfsUri.getPath(); + + parseProperties(fileFormatParams); + parseFile(); + } + + // =========== implement abstract methods of ExternalFileTableValuedFunction ================= + @Override + public TFileType getTFileType() { + return TFileType.FILE_HDFS; + } + + @Override + public String getFilePath() { + // must be "hdfs://namenode/filepath" + return filePath; + } + + @Override + public BrokerDesc getBrokerDesc() { + return new BrokerDesc("S3TvfBroker", StorageType.HDFS, locationProperties); + } + + // =========== implement abstract methods of TableValuedFunctionIf ================= + @Override + public String getTableName() { + return "HDFSTableValuedFunction"; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java index 784a75accc..6af05f3374 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java @@ -26,6 +26,7 @@ import org.apache.doris.thrift.TFileType; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Maps; +import org.apache.commons.collections.map.CaseInsensitiveMap; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -46,44 +47,71 @@ public class S3TableValuedFunction 
extends ExternalFileTableValuedFunction { private static final String AK = "access_key"; private static final String SK = "secret_key"; - public static final String USE_PATH_STYLE = "use_path_style"; + private static final String USE_PATH_STYLE = "use_path_style"; + private static final String REGION = "region"; private static final ImmutableSet<String> PROPERTIES_SET = new ImmutableSet.Builder<String>() .add(S3_URI) .add(AK) .add(SK) - .add(FORMAT) - .add(JSON_ROOT) - .add(JSON_PATHS) - .add(STRIP_OUTER_ARRAY) - .add(READ_JSON_BY_LINE) + .add(USE_PATH_STYLE) + .add(REGION) .build(); private S3URI s3uri; private String s3AK; private String s3SK; + private String endPoint; + private String virtualBucket; + private boolean forceVirtualHosted; public S3TableValuedFunction(Map<String, String> params) throws UserException { - Map<String, String> validParams = Maps.newHashMap(); + Map<String, String> validParams = new CaseInsensitiveMap(); for (String key : params.keySet()) { - if (!PROPERTIES_SET.contains(key.toLowerCase())) { + if (!PROPERTIES_SET.contains(key.toLowerCase()) && !FILE_FORMAT_PROPERTIES.contains(key.toLowerCase())) { throw new AnalysisException(key + " is invalid property"); } - validParams.put(key.toLowerCase(), params.get(key)); + validParams.put(key, params.get(key)); } - s3uri = S3URI.create(validParams.get(S3_URI)); + String originUri = validParams.getOrDefault(S3_URI, ""); + if (originUri.toLowerCase().startsWith("s3")) { + // s3 protocol + forceVirtualHosted = false; + } else { + // not s3 protocol, forceVirtualHosted is determined by USE_PATH_STYLE. + forceVirtualHosted = !Boolean.valueOf(validParams.get(USE_PATH_STYLE)).booleanValue(); + } + + s3uri = S3URI.create(validParams.get(S3_URI), forceVirtualHosted); + if (forceVirtualHosted) { + // s3uri.getVirtualBucket() is: virtualBucket.endpoint, e.g.: + // uri: http://my_bucket.cos.ap-beijing.myqcloud.com/file.txt + // s3uri.getVirtualBucket() = my_bucket.cos.ap-beijing.myqcloud.com, + // so we need to separate virtualBucket and endpoint. + String[] fields = s3uri.getVirtualBucket().split("\\.", 2); + virtualBucket = fields[0]; + if (fields.length > 1) { + endPoint = fields[1]; + } else { + throw new AnalysisException("cannot parse endpoint, please check uri."); + } + } else { + endPoint = s3uri.getBucketScheme(); } s3AK = validParams.getOrDefault(AK, ""); s3SK = validParams.getOrDefault(SK, ""); + String usePathStyle = validParams.getOrDefault(USE_PATH_STYLE, "false"); parseProperties(validParams); // set S3 location properties + // these five properties are necessary; none of them can be omitted. locationProperties = Maps.newHashMap(); - locationProperties.put(S3_ENDPOINT, s3uri.getBucketScheme()); + locationProperties.put(S3_ENDPOINT, endPoint); locationProperties.put(S3_AK, s3AK); locationProperties.put(S3_SK, s3SK); - locationProperties.put(S3_REGION, ""); + locationProperties.put(S3_REGION, validParams.getOrDefault(REGION, "")); locationProperties.put(USE_PATH_STYLE, usePathStyle); parseFile(); } @@ -97,6 +125,10 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction { @Override public String getFilePath() { // must be "s3://..."
+ if (forceVirtualHosted) { + return NAME + S3URI.SCHEME_DELIM + virtualBucket + S3URI.PATH_DELIM + + s3uri.getBucket() + S3URI.PATH_DELIM + s3uri.getKey(); + } return NAME + S3URI.SCHEME_DELIM + s3uri.getKey(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/TableValuedFunctionIf.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/TableValuedFunctionIf.java index 3063880c92..56167d1b87 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/TableValuedFunctionIf.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/TableValuedFunctionIf.java @@ -48,6 +48,8 @@ public abstract class TableValuedFunctionIf { return new NumbersTableValuedFunction(params); case S3TableValuedFunction.NAME: return new S3TableValuedFunction(params); + case HdfsTableValuedFunction.NAME: + return new HdfsTableValuedFunction(params); default: throw new UserException("Could not find table function " + funcName); } diff --git a/regression-test/data/correctness_p0/table_valued_function/test_hdfs_tvf.out b/regression-test/data/correctness_p0/table_valued_function/test_hdfs_tvf.out new file mode 100644 index 0000000000..6ed17594b9 --- /dev/null +++ b/regression-test/data/correctness_p0/table_valued_function/test_hdfs_tvf.out @@ -0,0 +1,289 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !csv_all_types -- +0 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +1 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +10 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +100 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +101 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +102 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +103 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +104 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +105 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +106 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +107 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +108 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +109 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +11 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +110 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +111 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +112 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +113 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +114 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +115 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +116 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +117 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +118 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +119 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +12 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +13 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +14 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +15 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +16 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +17 2 3 4 5 
6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +18 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +19 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +2 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +20 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +21 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +22 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +23 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +24 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +25 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +26 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +27 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +28 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +29 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +3 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +30 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +31 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +32 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +33 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +34 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +35 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +36 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +37 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +38 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +39 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +4 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +40 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +41 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +42 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +43 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +44 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +45 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +46 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +47 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +48 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +49 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +5 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +50 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +51 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +52 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +53 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +54 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +55 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +56 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +57 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +58 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +59 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +6 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +60 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 
11:12:59 +61 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +62 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +63 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +64 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +65 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +66 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +67 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +68 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +69 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +7 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +70 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +71 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +72 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +73 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +74 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +75 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +76 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +77 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +78 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +79 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +8 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +80 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +81 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +82 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +83 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +84 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +85 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +86 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +87 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +88 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +89 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +9 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +90 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +91 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +92 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +93 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +94 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +95 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +96 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +97 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +98 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 +99 2 3 4 5 6.6 7.7 8.8 abc def ghiaaaaaa 2020-10-10 2020-10-10 11:12:59 + +-- !csv_student -- +1 alice 18 +2 bob 20 +3 jack 24 +4 jackson 19 +5 liming 18 +6 luffy 20 +7 zoro 22 +8 sanzi 26 +9 wusuopu 21 +10 nami 18 + +-- !csv_array_malformat -- +1 [1,2,3,4,5] [32767,32768,32769] [65534,65535,65536] ["a","b","c","d","e"] ["hello","world"] ["1991-01-01", "1992-02-02", "1993-03-03"] ["1991-01-01 00:00:00"] [0.33,0.67] [3.1415926,0.878787878] [1,1.2,1.3] +2 [1,2,3,4,5] [32767,32768,32769] [65534,65535,65536] ["a","b","c","d","e"] ["hello","world"] 
['1991-01-01', '1992-02-02', '1993-03-03'] \N \N \N [1,\\N,1.3] +3 \N \N \N \N \N \N \N \N \N \N +4 1,2,3,4,5 \N \N \N \N \N \N \N \N \N +5 [1,2,3,4,5 \N \N \N \N \N \N \N \N \N + +-- !csv_array_normal -- +1 [1,2,3,4,5] [32767,32768,32769] [65534,65535,65536] ["a","b","c","d","e"] ["hello","world"] ["1991-01-01", "1992-02-02", "1993-03-03"] ["1991-01-01 00:00:00"] [0.33,0.67] [3.1415926,0.878787878] [1,1.2,1.3] +2 [1,2,3,4,5] [32767,32768,32769] [65534,65535,65536] ["a","b","c","d","e"] ["hello","world"] ['1991-01-01', '1992-02-02', '1993-03-03'] \N \N \N [1,\\N,1.3] +3 \N \N \N \N \N \N \N \N \N \N +4 [] [] [] [] [] [] [] [] [] [] +5 [null] [null] [null] [null] [null] [null] [null] [null] [null] [null] +6 [null,null] [null,null] [null,null] [null,null] [null,null] [null,null] [null,null] [null,null] [null,null] [null,null,null,null,null,null] +6 [null,null] [null,null] [null,null] [null,null] [null,"null"] [null,null] [null,null] [null,null] [null,null] [null,null,null,null,null,null] +7 [1,2,3,4,5] \N \N \N \N \N \N \N \N \N +8 [1,2,3,4,5] \N \N \N \N \N []] ]] [[] [[ + +-- !csv_names -- +1 alice 18 +2 bob 20 +3 jack 24 +4 jackson 19 +5 liming 18 +6 luffy 20 +7 zoro 22 +8 sanzi 26 +9 wusuopu 21 +10 nami 18 + +-- !csv_names_types -- +1 alice 18 +2 bob 20 +3 jack 24 +4 jackson 19 +5 liming 18 +6 luffy 20 +7 zoro 22 +8 sanzi 26 +9 wusuopu 21 +10 nami 18 + +-- !parquet -- +1 Supplier#000000001 N kD4on9OM Ipw3,gf0JBoQDd7tgrzrddZ 17 27-918-335-1736 5755 each slyly above the careful +2 Supplier#000000002 89eJ5ksX3ImxJQBvxObC, 5 15-679-861-2259 4032 slyly bold instructions. idle dependen +3 Supplier#000000003 q1,G3Pj6OjIuUYfUoH18BFTKP5aU9bEV3 1 11-383-516-1199 4192 blithely silent requests after the express dependencies are sl +4 Supplier#000000004 Bk7ah4CK8SYQTepEmvMkkgMwg 15 25-843-787-7479 4641 riously even requests above the exp +5 Supplier#000000005 Gcdm2rJRzl5qlTVzc 11 21-151-690-3663 -283 . slyly regular pinto bea +6 Supplier#000000006 tQxuVm7s7CnK 14 24-696-997-4969 1365 final accounts. regular dolphins use against the furiously ironic decoys. +7 Supplier#000000007 s,4TicNGB4uO6PaSqNBUq 23 33-990-965-2201 6820 s unwind silently furiously regular courts. final requests are deposits. requests wake quietly blit +8 Supplier#000000008 9Sq4bBH2FQEmaFOocY45sRTxo6yuoG 17 27-498-742-3860 7627 al pinto beans. asymptotes haggl +9 Supplier#000000009 1KhUgZegwM3ua7dsYmekYBsK 10 20-403-398-8662 5302 s. unusual, even requests along the furiously regular pac +10 Supplier#000000010 Saygah3gYWMp72i PY 24 34-852-489-8585 3891 ing waters. regular requests ar +11 Supplier#000000011 JfwTs,LZrV, M,9C 18 28-613-996-1505 3393 y ironic packages. slyly ironic accounts affix furiously; ironically unusual excuses across the flu +12 Supplier#000000012 aLIW q0HYd 8 18-179-925-7181 1432 al packages nag alongside of the bold instructions. express, daring accounts +13 Supplier#000000013 HK71HQyWoqRWOX8GI FpgAifW,2PoH 3 13-727-620-7813 9107 requests engage regularly instructions. furiously special requests ar +14 Supplier#000000014 EXsnO5pTNj4iZRm 15 25-656-247-5058 9189 l accounts boost. fluffily bold warhorses wake +15 Supplier#000000015 olXVbNBfVzRqgokr1T,Ie 8 18-453-357-6394 308 across the furiously regular platelets wake even deposits. quickly express she +16 Supplier#000000016 YjP5C55zHDXL7LalK27zfQnwejdpin4AMpvh 22 32-822-502-4215 2972 ously express ideas haggle quickly dugouts? fu +17 Supplier#000000017 c2d,ESHRSkK3WYnxpgw6aOqN0q 19 29-601-884-9219 1687 eep against the furiously bold ideas. 
fluffily bold packa +18 Supplier#000000018 PGGVE5PWAMwKDZw 16 26-729-551-1115 7040 accounts snooze slyly furiously bold +19 Supplier#000000019 edZT3es,nBFD8lBXTGeTl 24 34-278-310-2731 6150 refully final foxes across the dogged theodolites sleep slyly abou +20 Supplier#000000020 iybAE,RmTymrZVYaFZva2SH,j 3 13-715-945-6730 530 n, ironic ideas would nag blithely about the slyly regular accounts. silent, expr + +-- !orc -- +1 goldenrod lavender spring chocolate lace Manufacturer#1 Brand#13 PROMO BURNISHED COPPER 7 JUMBO PKG 901 ly. slyly ironi +2 blush thistle blue yellow saddle Manufacturer#1 Brand#13 LARGE BRUSHED BRASS 1 LG CASE 902 lar accounts amo +3 spring green yellow purple cornsilk Manufacturer#4 Brand#42 STANDARD POLISHED BRASS 21 WRAP CASE 903 egular deposits hag +4 cornflower chocolate smoke green pink Manufacturer#3 Brand#34 SMALL PLATED BRASS 14 MED DRUM 904 p furiously r +5 forest brown coral puff cream Manufacturer#3 Brand#32 STANDARD POLISHED TIN 15 SM PKG 905 wake carefully +6 bisque cornflower lawn forest magenta Manufacturer#2 Brand#24 PROMO PLATED STEEL 4 MED BAG 906 sual a +7 moccasin green thistle khaki floral Manufacturer#1 Brand#11 SMALL PLATED COPPER 45 SM BAG 907 lyly. ex +8 misty lace thistle snow royal Manufacturer#4 Brand#44 PROMO BURNISHED TIN 41 LG DRUM 908 eposi +9 thistle dim navajo dark gainsboro Manufacturer#4 Brand#43 SMALL BURNISHED STEEL 12 WRAP CASE 909 ironic foxe +10 linen pink saddle puff powder Manufacturer#5 Brand#54 LARGE BURNISHED STEEL 44 LG CAN 910 ithely final deposit +11 spring maroon seashell almond orchid Manufacturer#2 Brand#25 STANDARD BURNISHED NICKEL 43 WRAP BOX 911 ng gr +12 cornflower wheat orange maroon ghost Manufacturer#3 Brand#33 MEDIUM ANODIZED STEEL 25 JUMBO CASE 912 quickly +13 ghost olive orange rosy thistle Manufacturer#5 Brand#55 MEDIUM BURNISHED NICKEL 1 JUMBO PACK 913 osits. 
+14 khaki seashell rose cornsilk navajo Manufacturer#1 Brand#13 SMALL POLISHED STEEL 28 JUMBO BOX 914 kages c +15 blanched honeydew sky turquoise medium Manufacturer#1 Brand#15 LARGE ANODIZED BRASS 45 LG CASE 915 usual ac +16 deep sky turquoise drab peach Manufacturer#3 Brand#32 PROMO PLATED TIN 2 MED PACK 916 unts a +17 indian navy coral pink deep Manufacturer#4 Brand#43 ECONOMY BRUSHED STEEL 16 LG BOX 917 regular accounts +18 turquoise indian lemon lavender misty Manufacturer#1 Brand#11 SMALL BURNISHED STEEL 42 JUMBO PACK 918 s cajole slyly a +19 chocolate navy tan deep brown Manufacturer#2 Brand#23 SMALL ANODIZED NICKEL 33 WRAP BOX 919 pending acc +20 ivory navy honeydew sandy midnight Manufacturer#1 Brand#12 LARGE POLISHED NICKEL 48 MED BAG 920 are across the asympt + +-- !json -- +1 beijing 2345671 +10 hefei 23456710 +11 \N 23456711 +12 hefei \N +2 shanghai 2345672 +3 guangzhou 2345673 +4 shenzhen 2345674 +5 hangzhou 2345675 +6 nanjing 2345676 +7 wuhan 2345677 +8 chengdu 2345678 +9 xian 2345679 + +-- !json_root -- +1 beijing 2345671 +2 shanghai 2345672 +3 hangzhou 2345673 +4 shenzhen 2345674 +5 guangzhou 2345675 + +-- !json_paths -- +1 2345671 +2 2345672 +3 2345673 +4 2345674 +5 2345675 +6 2345676 +7 2345677 +8 2345678 +9 2345679 +10 23456710 +11 23456711 +12 \N + +-- !one_array -- +1 beijing 1454547 +2 shanghai 1244264 +3 guangzhou 528369 +4 shenzhen 594201 +5 hangzhou 594201 +6 nanjing 2345672 +7 wuhan 2345673 +8 chengdu 2345674 +9 xian 2345675 +10 hefei 2345676 + +-- !cast -- +1 beijing 2345671 +2 shanghai 2345672 +3 guangzhou 2345673 +4 shenzhen 2345674 +5 hangzhou 2345675 +6 nanjing 2345676 +7 wuhan 2345677 +8 chengdu 2345678 +9 xian 2345679 +10 hefei 23456710 +11 \N 23456711 +12 hefei \N + +-- !insert -- +1 beijing 2345671 +2 shanghai 2345672 +3 hangzhou 2345673 +4 shenzhen 2345674 +5 guangzhou 2345675 + diff --git a/regression-test/data/load_p0/stream_load/test_hdfs_json_load.out b/regression-test/data/load_p0/stream_load/test_hdfs_json_load.out index 594d2ec60a..f55edaaa7e 100644 --- a/regression-test/data/load_p0/stream_load/test_hdfs_json_load.out +++ b/regression-test/data/load_p0/stream_load/test_hdfs_json_load.out @@ -9,8 +9,9 @@ 7 wuhan 2345677 8 chengdu 2345678 9 xian 2345679 -10 \N 23456711 10 hefei 23456710 +11 \N 23456711 +12 hefei \N 200 changsha 3456789 -- !select1 -- @@ -23,8 +24,9 @@ 7 wuhan 2345677 8 chengdu 2345678 9 xian 2345679 -10 \N 23456711 10 hefei 23456710 +11 \N 23456711 +12 hefei \N 200 changsha 3456789 -- !select2 -- @@ -37,8 +39,9 @@ 70 wuhan 2345677 80 chengdu 2345678 90 xian 2345679 -100 \N 23456711 100 hefei 23456710 +110 \N 23456711 +120 hefei \N 200 changsha 3456789 -- !select2 -- @@ -51,8 +54,9 @@ 70 wuhan 2345677 80 chengdu 2345678 90 xian 2345679 -100 \N 23456711 100 hefei 23456710 +110 \N 23456711 +120 hefei \N 200 changsha 3456789 -- !select3 -- @@ -65,9 +69,9 @@ 7 2345677 \N 8 2345678 \N 9 2345679 \N -10 \N \N 10 23456710 \N -10 23456711 \N +11 23456711 \N +12 \N \N 200 changsha 3456789 -- !select3 -- @@ -80,9 +84,9 @@ 7 2345677 \N 8 2345678 \N 9 2345679 \N -10 \N \N 10 23456710 \N -10 23456711 \N +11 23456711 \N +12 \N \N 200 changsha 3456789 -- !select4 -- @@ -95,7 +99,9 @@ 7 \N 270 8 \N 280 9 \N 290 -10 \N 900 +10 \N 300 +11 \N 310 +12 \N 320 200 changsha 3456789 -- !select4 -- @@ -108,7 +114,9 @@ 7 \N 270 8 \N 280 9 \N 290 -10 \N 900 +10 \N 300 +11 \N 310 +12 \N 320 200 changsha 3456789 -- !select5 -- diff --git a/regression-test/suites/correctness_p0/table_valued_function/test_hdfs_tvf.groovy 
b/regression-test/suites/correctness_p0/table_valued_function/test_hdfs_tvf.groovy new file mode 100644 index 0000000000..a11a4160fa --- /dev/null +++ b/regression-test/suites/correctness_p0/table_valued_function/test_hdfs_tvf.groovy @@ -0,0 +1,199 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_hdfs_tvf") { + String hdfs_port = context.config.otherConfigs.get("hdfs_port") + // It's okay to use a random `hdfsUser`, but it cannot be empty. + def hdfsUserName = "doris" + def format = "csv" + def defaultFS = "hdfs://127.0.0.1:${hdfs_port}" + def uri = "" + + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + try { + sql """ADMIN SET FRONTEND CONFIG ("enable_new_load_scan_node" = "true");""" + + // test csv format + uri = "${defaultFS}" + "/user/doris/csv_format_test/all_types.csv" + format = "csv" + qt_csv_all_types """ select * from HDFS( + "uri" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}") order by c1; """ + + + uri = "${defaultFS}" + "/user/doris/csv_format_test/student.csv" + format = "csv" + qt_csv_student """ select cast(c1 as INT) as id, c2 as name, c3 as age from HDFS( + "uri" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}") order by id; """ + + uri = "${defaultFS}" + "/user/doris/csv_format_test/array_malformat.csv" + format = "csv" + qt_csv_array_malformat """ select * from HDFS( + "URI" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "csv", + "column_separator" = "|") order by c1; """ + + uri = "${defaultFS}" + "/user/doris/csv_format_test/array_normal.csv" + format = "csv" + qt_csv_array_normal """ select * from HDFS("URI" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "csv", + "column_separator" = "|") order by c1; """ + + // test csv_with_names file format + uri = "${defaultFS}" + "/user/doris/csv_format_test/student_with_names.csv" + format = "csv_with_names" + qt_csv_names """ select cast(id as INT) as id, name, age from HDFS( + "uri" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}") order by id; """ + + // test csv_with_names_and_types file format + uri = "${defaultFS}" + "/user/doris/csv_format_test/student_with_names_and_types.csv" + format = "csv_with_names_and_types" + qt_csv_names_types """ select cast(id as INT) as id, name, age from HDFS( + "uri" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}") order by id; """ + + + // test parquet + uri = "${defaultFS}" +
"/user/doris/tpch1.db/hdfs_tvf/test_parquet.snappy.parquet" + format = "parquet" + qt_parquet """ select * from HDFS( + "uri" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}") order by s_suppkey limit 20; """ + + // test orc + uri = "${defaultFS}" + "/user/doris/tpch1.db/hdfs_tvf/test_orc.snappy.orc" + format = "orc" + qt_orc """ select * from HDFS( + "uri" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}") order by p_partkey limit 20; """ + + + // test josn format + uri = "${defaultFS}" + "/user/doris/json_format_test/simple_object_json.json" + format = "json" + qt_json """ select * from HDFS( + "uri" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}", + "strip_outer_array" = "false", + "read_json_by_line" = "true") order by id; """ + + // test json root + uri = "${defaultFS}" + "/user/doris/json_format_test/nest_json.json" + format = "json" + qt_json_root """ select cast(id as INT) as id, city, cast(code as INT) as code from HDFS( + "uri" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}", + "strip_outer_array" = "false", + "read_json_by_line" = "true", + "json_root" = "\$.item") order by id; """ + + // test json paths + uri = "${defaultFS}" + "/user/doris/json_format_test/simple_object_json.json" + format = "json" + qt_json_paths """ select cast(id as INT) as id, cast(code as INT) as code from HDFS( + "uri" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}", + "strip_outer_array" = "false", + "read_json_by_line" = "true", + "jsonpaths" = "[\\"\$.id\\", \\"\$.code\\"]") order by id; """ + + // test non read_json_by_line + uri = "${defaultFS}" + "/user/doris/json_format_test/one_array_json.json" + format = "json" + qt_one_array """ select cast(id as INT) as id, city, cast(code as INT) as code from HDFS( + "uri" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}", + "strip_outer_array" = "true", + "read_json_by_line" = "false") order by id; """ + + + // test cast to int + uri = "${defaultFS}" + "/user/doris/json_format_test/simple_object_json.json" + format = "json" + qt_cast """ select cast(id as INT) as id, city, cast(code as INT) as code from HDFS( + "uri" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}", + "strip_outer_array" = "false", + "read_json_by_line" = "true") order by id; """ + + // test insert into select + def testTable = "test_hdfs_tvf" + sql "DROP TABLE IF EXISTS ${testTable}" + def result1 = sql """ CREATE TABLE IF NOT EXISTS ${testTable} + ( + id int, + city varchar(50), + code int + ) + COMMENT "test hdfs tvf table" + DISTRIBUTED BY HASH(id) BUCKETS 32 + PROPERTIES("replication_num" = "1"); """ + + assertTrue(result1.size() == 1) + assertTrue(result1[0].size() == 1) + assertTrue(result1[0][0] == 0, "Create table should update 0 rows") + + uri = "${defaultFS}" + "/user/doris/json_format_test/nest_json.json" + format = "json" + def result2 = sql """ insert into ${testTable}(id,city,code) + select cast (id as INT) as id, city, cast (code as INT) as code + from HDFS( + "uri" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}", + "strip_outer_array" = "false", + "read_json_by_line" = "true", + 
"json_root" = "\$.item") """ + + sql "sync" + assertTrue(result2[0][0] == 5, "Insert should update 12 rows") + qt_insert """ select * from test_hdfs_tvf order by id; """ + } finally { + sql """ADMIN SET FRONTEND CONFIG ("enable_new_load_scan_node" = "false");""" + } + } +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org