Re: [PR] [case](regresscases) add regress cases for nested type nested type with csv format [doris]

via GitHub Thu, 12 Oct 2023 21:14:31 -0700


xiaokang commented on code in PR #25355:
URL: https://github.com/apache/doris/pull/25355#discussion_r1357738498



##########
be/src/vec/data_types/serde/data_type_array_serde.cpp:
##########
@@ -94,7 +94,8 @@ Status 
DataTypeArraySerDe::deserialize_one_cell_from_json(IColumn& column, Slice
     }
     // empty array []
     if (slice.size == 2) {
-        offsets.push_back(offsets.back());
+        auto last_off = offsets.back();

Review Comment:
   Fix similar issues in another PR.



##########
regression-test/suites/datatype_p0/nested_types/query/test_nestedtypes_csv_insert_into_with_s3.groovy:
##########
@@ -0,0 +1,294 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+import com.google.common.collect.Lists
+import org.apache.commons.lang3.StringUtils
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite("test_nestedtypes_csv_insert_into_with_s3", "p0") {
+    sql 'use regression_test_datatype_p0_nested_types'
+    sql 'set enable_nereids_planner=false'
+    sql 'set max_allowed_packet=4194304'
+    sql """ADMIN SET FRONTEND CONFIG ('disable_nested_complex_type' = 
'false')"""
+
+    String ak = getS3AK()
+    String sk = getS3SK()
+    String s3_endpoint = getS3Endpoint()
+    String bucket = context.config.otherConfigs.get("s3BucketName");
+
+
+    def dataFilePath = 
"https://"+"${bucket}"+"."+"${s3_endpoint}"+"/regression/datalake";
+
+    // csv format define in two method:
+    //  1.without quote, like doris mysql select output,
+    //  2.csv as json format, which is different from first if meet datetime 
type, because json has no datetime
+    ArrayList<String> csv_without_quote = 
["${dataFilePath}/as_without_quote.csv", 
"${dataFilePath}/arrarr_without_quote.csv",
+                                           
"${dataFilePath}/map_without_quote.csv","${dataFilePath}/arrmap_without_quote.csv",
+                                            
"${dataFilePath}/maparr_without_quote.csv"]
+    ArrayList<String> csv_as_json = ["${dataFilePath}/as_as_json.csv", 
"${dataFilePath}/arrarr_as_json.csv",
+                                     
"${dataFilePath}/map_as_json.csv","${dataFilePath}/arrmap_as_json.csv",
+                                     "${dataFilePath}/maparr_as_json.csv"]
+
+
+    // define dup key table with nested table types with one nested scalar
+    List<String> table_names = new ArrayList<>()
+    def nested_table_dup = "tbl_csv_array_nested_types_s3"
+    sql "DROP TABLE IF EXISTS ${nested_table_dup}"
+    sql """
+        CREATE TABLE IF NOT EXISTS ${nested_table_dup} (
+            `k1` bigint(11) NULL,
+            `c_bool` array<boolean> NULL,
+            `c_tinyint` array<tinyint(4)> NULL,
+            `c_smallint` array<smallint(6)> NULL,
+            `c_int` array<int(11)> NULL,
+            `c_bigint` array<bigint(20)> NULL,
+            `c_largeint` array<largeint(40)> NULL,
+            `c_float` array<float> NULL,
+            `c_double` array<double> NULL,
+            `c_decimal` array<decimal(20, 3)> NULL,
+            `c_decimalv3` array<decimalv3(20, 3)> NULL,
+            `c_date` array<date> NULL,
+            `c_datetime` array<datetime> NULL,
+            `c_datev2` array<datev2> NULL,
+            `c_datetimev2` array<datetimev2(0)> NULL,
+            `c_char` array<char(15)> NULL,
+            `c_varchar` array<varchar(100)> NULL,
+            `c_string` array<text> NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`k1`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`k1`) BUCKETS 10
+        PROPERTIES("replication_num" = "1");
+        """
+    table_names.add(nested_table_dup)
+    // define dup key table with nested table types with two nested scalar
+    def nested_table_dup2 = "tbl_csv_array_nested_types_s32"
+    sql "DROP TABLE IF EXISTS ${nested_table_dup2}"
+    sql """
+        CREATE TABLE IF NOT EXISTS ${nested_table_dup2} (
+            `k1` bigint(11) NULL,
+            `c_bool` array<array<boolean>> NULL,
+            `c_tinyint` array<array<tinyint(4)>> NULL,
+            `c_smallint` array<array<smallint(6)>> NULL,
+            `c_int` array<array<int(11)>> NULL,
+            `c_bigint` array<array<bigint(20)>> NULL,
+            `c_largeint` array<array<largeint(40)>> NULL,
+            `c_float` array<array<float>> NULL,
+            `c_double` array<array<double>> NULL,
+            `c_decimal` array<array<decimal(20, 3)>> NULL,
+            `c_decimalv3` array<array<decimalv3(20, 3)>> NULL,
+            `c_date` array<array<date>> NULL,
+            `c_datetime` array<array<datetime>> NULL,
+            `c_datev2` array<array<datev2>> NULL,
+            `c_datetimev2` array<array<datetimev2(0)>> NULL,
+            `c_char` array<array<char(15)>> NULL,
+            `c_varchar` array<array<varchar(100)>> NULL,
+            `c_string` array<array<text>> NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`k1`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`k1`) BUCKETS 10
+        PROPERTIES("replication_num" = "1");
+        """
+    table_names.add(nested_table_dup2)
+    
+    // define dup key table with array nested map table types with one nested 
scalar
+    def nested_table_array_map_dup = "tbl_csv_array_map_types_s3"
+    sql "DROP TABLE IF EXISTS ${nested_table_array_map_dup}"
+    sql """
+        CREATE TABLE IF NOT EXISTS ${nested_table_array_map_dup} (
+            `k1` bigint(11) NULL,
+            `c_bool` array<map<boolean, boolean>> NULL,
+            `c_tinyint` array<map<tinyint(4), tinyint(4)>> NULL,
+            `c_smallint` array<map<smallint(6), smallint(6)>> NULL,
+            `c_int` array<map<int(11), int(11)>> NULL,
+            `c_bigint` array<map<bigint(20), bigint(20)>> NULL,
+            `c_largeint` array<map<largeint(40), largeint(40)>> NULL,
+            `c_float` array<map<float, float>> NULL,
+            `c_double` array<map<double, double>> NULL,
+            `c_decimal` array<map<decimal(20, 3), decimal(20, 3)>> NULL,
+            `c_decimalv3` array<map<decimalv3(20, 3), decimalv3(20, 3)>> NULL,
+            `c_date` array<map<date, date>> NULL,
+            `c_datetime` array<map<datetime, datetime>> NULL,
+            `c_datev2` array<map<datev2, datev2>> NULL,
+            `c_datetimev2` array<map<datetimev2(0), datetimev2(0)>> NULL,
+            `c_char` array<map<char(15), char(15)>> NULL,
+            `c_varchar` array<map<varchar(100), varchar(100)>> NULL,
+            `c_string` array<map<text, text>> NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`k1`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`k1`) BUCKETS 10
+        PROPERTIES("replication_num" = "1");
+        """
+    table_names.add(nested_table_array_map_dup)
+
+    // define dup key table with map types with one nested scalar
+    def nested_table_map_dup = "tbl_csv_map_types_s3"
+    sql "DROP TABLE IF EXISTS ${nested_table_map_dup}"
+    sql """
+        CREATE TABLE IF NOT EXISTS ${nested_table_map_dup} (
+            `k1` bigint(11) NULL,
+            `c_bool` map<boolean, boolean> NULL,
+            `c_tinyint` map<tinyint(4), tinyint(4)> NULL,
+            `c_smallint` map<smallint(6), smallint(6)> NULL,
+            `c_int` map<int(11), int(11)> NULL,
+            `c_bigint` map<bigint(20), bigint(20)> NULL,
+            `c_largeint` map<largeint(40), largeint(40)> NULL,
+            `c_float` map<float, float> NULL,
+            `c_double` map<double, double> NULL,
+            `c_decimal` map<decimal(20, 3), decimal(20, 3)> NULL,
+            `c_decimalv3` map<decimalv3(20, 3), decimalv3(20, 3)> NULL,
+            `c_date` map<date, date> NULL,
+            `c_datetime` map<datetime, datetime> NULL,
+            `c_datev2` map<datev2, datev2> NULL,
+            `c_datetimev2` map<datetimev2(0), datetimev2(0)> NULL,
+            `c_char` map<char(15), char(15)> NULL,
+            `c_varchar` map<varchar(100), varchar(100)> NULL,
+            `c_string` map<text, text> NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`k1`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`k1`) BUCKETS 10
+        PROPERTIES("replication_num" = "1");
+        """
+    table_names.add(nested_table_map_dup)
+
+    // define dup key table with map nested value array table types with one 
nested scalar
+    def nested_table_map_array_dup = "tbl_csv_map_array_types_s3"
+    sql "DROP TABLE IF EXISTS ${nested_table_map_array_dup}"
+    sql """
+        CREATE TABLE IF NOT EXISTS ${nested_table_map_array_dup} (
+            `k1` bigint(11) NULL,
+            `c_bool` map<boolean, array<boolean>> NULL,
+            `c_tinyint` map<tinyint(4), array<tinyint(4)>> NULL,
+            `c_smallint` map<smallint(6), array<smallint(6)>> NULL,
+            `c_int` map<int(11), array<int(11)>> NULL,
+            `c_bigint` map<bigint(20), array<bigint(20)>> NULL,
+            `c_largeint` map<largeint(40), array<largeint(40)>> NULL,
+            `c_float` map<float, array<float>> NULL,
+            `c_double` map<double, array<double>> NULL,
+            `c_decimal` map<decimal(20, 3), array<decimal(20, 3)>> NULL,
+            `c_decimalv3` map<decimalv3(20, 3), array<decimalv3(20, 3)>> NULL,
+            `c_date` map<date, array<date>> NULL,
+            `c_datetime` map<datetime, array<datetime>> NULL,
+            `c_datev2` map<datev2, array<datev2>> NULL,
+            `c_datetimev2` map<datetimev2(0), array<datetimev2(0)>> NULL,
+            `c_char` map<char(15), array<char(15)>> NULL,
+            `c_varchar` map<varchar(100), array<varchar(100)>> NULL,
+            `c_string` map<text, array<text>> NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`k1`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`k1`) BUCKETS 10
+        PROPERTIES("replication_num" = "1");
+        """
+    table_names.add(nested_table_map_array_dup)
+
+    // step1. select * from s3 with 0-100 items
+    // step2. insert into doris table
+    // step3. query and check
+
+    for (int i = 0; i < 3; ++i) {

Review Comment:
   Replace the hard-coded `3` with `table_names.size()`; five tables are added to `table_names` above.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Re: [PR] [case](regresscases) add regress cases for nested type nested type with csv format [doris]

Reply via email to