From Preetham Poluparthi <[email protected]>: Preetham Poluparthi has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20395?usp=email )
Change subject: [WIP] parquet null schema inference fix ...................................................................... [WIP] parquet null schema inference fix Change-Id: I60069bf3d44c60effb69e5555bbf7e869327cc5b --- A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.01.ddl.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.02.update.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.03.ddl.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.04.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.01.ddl.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.02.update.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.03.ddl.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.04.query.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.01.ddl.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.02.update.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null1/parquet-null1.04.adm M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml 12 files changed, 265 insertions(+), 2 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/95/20395/1 diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.01.ddl.sqlpp new file mode 100644 index 
0000000..0f80378 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.01.ddl.sqlpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +DROP DATAVERSE test if exists; +CREATE DATAVERSE test; +USE test; + +CREATE TYPE ColumnType2 AS { +}; + + +CREATE DATASET col2 primary key(id:int); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.02.update.sqlpp new file mode 100644 index 0000000..aadf594 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.02.update.sqlpp @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE test; + + +INSERT INTO col2 ( + [ +{"id": 1, "name": "aqay awil"}, + {"id": 2} + ] +); + +COPY ( + select id,name from col2 +) toWriter +TO %adapter% +PATH (%pathprefix% "copy-to-result", "parquet-null1") +WITH { + %template_colons%, + %additionalProperties% + "format":"parquet", + "version" : "2" +}; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.03.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.03.ddl.sqlpp new file mode 100644 index 0000000..ab296ea --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.03.ddl.sqlpp @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE test; + + +CREATE EXTERNAL DATASET DatasetCopy(ColumnType2) USING %adapter% +( + %template%, + %additional_Properties%, + ("definition"="%path_prefix%copy-to-result/parquet-null1"), + ("format" = "parquet"), + ("requireVersionChangeDetection"="false"), + ("include"="*.parquet") +); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.04.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.04.query.sqlpp new file mode 100644 index 0000000..5aeedb8 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null1/parquet-null1.04.query.sqlpp @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + + +SELECT id +FROM DatasetCopy c +ORDER BY c.id; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.01.ddl.sqlpp new file mode 100644 index 0000000..0f80378 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.01.ddl.sqlpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +DROP DATAVERSE test if exists; +CREATE DATAVERSE test; +USE test; + +CREATE TYPE ColumnType2 AS { +}; + + +CREATE DATASET col2 primary key(id:int); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.02.update.sqlpp new file mode 100644 index 0000000..3bac54a --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.02.update.sqlpp @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + + +INSERT INTO col2 ( + [ +{"id": 1, "name": "aqay awil"}, + {"id": 2}, + {"id":3 , "name": null} + ] +); + +COPY ( + select * from col2 +) toWriter +TO %adapter% +PATH (%pathprefix% "copy-to-result", "parquet-simple") +WITH { + %template_colons%, + %additionalProperties% + "format":"parquet", + "version" : "2" +}; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.03.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.03.ddl.sqlpp new file mode 100644 index 0000000..f1a4798 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.03.ddl.sqlpp @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + + +CREATE EXTERNAL DATASET DatasetCopy(ColumnType2) USING %adapter% +( + %template%, + %additional_Properties%, + ("definition"="%path_prefix%copy-to-result/parquet-simple"), + ("format" = "parquet"), + ("requireVersionChangeDetection"="false"), + ("include"="*.parquet") +); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.04.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.04.query.sqlpp new file mode 100644 index 0000000..5aeedb8 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null2/parquet-null2.04.query.sqlpp @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + + +SELECT id +FROM DatasetCopy c +ORDER BY c.id; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.01.ddl.sqlpp index 76970a5..abff4f0 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.01.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.01.ddl.sqlpp @@ -23,3 +23,5 @@ CREATE TYPE ColumnType2 AS { }; + + diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.02.update.sqlpp index 9b21be7..d598e52 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.02.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.02.update.sqlpp @@ -21,8 +21,8 @@ COPY ( - select "123" as id -) toWriter +select "123" as id + ) toWriter TO %adapter% PATH (%pathprefix% "copy-to-result", "parquet-simple") TYPE ( {id:string} ) diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null1/parquet-null1.04.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null1/parquet-null1.04.adm new file mode 100644 index 0000000..30a9bda --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null1/parquet-null1.04.adm @@ -0,0 +1,2 @@ +{ "id": 1 } +{ "id": 2 } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml index 7a07da0..fb8cf8e 100644 
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml @@ -70,6 +70,16 @@ </compilation-unit> </test-case> <test-case FilePath="copy-to"> + <compilation-unit name="parquet-null1"> + <placeholder name="adapter" value="S3" /> + <placeholder name="pathprefix" value="" /> + <placeholder name="path_prefix" value="" /> + <placeholder name="additionalProperties" value='"container":"playground",' /> + <placeholder name="additional_Properties" value='("container"="playground")' /> + <output-dir compare="Text">parquet-null1</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="copy-to"> <compilation-unit name="parquet-tweet"> <placeholder name="adapter" value="S3" /> <placeholder name="pathprefix" value="" /> -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20395?usp=email To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings?usp=email Gerrit-MessageType: newchange Gerrit-Project: asterixdb Gerrit-Branch: phoenix Gerrit-Change-Id: I60069bf3d44c60effb69e5555bbf7e869327cc5b Gerrit-Change-Number: 20395 Gerrit-PatchSet: 1 Gerrit-Owner: Preetham Poluparthi <[email protected]>
