This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 295e144906 [fix](tvf)support s3,local compress_type and append regression test (#24055) (#24330) 295e144906 is described below commit 295e14490665dca1030f04ebcf8023e40cdf7be4 Author: daidai <2017501...@qq.com> AuthorDate: Wed Sep 13 22:21:47 2023 +0800 [fix](tvf)support s3,local compress_type and append regression test (#24055) (#24330) support s3,local compress_type and append regression test. --- .../ExternalFileTableValuedFunction.java | 7 +- .../tablefunction/HdfsTableValuedFunction.java | 24 ++- .../HttpStreamTableValuedFunction.java | 80 +++++++++ .../tablefunction/LocalTableValuedFunction.java | 23 ++- .../doris/tablefunction/S3TableValuedFunction.java | 76 ++++----- .../tvf/compress/test_tvf.csv.bz2 | Bin 0 -> 60731 bytes .../tvf/compress/test_tvf.csv.deflate | Bin 0 -> 74687 bytes .../external_table_p2/tvf/compress/test_tvf.csv.gz | Bin 0 -> 74828 bytes .../tvf/compress/test_tvf.csv.lz4 | Bin 0 -> 112626 bytes .../tvf/compress/test_tvf.csv.snappy | Bin 0 -> 107203 bytes .../tvf/test_local_tvf_compression.out | 150 +++++++++++++++++ .../tvf/test_path_partition_keys.out | 68 ++++++++ .../tvf/test_path_partition_keys/dt1=cyw/a.csv | 3 + .../tvf/test_path_partition_keys/dt1=cyw/b.csv | 3 + .../tvf/test_path_partition_keys/dt1=hello/c.csv | 3 + .../test_path_partition_keys/dt2=two/dt1=cyw/a.csv | 3 + .../test_path_partition_keys/dt2=two/dt1=cyw/b.csv | 3 + .../dt2=two/dt1=hello/c.csv | 3 + .../tvf/test_s3_tvf_compression.out | 144 +++++++++++++++++ .../tvf/test_local_tvf_compression.groovy | 127 +++++++++++++++ .../tvf/test_path_partition_keys.groovy | 178 +++++++++++++++++++++ .../tvf/test_s3_tvf_compression.groovy | 171 ++++++++++++++++++++ 22 files changed, 1003 insertions(+), 63 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java index 435974d74f..e96646339a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java @@ -122,6 +122,8 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio .add(TRIM_DOUBLE_QUOTES) .add(SKIP_LINES) .add(CSV_SCHEMA) + .add(COMPRESS_TYPE) + .add(PATH_PARTITION_KEYS) .build(); // Columns got from file and path(if has) @@ -134,6 +136,8 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio protected List<TBrokerFileStatus> fileStatuses = Lists.newArrayList(); protected Map<String, String> locationProperties; + protected String filePath; + private TFileFormatType fileFormatType; private TFileCompressType compressionType; @@ -197,8 +201,9 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio } } + //The keys in the passed validParams map need to be lowercase. 
protected void parseProperties(Map<String, String> validParams) throws AnalysisException { - String formatString = validParams.getOrDefault(FORMAT, "").toLowerCase(); + String formatString = validParams.getOrDefault(FORMAT, ""); switch (formatString) { case "csv": this.fileFormatType = TFileFormatType.FORMAT_CSV_PLAIN; diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HdfsTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HdfsTableValuedFunction.java index 6543d5e105..718b8ae381 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HdfsTableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HdfsTableValuedFunction.java @@ -56,26 +56,23 @@ public class HdfsTableValuedFunction extends ExternalFileTableValuedFunction { .build(); private URI hdfsUri; - private String filePath; public HdfsTableValuedFunction(Map<String, String> params) throws AnalysisException { Map<String, String> fileParams = new CaseInsensitiveMap(); locationProperties = Maps.newHashMap(); for (String key : params.keySet()) { - if (FILE_FORMAT_PROPERTIES.contains(key.toLowerCase())) { - fileParams.put(key, params.get(key)); - } else { + String lowerKey = key.toLowerCase(); + if (FILE_FORMAT_PROPERTIES.contains(lowerKey)) { + fileParams.put(lowerKey, params.get(key)); + } else if (LOCATION_PROPERTIES.contains(lowerKey)) { + locationProperties.put(lowerKey, params.get(key)); + } else if (HdfsResource.HADOOP_FS_NAME.equalsIgnoreCase(key)) { // because HADOOP_FS_NAME contains upper and lower case - if (HdfsResource.HADOOP_FS_NAME.equalsIgnoreCase(key)) { - locationProperties.put(HdfsResource.HADOOP_FS_NAME, params.get(key)); - } else { - locationProperties.put(key, params.get(key)); - } + locationProperties.put(HdfsResource.HADOOP_FS_NAME, params.get(key)); + } else { + throw new AnalysisException(key + " is invalid property"); } } - if (params.containsKey(PATH_PARTITION_KEYS)) { - fileParams.put(PATH_PARTITION_KEYS, params.get(PATH_PARTITION_KEYS)); - } if (!locationProperties.containsKey(HDFS_URI)) { throw new AnalysisException(String.format("Configuration '%s' is required.", HDFS_URI)); @@ -84,7 +81,8 @@ public class HdfsTableValuedFunction extends ExternalFileTableValuedFunction { hdfsUri = URI.create(locationProperties.get(HDFS_URI)); filePath = locationProperties.get(HdfsResource.HADOOP_FS_NAME) + hdfsUri.getPath(); - parseProperties(fileParams); + super.parseProperties(fileParams); + parseFile(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HttpStreamTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HttpStreamTableValuedFunction.java new file mode 100644 index 0000000000..bb32c82653 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/HttpStreamTableValuedFunction.java @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.tablefunction; + +import org.apache.doris.analysis.BrokerDesc; +import org.apache.doris.analysis.StorageBackend.StorageType; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.thrift.TFileType; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.HashMap; +import java.util.Map; + +/** + * The Implement of table valued function + * http_stream("FORMAT" = "csv"). + */ +public class HttpStreamTableValuedFunction extends ExternalFileTableValuedFunction { + private static final Logger LOG = LogManager.getLogger(HttpStreamTableValuedFunction.class); + public static final String NAME = "http_stream"; + + public HttpStreamTableValuedFunction(Map<String, String> params) throws AnalysisException { + Map<String, String> fileParams = new HashMap<>(); + for (String key : params.keySet()) { + String lowerKey = key.toLowerCase(); + if (!FILE_FORMAT_PROPERTIES.contains(lowerKey)) { + throw new AnalysisException(key + " is invalid property"); + } + fileParams.put(lowerKey, params.get(key)); + } + + String formatString = fileParams.getOrDefault(FORMAT, ""); + if (formatString.equals("parquet") + || formatString.equals("avro") + || formatString.equals("orc")) { + throw new AnalysisException("current http_stream does not yet support parquet, avro and orc"); + } + + super.parseProperties(fileParams); + } + + // =========== implement abstract methods of ExternalFileTableValuedFunction ================= + @Override + public TFileType getTFileType() { + return TFileType.FILE_STREAM; + } + + @Override + public String getFilePath() { + return null; + } + + @Override + public BrokerDesc getBrokerDesc() { + return new BrokerDesc("HttpStreamTvfBroker", StorageType.STREAM, locationProperties); + } + + // =========== implement abstract methods of TableValuedFunctionIf ================= + @Override + public String getTableName() { + return "HttpStreamTableValuedFunction"; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/LocalTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/LocalTableValuedFunction.java index f6693317ba..129c3f930c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/LocalTableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/LocalTableValuedFunction.java @@ -56,32 +56,31 @@ public class LocalTableValuedFunction extends ExternalFileTableValuedFunction { .add(BACKEND_ID) .build(); - private String filePath; private long backendId; public LocalTableValuedFunction(Map<String, String> params) throws AnalysisException { - Map<String, String> fileFormatParams = new CaseInsensitiveMap(); + Map<String, String> fileParams = new CaseInsensitiveMap(); locationProperties = Maps.newHashMap(); for (String key : params.keySet()) { - if (FILE_FORMAT_PROPERTIES.contains(key.toLowerCase())) { - fileFormatParams.put(key, params.get(key)); - } else if (LOCATION_PROPERTIES.contains(key.toLowerCase())) { - locationProperties.put(key.toLowerCase(), params.get(key)); + String lowerKey = 
key.toLowerCase(); + if (FILE_FORMAT_PROPERTIES.contains(lowerKey)) { + fileParams.put(lowerKey, params.get(key)); + } else if (LOCATION_PROPERTIES.contains(lowerKey)) { + locationProperties.put(lowerKey, params.get(key)); } else { throw new AnalysisException(key + " is invalid property"); } } - if (!locationProperties.containsKey(FILE_PATH)) { - throw new AnalysisException(String.format("Configuration '%s' is required.", FILE_PATH)); - } - if (!locationProperties.containsKey(BACKEND_ID)) { - throw new AnalysisException(String.format("Configuration '%s' is required.", BACKEND_ID)); + for (String key : LOCATION_PROPERTIES) { + if (!locationProperties.containsKey(key)) { + throw new AnalysisException(String.format("Configuration '%s' is required.", key)); + } } filePath = locationProperties.get(FILE_PATH); backendId = Long.parseLong(locationProperties.get(BACKEND_ID)); - parseProperties(fileFormatParams); + super.parseProperties(fileParams); getFileListFromBackend(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java index 300c51c7ad..74c8ae5c4a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java @@ -57,7 +57,7 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction { ImmutableSet.of(S3Properties.SESSION_TOKEN, PropertyConverter.USE_PATH_STYLE, S3Properties.REGION, PATH_PARTITION_KEYS); - private static final ImmutableSet<String> PROPERTIES_SET = ImmutableSet.<String>builder() + private static final ImmutableSet<String> LOCATION_PROPERTIES = ImmutableSet.<String>builder() .add(S3_URI) .add(S3Properties.ENDPOINT) .addAll(DEPRECATED_KEYS) @@ -70,31 +70,56 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction { private String virtualBucket = ""; public S3TableValuedFunction(Map<String, String> params) throws AnalysisException { - Map<String, String> tvfParams = getValidParams(params); - forceVirtualHosted = isVirtualHosted(tvfParams); - s3uri = getS3Uri(tvfParams); + + Map<String, String> fileParams = new HashMap<>(); + for (Map.Entry<String, String> entry : params.entrySet()) { + String key = entry.getKey(); + String lowerKey = key.toLowerCase(); + if (!LOCATION_PROPERTIES.contains(lowerKey) && !FILE_FORMAT_PROPERTIES.contains(lowerKey)) { + throw new AnalysisException("Invalid property: " + key); + } + if (DEPRECATED_KEYS.contains(lowerKey)) { + lowerKey = S3Properties.S3_PREFIX + lowerKey; + } + fileParams.put(lowerKey, entry.getValue()); + } + + if (!fileParams.containsKey(S3_URI)) { + throw new AnalysisException("Missing required property: " + S3_URI); + } + + forceVirtualHosted = isVirtualHosted(fileParams); + s3uri = getS3Uri(fileParams); final String endpoint = forceVirtualHosted ? 
getEndpointAndSetVirtualBucket(params) : s3uri.getBucketScheme(); - if (!tvfParams.containsKey(S3Properties.REGION)) { + if (!fileParams.containsKey(S3Properties.REGION)) { String region = S3Properties.getRegionOfEndpoint(endpoint); - tvfParams.put(S3Properties.REGION, region); + fileParams.put(S3Properties.REGION, region); } CloudCredentialWithEndpoint credential = new CloudCredentialWithEndpoint(endpoint, - tvfParams.get(S3Properties.REGION), - tvfParams.get(S3Properties.ACCESS_KEY), - tvfParams.get(S3Properties.SECRET_KEY)); - if (tvfParams.containsKey(S3Properties.SESSION_TOKEN)) { - credential.setSessionToken(tvfParams.get(S3Properties.SESSION_TOKEN)); + fileParams.get(S3Properties.REGION), + fileParams.get(S3Properties.ACCESS_KEY), + fileParams.get(S3Properties.SECRET_KEY)); + if (fileParams.containsKey(S3Properties.SESSION_TOKEN)) { + credential.setSessionToken(fileParams.get(S3Properties.SESSION_TOKEN)); } // set S3 location properties // these five properties is necessary, no one can be lost. locationProperties = S3Properties.credentialToMap(credential); - String usePathStyle = tvfParams.getOrDefault(PropertyConverter.USE_PATH_STYLE, "false"); + String usePathStyle = fileParams.getOrDefault(PropertyConverter.USE_PATH_STYLE, "false"); locationProperties.put(PropertyConverter.USE_PATH_STYLE, usePathStyle); - parseProperties(tvfParams); + super.parseProperties(fileParams); + + if (forceVirtualHosted) { + filePath = NAME + S3URI.SCHEME_DELIM + virtualBucket + S3URI.PATH_DELIM + + s3uri.getBucket() + S3URI.PATH_DELIM + s3uri.getKey(); + } else { + filePath = NAME + S3URI.SCHEME_DELIM + s3uri.getKey(); + } + if (FeConstants.runningUnitTest) { // Just check FileSystemFactory.getS3FileSystem(locationProperties); @@ -103,25 +128,6 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction { } } - private static Map<String, String> getValidParams(Map<String, String> params) throws AnalysisException { - Map<String, String> validParams = new HashMap<>(); - for (Map.Entry<String, String> entry : params.entrySet()) { - String key = entry.getKey(); - String lowerKey = key.toLowerCase(); - if (!PROPERTIES_SET.contains(lowerKey) && !FILE_FORMAT_PROPERTIES.contains(lowerKey)) { - throw new AnalysisException("Invalid property: " + key); - } - if (DEPRECATED_KEYS.contains(lowerKey)) { - lowerKey = S3Properties.S3_PREFIX + lowerKey; - } - validParams.put(lowerKey, entry.getValue()); - } - if (!validParams.containsKey(S3_URI)) { - throw new AnalysisException("Missing required property: " + S3_URI); - } - return S3Properties.requiredS3TVFProperties(validParams); - } - private String getEndpointAndSetVirtualBucket(Map<String, String> params) throws AnalysisException { Preconditions.checkState(forceVirtualHosted, "only invoked when force virtual hosted."); String[] fileds = s3uri.getVirtualBucket().split("\\.", 2); @@ -167,11 +173,7 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction { @Override public String getFilePath() { // must be "s3://..." 
- if (forceVirtualHosted) { - return NAME + S3URI.SCHEME_DELIM + virtualBucket + S3URI.PATH_DELIM - + s3uri.getBucket() + S3URI.PATH_DELIM + s3uri.getKey(); - } - return NAME + S3URI.SCHEME_DELIM + s3uri.getKey(); + return filePath; } @Override diff --git a/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.bz2 b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.bz2 new file mode 100644 index 0000000000..b0bff9aa47 Binary files /dev/null and b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.bz2 differ diff --git a/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.deflate b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.deflate new file mode 100644 index 0000000000..d47c707da0 Binary files /dev/null and b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.deflate differ diff --git a/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.gz b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.gz new file mode 100644 index 0000000000..1f35b6ba8f Binary files /dev/null and b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.gz differ diff --git a/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.lz4 b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.lz4 new file mode 100644 index 0000000000..8341cce4fd Binary files /dev/null and b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.lz4 differ diff --git a/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.snappy b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.snappy new file mode 100644 index 0000000000..9ac2b7ae29 Binary files /dev/null and b/regression-test/data/external_table_p2/tvf/compress/test_tvf.csv.snappy differ diff --git a/regression-test/data/external_table_p2/tvf/test_local_tvf_compression.out b/regression-test/data/external_table_p2/tvf/test_local_tvf_compression.out new file mode 100644 index 0000000000..19699b0dc5 --- /dev/null +++ b/regression-test/data/external_table_p2/tvf/test_local_tvf_compression.out @@ -0,0 +1,150 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !gz_1 -- +1 694832 buHDwfGeNHfpRFdNaogneddi 2024-02-09 4.899588807225554 +10 218729 goZsLvvWFOIjlzSAitC 2024-06-10 4.137732740231178 +100 813423 zICskqgcdPc 2024-03-23 8.486529018746493 +1000 612650 RzOXeYpKOmuJOogUyeIEDNDmvq 2023-12-05 7.8741752707933435 +1001 29486 WoUAFJFuJNnwyqMnoDhX 2024-03-11 9.758244908785949 +1002 445363 OdTEeeWtxfcRwx 2024-08-01 0.3934945460194128 +1003 707035 JAYnKxusVpGzYueACf 2023-11-14 5.377110182643222 +1004 227858 JIFyjKzmbjkt 2024-03-24 5.748037621519263 +1005 539305 PlruLkSUSXZgaHafFriklrhCi 2023-11-08 4.122635188836725 +1006 145518 KCwqEcSCGuXrHerwn 2024-06-22 8.482290064407216 +1007 939028 KzXhEMelsKVLbDMsEKh 2024-01-01 8.144449761594585 +1008 913569 CHlqPKqkIdqwBCBUHreXbFAkCt 2024-05-25 1.5683842369495904 + +-- !gz_2 -- +1 694832 buHDwfGeNHfpRFdNaogneddi 2024-02-09 4.899588807225554 + +-- !bz2_1 -- +1 694832 buHDwfGeNHfpRFdNaogneddi 2024-02-09 4.899588807225554 +10 218729 goZsLvvWFOIjlzSAitC 2024-06-10 4.137732740231178 +100 813423 zICskqgcdPc 2024-03-23 8.486529018746493 +1000 612650 RzOXeYpKOmuJOogUyeIEDNDmvq 2023-12-05 7.8741752707933435 +1001 29486 WoUAFJFuJNnwyqMnoDhX 2024-03-11 9.758244908785949 +1002 445363 OdTEeeWtxfcRwx 2024-08-01 0.3934945460194128 +1003 707035 JAYnKxusVpGzYueACf 2023-11-14 5.377110182643222 +1004 227858 JIFyjKzmbjkt 2024-03-24 5.748037621519263 +1005 539305 PlruLkSUSXZgaHafFriklrhCi 2023-11-08 4.122635188836725 +1006 145518 KCwqEcSCGuXrHerwn 2024-06-22 8.482290064407216 +1007 939028 KzXhEMelsKVLbDMsEKh 2024-01-01 8.144449761594585 +1008 913569 CHlqPKqkIdqwBCBUHreXbFAkCt 2024-05-25 1.5683842369495904 +1009 757881 AjcSyYMIMzS 2024-05-04 7.5674012939461255 +101 326164 QWLnalYNmYDt 2024-01-07 3.8159876011523854 +1010 427079 AlRUfmxfAuoLnPqUTvQVMtrS 2024-06-04 3.8087069699523313 + +-- !bz2_2 -- +1476 2023-09-07 +1521 2023-09-07 +259 2023-09-07 +50 2023-09-07 +71 2023-09-07 +785 2023-09-07 +869 2023-09-07 +1064 2023-09-08 +126 2023-09-08 +137 2023-09-08 +1425 2023-09-08 +804 2023-09-08 +1240 2023-09-09 +1565 2023-09-09 +1688 2023-09-09 + +-- !lz4_1 -- +1 694832 buHDwfGeNHfpRFdNaogneddi 2024-02-09 4.899588807225554 +10 218729 goZsLvvWFOIjlzSAitC 2024-06-10 4.137732740231178 +100 813423 zICskqgcdPc 2024-03-23 8.486529018746493 +1000 612650 RzOXeYpKOmuJOogUyeIEDNDmvq 2023-12-05 7.8741752707933435 +1001 29486 WoUAFJFuJNnwyqMnoDhX 2024-03-11 9.758244908785949 +1002 445363 OdTEeeWtxfcRwx 2024-08-01 0.3934945460194128 +1003 707035 JAYnKxusVpGzYueACf 2023-11-14 5.377110182643222 +1004 227858 JIFyjKzmbjkt 2024-03-24 5.748037621519263 +1005 539305 PlruLkSUSXZgaHafFriklrhCi 2023-11-08 4.122635188836725 +1006 145518 KCwqEcSCGuXrHerwn 2024-06-22 8.482290064407216 +1007 939028 KzXhEMelsKVLbDMsEKh 2024-01-01 8.144449761594585 +1008 913569 CHlqPKqkIdqwBCBUHreXbFAkCt 2024-05-25 1.5683842369495904 +1009 757881 AjcSyYMIMzS 2024-05-04 7.5674012939461255 +101 326164 QWLnalYNmYDt 2024-01-07 3.8159876011523854 +1010 427079 AlRUfmxfAuoLnPqUTvQVMtrS 2024-06-04 3.8087069699523313 +1011 252076 gHmFDhtytYzWETIxdpkpMUpnLd 2023-09-17 6.773606843056635 +1012 819615 rFfRHquexplDJvSeUK 2023-11-02 3.220639250504097 +1013 413456 uvNPelHXYjJKiOkwdNbmUkGzxiiqLo 2024-03-15 8.305048700108081 +1014 308042 vnzcsvHxnWFhvLwJkAtUqe 2024-06-15 1.5668867233009998 +1015 603837 VBEsRVGyhRNWQeKzDaBnJHmFDnXAOU 2024-08-17 3.8287482122289007 + +-- !lz4_2 -- +694832 buHDwfGeNHfpRFdNaogneddi +950297 OulifcGqzIILdOGcHZlWaCiHlEB +143630 jqtiiLUUvSGeTkxsHL +664267 eeVExxxcioSmmX +890760 DYwfhhbkWATuSr +79734 
hgXsiaeVOkXdWUQvNnNjLPsdiD +855390 axGECHeiluHLBUKPEKqDheksZ +276590 lVQfdliXrLiJOpjlWM +585845 ztkLoqCHmOuanAdOUV +218729 goZsLvvWFOIjlzSAitC +303099 xRBcfDbimqmycPY +353815 CTDIqGYPRei +165056 NMqtBlPfByAWyMpLdp +172440 GjCGMSYnDVp +887563 CxqhRyCsNhLjfyV +248229 rCbtJQHJifNyhTEVrwESIQDGBylUWG +444180 imAEgaSWymXzsCjSZQpPSy +453083 XJzGEouGptILvnSTmVbOt +988672 RtONQThrfkeepz +977907 HMIJjkgcmNZVxdQaKqpMsgJYws + +-- !deflate_1 -- +1 694832 buHDwfGeNHfpRFdNaogneddi 2024-02-09 4.899588807225554 +10 218729 goZsLvvWFOIjlzSAitC 2024-06-10 4.137732740231178 +100 813423 zICskqgcdPc 2024-03-23 8.486529018746493 +1000 612650 RzOXeYpKOmuJOogUyeIEDNDmvq 2023-12-05 7.8741752707933435 +1001 29486 WoUAFJFuJNnwyqMnoDhX 2024-03-11 9.758244908785949 +1002 445363 OdTEeeWtxfcRwx 2024-08-01 0.3934945460194128 +1003 707035 JAYnKxusVpGzYueACf 2023-11-14 5.377110182643222 +1004 227858 JIFyjKzmbjkt 2024-03-24 5.748037621519263 +1005 539305 PlruLkSUSXZgaHafFriklrhCi 2023-11-08 4.122635188836725 +1006 145518 KCwqEcSCGuXrHerwn 2024-06-22 8.482290064407216 +1007 939028 KzXhEMelsKVLbDMsEKh 2024-01-01 8.144449761594585 +1008 913569 CHlqPKqkIdqwBCBUHreXbFAkCt 2024-05-25 1.5683842369495904 + +-- !deflate_2 -- +2023-09-07 7 +2023-09-08 5 +2023-09-09 6 +2023-09-10 6 +2023-09-11 4 +2023-09-12 8 +2023-09-13 4 +2023-09-14 6 +2023-09-15 6 +2023-09-16 5 +2023-09-17 15 +2023-09-18 7 + +-- !snappy_1 -- +1 694832 buHDwfGeNHfpRFdNaogneddi 2024-02-09 4.899588807225554 +10 218729 goZsLvvWFOIjlzSAitC 2024-06-10 4.137732740231178 +100 813423 zICskqgcdPc 2024-03-23 8.486529018746493 +1000 612650 RzOXeYpKOmuJOogUyeIEDNDmvq 2023-12-05 7.8741752707933435 +1001 29486 WoUAFJFuJNnwyqMnoDhX 2024-03-11 9.758244908785949 +1002 445363 OdTEeeWtxfcRwx 2024-08-01 0.3934945460194128 +1003 707035 JAYnKxusVpGzYueACf 2023-11-14 5.377110182643222 +1004 227858 JIFyjKzmbjkt 2024-03-24 5.748037621519263 +1005 539305 PlruLkSUSXZgaHafFriklrhCi 2023-11-08 4.122635188836725 +1006 145518 KCwqEcSCGuXrHerwn 2024-06-22 8.482290064407216 +1007 939028 KzXhEMelsKVLbDMsEKh 2024-01-01 8.144449761594585 +1008 913569 CHlqPKqkIdqwBCBUHreXbFAkCt 2024-05-25 1.5683842369495904 +1009 757881 AjcSyYMIMzS 2024-05-04 7.5674012939461255 +101 326164 QWLnalYNmYDt 2024-01-07 3.8159876011523854 +1010 427079 AlRUfmxfAuoLnPqUTvQVMtrS 2024-06-04 3.8087069699523313 +1011 252076 gHmFDhtytYzWETIxdpkpMUpnLd 2023-09-17 6.773606843056635 +1012 819615 rFfRHquexplDJvSeUK 2023-11-02 3.220639250504097 +1013 413456 uvNPelHXYjJKiOkwdNbmUkGzxiiqLo 2024-03-15 8.305048700108081 +1014 308042 vnzcsvHxnWFhvLwJkAtUqe 2024-06-15 1.5668867233009998 +1015 603837 VBEsRVGyhRNWQeKzDaBnJHmFDnXAOU 2024-08-17 3.8287482122289007 +1016 912679 eEjldPhxojSjTnE 2024-01-09 1.3717891874157961 +1017 630392 TcczYHXbwaCYzFSfXJlhsFjN 2023-10-07 4.733337480058437 + +-- !snappy_2 -- + diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys.out b/regression-test/data/external_table_p2/tvf/test_path_partition_keys.out new file mode 100644 index 0000000000..6ac8589d90 --- /dev/null +++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys.out @@ -0,0 +1,68 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !hdfs_1 -- +1 hello cyw +2 aaaaaaa cyw +3 1121399 cyw +33 qqqqq cyw +44 tttttttttt cyw +55 qwr cyw + +-- !hdfs_2 -- + +-- !hdfs_3 -- +hello 1111 1 +hello 11111 1 +hello 33333 1 + +-- !hdfs_4 -- +1111 mkdir iiiiii hello +11111 8888888 hello hello +33333 helloworld 999999 hello + +-- !hdfs_5 -- +1 hello 0 two cyw +2 aaaaaaa 9 two cyw +3 1121399 1 two cyw +33 qqqqq 666 two cyw +44 tttttttttt 77 two cyw +55 qwr 91 two cyw + +-- !local_1 -- +1 hello cyw +2 aaaaaaa cyw +3 1121399 cyw + +-- !local_2 -- +1 hello cyw +2 aaaaaaa cyw + +-- !local_3 -- +1111 hello +11111 hello +33333 hello + +-- !local_4 -- +two hello 1111 mkdir +two hello 11111 8888888 +two hello 33333 helloworld + +-- !s3_1 -- +cyw +cyw +cyw + +-- !s3_2 -- +1111 hello +11111 hello +33333 hello + +-- !s3_3 -- +1111 mkdir iiiiii hello +11111 8888888 hello hello +33333 helloworld 999999 hello + +-- !s3_4 -- +33 qqqqq 666 two cyw +44 tttttttttt 77 two cyw +55 qwr 91 two cyw + diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=cyw/a.csv b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=cyw/a.csv new file mode 100644 index 0000000000..b8537e591b --- /dev/null +++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=cyw/a.csv @@ -0,0 +1,3 @@ +1,hello +2,aaaaaaa +3,1121399 diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=cyw/b.csv b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=cyw/b.csv new file mode 100644 index 0000000000..0743633d2f --- /dev/null +++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=cyw/b.csv @@ -0,0 +1,3 @@ +33,qqqqq +44,tttttttttt +55,qwr diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=hello/c.csv b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=hello/c.csv new file mode 100644 index 0000000000..b51cbf9041 --- /dev/null +++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt1=hello/c.csv @@ -0,0 +1,3 @@ +11111,8888888 +33333,helloworld +1111,mkdir diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=cyw/a.csv b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=cyw/a.csv new file mode 100644 index 0000000000..3b2ba1cf44 --- /dev/null +++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=cyw/a.csv @@ -0,0 +1,3 @@ +1,hello,0 +2,aaaaaaa,9 +3,1121399,1 diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=cyw/b.csv b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=cyw/b.csv new file mode 100644 index 0000000000..e5573bf50c --- /dev/null +++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=cyw/b.csv @@ -0,0 +1,3 @@ +33,qqqqq,666 +44,tttttttttt,77 +55,qwr,91 diff --git a/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=hello/c.csv b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=hello/c.csv new file mode 100644 index 0000000000..ff4b3f9ac6 --- /dev/null +++ b/regression-test/data/external_table_p2/tvf/test_path_partition_keys/dt2=two/dt1=hello/c.csv @@ -0,0 +1,3 @@ +11111,8888888,hello +33333,helloworld,999999 +1111,mkdir,iiiiii diff --git a/regression-test/data/external_table_p2/tvf/test_s3_tvf_compression.out 
b/regression-test/data/external_table_p2/tvf/test_s3_tvf_compression.out new file mode 100644 index 0000000000..1308b7ffef --- /dev/null +++ b/regression-test/data/external_table_p2/tvf/test_s3_tvf_compression.out @@ -0,0 +1,144 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !gz_1 -- +1 694832 buHDwfGeNHfpRFdNaogneddi 2024-02-09 4.899588807225554 +10 218729 goZsLvvWFOIjlzSAitC 2024-06-10 4.137732740231178 +100 813423 zICskqgcdPc 2024-03-23 8.486529018746493 +1000 612650 RzOXeYpKOmuJOogUyeIEDNDmvq 2023-12-05 7.8741752707933435 +1001 29486 WoUAFJFuJNnwyqMnoDhX 2024-03-11 9.758244908785949 +1002 445363 OdTEeeWtxfcRwx 2024-08-01 0.3934945460194128 +1003 707035 JAYnKxusVpGzYueACf 2023-11-14 5.377110182643222 +1004 227858 JIFyjKzmbjkt 2024-03-24 5.748037621519263 +1005 539305 PlruLkSUSXZgaHafFriklrhCi 2023-11-08 4.122635188836725 +1006 145518 KCwqEcSCGuXrHerwn 2024-06-22 8.482290064407216 +1007 939028 KzXhEMelsKVLbDMsEKh 2024-01-01 8.144449761594585 +1008 913569 CHlqPKqkIdqwBCBUHreXbFAkCt 2024-05-25 1.5683842369495904 +1009 757881 AjcSyYMIMzS 2024-05-04 7.5674012939461255 +101 326164 QWLnalYNmYDt 2024-01-07 3.8159876011523854 +1010 427079 AlRUfmxfAuoLnPqUTvQVMtrS 2024-06-04 3.8087069699523313 +1011 252076 gHmFDhtytYzWETIxdpkpMUpnLd 2023-09-17 6.773606843056635 +1012 819615 rFfRHquexplDJvSeUK 2023-11-02 3.220639250504097 +1013 413456 uvNPelHXYjJKiOkwdNbmUkGzxiiqLo 2024-03-15 8.305048700108081 +1014 308042 vnzcsvHxnWFhvLwJkAtUqe 2024-06-15 1.5668867233009998 +1015 603837 VBEsRVGyhRNWQeKzDaBnJHmFDnXAOU 2024-08-17 3.8287482122289007 + +-- !gz_2 -- +1 2024-02-09 +2 2024-08-31 +3 2024-05-06 +4 2023-10-07 +5 2024-01-11 +6 2023-11-11 +7 2024-02-17 +8 2023-11-16 +9 2024-08-16 +10 2024-06-10 +11 2024-01-04 +12 2023-12-18 +13 2024-05-15 +14 2024-06-30 +15 2024-05-06 +16 2024-07-26 +17 2024-02-08 +18 2024-08-11 +19 2024-05-27 +20 2023-12-18 + +-- !bz2_1 -- +1 694832 buHDwfGeNHfpRFdNaogneddi 2024-02-09 4.899588807225554 +10 218729 goZsLvvWFOIjlzSAitC 2024-06-10 4.137732740231178 +100 813423 zICskqgcdPc 2024-03-23 8.486529018746493 +1000 612650 RzOXeYpKOmuJOogUyeIEDNDmvq 2023-12-05 7.8741752707933435 +1001 29486 WoUAFJFuJNnwyqMnoDhX 2024-03-11 9.758244908785949 +1002 445363 OdTEeeWtxfcRwx 2024-08-01 0.3934945460194128 +1003 707035 JAYnKxusVpGzYueACf 2023-11-14 5.377110182643222 +1004 227858 JIFyjKzmbjkt 2024-03-24 5.748037621519263 +1005 539305 PlruLkSUSXZgaHafFriklrhCi 2023-11-08 4.122635188836725 +1006 145518 KCwqEcSCGuXrHerwn 2024-06-22 8.482290064407216 +1007 939028 KzXhEMelsKVLbDMsEKh 2024-01-01 8.144449761594585 +1008 913569 CHlqPKqkIdqwBCBUHreXbFAkCt 2024-05-25 1.5683842369495904 +1009 757881 AjcSyYMIMzS 2024-05-04 7.5674012939461255 +101 326164 QWLnalYNmYDt 2024-01-07 3.8159876011523854 +1010 427079 AlRUfmxfAuoLnPqUTvQVMtrS 2024-06-04 3.8087069699523313 + +-- !bz2_2 -- +1476 2023-09-07 +1521 2023-09-07 +259 2023-09-07 +50 2023-09-07 +71 2023-09-07 +785 2023-09-07 +869 2023-09-07 +1064 2023-09-08 +126 2023-09-08 +137 2023-09-08 +1425 2023-09-08 +804 2023-09-08 +1240 2023-09-09 + +-- !lz4_1 -- +1 694832 buHDwfGeNHfpRFdNaogneddi 2024-02-09 4.899588807225554 +10 218729 goZsLvvWFOIjlzSAitC 2024-06-10 4.137732740231178 +100 813423 zICskqgcdPc 2024-03-23 8.486529018746493 +1000 612650 RzOXeYpKOmuJOogUyeIEDNDmvq 2023-12-05 7.8741752707933435 +1001 29486 WoUAFJFuJNnwyqMnoDhX 2024-03-11 9.758244908785949 +1002 445363 OdTEeeWtxfcRwx 2024-08-01 0.3934945460194128 +1003 707035 JAYnKxusVpGzYueACf 2023-11-14 5.377110182643222 +1004 227858 JIFyjKzmbjkt 
2024-03-24 5.748037621519263 +1005 539305 PlruLkSUSXZgaHafFriklrhCi 2023-11-08 4.122635188836725 +1006 145518 KCwqEcSCGuXrHerwn 2024-06-22 8.482290064407216 +1007 939028 KzXhEMelsKVLbDMsEKh 2024-01-01 8.144449761594585 +1008 913569 CHlqPKqkIdqwBCBUHreXbFAkCt 2024-05-25 1.5683842369495904 +1009 757881 AjcSyYMIMzS 2024-05-04 7.5674012939461255 +101 326164 QWLnalYNmYDt 2024-01-07 3.8159876011523854 + +-- !lz4_2 -- +1 buHDwfGeNHfpRFdNaogneddi + +-- !deflate_1 -- +4611713315956779722 0 [159] +4611737294102341731 1 [18,348,1010] +4611746138795773784 0 [18] +4611784761593342388 0 [] +4611801970150944452 0 [] +4611823514689510950 0 [] {"Превьюшки":{"doc +4611838050999642253 0 [18] +4611870011201662970 0 [18,348,1010] +4611987206053671537 0 [18] {"Превьюшки +4612024970660173441 0 [18,868] +4612121739736542264 0 [18,348,1010] +4612128194949363638 0 [] +4612152063486747092 0 [3] +4612190315946473296 1 [18,348,1010] +4612251026602549726 0 [32,62,45,48,120,194,159,348] +4612255738481454387 0 [] +4612315312096080662 0 [] {"Правая колонка": + +-- !deflate_2 -- +4611713315956779722 0 +4611737294102341731 1 +4611746138795773784 1 +4611784761593342388 1 +4611801970150944452 1 + +-- !snappy_1 -- +4611713315956779722 0 [159] +4611737294102341731 1 [18,348,1010] +4611746138795773784 0 [18] +4611784761593342388 0 [] +4611801970150944452 0 [] +4611823514689510950 0 [] {"Превьюшки":{"doc +4611838050999642253 0 [18] +4611870011201662970 0 [18,348,1010] +4611987206053671537 0 [18] {"Превьюшки +4612024970660173441 0 [18,868] +4612121739736542264 0 [18,348,1010] +4612128194949363638 0 [] +4612152063486747092 0 [3] +4612190315946473296 1 [18,348,1010] +4612251026602549726 0 [32,62,45,48,120,194,159,348] +4612255738481454387 0 [] +4612315312096080662 0 [] {"Правая колонка": + +-- !snappy_2 -- +0 + diff --git a/regression-test/suites/external_table_p2/tvf/test_local_tvf_compression.groovy b/regression-test/suites/external_table_p2/tvf/test_local_tvf_compression.groovy new file mode 100644 index 0000000000..0f783900df --- /dev/null +++ b/regression-test/suites/external_table_p2/tvf/test_local_tvf_compression.groovy @@ -0,0 +1,127 @@ +import org.junit.Assert + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_local_tvf_compression", "p2,external,tvf,external_remote,external_remote_tvf") { + List<List<Object>> backends = sql """ show backends """ + assertTrue(backends.size() > 0) + def be_id = backends[0][0] + def dataFilePath = context.config.dataPath + "/external_table_p2/tvf/compress" + + def outFilePath="/compress" + + for (List<Object> backend : backends) { + def be_host = backend[1] + scpFiles ("root", be_host, dataFilePath, outFilePath, false); + } + + String filename = "test_tvf.csv" + + + String compress_type = "gz" + qt_gz_1 """ + select * from local( + "file_path" = "${outFilePath}/${filename}.${compress_type}", + "backend_id" = "${be_id}", + "format" = "csv", + "compress_type" ="${compress_type}") order by c1,c2,c3,c4,c5 limit 12; + """ + + qt_gz_2 """ + select * from local( + "file_path" = "${outFilePath}/${filename}.${compress_type}", + "backend_id" = "${be_id}", + "format" = "csv", + "compress_type" ="${compress_type}") where c1="1" order by c1,c2,c3,c4,c5 limit 12; + """ + + + + compress_type = "bz2" + qt_bz2_1 """ + select * from local( + "file_path" = "${outFilePath}/${filename}.${compress_type}", + "backend_id" = "${be_id}", + "format" = "csv", + "compress_type" ="${compress_type}") order by c1,c2,c3,c4,c5 limit 15; + """ + qt_bz2_2 """ + select c1,c4 from local( + "file_path" = "${outFilePath}/${filename}.${compress_type}", + "backend_id" = "${be_id}", + "format" = "csv", + "compress_type" ="${compress_type}") order by cast(c4 as date),c1 limit 15; + """ + + + + + compress_type = "lz4"; + + qt_lz4_1 """ + select * from local( + "file_path" = "${outFilePath}/${filename}.${compress_type}", + "backend_id" = "${be_id}", + "format" = "csv", + "compress_type" ="${compress_type}FRAME") order by c1,c2,c3,c4,c5 limit 20; + """ + qt_lz4_2 """ + select c2,c3 from local( + "file_path" = "${outFilePath}/${filename}.${compress_type}", + "backend_id" = "${be_id}", + "format" = "csv", + "compress_type" ="${compress_type}FRAME") where c2!="abcsdasdsadsad" order by cast(c1 as int),c2,c3 limit 20; + """ + + + + compress_type = "deflate"; + qt_deflate_1 """ + select * from local( + "file_path" = "${outFilePath}/${filename}.${compress_type}", + "backend_id" = "${be_id}", + "format" = "csv", + "compress_type" ="${compress_type}") order by c1,c2,c3,c4,c5 limit 12 ; + """ + qt_deflate_2 """ + select c4,count(*) from local( + "file_path" = "${outFilePath}/${filename}.${compress_type}", + "backend_id" = "${be_id}", + "format" = "csv", + "compress_type" ="${compress_type}") group by c4 order by c4 limit 12 ; + """ + + + + compress_type = "snappy"; + qt_snappy_1 """ + select * from local( + "file_path" = "${outFilePath}/${filename}.${compress_type}", + "backend_id" = "${be_id}", + "format" = "csv", + "compress_type" ="${compress_type}block") order by c1,c2,c3,c4,c5 limit 22 ; + """ + qt_snappy_2 """ + select c2,c3 from local( + "file_path" = "${outFilePath}/${filename}.${compress_type}", + "backend_id" = "${be_id}", + "format" = "csv", + "compress_type" ="${compress_type}block") where c2="abcd" order by c3 limit 22 ; + """ + +} diff --git a/regression-test/suites/external_table_p2/tvf/test_path_partition_keys.groovy b/regression-test/suites/external_table_p2/tvf/test_path_partition_keys.groovy new file mode 100644 index 0000000000..0c4c74e924 --- /dev/null +++ b/regression-test/suites/external_table_p2/tvf/test_path_partition_keys.groovy @@ -0,0 +1,178 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_path_partition_keys", "p2,external,tvf,external_remote,external_remote_tvf") { + String enabled = context.config.otherConfigs.get("enableExternalHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String nameNodeHost = context.config.otherConfigs.get("extHiveHmsHost") + String hdfsPort = context.config.otherConfigs.get("extHdfsPort") + + String baseUri = "hdfs://${nameNodeHost}:${hdfsPort}/catalog/tvf/csv/test_path_partition_keys" + String baseFs = "hdfs://${nameNodeHost}:${hdfsPort}" + + order_qt_hdfs_1 """ + select * from HDFS( + "uri" = "${baseUri}/dt1=cyw/*", + "fs.defaultFS"= "${baseFs}", + "hadoop.username" = "hadoop", + "format" = "csv", + "path_partition_keys"="dt1" ) order by c1,c2 ; + """ + + order_qt_hdfs_2 """ + select * from HDFS( + "uri" = "${baseUri}/dt1=cyw/*", + "fs.defaultFS"= "${baseFs}", + "hadoop.username" = "hadoop", + "format" = "csv", + "path_partition_keys"="dt1") where dt1!="cyw" order by c1,c2 limit 3; + """ + + order_qt_hdfs_3 """ + select dt1,c1,count(*) from HDFS( + "uri" = "${baseUri}/dt1=hello/*", + "fs.defaultFS"= "${baseFs}", + "hadoop.username" = "hadoop", + "format" = "csv", + "path_partition_keys"="dt1") group by c1,dt1 order by c1; + """ + + order_qt_hdfs_4 """ + select * from HDFS( + "uri" = "${baseUri}/dt2=two/dt1=hello/*", + "fs.defaultFS"= "${baseFs}", + "hadoop.username" = "hadoop", + "format" = "csv", + "path_partition_keys"="dt1") order by c1; + """ + + order_qt_hdfs_5 """ + select * from HDFS( + "uri" = "${baseUri}/dt2=two/dt1=cyw/*", + "fs.defaultFS"= "${baseFs}", + "hadoop.username" = "hadoop", + "format" = "csv", + "path_partition_keys"="dt2,dt1"); + """ + + } + + List<List<Object>> backends = sql """ show backends """ + assertTrue(backends.size() > 0) + def be_id = backends[0][0] + def dataFilePath = context.config.dataPath + "/external_table_p2/test_path_partition_keys/" + + def outFilePath="/test_path_partition_keys" + + for (List<Object> backend : backends) { + def be_host = backend[1] + scpFiles ("root", be_host, dataFilePath, outFilePath, false); + } + + order_qt_local_1 """ + select * from local( + "file_path" = "${outFilePath}/dt1=cyw/a.csv", + "backend_id" = "${be_id}", + "format" = "csv", + "path_partition_keys"="dt1") order by c1,c2; + """ + + order_qt_local_2 """ + select * from local( + "file_path" = "${outFilePath}/dt1=cyw/*", + "backend_id" = "${be_id}", + "format" = "csv", + "path_partition_keys"="dt1") order by c1,c2 limit 2; + """ + + order_qt_local_3 """ + select c1,dt1 from local( + "file_path" = "${outFilePath}/dt1=hello/c.csv", + "backend_id" = "${be_id}", + "format" = "csv", + "path_partition_keys"="dt1") order by c1,c2 limit 7; + """ + + order_qt_local_4 """ + select dt2,dt1,c1,c2 from local( + "file_path" = "${outFilePath}/dt2=two/dt1=hello/c.csv", + "backend_id" = "${be_id}", + "format" = 
"csv", + "path_partition_keys"="dt2,dt1") order by c1,c2 limit 9; + """ + + + String ak = getS3AK() + String sk = getS3SK() + String s3_endpoint = getS3Endpoint() + String region = getS3Region() + String bucket = context.config.otherConfigs.get("s3BucketName"); + + sql """ set query_timeout=3600; """ + + order_qt_s3_1 """ + select dt1 from + s3( + "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/test_path_partition_keys/dt1=cyw/b.csv", + "s3.access_key" = "${ak}", + "s3.secret_key" = "${sk}", + "REGION" = "${region}", + "FORMAT" = "csv", + "use_path_style" = "true", + "path_partition_keys"="dt1") + """ + + + + order_qt_s3_2 """ + select c1,dt1 from + s3( + "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/test_path_partition_keys/dt1=hello/c.csv", + "s3.access_key" = "${ak}", + "s3.secret_key" = "${sk}", + "REGION" = "${region}", + "FORMAT" = "csv", + "use_path_style" = "true", + "path_partition_keys"="dt1") limit 3; + """ + + + order_qt_s3_3 """ + select * from + s3( + "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/test_path_partition_keys/dt2=two/dt1=hello/c.csv", + "s3.access_key" = "${ak}", + "s3.secret_key" = "${sk}", + "REGION" = "${region}", + "FORMAT" = "csv", + "use_path_style" = "true", + "path_partition_keys"="dt1") limit 3; + """ + + + order_qt_s3_4 """ + select *from + s3( + "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/test_path_partition_keys/dt2=two/dt1=cyw/b.csv", + "s3.access_key" = "${ak}", + "s3.secret_key" = "${sk}", + "REGION" = "${region}", + "FORMAT" = "csv", + "use_path_style" = "true", + "path_partition_keys"="dt2,dt1") limit 3; + """ +} diff --git a/regression-test/suites/external_table_p2/tvf/test_s3_tvf_compression.groovy b/regression-test/suites/external_table_p2/tvf/test_s3_tvf_compression.groovy new file mode 100644 index 0000000000..57cfdb136d --- /dev/null +++ b/regression-test/suites/external_table_p2/tvf/test_s3_tvf_compression.groovy @@ -0,0 +1,171 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_s3_tvf_compression", "p2,external,tvf,external_remote,external_remote_tvf") { + + String ak = getS3AK() + String sk = getS3SK() + String s3_endpoint = getS3Endpoint() + String region = getS3Region() + String bucket = context.config.otherConfigs.get("s3BucketName"); + + sql """ set query_timeout=3600; """ + + String compress_type = "gz" + qt_gz_1 """ + select * from + s3( + "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/compression/test_tvf.csv.${compress_type}", + "s3.access_key" = "${ak}", + "s3.secret_key" = "${sk}", + "REGION" = "${region}", + "FORMAT" = "csv", + "use_path_style" = "true", + "compress_type" ="${compress_type}") order by c1,c2,c3,c4,c5 limit 20; + """ + + + qt_gz_2 """ + select c1,c4 from + s3( + "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/compression/test_tvf.csv.${compress_type}", + "s3.access_key" = "${ak}", + "s3.secret_key" = "${sk}", + "REGION" = "${region}", + "FORMAT" = "csv", + "use_path_style" = "true", + "compress_type" ="${compress_type}") order by cast(c1 as int),c4 limit 20; + """ + + + + compress_type = "bz2"; + qt_bz2_1 """ + select * from + s3( + "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/compression/test_tvf.csv.${compress_type}", + "s3.access_key" = "${ak}", + "s3.secret_key" = "${sk}", + "REGION" = "${region}", + "FORMAT" = "csv", + "use_path_style" = "true", + "compress_type" ="${compress_type}") order by c1,c2,c3,c4,c5 limit 15; + """ + + + qt_bz2_2 """ + select c1,c4 from + s3( + "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/compression/test_tvf.csv.${compress_type}", + "s3.access_key" = "${ak}", + "s3.secret_key" = "${sk}", + "REGION" = "${region}", + "FORMAT" = "csv", + "use_path_style" = "true", + "compress_type" ="${compress_type}") where c1!="100" order by cast(c4 as date),c1 limit 13; + """ + + + + compress_type = "lz4"; + qt_lz4_1 """ + select * from + s3( + "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/compression/test_tvf.csv.${compress_type}", + "s3.access_key" = "${ak}", + "s3.secret_key" = "${sk}", + "REGION" = "${region}", + "FORMAT" = "csv", + "use_path_style" = "true", + "compress_type" ="${compress_type}FRAME") order by c1,c2,c3,c4,c5 limit 14; + """ + + + qt_lz4_2 """ + select c1,c3 from + s3( + "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/compression/test_tvf.csv.${compress_type}", + "s3.access_key" = "${ak}", + "s3.secret_key" = "${sk}", + "REGION" = "${region}", + "FORMAT" = "csv", + "use_path_style" = "true", + "compress_type" ="${compress_type}FRAME") where c3="buHDwfGeNHfpRFdNaogneddi" order by c3,c1 limit 14; + """ + + + String select_field = "c1,c12,c23,c40"; + String orderBy_limit = "order by c1,c12,c23,c40 limit 17 "; + + compress_type = "deflate"; + qt_deflate_1 """ + select ${select_field} from + s3( + "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/compression/000000_0.${compress_type}", + "s3.access_key" = "${ak}", + "s3.secret_key" = "${sk}", + "REGION" = "${region}", + "FORMAT" = "csv", + "use_path_style" = "true", + "column_separator" = '\001', + "compress_type" ="${compress_type}") ${orderBy_limit}; + """ + + qt_deflate_2 """ + select c1,c2 from + s3( + "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/compression/000000_0.${compress_type}", + "s3.access_key" = "${ak}", + "s3.secret_key" = "${sk}", + "REGION" = "${region}", + "FORMAT" = "csv", + "column_separator" = '\001', + "use_path_style" = "true", + "compress_type" ="${compress_type}") group by c1,c2 order by c1,c2 limit 5; + """ + + + + + compress_type = 
"snappy"; + qt_snappy_1 """ + select ${select_field} from + s3( + "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/compression/000000_0.${compress_type}", + "s3.access_key" = "${ak}", + "s3.secret_key" = "${sk}", + "REGION" = "${region}", + "FORMAT" = "csv", + "use_path_style" = "true", + "column_separator" = '\001', + "compress_type" ="${compress_type}block") ${orderBy_limit}; + """ + + + qt_snappy_2 """ + select count(*) from + s3( + "URI" = "https://${bucket}.${s3_endpoint}/regression/tvf/compression/000000_0.${compress_type}", + "s3.access_key" = "${ak}", + "s3.secret_key" = "${sk}", + "REGION" = "${region}", + "FORMAT" = "csv", + "use_path_style" = "true", + "column_separator" = '\001', + "compress_type" ="${compress_type}block") where c2 ="abccc"; + """ +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org