This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new d66f3f5d4e4 branch-4.0: [fix](docker-compose) Fix docker-compose subnet conflicts by assigning unique subnets #60158 (#60220)
d66f3f5d4e4 is described below
commit d66f3f5d4e49c9412c07c189245e940d96766925
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Jan 27 08:19:50 2026 +0800
branch-4.0: [fix](docker-compose) Fix docker-compose subnet conflicts by assigning unique subnets #60158 (#60220)
Cherry-picked from #60158
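For reference, each compose template touched below gets the same fix: a fixed, component-unique subnet declared in the network's ipam block. A minimal sketch of the pattern, with a hypothetical network name (the real templates use ${...} placeholders; this patch assigns 168.3.0.0/24, 168.4.0.0/24 and 168.5.0.0/24, as visible in the hunks below):

    networks:
      example-network:        # hypothetical name; the templates use ${...} variables
        ipam:
          driver: default
          config:
            - subnet: 168.3.0.0/24   # fixed subnet, unique per component

To check whether a candidate subnet collides with a network already on the host, something like: docker network inspect --format '{{range .IPAM.Config}}{{.Subnet}}{{end}}' <network-name>.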
Co-authored-by: Socrates <[email protected]>
---
.../thirdparties/docker-compose/hudi/hudi.yaml.tpl | 4 +
.../iceberg-rest/docker-compose.yaml.tpl | 3 +
.../docker-compose/polaris/docker-compose.yaml.tpl | 3 +
.../thirdparties/docker-compose/spark/spark.yaml | 46 -----
.../spark/tools/csv_format_conversion_tool.py | 83 --------
.../spark/tools/orc_format_conversion_tool.py | 71 -------
.../spark/tools/parquet_format_conversion_tool.py | 71 -------
.../docker-compose/spark/tools/requirements.txt | 1 -
.../docker-compose/spark/tools/sample.csv | 2 -
.../docker-compose/trino/gen_env.sh.tpl | 39 ----
.../docker-compose/trino/hive.properties.tpl | 19 --
.../trino/scripts/create_trino_table.sql | 222 ---------------------
.../docker-compose/trino/trino_hive.env.tpl | 53 -----
.../docker-compose/trino/trino_hive.yaml.tpl | 144 -------------
docker/thirdparties/run-thirdparties-docker.sh | 102 +---------
15 files changed, 12 insertions(+), 851 deletions(-)
diff --git a/docker/thirdparties/docker-compose/hudi/hudi.yaml.tpl b/docker/thirdparties/docker-compose/hudi/hudi.yaml.tpl
index a08f47212ae..64e588f1a6a 100644
--- a/docker/thirdparties/docker-compose/hudi/hudi.yaml.tpl
+++ b/docker/thirdparties/docker-compose/hudi/hudi.yaml.tpl
@@ -18,6 +18,10 @@
networks:
${HUDI_NETWORK}:
name: ${HUDI_NETWORK}
+ ipam:
+ driver: default
+ config:
+ - subnet: 168.3.0.0/24
services:
${CONTAINER_UID}hudi-minio:
diff --git a/docker/thirdparties/docker-compose/iceberg-rest/docker-compose.yaml.tpl b/docker/thirdparties/docker-compose/iceberg-rest/docker-compose.yaml.tpl
index f9ecde13f0c..ee70b27e3f8 100644
--- a/docker/thirdparties/docker-compose/iceberg-rest/docker-compose.yaml.tpl
+++ b/docker/thirdparties/docker-compose/iceberg-rest/docker-compose.yaml.tpl
@@ -230,3 +230,6 @@ services:
networks:
${CONTAINER_UID}iceberg-rest:
driver: bridge
+ ipam:
+ config:
+ - subnet: 168.4.0.0/24
diff --git a/docker/thirdparties/docker-compose/polaris/docker-compose.yaml.tpl b/docker/thirdparties/docker-compose/polaris/docker-compose.yaml.tpl
index bb2effb2e4c..8a41447b9f7 100644
--- a/docker/thirdparties/docker-compose/polaris/docker-compose.yaml.tpl
+++ b/docker/thirdparties/docker-compose/polaris/docker-compose.yaml.tpl
@@ -119,3 +119,6 @@ services:
networks:
${CONTAINER_UID}polaris:
name: ${CONTAINER_UID}polaris
+ ipam:
+ config:
+ - subnet: 168.5.0.0/24
diff --git a/docker/thirdparties/docker-compose/spark/spark.yaml b/docker/thirdparties/docker-compose/spark/spark.yaml
deleted file mode 100644
index 18719f11325..00000000000
--- a/docker/thirdparties/docker-compose/spark/spark.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-version: '3.3'
-
-networks:
- doris--spark--network:
- ipam:
- driver: default
- config:
- - subnet: 168.47.0.0/24
-
-services:
- spark-master:
- image: bitnami/spark:3.2.0
- container_name: spark-master
- ports:
- - "8080:8080"
- - "7077:7077"
- environment:
- - SPARK_MASTER_HOST=spark-master
- networks:
- - doris--spark--network
-
- spark-worker-1:
- image: bitnami/spark:3.2.0
- container_name: spark-worker-1
- environment:
- - SPARK_MODE=worker
- - SPARK_MASTER=spark://spark-master:7077
- networks:
- - doris--spark--network
diff --git a/docker/thirdparties/docker-compose/spark/tools/csv_format_conversion_tool.py b/docker/thirdparties/docker-compose/spark/tools/csv_format_conversion_tool.py
deleted file mode 100644
index 33e7ba440c7..00000000000
--- a/docker/thirdparties/docker-compose/spark/tools/csv_format_conversion_tool.py
+++ /dev/null
@@ -1,83 +0,0 @@
-#!usr/bin/python3
-# -*- coding=utf-8
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from pyspark.sql import SparkSession
-from pyspark.sql.types import *
-
-# create SparkSession object
-spark = SparkSession.builder.appName("parquet_format_conversion_tool").config("spark.master", "local").config("spark.sql.warehouse.dir", "/user/hive/warehouse").config("spark.hadoop.hive.metastore.uris", "thrift://127.0.0.1:9083").enableHiveSupport().getOrCreate()
-
-# Define the path to the local csv file
-csv_file_path = "./sample.csv"
-print("source csv_file_path: ", csv_file_path)
-
-# Defines the table structure of the generated file
-csv_schema = StructType([
- StructField("k1", StringType(), False),
- StructField("k2", StringType(), False)
-])
-
-# csv field delimiter
-df = spark.read.format("csv").schema(csv_schema).option("delimiter", "|").load(csv_file_path)
-
-# Displays the first 10 rows of the DataFrame
-df.show(10)
-
-# orc file
-orc_file_path = "./sample_orc"
-print("target orc_file_path: ", orc_file_path)
-df.write.format("orc").option("delimiter", ",").option("compression",
"snappy").mode("overwrite").save(orc_file_path)
-
-# read ORC file
-orc_df = spark.read.orc(orc_file_path)
-
-# Show the DataFrame schema and a few rows of the DataFrame
-print("DataFrame Schema:")
-orc_df.printSchema()
-
-print("\nFirst 5 rows of DataFrame:")
-orc_df.show(5)
-
-# Get statistics for the DataFrame
-print("\nDataFrame Summary Statistics:")
-orc_df.describe().show()
-
-print("****************************************************************")
-
-# parquet file
-parquet_file_path = "./sample_parquet"
-print("target parquet_file_path: ", parquet_file_path)
-df.write.format("parquet").option("delimiter", ",").option("compression",
"snappy").mode("overwrite").save(parquet_file_path)
-
-# read Parquet file
-parquet_df = spark.read.parquet(parquet_file_path)
-
-# Displays the schema and the first few rows of data for the DataFrame
-print("DataFrame Schema:")
-parquet_df.printSchema()
-
-print("\nFirst 5 rows of DataFrame:")
-parquet_df.show(5)
-
-# Get statistics for a DataFrame
-print("\nDataFrame Summary Statistics:")
-parquet_df.describe().show()
-
-spark.stop()
diff --git a/docker/thirdparties/docker-compose/spark/tools/orc_format_conversion_tool.py b/docker/thirdparties/docker-compose/spark/tools/orc_format_conversion_tool.py
deleted file mode 100644
index dc937349ed9..00000000000
--- a/docker/thirdparties/docker-compose/spark/tools/orc_format_conversion_tool.py
+++ /dev/null
@@ -1,71 +0,0 @@
-#!usr/bin/python3
-# -*- coding=utf-8
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from pyspark.sql import SparkSession
-
-
-# dataname.tablename format in hive
-origin_db_table = "zgq.t2"
-target_db_name = "orc_db"
-target_table_name = "table_01"
-
-# create SparkSession object
-spark = SparkSession.builder.appName("parquet_format_conversion_tool").config("spark.master", "local").config("spark.sql.warehouse.dir", "/user/hive/warehouse").config("spark.hadoop.hive.metastore.uris", "thrift://127.0.0.1:9083").enableHiveSupport().getOrCreate()
-
-orc_output_path = "hdfs://127.0.0.1:8020/user/hive/warehouse/" + target_db_name + "/" + target_table_name
-print("target parquet file path: ", orc_output_path)
-
-hive_table_df = spark.table(origin_db_table)
-print("source data: ")
-hive_table_df.show(10)
-
-# orc.compress: Compression method. Optional values: NONE, ZLIB, SNAPPY, LZO. Default value: ZLIB.
-# orc.compress.size: Compression block size. Default value: 262144.
-# orc.stripe.size: Stripe size of ORC file. Default value: 268435456.
-# orc.row.index.stride: Row index stride. Default value: 10000.
-# orc.bloom.filter.columns: Names of columns using Bloom filter.
-# orc.bloom.filter.fpp: False positive probability of Bloom filter. Default value: 0.05.
-# orc.dictionary.key.threshold: Threshold for dictionary encoding columns. Default value: 1.0.
-# orc.enable.indexes: Whether to enable indexes. Default value: true.
-# orc.create.index: Whether to create indexes when writing ORC files. Default value: true.
-# orc.bloom.filter.storage.bitset: Storage method of Bloom filter, BitSet or BitArray. Default value: true.
-# orc.bloom.filter.write.max.memory: Maximum memory for writing Bloom filter. Default value: 268435456.
-# orc.bloom.filter.page.size: Page size of Bloom filter. Default value: 1024.
-# Save as an ORC format file. Different properties can be set according to requirements.
-# These properties can be set using the .option("property_name", "property_value") method, for example:
-
-
-hive_table_df.write.format("orc").option("compression",
"zlib").option("orc.create.index", "true").option("orc.stripe.size",
"268435456").option("orc.row.index.stride",
"10000").option("orc.bloom.filter.columns",
"col1,col2").option("orc.bloom.filter.fpp",
"0.05").option("orc.dictionary.key.threshold",
"1.0").option("orc.encoding.strategy",
"SPEED").mode("overwrite").save(orc_output_path)
-
-# Read the ORC file
-orc_df = spark.read.orc(orc_output_path)
-
-# Displays the schema and the first few rows of data for the DataFrame
-print("DataFrame Schema:")
-orc_df.printSchema()
-
-print("\nFirst 5 rows of DataFrame:")
-orc_df.show(5)
-
-# Get statistics for a DataFrame
-print("\nDataFrame Summary Statistics:")
-orc_df.describe().show()
-
-spark.stop()
diff --git a/docker/thirdparties/docker-compose/spark/tools/parquet_format_conversion_tool.py b/docker/thirdparties/docker-compose/spark/tools/parquet_format_conversion_tool.py
deleted file mode 100644
index 54e69a7566b..00000000000
--- a/docker/thirdparties/docker-compose/spark/tools/parquet_format_conversion_tool.py
+++ /dev/null
@@ -1,71 +0,0 @@
-#!usr/bin/python3
-# -*- coding=utf-8
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from pyspark.sql import SparkSession
-
-# source hive dataname.tablename
-origin_db_table = "zgq.t2"
-
-target_db_name = "parquet_db"
-target_table_name = "table_01"
-
-# create SparkSession object
-spark = SparkSession.builder.appName("parquet_format_conversion_tool").config("spark.master", "local").config("spark.sql.warehouse.dir", "/user/hive/warehouse").config("spark.hadoop.hive.metastore.uris", "thrift://127.0.0.1:9083").enableHiveSupport().getOrCreate()
-
-output_path = "hdfs://127.0.0.1:8020/user/hive/warehouse/" + target_db_name + "/" + target_table_name
-
-print("target parquet file path", output_path)
-
-# read Hive table
-hive_table_df = spark.table(origin_db_table)
-print("source data: ")
-hive_table_df.show(10)
-
-# Specify different property configurations
-# parquet.compression: Set the compression algorithm, optional values include uncompressed, gzip, snappy, lz4, brotli, zstd, etc.
-# parquet.enable.dictionary: Whether to enable dictionary encoding, default is true.
-# parquet.writer.version: Parquet write version, optional values are 1 or 2.
-# parquet.enable.data.page.v2: Whether to enable data page version 2, default is true.
-# parquet.page.size: Data page size, default is 1MB.
-# parquet.block.size: Data block size, default is 128MB.
-# parquet.dictionary.page.size: Dictionary page size, default is 8KB.
-# parquet.enable.dictionary.compression: Whether to enable dictionary page compression, default is false.
-# parquet.filter.dictionary: Whether to enable dictionary filtering, default is false.
-# parquet.avro.write-old-list-structure: Whether to write old Avro list structure, default is false.
-# Save as a Parquet format file. Different properties can be set according to requirements.
-# These properties can be set using the .option("property_name", "property_value") method, for example:
-
-hive_table_df.write.format("parquet").option("compression",
"snappy").option("parquet.block.size",
"131072").option("parquet.enable.dictionary",
"true").mode("overwrite").save(output_path)
-
-# read Parquet file
-parquet_df = spark.read.parquet(output_path)
-
-# Displays the schema and the first few rows of data for the DataFrame
-print("DataFrame Schema:")
-parquet_df.printSchema()
-
-print("\nFirst 5 rows of DataFrame:")
-parquet_df.show(5)
-
-# Get statistics for a DataFrame
-print("\nDataFrame Summary Statistics:")
-parquet_df.describe().show()
-
-spark.stop()
\ No newline at end of file
diff --git a/docker/thirdparties/docker-compose/spark/tools/requirements.txt b/docker/thirdparties/docker-compose/spark/tools/requirements.txt
deleted file mode 100644
index 67f2796528f..00000000000
--- a/docker/thirdparties/docker-compose/spark/tools/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-pyspark==3.4.1
\ No newline at end of file
diff --git a/docker/thirdparties/docker-compose/spark/tools/sample.csv b/docker/thirdparties/docker-compose/spark/tools/sample.csv
deleted file mode 100644
index cf858a88c14..00000000000
--- a/docker/thirdparties/docker-compose/spark/tools/sample.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-aaa|bbb
-ccc|ddd
\ No newline at end of file
diff --git a/docker/thirdparties/docker-compose/trino/gen_env.sh.tpl b/docker/thirdparties/docker-compose/trino/gen_env.sh.tpl
deleted file mode 100644
index dc1357540a0..00000000000
--- a/docker/thirdparties/docker-compose/trino/gen_env.sh.tpl
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-####################################################################
-# This script will generate hadoop-hive.env from hadoop-hive.env.tpl
-####################################################################
-
-set -eo pipefail
-
-ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
-
-FS_PORT=8120
-HMS_PORT=9183
-
-# Need to set hostname of container to same as host machine's.
-# Otherwise, the doris process can not connect to namenode directly.
-HOST_NAME="doris--"
-
-{
- echo "FS_PORT=${FS_PORT}"
- echo "HMS_PORT=${HMS_PORT}"
- echo "CORE_CONF_fs_defaultFS=hdfs://doris--namenode:${FS_PORT}"
- echo "HOST_NAME=${HOST_NAME}"
-} >>"${ROOT}"/trino_hive.env
diff --git a/docker/thirdparties/docker-compose/trino/hive.properties.tpl b/docker/thirdparties/docker-compose/trino/hive.properties.tpl
deleted file mode 100644
index 4f116828647..00000000000
--- a/docker/thirdparties/docker-compose/trino/hive.properties.tpl
+++ /dev/null
@@ -1,19 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-connector.name=hive
-hive.metastore.uri=thrift://metastore_ip:9083
diff --git a/docker/thirdparties/docker-compose/trino/scripts/create_trino_table.sql b/docker/thirdparties/docker-compose/trino/scripts/create_trino_table.sql
deleted file mode 100644
index ea9749f18a2..00000000000
--- a/docker/thirdparties/docker-compose/trino/scripts/create_trino_table.sql
+++ /dev/null
@@ -1,222 +0,0 @@
--- Licensed to the Apache Software Foundation (ASF) under one
--- or more contributor license agreements. See the NOTICE file
--- distributed with this work for additional information
--- regarding copyright ownership. The ASF licenses this file
--- to you under the Apache License, Version 2.0 (the
--- "License"); you may not use this file except in compliance
--- with the License. You may obtain a copy of the License at
---
--- http://www.apache.org/licenses/LICENSE-2.0
---
--- Unless required by applicable law or agreed to in writing,
--- software distributed under the License is distributed on an
--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
--- KIND, either express or implied. See the License for the
--- specific language governing permissions and limitations
--- under the License.
-
-create schema hive.doris_test;
-create table hive.doris_test.orc_basic_data_type
-(
- T_BOOLEAN BOOLEAN,
- T_TINYINT TINYINT,
- T_SMALLINT SMALLINT,
- T_INTEGER INTEGER,
- T_BIGINT BIGINT,
- T_REAL REAL,
- T_DOUBLE DOUBLE,
- T_DECIMAL DECIMAL(38,12),
- T_CHAR CHAR,
- T_VARCHAR VARCHAR,
- T_DATE DATE,
- T_TIMESTAMP TIMESTAMP
-) WITH (format = 'ORC');
-
-create table hive.doris_test.parquet_basic_data_type
-(
- T_BOOLEAN BOOLEAN,
- T_TINYINT TINYINT,
- T_SMALLINT SMALLINT,
- T_INTEGER INTEGER,
- T_BIGINT BIGINT,
- T_REAL REAL,
- T_DOUBLE DOUBLE,
- T_DECIMAL DECIMAL(38,12),
- T_CHAR CHAR,
- T_VARCHAR VARCHAR,
- T_DATE DATE,
- T_TIMESTAMP TIMESTAMP
-) WITH (format = 'PARQUET');
-
-
-insert into hive.doris_test.orc_basic_data_type
-values (true, tinyint '1', smallint '1', integer '126', bigint '123456789', real '12.34', double '12.3456', decimal
- '12.3456789', char 'A', varchar 'Beijing,Shanghai', date '2023-05-23', timestamp '2023-05-24 12:00:00.123'),
- (false, tinyint '1', smallint '1', integer '126', bigint '1234567890123456', real '12.34', double '12.3456', decimal
- '12.345678901', char 'A', varchar 'Beijing,Shanghai', date '2023-05-24', timestamp '2023-05-24 13:00:00.123'),
- (false, tinyint '1', smallint '1', integer '126', bigint '123456789012345678', real '12', double '10', decimal
- '12.3456789012', char 'A', varchar 'Beijing,Shanghai', date '2023-05-25', timestamp '2023-05-24 13:00:00.123'),
- (null, null, null, null, null, null, null, null, null, null, null, null);
-
-insert into hive.doris_test.parquet_basic_data_type
-select *
-from hive.doris_test.orc_basic_data_type;
-
-
-CREATE TABLE hive.doris_test.orc_array_data_type
-(
- t_int_array array(integer),
- t_tinyint_array array(tinyint),
- t_smallint_array array(smallint),
- t_bigint_array array(bigint),
- t_real_array array(real),
- t_double_array array(double),
- t_string_array array(varchar),
- t_boolean_array array(boolean),
- t_timestamp_array array(timestamp (3)),
- t_date_array array(date),
- t_decimal_array array(decimal (38, 12))
-)
- WITH (
- format = 'ORC'
- );
-
-CREATE TABLE hive.doris_test.parquet_array_data_type
-(
- t_int_array array( integer),
- t_tinyint_array array(tinyint),
- t_smallint_array array( smallint),
- t_bigint_array array(bigint),
- t_real_array array( real),
- t_double_array array( double),
- t_string_array array( varchar),
- t_boolean_array array(boolean),
- t_timestamp_array array( timestamp (3)),
- t_date_array array( date),
- t_decimal_array array( decimal (38, 12))
-)
- WITH (
- format = 'PARQUET'
- );
-
-insert into hive.doris_test.orc_array_data_type
-values (ARRAY[1,2,3,4,5,6,7],ARRAY[1,2,3,4,5,6,7],ARRAY[1,2,3,4,5,6,7],ARRAY[1234567890123,12345678901234],
- ARRAY[45.123,123.45,11.0],ARRAY[45.12344,123.4544,11.0],ARRAY['TEST','TEST#12123123'],ARRAY[TRUE,FALSE,TRUE,FALSE],
- ARRAY[TIMESTAMP '2023-05-24 13:00:00.123',TIMESTAMP '2023-05-24 14:00:00.123'],
- ARRAY[DATE '2023-05-24',DATE '2023-05-26'],
- ARRAY[DECIMAL '10001.11122233344']);
-
-insert into hive.doris_test.parquet_array_data_type select * from hive.doris_test.orc_array_data_type;
-
-
-create table hive.doris_test.orc_string_complex
-(
- t_string_array array(varchar),
- t_string_map map(varchar,varchar),
- t_string_struct row(f_string varchar,f_int varchar)
-)WITH (
- FORMAT = 'ORC'
- );
-
-create table hive.doris_test.parquet_string_complex
-(
- t_string_array array(varchar),
- t_string_map map(varchar,varchar),
- t_string_struct row(f_string varchar,f_int varchar)
-)WITH (
- FORMAT = 'PARQUET'
- );
-
-insert into hive.doris_test.orc_string_complex
-values (array['1', '2', '3', '北京', 'beijing'],
- map(array['1', '2', '3'], array['1', 'beijing', '北京']),
- row('beijing', '1'));
-
-insert into hive.doris_test.parquet_string_complex
-select *
-from hive.doris_test.orc_string_complex;
-
-CREATE TABLE hive.doris_test.orc_supplier_partitioned
-(
- suppkey bigint,
- name varchar(25),
- address varchar(40),
- phone varchar(15),
- acctbal double,
- comment varchar(101),
- nationkey bigint
-)
- WITH (
- format = 'ORC',
- partitioned_by = ARRAY['nationkey']
- );
-
-CREATE TABLE hive.doris_test.parquet_supplier_partitioned
-(
- suppkey bigint,
- name varchar(25),
- address varchar(40),
- phone varchar(15),
- acctbal double,
- comment varchar(101),
- nationkey bigint
-)
- WITH (
- format = 'PARQUET',
- partitioned_by = ARRAY['nationkey']
- );
-
-insert into hive.doris_test.orc_supplier_partitioned
-select suppkey, name, address, phone, acctbal, comment, nationkey
-from tpch.sf100.supplier;
-
-insert into hive.doris_test.parquet_supplier_partitioned
-select *
-from hive.doris_test.orc_supplier_partitioned;
-
--- partition and bucket
-CREATE TABLE hive.doris_test.orc_supplier_partitioned_bucketed
-(
- suppkey bigint,
- name varchar(25),
- address varchar(40),
- phone varchar(15),
- acctbal double,
- comment varchar(101),
- nationkey bigint
-)
- WITH (
- format = 'ORC',
- partitioned_by = ARRAY['nationkey'],
- bucketed_by = ARRAY['suppkey'],
- bucket_count = 10
- );
-
-CREATE TABLE hive.doris_test.parquet_supplier_partitioned_bucketed
-(
- suppkey bigint,
- name varchar(25),
- address varchar(40),
- phone varchar(15),
- acctbal double,
- comment varchar(101),
- nationkey bigint
-)
- WITH (
- format = 'PARQUET',
- partitioned_by = ARRAY['nationkey'],
- bucketed_by = ARRAY['suppkey'],
- bucket_count = 10
- );
-
-insert into hive.doris_test.orc_supplier_partitioned_bucketed
-select suppkey, name, address, phone, acctbal, comment, nationkey
-from tpch.sf100.supplier;
-
-insert into hive.doris_test.parquet_supplier_partitioned_bucketed
-select *
-from hive.doris_test.orc_supplier_partitioned_bucketed;
-
-
-
-
diff --git a/docker/thirdparties/docker-compose/trino/trino_hive.env.tpl b/docker/thirdparties/docker-compose/trino/trino_hive.env.tpl
deleted file mode 100644
index 0f2ce0a443f..00000000000
--- a/docker/thirdparties/docker-compose/trino/trino_hive.env.tpl
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/usr/bin/env bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-DOCKER_TRINO_EXTERNAL_PORT=8080
-
-HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://doris--hive-metastore-postgresql:5432/metastore
-HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver
-HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive
-HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive
-HIVE_SITE_CONF_datanucleus_autoCreateSchema=false
-HIVE_SITE_CONF_hive_metastore_uris=thrift://doris--hive-metastore:9083
-HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
-HIVE_SITE_CONF_hive_server2_thrift_bind_host=0.0.0.0
-HIVE_SITE_CONF_hive_server2_thrift_port=10000
-
-CORE_CONF_hadoop_http_staticuser_user=root
-CORE_CONF_hadoop_proxyuser_hue_hosts=*
-CORE_CONF_hadoop_proxyuser_hue_groups=*
-CORE_CONF_hadoop_proxyuser_hive_hosts=*
-
-HDFS_CONF_dfs_webhdfs_enabled=true
-HDFS_CONF_dfs_permissions_enabled=false
-
-YARN_CONF_yarn_log___aggregation___enable=true
-YARN_CONF_yarn_resourcemanager_recovery_enabled=true
-YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
-YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
-YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
-YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
-YARN_CONF_yarn_timeline___service_enabled=true
-YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
-YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
-YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
-YARN_CONF_yarn_timeline___service_hostname=historyserver
-YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
-YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
-YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031
-
diff --git a/docker/thirdparties/docker-compose/trino/trino_hive.yaml.tpl b/docker/thirdparties/docker-compose/trino/trino_hive.yaml.tpl
deleted file mode 100644
index e99ecadab83..00000000000
--- a/docker/thirdparties/docker-compose/trino/trino_hive.yaml.tpl
+++ /dev/null
@@ -1,144 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-version: "3.8"
-
-networks:
- doris--network:
- ipam:
- driver: default
- config:
- - subnet: 168.43.0.0/24
-
-services:
-
- doris--trino:
- image: trinodb/trino:418
- hostname: doris--trino
- container_name: doris--trino
- env_file:
- - ./trino_hive.env
- ports:
- - "${DOCKER_TRINO_EXTERNAL_PORT}:8080"
- volumes:
- - ./scripts:/scripts
- healthcheck:
- test: [ "CMD", "curl", "-f", "http://localhost:8080/" ]
- interval: 5s
- timeout: 120s
- retries: 120
- networks:
- - doris--network
-
- doris--namenode:
- image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8
- environment:
- - CLUSTER_NAME=test
- env_file:
- - ./trino_hive.env
- hostname: doris--namenode
- container_name: doris--namenode
- expose:
- - "50070"
- - "8020"
- - "9000"
- - "${FS_PORT}"
- ports:
- - "${FS_PORT}:${FS_PORT}"
- healthcheck:
- test: [ "CMD", "curl", "http://localhost:50070/" ]
- interval: 5s
- timeout: 120s
- retries: 120
- networks:
- - doris--network
-
- doris--datanode:
- image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8
- env_file:
- - ./trino_hive.env
- environment:
- SERVICE_PRECONDITION: "doris--namenode:50070"
- hostname: doris--datanode
- container_name: doris--datanode
- expose:
- - "50075"
- healthcheck:
- test: [ "CMD", "curl", "http://localhost:50075" ]
- interval: 5s
- timeout: 60s
- retries: 120
- networks:
- - doris--network
-
- doris--hive-server:
- image: bde2020/hive:2.3.2-postgresql-metastore
- env_file:
- - ./trino_hive.env
- environment:
- HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://doris--hive-metastore-postgresql:5432/metastore"
- SERVICE_PRECONDITION: "doris--hive-metastore:9083"
- hostname: doris--hive-server
- container_name: doris--hive-server
- expose:
- - "10000"
- depends_on:
- - doris--datanode
- - doris--namenode
- healthcheck:
- test: beeline -u "jdbc:hive2://127.0.0.1:10000/default" -n health_check -e "show databases;"
- interval: 10s
- timeout: 120s
- retries: 120
- networks:
- - doris--network
-
-
- doris--hive-metastore:
- image: bde2020/hive:2.3.2-postgresql-metastore
- env_file:
- - ./trino_hive.env
- command: /opt/hive/bin/hive --service metastore
- environment:
- SERVICE_PRECONDITION: "doris--namenode:50070 doris--datanode:50075 doris--hive-metastore-postgresql:5432"
- hostname: doris--hive-metastore
- container_name: doris--hive-metastore
- expose:
- - "9083"
- ports:
- - "${HMS_PORT}:9083"
- volumes:
- - ./scripts:/mnt/scripts
- depends_on:
- - doris--hive-metastore-postgresql
- networks:
- - doris--network
-
- doris--hive-metastore-postgresql:
- image: bde2020/hive-metastore-postgresql:2.3.0
- restart: always
- hostname: doris--hive-metastore-postgresql
- container_name: doris--hive-metastore-postgresql
- expose:
- - "5432"
- healthcheck:
- test: ["CMD-SHELL", "pg_isready -U postgres"]
- interval: 5s
- timeout: 60s
- retries: 120
- networks:
- - doris--network
diff --git a/docker/thirdparties/run-thirdparties-docker.sh b/docker/thirdparties/run-thirdparties-docker.sh
index f15fe60e67b..8731880a6bc 100755
--- a/docker/thirdparties/run-thirdparties-docker.sh
+++ b/docker/thirdparties/run-thirdparties-docker.sh
@@ -40,12 +40,12 @@ Usage: $0 <options>
--load-parallel <num> set the parallel number to load data, default is the 50% of CPU cores
All valid components:
- mysql,pg,oracle,sqlserver,clickhouse,es,hive2,hive3,iceberg,iceberg-rest,hudi,trino,kafka,mariadb,db2,oceanbase,lakesoul,kerberos,ranger,polaris
+ mysql,pg,oracle,sqlserver,clickhouse,es,hive2,hive3,iceberg,iceberg-rest,hudi,kafka,mariadb,db2,oceanbase,lakesoul,kerberos,ranger,polaris
"
exit 1
}
DEFAULT_COMPONENTS="mysql,es,hive2,hive3,pg,oracle,sqlserver,clickhouse,mariadb,iceberg,hudi,db2,oceanbase,kerberos,minio"
-ALL_COMPONENTS="${DEFAULT_COMPONENTS},trino,kafka,spark,lakesoul,ranger,polaris"
+ALL_COMPONENTS="${DEFAULT_COMPONENTS},kafka,lakesoul,ranger,polaris"
COMPONENTS=$2
HELP=0
STOP=0
@@ -158,9 +158,7 @@ RUN_ES=0
RUN_ICEBERG=0
RUN_ICEBERG_REST=0
RUN_HUDI=0
-RUN_TRINO=0
RUN_KAFKA=0
-RUN_SPARK=0
RUN_MARIADB=0
RUN_DB2=0
RUN_OCENABASE=0
@@ -199,10 +197,6 @@ for element in "${COMPONENTS_ARR[@]}"; do
elif [[ "${element}"x == "hudi"x ]]; then
RUN_HUDI=1
RESERVED_PORTS="${RESERVED_PORTS},19083,19100,19101,18080"
- elif [[ "${element}"x == "trino"x ]]; then
- RUN_TRINO=1
- elif [[ "${element}"x == "spark"x ]]; then
- RUN_SPARK=1
elif [[ "${element}"x == "mariadb"x ]]; then
RUN_MARIADB=1
elif [[ "${element}"x == "db2"x ]]; then
@@ -403,13 +397,6 @@ start_hive3() {
fi
}
-start_spark() {
- sudo docker compose -f "${ROOT}"/docker-compose/spark/spark.yaml down
- if [[ "${STOP}" -ne 1 ]]; then
- sudo docker compose -f "${ROOT}"/docker-compose/spark/spark.yaml up --build --remove-orphans -d
- fi
-}
-
start_iceberg() {
# iceberg
ICEBERG_DIR=${ROOT}/docker-compose/iceberg
@@ -464,81 +451,6 @@ start_hudi() {
fi
}
-start_trino() {
- # trino
- trino_docker="${ROOT}"/docker-compose/trino
- TRINO_CONTAINER_ID="${CONTAINER_UID}trino"
- NAMENODE_CONTAINER_ID="${CONTAINER_UID}namenode"
- HIVE_METASTORE_CONTAINER_ID=${CONTAINER_UID}hive-metastore
- for file in trino_hive.yaml trino_hive.env gen_env.sh hive.properties; do
- cp "${trino_docker}/$file.tpl" "${trino_docker}/$file"
- if [[ $file != "hive.properties" ]]; then
- sed -i "s/doris--/${CONTAINER_UID}/g" "${trino_docker}/$file"
- fi
- done
-
- bash "${trino_docker}"/gen_env.sh
- sudo docker compose -f "${trino_docker}"/trino_hive.yaml --env-file "${trino_docker}"/trino_hive.env down
- if [[ "${STOP}" -ne 1 ]]; then
- sudo sed -i "/${NAMENODE_CONTAINER_ID}/d" /etc/hosts
- sudo docker compose -f "${trino_docker}"/trino_hive.yaml --env-file "${trino_docker}"/trino_hive.env up --build --remove-orphans -d
- sudo echo "127.0.0.1 ${NAMENODE_CONTAINER_ID}" >>/etc/hosts
- sleep 20s
- hive_metastore_ip=$(docker inspect --format='{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' ${HIVE_METASTORE_CONTAINER_ID})
-
- if [ -z "$hive_metastore_ip" ]; then
- echo "Failed to get Hive Metastore IP address" >&2
- exit 1
- else
- echo "Hive Metastore IP address is: $hive_metastore_ip"
- fi
-
- sed -i "s/metastore_ip/${hive_metastore_ip}/g" "${trino_docker}"/hive.properties
- docker cp "${trino_docker}"/hive.properties "${CONTAINER_UID}trino":/etc/trino/catalog/
-
- # trino needs a server restart to load the hive catalog
- max_retries=3
-
- function control_container() {
- max_retries=3
- operation=$1
- expected_status=$2
- retries=0
-
- while [ $retries -lt $max_retries ]; do
- status=$(docker inspect --format '{{.State.Running}}' ${TRINO_CONTAINER_ID})
- if [ "${status}" == "${expected_status}" ]; then
- echo "Container ${TRINO_CONTAINER_ID} has ${operation}ed
successfully."
- break
- else
- echo "Waiting for container ${TRINO_CONTAINER_ID} to
${operation}..."
- sleep 5s
- ((retries++))
- fi
- sleep 3s
- done
-
- if [ $retries -eq $max_retries ]; then
- echo "${operation} operation failed to complete after
$max_retries attempts."
- exit 1
- fi
- }
- # Stop the container
- docker stop ${TRINO_CONTAINER_ID}
- sleep 5s
- control_container "stop" "false"
-
- # Start the container
- docker start ${TRINO_CONTAINER_ID}
- control_container "start" "true"
-
- # wait for trino init
- sleep 20s
- # execute create table sql
- docker exec -it ${TRINO_CONTAINER_ID} /bin/bash -c 'trino -f /scripts/create_trino_table.sql'
- fi
-}
-
start_mariadb() {
# mariadb
cp "${ROOT}"/docker-compose/mariadb/mariadb-10.yaml.tpl
"${ROOT}"/docker-compose/mariadb/mariadb-10.yaml
@@ -748,11 +660,6 @@ if [[ "${RUN_HIVE3}" -eq 1 ]]; then
pids["hive3"]=$!
fi
-if [[ "${RUN_SPARK}" -eq 1 ]]; then
- start_spark > start_spark.log 2>&1 &
- pids["spark"]=$!
-fi
-
if [[ "${RUN_ICEBERG}" -eq 1 ]]; then
start_iceberg > start_iceberg.log 2>&1 &
pids["iceberg"]=$!
@@ -768,11 +675,6 @@ if [[ "${RUN_HUDI}" -eq 1 ]]; then
pids["hudi"]=$!
fi
-if [[ "${RUN_TRINO}" -eq 1 ]]; then
- start_trino > start_trino.log 2>&1 &
- pids["trino"]=$!
-fi
-
if [[ "${RUN_MARIADB}" -eq 1 ]]; then
start_mariadb > start_mariadb.log 2>&1 &
pids["mariadb"]=$!
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]