This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 10f19573791 [feature](docker)add docker-iceberg init tables (#25424)
10f19573791 is described below
commit 10f19573791c5fcd3edeab29eaf84a6ca557fb0c
Author: wuwenchi <[email protected]>
AuthorDate: Tue Oct 24 19:29:57 2023 +0800
[feature](docker)add docker-iceberg init tables (#25424)
Add some init tables for docker-iceberg.
---
.../thirdparties/docker-compose/iceberg/README.md | 25 +++++
.../docker-compose/iceberg/iceberg.env | 2 +-
.../docker-compose/iceberg/iceberg.yaml.tpl | 55 ++++++-----
.../docker-compose/iceberg/tools/gen_data.py | 106 +++++++++++++++++++++
.../iceberg/{iceberg.env => tools/save_docker.sh} | 11 +--
docker/thirdparties/run-thirdparties-docker.sh | 13 ++-
regression-test/conf/regression-conf.groovy | 3 +
.../pipeline/p0/conf/regression-conf.groovy | 3 +
8 files changed, 176 insertions(+), 42 deletions(-)
diff --git a/docker/thirdparties/docker-compose/iceberg/README.md b/docker/thirdparties/docker-compose/iceberg/README.md
new file mode 100644
index 00000000000..7458c717032
--- /dev/null
+++ b/docker/thirdparties/docker-compose/iceberg/README.md
@@ -0,0 +1,25 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+
+```
+tools:
+ gen_data.py: generate random data
+ save_docker.sh: save the current docker state
+```
diff --git a/docker/thirdparties/docker-compose/iceberg/iceberg.env b/docker/thirdparties/docker-compose/iceberg/iceberg.env
index e4a95c99ce2..4cc8b42eaf9 100644
--- a/docker/thirdparties/docker-compose/iceberg/iceberg.env
+++ b/docker/thirdparties/docker-compose/iceberg/iceberg.env
@@ -19,6 +19,6 @@
NOTEBOOK_SERVER_PORT=8888
SPARK_DRIVER_UI_PORT=8080
SPARK_HISTORY_UI_PORT=10000
-REST_CATALOG_PORT=8181
+REST_CATALOG_PORT=18181
MINIO_UI_PORT=9000
MINIO_API_PORT=9001
diff --git a/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl b/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
index 1cd3d4acf9e..343c8dd2e5a 100644
--- a/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
+++ b/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
@@ -18,64 +18,59 @@
version: "3"
services:
- doris--spark-iceberg:
+ spark-iceberg:
image: tabulario/spark-iceberg
container_name: doris--spark-iceberg
hostname: doris--spark-iceberg
build: spark/
depends_on:
- - doris--rest
- - doris--minio
+ - rest
+ - minio
volumes:
- - ./warehouse:/home/iceberg/warehouse
- - ./notebooks:/home/iceberg/notebooks/notebooks
- - ./entrypoint.sh:/opt/spark/entrypoint.sh
- - ./spark-defaults.conf:/opt/spark/conf/spark-defaults.conf
+ - ./data/output/spark-warehouse:/home/iceberg/warehouse
+ - ./data/output/spark-notebooks:/home/iceberg/notebooks/notebooks
+ - ./data:/mnt/data
environment:
- AWS_ACCESS_KEY_ID=admin
- AWS_SECRET_ACCESS_KEY=password
- AWS_REGION=us-east-1
- ports:
- - ${NOTEBOOK_SERVER_PORT}:8888
- - ${SPARK_DRIVER_UI_PORT}:8080
- - ${SPARK_HISTORY_UI_PORT}:10000
- links:
- - doris--rest:rest
- - doris--minio:minio
networks:
- doris--iceberg
- entrypoint:
- - /opt/spark/entrypoint.sh
- doris--rest:
- image: tabulario/iceberg-rest:0.2.0
+ rest:
+ image: tabulario/iceberg-rest
+ container_name: doris--iceberg-rest
ports:
- ${REST_CATALOG_PORT}:8181
+ volumes:
+ - ./data:/mnt/data
environment:
- AWS_ACCESS_KEY_ID=admin
- AWS_SECRET_ACCESS_KEY=password
- AWS_REGION=us-east-1
- CATALOG_WAREHOUSE=s3a://warehouse/wh/
- CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
- - CATALOG_S3_ENDPOINT=http://doris--minio:9000
+ - CATALOG_S3_ENDPOINT=http://minio:9000
networks:
- doris--iceberg
- doris--minio:
+ entrypoint: /bin/bash /mnt/data/input/script/rest_init.sh
+
+ minio:
image: minio/minio
container_name: doris--minio
- hostname: doris--minio
environment:
- MINIO_ROOT_USER=admin
- MINIO_ROOT_PASSWORD=password
- ports:
- - ${MINIO_UI_PORT}:9001
- - ${MINIO_API_PORT}:9000
+ - MINIO_DOMAIN=minio
networks:
- - doris--iceberg
+ doris--iceberg:
+ aliases:
+ - warehouse.minio
command: ["server", "/data", "--console-address", ":9001"]
- doris--mc:
+
+ mc:
depends_on:
- - doris--minio
+ - minio
image: minio/mc
container_name: doris--mc
environment:
@@ -84,12 +79,16 @@ services:
- AWS_REGION=us-east-1
networks:
- doris--iceberg
+ volumes:
+ - ./data:/mnt/data
entrypoint: >
/bin/sh -c "
- until (/usr/bin/mc config host add minio http://doris--minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
+ until (/usr/bin/mc config host add minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
/usr/bin/mc rm -r --force minio/warehouse;
/usr/bin/mc mb minio/warehouse;
/usr/bin/mc policy set public minio/warehouse;
+ echo 'copy data';
+ mc cp -r /mnt/data/input/minio/warehouse/* minio/warehouse/;
tail -f /dev/null
"
networks:
diff --git a/docker/thirdparties/docker-compose/iceberg/tools/gen_data.py b/docker/thirdparties/docker-compose/iceberg/tools/gen_data.py
new file mode 100644
index 00000000000..2bd2d7f0ff2
--- /dev/null
+++ b/docker/thirdparties/docker-compose/iceberg/tools/gen_data.py
@@ -0,0 +1,106 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import random
+import string
+
+table_name = "demo.format_v1.sample_parquet"
+
+alphabet = 'abcdefghijklmnopqrstuvwxyz!@#$%^&*()'
+binary_alphabet = '11111111111110000000000000000000'
+data_choice = ["date('2000-12-31')", "date('1969-09-21')", "date('2969-02-03')"]
+timestamp_choice = [
+ "TIMESTAMP '1970-01-01 00:00:01.000001 UTC+00:00'",
+ "TIMESTAMP '1970-01-02 00:00:01.000001 UTC+00:00'",
+ "TIMESTAMP '1970-01-03 00:00:01.000001 UTC+00:00'",
+ "TIMESTAMP '1970-01-04 00:00:01.000001 UTC+00:00'"]
+timestamp_ntz_choice = [
+ "TIMESTAMP_NTZ '2017-12-01 10:12:55.038194 UTC'",
+ "TIMESTAMP_NTZ '2017-12-02 10:12:55.038194 UTC'",
+ "TIMESTAMP_NTZ '2017-12-03 10:12:55.038194 UTC'",
+ "TIMESTAMP_NTZ '2017-12-04 10:12:55.038194 UTC'",
+]
+city_choice = [
+ "'Shanghai'", "'Hefei'", "'Beijing'", "'Hangzhou'"
+]
+
+
+def get_one_data():
+ id = random.randint(-100000000, 100000000)
+
+ col_boolean = True
+ if random.randint(-1000000, 1000000) % 2 == 0:
+ col_boolean = False
+
+ col_short = random.randint(-32700, 32700)
+
+ col_byte = random.randint(-128, 127)
+
+ col_integer = random.randint(-21474836, 2147483)
+
+ col_long = random.randint(-92233720368547758, 92233720368547758)
+
+ col_float = random.random() * 10
+
+ col_double = random.random() * 10
+
+ col_date = random.choice(data_choice)
+
+ col_timestamp = random.choice(timestamp_choice)
+
+ col_timestamp_ntz = random.choice(timestamp_ntz_choice)
+
+ col_char = "".join(random.sample(alphabet, random.randint(1,18)))
+
+ col_varchar = ''.join(random.sample(string.ascii_letters + string.digits, random.randint(1, 20)))
+
+ col_string = ''.join(random.sample(string.ascii_letters + string.digits, random.randint(1, 20)))
+
+ col_binary = ''.join(random.sample(binary_alphabet, random.randint(1,30)))
+
+ col_decimal = random.random() * 10000
+
+ city = random.choice(city_choice)
+
+ out = "{},{},{},{},{},{},{},{},{},{},{},'{}','{}','{}',CAST('{}' AS BINARY),{},{}".format(
+ id,
+ col_boolean,
+ col_short,
+ col_byte,
+ col_integer,
+ col_long,
+ col_float,
+ col_double,
+ col_date,
+ col_timestamp,
+ col_timestamp_ntz,
+ col_char,
+ col_varchar,
+ col_string,
+ col_binary,
+ col_decimal,
+ city
+ )
+ return out
+
+with open('insert_table_values.sql', 'w') as f:
+ f.write("INSERT INTO {} VALUES\n".format(table_name))
+ f.write(" ({})\n".format(get_one_data()))
+ for i in range(1, 1000):
+ f.write(", ({})\n".format(get_one_data()))
+ f.write(";\n")
+
diff --git a/docker/thirdparties/docker-compose/iceberg/iceberg.env b/docker/thirdparties/docker-compose/iceberg/tools/save_docker.sh
similarity index 80%
copy from docker/thirdparties/docker-compose/iceberg/iceberg.env
copy to docker/thirdparties/docker-compose/iceberg/tools/save_docker.sh
index e4a95c99ce2..cc149d48118 100644
--- a/docker/thirdparties/docker-compose/iceberg/iceberg.env
+++ b/docker/thirdparties/docker-compose/iceberg/tools/save_docker.sh
@@ -16,9 +16,8 @@
# specific language governing permissions and limitations
# under the License.
-NOTEBOOK_SERVER_PORT=8888
-SPARK_DRIVER_UI_PORT=8080
-SPARK_HISTORY_UI_PORT=10000
-REST_CATALOG_PORT=8181
-MINIO_UI_PORT=9000
-MINIO_API_PORT=9001
+# save sqlite
+docker exec iceberg-rest bash -c 'cp /tmp/iceberg_rest_mode\=memory /mnt/data/input/'
+
+# save iceberg from s3
+docker exec mc bash -c 'mc cp -r minio/warehouse /mnt/data/input/minio'
diff --git a/docker/thirdparties/run-thirdparties-docker.sh b/docker/thirdparties/run-thirdparties-docker.sh
index baa4007ac17..de1422d1afc 100755
--- a/docker/thirdparties/run-thirdparties-docker.sh
+++ b/docker/thirdparties/run-thirdparties-docker.sh
@@ -60,7 +60,7 @@ STOP=0
if [[ "$#" == 1 ]]; then
# default
- COMPONENTS="mysql,es,hive,pg,oracle,sqlserver,clickhouse,mariadb"
+ COMPONENTS="mysql,es,hive,pg,oracle,sqlserver,clickhouse,mariadb,iceberg"
else
while true; do
case "$1" in
@@ -332,13 +332,12 @@ if [[ "${RUN_ICEBERG}" -eq 1 ]]; then
sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/iceberg/entrypoint.sh
sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/iceberg/spark-defaults.conf
sudo docker compose -f "${ROOT}"/docker-compose/iceberg/iceberg.yaml --env-file "${ROOT}"/docker-compose/iceberg/iceberg.env down
+ sudo rm -rf "${ROOT}"/docker-compose/iceberg/data
if [[ "${STOP}" -ne 1 ]]; then
- sudo rm -rf "${ROOT}"/docker-compose/iceberg/notebooks
- sudo mkdir "${ROOT}"/docker-compose/iceberg/notebooks
- sudo rm -rf "${ROOT}"/docker-compose/iceberg/spark
- sudo mkdir "${ROOT}"/docker-compose/iceberg/spark
- sudo rm -rf "${ROOT}"/docker-compose/iceberg/warehouse
- sudo mkdir "${ROOT}"/docker-compose/iceberg/warehouse
+ wget -P ${ROOT}/docker-compose/iceberg https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/iceberg/iceberg_data.zip
+ sudo unzip -d "${ROOT}"/docker-compose/iceberg -q ${ROOT}/docker-compose/iceberg/iceberg_data.zip
+ sudo mv "${ROOT}"/docker-compose/iceberg/iceberg_data "${ROOT}"/docker-compose/iceberg/data
+ sudo rm -rf ${ROOT}/docker-compose/iceberg/iceberg_data.zip
sudo docker compose -f "${ROOT}"/docker-compose/iceberg/iceberg.yaml --env-file "${ROOT}"/docker-compose/iceberg/iceberg.env up -d
fi
fi
diff --git a/regression-test/conf/regression-conf.groovy b/regression-test/conf/regression-conf.groovy
index acf9672de1a..5fdcc3692e9 100644
--- a/regression-test/conf/regression-conf.groovy
+++ b/regression-test/conf/regression-conf.groovy
@@ -177,6 +177,9 @@ s3Endpoint = "cos.ap-hongkong.myqcloud.com"
s3BucketName = "doris-build-hk-1308700295"
s3Region = "ap-hongkong"
+// iceberg rest catalog config
+iceberg_rest_uri_port=18181
+
// If the failure suite num exceeds this config
// all following suite will be skipped to fast quit the run.
// <=0 means no limit.
diff --git a/regression-test/pipeline/p0/conf/regression-conf.groovy b/regression-test/pipeline/p0/conf/regression-conf.groovy
index 549e28d9a82..fecda4db1fb 100644
--- a/regression-test/pipeline/p0/conf/regression-conf.groovy
+++ b/regression-test/pipeline/p0/conf/regression-conf.groovy
@@ -92,6 +92,9 @@ hiveServerPort=10000
enableKafkaTest=true
kafka_port=19193
+// iceberg test config
+iceberg_rest_uri_port=18181
+
enableEsTest=false
es_6_port=19200
es_7_port=29200
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]