This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 10f19573791 [feature](docker)add docker-iceberg init tables (#25424)
10f19573791 is described below

commit 10f19573791c5fcd3edeab29eaf84a6ca557fb0c
Author: wuwenchi <[email protected]>
AuthorDate: Tue Oct 24 19:29:57 2023 +0800

    [feature](docker)add docker-iceberg init tables (#25424)
    
    Add some init tables for docker-iceberg.
---
 .../thirdparties/docker-compose/iceberg/README.md  |  25 +++++
 .../docker-compose/iceberg/iceberg.env             |   2 +-
 .../docker-compose/iceberg/iceberg.yaml.tpl        |  55 ++++++-----
 .../docker-compose/iceberg/tools/gen_data.py       | 106 +++++++++++++++++++++
 .../iceberg/{iceberg.env => tools/save_docker.sh}  |  11 +--
 docker/thirdparties/run-thirdparties-docker.sh     |  13 ++-
 regression-test/conf/regression-conf.groovy        |   3 +
 .../pipeline/p0/conf/regression-conf.groovy        |   3 +
 8 files changed, 176 insertions(+), 42 deletions(-)
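
For context, this change republishes the Iceberg REST catalog on host port 18181 (see the iceberg.env and regression-conf.groovy hunks below). A minimal sketch of how one might verify the seeded catalog from the host follows; it assumes the compose stack from this commit is running locally and that the tabulario/iceberg-rest image serves the standard Iceberg REST endpoints (/v1/config, /v1/namespaces). Nothing in the sketch is part of the commit itself.

```
# Sketch: probe the dockerized Iceberg REST catalog on the remapped port.
# Assumes the compose stack is up on localhost; REST_PORT mirrors
# REST_CATALOG_PORT=18181 from iceberg.env.
import json
import urllib.request

REST_PORT = 18181

def rest_get(path):
    url = "http://localhost:{}{}".format(REST_PORT, path)
    with urllib.request.urlopen(url, timeout=5) as resp:
        return json.loads(resp.read())

# Catalog-level configuration (defaults/overrides).
print(rest_get("/v1/config"))

# Namespaces created by the pre-seeded iceberg_data.
print(rest_get("/v1/namespaces"))
```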

diff --git a/docker/thirdparties/docker-compose/iceberg/README.md b/docker/thirdparties/docker-compose/iceberg/README.md
new file mode 100644
index 00000000000..7458c717032
--- /dev/null
+++ b/docker/thirdparties/docker-compose/iceberg/README.md
@@ -0,0 +1,25 @@
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+
+```
+tools:
+    gen_data.py: generate random data
+    save_docker.sh: save the current docker state
+```
diff --git a/docker/thirdparties/docker-compose/iceberg/iceberg.env b/docker/thirdparties/docker-compose/iceberg/iceberg.env
index e4a95c99ce2..4cc8b42eaf9 100644
--- a/docker/thirdparties/docker-compose/iceberg/iceberg.env
+++ b/docker/thirdparties/docker-compose/iceberg/iceberg.env
@@ -19,6 +19,6 @@
 NOTEBOOK_SERVER_PORT=8888
 SPARK_DRIVER_UI_PORT=8080
 SPARK_HISTORY_UI_PORT=10000
-REST_CATALOG_PORT=8181
+REST_CATALOG_PORT=18181
 MINIO_UI_PORT=9000
 MINIO_API_PORT=9001
diff --git a/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl b/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
index 1cd3d4acf9e..343c8dd2e5a 100644
--- a/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
+++ b/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
@@ -18,64 +18,59 @@
 version: "3"
 
 services:
-  doris--spark-iceberg:
+  spark-iceberg:
     image: tabulario/spark-iceberg
     container_name: doris--spark-iceberg
     hostname: doris--spark-iceberg
     build: spark/
     depends_on:
-      - doris--rest
-      - doris--minio
+      - rest
+      - minio
     volumes:
-      - ./warehouse:/home/iceberg/warehouse
-      - ./notebooks:/home/iceberg/notebooks/notebooks
-      - ./entrypoint.sh:/opt/spark/entrypoint.sh
-      - ./spark-defaults.conf:/opt/spark/conf/spark-defaults.conf
+      - ./data/output/spark-warehouse:/home/iceberg/warehouse
+      - ./data/output/spark-notebooks:/home/iceberg/notebooks/notebooks
+      - ./data:/mnt/data
     environment:
       - AWS_ACCESS_KEY_ID=admin
       - AWS_SECRET_ACCESS_KEY=password
       - AWS_REGION=us-east-1
-    ports:
-      - ${NOTEBOOK_SERVER_PORT}:8888
-      - ${SPARK_DRIVER_UI_PORT}:8080
-      - ${SPARK_HISTORY_UI_PORT}:10000
-    links:
-      - doris--rest:rest
-      - doris--minio:minio
     networks:
       - doris--iceberg
-    entrypoint:
-      - /opt/spark/entrypoint.sh
 
-  doris--rest:
-    image: tabulario/iceberg-rest:0.2.0
+  rest:
+    image: tabulario/iceberg-rest
+    container_name: doris--iceberg-rest
     ports:
       - ${REST_CATALOG_PORT}:8181
+    volumes:
+      - ./data:/mnt/data
     environment:
       - AWS_ACCESS_KEY_ID=admin
       - AWS_SECRET_ACCESS_KEY=password
       - AWS_REGION=us-east-1
       - CATALOG_WAREHOUSE=s3a://warehouse/wh/
       - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
-      - CATALOG_S3_ENDPOINT=http://doris--minio:9000
+      - CATALOG_S3_ENDPOINT=http://minio:9000
     networks:
       - doris--iceberg
-  doris--minio:
+    entrypoint: /bin/bash /mnt/data/input/script/rest_init.sh
+
+  minio:
     image: minio/minio
     container_name: doris--minio
-    hostname: doris--minio
     environment:
       - MINIO_ROOT_USER=admin
       - MINIO_ROOT_PASSWORD=password
-    ports:
-      - ${MINIO_UI_PORT}:9001
-      - ${MINIO_API_PORT}:9000
+      - MINIO_DOMAIN=minio
     networks:
-      - doris--iceberg
+      doris--iceberg:
+        aliases:
+          - warehouse.minio
     command: ["server", "/data", "--console-address", ":9001"]
-  doris--mc:
+
+  mc:
     depends_on:
-      - doris--minio
+      - minio
     image: minio/mc
     container_name: doris--mc
     environment:
@@ -84,12 +79,16 @@ services:
       - AWS_REGION=us-east-1
     networks:
       - doris--iceberg
+    volumes:
+      - ./data:/mnt/data
     entrypoint: >
       /bin/sh -c "
-      until (/usr/bin/mc config host add minio http://doris--minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
+      until (/usr/bin/mc config host add minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
       /usr/bin/mc rm -r --force minio/warehouse;
       /usr/bin/mc mb minio/warehouse;
       /usr/bin/mc policy set public minio/warehouse;
+      echo 'copy data';
+      mc cp -r /mnt/data/input/minio/warehouse/* minio/warehouse/;
       tail -f /dev/null
       "
 networks:
diff --git a/docker/thirdparties/docker-compose/iceberg/tools/gen_data.py b/docker/thirdparties/docker-compose/iceberg/tools/gen_data.py
new file mode 100644
index 00000000000..2bd2d7f0ff2
--- /dev/null
+++ b/docker/thirdparties/docker-compose/iceberg/tools/gen_data.py
@@ -0,0 +1,106 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import random
+import string
+
+table_name = "demo.format_v1.sample_parquet"
+
+alphabet = 'abcdefghijklmnopqrstuvwxyz!@#$%^&*()'
+binary_alphabet = '11111111111110000000000000000000'
+data_choice = ["date('2000-12-31')", "date('1969-09-21')", "date('2969-02-03')"]
+timestamp_choice = [    
+    "TIMESTAMP '1970-01-01 00:00:01.000001 UTC+00:00'",
+    "TIMESTAMP '1970-01-02 00:00:01.000001 UTC+00:00'",
+    "TIMESTAMP '1970-01-03 00:00:01.000001 UTC+00:00'",
+    "TIMESTAMP '1970-01-04 00:00:01.000001 UTC+00:00'"]
+timestamp_ntz_choice = [
+    "TIMESTAMP_NTZ '2017-12-01 10:12:55.038194 UTC'",
+    "TIMESTAMP_NTZ '2017-12-02 10:12:55.038194 UTC'",
+    "TIMESTAMP_NTZ '2017-12-03 10:12:55.038194 UTC'",
+    "TIMESTAMP_NTZ '2017-12-04 10:12:55.038194 UTC'",
+]
+city_choice = [
+    "'Shanghai'", "'Hefei'", "'Beijing'", "'Hangzhou'"
+]
+
+
+def get_one_data():
+    id = random.randint(-100000000, 100000000)
+
+    col_boolean = True
+    if random.randint(-1000000, 1000000) % 2 == 0:
+        col_boolean = False
+
+    col_short = random.randint(-32700, 32700)
+
+    col_byte = random.randint(-128, 127)
+
+    col_integer = random.randint(-21474836, 2147483)
+
+    col_long = random.randint(-92233720368547758, 92233720368547758)
+
+    col_float = random.random() * 10
+
+    col_double = random.random() * 10
+
+    col_date = random.choice(data_choice)
+
+    col_timestamp = random.choice(timestamp_choice)
+
+    col_timestamp_ntz = random.choice(timestamp_ntz_choice)
+
+    col_char = "".join(random.sample(alphabet, random.randint(1,18)))
+
+    col_varchar = ''.join(random.sample(string.ascii_letters + string.digits, random.randint(1, 20)))
+
+    col_string = ''.join(random.sample(string.ascii_letters + string.digits, random.randint(1, 20)))
+
+    col_binary = ''.join(random.sample(binary_alphabet, random.randint(1,30)))
+
+    col_decimal = random.random() * 10000
+
+    city = random.choice(city_choice)
+
+    out = "{},{},{},{},{},{},{},{},{},{},{},'{}','{}','{}',CAST('{}' AS BINARY),{},{}".format(
+        id,
+        col_boolean,
+        col_short,
+        col_byte,
+        col_integer,
+        col_long,
+        col_float,
+        col_double,
+        col_date,
+        col_timestamp,
+        col_timestamp_ntz,
+        col_char,
+        col_varchar,
+        col_string,
+        col_binary,
+        col_decimal,
+        city
+    )
+    return out
+
+with open('insert_table_values.sql', 'w') as f:
+    f.write("INSERT INTO {} VALUES\n".format(table_name))
+    f.write("  ({})\n".format(get_one_data()))
+    for i in range(1, 1000):
+        f.write(", ({})\n".format(get_one_data()))
+    f.write(";\n")
+
diff --git a/docker/thirdparties/docker-compose/iceberg/iceberg.env b/docker/thirdparties/docker-compose/iceberg/tools/save_docker.sh
similarity index 80%
copy from docker/thirdparties/docker-compose/iceberg/iceberg.env
copy to docker/thirdparties/docker-compose/iceberg/tools/save_docker.sh
index e4a95c99ce2..cc149d48118 100644
--- a/docker/thirdparties/docker-compose/iceberg/iceberg.env
+++ b/docker/thirdparties/docker-compose/iceberg/tools/save_docker.sh
@@ -16,9 +16,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
-NOTEBOOK_SERVER_PORT=8888
-SPARK_DRIVER_UI_PORT=8080
-SPARK_HISTORY_UI_PORT=10000
-REST_CATALOG_PORT=8181
-MINIO_UI_PORT=9000
-MINIO_API_PORT=9001
+# save sqlite
+docker exec iceberg-rest bash -c 'cp /tmp/iceberg_rest_mode\=memory /mnt/data/input/'
+
+# save iceberg from s3
+docker exec mc bash -c 'mc cp -r minio/warehouse /mnt/data/input/minio'
diff --git a/docker/thirdparties/run-thirdparties-docker.sh b/docker/thirdparties/run-thirdparties-docker.sh
index baa4007ac17..de1422d1afc 100755
--- a/docker/thirdparties/run-thirdparties-docker.sh
+++ b/docker/thirdparties/run-thirdparties-docker.sh
@@ -60,7 +60,7 @@ STOP=0
 
 if [[ "$#" == 1 ]]; then
     # default
-    COMPONENTS="mysql,es,hive,pg,oracle,sqlserver,clickhouse,mariadb"
+    COMPONENTS="mysql,es,hive,pg,oracle,sqlserver,clickhouse,mariadb,iceberg"
 else
     while true; do
         case "$1" in
@@ -332,13 +332,12 @@ if [[ "${RUN_ICEBERG}" -eq 1 ]]; then
     sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/iceberg/entrypoint.sh
     sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/iceberg/spark-defaults.conf
     sudo docker compose -f "${ROOT}"/docker-compose/iceberg/iceberg.yaml --env-file "${ROOT}"/docker-compose/iceberg/iceberg.env down
+    sudo rm -rf "${ROOT}"/docker-compose/iceberg/data
     if [[ "${STOP}" -ne 1 ]]; then
-        sudo rm -rf "${ROOT}"/docker-compose/iceberg/notebooks
-        sudo mkdir "${ROOT}"/docker-compose/iceberg/notebooks
-        sudo rm -rf "${ROOT}"/docker-compose/iceberg/spark
-        sudo mkdir "${ROOT}"/docker-compose/iceberg/spark
-        sudo rm -rf "${ROOT}"/docker-compose/iceberg/warehouse
-        sudo mkdir "${ROOT}"/docker-compose/iceberg/warehouse
+        wget -P ${ROOT}/docker-compose/iceberg https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/iceberg/iceberg_data.zip
+        sudo unzip -d "${ROOT}"/docker-compose/iceberg -q ${ROOT}/docker-compose/iceberg/iceberg_data.zip
+        sudo mv "${ROOT}"/docker-compose/iceberg/iceberg_data "${ROOT}"/docker-compose/iceberg/data
+        sudo rm -rf ${ROOT}/docker-compose/iceberg/iceberg_data.zip
         sudo docker compose -f "${ROOT}"/docker-compose/iceberg/iceberg.yaml --env-file "${ROOT}"/docker-compose/iceberg/iceberg.env up -d
     fi
 fi
diff --git a/regression-test/conf/regression-conf.groovy b/regression-test/conf/regression-conf.groovy
index acf9672de1a..5fdcc3692e9 100644
--- a/regression-test/conf/regression-conf.groovy
+++ b/regression-test/conf/regression-conf.groovy
@@ -177,6 +177,9 @@ s3Endpoint = "cos.ap-hongkong.myqcloud.com"
 s3BucketName = "doris-build-hk-1308700295"
 s3Region = "ap-hongkong"
 
+// iceberg rest catalog config
+iceberg_rest_uri_port=18181
+
 // If the failure suite num exceeds this config
 // all following suite will be skipped to fast quit the run.
 // <=0 means no limit.
diff --git a/regression-test/pipeline/p0/conf/regression-conf.groovy b/regression-test/pipeline/p0/conf/regression-conf.groovy
index 549e28d9a82..fecda4db1fb 100644
--- a/regression-test/pipeline/p0/conf/regression-conf.groovy
+++ b/regression-test/pipeline/p0/conf/regression-conf.groovy
@@ -92,6 +92,9 @@ hiveServerPort=10000
 enableKafkaTest=true
 kafka_port=19193
 
+// iceberg test config
+iceberg_rest_uri_port=18181
+
 enableEsTest=false
 es_6_port=19200
 es_7_port=29200
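
With the environment seeded, regression runs can point a Doris Iceberg catalog at the REST service on iceberg_rest_uri_port=18181. The sketch below shows that wiring under stated assumptions: it uses Doris's documented Iceberg REST catalog properties, an FE reachable on the default MySQL port 9030, and placeholder addresses for the REST service and MinIO (the MinIO API port is not published to the host in this compose file, so the endpoint must be reachable from the FE/BE network). The access key and secret key come from iceberg.yaml.tpl; everything else is illustrative.

```
# Sketch only: hosts, ports and FE credentials below are assumptions,
# not values taken from this commit; adjust them to the actual docker network.
import pymysql

DDL = """
CREATE CATALOG IF NOT EXISTS iceberg_rest PROPERTIES (
    'type' = 'iceberg',
    'iceberg.catalog.type' = 'rest',
    'uri' = 'http://172.21.0.1:18181',
    's3.access_key' = 'admin',
    's3.secret_key' = 'password',
    's3.endpoint' = 'http://172.21.0.1:9000',
    's3.region' = 'us-east-1'
)
"""

# Connect to the Doris FE over the MySQL protocol and create the catalog,
# then list the databases exposed by the seeded REST catalog.
conn = pymysql.connect(host="127.0.0.1", port=9030, user="root", password="")
with conn.cursor() as cur:
    cur.execute(DDL)
    cur.execute("SHOW DATABASES FROM iceberg_rest")
    print(cur.fetchall())
conn.close()
```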


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
