This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 35fc172ba67 [Opt](docker) kerberos docker healthy check (#46662)
35fc172ba67 is described below

commit 35fc172ba676a68213f14a4ae03212da7fec5c87
Author: zgxme <zhenggaoxi...@selectdb.com>
AuthorDate: Fri Jan 10 18:01:04 2025 +0800

    [Opt](docker) kerberos docker healthy check (#46662)
    
    ### What problem does this PR solve?
    Previously, the Kerberos Docker containers did not have health checks,
    which could lead to unstable tests.
---
 .../kerberos/entrypoint-hive-master-2.sh             | 20 +++++++++++++++++---
 .../kerberos/entrypoint-hive-master.sh               | 20 +++++++++++++++++---
 .../kerberos/health-checks/hadoop-health-check.sh    |  1 +
 .../docker-compose/kerberos/health-checks/health.sh  |  1 +
 .../{health.sh => hive-health-check-2.sh}            | 18 ++----------------
 .../hive-health-check.sh}                            | 16 +---------------
 .../docker-compose/kerberos/kerberos.yaml.tpl        | 16 ++++++++++++----
 docker/thirdparties/run-thirdparties-docker.sh       | 14 +++++---------
 8 files changed, 56 insertions(+), 50 deletions(-)

diff --git 
a/docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master-2.sh 
b/docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master-2.sh
index c21460c3a57..eb95c5cb697 100755
--- a/docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master-2.sh
+++ b/docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master-2.sh
@@ -25,12 +25,26 @@ cp /etc/trino/conf/presto-server.keytab 
/keytabs/other-presto-server.keytab
 cp /keytabs/update-location.sh /etc/hadoop-init.d/update-location.sh
 /usr/local/hadoop-run.sh &
 
-sleep 30
+# check health here
+echo "Waiting for hadoop to be healthy"
+
+for i in {1..10}; do
+    if /usr/local/health.sh; then
+        echo "Hadoop is healthy"
+        break
+    fi
+    echo "Hadoop is not healthy yet. Retrying in 20 seconds..."
+    sleep 20
+done
+
+if [ $i -eq 10 ]; then
+    echo "Hadoop did not become healthy after 120 attempts. Exiting."
+    exit 1
+fi
 
 echo "Init kerberos test data"
 kinit -kt /etc/hive/conf/hive.keytab hive/hadoop-maste...@otherrealm.com
 hive  -f /usr/local/sql/create_kerberos_hive_table.sql
-
-sleep 20
+touch /mnt/SUCCESS
 
 tail -f /dev/null
diff --git 
a/docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master.sh 
b/docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master.sh
index 62924992219..76f49724297 100755
--- a/docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master.sh
+++ b/docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master.sh
@@ -23,12 +23,26 @@ mkdir -p /etc/hadoop-init.d/
 cp /etc/trino/conf/* /keytabs/
 /usr/local/hadoop-run.sh &
 
-sleep 30
+# check health here
+echo "Waiting for hadoop to be healthy"
+
+for i in {1..10}; do
+    if /usr/local/health.sh; then
+        echo "Hadoop is healthy"
+        break
+    fi
+    echo "Hadoop is not healthy yet. Retrying in 20 seconds..."
+    sleep 20
+done
+
+if [ $i -eq 10 ]; then
+    echo "Hadoop did not become healthy after 120 attempts. Exiting."
+    exit 1
+fi
 
 echo "Init kerberos test data"
 kinit -kt /etc/hive/conf/hive.keytab hive/hadoop-mas...@labs.teradata.com
 hive  -f /usr/local/sql/create_kerberos_hive_table.sql
-
-sleep 20
+touch /mnt/SUCCESS
 
 tail -f /dev/null
diff --git 
a/docker/thirdparties/docker-compose/kerberos/health-checks/hadoop-health-check.sh
 
b/docker/thirdparties/docker-compose/kerberos/health-checks/hadoop-health-check.sh
index 190fa838d6f..77df431d85a 100755
--- 
a/docker/thirdparties/docker-compose/kerberos/health-checks/hadoop-health-check.sh
+++ 
b/docker/thirdparties/docker-compose/kerberos/health-checks/hadoop-health-check.sh
@@ -32,6 +32,7 @@ fi
 FAILED=$(supervisorctl status | grep -v RUNNING || true)
 
 if [ "$FAILED" == "" ]; then
+  echo "All services are running"
   exit 0
 else
   echo "Some of the services are failing: ${FAILED}"
diff --git 
a/docker/thirdparties/docker-compose/kerberos/health-checks/health.sh 
b/docker/thirdparties/docker-compose/kerberos/health-checks/health.sh
old mode 100644
new mode 100755
index 515f37e36ac..473d7ceaeb6
--- a/docker/thirdparties/docker-compose/kerberos/health-checks/health.sh
+++ b/docker/thirdparties/docker-compose/kerberos/health-checks/health.sh
@@ -32,3 +32,4 @@ if test -d "${HEALTH_D}"; then
         "${health_script}" &>> /var/log/container-health.log || exit 1
     done
 fi
+exit 0
diff --git 
a/docker/thirdparties/docker-compose/kerberos/health-checks/health.sh 
b/docker/thirdparties/docker-compose/kerberos/health-checks/hive-health-check-2.sh
old mode 100644
new mode 100755
similarity index 72%
copy from docker/thirdparties/docker-compose/kerberos/health-checks/health.sh
copy to 
docker/thirdparties/docker-compose/kerberos/health-checks/hive-health-check-2.sh
index 515f37e36ac..854524dac1f
--- a/docker/thirdparties/docker-compose/kerberos/health-checks/health.sh
+++ 
b/docker/thirdparties/docker-compose/kerberos/health-checks/hive-health-check-2.sh
@@ -16,19 +16,5 @@
 # specific language governing permissions and limitations
 # under the License.
 
-set -euo pipefail
-
-if test $# -gt 0; then
-    echo "$0 does not accept arguments" >&2
-    exit 32
-fi
-
-set -x
-
-HEALTH_D=${HEALTH_D:-/etc/health.d/}
-
-if test -d "${HEALTH_D}"; then
-    for health_script in "${HEALTH_D}"/*; do
-        "${health_script}" &>> /var/log/container-health.log || exit 1
-    done
-fi
+kinit -kt /etc/hive/conf/hive.keytab hive/hadoop-maste...@otherrealm.com
+beeline -u 
"jdbc:hive2://localhost:10000/default;principal=hive/hadoop-maste...@otherrealm.com"
 -e "show databases;"
\ No newline at end of file
diff --git 
a/docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master.sh 
b/docker/thirdparties/docker-compose/kerberos/health-checks/hive-health-check.sh
similarity index 76%
copy from docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master.sh
copy to 
docker/thirdparties/docker-compose/kerberos/health-checks/hive-health-check.sh
index 62924992219..4d3d86f69a2 100755
--- a/docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master.sh
+++ 
b/docker/thirdparties/docker-compose/kerberos/health-checks/hive-health-check.sh
@@ -16,19 +16,5 @@
 # specific language governing permissions and limitations
 # under the License.
 
-set -euo pipefail
-
-echo "Copying kerberos keytabs to keytabs/"
-mkdir -p /etc/hadoop-init.d/
-cp /etc/trino/conf/* /keytabs/
-/usr/local/hadoop-run.sh &
-
-sleep 30
-
-echo "Init kerberos test data"
 kinit -kt /etc/hive/conf/hive.keytab hive/hadoop-mas...@labs.teradata.com
-hive  -f /usr/local/sql/create_kerberos_hive_table.sql
-
-sleep 20
-
-tail -f /dev/null
+beeline -u 
"jdbc:hive2://localhost:10000/default;principal=hive/hadoop-mas...@labs.teradata.com"
 -e "show databases;"
\ No newline at end of file
diff --git a/docker/thirdparties/docker-compose/kerberos/kerberos.yaml.tpl 
b/docker/thirdparties/docker-compose/kerberos/kerberos.yaml.tpl
index 6aa353f3e0c..e635ed6bb27 100644
--- a/docker/thirdparties/docker-compose/kerberos/kerberos.yaml.tpl
+++ b/docker/thirdparties/docker-compose/kerberos/kerberos.yaml.tpl
@@ -24,13 +24,17 @@ services:
       - ./sql:/usr/local/sql
       - 
./common/hadoop/apply-config-overrides.sh:/etc/hadoop-init.d/00-apply-config-overrides.sh
       - ./common/hadoop/hadoop-run.sh:/usr/local/hadoop-run.sh
+      - ./health-checks/health.sh:/usr/local/health.sh
       - 
./health-checks/hadoop-health-check.sh:/etc/health.d/hadoop-health-check.sh
+      - ./health-checks/hive-health-check.sh:/etc/health.d/hive-health-check.sh
       - ./entrypoint-hive-master.sh:/usr/local/entrypoint-hive-master.sh
-    restart: on-failure
     hostname: hadoop-master
     entrypoint: /usr/local/entrypoint-hive-master.sh
     healthcheck:
-      test: ./health-checks/health.sh
+      test: ["CMD", "ls", "/mnt/SUCCESS"]
+      interval: 20s
+      timeout: 60s
+      retries: 120
     ports:
       - "5806:5006"
       - "8820:8020"
@@ -46,17 +50,21 @@ services:
     image: doristhirdpartydocker/trinodb:hdp3.1-hive-kerberized-2_96
     container_name: doris--kerberos2
     hostname: hadoop-master-2
-    restart: on-failure
     volumes:
       - ./two-kerberos-hives:/keytabs
       - ./sql:/usr/local/sql
       - 
./common/hadoop/apply-config-overrides.sh:/etc/hadoop-init.d/00-apply-config-overrides.sh
       - ./common/hadoop/hadoop-run.sh:/usr/local/hadoop-run.sh
+      - ./health-checks/health.sh:/usr/local/health.sh
       - 
./health-checks/hadoop-health-check.sh:/etc/health.d/hadoop-health-check.sh
+      - 
./health-checks/hive-health-check-2.sh:/etc/health.d/hive-health-check-2.sh
       - ./entrypoint-hive-master-2.sh:/usr/local/entrypoint-hive-master-2.sh
     entrypoint: /usr/local/entrypoint-hive-master-2.sh
     healthcheck:
-      test: ./health-checks/health.sh
+      test: ["CMD", "ls", "/mnt/SUCCESS"]
+      interval: 20s
+      timeout: 60s
+      retries: 120
     ports:
       - "15806:5006"
       - "18820:8020"
diff --git a/docker/thirdparties/run-thirdparties-docker.sh 
b/docker/thirdparties/run-thirdparties-docker.sh
index fd9558eef55..e3769025fec 100755
--- a/docker/thirdparties/run-thirdparties-docker.sh
+++ b/docker/thirdparties/run-thirdparties-docker.sh
@@ -708,6 +708,11 @@ if [[ "${RUN_MINIO}" -eq 1 ]]; then
     pids["minio"]=$!
 fi
 
+if [[ "${RUN_KERBEROS}" -eq 1 ]]; then
+    start_kerberos > start_kerberos.log 2>&1 &
+    pids["kerberos"]=$!
+fi
+
 echo "waiting all dockers starting done"
 
 for compose in "${!pids[@]}"; do
@@ -727,15 +732,6 @@ for compose in "${!pids[@]}"; do
     fi
 done
 
-if [[ "${RUN_KERBEROS}" -eq 1 ]]; then
-    echo "Starting Kerberos after all other components..."
-    start_kerberos > start_kerberos.log 2>&1
-    if [ $? -ne 0 ]; then
-        echo "Kerberos startup failed"
-        cat start_kerberos.log
-        exit 1
-    fi
-fi
 echo "docker started"
 docker ps -a --format "{{.ID}} | {{.Image}} | {{.Status}}"
 echo "all dockers started successfully"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to