This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 35fc172ba67 [Opt](docker) kerberos docker healthy check (#46662) 35fc172ba67 is described below commit 35fc172ba676a68213f14a4ae03212da7fec5c87 Author: zgxme <zhenggaoxi...@selectdb.com> AuthorDate: Fri Jan 10 18:01:04 2025 +0800 [Opt](docker) kerberos docker healthy check (#46662) ### What problem does this PR solve? Previously, kerberos docker did not have health checks which could lead to unstable tests. --- .../kerberos/entrypoint-hive-master-2.sh | 20 +++++++++++++++++--- .../kerberos/entrypoint-hive-master.sh | 20 +++++++++++++++++--- .../kerberos/health-checks/hadoop-health-check.sh | 1 + .../docker-compose/kerberos/health-checks/health.sh | 1 + .../{health.sh => hive-health-check-2.sh} | 18 ++---------------- .../hive-health-check.sh} | 16 +--------------- .../docker-compose/kerberos/kerberos.yaml.tpl | 16 ++++++++++++---- docker/thirdparties/run-thirdparties-docker.sh | 14 +++++--------- 8 files changed, 56 insertions(+), 50 deletions(-) diff --git a/docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master-2.sh b/docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master-2.sh index c21460c3a57..eb95c5cb697 100755 --- a/docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master-2.sh +++ b/docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master-2.sh @@ -25,12 +25,26 @@ cp /etc/trino/conf/presto-server.keytab /keytabs/other-presto-server.keytab cp /keytabs/update-location.sh /etc/hadoop-init.d/update-location.sh /usr/local/hadoop-run.sh & -sleep 30 +# check healthy hear +echo "Waiting for hadoop to be healthy" + +for i in {1..10}; do + if /usr/local/health.sh; then + echo "Hadoop is healthy" + break + fi + echo "Hadoop is not healthy yet. Retrying in 20 seconds..." + sleep 20 +done + +if [ $i -eq 10 ]; then + echo "Hadoop did not become healthy after 120 attempts. Exiting." 
+ exit 1 +fi echo "Init kerberos test data" kinit -kt /etc/hive/conf/hive.keytab hive/hadoop-maste...@otherrealm.com hive -f /usr/local/sql/create_kerberos_hive_table.sql - -sleep 20 +touch /mnt/SUCCESS tail -f /dev/null diff --git a/docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master.sh b/docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master.sh index 62924992219..76f49724297 100755 --- a/docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master.sh +++ b/docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master.sh @@ -23,12 +23,26 @@ mkdir -p /etc/hadoop-init.d/ cp /etc/trino/conf/* /keytabs/ /usr/local/hadoop-run.sh & -sleep 30 +# check healthy hear +echo "Waiting for hadoop to be healthy" + +for i in {1..10}; do + if /usr/local/health.sh; then + echo "Hadoop is healthy" + break + fi + echo "Hadoop is not healthy yet. Retrying in 20 seconds..." + sleep 20 +done + +if [ $i -eq 10 ]; then + echo "Hadoop did not become healthy after 120 attempts. Exiting." 
+ exit 1 +fi echo "Init kerberos test data" kinit -kt /etc/hive/conf/hive.keytab hive/hadoop-mas...@labs.teradata.com hive -f /usr/local/sql/create_kerberos_hive_table.sql - -sleep 20 +touch /mnt/SUCCESS tail -f /dev/null diff --git a/docker/thirdparties/docker-compose/kerberos/health-checks/hadoop-health-check.sh b/docker/thirdparties/docker-compose/kerberos/health-checks/hadoop-health-check.sh index 190fa838d6f..77df431d85a 100755 --- a/docker/thirdparties/docker-compose/kerberos/health-checks/hadoop-health-check.sh +++ b/docker/thirdparties/docker-compose/kerberos/health-checks/hadoop-health-check.sh @@ -32,6 +32,7 @@ fi FAILED=$(supervisorctl status | grep -v RUNNING || true) if [ "$FAILED" == "" ]; then + echo "All services are running" exit 0 else echo "Some of the services are failing: ${FAILED}" diff --git a/docker/thirdparties/docker-compose/kerberos/health-checks/health.sh b/docker/thirdparties/docker-compose/kerberos/health-checks/health.sh old mode 100644 new mode 100755 index 515f37e36ac..473d7ceaeb6 --- a/docker/thirdparties/docker-compose/kerberos/health-checks/health.sh +++ b/docker/thirdparties/docker-compose/kerberos/health-checks/health.sh @@ -32,3 +32,4 @@ if test -d "${HEALTH_D}"; then "${health_script}" &>> /var/log/container-health.log || exit 1 done fi +exit 0 diff --git a/docker/thirdparties/docker-compose/kerberos/health-checks/health.sh b/docker/thirdparties/docker-compose/kerberos/health-checks/hive-health-check-2.sh old mode 100644 new mode 100755 similarity index 72% copy from docker/thirdparties/docker-compose/kerberos/health-checks/health.sh copy to docker/thirdparties/docker-compose/kerberos/health-checks/hive-health-check-2.sh index 515f37e36ac..854524dac1f --- a/docker/thirdparties/docker-compose/kerberos/health-checks/health.sh +++ b/docker/thirdparties/docker-compose/kerberos/health-checks/hive-health-check-2.sh @@ -16,19 +16,5 @@ # specific language governing permissions and limitations # under the License. 
-set -euo pipefail - -if test $# -gt 0; then - echo "$0 does not accept arguments" >&2 - exit 32 -fi - -set -x - -HEALTH_D=${HEALTH_D:-/etc/health.d/} - -if test -d "${HEALTH_D}"; then - for health_script in "${HEALTH_D}"/*; do - "${health_script}" &>> /var/log/container-health.log || exit 1 - done -fi +kinit -kt /etc/hive/conf/hive.keytab hive/hadoop-maste...@otherrealm.com +beeline -u "jdbc:hive2://localhost:10000/default;principal=hive/hadoop-maste...@otherrealm.com" -e "show databases;" \ No newline at end of file diff --git a/docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master.sh b/docker/thirdparties/docker-compose/kerberos/health-checks/hive-health-check.sh similarity index 76% copy from docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master.sh copy to docker/thirdparties/docker-compose/kerberos/health-checks/hive-health-check.sh index 62924992219..4d3d86f69a2 100755 --- a/docker/thirdparties/docker-compose/kerberos/entrypoint-hive-master.sh +++ b/docker/thirdparties/docker-compose/kerberos/health-checks/hive-health-check.sh @@ -16,19 +16,5 @@ # specific language governing permissions and limitations # under the License. 
-set -euo pipefail - -echo "Copying kerberos keytabs to keytabs/" -mkdir -p /etc/hadoop-init.d/ -cp /etc/trino/conf/* /keytabs/ -/usr/local/hadoop-run.sh & - -sleep 30 - -echo "Init kerberos test data" kinit -kt /etc/hive/conf/hive.keytab hive/hadoop-mas...@labs.teradata.com -hive -f /usr/local/sql/create_kerberos_hive_table.sql - -sleep 20 - -tail -f /dev/null +beeline -u "jdbc:hive2://localhost:10000/default;principal=hive/hadoop-mas...@labs.teradata.com" -e "show databases;" \ No newline at end of file diff --git a/docker/thirdparties/docker-compose/kerberos/kerberos.yaml.tpl b/docker/thirdparties/docker-compose/kerberos/kerberos.yaml.tpl index 6aa353f3e0c..e635ed6bb27 100644 --- a/docker/thirdparties/docker-compose/kerberos/kerberos.yaml.tpl +++ b/docker/thirdparties/docker-compose/kerberos/kerberos.yaml.tpl @@ -24,13 +24,17 @@ services: - ./sql:/usr/local/sql - ./common/hadoop/apply-config-overrides.sh:/etc/hadoop-init.d/00-apply-config-overrides.sh - ./common/hadoop/hadoop-run.sh:/usr/local/hadoop-run.sh + - ./health-checks/health.sh:/usr/local/health.sh - ./health-checks/hadoop-health-check.sh:/etc/health.d/hadoop-health-check.sh + - ./health-checks/hive-health-check.sh:/etc/health.d/hive-health-check.sh - ./entrypoint-hive-master.sh:/usr/local/entrypoint-hive-master.sh - restart: on-failure hostname: hadoop-master entrypoint: /usr/local/entrypoint-hive-master.sh healthcheck: - test: ./health-checks/health.sh + test: ["CMD", "ls", "/mnt/SUCCESS"] + interval: 20s + timeout: 60s + retries: 120 ports: - "5806:5006" - "8820:8020" @@ -46,17 +50,21 @@ services: image: doristhirdpartydocker/trinodb:hdp3.1-hive-kerberized-2_96 container_name: doris--kerberos2 hostname: hadoop-master-2 - restart: on-failure volumes: - ./two-kerberos-hives:/keytabs - ./sql:/usr/local/sql - ./common/hadoop/apply-config-overrides.sh:/etc/hadoop-init.d/00-apply-config-overrides.sh - ./common/hadoop/hadoop-run.sh:/usr/local/hadoop-run.sh + - ./health-checks/health.sh:/usr/local/health.sh 
- ./health-checks/hadoop-health-check.sh:/etc/health.d/hadoop-health-check.sh + - ./health-checks/hive-health-check-2.sh:/etc/health.d/hive-health-check-2.sh - ./entrypoint-hive-master-2.sh:/usr/local/entrypoint-hive-master-2.sh entrypoint: /usr/local/entrypoint-hive-master-2.sh healthcheck: - test: ./health-checks/health.sh + test: ["CMD", "ls", "/mnt/SUCCESS"] + interval: 20s + timeout: 60s + retries: 120 ports: - "15806:5006" - "18820:8020" diff --git a/docker/thirdparties/run-thirdparties-docker.sh b/docker/thirdparties/run-thirdparties-docker.sh index fd9558eef55..e3769025fec 100755 --- a/docker/thirdparties/run-thirdparties-docker.sh +++ b/docker/thirdparties/run-thirdparties-docker.sh @@ -708,6 +708,11 @@ if [[ "${RUN_MINIO}" -eq 1 ]]; then pids["minio"]=$! fi +if [[ "${RUN_KERBEROS}" -eq 1 ]]; then + start_kerberos > start_kerberos.log 2>&1 & + pids["kerberos"]=$! +fi + echo "waiting all dockers starting done" for compose in "${!pids[@]}"; do @@ -727,15 +732,6 @@ for compose in "${!pids[@]}"; do fi done -if [[ "${RUN_KERBEROS}" -eq 1 ]]; then - echo "Starting Kerberos after all other components..." - start_kerberos > start_kerberos.log 2>&1 - if [ $? -ne 0 ]; then - echo "Kerberos startup failed" - cat start_kerberos.log - exit 1 - fi -fi echo "docker started" docker ps -a --format "{{.ID}} | {{.Image}} | {{.Status}}" echo "all dockers started successfully" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org