This is an automated email from the ASF dual-hosted git repository. rmani pushed a commit to branch RANGER-5310 in repository https://gitbox.apache.org/repos/asf/ranger.git
commit 6cd72c3b0e58b2fdd9a78ff6816b8933f49eb53a Author: Ramesh Mani <[email protected]> AuthorDate: Fri Sep 5 21:55:12 2025 -0700 RANGER-5310:Include Apache Tez as the process framework for ranger-hive docker Signed-off-by: Ramesh Mani <[email protected]> --- dev-support/ranger-docker/.env | 4 +- dev-support/ranger-docker/Dockerfile.ranger | 3 + dev-support/ranger-docker/Dockerfile.ranger-hadoop | 7 ++ dev-support/ranger-docker/Dockerfile.ranger-hbase | 5 +- dev-support/ranger-docker/Dockerfile.ranger-hive | 17 ++- dev-support/ranger-docker/Dockerfile.ranger-kafka | 6 +- dev-support/ranger-docker/Dockerfile.ranger-kms | 4 +- dev-support/ranger-docker/Dockerfile.ranger-knox | 5 +- .../ranger-docker/Dockerfile.ranger-tagsync | 5 +- .../ranger-docker/Dockerfile.ranger-usersync | 5 +- .../ranger-docker/docker-compose.ranger-hadoop.yml | 2 + .../ranger-docker/docker-compose.ranger-hive.yml | 3 + dev-support/ranger-docker/download-archives.sh | 2 + dev-support/ranger-docker/scripts/create-users.sh | 62 ++++++++++ .../scripts/hive-site-metastore-mysql.xml | 132 +++++++++++++++++++++ .../ranger-docker/scripts/hive-site-mysql.xml | 68 +++++++++++ .../ranger-docker/scripts/hive-site-oracle.xml | 68 +++++++++++ .../ranger-docker/scripts/hive-site-postgres.xml | 68 +++++++++++ .../ranger-docker/scripts/hive-site-sqlserver.xml | 68 +++++++++++ .../ranger-docker/scripts/ranger-hadoop-setup.sh | 65 ++++++++++ .../ranger-docker/scripts/ranger-hive-setup.sh | 128 ++++++++++++++++++++ dev-support/ranger-docker/scripts/tez-site.xml | 93 +++++++++++++++ 22 files changed, 809 insertions(+), 11 deletions(-) diff --git a/dev-support/ranger-docker/.env b/dev-support/ranger-docker/.env index e6de538a2..e65b83627 100644 --- a/dev-support/ranger-docker/.env +++ b/dev-support/ranger-docker/.env @@ -25,8 +25,8 @@ SOLR_VERSION=8.11.3 HADOOP_VERSION=3.3.6 HBASE_VERSION=2.6.0 HIVE_VERSION=4.0.1 -HIVE_HADOOP_VERSION=3.1.1 -KAFKA_VERSION=2.8.2 +HIVE_HADOOP_VERSION=3.3.6 +TEZ_VERSION=0.10.4 
KNOX_VERSION=2.0.0 TRINO_VERSION=377 OZONE_VERSION=1.4.0 diff --git a/dev-support/ranger-docker/Dockerfile.ranger b/dev-support/ranger-docker/Dockerfile.ranger index b6dcff30a..fecc7b265 100644 --- a/dev-support/ranger-docker/Dockerfile.ranger +++ b/dev-support/ranger-docker/Dockerfile.ranger @@ -29,6 +29,7 @@ COPY ./dist/ranger-${RANGER_VERSION}-admin.tar.gz /home/ranger/dist/ COPY ./scripts/ranger.sh ${RANGER_SCRIPTS}/ COPY ./scripts/ranger-admin-install-${RANGER_DB_TYPE}.properties ${RANGER_SCRIPTS}/ranger-admin-install.properties COPY ./scripts/create-ranger-services.py ${RANGER_SCRIPTS}/ +COPY ./scripts/create-users.sh ${RANGER_SCRIPTS}/ RUN tar xvfz /home/ranger/dist/ranger-${RANGER_VERSION}-admin.tar.gz --directory=${RANGER_HOME} \ && ln -s ${RANGER_HOME}/ranger-${RANGER_VERSION}-admin ${RANGER_HOME}/admin \ @@ -37,6 +38,8 @@ RUN tar xvfz /home/ranger/dist/ranger-${RANGER_VERSION}-admin.tar.gz --direct && mkdir -p /var/run/ranger \ && mkdir -p /var/log/ranger \ && chown -R ranger:ranger ${RANGER_HOME}/admin/ ${RANGER_SCRIPTS}/ /var/run/ranger/ /var/log/ranger/ \ + && chmod +x ${RANGER_SCRIPTS}/create-users.sh \ + && ${RANGER_SCRIPTS}/create-users.sh \ && chmod 755 ${RANGER_SCRIPTS}/ranger.sh \ && mkdir -p /usr/share/java/ diff --git a/dev-support/ranger-docker/Dockerfile.ranger-hadoop b/dev-support/ranger-docker/Dockerfile.ranger-hadoop index 5fc455e4b..3a5761f03 100644 --- a/dev-support/ranger-docker/Dockerfile.ranger-hadoop +++ b/dev-support/ranger-docker/Dockerfile.ranger-hadoop @@ -21,22 +21,27 @@ FROM ${RANGER_BASE_IMAGE}:${RANGER_BASE_VERSION} ARG HADOOP_VERSION ARG HDFS_PLUGIN_VERSION ARG YARN_PLUGIN_VERSION +ARG TEZ_VERSION COPY ./dist/version /home/ranger/dist/ COPY ./dist/ranger-${HDFS_PLUGIN_VERSION}-hdfs-plugin.tar.gz /home/ranger/dist/ COPY ./dist/ranger-${YARN_PLUGIN_VERSION}-yarn-plugin.tar.gz /home/ranger/dist/ COPY ./downloads/hadoop-${HADOOP_VERSION}.tar.gz /home/ranger/dist/ +COPY ./downloads/apache-tez-${TEZ_VERSION}-bin.tar.gz 
/home/ranger/dist/ COPY ./scripts/ranger-hadoop-setup.sh /home/ranger/scripts/ COPY ./scripts/ranger-hadoop.sh /home/ranger/scripts/ COPY ./scripts/ranger-hadoop-mkdir.sh /home/ranger/scripts/ COPY ./scripts/ranger-hdfs-plugin-install.properties /home/ranger/scripts/ COPY ./scripts/ranger-yarn-plugin-install.properties /home/ranger/scripts/ +COPY ./scripts/create-users.sh /home/ranger/scripts/ RUN tar xvfz /home/ranger/dist/hadoop-${HADOOP_VERSION}.tar.gz --directory=/opt/ && \ ln -s /opt/hadoop-${HADOOP_VERSION} /opt/hadoop && \ rm -f /home/ranger/dist/hadoop-${HADOOP_VERSION}.tar.gz && \ + tar xvfz /home/ranger/dist/apache-tez-${TEZ_VERSION}-bin.tar.gz --directory=/opt/ && \ + ln -s /opt/apache-tez-${TEZ_VERSION}-bin /opt/tez && \ tar xvfz /home/ranger/dist/ranger-${HDFS_PLUGIN_VERSION}-hdfs-plugin.tar.gz --directory=/opt/ranger && \ ln -s /opt/ranger/ranger-${HDFS_PLUGIN_VERSION}-hdfs-plugin /opt/ranger/ranger-hdfs-plugin && \ rm -f /home/ranger/dist/ranger-${HDFS_PLUGIN_VERSION}-hdfs-plugin.tar.gz && \ @@ -46,6 +51,8 @@ RUN tar xvfz /home/ranger/dist/hadoop-${HADOOP_VERSION}.tar.gz --directory=/opt/ rm -f /home/ranger/dist/ranger-${YARN_PLUGIN_VERSION}-yarn-plugin.tar.gz && \ cp -f /home/ranger/scripts/ranger-yarn-plugin-install.properties /opt/ranger/ranger-yarn-plugin/install.properties && \ chmod 744 ${RANGER_SCRIPTS}/ranger-hadoop-setup.sh ${RANGER_SCRIPTS}/ranger-hadoop.sh ${RANGER_SCRIPTS}/ranger-hadoop-mkdir.sh && \ + chmod +x ${RANGER_SCRIPTS}/create-users.sh && \ + ${RANGER_SCRIPTS}/create-users.sh && \ chown hdfs:hadoop ${RANGER_SCRIPTS}/ranger-hadoop-mkdir.sh RUN apt-get update && \ diff --git a/dev-support/ranger-docker/Dockerfile.ranger-hbase b/dev-support/ranger-docker/Dockerfile.ranger-hbase index 0d0120079..0d1884f0a 100644 --- a/dev-support/ranger-docker/Dockerfile.ranger-hbase +++ b/dev-support/ranger-docker/Dockerfile.ranger-hbase @@ -29,6 +29,7 @@ COPY ./downloads/hbase-${HBASE_VERSION}-bin.tar.gz /home/ranger/dis COPY 
./scripts/ranger-hbase-setup.sh /home/ranger/scripts/ COPY ./scripts/ranger-hbase.sh /home/ranger/scripts/ COPY ./scripts/ranger-hbase-plugin-install.properties /home/ranger/scripts/ +COPY ./scripts/create-users.sh /home/ranger/scripts/ COPY ./scripts/hbase-site.xml /home/ranger/scripts/ RUN tar xvfz /home/ranger/dist/hbase-${HBASE_VERSION}-bin.tar.gz --directory=/opt/ && \ @@ -38,7 +39,9 @@ RUN tar xvfz /home/ranger/dist/hbase-${HBASE_VERSION}-bin.tar.gz --directory=/op ln -s /opt/ranger/ranger-${HBASE_PLUGIN_VERSION}-hbase-plugin /opt/ranger/ranger-hbase-plugin && \ rm -f /home/ranger/dist/ranger-${HBASE_PLUGIN_VERSION}-hbase-plugin.tar.gz && \ cp -f /home/ranger/scripts/ranger-hbase-plugin-install.properties /opt/ranger/ranger-hbase-plugin/install.properties && \ - chmod 744 ${RANGER_SCRIPTS}/ranger-hbase-setup.sh ${RANGER_SCRIPTS}/ranger-hbase.sh + chmod 744 ${RANGER_SCRIPTS}/ranger-hbase-setup.sh ${RANGER_SCRIPTS}/ranger-hbase.sh && \ + chmod +x ${RANGER_SCRIPTS}/create-users.sh && \ + ${RANGER_SCRIPTS}/create-users.sh RUN apt-get update && \ apt-get install -y --no-install-recommends openssh-server && \ diff --git a/dev-support/ranger-docker/Dockerfile.ranger-hive b/dev-support/ranger-docker/Dockerfile.ranger-hive index 7fddfc001..c52e252ff 100644 --- a/dev-support/ranger-docker/Dockerfile.ranger-hive +++ b/dev-support/ranger-docker/Dockerfile.ranger-hive @@ -22,6 +22,7 @@ FROM ${RANGER_BASE_IMAGE}:${RANGER_BASE_VERSION} ARG HIVE_VERSION ARG HIVE_HADOOP_VERSION ARG HIVE_PLUGIN_VERSION +ARG TEZ_VERSION ARG RANGER_DB_TYPE @@ -29,6 +30,7 @@ COPY ./dist/version /home/ranger/dist/ COPY ./dist/ranger-${HIVE_PLUGIN_VERSION}-hive-plugin.tar.gz /home/ranger/dist/ COPY ./downloads/apache-hive-${HIVE_VERSION}-bin.tar.gz /home/ranger/dist/ COPY ./downloads/hadoop-${HIVE_HADOOP_VERSION}.tar.gz /home/ranger/dist/ +COPY ./downloads/apache-tez-${TEZ_VERSION}-bin.tar.gz /home/ranger/dist/ COPY ./downloads/postgresql-42.2.16.jre7.jar /home/ranger/dist/ COPY 
./downloads/mysql-connector-java-8.0.28.jar /home/ranger/dist/ COPY ./downloads/ojdbc8.jar /home/ranger/dist/ @@ -37,8 +39,10 @@ COPY ./scripts/ranger-hive-setup.sh /home/ranger/scripts/ COPY ./scripts/ranger-hive.sh /home/ranger/scripts/ COPY ./scripts/ranger-hive-plugin-install.properties /home/ranger/scripts/ COPY ./scripts/hive-site-${RANGER_DB_TYPE}.xml /home/ranger/scripts/hive-site.xml +COPY ./scripts/hive-site-metastore-${RANGER_DB_TYPE}.xml /home/ranger/scripts/hive-site-metastore.xml +COPY ./scripts/create-users.sh /home/ranger/scripts/ -RUN tar xvfz /home/ranger/dist/apache-hive-${HIVE_VERSION}-bin.tar.gz --directory=/opt/ && \ +RUN cd /opt && tar xzf /home/ranger/dist/apache-hive-${HIVE_VERSION}-bin.tar.gz && \ ln -s /opt/apache-hive-${HIVE_VERSION}-bin /opt/hive && \ rm -f /home/ranger/dist/apache-hive-${HIVE_VERSION}-bin.tar.gz && \ mv /home/ranger/dist/postgresql-42.2.16.jre7.jar /opt/hive/lib/ && \ @@ -47,15 +51,22 @@ RUN tar xvfz /home/ranger/dist/apache-hive-${HIVE_VERSION}-bin.tar.gz --director tar xvfz /home/ranger/dist/hadoop-${HIVE_HADOOP_VERSION}.tar.gz --directory=/opt/ && \ ln -s /opt/hadoop-${HIVE_HADOOP_VERSION} /opt/hadoop && \ rm -f /home/ranger/dist/hadoop-${HIVE_HADOOP_VERSION}.tar.gz && \ + tar xvfz /home/ranger/dist/apache-tez-${TEZ_VERSION}-bin.tar.gz --directory=/opt/ && \ + ln -s /opt/apache-tez-${TEZ_VERSION}-bin /opt/tez && \ + rm -f /home/ranger/dist/apache-tez-${TEZ_VERSION}-bin.tar.gz && \ tar xvfz /home/ranger/dist/ranger-${HIVE_PLUGIN_VERSION}-hive-plugin.tar.gz --directory=/opt/ranger && \ ln -s /opt/ranger/ranger-${HIVE_PLUGIN_VERSION}-hive-plugin /opt/ranger/ranger-hive-plugin && \ rm -f /home/ranger/dist/ranger-${HIVE_PLUGIN_VERSION}-hive-plugin.tar.gz && \ cp -f /home/ranger/scripts/ranger-hive-plugin-install.properties /opt/ranger/ranger-hive-plugin/install.properties && \ - chmod 744 ${RANGER_SCRIPTS}/ranger-hive-setup.sh ${RANGER_SCRIPTS}/ranger-hive.sh + chmod 744 ${RANGER_SCRIPTS}/ranger-hive-setup.sh 
${RANGER_SCRIPTS}/ranger-hive.sh && \ + chmod +x ${RANGER_SCRIPTS}/create-users.sh && \ + ${RANGER_SCRIPTS}/create-users.sh ENV HIVE_HOME=/opt/hive ENV HADOOP_HOME=/opt/hadoop -ENV PATH=/usr/java/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/hive/bin:/opt/hadoop/bin +ENV TEZ_HOME=/opt/tez +ENV TEZ_CONF_DIR=/opt/tez/conf +ENV PATH=/usr/java/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/hive/bin:/opt/hadoop/bin:/opt/tez/bin ENTRYPOINT [ "/home/ranger/scripts/ranger-hive.sh" ] diff --git a/dev-support/ranger-docker/Dockerfile.ranger-kafka b/dev-support/ranger-docker/Dockerfile.ranger-kafka index 48c5789a7..fdc8258e5 100644 --- a/dev-support/ranger-docker/Dockerfile.ranger-kafka +++ b/dev-support/ranger-docker/Dockerfile.ranger-kafka @@ -29,6 +29,7 @@ COPY ./downloads/kafka_2.12-${KAFKA_VERSION}.tgz /home/ranger/dist COPY ./scripts/ranger-kafka-setup.sh /home/ranger/scripts/ COPY ./scripts/ranger-kafka.sh /home/ranger/scripts/ COPY ./scripts/ranger-kafka-plugin-install.properties /home/ranger/scripts/ +COPY ./scripts/create-users.sh /home/ranger/scripts/ RUN tar xvfz /home/ranger/dist/kafka_2.12-${KAFKA_VERSION}.tgz --directory=/opt/ && \ ln -s /opt/kafka_2.12-${KAFKA_VERSION} /opt/kafka && \ @@ -37,7 +38,10 @@ RUN tar xvfz /home/ranger/dist/kafka_2.12-${KAFKA_VERSION}.tgz --directory=/opt/ ln -s /opt/ranger/ranger-${KAFKA_PLUGIN_VERSION}-kafka-plugin /opt/ranger/ranger-kafka-plugin && \ rm -f /home/ranger/dist/ranger-${KAFKA_PLUGIN_VERSION}-kafka-plugin.tar.gz && \ cp -f /home/ranger/scripts/ranger-kafka-plugin-install.properties /opt/ranger/ranger-kafka-plugin/install.properties && \ - chmod 744 ${RANGER_SCRIPTS}/ranger-kafka-setup.sh ${RANGER_SCRIPTS}/ranger-kafka.sh + chmod 744 ${RANGER_SCRIPTS}/ranger-kafka-setup.sh ${RANGER_SCRIPTS}/ranger-kafka.sh && \ + chmod +x ${RANGER_SCRIPTS}/create-users.sh && \ + ${RANGER_SCRIPTS}/create-users.sh + ENV KAFKA_HOME=/opt/kafka ENV 
PATH=/usr/java/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/kafka/bin diff --git a/dev-support/ranger-docker/Dockerfile.ranger-kms b/dev-support/ranger-docker/Dockerfile.ranger-kms index 55401ef60..e1d97c650 100644 --- a/dev-support/ranger-docker/Dockerfile.ranger-kms +++ b/dev-support/ranger-docker/Dockerfile.ranger-kms @@ -26,6 +26,7 @@ COPY ./dist/ranger-${KMS_VERSION}-kms.tar.gz /home/ranger/dist COPY ./scripts/ranger-kms.sh ${RANGER_SCRIPTS}/ COPY ./scripts/ranger-kms-install-${RANGER_DB_TYPE}.properties ${RANGER_SCRIPTS}/ranger-kms-install.properties +COPY ./scripts/create-users.sh /home/ranger/scripts/ RUN tar xvfz /home/ranger/dist/ranger-${KMS_VERSION}-kms.tar.gz --directory=${RANGER_HOME} && \ ln -s ${RANGER_HOME}/ranger-${KMS_VERSION}-kms ${RANGER_HOME}/kms && \ @@ -39,7 +40,8 @@ RUN tar xvfz /home/ranger/dist/ranger-${KMS_VERSION}-kms.tar.gz --directory=${RA ln -s /etc/init.d/ranger-kms /etc/rc3.d/K90ranger-kms && \ ln -s ${RANGER_HOME}/kms/ranger-kms-services.sh /usr/bin/ranger-kms-services.sh && \ chown -R rangerkms:ranger ${RANGER_HOME}/kms/ ${RANGER_SCRIPTS}/ /var/run/ranger_kms/ /var/log/ranger/ && \ - chmod 744 ${RANGER_SCRIPTS}/ranger-kms.sh + chmod 744 ${RANGER_SCRIPTS}/ranger-kms.sh && \ + ${RANGER_SCRIPTS}/create-users.sh FROM ranger-kms AS ranger_postgres COPY ./downloads/postgresql-42.2.16.jre7.jar /home/ranger/dist/ diff --git a/dev-support/ranger-docker/Dockerfile.ranger-knox b/dev-support/ranger-docker/Dockerfile.ranger-knox index 653af09ee..13bbf006a 100644 --- a/dev-support/ranger-docker/Dockerfile.ranger-knox +++ b/dev-support/ranger-docker/Dockerfile.ranger-knox @@ -25,6 +25,7 @@ ARG KNOX_PLUGIN_VERSION COPY ./dist/version /home/ranger/dist/ COPY ./dist/ranger-${KNOX_PLUGIN_VERSION}-knox-plugin.tar.gz /home/ranger/dist/ COPY ./downloads/knox-${KNOX_VERSION}.tar.gz /home/ranger/dist/ +COPY ./scripts/create-users.sh /home/ranger/scripts/ COPY ./scripts/ranger-knox-setup.sh /home/ranger/scripts/ COPY 
./scripts/ranger-knox.sh /home/ranger/scripts/ @@ -40,7 +41,9 @@ RUN tar xvfz /home/ranger/dist/knox-${KNOX_VERSION}.tar.gz --directory=/opt/ && rm -f /home/ranger/dist/ranger-${KNOX_PLUGIN_VERSION}-knox-plugin.tar.gz && \ cp -f /home/ranger/scripts/ranger-knox-plugin-install.properties /opt/ranger/ranger-knox-plugin/install.properties && \ cp -f /home/ranger/scripts/ranger-knox-sandbox.xml /opt/knox/conf/topologies/sandbox.xml && \ - chmod 744 ${RANGER_SCRIPTS}/ranger-knox-setup.sh ${RANGER_SCRIPTS}/ranger-knox.sh ${RANGER_SCRIPTS}/ranger-knox-expect.py + chmod 744 ${RANGER_SCRIPTS}/ranger-knox-setup.sh ${RANGER_SCRIPTS}/ranger-knox.sh ${RANGER_SCRIPTS}/ranger-knox-expect.py && \ + chmod +x ${RANGER_SCRIPTS}/create-users.sh && \ + ${RANGER_SCRIPTS}/create-users.sh ENV KNOX_HOME=/opt/knox ENV PATH=/usr/java/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/knox/bin diff --git a/dev-support/ranger-docker/Dockerfile.ranger-tagsync b/dev-support/ranger-docker/Dockerfile.ranger-tagsync index 59efb40eb..5b0573252 100644 --- a/dev-support/ranger-docker/Dockerfile.ranger-tagsync +++ b/dev-support/ranger-docker/Dockerfile.ranger-tagsync @@ -26,6 +26,7 @@ COPY ./dist/ranger-${TAGSYNC_VERSION}-tagsync.tar.gz /home/ranger/dist/ COPY ./scripts/ranger-tagsync.sh ${RANGER_SCRIPTS}/ COPY ./scripts/ranger-tagsync-install.properties ${RANGER_SCRIPTS}/ COPY ./scripts/ranger-tagsync-tags.json ${RANGER_SCRIPTS}/ +COPY ./scripts/create-users.sh ${RANGER_SCRIPTS}/ RUN tar xvfz /home/ranger/dist/ranger-${TAGSYNC_VERSION}-tagsync.tar.gz --directory=${RANGER_HOME} && \ ln -s ${RANGER_HOME}/ranger-${TAGSYNC_VERSION}-tagsync ${RANGER_HOME}/tagsync && \ @@ -43,7 +44,9 @@ RUN tar xvfz /home/ranger/dist/ranger-${TAGSYNC_VERSION}-tagsync.tar.gz --direct ln -s /etc/init.d/ranger-tagsync /etc/rc3.d/K00ranger-tagsync && \ ln -s ${RANGER_HOME}/tagsync/ranger-tagsync-services.sh /usr/bin/ranger-tagsync-services.sh && \ chown -R ranger:ranger ${RANGER_HOME}/tagsync/ 
${RANGER_SCRIPTS}/ /var/run/ranger/ /var/log/ranger/ /etc/ranger /etc/init.d/ranger-tagsync && \ - chmod 744 ${RANGER_SCRIPTS}/ranger-tagsync.sh + chmod 744 ${RANGER_SCRIPTS}/ranger-tagsync.sh && \ + chmod +x ${RANGER_SCRIPTS}/create-users.sh && \ + ${RANGER_SCRIPTS}/create-users.sh USER ranger diff --git a/dev-support/ranger-docker/Dockerfile.ranger-usersync b/dev-support/ranger-docker/Dockerfile.ranger-usersync index 9b164cad0..893d44057 100644 --- a/dev-support/ranger-docker/Dockerfile.ranger-usersync +++ b/dev-support/ranger-docker/Dockerfile.ranger-usersync @@ -26,6 +26,7 @@ COPY ./dist/ranger-${USERSYNC_VERSION}-usersync.tar.gz /home/ranger/dist/ COPY ./scripts/ranger-usersync.sh ${RANGER_SCRIPTS}/ COPY ./scripts/ranger-usersync-install.properties ${RANGER_SCRIPTS}/ COPY ./scripts/ugsync-file-source.csv ${RANGER_SCRIPTS}/ +COPY ./scripts/create-users.sh ${RANGER_SCRIPTS}/ RUN tar xvfz /home/ranger/dist/ranger-${USERSYNC_VERSION}-usersync.tar.gz --directory=${RANGER_HOME} && \ ln -s ${RANGER_HOME}/ranger-${USERSYNC_VERSION}-usersync ${RANGER_HOME}/usersync && \ @@ -42,7 +43,9 @@ RUN tar xvfz /home/ranger/dist/ranger-${USERSYNC_VERSION}-usersync.tar.gz --dire ln -s /etc/init.d/ranger-usersync /etc/rc3.d/K00ranger-usersync && \ ln -s ${RANGER_HOME}/usersync/ranger-usersync-services.sh /usr/bin/ranger-usersync && \ chown -R ranger:ranger ${RANGER_HOME}/usersync/ ${RANGER_SCRIPTS}/ /var/run/ranger/ /var/log/ranger/ /etc/ranger /etc/init.d/ranger-usersync && \ - chmod 744 ${RANGER_SCRIPTS}/ranger-usersync.sh + chmod 744 ${RANGER_SCRIPTS}/ranger-usersync.sh && \ + chmod +x ${RANGER_SCRIPTS}/create-users.sh && \ + ${RANGER_SCRIPTS}/create-users.sh USER ranger diff --git a/dev-support/ranger-docker/docker-compose.ranger-hadoop.yml b/dev-support/ranger-docker/docker-compose.ranger-hadoop.yml index dacbbf0cc..37fe6b5d8 100644 --- a/dev-support/ranger-docker/docker-compose.ranger-hadoop.yml +++ b/dev-support/ranger-docker/docker-compose.ranger-hadoop.yml @@ -9,6 +9,7 @@ 
services: - HADOOP_VERSION=${HADOOP_VERSION} - HDFS_PLUGIN_VERSION=${HDFS_PLUGIN_VERSION} - YARN_PLUGIN_VERSION=${YARN_PLUGIN_VERSION} + - TEZ_VERSION=${TEZ_VERSION} image: ranger-hadoop container_name: ranger-hadoop hostname: ranger-hadoop.example.com @@ -32,6 +33,7 @@ services: - HADOOP_VERSION - HDFS_PLUGIN_VERSION - YARN_PLUGIN_VERSION + - TEZ_VERSION networks: ranger: diff --git a/dev-support/ranger-docker/docker-compose.ranger-hive.yml b/dev-support/ranger-docker/docker-compose.ranger-hive.yml index 5815a472d..608463082 100644 --- a/dev-support/ranger-docker/docker-compose.ranger-hive.yml +++ b/dev-support/ranger-docker/docker-compose.ranger-hive.yml @@ -9,6 +9,7 @@ services: - HIVE_HADOOP_VERSION=${HIVE_HADOOP_VERSION} - HIVE_VERSION=${HIVE_VERSION} - HIVE_PLUGIN_VERSION=${HIVE_PLUGIN_VERSION} + - TEZ_VERSION=${TEZ_VERSION} - RANGER_DB_TYPE=${RANGER_DB_TYPE} image: ranger-hive container_name: ranger-hive @@ -19,6 +20,7 @@ services: - ranger ports: - "10000:10000" + - "9083:9083" depends_on: ranger: condition: service_started @@ -30,6 +32,7 @@ services: - HIVE_HADOOP_VERSION - HIVE_VERSION - HIVE_PLUGIN_VERSION + - TEZ_VERSION - RANGER_DB_TYPE networks: diff --git a/dev-support/ranger-docker/download-archives.sh b/dev-support/ranger-docker/download-archives.sh index 0bfca41ad..1ce4240e4 100755 --- a/dev-support/ranger-docker/download-archives.sh +++ b/dev-support/ranger-docker/download-archives.sh @@ -53,6 +53,7 @@ then downloadIfNotPresent hbase-${HBASE_VERSION}-bin.tar.gz https://archive.apache.org/dist/hbase/${HBASE_VERSION} downloadIfNotPresent apache-hive-${HIVE_VERSION}-bin.tar.gz https://archive.apache.org/dist/hive/hive-${HIVE_VERSION} downloadIfNotPresent hadoop-${HIVE_HADOOP_VERSION}.tar.gz https://archive.apache.org/dist/hadoop/common/hadoop-${HIVE_HADOOP_VERSION} + downloadIfNotPresent apache-tez-${TEZ_VERSION}-bin.tar.gz https://archive.apache.org/dist/tez/${TEZ_VERSION} downloadIfNotPresent kafka_2.12-${KAFKA_VERSION}.tgz 
https://archive.apache.org/dist/kafka/${KAFKA_VERSION} downloadIfNotPresent knox-${KNOX_VERSION}.tar.gz https://archive.apache.org/dist/knox/${KNOX_VERSION} downloadIfNotPresent ozone-${OZONE_VERSION}.tar.gz https://archive.apache.org/dist/ozone/${OZONE_VERSION} @@ -72,6 +73,7 @@ else then downloadIfNotPresent apache-hive-${HIVE_VERSION}-bin.tar.gz https://archive.apache.org/dist/hive/hive-${HIVE_VERSION} downloadIfNotPresent hadoop-${HIVE_HADOOP_VERSION}.tar.gz https://archive.apache.org/dist/hadoop/common/hadoop-${HIVE_HADOOP_VERSION} + downloadIfNotPresent apache-tez-${TEZ_VERSION}-bin.tar.gz https://archive.apache.org/dist/tez/${TEZ_VERSION} elif [[ $arg == 'kafka' ]] then downloadIfNotPresent kafka_2.12-${KAFKA_VERSION}.tgz https://archive.apache.org/dist/kafka/${KAFKA_VERSION} diff --git a/dev-support/ranger-docker/scripts/create-users.sh b/dev-support/ranger-docker/scripts/create-users.sh new file mode 100755 index 000000000..dddca21e2 --- /dev/null +++ b/dev-support/ranger-docker/scripts/create-users.sh @@ -0,0 +1,62 @@ +#!/bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Script to create alice and abram users in ranger containers +# This script is designed to be run during container initialization + +# Function to create a user for testing. +create_user_if_not_exists() { + local username=$1 + local uid=$2 + local gid=$3 + local home_dir=$4 + + if ! id "$username" &>/dev/null; then + echo "Creating user: $username (uid:$uid, gid:$gid)" + useradd -u "$uid" -g "$gid" -m -d "$home_dir" -s /bin/bash "$username" + + # Set a default password (same as username for demo purposes) + echo "$username:$username" | chpasswd + + # Add user to hadoop group for HDFS access + if getent group hadoop &>/dev/null; then + usermod -a -G hadoop "$username" + fi + + # Create .ssh directory and set proper permissions + mkdir -p "$home_dir/.ssh" + chmod 700 "$home_dir/.ssh" + chown "$username:$gid" "$home_dir/.ssh" + + echo "User $username created successfully" + else + echo "User $username already exists" + fi +} + +# Ensure hadoop group exists (gid 1001 is used by hdfs, yarn, hive users) +if ! getent group hadoop &>/dev/null; then + groupadd -g 1001 hadoop + echo "Created hadoop group" +fi + +# Create alice user (uid: 2001, gid: 1001 - hadoop group) +create_user_if_not_exists "alice" 2001 1001 "/home/alice" + +# Create abram user (uid: 2002, gid: 1001 - hadoop group) +create_user_if_not_exists "abram" 2002 1001 "/home/abram" diff --git a/dev-support/ranger-docker/scripts/hive-site-metastore-mysql.xml b/dev-support/ranger-docker/scripts/hive-site-metastore-mysql.xml new file mode 100644 index 000000000..1049239f5 --- /dev/null +++ b/dev-support/ranger-docker/scripts/hive-site-metastore-mysql.xml @@ -0,0 +1,132 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!-- +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +--> +<configuration> + <!-- Database Connection Configuration --> + <property> + <name>javax.jdo.option.ConnectionURL</name> + <value>jdbc:mysql://ranger-db/hive</value> + </property> + <property> + <name>javax.jdo.option.ConnectionDriverName</name> + <value>com.mysql.jdbc.Driver</value> + </property> + <property> + <name>javax.jdo.option.ConnectionUserName</name> + <value>hive</value> + </property> + <property> + <name>javax.jdo.option.ConnectionPassword</name> + <value>rangerR0cks!</value> + </property> + + <!-- Basic Hive Configuration --> + <property> + <name>hive.server2.enable.doAs</name> + <value>false</value> + </property> + + <!-- Warehouse Configuration - Use Local Filesystem --> + <property> + <name>hive.metastore.warehouse.dir</name> + <value>file:///warehouse/tablespace/managed/hive</value> + </property> + <property> + <name>hive.metastore.warehouse.external.dir</name> + <value>file:///warehouse/tablespace/external/hive</value> + </property> + + <property> + <name>hive.zookeeper.quorum</name> + <value>ranger-zk.example.com</value> + </property> + <property> + <name>hive.zookeeper.client.port</name> + <value>2181</value> + </property> + + <!-- Tez Configuration --> + <property> + <name>hive.execution.engine</name> + <value>tez</value> + <description>Execution engine to use for Hive queries</description> + </property> + + <property> + <name>hive.tez.container.size</name> + 
<value>1024</value> + <description>By default Tez will spawn containers of the size of a mapper</description> + </property> + + <property> + <name>hive.tez.java.opts</name> + <value>-Xmx768m</value> + <description>Java command line options for Tez</description> + </property> + + <property> + <name>hive.tez.log.level</name> + <value>INFO</value> + <description>Log level for Tez</description> + </property> + + <property> + <name>hive.prewarm.enabled</name> + <value>false</value> + <description>Enables container prewarm for Tez</description> + </property> + + <property> + <name>hive.prewarm.numcontainers</name> + <value>3</value> + <description>Controls the number of containers to prewarm for Tez</description> + </property> + + <property> + <name>hive.tez.auto.reducer.parallelism</name> + <value>true</value> + <description>Turn on Tez' auto reducer parallelism feature</description> + </property> + + <property> + <name>hive.tez.min.reducer.per.query</name> + <value>2</value> + <description>Controls the minimum number of reducers for Tez</description> + </property> + + <property> + <name>hive.tez.max.reducer.per.query</name> + <value>999</value> + <description>Controls the maximum number of reducers for Tez</description> + </property> + + <!-- YARN Configuration for Tez --> + <property> + <name>hive.tez.exec.print.summary</name> + <value>true</value> + <description>Whether to print a summary of the plan execution</description> + </property> + + <property> + <name>tez.queue.name</name> + <value>default</value> + <description>YARN queue name for Tez jobs</description> + </property> +</configuration> diff --git a/dev-support/ranger-docker/scripts/hive-site-mysql.xml b/dev-support/ranger-docker/scripts/hive-site-mysql.xml index 118cdbbca..ccb095329 100644 --- a/dev-support/ranger-docker/scripts/hive-site-mysql.xml +++ b/dev-support/ranger-docker/scripts/hive-site-mysql.xml @@ -49,4 +49,72 @@ <name>hive.zookeeper.client.port</name> <value>2181</value> </property> + + <!-- 
Tez Configuration --> + <property> + <name>hive.execution.engine</name> + <value>tez</value> + <description>Execution engine to use for Hive queries</description> + </property> + + <property> + <name>hive.tez.container.size</name> + <value>1024</value> + <description>By default Tez will spawn containers of the size of a mapper</description> + </property> + + <property> + <name>hive.tez.java.opts</name> + <value>-Xmx768m</value> + <description>Java command line options for Tez</description> + </property> + + <property> + <name>hive.tez.log.level</name> + <value>INFO</value> + <description>Log level for Tez</description> + </property> + + <property> + <name>hive.prewarm.enabled</name> + <value>false</value> + <description>Enables container prewarm for Tez</description> + </property> + + <property> + <name>hive.prewarm.numcontainers</name> + <value>3</value> + <description>Controls the number of containers to prewarm for Tez</description> + </property> + + <property> + <name>hive.tez.auto.reducer.parallelism</name> + <value>true</value> + <description>Turn on Tez' auto reducer parallelism feature</description> + </property> + + <property> + <name>hive.tez.min.reducer.per.query</name> + <value>2</value> + <description>Controls the minimum number of reducers for Tez</description> + </property> + + <property> + <name>hive.tez.max.reducer.per.query</name> + <value>999</value> + <description>Controls the maximum number of reducers for Tez</description> + </property> + + <!-- YARN Configuration for Tez --> + <property> + <name>hive.tez.exec.print.summary</name> + <value>true</value> + <description>Whether to print a summary of the plan execution</description> + </property> + + <property> + <name>tez.queue.name</name> + <value>default</value> + <description>YARN queue name for Tez jobs</description> + </property> </configuration> diff --git a/dev-support/ranger-docker/scripts/hive-site-oracle.xml b/dev-support/ranger-docker/scripts/hive-site-oracle.xml index 
2b8cc5e08..53cde1279 100644 --- a/dev-support/ranger-docker/scripts/hive-site-oracle.xml +++ b/dev-support/ranger-docker/scripts/hive-site-oracle.xml @@ -49,4 +49,72 @@ <name>hive.zookeeper.client.port</name> <value>2181</value> </property> + + <!-- Tez Configuration --> + <property> + <name>hive.execution.engine</name> + <value>tez</value> + <description>Execution engine to use for Hive queries</description> + </property> + + <property> + <name>hive.tez.container.size</name> + <value>1024</value> + <description>By default Tez will spawn containers of the size of a mapper</description> + </property> + + <property> + <name>hive.tez.java.opts</name> + <value>-Xmx768m</value> + <description>Java command line options for Tez</description> + </property> + + <property> + <name>hive.tez.log.level</name> + <value>INFO</value> + <description>Log level for Tez</description> + </property> + + <property> + <name>hive.prewarm.enabled</name> + <value>false</value> + <description>Enables container prewarm for Tez</description> + </property> + + <property> + <name>hive.prewarm.numcontainers</name> + <value>3</value> + <description>Controls the number of containers to prewarm for Tez</description> + </property> + + <property> + <name>hive.tez.auto.reducer.parallelism</name> + <value>true</value> + <description>Turn on Tez' auto reducer parallelism feature</description> + </property> + + <property> + <name>hive.tez.min.reducer.per.query</name> + <value>2</value> + <description>Controls the minimum number of reducers for Tez</description> + </property> + + <property> + <name>hive.tez.max.reducer.per.query</name> + <value>999</value> + <description>Controls the maximum number of reducers for Tez</description> + </property> + + <!-- YARN Configuration for Tez --> + <property> + <name>hive.tez.exec.print.summary</name> + <value>true</value> + <description>Whether to print a summary of the plan execution</description> + </property> + + <property> + <name>tez.queue.name</name> + 
<value>default</value> + <description>YARN queue name for Tez jobs</description> + </property> </configuration> diff --git a/dev-support/ranger-docker/scripts/hive-site-postgres.xml b/dev-support/ranger-docker/scripts/hive-site-postgres.xml index 55343a323..f139af5d4 100644 --- a/dev-support/ranger-docker/scripts/hive-site-postgres.xml +++ b/dev-support/ranger-docker/scripts/hive-site-postgres.xml @@ -49,4 +49,72 @@ <name>hive.zookeeper.client.port</name> <value>2181</value> </property> + + <!-- Tez Configuration --> + <property> + <name>hive.execution.engine</name> + <value>tez</value> + <description>Execution engine to use for Hive queries</description> + </property> + + <property> + <name>hive.tez.container.size</name> + <value>1024</value> + <description>By default Tez will spawn containers of the size of a mapper</description> + </property> + + <property> + <name>hive.tez.java.opts</name> + <value>-Xmx768m</value> + <description>Java command line options for Tez</description> + </property> + + <property> + <name>hive.tez.log.level</name> + <value>INFO</value> + <description>Log level for Tez</description> + </property> + + <property> + <name>hive.prewarm.enabled</name> + <value>false</value> + <description>Enables container prewarm for Tez</description> + </property> + + <property> + <name>hive.prewarm.numcontainers</name> + <value>3</value> + <description>Controls the number of containers to prewarm for Tez</description> + </property> + + <property> + <name>hive.tez.auto.reducer.parallelism</name> + <value>true</value> + <description>Turn on Tez' auto reducer parallelism feature</description> + </property> + + <property> + <name>hive.tez.min.reducer.per.query</name> + <value>2</value> + <description>Controls the minimum number of reducers for Tez</description> + </property> + + <property> + <name>hive.tez.max.reducer.per.query</name> + <value>999</value> + <description>Controls the maximum number of reducers for Tez</description> + </property> + + <!-- 
YARN Configuration for Tez --> + <property> + <name>hive.tez.exec.print.summary</name> + <value>true</value> + <description>Whether to print a summary of the plan execution</description> + </property> + + <property> + <name>tez.queue.name</name> + <value>default</value> + <description>YARN queue name for Tez jobs</description> + </property> </configuration> diff --git a/dev-support/ranger-docker/scripts/hive-site-sqlserver.xml b/dev-support/ranger-docker/scripts/hive-site-sqlserver.xml index 5bed21c2d..2298f4b64 100644 --- a/dev-support/ranger-docker/scripts/hive-site-sqlserver.xml +++ b/dev-support/ranger-docker/scripts/hive-site-sqlserver.xml @@ -47,4 +47,72 @@ <name>hive.zookeeper.client.port</name> <value>2181</value> </property> + + <!-- Tez Configuration --> + <property> + <name>hive.execution.engine</name> + <value>tez</value> + <description>Execution engine to use for Hive queries</description> + </property> + + <property> + <name>hive.tez.container.size</name> + <value>1024</value> + <description>By default Tez will spawn containers of the size of a mapper</description> + </property> + + <property> + <name>hive.tez.java.opts</name> + <value>-Xmx768m</value> + <description>Java command line options for Tez</description> + </property> + + <property> + <name>hive.tez.log.level</name> + <value>INFO</value> + <description>Log level for Tez</description> + </property> + + <property> + <name>hive.prewarm.enabled</name> + <value>false</value> + <description>Enables container prewarm for Tez</description> + </property> + + <property> + <name>hive.prewarm.numcontainers</name> + <value>3</value> + <description>Controls the number of containers to prewarm for Tez</description> + </property> + + <property> + <name>hive.tez.auto.reducer.parallelism</name> + <value>true</value> + <description>Turn on Tez' auto reducer parallelism feature</description> + </property> + + <property> + <name>hive.tez.min.reducer.per.query</name> + <value>2</value> + <description>Controls the 
minimum number of reducers for Tez</description> + </property> + + <property> + <name>hive.tez.max.reducer.per.query</name> + <value>999</value> + <description>Controls the maximum number of reducers for Tez</description> + </property> + + <!-- YARN Configuration for Tez --> + <property> + <name>hive.tez.exec.print.summary</name> + <value>true</value> + <description>Whether to print a summary of the plan execution</description> + </property> + + <property> + <name>tez.queue.name</name> + <value>default</value> + <description>YARN queue name for Tez jobs</description> + </property> </configuration> diff --git a/dev-support/ranger-docker/scripts/ranger-hadoop-setup.sh b/dev-support/ranger-docker/scripts/ranger-hadoop-setup.sh index 10f04acd9..d87548b80 100755 --- a/dev-support/ranger-docker/scripts/ranger-hadoop-setup.sh +++ b/dev-support/ranger-docker/scripts/ranger-hadoop-setup.sh @@ -52,16 +52,81 @@ cat <<EOF > ${HADOOP_HOME}/etc/hadoop/yarn-site.xml <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property> + <property> + <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name> + <value>org.apache.hadoop.mapred.ShuffleHandler</value> + </property> <property> <name>yarn.nodemanager.env-whitelist</name> <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value> </property> + <property> + <name>yarn.resourcemanager.hostname</name> + <value>ranger-hadoop</value> + </property> + <property> + <name>yarn.nodemanager.resource.memory-mb</name> + <value>4096</value> + </property> + <property> + <name>yarn.scheduler.maximum-allocation-mb</name> + <value>4096</value> + </property> + <property> + <name>yarn.scheduler.minimum-allocation-mb</name> + <value>256</value> + </property> + <property> + <name>yarn.nodemanager.vmem-check-enabled</name> + <value>false</value> + </property> + <property> + <name>yarn.log-aggregation-enable</name> + <value>true</value> + 
</property> + <property> + <name>yarn.timeline-service.enabled</name> + <value>true</value> + </property> + <property> + <name>yarn.timeline-service.hostname</name> + <value>ranger-hadoop</value> + </property> + <property> + <name>yarn.timeline-service.http-cross-origin.enabled</name> + <value>true</value> + </property> + <property> + <name>yarn.resourcemanager.system-metrics-publisher.enabled</name> + <value>true</value> + </property> </configuration> EOF mkdir -p /opt/hadoop/logs chown -R hdfs:hadoop /opt/hadoop/ chmod g+w /opt/hadoop/logs +# user logs directory permissions for NodeManager health +mkdir -p ${HADOOP_HOME}/logs/userlogs +chown -R yarn:hadoop ${HADOOP_HOME}/logs/userlogs +chmod -R 777 ${HADOOP_HOME}/logs/userlogs + +# Install Tez JARs for YARN NodeManager +echo "Installing Tez JARs for YARN NodeManager..." +if [ -d "/opt/tez" ]; then + echo "Copying Tez JARs to YARN lib directory..." + cp /opt/tez/lib/*.jar /opt/hadoop/share/hadoop/yarn/lib/ 2>/dev/null || echo 'Some Tez lib JARs may already exist' + cp /opt/tez/*.jar /opt/hadoop/share/hadoop/yarn/lib/ 2>/dev/null || echo 'Some Tez JARs may already exist' + + # Set up Tez environment + export TEZ_HOME=/opt/tez + export TEZ_CONF_DIR=${TEZ_HOME}/conf + mkdir -p ${TEZ_CONF_DIR} + + echo "Tez JARs installed successfully for YARN NodeManager" +else + echo "WARNING: Tez directory not found at /opt/tez" +fi cd ${RANGER_HOME}/ranger-hdfs-plugin ./enable-hdfs-plugin.sh diff --git a/dev-support/ranger-docker/scripts/ranger-hive-setup.sh b/dev-support/ranger-docker/scripts/ranger-hive-setup.sh index c0e7ee406..cb4d7e8f9 100755 --- a/dev-support/ranger-docker/scripts/ranger-hive-setup.sh +++ b/dev-support/ranger-docker/scripts/ranger-hive-setup.sh @@ -35,6 +35,134 @@ EOF cp ${RANGER_SCRIPTS}/hive-site.xml ${HIVE_HOME}/conf/hive-site.xml cp ${RANGER_SCRIPTS}/hive-site.xml ${HIVE_HOME}/conf/hiveserver2-site.xml + +# Configure Tez +mkdir -p ${TEZ_HOME}/conf + +# Create Tez configuration directory for Hadoop +mkdir 
-p ${HADOOP_HOME}/etc/hadoop + +# Create mapred-site.xml for YARN integration +cat <<EOF > ${HADOOP_HOME}/etc/hadoop/mapred-site.xml +<configuration> + <property> + <name>mapreduce.framework.name</name> + <value>yarn</value> + </property> + <property> + <name>mapreduce.application.classpath</name> + <value>\$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:\$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value> + </property> + <property> + <name>yarn.app.mapreduce.am.env</name> + <value>HADOOP_MAPRED_HOME=/opt/hadoop</value> + </property> + <property> + <name>mapreduce.map.env</name> + <value>HADOOP_MAPRED_HOME=/opt/hadoop</value> + </property> + <property> + <name>mapreduce.reduce.env</name> + <value>HADOOP_MAPRED_HOME=/opt/hadoop</value> + </property> +</configuration> +EOF + +# Create yarn-site.xml for YARN ResourceManager connection +cat <<EOF > ${HADOOP_HOME}/etc/hadoop/yarn-site.xml +<configuration> + <property> + <name>yarn.resourcemanager.hostname</name> + <value>ranger-hadoop</value> + </property> + <property> + <name>yarn.resourcemanager.address</name> + <value>ranger-hadoop:8032</value> + </property> +</configuration> +EOF + +# Fix tez-site.xml to use absolute HDFS path (critical for Tez to find libraries) +cat <<EOF > ${TEZ_HOME}/conf/tez-site.xml +<?xml version="1.0" encoding="UTF-8"?> +<configuration> + <property> + <name>tez.lib.uris</name> + <value>hdfs://ranger-hadoop:9000/apps/tez/apache-tez-${TEZ_VERSION}-bin.tar.gz</value> + <description>Comma-delimited list of the location of the Tez libraries which will be localized for DAGs.</description> + </property> + <property> + <name>tez.use.cluster.hadoop-libs</name> + <value>true</value> + <description>Use Hadoop libraries provided by cluster instead of those packaged with Tez</description> + </property> + <property> + <name>tez.am.resource.memory.mb</name> + <value>1024</value> + <description>The amount of memory to be used by the AppMaster</description> + </property> + <property> + 
<name>tez.am.java.opts</name> + <value>-Xmx768m</value> + <description>Java opts for the Tez AppMaster process</description> + </property> + <property> + <name>tez.task.resource.memory.mb</name> + <value>1024</value> + <description>The amount of memory to be used by tasks</description> + </property> + <property> + <name>tez.task.launch.cmd-opts</name> + <value>-Xmx768m</value> + <description>Java opts for tasks</description> + </property> + <property> + <name>tez.staging-dir</name> + <value>/tmp/hive</value> + <description>The staging directory for Tez applications in HDFS.</description> + </property> +</configuration> +EOF + +# Copy Tez JARs to Hive lib directory +cp ${TEZ_HOME}/lib/tez-*.jar ${HIVE_HOME}/lib/ +cp ${TEZ_HOME}/tez-*.jar ${HIVE_HOME}/lib/ + +# Copy all Hadoop configurations to Hive conf directory so Hive can find them +cp ${HADOOP_HOME}/etc/hadoop/core-site.xml ${HIVE_HOME}/conf/ +cp ${HADOOP_HOME}/etc/hadoop/mapred-site.xml ${HIVE_HOME}/conf/ +cp ${HADOOP_HOME}/etc/hadoop/yarn-site.xml ${HIVE_HOME}/conf/ +cp ${TEZ_HOME}/conf/tez-site.xml ${HIVE_HOME}/conf/ + +# Upload Tez libraries to HDFS +su -c "${HADOOP_HOME}/bin/hdfs dfs -mkdir -p /apps/tez" hdfs + +# Recreate Tez tarball if it doesn't exist (it gets removed during Docker build) +if [ ! -f "/opt/apache-tez-${TEZ_VERSION}-bin.tar.gz" ]; then + echo "Recreating Tez tarball for HDFS upload..." 
+ cd /opt + tar czf apache-tez-${TEZ_VERSION}-bin.tar.gz apache-tez-${TEZ_VERSION}-bin/ +fi + +su -c "${HADOOP_HOME}/bin/hdfs dfs -put /opt/apache-tez-${TEZ_VERSION}-bin.tar.gz /apps/tez/" hdfs +su -c "${HADOOP_HOME}/bin/hdfs dfs -chmod -R 755 /apps/tez" hdfs + +# Create HDFS user directory for hive +su -c "${HADOOP_HOME}/bin/hdfs dfs -mkdir -p /user/hive" hdfs +su -c "${HADOOP_HOME}/bin/hdfs dfs -chmod -R 777 /user/hive" hdfs + +# Create HDFS /tmp/hive directory for Tez staging +su -c "${HADOOP_HOME}/bin/hdfs dfs -mkdir -p /tmp/hive" hdfs +su -c "${HADOOP_HOME}/bin/hdfs dfs -chmod -R 777 /tmp/hive" hdfs + +# Fix /tmp directory permissions for Ranger (critical for INSERT operations) +su -c "${HADOOP_HOME}/bin/hdfs dfs -chmod 777 /tmp" hdfs + +# Create /user/root directory for YARN job execution +su -c "${HADOOP_HOME}/bin/hdfs dfs -mkdir -p /user/root" hdfs +su -c "${HADOOP_HOME}/bin/hdfs dfs -chmod 777 /user/root" hdfs + +# Initialize Hive schema su -c "${HIVE_HOME}/bin/schematool -dbType ${RANGER_DB_TYPE} -initSchema" hive mkdir -p /opt/hive/logs diff --git a/dev-support/ranger-docker/scripts/tez-site.xml b/dev-support/ranger-docker/scripts/tez-site.xml new file mode 100644 index 000000000..80faf26e8 --- /dev/null +++ b/dev-support/ranger-docker/scripts/tez-site.xml @@ -0,0 +1,93 @@ +<?xml version="1.0" encoding="UTF-8"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!-- +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +--> +<configuration> + <property> + <name>tez.lib.uris</name> + <value>${fs.defaultFS}/apps/tez/apache-tez-${TEZ_VERSION}-bin.tar.gz</value> + <description>Comma-delimited list of the location of the Tez libraries which will be localized for DAGs.</description> + </property> + + <property> + <name>tez.use.cluster.hadoop-libs</name> + <value>true</value> + <description>Use Hadoop libraries provided by cluster instead of those packaged with Tez</description> + </property> + + <property> + <name>tez.am.resource.memory.mb</name> + <value>1024</value> + <description>The amount of memory to be used by the AppMaster</description> + </property> + + <property> + <name>tez.am.java.opts</name> + <value>-Xmx768m</value> + <description>Java opts for the Tez AppMaster process</description> + </property> + + <property> + <name>tez.task.resource.memory.mb</name> + <value>1024</value> + <description>The amount of memory to be used by tasks</description> + </property> + + <property> + <name>tez.task.launch.cmd-opts</name> + <value>-Xmx768m</value> + <description>Java opts for tasks</description> + </property> + + <property> + <name>tez.runtime.io.sort.mb</name> + <value>256</value> + <description>The size of the sort buffer when output needs to be sorted</description> + </property> + + <property> + <name>tez.runtime.unordered.output.buffer.size-mb</name> + <value>100</value> + <description>The size of the buffer when output does not require to be sorted</description> + </property> + + <property> + <name>tez.session.am.dag.submit.timeout.secs</name> + 
<value>300</value> + <description>Time (in seconds) the Tez AM waits for a DAG to be submitted before timing out</description> + </property> + + <property> + <name>tez.am.container.reuse.enabled</name> + <value>true</value> + <description>Whether to reuse containers for tasks belonging to the same session</description> + </property> + + <property> + <name>tez.am.container.reuse.rack-fallback.enabled</name> + <value>true</value> + <description>Whether to fall back to rack local containers when node local containers are not available</description> + </property> + + <property> + <name>tez.am.container.reuse.non-local-fallback.enabled</name> + <value>false</value> + <description>Whether to fall back to non-local containers when rack local containers are not available</description> + </property> +</configuration>
