diff --git a/.gitignore b/.gitignore
index 85d660672c..e0df115492 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,4 @@
.settings
pom.xml.versionsBackup
target
+tez-dist/src/docker/cache/
diff --git a/tez-dag/src/main/resources/tez-container-log4j.properties b/tez-dag/src/main/resources/tez-container-log4j.properties
index 7cebec3289..4525d7018f 100644
--- a/tez-dag/src/main/resources/tez-container-log4j.properties
+++ b/tez-dag/src/main/resources/tez-container-log4j.properties
@@ -21,9 +21,16 @@ log4j.rootLogger=${tez.root.logger}
log4j.threshold=ALL
#
-# ContainerLog Appender
+# Console Appender
#
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.out
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{ISO8601} [%p] [%t] |%c{2}|: %m%n
+#
+# ContainerLog Appender
+#
log4j.appender.CLA=org.apache.tez.common.TezContainerLogAppender
log4j.appender.CLA.containerLogDir=${yarn.app.container.log.dir}
diff --git a/tez-dist/pom.xml b/tez-dist/pom.xml
index 9777d0c0b9..5940a996ac 100644
--- a/tez-dist/pom.xml
+++ b/tez-dist/pom.xml
@@ -118,6 +118,34 @@
+
+ docker
+
+
+
+ org.codehaus.mojo
+ exec-maven-plugin
+
+
+ build-docker-image
+ package
+
+ exec
+
+
+ /bin/bash
+
+ ${project.basedir}/src/docker/tez-am/build-am-docker.sh
+ -tez ${project.version}
+ -repo apache
+
+
+
+
+
+
+
+
diff --git a/tez-dist/src/docker/tez-am/Dockerfile.am b/tez-dist/src/docker/tez-am/Dockerfile.am
new file mode 100644
index 0000000000..01647f336c
--- /dev/null
+++ b/tez-dist/src/docker/tez-am/Dockerfile.am
@@ -0,0 +1,71 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+ARG BUILD_ENV=unarchive
+
+# hadolint ignore=DL3006
+FROM ubuntu AS unarchive
+# hadolint ignore=DL3010
+ONBUILD COPY tez-*.tar.gz /opt
+
+# hadolint ignore=DL3006
+FROM ${BUILD_ENV} AS env
+ARG TEZ_VERSION
+
+RUN mkdir -p /opt/tez \
+ && tar -xzv \
+ -f /opt/tez-$TEZ_VERSION.tar.gz \
+ -C /opt/tez \
+ && rm -rf /opt/tez-$TEZ_VERSION.tar.gz
+
+FROM eclipse-temurin:21-jdk-ubi9-minimal AS run
+
+ARG UID=1000
+ARG TEZ_VERSION
+
+# Install dependencies
+# hadolint ignore=DL3041
+RUN set -ex; \
+ microdnf update -y; \
+ microdnf -y install procps gettext findutils hostname; \
+ microdnf clean all; \
+ useradd --no-create-home -s /sbin/nologin -c "" --uid $UID tez
+
+# Set necessary environment variables
+ENV TEZ_HOME=/opt/tez \
+ TEZ_CONF_DIR=/opt/tez/conf
+
+ENV TEZ_CLIENT_VERSION=$TEZ_VERSION
+
+ENV PATH=$TEZ_HOME/bin:$PATH
+
+COPY --from=env --chown=tez /opt/tez $TEZ_HOME
+
+RUN mkdir -p $TEZ_CONF_DIR && chown tez:tez $TEZ_CONF_DIR
+
+COPY --chown=tez am-entrypoint.sh /
+COPY --chown=tez conf $TEZ_CONF_DIR
+
+# Create Extension Point Directory
+RUN mkdir -p /opt/tez/plugins && chown tez:tez /opt/tez/plugins && chmod 755 /opt/tez/plugins
+
+RUN chmod +x /am-entrypoint.sh
+
+USER tez
+WORKDIR $TEZ_HOME
+
+ENTRYPOINT ["/am-entrypoint.sh"]
diff --git a/tez-dist/src/docker/tez-am/README.md b/tez-dist/src/docker/tez-am/README.md
new file mode 100644
index 0000000000..987f381853
--- /dev/null
+++ b/tez-dist/src/docker/tez-am/README.md
@@ -0,0 +1,136 @@
+
+
+# Tez AM Docker
+
+1. Building the docker image:
+
+ ```bash
+ mvn clean install -DskipTests -Pdocker
+ ```
+
+2. Install zookeeper in mac:
+
+ a. Via brew: set the `tez.am.zookeeper.quorum` value as
+ `host.docker.internal:2181` in `tez-site.xml`
+
+ ```bash
+ brew install zookeeper
+ zkServer start
+ ```
+
+ b. Use Zookeeper docker image (Refer to docker compose yml):
+
+ ```bash
+ docker pull zookeeper:3.8.4
+
+ docker run -d \
+ --name zookeeper-server \
+ -p 2181:2181 \
+ -p 8080:8080 \
+ -e ZOO_MY_ID=1 \
+ zookeeper:3.8.4
+ ```
+
+3. Running the Tez AM container explicitly:
+
+ ```bash
+ export TEZ_VERSION=1.0.0-SNAPSHOT
+
+ docker run --rm \
+ -p 10001:10001 \
+ --env-file tez-dist/src/docker/tez-am/am.env \
+ --name tez-am \
+ --hostname localhost \
+ apache/tez-am:$TEZ_VERSION
+ ```
+
+ * `TEZ_VERSION` corresponds to the Maven `${project.version}`.
+ Set this environment variable in your shell before running the commands.
+ * Expose ports using the `-p` flag based on the
+ `tez.am.client.am.port-range` property in `tez-site.xml`.
+ * The `--hostname` flag configures the container's hostname, allowing
+ services on the host (e.g., macOS) to connect to it.
+ * Ensure the `--env-file` flag is included, or at a minimum, pass
+ `-e TEZ_FRAMEWORK_MODE=STANDALONE_ZOOKEEPER` to the `docker run` command.
+
+4. Debugging the Tez AM container:
+Uncomment the `JAVA_TOOL_OPTIONS` in `am.env` and expose 5005 port
+using `-p` flag
+
+ ```bash
+ docker run --rm \
+ -p 10001:10001 -p 5005:5005 \
+ --env-file tez-dist/src/docker/tez-am/am.env \
+ --name tez-am \
+ --hostname localhost \
+ apache/tez-am:$TEZ_VERSION
+ ```
+
+5. To override the tez-site.xml in docker image use:
+ * Set the `TEZ_CUSTOM_CONF_DIR` environment variable in `am.env`
+ or via the `docker run` command (e.g., `/opt/tez/custom-conf`).
+
+ ```bash
+ export TEZ_SITE_PATH=$(pwd)/tez-dist/src/docker/conf/tez-site.xml
+
+ docker run --rm \
+ -p 10001:10001 \
+ --env-file tez-dist/src/docker/tez-am/am.env \
+ -v "$TEZ_SITE_PATH:/opt/tez/custom-conf/tez-site.xml" \
+ --name tez-am \
+ --hostname localhost \
+ apache/tez-am:$TEZ_VERSION
+ ```
+
+6. To add plugin jars in docker image use:
+ * The plugin directory path inside the Docker container is fixed at `/opt/tez/plugins`.
+
+ ```bash
+ docker run --rm \
+ -p 10001:10001 \
+ --env-file tez-dist/src/docker/tez-am/am.env \
+ -v "/path/to/your/local/plugins:/opt/tez/plugins" \
+ --name tez-am \
+ --hostname localhost \
+ apache/tez-am:$TEZ_VERSION
+ ```
+
+7. Using Docker Compose:
+ * Refer to the `docker-compose.yml` file in this directory for
+ an example of how to run both the Tez AM and Zookeeper containers
+ together using Docker Compose.
+
+ ```bash
+ docker-compose -f tez-dist/src/docker/tez-am/docker-compose.yml up -d --build
+ ```
+
+ * This command will start both the Tez AM, Zookeeper, Minimal
+ Hadoop containers as defined in the `docker-compose.yml` file.
+
+8. To mount custom plugins or JARs required by Tez AM (e.g., for split generation
+ — typically the hive-exec jar, but in general, any UDFs or dependencies
+ previously managed via YARN localization:
+ * Create a directory tez-plugins and add all required jars.
+ * Uncomment the following lines in docker compose under the tez-am service
+ to mount this directory as a volume to `/opt/tez/plugins` in the docker container.
+
+ ```yml
+ volumes:
+ - ./tez-plugins:/opt/tez/plugins
+ ```
diff --git a/tez-dist/src/docker/tez-am/am-entrypoint.sh b/tez-dist/src/docker/tez-am/am-entrypoint.sh
new file mode 100644
index 0000000000..a6128419ce
--- /dev/null
+++ b/tez-dist/src/docker/tez-am/am-entrypoint.sh
@@ -0,0 +1,115 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -xeou pipefail
+
+################################################
+# 1. Mocking DAGAppMaster#main() env variables #
+################################################
+
+: "${USER:="tez"}"
+: "${LOCAL_DIRS:="/tmp"}"
+: "${LOG_DIRS:="/opt/tez/logs"}"
+: "${APP_SUBMIT_TIME_ENV:=$(($(date +%s) * 1000))}"
+: "${TEZ_AM_EXTERNAL_ID:="tez-session-$(hostname)"}"
+
+export USER LOCAL_DIRS LOG_DIRS APP_SUBMIT_TIME_ENV TEZ_AM_EXTERNAL_ID
+
+mkdir -p "$LOG_DIRS"
+
+###########################
+# Custom Config directory #
+###########################
+if [[ -n "${TEZ_CUSTOM_CONF_DIR:-}" ]] && [[ -d "$TEZ_CUSTOM_CONF_DIR" ]]; then
+ echo "--> Using custom configuration directory: $TEZ_CUSTOM_CONF_DIR"
+ find "${TEZ_CUSTOM_CONF_DIR}" -type f -exec \
+ ln -sf {} "${TEZ_CONF_DIR}"/ \;
+
+ # Remove template keyword if it exists
+ if [[ -f "$TEZ_CONF_DIR/tez-site.xml.template" ]]; then
+ envsubst < "$TEZ_CONF_DIR/tez-site.xml.template" > "$TEZ_CONF_DIR/tez-site.xml"
+ fi
+fi
+
+#############
+# CLASSPATH #
+#############
+
+# Order is: conf -> plugins -> tez jars
+CLASSPATH="${TEZ_CONF_DIR}"
+
+# Custom Plugins
+# This allows mounting a volume at /opt/tez/plugins containing aux jars
+PLUGIN_DIR="/opt/tez/plugins"
+if [[ -d "$PLUGIN_DIR" ]]; then
+ count=$(find "$PLUGIN_DIR" -maxdepth 1 -name "*.jar" 2>/dev/null | wc -l)
+ if [ "$count" != "0" ]; then
+ echo "--> Found $count plugin jars. Prepending to classpath."
+ CLASSPATH="${CLASSPATH}:${PLUGIN_DIR}/*"
+ fi
+fi
+
+# Tez Jars
+CLASSPATH="${CLASSPATH}:${TEZ_HOME}/*:${TEZ_HOME}/lib/*"
+
+#############
+# Execution #
+#############
+TEZ_DAG_JAR=$(find "$TEZ_HOME" -maxdepth 1 -name "tez-dag-*.jar" ! -name "*-tests.jar" | head -n 1)
+
+if [ -z "$TEZ_DAG_JAR" ]; then
+ echo "Error: Could not find tez-dag-*.jar in $TEZ_HOME"
+ exit 1
+fi
+
+echo "--> Starting DAGAppMaster..."
+
+: "${TEZ_AM_HEAP_OPTS:="-Xmx2048m"}"
+# : "${TEZ_AM_GC_OPTS:="-Xlog:gc*=info,class+load=info::time,uptime,level,tags -XX:+UseNUMA"}"
+
+JAVA_ADD_OPENS=(
+ "--add-opens=java.base/java.lang=ALL-UNNAMED"
+ "--add-opens=java.base/java.util=ALL-UNNAMED"
+ "--add-opens=java.base/java.io=ALL-UNNAMED"
+ "--add-opens=java.base/java.net=ALL-UNNAMED"
+ "--add-opens=java.base/java.nio=ALL-UNNAMED"
+ "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED"
+ "--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED"
+ "--add-opens=java.base/java.util.regex=ALL-UNNAMED"
+ "--add-opens=java.base/java.lang.reflect=ALL-UNNAMED"
+ "--add-opens=java.sql/java.sql=ALL-UNNAMED"
+ "--add-opens=java.base/java.text=ALL-UNNAMED"
+ "-Dnet.bytebuddy.experimental=true"
+)
+
+read -r -a JAVA_OPTS_ARR <<< "${JAVA_OPTS:-}"
+read -r -a HEAP_OPTS_ARR <<< "${TEZ_AM_HEAP_OPTS}"
+# read -r -a JAVA_GC_OPTS_ARR <<< "${TEZ_AM_GC_OPTS}"
+
+# Add "${JAVA_GC_OPTS_ARR[@]}" in following command to get gc information.
+exec java "${HEAP_OPTS_ARR[@]}" "${JAVA_OPTS_ARR[@]}" "${JAVA_ADD_OPENS[@]}" \
+ -Djava.net.preferIPv4Stack=true \
+ -Djava.io.tmpdir="$PWD/tmp" \
+ -Dtez.root.logger=INFO,CLA,console \
+ -Dlog4j.configuratorClass=org.apache.tez.common.TezLog4jConfigurator \
+ -Dlog4j.configuration=tez-container-log4j.properties \
+ -Dyarn.app.container.log.dir="$LOG_DIRS" \
+ -Dtez.conf.dir="$TEZ_CONF_DIR" \
+ -cp "$CLASSPATH" \
+ org.apache.tez.dag.app.DAGAppMaster --session \
+ "$@"
diff --git a/tez-dist/src/docker/tez-am/am.env b/tez-dist/src/docker/tez-am/am.env
new file mode 100644
index 0000000000..93cabeea32
--- /dev/null
+++ b/tez-dist/src/docker/tez-am/am.env
@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Tez AM Container Environment Configuration
+
+USER=tez
+LOG_DIRS=/opt/tez/logs
+TEZ_FRAMEWORK_MODE=STANDALONE_ZOOKEEPER
+TEZ_CUSTOM_CONF_DIR=/opt/tez/custom-conf
+# TEZ_AM_HEAP_OPTS configures the maximum heap size (Xmx) for the Tez AM.
+TEZ_AM_HEAP_OPTS=-Xmx2048m
+# Enable remote debugging on port 5005
+# JAVA_TOOL_OPTIONS='-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=*:5005'
diff --git a/tez-dist/src/docker/tez-am/build-am-docker.sh b/tez-dist/src/docker/tez-am/build-am-docker.sh
new file mode 100755
index 0000000000..66bf7fc738
--- /dev/null
+++ b/tez-dist/src/docker/tez-am/build-am-docker.sh
@@ -0,0 +1,102 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -xeou pipefail
+
+TEZ_VERSION=
+REPO=
+
+usage() {
+ cat <&2
+Usage: $0 [-h] [-tez ] [-repo ]
+Build the Apache Tez AM Docker image
+-help Display help
+-tez Build image with the specified Tez version
+-repo Docker repository
+EOF
+}
+
+while [ $# -gt 0 ]; do
+ case "$1" in
+ -h)
+ usage
+ exit 0
+ ;;
+ -tez)
+ shift
+ TEZ_VERSION=$1
+ shift
+ ;;
+ -repo)
+ shift
+ REPO=$1
+ shift
+ ;;
+ *)
+ shift
+ ;;
+ esac
+done
+
+SCRIPT_DIR=$(
+ cd "$(dirname "$0")"
+ pwd
+)
+
+DIST_DIR=${DIST_DIR:-"$SCRIPT_DIR/../../.."}
+PROJECT_ROOT=${PROJECT_ROOT:-"$SCRIPT_DIR/../../../.."}
+
+REPO=${REPO:-apache}
+WORK_DIR="$(mktemp -d)"
+
+# Defaults Tez versions from pom.xml if not provided
+TEZ_VERSION=${TEZ_VERSION:-$(mvn -f "$PROJECT_ROOT/pom.xml" -q help:evaluate -Dexpression=project.version -DforceStdout)}
+
+#####################################
+# Pick tez tarball from local build #
+#####################################
+TEZ_FILE_NAME="tez-$TEZ_VERSION.tar.gz"
+LOCAL_DIST_PATH="$DIST_DIR/target/$TEZ_FILE_NAME"
+
+if [ -f "$LOCAL_DIST_PATH" ]; then
+ echo "--> Found local Tez build artifact at: $LOCAL_DIST_PATH"
+ cp "$LOCAL_DIST_PATH" "$WORK_DIR/"
+else
+ echo "--> Error: Local Tez artifact not found at $LOCAL_DIST_PATH"
+ echo "--> Please build the project first (e.g., mvn clean install -DskipTests)."
+ exit 1
+fi
+
+# -------------------------------------------------------------------------
+# BUILD CONTEXT PREPARATION
+# -------------------------------------------------------------------------
+cp -R "$SCRIPT_DIR/conf" "$WORK_DIR/" 2>/dev/null || mkdir -p "$WORK_DIR/conf"
+cp "$SCRIPT_DIR/am-entrypoint.sh" "$WORK_DIR/"
+cp "$SCRIPT_DIR/Dockerfile.am" "$WORK_DIR/"
+
+echo "Building Docker image..."
+docker build \
+ "$WORK_DIR" \
+ -f "$WORK_DIR/Dockerfile.am" \
+ -t "$REPO/tez-am:$TEZ_VERSION" \
+ --build-arg "BUILD_ENV=unarchive" \
+ --build-arg "TEZ_VERSION=$TEZ_VERSION"
+
+rm -r "${WORK_DIR}"
+echo "Docker image $REPO/tez-am:$TEZ_VERSION built successfully."
diff --git a/tez-dist/src/docker/tez-am/conf/core-site.xml b/tez-dist/src/docker/tez-am/conf/core-site.xml
new file mode 100644
index 0000000000..e7b1f2c97c
--- /dev/null
+++ b/tez-dist/src/docker/tez-am/conf/core-site.xml
@@ -0,0 +1,30 @@
+
+
+
+
+
+ fs.defaultFS
+ hdfs://namenode:9000
+
+
+
+ hadoop.tmp.dir
+ /data/tmp
+
+
+
diff --git a/tez-dist/src/docker/tez-am/conf/hdfs-site.xml b/tez-dist/src/docker/tez-am/conf/hdfs-site.xml
new file mode 100644
index 0000000000..e2bd8733fb
--- /dev/null
+++ b/tez-dist/src/docker/tez-am/conf/hdfs-site.xml
@@ -0,0 +1,75 @@
+
+
+
+
+
+ dfs.replication
+ 1
+
+
+
+ dfs.block.size
+ 67108864
+
+
+
+ dfs.namenode.name.dir
+ file:///data/namenode
+
+
+
+ dfs.datanode.data.dir
+ file:///data/datanode
+
+
+
+ dfs.namenode.rpc-bind-host
+ 0.0.0.0
+
+
+
+ dfs.datanode.address
+ 0.0.0.0:9866
+
+
+
+ dfs.datanode.http.address
+ 0.0.0.0:9864
+
+
+
+ dfs.client.use.datanode.hostname
+ true
+
+
+
+ dfs.datanode.use.datanode.hostname
+ true
+
+
+
+ dfs.permissions.enabled
+ false
+
+
+
+ dfs.datanode.hostname
+ datanode
+
+
+
diff --git a/tez-dist/src/docker/tez-am/conf/tez-site.xml b/tez-dist/src/docker/tez-am/conf/tez-site.xml
new file mode 100644
index 0000000000..81add40eb1
--- /dev/null
+++ b/tez-dist/src/docker/tez-am/conf/tez-site.xml
@@ -0,0 +1,58 @@
+
+
+
+
+
+ tez.am.client.am.port-range
+ 10001-10003
+
+
+
+ tez.am.tez-ui.webservice.enable
+ false
+
+
+
+
+ tez.am.zookeeper.quorum
+ zookeeper:2181
+
+
+
+ tez.am.log.level
+ INFO
+
+
+
+ tez.local.mode
+ true
+
+
+
+
+ tez.session.am.dag.submit.timeout.secs
+ -1
+
+
+
+
+ dfs.client.use.datanode.hostname
+ true
+
+
+
diff --git a/tez-dist/src/docker/tez-am/docker-compose.yml b/tez-dist/src/docker/tez-am/docker-compose.yml
new file mode 100644
index 0000000000..3740fabe8a
--- /dev/null
+++ b/tez-dist/src/docker/tez-am/docker-compose.yml
@@ -0,0 +1,144 @@
+---
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: tez-cluster
+
+services:
+ zookeeper:
+ image: zookeeper:3.8.4
+ container_name: zookeeper
+ hostname: zookeeper
+ networks:
+ - hadoop-network
+ ports:
+ - "2181:2181"
+ environment:
+ ZOO_MY_ID: 1
+ volumes:
+ - zookeeper_data:/data
+ - zookeeper_datalog:/datalog
+ - zookeeper_logs:/logs
+
+ namenode:
+ image: apache/hadoop:3.4.2-lean
+ container_name: namenode
+ hostname: namenode
+ platform: linux/amd64
+ networks:
+ - hadoop-network
+ ports:
+ - "9870:9870" # NameNode Web UI
+ - "9000:9000" # IPC
+ volumes:
+ - hadoop_data:/data # Default persistence path
+ - hadoop_logs:/opt/hadoop/logs
+ - ./conf/core-site.xml:/opt/hadoop/etc/hadoop/core-site.xml
+ - ./conf/hdfs-site.xml:/opt/hadoop/etc/hadoop/hdfs-site.xml
+ healthcheck:
+ test:
+ - "CMD-SHELL"
+ - >
+ su -s /bin/bash hadoop -c
+ '/opt/hadoop/bin/hdfs dfsadmin -safemode get |
+ grep -q "Safe mode is OFF"'
+ interval: 5s
+ timeout: 5s
+ retries: 5
+ user: root
+ command: >
+ /bin/bash -c "
+ chown -R hadoop:hadoop /data /opt/hadoop/logs;
+ su -s /bin/bash hadoop -c '
+ if [ ! -f /data/namenode/current/VERSION ]; then
+ echo \"Formatting NameNode...\";
+ /opt/hadoop/bin/hdfs namenode -format -force -nonInteractive;
+ fi;
+ /opt/hadoop/bin/hdfs namenode
+ '"
+
+ datanode:
+ image: apache/hadoop:3.4.2-lean
+ container_name: datanode
+ hostname: datanode
+ platform: linux/amd64
+ depends_on:
+ namenode:
+ condition: service_healthy
+ networks:
+ - hadoop-network
+ ports:
+ - "9864:9864" # DataNode Web UI
+ - "9866:9866" # DataNode
+ volumes:
+ - hadoop_data:/data # Default persistence path
+ - hadoop_logs:/opt/hadoop/logs
+ - ./conf/core-site.xml:/opt/hadoop/etc/hadoop/core-site.xml
+ - ./conf/hdfs-site.xml:/opt/hadoop/etc/hadoop/hdfs-site.xml
+ user: root
+ command: >
+ /bin/bash -c "
+ chown -R hadoop:hadoop /data /opt/hadoop/logs;
+ su -s /bin/bash hadoop -c '/opt/hadoop/bin/hdfs datanode'
+ "
+
+ tez-am:
+ image: apache/tez-am:${TEZ_VERSION:-1.0.0-SNAPSHOT}
+ container_name: tez-am
+ hostname: tez-am
+ networks:
+ - hadoop-network
+ ports:
+ - "10001:10001"
+ # - "5005:5005" # Uncomment for remote debugging
+ env_file:
+ - ./am.env
+ # Already define TEZ_CUSTOM_CONF_DIR in the env file,
+ # but adding here for clarity
+ # environment:
+ # - TEZ_CUSTOM_CONF_DIR=/opt/tez/custom-conf
+ # Uncomment the following lines if you want to mount a custom
+ # tez-site.xml for the Tez AM
+ # volumes:
+ # - ./custom-tez-site.xml:/opt/tez/custom-conf/tez-site.xml
+ # Uncomment the following lines to mount custom plugins or JARs
+ # required by Tez AM (e.g., UDFs, or dependencies previously managed
+ # via YARN localization)
+ # - ./tez-plugins:/opt/tez/plugins
+ depends_on:
+ zookeeper:
+ condition: service_started
+ namenode:
+ condition: service_healthy
+ datanode:
+ condition: service_started
+
+networks:
+ hadoop-network:
+ name: hadoop-network
+ driver: bridge
+
+volumes:
+ hadoop_data:
+ name: hadoop_data
+ hadoop_logs:
+ name: hadoop_logs
+ zookeeper_data:
+ name: zookeeper_data
+ zookeeper_datalog:
+ name: zookeeper_datalog
+ zookeeper_logs:
+ name: zookeeper_logs
diff --git a/tez-examples/src/main/java/org/apache/tez/examples/ExternalAmWordCount.java b/tez-examples/src/main/java/org/apache/tez/examples/ExternalAmWordCount.java
new file mode 100644
index 0000000000..2d77436ab4
--- /dev/null
+++ b/tez-examples/src/main/java/org/apache/tez/examples/ExternalAmWordCount.java
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tez.examples;
+
+import java.util.List;
+import java.util.Map;
+import java.util.StringTokenizer;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.curator.framework.CuratorFramework;
+import org.apache.curator.framework.CuratorFrameworkFactory;
+import org.apache.curator.retry.ExponentialBackoffRetry;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.tez.client.TezClient;
+import org.apache.tez.dag.api.DAG;
+import org.apache.tez.dag.api.DataSinkDescriptor;
+import org.apache.tez.dag.api.DataSourceDescriptor;
+import org.apache.tez.dag.api.Edge;
+import org.apache.tez.dag.api.ProcessorDescriptor;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.Vertex;
+import org.apache.tez.dag.api.client.DAGClient;
+import org.apache.tez.dag.api.client.DAGStatus;
+import org.apache.tez.dag.api.client.DAGStatus.State;
+import org.apache.tez.mapreduce.input.MRInput;
+import org.apache.tez.mapreduce.lib.MRReader;
+import org.apache.tez.mapreduce.output.MROutput;
+import org.apache.tez.mapreduce.processor.SimpleMRProcessor;
+import org.apache.tez.runtime.api.LogicalInput;
+import org.apache.tez.runtime.api.LogicalOutput;
+import org.apache.tez.runtime.api.ProcessorContext;
+import org.apache.tez.runtime.library.api.KeyValueWriter;
+import org.apache.tez.runtime.library.api.KeyValuesReader;
+import org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig;
+import org.apache.tez.runtime.library.partitioner.HashPartitioner;
+import org.apache.zookeeper.KeeperException.NoNodeException;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** Sample Program inspired for WordCount but to run with External Tez AM with Zookeeper. */
+public class ExternalAmWordCount {
+ private static final Logger LOG = LoggerFactory.getLogger(ExternalAmWordCount.class);
+ private static final String ZK_ADDRESS = "zookeeper:2181";
+ private static final String ZK_PATH = "/tez-external-sessions/tez_am/server";
+
+ public static void main(String[] args) throws Exception {
+ if (args.length != 2) {
+ System.err.println(
+ "Usage: java -cp org.apache.tez.examples.ExternalAmWordCount ");
+ System.exit(2);
+ }
+
+ String inputPath = args[0];
+ String outputPath = args[1];
+
+ TezConfiguration tezConf = getTezConf();
+
+ LOG.info(
+ "Initializing TezClient to connect to External AM via ZooKeeper quorum at {}", ZK_ADDRESS);
+
+ final TezClient tezClient =
+ TezClient.newBuilder("ExternalAmWordCount", tezConf).setIsSession(true).build();
+
+ try {
+ LOG.info("Querying ZooKeeper quorum to discover an active Tez AM session");
+ String externalAppIdString = fetchAppIdFromZookeeper();
+
+ if (externalAppIdString == null || externalAppIdString.isEmpty()) {
+ throw new IllegalStateException(
+ "No active Tez Application Master found at path " + ZK_PATH);
+ }
+ LOG.info(
+ "ZooKeeper discovery successful. Extracted Tez Application ID: {}", externalAppIdString);
+
+ ApplicationId appId = ApplicationId.fromString(externalAppIdString);
+ tezClient.getClient(appId);
+ LOG.info("TezClient is ready for DAG submission.");
+ } catch (Exception e) {
+ LOG.error("Failure while connecting to the external Tez session.", e);
+ throw e;
+ }
+
+ ExternalAmWordCount app = new ExternalAmWordCount();
+ DAG dag = app.createDAG(tezConf, inputPath, outputPath);
+
+ LOG.info("Submitting DAG to the external Tez AM...");
+ DAGClient dagClient = tezClient.submitDAG(dag);
+
+ LOG.info("DAG successfully submitted.");
+ DAGStatus dagStatus = dagClient.waitForCompletionWithStatusUpdates(null);
+
+ if (dagStatus.getState() == State.SUCCEEDED) {
+ LOG.info("Job completed successfully! WordCount output written to: {}", outputPath);
+ System.exit(0);
+ } else {
+ LOG.error("Job execution failed. Final DAG state: {}.", dagStatus.getState());
+ System.exit(1);
+ }
+ }
+
+ private static TezConfiguration getTezConf() {
+ Configuration conf = new Configuration();
+ TezConfiguration tezConf = new TezConfiguration(conf);
+
+ tezConf.set(TezConfiguration.TEZ_FRAMEWORK_MODE, "STANDALONE_ZOOKEEPER");
+ tezConf.set(TezConfiguration.TEZ_AM_ZOOKEEPER_QUORUM, ZK_ADDRESS);
+ tezConf.set(TezConfiguration.TEZ_AM_CURATOR_SESSION_TIMEOUT, "30000ms");
+ tezConf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, false);
+ tezConf.setBoolean(TezConfiguration.TEZ_IGNORE_LIB_URIS, true);
+ tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, "/tmp/tez-staging");
+ return tezConf;
+ }
+
+ /** Discovers the external Tez AM ApplicationId by connecting to the ZooKeeper quorum. */
+ public static String fetchAppIdFromZookeeper() throws Exception {
+ // Retry up to 3 times, 1s, 2s, 4s (using local variable type inference)
+ var retryPolicy = new ExponentialBackoffRetry(1000, 3);
+
+ try (CuratorFramework client = CuratorFrameworkFactory.newClient(ZK_ADDRESS, retryPolicy)) {
+ client.start();
+
+ LOG.info(
+ "Attempting to establish CuratorFramework connection to ZooKeeper at {}...", ZK_ADDRESS);
+ // Wait up to 5 seconds to establish a connection to ZooKeeper
+ if (!client.blockUntilConnected(5, TimeUnit.SECONDS)) {
+ throw new IllegalStateException(
+ "CuratorFramework timeout: Could not connect to ZooKeeper at " + ZK_ADDRESS);
+ }
+ LOG.info("CuratorFramework connection established. Querying path: {}", ZK_PATH);
+
+ try {
+ List children = client.getChildren().forPath(ZK_PATH);
+ if (children != null && !children.isEmpty()) {
+ String appId = children.getFirst();
+ LOG.info("Successfully found AppID {} under ZNode {}", appId, ZK_PATH);
+ return appId;
+ } else {
+ LOG.warn("No application IDs found under ZNode {}", ZK_PATH);
+ }
+ } catch (NoNodeException e) {
+ LOG.warn("ZooKeeper parent path {} does not exist.", ZK_PATH);
+ }
+ }
+ return null;
+ }
+
+ /** Constructs the DAG. */
+ private DAG createDAG(TezConfiguration tezConf, String inputPath, String outputPath) {
+ DataSourceDescriptor dataSource =
+ MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath)
+ .build();
+
+ DataSinkDescriptor dataSink =
+ MROutput.createConfigBuilder(new Configuration(tezConf), TextOutputFormat.class, outputPath)
+ .build();
+
+ Vertex tokenizerVertex =
+ Vertex.create("Tokenizer", ProcessorDescriptor.create(TokenProcessor.class.getName()))
+ .addDataSource("Input", dataSource);
+
+ Vertex summerVertex =
+ Vertex.create(
+ "Summer", ProcessorDescriptor.create(SumProcessor.class.getName()), 1) // 1 Reducer
+ .addDataSink("Output", dataSink);
+
+ OrderedPartitionedKVEdgeConfig edgeConf =
+ OrderedPartitionedKVEdgeConfig.newBuilder(
+ Text.class.getName(), IntWritable.class.getName(), HashPartitioner.class.getName())
+ .setFromConfiguration(tezConf)
+ .build();
+
+ return DAG.create("ZkWordCountDAG")
+ .addVertex(tokenizerVertex)
+ .addVertex(summerVertex)
+ .addEdge(Edge.create(tokenizerVertex, summerVertex, edgeConf.createDefaultEdgeProperty()));
+ }
+
+ /** Map-equivalent: reads lines of text, tokenizes them, and emits key-value like (word, 1). */
+ public static class TokenProcessor extends SimpleMRProcessor {
+ private static final IntWritable ONE = new IntWritable(1);
+ private final Text word = new Text();
+
+ public TokenProcessor(ProcessorContext context) {
+ super(context);
+ }
+
+ @Override
+ public void run() throws Exception {
+ // Get inputs/outputs
+ Map inputs = getInputs();
+ Map outputs = getOutputs();
+
+ MRReader reader = (MRReader) inputs.get("Input").getReader();
+ KeyValueWriter writer = (KeyValueWriter) outputs.get("Summer").getWriter();
+
+ while (reader.next()) {
+ Object val = reader.getCurrentValue();
+ String line = val.toString();
+ StringTokenizer tokenizer = new StringTokenizer(line);
+
+ while (tokenizer.hasMoreTokens()) {
+ word.set(tokenizer.nextToken());
+ writer.write(word, ONE);
+ }
+ }
+ }
+ }
+
+ /** Reduce-equivalent: reads (word, [1, 1, ...]) and aggregates the total sum. */
+ public static class SumProcessor extends SimpleMRProcessor {
+ public SumProcessor(ProcessorContext context) {
+ super(context);
+ }
+
+ @Override
+ public void run() throws Exception {
+ Map inputs = getInputs();
+ Map outputs = getOutputs();
+
+ KeyValuesReader reader = (KeyValuesReader) inputs.get("Tokenizer").getReader();
+ KeyValueWriter writer = (KeyValueWriter) outputs.get("Output").getWriter();
+
+ while (reader.next()) {
+ Object key = reader.getCurrentKey();
+ Iterable