From efab533b2afc10947f39fed0529c5dc595a17c50 Mon Sep 17 00:00:00 2001
From: Palash Chauhan
Date: Tue, 26 May 2026 21:53:00 -0700
Subject: [PATCH 1/2] PHOENIX-7868 : Docker setup
---
.dockerignore | 20 ++
README.md | 33 ++
docker/Dockerfile.hbase-phoenix | 63 ++++
docker/Dockerfile.phoenix-adapters | 89 +++++
docker/README.md | 333 ++++++++++++++++++
docker/conf/hbase/hbase-env.sh | 16 +
docker/conf/hbase/hbase-site.xml | 58 +++
docker/conf/phoenix-adapters/hbase-site.xml | 21 ++
docker/docker-compose.yml | 186 ++++++++++
docker/scripts/hbase-entrypoint.sh | 55 +++
docker/scripts/phoenix-adapters-entrypoint.sh | 43 +++
docker/scripts/smoke.sh | 311 ++++++++++++++++
12 files changed, 1228 insertions(+)
create mode 100644 .dockerignore
create mode 100644 docker/Dockerfile.hbase-phoenix
create mode 100644 docker/Dockerfile.phoenix-adapters
create mode 100644 docker/README.md
create mode 100644 docker/conf/hbase/hbase-env.sh
create mode 100644 docker/conf/hbase/hbase-site.xml
create mode 100644 docker/conf/phoenix-adapters/hbase-site.xml
create mode 100644 docker/docker-compose.yml
create mode 100644 docker/scripts/hbase-entrypoint.sh
create mode 100644 docker/scripts/phoenix-adapters-entrypoint.sh
create mode 100755 docker/scripts/smoke.sh
diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..5e8720b
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,20 @@
+**/target/
+**/logs/
+**/*.log
+**/*.log.*
+**/dynamodb-local-metadata.json
+**/heap-dumps/
+
+**/*.tar.gz
+**/*.tar.bz2
+**/*.zip
+
+.idea/
+.vscode/
+.cursor/
+.DS_Store
+
+.git/
+.gitignore
+
+docker/README.md
diff --git a/README.md b/README.md
index f48d461..9d51359 100644
--- a/README.md
+++ b/README.md
@@ -67,6 +67,39 @@ The Phoenix DynamoDB REST service is fully compatible with AWS SDKs. You can con
port 8842 with zk-quorum localhost:2181.
Alternative to `-z ` is env variable `ZOO_KEEPER_QUORUM`.
+#### One-shot Docker setup (recommended for first-time users)
+
+Skip steps 1-2 above with the bundled Docker cluster. From a fresh clone:
+
+**Prerequisites:** Docker Desktop running; `jq` and `curl` on `PATH`
+(`brew install jq` on macOS).
+
+```bash
+# 1. Bring up the full stack at the versions pinned in pom.xml and BLOCK
+# until every container reports healthy (REST is ~30-60s on cold start).
+# First time: ~8-12 min total; subsequent runs are cached.
+docker compose -f docker/docker-compose.yml up -d --build --wait
+
+# 2. Validate it works end-to-end (CRUD + UpdateItem + BatchWriteItem + streams).
+bash docker/scripts/smoke.sh
+# -> "Result: 20 checks PASSED across 18 API calls"
+
+# 3. Use it. The DynamoDB-compatible endpoint is at http://localhost:8842 .
+# Point any AWS SDK at it (Java/Python/Node.js snippets in
+# phoenix-ddb-rest/README.md), or hit it with curl:
+curl -s -X POST http://localhost:8842/ \
+ -H 'Content-Type: application/x-amz-json-1.0' \
+ -H 'X-Amz-Target: DynamoDB_20120810.ListTables' -d '{}'
+
+# 4. Tear down when you're done.
+docker compose -f docker/docker-compose.yml down -v
+```
+
+See [`docker/README.md`](docker/README.md) for the full reference: port
+mappings, the developer inner loop for code changes, the smoke-test
+breakdown, troubleshooting, and how to run the REST server outside
+Docker against the dockerized cluster.
+
### Building Distribution Tarball
To build a distribution tarball that includes all components:
diff --git a/docker/Dockerfile.hbase-phoenix b/docker/Dockerfile.hbase-phoenix
new file mode 100644
index 0000000..d92e1aa
--- /dev/null
+++ b/docker/Dockerfile.hbase-phoenix
@@ -0,0 +1,63 @@
+# syntax=docker/dockerfile:1
+FROM eclipse-temurin:8-jdk-jammy
+
+ARG HBASE_VERSION=2.5.14
+ARG HBASE_FLAVOR=hadoop3
+ARG PHOENIX_HBASE_LINE=2.5
+ARG PHOENIX_VERSION=5.3.1
+
+ENV HBASE_VERSION=${HBASE_VERSION} \
+ HBASE_FLAVOR=${HBASE_FLAVOR} \
+ PHOENIX_HBASE_LINE=${PHOENIX_HBASE_LINE} \
+ PHOENIX_VERSION=${PHOENIX_VERSION} \
+ JAVA_HOME=/opt/java/openjdk \
+ HBASE_HOME=/opt/hbase \
+ HBASE_CONF_DIR=/opt/hbase/conf \
+ PHOENIX_HOME=/opt/phoenix \
+ HBASE_MANAGES_ZK=false \
+ PATH=/opt/hbase/bin:/opt/phoenix/bin:/opt/java/openjdk/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+
+RUN set -eux; \
+ apt-get update; \
+ apt-get install -y --no-install-recommends \
+ bash curl ca-certificates netcat-openbsd procps tini less; \
+ rm -rf /var/lib/apt/lists/*
+
+RUN set -eux; \
+ mkdir -p "${HBASE_HOME}"; \
+ curl -fSL --retry 5 --retry-delay 5 \
+ "https://archive.apache.org/dist/hbase/${HBASE_VERSION}/hbase-${HBASE_VERSION}-${HBASE_FLAVOR}-bin.tar.gz" \
+ -o /tmp/hbase.tar.gz; \
+ tar -xzf /tmp/hbase.tar.gz -C "${HBASE_HOME}" --strip-components=1; \
+ rm /tmp/hbase.tar.gz; \
+ mkdir -p /var/log/hbase /var/run/hbase
+
+# phoenix-server JAR is copied into HBase's lib so the coprocessors and
+# the IndexedWALEditCodec are visible to both the master and every RS.
+RUN set -eux; \
+ mkdir -p "${PHOENIX_HOME}"; \
+ curl -fSL --retry 5 --retry-delay 5 \
+ "https://archive.apache.org/dist/phoenix/phoenix-${PHOENIX_VERSION}/phoenix-hbase-${PHOENIX_HBASE_LINE}-${PHOENIX_VERSION}-bin.tar.gz" \
+ -o /tmp/phoenix.tar.gz; \
+ tar -xzf /tmp/phoenix.tar.gz -C "${PHOENIX_HOME}" --strip-components=1; \
+ rm /tmp/phoenix.tar.gz; \
+ cp "${PHOENIX_HOME}/phoenix-server-hbase-${PHOENIX_HBASE_LINE}-${PHOENIX_VERSION}.jar" "${HBASE_HOME}/lib/"
+
+# Kept below the tarball downloads to preserve their (multi-hundred-MB) cache.
+# python3 is required by /opt/phoenix/bin/sqlline.py.
+RUN set -eux; \
+ apt-get update; \
+ apt-get install -y --no-install-recommends python3; \
+ rm -rf /var/lib/apt/lists/*; \
+ ln -sf /usr/bin/python3 /usr/local/bin/python
+
+COPY conf/hbase/hbase-site.xml ${HBASE_HOME}/conf/hbase-site.xml
+COPY conf/hbase/hbase-env.sh ${HBASE_HOME}/conf/hbase-env.sh
+
+COPY scripts/hbase-entrypoint.sh /usr/local/bin/entrypoint.sh
+RUN chmod +x /usr/local/bin/entrypoint.sh
+
+WORKDIR /opt
+
+ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/entrypoint.sh"]
+CMD ["help"]
diff --git a/docker/Dockerfile.phoenix-adapters b/docker/Dockerfile.phoenix-adapters
new file mode 100644
index 0000000..abb78f0
--- /dev/null
+++ b/docker/Dockerfile.phoenix-adapters
@@ -0,0 +1,89 @@
+# syntax=docker/dockerfile:1
+#
+# Build context: project root (the Maven reactor needs every module).
+#
+FROM maven:3.9-eclipse-temurin-8 AS builder
+
+WORKDIR /workspace
+
+# Copy poms first to maximise dep-layer cache hits on rebuild.
+COPY pom.xml ./
+COPY phoenix-ddb-utils/pom.xml phoenix-ddb-utils/pom.xml
+COPY phoenix-ddb-rest/pom.xml phoenix-ddb-rest/pom.xml
+COPY phoenix-ddb-assembly/pom.xml phoenix-ddb-assembly/pom.xml
+COPY coverage-report/pom.xml coverage-report/pom.xml
+
+# `|| true` because the cross-module reactor can't resolve siblings yet;
+# this step is only here to warm ~/.m2.
+RUN --mount=type=cache,target=/root/.m2 \
+ mvn -B -q -DskipTests \
+ -pl phoenix-ddb-utils,phoenix-ddb-rest,phoenix-ddb-assembly -am \
+ dependency:go-offline || true
+
+COPY phoenix-ddb-utils phoenix-ddb-utils
+COPY phoenix-ddb-rest phoenix-ddb-rest
+COPY phoenix-ddb-assembly phoenix-ddb-assembly
+COPY coverage-report coverage-report
+COPY bin bin
+COPY conf conf
+COPY README.md DDB_API_REFERENCE.md ./
+
+RUN --mount=type=cache,target=/root/.m2 \
+ mvn -B -DskipTests \
+ -pl phoenix-ddb-assembly -am \
+ clean package
+
+RUN set -eux; \
+    set -- phoenix-ddb-assembly/target/phoenix-adapters-*-bin.tar.gz; \
+    cp "$1" /tmp/phoenix-adapters-bin.tar.gz
+
+FROM eclipse-temurin:8-jdk-jammy
+
+ENV JAVA_HOME=/opt/java/openjdk \
+ PHOENIX_ADAPTERS_HOME=/opt/phoenix-adapters \
+ PHOENIX_ADAPTERS_CONF_DIR=/opt/phoenix-adapters/conf \
+ PHOENIX_ADAPTERS_LOG_DIR=/var/log/phoenix-adapters \
+ PHOENIX_ADAPTERS_PID_DIR=/var/run/phoenix-adapters \
+ PHOENIX_REST_PORT=8842 \
+ ZOO_KEEPER_QUORUM=zookeeper:2181 \
+ HBASE_MASTER_HOST=hbase-master \
+ HBASE_MASTER_PORT=16000
+
+RUN set -eux; \
+ apt-get update; \
+ apt-get install -y --no-install-recommends \
+ bash curl ca-certificates netcat-openbsd tini procps; \
+ rm -rf /var/lib/apt/lists/*; \
+ mkdir -p "${PHOENIX_ADAPTERS_LOG_DIR}" "${PHOENIX_ADAPTERS_PID_DIR}"
+
+COPY --from=builder /tmp/phoenix-adapters-bin.tar.gz /tmp/phoenix-adapters-bin.tar.gz
+
+RUN set -eux; \
+ mkdir -p "${PHOENIX_ADAPTERS_HOME}"; \
+ tar -xzf /tmp/phoenix-adapters-bin.tar.gz -C "${PHOENIX_ADAPTERS_HOME}" --strip-components=1; \
+ rm /tmp/phoenix-adapters-bin.tar.gz; \
+ chmod -R +x "${PHOENIX_ADAPTERS_HOME}/bin"; \
+ # The assembly ships a mix of hadoop-common 3.3.6 (declared in pom.xml)
+ # and hadoop-hdfs/yarn/mapreduce 3.4.x (transitive from phoenix-core-client
+ # via hbase-server:2.5.14-hadoop3). The 3.4.x jars register FileSystem
+ # impls that reference `WithErasureCoding`, a class only present in
+ # hadoop-common 3.4.x. When HBase returns a remote exception during
+ # bootstrap, the client's classloader tries to enumerate FileSystem
+ # impls, hits NoClassDefFoundError, and poisons the JVM. The REST
+ # server only talks to HBase via RPC and never opens HDFS directly,
+ # so we strip the 3.4.x hadoop client jars to break the cycle.
+ rm -f "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-hdfs-"*.jar \
+ "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-hdfs-client-"*.jar \
+ "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-yarn-"*.jar \
+ "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-mapreduce-client-"*.jar \
+ "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-distcp-"*.jar
+
+# Client-side WAL codec / RPC controller must match the server cluster.
+COPY docker/conf/phoenix-adapters/hbase-site.xml ${PHOENIX_ADAPTERS_CONF_DIR}/hbase-site.xml
+
+COPY docker/scripts/phoenix-adapters-entrypoint.sh /usr/local/bin/entrypoint.sh
+RUN chmod +x /usr/local/bin/entrypoint.sh
+
+EXPOSE 8842
+
+ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/entrypoint.sh"]
diff --git a/docker/README.md b/docker/README.md
new file mode 100644
index 0000000..618bee1
--- /dev/null
+++ b/docker/README.md
@@ -0,0 +1,333 @@
+# Local Docker Cluster for Phoenix Adapters
+
+Brings up the full dependency stack (Hadoop / ZooKeeper / HBase / Phoenix)
+required to run **phoenix-adapters** on your laptop. Uses upstream images
+where they exist; custom only where they don't.
+
+| Component | Version | Image |
+| --- | --- | --- |
+| Apache ZooKeeper | 3.8.4 | [`library/zookeeper:3.8.4`](https://hub.docker.com/_/zookeeper) (Docker Official) |
+| Apache Hadoop (HDFS) | 3.3.6 | [`apache/hadoop:3.3.6`](https://hub.docker.com/r/apache/hadoop) (Apache convenience build) |
+| Apache HBase | 2.5.14-hadoop3 | `phoenix-adapters/hbase-phoenix:latest` (custom) |
+| Apache Phoenix | 5.3.1 (phoenix-hbase-2.5) | bundled into `phoenix-adapters/hbase-phoenix` |
+| Phoenix Adapters REST | this repo | `phoenix-adapters/rest:latest` (custom) |
+
+Versions are kept in lockstep with the top-level [`pom.xml`](../pom.xml).
+
+> **Apple Silicon.** `apache/hadoop:3.3.6` is amd64-only; the compose file
+> pins `platform: linux/amd64` so the NameNode/DataNode run under Rosetta
+> emulation. Slower than native, but functional.
+
+## Layout
+
+```
+docker/
+├── Dockerfile.hbase-phoenix # HBase 2.5.14 + Phoenix 5.3.1
+├── Dockerfile.phoenix-adapters # Multi-stage build of the REST server
+├── docker-compose.yml
+├── conf/
+│ ├── hbase/{hbase-site.xml,hbase-env.sh}
+│ └── phoenix-adapters/hbase-site.xml # Client-side overrides
+└── scripts/
+ ├── hbase-entrypoint.sh # hbase-master, hbase-regionserver
+ ├── phoenix-adapters-entrypoint.sh
+ └── smoke.sh # End-to-end DDB validation suite
+```
+
+ZooKeeper and Hadoop config lives entirely in `docker-compose.yml` as env
+vars that the upstream images template into XML.
+
+## Quick start
+
+**Prerequisites:** Docker Desktop running; `jq` and `curl` on `PATH`
+(`brew install jq` on macOS).
+
+From the **project root**:
+
+```bash
+# 1. Bring up the full stack (ZK + HDFS + HBase+Phoenix + REST) and BLOCK
+# until every service reports healthy (REST takes ~30-60s on a cold
+# start because Phoenix has to bootstrap SYSTEM.* tables).
+# First time: ~8-12 min (pulls upstream images + builds HBase/Phoenix + REST).
+# Subsequent runs: cached.
+docker compose -f docker/docker-compose.yml up -d --build --wait
+
+# 2. Validate it works end-to-end (CRUD + UpdateItem + BatchWriteItem + streams).
+bash docker/scripts/smoke.sh
+# -> "Result: 20 checks PASSED across 18 API calls"
+
+# 3. Use it. The DynamoDB-compatible REST endpoint is at http://localhost:8842 .
+# Point any AWS SDK at it (Java/Python/Node.js snippets in
+# phoenix-ddb-rest/README.md), or hit it directly with curl:
+curl -s -X POST http://localhost:8842/ \
+ -H 'Content-Type: application/x-amz-json-1.0' \
+ -H 'X-Amz-Target: DynamoDB_20120810.ListTables' -d '{}'
+
+# 4. Tear down when you're done.
+docker compose -f docker/docker-compose.yml down # keep volumes
+docker compose -f docker/docker-compose.yml down -v # also wipe HDFS + ZK
+```
+
+### URLs
+
+| URL | Service |
+| --- | --- |
+| http://localhost:8842 | **Phoenix Adapters REST (DynamoDB-compatible)** |
+| http://localhost:9870 | HDFS NameNode UI |
+| http://localhost:9864 | HDFS DataNode UI |
+| http://localhost:16010 | HBase Master UI |
+| http://localhost:16030 | HBase RegionServer UI |
+
+Two host ports are remapped because their defaults often collide on dev
+machines (other local dev services on 9000, a locally installed Kafka/ZK on 2181):
+
+| Service | Container | Host |
+| --- | --- | --- |
+| HDFS NameNode RPC | `namenode:9000` | `localhost:19000` |
+| ZooKeeper client | `zookeeper:2181` | `localhost:12181` |
+
+Inter-container traffic still uses the standard ports.
+
+### Bring up just the cluster (no REST)
+
+```bash
+docker compose -f docker/docker-compose.yml up -d --build \
+ zookeeper namenode datanode hbase-master hbase-regionserver
+```
+
+## Validation suite
+
+`docker/scripts/smoke.sh` exercises every supported DynamoDB API against
+the running REST server and asserts the expected behaviour. It prints
+each request, response, and assertion as it runs.
+
+```bash
+docker compose -f docker/docker-compose.yml up -d --build --wait
+bash docker/scripts/smoke.sh
+```
+
+Exits `0` on full pass; exits non-zero on the first failed assertion and
+prints the offending response.
+
+| Step | API |
+| --- | --- |
+| 1 | `ListTables` (baseline) |
+| 2 | `CreateTable` (with `StreamSpecification` enabled, `NEW_AND_OLD_IMAGES`) |
+| 3 | `DescribeTable` |
+| 4 | `PutItem` (`id=a`) |
+| 5 | `UpdateItem` (`SET score, bonus`, `ReturnValues=ALL_NEW`) |
+| 6 | `GetItem` |
+| 7 | `PutItem` (`id=b`) |
+| 8 | `Scan` |
+| 9 | `Query` |
+| 10 | `DeleteItem` |
+| 11 | `Scan` (after delete) |
+| 12 | `BatchWriteItem` (mixed put + delete) |
+| 13 | `Scan` paginated (drains all pages) |
+| 14 | `ListStreams` |
+| 15 | `DescribeStream` (polls until `StreamStatus == ENABLED`) |
+| 16 | `GetShardIterator` (`TRIM_HORIZON`) |
+| 17 | `GetRecords` (drains all pages) |
+| 18 | `DeleteTable` |
+
+## Poking around the cluster
+
+HBase shell:
+
+```bash
+docker compose -f docker/docker-compose.yml exec hbase-master hbase shell
+```
+
+```text
+status
+list
+create 'demo', 'cf'
+put 'demo', 'r1', 'cf:c1', 'hello'
+scan 'demo'
+```
+
+Phoenix sqlline:
+
+```bash
+docker compose -f docker/docker-compose.yml exec hbase-master \
+ /opt/phoenix/bin/sqlline.py zookeeper:2181
+```
+
+```sql
+!tables
+CREATE TABLE IF NOT EXISTS t1 (id BIGINT PRIMARY KEY, name VARCHAR);
+UPSERT INTO t1 VALUES (1, 'phoenix-adapters');
+SELECT * FROM t1;
+```
+
+## Developer inner loop: code change → live endpoint
+
+```
+phoenix-ddb-rest/src/**.java
+ │ (1) edit on host
+ ▼
+docker compose ... up -d --build phoenix-adapters-rest
+ ├── stage 1: mvn package -DskipTests (BuildKit caches ~/.m2)
+ ├── stage 1 output: phoenix-ddb-assembly/target/*-bin.tar.gz
+ └── stage 2: temurin runtime extracts that tarball
+ │
+ ▼
+http://localhost:8842/ (new code, live)
+```
+
+The cluster (ZK + HDFS + HBase) keeps running across REST rebuilds, and
+HBase data persists across full `down`/`up` cycles.
+
+### The loop
+
+1. Edit code in `phoenix-ddb-rest/src/...` or `phoenix-ddb-utils/src/...`.
+2. *(Optional)* sanity-check the compile on the host:
+
+ ```bash
+ mvn -B -DskipTests -pl phoenix-ddb-rest -am package
+ ```
+
+3. Rebuild and recreate just the REST container:
+
+ ```bash
+ docker compose -f docker/docker-compose.yml up -d --build phoenix-adapters-rest
+ ```
+
+ No-dep-change rebuilds typically take 30-60 s on a warm cache.
+4. Watch logs:
+
+ ```bash
+ docker compose -f docker/docker-compose.yml logs -f phoenix-adapters-rest
+ ```
+5. Hit the endpoint and verify.
+
+### Quick reference
+
+| Task | Command |
+| --- | --- |
+| Rebuild REST + restart it | `docker compose -f docker/docker-compose.yml up -d --build phoenix-adapters-rest` |
+| Restart REST (no code change) | `docker compose -f docker/docker-compose.yml restart phoenix-adapters-rest` |
+| Tail REST logs | `docker compose -f docker/docker-compose.yml logs -f phoenix-adapters-rest` |
+| Tail HBase logs | `docker compose -f docker/docker-compose.yml logs -f hbase-master hbase-regionserver` |
+| HBase shell | `docker compose -f docker/docker-compose.yml exec hbase-master hbase shell` |
+| Phoenix sqlline | `docker compose -f docker/docker-compose.yml exec hbase-master /opt/phoenix/bin/sqlline.py zookeeper:2181` |
+| List containers | `docker compose -f docker/docker-compose.yml ps` |
+| Stop (keep data) | `docker compose -f docker/docker-compose.yml down` |
+| Stop + wipe data | `docker compose -f docker/docker-compose.yml down -v` |
+
+### Edge cases
+
+| Situation | What to do |
+| --- | --- |
+| Changed `conf/hbase/hbase-site.xml` or `hbase-env.sh` | `docker compose ... up -d --build hbase-master hbase-regionserver`. Existing tables survive. |
+| Bumped `hbase.version` / `phoenix.version` in `pom.xml` | Bump matching `ARG`s in `Dockerfile.hbase-phoenix`, then `--build hbase-master hbase-regionserver phoenix-adapters-rest`. Often pair with `down -v`. |
+| Added a Maven dep to `phoenix-ddb-rest/pom.xml` | `--build phoenix-adapters-rest`. New dep downloads once; cache warms after. |
+| Clean slate | `docker compose ... down -v` then `up -d --build`. |
+| Code doesn't seem picked up | You ran `restart` instead of `up --build`. `restart` does not rebuild. |
+
+### Pre-PR checklist
+
+```bash
+# 1. Host-side compile + unit tests (no cluster required).
+mvn -B clean install -DskipITs
+
+# 2. End-to-end validation: fresh stack + full DDB round-trip including streams.
+docker compose -f docker/docker-compose.yml down -v
+docker compose -f docker/docker-compose.yml up -d --build --wait
+bash docker/scripts/smoke.sh
+
+# 3. Tear it down.
+docker compose -f docker/docker-compose.yml down -v
+```
+
+If `smoke.sh` finishes with `Result: 20 checks PASSED across 18 API calls`,
+your change is wire-compatible end to end through Phoenix on dockerized
+HBase across CRUD, batch, and the change-stream chain.
+
+## Running the REST server outside Docker
+
+1. Bring up only the cluster services.
+2. Add cluster hostnames to `/etc/hosts` (HBase advertises hostnames over ZK):
+
+ ```
+ 127.0.0.1 zookeeper namenode datanode hbase-master hbase-regionserver
+ ```
+
+3. Start the REST server pointing at the dockerized ZooKeeper:
+
+ ```bash
+ mvn -DskipTests clean package
+ tar xzf phoenix-ddb-assembly/target/phoenix-adapters-*-bin.tar.gz -C /tmp
+ cd /tmp/phoenix-adapters-*
+ export JAVA_HOME=$(/usr/libexec/java_home -v 1.8) # macOS example
+ export PHOENIX_ADAPTERS_HOME=$(pwd)
+ bin/phoenix-adapters rest foreground_start -p 8842 -z localhost:12181
+ ```
+
+## Phoenix tuning baked into the image
+
+[`docker/conf/hbase/hbase-site.xml`](conf/hbase/hbase-site.xml) enables what
+Phoenix 5.x needs for secondary indexes, DDL events, and the multi-priority
+RPC controller:
+
+| Property | Value |
+| --- | --- |
+| `hbase.coprocessor.master.classes` | `…PhoenixMasterObserver` |
+| `hbase.coprocessor.regionserver.classes` | `…PhoenixRegionServerEndpoint` |
+| `hbase.regionserver.wal.codec` | `…IndexedWALEditCodec` |
+| `hbase.region.server.rpc.scheduler.factory.class` | `…PhoenixRpcSchedulerFactory` |
+| `hbase.rpc.controllerfactory.class` | `…ServerRpcControllerFactory` |
+| `phoenix.task.handling.interval.ms` | `10` |
+| `phoenix.task.handling.initial.delay.ms` | `1` |
+
+`phoenix-server-hbase-2.5-5.3.1.jar` is copied into `${HBASE_HOME}/lib/` so
+the coprocessors and WAL codec are visible to master and every RegionServer.
+
+## Why upstream images for ZK + Hadoop but not HBase?
+
+| Component | Decision | Reason |
+| --- | --- | --- |
+| ZooKeeper 3.8.4 | Upstream `zookeeper:3.8.4` | Docker Official, exact version, multi-arch. |
+| Hadoop 3.3.6 | Upstream `apache/hadoop:3.3.6` | Apache convenience build at the exact version. amd64-only, runs under emulation on Apple Silicon. |
+| HBase 2.5.14-hadoop3 | Custom | No official Apache image; community images don't cover `2.5.14-hadoop3`. |
+| Phoenix 5.3.1 | Custom (layered on HBase) | No Phoenix image anywhere; server JAR must be on HBase's classpath. |
+
+## Troubleshooting
+
+* **NameNode unhealthy on first start.** First start formats the NameNode
+ via `ENSURE_NAMENODE_DIR`. Watch with `docker compose ... logs -f namenode`.
+* **HBase Master `RegionTooBusyException` / `NotServingRegion`.** Wait ~30 s
+ after RegionServer comes up; Phoenix bootstraps `SYSTEM.*` tables on its
+ first connection and the REST server retries transparently.
+* **REST exits with `NoClassDefFoundError: org/apache/hadoop/fs/WithErasureCoding`.**
+ The phoenix-ddb-assembly tarball ships `hadoop-common:3.3.6` (from
+ `pom.xml`) alongside `hadoop-hdfs:3.4.x` / `hadoop-yarn:3.4.x`
+ (transitive from `phoenix-core-client`). The 3.4.x JARs register
+ FileSystem impls that need `WithErasureCoding`, which only exists in
+ hadoop-common 3.4+. When HBase returns a remote exception during
+ bootstrap, the client tries to enumerate FileSystem impls, hits
+ `NoClassDefFoundError`, and poisons the JVM. The REST image
+ `Dockerfile.phoenix-adapters` strips the 3.4.x `hadoop-hdfs*`,
+ `hadoop-yarn-*`, `hadoop-mapreduce-client-*`, and `hadoop-distcp-*`
+ jars after extracting the tarball — the REST server only talks to
+ HBase via RPC and never opens HDFS directly, so removing them is safe.
+ If this error reappears, check that those `rm -f` lines in
+ `Dockerfile.phoenix-adapters` weren't dropped.
+* **`Datanode denied communication with namenode`.** Cluster ID mismatch.
+ Run `docker compose -f docker/docker-compose.yml down -v`, then bring the stack back up.
+* **`platform mismatch` warnings on Apple Silicon.** Expected for the
+ Hadoop containers (amd64 image, emulated). No action needed.
+
+## Customising versions
+
+HBase / Phoenix versions are `ARG`s on `Dockerfile.hbase-phoenix`:
+
+```bash
+docker compose -f docker/docker-compose.yml build \
+ --build-arg HBASE_VERSION=2.5.13 \
+ --build-arg PHOENIX_VERSION=5.3.0 \
+ hbase-master
+```
+
+Hadoop and ZooKeeper versions are pinned by tag in `docker-compose.yml`.
+Keep all four in lockstep with `pom.xml`.
diff --git a/docker/conf/hbase/hbase-env.sh b/docker/conf/hbase/hbase-env.sh
new file mode 100644
index 0000000..a5c243b
--- /dev/null
+++ b/docker/conf/hbase/hbase-env.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+export JAVA_HOME=${JAVA_HOME:-/opt/java/openjdk}
+export HBASE_MANAGES_ZK=false
+export HBASE_LOG_DIR=/var/log/hbase
+export HBASE_PID_DIR=/var/run/hbase
+
+# Sized to fit the whole stack in ~4 GB of Docker memory.
+export HBASE_HEAPSIZE=1G
+export HBASE_OFFHEAPSIZE=256m
+
+# Strip JDK11-specific GC flags HBase ships with; we run on JDK8.
+export HBASE_OPTS="-XX:+UseG1GC -XX:+UnlockExperimentalVMOptions"
+export HBASE_MASTER_OPTS="${HBASE_OPTS} -Xms256m"
+export HBASE_REGIONSERVER_OPTS="${HBASE_OPTS} -Xms512m"
+
+unset HBASE_JSHELL_ARGS
diff --git a/docker/conf/hbase/hbase-site.xml b/docker/conf/hbase/hbase-site.xml
new file mode 100644
index 0000000..a47b462
--- /dev/null
+++ b/docker/conf/hbase/hbase-site.xml
@@ -0,0 +1,58 @@
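+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+  <property>
+    <name>hbase.rootdir</name>
+    <value>hdfs://namenode:9000/hbase</value>
+  </property>
+  <property>
+    <name>hbase.cluster.distributed</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hbase.zookeeper.quorum</name>
+    <value>zookeeper</value>
+  </property>
+  <property>
+    <name>hbase.zookeeper.property.clientPort</name>
+    <value>2181</value>
+  </property>
+  <property>
+    <name>hbase.unsafe.stream.capability.enforce</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hbase.wal.provider</name>
+    <value>filesystem</value>
+  </property>
+
+  <!-- Phoenix coprocessors, WAL codec and RPC plumbing. -->
+  <property>
+    <name>hbase.coprocessor.master.classes</name>
+    <value>org.apache.phoenix.coprocessor.PhoenixMasterObserver</value>
+  </property>
+  <property>
+    <name>hbase.coprocessor.regionserver.classes</name>
+    <value>org.apache.phoenix.coprocessor.PhoenixRegionServerEndpoint</value>
+  </property>
+  <property>
+    <name>hbase.regionserver.wal.codec</name>
+    <value>org.apache.hadoop.hbase.regionserver.wal.IndexedWALEditCodec</value>
+  </property>
+  <property>
+    <name>hbase.region.server.rpc.scheduler.factory.class</name>
+    <value>org.apache.hadoop.hbase.ipc.PhoenixRpcSchedulerFactory</value>
+  </property>
+  <property>
+    <name>hbase.rpc.controllerfactory.class</name>
+    <value>org.apache.hadoop.hbase.ipc.controller.ServerRpcControllerFactory</value>
+  </property>
+  <property>
+    <name>phoenix.task.handling.interval.ms</name>
+    <value>10</value>
+  </property>
+  <property>
+    <name>phoenix.task.handling.initial.delay.ms</name>
+    <value>1</value>
+  </property>
+</configuration>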
diff --git a/docker/conf/phoenix-adapters/hbase-site.xml b/docker/conf/phoenix-adapters/hbase-site.xml
new file mode 100644
index 0000000..fd993b9
--- /dev/null
+++ b/docker/conf/phoenix-adapters/hbase-site.xml
@@ -0,0 +1,21 @@
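+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+  <!-- Client-side overrides; these must match the server cluster. -->
+  <property>
+    <name>hbase.regionserver.wal.codec</name>
+    <value>org.apache.hadoop.hbase.regionserver.wal.IndexedWALEditCodec</value>
+  </property>
+  <property>
+    <name>hbase.rpc.controllerfactory.class</name>
+    <value>org.apache.hadoop.hbase.ipc.controller.ClientRpcControllerFactory</value>
+  </property>
+  <property>
+    <name>phoenix.task.handling.interval.ms</name>
+    <value>10</value>
+  </property>
+  <property>
+    <name>phoenix.task.handling.initial.delay.ms</name>
+    <value>1</value>
+  </property>
+</configuration>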
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
new file mode 100644
index 0000000..794cb6e
--- /dev/null
+++ b/docker/docker-compose.yml
@@ -0,0 +1,186 @@
+# Bring up from the project root:
+# docker compose -f docker/docker-compose.yml up --build
+#
+# Cluster only (no REST):
+# docker compose -f docker/docker-compose.yml up --build \
+# zookeeper namenode datanode hbase-master hbase-regionserver
+
+name: phoenix-adapters
+
+services:
+
+ zookeeper:
+ image: zookeeper:3.8.4
+ container_name: phx-zookeeper
+ hostname: zookeeper
+ environment:
+ ZOO_4LW_COMMANDS_WHITELIST: "srvr,ruok,mntr,conf"
+ ZOO_ADMINSERVER_ENABLED: "false"
+ ZOO_AUTOPURGE_PURGEINTERVAL: 24
+ ZOO_AUTOPURGE_SNAPRETAINCOUNT: 3
+ ports:
+ # Host port shifted off the default; 2181 is often busy on dev machines.
+ - "12181:2181"
+ volumes:
+ - zookeeper-data:/data
+ - zookeeper-datalog:/datalog
+ healthcheck:
+ test: ["CMD-SHELL", "echo ruok | nc -w 2 localhost 2181 | grep -q imok"]
+ interval: 5s
+ timeout: 5s
+ retries: 20
+ networks: [phoenix-net]
+
+ # apache/hadoop:3.3.6 is amd64-only; on Apple Silicon Docker emulates
+ # via Rosetta/qemu (slower but functional).
+ namenode:
+ image: apache/hadoop:3.3.6
+ platform: linux/amd64
+ container_name: phx-namenode
+ hostname: namenode
+ environment:
+ # Triggers a first-time `hdfs namenode -format` when this dir is empty.
+ ENSURE_NAMENODE_DIR: /data/namenode
+ # The apache/hadoop image templates *-SITE.XML files from these env vars.
+ CORE-SITE.XML_fs.defaultFS: "hdfs://namenode:9000"
+ HDFS-SITE.XML_dfs.replication: "1"
+ HDFS-SITE.XML_dfs.namenode.name.dir: "file:///data/namenode"
+ HDFS-SITE.XML_dfs.datanode.data.dir: "file:///data/datanode"
+ HDFS-SITE.XML_dfs.permissions.enabled: "false"
+ HDFS-SITE.XML_dfs.namenode.datanode.registration.ip-hostname-check: "false"
+ HDFS-SITE.XML_dfs.client.use.datanode.hostname: "true"
+ HDFS-SITE.XML_dfs.datanode.use.datanode.hostname: "true"
+ command: ["hdfs", "namenode"]
+ ports:
+ - "9870:9870"
+ # Host port shifted off 9000, which other local dev tools often bind.
+ - "19000:9000"
+ volumes:
+ - namenode-data:/data
+ healthcheck:
+ # Hadoop binds to the hostname, not localhost.
+ test: ["CMD-SHELL", "nc -z namenode 9000 || exit 1"]
+ interval: 5s
+ timeout: 5s
+ retries: 30
+ networks: [phoenix-net]
+
+ datanode:
+ image: apache/hadoop:3.3.6
+ platform: linux/amd64
+ container_name: phx-datanode
+ hostname: datanode
+ depends_on:
+ namenode:
+ condition: service_healthy
+ environment:
+ WAITFOR: namenode:9000
+ CORE-SITE.XML_fs.defaultFS: "hdfs://namenode:9000"
+ HDFS-SITE.XML_dfs.replication: "1"
+ HDFS-SITE.XML_dfs.namenode.name.dir: "file:///data/namenode"
+ HDFS-SITE.XML_dfs.datanode.data.dir: "file:///data/datanode"
+ HDFS-SITE.XML_dfs.permissions.enabled: "false"
+ HDFS-SITE.XML_dfs.client.use.datanode.hostname: "true"
+ HDFS-SITE.XML_dfs.datanode.use.datanode.hostname: "true"
+ command: ["hdfs", "datanode"]
+ ports:
+ - "9864:9864"
+ volumes:
+ - datanode-data:/data
+ healthcheck:
+ test: ["CMD-SHELL", "nc -z datanode 9866 || exit 1"]
+ interval: 5s
+ timeout: 5s
+ retries: 30
+ networks: [phoenix-net]
+
+ hbase-master:
+ image: phoenix-adapters/hbase-phoenix:latest
+ build:
+ context: .
+ dockerfile: Dockerfile.hbase-phoenix
+ container_name: phx-hbase-master
+ hostname: hbase-master
+ command: ["hbase-master"]
+ depends_on:
+ zookeeper:
+ condition: service_healthy
+ namenode:
+ condition: service_healthy
+ datanode:
+ condition: service_healthy # gate on the DataNode healthcheck, not just container start
+ ports:
+ - "16000:16000"
+ - "16010:16010"
+ healthcheck:
+ test: ["CMD-SHELL", "nc -z hbase-master 16000 || exit 1"]
+ interval: 10s
+ timeout: 5s
+ retries: 30
+ networks: [phoenix-net]
+
+ hbase-regionserver:
+ image: phoenix-adapters/hbase-phoenix:latest
+ build:
+ context: .
+ dockerfile: Dockerfile.hbase-phoenix
+ container_name: phx-hbase-regionserver
+ hostname: hbase-regionserver
+ command: ["hbase-regionserver"]
+ depends_on:
+ hbase-master:
+ condition: service_healthy
+ ports:
+ - "16020:16020"
+ - "16030:16030"
+ healthcheck:
+ test: ["CMD-SHELL", "nc -z hbase-regionserver 16020 || exit 1"]
+ interval: 5s
+ timeout: 5s
+ retries: 30
+ networks: [phoenix-net]
+
+ phoenix-adapters-rest:
+ image: phoenix-adapters/rest:latest
+ build:
+ context: ..
+ dockerfile: docker/Dockerfile.phoenix-adapters
+ container_name: phx-adapters-rest
+ hostname: phoenix-adapters-rest
+ depends_on:
+ hbase-master:
+ condition: service_healthy
+ hbase-regionserver:
+ condition: service_healthy
+ environment:
+ - ZOO_KEEPER_QUORUM=zookeeper:2181
+ - PHOENIX_REST_PORT=8842
+ - HBASE_MASTER_HOST=hbase-master
+ - HBASE_MASTER_PORT=16000
+ ports:
+ - "8842:8842"
+ # Probes the real API: only "healthy" once Phoenix has bootstrapped
+ # SYSTEM.* tables and Jetty is accepting POSTs.
+ healthcheck:
+ test:
+ - "CMD-SHELL"
+ - >-
+ curl -fs -m 3 -X POST http://localhost:8842/
+ -H 'Content-Type: application/x-amz-json-1.0'
+ -H 'X-Amz-Target: DynamoDB_20120810.ListTables'
+ -d '{}' || exit 1
+ interval: 5s
+ timeout: 5s
+ retries: 60
+ start_period: 30s
+ networks: [phoenix-net]
+
+volumes:
+ zookeeper-data:
+ zookeeper-datalog:
+ namenode-data:
+ datanode-data:
+
+networks:
+ phoenix-net:
+ driver: bridge
diff --git a/docker/scripts/hbase-entrypoint.sh b/docker/scripts/hbase-entrypoint.sh
new file mode 100644
index 0000000..ed21d07
--- /dev/null
+++ b/docker/scripts/hbase-entrypoint.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+#
+# Usage: entrypoint.sh <role>
+# role := hbase-master | hbase-regionserver | bash | help
+#
+set -euo pipefail
+
+ROLE="${1:-help}"
+
+log() { printf '[hbase-entrypoint][%s] %s\n' "$(date -u +%H:%M:%S)" "$*"; }  # UTC-stamped line on stdout
+fail() { log "ERROR: $*"; exit 1; }  # report through log, then abort with status 1
+
+wait_for() {  # $1 = host, $2 = TCP port; polls with nc every 2 s until it connects
+  local target_host="$1" target_port="$2"
+  log "Waiting for ${target_host}:${target_port} ..."
+  while ! nc -z "${target_host}" "${target_port}" 2>/dev/null; do
+    sleep 2
+  done
+  log "${target_host}:${target_port} is reachable."
+}
+
+case "${ROLE}" in
+  hbase-master)  # wait for ZooKeeper and the NameNode, then exec the Master
+    wait_for "${ZOOKEEPER_HOST:-zookeeper}" "${ZOOKEEPER_PORT:-2181}"
+    wait_for "${NAMENODE_HOST:-namenode}" "${NAMENODE_PORT:-9000}"
+    exec "${HBASE_HOME}/bin/hbase" master start
+    ;;
+
+  hbase-regionserver)  # wait for ZooKeeper and the Master, then exec the RegionServer
+    wait_for "${ZOOKEEPER_HOST:-zookeeper}" "${ZOOKEEPER_PORT:-2181}"
+    wait_for "${HMASTER_HOST:-hbase-master}" "${HMASTER_PORT:-16000}"
+    exec "${HBASE_HOME}/bin/hbase" regionserver start
+    ;;
+
+  bash|shell)
+    exec /bin/bash
+    ;;
+
+  help|*)  # print usage; any role other than "help" then fails with status 1
+    cat <<EOF
+Usage: entrypoint.sh <role>
+
+Roles:
+ hbase-master Run the HBase Master.
+ hbase-regionserver Run an HBase RegionServer.
+ bash Drop into a shell inside the image.
+
+Versions:
+ HBase ${HBASE_VERSION}-${HBASE_FLAVOR}
+ Phoenix ${PHOENIX_VERSION} (phoenix-hbase-${PHOENIX_HBASE_LINE})
+EOF
+    [[ "${ROLE}" == "help" ]] && exit 0
+    fail "Unknown role: ${ROLE}"
+    ;;
+esac
diff --git a/docker/scripts/phoenix-adapters-entrypoint.sh b/docker/scripts/phoenix-adapters-entrypoint.sh
new file mode 100644
index 0000000..1c4685d
--- /dev/null
+++ b/docker/scripts/phoenix-adapters-entrypoint.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+log() { echo "[phoenix-adapters][$(date -u +%H:%M:%S)] $*"; }
+
+wait_for() {
+ local host="$1" port="$2"
+ log "Waiting for ${host}:${port} ..."
+ until nc -z "${host}" "${port}" 2>/dev/null; do
+ sleep 2
+ done
+ log "${host}:${port} is reachable."
+}
+
+zk_quorum="${ZOO_KEEPER_QUORUM:-zookeeper:2181}"
+zk_first="${zk_quorum%%,*}"  # readiness probe targets only the first member of "h1:p1,h2:p2"
+zk_host="${zk_first%%:*}"; zk_port="${zk_first##*:}"
+[[ "${zk_port}" =~ ^[0-9]+$ ]] || zk_port=2181  # entry carried no numeric port -> default 2181
+
+wait_for "${zk_host}" "${zk_port}"
+wait_for "${HBASE_MASTER_HOST:-hbase-master}" "${HBASE_MASTER_PORT:-16000}"
+
+# Give the master a moment to finish initialising hbase:meta before the
+# first Phoenix connection bootstraps SYSTEM.* tables.
+sleep "${PHOENIX_BOOTSTRAP_SLEEP_SECONDS:-5}"
+
+log "Starting Phoenix Adapters REST on :${PHOENIX_REST_PORT} (ZK=${zk_quorum})"
+
+CLASSPATH="${PHOENIX_ADAPTERS_CONF_DIR}:${PHOENIX_ADAPTERS_HOME}/lib/*"
+
+exec "${JAVA_HOME}/bin/java" \
+ -Dproc_rest \
+ -XX:+UseG1GC \
+ -XX:OnOutOfMemoryError="kill -9 %p" \
+ -XX:+HeapDumpOnOutOfMemoryError \
+ -XX:HeapDumpPath="${PHOENIX_ADAPTERS_LOG_DIR}" \
+ -Dphoenix.adapters.log.dir="${PHOENIX_ADAPTERS_LOG_DIR}" \
+ -Dlog4j2.configurationFile="file:${PHOENIX_ADAPTERS_CONF_DIR}/log4j2.properties" \
+ -cp "${CLASSPATH}" \
+ org.apache.phoenix.ddb.rest.RESTServer \
+ start \
+ -p "${PHOENIX_REST_PORT}" \
+ -z "${zk_quorum}"
diff --git a/docker/scripts/smoke.sh b/docker/scripts/smoke.sh
new file mode 100755
index 0000000..051839f
--- /dev/null
+++ b/docker/scripts/smoke.sh
@@ -0,0 +1,311 @@
+#!/usr/bin/env bash
+#
+# Phoenix Adapters DynamoDB validation suite.
+#
+# Hits every supported API against the dockerized REST server and asserts
+# the expected behaviour. Prints each request, response, and assertion in
+# a readable format. Exits 0 on full pass; exits non-zero on the first
+# failed assertion (and dumps the offending response).
+#
+# Usage: docker/scripts/smoke.sh [label]
+#
+# Requires the cluster to already be up (see docker/README.md).
+# Requires: jq, curl.
+#
+set -euo pipefail
+
+URL="${PHX_URL:-http://localhost:8842}"
+LABEL="${1:-}"
+TBL="Smoke${LABEL}"
+CT='Content-Type: application/x-amz-json-1.0'
+TARGET='X-Amz-Target: DynamoDB_20120810'
+TOTAL=18
+
+for tool in jq curl; do  # both are hard requirements (see the header comment)
+  command -v "$tool" >/dev/null 2>&1 && continue
+  echo "smoke.sh: $tool is required but not on PATH" >&2; exit 2
+done
+
+# ─── ANSI helpers ────────────────────────────────────────────────────────────
+B='\033[1m' # bold
+DIM='\033[2m' # dim
+CYAN='\033[1;36m'
+GREEN='\033[32m'
+RED='\033[31m'
+RESET='\033[0m'
+RULE='─────────────────────────────────────────────────────────────'
+BAR='━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
+
+STEP=0
+PASS=0
+
+banner() {
+ printf "\n${CYAN}%s${RESET}\n" "$BAR"
+ printf "${CYAN} %s${RESET}\n" "$1"
+ while [[ $# -gt 1 ]]; do shift; printf "${CYAN} %s${RESET}\n" "$1"; done
+ printf "${CYAN}%s${RESET}\n" "$BAR"
+}
+
+step() {
+ STEP=$((STEP + 1))
+ printf "\n${CYAN}[%2d/%2d]${RESET} ${B}%s${RESET}\n" "$STEP" "$TOTAL" "$1"
+ printf "${DIM}%s${RESET}\n" "$RULE"
+}
+
+show_json() {
+  local title="$1" payload="$2"
+  printf " ${DIM}%s:${RESET}\n" "$title"
+  # Payload that jq cannot parse is echoed verbatim; otherwise pretty-print it.
+  if ! printf '%s' "$payload" | jq . >/dev/null 2>&1; then
+    printf " %s\n" "$payload"; return
+  fi
+  printf '%s' "$payload" | jq . | sed 's/^/ /'
+}
+
+LAST_RESP=""
+
+# Prints request + response visually and stashes the raw JSON in LAST_RESP.
+ddb() {
+ local action="$1" body="$2"
+ show_json "request " "$body"
+ LAST_RESP=$(curl -sS -X POST "$URL/" -H "$CT" -H "$TARGET.$action" -d "$body")
+ show_json "response" "$LAST_RESP"
+}
+
+assert_eq() {
+ local label="$1" actual="$2" expected="$3"
+ if [[ "$actual" == "$expected" ]]; then
+ printf " ${GREEN}✓${RESET} %s ${B}==${RESET} %s\n" "$label" "$expected"
+ PASS=$((PASS + 1))
+ else
+ printf " ${RED}✗${RESET} %s ${B}expected${RESET} %s, ${B}got${RESET} %s\n" \
+ "$label" "$expected" "$actual" >&2
+ exit 1
+ fi
+}
+
+assert_nonempty() {
+  local what="$1" got="$2"
+  # An empty string and the literal string "null" both count as missing.
+  if [[ -z "$got" || "$got" == "null" ]]; then
+    printf " ${RED}✗${RESET} %s missing\n" "$what" >&2
+    exit 1
+  fi
+  printf " ${GREEN}✓${RESET} %s present (%s)\n" "$what" "$got"
+  PASS=$((PASS + 1))
+}
+
+assert_ge() {
+ local label="$1" actual="$2" threshold="$3"
+ if [[ "$actual" -ge "$threshold" ]]; then
+ printf " ${GREEN}✓${RESET} %s ${B}>=${RESET} %s (got %s)\n" "$label" "$threshold" "$actual"
+ PASS=$((PASS + 1))
+ else
+ printf " ${RED}✗${RESET} %s expected >= %s, got %s\n" "$label" "$threshold" "$actual" >&2
+ exit 1
+ fi
+}
+
+banner "Phoenix Adapters DynamoDB Validation Suite" \
+ "Endpoint : $URL" \
+ "Table : $TBL"
+
+# ─── Confirm the REST server is up before exercising the API ────────────────
+# When the stack is launched with `docker compose up --wait` the
+# phoenix-adapters-rest healthcheck has already ensured readiness; this
+# check returns almost immediately in that case. Otherwise we probe
+# ListTables until it responds (cold-start bootstrap takes ~30-60s).
+TIMEOUT=180  # wall-clock budget in seconds, measured with bash's SECONDS counter
+SPIN=( '⠋' '⠙' '⠹' '⠸' '⠼' '⠴' '⠦' '⠧' '⠇' '⠏' )
+ready=false; tick=0; SECONDS=0
+printf "\n"
+while (( SECONDS < TIMEOUT )); do
+  if curl -fs -m 3 -X POST "$URL/" \
+    -H "$CT" -H "$TARGET.ListTables" -d '{}' >/dev/null 2>&1; then
+    printf "\r${GREEN}✓${RESET} REST server is ready at %s (verified in %ds) \n" "$URL" "$SECONDS"
+    ready=true
+    break
+  fi
+  printf "\r${DIM}%s${RESET} Confirming REST server is ready at %s ${DIM}(%ds elapsed)${RESET}" \
+    "${SPIN[$((tick++ % ${#SPIN[@]}))]}" "$URL" "$SECONDS"
+  sleep 1
+done
+if ! $ready; then
+  printf "\n${RED}✗ REST server did not become ready within %ds at %s${RESET}\n" "$TIMEOUT" "$URL" >&2
+  printf "${DIM}Last 30 lines of phx-adapters-rest:${RESET}\n" >&2
+  docker logs phx-adapters-rest 2>&1 | tail -30 >&2 || true
+  exit 1
+fi
+
+###############################################################################
+# CRUD
+###############################################################################
+
+step "ListTables (baseline)"
+ddb ListTables '{}'
+
+step "CreateTable (streams enabled, NEW_AND_OLD_IMAGES)"
+ddb CreateTable "$(cat <= 4 mutations (PutItem-a, UpdateItem-a, PutItem-b, DeleteItem-b) plus
+# 3 from the batch (delete-a, put-c, put-d).
+assert_ge "stream record count" "$total" "4"
+
+step "DeleteTable (cleanup)"
+ddb DeleteTable "{\"TableName\":\"$TBL\"}"
+
+###############################################################################
+# Summary
+###############################################################################
+
+banner "Result: ${PASS} checks PASSED across ${TOTAL} API calls"
From 5b02be66e4b032faa70bc3e40ffeab440bf1037e Mon Sep 17 00:00:00 2001
From: Palash Chauhan
Date: Wed, 27 May 2026 09:01:04 -0700
Subject: [PATCH 2/2] changes
---
README.md | 6 ++--
docker/Dockerfile.phoenix-adapters | 12 +++++--
docker/README.md | 14 ++++----
docker/conf/hbase/hbase-env.sh | 3 ++
docker/conf/hbase/hbase-site.xml | 13 +++++++-
docker/conf/phoenix-adapters/hbase-site.xml | 2 +-
docker/docker-compose.yml | 5 +++
docker/scripts/phoenix-adapters-entrypoint.sh | 20 +++++++++++
docker/scripts/smoke.sh | 33 ++++++++++++++++---
9 files changed, 92 insertions(+), 16 deletions(-)
diff --git a/README.md b/README.md
index 9d51359..300dfbb 100644
--- a/README.md
+++ b/README.md
@@ -77,12 +77,14 @@ Skip steps 1-2 above with the bundled Docker cluster. From a fresh clone:
```bash
# 1. Bring up the full stack at the versions pinned in pom.xml and BLOCK
# until every container reports healthy (REST is ~30-60s on cold start).
-# First time: ~8-12 min total; subsequent runs are cached.
+# First time: ~8-12 min total -- most of that is Maven downloading
+# ~1.5 GB of dependencies into the BuildKit cache mount. Subsequent
+# runs reuse the cache and rebuild in seconds.
docker compose -f docker/docker-compose.yml up -d --build --wait
# 2. Validate it works end-to-end (CRUD + UpdateItem + BatchWriteItem + streams).
bash docker/scripts/smoke.sh
-# -> "Result: 20 checks PASSED across 18 API calls"
+# -> "Result: 21 checks PASSED across 18 API calls"
# 3. Use it. The DynamoDB-compatible endpoint is at http://localhost:8842 .
# Point any AWS SDK at it (Java/Python/Node.js snippets in
diff --git a/docker/Dockerfile.phoenix-adapters b/docker/Dockerfile.phoenix-adapters
index abb78f0..0c3df89 100644
--- a/docker/Dockerfile.phoenix-adapters
+++ b/docker/Dockerfile.phoenix-adapters
@@ -34,8 +34,16 @@ RUN --mount=type=cache,target=/root/.m2 \
clean package
RUN set -eux; \
- tarball=$(ls phoenix-ddb-assembly/target/phoenix-adapters-*-bin.tar.gz | head -n1); \
- cp "${tarball}" /tmp/phoenix-adapters-bin.tar.gz
+ # If the assembly module ever ships an additional *-bin.tar.gz (e.g.
+ # with a classifier), fail loudly rather than silently picking one.
+ count=$(find phoenix-ddb-assembly/target -maxdepth 1 -type f -name 'phoenix-adapters-*-bin.tar.gz' | wc -l); \
+ if [ "$count" -ne 1 ]; then \
+ echo "Expected exactly one phoenix-adapters-*-bin.tar.gz, found $count:" >&2; \
+ find phoenix-ddb-assembly/target -maxdepth 1 -type f -name 'phoenix-adapters-*-bin.tar.gz' >&2; \
+ exit 1; \
+ fi; \
+ tarball=$(find phoenix-ddb-assembly/target -maxdepth 1 -type f -name 'phoenix-adapters-*-bin.tar.gz'); \
+ cp "$tarball" /tmp/phoenix-adapters-bin.tar.gz
FROM eclipse-temurin:8-jdk-jammy
diff --git a/docker/README.md b/docker/README.md
index 618bee1..5fd2352 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -48,13 +48,14 @@ From the **project root**:
# 1. Bring up the full stack (ZK + HDFS + HBase+Phoenix + REST) and BLOCK
# until every service reports healthy (REST takes ~30-60s on a cold
# start because Phoenix has to bootstrap SYSTEM.* tables).
-# First time: ~8-12 min (pulls upstream images + builds HBase/Phoenix + REST).
-# Subsequent runs: cached.
+# First time: ~8-12 min -- most of that is Maven downloading ~1.5 GB
+# of dependencies into the BuildKit cache mount; subsequent runs reuse
+# the cache and rebuild in seconds.
docker compose -f docker/docker-compose.yml up -d --build --wait
# 2. Validate it works end-to-end (CRUD + UpdateItem + BatchWriteItem + streams).
bash docker/scripts/smoke.sh
-# -> "Result: 20 checks PASSED across 18 API calls"
+# -> "Result: 21 checks PASSED across 18 API calls"
# 3. Use it. The DynamoDB-compatible REST endpoint is at http://localhost:8842 .
# Point any AWS SDK at it (Java/Python/Node.js snippets in
@@ -91,7 +92,7 @@ Inter-container traffic still uses the standard ports.
### Bring up just the cluster (no REST)
```bash
-docker compose -f docker/docker-compose.yml up -d --build \
+docker compose -f docker/docker-compose.yml up -d --build --wait \
zookeeper namenode datanode hbase-master hbase-regionserver
```
@@ -224,6 +225,7 @@ HBase data persists across full `down`/`up` cycles.
| Added a Maven dep to `phoenix-ddb-rest/pom.xml` | `--build phoenix-adapters-rest`. New dep downloads once; cache warms after. |
| Clean slate | `docker compose ... down -v` then `up -d --build`. |
| Code doesn't seem picked up | You ran `restart` instead of `up --build`. `restart` does not rebuild. |
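+| Stack left running for days / many smoke iterations | HBase + REST logs grow unbounded inside the containers. Run `down -v` periodically to reclaim disk; note that `-v` also removes the named volumes, so this is the same data-wiping "Clean slate" as above. |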
### Pre-PR checklist
@@ -240,7 +242,7 @@ bash docker/scripts/smoke.sh
docker compose -f docker/docker-compose.yml down -v
```
-If `smoke.sh` finishes with `Result: 20 checks PASSED across 18 API calls`,
+If `smoke.sh` finishes with `Result: 21 checks PASSED across 18 API calls`,
your change is wire-compatible end to end through Phoenix on dockerized
HBase across CRUD, batch, and the change-stream chain.
@@ -277,7 +279,7 @@ RPC controller:
| `hbase.regionserver.wal.codec` | `…IndexedWALEditCodec` |
| `hbase.region.server.rpc.scheduler.factory.class` | `…PhoenixRpcSchedulerFactory` |
| `hbase.rpc.controllerfactory.class` | `…ServerRpcControllerFactory` |
-| `phoenix.task.handling.interval.ms` | `10` |
+| `phoenix.task.handling.interval.ms` | `1000` |
| `phoenix.task.handling.initial.delay.ms` | `1` |
`phoenix-server-hbase-2.5-5.3.1.jar` is copied into `${HBASE_HOME}/lib/` so
diff --git a/docker/conf/hbase/hbase-env.sh b/docker/conf/hbase/hbase-env.sh
index a5c243b..e8d7c6d 100644
--- a/docker/conf/hbase/hbase-env.sh
+++ b/docker/conf/hbase/hbase-env.sh
@@ -9,6 +9,9 @@ export HBASE_HEAPSIZE=1G
export HBASE_OFFHEAPSIZE=256m
# Strip JDK11-specific GC flags HBase ships with; we run on JDK8.
+# This intentionally REPLACES the upstream value (rather than appending),
+# so any future upstream flag drops out of the container -- add new flags
+# to this list directly instead of re-deriving from upstream's HBASE_OPTS.
export HBASE_OPTS="-XX:+UseG1GC -XX:+UnlockExperimentalVMOptions"
export HBASE_MASTER_OPTS="${HBASE_OPTS} -Xms256m"
export HBASE_REGIONSERVER_OPTS="${HBASE_OPTS} -Xms512m"
diff --git a/docker/conf/hbase/hbase-site.xml b/docker/conf/hbase/hbase-site.xml
index a47b462..56c279c 100644
--- a/docker/conf/hbase/hbase-site.xml
+++ b/docker/conf/hbase/hbase-site.xml
@@ -17,6 +17,11 @@
hbase.zookeeper.property.clientPort
2181
+
hbase.unsafe.stream.capability.enforce
false
@@ -47,9 +52,15 @@
hbase.rpc.controllerfactory.class
org.apache.hadoop.hbase.ipc.controller.ServerRpcControllerFactory
+
phoenix.task.handling.interval.ms
- 10
+ 1000
phoenix.task.handling.initial.delay.ms
diff --git a/docker/conf/phoenix-adapters/hbase-site.xml b/docker/conf/phoenix-adapters/hbase-site.xml
index fd993b9..0657edd 100644
--- a/docker/conf/phoenix-adapters/hbase-site.xml
+++ b/docker/conf/phoenix-adapters/hbase-site.xml
@@ -12,7 +12,7 @@
phoenix.task.handling.interval.ms
- 10
+ 1000
phoenix.task.handling.initial.delay.ms
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index 794cb6e..329bef5 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -7,6 +7,11 @@
name: phoenix-adapters
+# The container_name keys below pin each service to a fixed name (phx-*).
+# Combined with the fixed host ports (8842/9870/12181/etc.), this means
+# only one copy of the stack can run on a workstation at a time. That's
+# intentional given the host-port collisions; if you need parallel stacks,
+# drop the container_name keys AND change the host-port mappings.
services:
zookeeper:
diff --git a/docker/scripts/phoenix-adapters-entrypoint.sh b/docker/scripts/phoenix-adapters-entrypoint.sh
index 1c4685d..f77b971 100644
--- a/docker/scripts/phoenix-adapters-entrypoint.sh
+++ b/docker/scripts/phoenix-adapters-entrypoint.sh
@@ -3,6 +3,26 @@ set -euo pipefail
log() { echo "[phoenix-adapters][$(date -u +%H:%M:%S)] $*"; }
+# Guard against accidental reintroduction of the 3.4.x hadoop client jars.
+# Dockerfile.phoenix-adapters strips them because they reference
+# org.apache.hadoop.fs.WithErasureCoding (only present in hadoop-common
+# 3.4+), which poisons the client JVM via FileSystem ServiceLoader the
+# first time HBase returns a remote exception. If anyone re-adds them,
+# fail fast with a clear pointer instead of dying mid-bootstrap.
+shopt -s nullglob
+stray=( "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-hdfs-"*.jar \
+ "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-hdfs-client-"*.jar \
+ "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-yarn-"*.jar \
+ "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-mapreduce-client-"*.jar \
+ "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-distcp-"*.jar )
+shopt -u nullglob
+if [[ ${#stray[@]} -gt 0 ]]; then
+ log "ERROR: assembly contains hadoop 3.4.x jars that must be stripped:"
+ for j in "${stray[@]}"; do log " - ${j##*/}"; done
+ log "See the 'rm -f hadoop-hdfs-*' block in docker/Dockerfile.phoenix-adapters."
+ exit 1
+fi
+
wait_for() {
local host="$1" port="$2"
log "Waiting for ${host}:${port} ..."
diff --git a/docker/scripts/smoke.sh b/docker/scripts/smoke.sh
index 051839f..4962a57 100755
--- a/docker/scripts/smoke.sh
+++ b/docker/scripts/smoke.sh
@@ -65,11 +65,22 @@ show_json() {
LAST_RESP=""
# Prints request + response visually and stashes the raw JSON in LAST_RESP.
+# Aborts immediately if the response is a DDB error envelope (has __type),
+# so per-step assertions don't have to translate confusing "expected X got
+# null" failures back into the underlying Phoenix error.
ddb() {
local action="$1" body="$2"
show_json "request " "$body"
LAST_RESP=$(curl -sS -X POST "$URL/" -H "$CT" -H "$TARGET.$action" -d "$body")
show_json "response" "$LAST_RESP"
+ if printf '%s' "$LAST_RESP" | jq -e 'type == "object" and has("__type")' >/dev/null 2>&1; then
+ local err_type err_msg
+ err_type=$(printf '%s' "$LAST_RESP" | jq -r '.__type // "?"')
+ err_msg=$(printf '%s' "$LAST_RESP" | jq -r '.Message // .message // ""')
+ printf " ${RED}✗${RESET} %s returned error ${B}%s${RESET}: %s\n" \
+ "$action" "$err_type" "$err_msg" >&2
+ exit 1
+ fi
}
assert_eq() {
@@ -97,7 +108,10 @@ assert_nonempty() {
assert_ge() {
local label="$1" actual="$2" threshold="$3"
- if [[ "$actual" -ge "$threshold" ]]; then
+ # Coerce non-numeric (null, empty, "true", etc.) to 0 so the arithmetic
+ # comparison can't abort the script with "integer expression expected".
+ [[ "$actual" =~ ^-?[0-9]+$ ]] || actual=0
+ if (( actual >= threshold )); then
printf " ${GREEN}✓${RESET} %s ${B}>=${RESET} %s (got %s)\n" "$label" "$threshold" "$actual"
PASS=$((PASS + 1))
else
@@ -280,7 +294,7 @@ iter=$(jq -r '.ShardIterator // empty' <<<"$LAST_RESP")
assert_nonempty "ShardIterator" "$iter"
step "GetRecords (drain pages until empty)"
-total=0; pages=0; seen_keys=""
+total=0; pages=0; seen_keys=""; advanced=false
while [[ -n "$iter" && "$iter" != "null" && $pages -lt 10 ]]; do
pages=$((pages + 1))
ddb GetRecords "{\"ShardIterator\":\"$iter\"}"
@@ -290,16 +304,27 @@ while [[ -n "$iter" && "$iter" != "null" && $pages -lt 10 ]]; do
total=$((total + n))
printf " ${DIM}page %d: %d record(s) keys=[%s]${RESET}\n" "$pages" "$n" "$keys"
next=$(jq -r '.NextShardIterator // empty' <<<"$LAST_RESP")
- if [[ "$next" == "$iter" || -z "$next" || "$next" == "null" ]]; then
+ # Stop paging when the shard is exhausted (NextShardIterator empty/null)
+ # or the iterator did not move (NextShardIterator == current one);
+ # re-polling the same position would just spin.
+ if [[ -z "$next" || "$next" == "null" || "$next" == "$iter" ]]; then
break
fi
+ advanced=true
iter="$next"
[[ $n -eq 0 ]] && break
done
printf " ${DIM}total records: %d keys=[%s]${RESET}\n" "$total" "$seen_keys"
# Expect >= 4 mutations (PutItem-a, UpdateItem-a, PutItem-b, DeleteItem-b) plus
-# 3 from the batch (delete-a, put-c, put-d).
+# 3 from the batch (delete-a, put-c, put-d) -- 7 total in steady state.
assert_ge "stream record count" "$total" "4"
+if $advanced; then
+ printf " ${GREEN}✓${RESET} ShardIterator advanced across pages\n"
+ PASS=$((PASS + 1))
+else
+ printf " ${RED}✗${RESET} ShardIterator never advanced; stream appears stuck\n" >&2
+ exit 1
+fi
step "DeleteTable (cleanup)"
ddb DeleteTable "{\"TableName\":\"$TBL\"}"