@@ -13,9 +13,9 @@ LABEL ubuntu.version="22.04"
1313# Avoid interactive prompts during package installation
1414ARG DEBIAN_FRONTEND=noninteractive
1515
16- # Update and install essential development tools
16+ # Install essential development tools for GPU programming
1717RUN apt-get update && apt-get install -y \
18- # Basic development tools
18+ # Core development tools
1919 build-essential \
2020 cmake \
2121 git \
@@ -25,17 +25,13 @@ RUN apt-get update && apt-get install -y \
2525 nano \
2626 htop \
2727 tree \
28- # Python development
28+ # Minimal Python for basic scripting (not data science)
2929 python3 \
3030 python3-pip \
3131 python3-dev \
3232 # Additional utilities
3333 pkg-config \
3434 software-properties-common \
35- apt-transport-https \
36- ca-certificates \
37- gnupg \
38- lsb-release \
3935 # Debugging and profiling tools
4036 gdb \
4137 valgrind \
@@ -45,35 +41,21 @@ RUN apt-get update && apt-get install -y \
4541 iputils-ping \
4642 && rm -rf /var/lib/apt/lists/*
4743
48- # Install ROCm development packages
44+ # Install core ROCm development packages (keep minimal)
4945RUN apt-get update && apt-get install -y \
50- # Core ROCm packages
51- rocm-dev \
52- rocm-libs \
46+ # Core ROCm packages for GPU programming
5347 hip-dev \
5448 hip-samples \
5549 hipblas-dev \
56- hipfft-dev \
57- hipsparse-dev \
58- # ROCm profiling and debugging tools
50+ # ROCm profiling tools (essential for performance work)
5951 rocprofiler-dev \
6052 roctracer-dev \
61- roctx \
62- # Additional ROCm libraries
63- rocrand-dev \
64- rocthrust-dev \
6553 && rm -rf /var/lib/apt/lists/*
6654
67- # Install Python packages for data analysis and visualization
55+ # Install only numpy and matplotlib for basic scripting and plotting (heavier data-science libraries are intentionally omitted)
6856RUN pip3 install --no-cache-dir \
6957 numpy \
70- matplotlib \
71- seaborn \
72- pandas \
73- jupyter \
74- jupyterlab \
75- plotly \
76- scipy
58+ matplotlib
7759
7860# Set up ROCm environment variables
7961ENV ROCM_PATH=/opt/rocm
@@ -84,11 +66,8 @@ ENV HIP_PLATFORM=amd
8466ENV HSA_OVERRIDE_GFX_VERSION=11.0.0
8567ENV ROCM_VERSION=6.4.3
8668
87- # Add ROCm binaries to PATH
88- ENV PATH=/opt/rocm/bin:/opt/rocm/hip/bin:${PATH}
89-
90- # Verify ROCm installation
91- RUN hipcc --version && rocminfo > /dev/null 2>&1 || echo "ROCm info check completed (may fail without GPU)"
69+ # Verify that the HIP compiler is installed (rocminfo is skipped because no GPU is available during the build)
70+ RUN hipcc --version
9271
9372# Create development workspace
9473WORKDIR /workspace
@@ -107,164 +86,90 @@ RUN echo 'alias ll="ls -alF"' >> /root/.bashrc && \
10786 echo 'alias rocm-info="rocminfo"' >> /root/.bashrc && \
10887 echo 'export PS1="\[\e [1;34m\] [ROCm-DEV]\[\e [0m\] \w $ "' >> /root/.bashrc
10988
110- # Create a comprehensive GPU test script
111- RUN cat > /workspace/test-gpu.sh << 'EOF'
112- # !/bin/bash
113- echo "=== GPU Programming 101 - ROCm Environment Test ==="
114- echo "Date: $(date)"
115- echo ""
116-
117- echo "=== HIP Compiler ==="
118- hipcc --version
119- echo ""
120-
121- echo "=== ROCm Version ==="
122- if command -v rocminfo > /dev/null 2>&1; then
123- rocminfo | head -20
124- else
125- echo "rocminfo command not available"
126- fi
127- echo ""
128-
129- echo "=== GPU Information ==="
130- if command -v rocm-smi > /dev/null 2>&1; then
131- rocm-smi --showproductname --showmeminfo vram || echo "No AMD GPU detected or accessible"
132- else
133- echo "rocm-smi not available"
134- fi
135- echo ""
136-
137- echo "=== Environment Variables ==="
138- echo "ROCM_PATH: $ROCM_PATH"
139- echo "HIP_PATH: $HIP_PATH"
140- echo "HIP_PLATFORM: $HIP_PLATFORM"
141- echo "PATH: $PATH"
142- echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
143- echo ""
144-
145- echo "=== HIP Platform Detection ==="
146- cat > /tmp/platform_test.cpp << 'HIP_EOF'
147- # include <hip/hip_runtime.h>
148- # include <iostream>
149-
150- int main() {
151- int deviceCount;
152- hipError_t error = hipGetDeviceCount(&deviceCount);
153-
154- if (error != hipSuccess) {
155- std::cout << "HIP Error: " << hipGetErrorString(error) << std::endl;
156- std::cout << "This may be normal if no GPU is available" << std::endl;
157- } else {
158- std::cout << "Number of HIP devices: " << deviceCount << std::endl;
159-
160- for (int i = 0; i < deviceCount; i++) {
161- hipDeviceProp_t props;
162- hipGetDeviceProperties(&props, i);
163- std::cout << "Device " << i << ": " << props.name << std::endl;
164- }
165- }
166-
167- return 0;
168- }
169- HIP_EOF
170-
171- echo "Compiling platform detection test..."
172- if hipcc -o /tmp/platform_test /tmp/platform_test.cpp; then
173- echo "✓ Compilation successful"
174- echo "Running platform test:"
175- /tmp/platform_test
176- else
177- echo "✗ Platform test compilation failed"
178- fi
179-
180- echo ""
181- echo "=== Build Test ==="
182- cd /tmp
183- cat > test.hip.cpp << 'HIP_EOF'
184- # include <hip/hip_runtime.h>
185- # include <stdio.h>
186-
187- __global__ void hello() {
188- printf("Hello from HIP thread %d!\n " , hipThreadIdx_x);
189- }
190-
191- int main() {
192- printf("HIP Test Program\n " );
193-
194- // Check for HIP devices
195- int deviceCount;
196- hipError_t error = hipGetDeviceCount(&deviceCount);
197-
198- if (error == hipSuccess && deviceCount > 0) {
199- printf("Found %d HIP device(s)\n " , deviceCount);
200- hello<<<1, 5>>>();
201- hipDeviceSynchronize();
202- printf("GPU kernel completed!\n " );
203- } else {
204- printf("No HIP devices found or error: %s\n " , hipGetErrorString(error));
205- printf("This is normal when running without GPU access\n " );
206- }
207-
208- return 0;
209- }
210- HIP_EOF
211-
212- echo "Compiling test HIP program..."
213- if hipcc -o test test.hip.cpp; then
214- echo "✓ Compilation successful"
215- echo "Running test program:"
216- ./test
217- echo "✓ HIP environment is working correctly!"
218- else
219- echo "✗ Compilation failed"
220- exit 1
221- fi
222-
223- rm -f test test.hip.cpp platform_test platform_test.cpp
224- echo ""
225- echo "=== All tests completed ==="
226- EOF
89+ # Create a simple GPU test script via printf (so %% and \\ in the text below are printf escapes for a literal % and \)
90+ RUN printf '#!/bin/bash\n \
91+ echo "=== GPU Programming 101 - ROCm Environment Test ==="\n \
92+ echo "Date: $(date)"\n \
93+ echo ""\n \
94+ \n \
95+ echo "=== HIP Compiler ==="\n \
96+ hipcc --version\n \
97+ echo ""\n \
98+ \n \
99+ echo "=== GPU Information ==="\n \
100+ if rocm-smi --showproductname --showmeminfo vram 2>/dev/null; then\n \
101+ echo "AMD GPU detected successfully"\n \
102+ else\n \
103+ echo "No AMD GPU detected or rocm-smi not available"\n \
104+ fi\n \
105+ echo ""\n \
106+ \n \
107+ echo "=== Environment Variables ==="\n \
108+ echo "ROCM_PATH: $ROCM_PATH"\n \
109+ echo "HIP_PATH: $HIP_PATH"\n \
110+ echo "HIP_PLATFORM: $HIP_PLATFORM"\n \
111+ echo "PATH: $PATH"\n \
112+ echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"\n \
113+ echo ""\n \
114+ \n \
115+ echo "=== Build Test ==="\n \
116+ cd /tmp\n \
117+ cat > test.hip.cpp << ' "'" 'HIP_EOF' "'" '\n \
118+ #include <hip/hip_runtime.h>\n \
119+ #include <stdio.h>\n \
120+ \n \
121+ __global__ void hello() {\n \
122+ printf("Hello from HIP thread %%d!\\ n", hipThreadIdx_x);\n \
123+ }\n \
124+ \n \
125+ int main() {\n \
126+ printf("HIP Test Program\\ n");\n \
127+ \n \
128+ int deviceCount;\n \
129+ hipError_t error = hipGetDeviceCount(&deviceCount);\n \
130+ \n \
131+ if (error != hipSuccess) {\n \
132+ printf("HIP Error: %%s\\ n", hipGetErrorString(error));\n \
133+ printf("No HIP-capable devices found\\ n");\n \
134+ return 0;\n \
135+ }\n \
136+ \n \
137+ printf("Found %%d HIP device(s)\\ n", deviceCount);\n \
138+ hello<<<1, 5>>>();\n \
139+ hipDeviceSynchronize();\n \
140+ printf("GPU kernel completed!\\ n");\n \
141+ return 0;\n \
142+ }\n \
143+ HIP_EOF\n \
144+ \n \
145+ echo "Compiling test HIP program..."\n \
146+ if hipcc -o test test.hip.cpp; then\n \
147+ echo "✓ Compilation successful"\n \
148+ echo "Running test program:"\n \
149+ ./test\n \
150+ echo "✓ HIP environment is working correctly!"\n \
151+ else\n \
152+ echo "✗ Compilation failed"\n \
153+ exit 1\n \
154+ fi\n \
155+ \n \
156+ rm -f test test.hip.cpp\n \
157+ echo ""\n \
158+ echo "=== All tests completed ==="\n ' > /workspace/test-gpu.sh
227159
228160RUN chmod +x /workspace/test-gpu.sh
229161
230- # Install HIP samples
162+ # Install HIP samples for learning and reference
231163RUN cd /workspace && \
232164 if [ -d "/opt/rocm/hip/samples" ]; then \
233165 cp -r /opt/rocm/hip/samples ./hip-samples; \
234166 else \
235167 git clone https://github.com/ROCm-Developer-Tools/HIP-Examples.git hip-examples; \
236168 fi
237169
238- # Create jupyter kernel for HIP (for notebooks)
239- RUN python3 -m ipykernel install --name hip-kernel --display-name "HIP Python"
240-
241- # Set up HIP for both AMD and NVIDIA compatibility
242- RUN cat > /workspace/setup-hip-nvidia.sh << 'EOF'
243- # !/bin/bash
244- # Switch HIP to NVIDIA backend (for systems with NVIDIA GPUs)
245- export HIP_PLATFORM=nvidia
246- export HIP_COMPILER=nvcc
247- echo "HIP configured for NVIDIA backend"
248- echo "HIP_PLATFORM=$HIP_PLATFORM"
249- EOF
250-
251- RUN cat > /workspace/setup-hip-amd.sh << 'EOF'
252- # !/bin/bash
253- # Switch HIP to AMD backend (default)
254- export HIP_PLATFORM=amd
255- unset HIP_COMPILER
256- echo "HIP configured for AMD backend"
257- echo "HIP_PLATFORM=$HIP_PLATFORM"
258- EOF
259-
260- RUN chmod +x /workspace/setup-hip-*.sh
261-
262- # Expose Jupyter port
263- EXPOSE 8888
264-
265170# Default command
266171CMD ["/bin/bash" ]
267172
268- # Health check to verify ROCm access
173+ # Health check: confirm the HIP compiler is installed and runnable (hipcc --version does not need a GPU, so this does not verify GPU access)
269174HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
270- CMD rocminfo > /dev/null 2>&1 || hipcc --version > /dev/null 2>&1 || exit 1
175+ CMD hipcc --version > /dev/null 2>&1 || exit 1
0 commit comments