Commit 13f8b8f

Merge branch 'main' into user/qa/post_update_waive_20251201_LLM_FUNCTION_TEST_1712
Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>
2 parents aa53729 + d11acee commit 13f8b8f

File tree: 67 files changed (+3236, -996 lines)


docker/Dockerfile.multi

Lines changed: 34 additions & 38 deletions
@@ -12,9 +12,10 @@ LABEL com.nvidia.eula="https://www.nvidia.com/en-us/agreements/enterprise-softwa
 LABEL com.nvidia.ai-terms="https://www.nvidia.com/en-us/agreements/enterprise-software/product-specific-terms-for-ai-products/"
 
 # https://www.gnu.org/software/bash/manual/html_node/Bash-Startup-Files.html
-# The default values come from `nvcr.io/nvidia/pytorch`
-ENV BASH_ENV=${BASH_ENV:-/etc/bash.bashrc}
-ENV ENV=${ENV:-/etc/shinit_v2}
+ARG SH_ENV="/etc/shinit_v2"
+ENV ENV=${SH_ENV}
+ARG BASH_ENV="/etc/bash.bashrc"
+ENV BASH_ENV=${BASH_ENV}
 
 ARG GITHUB_MIRROR=""
 RUN echo "Using GitHub mirror: $GITHUB_MIRROR"
@@ -43,48 +44,41 @@ COPY docker/common/install.sh \
 docker/common/install_ucx.sh \
 docker/common/install_nixl.sh \
 docker/common/install_etcd.sh \
-docker/common/install_mooncake.sh \
 ./
 
-RUN GITHUB_MIRROR=${GITHUB_MIRROR} \
-PYTHON_VERSION=${PYTHON_VERSION} \
-bash ./install.sh --base && rm install_base.sh
-
-RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install.sh --cmake && rm install_cmake.sh
-
-RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install.sh --ccache && rm install_ccache.sh
-
-RUN bash ./install.sh --cuda_toolkit && rm install_cuda_toolkit.sh
-
 ARG TRT_VER
 ARG CUDA_VER
 ARG CUDNN_VER
 ARG NCCL_VER
 ARG CUBLAS_VER
-RUN TRT_VER=${TRT_VER} \
+ARG TORCH_INSTALL_TYPE="skip"
+RUN GITHUB_MIRROR=${GITHUB_MIRROR} \
+PYTHON_VERSION=${PYTHON_VERSION} \
+TRT_VER=${TRT_VER} \
 CUDA_VER=${CUDA_VER} \
 CUDNN_VER=${CUDNN_VER} \
 NCCL_VER=${NCCL_VER} \
 CUBLAS_VER=${CUBLAS_VER} \
-bash ./install.sh --tensorrt && rm install_tensorrt.sh
-
-RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install.sh --polygraphy && rm install_polygraphy.sh
-
-RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install.sh --mpi4py && rm install_mpi4py.sh
-
-ARG TORCH_INSTALL_TYPE="skip"
-RUN TORCH_INSTALL_TYPE=${TORCH_INSTALL_TYPE} bash ./install.sh --pytorch && rm install_pytorch.sh
-
-RUN bash ./install.sh --opencv && rm install.sh
-
-# Install UCX first
-RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_ucx.sh && rm install_ucx.sh
-
-# Install NIXL
-RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_nixl.sh && rm install_nixl.sh
-
-# Install etcd
-RUN bash ./install_etcd.sh && rm install_etcd.sh
+TORCH_INSTALL_TYPE=${TORCH_INSTALL_TYPE} \
+bash ./install.sh --base --cmake --ccache --cuda_toolkit --tensorrt --polygraphy --mpi4py --pytorch --opencv && \
+rm install_base.sh && \
+rm install_cmake.sh && \
+rm install_ccache.sh && \
+rm install_cuda_toolkit.sh && \
+rm install_tensorrt.sh && \
+rm install_polygraphy.sh && \
+rm install_mpi4py.sh && \
+rm install_pytorch.sh && \
+rm install.sh
+
+# Install UCX, NIXL, etcd
+# TODO: Combine these into the main install.sh script
+RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_ucx.sh && \
+GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_nixl.sh && \
+bash ./install_etcd.sh && \
+rm install_ucx.sh && \
+rm install_nixl.sh && \
+rm install_etcd.sh
 
 FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton
 
@@ -99,16 +93,18 @@ COPY --from=triton /opt/tritonserver/caches /opt/tritonserver/caches
 
 # Copy all installation scripts at once to reduce layers
 COPY docker/common/install_triton.sh \
+docker/common/install_mooncake.sh \
 ./
 
-RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_triton.sh && rm install_triton.sh
-
 # Install Mooncake, after triton handles boost requirement
-RUN if [ -f /etc/redhat-release ]; then \
+RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install_triton.sh && \
+if [ -f /etc/redhat-release ]; then \
 echo "Rocky8 detected, skipping mooncake installation"; \
 else \
 bash ./install_mooncake.sh; \
-fi && rm install_mooncake.sh
+fi && \
+rm install_triton.sh && \
+rm install_mooncake.sh
 
 FROM ${DEVEL_IMAGE} AS wheel
 WORKDIR /src/tensorrt_llm
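
For context on the BASH_ENV/ENV lines above (see the Bash Startup Files link in the Dockerfile comment): BASH_ENV names a file that bash sources when it starts non-interactively, and ENV is the analogous hook for shells invoked as sh. A minimal sketch of the BASH_ENV behavior, using a throwaway path that is only an illustration:

    # /tmp/demo_env.sh is a hypothetical file used only for this sketch.
    echo 'export DEMO_FLAG=1' > /tmp/demo_env.sh
    # Non-interactive bash sources $BASH_ENV before running the command,
    # so the variable defined there is visible inside the child shell.
    BASH_ENV=/tmp/demo_env.sh bash -c 'echo "DEMO_FLAG=${DEMO_FLAG}"'   # prints DEMO_FLAG=1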

docker/Makefile

Lines changed: 11 additions & 1 deletion
@@ -75,8 +75,16 @@ define rewrite_tag
 $(shell echo $(IMAGE_WITH_TAG) | sed "s/\/tensorrt-llm:/\/tensorrt-llm-staging:/g")
 endef
 
+base_pull:
+@echo "Pulling base image: $(BASE_IMAGE):$(BASE_TAG)"
+docker pull $(BASE_IMAGE):$(BASE_TAG)
+
 %_build: DEVEL_IMAGE = $(if $(findstring 1,$(JENKINS_DEVEL)),$(shell . ../jenkins/current_image_tags.properties && echo $$LLM_DOCKER_IMAGE))
-%_build:
+%_build: SH_ENV = $(shell docker inspect --format='{{range .Config.Env}}{{println .}}{{end}}' $(BASE_IMAGE):$(BASE_TAG) \
+| grep '^ENV=' | sed 's/^[^=]*=//' 2>/dev/null)
+%_build: BASH_ENV = $(shell docker inspect --format='{{range .Config.Env}}{{println .}}{{end}}' $(BASE_IMAGE):$(BASE_TAG) \
+| grep '^BASH_ENV=' | sed 's/^[^=]*=//' 2>/dev/null)
+%_build: base_pull
 @echo "Building docker image: $(IMAGE_WITH_TAG)"
 docker buildx build $(DOCKER_BUILD_OPTS) $(DOCKER_BUILD_ARGS) \
 --progress $(DOCKER_PROGRESS) \
@@ -97,6 +105,8 @@ endef
 $(if $(GIT_COMMIT), --build-arg GIT_COMMIT="$(GIT_COMMIT)") \
 $(if $(GITHUB_MIRROR), --build-arg GITHUB_MIRROR="$(GITHUB_MIRROR)") \
 $(if $(PYTHON_VERSION), --build-arg PYTHON_VERSION="$(PYTHON_VERSION)") \
+$(if $(SH_ENV), --build-arg SH_ENV="$(SH_ENV)") \
+$(if $(BASH_ENV), --build-arg BASH_ENV="$(BASH_ENV)") \
 $(if $(STAGE), --target $(STAGE)) \
 --file Dockerfile.multi \
 --tag $(IMAGE_WITH_TAG) \
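
In the new %_build prerequisites above, SH_ENV and BASH_ENV are scraped out of the base image's own environment before the build starts. Outside make, the pipeline is roughly equivalent to the following sketch (it assumes BASE_IMAGE and BASE_TAG are set the same way the Makefile defines them, and that the image has already been pulled via base_pull):

    # Print every Config.Env entry of the base image, one per line,
    # then keep only the BASH_ENV entry and strip the "BASH_ENV=" prefix.
    docker inspect --format='{{range .Config.Env}}{{println .}}{{end}}' "$BASE_IMAGE:$BASE_TAG" \
        | grep '^BASH_ENV=' \
        | sed 's/^[^=]*=//'    # e.g. prints /etc/bash.bashrc

The extracted values are then forwarded to the Dockerfile through the --build-arg SH_ENV/BASH_ENV flags added further down in the same rule.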

docker/common/install.sh

Lines changed: 3 additions & 3 deletions
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 set -Eeo pipefail
 shopt -s nullglob
 trap 'echo "[install.sh] Error on line $LINENO" >&2' ERR
@@ -125,7 +125,7 @@ fi
 
 if [ $opencv -eq 1 ]; then
 echo "Installing OpenCV..."
-pip3 uninstall -y opencv
+bash -c "pip3 uninstall -y opencv"
 rm -rf /usr/local/lib/python3*/dist-packages/cv2/
-pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
+bash -c "pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir"
 fi

docs/source/commands/trtllm-serve/run-benchmark-with-trtllm-serve.md

Lines changed: 4 additions & 4 deletions
@@ -162,7 +162,7 @@ P99 E2EL (ms): 1643.44
 For a single request, ITLs are the time intervals between tokens, while TPOT is the average of those intervals:
 
 $$
-\text{TPOT (1 request)} = \text{Avg(ITL)} = \frac{\text{E2E latency} - \text{TTFT}}{\text{#Output Tokens} - 1}
+\text{TPOT (1 request)} = \text{Avg(ITL)} = \frac{\text{E2E latency} - \text{TTFT}}{\text{Num Output Tokens} - 1}
 $$
 
 Across different requests, **average TPOT** is the mean of each request's TPOT (all requests weighted equally), while **average ITL** is token-weighted (all tokens weighted equally):
@@ -172,7 +172,7 @@ $$
 $$
 
 $$
-\text{Avg ITL (N requests)} = \frac{\text{Sum of all ITLs across requests}}{\text{#Output Tokens across requests}}
+\text{Avg ITL (N requests)} = \frac{\text{Sum of all ITLs across requests}}{\text{Num Output Tokens across requests}}
 $$
 
 #### End-to-End (E2E) Latency
@@ -182,14 +182,14 @@ $$
 * The combined rate at which the system processes both input (prompt) tokens and output (generated) tokens.
 
 $$
-\text{Total TPS} = \frac{\text{#Input Tokens}+\text{#Output Tokens}}{T_{last} - T_{first}}
+\text{Total TPS} = \frac{\text{Num Input Tokens}+\text{Num Output Tokens}}{T_{last} - T_{first}}
 $$
 
 #### Tokens Per Second (TPS) or Output Token Throughput
 * how many output tokens the system generates each second.
 
 $$
-\text{TPS} = \frac{\text{#Output Tokens}}{T_{last} - T_{first}}
+\text{TPS} = \frac{\text{Num Output Tokens}}{T_{last} - T_{first}}
 $$
 
 ### Request Time Breakdown
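
As a quick sanity check of the per-request TPOT formula above (the numbers are purely illustrative, not measurements from this commit): a request with an E2E latency of 1200 ms, a TTFT of 200 ms, and 101 output tokens gives

$$
\text{TPOT} = \frac{1200\ \text{ms} - 200\ \text{ms}}{101 - 1} = 10\ \text{ms per output token}
$$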

docs/source/deployment-guide/deployment-guide-for-deepseek-r1-on-trtllm.md

Lines changed: 4 additions & 4 deletions
@@ -400,7 +400,7 @@ P99 E2EL (ms): [result]
 For a single request, ITLs are the time intervals between tokens, while TPOT is the average of those intervals:
 
 $$
-\text{TPOT (1 request)} = \text{Avg(ITL)} = \frac{\text{E2E latency} - \text{TTFT}}{\text{#Output Tokens} - 1}
+\text{TPOT (1 request)} = \text{Avg(ITL)} = \frac{\text{E2E latency} - \text{TTFT}}{\text{Num Output Tokens} - 1}
 $$
 
 Across different requests, **average TPOT** is the mean of each request's TPOT (all requests weighted equally), while **average ITL** is token-weighted (all tokens weighted equally):
@@ -410,7 +410,7 @@ $$
 $$
 
 $$
-\text{Avg ITL (N requests)} = \frac{\text{Sum of all ITLs across requests}}{\text{#Output Tokens across requests}}
+\text{Avg ITL (N requests)} = \frac{\text{Sum of all ITLs across requests}}{\text{Num Output Tokens across requests}}
 $$
 
 #### End-to-End (E2E) Latency
@@ -420,12 +420,12 @@ $$
 * The combined rate at which the system processes both input (prompt) tokens and output (generated) tokens.
 
 $$
-\text{Total TPS} = \frac{\text{#Input Tokens}+\text{#Output Tokens}}{T_{last} - T_{first}}
+\text{Total TPS} = \frac{\text{Num Input Tokens}+\text{Num Output Tokens}}{T_{last} - T_{first}}
 $$
 
 #### Tokens Per Second (TPS) or Output Token Throughput
 * how many output tokens the system generates each second.
 
 $$
-\text{TPS} = \frac{\text{#Output Tokens}}{T_{last} - T_{first}}
+\text{TPS} = \frac{\text{Num Output Tokens}}{T_{last} - T_{first}}
 $$

docs/source/deployment-guide/deployment-guide-for-gpt-oss-on-trtllm.md

Lines changed: 4 additions & 4 deletions
@@ -350,7 +350,7 @@ P99 E2EL (ms): [result]
 For a single request, ITLs are the time intervals between tokens, while TPOT is the average of those intervals:
 
 $$
-\text{TPOT (1 request)} = \text{Avg(ITL)} = \frac{\text{E2E latency} - \text{TTFT}}{\text{#Output Tokens} - 1}
+\text{TPOT (1 request)} = \text{Avg(ITL)} = \frac{\text{E2E latency} - \text{TTFT}}{\text{Num Output Tokens} - 1}
 $$
 
 Across different requests, **average TPOT** is the mean of each request's TPOT (all requests weighted equally), while **average ITL** is token-weighted (all tokens weighted equally):
@@ -360,7 +360,7 @@ $$
 $$
 
 $$
-\text{Avg ITL (N requests)} = \frac{\text{Sum of all ITLs across requests}}{\text{#Output Tokens across requests}}
+\text{Avg ITL (N requests)} = \frac{\text{Sum of all ITLs across requests}}{\text{Num Output Tokens across requests}}
 $$
 
 #### End-to-End (E2E) Latency
@@ -370,12 +370,12 @@ $$
 * The combined rate at which the system processes both input (prompt) tokens and output (generated) tokens.
 
 $$
-\text{Total TPS} = \frac{\text{#Input Tokens}+\text{#Output Tokens}}{T_{last} - T_{first}}
+\text{Total TPS} = \frac{\text{Num Input Tokens}+\text{Num Output Tokens}}{T_{last} - T_{first}}
 $$
 
 #### Tokens Per Second (TPS) or Output Token Throughput
 * how many output tokens the system generates each second.
 
 $$
-\text{TPS} = \frac{\text{#Output Tokens}}{T_{last} - T_{first}}
+\text{TPS} = \frac{\text{Num Output Tokens}}{T_{last} - T_{first}}
 $$

docs/source/deployment-guide/deployment-guide-for-llama3.3-70b-on-trtllm.md

Lines changed: 4 additions & 4 deletions
@@ -355,7 +355,7 @@ P99 E2EL (ms): [result]
 For a single request, ITLs are the time intervals between tokens, while TPOT is the average of those intervals:
 
 $$
-\text{TPOT (1 request)} = \text{Avg(ITL)} = \frac{\text{E2E latency} - \text{TTFT}}{\text{#Output Tokens} - 1}
+\text{TPOT (1 request)} = \text{Avg(ITL)} = \frac{\text{E2E latency} - \text{TTFT}}{\text{Num Output Tokens} - 1}
 $$
 
 Across different requests, **average TPOT** is the mean of each request's TPOT (all requests weighted equally), while **average ITL** is token-weighted (all tokens weighted equally):
@@ -365,7 +365,7 @@ $$
 $$
 
 $$
-\text{Avg ITL (N requests)} = \frac{\text{Sum of all ITLs across requests}}{\text{#Output Tokens across requests}}
+\text{Avg ITL (N requests)} = \frac{\text{Sum of all ITLs across requests}}{\text{Num Output Tokens across requests}}
 $$
 
 #### End-to-End (E2E) Latency
@@ -375,12 +375,12 @@ $$
 * The combined rate at which the system processes both input (prompt) tokens and output (generated) tokens.
 
 $$
-\text{Total TPS} = \frac{\text{#Input Tokens}+\text{#Output Tokens}}{T_{last} - T_{first}}
+\text{Total TPS} = \frac{\text{Num Input Tokens}+\text{Num Output Tokens}}{T_{last} - T_{first}}
 $$
 
 #### Tokens Per Second (TPS) or Output Token Throughput
 * how many output tokens the system generates each second.
 
 $$
-\text{TPS} = \frac{\text{#Output Tokens}}{T_{last} - T_{first}}
+\text{TPS} = \frac{\text{Num Output Tokens}}{T_{last} - T_{first}}
 $$

docs/source/deployment-guide/deployment-guide-for-llama4-scout-on-trtllm.md

Lines changed: 4 additions & 4 deletions
@@ -347,7 +347,7 @@ P99 E2EL (ms): [result]
 For a single request, ITLs are the time intervals between tokens, while TPOT is the average of those intervals:
 
 $$
-\text{TPOT (1 request)} = \text{Avg(ITL)} = \frac{\text{E2E latency} - \text{TTFT}}{\text{#Output Tokens} - 1}
+\text{TPOT (1 request)} = \text{Avg(ITL)} = \frac{\text{E2E latency} - \text{TTFT}}{\text{Num Output Tokens} - 1}
 $$
 
 Across different requests, **average TPOT** is the mean of each request's TPOT (all requests weighted equally), while **average ITL** is token-weighted (all tokens weighted equally):
@@ -357,7 +357,7 @@ $$
 $$
 
 $$
-\text{Avg ITL (N requests)} = \frac{\text{Sum of all ITLs across requests}}{\text{#Output Tokens across requests}}
+\text{Avg ITL (N requests)} = \frac{\text{Sum of all ITLs across requests}}{\text{Num Output Tokens across requests}}
 $$
 
 #### End-to-End (E2E) Latency
@@ -367,12 +367,12 @@ $$
 * The combined rate at which the system processes both input (prompt) tokens and output (generated) tokens.
 
 $$
-\text{Total TPS} = \frac{\text{#Input Tokens}+\text{#Output Tokens}}{T_{last} - T_{first}}
+\text{Total TPS} = \frac{\text{Num Input Tokens}+\text{Num Output Tokens}}{T_{last} - T_{first}}
 $$
 
 #### Tokens Per Second (TPS) or Output Token Throughput
 * how many output tokens the system generates each second.
 
 $$
-\text{TPS} = \frac{\text{#Output Tokens}}{T_{last} - T_{first}}
+\text{TPS} = \frac{\text{Num Output Tokens}}{T_{last} - T_{first}}
 $$

jenkins/L0_MergeRequest.groovy

Lines changed: 2 additions & 0 deletions
@@ -155,6 +155,8 @@ def globalVars = [
 boolean enableUpdateGitlabStatus =
 !testFilter[ENABLE_SKIP_TEST] &&
 !testFilter[ONLY_MULTI_GPU_TEST] &&
+!testFilter[DISABLE_MULTI_GPU_TEST] &&
+!testFilter[DEBUG_MODE] &&
 testFilter[GPU_TYPE_LIST] == null &&
 testFilter[TEST_STAGE_LIST] == null &&
 testFilter[TEST_BACKEND] == null

jenkins/L0_Test.groovy

Lines changed: 12 additions & 20 deletions
@@ -642,6 +642,11 @@ def runLLMTestlistWithAgent(pipeline, platform, testList, config=VANILLA_CONFIG,
 echo "--gpus ${gpuCount}"
 fi
 """, returnStdout: true).trim()
+
+if (cluster.host.contains("dlcluster")) {
+dockerArgs += " " + sh(script: 'echo " -e NVIDIA_IMEX_CHANNELS=${NVIDIA_IMEX_CHANNELS:-0}"', returnStdout: true).trim()
+dockerArgs += " --device=/dev/gdrdrv:/dev/gdrdrv"
+}
 }
 
 dockerArgs = "${dockerArgs} " +
@@ -655,10 +660,6 @@
 "-v /tmp/pipcache/http-v2:/root/.cache/pip/http-v2:rw " +
 "--cap-add=SYSLOG"
 
-if (partition.clusterName == "dlcluster") {
-dockerArgs += " -e NVIDIA_IMEX_CHANNELS=0"
-dockerArgs += " --device=/dev/gdrdrv:/dev/gdrdrv"
-}
 echo "Final dockerArgs: ${dockerArgs}"
 } else {
 error "The Slurm node does not come online in the waiting period. Terminating the job."
@@ -750,6 +751,8 @@ def getPytestBaseCommandLine(
 extraInternalEnv = "__LUNOWUD=\"-thread_pool_size=${TESTER_CORES}\""
 // CPP test execution is timing out easily, so we always override its internal timeout to the same value as pytest
 extraInternalEnv += " CPP_TEST_TIMEOUT_OVERRIDDEN=${pytestTestTimeout}"
+// Enable NCCL debug information for multi-GPU tests
+extraInternalEnv += " NCCL_DEBUG=INFO"
 
 def testCmdLine = [
 "LLM_ROOT=${llmSrc}",
@@ -996,8 +999,11 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
 export resourcePathNode=$resourcePathNode
 export pytestCommand="$pytestCommand"
 export coverageConfigFile="$coverageConfigFile"
-export NVIDIA_IMEX_CHANNELS=0
-[ -z "\${NVIDIA_VISIBLE_DEVICES:-}" ] && export NVIDIA_VISIBLE_DEVICES=\$(seq -s, 0 \$((\$(nvidia-smi --query-gpu=count -i 0 --format=noheader)-1)))
+export NVIDIA_IMEX_CHANNELS=\${NVIDIA_IMEX_CHANNELS:-0}
+export NVIDIA_VISIBLE_DEVICES=\${NVIDIA_VISIBLE_DEVICES:-\$(seq -s, 0 \$((\$(nvidia-smi --query-gpu=count -i 0 --format=noheader)-1)))}
+
+echo "Env NVIDIA_IMEX_CHANNELS: \$NVIDIA_IMEX_CHANNELS"
+echo "Env NVIDIA_VISIBLE_DEVICES: \$NVIDIA_VISIBLE_DEVICES"
 
 ${srunPrologue}
 
@@ -2248,20 +2254,6 @@ def runLLMTestlistOnPlatformImpl(pipeline, platform, testList, config=VANILLA_CO
 def noRegularTests = false
 def noIsolateTests = false
 def rerunFailed = false
-
-echoNodeAndGpuInfo(pipeline, stageName)
-sh 'if [ "$(id -u)" -eq 0 ]; then dmesg -C || true; fi'
-
-def extraInternalEnv = ""
-def pytestTestTimeout = "3600"
-
-// TRT uses half of the host logic cores for engine building which is bad for multi-GPU machines.
-extraInternalEnv = "__LUNOWUD=\"-thread_pool_size=${TESTER_CORES}\""
-// CPP test execution is timing out easily, so we always override its internal timeout to the same value as pytest
-extraInternalEnv += " CPP_TEST_TIMEOUT_OVERRIDDEN=${pytestTestTimeout}"
-// Enable NCCL debug information for multi-GPU tests
-extraInternalEnv += " NCCL_DEBUG=INFO"
-
 def testDBList = renderTestDB(testList, llmSrc, stageName)
 
 // Process shard test list and create separate files for regular and isolate tests
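
The reworked sbatch exports above only fill in a default when the variable is unset. With the GPU count hard-coded instead of queried from nvidia-smi, the NVIDIA_VISIBLE_DEVICES fallback expands roughly like this (a sketch; gpu_count=8 is an assumed value):

    gpu_count=8                                    # stand-in for the nvidia-smi count query
    seq -s, 0 $((gpu_count - 1))                   # prints 0,1,2,3,4,5,6,7
    export NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES:-$(seq -s, 0 $((gpu_count - 1)))}
    export NVIDIA_IMEX_CHANNELS=${NVIDIA_IMEX_CHANNELS:-0}   # same pattern: keep any value already set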
