From 23392a5a7202abd45ffc423c082f00285fa6316b Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Fri, 12 Jun 2026 14:01:34 +0500 Subject: [PATCH 1/4] Support AWS p6 instances --- .../_internal/core/backends/aws/compute.py | 2 + .../_internal/core/backends/aws/resources.py | 99 +++++++---- .../server/services/backends/provisioning.py | 2 +- .../core/backends/aws/test_resources.py | 159 ++++++++++++++++++ 4 files changed, 227 insertions(+), 35 deletions(-) diff --git a/src/dstack/_internal/core/backends/aws/compute.py b/src/dstack/_internal/core/backends/aws/compute.py index 4c0fb4e39..cd072c1f5 100644 --- a/src/dstack/_internal/core/backends/aws/compute.py +++ b/src/dstack/_internal/core/backends/aws/compute.py @@ -1240,6 +1240,8 @@ def _supported_instances(offer: InstanceOffer) -> bool: "t2.small", "c5.", "m5.", + "p6-b300.", + "p6-b200.", "p5.", "p5e.", "p4d.", diff --git a/src/dstack/_internal/core/backends/aws/resources.py b/src/dstack/_internal/core/backends/aws/resources.py index 67959a19c..4e92bd84b 100644 --- a/src/dstack/_internal/core/backends/aws/resources.py +++ b/src/dstack/_internal/core/backends/aws/resources.py @@ -191,40 +191,13 @@ def create_instances_struct( # AWS allows specifying either NetworkInterfaces for specific subnet_id # or instance-level SecurityGroupIds in case of no specific subnet_id, not both. if subnet_id is not None: - # AWS does not auto-assign a public IPv4 to instances launched with multiple network - # interfaces ("AssociatePublicIpAddress [...] You cannot specify more than one network - # interface in the request"). For multi-EFA instance types (e.g. p4d, p5, trn1), we - # therefore launch all EFA NICs without `AssociatePublicIpAddress` and, when - # `public_ips: true`, attach an Elastic IP after launch in `update_provisioning_data`. - multi_eni = max_efa_interfaces > 1 - struct["NetworkInterfaces"] = [ - { - "AssociatePublicIpAddress": allocate_public_ip and not multi_eni, - "DeviceIndex": 0, - "SubnetId": subnet_id, - "Groups": [security_group_id], - "InterfaceType": "efa" if max_efa_interfaces > 0 else "interface", - }, - ] - - if multi_eni: - for i in range(1, max_efa_interfaces): - # Set to efa-only to use interfaces exclusively for GPU-to-GPU communication - interface_type = "efa-only" - if instance_type == "p5.48xlarge": - # EFA configuration for P5 instances: - # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa-acc-inst-types.html#efa-for-p5 - interface_type = "efa" if i % 4 == 0 else "efa-only" - struct["NetworkInterfaces"].append( - { - "AssociatePublicIpAddress": False, - "NetworkCardIndex": i, - "DeviceIndex": 1, - "SubnetId": subnet_id, - "Groups": [security_group_id], - "InterfaceType": interface_type, - } - ) + struct["NetworkInterfaces"] = _create_network_interfaces_struct( + instance_type=instance_type, + subnet_id=subnet_id, + security_group_id=security_group_id, + allocate_public_ip=allocate_public_ip, + max_efa_interfaces=max_efa_interfaces, + ) else: struct["SecurityGroupIds"] = [security_group_id] @@ -632,6 +605,64 @@ def _is_private_subnet_with_internet_egress( return False +def _create_network_interfaces_struct( + instance_type: str, + subnet_id: str, + security_group_id: str, + allocate_public_ip: bool, + max_efa_interfaces: int, +) -> List[Dict[str, Any]]: + # AWS does not auto-assign a public IPv4 to instances launched with multiple network + # interfaces ("AssociatePublicIpAddress [...] You cannot specify more than one network + # interface in the request"). For multi-EFA instance types (e.g. p4d, p5, p6, trn1), we + # therefore launch all EFA NICs without `AssociatePublicIpAddress` and, when + # `public_ips: true`, attach an Elastic IP after launch in `update_provisioning_data`. + multi_eni = max_efa_interfaces > 1 + primary_supports_efa = _primary_nic_supports_efa(instance_type) + network_interfaces: List[Dict[str, Any]] = [ + { + "AssociatePublicIpAddress": allocate_public_ip and not multi_eni, + "DeviceIndex": 0, + "SubnetId": subnet_id, + "Groups": [security_group_id], + "InterfaceType": "efa" + if max_efa_interfaces > 0 and primary_supports_efa + else "interface", + }, + ] + + if multi_eni: + last_card_index = max_efa_interfaces + if not primary_supports_efa: + last_card_index += 1 + for i in range(1, last_card_index): + # Set to efa-only to use interfaces exclusively for GPU-to-GPU communication + interface_type = "efa-only" + if instance_type == "p5.48xlarge": + # EFA configuration for P5 instances: + # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa-acc-inst-types.html#efa-for-p5 + interface_type = "efa" if i % 4 == 0 else "efa-only" + network_interfaces.append( + { + "AssociatePublicIpAddress": False, + "NetworkCardIndex": i, + "DeviceIndex": 1, + "SubnetId": subnet_id, + "Groups": [security_group_id], + "InterfaceType": interface_type, + } + ) + return network_interfaces + + +def _primary_nic_supports_efa(instance_type: str) -> bool: + """For most EFA-supported instance types, primary network card (index 0) supports + attaching both ENA and EFA. But some may support only one interface (ENA), + and all EFA interfaces are placed on the secondary network cards (1..max_efa_interfaces). + """ + return instance_type not in {"p6-b300.48xlarge"} + + def get_reservation( ec2_client: botocore.client.BaseClient, reservation_id: str, diff --git a/src/dstack/_internal/server/services/backends/provisioning.py b/src/dstack/_internal/server/services/backends/provisioning.py index b02deb8ad..f289a41dc 100644 --- a/src/dstack/_internal/server/services/backends/provisioning.py +++ b/src/dstack/_internal/server/services/backends/provisioning.py @@ -12,7 +12,7 @@ # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa.html#efa-instance-types _AWS_EFA_ENABLED_INSTANCE_TYPE_PATTERNS = [ - # TODO: p6-b200 isn't supported yet in gpuhunt + r"^p6-b300\.(48xlarge)$", r"^p6-b200\.(48xlarge)$", r"^p5\.(4xlarge|48xlarge)$", r"^p5e\.(48xlarge)$", diff --git a/src/tests/_internal/core/backends/aws/test_resources.py b/src/tests/_internal/core/backends/aws/test_resources.py index dcec84bf6..4fa9a30fa 100644 --- a/src/tests/_internal/core/backends/aws/test_resources.py +++ b/src/tests/_internal/core/backends/aws/test_resources.py @@ -5,6 +5,7 @@ from dstack._internal.core.backends.aws.models import AWSOSImage, AWSOSImageConfig from dstack._internal.core.backends.aws.resources import ( + _create_network_interfaces_struct, _is_valid_tag_key, _is_valid_tag_value, get_image_id_and_username, @@ -235,3 +236,161 @@ def test_raises_resource_not_found_if_image_config_property_not_set( image_config=image_config, ) assert "cpu image not configured" in caplog.text + + +class TestCreateNetworkInterfacesStruct: + def test_non_efa_instance_single_interface(self): + interfaces = _create_network_interfaces_struct( + instance_type="m5.large", + subnet_id="subnet-1", + security_group_id="sg-1", + allocate_public_ip=True, + max_efa_interfaces=0, + ) + assert interfaces == [ + { + "AssociatePublicIpAddress": True, + "DeviceIndex": 0, + "SubnetId": "subnet-1", + "Groups": ["sg-1"], + "InterfaceType": "interface", + }, + ] + + def test_non_efa_instance_no_public_ip(self): + interfaces = _create_network_interfaces_struct( + instance_type="m5.large", + subnet_id="subnet-1", + security_group_id="sg-1", + allocate_public_ip=False, + max_efa_interfaces=0, + ) + assert interfaces[0]["AssociatePublicIpAddress"] is False + assert interfaces[0]["InterfaceType"] == "interface" + + def test_single_efa_interface(self): + interfaces = _create_network_interfaces_struct( + instance_type="g5.8xlarge", + subnet_id="subnet-1", + security_group_id="sg-1", + allocate_public_ip=True, + max_efa_interfaces=1, + ) + # multi_eni is False, so the single EFA NIC keeps the public IP + assert interfaces == [ + { + "AssociatePublicIpAddress": True, + "DeviceIndex": 0, + "SubnetId": "subnet-1", + "Groups": ["sg-1"], + "InterfaceType": "efa", + }, + ] + + def test_multi_efa_instance(self): + interfaces = _create_network_interfaces_struct( + instance_type="p4d.24xlarge", + subnet_id="subnet-1", + security_group_id="sg-1", + allocate_public_ip=True, + max_efa_interfaces=4, + ) + # Multiple NICs disable auto-assigned public IP on every interface + assert interfaces[0] == { + "AssociatePublicIpAddress": False, + "DeviceIndex": 0, + "SubnetId": "subnet-1", + "Groups": ["sg-1"], + "InterfaceType": "efa", + } + assert interfaces[1:] == [ + { + "AssociatePublicIpAddress": False, + "NetworkCardIndex": i, + "DeviceIndex": 1, + "SubnetId": "subnet-1", + "Groups": ["sg-1"], + "InterfaceType": "efa-only", + } + for i in range(1, 4) + ] + + def test_p5_uses_efa_every_fourth_interface(self): + interfaces = _create_network_interfaces_struct( + instance_type="p5.48xlarge", + subnet_id="subnet-1", + security_group_id="sg-1", + allocate_public_ip=True, + max_efa_interfaces=32, + ) + assert len(interfaces) == 32 + assert all(i["NetworkCardIndex"] == idx for idx, i in enumerate(interfaces) if idx > 0) + # The primary NIC is a combined efa interface + assert interfaces[0]["InterfaceType"] == "efa" + assert "NetworkCardIndex" not in interfaces[0] + # Every 4th secondary NIC is a combined efa interface, the rest are efa-only + for idx, interface in enumerate(interfaces[1:], start=1): + expected = "efa" if idx % 4 == 0 else "efa-only" + assert interface["InterfaceType"] == expected, idx + + def test_p6_b200_efa_on_every_card(self): + # p6-b200 has 8 EFA-capable network cards (indexes 0-7), handled by the generic path + interfaces = _create_network_interfaces_struct( + instance_type="p6-b200.48xlarge", + subnet_id="subnet-1", + security_group_id="sg-1", + allocate_public_ip=True, + max_efa_interfaces=8, + ) + assert len(interfaces) == 8 + assert interfaces[0] == { + "AssociatePublicIpAddress": False, + "DeviceIndex": 0, + "SubnetId": "subnet-1", + "Groups": ["sg-1"], + "InterfaceType": "efa", + } + assert interfaces[1:] == [ + { + "AssociatePublicIpAddress": False, + "NetworkCardIndex": i, + "DeviceIndex": 1, + "SubnetId": "subnet-1", + "Groups": ["sg-1"], + "InterfaceType": "efa-only", + } + for i in range(1, 8) + ] + + def test_p6_b300_ena_only_primary_nic(self): + # p6-b300 has 17 network cards: the primary (index 0) supports only ENA, the remaining + # 16 cards (indexes 1-16) support EFA. max_efa_interfaces is 16. + interfaces = _create_network_interfaces_struct( + instance_type="p6-b300.48xlarge", + subnet_id="subnet-1", + security_group_id="sg-1", + allocate_public_ip=True, + max_efa_interfaces=16, + ) + # 1 ENA primary + 16 EFA secondary cards + assert len(interfaces) == 17 + # Primary card is a plain ENA interface, not EFA + assert interfaces[0] == { + "AssociatePublicIpAddress": False, + "DeviceIndex": 0, + "SubnetId": "subnet-1", + "Groups": ["sg-1"], + "InterfaceType": "interface", + } + # EFA-only interfaces span network card indexes 1-16 + assert interfaces[1:] == [ + { + "AssociatePublicIpAddress": False, + "NetworkCardIndex": i, + "DeviceIndex": 1, + "SubnetId": "subnet-1", + "Groups": ["sg-1"], + "InterfaceType": "efa-only", + } + for i in range(1, 17) + ] From 1319fd96b4fdaa0ca91e14d03d9b3388362ae7ec Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Fri, 12 Jun 2026 14:36:53 +0500 Subject: [PATCH 2/4] Update efa image deps versions --- docker/base/efa/Dockerfile | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/docker/base/efa/Dockerfile b/docker/base/efa/Dockerfile index 105650a83..3ea6a4970 100644 --- a/docker/base/efa/Dockerfile +++ b/docker/base/efa/Dockerfile @@ -16,15 +16,11 @@ RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \ && apt-get install -y --no-install-recommends \ cuda-libraries-dev-${cuda_version} \ cuda-nvcc-${cuda_version} \ - libhwloc-dev \ - autoconf \ - automake \ - libtool \ && rm -rf /var/lib/apt/lists/* # EFA -ARG EFA_VERSION=1.38.1 +ARG EFA_VERSION=1.48.0 RUN cd /tmp \ && apt-get update \ @@ -36,7 +32,7 @@ RUN cd /tmp \ # NCCL -ARG NCCL_VERSION=2.26.2-1 +ARG NCCL_VERSION=2.27.7-1 RUN cd /tmp \ && git clone https://github.com/NVIDIA/nccl.git -b v${NCCL_VERSION} \ @@ -44,26 +40,6 @@ RUN cd /tmp \ && make -j$(nproc) src.build BUILDDIR=${NCCL_HOME} \ && rm -rf /tmp/nccl -# AWS OFI NCCL - -ARG OFI_VERSION=1.14.0 - -RUN cd /tmp \ - && git clone https://github.com/aws/aws-ofi-nccl.git -b v${OFI_VERSION} \ - && cd aws-ofi-nccl \ - && ./autogen.sh \ - && ./configure \ - --with-cuda=${CUDA_HOME} \ - --with-libfabric=${LIBFABRIC_PATH} \ - --with-mpi=${OPEN_MPI_PATH} \ - --with-cuda=${CUDA_HOME} \ - --with-nccl=${NCCL_HOME} \ - --disable-tests \ - --prefix=${NCCL_HOME} \ - && make -j$(nproc) \ - && make install \ - && rm -rf /tmp/aws-ofi-nccl /var/lib/apt/lists/* - # NCCL Tests RUN cd /opt \ From cbaf1cade423b3a2f604097d78e0bd2076c66a96 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Fri, 12 Jun 2026 15:14:08 +0500 Subject: [PATCH 3/4] Build docker images on dstack-ubuntu-latest-32-cores --- .github/workflows/docker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index ae8f76d1a..bb9abe8f0 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -43,7 +43,7 @@ jobs: defaults: run: working-directory: docker - runs-on: ubuntu-latest + runs-on: dstack-ubuntu-latest-32-cores strategy: matrix: flavor: ["base", "devel", "devel-efa"] From 90630a21c2dd53de1806fed9918e0624cebbb54a Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Fri, 12 Jun 2026 16:00:11 +0500 Subject: [PATCH 4/4] Update efa/README.md --- docker/base/efa/README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docker/base/efa/README.md b/docker/base/efa/README.md index 29ed74821..9790d84d7 100644 --- a/docker/base/efa/README.md +++ b/docker/base/efa/README.md @@ -2,8 +2,7 @@ This image has the following installed: -* CUDA 12.1 -* AWS EFA Installer 1.38.1 (Libfabric + Open MPI 4 + Open MPI 5) -* NCCL 2.26.2-1 -* AWS OFI NCCL 1.14.0 +* CUDA 12.8 +* AWS EFA Installer 1.48.0 (Libfabric + Open MPI + AWS OFI NCCL 1.19.0) +* NCCL 2.27.7-1 * NCCL Tests