Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
defaults:
run:
working-directory: docker
runs-on: ubuntu-latest
runs-on: dstack-ubuntu-latest-32-cores
strategy:
matrix:
flavor: ["base", "devel", "devel-efa"]
Expand Down
28 changes: 2 additions & 26 deletions docker/base/efa/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,11 @@ RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \
&& apt-get install -y --no-install-recommends \
cuda-libraries-dev-${cuda_version} \
cuda-nvcc-${cuda_version} \
libhwloc-dev \
autoconf \
automake \
libtool \
&& rm -rf /var/lib/apt/lists/*

# EFA

ARG EFA_VERSION=1.38.1
ARG EFA_VERSION=1.48.0

RUN cd /tmp \
&& apt-get update \
Expand All @@ -36,34 +32,14 @@ RUN cd /tmp \

# NCCL

ARG NCCL_VERSION=2.26.2-1
ARG NCCL_VERSION=2.27.7-1

RUN cd /tmp \
&& git clone https://github.com/NVIDIA/nccl.git -b v${NCCL_VERSION} \
&& cd nccl \
&& make -j$(nproc) src.build BUILDDIR=${NCCL_HOME} \
&& rm -rf /tmp/nccl

# AWS OFI NCCL

ARG OFI_VERSION=1.14.0

RUN cd /tmp \
&& git clone https://github.com/aws/aws-ofi-nccl.git -b v${OFI_VERSION} \
&& cd aws-ofi-nccl \
&& ./autogen.sh \
&& ./configure \
--with-cuda=${CUDA_HOME} \
--with-libfabric=${LIBFABRIC_PATH} \
--with-mpi=${OPEN_MPI_PATH} \
--with-cuda=${CUDA_HOME} \
--with-nccl=${NCCL_HOME} \
--disable-tests \
--prefix=${NCCL_HOME} \
&& make -j$(nproc) \
&& make install \
&& rm -rf /tmp/aws-ofi-nccl /var/lib/apt/lists/*

# NCCL Tests

RUN cd /opt \
Expand Down
7 changes: 3 additions & 4 deletions docker/base/efa/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@

This image has the following installed:

* CUDA 12.1
* AWS EFA Installer 1.38.1 (Libfabric + Open MPI 4 + Open MPI 5)
* NCCL 2.26.2-1
* AWS OFI NCCL 1.14.0
* CUDA 12.8
* AWS EFA Installer 1.48.0 (Libfabric + Open MPI + AWS OFI NCCL 1.19.0)
* NCCL 2.27.7-1
* NCCL Tests
2 changes: 2 additions & 0 deletions src/dstack/_internal/core/backends/aws/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -1240,6 +1240,8 @@ def _supported_instances(offer: InstanceOffer) -> bool:
"t2.small",
"c5.",
"m5.",
"p6-b300.",
"p6-b200.",
"p5.",
"p5e.",
"p4d.",
Expand Down
99 changes: 65 additions & 34 deletions src/dstack/_internal/core/backends/aws/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,40 +191,13 @@ def create_instances_struct(
# AWS allows specifying either NetworkInterfaces for specific subnet_id
# or instance-level SecurityGroupIds in case of no specific subnet_id, not both.
if subnet_id is not None:
# AWS does not auto-assign a public IPv4 to instances launched with multiple network
# interfaces ("AssociatePublicIpAddress [...] You cannot specify more than one network
# interface in the request"). For multi-EFA instance types (e.g. p4d, p5, trn1), we
# therefore launch all EFA NICs without `AssociatePublicIpAddress` and, when
# `public_ips: true`, attach an Elastic IP after launch in `update_provisioning_data`.
multi_eni = max_efa_interfaces > 1
struct["NetworkInterfaces"] = [
{
"AssociatePublicIpAddress": allocate_public_ip and not multi_eni,
"DeviceIndex": 0,
"SubnetId": subnet_id,
"Groups": [security_group_id],
"InterfaceType": "efa" if max_efa_interfaces > 0 else "interface",
},
]

if multi_eni:
for i in range(1, max_efa_interfaces):
# Set to efa-only to use interfaces exclusively for GPU-to-GPU communication
interface_type = "efa-only"
if instance_type == "p5.48xlarge":
# EFA configuration for P5 instances:
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa-acc-inst-types.html#efa-for-p5
interface_type = "efa" if i % 4 == 0 else "efa-only"
struct["NetworkInterfaces"].append(
{
"AssociatePublicIpAddress": False,
"NetworkCardIndex": i,
"DeviceIndex": 1,
"SubnetId": subnet_id,
"Groups": [security_group_id],
"InterfaceType": interface_type,
}
)
struct["NetworkInterfaces"] = _create_network_interfaces_struct(
instance_type=instance_type,
subnet_id=subnet_id,
security_group_id=security_group_id,
allocate_public_ip=allocate_public_ip,
max_efa_interfaces=max_efa_interfaces,
)
else:
struct["SecurityGroupIds"] = [security_group_id]

Expand Down Expand Up @@ -632,6 +605,64 @@ def _is_private_subnet_with_internet_egress(
return False


def _create_network_interfaces_struct(
    instance_type: str,
    subnet_id: str,
    security_group_id: str,
    allocate_public_ip: bool,
    max_efa_interfaces: int,
) -> List[Dict[str, Any]]:
    """Build the `NetworkInterfaces` launch parameter for an instance in `subnet_id`.

    The first entry is always the primary interface (`DeviceIndex` 0). When
    `max_efa_interfaces` is greater than one, one extra entry per secondary
    network card follows, each with `DeviceIndex` 1 and its own `NetworkCardIndex`.

    AWS does not auto-assign a public IPv4 to instances launched with multiple network
    interfaces ("AssociatePublicIpAddress [...] You cannot specify more than one network
    interface in the request"). For multi-EFA instance types (e.g. p4d, p5, p6, trn1), all
    NICs are therefore launched without `AssociatePublicIpAddress` and, when
    `public_ips: true`, an Elastic IP is attached after launch in `update_provisioning_data`.
    """
    efa_on_primary = _primary_nic_supports_efa(instance_type)
    has_secondary_nics = max_efa_interfaces > 1

    if max_efa_interfaces > 0 and efa_on_primary:
        primary_type = "efa"
    else:
        primary_type = "interface"

    primary_nic: Dict[str, Any] = {
        "AssociatePublicIpAddress": allocate_public_ip and not has_secondary_nics,
        "DeviceIndex": 0,
        "SubnetId": subnet_id,
        "Groups": [security_group_id],
        "InterfaceType": primary_type,
    }
    if not has_secondary_nics:
        return [primary_nic]

    # When the primary card cannot host EFA, every EFA interface lives on a secondary
    # card, so one more card index is needed to reach `max_efa_interfaces` EFA NICs.
    card_index_stop = max_efa_interfaces if efa_on_primary else max_efa_interfaces + 1
    is_p5 = instance_type == "p5.48xlarge"

    def secondary_type(card_index: int) -> str:
        # EFA configuration for P5 instances:
        # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa-acc-inst-types.html#efa-for-p5
        if is_p5 and card_index % 4 == 0:
            return "efa"
        # efa-only keeps the interface exclusively for GPU-to-GPU communication
        return "efa-only"

    secondary_nics: List[Dict[str, Any]] = [
        {
            "AssociatePublicIpAddress": False,
            "NetworkCardIndex": card_index,
            "DeviceIndex": 1,
            "SubnetId": subnet_id,
            "Groups": [security_group_id],
            "InterfaceType": secondary_type(card_index),
        }
        for card_index in range(1, card_index_stop)
    ]
    return [primary_nic, *secondary_nics]


def _primary_nic_supports_efa(instance_type: str) -> bool:
    """Return whether the primary network card (index 0) can host an EFA interface.

    For most EFA-supported instance types the primary card supports attaching both
    ENA and EFA. Some types support only one interface (ENA) on the primary card,
    and all their EFA interfaces are placed on the secondary network cards
    (1..max_efa_interfaces).
    """
    # Instance types whose primary network card is ENA-only.
    ena_only_primary_types = {"p6-b300.48xlarge"}
    return instance_type not in ena_only_primary_types


def get_reservation(
ec2_client: botocore.client.BaseClient,
reservation_id: str,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa.html#efa-instance-types
_AWS_EFA_ENABLED_INSTANCE_TYPE_PATTERNS = [
# TODO: p6-b200 isn't supported yet in gpuhunt
r"^p6-b300\.(48xlarge)$",
r"^p6-b200\.(48xlarge)$",
r"^p5\.(4xlarge|48xlarge)$",
r"^p5e\.(48xlarge)$",
Expand Down
159 changes: 159 additions & 0 deletions src/tests/_internal/core/backends/aws/test_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from dstack._internal.core.backends.aws.models import AWSOSImage, AWSOSImageConfig
from dstack._internal.core.backends.aws.resources import (
_create_network_interfaces_struct,
_is_valid_tag_key,
_is_valid_tag_value,
get_image_id_and_username,
Expand Down Expand Up @@ -235,3 +236,161 @@ def test_raises_resource_not_found_if_image_config_property_not_set(
image_config=image_config,
)
assert "cpu image not configured" in caplog.text


class TestCreateNetworkInterfacesStruct:
    """Checks the `NetworkInterfaces` layout built by `_create_network_interfaces_struct`."""

    @staticmethod
    def _build(instance_type: str, max_efa_interfaces: int, allocate_public_ip: bool = True):
        # Every test uses the same subnet and security group; only these three inputs vary.
        return _create_network_interfaces_struct(
            instance_type=instance_type,
            subnet_id="subnet-1",
            security_group_id="sg-1",
            allocate_public_ip=allocate_public_ip,
            max_efa_interfaces=max_efa_interfaces,
        )

    @staticmethod
    def _primary_nic(interface_type: str, public_ip: bool) -> dict:
        # Expected shape of the primary interface entry (no `NetworkCardIndex` key).
        return {
            "AssociatePublicIpAddress": public_ip,
            "DeviceIndex": 0,
            "SubnetId": "subnet-1",
            "Groups": ["sg-1"],
            "InterfaceType": interface_type,
        }

    @staticmethod
    def _efa_only_nic(card_index: int) -> dict:
        # Expected shape of an efa-only entry on a secondary network card.
        return {
            "AssociatePublicIpAddress": False,
            "NetworkCardIndex": card_index,
            "DeviceIndex": 1,
            "SubnetId": "subnet-1",
            "Groups": ["sg-1"],
            "InterfaceType": "efa-only",
        }

    def test_non_efa_instance_single_interface(self):
        interfaces = self._build("m5.large", max_efa_interfaces=0)
        assert interfaces == [self._primary_nic("interface", public_ip=True)]

    def test_non_efa_instance_no_public_ip(self):
        interfaces = self._build("m5.large", max_efa_interfaces=0, allocate_public_ip=False)
        primary = interfaces[0]
        assert primary["AssociatePublicIpAddress"] is False
        assert primary["InterfaceType"] == "interface"

    def test_single_efa_interface(self):
        interfaces = self._build("g5.8xlarge", max_efa_interfaces=1)
        # multi_eni is False, so the single EFA NIC keeps the public IP
        assert interfaces == [self._primary_nic("efa", public_ip=True)]

    def test_multi_efa_instance(self):
        interfaces = self._build("p4d.24xlarge", max_efa_interfaces=4)
        # Multiple NICs disable auto-assigned public IP on every interface
        assert interfaces[0] == self._primary_nic("efa", public_ip=False)
        assert interfaces[1:] == [self._efa_only_nic(card) for card in range(1, 4)]

    def test_p5_uses_efa_every_fourth_interface(self):
        interfaces = self._build("p5.48xlarge", max_efa_interfaces=32)
        assert len(interfaces) == 32
        assert all(
            nic["NetworkCardIndex"] == position
            for position, nic in enumerate(interfaces)
            if position > 0
        )
        # The primary NIC is a combined efa interface
        primary = interfaces[0]
        assert primary["InterfaceType"] == "efa"
        assert "NetworkCardIndex" not in primary
        # Every 4th secondary NIC is a combined efa interface, the rest are efa-only
        for card, nic in enumerate(interfaces[1:], start=1):
            wanted_type = "efa" if card % 4 == 0 else "efa-only"
            assert nic["InterfaceType"] == wanted_type, card

    def test_p6_b200_efa_on_every_card(self):
        # p6-b200 has 8 EFA-capable network cards (indexes 0-7), handled by the generic path
        interfaces = self._build("p6-b200.48xlarge", max_efa_interfaces=8)
        assert len(interfaces) == 8
        assert interfaces[0] == self._primary_nic("efa", public_ip=False)
        assert interfaces[1:] == [self._efa_only_nic(card) for card in range(1, 8)]

    def test_p6_b300_ena_only_primary_nic(self):
        # p6-b300 has 17 network cards: the primary (index 0) supports only ENA, the remaining
        # 16 cards (indexes 1-16) support EFA. max_efa_interfaces is 16.
        interfaces = self._build("p6-b300.48xlarge", max_efa_interfaces=16)
        # 1 ENA primary + 16 EFA secondary cards
        assert len(interfaces) == 17
        # Primary card is a plain ENA interface, not EFA
        assert interfaces[0] == self._primary_nic("interface", public_ip=False)
        # EFA-only interfaces span network card indexes 1-16
        assert interfaces[1:] == [self._efa_only_nic(card) for card in range(1, 17)]
Loading