Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 104 additions & 0 deletions cmd/mapt/cmd/ibmcloud/hosts/ibm-gaudi.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package hosts

import (
"github.com/redhat-developer/mapt/cmd/mapt/cmd/params"
maptContext "github.com/redhat-developer/mapt/pkg/manager/context"
ibmgaudi "github.com/redhat-developer/mapt/pkg/provider/ibmcloud/action/ibm-gaudi"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"github.com/spf13/viper"
)

// Name and short description of the ibm-gaudi parent command.
const (
// cmdIBMGaudi is the command name used for the Use field and for naming
// the flag set that carries the common flags.
cmdIBMGaudi = "ibm-gaudi"
// cmdIBMGaudiDesc is the one-line help text shown for the command.
cmdIBMGaudiDesc = "manage ibm gaudi3 accelerated instances (amd64)"
)

// IBMGaudiCmd assembles the ibm-gaudi parent command. It registers the flags
// contributed by params.AddCommonFlags as persistent flags and attaches the
// create and destroy subcommands.
func IBMGaudiCmd() *cobra.Command {
	root := &cobra.Command{
		Use:   cmdIBMGaudi,
		Short: cmdIBMGaudiDesc,
		// Run without a subcommand, the command only binds its flags to viper.
		RunE: func(cmd *cobra.Command, _ []string) error {
			return viper.BindPFlags(cmd.Flags())
		},
	}

	commonFlags := pflag.NewFlagSet(cmdIBMGaudi, pflag.ExitOnError)
	params.AddCommonFlags(commonFlags)
	root.PersistentFlags().AddFlagSet(commonFlags)

	root.AddCommand(ibmGaudiCreate(), ibmGaudiDestroy())
	return root
}

// ibmGaudiCreate builds the create subcommand of ibm-gaudi.
//
// When run, it binds the command's flags to viper, then calls ibmgaudi.New
// with a ContextArgs built from the common flags (project name, backed URL,
// debug settings, tags, connection-details output path) and a GaudiArgs built
// from the subnet and OpenTelemetry flags registered below. Any error from
// flag binding or from ibmgaudi.New is returned to cobra unchanged.
func ibmGaudiCreate() *cobra.Command {
	c := &cobra.Command{
		Use:   params.CreateCmdName,
		Short: params.CreateCmdName,
		RunE: func(cmd *cobra.Command, args []string) error {
			if err := viper.BindPFlags(cmd.Flags()); err != nil {
				return err
			}
			return ibmgaudi.New(
				&maptContext.ContextArgs{
					Context:       cmd.Context(),
					ProjectName:   viper.GetString(params.ProjectName),
					BackedURL:     viper.GetString(params.BackedURL),
					ResultsOutput: viper.GetString(params.ConnectionDetailsOutput),
					// Read the boolean value rather than mere presence:
					// viper.IsSet reports true for any flag given on the
					// command line, so --debug=false would enable debug.
					Debug:      viper.GetBool(params.Debug),
					DebugLevel: viper.GetUint(params.DebugLevel),
					Tags:       viper.GetStringMapString(params.Tags),
				},
				&ibmgaudi.GaudiArgs{
					SubnetID:       viper.GetString(params.SubnetID),
					OtelAppCode:    viper.GetString(params.OtelAppCode),
					OtelAuthToken:  viper.GetString(params.OtelAuthToken),
					OtelEndpoint:   viper.GetString(params.OtelEndpoint),
					OtelIndex:      viper.GetString(params.OtelIndex),
					OtelExtraAttrs: viper.GetStringMapString(params.OtelExtraAttrs),
				})
		},
	}

	// Flags specific to create; all are optional and default to empty/nil
	// except the OTLP endpoint, which has a built-in default URL.
	flagSet := pflag.NewFlagSet(params.CreateCmdName, pflag.ExitOnError)
	flagSet.StringP(params.ConnectionDetailsOutput, "", "", params.ConnectionDetailsOutputDesc)
	flagSet.StringToStringP(params.Tags, "", nil, params.TagsDesc)
	flagSet.StringP(params.SubnetID, "", "", params.SubnetIDDesc)
	flagSet.StringP(params.OtelAppCode, "", "", params.OtelAppCodeDesc)
	flagSet.StringP(params.OtelAuthToken, "", "", params.OtelAuthTokenDesc)
	flagSet.StringP(params.OtelEndpoint, "", "https://otel-input.corp.redhat.com", params.OtelEndpointDesc)
	flagSet.StringP(params.OtelIndex, "", "", params.OtelIndexDesc)
	flagSet.StringToStringP(params.OtelExtraAttrs, "", nil, params.OtelExtraAttrsDesc)
	c.PersistentFlags().AddFlagSet(flagSet)
	return c
}

// ibmGaudiDestroy builds the destroy subcommand of ibm-gaudi.
//
// When run, it binds the command's flags to viper, then calls
// ibmgaudi.Destroy with a ContextArgs built from the common flags (project
// name, backed URL, debug settings) and the serverless, force-destroy and
// keep-state booleans registered below. Any error from flag binding or from
// ibmgaudi.Destroy is returned to cobra unchanged.
func ibmGaudiDestroy() *cobra.Command {
	c := &cobra.Command{
		Use:   params.DestroyCmdName,
		Short: params.DestroyCmdName,
		RunE: func(cmd *cobra.Command, args []string) error {
			if err := viper.BindPFlags(cmd.Flags()); err != nil {
				return err
			}
			// The boolean options are read with GetBool rather than IsSet.
			// IsSet reports true for any flag present on the command line,
			// so an explicit --force-destroy=false or --keep-state=false
			// would otherwise be treated as true.
			return ibmgaudi.Destroy(&maptContext.ContextArgs{
				Context:      cmd.Context(),
				ProjectName:  viper.GetString(params.ProjectName),
				BackedURL:    viper.GetString(params.BackedURL),
				Debug:        viper.GetBool(params.Debug),
				DebugLevel:   viper.GetUint(params.DebugLevel),
				Serverless:   viper.GetBool(params.Serverless),
				ForceDestroy: viper.GetBool(params.ForceDestroy),
				KeepState:    viper.GetBool(params.KeepState),
			})
		},
	}

	// Flags specific to destroy; all default to false.
	flagSet := pflag.NewFlagSet(params.DestroyCmdName, pflag.ExitOnError)
	flagSet.Bool(params.Serverless, false, params.ServerlessDesc)
	flagSet.Bool(params.ForceDestroy, false, params.ForceDestroyDesc)
	flagSet.Bool(params.KeepState, false, params.KeepStateDesc)
	c.PersistentFlags().AddFlagSet(flagSet)
	return c
}
1 change: 1 addition & 0 deletions cmd/mapt/cmd/ibmcloud/ibmcloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ func GetCmd() *cobra.Command {
params.AddCommonFlags(flagSet)
c.PersistentFlags().AddFlagSet(flagSet)
c.AddCommand(
hosts.IBMGaudiCmd(),
hosts.IBMPowerCmd(),
hosts.IBMZCmd())
return c
Expand Down
167 changes: 167 additions & 0 deletions docs/ibmcloud/ibm-gaudi.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# Overview

This action provisions an Intel Gaudi 3 accelerated instance on IBM Cloud VPC using the RHEL AI image. The instance uses the `gx3d-160x1792x8gaudi3` profile (160 vCPU, 1792 GB RAM, 8x Gaudi 3 accelerators) and is assigned a floating IP for direct SSH access.

Two networking modes are supported:

- **Existing subnet** (`--subnet-id`): the instance is placed in a pre-existing VPC subnet. VPC, subnet, and gateway are not created. Only `IC_REGION` is required.
- **Auto-provision** (no `--subnet-id`): a new VPC, subnet, and public gateway are created from scratch. Both `IC_REGION` and `IC_ZONE` are required.

## Environment variables

| Variable | Required | Description |
|---|---|---|
| `IBMCLOUD_ACCOUNT` | yes | IBM Cloud account ID |
| `IBMCLOUD_API_KEY` | yes | IBM Cloud API key |
| `IC_REGION` | yes | IBM Cloud region (e.g. `us-east`, `us-south`, `eu-de`) |
| `IC_ZONE` | only without `--subnet-id` | Availability zone (e.g. `us-east-1`) |
| `IBMCLOUD_COS_ACCESS_KEY_ID` | only with S3 `--backed-url` | HMAC access key for IBM Cloud Object Storage |
| `IBMCLOUD_COS_SECRET_ACCESS_KEY` | only with S3 `--backed-url` | HMAC secret key for IBM Cloud Object Storage |
| `IBMCLOUD_COS_ENDPOINT` | no | COS S3 endpoint (defaults to `s3.<region>.cloud-object-storage.appdomain.cloud`) |

## Regional availability

Gaudi 3 instances are available in:

- **us-east** (Washington DC)
- **us-south** (Dallas)
- **eu-de** (Frankfurt)

## Create

```bash
mapt ibmcloud ibm-gaudi create -h
create

Usage:
mapt ibmcloud ibm-gaudi create [flags]

Flags:
--conn-details-output string path to export host connection information (host, username and privateKey)
-h, --help help for create
--otel-app-code string OpenTelemetry appcode identifier (e.g. MAPT-001); when set together with --otel-auth-token, installs the otelcol-contrib filelog collector on the instance
--otel-auth-token string OpenTelemetry authentication token (UUID) used to authenticate against the OTLP endpoint
--otel-endpoint string OTLP HTTP endpoint to export logs to (default "https://otel-input.corp.redhat.com")
--otel-index string Splunk index name for log routing (e.g. rh_linux)
--subnet-id string ID of an existing VPC subnet to deploy the instance into (optional)
--tags stringToString tags to add on each resource (--tags name1=value1,name2=value2) (default [])

Global Flags:
--backed-url string backed for stack state. (local) file:///path/subpath (s3) s3://existing-bucket, (azure) azblob://existing-blobcontainer. See more https://www.pulumi.com/docs/iac/concepts/state-and-backends/#using-a-self-managed-backend
--debug Enable debug traces and set verbosity to max. Typically to get information to troubleshooting an issue.
--debug-level uint Set the level of verbosity on debug. You can set from minimum 1 to max 9. (default 3)
--project-name string project name to identify the instance of the stack
```

### Outputs

Files written to the path defined by `--conn-details-output`:

| File | Description |
|---|---|
| `host` | Floating IP of the instance (direct SSH) |
| `username` | SSH username (`root`) |
| `id_rsa` | Private key for the instance |

A state folder is also created at the location given by `--backed-url`. The same `--backed-url` and `--project-name` values are required to destroy the resources later.

### SSH access

```bash
OUTPUT=/path/to/conn-details-output

ssh -i ${OUTPUT}/id_rsa \
-o StrictHostKeyChecking=no \
root@$(cat ${OUTPUT}/host)
```

### Container

```bash
# Using an existing VPC subnet
podman run -d --name ibm-gaudi \
-v ${PWD}:/workspace:z \
-e IBMCLOUD_API_KEY=XXX \
-e IC_REGION=us-east \
quay.io/redhat-developer/mapt:latest ibmcloud ibm-gaudi create \
--project-name ibm-gaudi \
--backed-url file:///workspace \
--conn-details-output /workspace \
--subnet-id <subnet-id>

# Auto-provisioning VPC, subnet, and gateway
podman run -d --name ibm-gaudi \
-v ${PWD}:/workspace:z \
-e IBMCLOUD_API_KEY=XXX \
-e IC_REGION=us-east \
-e IC_ZONE=us-east-1 \
quay.io/redhat-developer/mapt:latest ibmcloud ibm-gaudi create \
--project-name ibm-gaudi \
--backed-url file:///workspace \
--conn-details-output /workspace
```

## OpenTelemetry log collection

When both `--otel-app-code` and `--otel-auth-token` are provided, cloud-init installs `otelcol-contrib` on the instance at first boot and configures it to ship `/var/log/messages`, `/var/log/secure`, and `/var/log/audit/audit.log` to the OTLP endpoint.

```bash
podman run -d --name ibm-gaudi \
-v ${PWD}:/workspace:z \
-e IBMCLOUD_API_KEY=XXX \
-e IC_REGION=us-east \
quay.io/redhat-developer/mapt:latest ibmcloud ibm-gaudi create \
--project-name ibm-gaudi \
--backed-url file:///workspace \
--conn-details-output /workspace \
--subnet-id <subnet-id> \
--otel-app-code MAPT-001 \
--otel-auth-token <uuid-token>
```

## Using IBM Cloud Object Storage as S3 backend

To store Pulumi state in IBM COS instead of a local file, create [HMAC credentials](https://cloud.ibm.com/docs/cloud-object-storage?topic=cloud-object-storage-uhc-hmac-credentials-main) for your COS instance and pass an `s3://` backed URL:

```bash
podman run -d --name ibm-gaudi \
-v ${PWD}:/workspace:z \
-e IBMCLOUD_API_KEY=XXX \
-e IBMCLOUD_ACCOUNT=XXX \
-e IC_REGION=us-east \
-e IC_ZONE=us-east-1 \
-e IBMCLOUD_COS_ACCESS_KEY_ID=XXX \
-e IBMCLOUD_COS_SECRET_ACCESS_KEY=XXX \
quay.io/redhat-developer/mapt:latest ibmcloud ibm-gaudi create \
--project-name ibm-gaudi \
--backed-url s3://my-cos-bucket \
--conn-details-output /workspace
```

## Destroy

```bash
podman run -d --name ibm-gaudi \
-v ${PWD}:/workspace:z \
-e IBMCLOUD_API_KEY=XXX \
-e IC_REGION=us-east \
quay.io/redhat-developer/mapt:latest ibmcloud ibm-gaudi destroy \
--project-name ibm-gaudi \
--backed-url file:///workspace
```

By default, `destroy` removes the Pulumi state files from the backend once the resources have been destroyed successfully. Pass `--keep-state` to preserve them:

```bash
podman run -d --name ibm-gaudi \
-v ${PWD}:/workspace:z \
-e IBMCLOUD_API_KEY=XXX \
-e IBMCLOUD_ACCOUNT=XXX \
-e IC_REGION=us-east \
-e IBMCLOUD_COS_ACCESS_KEY_ID=XXX \
-e IBMCLOUD_COS_SECRET_ACCESS_KEY=XXX \
quay.io/redhat-developer/mapt:latest ibmcloud ibm-gaudi destroy \
--project-name ibm-gaudi \
--backed-url s3://my-cos-bucket \
--keep-state
```
Loading