From 6cf7f4fcb670daaca0d3c7b3d0a517f1f34ea23b Mon Sep 17 00:00:00 2001
From: are-ces <195810094+are-ces@users.noreply.github.com>
Date: Thu, 11 Jun 2026 11:32:17 +0200
Subject: [PATCH] feat(aws): add RHAIIS auto-start flags for RHEL AI provisioning

Add --auto-start, --model, --hf-token, and --api-key flags to
`mapt aws rhel-ai create` to configure and start the RHAIIS
inference server automatically after VM provisioning.

When --auto-start is set, mapt SSHes into the provisioned VM after
readiness, configures the RHAIIS quadlet with the provided model
and credentials, and starts the systemd service.

Co-Authored-By: Claude Opus 4.6
---
Review notes (ignored by git am, not part of the commit message):
- --auto-start is read with viper.GetBool instead of viper.IsSet, so
  passing --auto-start=false no longer enables the RHAIIS setup.
- Values interpolated into the sed programs are escaped for sed and the
  programs are single-quoted for the shell.
- The new RHELAIArgs fields carry doc comments; whitespace-only changes
  to the pre-existing fields were dropped from the api.go hunk.
- Line breaks and indentation were reconstructed from a
  whitespace-collapsed copy of this patch assuming gofmt-clean targets;
  the index hashes below are those of the original submission.

 cmd/mapt/cmd/aws/hosts/rhelai.go          |   8 ++
 cmd/mapt/cmd/params/params.go             |   8 ++
 pkg/provider/aws/action/rhel-ai/rhelai.go | 104 +++++++++++++++++++++-
 pkg/target/host/rhelai/api.go             |   8 ++
 4 files changed, 125 insertions(+), 3 deletions(-)

diff --git a/cmd/mapt/cmd/aws/hosts/rhelai.go b/cmd/mapt/cmd/aws/hosts/rhelai.go
index 43e848b4d..8b39315c5 100644
--- a/cmd/mapt/cmd/aws/hosts/rhelai.go
+++ b/cmd/mapt/cmd/aws/hosts/rhelai.go
@@ -63,6 +63,10 @@ func getRHELAICreate() *cobra.Command {
 					Spot:             params.SpotArgs(),
 					Timeout:          viper.GetString(params.Timeout),
 					ServiceEndpoints: params.NetworkServiceEndpoints(),
+					Model:            viper.GetString(params.RhelAIModel),
+					HFToken:          viper.GetString(params.RhelAIHFToken),
+					APIKey:           viper.GetString(params.RhelAIAPIKey),
+					AutoStart:        viper.GetBool(params.RhelAIAutoStart),
 				})
 		},
 	}
@@ -72,6 +76,10 @@ func getRHELAICreate() *cobra.Command {
 	flagSet.StringP(params.RhelAIVersion, "", params.RhelAIVersionDefault, params.RhelAIVersionDesc)
 	flagSet.StringP(params.RhelAIAccelerator, "", params.RhelAIAccelearatorDefault, params.RhelAIAccelearatorDesc)
 	flagSet.StringP(params.RhelAICustomImage, "", "", params.RhelAICustomImageDesc)
+	flagSet.StringP(params.RhelAIModel, "", "", params.RhelAIModelDesc)
+	flagSet.StringP(params.RhelAIHFToken, "", "", params.RhelAIHFTokenDesc)
+	flagSet.StringP(params.RhelAIAPIKey, "", "", params.RhelAIAPIKeyDesc)
+	flagSet.Bool(params.RhelAIAutoStart, false, params.RhelAIAutoStartDesc)
 	flagSet.StringP(params.Timeout, "", "", params.TimeoutDesc)
 	params.AddComputeRequestFlags(flagSet)
 	params.AddSpotFlags(flagSet)
diff --git a/cmd/mapt/cmd/params/params.go b/cmd/mapt/cmd/params/params.go
index f6583a661..ab0317ca5 100644
--- a/cmd/mapt/cmd/params/params.go
+++ b/cmd/mapt/cmd/params/params.go
@@ -119,5 +119,13 @@ const (
 	RhelAIAccelearatorDefault string = "cuda"
 	RhelAICustomImage         string = "custom-image"
 	RhelAICustomImageDesc     string = "custom image name to spin RHEL AI OS (AMI name for AWS, image name for Azure)"
+	RhelAIModel               string = "model"
+	RhelAIModelDesc           string = "Hugging Face model ID for RHAIIS (e.g. meta-llama/Llama-3.2-1B-Instruct)"
+	RhelAIHFToken             string = "hf-token"
+	RhelAIHFTokenDesc         string = "Hugging Face Hub token for model download"
+	RhelAIAPIKey              string = "api-key"
+	RhelAIAPIKeyDesc          string = "API key to enforce secure connections to vLLM"
+	RhelAIAutoStart           string = "auto-start"
+	RhelAIAutoStartDesc       string = "automatically configure and start RHAIIS after provisioning"
 
 	// Serverless
diff --git a/pkg/provider/aws/action/rhel-ai/rhelai.go b/pkg/provider/aws/action/rhel-ai/rhelai.go
index 405264a9c..7a3bc79a2 100644
--- a/pkg/provider/aws/action/rhel-ai/rhelai.go
+++ b/pkg/provider/aws/action/rhel-ai/rhelai.go
@@ -40,6 +40,10 @@ type rhelAIRequest struct {
 	serviceEndpoints []string
 	allocationData   *allocation.AllocationResult
 	diskSize         *int
+	model            *string
+	hfToken          *string
+	apiKey           *string
+	autoStart        bool
 }
 
 func (r *rhelAIRequest) validate() error {
@@ -73,7 +77,11 @@ func Create(mCtxArgs *mc.ContextArgs, args *apiRHELAI.RHELAIArgs) (err error) {
 		arch:             &args.Arch,
 		timeout:          &args.Timeout,
 		serviceEndpoints: args.ServiceEndpoints,
-		diskSize:         args.ComputeRequest.DiskSize}
+		diskSize:         args.ComputeRequest.DiskSize,
+		model:            &args.Model,
+		hfToken:          &args.HFToken,
+		apiKey:           &args.APIKey,
+		autoStart:        args.AutoStart}
 	if args.Spot != nil {
 		r.spot = args.Spot.Spot
 	}
@@ -224,8 +232,30 @@ func (r *rhelAIRequest) deploy(ctx *pulumi.Context) error {
 			return err
 		}
 	}
-	return c.Readiness(ctx, command.CommandPing, *r.prefix, awsRHELDedicatedID,
-		keyResources.PrivateKey, amiUserDefault, nil, c.Dependencies)
+	if !r.autoStart {
+		return c.Readiness(ctx, command.CommandPing, *r.prefix, awsRHELDedicatedID,
+			keyResources.PrivateKey, amiUserDefault, nil, c.Dependencies)
+	}
+	// Run the readiness ping as an explicit command so the RHAIIS setup below
+	// can depend on the resulting resource.
+	readinessCmd, err := c.RunCommand(ctx,
+		command.CommandPing,
+		compute.LoggingCmdStd,
+		fmt.Sprintf("%s-readiness", *r.prefix), awsRHELDedicatedID,
+		keyResources.PrivateKey, amiUserDefault,
+		nil, c.Dependencies)
+	if err != nil {
+		return err
+	}
+	// NOTE(review): NoLoggingCmdStd is presumably chosen because the script
+	// embeds the HF token and API key; confirm it suppresses command output.
+	_, err = c.RunCommand(ctx,
+		r.rhaiisSetupScript(),
+		compute.NoLoggingCmdStd,
+		fmt.Sprintf("%s-rhaiis-setup", *r.prefix), awsRHELDedicatedID,
+		keyResources.PrivateKey, amiUserDefault,
+		nil, []pulumi.Resource{readinessCmd})
+	return err
 }
 
 // Write exported values in context to files o a selected target folder
@@ -263,6 +293,74 @@ func (r *rhelAIRequest) securityGroups(ctx *pulumi.Context, mCtx *mc.Context,
 	return pulumi.StringArray(sgs[:]), nil
 }
 
+// rhaiisSetupScript returns the shell command line that configures and starts
+// RHAIIS on the provisioned host. It copies install.conf.example to
+// install.conf in the rhaiis quadlet drop-in directory, applies the optional
+// Hugging Face token, model and API key overrides with sed, then reloads
+// systemd and starts the rhaiis unit. An override whose value is empty is
+// skipped, so the copied example keeps its own setting.
+//
+// Each value is escaped for sed and each sed program is single-quoted for the
+// shell, so a quote, "&", "|" or backslash in a token or key cannot corrupt
+// the edit or terminate the quoting.
+func (r *rhelAIRequest) rhaiisSetupScript() string {
+	confDir := "/etc/containers/systemd/rhaiis.container.d"
+	// sedInPlace renders one "sed -i" step applied to install.conf.
+	sedInPlace := func(program string) string {
+		return fmt.Sprintf(" && sudo sed -i %s %s/install.conf",
+			rhaiisShellQuote(program), confDir)
+	}
+	script := fmt.Sprintf(
+		"sudo cp %s/install.conf.example %s/install.conf",
+		confDir, confDir)
+	if len(*r.hfToken) > 0 {
+		script += sedInPlace("s|HUGGING_FACE_HUB_TOKEN=.*|HUGGING_FACE_HUB_TOKEN=" +
+			rhaiisSedEscape(*r.hfToken) + "|")
+	}
+	if len(*r.model) > 0 {
+		// The replacement ends with a literal backslash, spelled \\ for sed.
+		script += sedInPlace("s|--model .*|--model " +
+			rhaiisSedEscape(*r.model) + ` \\|`)
+	}
+	if len(*r.apiKey) > 0 {
+		// Insert the variable on its own line right before [Install].
+		script += sedInPlace(`/\[Install\]/i Environment=VLLM_API_KEY=` +
+			rhaiisSedEscape(*r.apiKey))
+	}
+	script += " && sudo systemctl daemon-reload && sudo systemctl start rhaiis"
+	return script
+}
+
+// rhaiisSedEscape backslash-escapes the bytes of v that sed would otherwise
+// interpret inside a "|"-delimited replacement or inserted text: backslash,
+// "&", the "|" delimiter and newline.
+func rhaiisSedEscape(v string) string {
+	out := make([]byte, 0, len(v))
+	for i := 0; i < len(v); i++ {
+		switch v[i] {
+		case '\\', '&', '|', '\n':
+			out = append(out, '\\')
+		}
+		out = append(out, v[i])
+	}
+	return string(out)
+}
+
+// rhaiisShellQuote wraps v in single quotes for a POSIX shell, rewriting each
+// embedded single quote as '\'' so the result is always one literal word.
+func rhaiisShellQuote(v string) string {
+	out := make([]byte, 0, len(v)+2)
+	out = append(out, '\'')
+	for i := 0; i < len(v); i++ {
+		if v[i] == '\'' {
+			out = append(out, `'\''`...)
+			continue
+		}
+		out = append(out, v[i])
+	}
+	return string(append(out, '\''))
+}
+
 func checkAMIExists(ctx context.Context, amiName, region, arch *string) error {
 	isAMIOffered, _, err := data.IsAMIOffered(
 		ctx,
diff --git a/pkg/target/host/rhelai/api.go b/pkg/target/host/rhelai/api.go
index 9676c7a35..7defecdef 100644
--- a/pkg/target/host/rhelai/api.go
+++ b/pkg/target/host/rhelai/api.go
@@ -16,4 +16,12 @@ type RHELAIArgs struct {
 	ServiceEndpoints []string
 	// If timeout is set a severless scheduled task will be created to self destroy the resources
 	Timeout string
+	// Model is the Hugging Face model ID written to the RHAIIS configuration when AutoStart is set
+	Model string
+	// HFToken is the Hugging Face Hub token written to the RHAIIS configuration when AutoStart is set
+	HFToken string
+	// APIKey is exported as VLLM_API_KEY in the RHAIIS configuration when AutoStart is set
+	APIKey string
+	// AutoStart configures and starts RHAIIS once the host is reachable
+	AutoStart bool
 }