From d69fc5a161e661e894a838443b814b74138e3c70 Mon Sep 17 00:00:00 2001
From: Syed Jafri <syedjfr@amazon.com>
Date: Thu, 28 May 2026 22:19:34 +0000
Subject: [PATCH 1/5] fix: add invoke verification

---
 .../test_model_customization_deployment.py    | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/sagemaker-serve/tests/integ/test_model_customization_deployment.py b/sagemaker-serve/tests/integ/test_model_customization_deployment.py
index b38ca249c7..144c78fe76 100644
--- a/sagemaker-serve/tests/integ/test_model_customization_deployment.py
+++ b/sagemaker-serve/tests/integ/test_model_customization_deployment.py
@@ -14,6 +14,8 @@
 from __future__ import absolute_import
 
 import boto3
+import json
+import time
 import pytest
 import random
 
@@ -135,6 +137,38 @@ def test_deploy_from_training_job(self, training_job_name, endpoint_name, cleanu
             adapter_ic = InferenceComponent.get(inference_component_name=adapter_name, region=AWS_REGION)
             assert adapter_ic is not None
 
+        # Invoke verification
+        time.sleep(5)  # brief buffer for IC readiness
+
+        invoke_ic_name = adapter_name if peft_type == "LORA" else f"{endpoint_name}-inference-component"
+
+        test_payload = {
+            "inputs": "What is machine learning?",
+            "parameters": {"max_new_tokens": 32},
+        }
+
+        invoke_response = endpoint.invoke(
+            body=json.dumps(test_payload),
+            content_type="application/json",
+            accept="application/json",
+            inference_component_name=invoke_ic_name,
+        )
+
+        response_body = json.loads(invoke_response.body)
+
+        # Validate response structure
+        assert response_body is not None, f"Empty response from invoke on {invoke_ic_name}"
+        if isinstance(response_body, list):
+            assert len(response_body) > 0
+            assert "generated_text" in response_body[0] or "generation" in response_body[0]
+        elif isinstance(response_body, dict):
+            assert (
+                "generated_text" in response_body
+                or "generation" in response_body
+                or "outputs" in response_body
+            )
+
+
     def test_fetch_endpoint_names_for_base_model(self, training_job_name, sagemaker_session):
         """Test fetching endpoint names for base model."""
         from sagemaker.core.resources import TrainingJob

From 8d63596845ea09f1db152442d5b2b6ee0b3769ae Mon Sep 17 00:00:00 2001
From: Syed Jafri <syedjfr@amazon.com>
Date: Thu, 28 May 2026 23:51:18 +0000
Subject: [PATCH 2/5] fix: add read() for streaming response body

---
 .../tests/integ/test_model_customization_deployment.py          | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sagemaker-serve/tests/integ/test_model_customization_deployment.py b/sagemaker-serve/tests/integ/test_model_customization_deployment.py
index 144c78fe76..e1e3412105 100644
--- a/sagemaker-serve/tests/integ/test_model_customization_deployment.py
+++ b/sagemaker-serve/tests/integ/test_model_customization_deployment.py
@@ -154,7 +154,7 @@ def test_deploy_from_training_job(self, training_job_name, endpoint_name, cleanu
             inference_component_name=invoke_ic_name,
         )
 
-        response_body = json.loads(invoke_response.body)
+        response_body = json.loads(invoke_response.body.read())
 
         # Validate response structure
         assert response_body is not None, f"Empty response from invoke on {invoke_ic_name}"

From c4cbafee488fb6408e75b621335b15c1bf3ba7e9 Mon Sep 17 00:00:00 2001
From: Syed Jafri <syedjfr@amazon.com>
Date: Fri, 29 May 2026 20:26:24 +0000
Subject: [PATCH 3/5] fix: add invoke test for bedrock oss

---
 .../test_model_customization_deployment.py    | 31 ++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/sagemaker-serve/tests/integ/test_model_customization_deployment.py b/sagemaker-serve/tests/integ/test_model_customization_deployment.py
index e1e3412105..83e4643f21 100644
--- a/sagemaker-serve/tests/integ/test_model_customization_deployment.py
+++ b/sagemaker-serve/tests/integ/test_model_customization_deployment.py
@@ -15,6 +15,7 @@
 
 import boto3
 import json
+import os
 import time
 import pytest
 import random
@@ -23,6 +24,7 @@
 
 # This test relies on resources in a specific region
 AWS_REGION = "us-west-2"
+os.environ.setdefault("AWS_DEFAULT_REGION", AWS_REGION)
 
 
 @pytest.fixture(scope="module")
@@ -350,7 +352,7 @@ def setup_config(self, training_job_name):
         from sagemaker.core.helper.session_helper import get_execution_role
         return {
             "training_job_name": training_job_name,
-            "region": "us-west-2",
+            "region": AWS_REGION,
             "bucket": "models-sdk-testing-pdx",
             "role_arn": get_execution_role()
         }
@@ -538,6 +540,33 @@ def test_bedrock_job_created(self, deployed_model_arn):
         """Test that Bedrock import job was created successfully."""
         assert deployed_model_arn is not None
 
+    @pytest.mark.slow
+    def test_bedrock_model_invoke(self, deployed_model_arn, bedrock_runtime):
+        """Test invoking the imported Bedrock model to ensure it works end-to-end.
+
+        Invokes the imported model directly using the Converse API and validates
+        the response contains generated text.
+        """
+        message = "What is machine learning?"
+
+        response = bedrock_runtime.invoke_model(
+                modelId=deployed_model_arn,
+                body=json.dumps({
+                    "prompt": "What is the capital of France?",
+                    "max_gen_len": 100,
+                    "temperature": 0.7,
+                    "top_p": 0.9
+                })
+        )
+
+        result = json.loads(response['body'].read().decode())
+
+        # Validate response structure
+        assert "generation" in result, "Response missing 'generation' field"
+        assert isinstance(result["generation"], str), "'generation' should be a string"
+        assert len(result["generation"]) > 0, "'generation' should not be empty"
+
+
     def test_zzz_cleanup_deployed_model(self, bedrock_client):
         """Cleanup deployed model and import jobs (runs last due to zzz prefix)."""
         if hasattr(self, 'model_arn_for_cleanup'):

From 366899225987847febd64b35cb224a09e65ddfe4 Mon Sep 17 00:00:00 2001
From: Syed Jafri <syedjfr@amazon.com>
Date: Fri, 29 May 2026 20:47:26 +0000
Subject: [PATCH 4/5] fix: use model arn to invoke not model name

---
 .../tests/integ/test_model_customization_deployment.py          | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sagemaker-serve/tests/integ/test_model_customization_deployment.py b/sagemaker-serve/tests/integ/test_model_customization_deployment.py
index 83e4643f21..b7537a82f6 100644
--- a/sagemaker-serve/tests/integ/test_model_customization_deployment.py
+++ b/sagemaker-serve/tests/integ/test_model_customization_deployment.py
@@ -417,7 +417,7 @@ def deployed_model_arn(self, training_job, bedrock_client, s3_client, setup_conf
                     break
                 time.sleep(30)
 
-            model_arn = response['importedModelName']
+            model_arn = response['importedModelArn']
             return model_arn
 
         except Exception as e:

From eb92967a431ba3e5429463894b800cc41e1a201f Mon Sep 17 00:00:00 2001
From: Syed Jafri <syedjfr@amazon.com>
Date: Fri, 29 May 2026 21:35:28 +0000
Subject: [PATCH 5/5] fix: add retries for bedrock invoke test

---
 .../test_model_customization_deployment.py    | 58 +++++++++++++------
 1 file changed, 39 insertions(+), 19 deletions(-)

diff --git a/sagemaker-serve/tests/integ/test_model_customization_deployment.py b/sagemaker-serve/tests/integ/test_model_customization_deployment.py
index b7537a82f6..21eef47558 100644
--- a/sagemaker-serve/tests/integ/test_model_customization_deployment.py
+++ b/sagemaker-serve/tests/integ/test_model_customization_deployment.py
@@ -15,11 +15,14 @@
 
 import boto3
 import json
+import logging
 import os
 import time
 import pytest
 import random
 
+logger = logging.getLogger(__name__)
+
 from sagemaker.core.helper.session_helper import Session
 
 # This test relies on resources in a specific region
@@ -140,7 +143,7 @@ def test_deploy_from_training_job(self, training_job_name, endpoint_name, cleanu
             assert adapter_ic is not None
 
         # Invoke verification
-        time.sleep(5)  # brief buffer for IC readiness
+        time.sleep(10)  # brief buffer for IC readiness
 
         invoke_ic_name = adapter_name if peft_type == "LORA" else f"{endpoint_name}-inference-component"
 
@@ -544,27 +547,44 @@ def test_bedrock_job_created(self, deployed_model_arn):
     def test_bedrock_model_invoke(self, deployed_model_arn, bedrock_runtime):
         """Test invoking the imported Bedrock model to ensure it works end-to-end.
 
-        Invokes the imported model directly using the Converse API and validates
-        the response contains generated text.
+        Retries on failure since models can take several minutes
+        to become ready after import.
         """
-        message = "What is machine learning?"
-
-        response = bedrock_runtime.invoke_model(
-                modelId=deployed_model_arn,
-                body=json.dumps({
-                    "prompt": "What is the capital of France?",
-                    "max_gen_len": 100,
-                    "temperature": 0.7,
-                    "top_p": 0.9
-                })
-        )
+        max_retries = 5
+        base_delay = 10
+
+        for attempt in range(max_retries):
+            try:
+                response = bedrock_runtime.invoke_model(
+                    modelId=deployed_model_arn,
+                    body=json.dumps({
+                        "prompt": "What is the capital of France?",
+                        "max_gen_len": 100,
+                        "temperature": 0.7,
+                        "top_p": 0.9
+                    })
+                )
 
-        result = json.loads(response['body'].read().decode())
+                result = json.loads(response['body'].read().decode())
 
-        # Validate response structure
-        assert "generation" in result, "Response missing 'generation' field"
-        assert isinstance(result["generation"], str), "'generation' should be a string"
-        assert len(result["generation"]) > 0, "'generation' should not be empty"
+                # Validate response structure
+                assert "generation" in result, "Response missing 'generation' field"
+                assert isinstance(result["generation"], str), "'generation' should be a string"
+                assert len(result["generation"]) > 0, "'generation' should not be empty"
+                return  # Success
+
+            except Exception as e:
+                if attempt < max_retries - 1:
+                    logger.info(
+                        f"Invoke failed (attempt {attempt + 1}/{max_retries}): {e}. "
+                        f"Retrying in {base_delay}s..."
+                    )
+                    time.sleep(base_delay)
+                else:
+                    pytest.fail(
+                        f"Invoke failed after {max_retries} attempts. "
+                        f"Last error: {e}"
+                    )
 
 
     def test_zzz_cleanup_deployed_model(self, bedrock_client):