From 1fa9583e57d1306b06caac78b322835b0f63ce66 Mon Sep 17 00:00:00 2001 From: aviruthen <91846056+aviruthen@users.noreply.github.com> Date: Fri, 5 Dec 2025 14:05:12 -0800 Subject: [PATCH 1/5] Bug fixes for HF models --- .../src/sagemaker/serve/builder/schema_builder.py | 5 +++++ .../src/sagemaker/serve/model_builder_servers.py | 13 +++++++++++++ .../src/sagemaker/serve/model_builder_utils.py | 5 +++++ 3 files changed, 23 insertions(+) diff --git a/sagemaker-serve/src/sagemaker/serve/builder/schema_builder.py b/sagemaker-serve/src/sagemaker/serve/builder/schema_builder.py index d68c2bffe1..faa8066d52 100644 --- a/sagemaker-serve/src/sagemaker/serve/builder/schema_builder.py +++ b/sagemaker-serve/src/sagemaker/serve/builder/schema_builder.py @@ -196,6 +196,11 @@ def _get_deserializer(self, obj): return StringDeserializer() if _is_jsonable(obj): return JSONDeserializer() + if isinstance(obj, dict) and "content_type" in obj: + try: + return BytesDeserializer() + except ValueError as e: + logger.error(e) raise ValueError( ( diff --git a/sagemaker-serve/src/sagemaker/serve/model_builder_servers.py b/sagemaker-serve/src/sagemaker/serve/model_builder_servers.py index 43af8b4f7a..b88c5517f0 100644 --- a/sagemaker-serve/src/sagemaker/serve/model_builder_servers.py +++ b/sagemaker-serve/src/sagemaker/serve/model_builder_servers.py @@ -687,7 +687,20 @@ def _build_for_transformers(self) -> Model: hf_model_id, self.env_vars.get("HUGGING_FACE_HUB_TOKEN") ) elif isinstance(self.model, str): # Only set HF_MODEL_ID if model is a string + # Get model metadata for task detection (same pattern as _build_for_triton) + hf_model_md = self.get_huggingface_model_metadata( + self.model, self.env_vars.get("HUGGING_FACE_HUB_TOKEN") + ) + model_task = hf_model_md.get("pipeline_tag") + if model_task: + self.env_vars.update({"HF_TASK": model_task}) + self.env_vars.update({"HF_MODEL_ID": self.model}) + + # Add HuggingFace token if available (same as other methods) + if self.env_vars.get("HUGGING_FACE_HUB_TOKEN"): + self.env_vars["HF_TOKEN"] = self.env_vars.get("HUGGING_FACE_HUB_TOKEN") + # Get HF config for string model IDs if hasattr(self.env_vars, "HF_API_TOKEN"): self.hf_model_config = _get_model_config_properties_from_hf( diff --git a/sagemaker-serve/src/sagemaker/serve/model_builder_utils.py b/sagemaker-serve/src/sagemaker/serve/model_builder_utils.py index f9efe42a18..56f3070346 100644 --- a/sagemaker-serve/src/sagemaker/serve/model_builder_utils.py +++ b/sagemaker-serve/src/sagemaker/serve/model_builder_utils.py @@ -1004,6 +1004,11 @@ def _hf_schema_builder_init(self, model_task: str) -> None: sample_inputs, sample_outputs, ) = remote_hf_schema_helper.get_resolved_hf_schema_for_task(model_task) + # Unwrap list outputs for binary tasks (text-to-image, audio, etc.) + # Remote schema retriever returns [{'data': b'...', 'content_type': '...'}] + # but SchemaBuilder expects {'data': b'...', 'content_type': '...'} + if isinstance(sample_outputs, list) and len(sample_outputs) > 0: + sample_outputs = sample_outputs[0] self.schema_builder = SchemaBuilder(sample_inputs, sample_outputs) From fe0726c1bfeef7d5f1c934adc86275fbdc601c39 Mon Sep 17 00:00:00 2001 From: aviruthen <91846056+aviruthen@users.noreply.github.com> Date: Wed, 17 Dec 2025 08:49:54 -0800 Subject: [PATCH 2/5] Fix serialization deserialization issues in core --- sagemaker-core/src/sagemaker/core/utils/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sagemaker-core/src/sagemaker/core/utils/utils.py b/sagemaker-core/src/sagemaker/core/utils/utils.py index 8a8d90be6e..2881695969 100644 --- a/sagemaker-core/src/sagemaker/core/utils/utils.py +++ b/sagemaker-core/src/sagemaker/core/utils/utils.py @@ -273,7 +273,7 @@ def pascal_to_snake(pascal_str): def is_not_primitive(obj): - return not isinstance(obj, (int, float, str, bool, datetime.datetime)) + return not isinstance(obj, (int, float, str, bool, datetime.datetime, bytes)) def is_not_str_dict(obj): From a71382f956c2fad5066128cb2e76826be17ba969 Mon Sep 17 00:00:00 2001 From: aviruthen <91846056+aviruthen@users.noreply.github.com> Date: Fri, 19 Dec 2025 09:10:15 -0800 Subject: [PATCH 3/5] Removing unnecessary comments --- sagemaker-serve/src/sagemaker/serve/model_builder_servers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sagemaker-serve/src/sagemaker/serve/model_builder_servers.py b/sagemaker-serve/src/sagemaker/serve/model_builder_servers.py index b88c5517f0..6619608395 100644 --- a/sagemaker-serve/src/sagemaker/serve/model_builder_servers.py +++ b/sagemaker-serve/src/sagemaker/serve/model_builder_servers.py @@ -687,7 +687,7 @@ def _build_for_transformers(self) -> Model: hf_model_id, self.env_vars.get("HUGGING_FACE_HUB_TOKEN") ) elif isinstance(self.model, str): # Only set HF_MODEL_ID if model is a string - # Get model metadata for task detection (same pattern as _build_for_triton) + # Get model metadata for task detection hf_model_md = self.get_huggingface_model_metadata( self.model, self.env_vars.get("HUGGING_FACE_HUB_TOKEN") ) @@ -697,7 +697,7 @@ def _build_for_transformers(self) -> Model: self.env_vars.update({"HF_MODEL_ID": self.model}) - # Add HuggingFace token if available (same as other methods) + # Add HuggingFace token if available if self.env_vars.get("HUGGING_FACE_HUB_TOKEN"): self.env_vars["HF_TOKEN"] = self.env_vars.get("HUGGING_FACE_HUB_TOKEN") From 192b22f8a4901c2967fbe0ed8307db463662e1e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enrique=20Hern=C3=A1ndez=20Calabr=C3=A9s?= Date: Tue, 7 Apr 2026 10:39:42 +0200 Subject: [PATCH 4/5] feat: add support for model_index.json fallback in HF config retrieval --- .../src/sagemaker/serve/utils/hf_utils.py | 45 +++++++++------ .../servers/test_model_builder_servers.py | 4 ++ .../tests/unit/utils/test_hf_utils.py | 56 ++++++++++++++++--- 3 files changed, 79 insertions(+), 26 deletions(-) diff --git a/sagemaker-serve/src/sagemaker/serve/utils/hf_utils.py b/sagemaker-serve/src/sagemaker/serve/utils/hf_utils.py index 75f46eeeb9..27a1cc24d1 100644 --- a/sagemaker-serve/src/sagemaker/serve/utils/hf_utils.py +++ b/sagemaker-serve/src/sagemaker/serve/utils/hf_utils.py @@ -24,26 +24,35 @@ def _get_model_config_properties_from_hf(model_id: str, hf_hub_token: str = None): """Placeholder docstring""" - config_url = f"https://huggingface.co/{model_id}/raw/main/config.json" + config_files = ["config.json", "model_index.json"] + model_config = None - try: - if hf_hub_token: - config_url = urllib.request.Request( - config_url, headers={"Authorization": "Bearer " + hf_hub_token} - ) - with urllib.request.urlopen(config_url) as response: - model_config = json.load(response) - except (HTTPError, URLError, TimeoutError, JSONDecodeError) as e: - if "HTTP Error 401: Unauthorized" in str(e): - raise ValueError( - "Trying to access a gated/private HuggingFace model without valid credentials. " - "Please provide a HUGGING_FACE_HUB_TOKEN in env_vars" + for config_file in config_files: + config_url = f"https://huggingface.co/{model_id}/raw/main/{config_file}" + request = config_url + + try: + if hf_hub_token: + request = urllib.request.Request( + config_url, headers={"Authorization": "Bearer " + hf_hub_token} + ) + + with urllib.request.urlopen(request) as response: + model_config = json.load(response) + break + except (HTTPError, URLError, TimeoutError, JSONDecodeError) as e: + if "HTTP Error 401: Unauthorized" in str(e): + raise ValueError( + "Trying to access a gated/private HuggingFace model without valid credentials. " + "Please provide a HUGGING_FACE_HUB_TOKEN in env_vars" + ) + + logger.warning( + "Exception encountered while trying to read config file %s. Details: %s", + config_url, + e, ) - logger.warning( - "Exception encountered while trying to read config file %s. " "Details: %s", - config_url, - e, - ) + if not model_config: raise ValueError( f"Did not find a config.json or model_index.json file in huggingface hub for " diff --git a/sagemaker-serve/tests/unit/servers/test_model_builder_servers.py b/sagemaker-serve/tests/unit/servers/test_model_builder_servers.py index 4355474c3d..1a6f3b2442 100644 --- a/sagemaker-serve/tests/unit/servers/test_model_builder_servers.py +++ b/sagemaker-serve/tests/unit/servers/test_model_builder_servers.py @@ -781,6 +781,10 @@ def test_build_with_hf_model_string( result = self.builder._build_for_transformers() self.assertEqual(self.builder.env_vars["HF_MODEL_ID"], "gpt2") + mock_hf_config.assert_called_once_with( + "gpt2", + "token", + ) mock_create.assert_called_once() @patch("sagemaker.serve.model_builder_servers._get_nb_instance") diff --git a/sagemaker-serve/tests/unit/utils/test_hf_utils.py b/sagemaker-serve/tests/unit/utils/test_hf_utils.py index 8a92c5c165..42959b4a3d 100644 --- a/sagemaker-serve/tests/unit/utils/test_hf_utils.py +++ b/sagemaker-serve/tests/unit/utils/test_hf_utils.py @@ -75,9 +75,9 @@ def test_get_model_config_http_error(self, mock_logger, mock_urlopen): with self.assertRaises(ValueError) as context: _get_model_config_properties_from_hf("non-existent-model") - + self.assertIn("Did not find a config.json", str(context.exception)) - mock_logger.warning.assert_called_once() + self.assertEqual(mock_logger.warning.call_count, 2) @patch('urllib.request.urlopen') @patch('sagemaker.serve.utils.hf_utils.logger') @@ -87,9 +87,9 @@ def test_get_model_config_url_error(self, mock_logger, mock_urlopen): with self.assertRaises(ValueError) as context: _get_model_config_properties_from_hf("model-id") - + self.assertIn("Did not find a config.json", str(context.exception)) - mock_logger.warning.assert_called_once() + self.assertEqual(mock_logger.warning.call_count, 2) @patch('urllib.request.urlopen') @patch('sagemaker.serve.utils.hf_utils.logger') @@ -99,9 +99,9 @@ def test_get_model_config_timeout_error(self, mock_logger, mock_urlopen): with self.assertRaises(ValueError) as context: _get_model_config_properties_from_hf("model-id") - + self.assertIn("Did not find a config.json", str(context.exception)) - mock_logger.warning.assert_called_once() + self.assertEqual(mock_logger.warning.call_count, 2) @patch('urllib.request.urlopen') @patch('sagemaker.serve.utils.hf_utils.logger') @@ -115,9 +115,9 @@ def test_get_model_config_json_decode_error(self, mock_logger, mock_urlopen): with patch('json.load', side_effect=JSONDecodeError("msg", "doc", 0)): with self.assertRaises(ValueError) as context: _get_model_config_properties_from_hf("model-id") - + self.assertIn("Did not find a config.json", str(context.exception)) - mock_logger.warning.assert_called_once() + self.assertEqual(mock_logger.warning.call_count, 2) @patch('urllib.request.urlopen') def test_get_model_config_url_format(self, mock_urlopen): @@ -137,6 +137,46 @@ def test_get_model_config_url_format(self, mock_urlopen): actual_url = mock_urlopen.call_args[0][0] self.assertEqual(actual_url, expected_url) + @patch("urllib.request.urlopen") + def test_get_model_config_falls_back_to_model_index(self, mock_urlopen): + """Test fallback to model_index.json when config.json is missing.""" + config_missing_error = HTTPError( + "https://huggingface.co/org/model/raw/main/config.json", 404, "Not Found", {}, None + ) + model_index_config = {"_class_name": "FluxPipeline", "_diffusers_version": "0.31.0"} + + mock_model_index_response = Mock() + mock_model_index_response.__enter__ = Mock(return_value=mock_model_index_response) + mock_model_index_response.__exit__ = Mock(return_value=False) + + def _urlopen_side_effect(request): + url = request.full_url if hasattr(request, "full_url") else request + if url.endswith("/config.json"): + raise config_missing_error + if url.endswith("/model_index.json"): + return mock_model_index_response + raise AssertionError(f"Unexpected URL called: {url}") + + mock_urlopen.side_effect = _urlopen_side_effect + + with patch("json.load", side_effect=[model_index_config]): + result = _get_model_config_properties_from_hf("org/model-name") + + self.assertEqual(result, model_index_config) + + @patch("urllib.request.urlopen") + @patch("sagemaker.serve.utils.hf_utils.logger") + def test_get_model_config_dual_file_error_when_both_missing(self, mock_logger, mock_urlopen): + """Test error when both config.json and model_index.json are missing.""" + mock_urlopen.side_effect = HTTPError("url", 404, "Not Found", {}, None) + + with self.assertRaises(ValueError) as context: + _get_model_config_properties_from_hf("model-id") + + self.assertIn("config.json or model_index.json", str(context.exception)) + self.assertEqual(mock_urlopen.call_count, 2) + self.assertEqual(mock_logger.warning.call_count, 2) + if __name__ == "__main__": unittest.main() From 375076a1b01a3e70d7db6aa7293b9e0c52726043 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enrique=20Hern=C3=A1ndez=20Calabr=C3=A9s?= Date: Wed, 8 Apr 2026 14:04:40 +0200 Subject: [PATCH 5/5] feat: add support for PEFT models with adapter_config.json config file --- .../src/sagemaker/serve/utils/hf_utils.py | 9 +-- .../tests/unit/utils/test_hf_utils.py | 62 +++++++++++++++---- 2 files changed, 55 insertions(+), 16 deletions(-) diff --git a/sagemaker-serve/src/sagemaker/serve/utils/hf_utils.py b/sagemaker-serve/src/sagemaker/serve/utils/hf_utils.py index 27a1cc24d1..b980edabe4 100644 --- a/sagemaker-serve/src/sagemaker/serve/utils/hf_utils.py +++ b/sagemaker-serve/src/sagemaker/serve/utils/hf_utils.py @@ -11,6 +11,7 @@ # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. """Utility functions for fetching model information from HuggingFace Hub""" + from __future__ import absolute_import import json import urllib.request @@ -24,7 +25,7 @@ def _get_model_config_properties_from_hf(model_id: str, hf_hub_token: str = None): """Placeholder docstring""" - config_files = ["config.json", "model_index.json"] + config_files = ["config.json", "model_index.json", "adapter_config.json"] model_config = None for config_file in config_files: @@ -54,9 +55,9 @@ def _get_model_config_properties_from_hf(model_id: str, hf_hub_token: str = None ) if not model_config: + allowed_files = ", ".join(config_files) raise ValueError( - f"Did not find a config.json or model_index.json file in huggingface hub for " - f"{model_id}. Please make sure a config.json exists (or model_index.json for Stable " - f"Diffusion Models) for this model in the huggingface hub" + f"Did not find any supported model config file in Hugging Face Hub for {model_id}. " + f"Expected one of: {allowed_files}" ) return model_config diff --git a/sagemaker-serve/tests/unit/utils/test_hf_utils.py b/sagemaker-serve/tests/unit/utils/test_hf_utils.py index 42959b4a3d..df86175382 100644 --- a/sagemaker-serve/tests/unit/utils/test_hf_utils.py +++ b/sagemaker-serve/tests/unit/utils/test_hf_utils.py @@ -76,8 +76,8 @@ def test_get_model_config_http_error(self, mock_logger, mock_urlopen): with self.assertRaises(ValueError) as context: _get_model_config_properties_from_hf("non-existent-model") - self.assertIn("Did not find a config.json", str(context.exception)) - self.assertEqual(mock_logger.warning.call_count, 2) + self.assertIn("Did not find any supported model config file", str(context.exception)) + self.assertEqual(mock_logger.warning.call_count, 3) @patch('urllib.request.urlopen') @patch('sagemaker.serve.utils.hf_utils.logger') @@ -88,8 +88,8 @@ def test_get_model_config_url_error(self, mock_logger, mock_urlopen): with self.assertRaises(ValueError) as context: _get_model_config_properties_from_hf("model-id") - self.assertIn("Did not find a config.json", str(context.exception)) - self.assertEqual(mock_logger.warning.call_count, 2) + self.assertIn("Did not find any supported model config file", str(context.exception)) + self.assertEqual(mock_logger.warning.call_count, 3) @patch('urllib.request.urlopen') @patch('sagemaker.serve.utils.hf_utils.logger') @@ -100,8 +100,8 @@ def test_get_model_config_timeout_error(self, mock_logger, mock_urlopen): with self.assertRaises(ValueError) as context: _get_model_config_properties_from_hf("model-id") - self.assertIn("Did not find a config.json", str(context.exception)) - self.assertEqual(mock_logger.warning.call_count, 2) + self.assertIn("Did not find any supported model config file", str(context.exception)) + self.assertEqual(mock_logger.warning.call_count, 3) @patch('urllib.request.urlopen') @patch('sagemaker.serve.utils.hf_utils.logger') @@ -116,8 +116,8 @@ def test_get_model_config_json_decode_error(self, mock_logger, mock_urlopen): with self.assertRaises(ValueError) as context: _get_model_config_properties_from_hf("model-id") - self.assertIn("Did not find a config.json", str(context.exception)) - self.assertEqual(mock_logger.warning.call_count, 2) + self.assertIn("Did not find any supported model config file", str(context.exception)) + self.assertEqual(mock_logger.warning.call_count, 3) @patch('urllib.request.urlopen') def test_get_model_config_url_format(self, mock_urlopen): @@ -167,15 +167,53 @@ def _urlopen_side_effect(request): @patch("urllib.request.urlopen") @patch("sagemaker.serve.utils.hf_utils.logger") def test_get_model_config_dual_file_error_when_both_missing(self, mock_logger, mock_urlopen): - """Test error when both config.json and model_index.json are missing.""" + """Test error when all known config files are missing.""" mock_urlopen.side_effect = HTTPError("url", 404, "Not Found", {}, None) with self.assertRaises(ValueError) as context: _get_model_config_properties_from_hf("model-id") - self.assertIn("config.json or model_index.json", str(context.exception)) - self.assertEqual(mock_urlopen.call_count, 2) - self.assertEqual(mock_logger.warning.call_count, 2) + self.assertIn( + "Expected one of: config.json, model_index.json, adapter_config.json", + str(context.exception), + ) + self.assertEqual(mock_urlopen.call_count, 3) + self.assertEqual(mock_logger.warning.call_count, 3) + + @patch("urllib.request.urlopen") + def test_get_model_config_falls_back_to_adapter_config(self, mock_urlopen): + """Test fallback to adapter_config.json when config/model_index are missing.""" + config_missing_error = HTTPError( + "https://huggingface.co/org/model/raw/main/config.json", 404, "Not Found", {}, None + ) + model_index_missing_error = HTTPError( + "https://huggingface.co/org/model/raw/main/model_index.json", 404, "Not Found", {}, None + ) + adapter_config = { + "base_model_name_or_path": "LiquidAI/LFM2.5-1.2B-Instruct", + "peft_type": "LORA", + } + + mock_adapter_response = Mock() + mock_adapter_response.__enter__ = Mock(return_value=mock_adapter_response) + mock_adapter_response.__exit__ = Mock(return_value=False) + + def _urlopen_side_effect(request): + url = request.full_url if hasattr(request, "full_url") else request + if url.endswith("/config.json"): + raise config_missing_error + if url.endswith("/model_index.json"): + raise model_index_missing_error + if url.endswith("/adapter_config.json"): + return mock_adapter_response + raise AssertionError(f"Unexpected URL called: {url}") + + mock_urlopen.side_effect = _urlopen_side_effect + + with patch("json.load", side_effect=[adapter_config]): + result = _get_model_config_properties_from_hf("org/model-name") + + self.assertEqual(result, adapter_config) if __name__ == "__main__":