From f117a5878583770d7d8fbcbd38cebd03c3b313c4 Mon Sep 17 00:00:00 2001 From: Adrian Gavrila Date: Mon, 29 Dec 2025 17:45:14 -0500 Subject: [PATCH 1/2] Restructuring JailbreakV dataset to work with overall dataset refactor --- .../datasets/seed_datasets/remote/__init__.py | 1 + .../remote/jailbreakv_28k_dataset.py | 321 ++++++++++++++++++ .../test_seed_dataset_provider_integration.py | 4 +- .../datasets/test_seed_dataset_provider.py | 239 +++++++++++++ 4 files changed, 564 insertions(+), 1 deletion(-) create mode 100644 pyrit/datasets/seed_datasets/remote/jailbreakv_28k_dataset.py diff --git a/pyrit/datasets/seed_datasets/remote/__init__.py b/pyrit/datasets/seed_datasets/remote/__init__.py index 382a00e37..fc7d1bb67 100644 --- a/pyrit/datasets/seed_datasets/remote/__init__.py +++ b/pyrit/datasets/seed_datasets/remote/__init__.py @@ -16,6 +16,7 @@ from pyrit.datasets.seed_datasets.remote.forbidden_questions_dataset import _ForbiddenQuestionsDataset # noqa: F401 from pyrit.datasets.seed_datasets.remote.harmbench_dataset import _HarmBenchDataset # noqa: F401 from pyrit.datasets.seed_datasets.remote.harmbench_multimodal_dataset import _HarmBenchMultimodalDataset # noqa: F401 +from pyrit.datasets.seed_datasets.remote.jailbreakv_28k_dataset import _JailbreakV28KDataset # noqa: F401 from pyrit.datasets.seed_datasets.remote.jbb_behaviors_dataset import _JBBBehaviorsDataset # noqa: F401 from pyrit.datasets.seed_datasets.remote.librai_do_not_answer_dataset import _LibrAIDoNotAnswerDataset # noqa: F401 from pyrit.datasets.seed_datasets.remote.llm_latent_adversarial_training_dataset import ( # noqa: F401 diff --git a/pyrit/datasets/seed_datasets/remote/jailbreakv_28k_dataset.py b/pyrit/datasets/seed_datasets/remote/jailbreakv_28k_dataset.py new file mode 100644 index 000000000..469535d95 --- /dev/null +++ b/pyrit/datasets/seed_datasets/remote/jailbreakv_28k_dataset.py @@ -0,0 +1,321 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
+
+import logging
+import pathlib
+import uuid
+import zipfile
+from enum import Enum
+from typing import Dict, List, Literal, Optional
+
+from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import (
+    _RemoteDatasetLoader,
+)
+from pyrit.models import SeedDataset, SeedPrompt
+
+logger = logging.getLogger(__name__)
+
+
+class HarmCategory(Enum):
+    """Harm categories in the JailBreakV-28K dataset."""
+
+    UNETHICAL_BEHAVIOR = "Unethical Behavior"
+    ECONOMIC_HARM = "Economic Harm"
+    HATE_SPEECH = "Hate Speech"
+    GOVERNMENT_DECISION = "Government Decision"
+    PHYSICAL_HARM = "Physical Harm"
+    FRAUD = "Fraud"
+    POLITICAL_SENSITIVITY = "Political Sensitivity"
+    MALWARE = "Malware"
+    ILLEGAL_ACTIVITY = "Illegal Activity"
+    BIAS = "Bias"
+    VIOLENCE = "Violence"
+    ANIMAL_ABUSE = "Animal Abuse"
+    TAILORED_UNLICENSED_ADVICE = "Tailored Unlicensed Advice"
+    PRIVACY_VIOLATION = "Privacy Violation"
+    HEALTH_CONSULTATION = "Health Consultation"
+    CHILD_ABUSE_CONTENT = "Child Abuse Content"
+
+
+class _JailbreakV28KDataset(_RemoteDatasetLoader):
+    """
+    Loader for the JailBreakV-28K multimodal dataset.
+
+    The JailBreakV-28K dataset is a benchmark for assessing the robustness of
+    multimodal large language models against jailbreak attacks. Each example consists
+    of an image and a text query, linked by the same prompt_group_id.
+
+    Note: Most images are not available on HuggingFace. You must download the full image
+    set from Google Drive by filling out the form at:
+    https://docs.google.com/forms/d/e/1FAIpQLSc_p1kCs3p9z-3FbtSeF7uLYsiQk0tvsGi6F0e_z5xCEmN1gQ/viewform
+
+    Reference: https://huggingface.co/datasets/JailbreakV-28K/JailBreakV-28k
+    Paper: https://arxiv.org/abs/2404.03027
+    Authors: Weidi Luo, Siyuan Ma, Xiaogeng Liu, Chaowei Xiao, Xiaoyu Guo
+    License: MIT
+
+    Warning: Due to the nature of these prompts, consult your legal department
+    before testing them with LLMs to ensure compliance and reduce potential risks.
+    """
+
+    def __init__(
+        self,
+        *,
+        source: str = "JailbreakV-28K/JailBreakV-28k",
+        zip_dir: str = str(pathlib.Path.home()),
+        split: Literal["JailBreakV_28K", "mini_JailBreakV_28K"] = "mini_JailBreakV_28K",
+        text_field: Literal["jailbreak_query", "redteam_query"] = "redteam_query",
+        harm_categories: Optional[List[HarmCategory]] = None,
+    ) -> None:
+        """
+        Initialize the JailBreakV-28K dataset loader.
+
+        Args:
+            source: HuggingFace dataset identifier. Defaults to "JailbreakV-28K/JailBreakV-28k".
+            zip_dir: Directory containing the JailBreakV_28K.zip file with images.
+                Defaults to home directory.
+            split: Dataset split to load. Defaults to "mini_JailBreakV_28K".
+                Options are "JailBreakV_28K" and "mini_JailBreakV_28K".
+            text_field: Field to use as the prompt text. Defaults to "redteam_query".
+                Options are "jailbreak_query" and "redteam_query".
+            harm_categories: HarmCategory members (or their string values) used to filter examples.
+                If None, all categories are included (default).
+
+        Raises:
+            ValueError: If any of the specified harm categories are invalid.
+        """
+        self.source = source
+        self.zip_dir = pathlib.Path(zip_dir)
+        self.split = split
+        self.text_field = text_field
+        self.harm_categories = harm_categories
+
+        # Validate harm categories if provided
+        if harm_categories is not None:
+            valid_categories = {category.value for category in HarmCategory}
+            invalid_categories = (
+                set(cat.value if isinstance(cat, HarmCategory) else cat for cat in harm_categories)
+                - valid_categories
+            )
+            if invalid_categories:
+                raise ValueError(f"Invalid harm categories: {', '.join(invalid_categories)}")
+
+    @property
+    def dataset_name(self) -> str:
+        """Return the dataset name."""
+        return "jailbreakv_28k"
+
+    async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:
+        """
+        Fetch JailBreakV-28K dataset and return as SeedDataset.
+
+        The dataset contains both image and text prompts linked by prompt_group_id.
+        You can extract the grouped prompts using the group_seed_prompts_by_prompt_group_id method.
+
+        Args:
+            cache: Whether to cache the fetched dataset. Defaults to True.
+
+        Returns:
+            SeedDataset: A SeedDataset containing the multimodal examples.
+
+        Raises:
+            FileNotFoundError: If the required ZIP file is not found.
+            ValueError: If no items pass the filter, or 50% or more of them lack a resolvable image.
+            Exception: If the dataset cannot be loaded or processed.
+        """
+        # Extract images from ZIP if needed
+        zip_file_path = self.zip_dir / "JailBreakV_28K.zip"
+        zip_extracted_path = self.zip_dir / "JailBreakV_28K"
+
+        if not zip_file_path.exists():
+            raise FileNotFoundError(
+                f"ZIP file not found at {zip_file_path}. "
+                "Please download images from Google Drive using the form at: "
+                "https://docs.google.com/forms/d/e/1FAIpQLSc_p1kCs3p9z-3FbtSeF7uLYsiQk0tvsGi6F0e_z5xCEmN1gQ/viewform"
+            )
+
+        # Only unzip if the target directory does not already exist
+        if not zip_extracted_path.exists():
+            logger.info(f"Extracting {zip_file_path} to {self.zip_dir}")
+            with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
+                zip_ref.extractall(self.zip_dir)
+
+        try:
+            logger.info(f"Loading JailBreakV-28K dataset from {self.source}")
+
+            # Load dataset from HuggingFace using the helper method
+            data = await self._fetch_from_huggingface(
+                dataset_name=self.source,
+                config="JailBreakV_28K",
+                split=self.split,
+                cache=cache,
+            )
+
+            # Normalize the filter; HarmCategory(cat) accepts both members and the string values __init__ allows
+            harm_categories_normalized = (
+                None
+                if self.harm_categories is None
+                else [self._normalize_policy(HarmCategory(cat).value) for cat in self.harm_categories]
+            )
+
+            seed_prompts = []
+            missing_images = 0
+            total_items_processed = 0
+            per_call_cache: Dict[str, str] = {}
+
+            for item in data:
+                policy = self._normalize_policy(item.get("policy", ""))
+
+                # Skip if user requested policy filter and item's policy does not match
+                if harm_categories_normalized is not None and policy not in harm_categories_normalized:
+                    continue
+
+                # Count items that pass the filter
+                total_items_processed += 1
+
+                image_rel_path = item.get("image_path", "")
+                if not image_rel_path:
+                    missing_images += 1
+                    continue
+
+                image_abs_path = self._resolve_image_path(
+                    rel_path=image_rel_path,
+                    local_directory=zip_extracted_path,
+                    call_cache=per_call_cache,
+                )
+
+                if not image_abs_path:
+                    missing_images += 1
+                    continue
+
+                # Create linked text and image prompts
+                group_id = uuid.uuid4()
+
+                text_seed_prompt = SeedPrompt(
+                    value=item.get(self.text_field, ""),
+                    data_type="text",
+                    name="JailBreakV-28K",
+                    dataset_name=self.dataset_name,
+                    harm_categories=[policy],
+                    description=(
+                        "Benchmark for Assessing the Robustness of "
+                        "Multimodal Large Language Models against Jailbreak Attacks."
+                    ),
+                    authors=["Weidi Luo", "Siyuan Ma", "Xiaogeng Liu", "Chaowei Xiao", "Xiaoyu Guo"],
+                    groups=["The Ohio State University", "Peking University", "University of Wisconsin-Madison"],
+                    source="https://huggingface.co/datasets/JailbreakV-28K/JailBreakV-28k",
+                    prompt_group_id=group_id,
+                    sequence=0,
+                )
+
+                image_seed_prompt = SeedPrompt(
+                    value=image_abs_path,
+                    data_type="image_path",
+                    name="JailBreakV-28K",
+                    dataset_name=self.dataset_name,
+                    harm_categories=[policy],
+                    description=(
+                        "Benchmark for Assessing the Robustness of "
+                        "Multimodal Large Language Models against Jailbreak Attacks."
+                    ),
+                    authors=["Weidi Luo", "Siyuan Ma", "Xiaogeng Liu", "Chaowei Xiao", "Xiaoyu Guo"],
+                    groups=["The Ohio State University", "Peking University", "University of Wisconsin-Madison"],
+                    source="https://huggingface.co/datasets/JailbreakV-28K/JailBreakV-28k",
+                    prompt_group_id=group_id,
+                    sequence=0,
+                )
+
+                seed_prompts.append(text_seed_prompt)
+                seed_prompts.append(image_seed_prompt)
+
+        except Exception as e:
+            logger.error(f"Failed to load JailBreakV-28K dataset: {str(e)}")
+            raise
+
+        # Validation: reject an empty result, then check if 50% or more of the items are unpaired
+        if total_items_processed == 0:
+            raise ValueError(
+                "JailBreakV-28K fetch produced 0 items after filtering. "
+                "Try adjusting your harm_categories filter or check the dataset source."
+            )
+
+        successful_pairs = len(seed_prompts) // 2  # Each pair has text + image
+        unpaired_percentage = (missing_images / total_items_processed) * 100
+
+        if unpaired_percentage >= 50:
+            raise ValueError(
+                f"JailBreakV-28K fetch failed: {unpaired_percentage:.1f}% of items are missing images "
+                f"({missing_images} out of {total_items_processed} items processed). "
+                f"Only {successful_pairs} valid pairs were created. "
+                f"At least 50% of items must have valid images. "
+                f"Please ensure the ZIP file contains the full image set."
+            )
+
+        if missing_images > 0:
+            logger.warning(
+                f"Failed to resolve {missing_images} image paths in JailBreakV-28K dataset "
+                f"({unpaired_percentage:.1f}% unpaired)"
+            )
+
+        logger.info(
+            f"Successfully loaded {successful_pairs} multimodal pairs "
+            f"({len(seed_prompts)} total prompts) from JailBreakV-28K dataset"
+        )
+
+        return SeedDataset(seeds=seed_prompts, dataset_name=self.dataset_name)
+
+    def _normalize_policy(self, policy: str) -> str:
+        """
+        Create a machine-friendly variant of the policy category.
+
+        Args:
+            policy: The human-readable policy category.
+
+        Returns:
+            str: The normalized policy category.
+        """
+        return policy.strip().lower().replace(" ", "_").replace("-", "_")
+
+    def _resolve_image_path(
+        self,
+        *,
+        rel_path: str,
+        local_directory: pathlib.Path,
+        call_cache: Dict[str, str],
+    ) -> str:
+        """
+        Resolve a repository-relative image path to a local absolute path.
+
+        Uses a cache to avoid re-checking the same file multiple times.
+
+        Args:
+            rel_path: Path relative to the dataset repository root (e.g., "images/0001.png").
+            local_directory: Directory to search for the image.
+            call_cache: Cache dictionary to store resolved paths.
+
+        Returns:
+            str: Absolute local path if resolved, else empty string.
+        """
+        if not rel_path:
+            return ""
+
+        # Check if image has already been cached
+        if rel_path in call_cache:
+            return call_cache[rel_path]
+
+        image_path = local_directory / rel_path
+
+        try:
+            if image_path.exists():
+                abs_path = str(image_path)
+            else:
+                logger.debug(f"File {image_path} not found in {local_directory}")
+                abs_path = ""
+
+            call_cache[rel_path] = abs_path
+            return abs_path
+
+        except Exception as e:
+            logger.error(f"Failed to resolve image path {rel_path}: {str(e)}")
+            call_cache[rel_path] = ""
+            return ""
diff --git a/tests/integration/datasets/test_seed_dataset_provider_integration.py b/tests/integration/datasets/test_seed_dataset_provider_integration.py
index db828a3cd..4f643f54c 100644
--- a/tests/integration/datasets/test_seed_dataset_provider_integration.py
+++ b/tests/integration/datasets/test_seed_dataset_provider_integration.py
@@ -14,7 +14,9 @@
 def get_dataset_providers():
     """Helper to get all registered providers for parameterization."""
     providers = SeedDatasetProvider.get_all_providers()
-    return [(name, cls) for name, cls in providers.items()]
+    # Exclude datasets that rely on external data sources
+    excluded = ["_JailbreakV28KDataset"]
+    return [(name, cls) for name, cls in providers.items() if name not in excluded]
 
 
 class TestSeedDatasetProviderIntegration:
diff --git
a/tests/unit/datasets/test_seed_dataset_provider.py b/tests/unit/datasets/test_seed_dataset_provider.py index d2063fad6..8218b352b 100644 --- a/tests/unit/datasets/test_seed_dataset_provider.py +++ b/tests/unit/datasets/test_seed_dataset_provider.py @@ -8,6 +8,10 @@ from pyrit.datasets import SeedDatasetProvider from pyrit.datasets.seed_datasets.remote.darkbench_dataset import _DarkBenchDataset from pyrit.datasets.seed_datasets.remote.harmbench_dataset import _HarmBenchDataset +from pyrit.datasets.seed_datasets.remote.jailbreakv_28k_dataset import ( + _JailbreakV28KDataset, + HarmCategory, +) from pyrit.models import SeedDataset, SeedPrompt @@ -41,6 +45,25 @@ def mock_darkbench_data(): ] +@pytest.fixture +def mock_jailbreakv_data(): + """Mock data for JailbreakV-28K dataset.""" + return [ + { + "jailbreak_query": "Test jailbreak query 1", + "redteam_query": "Test redteam query 1", + "policy": "Hate Speech", + "image_path": "images/test_001.png", + }, + { + "jailbreak_query": "Test jailbreak query 2", + "redteam_query": "Test redteam query 2", + "policy": "Violence", + "image_path": "images/test_002.png", + }, + ] + + class TestSeedDatasetProvider: """Test the SeedDatasetProvider base class and registration.""" @@ -236,3 +259,219 @@ async def test_fetch_dataset_with_custom_config(self, mock_darkbench_data): assert call_kwargs["dataset_name"] == "custom/darkbench" assert call_kwargs["config"] == "custom_config" assert call_kwargs["split"] == "test" + + +class TestJailbreakV28KDataset: + """Test the JailbreakV-28K dataset loader.""" + + @pytest.mark.asyncio + async def test_fetch_dataset(self, mock_jailbreakv_data, tmp_path): + """Test fetching JailbreakV-28K dataset.""" + # Create mock ZIP structure + zip_dir = tmp_path / "test_zip" + zip_dir.mkdir() + images_dir = zip_dir / "JailBreakV_28K" / "images" + images_dir.mkdir(parents=True) + + # Create mock image files + (images_dir / "test_001.png").touch() + (images_dir / "test_002.png").touch() + + loader = 
_JailbreakV28KDataset( + zip_dir=str(zip_dir), + ) + + # Mock the ZIP extraction check + with patch("pathlib.Path.exists") as mock_exists: + # ZIP exists, extracted folder exists + mock_exists.side_effect = lambda: True + + with patch.object(loader, "_fetch_from_huggingface", return_value=mock_jailbreakv_data): + with patch.object(loader, "_resolve_image_path") as mock_resolve: + # Mock image path resolution + mock_resolve.side_effect = lambda rel_path, local_directory, call_cache: str( + local_directory / rel_path + ) + + dataset = await loader.fetch_dataset() + + assert isinstance(dataset, SeedDataset) + # 2 examples * 2 prompts each (text + image) = 4 total + assert len(dataset.seeds) == 4 + assert all(isinstance(p, SeedPrompt) for p in dataset.seeds) + + # Check text prompts + text_prompts = [p for p in dataset.seeds if p.data_type == "text"] + assert len(text_prompts) == 2 + assert text_prompts[0].value == "Test redteam query 1" + assert text_prompts[0].dataset_name == "jailbreakv_28k" + assert text_prompts[0].harm_categories == ["hate_speech"] + + # Check image prompts + image_prompts = [p for p in dataset.seeds if p.data_type == "image_path"] + assert len(image_prompts) == 2 + + def test_dataset_name(self): + """Test dataset_name property.""" + loader = _JailbreakV28KDataset() + assert loader.dataset_name == "jailbreakv_28k" + + def test_harm_category_enum(self): + """Test HarmCategory enum values.""" + assert HarmCategory.HATE_SPEECH.value == "Hate Speech" + assert HarmCategory.VIOLENCE.value == "Violence" + assert HarmCategory.FRAUD.value == "Fraud" + + def test_initialization_with_harm_categories(self): + """Test initialization with harm category filtering.""" + loader = _JailbreakV28KDataset( + harm_categories=[HarmCategory.HATE_SPEECH, HarmCategory.VIOLENCE], + ) + assert loader.harm_categories is not None + assert len(loader.harm_categories) == 2 + assert HarmCategory.HATE_SPEECH in loader.harm_categories + + def 
test_initialization_invalid_harm_category(self): + """Test that invalid harm categories raise ValueError.""" + with pytest.raises(ValueError, match="Invalid harm categories"): + _JailbreakV28KDataset( + harm_categories=["invalid_category"], # type: ignore + ) + + @pytest.mark.asyncio + async def test_fetch_dataset_with_text_field(self, mock_jailbreakv_data, tmp_path): + """Test fetching with different text field.""" + zip_dir = tmp_path / "test_zip" + zip_dir.mkdir() + images_dir = zip_dir / "JailBreakV_28K" / "images" + images_dir.mkdir(parents=True) + (images_dir / "test_001.png").touch() + (images_dir / "test_002.png").touch() + + loader = _JailbreakV28KDataset( + zip_dir=str(zip_dir), + text_field="jailbreak_query", + ) + + with patch("pathlib.Path.exists", return_value=True): + with patch.object(loader, "_fetch_from_huggingface", return_value=mock_jailbreakv_data): + with patch.object(loader, "_resolve_image_path") as mock_resolve: + mock_resolve.side_effect = lambda rel_path, local_directory, call_cache: str( + local_directory / rel_path + ) + + dataset = await loader.fetch_dataset() + + text_prompts = [p for p in dataset.seeds if p.data_type == "text"] + assert text_prompts[0].value == "Test jailbreak query 1" + + @pytest.mark.asyncio + async def test_fetch_dataset_missing_zip(self): + """Test that missing ZIP file raises FileNotFoundError.""" + loader = _JailbreakV28KDataset( + zip_dir="/nonexistent/path", + ) + + with patch("pathlib.Path.exists", return_value=False): + with pytest.raises(FileNotFoundError, match="ZIP file not found"): + await loader.fetch_dataset() + + @pytest.mark.asyncio + async def test_fetch_dataset_filters_by_category(self, mock_jailbreakv_data, tmp_path): + """Test filtering by harm categories.""" + zip_dir = tmp_path / "test_zip" + zip_dir.mkdir() + images_dir = zip_dir / "JailBreakV_28K" / "images" + images_dir.mkdir(parents=True) + (images_dir / "test_001.png").touch() + + loader = _JailbreakV28KDataset( + zip_dir=str(zip_dir), + 
harm_categories=[HarmCategory.HATE_SPEECH], # Only hate speech + ) + + with patch("pathlib.Path.exists", return_value=True): + with patch.object(loader, "_fetch_from_huggingface", return_value=mock_jailbreakv_data): + with patch.object(loader, "_resolve_image_path") as mock_resolve: + mock_resolve.side_effect = lambda rel_path, local_directory, call_cache: str( + local_directory / rel_path + ) if "test_001" in rel_path else "" + + dataset = await loader.fetch_dataset() + + # Should only get first example (hate speech), not second (violence) + text_prompts = [p for p in dataset.seeds if p.data_type == "text"] + assert len(text_prompts) == 1 + assert text_prompts[0].harm_categories == ["hate_speech"] + + def test_normalize_policy(self): + """Test policy normalization helper.""" + loader = _JailbreakV28KDataset() + + assert loader._normalize_policy("Hate Speech") == "hate_speech" + assert loader._normalize_policy("Economic-Harm") == "economic_harm" + assert loader._normalize_policy(" Violence ") == "violence" + + @pytest.mark.asyncio + async def test_fetch_dataset_50_percent_threshold(self, tmp_path): + """Test that 50% or more missing images raises ValueError.""" + zip_dir = tmp_path / "test_zip" + zip_dir.mkdir() + images_dir = zip_dir / "JailBreakV_28K" / "images" + images_dir.mkdir(parents=True) + + # Mock data with 4 items + mock_data = [ + {"policy": "Hate Speech", "image_path": "images/001.png", "redteam_query": "Query 1"}, + {"policy": "Violence", "image_path": "images/002.png", "redteam_query": "Query 2"}, + {"policy": "Fraud", "image_path": "images/003.png", "redteam_query": "Query 3"}, + {"policy": "Malware", "image_path": "images/004.png", "redteam_query": "Query 4"}, + ] + + loader = _JailbreakV28KDataset(zip_dir=str(zip_dir)) + + with patch("pathlib.Path.exists", return_value=True): + with patch.object(loader, "_fetch_from_huggingface", return_value=mock_data): + with patch.object(loader, "_resolve_image_path") as mock_resolve: + # Mock so that only 1 
out of 4 images resolves (25% success = 75% unpaired) + def resolve_side_effect(rel_path, local_directory, call_cache): + return str(local_directory / rel_path) if "001" in rel_path else "" + + mock_resolve.side_effect = resolve_side_effect + + # Should raise because 75% are unpaired (>= 50%) + with pytest.raises(ValueError, match="75.0% of items are missing images"): + await loader.fetch_dataset() + + @pytest.mark.asyncio + async def test_fetch_dataset_below_50_percent_threshold(self, tmp_path): + """Test that less than 50% missing images succeeds.""" + zip_dir = tmp_path / "test_zip" + zip_dir.mkdir() + images_dir = zip_dir / "JailBreakV_28K" / "images" + images_dir.mkdir(parents=True) + + # Mock data with 4 items + mock_data = [ + {"policy": "Hate Speech", "image_path": "images/001.png", "redteam_query": "Query 1"}, + {"policy": "Violence", "image_path": "images/002.png", "redteam_query": "Query 2"}, + {"policy": "Fraud", "image_path": "images/003.png", "redteam_query": "Query 3"}, + {"policy": "Malware", "image_path": "images/004.png", "redteam_query": "Query 4"}, + ] + + loader = _JailbreakV28KDataset(zip_dir=str(zip_dir)) + + with patch("pathlib.Path.exists", return_value=True): + with patch.object(loader, "_fetch_from_huggingface", return_value=mock_data): + with patch.object(loader, "_resolve_image_path") as mock_resolve: + # Mock so that 3 out of 4 images resolve (75% success = 25% unpaired) + def resolve_side_effect(rel_path, local_directory, call_cache): + return "" if "004" in rel_path else str(local_directory / rel_path) + + mock_resolve.side_effect = resolve_side_effect + + # Should succeed because only 25% are unpaired (< 50%) + dataset = await loader.fetch_dataset() + + # Should have 3 pairs (6 total prompts) + assert len(dataset.seeds) == 6 From 2e5d6cbdf52a4a957e5d615897eae6f687272b48 Mon Sep 17 00:00:00 2001 From: Adrian Gavrila Date: Mon, 29 Dec 2025 18:10:06 -0500 Subject: [PATCH 2/2] Pre-commit hooks --- .../remote/jailbreakv_28k_dataset.py | 
3 +- .../datasets/test_seed_dataset_provider.py | 30 +++++++++---------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/pyrit/datasets/seed_datasets/remote/jailbreakv_28k_dataset.py b/pyrit/datasets/seed_datasets/remote/jailbreakv_28k_dataset.py index 469535d95..2cb49ac94 100644 --- a/pyrit/datasets/seed_datasets/remote/jailbreakv_28k_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/jailbreakv_28k_dataset.py @@ -94,8 +94,7 @@ def __init__( if harm_categories is not None: valid_categories = {category.value for category in HarmCategory} invalid_categories = ( - set(cat.value if isinstance(cat, HarmCategory) else cat for cat in harm_categories) - - valid_categories + set(cat.value if isinstance(cat, HarmCategory) else cat for cat in harm_categories) - valid_categories ) if invalid_categories: raise ValueError(f"Invalid harm categories: {', '.join(invalid_categories)}") diff --git a/tests/unit/datasets/test_seed_dataset_provider.py b/tests/unit/datasets/test_seed_dataset_provider.py index 8218b352b..fea05a8b8 100644 --- a/tests/unit/datasets/test_seed_dataset_provider.py +++ b/tests/unit/datasets/test_seed_dataset_provider.py @@ -9,8 +9,8 @@ from pyrit.datasets.seed_datasets.remote.darkbench_dataset import _DarkBenchDataset from pyrit.datasets.seed_datasets.remote.harmbench_dataset import _HarmBenchDataset from pyrit.datasets.seed_datasets.remote.jailbreakv_28k_dataset import ( - _JailbreakV28KDataset, HarmCategory, + _JailbreakV28KDataset, ) from pyrit.models import SeedDataset, SeedPrompt @@ -272,7 +272,7 @@ async def test_fetch_dataset(self, mock_jailbreakv_data, tmp_path): zip_dir.mkdir() images_dir = zip_dir / "JailBreakV_28K" / "images" images_dir.mkdir(parents=True) - + # Create mock image files (images_dir / "test_001.png").touch() (images_dir / "test_002.png").touch() @@ -285,14 +285,14 @@ async def test_fetch_dataset(self, mock_jailbreakv_data, tmp_path): with patch("pathlib.Path.exists") as mock_exists: # ZIP exists, extracted folder 
exists mock_exists.side_effect = lambda: True - + with patch.object(loader, "_fetch_from_huggingface", return_value=mock_jailbreakv_data): with patch.object(loader, "_resolve_image_path") as mock_resolve: # Mock image path resolution mock_resolve.side_effect = lambda rel_path, local_directory, call_cache: str( local_directory / rel_path ) - + dataset = await loader.fetch_dataset() assert isinstance(dataset, SeedDataset) @@ -359,7 +359,7 @@ async def test_fetch_dataset_with_text_field(self, mock_jailbreakv_data, tmp_pat mock_resolve.side_effect = lambda rel_path, local_directory, call_cache: str( local_directory / rel_path ) - + dataset = await loader.fetch_dataset() text_prompts = [p for p in dataset.seeds if p.data_type == "text"] @@ -393,10 +393,10 @@ async def test_fetch_dataset_filters_by_category(self, mock_jailbreakv_data, tmp with patch("pathlib.Path.exists", return_value=True): with patch.object(loader, "_fetch_from_huggingface", return_value=mock_jailbreakv_data): with patch.object(loader, "_resolve_image_path") as mock_resolve: - mock_resolve.side_effect = lambda rel_path, local_directory, call_cache: str( - local_directory / rel_path - ) if "test_001" in rel_path else "" - + mock_resolve.side_effect = lambda rel_path, local_directory, call_cache: ( + str(local_directory / rel_path) if "test_001" in rel_path else "" + ) + dataset = await loader.fetch_dataset() # Should only get first example (hate speech), not second (violence) @@ -407,7 +407,7 @@ async def test_fetch_dataset_filters_by_category(self, mock_jailbreakv_data, tmp def test_normalize_policy(self): """Test policy normalization helper.""" loader = _JailbreakV28KDataset() - + assert loader._normalize_policy("Hate Speech") == "hate_speech" assert loader._normalize_policy("Economic-Harm") == "economic_harm" assert loader._normalize_policy(" Violence ") == "violence" @@ -436,9 +436,9 @@ async def test_fetch_dataset_50_percent_threshold(self, tmp_path): # Mock so that only 1 out of 4 images resolves 
(25% success = 75% unpaired) def resolve_side_effect(rel_path, local_directory, call_cache): return str(local_directory / rel_path) if "001" in rel_path else "" - + mock_resolve.side_effect = resolve_side_effect - + # Should raise because 75% are unpaired (>= 50%) with pytest.raises(ValueError, match="75.0% of items are missing images"): await loader.fetch_dataset() @@ -467,11 +467,11 @@ async def test_fetch_dataset_below_50_percent_threshold(self, tmp_path): # Mock so that 3 out of 4 images resolve (75% success = 25% unpaired) def resolve_side_effect(rel_path, local_directory, call_cache): return "" if "004" in rel_path else str(local_directory / rel_path) - + mock_resolve.side_effect = resolve_side_effect - + # Should succeed because only 25% are unpaired (< 50%) dataset = await loader.fetch_dataset() - + # Should have 3 pairs (6 total prompts) assert len(dataset.seeds) == 6