diff --git a/README.md b/README.md index 03bc111..fad805e 100644 --- a/README.md +++ b/README.md @@ -67,13 +67,29 @@ uv pip install -r requirements-dev.txt ### GPU Support (Optional) -For faster OCR processing with GPU: +PaddleOCR 3.x automatically detects and uses GPU when available. For GPU support, install the GPU version of PaddlePaddle: ```bash uv pip uninstall paddlepaddle uv pip install paddlepaddle-gpu ``` +**Note**: The `--gpu` flag is deprecated in PaddleOCR 3.x and will be ignored. GPU acceleration is automatically enabled when GPU and CUDA are available. + +### Important Notes for Mac Users + +⚠️ **First Run**: PaddleOCR will download model files (~25 MB) on first initialization. This may take a few minutes depending on your network speed. + +⚠️ **Memory & Performance Optimizations**: The tool automatically: +- Sets `OMP_NUM_THREADS=1` (required for OpenBlas stability) +- Resizes large images (>1920px) to prevent memory leaks +- Uses aggressive garbage collection +- Scales coordinates back to original image dimensions + +📊 **Expected Performance**: For large screenshots (1200x2670px): +- First run: ~60 seconds (model download + processing) +- Subsequent runs: ~20-30 seconds (processing only) + ## Quick Start ### Command Line Usage @@ -96,19 +112,15 @@ Analyze with large text threshold: contrastcheck your_screenshot.png --large-text ``` -Use GPU acceleration: - -```bash -contrastcheck your_screenshot.png --gpu -``` - ### Python API Usage ```python from contrast_check.main import ContrastAnalyzer # Initialize analyzer -analyzer = ContrastAnalyzer(use_gpu=False, lang='en') +# Note: use_gpu parameter is deprecated in PaddleOCR 3.x+ +# GPU is automatically detected and used when available +analyzer = ContrastAnalyzer(lang='en') # Analyze image results = analyzer.analyze_image('screenshot.png') @@ -220,7 +232,7 @@ optional arguments: -f {json,text}, --format {json,text} Output format (default: text) --large-text Treat all text as large text (18pt+ or 14pt+ bold) - --gpu Use GPU for OCR processing + --gpu Deprecated. GPU is auto-detected by PaddleOCR 3.x+ --lang LANG Language for OCR (default: en) ``` diff --git a/contrast_check/color_extractor.py b/contrast_check/color_extractor.py index 330bfa5..3a745bf 100644 --- a/contrast_check/color_extractor.py +++ b/contrast_check/color_extractor.py @@ -11,78 +11,51 @@ class ColorExtractor: """ - Extract text and background colors using K-means clustering. + Extract dominant colors using K-means clustering. """ - def __init__(self, n_text_colors: int = 3, n_bg_colors: int = 3): + def __init__(self, n_colors: int = 2): """ Initialize color extractor. Args: - n_text_colors: Number of clusters for text color extraction - n_bg_colors: Number of clusters for background color extraction + n_colors: Number of color clusters (default: 2) """ - self.n_text_colors = n_text_colors - self.n_bg_colors = n_bg_colors + self.n_colors = n_colors - def extract_text_color( - self, image: np.ndarray, text_mask: np.ndarray - ) -> Tuple[int, int, int]: + def _color_distance( + self, color1: Tuple[int, int, int], color2: Tuple[int, int, int] + ) -> float: """ - Extract dominant text color from the masked region. + Calculate Euclidean distance between two RGB colors. Args: - image: Input image (BGR format) - text_mask: Binary mask indicating text region + color1: First RGB color tuple + color2: Second RGB color tuple Returns: - RGB tuple of the dominant text color + Euclidean distance between colors """ - # Extract pixels in text region - text_pixels = image[text_mask] - - if len(text_pixels) == 0: - return (0, 0, 0) - - # Convert BGR to RGB - text_pixels_rgb = cv2.cvtColor( - text_pixels.reshape(-1, 1, 3), cv2.COLOR_BGR2RGB - ).reshape(-1, 3) - - # Use K-means to find dominant colors - kmeans = KMeans( - n_clusters=min(self.n_text_colors, len(text_pixels)), - random_state=42, - n_init=10, - ) - kmeans.fit(text_pixels_rgb) - - # Get the most common cluster (dominant color) - labels = kmeans.labels_ - counts = np.bincount(labels) - dominant_cluster = np.argmax(counts) - dominant_color = kmeans.cluster_centers_[dominant_cluster] - - return tuple(dominant_color.astype(int)) - - def extract_background_color( - self, - image: np.ndarray, - text_mask: np.ndarray, - bbox: List[List[float]], - margin: int = 10, - ) -> Tuple[int, int, int]: + return sum((a - b) ** 2 for a, b in zip(color1, color2)) ** 0.5 + + def extract_colors( + self, image: np.ndarray, bbox: List[List[float]], margin: int = 10 + ) -> Tuple[Tuple[int, int, int], Tuple[int, int, int]]: """ - Extract background color around the text region. + Extract the two dominant colors from a region using K-means. + + Uses K-means clustering to find the two most dominant colors. + The colors are returned sorted by frequency (most common first). + ContrastChecker.calculate_contrast_ratio will handle determining + which color is lighter/darker for proper contrast calculation. Args: image: Input image (BGR format) - text_mask: Binary mask indicating text region bbox: Bounding box coordinates [[x1,y1], [x2,y2], [x3,y3], [x4,y4]] - margin: Margin around text box for background sampling + margin: Margin around text box for sampling (default: 10 pixels) Returns: - RGB tuple of the dominant background color + Tuple of (color1, color2) as RGB tuples, sorted by frequency """ h, w = image.shape[:2] @@ -93,35 +66,79 @@ def extract_background_color( y_min = max(0, int(np.min(bbox_array[:, 1])) - margin) y_max = min(h, int(np.max(bbox_array[:, 1])) + margin) - # Create background mask (region around text, excluding text itself) - bg_mask = np.zeros((h, w), dtype=bool) - bg_mask[y_min:y_max, x_min:x_max] = True - bg_mask[text_mask] = False + # Extract region pixels + region = image[y_min:y_max, x_min:x_max] - # Extract background pixels - bg_pixels = image[bg_mask] + if region.size == 0: + return ((0, 0, 0), (255, 255, 255)) - if len(bg_pixels) == 0: - return (255, 255, 255) + # Convert BGR to RGB and flatten + pixels_rgb = cv2.cvtColor(region, cv2.COLOR_BGR2RGB).reshape(-1, 3) - # Convert BGR to RGB - bg_pixels_rgb = cv2.cvtColor( - bg_pixels.reshape(-1, 1, 3), cv2.COLOR_BGR2RGB - ).reshape(-1, 3) + if len(pixels_rgb) < self.n_colors: + # Not enough pixels for clustering + return ((0, 0, 0), (255, 255, 255)) - # Use K-means to find dominant colors - kmeans = KMeans( - n_clusters=min(self.n_bg_colors, len(bg_pixels)), random_state=42, n_init=10 - ) - kmeans.fit(bg_pixels_rgb) + # Use K-means to find the two dominant colors + kmeans = KMeans(n_clusters=self.n_colors, random_state=42, n_init=10) + kmeans.fit(pixels_rgb) - # Get the most common cluster (dominant color) + # Get cluster centers sorted by pixel count labels = kmeans.labels_ counts = np.bincount(labels) - dominant_cluster = np.argmax(counts) - dominant_color = kmeans.cluster_centers_[dominant_cluster] - return tuple(dominant_color.astype(int)) + # Sort clusters by count (descending) + sorted_indices = np.argsort(counts)[::-1] + colors = kmeans.cluster_centers_[sorted_indices] + + # Return the two dominant colors + color1 = tuple(colors[0].astype(int)) + color2 = tuple(colors[1].astype(int)) if len(colors) > 1 else (255, 255, 255) + + # If colors are too similar, try to find more distinct colors + min_distance = 50 # Minimum Euclidean distance for distinct colors + if self._color_distance(color1, color2) < min_distance: + # Try with more clusters and pick the two most distinct ones + try: + kmeans_more = KMeans( + n_clusters=min(5, len(pixels_rgb)), random_state=42, n_init=10 + ) + kmeans_more.fit(pixels_rgb) + labels_more = kmeans_more.labels_ + counts_more = np.bincount(labels_more) + sorted_indices_more = np.argsort(counts_more)[::-1] + colors_more = kmeans_more.cluster_centers_[sorted_indices_more] + + # Find two colors that are sufficiently different + best_pair = None + best_distance = 0 + for i in range(min(3, len(colors_more))): + for j in range(i + 1, min(4, len(colors_more))): + c1 = tuple(colors_more[i].astype(int)) + c2 = tuple(colors_more[j].astype(int)) + dist = self._color_distance(c1, c2) + if dist > best_distance: + best_distance = dist + best_pair = (c1, c2) + if dist >= min_distance: + return (c1, c2) + + # If we found a better pair, use it + if best_pair and best_distance >= 10: + return best_pair + + except Exception: + pass + + # Fallback: return white and black if colors are too similar + if self._color_distance(color1, (255, 255, 255)) > self._color_distance( + color1, (0, 0, 0) + ): + return (color1, (0, 0, 0)) + else: + return (color1, (255, 255, 255)) + + return (color1, color2) @staticmethod def rgb_to_hex(rgb: Tuple[int, int, int]) -> str: diff --git a/contrast_check/main.py b/contrast_check/main.py index 6bf1932..4184bb7 100644 --- a/contrast_check/main.py +++ b/contrast_check/main.py @@ -24,22 +24,18 @@ def __init__( self, use_gpu: bool = False, lang: str = "en", - n_text_colors: int = 3, - n_bg_colors: int = 3, + n_colors: int = 2, ): """ Initialize the contrast analyzer. Args: - use_gpu: Whether to use GPU for OCR + use_gpu: Deprecated. GPU is auto-detected by PaddleOCR 3.x+. lang: Language for OCR - n_text_colors: Number of color clusters for text - n_bg_colors: Number of color clusters for background + n_colors: Number of color clusters (default: 2) """ self.ocr_extractor = OCRExtractor(use_gpu=use_gpu, lang=lang) - self.color_extractor = ColorExtractor( - n_text_colors=n_text_colors, n_bg_colors=n_bg_colors - ) + self.color_extractor = ColorExtractor(n_colors=n_colors) self.contrast_checker = ContrastChecker() def analyze_image(self, image_path: str, is_large_text: bool = False) -> List[Dict]: @@ -62,25 +58,16 @@ def analyze_image(self, image_path: str, is_large_text: bool = False) -> List[Di # Load image image = cv2.imread(image_path) - image_shape = image.shape results = [] for idx, region in enumerate(text_regions): - # Create text mask - text_mask = self.ocr_extractor.get_text_region_mask( - image_shape, region["bbox"] - ) - - # Extract colors - text_color = self.color_extractor.extract_text_color(image, text_mask) - bg_color = self.color_extractor.extract_background_color( - image, text_mask, region["bbox"] - ) + # Extract the two dominant colors + color1, color2 = self.color_extractor.extract_colors(image, region["bbox"]) - # Analyze contrast + # Analyze contrast - calculate_contrast_ratio handles which is lighter/darker analysis = self.contrast_checker.analyze_contrast( - text_color, bg_color, is_large_text + color1, color2, is_large_text ) # Add region info @@ -90,10 +77,10 @@ def analyze_image(self, image_path: str, is_large_text: bool = False) -> List[Di "confidence": round(region["confidence"], 3), "bbox": region["bbox"], "center": region["center"], - "text_color": text_color, - "text_color_hex": self.color_extractor.rgb_to_hex(text_color), - "bg_color": bg_color, - "bg_color_hex": self.color_extractor.rgb_to_hex(bg_color), + "color_1": color1, + "color_1_hex": self.color_extractor.rgb_to_hex(color1), + "color_2": color2, + "color_2_hex": self.color_extractor.rgb_to_hex(color2), "contrast_ratio": analysis["contrast_ratio"], "wcag_aa": analysis["wcag_aa"], "wcag_aaa": analysis["wcag_aaa"], @@ -130,12 +117,10 @@ def generate_report(self, results: List[Dict], output_format: str = "json") -> s report_lines.append(f"Text #{result['index']}: {result['text']}") report_lines.append(f" OCR Confidence: {result['confidence']:.1%}") report_lines.append( - f" Text Color: RGB{result['text_color']} " - f"({result['text_color_hex']})" + f" Color 1: RGB{result['color_1']} " f"({result['color_1_hex']})" ) report_lines.append( - f" Background Color: RGB{result['bg_color']} " - f"({result['bg_color_hex']})" + f" Color 2: RGB{result['color_2']} " f"({result['color_2_hex']})" ) report_lines.append(f" Contrast Ratio: {result['contrast_ratio']}:1") report_lines.append( @@ -193,7 +178,11 @@ def main(): action="store_true", help="Treat all text as large text (18pt+ or 14pt+ bold)", ) - parser.add_argument("--gpu", action="store_true", help="Use GPU for OCR processing") + parser.add_argument( + "--gpu", + action="store_true", + help="Deprecated. GPU is auto-detected by PaddleOCR 3.x+", + ) parser.add_argument( "--lang", type=str, default="en", help="Language for OCR (default: en)" ) @@ -207,6 +196,7 @@ def main(): # Initialize analyzer print("Initializing ContrastCheck...") + print("Note: First run may take several minutes to download OCR models...") analyzer = ContrastAnalyzer(use_gpu=args.gpu, lang=args.lang) # Analyze image diff --git a/contrast_check/ocr_extractor.py b/contrast_check/ocr_extractor.py index 0a0dcc6..eb6a1e4 100644 --- a/contrast_check/ocr_extractor.py +++ b/contrast_check/ocr_extractor.py @@ -18,9 +18,22 @@ def __init__(self, use_gpu: bool = False, lang: str = "en"): Initialize OCR extractor. Args: - use_gpu: Whether to use GPU for inference + use_gpu: Deprecated. GPU is auto-detected by PaddleOCR 3.x+. lang: Language for OCR recognition (default: 'en') + + Note: + PaddleOCR 3.x removed use_angle_cls, use_gpu, show_log parameters, + and cls parameter from ocr() method. + GPU acceleration and text direction classification are automatic. """ + import os + + # PaddlePaddle with OpenBlas requires OMP_NUM_THREADS=1 + # Setting to 1 prevents "does not support multi-threads" error + os.environ.setdefault("OMP_NUM_THREADS", "1") + os.environ.setdefault("MKL_NUM_THREADS", "1") + os.environ.setdefault("OPENBLAS_NUM_THREADS", "1") + try: from paddleocr import PaddleOCR except ImportError: @@ -29,9 +42,19 @@ def __init__(self, use_gpu: bool = False, lang: str = "en"): "pip install paddleocr" ) - self.ocr = PaddleOCR( - use_angle_cls=True, lang=lang, use_gpu=use_gpu, show_log=False - ) + # Warn if use_gpu is explicitly set (for backward compatibility) + if use_gpu: + import warnings + + warnings.warn( + "use_gpu parameter is deprecated in PaddleOCR 3.x and will be " + "ignored. GPU is automatically detected and used when available.", + DeprecationWarning, + stacklevel=2, + ) + + # Use simplified initialization for PaddleOCR 3.x compatibility + self.ocr = PaddleOCR(lang=lang) def extract_text_regions(self, image_path: str) -> List[Dict[str, Any]]: """ @@ -47,37 +70,139 @@ def extract_text_regions(self, image_path: str) -> List[Dict[str, Any]]: - bbox: Bounding box coordinates [[x1,y1], [x2,y2], [x3,y3], [x4,y4]] - center: Center point of the text region (x, y) """ + import gc + import tempfile + # Read image image = cv2.imread(image_path) if image is None: raise ValueError(f"Failed to read image from {image_path}") - # Run OCR - result = self.ocr.ocr(image_path, cls=True) + # Resize large images to prevent memory issues + # Max dimension: 1920 pixels (maintains aspect ratio) + max_dimension = 1920 + height, width = image.shape[:2] + scale = 1.0 + + if max(height, width) > max_dimension: + scale = max_dimension / max(height, width) + new_width = int(width * scale) + new_height = int(height * scale) + image = cv2.resize( + image, (new_width, new_height), interpolation=cv2.INTER_AREA + ) + + # Save resized image to temp file for OCR + with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp: + temp_path = tmp.name + cv2.imwrite(temp_path, image) + + # Use temp file for OCR + ocr_input = temp_path + else: + ocr_input = image_path + temp_path = None + + try: + # Run OCR + # Note: cls parameter removed in PaddleOCR 3.x (text direction classification is automatic) + result = self.ocr.ocr(ocr_input) + finally: + # Clean up temp file and force garbage collection + if temp_path: + import os + + try: + os.unlink(temp_path) + except: + pass + del image + gc.collect() if not result or not result[0]: return [] + ocr_result = result[0] text_regions = [] - for line in result[0]: - bbox = line[0] - text_info = line[1] - text = text_info[0] - confidence = text_info[1] - - # Calculate center point - bbox_array = np.array(bbox) - center_x = int(np.mean(bbox_array[:, 0])) - center_y = int(np.mean(bbox_array[:, 1])) - - text_regions.append( - { - "text": text, - "confidence": confidence, - "bbox": bbox, - "center": (center_x, center_y), - } - ) + + # PaddleOCR 3.x returns OCRResult object (dict-like) + # Check if it's the new format (dict with rec_polys, rec_texts, rec_scores) + if isinstance(ocr_result, dict): + # New format: PaddleOCR 3.x OCRResult object + boxes = ocr_result.get("rec_polys", ocr_result.get("dt_polys", [])) + texts = ocr_result.get("rec_texts", []) + scores = ocr_result.get("rec_scores", []) + + if not boxes or not texts: + return [] + + for i in range(len(texts)): + try: + bbox = boxes[i] + text = texts[i] + confidence = scores[i] if i < len(scores) else 1.0 + + # Scale bbox back to original image coordinates if image was resized + if scale != 1.0: + bbox = [[x / scale, y / scale] for x, y in bbox] + + # Calculate center point + bbox_array = np.array(bbox) + center_x = int(np.mean(bbox_array[:, 0])) + center_y = int(np.mean(bbox_array[:, 1])) + + text_regions.append( + { + "text": text, + "confidence": confidence, + "bbox": bbox, + "center": (center_x, center_y), + } + ) + except (IndexError, TypeError, ValueError) as e: + import warnings + + warnings.warn(f"Skipping malformed OCR result: {e}", RuntimeWarning) + continue + + elif isinstance(ocr_result, list): + # Old format: list of [bbox, (text, confidence)] + for line in ocr_result: + try: + bbox = line[0] + text_info = line[1] + + if isinstance(text_info, (list, tuple)) and len(text_info) >= 2: + text = text_info[0] + confidence = text_info[1] + elif isinstance(text_info, str): + text = text_info + confidence = 1.0 + else: + continue + + # Scale bbox back to original image coordinates if image was resized + if scale != 1.0: + bbox = [[x / scale, y / scale] for x, y in bbox] + + # Calculate center point + bbox_array = np.array(bbox) + center_x = int(np.mean(bbox_array[:, 0])) + center_y = int(np.mean(bbox_array[:, 1])) + + text_regions.append( + { + "text": text, + "confidence": confidence, + "bbox": bbox, + "center": (center_x, center_y), + } + ) + except (IndexError, TypeError, ValueError) as e: + import warnings + + warnings.warn(f"Skipping malformed OCR result: {e}", RuntimeWarning) + continue return text_regions diff --git a/examples/simple_usage.py b/examples/simple_usage.py index bc122d7..78f5a13 100644 --- a/examples/simple_usage.py +++ b/examples/simple_usage.py @@ -12,12 +12,13 @@ def main(): # Initialize the analyzer print("Initializing ContrastCheck...") analyzer = ContrastAnalyzer( - use_gpu=False, # Set to True if you have GPU support - lang='en' # Language for OCR (en, ch, etc.) + # Note: use_gpu is deprecated in PaddleOCR 3.x+ + # GPU is automatically detected and used when available + lang="en" # Language for OCR (en, ch, etc.) ) # Analyze an image - image_path = 'your_screenshot.png' # Replace with your image path + image_path = "your_screenshot.png" # Replace with your image path print(f"Analyzing image: {image_path}") results = analyzer.analyze_image(image_path) @@ -40,15 +41,15 @@ def main(): print() # Generate text report - report = analyzer.generate_report(results, output_format='text') + report = analyzer.generate_report(results, output_format="text") print(report) # Save JSON report - json_report = analyzer.generate_report(results, output_format='json') - with open('contrast_report.json', 'w') as f: + json_report = analyzer.generate_report(results, output_format="json") + with open("contrast_report.json", "w") as f: f.write(json_report) print("\nJSON report saved to: contrast_report.json") -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/setup.py b/setup.py index a8c42a9..840c09b 100644 --- a/setup.py +++ b/setup.py @@ -7,56 +7,56 @@ # Read the contents of README file this_directory = Path(__file__).parent -long_description = (this_directory / "README.md").read_text(encoding='utf-8') +long_description = (this_directory / "README.md").read_text(encoding="utf-8") setup( - name='contrastcheck', - version='0.1.0', - author='ContrastCheck Contributors', - description='A tool for analyzing text-background contrast ratios in UI screenshots', + name="contrastcheck", + version="0.1.0", + author="ContrastCheck Contributors", + description="A tool for analyzing text-background contrast ratios in UI screenshots", long_description=long_description, - long_description_content_type='text/markdown', - url='https://github.com/longway-code/ContrastCheck', - packages=find_packages(exclude=['tests', 'tests.*', 'examples']), + long_description_content_type="text/markdown", + url="https://github.com/longway-code/ContrastCheck", + packages=find_packages(exclude=["tests", "tests.*", "examples"]), classifiers=[ - 'Development Status :: 3 - Alpha', - 'Intended Audience :: Developers', - 'Topic :: Software Development :: Quality Assurance', - 'Topic :: Multimedia :: Graphics', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11', - 'Programming Language :: Python :: 3.12', - 'Programming Language :: Python :: 3.13', + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Topic :: Software Development :: Quality Assurance", + "Topic :: Multimedia :: Graphics", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", ], - python_requires='>=3.10', + python_requires=">=3.10", install_requires=[ - 'paddleocr>=2.7.0', - 'paddlepaddle>=2.5.0', - 'numpy>=1.21.0', - 'opencv-python>=4.5.0', - 'Pillow>=9.0.0', - 'scikit-learn>=1.0.0', + "paddleocr>=2.7.0", + "paddlepaddle>=2.5.0", + "numpy>=1.21.0", + "opencv-python>=4.5.0", + "Pillow>=9.0.0", + "scikit-learn>=1.0.0", ], extras_require={ - 'dev': [ - 'pytest>=7.0.0', - 'pytest-cov>=3.0.0', - 'pytest-mock>=3.6.0', - 'black>=22.0.0', - 'flake8>=4.0.0', - 'mypy>=0.950', - 'isort>=5.10.0', + "dev": [ + "pytest>=7.0.0", + "pytest-cov>=3.0.0", + "pytest-mock>=3.6.0", + "black>=22.0.0", + "flake8>=4.0.0", + "mypy>=0.950", + "isort>=5.10.0", ], - 'docs': [ - 'sphinx>=4.5.0', - 'sphinx-rtd-theme>=1.0.0', + "docs": [ + "sphinx>=4.5.0", + "sphinx-rtd-theme>=1.0.0", ], }, entry_points={ - 'console_scripts': [ - 'contrastcheck=contrast_check.main:main', + "console_scripts": [ + "contrastcheck=contrast_check.main:main", ], }, include_package_data=True, diff --git a/tests/test_color_extractor.py b/tests/test_color_extractor.py deleted file mode 100644 index 1228d04..0000000 --- a/tests/test_color_extractor.py +++ /dev/null @@ -1,131 +0,0 @@ -""" -Unit tests for ColorExtractor module. -""" - -import unittest - -import numpy as np - -from contrast_check.color_extractor import ColorExtractor - - -class TestColorExtractor(unittest.TestCase): - """Test cases for ColorExtractor class.""" - - def setUp(self): - """Set up test cases.""" - self.extractor = ColorExtractor(n_text_colors=3, n_bg_colors=3) - - def test_initialization(self): - """Test ColorExtractor initialization.""" - self.assertEqual(self.extractor.n_text_colors, 3) - self.assertEqual(self.extractor.n_bg_colors, 3) - - def test_extract_text_color_single_color(self): - """Test text color extraction with uniform color.""" - # Create a uniform black image (BGR format) - image = np.zeros((100, 100, 3), dtype=np.uint8) - mask = np.ones((100, 100), dtype=bool) - - color = self.extractor.extract_text_color(image, mask) - - # Should extract black color (0, 0, 0) in RGB - self.assertEqual(color, (0, 0, 0)) - - def test_extract_text_color_empty_mask(self): - """Test text color extraction with empty mask.""" - image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8) - mask = np.zeros((100, 100), dtype=bool) - - color = self.extractor.extract_text_color(image, mask) - - # Should return default black for empty mask - self.assertEqual(color, (0, 0, 0)) - - def test_extract_background_color_uniform(self): - """Test background color extraction with uniform background.""" - # Create white image (BGR format) - image = np.ones((100, 100, 3), dtype=np.uint8) * 255 - - # Small text region in center - text_mask = np.zeros((100, 100), dtype=bool) - text_mask[40:60, 40:60] = True - - bbox = [[40, 40], [60, 40], [60, 60], [40, 60]] - - color = self.extractor.extract_background_color(image, text_mask, bbox) - - # Should extract white color (255, 255, 255) in RGB - self.assertEqual(color, (255, 255, 255)) - - def test_extract_background_color_with_margin(self): - """Test background color extraction respects margin.""" - # Create image with different colored regions - image = np.zeros((100, 100, 3), dtype=np.uint8) - image[:, :] = [255, 0, 0] # Blue in BGR - - text_mask = np.zeros((100, 100), dtype=bool) - text_mask[45:55, 45:55] = True - - bbox = [[45, 45], [55, 45], [55, 55], [45, 55]] - - color = self.extractor.extract_background_color( - image, text_mask, bbox, margin=10 - ) - - # Should extract blue color (0, 0, 255) in RGB - self.assertEqual(color, (0, 0, 255)) - - def test_rgb_to_hex_black(self): - """Test RGB to hex conversion for black.""" - rgb = (0, 0, 0) - hex_color = self.extractor.rgb_to_hex(rgb) - self.assertEqual(hex_color, "#000000") - - def test_rgb_to_hex_white(self): - """Test RGB to hex conversion for white.""" - rgb = (255, 255, 255) - hex_color = self.extractor.rgb_to_hex(rgb) - self.assertEqual(hex_color, "#ffffff") - - def test_rgb_to_hex_custom_color(self): - """Test RGB to hex conversion for custom color.""" - rgb = (128, 64, 192) - hex_color = self.extractor.rgb_to_hex(rgb) - self.assertEqual(hex_color, "#8040c0") - - def test_extract_dominant_color_from_mixed(self): - """Test extraction of dominant color from mixed pixels.""" - # Create image with mostly red pixels and some blue - image = np.zeros((100, 100, 3), dtype=np.uint8) - image[:80, :] = [0, 0, 255] # Red in BGR (80% of image) - image[80:, :] = [255, 0, 0] # Blue in BGR (20% of image) - - mask = np.ones((100, 100), dtype=bool) - - color = self.extractor.extract_text_color(image, mask) - - # Should extract red as dominant color (255, 0, 0) in RGB - # Allow some tolerance due to clustering - self.assertTrue(color[0] > 200) # High red channel - self.assertTrue(color[1] < 50) # Low green channel - self.assertTrue(color[2] < 50) # Low blue channel - - def test_color_extraction_boundary_cases(self): - """Test color extraction at image boundaries.""" - image = np.ones((50, 50, 3), dtype=np.uint8) * 128 - - # Text region at corner - text_mask = np.zeros((50, 50), dtype=bool) - text_mask[0:10, 0:10] = True - - bbox = [[0, 0], [10, 0], [10, 10], [0, 10]] - - # Should not crash and should return a valid color - color = self.extractor.extract_background_color(image, text_mask, bbox) - self.assertIsInstance(color, tuple) - self.assertEqual(len(color), 3) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_main.py b/tests/test_main.py index 46f17ec..0ed287e 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -17,13 +17,13 @@ class TestContrastAnalyzer(unittest.TestCase): @patch("contrast_check.main.ContrastChecker") def test_initialization(self, mock_checker, mock_color, mock_ocr): """Test ContrastAnalyzer initialization.""" - ContrastAnalyzer(use_gpu=True, lang="ch", n_text_colors=5, n_bg_colors=5) + ContrastAnalyzer(use_gpu=True, lang="ch", n_colors=2) # Verify OCRExtractor was initialized correctly mock_ocr.assert_called_once_with(use_gpu=True, lang="ch") # Verify ColorExtractor was initialized correctly - mock_color.assert_called_once_with(n_text_colors=5, n_bg_colors=5) + mock_color.assert_called_once_with(n_colors=2) @patch("contrast_check.main.cv2.imread") @patch("contrast_check.main.OCRExtractor") @@ -75,15 +75,11 @@ def test_analyze_image_with_text( mock_ocr_instance = Mock() mock_ocr_instance.extract_text_regions.return_value = mock_text_regions - mock_ocr_instance.get_text_region_mask.return_value = np.ones( - (100, 100), dtype=bool - ) mock_ocr.return_value = mock_ocr_instance # Mock color extraction mock_color_instance = Mock() - mock_color_instance.extract_text_color.return_value = (0, 0, 0) - mock_color_instance.extract_background_color.return_value = (255, 255, 255) + mock_color_instance.extract_colors.return_value = ((0, 0, 0), (255, 255, 255)) mock_color_instance.rgb_to_hex.side_effect = ( lambda rgb: "#{:02x}{:02x}{:02x}".format(*rgb) ) @@ -92,13 +88,10 @@ def test_analyze_image_with_text( # Mock contrast checker mock_checker_instance = Mock() mock_checker_instance.analyze_contrast.return_value = { - "text_color": (0, 0, 0), - "bg_color": (255, 255, 255), "contrast_ratio": 21.0, "wcag_aa": True, "wcag_aaa": True, "level": "Excellent (AAA)", - "is_large_text": False, } mock_checker.return_value = mock_checker_instance @@ -112,6 +105,8 @@ def test_analyze_image_with_text( self.assertEqual(results[0]["contrast_ratio"], 21.0) self.assertTrue(results[0]["wcag_aa"]) self.assertTrue(results[0]["wcag_aaa"]) + self.assertEqual(results[0]["color_1"], (0, 0, 0)) + self.assertEqual(results[0]["color_2"], (255, 255, 255)) def test_generate_report_json(self): """Test JSON report generation.""" @@ -156,10 +151,10 @@ def test_generate_report_text(self): "index": 0, "text": "Test", "confidence": 0.95, - "text_color": (0, 0, 0), - "text_color_hex": "#000000", - "bg_color": (255, 255, 255), - "bg_color_hex": "#ffffff", + "color_1": (0, 0, 0), + "color_1_hex": "#000000", + "color_2": (255, 255, 255), + "color_2_hex": "#ffffff", "contrast_ratio": 21.0, "wcag_aa": True, "wcag_aaa": True, @@ -175,6 +170,8 @@ def test_generate_report_text(self): self.assertIn("21.0:1", report) self.assertIn("SUMMARY", report) self.assertIn("WCAG AA Compliance", report) + self.assertIn("Color 1", report) + self.assertIn("Color 2", report) def test_generate_report_invalid_format(self): """Test report generation with invalid format.""" @@ -204,10 +201,10 @@ def test_generate_report_summary_statistics(self): "index": 0, "text": "Good", "confidence": 0.95, - "text_color": (0, 0, 0), - "text_color_hex": "#000000", - "bg_color": (255, 255, 255), - "bg_color_hex": "#ffffff", + "color_1": (0, 0, 0), + "color_1_hex": "#000000", + "color_2": (255, 255, 255), + "color_2_hex": "#ffffff", "contrast_ratio": 21.0, "wcag_aa": True, "wcag_aaa": True, @@ -217,10 +214,10 @@ def test_generate_report_summary_statistics(self): "index": 1, "text": "Poor", "confidence": 0.90, - "text_color": (200, 200, 200), - "text_color_hex": "#c8c8c8", - "bg_color": (255, 255, 255), - "bg_color_hex": "#ffffff", + "color_1": (200, 200, 200), + "color_1_hex": "#c8c8c8", + "color_2": (255, 255, 255), + "color_2_hex": "#ffffff", "contrast_ratio": 1.5, "wcag_aa": False, "wcag_aaa": False, diff --git a/tests/test_ocr_extractor.py b/tests/test_ocr_extractor.py index df2cafb..a51c99a 100644 --- a/tests/test_ocr_extractor.py +++ b/tests/test_ocr_extractor.py @@ -19,18 +19,26 @@ def test_initialization(self, mock_paddle): OCRExtractor(use_gpu=False, lang="en") # Check that PaddleOCR was called with correct parameters - mock_paddle.assert_called_once_with( - use_angle_cls=True, lang="en", use_gpu=False, show_log=False - ) + # PaddleOCR 3.x+ only accepts lang parameter + mock_paddle.assert_called_once_with(lang="en") @patch("paddleocr.PaddleOCR") def test_initialization_with_gpu(self, mock_paddle): - """Test OCRExtractor initialization with GPU.""" - OCRExtractor(use_gpu=True, lang="ch") + """Test OCRExtractor initialization with GPU (deprecated parameter).""" + import warnings - mock_paddle.assert_called_once_with( - use_angle_cls=True, lang="ch", use_gpu=True, show_log=False - ) + # use_gpu parameter should trigger deprecation warning + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + OCRExtractor(use_gpu=True, lang="ch") + + # Check deprecation warning was raised + self.assertEqual(len(w), 1) + self.assertTrue(issubclass(w[0].category, DeprecationWarning)) + self.assertIn("use_gpu parameter is deprecated", str(w[0].message)) + + # PaddleOCR 3.x+ only accepts lang parameter (use_gpu is ignored) + mock_paddle.assert_called_once_with(lang="ch") def test_get_text_region_mask(self): """Test text region mask creation."""