longway-code · longway-code · Feb 8, 2026 · Feb 8, 2026 · Feb 8, 2026
diff --git a/README.md b/README.md
@@ -67,13 +67,29 @@ uv pip install -r requirements-dev.txt
 
 ### GPU Support (Optional)
 
-For faster OCR processing with GPU:
+PaddleOCR 3.x automatically detects and uses GPU when available. For GPU support, install the GPU version of PaddlePaddle:
 
 ```bash
 uv pip uninstall paddlepaddle
 uv pip install paddlepaddle-gpu
 ```
 
+**Note**: The `--gpu` flag is deprecated in PaddleOCR 3.x and will be ignored. GPU acceleration is enabled automatically when a GPU and CUDA are available.
+
+### Important Notes for Mac Users
+
+⚠️ **First Run**: PaddleOCR will download model files (~25 MB) on first initialization. This may take a few minutes depending on your network speed.
+
+⚠️ **Memory & Performance Optimizations**: The tool automatically:
+- Sets `OMP_NUM_THREADS=1` (required for OpenBLAS stability)
+- Resizes large images (>1920px) to prevent memory leaks
+- Uses aggressive garbage collection
+- Scales coordinates back to original image dimensions
+
+📊 **Expected Performance**: For large screenshots (1200x2670px):
+- First run: ~60 seconds (model download + processing)
+- Subsequent runs: ~20-30 seconds (processing only)
+
 ## Quick Start
 
 ### Command Line Usage
@@ -96,19 +112,15 @@ Analyze with large text threshold:
 contrastcheck your_screenshot.png --large-text
 ```
 
-Use GPU acceleration:
-
-```bash
-contrastcheck your_screenshot.png --gpu
-```
-
 ### Python API Usage
 
 ```python
 from contrast_check.main import ContrastAnalyzer
 
 # Initialize analyzer
-analyzer = ContrastAnalyzer(use_gpu=False, lang='en')
+# Note: the use_gpu parameter is deprecated in PaddleOCR 3.x+.
+# The GPU is automatically detected and used when available.
+analyzer = ContrastAnalyzer(lang='en')
 
 # Analyze image
 results = analyzer.analyze_image('screenshot.png')
@@ -220,7 +232,7 @@ optional arguments:
   -f {json,text}, --format {json,text}
                         Output format (default: text)
   --large-text          Treat all text as large text (18pt+ or 14pt+ bold)
-  --gpu                 Use GPU for OCR processing
+  --gpu                 Deprecated. GPU is auto-detected by PaddleOCR 3.x+
   --lang LANG           Language for OCR (default: en)
 ```
 

diff --git a/contrast_check/color_extractor.py b/contrast_check/color_extractor.py
@@ -11,78 +11,51 @@
 
 class ColorExtractor:
     """
-    Extract text and background colors using K-means clustering.
+    Extract dominant colors using K-means clustering.
     """
 
-    def __init__(self, n_text_colors: int = 3, n_bg_colors: int = 3):
+    def __init__(self, n_colors: int = 2):
         """
         Initialize color extractor.
 
         Args:
-            n_text_colors: Number of clusters for text color extraction
-            n_bg_colors: Number of clusters for background color extraction
+            n_colors: Number of color clusters (default: 2)
         """
-        self.n_text_colors = n_text_colors
-        self.n_bg_colors = n_bg_colors
+        self.n_colors = n_colors
 
-    def extract_text_color(
-        self, image: np.ndarray, text_mask: np.ndarray
-    ) -> Tuple[int, int, int]:
+    def _color_distance(
+        self, color1: Tuple[int, int, int], color2: Tuple[int, int, int]
+    ) -> float:
         """
-        Extract dominant text color from the masked region.
+        Calculate Euclidean distance between two RGB colors.
 
         Args:
-            image: Input image (BGR format)
-            text_mask: Binary mask indicating text region
+            color1: First RGB color tuple
+            color2: Second RGB color tuple
 
         Returns:
-            RGB tuple of the dominant text color
+            Euclidean distance between colors
         """
-        # Extract pixels in text region
-        text_pixels = image[text_mask]
-
-        if len(text_pixels) == 0:
-            return (0, 0, 0)
-
-        # Convert BGR to RGB
-        text_pixels_rgb = cv2.cvtColor(
-            text_pixels.reshape(-1, 1, 3), cv2.COLOR_BGR2RGB
-        ).reshape(-1, 3)
-
-        # Use K-means to find dominant colors
-        kmeans = KMeans(
-            n_clusters=min(self.n_text_colors, len(text_pixels)),
-            random_state=42,
-            n_init=10,
-        )
-        kmeans.fit(text_pixels_rgb)
-
-        # Get the most common cluster (dominant color)
-        labels = kmeans.labels_
-        counts = np.bincount(labels)
-        dominant_cluster = np.argmax(counts)
-        dominant_color = kmeans.cluster_centers_[dominant_cluster]
-
-        return tuple(dominant_color.astype(int))
-
-    def extract_background_color(
-        self,
-        image: np.ndarray,
-        text_mask: np.ndarray,
-        bbox: List[List[float]],
-        margin: int = 10,
-    ) -> Tuple[int, int, int]:
+        return sum((a - b) ** 2 for a, b in zip(color1, color2)) ** 0.5
+
+    def extract_colors(
+        self, image: np.ndarray, bbox: List[List[float]], margin: int = 10
+    ) -> Tuple[Tuple[int, int, int], Tuple[int, int, int]]:
         """
-        Extract background color around the text region.
+        Extract the two dominant colors from a region using K-means.
+
+        Uses K-means clustering to find the two most dominant colors.
+        The colors are returned sorted by frequency (most common first).
+        ContrastChecker.calculate_contrast_ratio will handle determining
+        which color is lighter/darker for proper contrast calculation.
 
         Args:
             image: Input image (BGR format)
-            text_mask: Binary mask indicating text region
             bbox: Bounding box coordinates [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
-            margin: Margin around text box for background sampling
+            margin: Margin around text box for sampling (default: 10 pixels)
 
         Returns:
-            RGB tuple of the dominant background color
+            Tuple of (color1, color2) as RGB tuples, sorted by frequency
         """
         h, w = image.shape[:2]
 
@@ -93,35 +66,79 @@ def extract_background_color(
         y_min = max(0, int(np.min(bbox_array[:, 1])) - margin)
         y_max = min(h, int(np.max(bbox_array[:, 1])) + margin)
 
-        # Create background mask (region around text, excluding text itself)
-        bg_mask = np.zeros((h, w), dtype=bool)
-        bg_mask[y_min:y_max, x_min:x_max] = True
-        bg_mask[text_mask] = False
+        # Extract region pixels
+        region = image[y_min:y_max, x_min:x_max]
 
-        # Extract background pixels
-        bg_pixels = image[bg_mask]
+        if region.size == 0:
+            return ((0, 0, 0), (255, 255, 255))
 
-        if len(bg_pixels) == 0:
-            return (255, 255, 255)
+        # Convert BGR to RGB and flatten
+        pixels_rgb = cv2.cvtColor(region, cv2.COLOR_BGR2RGB).reshape(-1, 3)
 
-        # Convert BGR to RGB
-        bg_pixels_rgb = cv2.cvtColor(
-            bg_pixels.reshape(-1, 1, 3), cv2.COLOR_BGR2RGB
-        ).reshape(-1, 3)
+        if len(pixels_rgb) < self.n_colors:
+            # Not enough pixels for clustering
+            return ((0, 0, 0), (255, 255, 255))
 
-        # Use K-means to find dominant colors
-        kmeans = KMeans(
-            n_clusters=min(self.n_bg_colors, len(bg_pixels)), random_state=42, n_init=10
-        )
-        kmeans.fit(bg_pixels_rgb)
+        # Use K-means to find the two dominant colors
+        kmeans = KMeans(n_clusters=self.n_colors, random_state=42, n_init=10)
+        kmeans.fit(pixels_rgb)
 
-        # Get the most common cluster (dominant color)
+        # Get cluster centers sorted by pixel count
         labels = kmeans.labels_
         counts = np.bincount(labels)
-        dominant_cluster = np.argmax(counts)
-        dominant_color = kmeans.cluster_centers_[dominant_cluster]
 
-        return tuple(dominant_color.astype(int))
+        # Sort clusters by count (descending)
+        sorted_indices = np.argsort(counts)[::-1]
+        colors = kmeans.cluster_centers_[sorted_indices]
+
+        # Return the two dominant colors
+        color1 = tuple(colors[0].astype(int))
+        color2 = tuple(colors[1].astype(int)) if len(colors) > 1 else (255, 255, 255)
+
+        # If colors are too similar, try to find more distinct colors
+        min_distance = 50  # Minimum Euclidean distance for distinct colors
+        if self._color_distance(color1, color2) < min_distance:
+            # Try with more clusters and pick the two most distinct ones
+            try:
+                kmeans_more = KMeans(
+                    n_clusters=min(5, len(pixels_rgb)), random_state=42, n_init=10
+                )
+                kmeans_more.fit(pixels_rgb)
+                labels_more = kmeans_more.labels_
+                counts_more = np.bincount(labels_more)
+                sorted_indices_more = np.argsort(counts_more)[::-1]
+                colors_more = kmeans_more.cluster_centers_[sorted_indices_more]
+
+                # Find two colors that are sufficiently different
+                best_pair = None
+                best_distance = 0
+                for i in range(min(3, len(colors_more))):
+                    for j in range(i + 1, min(4, len(colors_more))):
+                        c1 = tuple(colors_more[i].astype(int))
+                        c2 = tuple(colors_more[j].astype(int))
+                        dist = self._color_distance(c1, c2)
+                        if dist > best_distance:
+                            best_distance = dist
+                            best_pair = (c1, c2)
+                        if dist >= min_distance:
+                            return (c1, c2)
+
+                # If we found a better pair, use it
+                if best_pair and best_distance >= 10:
+                    return best_pair
+
+            except Exception:
+                pass
+
+            # Fallback: return white and black if colors are too similar
+            if self._color_distance(color1, (255, 255, 255)) > self._color_distance(
+                color1, (0, 0, 0)
+            ):
+                return (color1, (0, 0, 0))
+            else:
+                return (color1, (255, 255, 255))
+
+        return (color1, color2)
 
     @staticmethod
     def rgb_to_hex(rgb: Tuple[int, int, int]) -> str:

diff --git a/contrast_check/main.py b/contrast_check/main.py
@@ -24,22 +24,18 @@ def __init__(
         self,
         use_gpu: bool = False,
         lang: str = "en",
-        n_text_colors: int = 3,
-        n_bg_colors: int = 3,
+        n_colors: int = 2,
     ):
         """
         Initialize the contrast analyzer.
 
         Args:
-            use_gpu: Whether to use GPU for OCR
+            use_gpu: Deprecated. GPU is auto-detected by PaddleOCR 3.x+.
             lang: Language for OCR
-            n_text_colors: Number of color clusters for text
-            n_bg_colors: Number of color clusters for background
+            n_colors: Number of color clusters (default: 2)
         """
         self.ocr_extractor = OCRExtractor(use_gpu=use_gpu, lang=lang)
-        self.color_extractor = ColorExtractor(
-            n_text_colors=n_text_colors, n_bg_colors=n_bg_colors
-        )
+        self.color_extractor = ColorExtractor(n_colors=n_colors)
         self.contrast_checker = ContrastChecker()
 
     def analyze_image(self, image_path: str, is_large_text: bool = False) -> List[Dict]:
@@ -62,25 +58,16 @@ def analyze_image(self, image_path: str, is_large_text: bool = False) -> List[Di
 
         # Load image
         image = cv2.imread(image_path)
-        image_shape = image.shape
 
         results = []
 
         for idx, region in enumerate(text_regions):
-            # Create text mask
-            text_mask = self.ocr_extractor.get_text_region_mask(
-                image_shape, region["bbox"]
-            )
-
-            # Extract colors
-            text_color = self.color_extractor.extract_text_color(image, text_mask)
-            bg_color = self.color_extractor.extract_background_color(
-                image, text_mask, region["bbox"]
-            )
+            # Extract the two dominant colors
+            color1, color2 = self.color_extractor.extract_colors(image, region["bbox"])
 
-            # Analyze contrast
+            # Analyze contrast - calculate_contrast_ratio handles which is lighter/darker
             analysis = self.contrast_checker.analyze_contrast(
-                text_color, bg_color, is_large_text
+                color1, color2, is_large_text
             )
 
             # Add region info
@@ -90,10 +77,10 @@ def analyze_image(self, image_path: str, is_large_text: bool = False) -> List[Di
                 "confidence": round(region["confidence"], 3),
                 "bbox": region["bbox"],
                 "center": region["center"],
-                "text_color": text_color,
-                "text_color_hex": self.color_extractor.rgb_to_hex(text_color),
-                "bg_color": bg_color,
-                "bg_color_hex": self.color_extractor.rgb_to_hex(bg_color),
+                "color_1": color1,
+                "color_1_hex": self.color_extractor.rgb_to_hex(color1),
+                "color_2": color2,
+                "color_2_hex": self.color_extractor.rgb_to_hex(color2),
                 "contrast_ratio": analysis["contrast_ratio"],
                 "wcag_aa": analysis["wcag_aa"],
                 "wcag_aaa": analysis["wcag_aaa"],
@@ -130,12 +117,10 @@ def generate_report(self, results: List[Dict], output_format: str = "json") -> s
                 report_lines.append(f"Text #{result['index']}: {result['text']}")
                 report_lines.append(f"  OCR Confidence: {result['confidence']:.1%}")
                 report_lines.append(
-                    f"  Text Color: RGB{result['text_color']} "
-                    f"({result['text_color_hex']})"
+                    f"  Color 1: RGB{result['color_1']} " f"({result['color_1_hex']})"
                 )
                 report_lines.append(
-                    f"  Background Color: RGB{result['bg_color']} "
-                    f"({result['bg_color_hex']})"
+                    f"  Color 2: RGB{result['color_2']} " f"({result['color_2_hex']})"
                 )
                 report_lines.append(f"  Contrast Ratio: {result['contrast_ratio']}:1")
                 report_lines.append(
@@ -193,7 +178,11 @@ def main():
         action="store_true",
         help="Treat all text as large text (18pt+ or 14pt+ bold)",
     )
-    parser.add_argument("--gpu", action="store_true", help="Use GPU for OCR processing")
+    parser.add_argument(
+        "--gpu",
+        action="store_true",
+        help="Deprecated. GPU is auto-detected by PaddleOCR 3.x+",
+    )
     parser.add_argument(
         "--lang", type=str, default="en", help="Language for OCR (default: en)"
     )
@@ -207,6 +196,7 @@ def main():
 
     # Initialize analyzer
     print("Initializing ContrastCheck...")
+    print("Note: First run may take several minutes to download OCR models...")
     analyzer = ContrastAnalyzer(use_gpu=args.gpu, lang=args.lang)
 
     # Analyze image