Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 21 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,29 @@ uv pip install -r requirements-dev.txt

### GPU Support (Optional)

For faster OCR processing with GPU:
PaddleOCR 3.x automatically detects and uses GPU when available. For GPU support, install the GPU version of PaddlePaddle:

```bash
uv pip uninstall paddlepaddle
uv pip install paddlepaddle-gpu
```

**Note**: The `--gpu` flag of `contrastcheck` is deprecated as of PaddleOCR 3.x and will be ignored. GPU acceleration is enabled automatically when a GPU and CUDA are available.

### Important Notes for Mac Users

⚠️ **First Run**: PaddleOCR will download model files (~25 MB) on first initialization. This may take a few minutes depending on your network speed.

⚠️ **Memory & Performance Optimizations**: The tool automatically:
- Sets `OMP_NUM_THREADS=1` (required for OpenBLAS stability)
- Resizes large images (>1920px) to prevent memory leaks
- Uses aggressive garbage collection
- Scales coordinates back to original image dimensions

📊 **Expected Performance**: For large screenshots (1200x2670px):
- First run: ~60 seconds (model download + processing)
- Subsequent runs: ~20-30 seconds (processing only)

## Quick Start

### Command Line Usage
Expand All @@ -96,19 +112,15 @@ Analyze with large text threshold:
contrastcheck your_screenshot.png --large-text
```

Use GPU acceleration:

```bash
contrastcheck your_screenshot.png --gpu
```

### Python API Usage

```python
from contrast_check.main import ContrastAnalyzer

# Initialize analyzer
analyzer = ContrastAnalyzer(use_gpu=False, lang='en')
# Note: use_gpu parameter is deprecated in PaddleOCR 3.x+
# GPU is automatically detected and used when available
analyzer = ContrastAnalyzer(lang='en')

# Analyze image
results = analyzer.analyze_image('screenshot.png')
Expand Down Expand Up @@ -220,7 +232,7 @@ optional arguments:
-f {json,text}, --format {json,text}
Output format (default: text)
--large-text Treat all text as large text (18pt+ or 14pt+ bold)
--gpu Use GPU for OCR processing
--gpu Deprecated. GPU is auto-detected by PaddleOCR 3.x+
--lang LANG Language for OCR (default: en)
```

Expand Down
161 changes: 89 additions & 72 deletions contrast_check/color_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,78 +11,51 @@

class ColorExtractor:
"""
Extract text and background colors using K-means clustering.
Extract dominant colors using K-means clustering.
"""

def __init__(self, n_text_colors: int = 3, n_bg_colors: int = 3):
def __init__(self, n_colors: int = 2):
"""
Initialize color extractor.

Args:
n_text_colors: Number of clusters for text color extraction
n_bg_colors: Number of clusters for background color extraction
n_colors: Number of color clusters (default: 2)
"""
self.n_text_colors = n_text_colors
self.n_bg_colors = n_bg_colors
self.n_colors = n_colors

def extract_text_color(
self, image: np.ndarray, text_mask: np.ndarray
) -> Tuple[int, int, int]:
def _color_distance(
self, color1: Tuple[int, int, int], color2: Tuple[int, int, int]
) -> float:
"""
Extract dominant text color from the masked region.
Calculate Euclidean distance between two RGB colors.

Args:
image: Input image (BGR format)
text_mask: Binary mask indicating text region
color1: First RGB color tuple
color2: Second RGB color tuple

Returns:
RGB tuple of the dominant text color
Euclidean distance between colors
"""
# Extract pixels in text region
text_pixels = image[text_mask]

if len(text_pixels) == 0:
return (0, 0, 0)

# Convert BGR to RGB
text_pixels_rgb = cv2.cvtColor(
text_pixels.reshape(-1, 1, 3), cv2.COLOR_BGR2RGB
).reshape(-1, 3)

# Use K-means to find dominant colors
kmeans = KMeans(
n_clusters=min(self.n_text_colors, len(text_pixels)),
random_state=42,
n_init=10,
)
kmeans.fit(text_pixels_rgb)

# Get the most common cluster (dominant color)
labels = kmeans.labels_
counts = np.bincount(labels)
dominant_cluster = np.argmax(counts)
dominant_color = kmeans.cluster_centers_[dominant_cluster]

return tuple(dominant_color.astype(int))

def extract_background_color(
self,
image: np.ndarray,
text_mask: np.ndarray,
bbox: List[List[float]],
margin: int = 10,
) -> Tuple[int, int, int]:
return sum((a - b) ** 2 for a, b in zip(color1, color2)) ** 0.5

def extract_colors(
self, image: np.ndarray, bbox: List[List[float]], margin: int = 10
) -> Tuple[Tuple[int, int, int], Tuple[int, int, int]]:
"""
Extract background color around the text region.
Extract the two dominant colors from a region using K-means.

Uses K-means clustering to find the two most dominant colors.
The colors are returned sorted by frequency (most common first).
ContrastChecker.calculate_contrast_ratio will handle determining
which color is lighter/darker for proper contrast calculation.

Args:
image: Input image (BGR format)
text_mask: Binary mask indicating text region
bbox: Bounding box coordinates [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
margin: Margin around text box for background sampling
margin: Margin around text box for sampling (default: 10 pixels)

Returns:
RGB tuple of the dominant background color
Tuple of (color1, color2) as RGB tuples, sorted by frequency
"""
h, w = image.shape[:2]

Expand All @@ -93,35 +66,79 @@ def extract_background_color(
y_min = max(0, int(np.min(bbox_array[:, 1])) - margin)
y_max = min(h, int(np.max(bbox_array[:, 1])) + margin)

# Create background mask (region around text, excluding text itself)
bg_mask = np.zeros((h, w), dtype=bool)
bg_mask[y_min:y_max, x_min:x_max] = True
bg_mask[text_mask] = False
# Extract region pixels
region = image[y_min:y_max, x_min:x_max]

# Extract background pixels
bg_pixels = image[bg_mask]
if region.size == 0:
return ((0, 0, 0), (255, 255, 255))

if len(bg_pixels) == 0:
return (255, 255, 255)
# Convert BGR to RGB and flatten
pixels_rgb = cv2.cvtColor(region, cv2.COLOR_BGR2RGB).reshape(-1, 3)

# Convert BGR to RGB
bg_pixels_rgb = cv2.cvtColor(
bg_pixels.reshape(-1, 1, 3), cv2.COLOR_BGR2RGB
).reshape(-1, 3)
if len(pixels_rgb) < self.n_colors:
# Not enough pixels for clustering
return ((0, 0, 0), (255, 255, 255))

# Use K-means to find dominant colors
kmeans = KMeans(
n_clusters=min(self.n_bg_colors, len(bg_pixels)), random_state=42, n_init=10
)
kmeans.fit(bg_pixels_rgb)
# Use K-means to find the two dominant colors
kmeans = KMeans(n_clusters=self.n_colors, random_state=42, n_init=10)
kmeans.fit(pixels_rgb)

# Get the most common cluster (dominant color)
# Get cluster centers sorted by pixel count
labels = kmeans.labels_
counts = np.bincount(labels)
dominant_cluster = np.argmax(counts)
dominant_color = kmeans.cluster_centers_[dominant_cluster]

return tuple(dominant_color.astype(int))
# Sort clusters by count (descending)
sorted_indices = np.argsort(counts)[::-1]
colors = kmeans.cluster_centers_[sorted_indices]

# Return the two dominant colors
color1 = tuple(colors[0].astype(int))
color2 = tuple(colors[1].astype(int)) if len(colors) > 1 else (255, 255, 255)

# If colors are too similar, try to find more distinct colors
min_distance = 50 # Minimum Euclidean distance for distinct colors
if self._color_distance(color1, color2) < min_distance:
# Try with more clusters and pick the two most distinct ones
try:
kmeans_more = KMeans(
n_clusters=min(5, len(pixels_rgb)), random_state=42, n_init=10
)
kmeans_more.fit(pixels_rgb)
labels_more = kmeans_more.labels_
counts_more = np.bincount(labels_more)
sorted_indices_more = np.argsort(counts_more)[::-1]
colors_more = kmeans_more.cluster_centers_[sorted_indices_more]

# Find two colors that are sufficiently different
best_pair = None
best_distance = 0
for i in range(min(3, len(colors_more))):
for j in range(i + 1, min(4, len(colors_more))):
c1 = tuple(colors_more[i].astype(int))
c2 = tuple(colors_more[j].astype(int))
dist = self._color_distance(c1, c2)
if dist > best_distance:
best_distance = dist
best_pair = (c1, c2)
if dist >= min_distance:
return (c1, c2)

# If we found a better pair, use it
if best_pair and best_distance >= 10:
return best_pair

except Exception:
pass

# Fallback: the colors are too similar, so pair color1 with black or white (whichever is nearer to it)
if self._color_distance(color1, (255, 255, 255)) > self._color_distance(
color1, (0, 0, 0)
):
return (color1, (0, 0, 0))
else:
return (color1, (255, 255, 255))

return (color1, color2)

@staticmethod
def rgb_to_hex(rgb: Tuple[int, int, int]) -> str:
Expand Down
50 changes: 20 additions & 30 deletions contrast_check/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,18 @@ def __init__(
self,
use_gpu: bool = False,
lang: str = "en",
n_text_colors: int = 3,
n_bg_colors: int = 3,
n_colors: int = 2,
):
"""
Initialize the contrast analyzer.

Args:
use_gpu: Whether to use GPU for OCR
use_gpu: Deprecated. GPU is auto-detected by PaddleOCR 3.x+.
lang: Language for OCR
n_text_colors: Number of color clusters for text
n_bg_colors: Number of color clusters for background
n_colors: Number of color clusters (default: 2)
"""
self.ocr_extractor = OCRExtractor(use_gpu=use_gpu, lang=lang)
self.color_extractor = ColorExtractor(
n_text_colors=n_text_colors, n_bg_colors=n_bg_colors
)
self.color_extractor = ColorExtractor(n_colors=n_colors)
self.contrast_checker = ContrastChecker()

def analyze_image(self, image_path: str, is_large_text: bool = False) -> List[Dict]:
Expand All @@ -62,25 +58,16 @@ def analyze_image(self, image_path: str, is_large_text: bool = False) -> List[Di

# Load image
image = cv2.imread(image_path)
image_shape = image.shape

results = []

for idx, region in enumerate(text_regions):
# Create text mask
text_mask = self.ocr_extractor.get_text_region_mask(
image_shape, region["bbox"]
)

# Extract colors
text_color = self.color_extractor.extract_text_color(image, text_mask)
bg_color = self.color_extractor.extract_background_color(
image, text_mask, region["bbox"]
)
# Extract the two dominant colors
color1, color2 = self.color_extractor.extract_colors(image, region["bbox"])

# Analyze contrast
# Analyze contrast - calculate_contrast_ratio handles which is lighter/darker
analysis = self.contrast_checker.analyze_contrast(
text_color, bg_color, is_large_text
color1, color2, is_large_text
)

# Add region info
Expand All @@ -90,10 +77,10 @@ def analyze_image(self, image_path: str, is_large_text: bool = False) -> List[Di
"confidence": round(region["confidence"], 3),
"bbox": region["bbox"],
"center": region["center"],
"text_color": text_color,
"text_color_hex": self.color_extractor.rgb_to_hex(text_color),
"bg_color": bg_color,
"bg_color_hex": self.color_extractor.rgb_to_hex(bg_color),
"color_1": color1,
"color_1_hex": self.color_extractor.rgb_to_hex(color1),
"color_2": color2,
"color_2_hex": self.color_extractor.rgb_to_hex(color2),
"contrast_ratio": analysis["contrast_ratio"],
"wcag_aa": analysis["wcag_aa"],
"wcag_aaa": analysis["wcag_aaa"],
Expand Down Expand Up @@ -130,12 +117,10 @@ def generate_report(self, results: List[Dict], output_format: str = "json") -> s
report_lines.append(f"Text #{result['index']}: {result['text']}")
report_lines.append(f" OCR Confidence: {result['confidence']:.1%}")
report_lines.append(
f" Text Color: RGB{result['text_color']} "
f"({result['text_color_hex']})"
f" Color 1: RGB{result['color_1']} " f"({result['color_1_hex']})"
)
report_lines.append(
f" Background Color: RGB{result['bg_color']} "
f"({result['bg_color_hex']})"
f" Color 2: RGB{result['color_2']} " f"({result['color_2_hex']})"
)
report_lines.append(f" Contrast Ratio: {result['contrast_ratio']}:1")
report_lines.append(
Expand Down Expand Up @@ -193,7 +178,11 @@ def main():
action="store_true",
help="Treat all text as large text (18pt+ or 14pt+ bold)",
)
parser.add_argument("--gpu", action="store_true", help="Use GPU for OCR processing")
parser.add_argument(
"--gpu",
action="store_true",
help="Deprecated. GPU is auto-detected by PaddleOCR 3.x+",
)
parser.add_argument(
"--lang", type=str, default="en", help="Language for OCR (default: en)"
)
Expand All @@ -207,6 +196,7 @@ def main():

# Initialize analyzer
print("Initializing ContrastCheck...")
print("Note: First run may take several minutes to download OCR models...")
analyzer = ContrastAnalyzer(use_gpu=args.gpu, lang=args.lang)

# Analyze image
Expand Down
Loading
Loading