4 changes: 3 additions & 1 deletion README.md
@@ -16,7 +16,9 @@

## About

Working with FIDE oficial data is not simple, mainly because they don't have an API. That's the reason I made a simple API with FastAPI to scrape the data from their own website and provide it as JSON over HTTP requests.
Working with FIDE official data is not simple, mainly because they don't have an API. That's the reason I made a simple API with FastAPI to scrape the data from their own website and provide it as JSON over HTTP requests.

A Redis cache is implemented to provide faster lookups for common use cases.
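For example, once the containers are up, a repeat request within the cache window is answered from Redis instead of re-scraping FIDE. A minimal client sketch, assuming the default uvicorn port and an illustrative `/top/` route (the real paths live in `src/api.py`):

```python
import requests

BASE_URL = "http://localhost:8000"  # default uvicorn port

# "/top/" is an illustrative route; check src/api.py for the actual paths.
top_players = requests.get(f"{BASE_URL}/top/", params={"limit": 10}).json()
print(top_players[0])
```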

## Features

19 changes: 19 additions & 0 deletions docker-compose.yml
@@ -8,3 +8,22 @@ services:
command: sh -c "pip install -r requirements.txt && uvicorn src.api:app --reload"
volumes:
- ./:/fide-api
environment:
      - REDIS_HOST=redis  # must match the redis service name below; localhost would resolve to the app container itself
- REDIS_PORT=6379
- REDIS_DB=0
- CACHE_EXPIRY=3600
depends_on:
- redis

redis:
image: redis:7-alpine
container_name: fide-redis
ports:
- "6379:6379"
volumes:
- redis-data:/data
command: redis-server --save 60 1 --loglevel warning

volumes:
redis-data:
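Note that inside the compose network the app reaches Redis by its service name, not `localhost` (hence `REDIS_HOST=redis` above). A minimal connectivity check, assuming the environment variables defined in this file:

```python
import os
import redis  # redis-py, pinned in requirements.txt

client = redis.Redis(
    host=os.environ.get("REDIS_HOST", "redis"),   # compose service name
    port=int(os.environ.get("REDIS_PORT", 6379)),
    db=int(os.environ.get("REDIS_DB", 0)),
)
print(client.ping())  # True when the app container can reach the redis service
```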
1 change: 1 addition & 0 deletions requirements.txt
@@ -10,6 +10,7 @@ idna==3.10
orjson==3.10.7
pydantic==2.9.2
pydantic_core==2.23.4
redis==6.0.0
requests==2.32.3
sniffio==1.3.1
soupsieve==2.6
60 changes: 60 additions & 0 deletions src/scraper/cache.py
@@ -0,0 +1,60 @@
"""
Redis cache implementation for the FIDE scraper.
This module provides functionality to cache API responses in Redis
to avoid making repeated requests to the FIDE website.
"""
import os
import json

# Redis configuration, read from environment variables with sensible defaults
REDIS_HOST = os.environ.get('REDIS_HOST', 'localhost')
REDIS_PORT = int(os.environ.get('REDIS_PORT', 6379))
REDIS_DB = int(os.environ.get('REDIS_DB', 0))
REDIS_PASSWORD = os.environ.get('REDIS_PASSWORD', None)
CACHE_EXPIRY = int(os.environ.get('CACHE_EXPIRY', 3600)) # Default: 1 hour

# Initialize Redis client
try:
import redis
redis_client = redis.Redis(
host=REDIS_HOST,
port=REDIS_PORT,
db=REDIS_DB,
password=REDIS_PASSWORD,
decode_responses=True
)
# Test connection
redis_client.ping()
redis_enabled = True
print("Redis cache enabled")
except ImportError:
print("Redis package not installed. Running without cache.")
redis_enabled = False
except Exception as e:
print(f"Redis connection failed: {e}. Running without cache.")
redis_enabled = False

def get_from_cache(key):
"""Get data from Redis cache if available"""
if not redis_enabled:
return None

try:
data = redis_client.get(key)
if data:
return json.loads(data)
except Exception as e:
print(f"Error retrieving from cache: {e}")

return None

def save_to_cache(key, data, expiry=CACHE_EXPIRY):
"""Save data to Redis cache"""
if not redis_enabled:
return

try:
redis_client.setex(key, expiry, json.dumps(data))
except Exception as e:
print(f"Error saving to cache: {e}")
134 changes: 90 additions & 44 deletions src/scraper/fide_scraper.py
@@ -1,54 +1,100 @@
import requests
import src.scraper.functions as scraper
from src.scraper.cache import get_from_cache, save_to_cache

def get_top_players(limit: int = 100, history: bool = False) -> list[dict]:
fide_top_players_page = requests.get("https://ratings.fide.com/a_top.php?list=open")

html_doc = fide_top_players_page.text

top_players = scraper.get_top_players(html_doc)

top_players = top_players[0:limit]

if history == False: return top_players

for player_dict in top_players:
fide_profile_page = f"https://ratings.fide.com/profile/{player_dict['fide_id']}"
    # Cache key derived from the function parameters
cache_key = f"top_players:{limit}:{history}"

# Try to get from cache first
cached_data = get_from_cache(cache_key)
    if cached_data is not None:
return cached_data

# If not in cache, proceed with fetch
fide_top_players_page = requests.get("https://ratings.fide.com/a_top.php?list=open")
html_doc = fide_top_players_page.text
top_players = scraper.get_top_players(html_doc)
top_players = top_players[0:limit]

    if not history:
# Cache the result before returning
save_to_cache(cache_key, top_players)
return top_players

for player_dict in top_players:
fide_profile_page = f"https://ratings.fide.com/profile/{player_dict['fide_id']}"

# Check if we have player history in cache
history_cache_key = f"player_history:{player_dict['fide_id']}"
player_history = get_from_cache(history_cache_key)

if not player_history:
# If not in cache, fetch it
response = requests.get(fide_profile_page)
html_doc = response.text
player_history = scraper.get_player_history(html_doc)
# Cache player history
save_to_cache(history_cache_key, player_history)

player_dict["history"] = player_history

# Cache the final result with histories
save_to_cache(cache_key, top_players)
return top_players

def get_player_history(fide_id: str) -> list[dict]:
# Create a cache key
cache_key = f"player_history:{fide_id}"

# Try to get from cache first
cached_data = get_from_cache(cache_key)
    if cached_data is not None:
return cached_data

# If not in cache, proceed with fetch
fide_profile_page = f"https://ratings.fide.com/profile/{fide_id}"
response = requests.get(fide_profile_page)

html_doc = response.text

player_history = scraper.get_player_history(html_doc)

player_dict["history"] = player_history

return top_players

def get_player_history(fide_id: str) -> list[dict]:
fide_profile_page = f"https://ratings.fide.com/profile/{fide_id}"

response = requests.get(fide_profile_page)

html_doc = response.text

player_history = scraper.get_player_history(html_doc)

return player_history

# Cache the result before returning
save_to_cache(cache_key, player_history)
return player_history

def get_player_info(fide_id: str, history: bool = False):
fide_profile_page = f"https://ratings.fide.com/profile/{fide_id}"

response = requests.get(fide_profile_page)

html_doc = response.text

player_info = scraper.get_player_info(html_doc)

if history == False: return player_info

player_history = scraper.get_player_history(html_doc)

player_info["history"] = player_history

return player_info
# Create a cache key based on function parameters
cache_key = f"player_info:{fide_id}:{history}"

# Try to get from cache first
cached_data = get_from_cache(cache_key)
    if cached_data is not None:
return cached_data

# If not in cache, proceed with fetch
fide_profile_page = f"https://ratings.fide.com/profile/{fide_id}"
response = requests.get(fide_profile_page)
html_doc = response.text
player_info = scraper.get_player_info(html_doc)

    if not history:
# Cache the result before returning
save_to_cache(cache_key, player_info)
return player_info

# Check if we have player history in cache
history_cache_key = f"player_history:{fide_id}"
player_history = get_from_cache(history_cache_key)

if not player_history:
# If not in cache, we already have the HTML doc, so just extract history
player_history = scraper.get_player_history(html_doc)
# Cache player history
save_to_cache(history_cache_key, player_history)

player_info["history"] = player_history

# Cache the final result with history
save_to_cache(cache_key, player_info)
return player_info
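The effect of the cache is easiest to see by calling the same function twice: the first call scrapes ratings.fide.com, the second is served from Redis. A quick timing sketch (1503014 is Magnus Carlsen's FIDE ID):

```python
import time
from src.scraper.fide_scraper import get_player_info

start = time.perf_counter()
get_player_info("1503014")  # cold: scrapes ratings.fide.com
cold = time.perf_counter() - start

start = time.perf_counter()
get_player_info("1503014")  # warm: served from Redis
warm = time.perf_counter() - start

print(f"cold={cold:.2f}s warm={warm:.4f}s")
```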