diff --git a/README.md b/README.md
index cdcd2f5..ac9e283 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,9 @@
 
 ## About
 
-Working with FIDE oficial data is not simple, mainly because they don't have an API. That's the reason I made a simple API with FastAPI to scrape the data from their own website and provide it as JSON over HTTP requests.
+Working with official FIDE data is not simple, mainly because FIDE doesn't provide an API. That's why I made a simple API with FastAPI that scrapes the data from their website and serves it as JSON over HTTP.
+
+Responses are cached in Redis, which speeds up repeated lookups by avoiding redundant requests to the FIDE website.
 
 ## Features
 
diff --git a/docker-compose.yml b/docker-compose.yml
index 342d1b5..4c889ad 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -8,3 +8,22 @@ services:
     command: sh -c "pip install -r requirements.txt && uvicorn src.api:app --reload"
     volumes:
       - ./:/fide-api
+    environment:
+      - REDIS_HOST=redis  # must match the redis service name below, not localhost
+      - REDIS_PORT=6379
+      - REDIS_DB=0
+      - CACHE_EXPIRY=3600
+    depends_on:
+      - redis
+
+  redis:
+    image: redis:7-alpine
+    container_name: fide-redis
+    ports:
+      - "6379:6379"
+    volumes:
+      - redis-data:/data
+    command: redis-server --save 60 1 --loglevel warning
+
+volumes:
+  redis-data:
diff --git a/requirements.txt b/requirements.txt
index ec83f86..0e60814 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,6 +10,7 @@ idna==3.10
 orjson==3.10.7
 pydantic==2.9.2
 pydantic_core==2.23.4
+redis==6.0.0
 requests==2.32.3
 sniffio==1.3.1
 soupsieve==2.6
diff --git a/src/scraper/cache.py b/src/scraper/cache.py
new file mode 100644
index 0000000..a453b10
--- /dev/null
+++ b/src/scraper/cache.py
@@ -0,0 +1,59 @@
+"""
+Redis cache implementation for the FIDE scraper.
+This module provides functionality to cache API responses in Redis
+to avoid making repeated requests to the FIDE website.
+"""
+import os
+import json
+
+# Redis configuration, read from environment variables with local defaults
+REDIS_HOST = os.environ.get('REDIS_HOST', 'localhost')
+REDIS_PORT = int(os.environ.get('REDIS_PORT', 6379))
+REDIS_DB = int(os.environ.get('REDIS_DB', 0))
+REDIS_PASSWORD = os.environ.get('REDIS_PASSWORD', None)
+CACHE_EXPIRY = int(os.environ.get('CACHE_EXPIRY', 3600))  # Default: 1 hour
+
+# Initialize the Redis client; fall back to running without a cache if unavailable
+try:
+    import redis
+    redis_client = redis.Redis(
+        host=REDIS_HOST,
+        port=REDIS_PORT,
+        db=REDIS_DB,
+        password=REDIS_PASSWORD,
+        decode_responses=True
+    )
+    # Test the connection
+    redis_client.ping()
+    redis_enabled = True
+    print("Redis cache enabled")
+except ImportError:
+    print("Redis package not installed. Running without cache.")
+    redis_enabled = False
+except Exception as e:
+    print(f"Redis connection failed: {e}. Running without cache.")
+    redis_enabled = False
+
+def get_from_cache(key):
+    """Get data from Redis cache if available"""
+    if not redis_enabled:
+        return None
+
+    try:
+        data = redis_client.get(key)
+        if data:
+            return json.loads(data)
+    except Exception as e:
+        print(f"Error retrieving from cache: {e}")
+
+    return None
+
+def save_to_cache(key, data, expiry=CACHE_EXPIRY):
+    """Save data to Redis cache"""
+    if not redis_enabled:
+        return
+
+    try:
+        redis_client.setex(key, expiry, json.dumps(data))
+    except Exception as e:
+        print(f"Error saving to cache: {e}")
diff --git a/src/scraper/fide_scraper.py b/src/scraper/fide_scraper.py
index b407c02..78df713 100644
--- a/src/scraper/fide_scraper.py
+++ b/src/scraper/fide_scraper.py
@@ -1,54 +1,99 @@
 import requests
 import src.scraper.functions as scraper
+from src.scraper.cache import get_from_cache, save_to_cache
 
 def get_top_players(limit: int = 100, history: bool = False) -> list[dict]:
-    fide_top_players_page = requests.get("https://ratings.fide.com/a_top.php?list=open")
-
-    html_doc = fide_top_players_page.text
-
-    top_players = scraper.get_top_players(html_doc)
-
-    top_players = top_players[0:limit]
-
-    if history == False: return top_players
-
-    for player_dict in top_players:
-        fide_profile_page = f"https://ratings.fide.com/profile/{player_dict['fide_id']}"
-
-        response = requests.get(fide_profile_page)
-
-        html_doc = response.text
-
-        player_history = scraper.get_player_history(html_doc)
-
-        player_dict["history"] = player_history
-
-    return top_players
+    # Create a cache key based on the function parameters
+    cache_key = f"top_players:{limit}:{history}"
+
+    # Try to get from cache first
+    cached_data = get_from_cache(cache_key)
+    if cached_data is not None:
+        return cached_data
+
+    # If not in cache, proceed with fetch
+    fide_top_players_page = requests.get("https://ratings.fide.com/a_top.php?list=open")
+    html_doc = fide_top_players_page.text
+    top_players = scraper.get_top_players(html_doc)
+    top_players = top_players[0:limit]
+
+    if not history:
+        # Cache the result before returning
+        save_to_cache(cache_key, top_players)
+        return top_players
+
+    for player_dict in top_players:
+        fide_profile_page = f"https://ratings.fide.com/profile/{player_dict['fide_id']}"
+
+        # Check if we have the player's history in cache
+        history_cache_key = f"player_history:{player_dict['fide_id']}"
+        player_history = get_from_cache(history_cache_key)
+
+        if player_history is None:
+            # If not in cache, fetch it
+            response = requests.get(fide_profile_page)
+            html_doc = response.text
+            player_history = scraper.get_player_history(html_doc)
+            # Cache the player history
+            save_to_cache(history_cache_key, player_history)
+
+        player_dict["history"] = player_history
+
+    # Cache the final result with histories
+    save_to_cache(cache_key, top_players)
+    return top_players
 
 def get_player_history(fide_id: str) -> list[dict]:
-    fide_profile_page = f"https://ratings.fide.com/profile/{fide_id}"
-
-    response = requests.get(fide_profile_page)
-
-    html_doc = response.text
-
-    player_history = scraper.get_player_history(html_doc)
-
-    return player_history
+    # Create a cache key
+    cache_key = f"player_history:{fide_id}"
+
+    # Try to get from cache first
+    cached_data = get_from_cache(cache_key)
+    if cached_data is not None:
+        return cached_data
+
+    # If not in cache, proceed with fetch
+    fide_profile_page = f"https://ratings.fide.com/profile/{fide_id}"
+    response = requests.get(fide_profile_page)
+    html_doc = response.text
+    player_history = scraper.get_player_history(html_doc)
+
+    # Cache the result before returning
+    save_to_cache(cache_key, player_history)
+    return player_history
 
 def get_player_info(fide_id: str, history: bool = False):
-    fide_profile_page = f"https://ratings.fide.com/profile/{fide_id}"
-
-    response = requests.get(fide_profile_page)
-
-    html_doc = response.text
-
-    player_info = scraper.get_player_info(html_doc)
-
-    if history == False: return player_info
-
-    player_history = scraper.get_player_history(html_doc)
-
-    player_info["history"] = player_history
-
-    return player_info
+    # Create a cache key based on the function parameters
+    cache_key = f"player_info:{fide_id}:{history}"
+
+    # Try to get from cache first
+    cached_data = get_from_cache(cache_key)
+    if cached_data is not None:
+        return cached_data
+
+    # If not in cache, proceed with fetch
+    fide_profile_page = f"https://ratings.fide.com/profile/{fide_id}"
+    response = requests.get(fide_profile_page)
+    html_doc = response.text
+    player_info = scraper.get_player_info(html_doc)
+
+    if not history:
+        # Cache the result before returning
+        save_to_cache(cache_key, player_info)
+        return player_info
+
+    # Check if we have the player's history in cache
+    history_cache_key = f"player_history:{fide_id}"
+    player_history = get_from_cache(history_cache_key)
+
+    if player_history is None:
+        # If not in cache, we already have the HTML doc, so just extract history
+        player_history = scraper.get_player_history(html_doc)
+        # Cache the player history
+        save_to_cache(history_cache_key, player_history)
+
+    player_info["history"] = player_history
+
+    # Cache the final result with history
+    save_to_cache(cache_key, player_info)
+    return player_info
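
The three rewritten scraper functions all repeat the same check-cache / fetch / save-to-cache sequence. If that pattern spreads further, it could be factored into a decorator; the sketch below is hypothetical (not part of the diff) and assumes only the `get_from_cache`/`save_to_cache` helpers exported by `src/scraper/cache.py`, building keys the same way the hand-rolled ones above do.

```python
import functools

from src.scraper.cache import get_from_cache, save_to_cache

def cached(prefix: str):
    """Hypothetical decorator: cache a function's JSON-serializable result
    under a key built from `prefix` and the call's arguments."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Build keys like "player_info:<fide_id>:False", mirroring the
            # hand-rolled keys in fide_scraper.py.
            key = ":".join([prefix, *map(str, args), *map(str, kwargs.values())])
            cached_data = get_from_cache(key)
            if cached_data is not None:
                return cached_data  # cache hit
            result = func(*args, **kwargs)  # cache miss: do the real work
            save_to_cache(key, result)
            return result
        return wrapper
    return decorator
```

With that in place, `get_player_history` would shrink to its scraping core under `@cached("player_history")`. One caveat: the same call made positionally versus by keyword would produce different keys, which is a reason the diff's explicit keys are a defensible choice too.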
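To sanity-check the cache end to end, one option is a quick round-trip from the host (a hypothetical smoke test, not in the repo): with `docker compose up -d` running, the published `6379` port makes the Redis container reachable at the module's default `localhost`.

```python
# Hypothetical smoke test: write through the cache helpers, then read back.
from src.scraper.cache import get_from_cache, save_to_cache

demo_key = "player_history:demo"  # follows the key naming used in the diff
demo_value = [{"period": "2024-Dec", "rating": 2800}]  # made-up payload

save_to_cache(demo_key, demo_value)  # SETEX with the default 1-hour expiry
assert get_from_cache(demo_key) == demo_value
print("cache round-trip OK")
```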