4 changes: 3 additions & 1 deletion README.md
@@ -16,7 +16,9 @@

## About

Working with FIDE oficial data is not simple, mainly because they don't have an API. That's the reason I made a simple API with FastAPI to scrape the data from their own website and provide it as JSON over HTTP requests.
Working with FIDE official data is not simple, mainly because they don't have an API. That's the reason I made a simple API with FastAPI to scrape the data from their own website and provide it as JSON over HTTP requests.

A Redis cache is implemented to provide faster lookups for common use cases.
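For example, once the containers are up, a repeat request within the cache window is answered from Redis instead of re-scraping FIDE. A minimal client sketch, assuming the default uvicorn port and an illustrative `/top/` route (the real paths live in `src/api.py`):

```python
import requests

BASE_URL = "http://localhost:8000"  # default uvicorn port

# "/top/" is an illustrative route; check src/api.py for the actual paths.
top_players = requests.get(f"{BASE_URL}/top/", params={"limit": 10}).json()
print(top_players[0])
```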

## Features

19 changes: 19 additions & 0 deletions docker-compose.yml
@@ -8,3 +8,22 @@ services:
command: sh -c "pip install -r requirements.txt && uvicorn src.api:app --reload"
volumes:
- ./:/fide-api
environment:
      - REDIS_HOST=redis  # must match the redis service name below; localhost would resolve to the app container itself
- REDIS_PORT=6379
- REDIS_DB=0
- CACHE_EXPIRY=3600
depends_on:
- redis

redis:
image: redis:7-alpine
container_name: fide-redis
ports:
- "6379:6379"
volumes:
- redis-data:/data
command: redis-server --save 60 1 --loglevel warning

volumes:
redis-data:
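Note that inside the compose network the app reaches Redis by its service name, not `localhost` (hence `REDIS_HOST=redis` above). A minimal connectivity check, assuming the environment variables defined in this file:

```python
import os
import redis  # redis-py, pinned in requirements.txt

client = redis.Redis(
    host=os.environ.get("REDIS_HOST", "redis"),   # compose service name
    port=int(os.environ.get("REDIS_PORT", 6379)),
    db=int(os.environ.get("REDIS_DB", 0)),
)
print(client.ping())  # True when the app container can reach the redis service
```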
1 change: 1 addition & 0 deletions requirements.txt
@@ -10,6 +10,7 @@ idna==3.10
orjson==3.10.7
pydantic==2.9.2
pydantic_core==2.23.4
redis==6.0.0
requests==2.32.3
sniffio==1.3.1
soupsieve==2.6
60 changes: 60 additions & 0 deletions src/scraper/cache.py
@@ -0,0 +1,60 @@
"""
Redis cache implementation for the FIDE scraper.
This module provides functionality to cache API responses in Redis
to avoid making repeated requests to the FIDE website.
"""
import os
import json

# Redis configuration, read from environment variables with sensible defaults
REDIS_HOST = os.environ.get('REDIS_HOST', 'localhost')
REDIS_PORT = int(os.environ.get('REDIS_PORT', 6379))
REDIS_DB = int(os.environ.get('REDIS_DB', 0))
REDIS_PASSWORD = os.environ.get('REDIS_PASSWORD', None)
CACHE_EXPIRY = int(os.environ.get('CACHE_EXPIRY', 3600)) # Default: 1 hour

# Initialize Redis client
try:
import redis
redis_client = redis.Redis(
host=REDIS_HOST,
port=REDIS_PORT,
db=REDIS_DB,
password=REDIS_PASSWORD,
decode_responses=True
)
# Test connection
redis_client.ping()
redis_enabled = True
print("Redis cache enabled")
except ImportError:
print("Redis package not installed. Running without cache.")
redis_enabled = False
except Exception as e:
print(f"Redis connection failed: {e}. Running without cache.")
redis_enabled = False

def get_from_cache(key):
"""Get data from Redis cache if available"""
if not redis_enabled:
return None

try:
data = redis_client.get(key)
if data:
return json.loads(data)
except Exception as e:
print(f"Error retrieving from cache: {e}")

return None

def save_to_cache(key, data, expiry=CACHE_EXPIRY):
"""Save data to Redis cache"""
if not redis_enabled:
return

try:
redis_client.setex(key, expiry, json.dumps(data))
except Exception as e:
print(f"Error saving to cache: {e}")
134 changes: 90 additions & 44 deletions src/scraper/fide_scraper.py
@@ -1,54 +1,100 @@
import requests
import src.scraper.functions as scraper
from src.scraper.cache import get_from_cache, save_to_cache

def get_top_players(limit: int = 100, history: bool = False) -> list[dict]:
fide_top_players_page = requests.get("https://ratings.fide.com/a_top.php?list=open")

html_doc = fide_top_players_page.text

top_players = scraper.get_top_players(html_doc)

top_players = top_players[0:limit]

if history == False: return top_players

for player_dict in top_players:
fide_profile_page = f"https://ratings.fide.com/profile/{player_dict['fide_id']}"
    # Cache key derived from the function parameters
cache_key = f"top_players:{limit}:{history}"

# Try to get from cache first
cached_data = get_from_cache(cache_key)
    if cached_data is not None:
return cached_data

# If not in cache, proceed with fetch
fide_top_players_page = requests.get("https://ratings.fide.com/a_top.php?list=open")
html_doc = fide_top_players_page.text
top_players = scraper.get_top_players(html_doc)
top_players = top_players[0:limit]

    if not history:
# Cache the result before returning
save_to_cache(cache_key, top_players)
return top_players

for player_dict in top_players:
fide_profile_page = f"https://ratings.fide.com/profile/{player_dict['fide_id']}"

# Check if we have player history in cache
history_cache_key = f"player_history:{player_dict['fide_id']}"
player_history = get_from_cache(history_cache_key)

if not player_history:
# If not in cache, fetch it
response = requests.get(fide_profile_page)
html_doc = response.text
player_history = scraper.get_player_history(html_doc)
# Cache player history
save_to_cache(history_cache_key, player_history)

player_dict["history"] = player_history

# Cache the final result with histories
save_to_cache(cache_key, top_players)
return top_players

def get_player_history(fide_id: str) -> list[dict]:
# Create a cache key
cache_key = f"player_history:{fide_id}"

# Try to get from cache first
cached_data = get_from_cache(cache_key)
    if cached_data is not None:
return cached_data

# If not in cache, proceed with fetch
fide_profile_page = f"https://ratings.fide.com/profile/{fide_id}"
response = requests.get(fide_profile_page)

html_doc = response.text

player_history = scraper.get_player_history(html_doc)

player_dict["history"] = player_history

return top_players

def get_player_history(fide_id: str) -> list[dict]:
fide_profile_page = f"https://ratings.fide.com/profile/{fide_id}"

response = requests.get(fide_profile_page)

html_doc = response.text

player_history = scraper.get_player_history(html_doc)

return player_history

# Cache the result before returning
save_to_cache(cache_key, player_history)
return player_history

def get_player_info(fide_id: str, history: bool = False):
fide_profile_page = f"https://ratings.fide.com/profile/{fide_id}"

response = requests.get(fide_profile_page)

html_doc = response.text

player_info = scraper.get_player_info(html_doc)

if history == False: return player_info

player_history = scraper.get_player_history(html_doc)

player_info["history"] = player_history

return player_info
# Create a cache key based on function parameters
cache_key = f"player_info:{fide_id}:{history}"

# Try to get from cache first
cached_data = get_from_cache(cache_key)
    if cached_data is not None:
return cached_data

# If not in cache, proceed with fetch
fide_profile_page = f"https://ratings.fide.com/profile/{fide_id}"
response = requests.get(fide_profile_page)
html_doc = response.text
player_info = scraper.get_player_info(html_doc)

    if not history:
# Cache the result before returning
save_to_cache(cache_key, player_info)
return player_info

# Check if we have player history in cache
history_cache_key = f"player_history:{fide_id}"
player_history = get_from_cache(history_cache_key)

if not player_history:
# If not in cache, we already have the HTML doc, so just extract history
player_history = scraper.get_player_history(html_doc)
# Cache player history
save_to_cache(history_cache_key, player_history)

player_info["history"] = player_history

# Cache the final result with history
save_to_cache(cache_key, player_info)
return player_info
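The effect of the cache is easiest to see by calling the same function twice: the first call scrapes ratings.fide.com, the second is served from Redis. A quick timing sketch (1503014 is Magnus Carlsen's FIDE ID):

```python
import time
from src.scraper.fide_scraper import get_player_info

start = time.perf_counter()
get_player_info("1503014")  # cold: scrapes ratings.fide.com
cold = time.perf_counter() - start

start = time.perf_counter()
get_player_info("1503014")  # warm: served from Redis
warm = time.perf_counter() - start

print(f"cold={cold:.2f}s warm={warm:.4f}s")
```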