diff --git a/frontend/src/api/api.ts b/frontend/src/api/api.ts index 283aaa2..e8238b5 100644 --- a/frontend/src/api/api.ts +++ b/frontend/src/api/api.ts @@ -144,11 +144,22 @@ export async function fetchAllNews(): Promise { return r.data; } -export async function fetchLeaderboardSummaries(useV1: boolean = false): Promise { +export async function fetchLeaderboardSummaries( + useBeta: boolean = false, + forceRefreshCache: boolean = false, +): Promise { const start = performance.now(); - const url = useV1 - ? "/api/leaderboard-summaries?v1_query" + + // Build URL with query params + const params = new URLSearchParams(); + if (useBeta) params.append("use_beta", ""); + if (forceRefreshCache) params.append("force_refresh_cache", ""); + + const queryString = params.toString(); + const url = queryString + ? `/api/leaderboard-summaries?${queryString}` : "/api/leaderboard-summaries"; + const res = await fetch(url); const fetchTime = performance.now() - start; @@ -165,9 +176,9 @@ export async function fetchLeaderboardSummaries(useV1: boolean = false): Promise const parseTime = performance.now() - parseStart; const totalTime = performance.now() - start; - const version = useV1 ? "v1" : "v2"; + const version = useBeta ? 
"beta" : "original"; console.log( - `[Perf] fetchLeaderboardSummaries (${version}) | fetch=${fetchTime.toFixed(2)}ms | parse=${parseTime.toFixed(2)}ms | total=${totalTime.toFixed(2)}ms`, + `[Perf] fetchLeaderboardSummaries (${version} | forceRefresh=${forceRefreshCache}) | fetch=${fetchTime.toFixed(2)}ms | parse=${parseTime.toFixed(2)}ms | total=${totalTime.toFixed(2)}ms`, ); return r.data; diff --git a/frontend/src/pages/home/Home.tsx b/frontend/src/pages/home/Home.tsx index 4e1939f..e0654d4 100644 --- a/frontend/src/pages/home/Home.tsx +++ b/frontend/src/pages/home/Home.tsx @@ -42,16 +42,17 @@ interface LeaderboardSummaries { export default function Home() { const [searchParams] = useSearchParams(); const [isQuickStartOpen, setIsQuickStartOpen] = useState(false); - const useV1 = searchParams.has("v1_query"); + const useBeta = searchParams.has("use_beta"); + const forceRefresh = searchParams.has("force_refresh"); const { data, loading, error, errorStatus, call } = fetcherApiCallback< LeaderboardSummaries, - [boolean] + [boolean, boolean] >(fetchLeaderboardSummaries); useEffect(() => { - call(useV1); - }, [call, useV1]); + call(useBeta, forceRefresh); + }, [call, useBeta, forceRefresh]); if (loading) { return ; diff --git a/kernelboard/__init__.py b/kernelboard/__init__.py index 14faf79..77fa72c 100644 --- a/kernelboard/__init__.py +++ b/kernelboard/__init__.py @@ -18,7 +18,7 @@ from kernelboard.lib import db, env, score, time from kernelboard.lib.logging import configure_logging from kernelboard.lib.rate_limiter import limiter -from kernelboard.lib.redis_connection import create_redis_connection +from kernelboard.lib.redis_connection import get_redis_connection from kernelboard.lib.status_code import http_error from kernelboard.og_tags import get_og_tags_for_path, inject_og_tags, is_social_crawler @@ -55,7 +55,7 @@ def create_app(test_config=None): SESSION_TYPE="redis", # REDIS_SSL_CERT_REQS can be set to override SSL cert verification # for Redis connections (e.g., "none" for
self-signed certificates). - SESSION_REDIS=create_redis_connection( + SESSION_REDIS=get_redis_connection( cert_reqs=os.getenv("REDIS_SSL_CERT_REQS") ), OAUTH2_PROVIDERS=providers(), diff --git a/kernelboard/api/leaderboard_summaries.py b/kernelboard/api/leaderboard_summaries.py index 65b3ad4..1428a66 100644 --- a/kernelboard/api/leaderboard_summaries.py +++ b/kernelboard/api/leaderboard_summaries.py @@ -1,33 +1,246 @@ +import json import logging +import os import time from datetime import datetime, timezone from flask import Blueprint, request +from kernelboard.lib.auth_utils import get_id_and_username_from_session, get_whitelist from kernelboard.lib.db import get_db_connection +from kernelboard.lib.redis_connection import get_redis_connection from kernelboard.lib.status_code import http_success logger = logging.getLogger(__name__) -leaderboard_summaries_bp = Blueprint( - "leaderboard_summaries_bp", __name__, url_prefix="/leaderboard-summaries" -) +leaderboard_summaries_bp = Blueprint("leaderboard_summaries_bp", __name__, url_prefix="/leaderboard-summaries") + +# Redis cache key prefix for ended leaderboard top_users +CACHE_KEY_PREFIX = "lb_top_users:" + + +# ============================================================================= +# Redis Cache Helpers +# ============================================================================= + + +def _get_redis(): + """Get Redis connection (singleton).""" + cert_reqs = os.getenv("REDIS_SSL_CERT_REQS") + return get_redis_connection(cert_reqs=cert_reqs) + + +def _get_cached_top_users(redis_conn, leaderboard_ids: list[int]) -> dict[int, list]: + """Get cached top_users for multiple leaderboards from Redis.""" + if not redis_conn or not leaderboard_ids: + return {} + + keys = [f"{CACHE_KEY_PREFIX}{lb_id}" for lb_id in leaderboard_ids] + try: + values = redis_conn.mget(keys) + result = {} + for lb_id, value in zip(leaderboard_ids, values): + if value: + result[lb_id] = json.loads(value) + return result + except Exception: + 
logger.warning("Redis cache read failed", exc_info=True) + return {} + + +def _set_cached_top_users(redis_conn, leaderboard_id: int, top_users: list): + """Cache top_users for ended leaderboard (no expiry).""" + if not redis_conn: + return + + try: + key = f"{CACHE_KEY_PREFIX}{leaderboard_id}" + redis_conn.set(key, json.dumps(top_users)) + except Exception: + logger.warning("Redis cache write failed", exc_info=True) + + +def _delete_cached_top_users(redis_conn, leaderboard_ids: list[int]): + """Delete cached top_users for leaderboards (e.g., when deadline extended).""" + if not redis_conn or not leaderboard_ids: + return + try: + keys = [f"{CACHE_KEY_PREFIX}{lb_id}" for lb_id in leaderboard_ids] + redis_conn.delete(*keys) + except Exception: + logger.warning("Redis cache delete failed", exc_info=True) + + +# ============================================================================= +# Main API Endpoint +# ============================================================================= @leaderboard_summaries_bp.route("", methods=["GET"]) def index(): + """ + Get leaderboard summaries. 
+ + Query params: + - use_beta: Use the beta (Redis-cached) query strategy + - force_refresh_cache: Clear and refresh cache for ended leaderboards + """ total_start = time.perf_counter() - # Check if legacy v1 query is requested (v2 is now default) - use_v1 = request.args.get("v1_query") is not None + use_beta = request.args.get("use_beta") is not None + force_refresh = request.args.get("force_refresh_cache") is not None + + # Check if user is admin to force refresh cache + user_id, _ = get_id_and_username_from_session() + whitelist = get_whitelist() + if not user_id or user_id not in whitelist: + logger.info("[leaderboard_summaries] skip force_refresh since user is not admin") + force_refresh = False + + # Choose strategy based on query params + if use_beta: + return _get_leaderboards_cached(total_start, force_refresh) + else: + return _get_leaderboards_original(total_start) + + +# ============================================================================= +# Strategy 1: Cached (opt-in via use_beta) - Cache ended leaderboards in Redis +# ============================================================================= + + +def _get_leaderboards_cached(total_start: float, force_refresh: bool = False): + """ + Get leaderboard summaries with Redis caching for ended leaderboards. + + Args: + total_start: Start time for performance logging + force_refresh: If True, ignore cache and recompute all ended leaderboards + + Strategy: + - Ended leaderboards (deadline < NOW): Read from Redis cache + - Active leaderboards (deadline >= NOW): Compute in real-time + - Uncached ended leaderboards: Compute and store in cache + """ + # 1. Database & Redis connection + db_conn_start = time.perf_counter() + conn = get_db_connection() + redis_conn = _get_redis() + db_conn_time = (time.perf_counter() - db_conn_start) * 1000 + + query_start = time.perf_counter() + + with conn.cursor() as cur: + # 2. 
Get all leaderboards and identify ended vs active + cur.execute(""" + SELECT id, name, deadline, + deadline < NOW() AS is_ended + FROM leaderboard.leaderboard + ORDER BY id DESC + """) + all_leaderboards = cur.fetchall() + + ended_ids = [row[0] for row in all_leaderboards if row[3]] + active_ids = [row[0] for row in all_leaderboards if not row[3]] + + # 3. Delete stale cache for active leaderboards (e.g., deadline extended) + if active_ids: + _delete_cached_top_users(redis_conn, active_ids) + + # 4. Try to get cached top_users for ended leaderboards + cache_start = time.perf_counter() + if force_refresh: + logger.info("[Cache] force_refresh=True, ignoring cache") + cached_top_users = {} + else: + cached_top_users = _get_cached_top_users(redis_conn, ended_ids) + cache_time = (time.perf_counter() - cache_start) * 1000 + + # Find ended leaderboards not in cache + uncached_ended_ids = [lb_id for lb_id in ended_ids if lb_id not in cached_top_users] + # 5. Compute top_users for: active + uncached ended leaderboards + ids_to_compute = active_ids + uncached_ended_ids + + logger.info( + "[Cache] cached=%d | uncached=%d | active=%d | ids_to_compute=%d", + len(cached_top_users), + len(uncached_ended_ids), + len(active_ids), + len(ids_to_compute) + ) + + compute_start = time.perf_counter() + if ids_to_compute: + ids_tuple = tuple(ids_to_compute) + cur.execute(_get_query_for_ids(), (ids_tuple, ids_tuple)) + computed_results = {row[0]: row[1] for row in cur.fetchall()} + else: + computed_results = {} + compute_time = (time.perf_counter() - compute_start) * 1000 + + # 6. Cache newly computed ended leaderboards + for lb_id in uncached_ended_ids: + if lb_id in computed_results: + _set_cached_top_users(redis_conn, lb_id, computed_results[lb_id]) + + # 7. Get metadata for all leaderboards + cur.execute(_get_leaderboard_metadata_query()) + metadata = {row[0]: row[1] for row in cur.fetchall()} + + # 8. 
Build final response + leaderboards = [] + for row in all_leaderboards: + lb_id = row[0] + lb_data = metadata.get(lb_id, {}) + + # Get top_users from cache or computed results + lb_data["top_users"] = cached_top_users.get(lb_id, computed_results.get(lb_id)) + + if lb_data.get("gpu_types") is None: + lb_data["gpu_types"] = [] + leaderboards.append(lb_data) + + query_time = (time.perf_counter() - query_start) * 1000 + total_time = (time.perf_counter() - total_start) * 1000 + + logger.info( + "[Perf] leaderboard_summaries (cached) | " + "db_conn=%.2fms | cache=%.2fms | compute=%.2fms | " + "total_query=%.2fms | total=%.2fms | " + "cached=%d | computed=%d", + db_conn_time, + cache_time, + compute_time, + query_time, + total_time, + len(cached_top_users), + len(computed_results), + ) + + return http_success( + { + "leaderboards": leaderboards, + "now": datetime.now(timezone.utc), + } + ) + + +# ============================================================================= +# Strategy 2: Original - No caching, compute all in one query +# ============================================================================= + +def _get_leaderboards_original(total_start: float): + """ + Get leaderboard summaries without caching (original implementation). + """ # 1. Database connection db_conn_start = time.perf_counter() conn = get_db_connection() db_conn_time = (time.perf_counter() - db_conn_start) * 1000 - # 2. Query execution (v2 is default, v1 for legacy) - query = _get_query_v1() if use_v1 else _get_query() + # 2. 
Query execution + query = _get_query() query_start = time.perf_counter() with conn.cursor() as cur: cur.execute(query) @@ -39,16 +252,12 @@ def index(): for lb in leaderboards: if lb["gpu_types"] is None: lb["gpu_types"] = [] - transform_time = (time.perf_counter() - transform_start) * 1000 + transform_time: float = (time.perf_counter() - transform_start) * 1000 total_time = (time.perf_counter() - total_start) * 1000 - # Log timing breakdown - version = "v1" if use_v1 else "v2" logger.info( - "[Perf] leaderboard_summaries (%s) | " - "db_conn=%.2fms | query=%.2fms | transform=%.2fms | total=%.2fms", - version, + "[Perf] leaderboard_summaries (original) | db_conn=%.2fms | query=%.2fms | transform=%.2fms | total=%.2fms", db_conn_time, query_time, transform_time, @@ -56,20 +265,156 @@ def index(): ) return http_success( - {"leaderboards": leaderboards, "now": datetime.now(timezone.utc)} + { + "leaderboards": leaderboards, + "now": datetime.now(timezone.utc), + } ) +# ============================================================================= +# SQL Query Builders +# ============================================================================= + + +def _get_leaderboard_metadata_query(): + """Get leaderboard metadata (id, name, deadline, gpu_types).""" + return """ + WITH + gpu_types_agg AS ( + SELECT + leaderboard_id, + jsonb_agg(DISTINCT gpu_type) AS gpu_types + FROM leaderboard.gpu_type + GROUP BY leaderboard_id + ), + priority_gpu AS ( + SELECT DISTINCT ON (leaderboard_id) + leaderboard_id, + gpu_type + FROM leaderboard.gpu_type + ORDER BY leaderboard_id, + CASE gpu_type + WHEN 'B200' THEN 1 + WHEN 'H100' THEN 2 + WHEN 'MI300' THEN 3 + WHEN 'A100' THEN 4 + WHEN 'L4' THEN 5 + WHEN 'T4' THEN 6 + ELSE 7 + END, + gpu_type + ) + SELECT + l.id, + jsonb_build_object( + 'id', l.id, + 'name', l.name, + 'deadline', l.deadline, + 'gpu_types', COALESCE(g.gpu_types, '[]'::jsonb), + 'priority_gpu_type', p.gpu_type + ) + FROM leaderboard.leaderboard l + LEFT JOIN gpu_types_agg g 
ON g.leaderboard_id = l.id + LEFT JOIN priority_gpu p ON p.leaderboard_id = l.id + ORDER BY l.id DESC; + """ + + +def _get_query_for_ids(): + """ + Get top_users for specific leaderboard IDs only. + Returns (leaderboard_id, top_users_json) pairs. + + Usage: cur.execute(_get_query_for_ids(), (tuple(leaderboard_ids),) * 2) + """ + return """ + WITH + priority_gpu AS ( + SELECT DISTINCT ON (leaderboard_id) + leaderboard_id, + gpu_type + FROM leaderboard.gpu_type + WHERE leaderboard_id IN %s + ORDER BY leaderboard_id, + CASE gpu_type + WHEN 'B200' THEN 1 + WHEN 'H100' THEN 2 + WHEN 'MI300' THEN 3 + WHEN 'A100' THEN 4 + WHEN 'L4' THEN 5 + WHEN 'T4' THEN 6 + ELSE 7 + END, + gpu_type + ), + personal_best_candidates AS ( + SELECT + r.runner, + s.leaderboard_id, + s.user_id, + u.user_name, + r.score, + RANK() OVER ( + PARTITION BY s.leaderboard_id, r.runner, s.user_id + ORDER BY r.score ASC + ) AS personal_submission_rank + FROM leaderboard.runs r + JOIN leaderboard.submission s ON r.submission_id = s.id + JOIN priority_gpu p ON p.leaderboard_id = s.leaderboard_id + AND p.gpu_type = r.runner + LEFT JOIN leaderboard.user_info u ON s.user_id = u.id + WHERE NOT r.secret + AND r.score IS NOT NULL + AND r.passed + AND s.leaderboard_id IN %s + ), + personal_best_runs AS ( + SELECT * FROM personal_best_candidates + WHERE personal_submission_rank = 1 + ), + ranked_users AS ( + SELECT + leaderboard_id, + runner, + user_name, + score, + RANK() OVER ( + PARTITION BY leaderboard_id, runner + ORDER BY score ASC + ) AS user_rank + FROM personal_best_runs + ), + top_users_agg AS ( + SELECT + leaderboard_id, + jsonb_agg( + jsonb_build_object( + 'rank', user_rank, + 'score', score, + 'user_name', user_name + ) + ORDER BY user_rank + ) AS top_users + FROM ranked_users + WHERE user_rank <= 3 + GROUP BY leaderboard_id + ) + SELECT leaderboard_id, top_users + FROM top_users_agg; + """ + + def _get_query(): """ - Optimized query for leaderboard summaries (default). 
+ Optimized query for leaderboard summaries (v2). Performance optimizations: 1. Use DISTINCT ON instead of ROW_NUMBER for priority GPU selection 2. Pre-aggregate GPU types to avoid correlated subqueries 3. Pre-aggregate top users JSON to avoid correlated subqueries """ - query = """ + return """ WITH -- Pre-aggregate GPU types per leaderboard (avoids correlated subquery) gpu_types_agg AS ( @@ -99,7 +444,7 @@ def _get_query(): gpu_type ), - -- Step 1: Get each user's best run per leaderboard+runner (same as v1) + -- Step 1: Get each user's best run per leaderboard+runner personal_best_candidates AS ( SELECT r.runner, @@ -127,7 +472,7 @@ def _get_query(): WHERE personal_submission_rank = 1 ), - -- Step 3: Rank users by score (same as v1) + -- Step 3: Rank users by score ranked_users AS ( SELECT leaderboard_id, @@ -141,7 +486,7 @@ def _get_query(): FROM personal_best_runs ), - -- Pre-aggregate top 3 users JSON (optimization over v1) + -- Pre-aggregate top 3 users JSON top_users_agg AS ( SELECT leaderboard_id, @@ -172,106 +517,4 @@ def _get_query(): LEFT JOIN priority_gpu p ON p.leaderboard_id = l.id LEFT JOIN top_users_agg t ON t.leaderboard_id = l.id ORDER BY l.id DESC; - """ - return query - - -def _get_query_v1(): - """Legacy query (use ?v1 to enable).""" - query = """ - WITH - - -- Get basic information about active leaderboards. - active_leaderboards AS ( - SELECT id, name, deadline FROM leaderboard.leaderboard - ), - - -- Get all the GPU types for each leaderboard. - gpu_types AS ( - SELECT DISTINCT leaderboard_id, gpu_type FROM leaderboard.gpu_type - WHERE leaderboard_id IN (SELECT id FROM active_leaderboards) - ), - - -- Get the "highest priority" GPU type for each leaderboard. - priority_gpu_types AS ( - SELECT leaderboard_id, gpu_type FROM ( - SELECT - leaderboard_id, - gpu_type, - -- Assign priority based on the how "capable" GPT-4o thought - -- various GPU types were. 
- ROW_NUMBER() OVER ( - PARTITION BY leaderboard_id - ORDER BY - CASE gpu_type - WHEN 'B200' THEN 1 - WHEN 'H100' THEN 2 - WHEN 'MI300' THEN 3 - WHEN 'A100' THEN 4 - WHEN 'L4' THEN 5 - WHEN 'T4' THEN 6 - ELSE 7 -- Lowest priority for any other type. - END ASC, - gpu_type ASC - ) as rn - FROM leaderboard.gpu_type - WHERE leaderboard_id IN (SELECT id FROM active_leaderboards) - ) ranked_gpu_types - WHERE rn = 1 - ), - - -- Get each user's best run for each GPU type (runner) on the active - -- leaderboards. - personal_best_candidates AS ( - SELECT r.runner AS runner, - s.leaderboard_id AS leaderboard_id, - u.user_name AS user_name, - r.score AS score, - RANK() OVER (PARTITION BY s.leaderboard_id, r.runner, u.id - ORDER BY r.score ASC) AS personal_submission_rank - FROM leaderboard.runs r - JOIN leaderboard.submission s ON r.submission_id = s.id - JOIN active_leaderboards a ON s.leaderboard_id = a.id - JOIN priority_gpu_types p on p.leaderboard_id = a.id - AND p.gpu_type = r.runner - LEFT JOIN leaderboard.user_info u ON s.user_id = u.id - WHERE NOT r.secret AND r.score IS NOT NULL AND r.passed - ), - - -- Select only the best run for each user and GPU type. - personal_best_runs AS ( - SELECT * FROM personal_best_candidates WHERE personal_submission_rank = 1 - ), - - -- Order the personal best runs by score for each leaderboard and GPU type. - competitive_rankings AS ( - SELECT leaderboard_id, runner, user_name, score, - RANK() OVER (PARTITION BY leaderboard_id, runner ORDER BY score ASC) AS user_rank - FROM personal_best_runs) - - -- Build the JSON response. - SELECT jsonb_build_object( - 'id', l.id, - 'name', l.name, - 'deadline', l.deadline, - 'gpu_types', (SELECT jsonb_agg(gpu_type) FROM gpu_types g WHERE g.leaderboard_id = l.id), - 'priority_gpu_type', (SELECT g.gpu_type FROM priority_gpu_types g WHERE g.leaderboard_id = l.id), - 'top_users', - - -- For the priority GPU type, get the top 3 users by rank. 
- (SELECT jsonb_agg( - jsonb_build_object( - 'rank', r.user_rank, - 'score', r.score, - 'user_name', r.user_name - ) - ORDER BY r.user_rank ASC - ) - FROM competitive_rankings r - WHERE r.leaderboard_id = l.id AND r.user_rank <= 3 - ) - ) - FROM active_leaderboards l - ORDER BY l.id DESC; - """ - return query + """ diff --git a/kernelboard/health.py b/kernelboard/health.py index 0a460f2..6110d93 100644 --- a/kernelboard/health.py +++ b/kernelboard/health.py @@ -5,7 +5,7 @@ from flask import current_app as app from kernelboard.lib.db import get_db_connection -from kernelboard.lib.redis_connection import create_redis_connection +from kernelboard.lib.redis_connection import get_redis_connection from kernelboard.lib.status_code import ( http_error, http_success, @@ -27,7 +27,7 @@ def health(): all_checks_passed = False cert_reqs = os.getenv("REDIS_SSL_CERT_REQS") - redis_conn = create_redis_connection(cert_reqs=cert_reqs) + redis_conn = get_redis_connection(cert_reqs=cert_reqs) if redis_conn is None: app.logger.error("redis_conn is None. Is REDIS_URL set?") all_checks_passed = False diff --git a/kernelboard/lib/redis_connection.py b/kernelboard/lib/redis_connection.py index 6846345..2443a92 100644 --- a/kernelboard/lib/redis_connection.py +++ b/kernelboard/lib/redis_connection.py @@ -2,14 +2,23 @@ import redis +# Singleton Redis connection +_redis_client: redis.Redis | None = None -def create_redis_connection( + +def get_redis_connection( cert_reqs: str | None = None, ) -> redis.Redis | None: """ - Creates a redis connection using application configuration. + Get a singleton Redis connection. + Reuses the same connection across requests for better performance. 
""" - url = os.getenv("REDIS_URL") + global _redis_client + + if _redis_client is not None: + return _redis_client + + url: str | None = os.getenv("REDIS_URL") if url is None: return None @@ -17,4 +26,5 @@ def create_redis_connection( if cert_reqs: kwargs["ssl_cert_reqs"] = cert_reqs - return redis.from_url(url, **kwargs) + _redis_client = redis.from_url(url, **kwargs) + return _redis_client diff --git a/tests/test_health.py b/tests/test_health.py index 8d3b667..be0ea43 100644 --- a/tests/test_health.py +++ b/tests/test_health.py @@ -36,7 +36,7 @@ def test_health_database_error(client): def test_health_no_redis_config(client): with patch( - "kernelboard.health.create_redis_connection", return_value=None + "kernelboard.health.get_redis_connection", return_value=None ): assert_unhealthy(client.get("/health")) @@ -48,7 +48,7 @@ def test_health_redis_error(client): ) with patch( - "kernelboard.health.create_redis_connection", return_value=mock_conn + "kernelboard.health.get_redis_connection", return_value=mock_conn ): assert_unhealthy(client.get("/health")) mock_conn.ping.assert_called_once() diff --git a/tests/test_redis.py b/tests/test_redis.py index 3ac1735..597f4dc 100644 --- a/tests/test_redis.py +++ b/tests/test_redis.py @@ -1,7 +1,7 @@ -from kernelboard.lib.redis_connection import create_redis_connection +from kernelboard.lib.redis_connection import get_redis_connection def test_get_and_close_redis_connection(app): with app.app_context(): - conn = create_redis_connection() + conn = get_redis_connection() assert conn is not None