From cbd5fbd2dd71870d0a7642d81da93121ba9a29be Mon Sep 17 00:00:00 2001 From: Louis Perso Date: Sun, 12 Apr 2026 12:36:49 +0200 Subject: [PATCH 1/5] =?UTF-8?q?Fix=20SR:=20skip=20Itin=C3=A9raires=20detai?= =?UTF-8?q?l=20+=20abandon=20imm=C3=A9diat=20sur=20RemoteDisconnected?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Les 458 radars I_xx_xxx (Itinéraires) n'ont pas d'endpoint detail fonctionnel — le serveur coupe la connexion sans répondre. Avec 5 retries × backoff exponentiel ça bloquait tous les workers. - Skip detail fetch pour typeLabel == "Itinéraires" - RemoteDisconnected → abandon immédiat sans retry - Import RequestsConnectionError pour détecter spécifiquement ce cas Co-Authored-By: Claude Sonnet 4.6 --- scripts/fetch_radars.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/scripts/fetch_radars.py b/scripts/fetch_radars.py index 95778cd..44289bd 100644 --- a/scripts/fetch_radars.py +++ b/scripts/fetch_radars.py @@ -18,6 +18,7 @@ from io import StringIO import requests +from requests.exceptions import ConnectionError as RequestsConnectionError # ── Config ──────────────────────────────────────────────────── @@ -160,6 +161,10 @@ def fetch_sr_detail(session: requests.Session, raw_id: str) -> dict | None: log("SR", f" {raw_id} → timeout tentative {attempt}/{SR_RETRIES}") if attempt < SR_RETRIES: time.sleep(2 * attempt) + except RequestsConnectionError as e: + # RemoteDisconnected = le serveur coupe sans répondre, pas la peine de retry + log("SR", f" {raw_id} → connexion coupée (abandon) : {e}") + return None except Exception as e: log("SR", f" {raw_id} → erreur tentative {attempt}/{SR_RETRIES}: {e}") if attempt < SR_RETRIES: @@ -200,8 +205,13 @@ def process_one(basic: dict) -> dict | None: if not raw_id or lat is None or lng is None: return None, False - radar_type = SR_TYPE_MAP.get(basic.get("typeLabel", ""), basic.get("typeLabel", "")) - detail = fetch_sr_detail(session, raw_id) + 
radar_type = SR_TYPE_MAP.get(basic.get("typeLabel", ""), basic.get("typeLabel", "")) + type_label = basic.get("typeLabel", "") + + # Les Itinéraires (I_xx_xxx) n'ont pas d'endpoint detail fonctionnel + # et n'ont pas de vitesse ponctuelle — on skip le fetch + skip_detail = type_label == "Itinéraires" + detail = None if skip_detail else fetch_sr_detail(session, raw_id) speed_car = speed_hgv = None department = route = direction = equipment = install_date = section_km = "" From 76458dc234121078b541085b7d245c4db768a7eb Mon Sep 17 00:00:00 2001 From: Louis Perso Date: Sun, 12 Apr 2026 12:44:53 +0200 Subject: [PATCH 2/5] =?UTF-8?q?SR:=20exclure=20les=20Itin=C3=A9raires=20du?= =?UTF-8?q?=20pipeline=20=E2=80=94=20ce=20sont=20des=20zones,=20pas=20des?= =?UTF-8?q?=20radars?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "Sur toute la longueur de cet itinéraire, il peut y avoir de un à plusieurs radars, de plusieurs types." → pas un radar ponctuel, inutile en base. Filtre appliqué sur typeLabel == "Itinéraires" avant le fetch des détails (458 radars éliminés, ~12% du total SR). 
Co-Authored-By: Claude Sonnet 4.6 --- scripts/fetch_radars.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/scripts/fetch_radars.py b/scripts/fetch_radars.py index 44289bd..a2d26ac 100644 --- a/scripts/fetch_radars.py +++ b/scripts/fetch_radars.py @@ -195,8 +195,10 @@ def fetch_sr() -> list[dict]: if basic_list is None: raise RuntimeError("Impossible de récupérer /radars/all après plusieurs tentatives") + # Les Itinéraires (I_xx_xxx) sont des zones, pas des radars ponctuels — on les exclut + basic_list = [r for r in basic_list if r.get("typeLabel") != "Itinéraires"] total = len(basic_list) - log("SR", f"{total} radars dans la liste — fetch des détails ({SR_MAX_CONCURRENT} workers)...") + log("SR", f"{total} radars dans la liste (Itinéraires exclus) — fetch des détails ({SR_MAX_CONCURRENT} workers)...") def process_one(basic: dict) -> dict | None: raw_id = basic.get("id", "") @@ -205,13 +207,8 @@ def process_one(basic: dict) -> dict | None: if not raw_id or lat is None or lng is None: return None, False - radar_type = SR_TYPE_MAP.get(basic.get("typeLabel", ""), basic.get("typeLabel", "")) - type_label = basic.get("typeLabel", "") - - # Les Itinéraires (I_xx_xxx) n'ont pas d'endpoint detail fonctionnel - # et n'ont pas de vitesse ponctuelle — on skip le fetch - skip_detail = type_label == "Itinéraires" - detail = None if skip_detail else fetch_sr_detail(session, raw_id) + radar_type = SR_TYPE_MAP.get(basic.get("typeLabel", ""), basic.get("typeLabel", "")) + detail = fetch_sr_detail(session, raw_id) speed_car = speed_hgv = None department = route = direction = equipment = install_date = section_km = "" From 25848090dda9dc531d24c06fbeebce2d82075b36 Mon Sep 17 00:00:00 2001 From: Louis Perso Date: Sun, 12 Apr 2026 12:49:59 +0200 Subject: [PATCH 3/5] SR: 1 worker pour passer sous le rate-limit du serveur MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Le serveur SR bloque l'IP GHA après ~300 
requêtes à 25 req/s. 1 worker = ~3 req/s (throttlé par le RTT réseau), suffisant pour fetcher les 3206 radars en ~15-20 min sans déclencher le rate-limit. Co-Authored-By: Claude Sonnet 4.6 --- scripts/fetch_radars.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/fetch_radars.py b/scripts/fetch_radars.py index a2d26ac..4be7007 100644 --- a/scripts/fetch_radars.py +++ b/scripts/fetch_radars.py @@ -30,9 +30,9 @@ OVERPASS_URL = "https://overpass-api.de/api/interpreter" OVERPASS_QUERY = '[out:json][timeout:360];node["highway"="speed_camera"](35.0,-11.0,72.0,45.0);out body;' -SR_MAX_CONCURRENT = 4 +SR_MAX_CONCURRENT = 1 # le serveur SR rate-limite à ~25 req/s depuis GHA — 1 worker = ~3 req/s SR_RETRIES = 5 -SR_MIN_DELAY_S = 0.15 # délai minimum entre deux requêtes dans le même worker +SR_MIN_DELAY_S = 0.0 # pas de délai artificiel, le RTT réseau suffit (~300ms) BATCH_SIZE = 500 From aa414e3843f85745ea790f83df158e1749ab4dc1 Mon Sep 17 00:00:00 2001 From: Louis Perso Date: Sun, 12 Apr 2026 12:54:38 +0200 Subject: [PATCH 4/5] =?UTF-8?q?SR:=20recr=C3=A9er=20la=20session=20TCP=20a?= =?UTF-8?q?pr=C3=A8s=20disconnect=20(~300=20req/session=20limite=20serveur?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Le serveur SR ferme la connexion persistante après ~300 requêtes. Sur SessionDisconnected : pause 10s + nouvelle session + retry, pour reprendre proprement sans perdre les radars restants. Utilise un conteneur mutable [session] pour partager la session dans la closure process_one sans piège nonlocal.
Co-Authored-By: Claude Sonnet 4.6 --- scripts/fetch_radars.py | 53 +++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/scripts/fetch_radars.py b/scripts/fetch_radars.py index 4be7007..d81235d 100644 --- a/scripts/fetch_radars.py +++ b/scripts/fetch_radars.py @@ -132,16 +132,25 @@ def stable_sr_id(raw_id: str) -> int: return int(digits) + 200_000_000_000 return abs(hash(raw_id)) + 100_000_000_000 +class SessionDisconnected(Exception): + """Le serveur a coupé la connexion TCP — la session doit être recréée.""" + +def make_sr_session() -> requests.Session: + s = requests.Session() + s.headers.update({ + "Accept": "application/json", + "User-Agent": "Mozilla/5.0 (compatible; RadarAlert/1.0)", + }) + return s + def fetch_sr_detail(session: requests.Session, raw_id: str) -> dict | None: url = f"{SR_BASE_URL}/radars/{raw_id}" for attempt in range(1, SR_RETRIES + 1): try: - t0 = time.time() resp = session.get(url, timeout=30) - elapsed = time.time() - t0 if resp.status_code == 429 or resp.status_code >= 500: - wait = min(2 ** attempt, 30) # backoff exponentiel plafonné à 30s + wait = min(2 ** attempt, 30) log("SR", f" {raw_id} → HTTP {resp.status_code}, attente {wait}s (tentative {attempt}/{SR_RETRIES})") time.sleep(wait) continue @@ -150,35 +159,28 @@ def fetch_sr_detail(session: requests.Session, raw_id: str) -> dict | None: log("SR", f" {raw_id} → HTTP {resp.status_code} (abandon)") return None - # Délai minimal pour ne pas flooder le serveur - remaining = SR_MIN_DELAY_S - elapsed - if remaining > 0: - time.sleep(remaining) - return resp.json() except requests.exceptions.Timeout: log("SR", f" {raw_id} → timeout tentative {attempt}/{SR_RETRIES}") if attempt < SR_RETRIES: time.sleep(2 * attempt) - except RequestsConnectionError as e: - # RemoteDisconnected = le serveur coupe sans répondre, pas la peine de retry - log("SR", f" {raw_id} → connexion coupée (abandon) : {e}") - return None + except RequestsConnectionError: + 
# Le serveur a fermé la connexion persistente (~300 req/session) + # On propage pour que l'appelant recrée la session et réessaie + raise SessionDisconnected(raw_id) except Exception as e: log("SR", f" {raw_id} → erreur tentative {attempt}/{SR_RETRIES}: {e}") if attempt < SR_RETRIES: time.sleep(2 * attempt) return None +SR_RECONNECT_WAIT_S = 10 # pause après coupure avant de recréer la session + def fetch_sr() -> list[dict]: log("SR", "Connexion à securite-routiere.gouv.fr...") t0 = time.time() - session = requests.Session() - session.headers.update({ - "Accept": "application/json", - "User-Agent": "Mozilla/5.0 (compatible; RadarAlert/1.0)", - }) + session = make_sr_session() # /radars/all : retry car le serveur est fragile basic_list = None @@ -200,6 +202,10 @@ def fetch_sr() -> list[dict]: total = len(basic_list) log("SR", f"{total} radars dans la liste (Itinéraires exclus) — fetch des détails ({SR_MAX_CONCURRENT} workers)...") + # Conteneur mutable pour partager la session entre la closure et fetch_sr + # (évite le piège nonlocal avec le ThreadPoolExecutor) + sess = [session] + def process_one(basic: dict) -> dict | None: raw_id = basic.get("id", "") lat = basic.get("lat") @@ -208,7 +214,18 @@ def process_one(basic: dict) -> dict | None: return None, False radar_type = SR_TYPE_MAP.get(basic.get("typeLabel", ""), basic.get("typeLabel", "")) - detail = fetch_sr_detail(session, raw_id) + + # Retry avec nouvelle session si le serveur coupe la connexion TCP + detail = None + for reconnect in range(SR_RETRIES): + try: + detail = fetch_sr_detail(sess[0], raw_id) + break + except SessionDisconnected: + log("SR", f" {raw_id} → connexion coupée — nouvelle session dans {SR_RECONNECT_WAIT_S}s (reconnect {reconnect + 1}/{SR_RETRIES})") + sess[0].close() + time.sleep(SR_RECONNECT_WAIT_S) + sess[0] = make_sr_session() speed_car = speed_hgv = None department = route = direction = equipment = install_date = section_km = "" From 390826f39f72ca421631761eac218fd7152a598b Mon Sep 17 
00:00:00 2001 From: Louis Perso Date: Mon, 13 Apr 2026 20:27:01 +0200 Subject: [PATCH 5/5] =?UTF-8?q?SR:=20remplace=20reconnexion=20complexe=20p?= =?UTF-8?q?ar=20shuffle=20+=20jitter=201s=20=C2=B1=200.5s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tests locaux : 150/150 OK sans aucun ban avec ce rythme. Extrapolation : ~73 min pour 3206 radars (dans le timeout GHA 90 min). - Ordre aléatoire (random.shuffle) : évite les patterns répétitifs - Délai 1.0s ± 0.5s entre chaque requête detail - Fetch séquentiel simple (plus besoin de ThreadPoolExecutor pour SR) - Supprime SessionDisconnected, make_sr_session, SR_RECONNECT_WAIT_S - Supprime SR_MAX_CONCURRENT (1 worker = séquentiel) Co-Authored-By: Claude Sonnet 4.6 --- scripts/fetch_radars.py | 129 ++++++++++++++-------------------------- 1 file changed, 44 insertions(+), 85 deletions(-) diff --git a/scripts/fetch_radars.py b/scripts/fetch_radars.py index d81235d..2d8fe06 100644 --- a/scripts/fetch_radars.py +++ b/scripts/fetch_radars.py @@ -12,13 +12,13 @@ import csv import hashlib import os +import random import time from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime, timezone from io import StringIO import requests -from requests.exceptions import ConnectionError as RequestsConnectionError # ── Config ──────────────────────────────────────────────────── @@ -30,9 +30,9 @@ OVERPASS_URL = "https://overpass-api.de/api/interpreter" OVERPASS_QUERY = '[out:json][timeout:360];node["highway"="speed_camera"](35.0,-11.0,72.0,45.0);out body;' -SR_MAX_CONCURRENT = 1 # le serveur SR rate-limite à ~25 req/s depuis GHA — 1 worker = ~3 req/s -SR_RETRIES = 5 -SR_MIN_DELAY_S = 0.0 # pas de délai artificiel, le RTT réseau suffit (~300ms) +SR_RETRIES = 3 +SR_DELAY_BASE_S = 1.0 # délai de base entre chaque requête detail SR +SR_DELAY_JITTER_S = 0.5 # ± jitter aléatoire (évite les patterns détectables) BATCH_SIZE = 500 @@ -132,55 +132,34 @@ def 
stable_sr_id(raw_id: str) -> int: return int(digits) + 200_000_000_000 return abs(hash(raw_id)) + 100_000_000_000 -class SessionDisconnected(Exception): - """Le serveur a coupé la connexion TCP — la session doit être recréée.""" - -def make_sr_session() -> requests.Session: - s = requests.Session() - s.headers.update({ - "Accept": "application/json", - "User-Agent": "Mozilla/5.0 (compatible; RadarAlert/1.0)", - }) - return s - def fetch_sr_detail(session: requests.Session, raw_id: str) -> dict | None: url = f"{SR_BASE_URL}/radars/{raw_id}" for attempt in range(1, SR_RETRIES + 1): try: resp = session.get(url, timeout=30) - if resp.status_code == 429 or resp.status_code >= 500: wait = min(2 ** attempt, 30) log("SR", f" {raw_id} → HTTP {resp.status_code}, attente {wait}s (tentative {attempt}/{SR_RETRIES})") time.sleep(wait) continue - if not resp.ok: - log("SR", f" {raw_id} → HTTP {resp.status_code} (abandon)") return None - return resp.json() - - except requests.exceptions.Timeout: - log("SR", f" {raw_id} → timeout tentative {attempt}/{SR_RETRIES}") - if attempt < SR_RETRIES: - time.sleep(2 * attempt) - except RequestsConnectionError: - # Le serveur a fermé la connexion persistente (~300 req/session) - # On propage pour que l'appelant recrée la session et réessaie - raise SessionDisconnected(raw_id) except Exception as e: - log("SR", f" {raw_id} → erreur tentative {attempt}/{SR_RETRIES}: {e}") if attempt < SR_RETRIES: time.sleep(2 * attempt) + else: + log("SR", f" {raw_id} → abandon après {SR_RETRIES} tentatives : {e}") return None -SR_RECONNECT_WAIT_S = 10 # pause après coupure avant de recréer la session - def fetch_sr() -> list[dict]: log("SR", "Connexion à securite-routiere.gouv.fr...") t0 = time.time() - session = make_sr_session() + session = requests.Session() + session.headers.update({ + "Accept": "application/json", + "User-Agent": "Mozilla/5.0 (compatible; RadarAlert/1.0)", + }) # /radars/all : retry car le serveur est fragile basic_list = None @@ -199,40 
+178,31 @@ def fetch_sr() -> list[dict]: # Les Itinéraires (I_xx_xxx) sont des zones, pas des radars ponctuels — on les exclut basic_list = [r for r in basic_list if r.get("typeLabel") != "Itinéraires"] + + # Ordre aléatoire : évite les patterns détectables côté serveur + random.shuffle(basic_list) + total = len(basic_list) - log("SR", f"{total} radars dans la liste (Itinéraires exclus) — fetch des détails ({SR_MAX_CONCURRENT} workers)...") + log("SR", f"{total} radars (Itinéraires exclus, ordre aléatoire) — fetch séquentiel avec jitter {SR_DELAY_BASE_S}s ± {SR_DELAY_JITTER_S}s...") - # Conteneur mutable pour partager la session entre la closure et fetch_sr - # (évite le piège nonlocal avec le ThreadPoolExecutor) - sess = [session] + radars = [] + failed = with_speed = 0 + t_sr = time.time() - def process_one(basic: dict) -> dict | None: + for i, basic in enumerate(basic_list): raw_id = basic.get("id", "") lat = basic.get("lat") lng = basic.get("lng") if not raw_id or lat is None or lng is None: - return None, False + continue radar_type = SR_TYPE_MAP.get(basic.get("typeLabel", ""), basic.get("typeLabel", "")) - - # Retry avec nouvelle session si le serveur coupe la connexion TCP - detail = None - for reconnect in range(SR_RETRIES): - try: - detail = fetch_sr_detail(sess[0], raw_id) - break - except SessionDisconnected: - log("SR", f" {raw_id} → connexion coupée — nouvelle session dans {SR_RECONNECT_WAIT_S}s (reconnect {reconnect + 1}/{SR_RETRIES})") - sess[0].close() - time.sleep(SR_RECONNECT_WAIT_S) - sess[0] = make_sr_session() + detail = fetch_sr_detail(session, raw_id) speed_car = speed_hgv = None department = route = direction = equipment = install_date = section_km = "" - detail_ok = False if detail: - detail_ok = True for rule in (detail.get("rulesmesured") or []): mname = rule.get("macinename", "") if mname.startswith("vitesse_vl_"): @@ -241,7 +211,6 @@ def process_one(basic: dict) -> dict | None: elif mname.startswith("vitesse_pl_"): try: speed_hgv = 
int(mname[len("vitesse_pl_"):]) except ValueError: pass - troncon = detail.get("radartronconkm", "") section_km = troncon.replace(",", ".").strip() if isinstance(troncon, str) else "" department = detail.get("department", "") @@ -249,8 +218,13 @@ def process_one(basic: dict) -> dict | None: direction = detail.get("radardirection", "") equipment = detail.get("radarequipment", "") install_date = detail.get("radarinstalldate", "") + else: + failed += 1 - radar = { + if speed_car is not None: + with_speed += 1 + + radars.append({ "source_id": raw_id, "stable_id": stable_sr_id(raw_id), "lat": lat, @@ -264,38 +238,23 @@ def process_one(basic: dict) -> dict | None: "equipment": equipment, "install_date": install_date, "section_length_km": section_km, - } - radar["data_hash"] = make_hash( - lat, lng, radar_type, speed_car, speed_hgv, - department, route, direction, equipment, install_date, section_km - ) - return radar, detail_ok + "data_hash": make_hash( + lat, lng, radar_type, speed_car, speed_hgv, + department, route, direction, equipment, install_date, section_km + ), + }) - radars = [] - fetch_failed = no_detail = with_speed = 0 - t_sr = time.time() - with ThreadPoolExecutor(max_workers=SR_MAX_CONCURRENT) as executor: - futures = {executor.submit(process_one, b): b for b in basic_list} - done = 0 - for future in as_completed(futures): - result, ok = future.result() - if result: - radars.append(result) - if result.get("speed_car") is not None: - with_speed += 1 - if not ok: - no_detail += 1 - else: - fetch_failed += 1 - done += 1 - if done % 100 == 0: - elapsed = time.time() - t_sr - rate = done / elapsed - eta = (total - done) / rate if rate > 0 else 0 - pct = done * 100 // total - log("SR", f" {done}/{total} ({pct}%) — fetch_ko={fetch_failed} detail_ko={no_detail} avec_vitesse={with_speed} — {rate:.1f} req/s — ETA {eta:.0f}s") - - log("SR", f"Terminé en {time.time() - t0:.1f}s — {len(radars)} radars — fetch_ko={fetch_failed} detail_ko={no_detail} 
avec_vitesse={with_speed}") + done = i + 1 + if done % 100 == 0: + elapsed = time.time() - t_sr + eta = (total - done) * elapsed / done + pct = done * 100 // total + log("SR", f" {done}/{total} ({pct}%) — detail_ko={failed} avec_vitesse={with_speed} — ETA {eta:.0f}s") + + # Délai avec jitter pour rester sous le radar du rate-limiter + time.sleep(max(0.2, SR_DELAY_BASE_S + random.uniform(-SR_DELAY_JITTER_S, SR_DELAY_JITTER_S))) + + log("SR", f"Terminé en {time.time() - t0:.1f}s — {len(radars)} radars — detail_ko={failed} avec_vitesse={with_speed}") return radars