From 6dd54ab2350bd1cd4be8864ac4adbc959d0036a7 Mon Sep 17 00:00:00 2001 From: Tom8266 Date: Tue, 5 May 2026 13:33:57 +0800 Subject: [PATCH 1/2] fix: detect Tencent SILK (\x02 prefix) in audio magic bytes to avoid ffmpeg failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QQ official bot sends voice in Tencent SILK format (leading \x02 byte before #!SILK_V3 magic). _get_audio_magic_type() had two SILK detection bugs: 1. Standard SILK: off-by-one slice, header[:8] vs b'#!SILK_V3' (8 != 9 bytes) — never matched 2. Tencent SILK: not detected at all Fixes: - Standard SILK: header[:9] == b'#!SILK_V3' (correct 9-byte slice) - Tencent SILK: header[:1] == b"\x02" and header[1:10] == b'#!SILK_V3' - ensure_wav() routes detected silk to tencent_silk_to_wav() Before: QQ voice → ffmpeg → 'Invalid data found' After: QQ voice → magic detects silk → tencent_silk_to_wav → WAV OK --- astrbot/core/utils/media_utils.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/astrbot/core/utils/media_utils.py b/astrbot/core/utils/media_utils.py index 03d7912cb6..5aad35a7f8 100644 --- a/astrbot/core/utils/media_utils.py +++ b/astrbot/core/utils/media_utils.py @@ -15,6 +15,7 @@ from astrbot import logger from astrbot.core.utils.astrbot_path import get_astrbot_temp_path +from astrbot.core.utils.tencent_record_helper import tencent_silk_to_wav IMAGE_COMPRESS_DEFAULT_MAX_SIZE = 1280 IMAGE_COMPRESS_DEFAULT_QUALITY = 95 @@ -300,9 +301,17 @@ async def ensure_wav(audio_path: str, output_path: str | None = None) -> str: if not audio_path: return audio_path - if _get_audio_magic_type(audio_path) == "wav": + audio_type = _get_audio_magic_type(audio_path) + if audio_type == "wav": return audio_path + if audio_type == "silk": + if output_path is None: + temp_dir = get_astrbot_temp_path() + os.makedirs(temp_dir, exist_ok=True) + output_path = os.path.join(temp_dir, f"media_audio_{uuid.uuid4().hex}.wav") + return await 
tencent_silk_to_wav(audio_path, output_path) + return await convert_audio_to_wav(audio_path, output_path) @@ -341,7 +350,11 @@ def _get_audio_magic_type(audio_path: str) -> str: if header[:4] == b"ftyp" and b"mp4" in header[:8]: return "mp4" - if header[:8] == b"#!SILK_V3": + if header[:9] == b"#!SILK_V3": + return "silk" + + # Tencent SILK: leading \x02 byte before #!SILK_V3 + if header[:1] == b"\x02" and header[1:10] == b"#!SILK_V3": return "silk" return "" From 7aee01905042974a280aa676bcc8242a326ba20b Mon Sep 17 00:00:00 2001 From: Tom8266 Date: Tue, 5 May 2026 14:17:41 +0800 Subject: [PATCH 2/2] refactor: use startswith() for SILK magic byte detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace manual slice comparisons with startswith() — cleaner, less error-prone, and immune to off-by-one slice errors. Suggested by: sourcery-ai --- astrbot/core/utils/media_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/astrbot/core/utils/media_utils.py b/astrbot/core/utils/media_utils.py index 5aad35a7f8..4d56f33713 100644 --- a/astrbot/core/utils/media_utils.py +++ b/astrbot/core/utils/media_utils.py @@ -350,11 +350,11 @@ def _get_audio_magic_type(audio_path: str) -> str: if header[:4] == b"ftyp" and b"mp4" in header[:8]: return "mp4" - if header[:9] == b"#!SILK_V3": + if header.startswith(b"#!SILK_V3"): return "silk" # Tencent SILK: leading \x02 byte before #!SILK_V3 - if header[:1] == b"\x02" and header[1:10] == b"#!SILK_V3": + if header.startswith(b"\x02#!SILK_V3"): return "silk" return ""