From ef586174613bc82b52aa73dd311d7d9f17699835 Mon Sep 17 00:00:00 2001 From: howie <2318485+howie@users.noreply.github.com> Date: Mon, 11 May 2026 19:48:24 +0800 Subject: [PATCH 1/4] fix(media): validate Content-Type and magic bytes before sending to model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #776. When a Slack bot token lacks the `files:read` OAuth scope, Slack serves the workspace login HTML page (~55 KB) at HTTP 200 with a `text/html` Content-Type instead of the requested file binary. `download_and_encode_image` previously accepted this response because: 1. It never inspected the HTTP response `Content-Type` header. 2. On `resize_and_compress` failure for a body ≤ 1 MB it fell back to forwarding the raw bytes under the Slack-reported MIME (`image/png`), bypassing any format check. The result: a `ContentBlock::Image { media_type: "image/png", data: }` flowed through to Anthropic, which 400'd with "Could not process image". Because claude-agent-acp persists the user message into the session JSONL before the API reply, the bad block replayed on every subsequent turn in that Slack thread until an operator manually deleted the JSONL inside the pod. Changes: - Add `MediaFetchError` enum to `src/media.rs` so callers can distinguish "not an image, skip silently" (`NotAnImage`) from "claimed image, got unexpected bytes" (`UnsupportedResponseType`, `InvalidImageBody`). - Add `validate_image_response(content_type, body)` pure helper that: - Rejects any HTTP response whose Content-Type (stripped of params, lowercased) is not in `{image/png, image/jpeg, image/gif, image/webp}`. - Sniffs magic bytes via `image::ImageReader::with_guessed_format()` (no new dependencies) and rejects anything that doesn't decode as one of the four supported formats. - Change `download_and_encode_image` signature from `-> Option` to `-> Result`, capturing the Content-Type header before consuming the response with `.bytes()`. - Remove the ≤ 1 MB resize-error fallback that was the direct bug path. - Update `src/slack.rs` call site: on validation failure, collect filenames and post one aggregated user-visible warning to the Slack thread: ":warning: I couldn't access the file(s) you shared (``). This often means the bot is missing the `files:read` OAuth scope. Please ask an admin to reinstall the app with that scope." - Update `src/discord.rs` call site: `warn!` log on failure (Discord URLs are signed-public so the Slack scope hint is not applicable there). Preserve the existing `is_video_file` fallback for `Err(NotAnImage)`. - Add 12 unit tests for `validate_image_response` including the exact bug repro case (HTML body labeled `image/png`, first 8 bytes `3c21444f43545950`). Out of scope / follow-up issues: - Secondary defense: deferring claude-agent-acp JSONL persistence until after model returns 200 (requires changes in the claude-agent-acp Node project). - Startup preflight calling Slack `auth.test` to warn loudly on missing scopes. - Same Content-Type/magic-byte hardening for `download_and_transcribe` and `download_and_read_text_file`. Co-Authored-By: Claude Opus 4.7 --- src/discord.rs | 52 ++++++--- src/media.rs | 301 ++++++++++++++++++++++++++++++++++++++++++++++--- src/slack.rs | 63 +++++++++-- 3 files changed, 370 insertions(+), 46 deletions(-) diff --git a/src/discord.rs b/src/discord.rs index ce2b5e11..a811cfbc 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -702,25 +702,43 @@ impl EventHandler for Handler { debug!(filename = %attachment.filename, "adding text file attachment"); extra_blocks.push(block); } - } else if let Some(block) = media::download_and_encode_image( - &attachment.url, - attachment.content_type.as_deref(), - &attachment.filename, - u64::from(attachment.size), - None, - ) - .await - { - debug!(url = %attachment.url, filename = %attachment.filename, "adding image attachment"); - extra_blocks.push(block); - } else if media::is_video_file(&attachment.filename, attachment.content_type.as_deref()) { - debug!(url = %attachment.url, filename = %attachment.filename, "adding video attachment link"); - extra_blocks.push(video_attachment_block( - &attachment.filename, + } else { + match media::download_and_encode_image( + &attachment.url, attachment.content_type.as_deref(), + &attachment.filename, u64::from(attachment.size), - &attachment.url, - )); + None, + ) + .await + { + Ok(block) => { + debug!(url = %attachment.url, filename = %attachment.filename, "adding image attachment"); + extra_blocks.push(block); + } + Err(media::MediaFetchError::NotAnImage) => { + if media::is_video_file( + &attachment.filename, + attachment.content_type.as_deref(), + ) { + debug!(url = %attachment.url, filename = %attachment.filename, "adding video attachment link"); + extra_blocks.push(video_attachment_block( + &attachment.filename, + attachment.content_type.as_deref(), + u64::from(attachment.size), + &attachment.url, + )); + } + } + Err(e) => { + tracing::warn!( + url = %attachment.url, + filename = %attachment.filename, + error = %e, + "image attachment failed" + ); + } + } } } diff --git a/src/media.rs b/src/media.rs index b42cfa6e..1f3b4fa0 100644 --- a/src/media.rs +++ b/src/media.rs @@ -21,7 +21,111 @@ const IMAGE_MAX_DIMENSION_PX: u32 = 1200; /// JPEG quality for compressed output. const IMAGE_JPEG_QUALITY: u8 = 75; +/// Error variants for `download_and_encode_image`. +#[derive(Debug)] +pub enum MediaFetchError { + /// URL empty or MIME/filename doesn't indicate an image; skip silently. + NotAnImage, + /// HTTP response Content-Type is not a supported image format. + UnsupportedResponseType { + hinted: Option, + actual: Option, + }, + /// Response body magic bytes don't match a supported image format. + InvalidImageBody { magic_prefix_hex: String }, + /// File exceeds the configured size limit. + SizeExceeded { actual: u64, limit: u64 }, + /// Network-level error (send or body-read). + Network(reqwest::Error), + /// Server returned a non-success HTTP status. + HttpStatus(reqwest::StatusCode), +} + +impl std::fmt::Display for MediaFetchError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::NotAnImage => write!(f, "not an image attachment"), + Self::UnsupportedResponseType { hinted, actual } => write!( + f, + "server returned unexpected content type (hinted: {}, actual: {})", + hinted.as_deref().unwrap_or("none"), + actual.as_deref().unwrap_or("none"), + ), + Self::InvalidImageBody { magic_prefix_hex } => write!( + f, + "response body is not a valid image (first 8 bytes: {magic_prefix_hex})" + ), + Self::SizeExceeded { actual, limit } => { + write!(f, "file size {actual} exceeds limit {limit}") + } + Self::Network(e) => write!(f, "network error: {e}"), + Self::HttpStatus(s) => write!(f, "HTTP {s}"), + } + } +} + +/// Format the first 8 bytes of a buffer as lowercase hex (no separator). +fn hex_prefix(body: &[u8]) -> String { + body.iter() + .take(8) + .map(|b| format!("{b:02x}")) + .collect::>() + .concat() +} + +/// Allowed MIME types for model-bound images. +const ALLOWED_IMAGE_MIME: &[&str] = &["image/png", "image/jpeg", "image/gif", "image/webp"]; + +/// Validate the HTTP response Content-Type and body magic bytes. +/// +/// Returns the detected `ImageFormat` on success. Both checks are applied: +/// the Content-Type allow-list rejects non-image responses early (e.g. Slack +/// serving an HTML login page when `files:read` scope is missing), and the +/// magic-byte sniff catches corrupted or mislabeled bodies regardless of the +/// header. +fn validate_image_response( + content_type: Option<&str>, + body: &[u8], +) -> Result { + if let Some(ct) = content_type { + let base = ct.split(';').next().unwrap_or(ct).trim().to_lowercase(); + if !ALLOWED_IMAGE_MIME.contains(&base.as_str()) { + return Err(MediaFetchError::UnsupportedResponseType { + hinted: None, + actual: Some(base), + }); + } + } + + let reader = match ImageReader::new(Cursor::new(body)).with_guessed_format() { + Ok(r) => r, + Err(_) => { + return Err(MediaFetchError::InvalidImageBody { + magic_prefix_hex: hex_prefix(body), + }); + } + }; + + match reader.format() { + Some( + image::ImageFormat::Png + | image::ImageFormat::Jpeg + | image::ImageFormat::Gif + | image::ImageFormat::WebP, + ) => Ok(reader.format().unwrap()), + _ => Err(MediaFetchError::InvalidImageBody { + magic_prefix_hex: hex_prefix(body), + }), + } +} + /// Download an image from a URL, resize/compress it, and return as a ContentBlock. +/// +/// Returns `Err(MediaFetchError::NotAnImage)` when the URL or MIME hint don't +/// indicate an image (silent skip). Returns other `Err` variants when the +/// download succeeded but the response bytes fail Content-Type or magic-byte +/// validation — callers should surface these to the user. +/// /// Pass `auth_token` for platforms that require authentication (e.g. Slack private files). pub async fn download_and_encode_image( url: &str, @@ -29,11 +133,11 @@ pub async fn download_and_encode_image( filename: &str, size: u64, auth_token: Option<&str>, -) -> Option { +) -> Result { const MAX_SIZE: u64 = 10 * 1024 * 1024; // 10 MB if url.is_empty() { - return None; + return Err(MediaFetchError::NotAnImage); } let mime = mime_hint.or_else(|| { @@ -51,17 +155,20 @@ pub async fn download_and_encode_image( let Some(mime) = mime else { debug!(filename, "skipping non-image attachment"); - return None; + return Err(MediaFetchError::NotAnImage); }; let mime = mime.split(';').next().unwrap_or(mime).trim(); if !mime.starts_with("image/") { debug!(filename, mime, "skipping non-image attachment"); - return None; + return Err(MediaFetchError::NotAnImage); } if size > MAX_SIZE { error!(filename, size, "image exceeds 10MB limit"); - return None; + return Err(MediaFetchError::SizeExceeded { + actual: size, + limit: MAX_SIZE, + }); } let mut req = HTTP_CLIENT.get(url); @@ -73,39 +180,63 @@ pub async fn download_and_encode_image( Ok(resp) => resp, Err(e) => { error!(url, error = %e, "download failed"); - return None; + return Err(MediaFetchError::Network(e)); } }; if !response.status().is_success() { error!(url, status = %response.status(), "HTTP error downloading image"); - return None; + return Err(MediaFetchError::HttpStatus(response.status())); } + + // Capture Content-Type BEFORE .bytes() consumes the response. + let content_type = response + .headers() + .get(reqwest::header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .map(str::to_string); + let bytes = match response.bytes().await { Ok(b) => b, Err(e) => { error!(url, error = %e, "read failed"); - return None; + return Err(MediaFetchError::Network(e)); } }; if bytes.len() as u64 > MAX_SIZE { + error!(filename, size = bytes.len(), "downloaded image exceeds limit"); + return Err(MediaFetchError::SizeExceeded { + actual: bytes.len() as u64, + limit: MAX_SIZE, + }); + } + + // Validate Content-Type and magic bytes before processing. + if let Err(e) = validate_image_response(content_type.as_deref(), &bytes) { error!( filename, - size = bytes.len(), - "downloaded image exceeds limit" + mime_hint = mime, + content_type = content_type.as_deref().unwrap_or("none"), + magic = hex_prefix(&bytes), + error = %e, + "image validation failed — body is not a supported image" ); - return None; + return Err(e); } let (output_bytes, output_mime) = match resize_and_compress(&bytes) { Ok(result) => result, Err(e) => { - if bytes.len() > 1024 * 1024 { - error!(filename, error = %e, size = bytes.len(), "resize failed and original too large, skipping"); - return None; - } - debug!(filename, error = %e, "resize failed, using original"); - (bytes.to_vec(), mime.to_string()) + error!( + filename, + error = %e, + magic = hex_prefix(&bytes), + size = bytes.len(), + "resize failed after successful validation" + ); + return Err(MediaFetchError::InvalidImageBody { + magic_prefix_hex: hex_prefix(&bytes), + }); } }; @@ -117,7 +248,7 @@ pub async fn download_and_encode_image( ); let encoded = BASE64.encode(&output_bytes); - Some(ContentBlock::Image { + Ok(ContentBlock::Image { media_type: output_mime, data: encoded, }) @@ -348,6 +479,13 @@ mod tests { buf.into_inner() } + fn make_jpeg(width: u32, height: u32) -> Vec { + let img = image::RgbImage::new(width, height); + let mut buf = Cursor::new(Vec::new()); + img.write_to(&mut buf, image::ImageFormat::Jpeg).unwrap(); + buf.into_inner() + } + #[test] fn large_image_resized_to_max_dimension() { let png = make_png(3000, 2000); @@ -429,4 +567,131 @@ mod tests { assert!(is_video_file("clip.MOV", None)); assert!(!is_video_file("notes.txt", Some("text/plain"))); } + + // --- validate_image_response tests --- + + #[test] + fn validate_accepts_png_with_matching_content_type() { + let png = make_png(1, 1); + assert!(validate_image_response(Some("image/png"), &png).is_ok()); + } + + #[test] + fn validate_accepts_jpeg_with_matching_content_type() { + let jpeg = make_jpeg(1, 1); + assert!(validate_image_response(Some("image/jpeg"), &jpeg).is_ok()); + } + + #[test] + fn validate_accepts_gif_with_matching_content_type() { + let gif: Vec = vec![ + 0x47, 0x49, 0x46, 0x38, 0x39, 0x61, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x2C, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x02, 0x02, 0x44, 0x01, 0x00, + 0x3B, + ]; + assert!(validate_image_response(Some("image/gif"), &gif).is_ok()); + } + + #[test] + fn validate_accepts_missing_content_type_with_valid_png() { + // When Content-Type header is absent, fall back to magic-byte detection. + let png = make_png(1, 1); + assert!(validate_image_response(None, &png).is_ok()); + } + + #[test] + fn validate_content_type_strips_params() { + // "image/png; charset=binary" is a real header value — must be accepted. + let png = make_png(1, 1); + assert!(validate_image_response(Some("image/png; charset=binary"), &png).is_ok()); + } + + /// Exact reproduction of issue #776: Slack serves the workspace login HTML + /// page at HTTP 200 when the bot token lacks the `files:read` scope. + /// The Slack file metadata says `mimetype: image/png`; the response body + /// magic bytes are `Slack login"; + let result = validate_image_response(Some("image/png"), html_body); + match result { + Err(MediaFetchError::InvalidImageBody { magic_prefix_hex }) => { + assert_eq!(magic_prefix_hex, "3c21444f43545950"); + } + other => panic!("expected InvalidImageBody, got {other:?}"), + } + } + + #[test] + fn validate_rejects_text_html_content_type() { + // Even if the body were a valid image, a text/html Content-Type must be rejected. + let png = make_png(1, 1); + let result = validate_image_response(Some("text/html; charset=utf-8"), &png); + assert!(matches!( + result, + Err(MediaFetchError::UnsupportedResponseType { .. }) + )); + } + + #[test] + fn validate_rejects_application_json_content_type() { + let png = make_png(1, 1); + let result = validate_image_response(Some("application/json"), &png); + assert!(matches!( + result, + Err(MediaFetchError::UnsupportedResponseType { .. }) + )); + } + + #[test] + fn validate_rejects_empty_body() { + let result = validate_image_response(Some("image/png"), &[]); + assert!(matches!( + result, + Err(MediaFetchError::InvalidImageBody { .. }) + )); + } + + #[test] + fn validate_rejects_truncated_png_header() { + // PNG magic is 8 bytes; 4 bytes is not enough to identify the format. + let truncated = [0x89u8, 0x50, 0x4e, 0x47]; + let result = validate_image_response(Some("image/png"), &truncated); + assert!(matches!( + result, + Err(MediaFetchError::InvalidImageBody { .. }) + )); + } + + #[test] + fn media_fetch_error_display_renders() { + let _ = MediaFetchError::NotAnImage.to_string(); + let _ = MediaFetchError::UnsupportedResponseType { + hinted: Some("image/png".into()), + actual: Some("text/html".into()), + } + .to_string(); + let _ = MediaFetchError::InvalidImageBody { + magic_prefix_hex: "3c21444f43545950".into(), + } + .to_string(); + let _ = MediaFetchError::SizeExceeded { + actual: 11_000_000, + limit: 10_000_000, + } + .to_string(); + let _ = MediaFetchError::HttpStatus(reqwest::StatusCode::UNAUTHORIZED).to_string(); + } + + #[test] + fn hex_prefix_formats_first_8_bytes() { + let bytes = b""; + assert_eq!(hex_prefix(bytes), "3c21444f43545950"); + } + + #[test] + fn hex_prefix_handles_short_buffer() { + let bytes = [0xffu8, 0xd8]; + assert_eq!(hex_prefix(&bytes), "ffd8"); + } } diff --git a/src/slack.rs b/src/slack.rs index cbe101f2..e04497c2 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -934,6 +934,7 @@ async fn handle_message( let mut echo_entries: Vec = Vec::new(); let mut text_file_bytes: u64 = 0; let mut text_file_count: u32 = 0; + let mut failed_image_files: Vec = Vec::new(); if let Some(files) = files { for file in files { @@ -1032,21 +1033,61 @@ async fn handle_message( debug!(filename, "adding text file attachment"); extra_blocks.push(block); } - } else if let Some(block) = media::download_and_encode_image( - url, - Some(mimetype), - filename, - size, - Some(bot_token), - ) - .await - { - debug!(filename, "adding image attachment"); - extra_blocks.push(block); + } else { + match media::download_and_encode_image( + url, + Some(mimetype), + filename, + size, + Some(bot_token), + ) + .await + { + Ok(block) => { + debug!(filename, "adding image attachment"); + extra_blocks.push(block); + } + Err(media::MediaFetchError::NotAnImage) => {} + Err( + media::MediaFetchError::UnsupportedResponseType { .. } + | media::MediaFetchError::InvalidImageBody { .. }, + ) => { + warn!( + filename, + "image validation failed; server may have returned HTML (missing files:read scope?)" + ); + failed_image_files.push(filename.to_string()); + } + Err(e) => { + warn!(filename, error = %e, "image download failed"); + } + } } } } + // Notify user if any images couldn't be validated (likely missing files:read scope). + if !failed_image_files.is_empty() { + let warn_channel = ChannelRef { + platform: "slack".into(), + channel_id: channel_id.clone(), + thread_id: thread_ts.clone().or_else(|| Some(ts.clone())), + parent_id: None, + origin_event_id: None, + }; + let file_list = failed_image_files.join("`, `"); + let _ = adapter + .send_message( + &warn_channel, + &format!( + ":warning: I couldn't access the file(s) you shared (`{file_list}`). \ + This often means the bot is missing the `files:read` OAuth scope. \ + Please ask an admin to reinstall the app with that scope." + ), + ) + .await; + } + // Resolve Slack display name (best-effort, fallback to user_id) let display_name = adapter .resolve_user_name(&user_id) From 8aef57dd10c611d8811ab8866448203d533eef11 Mon Sep 17 00:00:00 2001 From: howie <2318485+howie@users.noreply.github.com> Date: Mon, 11 May 2026 20:05:29 +0800 Subject: [PATCH 2/4] refactor(media): simplify per /simplify review - Remove dead hinted field from UnsupportedResponseType (always None) - Eliminate double reader.format() call with fmt@ binding - Deduplicate hex_prefix() in resize error path (compute once, reuse) - Promote strip_mime_params to media::strip_mime_params (pub crate), slack.rs delegates to it -- single source of truth for MIME stripping Co-Authored-By: Claude Opus 4.7 --- src/media.rs | 34 +++++++++++++++++----------------- src/slack.rs | 5 +---- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/src/media.rs b/src/media.rs index 1f3b4fa0..b79afe97 100644 --- a/src/media.rs +++ b/src/media.rs @@ -27,10 +27,7 @@ pub enum MediaFetchError { /// URL empty or MIME/filename doesn't indicate an image; skip silently. NotAnImage, /// HTTP response Content-Type is not a supported image format. - UnsupportedResponseType { - hinted: Option, - actual: Option, - }, + UnsupportedResponseType { actual: Option }, /// Response body magic bytes don't match a supported image format. InvalidImageBody { magic_prefix_hex: String }, /// File exceeds the configured size limit. @@ -45,10 +42,9 @@ impl std::fmt::Display for MediaFetchError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::NotAnImage => write!(f, "not an image attachment"), - Self::UnsupportedResponseType { hinted, actual } => write!( + Self::UnsupportedResponseType { actual } => write!( f, - "server returned unexpected content type (hinted: {}, actual: {})", - hinted.as_deref().unwrap_or("none"), + "server returned unexpected content type (actual: {})", actual.as_deref().unwrap_or("none"), ), Self::InvalidImageBody { magic_prefix_hex } => write!( @@ -64,6 +60,11 @@ impl std::fmt::Display for MediaFetchError { } } +/// Strip MIME parameters and trim whitespace. `"image/png; charset=binary"` → `"image/png"`. +pub(crate) fn strip_mime_params(mime: &str) -> &str { + mime.split(';').next().unwrap_or(mime).trim() +} + /// Format the first 8 bytes of a buffer as lowercase hex (no separator). fn hex_prefix(body: &[u8]) -> String { body.iter() @@ -88,10 +89,9 @@ fn validate_image_response( body: &[u8], ) -> Result { if let Some(ct) = content_type { - let base = ct.split(';').next().unwrap_or(ct).trim().to_lowercase(); + let base = strip_mime_params(ct).to_lowercase(); if !ALLOWED_IMAGE_MIME.contains(&base.as_str()) { return Err(MediaFetchError::UnsupportedResponseType { - hinted: None, actual: Some(base), }); } @@ -108,11 +108,11 @@ fn validate_image_response( match reader.format() { Some( - image::ImageFormat::Png - | image::ImageFormat::Jpeg - | image::ImageFormat::Gif - | image::ImageFormat::WebP, - ) => Ok(reader.format().unwrap()), + fmt @ (image::ImageFormat::Png + | image::ImageFormat::Jpeg + | image::ImageFormat::Gif + | image::ImageFormat::WebP), + ) => Ok(fmt), _ => Err(MediaFetchError::InvalidImageBody { magic_prefix_hex: hex_prefix(body), }), @@ -227,15 +227,16 @@ pub async fn download_and_encode_image( let (output_bytes, output_mime) = match resize_and_compress(&bytes) { Ok(result) => result, Err(e) => { + let magic = hex_prefix(&bytes); error!( filename, error = %e, - magic = hex_prefix(&bytes), + magic = %magic, size = bytes.len(), "resize failed after successful validation" ); return Err(MediaFetchError::InvalidImageBody { - magic_prefix_hex: hex_prefix(&bytes), + magic_prefix_hex: magic, }); } }; @@ -667,7 +668,6 @@ mod tests { fn media_fetch_error_display_renders() { let _ = MediaFetchError::NotAnImage.to_string(); let _ = MediaFetchError::UnsupportedResponseType { - hinted: Some("image/png".into()), actual: Some("text/html".into()), } .to_string(); diff --git a/src/slack.rs b/src/slack.rs index e04497c2..b0ccf7f2 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -1205,11 +1205,8 @@ fn slack_file_download_url(file: &serde_json::Value) -> &str { .unwrap_or("") } -/// Strip MIME parameters like `; charset=utf-8` so type-detection helpers see -/// the bare media type. Slack occasionally sends mimetypes like -/// `text/plain; charset=utf-8`; `media::is_text_file` expects the bare form. fn strip_mime_params(mimetype: &str) -> &str { - mimetype.split(';').next().unwrap_or(mimetype).trim() + media::strip_mime_params(mimetype) } /// True only when a Slack non-bot event represents a real user message From 4c12d9a17287a4c6b5f740a42740d1fd9afd3b42 Mon Sep 17 00:00:00 2001 From: howie <2318485+howie@users.noreply.github.com> Date: Mon, 11 May 2026 20:40:07 +0800 Subject: [PATCH 3/4] fix(media): address cross-model review findings Critical: change Content-Type check from allow-list to block-list (Codex finding). The allow-list rejected application/octet-stream before magic-byte check ran, silently dropping valid images from CDNs. Only text/* is now rejected early; everything else falls through to magic-byte verification. Also: - Soften Slack warning message: no longer attributes all failures to files:read scope; now mentions format support as a second cause - Add SizeExceeded to Slack user notification (was silent) - Log failures from send_message() instead of using let _ = - Log discarded io::Error from with_guessed_format - Fix doc comments: download_and_encode_image (SizeExceeded fires pre-HTTP), validate_image_response (Content-Type check short-circuits, not sequential) - Replace inline "Validate Content-Type..." comment with WHY explanation - Restore doc comment on strip_mime_params wrapper in slack.rs - Add tests: octet-stream acceptance (Codex regression fix), JSON body rejection by magic bytes, missing Content-Type + invalid body Co-Authored-By: Claude Opus 4.7 --- src/media.rs | 61 +++++++++++++++++++++++++++++++++++++--------------- src/slack.rs | 30 +++++++++++++++----------- 2 files changed, 62 insertions(+), 29 deletions(-) diff --git a/src/media.rs b/src/media.rs index b79afe97..21fde4fc 100644 --- a/src/media.rs +++ b/src/media.rs @@ -74,23 +74,21 @@ fn hex_prefix(body: &[u8]) -> String { .concat() } -/// Allowed MIME types for model-bound images. -const ALLOWED_IMAGE_MIME: &[&str] = &["image/png", "image/jpeg", "image/gif", "image/webp"]; - /// Validate the HTTP response Content-Type and body magic bytes. /// -/// Returns the detected `ImageFormat` on success. Both checks are applied: -/// the Content-Type allow-list rejects non-image responses early (e.g. Slack -/// serving an HTML login page when `files:read` scope is missing), and the -/// magic-byte sniff catches corrupted or mislabeled bodies regardless of the -/// header. +/// If Content-Type is present and explicitly non-binary (e.g. `text/html` from +/// Slack's auth redirect when `files:read` scope is missing), rejects immediately. +/// Generic types such as `application/octet-stream` and absent headers pass through +/// to the magic-byte check, which is the authoritative gate for image validity. fn validate_image_response( content_type: Option<&str>, body: &[u8], ) -> Result { + // Reject explicitly-text responses early (e.g. Slack HTML login page at HTTP 200). + // application/octet-stream and other generic types pass through to magic-byte check. if let Some(ct) = content_type { let base = strip_mime_params(ct).to_lowercase(); - if !ALLOWED_IMAGE_MIME.contains(&base.as_str()) { + if base.starts_with("text/") { return Err(MediaFetchError::UnsupportedResponseType { actual: Some(base), }); @@ -99,7 +97,8 @@ fn validate_image_response( let reader = match ImageReader::new(Cursor::new(body)).with_guessed_format() { Ok(r) => r, - Err(_) => { + Err(e) => { + error!(error = %e, "image format detection I/O error"); return Err(MediaFetchError::InvalidImageBody { magic_prefix_hex: hex_prefix(body), }); @@ -122,9 +121,11 @@ fn validate_image_response( /// Download an image from a URL, resize/compress it, and return as a ContentBlock. /// /// Returns `Err(MediaFetchError::NotAnImage)` when the URL or MIME hint don't -/// indicate an image (silent skip). Returns other `Err` variants when the -/// download succeeded but the response bytes fail Content-Type or magic-byte -/// validation — callers should surface these to the user. +/// indicate an image — callers should skip silently. Returns +/// `Err(MediaFetchError::SizeExceeded)` when the declared `size` exceeds the limit +/// before any request is made. Returns other `Err` variants (`Network`, +/// `HttpStatus`, `UnsupportedResponseType`, `InvalidImageBody`) after a request +/// attempt — callers should surface these to the user. /// /// Pass `auth_token` for platforms that require authentication (e.g. Slack private files). pub async fn download_and_encode_image( @@ -211,7 +212,8 @@ pub async fn download_and_encode_image( }); } - // Validate Content-Type and magic bytes before processing. + // Guard against HTTP 200 responses that are error pages (e.g. Slack auth redirect + // when files:read scope is missing), and against corrupted or mislabeled bodies. if let Err(e) = validate_image_response(content_type.as_deref(), &bytes) { error!( filename, @@ -634,13 +636,38 @@ mod tests { )); } + /// Regression test for the application/octet-stream fix: CDNs and generic + /// file download endpoints commonly serve any file with this Content-Type. + /// The old allow-list incorrectly rejected it before magic-byte check. #[test] - fn validate_rejects_application_json_content_type() { + fn validate_accepts_octet_stream_with_valid_png() { let png = make_png(1, 1); - let result = validate_image_response(Some("application/json"), &png); + assert!( + validate_image_response(Some("application/octet-stream"), &png).is_ok(), + "application/octet-stream must pass through to magic-byte check" + ); + } + + /// application/json body is rejected by magic bytes, not by Content-Type. + #[test] + fn validate_rejects_json_body_by_magic_bytes() { + let json_body = b"{\"error\":\"invalid_auth\",\"ok\":false}"; + let result = validate_image_response(Some("application/json"), json_body); assert!(matches!( result, - Err(MediaFetchError::UnsupportedResponseType { .. }) + Err(MediaFetchError::InvalidImageBody { .. }) + )); + } + + /// Missing Content-Type with invalid body: CDN stripping the header should + /// still be caught by magic-byte detection. + #[test] + fn validate_rejects_html_body_with_missing_content_type() { + let html_body = b"error page"; + let result = validate_image_response(None, html_body); + assert!(matches!( + result, + Err(MediaFetchError::InvalidImageBody { .. }) )); } diff --git a/src/slack.rs b/src/slack.rs index b0ccf7f2..6beda7fb 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -1048,13 +1048,17 @@ async fn handle_message( extra_blocks.push(block); } Err(media::MediaFetchError::NotAnImage) => {} + Err(media::MediaFetchError::SizeExceeded { actual, limit }) => { + warn!(filename, actual, limit, "image exceeds size limit"); + failed_image_files.push(format!("{filename} (exceeds {limit} byte limit)")); + } Err( media::MediaFetchError::UnsupportedResponseType { .. } | media::MediaFetchError::InvalidImageBody { .. }, ) => { warn!( filename, - "image validation failed; server may have returned HTML (missing files:read scope?)" + "image validation failed; server may have returned non-image content" ); failed_image_files.push(filename.to_string()); } @@ -1066,7 +1070,7 @@ async fn handle_message( } } - // Notify user if any images couldn't be validated (likely missing files:read scope). + // Notify user if any images couldn't be processed. if !failed_image_files.is_empty() { let warn_channel = ChannelRef { platform: "slack".into(), @@ -1076,16 +1080,14 @@ async fn handle_message( origin_event_id: None, }; let file_list = failed_image_files.join("`, `"); - let _ = adapter - .send_message( - &warn_channel, - &format!( - ":warning: I couldn't access the file(s) you shared (`{file_list}`). \ - This often means the bot is missing the `files:read` OAuth scope. \ - Please ask an admin to reinstall the app with that scope." - ), - ) - .await; + let msg = format!( + ":warning: I couldn't process the file(s) you shared (`{file_list}`). \ + This can happen when the bot lacks the `files:read` OAuth scope, \ + or when the file format isn't supported (PNG/JPEG/GIF/WebP only)." + ); + if let Err(e) = adapter.send_message(&warn_channel, &msg).await { + warn!(error = %e, "failed to send image validation warning to user"); + } } // Resolve Slack display name (best-effort, fallback to user_id) @@ -1205,6 +1207,10 @@ fn slack_file_download_url(file: &serde_json::Value) -> &str { .unwrap_or("") } +/// Strip MIME parameters so type-detection helpers see the bare media type. +/// Delegates to media::strip_mime_params (single source of truth). +/// Needed because Slack occasionally sends `text/plain; charset=utf-8` and +/// `media::is_text_file` expects the bare form. fn strip_mime_params(mimetype: &str) -> &str { media::strip_mime_params(mimetype) } From 4e1a6827affa9ae0734e43a1b4f2edd4bab822c4 Mon Sep 17 00:00:00 2001 From: howie <2318485+howie@users.noreply.github.com> Date: Mon, 11 May 2026 20:57:42 +0800 Subject: [PATCH 4/4] fix(slack): sanitize filenames in warning message (mrkdwn injection) Codex adversarial review found that user-controlled filenames embedded in the mrkdwn warning message could inject Slack markup (backtick break-out, mentions, <@uid> pings). Replace backticks and angle brackets with safe ASCII equivalents before embedding in the message. Co-Authored-By: Claude Opus 4.7 --- src/slack.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/slack.rs b/src/slack.rs index 6beda7fb..2d3a8f35 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -1079,7 +1079,15 @@ async fn handle_message( parent_id: None, origin_event_id: None, }; - let file_list = failed_image_files.join("`, `"); + // Sanitize filenames before embedding in mrkdwn: backticks and angle- + // bracket sequences (`<@U...>`, ``) are Slack markup delimiters + // that would allow injection if the filename is user-controlled. + let sanitize = |s: &str| s.replace('`', "'").replace('<', "(").replace('>', ")"); + let file_list = failed_image_files + .iter() + .map(|n| sanitize(n)) + .collect::>() + .join("`, `"); let msg = format!( ":warning: I couldn't process the file(s) you shared (`{file_list}`). \ This can happen when the bot lacks the `files:read` OAuth scope, \