diff --git a/src/discord.rs b/src/discord.rs index ce2b5e11..a811cfbc 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -702,25 +702,43 @@ impl EventHandler for Handler { debug!(filename = %attachment.filename, "adding text file attachment"); extra_blocks.push(block); } - } else if let Some(block) = media::download_and_encode_image( - &attachment.url, - attachment.content_type.as_deref(), - &attachment.filename, - u64::from(attachment.size), - None, - ) - .await - { - debug!(url = %attachment.url, filename = %attachment.filename, "adding image attachment"); - extra_blocks.push(block); - } else if media::is_video_file(&attachment.filename, attachment.content_type.as_deref()) { - debug!(url = %attachment.url, filename = %attachment.filename, "adding video attachment link"); - extra_blocks.push(video_attachment_block( - &attachment.filename, + } else { + match media::download_and_encode_image( + &attachment.url, attachment.content_type.as_deref(), + &attachment.filename, u64::from(attachment.size), - &attachment.url, - )); + None, + ) + .await + { + Ok(block) => { + debug!(url = %attachment.url, filename = %attachment.filename, "adding image attachment"); + extra_blocks.push(block); + } + Err(media::MediaFetchError::NotAnImage) => { + if media::is_video_file( + &attachment.filename, + attachment.content_type.as_deref(), + ) { + debug!(url = %attachment.url, filename = %attachment.filename, "adding video attachment link"); + extra_blocks.push(video_attachment_block( + &attachment.filename, + attachment.content_type.as_deref(), + u64::from(attachment.size), + &attachment.url, + )); + } + } + Err(e) => { + tracing::warn!( + url = %attachment.url, + filename = %attachment.filename, + error = %e, + "image attachment failed" + ); + } + } } } diff --git a/src/media.rs b/src/media.rs index b42cfa6e..21fde4fc 100644 --- a/src/media.rs +++ b/src/media.rs @@ -21,7 +21,112 @@ const IMAGE_MAX_DIMENSION_PX: u32 = 1200; /// JPEG quality for compressed output. const IMAGE_JPEG_QUALITY: u8 = 75; +/// Error variants for `download_and_encode_image`. +#[derive(Debug)] +pub enum MediaFetchError { + /// URL empty or MIME/filename doesn't indicate an image; skip silently. + NotAnImage, + /// HTTP response Content-Type is not a supported image format. + UnsupportedResponseType { actual: Option }, + /// Response body magic bytes don't match a supported image format. + InvalidImageBody { magic_prefix_hex: String }, + /// File exceeds the configured size limit. + SizeExceeded { actual: u64, limit: u64 }, + /// Network-level error (send or body-read). + Network(reqwest::Error), + /// Server returned a non-success HTTP status. + HttpStatus(reqwest::StatusCode), +} + +impl std::fmt::Display for MediaFetchError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::NotAnImage => write!(f, "not an image attachment"), + Self::UnsupportedResponseType { actual } => write!( + f, + "server returned unexpected content type (actual: {})", + actual.as_deref().unwrap_or("none"), + ), + Self::InvalidImageBody { magic_prefix_hex } => write!( + f, + "response body is not a valid image (first 8 bytes: {magic_prefix_hex})" + ), + Self::SizeExceeded { actual, limit } => { + write!(f, "file size {actual} exceeds limit {limit}") + } + Self::Network(e) => write!(f, "network error: {e}"), + Self::HttpStatus(s) => write!(f, "HTTP {s}"), + } + } +} + +/// Strip MIME parameters and trim whitespace. `"image/png; charset=binary"` → `"image/png"`. +pub(crate) fn strip_mime_params(mime: &str) -> &str { + mime.split(';').next().unwrap_or(mime).trim() +} + +/// Format the first 8 bytes of a buffer as lowercase hex (no separator). +fn hex_prefix(body: &[u8]) -> String { + body.iter() + .take(8) + .map(|b| format!("{b:02x}")) + .collect::>() + .concat() +} + +/// Validate the HTTP response Content-Type and body magic bytes. +/// +/// If Content-Type is present and explicitly non-binary (e.g. `text/html` from +/// Slack's auth redirect when `files:read` scope is missing), rejects immediately. +/// Generic types such as `application/octet-stream` and absent headers pass through +/// to the magic-byte check, which is the authoritative gate for image validity. +fn validate_image_response( + content_type: Option<&str>, + body: &[u8], +) -> Result { + // Reject explicitly-text responses early (e.g. Slack HTML login page at HTTP 200). + // application/octet-stream and other generic types pass through to magic-byte check. + if let Some(ct) = content_type { + let base = strip_mime_params(ct).to_lowercase(); + if base.starts_with("text/") { + return Err(MediaFetchError::UnsupportedResponseType { + actual: Some(base), + }); + } + } + + let reader = match ImageReader::new(Cursor::new(body)).with_guessed_format() { + Ok(r) => r, + Err(e) => { + error!(error = %e, "image format detection I/O error"); + return Err(MediaFetchError::InvalidImageBody { + magic_prefix_hex: hex_prefix(body), + }); + } + }; + + match reader.format() { + Some( + fmt @ (image::ImageFormat::Png + | image::ImageFormat::Jpeg + | image::ImageFormat::Gif + | image::ImageFormat::WebP), + ) => Ok(fmt), + _ => Err(MediaFetchError::InvalidImageBody { + magic_prefix_hex: hex_prefix(body), + }), + } +} + /// Download an image from a URL, resize/compress it, and return as a ContentBlock. +/// +/// Returns `Err(MediaFetchError::NotAnImage)` when the URL or MIME hint don't +/// indicate an image — callers should skip silently. Returns +/// `Err(MediaFetchError::SizeExceeded)` when the declared `size` exceeds the limit +/// before any request is made. Returns other `Err` variants (`Network`, +/// `HttpStatus`, `UnsupportedResponseType`, `InvalidImageBody`) after a request +/// attempt — callers should surface these to the user. +/// /// Pass `auth_token` for platforms that require authentication (e.g. Slack private files). pub async fn download_and_encode_image( url: &str, @@ -29,11 +134,11 @@ pub async fn download_and_encode_image( filename: &str, size: u64, auth_token: Option<&str>, -) -> Option { +) -> Result { const MAX_SIZE: u64 = 10 * 1024 * 1024; // 10 MB if url.is_empty() { - return None; + return Err(MediaFetchError::NotAnImage); } let mime = mime_hint.or_else(|| { @@ -51,17 +156,20 @@ pub async fn download_and_encode_image( let Some(mime) = mime else { debug!(filename, "skipping non-image attachment"); - return None; + return Err(MediaFetchError::NotAnImage); }; let mime = mime.split(';').next().unwrap_or(mime).trim(); if !mime.starts_with("image/") { debug!(filename, mime, "skipping non-image attachment"); - return None; + return Err(MediaFetchError::NotAnImage); } if size > MAX_SIZE { error!(filename, size, "image exceeds 10MB limit"); - return None; + return Err(MediaFetchError::SizeExceeded { + actual: size, + limit: MAX_SIZE, + }); } let mut req = HTTP_CLIENT.get(url); @@ -73,39 +181,65 @@ pub async fn download_and_encode_image( Ok(resp) => resp, Err(e) => { error!(url, error = %e, "download failed"); - return None; + return Err(MediaFetchError::Network(e)); } }; if !response.status().is_success() { error!(url, status = %response.status(), "HTTP error downloading image"); - return None; + return Err(MediaFetchError::HttpStatus(response.status())); } + + // Capture Content-Type BEFORE .bytes() consumes the response. + let content_type = response + .headers() + .get(reqwest::header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .map(str::to_string); + let bytes = match response.bytes().await { Ok(b) => b, Err(e) => { error!(url, error = %e, "read failed"); - return None; + return Err(MediaFetchError::Network(e)); } }; if bytes.len() as u64 > MAX_SIZE { + error!(filename, size = bytes.len(), "downloaded image exceeds limit"); + return Err(MediaFetchError::SizeExceeded { + actual: bytes.len() as u64, + limit: MAX_SIZE, + }); + } + + // Guard against HTTP 200 responses that are error pages (e.g. Slack auth redirect + // when files:read scope is missing), and against corrupted or mislabeled bodies. + if let Err(e) = validate_image_response(content_type.as_deref(), &bytes) { error!( filename, - size = bytes.len(), - "downloaded image exceeds limit" + mime_hint = mime, + content_type = content_type.as_deref().unwrap_or("none"), + magic = hex_prefix(&bytes), + error = %e, + "image validation failed — body is not a supported image" ); - return None; + return Err(e); } let (output_bytes, output_mime) = match resize_and_compress(&bytes) { Ok(result) => result, Err(e) => { - if bytes.len() > 1024 * 1024 { - error!(filename, error = %e, size = bytes.len(), "resize failed and original too large, skipping"); - return None; - } - debug!(filename, error = %e, "resize failed, using original"); - (bytes.to_vec(), mime.to_string()) + let magic = hex_prefix(&bytes); + error!( + filename, + error = %e, + magic = %magic, + size = bytes.len(), + "resize failed after successful validation" + ); + return Err(MediaFetchError::InvalidImageBody { + magic_prefix_hex: magic, + }); } }; @@ -117,7 +251,7 @@ pub async fn download_and_encode_image( ); let encoded = BASE64.encode(&output_bytes); - Some(ContentBlock::Image { + Ok(ContentBlock::Image { media_type: output_mime, data: encoded, }) @@ -348,6 +482,13 @@ mod tests { buf.into_inner() } + fn make_jpeg(width: u32, height: u32) -> Vec { + let img = image::RgbImage::new(width, height); + let mut buf = Cursor::new(Vec::new()); + img.write_to(&mut buf, image::ImageFormat::Jpeg).unwrap(); + buf.into_inner() + } + #[test] fn large_image_resized_to_max_dimension() { let png = make_png(3000, 2000); @@ -429,4 +570,155 @@ mod tests { assert!(is_video_file("clip.MOV", None)); assert!(!is_video_file("notes.txt", Some("text/plain"))); } + + // --- validate_image_response tests --- + + #[test] + fn validate_accepts_png_with_matching_content_type() { + let png = make_png(1, 1); + assert!(validate_image_response(Some("image/png"), &png).is_ok()); + } + + #[test] + fn validate_accepts_jpeg_with_matching_content_type() { + let jpeg = make_jpeg(1, 1); + assert!(validate_image_response(Some("image/jpeg"), &jpeg).is_ok()); + } + + #[test] + fn validate_accepts_gif_with_matching_content_type() { + let gif: Vec = vec![ + 0x47, 0x49, 0x46, 0x38, 0x39, 0x61, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x2C, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x02, 0x02, 0x44, 0x01, 0x00, + 0x3B, + ]; + assert!(validate_image_response(Some("image/gif"), &gif).is_ok()); + } + + #[test] + fn validate_accepts_missing_content_type_with_valid_png() { + // When Content-Type header is absent, fall back to magic-byte detection. + let png = make_png(1, 1); + assert!(validate_image_response(None, &png).is_ok()); + } + + #[test] + fn validate_content_type_strips_params() { + // "image/png; charset=binary" is a real header value — must be accepted. + let png = make_png(1, 1); + assert!(validate_image_response(Some("image/png; charset=binary"), &png).is_ok()); + } + + /// Exact reproduction of issue #776: Slack serves the workspace login HTML + /// page at HTTP 200 when the bot token lacks the `files:read` scope. + /// The Slack file metadata says `mimetype: image/png`; the response body + /// magic bytes are `Slack login"; + let result = validate_image_response(Some("image/png"), html_body); + match result { + Err(MediaFetchError::InvalidImageBody { magic_prefix_hex }) => { + assert_eq!(magic_prefix_hex, "3c21444f43545950"); + } + other => panic!("expected InvalidImageBody, got {other:?}"), + } + } + + #[test] + fn validate_rejects_text_html_content_type() { + // Even if the body were a valid image, a text/html Content-Type must be rejected. + let png = make_png(1, 1); + let result = validate_image_response(Some("text/html; charset=utf-8"), &png); + assert!(matches!( + result, + Err(MediaFetchError::UnsupportedResponseType { .. }) + )); + } + + /// Regression test for the application/octet-stream fix: CDNs and generic + /// file download endpoints commonly serve any file with this Content-Type. + /// The old allow-list incorrectly rejected it before magic-byte check. + #[test] + fn validate_accepts_octet_stream_with_valid_png() { + let png = make_png(1, 1); + assert!( + validate_image_response(Some("application/octet-stream"), &png).is_ok(), + "application/octet-stream must pass through to magic-byte check" + ); + } + + /// application/json body is rejected by magic bytes, not by Content-Type. + #[test] + fn validate_rejects_json_body_by_magic_bytes() { + let json_body = b"{\"error\":\"invalid_auth\",\"ok\":false}"; + let result = validate_image_response(Some("application/json"), json_body); + assert!(matches!( + result, + Err(MediaFetchError::InvalidImageBody { .. }) + )); + } + + /// Missing Content-Type with invalid body: CDN stripping the header should + /// still be caught by magic-byte detection. + #[test] + fn validate_rejects_html_body_with_missing_content_type() { + let html_body = b"error page"; + let result = validate_image_response(None, html_body); + assert!(matches!( + result, + Err(MediaFetchError::InvalidImageBody { .. }) + )); + } + + #[test] + fn validate_rejects_empty_body() { + let result = validate_image_response(Some("image/png"), &[]); + assert!(matches!( + result, + Err(MediaFetchError::InvalidImageBody { .. }) + )); + } + + #[test] + fn validate_rejects_truncated_png_header() { + // PNG magic is 8 bytes; 4 bytes is not enough to identify the format. + let truncated = [0x89u8, 0x50, 0x4e, 0x47]; + let result = validate_image_response(Some("image/png"), &truncated); + assert!(matches!( + result, + Err(MediaFetchError::InvalidImageBody { .. }) + )); + } + + #[test] + fn media_fetch_error_display_renders() { + let _ = MediaFetchError::NotAnImage.to_string(); + let _ = MediaFetchError::UnsupportedResponseType { + actual: Some("text/html".into()), + } + .to_string(); + let _ = MediaFetchError::InvalidImageBody { + magic_prefix_hex: "3c21444f43545950".into(), + } + .to_string(); + let _ = MediaFetchError::SizeExceeded { + actual: 11_000_000, + limit: 10_000_000, + } + .to_string(); + let _ = MediaFetchError::HttpStatus(reqwest::StatusCode::UNAUTHORIZED).to_string(); + } + + #[test] + fn hex_prefix_formats_first_8_bytes() { + let bytes = b""; + assert_eq!(hex_prefix(bytes), "3c21444f43545950"); + } + + #[test] + fn hex_prefix_handles_short_buffer() { + let bytes = [0xffu8, 0xd8]; + assert_eq!(hex_prefix(&bytes), "ffd8"); + } } diff --git a/src/slack.rs b/src/slack.rs index cbe101f2..2d3a8f35 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -934,6 +934,7 @@ async fn handle_message( let mut echo_entries: Vec = Vec::new(); let mut text_file_bytes: u64 = 0; let mut text_file_count: u32 = 0; + let mut failed_image_files: Vec = Vec::new(); if let Some(files) = files { for file in files { @@ -1032,21 +1033,71 @@ async fn handle_message( debug!(filename, "adding text file attachment"); extra_blocks.push(block); } - } else if let Some(block) = media::download_and_encode_image( - url, - Some(mimetype), - filename, - size, - Some(bot_token), - ) - .await - { - debug!(filename, "adding image attachment"); - extra_blocks.push(block); + } else { + match media::download_and_encode_image( + url, + Some(mimetype), + filename, + size, + Some(bot_token), + ) + .await + { + Ok(block) => { + debug!(filename, "adding image attachment"); + extra_blocks.push(block); + } + Err(media::MediaFetchError::NotAnImage) => {} + Err(media::MediaFetchError::SizeExceeded { actual, limit }) => { + warn!(filename, actual, limit, "image exceeds size limit"); + failed_image_files.push(format!("{filename} (exceeds {limit} byte limit)")); + } + Err( + media::MediaFetchError::UnsupportedResponseType { .. } + | media::MediaFetchError::InvalidImageBody { .. }, + ) => { + warn!( + filename, + "image validation failed; server may have returned non-image content" + ); + failed_image_files.push(filename.to_string()); + } + Err(e) => { + warn!(filename, error = %e, "image download failed"); + } + } } } } + // Notify user if any images couldn't be processed. + if !failed_image_files.is_empty() { + let warn_channel = ChannelRef { + platform: "slack".into(), + channel_id: channel_id.clone(), + thread_id: thread_ts.clone().or_else(|| Some(ts.clone())), + parent_id: None, + origin_event_id: None, + }; + // Sanitize filenames before embedding in mrkdwn: backticks and angle- + // bracket sequences (`<@U...>`, ``) are Slack markup delimiters + // that would allow injection if the filename is user-controlled. + let sanitize = |s: &str| s.replace('`', "'").replace('<', "(").replace('>', ")"); + let file_list = failed_image_files + .iter() + .map(|n| sanitize(n)) + .collect::>() + .join("`, `"); + let msg = format!( + ":warning: I couldn't process the file(s) you shared (`{file_list}`). \ + This can happen when the bot lacks the `files:read` OAuth scope, \ + or when the file format isn't supported (PNG/JPEG/GIF/WebP only)." + ); + if let Err(e) = adapter.send_message(&warn_channel, &msg).await { + warn!(error = %e, "failed to send image validation warning to user"); + } + } + // Resolve Slack display name (best-effort, fallback to user_id) let display_name = adapter .resolve_user_name(&user_id) @@ -1164,11 +1215,12 @@ fn slack_file_download_url(file: &serde_json::Value) -> &str { .unwrap_or("") } -/// Strip MIME parameters like `; charset=utf-8` so type-detection helpers see -/// the bare media type. Slack occasionally sends mimetypes like -/// `text/plain; charset=utf-8`; `media::is_text_file` expects the bare form. +/// Strip MIME parameters so type-detection helpers see the bare media type. +/// Delegates to media::strip_mime_params (single source of truth). +/// Needed because Slack occasionally sends `text/plain; charset=utf-8` and +/// `media::is_text_file` expects the bare form. fn strip_mime_params(mimetype: &str) -> &str { - mimetype.split(';').next().unwrap_or(mimetype).trim() + media::strip_mime_params(mimetype) } /// True only when a Slack non-bot event represents a real user message