diff --git a/docs/slash-commands.md b/docs/slash-commands.md index 040838e5..1ab7fa13 100644 --- a/docs/slash-commands.md +++ b/docs/slash-commands.md @@ -11,6 +11,7 @@ OpenAB registers Discord slash commands for session control. These work in both | `/cancel` | Cancel the current in-flight operation | Yes | | `/reset` | Reset the conversation session (clear history, start fresh) | Yes | | `/remind` | Set a one-shot delayed reminder to mention users/roles | No | +| `/export-thread` | Download the current Discord thread as a `.txt` transcript | No | All responses are **ephemeral** — only the user who invoked the command sees the reply. @@ -64,6 +65,33 @@ This is equivalent to the `sessions close` + `sessions new` pattern used by [Ope - Bot identity and system prompt (re-applied on next session creation) - Config settings in `config.toml` +### `/export-thread` + +Fetches the current Discord thread or DM history and returns a `.txt` file as an ephemeral follow-up. The transcript includes message timestamps, author names and IDs, message text, and attachment URLs. + +**Optional parameters** (mutually exclusive — use at most one): + +| Parameter | Type | Description | +|-----------|------|-------------| +| `limit` | Integer | Export only the most recent N messages (1–5000) | +| `since` | String | Export messages after this date, format `YYYY-MM-DD` (interpreted as UTC midnight) | +| `days` | Integer | Export messages from the last N days (1–365) | + +If no parameter is provided, all messages are exported (capped at 5000). + +**Examples:** +``` +/export-thread → export all (cap 5000) +/export-thread limit:100 → most recent 100 messages +/export-thread since:2026-05-10 → messages after 2026-05-10 00:00 UTC +/export-thread days:3 → messages from the last 3 days +``` + +**Constraints:** +- Only works in allowed Discord threads or enabled DMs. +- Specifying more than one filter returns an error. +- Very large exports may be truncated to fit Discord's attachment size limit. 
+ ## Passing CLI Commands via @mention In addition to slash commands, you can pass built-in CLI commands directly after an @mention: diff --git a/src/discord.rs b/src/discord.rs index 5452e6a9..67cf93f2 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -8,9 +8,10 @@ use crate::media; use crate::remind::{self, ReminderStore}; use async_trait::async_trait; use serenity::builder::{ - CreateActionRow, CreateButton, CreateCommand, CreateCommandOption, CreateInteractionResponse, - CreateInteractionResponseMessage, CreateSelectMenu, CreateSelectMenuKind, - CreateSelectMenuOption, CreateThread, EditMessage, + CreateActionRow, CreateAttachment, CreateButton, CreateCommand, CreateCommandOption, + CreateInteractionResponse, CreateInteractionResponseFollowup, CreateInteractionResponseMessage, + CreateSelectMenu, CreateSelectMenuKind, CreateSelectMenuOption, CreateThread, EditMessage, + GetMessages, }; use serenity::http::Http; use serenity::model::application::ButtonStyle; @@ -34,6 +35,9 @@ const PARTICIPATION_CACHE_MAX: usize = 1000; /// Discord StringSelectMenu hard limit on options. const SELECT_MENU_PAGE_SIZE: usize = 25; +/// Avoid unbounded Discord history exports from very large threads. +const THREAD_EXPORT_MESSAGE_LIMIT: usize = 5000; + // --- DiscordAdapter: implements ChatAdapter for Discord via serenity --- pub struct DiscordAdapter { @@ -837,6 +841,23 @@ impl EventHandler for Handler { "delay", "Delay before firing (e.g. 
30m, 2h, 1d)", ).required(true)), + CreateCommand::new("export-thread") + .description("Download this thread as a text file") + .add_option(CreateCommandOption::new( + CommandOptionType::Integer, + "limit", + "Export only the most recent N messages (1–5000)", + )) + .add_option(CreateCommandOption::new( + CommandOptionType::String, + "since", + "Export messages after this date (YYYY-MM-DD, UTC)", + )) + .add_option(CreateCommandOption::new( + CommandOptionType::Integer, + "days", + "Export messages from the last N days (1–365)", + )), ]; // Register global commands (works in DMs + all guilds after propagation). @@ -895,6 +916,9 @@ impl EventHandler for Handler { Interaction::Command(cmd) if cmd.data.name == "remind" => { self.handle_remind_command(&ctx, &cmd).await; } + Interaction::Command(cmd) if cmd.data.name == "export-thread" => { + self.handle_export_thread_command(&ctx, &cmd).await; + } Interaction::Component(comp) if comp.data.custom_id.starts_with("acp_config_") => { self.handle_config_select(&ctx, &comp).await; } @@ -1296,6 +1320,182 @@ impl Handler { } } + async fn handle_export_thread_command( + &self, + ctx: &Context, + cmd: &serenity::model::application::CommandInteraction, + ) { + if is_denied_user( + false, + self.allow_all_users, + &self.allowed_users, + cmd.user.id.get(), + ) { + let response = CreateInteractionResponse::Message( + CreateInteractionResponseMessage::new() + .content("🚫 You are not allowed to use this bot.") + .ephemeral(true), + ); + if let Err(e) = cmd.create_response(&ctx.http, response).await { + tracing::error!(error = %e, "failed to deny /export-thread command"); + } + return; + } + + let channel_id = cmd.channel_id; + let (export_allowed, export_name) = match channel_id.to_channel(&ctx.http).await { + Ok(serenity::model::channel::Channel::Guild(gc)) => { + let in_allowed_channel = + self.allow_all_channels || self.allowed_channels.contains(&channel_id.get()); + let (in_thread, _) = detect_thread( + 
gc.thread_metadata.is_some(), + gc.parent_id.map(|id| id.get()), + gc.owner_id.map(|id| id.get()), + ctx.cache.current_user().id.get(), + &self.allowed_channels, + self.allow_all_channels, + in_allowed_channel, + ); + (in_thread, gc.name.clone()) + } + Ok(serenity::model::channel::Channel::Private(_)) => { + (self.allow_dm, "dm".to_string()) + } + Ok(_) => (false, "channel".to_string()), + Err(e) => { + tracing::warn!(channel_id = %channel_id, error = %e, "failed to inspect channel for export"); + (false, "channel".to_string()) + } + }; + + if !export_allowed { + let response = CreateInteractionResponse::Message( + CreateInteractionResponseMessage::new() + .content("⚠️ Run this command inside an allowed Discord thread or DM.") + .ephemeral(true), + ); + if let Err(e) = cmd.create_response(&ctx.http, response).await { + tracing::error!(error = %e, "failed to respond to /export-thread rejection"); + } + return; + } + + // --- Parse and validate filter params (mutual exclusion) --- + let opts = &cmd.data.options; + let limit_opt = opts.iter().find(|o| o.name == "limit").and_then(|o| o.value.as_i64()); + let since_opt = opts.iter().find(|o| o.name == "since").and_then(|o| o.value.as_str()); + let days_opt = opts.iter().find(|o| o.name == "days").and_then(|o| o.value.as_i64()); + + let filter_count = limit_opt.is_some() as u8 + since_opt.is_some() as u8 + days_opt.is_some() as u8; + if filter_count > 1 { + let response = CreateInteractionResponse::Message( + CreateInteractionResponseMessage::new() + .content("⚠️ Please specify only one filter: `limit`, `since`, or `days`.") + .ephemeral(true), + ); + let _ = cmd.create_response(&ctx.http, response).await; + return; + } + + let filter = if let Some(n) = limit_opt { + if !(1..=5000).contains(&n) { + let response = CreateInteractionResponse::Message( + CreateInteractionResponseMessage::new() + .content("⚠️ `limit` must be between 1 and 5000.") + .ephemeral(true), + ); + let _ = cmd.create_response(&ctx.http, 
response).await; + return; + } + ExportFilter::Limit(n as usize) + } else if let Some(date_str) = since_opt { + match chrono::NaiveDate::parse_from_str(date_str, "%Y-%m-%d") { + Ok(date) => { + let ts_ms = date.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp_millis() as u64; + ExportFilter::After(timestamp_ms_to_snowflake(ts_ms)) + } + Err(_) => { + let response = CreateInteractionResponse::Message( + CreateInteractionResponseMessage::new() + .content("⚠️ `since` must be a valid date in YYYY-MM-DD format (UTC).") + .ephemeral(true), + ); + let _ = cmd.create_response(&ctx.http, response).await; + return; + } + } + } else if let Some(d) = days_opt { + if !(1..=365).contains(&d) { + let response = CreateInteractionResponse::Message( + CreateInteractionResponseMessage::new() + .content("⚠️ `days` must be between 1 and 365.") + .ephemeral(true), + ); + let _ = cmd.create_response(&ctx.http, response).await; + return; + } + let since_ts = chrono::Utc::now() - chrono::Duration::days(d); + let ts_ms = since_ts.timestamp_millis() as u64; + ExportFilter::After(timestamp_ms_to_snowflake(ts_ms)) + } else { + ExportFilter::All + }; + + let response = CreateInteractionResponse::Message( + CreateInteractionResponseMessage::new() + .content("Preparing thread export...") + .ephemeral(true), + ); + if let Err(e) = cmd.create_response(&ctx.http, response).await { + tracing::error!(error = %e, "failed to acknowledge /export-thread command"); + return; + } + + match export_channel_messages( + &ctx.http, + channel_id, + &export_name, + cmd.attachment_size_limit, + filter, + ) + .await + { + Ok(result) => { + let mut content = format!("Exported {} messages.", result.written); + if result.hit_cap { + content.push_str(&format!( + " Only the most recent {} messages were fetched — older messages were not included.", + result.fetched + )); + } + if result.byte_truncated { + content.push_str(&format!( + " Transcript truncated to fit Discord's attachment size limit ({} of {} fetched messages 
included).", + result.written, result.fetched + )); + } + let attachment = + CreateAttachment::bytes(result.transcript.into_bytes(), result.filename); + let followup = CreateInteractionResponseFollowup::new() + .content(content) + .add_file(attachment) + .ephemeral(true); + if let Err(e) = cmd.create_followup(&ctx.http, followup).await { + tracing::error!(error = %e, "failed to send /export-thread attachment"); + } + } + Err(e) => { + tracing::warn!(channel_id = %channel_id, error = %e, "failed to export thread"); + let followup = CreateInteractionResponseFollowup::new() + .content(format!("⚠️ Failed to export thread: {e}")) + .ephemeral(true); + if let Err(e) = cmd.create_followup(&ctx.http, followup).await { + tracing::error!(error = %e, "failed to send /export-thread error"); + } + } + } + } + async fn handle_config_select( &self, ctx: &Context, @@ -1412,6 +1612,283 @@ fn discord_msg_ref(msg: &Message) -> MessageRef { } } +struct ExportResult { + filename: String, + transcript: String, + /// Messages successfully pulled from Discord. + fetched: usize, + /// Messages that fit in the transcript (≤ `fetched`; differs when the + /// attachment-size limit truncates). + written: usize, + /// We stopped fetching because we hit the message cap and the thread still + /// has more messages we did not include. + hit_cap: bool, + /// Transcript was cut to keep the attachment under Discord's size limit. + byte_truncated: bool, +} + +/// Filter mode for export_channel_messages. +enum ExportFilter { + /// Fetch all messages (newest-first via `before`), capped at THREAD_EXPORT_MESSAGE_LIMIT. + All, + /// Fetch the most recent N messages (newest-first via `before`). + Limit(usize), + /// Fetch messages after a synthetic snowflake (newest-first via `before`, with boundary filtering). + After(MessageId), +} + +/// Discord epoch: 2015-01-01T00:00:00Z in milliseconds. 
+const DISCORD_EPOCH_MS: u64 = 1_420_070_400_000; + +/// Convert a UTC timestamp (in milliseconds since Unix epoch) to a synthetic +/// Discord snowflake suitable for use as an `after` cursor. +fn timestamp_ms_to_snowflake(timestamp_ms: u64) -> MessageId { + let discord_ms = timestamp_ms.saturating_sub(DISCORD_EPOCH_MS); + // Snowflake IDs use NonZeroU64 in serenity; ensure at least 1. + MessageId::new((discord_ms << 22).max(1)) +} + +async fn export_channel_messages( + http: &Http, + channel_id: ChannelId, + channel_name: &str, + attachment_size_limit: u32, + filter: ExportFilter, +) -> anyhow::Result<ExportResult> { + let cap = match &filter { + ExportFilter::Limit(n) => *n, + _ => THREAD_EXPORT_MESSAGE_LIMIT, + }; + + let mut messages = Vec::new(); + let mut hit_cap = false; + + match &filter { + ExportFilter::All | ExportFilter::Limit(_) => { + // Fetch newest-first using `before` pagination, then reverse. + let mut before = None; + loop { + if messages.len() >= cap { + hit_cap = true; + break; + } + let remaining = cap - messages.len(); + let limit = remaining.min(100) as u8; + let mut request = GetMessages::new().limit(limit); + if let Some(before_id) = before { + request = request.before(before_id); + } + let batch = channel_id.messages(http, request).await?; + if batch.is_empty() { + break; + } + before = batch.last().map(|m| m.id); + let batch_len = batch.len(); + messages.extend(batch); + if batch_len < limit as usize { + break; + } + } + // Probe to confirm we actually left messages behind. + if hit_cap { + let probe = GetMessages::new().limit(1); + let probe = if let Some(before_id) = before { + probe.before(before_id) + } else { + probe + }; + if matches!(channel_id.messages(http, probe).await, Ok(b) if b.is_empty()) { + hit_cap = false; + } + } + messages.reverse(); + } + ExportFilter::After(after_id) => { + // Fetch newest-first using `before` pagination, stop when we hit + // messages at or before the filter boundary.
This ensures that when + // the cap is reached, we keep the *newest* messages in the window. + let mut before = None; + loop { + if messages.len() >= cap { + hit_cap = true; + break; + } + let remaining = cap - messages.len(); + let limit = remaining.min(100) as u8; + let mut request = GetMessages::new().limit(limit); + if let Some(before_id) = before { + request = request.before(before_id); + } + let batch = channel_id.messages(http, request).await?; + if batch.is_empty() { + break; + } + before = batch.last().map(|m| m.id); + let batch_len = batch.len(); + // Filter out messages at or before the boundary. + let filtered: Vec<_> = batch.into_iter().filter(|m| m.id > *after_id).collect(); + let hit_boundary = filtered.len() < batch_len; + messages.extend(filtered); + if hit_boundary { + // We've reached the time boundary; no need to fetch older. + break; + } + if batch_len < limit as usize { + break; + } + } + // Probe only if we stopped due to cap (not boundary). + if hit_cap { + let probe = GetMessages::new().limit(1); + let probe = if let Some(before_id) = before { + probe.before(before_id) + } else { + probe + }; + if let Ok(batch) = channel_id.messages(http, probe).await { + // If the next message is beyond our filter boundary, + // we didn't actually leave relevant messages behind. 
+ let has_more_in_window = batch.iter().any(|m| m.id > *after_id); + if !has_more_in_window { + hit_cap = false; + } + } + } + messages.reverse(); + } + } + + let filename = export_filename(channel_id, channel_name); + if attachment_size_limit < 2048 { + tracing::warn!(attachment_size_limit, "attachment_size_limit is very small; export will likely be truncated"); + } + let max_bytes = usize::try_from(attachment_size_limit) + .unwrap_or(8 * 1024 * 1024) + .saturating_sub(1024) + .max(1024); + let (transcript, written, byte_truncated) = + format_thread_export(channel_id, channel_name, &messages, max_bytes); + let fetched = messages.len(); + + Ok(ExportResult { + filename, + transcript, + fetched, + written, + hit_cap, + byte_truncated, + }) +} + +fn format_thread_export( + channel_id: ChannelId, + channel_name: &str, + messages: &[Message], + max_bytes: usize, +) -> (String, usize, bool) { + let header = format!( + "Discord thread export\nChannel: {channel_name} ({channel_id})\nMessages: {}\n\n", + messages.len() + ); + let entries: Vec<String> = messages.iter().map(format_export_message).collect(); + assemble_export(&header, &entries, max_bytes) +} + +/// Build the transcript body from a pre-rendered header and a list of +/// already-formatted message entries, honouring `max_bytes`. +/// +/// Returns `(transcript, written, truncated)` where `written` is the number of +/// entries actually included. Split out from `format_thread_export` so the +/// truncation boundary logic can be unit-tested without constructing real +/// `serenity::model::channel::Message` values.
+fn assemble_export(header: &str, entries: &[String], max_bytes: usize) -> (String, usize, bool) { + let mut out = String::from(header); + let mut written = 0; + let mut truncated = false; + + for entry in entries { + if out.len() + entry.len() > max_bytes { + truncated = true; + break; + } + out.push_str(entry); + written += 1; + } + + if truncated { + let note = "\n[Export truncated to fit Discord attachment size limit]\n"; + let room = max_bytes.saturating_sub(out.len()); + if room >= note.len() { + out.push_str(note); + } + } + + (out, written, truncated) +} + +fn format_export_message(msg: &Message) -> String { + let bot_marker = if msg.author.bot { " [bot]" } else { "" }; + let mut out = format!( + "[{}] {}{} ({})\n", + msg.timestamp, + msg.author.name, + bot_marker, + msg.author.id + ); + + if msg.content.is_empty() { + out.push_str("(no text)\n"); + } else { + out.push_str(&msg.content); + out.push('\n'); + } + + for attachment in &msg.attachments { + let mime = attachment.content_type.as_deref().unwrap_or("unknown"); + out.push_str(&format!( + "[attachment] {} ({} bytes, {}): {}\n", + attachment.filename, attachment.size, mime, attachment.url + )); + } + + out.push('\n'); + out +} + +fn export_filename(channel_id: ChannelId, channel_name: &str) -> String { + let safe_name = sanitize_filename_component(channel_name); + format!("discord-thread-{safe_name}-{channel_id}.txt") +} + +/// Reduce a free-form Discord channel/thread name to a safe ASCII filename +/// fragment. +/// +/// Non-ASCII characters are dropped silently — a purely-Chinese thread name +/// like "扈三娘的房間" yields a date-based fallback (e.g. `"20260512"`). +/// The caller appends the channel ID, which already guarantees uniqueness, +/// and an ASCII fragment plays nicer with downstream tools (mail attachments, +/// S3 keys, browser save-as dialogs). The 64-byte cap leaves room for the +/// `discord-thread-` prefix and the channel-ID suffix within typical +/// filesystem limits. 
+fn sanitize_filename_component(input: &str) -> String { + let mut safe = String::with_capacity(input.len()); + for ch in input.chars() { + if ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_') { + safe.push(ch); + } else if ch.is_whitespace() || matches!(ch, '.' | '/') { + safe.push('-'); + } + } + let safe = safe.trim_matches('-'); + if safe.is_empty() { + // Use current date as a human-friendly fallback when the thread name + // is entirely non-ASCII. + chrono::Utc::now().format("%Y%m%d").to_string() + } else { + safe.chars().take(64).collect() + } +} + async fn get_or_create_thread( ctx: &Context, adapter: &Arc, @@ -1777,6 +2254,108 @@ mod tests { assert!(!is_thread_already_exists_error(&err)); } + // --- thread export helpers --- + + #[test] + fn sanitize_filename_component_keeps_safe_ascii() { + assert_eq!( + sanitize_filename_component("release notes_v2"), + "release-notes_v2" + ); + } + + #[test] + fn sanitize_filename_component_falls_back_for_empty_result() { + let result = sanitize_filename_component("///..."); + // Fallback is a YYYYMMDD date string + assert_eq!(result.len(), 8); + assert!(result.chars().all(|c| c.is_ascii_digit())); + } + + // --- assemble_export --- + // Split out from format_thread_export so we can test the truncation + // boundary without constructing serenity::model::channel::Message values. + + #[test] + fn assemble_export_empty_entries_returns_header_only() { + let (out, written, truncated) = assemble_export("HDR\n", &[], 1024); + assert_eq!(out, "HDR\n"); + assert_eq!(written, 0); + assert!(!truncated); + } + + #[test] + fn assemble_export_single_oversized_entry_writes_zero_and_marks_truncated() { + let entries = vec!["x".repeat(200)]; + let (out, written, truncated) = assemble_export("h\n", &entries, 50); + assert_eq!(written, 0); + assert!(truncated); + // Footer needs ~56 bytes; max_bytes 50 leaves ≤48 of room, so it is + // intentionally omitted (it can't be appended without exceeding the + // limit). 
The header is still present. + assert!(out.starts_with("h\n")); + assert!(!out.contains("xx")); + } + + #[test] + fn assemble_export_entry_at_exact_boundary_is_included() { + // header(2) + entry(3) == max_bytes(5); the strict-greater check + // keeps the entry in. + let (out, written, truncated) = assemble_export("h\n", &["abc".to_string()], 5); + assert_eq!(written, 1); + assert!(!truncated); + assert_eq!(out, "h\nabc"); + } + + #[test] + fn assemble_export_entry_one_byte_over_boundary_is_excluded() { + // header(2) + entry(4) == 6 > max_bytes(5); entry is dropped. + let (out, written, truncated) = assemble_export("h\n", &["abcd".to_string()], 5); + assert_eq!(written, 0); + assert!(truncated); + assert!(out.starts_with("h\n")); + assert!(!out.contains("abcd")); + } + + #[test] + fn assemble_export_appends_footer_when_room_remains() { + // First two short entries fit; the long third entry would overflow, + // and the remaining headroom is enough for the truncation footer. + let entries = vec!["a\n".to_string(), "b\n".to_string(), "c".repeat(500)]; + let (out, written, truncated) = assemble_export("h\n", &entries, 200); + assert_eq!(written, 2); + assert!(truncated); + assert!(out.contains("[Export truncated")); + } + + // --- snowflake conversion --- + + #[test] + fn timestamp_ms_to_snowflake_known_value() { + // 1_778_572_800_000 ms since Unix epoch = 2026-05-12 08:00:00 UTC + // Discord ms = 1778572800000 - 1420070400000 = 358502400000 + // Snowflake = 358502400000 << 22 = 1503668050329600000 + let ts_ms: u64 = 1_778_572_800_000; + let snowflake = timestamp_ms_to_snowflake(ts_ms); + // Verify round-trip: extract timestamp back from snowflake + let extracted_ms = (snowflake.get() >> 22) + DISCORD_EPOCH_MS; + assert_eq!(extracted_ms, ts_ms); + } + + #[test] + fn timestamp_ms_to_snowflake_at_discord_epoch_is_one() { + // At exactly the Discord epoch, discord_ms=0, shifted=0, clamped to 1 + let snowflake = timestamp_ms_to_snowflake(DISCORD_EPOCH_MS); + 
assert_eq!(snowflake.get(), 1); + } + + #[test] + fn timestamp_ms_to_snowflake_before_epoch_saturates() { + // Timestamp before Discord epoch should saturate to 1 + let snowflake = timestamp_ms_to_snowflake(1_000_000_000_000); + assert_eq!(snowflake.get(), 1); + } + // --- should_process_user_message tests (GIVEN/WHEN/THEN) --- // Tests the multibot-mentions gating logic extracted from EventHandler::message. // The bug in #481 was that other bots' messages were filtered by bot gating