diff --git a/crates/amalthea/src/error.rs b/crates/amalthea/src/error.rs index 99fdf15fa..119a39949 100644 --- a/crates/amalthea/src/error.rs +++ b/crates/amalthea/src/error.rs @@ -43,7 +43,6 @@ pub enum Error { UnknownCommName(String), UnknownCommId(String), InvalidCommMessage(String, String, String), - InvalidInputRequest(String), InvalidConsoleInput(String), Anyhow(anyhow::Error), ShellErrorReply(Exception), @@ -196,9 +195,6 @@ impl fmt::Display for Error { msg, id, err ) }, - Error::InvalidInputRequest(message) => { - write!(f, "{message}") - }, Error::InvalidConsoleInput(message) => { write!(f, "{message}") }, @@ -228,6 +224,6 @@ impl From> for Error { macro_rules! anyhow { ($($rest: expr),*) => {{ let message = anyhow::anyhow!($($rest, )*); - crate::error::Error::Anyhow(message) + $crate::error::Error::Anyhow(message) }} } diff --git a/crates/amalthea/src/fixtures/dummy_frontend.rs b/crates/amalthea/src/fixtures/dummy_frontend.rs index 137e453a2..04255b5e6 100644 --- a/crates/amalthea/src/fixtures/dummy_frontend.rs +++ b/crates/amalthea/src/fixtures/dummy_frontend.rs @@ -21,6 +21,8 @@ use crate::wire::jupyter_message::JupyterMessage; use crate::wire::jupyter_message::Message; use crate::wire::jupyter_message::ProtocolMessage; use crate::wire::jupyter_message::Status; +use crate::wire::shutdown_reply::ShutdownReply; +use crate::wire::shutdown_request::ShutdownRequest; use crate::wire::status::ExecutionState; use crate::wire::stream::Stream; use crate::wire::wire_message::WireMessage; @@ -36,7 +38,7 @@ pub struct DummyConnection { } pub struct DummyFrontend { - pub _control_socket: Socket, + pub control_socket: Socket, pub shell_socket: Socket, pub iopub_socket: Socket, pub stdin_socket: Socket, @@ -132,7 +134,7 @@ impl DummyFrontend { // the Jupyter specification, these must share a ZeroMQ identity. let shell_id = rand::thread_rng().gen::<[u8; 16]>(); - let _control_socket = Socket::new( + let control_socket = Socket::new( connection.session.clone(), connection.ctx.clone(), String::from("Control"), @@ -198,7 +200,7 @@ impl DummyFrontend { }); Self { - _control_socket, + control_socket, shell_socket, iopub_socket, stdin_socket, @@ -207,12 +209,22 @@ impl DummyFrontend { } } + /// Sends a Jupyter message on the Control socket; returns the ID of the newly + /// created message + pub fn send_control(&self, msg: T) -> String { + Self::send(&self.control_socket, &self.session, msg) + } + /// Sends a Jupyter message on the Shell socket; returns the ID of the newly /// created message pub fn send_shell(&self, msg: T) -> String { Self::send(&self.shell_socket, &self.session, msg) } + pub fn send_shutdown_request(&self, restart: bool) -> String { + self.send_control(ShutdownRequest { restart }) + } + pub fn send_execute_request(&self, code: &str, options: ExecuteRequestOptions) -> String { self.send_shell(ExecuteRequest { code: String::from(code), @@ -224,6 +236,77 @@ impl DummyFrontend { }) } + /// Sends an execute request and handles the standard message flow: + /// busy -> execute_input -> idle -> execute_reply. + /// Asserts that the input code matches and returns the execution count. + #[track_caller] + pub fn execute_request_invisibly(&self, code: &str) -> u32 { + self.send_execute_request(code, ExecuteRequestOptions::default()); + self.recv_iopub_busy(); + + let input = self.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + self.recv_iopub_idle(); + + let execution_count = self.recv_shell_execute_reply(); + assert_eq!(execution_count, input.execution_count); + + execution_count + } + + /// Sends an execute request and handles the standard message flow with a result: + /// busy -> execute_input -> execute_result -> idle -> execute_reply. + /// Asserts that the input code matches and passes the result to the callback. + /// Returns the execution count. + #[track_caller] + pub fn execute_request(&self, code: &str, result_check: F) -> u32 + where + F: FnOnce(String), + { + self.send_execute_request(code, ExecuteRequestOptions::default()); + self.recv_iopub_busy(); + + let input = self.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + let result = self.recv_iopub_execute_result(); + result_check(result); + + self.recv_iopub_idle(); + + let execution_count = self.recv_shell_execute_reply(); + assert_eq!(execution_count, input.execution_count); + + execution_count + } + + /// Sends an execute request that produces an error and handles the standard message flow: + /// busy -> execute_input -> execute_error -> idle -> execute_reply_exception. + /// Passes the error message to the callback for custom assertions. + /// Returns the execution count. + #[track_caller] + pub fn execute_request_error(&self, code: &str, error_check: F) -> u32 + where + F: FnOnce(String), + { + self.send_execute_request(code, ExecuteRequestOptions::default()); + self.recv_iopub_busy(); + + let input = self.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + let error_msg = self.recv_iopub_execute_error(); + error_check(error_msg); + + self.recv_iopub_idle(); + + let execution_count = self.recv_shell_execute_reply_exception(); + assert_eq!(execution_count, input.execution_count); + + execution_count + } + /// Sends a Jupyter message on the Stdin socket pub fn send_stdin(&self, msg: T) { Self::send(&self.stdin_socket, &self.session, msg); @@ -236,6 +319,7 @@ impl DummyFrontend { id } + #[track_caller] pub fn recv(socket: &Socket) -> Message { // It's important to wait with a timeout because the kernel thread might have // panicked, preventing it from sending the expected message. The tests would then @@ -246,6 +330,8 @@ impl DummyFrontend { // // Note that the panic hook will still have run to record the panic, so we'll get // expected panic information in the test output. + // + // If you're debugging tests, you'll need to bump this timeout to a large value. if socket.poll_incoming(10000).unwrap() { return Message::read_from_socket(socket).unwrap(); } @@ -253,21 +339,39 @@ impl DummyFrontend { panic!("Timeout while expecting message on socket {}", socket.name); } + /// Receives a Jupyter message from the Control socket + #[track_caller] + pub fn recv_control(&self) -> Message { + Self::recv(&self.control_socket) + } + /// Receives a Jupyter message from the Shell socket + #[track_caller] pub fn recv_shell(&self) -> Message { Self::recv(&self.shell_socket) } /// Receives a Jupyter message from the IOPub socket + #[track_caller] pub fn recv_iopub(&self) -> Message { Self::recv(&self.iopub_socket) } /// Receives a Jupyter message from the Stdin socket + #[track_caller] pub fn recv_stdin(&self) -> Message { Self::recv(&self.stdin_socket) } + /// Receive from Control and assert `ShutdownReply` message. + #[track_caller] + pub fn recv_control_shutdown_reply(&self) -> ShutdownReply { + let message = self.recv_control(); + assert_matches!(message, Message::ShutdownReply(message) => { + message.content + }) + } + /// Receive from Shell and assert `ExecuteReply` message. /// Returns `execution_count`. #[track_caller] @@ -349,30 +453,63 @@ impl DummyFrontend { assert_matches!(msg, Message::UpdateDisplayData(_)) } + /// Receive from IOPub Stream + /// + /// Stdout and Stderr Stream messages are buffered, so to reliably test + /// against them we have to collect the messages in batches on the receiving + /// end and compare against an expected message. + /// + /// The comparison is done with an assertive closure: we'll wait for more + /// output as long as the closure panics. + /// + /// Because closures can't track callers yet, the `recv_iopub_stream()` + /// variant is more ergonomic and should be preferred. + /// See for tracking issue. #[track_caller] - pub fn recv_iopub_stream_stdout(&self, expect: &str) { - self.recv_iopub_stream(expect, Stream::Stdout) + fn recv_iopub_stream_with(&self, stream: Stream, mut f: F) + where + F: FnMut(&str), + { + let mut out = String::new(); + + loop { + let msg = self.recv_iopub(); + let piece = assert_matches!(msg, Message::Stream(data) => { + assert_eq!(data.content.name, stream); + data.content.text + }); + out.push_str(&piece); + + match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + f(&out); + })) { + Ok(_) => break, + Err(_) => continue, + }; + } } #[track_caller] - pub fn recv_iopub_stream_stderr(&self, expect: &str) { - self.recv_iopub_stream(expect, Stream::Stderr) + pub fn recv_iopub_stream_stdout_with(&self, f: F) + where + F: FnMut(&str), + { + self.recv_iopub_stream_with(Stream::Stdout, f) } #[track_caller] - pub fn recv_iopub_comm_close(&self) -> String { - let msg = self.recv_iopub(); - - assert_matches!(msg, Message::CommClose(data) => { - data.content.comm_id - }) + pub fn recv_iopub_stream_stderr_with(&self, f: F) + where + F: FnMut(&str), + { + self.recv_iopub_stream_with(Stream::Stderr, f) } /// Receive from IOPub Stream /// - /// Stdout and Stderr Stream messages are buffered, so to reliably test against them - /// we have to collect the messages in batches on the receiving end and compare against - /// an expected message. + /// This variant compares the stream against its expected _last_ output. + /// We can't use `recv_iopub_stream_with()` here because closures + /// can't track callers. #[track_caller] fn recv_iopub_stream(&self, expect: &str, stream: Stream) { let mut out = String::new(); @@ -381,23 +518,15 @@ impl DummyFrontend { // Receive a piece of stream output (with a timeout) let msg = self.recv_iopub(); - // Assert its type let piece = assert_matches!(msg, Message::Stream(data) => { assert_eq!(data.content.name, stream); data.content.text }); - // Add to what we've already collected out += piece.as_str(); - if out == expect { - // Done, found the entire `expect` string - return; - } - - if !expect.starts_with(out.as_str()) { - // Something is wrong, message doesn't match up - panic!("Expected IOPub stream of '{expect}'. Actual stream of '{out}'."); + if out.ends_with(expect) { + break; } // We have a prefix of `expect`, but not the whole message yet. @@ -405,6 +534,29 @@ impl DummyFrontend { } } + /// Receives stdout stream output until the collected output ends with + /// `expect`. Note: The comparison uses `ends_with`, not full equality. + #[track_caller] + pub fn recv_iopub_stream_stdout(&self, expect: &str) { + self.recv_iopub_stream(expect, Stream::Stdout) + } + + /// Receives stderr stream output until the collected output ends with + /// `expect`. Note: The comparison uses `ends_with`, not full equality. + #[track_caller] + pub fn recv_iopub_stream_stderr(&self, expect: &str) { + self.recv_iopub_stream(expect, Stream::Stderr) + } + + #[track_caller] + pub fn recv_iopub_comm_close(&self) -> String { + let msg = self.recv_iopub(); + + assert_matches!(msg, Message::CommClose(data) => { + data.content.comm_id + }) + } + /// Receive from IOPub and assert ExecuteResult message. Returns compulsory /// `evalue` field. #[track_caller] diff --git a/crates/amalthea/src/socket/control.rs b/crates/amalthea/src/socket/control.rs index 8e7d05716..d62c19433 100644 --- a/crates/amalthea/src/socket/control.rs +++ b/crates/amalthea/src/socket/control.rs @@ -102,6 +102,11 @@ impl Control { H: FnOnce(JupyterMessage) -> Result<(), Error>, { // Enter the kernel-busy state in preparation for handling the message. + // The protocol specification is vague about status messages for + // Control, we mostly emit them for compatibility with ipykernel: + // https://github.com/ipython/ipykernel/pull/585. These status messages + // can be discriminated from those on Shell by examining the parent + // header. if let Err(err) = self.send_state(req.clone(), ExecutionState::Busy) { warn!("Failed to change kernel status to busy: {err}"); } diff --git a/crates/amalthea/src/wire/jupyter_message.rs b/crates/amalthea/src/wire/jupyter_message.rs index 76d835605..b60ae002f 100644 --- a/crates/amalthea/src/wire/jupyter_message.rs +++ b/crates/amalthea/src/wire/jupyter_message.rs @@ -46,6 +46,7 @@ use crate::wire::is_complete_reply::IsCompleteReply; use crate::wire::is_complete_request::IsCompleteRequest; use crate::wire::kernel_info_request::KernelInfoRequest; use crate::wire::originator::Originator; +use crate::wire::shutdown_reply::ShutdownReply; use crate::wire::shutdown_request::ShutdownRequest; use crate::wire::status::KernelStatus; use crate::wire::wire_message::WireMessage; @@ -101,6 +102,7 @@ pub enum Message { // Control InterruptReply(JupyterMessage), InterruptRequest(JupyterMessage), + ShutdownReply(JupyterMessage), ShutdownRequest(JupyterMessage), // Registration HandshakeRequest(JupyterMessage), @@ -163,6 +165,7 @@ impl TryFrom<&Message> for WireMessage { Message::IsCompleteRequest(msg) => WireMessage::try_from(msg), Message::KernelInfoReply(msg) => WireMessage::try_from(msg), Message::KernelInfoRequest(msg) => WireMessage::try_from(msg), + Message::ShutdownReply(msg) => WireMessage::try_from(msg), Message::ShutdownRequest(msg) => WireMessage::try_from(msg), Message::Status(msg) => WireMessage::try_from(msg), Message::CommInfoReply(msg) => WireMessage::try_from(msg), @@ -245,6 +248,9 @@ impl TryFrom<&WireMessage> for Message { if kind == UpdateDisplayData::message_type() { return Ok(Message::UpdateDisplayData(JupyterMessage::try_from(msg)?)); } + if kind == ShutdownReply::message_type() { + return Ok(Message::ShutdownReply(JupyterMessage::try_from(msg)?)); + } if kind == ShutdownRequest::message_type() { return Ok(Message::ShutdownRequest(JupyterMessage::try_from(msg)?)); } diff --git a/crates/amalthea/tests/client.rs b/crates/amalthea/tests/client.rs index e20f27fdb..3c1570bbd 100644 --- a/crates/amalthea/tests/client.rs +++ b/crates/amalthea/tests/client.rs @@ -19,6 +19,7 @@ use amalthea::wire::comm_info_request::CommInfoRequest; use amalthea::wire::comm_msg::CommWireMsg; use amalthea::wire::comm_open::CommOpen; use amalthea::wire::jupyter_message::Message; +use amalthea::wire::jupyter_message::Status; use amalthea::wire::kernel_info_request::KernelInfoRequest; use amalthea::wire::status::ExecutionState; use assert_matches::assert_matches; @@ -63,6 +64,31 @@ fn test_amalthea_execute_request() { frontend.recv_iopub_idle(); } +#[test] +fn test_amalthea_shutdown_request() { + let frontend = DummyAmaltheaFrontend::lock(); + + // Send a shutdown request with restart = false + frontend.send_shutdown_request(false); + frontend.recv_iopub_busy(); + + let reply = frontend.recv_control_shutdown_reply(); + assert_eq!(reply.status, Status::Ok); + assert_eq!(reply.restart, false); + frontend.recv_iopub_idle(); + + // Test again with restart = true. + // Although the R thread has shut down, the Amalthea thread keeps running + // and is able to reply. + frontend.send_shutdown_request(true); + frontend.recv_iopub_busy(); + + let reply = frontend.recv_control_shutdown_reply(); + assert_eq!(reply.status, Status::Ok); + assert_eq!(reply.restart, true); + frontend.recv_iopub_idle(); +} + #[test] fn test_amalthea_input_request() { let frontend = DummyAmaltheaFrontend::lock(); diff --git a/crates/ark/src/control.rs b/crates/ark/src/control.rs index 93fcfbc9b..b005e8485 100644 --- a/crates/ark/src/control.rs +++ b/crates/ark/src/control.rs @@ -36,6 +36,12 @@ impl ControlHandler for Control { ) -> Result { log::info!("Received shutdown request: {msg:?}"); + // Interrupt any ongoing computation. We shut down from ReadConsole when + // R has become idle again. Note that Positron will have interrupted us + // beforehand, but another frontend might not have, and it's good to + // have this as a defensive measure in any case. + crate::sys::control::handle_interrupt_request(); + // According to the Jupyter protocol we should block here until the // shutdown is complete. However AFAICS ipykernel doesn't wait // until complete shutdown before replying and instead just signals diff --git a/crates/ark/src/dap/dap.rs b/crates/ark/src/dap/dap.rs index 10719f519..8a1c96f7c 100644 --- a/crates/ark/src/dap/dap.rs +++ b/crates/ark/src/dap/dap.rs @@ -18,8 +18,8 @@ use harp::object::RObject; use stdext::result::ResultExt; use stdext::spawn; -use crate::dap::dap_r_main::FrameInfo; use crate::dap::dap_server; +use crate::repl_debug::FrameInfo; use crate::request::RRequest; use crate::thread::RThreadSafe; diff --git a/crates/ark/src/dap/dap_r_main.rs b/crates/ark/src/dap/dap_r_main.rs deleted file mode 100644 index 3b9d96a70..000000000 --- a/crates/ark/src/dap/dap_r_main.rs +++ /dev/null @@ -1,359 +0,0 @@ -// -// dap_r_main.rs -// -// Copyright (C) 2024 Posit Software, PBC. All rights reserved. -// -// - -use std::collections::HashMap; -use std::sync::Arc; -use std::sync::Mutex; - -use anyhow::anyhow; -use harp::exec::RFunction; -use harp::exec::RFunctionExt; -use harp::object::RObject; -use harp::protect::RProtect; -use harp::r_string; -use harp::session::r_sys_calls; -use harp::session::r_sys_frames; -use harp::session::r_sys_functions; -use harp::utils::r_is_null; -use libr::R_NilValue; -use libr::R_Srcref; -use libr::Rf_allocVector; -use libr::Rf_xlength; -use libr::INTSXP; -use libr::SET_INTEGER_ELT; -use libr::SEXP; -use libr::VECTOR_ELT; -use stdext::result::ResultExt; - -use crate::dap::dap::DapBackendEvent; -use crate::dap::Dap; -use crate::modules::ARK_ENVS; -use crate::thread::RThreadSafe; - -pub struct RMainDap { - /// Underlying dap state - dap: Arc>, - - /// Whether or not we are currently in a debugging state. - debugging: bool, - - /// The current call emitted by R as `debug: `. - call_text: DebugCallText, - - /// The last known `start_line` for the active context frame. - last_start_line: Option, - - /// The current frame `id`. Unique across all frames within a single debug session. - /// Reset after `stop_debug()`, not between debug steps. If we reset between steps, - /// we could potentially have a race condition where `handle_scopes()` could request - /// a `variables_reference` for a `frame_id` that we've already overwritten the - /// `variables_reference` for, potentially sending back incorrect information. - current_frame_info_id: i64, -} - -#[derive(Clone, Debug)] -pub enum DebugCallText { - None, - Capturing(String), - Finalized(String), -} - -#[derive(Debug)] -pub struct FrameInfo { - pub id: i64, - /// The name shown in the editor tab bar when this frame is viewed. - pub source_name: String, - /// The name shown in the stack frame UI when this frame is visible. - pub frame_name: String, - pub source: FrameSource, - pub environment: Option>, - pub start_line: i64, - pub start_column: i64, - pub end_line: i64, - pub end_column: i64, -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum FrameSource { - File(String), - Text(String), -} - -/// Version of `FrameInfo` that identifies the frame by value and doesn't keep a -/// reference to the environment. -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct FrameInfoId { - pub source: FrameSource, - pub start_line: i64, - pub start_column: i64, - pub end_line: i64, - pub end_column: i64, -} - -impl From<&FrameInfo> for FrameInfoId { - fn from(info: &FrameInfo) -> Self { - FrameInfoId { - source: info.source.clone(), - start_line: info.start_line, - start_column: info.start_column, - end_line: info.end_line, - end_column: info.end_column, - } - } -} - -impl RMainDap { - pub fn new(dap: Arc>) -> Self { - Self { - dap, - debugging: false, - call_text: DebugCallText::None, - last_start_line: None, - current_frame_info_id: 0, - } - } - - pub fn is_debugging(&self) -> bool { - self.debugging - } - - pub fn start_debug( - &mut self, - stack: Vec, - preserve_focus: bool, - fallback_sources: HashMap, - ) { - self.debugging = true; - let mut dap = self.dap.lock().unwrap(); - dap.start_debug(stack, preserve_focus, fallback_sources) - } - - pub fn stop_debug(&mut self) { - let mut dap = self.dap.lock().unwrap(); - dap.stop_debug(); - drop(dap); - - self.reset_frame_id(); - self.debugging = false; - } - - pub fn handle_stdout(&mut self, content: &str) { - if let DebugCallText::Capturing(ref mut call_text) = self.call_text { - // Append to current expression if we are currently capturing stdout - call_text.push_str(content); - return; - } - - // `debug: ` is emitted by R (if no srcrefs are available!) right before it emits - // the current expression we are debugging, so we use that as a signal to begin - // capturing. - if content == "debug: " { - self.call_text = DebugCallText::Capturing(String::new()); - return; - } - - // Entering or exiting a closure, reset the debug start line state and call text - if content == "debugging in: " || content == "exiting from: " { - self.last_start_line = None; - self.call_text = DebugCallText::None; - return; - } - } - - pub fn finalize_call_text(&mut self) { - match &self.call_text { - // If not debugging, nothing to do. - DebugCallText::None => (), - // If already finalized, keep what we have. - DebugCallText::Finalized(_) => (), - // If capturing, transition to finalized. - DebugCallText::Capturing(call_text) => { - self.call_text = DebugCallText::Finalized(call_text.clone()) - }, - } - } - - pub fn send_dap(&self, event: DapBackendEvent) { - let dap = self.dap.lock().unwrap(); - if let Some(tx) = &dap.backend_events_tx { - tx.send(event).log_err(); - } - } - - pub fn stack_info(&mut self) -> anyhow::Result> { - // We leave finalized `call_text` in place rather than setting it to `None` here - // in case the user executes an arbitrary expression in the debug R console, which - // loops us back here without updating the `call_text` in any way, allowing us to - // recreate the debugger state after their code execution. - let call_text = match self.call_text.clone() { - DebugCallText::None => None, - DebugCallText::Capturing(call_text) => { - log::error!( - "Call text is in `Capturing` state, but should be `Finalized`: '{call_text}'." - ); - None - }, - DebugCallText::Finalized(call_text) => Some(call_text), - }; - - let last_start_line = self.last_start_line; - - let frames = self.r_stack_info(call_text, last_start_line)?; - - // If we have `frames`, update the `last_start_line` with the context - // frame's start line - if let Some(frame) = frames.get(0) { - self.last_start_line = Some(frame.start_line); - } - - Ok(frames) - } - - fn r_stack_info( - &mut self, - context_call_text: Option, - context_last_start_line: Option, - ) -> anyhow::Result> { - unsafe { - let mut protect = RProtect::new(); - - let context_srcref = libr::get(R_Srcref); - protect.add(context_srcref); - - let context_call_text = match context_call_text { - Some(context_call_text) => r_string!(context_call_text, &mut protect), - None => R_NilValue, - }; - - let context_last_start_line = match context_last_start_line { - Some(context_last_start_line) => { - let x = Rf_allocVector(INTSXP, 1); - protect.add(x); - SET_INTEGER_ELT(x, 0, i32::try_from(context_last_start_line)?); - x - }, - None => R_NilValue, - }; - - let functions = r_sys_functions()?; - protect.add(functions); - - let environments = r_sys_frames()?; - protect.add(environments.sexp); - - let calls = r_sys_calls()?; - protect.add(calls.sexp); - - let info = RFunction::new("", "debugger_stack_info") - .add(context_call_text) - .add(context_last_start_line) - .add(context_srcref) - .add(functions) - .add(environments) - .add(calls) - .call_in(ARK_ENVS.positron_ns)?; - - let n: isize = Rf_xlength(info.sexp); - - let mut out = Vec::with_capacity(n as usize); - - // Reverse the order for DAP - for i in (0..n).rev() { - let frame = VECTOR_ELT(info.sexp, i); - out.push(self.as_frame_info(frame)?); - } - - Ok(out) - } - } - - fn as_frame_info(&mut self, info: SEXP) -> anyhow::Result { - unsafe { - let mut i = 0; - - let source_name = VECTOR_ELT(info, i); - let source_name: String = RObject::view(source_name).try_into()?; - - i += 1; - let frame_name = VECTOR_ELT(info, i); - let frame_name: String = RObject::view(frame_name).try_into()?; - - let mut source = None; - - i += 1; - let file = VECTOR_ELT(info, i); - if file != R_NilValue { - let file: String = RObject::view(file).try_into()?; - source = Some(FrameSource::File(file)); - } - - i += 1; - let text = VECTOR_ELT(info, i); - if text != R_NilValue { - let text: String = RObject::view(text).try_into()?; - source = Some(FrameSource::Text(text)); - } - - let Some(source) = source else { - return Err(anyhow!( - "Expected either `file` or `text` to be non-`NULL`." - )); - }; - - i += 1; - let environment = VECTOR_ELT(info, i); - let environment = if r_is_null(environment) { - None - } else { - Some(RThreadSafe::new(RObject::from(environment))) - }; - - i += 1; - let start_line = VECTOR_ELT(info, i); - let start_line: i32 = RObject::view(start_line).try_into()?; - - i += 1; - let start_column = VECTOR_ELT(info, i); - let start_column: i32 = RObject::view(start_column).try_into()?; - - i += 1; - let end_line = VECTOR_ELT(info, i); - let end_line: i32 = RObject::view(end_line).try_into()?; - - // For `end_column`, the column range provided by R is inclusive `[,]`, but the - // one used on the DAP / Positron side is exclusive `[,)` so we have to add 1. - i += 1; - let end_column = VECTOR_ELT(info, i); - let end_column: i32 = RObject::view(end_column).try_into()?; - let end_column = end_column + 1; - - let id = self.next_frame_id(); - - Ok(FrameInfo { - id, - source_name, - frame_name, - source, - environment, - start_line: start_line.try_into()?, - start_column: start_column.try_into()?, - end_line: end_line.try_into()?, - end_column: end_column.try_into()?, - }) - } - } - - fn next_frame_id(&mut self) -> i64 { - let out = self.current_frame_info_id; - self.current_frame_info_id += 1; - out - } - - fn reset_frame_id(&mut self) { - self.current_frame_info_id = 0; - } -} diff --git a/crates/ark/src/dap/dap_server.rs b/crates/ark/src/dap/dap_server.rs index 8b7a02afb..841c7280f 100644 --- a/crates/ark/src/dap/dap_server.rs +++ b/crates/ark/src/dap/dap_server.rs @@ -34,11 +34,11 @@ use stdext::spawn; use super::dap::Dap; use super::dap::DapBackendEvent; use crate::dap::dap::DapStoppedEvent; -use crate::dap::dap_r_main::FrameInfo; -use crate::dap::dap_r_main::FrameSource; use crate::dap::dap_variables::object_variables; use crate::dap::dap_variables::RVariable; use crate::r_task; +use crate::repl_debug::FrameInfo; +use crate::repl_debug::FrameSource; use crate::request::debug_request_command; use crate::request::DebugRequest; use crate::request::RRequest; diff --git a/crates/ark/src/dap/mod.rs b/crates/ark/src/dap/mod.rs index 1d56d1f30..2482fdc87 100644 --- a/crates/ark/src/dap/mod.rs +++ b/crates/ark/src/dap/mod.rs @@ -6,7 +6,6 @@ // pub mod dap; -pub mod dap_r_main; pub mod dap_server; pub mod dap_variables; diff --git a/crates/ark/src/errors.rs b/crates/ark/src/errors.rs index ba36a76b2..960d1b296 100644 --- a/crates/ark/src/errors.rs +++ b/crates/ark/src/errors.rs @@ -5,6 +5,9 @@ // // +use amalthea::wire::exception::Exception; +use harp::exec::r_peek_error_buffer; +use harp::exec::RE_STACK_OVERFLOW; use harp::object::RObject; use harp::r_symbol; use harp::session::r_format_traceback; @@ -37,9 +40,16 @@ unsafe extern "C-unwind" fn ps_record_error(evalue: SEXP, traceback: SEXP) -> an Vec::::new() }); - main.error_occurred = true; - main.error_message = evalue; - main.error_traceback = traceback; + main.last_error = Some( + // We don't fill out `ename` with anything meaningful because typically + // R errors don't have names. We could consider using the condition class + // here, which r-lib/tidyverse packages have been using more heavily. + Exception { + ename: String::from(""), + evalue, + traceback, + }, + ); Ok(R_NilValue) } @@ -67,3 +77,12 @@ unsafe extern "C-unwind" fn ps_rust_backtrace() -> anyhow::Result { let trace = format!("{trace}"); Ok(*RObject::from(trace)) } + +pub(crate) fn stack_overflow_occurred() -> bool { + // Error handlers are not called on stack overflow so the error flag + // isn't set. Instead we detect stack overflows by peeking at the error + // buffer. The message is explicitly not translated to save stack space + // so the matching should be reliable. + let err_buf = r_peek_error_buffer(); + RE_STACK_OVERFLOW.is_match(&err_buf) +} diff --git a/crates/ark/src/fixtures/dummy_frontend.rs b/crates/ark/src/fixtures/dummy_frontend.rs index 5823dd16c..3a12f8fc5 100644 --- a/crates/ark/src/fixtures/dummy_frontend.rs +++ b/crates/ark/src/fixtures/dummy_frontend.rs @@ -4,6 +4,7 @@ use std::sync::Arc; use std::sync::Mutex; use std::sync::MutexGuard; use std::sync::OnceLock; +use std::time::Duration; use amalthea::fixtures::dummy_frontend::DummyConnection; use amalthea::fixtures::dummy_frontend::DummyFrontend; @@ -62,6 +63,25 @@ impl DummyArkFrontend { } } + /// Wait for R cleanup to start (indicating shutdown has been initiated). + /// Panics if cleanup doesn't start within the timeout. + #[cfg(unix)] + #[track_caller] + pub fn wait_for_cleanup() { + use crate::sys::interface::CLEANUP_SIGNAL; + + let (lock, cvar) = &CLEANUP_SIGNAL; + let result = cvar + .wait_timeout_while(lock.lock().unwrap(), Duration::from_secs(3), |started| { + !*started + }) + .unwrap(); + + if !*result.0 { + panic!("Cleanup did not start within timeout"); + } + } + fn get_frontend() -> &'static Arc> { // These are the hard-coded defaults. Call `init()` explicitly to // override. diff --git a/crates/ark/src/help/r_help.rs b/crates/ark/src/help/r_help.rs index 3bff54d39..aa5977572 100644 --- a/crates/ark/src/help/r_help.rs +++ b/crates/ark/src/help/r_help.rs @@ -271,22 +271,21 @@ impl RHelp { let env = (|| { #[cfg(not(test))] if RMain::is_initialized() { - if let Some(debug_env) = &RMain::get().debug_env() { - // Mem-Safety: Object protected by `RMain` for the duration of the `r_task()` - return debug_env.sexp; + if let Ok(debug_env) = &RMain::get().read_console_frame.try_borrow() { + return (*debug_env).clone(); } } - R_GlobalEnv + RObject::from(R_GlobalEnv) })(); - let obj = harp::parse_eval0(topic.as_str(), env)?; + let obj = harp::parse_eval0(topic.as_str(), env.sexp)?; let handler: Option = ArkGenerics::HelpGetHandler.try_dispatch(obj.sexp, vec![])?; if let Some(handler) = handler { let mut fun = RFunction::new_inlined(handler); - match fun.call_in(env) { + match fun.call_in(env.sexp) { Err(err) => { log::error!("Error calling help handler: {:?}", err); return Err(anyhow!("Error calling help handler: {:?}", err)); diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index dcc26f0fc..b72894eae 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -10,6 +10,7 @@ // The frontend methods called by R are forwarded to the corresponding // `RMain` methods via `R_MAIN`. +use std::cell::Cell; use std::cell::RefCell; use std::cell::UnsafeCell; use std::collections::HashMap; @@ -59,12 +60,12 @@ use harp::command::r_home_setup; use harp::environment::r_ns_env; use harp::environment::Environment; use harp::environment::R_ENVS; +use harp::exec::exec_with_cleanup; use harp::exec::r_check_stack; use harp::exec::r_peek_error_buffer; use harp::exec::r_sandbox; use harp::exec::RFunction; use harp::exec::RFunctionExt; -use harp::exec::RE_STACK_OVERFLOW; use harp::library::RLibraries; use harp::line_ending::convert_line_endings; use harp::line_ending::LineEnding; @@ -73,6 +74,9 @@ use harp::object::RObject; use harp::r_symbol; use harp::routines::r_register_routines; use harp::session::r_traceback; +use harp::srcref::get_srcref_list; +use harp::srcref::srcref_list_get; +use harp::srcref::SrcFile; use harp::utils::r_is_data_frame; use harp::utils::r_typeof; use harp::R_MAIN_THREAD_ID; @@ -93,10 +97,9 @@ use tokio::sync::mpsc::UnboundedReceiver as AsyncUnboundedReceiver; use uuid::Uuid; use crate::dap::dap::DapBackendEvent; -use crate::dap::dap_r_main::FrameInfoId; -use crate::dap::dap_r_main::RMainDap; use crate::dap::Dap; use crate::errors; +use crate::errors::stack_overflow_occurred; use crate::help::message::HelpEvent; use crate::help::r_help::RHelp; use crate::lsp::events::EVENTS; @@ -115,6 +118,7 @@ use crate::r_task::BoxFuture; use crate::r_task::RTask; use crate::r_task::RTaskStartInfo; use crate::r_task::RTaskStatus; +use crate::repl_debug::FrameInfoId; use crate::repos::apply_default_repos; use crate::repos::DefaultRepos; use crate::request::debug_request_command; @@ -123,11 +127,9 @@ use crate::request::RRequest; use crate::signals::initialize_signal_handlers; use crate::signals::interrupts_pending; use crate::signals::set_interrupts_pending; -use crate::srcref::ark_uri; use crate::srcref::ns_populate_srcref; use crate::srcref::resource_loaded_namespaces; use crate::startup; -use crate::strings::lines; use crate::sys::console::console_to_utf8; use crate::ui::UiCommMessage; use crate::ui::UiCommSender; @@ -148,6 +150,13 @@ pub enum SessionMode { Background, } +#[derive(Clone, Debug)] +pub enum DebugCallText { + None, + Capturing(String), + Finalized(String), +} + // --- Globals --- // These values must be global in order for them to be accessible from R // callbacks, which do not have a facility for passing or returning context. @@ -214,10 +223,9 @@ pub struct RMain { /// by forwarding them through the UI comm. Optional, and really Positron specific. ui_comm_tx: Option, - /// Represents whether an error occurred during R code execution. - pub error_occurred: bool, - pub error_message: String, // `evalue` in the Jupyter protocol - pub error_traceback: Vec, + /// Error captured by our global condition handler during the last iteration + /// of the REPL. + pub(crate) last_error: Option, /// Channel to communicate with the Help thread help_event_tx: Option>, @@ -231,11 +239,9 @@ pub struct RMain { /// initially connects and after an LSP restart. lsp_virtual_documents: HashMap, - dap: RMainDap, - pub positron_ns: Option, - pending_lines: Vec, + pending_inputs: Option, /// Banner output accumulated during startup, but set to `None` after we complete /// the initialization procedure and forward the banner on @@ -255,18 +261,165 @@ pub struct RMain { /// See https://github.com/posit-dev/positron/issues/3151. debug_preserve_focus: bool, + /// Underlying dap state. Shared with the DAP server thread. + pub(crate) debug_dap: Arc>, + + /// Whether or not we are currently in a debugging state. + pub(crate) debug_is_debugging: bool, + + /// The current call emitted by R as `debug: `. + pub(crate) debug_call_text: DebugCallText, + + /// The last known `start_line` for the active context frame. + pub(crate) debug_last_line: Option, + /// The stack of frames we saw the last time we stopped. Used as a mostly /// reliable indication of whether we moved since last time. - debug_last_stack: Vec, - - /// Current topmost environment on the stack while waiting for input in the - /// debugger. This is `Some()` only when R is idle and in a `browser()` - /// prompt. - debug_env: Option, + pub(crate) debug_last_stack: Vec, /// Ever increasing debug session index. Used to create URIs that are only /// valid for a single session. - debug_session_index: u32, + pub(crate) debug_session_index: u32, + + /// The current frame `id`. Unique across all frames within a single debug session. + /// Reset after `stop_debug()`, not between debug steps. + pub(crate) debug_current_frame_id: i64, + + /// Tracks how many nested `r_read_console()` calls are on the stack. + /// Incremented when entering `r_read_console(),` decremented on exit. + read_console_depth: Cell, + + /// Set to true when `r_read_console()` exits via an error longjump. Used to + /// detect if we need to go return from `r_read_console()` with a dummy + /// evaluation to reset things like `R_EvalDepth`. + read_console_threw_error: Cell, + + /// Set to true when `r_read_console()` exits. Reset to false at the start + /// of each `r_read_console()` call. Used to detect if `eval()` returned + /// from a nested REPL (the flag will be true when the evaluation returns). + /// In these cases, we need to return from `r_read_console()` with a dummy + /// evaluation to reset things like `R_ConsoleIob`. + read_console_nested_return: Cell, + + /// Used to track an input to evaluate upon returning to `r_read_console()`, + /// after having returned a dummy input to reset `R_ConsoleIob` in R's REPL. + read_console_nested_return_next_input: Cell>, + + /// We've received a Shutdown signal and need to return EOF from all nested + /// consoles to get R to shut down + read_console_shutdown: Cell, + + /// Current topmost environment on the stack while waiting for input in ReadConsole. + /// This is a RefCell since we require `get()` for this field and `RObject` isn't `Copy`. + pub(crate) read_console_frame: RefCell, +} + +/// Stack of pending inputs +struct PendingInputs { + /// EXPRSXP vector of parsed expressions + exprs: RObject, + /// List of srcrefs if any, the same length as `exprs` + srcrefs: Option, + /// Length of `exprs` and `srcrefs` + len: isize, + /// Index into the stack + index: isize, +} + +enum ParseResult { + Success(Option), + SyntaxError(String), +} + +impl PendingInputs { + pub(crate) fn read(input: &str) -> anyhow::Result> { + let mut _srcfile = None; + + let input = if harp::get_option_bool("keep.source") { + _srcfile = Some(SrcFile::new_virtual_empty_filename(input.into())); + harp::ParseInput::SrcFile(&_srcfile.unwrap()) + } else { + harp::ParseInput::Text(input) + }; + + let status = match harp::parse_status(&input) { + Err(err) => { + // Failed to even attempt to parse the input, something is seriously wrong + return Ok(ParseResult::SyntaxError(format!("{err}"))); + }, + Ok(status) => status, + }; + + // - Incomplete inputs put R into a state where it expects more input that will never come, so we + // immediately reject them. Positron should never send us these, but Jupyter Notebooks may. + // - Complete statements are obviously fine. + // - Syntax errors will get bubbled up as R errors via an `ConsoleResult::Error`. + let exprs = match status { + harp::ParseResult::Complete(exprs) => exprs, + harp::ParseResult::Incomplete => { + return Ok(ParseResult::SyntaxError(format!( + "Can't parse incomplete input" + ))); + }, + harp::ParseResult::SyntaxError { message, .. } => { + return Ok(ParseResult::SyntaxError(format!("Syntax error: {message}"))); + }, + }; + + let srcrefs = get_srcref_list(exprs.sexp); + + let len = exprs.length(); + let index = 0; + + if len == 0 { + return Ok(ParseResult::Success(None)); + } + + Ok(ParseResult::Success(Some(Self { + exprs, + srcrefs, + len, + index, + }))) + } + + pub(crate) fn is_empty(&self) -> bool { + self.index >= self.len + } + + pub(crate) fn pop(&mut self) -> Option { + if self.is_empty() { + return None; + } + + let expr = RObject::new(harp::list_get(self.exprs.sexp, self.index)); + + let srcref = self + .srcrefs + .as_ref() + .map(|xs| srcref_list_get(xs.sexp, self.index)) + .unwrap_or(RObject::null()); + + self.index += 1; + Some(PendingInput { expr, srcref }) + } +} + +#[derive(Debug)] +pub(crate) struct PendingInput { + expr: RObject, + srcref: RObject, +} + +#[derive(Debug, Clone)] +enum ConsoleValue { + Success(serde_json::Map), + Error(Exception), +} + +enum WaitFor { + InputReply, + ExecuteRequest, } /// Represents the currently active execution request from the frontend. It @@ -287,6 +440,19 @@ pub struct KernelInfo { pub continuation_prompt: Option, } +/// The kind of prompt we're handling in the REPL. +#[derive(Clone, Debug, PartialEq)] +pub enum PromptKind { + /// A top-level REPL prompt + TopLevel, + + /// A `browser()` debugging prompt + Browser, + + /// A user input request from code, e.g., via `readline()` + InputRequest, +} + /// This struct represents the data that we wish R would pass to /// `ReadConsole()` methods. We need this information to determine what kind /// of prompt we are dealing with. @@ -298,7 +464,7 @@ pub struct PromptInfo { input_prompt: String, /// The continuation prompt string when user supplies incomplete - /// inputs. This always corresponds to `getOption("continue"). We send + /// inputs. This always corresponds to `getOption("continue")`. We send /// it to frontends along with `prompt` because some frontends such as /// Positron do not send incomplete inputs to Ark and take charge of /// continuation prompts themselves. For frontends that can send @@ -306,16 +472,8 @@ pub struct PromptInfo { /// error on them rather than requesting that this be shown. continuation_prompt: String, - /// Whether this is a `browser()` prompt. A browser prompt can be - /// incomplete but is never a user request. - browser: bool, - - /// Whether the last input didn't fully parse and R is waiting for more input - incomplete: bool, - - /// Whether this is a prompt from a fresh REPL iteration (browser or - /// top level) or a prompt from some user code, e.g. via `readline()` - input_request: bool, + /// The kind of prompt we're handling. + kind: PromptKind, } pub enum ConsoleInput { @@ -323,11 +481,13 @@ pub enum ConsoleInput { Input(String), } -pub enum ConsoleResult { +#[derive(Debug)] +pub(crate) enum ConsoleResult { NewInput, + NewPendingInput(PendingInput), Interrupt, Disconnected, - Error(amalthea::Error), + Error(String), } impl RMain { @@ -476,6 +636,9 @@ impl RMain { if let Err(err) = apply_default_repos(default_repos) { log::error!("Error setting default repositories: {err:?}"); } + + // Initialise Ark's last value + libr::SETCDR(r_symbol!(".ark_last_value"), harp::r_null()); } // Now that R has started (emitting any startup messages that we capture in the @@ -604,27 +767,35 @@ impl RMain { execution_count: 0, autoprint_output: String::new(), ui_comm_tx: None, - error_occurred: false, - error_message: String::new(), - error_traceback: Vec::new(), + last_error: None, help_event_tx: None, help_port: None, lsp_events_tx: None, lsp_virtual_documents: HashMap::new(), - dap: RMainDap::new(dap), + debug_dap: dap, + debug_is_debugging: false, tasks_interrupt_rx, tasks_idle_rx, pending_futures: HashMap::new(), session_mode, positron_ns: None, - pending_lines: Vec::new(), banner: None, r_error_buffer: None, captured_output: String::new(), + debug_call_text: DebugCallText::None, + debug_last_line: None, debug_preserve_focus: false, debug_last_stack: vec![], - debug_env: None, debug_session_index: 1, + debug_current_frame_id: 0, + pending_inputs: None, + read_console_depth: Cell::new(0), + read_console_nested_return: Cell::new(false), + read_console_threw_error: Cell::new(false), + read_console_nested_return_next_input: Cell::new(None), + // Can't use `R_ENVS.global` here as it isn't initialised yet + read_console_frame: RefCell::new(RObject::new(unsafe { libr::R_GlobalEnv })), + read_console_shutdown: Cell::new(false), } } @@ -732,11 +903,16 @@ impl RMain { /// * `prompt` - The prompt shown to the user /// * `buf` - Pointer to buffer to receive the user's input (type `CONSOLE_BUFFER_CHAR`) /// * `buflen` - Size of the buffer to receiver user's input - /// * `hist` - Whether to add the input to the history (1) or not (0) + /// * `_hist` - Whether to add the input to the history (1) or not (0) /// - /// Returns a tuple. First value is to be passed on to `ReadConsole()` and - /// indicates whether new input is available. Second value indicates whether - /// we need to call `Rf_onintr()` to process an interrupt. + /// This does two things: + /// - Move the Console state machine to the next state: + /// - Wait for input + /// - Set an active execute request and a list of pending expressions + /// - Set `self.debug_is_debugging` depending on presence or absence of debugger prompt + /// - Evaluate next pending expression + /// - Close active execute request if pending list is empty + /// - Run an event loop while waiting for input fn read_console( &mut self, prompt: *const c_char, @@ -744,54 +920,57 @@ impl RMain { buflen: c_int, _hist: c_int, ) -> ConsoleResult { + self.debug_handle_read_console(); + + // State machine part of ReadConsole + let info = self.prompt_info(prompt); log::trace!("R prompt: {}", info.input_prompt); - // Upon entering read-console, finalize any debug call text that we were capturing. - // At this point, the user can either advance the debugger, causing us to capture - // a new expression, or execute arbitrary code, where we will reuse a finalized - // debug call text to maintain the debug state. - self.dap.finalize_call_text(); + // Invariant: If we detect a browser prompt, `self.debug_is_debugging` + // is true. Otherwise it is false. + if matches!(info.kind, PromptKind::Browser) { + // Start or continue debugging with the `debug_preserve_focus` hint + // from the last expression we evaluated + self.debug_is_debugging = true; + self.debug_start(self.debug_preserve_focus); + } else if self.debug_is_debugging { + self.debug_is_debugging = false; + self.debug_stop(); + } + + if let Some(exception) = self.take_exception() { + // We might get an input request if `readline()` or `menu()` is + // called in `options(error = )`. We respond to this with an error + // as this is not supported by Ark. + if matches!(info.kind, PromptKind::InputRequest) { + // Reset error so we can handle it when we recurse here after + // the error aborts the readline. Note it's better to first emit + // the R invalid input request error, and then handle + // `exception` within the context of a new `ReadConsole` + // instance, so that we emit the proper execution prompts as + // part of the response, and not the readline prompt. + self.last_error = Some(exception); + return self.handle_invalid_input_request_after_error(); + } - // We get called here everytime R needs more input. This handler - // represents the driving event of a small state machine that manages - // communication between R and the frontend. In the following order: - // - // - If we detect an input request prompt, then we forward the request - // on to the frontend and then fall through to the event loop to wait - // on the input reply. - // - // - If the vector of pending lines is not empty, R might be waiting for - // us to complete an incomplete expression, or we might just have - // completed an intermediate expression (e.g. from an ExecuteRequest - // like `foo\nbar` where `foo` is intermediate and `bar` is final). - // Send the next line to R. - // - // - If the vector of pending lines is empty, and if the prompt is for - // new R code, we close the active ExecuteRequest and send an - // ExecuteReply to the frontend. We then fall through to the event - // loop to wait for more input. - // - // This state machine depends on being able to reliably distinguish - // between readline prompts (from `readline()`, `scan()`, or `menu()`), - // and actual R code prompts (either top-level or from a nested debug - // REPL). A readline prompt should never change our state (in - // particular our vector of pending inputs). We think we are making this - // distinction sufficiently robustly but ideally R would let us know the - // prompt type so there is no ambiguity at all. - // - // R might throw an error at any time while we are working on our vector - // of pending lines, either from a syntax error or from an evaluation - // error. When this happens, we abort evaluation and clear the pending - // lines. - // - // If the vector of pending lines is empty and we detect an incomplete - // prompt, this is a panic. We check ahead of time for complete - // expressions before breaking up an ExecuteRequest in multiple lines, - // so this should not happen. - if let Some(console_result) = self.handle_active_request(&info, buf, buflen) { - return console_result; - }; + // Clear any pending inputs, if any + self.pending_inputs = None; + + // Reply to active request with error, then fall through to event loop + self.handle_active_request(&info, ConsoleValue::Error(exception)); + } else if matches!(info.kind, PromptKind::InputRequest) { + // Request input from the frontend and return it to R + return self.handle_input_request(&info, buf, buflen); + } else if let Some(input) = self.pop_pending() { + // Evaluate pending expression if there is any remaining + return self.handle_pending_input(input, buf, buflen); + } else { + // Otherwise reply to active request with accumulated result, then + // fall through to event loop + let result = self.take_result(); + self.handle_active_request(&info, ConsoleValue::Success(result)); + } // In the future we'll also send browser information, see // https://github.com/posit-dev/positron/issues/3001. Currently this is @@ -801,21 +980,30 @@ impl RMain { // often. We'd still push a `DidChangeConsoleInputs` notification from // here, but only containing high-level information such as `search()` // contents and `ls(rho)`. - if !info.browser && !info.incomplete && !info.input_request { + if !self.debug_is_debugging && !matches!(info.kind, PromptKind::InputRequest) { self.refresh_lsp(); } // Signal prompt EVENTS.console_prompt.emit(()); - if info.browser { - self.start_debug(); - } else { - if self.dap.is_debugging() { - self.stop_debug(); - } - } + self.run_event_loop(&info, buf, buflen, WaitFor::ExecuteRequest) + } + /// Runs the ReadConsole event loop. + /// This handles events for: + /// - Reception of either input replies or execute requests (as determined + /// by `wait_for`) + /// - Idle-time and interrupt-time tasks + /// - Requests from the frontend (currently only used for establishing UI comm) + /// - R's polled events + fn run_event_loop( + &mut self, + info: &PromptInfo, + buf: *mut c_uchar, + buflen: c_int, + wait_for: WaitFor, + ) -> ConsoleResult { let mut select = crossbeam::channel::Select::new(); // Cloning is necessary to avoid a double mutable borrow error @@ -831,22 +1019,28 @@ impl RMain { // package. 50ms seems to be more in line with RStudio (posit-dev/positron#7235). let polled_events_rx = crossbeam::channel::tick(Duration::from_millis(50)); - let r_request_index = select.recv(&r_request_rx); - let stdin_reply_index = select.recv(&stdin_reply_rx); + // This is the main kind of message from the frontend that we are expecting. + // We either wait for `input_reply` messages on StdIn, or for + // `execute_request` on Shell. + let (r_request_index, stdin_reply_index) = match wait_for { + WaitFor::ExecuteRequest => (Some(select.recv(&r_request_rx)), None), + WaitFor::InputReply => (None, Some(select.recv(&stdin_reply_rx))), + }; + let kernel_request_index = select.recv(&kernel_request_rx); let tasks_interrupt_index = select.recv(&tasks_interrupt_rx); let polled_events_index = select.recv(&polled_events_rx); - // Don't process idle tasks in browser prompts. We currently don't want - // idle tasks (e.g. for srcref generation) to run when the call stack is - // empty. We could make this configurable though if needed, i.e. some - // idle tasks would be able to run in the browser. Those should be sent - // to a dedicated channel that would always be included in the set of - // recv channels. - let tasks_idle_index = if info.browser { - None - } else { + // Only process idle at top level. We currently don't want idle tasks + // (e.g. for srcref generation) to run when the call stack is not empty. + // We could make this configurable though if needed, i.e. some idle + // tasks would be able to run in the browser. Those should be sent to a + // dedicated channel that would always be included in the set of recv + // channels. + let tasks_idle_index = if matches!(info.kind, PromptKind::TopLevel) { Some(select.recv(&tasks_idle_rx)) + } else { + None }; loop { @@ -858,7 +1052,7 @@ impl RMain { // `UserBreak`, but won't actually fire the interrupt b/c // we have them disabled, so it would end up swallowing the // user interrupt request. - if info.input_request && interrupts_pending() { + if matches!(info.kind, PromptKind::InputRequest) && interrupts_pending() { return ConsoleResult::Interrupt; } @@ -867,17 +1061,13 @@ impl RMain { // reset the flag set_interrupts_pending(false); - // FIXME: Race between interrupt and new code request. To fix - // this, we could manage the Shell and Control sockets on the - // common message event thread. The Control messages would need - // to be handled in a blocking way to ensure subscribers are - // notified before the next incoming message is processed. - // First handle execute requests outside of `select` to ensure they // have priority. `select` chooses at random. - if let Ok(req) = r_request_rx.try_recv() { - if let Some(input) = self.handle_execute_request(req, &info, buf, buflen) { - return input; + if let WaitFor::ExecuteRequest = wait_for { + if let Ok(req) = r_request_rx.try_recv() { + if let Some(input) = self.handle_execute_request(req, &info, buf, buflen) { + return input; + } } } @@ -885,7 +1075,7 @@ impl RMain { match oper.index() { // We've got an execute request from the frontend - i if i == r_request_index => { + i if Some(i) == r_request_index => { let req = oper.recv(&r_request_rx); let Ok(req) = req else { // The channel is disconnected and empty @@ -898,7 +1088,7 @@ impl RMain { }, // We've got a reply for readline - i if i == stdin_reply_index => { + i if Some(i) == stdin_reply_index => { let reply = oper.recv(&stdin_reply_rx).unwrap(); return self.handle_input_reply(reply, buf, buflen); }, @@ -942,8 +1132,9 @@ impl RMain { let prompt_slice = unsafe { CStr::from_ptr(prompt_c) }; let prompt = prompt_slice.to_string_lossy().into_owned(); + // Sent to the frontend after each top-level command so users can + // customise their prompts let continuation_prompt: String = harp::get_option("continue").try_into().unwrap(); - let matches_continuation = prompt == continuation_prompt; // Detect browser prompt by matching the prompt string // https://github.com/posit-dev/positron/issues/4742. @@ -951,99 +1142,122 @@ impl RMain { // `options(prompt =, continue = ` to something that looks like // a browser prompt, or doing the same with `readline()`. We have // chosen to not support these edge cases. - // Additionally, we send code to R one line at a time, so even if we are debugging - // it can look like we are in a continuation state. To try and detect that, we - // detect if we matched the continuation prompt while the DAP is active. - let browser = - RE_DEBUG_PROMPT.is_match(&prompt) || (self.dap.is_debugging() && matches_continuation); - - // If there are frames on the stack and we're not in a browser prompt, - // this means some user code is requesting input, e.g. via `readline()` - let user_request = !browser && n_frame > 0; - - // The request is incomplete if we see the continue prompt, except if - // we're in a user request, e.g. `readline("+ ")`. To guard against - // this, we check that we are at top-level (call stack is empty or we - // have a debug prompt). - let top_level = n_frame == 0 || browser; - let incomplete = matches_continuation && top_level; + let browser = RE_DEBUG_PROMPT.is_match(&prompt); + + // Determine the prompt kind based on context + let kind = if browser { + PromptKind::Browser + } else if n_frame > 0 { + // If there are frames on the stack and we're not in a browser prompt, + // this means some user code is requesting input, e.g. via `readline()` + PromptKind::InputRequest + } else { + PromptKind::TopLevel + }; return PromptInfo { input_prompt: prompt, continuation_prompt, - browser, - incomplete, - input_request: user_request, + kind, }; } - fn read_console_cleanup(&mut self) { - // The debug environment is only valid while R is idle - self.debug_env = None; - } + /// Take result from `self.autoprint_output` and R's `.Last.value` object + fn take_result(&mut self) -> serde_json::Map { + // TODO: Implement rich printing of certain outputs. + // Will we need something similar to the RStudio model, + // where we implement custom print() methods? Or can + // we make the stub below behave sensibly even when + // streaming R output? + let mut data = serde_json::Map::new(); - /// Returns: - /// - `None` if we should fall through to the event loop to wait for more user input - /// - `Some(ConsoleResult)` if we should immediately exit `read_console()` - fn handle_active_request( - &mut self, - info: &PromptInfo, - buf: *mut c_uchar, - buflen: c_int, - ) -> Option { - // TODO: Can we remove this below code? - // If the prompt begins with "Save workspace", respond with (n) - // and allow R to immediately exit. - // - // NOTE: Should be able to overwrite the `Cleanup` frontend method. - // This would also help with detecting normal exits versus crashes. - if info.input_prompt.starts_with("Save workspace") { - match Self::on_console_input(buf, buflen, String::from("n")) { - Ok(()) => return Some(ConsoleResult::NewInput), - Err(err) => return Some(ConsoleResult::Error(err)), - } + // The output generated by autoprint is emitted as an + // `execute_result` message. + let mut autoprint = std::mem::take(&mut self.autoprint_output); + + if autoprint.ends_with('\n') { + // Remove the trailing newlines that R adds to outputs but that + // Jupyter frontends are not expecting + autoprint.pop(); + } + if autoprint.len() != 0 { + data.insert("text/plain".to_string(), json!(autoprint)); } - // First check if we are inside request for user input, like a `readline()` or `menu()`. - // It's entirely possible that we still have more pending lines, but an intermediate line - // put us into an `input_request` state. We must respond to that request before processing - // the rest of the pending lines. - if info.input_request { - if let Some(req) = &self.active_request { - // Send request to frontend. We'll wait for an `input_reply` - // from the frontend in the event loop in `read_console()`. - // The active request remains active. - self.request_input(req.originator.clone(), info.input_prompt.to_string()); - return None; - } else { - // Invalid input request, propagate error to R - return Some(self.handle_invalid_input_request(buf, buflen)); + // Include HTML representation of data.frame + unsafe { + let value = Rf_findVarInFrame(R_GlobalEnv, r_symbol!(".Last.value")); + if r_is_data_frame(value) { + match to_html(value) { + Ok(html) => { + data.insert("text/html".to_string(), json!(html)); + }, + Err(err) => { + log::error!("{:?}", err); + }, + }; } } - // An incomplete prompt when we no longer have any inputs to send should - // never happen because we check for incomplete inputs ahead of time and - // respond to the frontend with an error. - if info.incomplete && self.pending_lines.is_empty() { - unreachable!("Incomplete input in `ReadConsole` handler"); + data + } + + fn take_exception(&mut self) -> Option { + let mut exception = if let Some(exception) = self.last_error.take() { + exception + } else if stack_overflow_occurred() { + // Call `base::traceback()` since we don't have a handled error + // object carrying a backtrace. This won't be formatted as a + // tree which is just as well since the recursive calls would + // push a tree too far to the right. + let traceback = r_traceback(); + + let exception = Exception { + ename: String::from(""), + evalue: r_peek_error_buffer(), + traceback, + }; + + // Reset error buffer so we don't display this message again + let _ = RFunction::new("base", "stop").call(); + + exception + } else { + return None; + }; + + // Flush any accumulated output to StdOut. This can happen if + // the last input errors out during autoprint. + let autoprint = std::mem::take(&mut self.autoprint_output); + if !autoprint.is_empty() { + let message = IOPubMessage::Stream(StreamOutput { + name: Stream::Stdout, + text: autoprint, + }); + self.iopub_tx.send(message).unwrap(); } - // Next check if we have any pending lines. If we do, we are in the middle of - // evaluating a multi line selection, so immediately write the next line into R's buffer. - // The active request remains active. - if let Some(console_result) = self.handle_pending_line(buf, buflen) { - return Some(console_result); + // Jupyter clients typically discard the `evalue` when a `traceback` is + // present. Jupyter-Console even disregards `evalue` in all cases. So + // include it here if we are in Notebook mode. But should Positron + // implement similar behaviour as the other frontends eventually? The + // first component of `traceback` could be compared to `evalue` and + // discarded from the traceback if the same. + if let SessionMode::Notebook = self.session_mode { + exception.traceback.insert(0, exception.evalue.clone()); } - // Finally, check if we have an active request from a previous `read_console()` - // iteration. If so, we `take()` and clear the `active_request` as we're about - // to complete it and send a reply to unblock the active Shell - // request. + Some(exception) + } + + fn handle_active_request(&mut self, info: &PromptInfo, value: ConsoleValue) { + // If we get here we finished evaluating all pending inputs. Check if we + // have an active request from a previous `read_console()` iteration. If + // so, we `take()` and clear the `active_request` as we're about to + // complete it and send a reply to unblock the active Shell request. if let Some(req) = std::mem::take(&mut self.active_request) { - // FIXME: Race condition between the comm and shell socket threads. - // - // Perform a refresh of the frontend state - // (Prompts, working directory, etc) + // Perform a refresh of the frontend state (Prompts, working + // directory, etc) self.with_mut_ui_comm_tx(|ui_comm_tx| { let input_prompt = info.input_prompt.clone(); let continuation_prompt = info.continuation_prompt.clone(); @@ -1058,13 +1272,15 @@ impl RMain { // Let frontend know the last request is complete. This turns us // back to Idle. - self.reply_execute_request(req, &info); + Self::reply_execute_request(&self.iopub_tx, req, &info, value); + } else { + log::info!("No active request to handle, discarding: {value:?}"); } - - // Prepare for the next user input - None } + // Called from Ark's ReadConsole event loop when we get a new execute + // request. It's not possible to get one while an active request is ongoing + // because of Jupyter's queueing of Shell messages. fn handle_execute_request( &mut self, req: RRequest, @@ -1072,7 +1288,7 @@ impl RMain { buf: *mut c_uchar, buflen: c_int, ) -> Option { - if info.input_request { + if matches!(info.kind, PromptKind::InputRequest) { panic!("Unexpected `execute_request` while waiting for `input_reply`."); } @@ -1096,7 +1312,7 @@ impl RMain { RRequest::DebugCommand(cmd) => { // Just ignore command in case we left the debugging state already - if !self.dap.is_debugging() { + if !self.debug_is_debugging { return None; } @@ -1106,48 +1322,171 @@ impl RMain { }, }; - // Clear error flag - self.error_occurred = false; - match input { ConsoleInput::Input(code) => { - // Handle commands for the debug interpreter - if self.dap.is_debugging() { - let continue_cmds = vec!["n", "f", "c", "cont"]; - if continue_cmds.contains(&&code[..]) { - // We're stepping so we want to focus the next location we stop at - self.debug_preserve_focus = false; - self.dap.send_dap(DapBackendEvent::Continued); - } else { - // The user is evaluating some other expression so preserve current focus - // https://github.com/posit-dev/positron/issues/3151 - self.debug_preserve_focus = true; - } + // Parse input into pending expressions + match PendingInputs::read(&code) { + Ok(ParseResult::Success(inputs)) => { + self.pending_inputs = inputs; + }, + Ok(ParseResult::SyntaxError(message)) => { + return Some(ConsoleResult::Error(message)) + }, + Err(err) => { + return Some(ConsoleResult::Error(format!( + "Error while parsing input: {err:?}" + ))) + }, } - // If the input is invalid (e.g. incomplete), don't send it to R - // at all, reply with an error right away - if let Err(err) = Self::check_console_input(code.as_str()) { - return Some(ConsoleResult::Error(err)); - } + // Evaluate first expression if there is one + if let Some(input) = self.pop_pending() { + Some(self.handle_pending_input(input, buf, buflen)) + } else { + // Otherwise we got an empty input, e.g. `""` and there's + // nothing to do. Close active request. + self.handle_active_request(info, ConsoleValue::Success(Default::default())); - // Split input by lines, retrieve first line, and store - // remaining lines in a buffer. This helps with long inputs - // because R has a fixed input buffer size of 4096 bytes at the - // time of writing. - let code = self.buffer_console_input(code.as_str()); - - // Store input in R's buffer and return sentinel indicating some - // new input is ready - match Self::on_console_input(buf, buflen, code) { - Ok(()) => Some(ConsoleResult::NewInput), - Err(err) => Some(ConsoleResult::Error(err)), + // And return to event loop + None } }, + ConsoleInput::EOF => Some(ConsoleResult::Disconnected), } } + /// Handles user input requests (e.g., readline, menu) and special prompts. + /// Runs the ReadConsole event loop until a reply comes in. + fn handle_input_request( + &mut self, + info: &PromptInfo, + buf: *mut c_uchar, + buflen: c_int, + ) -> ConsoleResult { + if let Some(req) = &self.active_request { + // Send request to frontend. We'll wait for an `input_reply` + // from the frontend in the event loop in `read_console()`. + // The active request remains active. + self.request_input(req.originator.clone(), String::from(&info.input_prompt)); + + // Run the event loop, waiting for stdin replies but not execute requests + self.run_event_loop(info, buf, buflen, WaitFor::InputReply) + } else { + // Invalid input request, propagate error to R + self.handle_invalid_input_request(buf, buflen) + } + } + + fn handle_pending_input( + &mut self, + input: PendingInput, + buf: *mut c_uchar, + buflen: c_int, + ) -> ConsoleResult { + // Default: preserve current focus for evaluated expressions. + // This only has an effect if we're debugging. + // https://github.com/posit-dev/positron/issues/3151 + self.debug_preserve_focus = true; + + if self.debug_is_debugging { + // Try to interpret this pending input as a symbol (debug commands + // are entered as symbols). Whether or not it parses as a symbol, + // if we're currently debugging we must set `debug_preserve_focus`. + if let Ok(sym) = harp::RSymbol::new(input.expr.sexp) { + // All debug commands as documented in `?browser` + const DEBUG_COMMANDS: &[&str] = + &["c", "cont", "f", "help", "n", "s", "where", "r", "Q"]; + + // The subset of debug commands that continue execution + const DEBUG_COMMANDS_CONTINUE: &[&str] = &["n", "f", "c", "cont"]; + + let sym = String::from(sym); + + if DEBUG_COMMANDS.contains(&&sym[..]) { + if DEBUG_COMMANDS_CONTINUE.contains(&&sym[..]) { + // For continue-like commands, we do not preserve focus, + // i.e. we let the cursor jump to the stopped + // position. Set the preserve focus hint for the + // next iteration of ReadConsole. + self.debug_preserve_focus = false; + + // Let the DAP client know that execution is now continuing + self.debug_send_dap(DapBackendEvent::Continued); + } + + // All debug commands are forwarded to the base REPL as + // is so that R can interpret them. + // Unwrap safety: A debug command fits in the buffer. + Self::on_console_input(buf, buflen, sym).unwrap(); + return ConsoleResult::NewInput; + } + } + } + + ConsoleResult::NewPendingInput(input) + } + + fn pop_pending(&mut self) -> Option { + let Some(pending_inputs) = self.pending_inputs.as_mut() else { + return None; + }; + + let Some(input) = pending_inputs.pop() else { + self.pending_inputs = None; + return None; + }; + + if pending_inputs.is_empty() { + self.pending_inputs = None; + } + + Some(input) + } + + // SAFETY: Call this from a POD frame. Inputs must be protected. + unsafe fn eval(expr: libr::SEXP, srcref: libr::SEXP, buf: *mut c_uchar, buflen: c_int) { + let frame = harp::r_current_frame(); + + // SAFETY: This may jump in case of error, keep this POD + unsafe { + let frame = libr::Rf_protect(frame.into()); + + // The global source reference is stored in this global variable by + // the R REPL before evaluation. We do the same here. + let old_srcref = libr::Rf_protect(libr::get(libr::R_Srcref)); + libr::set(libr::R_Srcref, srcref); + + // Evaluate the expression. Beware: this may throw an R longjump. + let value = libr::Rf_eval(expr, frame); + libr::Rf_protect(value); + + // Restore `R_Srcref`, necessary at least to avoid messing with + // DAP's last frame info + libr::set(libr::R_Srcref, old_srcref); + + // Store in the base environment for robust access from (almost) any + // evaluation environment. We only require the presence of `::` so + // we can reach into base. Note that unlike regular environments + // which are stored in pairlists or hash tables, the base environment + // is stored in the `value` field of symbols, i.e. their "CDR". + libr::SETCDR(r_symbol!(".ark_last_value"), value); + + libr::Rf_unprotect(3); + value + }; + + // Back in business, Rust away + let code = if unsafe { libr::get(libr::R_Visible) == 1 } { + String::from("base::.ark_last_value") + } else { + String::from("base::invisible(base::.ark_last_value)") + }; + + // Unwrap safety: The input always fits in the buffer + Self::on_console_input(buf, buflen, code).unwrap(); + } + /// Handle an `input_request` received outside of an `execute_request` context /// /// We believe it is always invalid to receive an `input_request` that isn't @@ -1173,65 +1512,33 @@ impl RMain { /// https://github.com/rstudio/renv/blob/5d0d52c395e569f7f24df4288d949cef95efca4e/inst/resources/activate.R#L85-L87 fn handle_invalid_input_request(&self, buf: *mut c_uchar, buflen: c_int) -> ConsoleResult { if let Some(input) = Self::renv_autoloader_reply() { - log::info!("Detected `readline()` call in renv autoloader. Returning `'{input}'`."); + log::warn!("Detected `readline()` call in renv autoloader. Returning `'{input}'`."); match Self::on_console_input(buf, buflen, input) { Ok(()) => return ConsoleResult::NewInput, - Err(err) => return ConsoleResult::Error(err), + Err(err) => return ConsoleResult::Error(format!("{err}")), } } - log::info!("Detected invalid `input_request` outside an `execute_request`. Preparing to throw an R error."); + log::warn!("Detected invalid `input_request` outside an `execute_request`. Preparing to throw an R error."); let message = vec![ "Can't request input from the user at this time.", "Are you calling `readline()` or `menu()` from an `.Rprofile` or `.Rprofile.site` file? If so, that is the issue and you should remove that code." ].join("\n"); - return ConsoleResult::Error(Error::InvalidInputRequest(message)); + return ConsoleResult::Error(message); } - fn start_debug(&mut self) { - match self.dap.stack_info() { - Ok(stack) => { - if let Some(frame) = stack.first() { - if let Some(ref env) = frame.environment { - // This is reset on exit in the cleanup phase, see `r_read_console()` - self.debug_env = Some(env.get().clone()); - } - } + fn handle_invalid_input_request_after_error(&self) -> ConsoleResult { + log::warn!("Detected invalid `input_request` after error (probably from `getOption('error')`). Preparing to throw an R error."); - // Figure out whether we changed location since last time, - // e.g. because the user evaluated an expression that hit - // another breakpoint. In that case we do want to move - // focus, even though the user didn't explicitly used a step - // gesture. Our indication that we changed location is - // whether the call stack looks the same as last time. This - // is not 100% reliable as this heuristic might have false - // negatives, e.g. if the control flow exited the current - // context via condition catching and jumped back in the - // debugged function. - let stack_id: Vec = stack.iter().map(|f| f.into()).collect(); - let same_stack = stack_id == self.debug_last_stack; - - // Initialize fallback sources for this stack - let fallback_sources = self.load_fallback_sources(&stack); - - self.debug_last_stack = stack_id; - self.dap.start_debug( - stack, - same_stack && self.debug_preserve_focus, - fallback_sources, - ); - }, - Err(err) => log::error!("ReadConsole: Can't get stack info: {err}"), - }; - } + let message = vec![ + "Can't request input from the user at this time.", + "Are you calling `readline()` or `menu()` from `options(error = )`?", + ] + .join("\n"); - fn stop_debug(&mut self) { - self.debug_last_stack = vec![]; - self.clear_fallback_sources(); - self.debug_session_index += 1; - self.dap.stop_debug(); + return ConsoleResult::Error(message); } /// Load `fallback_sources` with this stack's text sources @@ -1241,12 +1548,12 @@ impl RMain { /// in duplicate virtual editors being opened on the client side. pub fn load_fallback_sources( &mut self, - stack: &Vec, + stack: &Vec, ) -> HashMap { let mut sources = HashMap::new(); for frame in stack.iter() { - if let crate::dap::dap_r_main::FrameSource::Text(source) = &frame.source { + if let crate::repl_debug::FrameSource::Text(source) = &frame.source { let uri = Self::ark_debug_uri(self.debug_session_index, &frame.source_name, source); if self.has_virtual_document(&uri) { @@ -1277,31 +1584,6 @@ impl RMain { } } - fn ark_debug_uri(debug_session_index: u32, source_name: &str, source: &str) -> String { - // Hash the source to generate a unique identifier used in - // the URI. This is needed to disambiguate frames that have - // the same source name (used as file name in the URI) but - // different sources. - use std::collections::hash_map::DefaultHasher; - use std::hash::Hash; - use std::hash::Hasher; - let mut hasher = DefaultHasher::new(); - source.hash(&mut hasher); - let hash = format!("{:x}", hasher.finish()); - - ark_uri(&format!( - "debug/session{i}/{hash}/{source_name}.R", - i = debug_session_index, - )) - } - - // Doesn't expect `ark:` scheme, used for checking keys in our vdoc map - fn is_ark_debug_path(uri: &str) -> bool { - static RE_ARK_DEBUG_URI: std::sync::OnceLock = std::sync::OnceLock::new(); - let re = RE_ARK_DEBUG_URI.get_or_init(|| Regex::new(r"^ark-\d+/debug/").unwrap()); - re.is_match(uri) - } - fn renv_autoloader_reply() -> Option { let is_autoloader_running = harp::get_option("renv.autoloader.running") .try_into() @@ -1354,10 +1636,10 @@ impl RMain { let input = convert_line_endings(&input.value, LineEnding::Posix); match Self::on_console_input(buf, buflen, input) { Ok(()) => ConsoleResult::NewInput, - Err(err) => ConsoleResult::Error(err), + Err(err) => ConsoleResult::Error(format!("{err:?}")), } }, - Err(err) => ConsoleResult::Error(err), + Err(err) => ConsoleResult::Error(format!("{err:?}")), } } @@ -1545,63 +1827,6 @@ impl RMain { self.get_ui_comm_tx().is_some() } - fn handle_pending_line(&mut self, buf: *mut c_uchar, buflen: c_int) -> Option { - if self.error_occurred { - // If an error has occurred, we've already sent a complete expression that resulted in - // an error. Flush the remaining lines and return to `read_console()`, who will handle - // that error. - self.pending_lines.clear(); - return None; - } - - let Some(input) = self.pending_lines.pop() else { - // No pending lines - return None; - }; - - match Self::on_console_input(buf, buflen, input) { - Ok(()) => Some(ConsoleResult::NewInput), - Err(err) => Some(ConsoleResult::Error(err)), - } - } - - fn check_console_input(input: &str) -> amalthea::Result<()> { - let status = unwrap!(harp::parse_status(&harp::ParseInput::Text(input)), Err(err) => { - // Failed to even attempt to parse the input, something is seriously wrong - return Err(Error::InvalidConsoleInput(format!( - "Failed to parse input: {err:?}" - ))); - }); - - // - Incomplete inputs put R into a state where it expects more input that will never come, so we - // immediately reject them. Positron should never send us these, but Jupyter Notebooks may. - // - Complete statements are obviously fine. - // - Syntax errors will cause R to throw an error, which is expected. - match status { - harp::ParseResult::Incomplete => Err(Error::InvalidConsoleInput(format!( - "Can't execute incomplete input:\n{input}" - ))), - harp::ParseResult::Complete(_) => Ok(()), - harp::ParseResult::SyntaxError { .. } => Ok(()), - } - } - - fn buffer_console_input(&mut self, input: &str) -> String { - // Split into lines and reverse them to be able to `pop()` from the front - let mut lines: Vec = lines(input).rev().map(String::from).collect(); - - // SAFETY: There is always at least one line because: - // - `lines("")` returns 1 element containing `""` - // - `lines("\n")` returns 2 elements containing `""` - let first = lines.pop().unwrap(); - - // No-op if `lines` is empty - assert!(self.pending_lines.is_empty()); - self.pending_lines.append(&mut lines); - - first - } - /// Copy console input into R's internal input buffer /// /// Supposedly `buflen` is "the maximum length, in bytes, including the @@ -1651,6 +1876,13 @@ impl RMain { Ok(()) } + fn console_input(buf: *mut c_uchar, _buflen: c_int) -> String { + unsafe { + let cstr = CStr::from_ptr(buf as *const c_char); + cstr.to_string_lossy().into_owned() + } + } + // Hitting this means a SINGLE line from the user was longer than the buffer size (>4000 characters) fn buffer_overflow_error() -> amalthea::Error { Error::InvalidConsoleInput(String::from( @@ -1660,142 +1892,56 @@ impl RMain { // Reply to the previously active request. The current prompt type and // whether an error has occurred defines the reply kind. - fn reply_execute_request(&mut self, req: ActiveReadConsoleRequest, prompt_info: &PromptInfo) { + fn reply_execute_request( + iopub_tx: &Sender, + req: ActiveReadConsoleRequest, + prompt_info: &PromptInfo, + value: ConsoleValue, + ) { let prompt = &prompt_info.input_prompt; - let (reply, result) = if prompt_info.incomplete { - log::trace!("Got prompt {} signaling incomplete request", prompt); - (new_incomplete_reply(&req.request, req.exec_count), None) - } else if prompt_info.input_request { - unreachable!(); - } else { - log::trace!("Got R prompt '{}', completing execution", prompt); + log::trace!("Got R prompt '{}', completing execution", prompt); - self.make_execute_reply_error(req.exec_count) - .unwrap_or_else(|| self.make_execute_reply(req.exec_count)) - }; + let exec_count = req.exec_count; - if let Some(result) = result { - self.iopub_tx.send(result).unwrap(); - } + let (reply, result) = match value { + ConsoleValue::Success(data) => { + let reply = Ok(ExecuteReply { + status: Status::Ok, + execution_count: exec_count, + user_expressions: json!({}), + }); - log::trace!("Sending `execute_reply`: {reply:?}"); - req.reply_tx.send(reply).unwrap(); - } + let result = if data.len() > 0 { + Some(IOPubMessage::ExecuteResult(ExecuteResult { + execution_count: exec_count, + data: serde_json::Value::Object(data), + metadata: json!({}), + })) + } else { + None + }; - fn make_execute_reply_error( - &mut self, - exec_count: u32, - ) -> Option<(amalthea::Result, Option)> { - // Save and reset error occurred flag - let error_occurred = self.error_occurred; - self.error_occurred = false; - - // Error handlers are not called on stack overflow so the error flag - // isn't set. Instead we detect stack overflows by peeking at the error - // buffer. The message is explicitly not translated to save stack space - // so the matching should be reliable. - let err_buf = r_peek_error_buffer(); - let stack_overflow_occurred = RE_STACK_OVERFLOW.is_match(&err_buf); - - // Reset error buffer so we don't display this message again - if stack_overflow_occurred { - let _ = RFunction::new("base", "stop").call(); - } + (reply, result) + }, - // Send the reply to the frontend - if !error_occurred && !stack_overflow_occurred { - return None; - } + ConsoleValue::Error(exception) => { + let reply = Err(amalthea::Error::ShellErrorExecuteReply( + exception.clone(), + exec_count, + )); + let result = IOPubMessage::ExecuteError(ExecuteError { exception }); - // We don't fill out `ename` with anything meaningful because typically - // R errors don't have names. We could consider using the condition class - // here, which r-lib/tidyverse packages have been using more heavily. - let mut exception = if error_occurred { - Exception { - ename: String::from(""), - evalue: self.error_message.clone(), - traceback: self.error_traceback.clone(), - } - } else { - // Call `base::traceback()` since we don't have a handled error - // object carrying a backtrace. This won't be formatted as a - // tree which is just as well since the recursive calls would - // push a tree too far to the right. - let traceback = r_traceback(); - Exception { - ename: String::from(""), - evalue: err_buf.clone(), - traceback, - } + (reply, Some(result)) + }, }; - // Jupyter clients typically discard the `evalue` when a `traceback` is - // present. Jupyter-Console even disregards `evalue` in all cases. So - // include it here if we are in Notebook mode. But should Positron - // implement similar behaviour as the other frontends eventually? The - // first component of `traceback` could be compared to `evalue` and - // discarded from the traceback if the same. - if let SessionMode::Notebook = self.session_mode { - exception.traceback.insert(0, exception.evalue.clone()) - } - - let reply = new_execute_reply_error(exception.clone(), exec_count); - let result = IOPubMessage::ExecuteError(ExecuteError { exception }); - - Some((reply, Some(result))) - } - - fn make_execute_reply( - &mut self, - exec_count: u32, - ) -> (amalthea::Result, Option) { - // TODO: Implement rich printing of certain outputs. - // Will we need something similar to the RStudio model, - // where we implement custom print() methods? Or can - // we make the stub below behave sensibly even when - // streaming R output? - let mut data = serde_json::Map::new(); - - // The output generated by autoprint is emitted as an - // `execute_result` message. - let mut autoprint = std::mem::take(&mut self.autoprint_output); - - if autoprint.ends_with('\n') { - // Remove the trailing newlines that R adds to outputs but that - // Jupyter frontends are not expecting. Is it worth taking a - // mutable self ref across calling methods to avoid the clone? - autoprint.pop(); - } - if autoprint.len() != 0 { - data.insert("text/plain".to_string(), json!(autoprint)); - } - - // Include HTML representation of data.frame - unsafe { - let value = Rf_findVarInFrame(R_GlobalEnv, r_symbol!(".Last.value")); - if r_is_data_frame(value) { - match to_html(value) { - Ok(html) => data.insert("text/html".to_string(), json!(html)), - Err(err) => { - log::error!("{:?}", err); - None - }, - }; - } + if let Some(result) = result { + iopub_tx.send(result).unwrap(); } - let reply = new_execute_reply(exec_count); - - let result = (data.len() > 0).then(|| { - IOPubMessage::ExecuteResult(ExecuteResult { - execution_count: exec_count, - data: serde_json::Value::Object(data), - metadata: json!({}), - }) - }); - - (reply, result) + log::trace!("Sending `execute_reply`: {reply:?}"); + req.reply_tx.send(reply).unwrap(); } /// Sends a `Wait` message to IOPub, which responds when the IOPub thread @@ -1875,7 +2021,7 @@ impl RMain { // To capture the current `debug: ` output, for use in the debugger's // match based fallback - r_main.dap.handle_stdout(&content); + r_main.debug_handle_write_console(&content); let stream = if otype == 0 { Stream::Stdout @@ -1918,7 +2064,7 @@ impl RMain { // https://github.com/posit-dev/positron/issues/1881 // Handle last expression - if r_main.pending_lines.is_empty() { + if r_main.pending_inputs.is_none() { r_main.autoprint_output.push_str(&content); return; } @@ -2211,42 +2357,12 @@ impl RMain { } } - fn propagate_error(&mut self, err: anyhow::Error) -> ! { - // Save error message to `RMain`'s buffer to avoid leaking memory when `Rf_error()` jumps. - // Some gymnastics are required to deal with the possibility of `CString` conversion failure - // since the error message comes from the frontend and might be corrupted. - self.r_error_buffer = Some(new_cstring(format!("\n{err}"))); - unsafe { Rf_error(self.r_error_buffer.as_ref().unwrap().as_ptr()) } - } - #[cfg(not(test))] // Avoid warnings in unit test - pub(crate) fn debug_env(&self) -> Option { - self.debug_env.clone() + pub(crate) fn read_console_frame(&self) -> RObject { + self.read_console_frame.borrow().clone() } } -/// Report an incomplete request to the frontend -fn new_incomplete_reply(req: &ExecuteRequest, exec_count: u32) -> amalthea::Result { - let error = Exception { - ename: "IncompleteInput".to_string(), - evalue: format!("Code fragment is not complete: {}", req.code), - traceback: vec![], - }; - Err(amalthea::Error::ShellErrorExecuteReply(error, exec_count)) -} - -fn new_execute_reply(exec_count: u32) -> amalthea::Result { - Ok(ExecuteReply { - status: Status::Ok, - execution_count: exec_count, - user_expressions: json!({}), - }) -} - -fn new_execute_reply_error(error: Exception, exec_count: u32) -> amalthea::Result { - Err(amalthea::Error::ShellErrorExecuteReply(error, exec_count)) -} - /// Converts a data frame to HTML fn to_html(frame: SEXP) -> Result { unsafe { @@ -2288,9 +2404,107 @@ pub extern "C-unwind" fn r_read_console( buflen: c_int, hist: c_int, ) -> i32 { + // In this entry point we handle two kinds of state: + // - The number of nested REPLs `read_console_depth` + // - A bunch of flags that help us reset the calling R REPL + // + // The second kind is unfortunate and due to us taking charge of parsing and + // evaluation. Ideally R would extend their frontend API so that this would + // only be necessary for backward compatibility with old versions of R. + let main = RMain::get_mut(); + + // Propagate an EOF event (e.g. from a Shutdown request). We need to exit + // from all consoles on the stack to let R shut down with an `exit()`. + if main.read_console_shutdown.get() { + return 0; + } + + // We've finished evaluating a dummy value to reset state in R's REPL, + // and are now ready to evaluate the actual input, which is typically + // just `.ark_last_value`. + if let Some(next_input) = main.read_console_nested_return_next_input.take() { + RMain::on_console_input(buf, buflen, next_input).unwrap(); + return 1; + } + + // In case of error, we haven't had a chance to evaluate ".ark_last_value". + // So we return to the R REPL to give R a chance to run the state + // restoration that occurs between `R_ReadConsole()` and `eval()`: + // - R_PPStackTop: https://github.com/r-devel/r-svn/blob/74cd0af4/src/main/main.c#L227 + // - R_EvalDepth: https://github.com/r-devel/r-svn/blob/74cd0af4/src/main/main.c#L260 + // + // Technically this also resets time limits (see `base::setTimeLimit()`) but + // these aren't supported in Ark because they cause errors when we poll R + // events. + if main.last_error.is_some() && main.read_console_threw_error.get() { + main.read_console_threw_error.set(false); + + // Evaluate last value so that `base::.Last.value` remains the same + RMain::on_console_input( + buf, + buflen, + String::from("base::invisible(base::.Last.value)"), + ) + .unwrap(); + return 1; + } + + // Keep track of state that we care about + + // - Track nesting depth of ReadConsole REPLs + main.read_console_depth + .set(main.read_console_depth.get() + 1); + + // - Set current frame environment + let old_current_frame = main.read_console_frame.replace(harp::r_current_frame()); + + // Keep track of state that we use for workarounds while interacting + // with the R REPL and force it to reset state + + // - Reset flag that helps us figure out when a nested REPL returns + main.read_console_nested_return.set(false); + + // - Reset flag that helps us figure out when an error occurred and needs a + // reset of `R_EvalDepth` and friends + main.read_console_threw_error.set(true); + + exec_with_cleanup( + || { + let main = RMain::get_mut(); + let result = r_read_console_impl(main, prompt, buf, buflen, hist); + + // If we get here, there was no error + main.read_console_threw_error.set(false); + + result + }, + || { + let main = RMain::get_mut(); + + // We're exiting, decrease depth of nested consoles + main.read_console_depth + .set(main.read_console_depth.get() - 1); + + // Set flag so that parent read console, if any, can detect that a + // nested console returned (if it indeed returns instead of looping + // for another iteration) + main.read_console_nested_return.set(true); + + // Restore current frame + main.read_console_frame.replace(old_current_frame); + }, + ) +} + +fn r_read_console_impl( + main: &mut RMain, + prompt: *const c_char, + buf: *mut c_uchar, + buflen: c_int, + hist: c_int, +) -> i32 { let result = r_sandbox(|| main.read_console(prompt, buf, buflen, hist)); - main.read_console_cleanup(); let result = unwrap!(result, Err(err) => { panic!("Unexpected longjump while reading from console: {err:?}"); @@ -2300,8 +2514,49 @@ pub extern "C-unwind" fn r_read_console( // destructors. We're longjumping from here in case of interrupt. match result { - ConsoleResult::NewInput => return 1, - ConsoleResult::Disconnected => return 0, + ConsoleResult::NewPendingInput(input) => { + let PendingInput { expr, srcref } = input; + + unsafe { + // The pointer protection stack is restored by `run_Rmainloop()` + // after a longjump to top-level, so it's safe to protect here + // even if the evaluation throws + let expr = libr::Rf_protect(expr.into()); + let srcref = libr::Rf_protect(srcref.into()); + + RMain::eval(expr, srcref, buf, buflen); + + // Check if a nested read_console() just returned. If that's the + // case, we need to reset the `R_ConsoleIob` by first returning + // a dummy value causing a `PARSE_NULL` event. + if main.read_console_nested_return.get() { + let next_input = RMain::console_input(buf, buflen); + main.read_console_nested_return_next_input + .set(Some(next_input)); + + // Evaluating a space causes a `PARSE_NULL` event. Don't + // evaluate a newline, that would cause a parent debug REPL + // to interpret it as `n`, causing it to exit instead of + // being a no-op. + RMain::on_console_input(buf, buflen, String::from(" ")).unwrap(); + main.read_console_nested_return.set(false); + } + + libr::Rf_unprotect(2); + return 1; + } + }, + + ConsoleResult::NewInput => { + return 1; + }, + + ConsoleResult::Disconnected => { + // Cause parent consoles to shutdown too + main.read_console_shutdown.set(true); + return 0; + }, + ConsoleResult::Interrupt => { log::trace!("Interrupting `ReadConsole()`"); unsafe { @@ -2312,8 +2567,14 @@ pub extern "C-unwind" fn r_read_console( log::error!("`Rf_onintr()` did not longjump"); return 0; }, - ConsoleResult::Error(err) => { - main.propagate_error(anyhow::anyhow!("{err}")); + + ConsoleResult::Error(message) => { + // Save error message in `RMain` to avoid leaking memory when + // `Rf_error()` jumps. Some gymnastics are required to deal with the + // possibility of `CString` conversion failure since the error + // message comes from the frontend and might be corrupted. + main.r_error_buffer = Some(new_cstring(message)); + unsafe { Rf_error(main.r_error_buffer.as_ref().unwrap().as_ptr()) } }, }; } @@ -2411,7 +2672,10 @@ fn do_resource_namespaces() -> bool { fn is_auto_printing() -> bool { let n_frame = harp::session::r_n_frame().unwrap(); - // The call-stack is empty so this must be R auto-printing an unclassed object + // The call-stack is empty so this must be R auto-printing an unclassed + // object. Note that this might wrongly return true in debug REPLs. Ideally + // we'd take note of the number of frames on the stack when we enter + // `r_read_console()`, and compare against that. if n_frame == 0 { return true; } diff --git a/crates/ark/src/lib.rs b/crates/ark/src/lib.rs index 0f88a0d5c..6b3d88790 100644 --- a/crates/ark/src/lib.rs +++ b/crates/ark/src/lib.rs @@ -29,6 +29,7 @@ pub mod modules_utils; pub mod plots; pub mod r_task; pub mod repos; +pub mod repl_debug; pub mod request; pub mod reticulate; pub mod shell; diff --git a/crates/ark/src/lsp/completions/sources/composite/search_path.rs b/crates/ark/src/lsp/completions/sources/composite/search_path.rs index 0adfd8004..273cd7951 100644 --- a/crates/ark/src/lsp/completions/sources/composite/search_path.rs +++ b/crates/ark/src/lsp/completions/sources/composite/search_path.rs @@ -13,7 +13,6 @@ use harp::vector::CharacterVector; use harp::vector::Vector; use harp::RObject; use libr::R_EmptyEnv; -use libr::R_GlobalEnv; use libr::R_lsInternal; use libr::ENCLOS; use tower_lsp::lsp_types::CompletionItem; @@ -51,19 +50,12 @@ fn completions_from_search_path( ]; unsafe { - // Iterate through environments starting from the global environment. - let mut env = R_GlobalEnv; - - // If we're waiting for input in `read_console()` with a debugger - // prompt, start from current environment + // Iterate through environments starting from the current frame environment. #[cfg(not(test))] // Unit tests do not have an `RMain` - { - use crate::interface::RMain; - if let Some(debug_env) = &RMain::get().debug_env() { - // Mem-Safety: Object protected by `RMain` for the duration of the `r_task()` - env = debug_env.sexp; - } - } + // Mem-Safety: Object protected by `RMain` for the duration of the `r_task()` + let mut env = crate::interface::RMain::get().read_console_frame().sexp; + #[cfg(test)] + let mut env = libr::R_GlobalEnv; while env != R_EmptyEnv { let is_pkg_env = r_env_is_pkg_env(env); diff --git a/crates/ark/src/main.rs b/crates/ark/src/main.rs index b5e4d202d..70cfe5e78 100644 --- a/crates/ark/src/main.rs +++ b/crates/ark/src/main.rs @@ -83,7 +83,6 @@ fn main() -> anyhow::Result<()> { let mut capture_streams = true; let mut default_repos = DefaultRepos::Auto; - // Process remaining arguments. TODO: Need an argument that can passthrough args to R while let Some(arg) = argv.next() { match arg.as_str() { "--connection_file" => { @@ -325,6 +324,20 @@ fn main() -> anyhow::Result<()> { r_args.push(String::from("--interactive")); } + // Prepend the vector of arguments with our default. These can be overridden + // by user arguments (last one wins). + r_args.splice(0..0, [ + // We don't support the asking the user whether to save the workspace + // data on exit because calling readline during shutdown puts in a + // precarious position. So effectively we're implementing "no-save" by + // default. Note that there is no argument to opt into the "ask" + // behaviour, so it can't be reenabled by the user. + String::from("--no-save"), + // Since we don't save by default, we also don't restore by default for + // consistency + String::from("--no-restore-data"), + ]); + // This causes panics on background threads to propagate on the main // thread. If we don't propagate a background thread panic, the program // keeps running in an unstable state as all communications with this diff --git a/crates/ark/src/modules/positron/errors.R b/crates/ark/src/modules/positron/errors.R index 11e082cb5..c5376d3df 100644 --- a/crates/ark/src/modules/positron/errors.R +++ b/crates/ark/src/modules/positron/errors.R @@ -33,6 +33,14 @@ #' @export .ps.errors.globalErrorHandler <- function(cnd) { + # Unlike C stack overflow errors, expressions nested too deeply errors allow + # calling handlers. But since we run R code, we need to temporarily bump the + # threshold to give a little room while we handle the error. + if (inherits(cnd, "expressionStackOverflowError")) { + old <- options(expressions = getOption("expressions") + 500) + defer(options(old)) + } + # This reproduces the behaviour of R's default error handler: # - Invoke `getOption("error")` # - Save backtrace for `traceback()` @@ -192,8 +200,40 @@ invoke_option_error_handler <- function() { handler <- as.expression(list(handler)) } + delayedAssign("non_local_return", return()) + for (hnd in handler) { - eval(hnd, globalenv()) + # Use `withCallingHandlers()` instead of `tryCatch()` to avoid making + # the call stack too complex. We might be running `options(error = browser())` + withCallingHandlers( + { + # Evaluate from a promise to keep a simple call stack. + # We do evaluate from a closure wrapped in `handler()` so that R + # can infer a named call, for instance in the "Called from:" + # output of `browser()`. + error_handler <- eval(bquote(function() .(hnd))) + error_handler() + }, + error = function(err) { + # Disable error handler to avoid cascading errors + options(error = NULL) + + # We don't let the error propagate to avoid a confusing sequence of + # error messages from R, such as "Error during wrapup" + writeLines( + c( + "The `getOption(\"error\")` handler failed.", + "This option was unset to avoid cascading errors.", + "Caused by:", + conditionMessage(err) + ), + con = stderr() + ) + + # Bail early + non_local_return + } + ) } } diff --git a/crates/ark/src/modules/positron/hooks.R b/crates/ark/src/modules/positron/hooks.R index 227c12729..d25ce85ca 100644 --- a/crates/ark/src/modules/positron/hooks.R +++ b/crates/ark/src/modules/positron/hooks.R @@ -14,6 +14,14 @@ register_hooks <- function() { new_ark_debug(base::debugonce), namespace = TRUE ) + + rebind( + "utils", + "recover", + # Keep this wrapped up this way for a better "Called from:" call + function(...) ark_recover(), + namespace = TRUE + ) register_getHook_hook() } @@ -151,3 +159,11 @@ check_version <- function(pkg) { } ) } + +# We don't support `utils::recover()` in Ark, but the same functionality is +# provided via the call stack pane of IDEs. So replace it by `browser()` so that +# people can enter the debugger on error using the familiar `options(error = +# recover)` gesture. +ark_recover <- function(...) { + browser() +} diff --git a/crates/ark/src/repl_debug.rs b/crates/ark/src/repl_debug.rs new file mode 100644 index 000000000..da5f8b46f --- /dev/null +++ b/crates/ark/src/repl_debug.rs @@ -0,0 +1,360 @@ +// +// repl_debug.rs +// +// Copyright (C) 2025 Posit Software, PBC. All rights reserved. +// + +use anyhow::anyhow; +use anyhow::Result; +use harp::exec::RFunction; +use harp::exec::RFunctionExt; +use harp::object::RObject; +use harp::protect::RProtect; +use harp::r_string; +use harp::session::r_sys_calls; +use harp::session::r_sys_frames; +use harp::session::r_sys_functions; +use harp::utils::r_is_null; +use regex::Regex; +use stdext::result::ResultExt; + +use crate::dap::dap::DapBackendEvent; +use crate::interface::DebugCallText; +use crate::interface::RMain; +use crate::modules::ARK_ENVS; +use crate::srcref::ark_uri; +use crate::thread::RThreadSafe; + +#[derive(Debug)] +pub struct FrameInfo { + pub id: i64, + /// The name shown in the editor tab bar when this frame is viewed. + pub source_name: String, + /// The name shown in the stack frame UI when this frame is visible. + pub frame_name: String, + pub source: FrameSource, + pub environment: Option>, + pub start_line: i64, + pub start_column: i64, + pub end_line: i64, + pub end_column: i64, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum FrameSource { + File(String), + Text(String), +} + +/// Version of `FrameInfo` that identifies the frame by value and doesn't keep a +/// reference to the environment. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct FrameInfoId { + pub source: FrameSource, + pub start_line: i64, + pub start_column: i64, + pub end_line: i64, + pub end_column: i64, +} + +impl From<&FrameInfo> for FrameInfoId { + fn from(info: &FrameInfo) -> Self { + FrameInfoId { + source: info.source.clone(), + start_line: info.start_line, + start_column: info.start_column, + end_line: info.end_line, + end_column: info.end_column, + } + } +} + +impl RMain { + pub(crate) fn debug_start(&mut self, debug_preserve_focus: bool) { + match self.debug_stack_info() { + Ok(stack) => { + // Figure out whether we changed location since last time, + // e.g. because the user evaluated an expression that hit + // another breakpoint. In that case we do want to move + // focus, even though the user didn't explicitly used a step + // gesture. Our indication that we changed location is + // whether the call stack looks the same as last time. This + // is not 100% reliable as this heuristic might have false + // negatives, e.g. if the control flow exited the current + // context via condition catching and jumped back in the + // debugged function. + let stack_id: Vec = stack.iter().map(|f| f.into()).collect(); + let same_stack = stack_id == self.debug_last_stack; + + // Initialize fallback sources for this stack + let fallback_sources = self.load_fallback_sources(&stack); + + self.debug_last_stack = stack_id; + + let preserve_focus = same_stack && debug_preserve_focus; + + let mut dap = self.debug_dap.lock().unwrap(); + dap.start_debug(stack, preserve_focus, fallback_sources) + }, + Err(err) => log::error!("ReadConsole: Can't get stack info: {err}"), + }; + } + + pub(crate) fn debug_stop(&mut self) { + self.debug_last_stack = vec![]; + self.clear_fallback_sources(); + self.debug_reset_frame_id(); + self.debug_session_index += 1; + + let mut dap = self.debug_dap.lock().unwrap(); + dap.stop_debug(); + } + + pub(crate) fn debug_send_dap(&self, event: DapBackendEvent) { + let dap = self.debug_dap.lock().unwrap(); + if let Some(tx) = &dap.backend_events_tx { + tx.send(event).log_err(); + } + } + + pub(crate) fn debug_handle_read_console(&mut self) { + // Upon entering read-console, finalize any debug call text that we were capturing. + // At this point, the user can either advance the debugger, causing us to capture + // a new expression, or execute arbitrary code, where we will reuse a finalized + // debug call text to maintain the debug state. + match &self.debug_call_text { + // If not debugging, nothing to do. + DebugCallText::None => (), + // If already finalized, keep what we have. + DebugCallText::Finalized(_) => (), + // If capturing, transition to finalized. + DebugCallText::Capturing(call_text) => { + self.debug_call_text = DebugCallText::Finalized(call_text.clone()) + }, + } + } + + pub(crate) fn debug_handle_write_console(&mut self, content: &str) { + if let DebugCallText::Capturing(ref mut call_text) = self.debug_call_text { + // Append to current expression if we are currently capturing stdout + call_text.push_str(content); + return; + } + + // `debug: ` is emitted by R (if no srcrefs are available!) right before it emits + // the current expression we are debugging, so we use that as a signal to begin + // capturing. + if content == "debug: " { + self.debug_call_text = DebugCallText::Capturing(String::new()); + return; + } + + // Entering or exiting a closure, reset the debug start line state and call text + if content == "debugging in: " || content == "exiting from: " { + self.debug_last_line = None; + self.debug_call_text = DebugCallText::None; + return; + } + } + + pub(crate) fn debug_stack_info(&mut self) -> Result> { + // We leave finalized `call_text` in place rather than setting it to `None` here + // in case the user executes an arbitrary expression in the debug R console, which + // loops us back here without updating the `call_text` in any way, allowing us to + // recreate the debugger state after their code execution. + let call_text = match self.debug_call_text.clone() { + DebugCallText::None => None, + DebugCallText::Capturing(call_text) => { + log::error!( + "Call text is in `Capturing` state, but should be `Finalized`: '{call_text}'." + ); + None + }, + DebugCallText::Finalized(call_text) => Some(call_text), + }; + + let last_start_line = self.debug_last_line; + + let frames = self.debug_r_stack_info(call_text, last_start_line)?; + + // If we have `frames`, update the `last_start_line` with the context + // frame's start line + if let Some(frame) = frames.get(0) { + self.debug_last_line = Some(frame.start_line); + } + + Ok(frames) + } + + pub(crate) fn debug_r_stack_info( + &mut self, + context_call_text: Option, + context_last_start_line: Option, + ) -> Result> { + unsafe { + let mut protect = RProtect::new(); + + let context_srcref = libr::get(libr::R_Srcref); + protect.add(context_srcref); + + let context_call_text = match context_call_text { + Some(context_call_text) => r_string!(context_call_text, &mut protect), + None => libr::R_NilValue, + }; + + let context_last_start_line = match context_last_start_line { + Some(context_last_start_line) => { + let x = libr::Rf_allocVector(libr::INTSXP, 1); + protect.add(x); + libr::SET_INTEGER_ELT(x, 0, i32::try_from(context_last_start_line)?); + x + }, + None => libr::R_NilValue, + }; + + let functions = r_sys_functions()?; + protect.add(functions); + + let environments = r_sys_frames()?; + protect.add(environments.sexp); + + let calls = r_sys_calls()?; + protect.add(calls.sexp); + + let info = RFunction::new("", "debugger_stack_info") + .add(context_call_text) + .add(context_last_start_line) + .add(context_srcref) + .add(functions) + .add(environments) + .add(calls) + .call_in(ARK_ENVS.positron_ns)?; + + let n: isize = libr::Rf_xlength(info.sexp); + + let mut out = Vec::with_capacity(n as usize); + + // Reverse the order for DAP + for i in (0..n).rev() { + let frame = libr::VECTOR_ELT(info.sexp, i); + let id = self.debug_next_frame_id(); + out.push(as_frame_info(frame, id)?); + } + + Ok(out) + } + } + + fn debug_next_frame_id(&mut self) -> i64 { + let out = self.debug_current_frame_id; + self.debug_current_frame_id += 1; + out + } + + pub(crate) fn debug_reset_frame_id(&mut self) { + self.debug_current_frame_id = 0; + } + + pub(crate) fn ark_debug_uri( + debug_session_index: u32, + source_name: &str, + source: &str, + ) -> String { + // Hash the source to generate a unique identifier used in + // the URI. This is needed to disambiguate frames that have + // the same source name (used as file name in the URI) but + // different sources. + use std::collections::hash_map::DefaultHasher; + use std::hash::Hash; + use std::hash::Hasher; + let mut hasher = DefaultHasher::new(); + source.hash(&mut hasher); + let hash = format!("{:x}", hasher.finish()); + + ark_uri(&format!( + "debug/session{i}/{hash}/{source_name}.R", + i = debug_session_index, + )) + } + + // Doesn't expect `ark:` scheme, used for checking keys in our vdoc map + pub(crate) fn is_ark_debug_path(uri: &str) -> bool { + static RE_ARK_DEBUG_URI: std::sync::OnceLock = std::sync::OnceLock::new(); + let re = RE_ARK_DEBUG_URI.get_or_init(|| Regex::new(r"^ark-\d+/debug/").unwrap()); + re.is_match(uri) + } +} + +fn as_frame_info(info: libr::SEXP, id: i64) -> Result { + unsafe { + let mut i = 0; + + let source_name = libr::VECTOR_ELT(info, i); + let source_name: String = RObject::view(source_name).try_into()?; + + i += 1; + let frame_name = libr::VECTOR_ELT(info, i); + let frame_name: String = RObject::view(frame_name).try_into()?; + + let mut source = None; + + i += 1; + let file = libr::VECTOR_ELT(info, i); + if file != libr::R_NilValue { + let file: String = RObject::view(file).try_into()?; + source = Some(FrameSource::File(file)); + } + + i += 1; + let text = libr::VECTOR_ELT(info, i); + if text != libr::R_NilValue { + let text: String = RObject::view(text).try_into()?; + source = Some(FrameSource::Text(text)); + } + + let Some(source) = source else { + return Err(anyhow!( + "Expected either `file` or `text` to be non-`NULL`." + )); + }; + + i += 1; + let environment = libr::VECTOR_ELT(info, i); + let environment = if r_is_null(environment) { + None + } else { + Some(RThreadSafe::new(RObject::from(environment))) + }; + + i += 1; + let start_line = libr::VECTOR_ELT(info, i); + let start_line: i32 = RObject::view(start_line).try_into()?; + + i += 1; + let start_column = libr::VECTOR_ELT(info, i); + let start_column: i32 = RObject::view(start_column).try_into()?; + + i += 1; + let end_line = libr::VECTOR_ELT(info, i); + let end_line: i32 = RObject::view(end_line).try_into()?; + + // For `end_column`, the column range provided by R is inclusive `[,]`, but the + // one used on the DAP / Positron side is exclusive `[,)` so we have to add 1. + i += 1; + let end_column = libr::VECTOR_ELT(info, i); + let end_column: i32 = RObject::view(end_column).try_into()?; + let end_column = end_column + 1; + + Ok(FrameInfo { + id, + source_name, + frame_name, + source, + environment, + start_line: start_line.try_into()?, + start_column: start_column.try_into()?, + end_line: end_line.try_into()?, + end_column: end_column.try_into()?, + }) + } +} diff --git a/crates/ark/src/sys/unix/interface.rs b/crates/ark/src/sys/unix/interface.rs index db5bc60ce..30f146320 100644 --- a/crates/ark/src/sys/unix/interface.rs +++ b/crates/ark/src/sys/unix/interface.rs @@ -7,6 +7,8 @@ use std::ffi::CStr; use std::os::raw::c_char; +use std::sync::Condvar; +use std::sync::Mutex; use libr::ptr_R_Busy; use libr::ptr_R_ReadConsole; @@ -38,6 +40,9 @@ use crate::interface::r_write_console; use crate::interface::RMain; use crate::signals::initialize_signal_handlers; +// For shutdown signal in integration tests +pub static CLEANUP_SIGNAL: (Mutex, Condvar) = (Mutex::new(false), Condvar::new()); + pub fn setup_r(args: &Vec) { unsafe { // Before `Rf_initialize_R()` @@ -73,6 +78,14 @@ pub fn setup_r(args: &Vec) { libr::set(ptr_R_Busy, Some(r_busy)); libr::set(ptr_R_Suicide, Some(r_suicide)); + // Install a CleanUp hook for integration tests that test the shutdown process. + // We confirm that shutdown occurs by waiting in the test until `CLEANUP_SIGNAL`'s + // condition variable sends a notification, which occurs in this cleanup method + // that is called during R's shutdown process. + if stdext::IS_TESTING { + libr::set(libr::ptr_R_CleanUp, Some(r_cleanup_for_tests)); + } + // In tests R may be run from various threads. This confuses R's stack // overflow checks so we disable those. This should not make it in // production builds as it causes stack overflows to crash R instead of @@ -122,3 +135,21 @@ pub fn run_activity_handlers() { } } } + +#[no_mangle] +pub extern "C-unwind" fn r_cleanup_for_tests(_save_act: i32, _status: i32, _run_last: i32) { + // Signal that cleanup has started + let (lock, cvar) = &CLEANUP_SIGNAL; + + let mut started = lock.lock().unwrap(); + *started = true; + + cvar.notify_all(); + drop(started); + + // Sleep to give tests time to complete before we panic + std::thread::sleep(std::time::Duration::from_secs(5)); + + // Fallthrough to R which will call `exit()`. Note that panicking from here + // would be UB, we can't panic over a C stack. +} diff --git a/crates/ark/src/variables/variable.rs b/crates/ark/src/variables/variable.rs index 4653fabb2..6aa8c9ceb 100644 --- a/crates/ark/src/variables/variable.rs +++ b/crates/ark/src/variables/variable.rs @@ -2112,7 +2112,7 @@ mod tests { #[test] fn test_truncation_on_matrices() { r_task(|| { - let env = Environment::new_empty().unwrap(); + let env = Environment::new_empty(); let value = harp::parse_eval_base("matrix(0, nrow = 10000, ncol = 10000)").unwrap(); env.bind("x".into(), &value); @@ -2132,7 +2132,7 @@ mod tests { #[test] fn test_string_truncation() { r_task(|| { - let env = Environment::new_empty().unwrap(); + let env = Environment::new_empty(); let value = harp::parse_eval_base("paste(1:5e6, collapse = ' - ')").unwrap(); env.bind("x".into(), &value); @@ -2143,7 +2143,7 @@ mod tests { assert_eq!(vars[0].is_truncated, true); // Test for the empty string - let env = Environment::new_empty().unwrap(); + let env = Environment::new_empty(); let value = harp::parse_eval_base("''").unwrap(); env.bind("x".into(), &value); @@ -2157,7 +2157,7 @@ mod tests { #[test] fn test_s4_with_different_length() { r_task(|| { - let env = Environment::new_empty().unwrap(); + let env = Environment::new_empty(); // Matrix::Matrix objects have length != 1, but their format() method returns a length 1 character // describing their class. let value = harp::parse_eval_base("Matrix::Matrix(0, nrow= 10, ncol = 10)").unwrap(); @@ -2181,7 +2181,7 @@ mod tests { return; } - let env = Environment::new_empty().unwrap(); + let env = Environment::new_empty(); let value = harp::parse_eval_base(r#"rlang:::chr_get("foo", 0L)"#).unwrap(); env.bind("x".into(), &value); @@ -2196,7 +2196,7 @@ mod tests { fn test_matrix_display() { r_task(|| { // Test 10x10 matrix - let env = Environment::new_empty().unwrap(); + let env = Environment::new_empty(); let value = harp::parse_eval_base("matrix(paste(1:90, collapse = ' - '), nrow = 9, ncol = 10)") .unwrap(); @@ -2220,7 +2220,7 @@ mod tests { assert_eq!(display_value_matrix, display_value_df); // Test plurals - let env = Environment::new_empty().unwrap(); + let env = Environment::new_empty(); let value = harp::parse_eval_base("matrix(paste(1:100, collapse = ' - '), nrow = 1, ncol = 1)") .unwrap(); @@ -2231,7 +2231,7 @@ mod tests { assert_eq!(vars[0].display_value, "[1 row x 1 column] "); // Test class - let env = Environment::new_empty().unwrap(); + let env = Environment::new_empty(); let value = harp::parse_eval_base( "structure(matrix(paste(1:100, collapse = ' - '), nrow = 1, ncol = 1), class='foo')", ) diff --git a/crates/ark/tests/kernel-notebook.rs b/crates/ark/tests/kernel-notebook.rs index 9051111f2..965003805 100644 --- a/crates/ark/tests/kernel-notebook.rs +++ b/crates/ark/tests/kernel-notebook.rs @@ -72,7 +72,7 @@ fn test_notebook_execute_request_incomplete() { assert!(frontend .recv_iopub_execute_error() - .contains("Can't execute incomplete input")); + .contains("Can't parse incomplete input")); frontend.recv_iopub_idle(); @@ -95,7 +95,7 @@ fn test_notebook_execute_request_incomplete_multiple_lines() { assert!(frontend .recv_iopub_execute_error() - .contains("Can't execute incomplete input")); + .contains("Can't parse incomplete input")); frontend.recv_iopub_idle(); diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 042142cec..979b17667 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -1,5 +1,6 @@ use amalthea::fixtures::dummy_frontend::ExecuteRequestOptions; use amalthea::wire::jupyter_message::Message; +use amalthea::wire::jupyter_message::Status; use amalthea::wire::kernel_info_request::KernelInfoRequest; use ark::fixtures::DummyArkFrontend; use stdext::assert_match; @@ -24,140 +25,158 @@ fn test_kernel_info() { #[test] fn test_execute_request() { let frontend = DummyArkFrontend::lock(); - - let code = "42"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - assert_eq!(frontend.recv_iopub_execute_result(), "[1] 42"); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request("42", |result| assert_eq!(result, "[1] 42")); } #[test] fn test_execute_request_empty() { let frontend = DummyArkFrontend::lock(); - let code = ""; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request_invisibly(""); // Equivalent to invisible output - let code = "invisible(1)"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request_invisibly("invisible(1)"); } #[test] fn test_execute_request_multiple_lines() { let frontend = DummyArkFrontend::lock(); - let code = "1 +\n 2+\n 3"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); + frontend.execute_request("1 +\n 2+\n 3", |result| assert_eq!(result, "[1] 6")); +} - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - assert_eq!(frontend.recv_iopub_execute_result(), "[1] 6"); +#[test] +fn test_execute_request_incomplete() { + // Set RUST_BACKTRACE to ensure backtraces are captured. We used to leak + // backtraces in syntax error messages, and this shouldn't happen even when + // `RUST_BACKTRACE` is set. + std::env::set_var("RUST_BACKTRACE", "1"); - frontend.recv_iopub_idle(); + let frontend = DummyArkFrontend::lock(); + + frontend.execute_request_invisibly("options(positron.error_entrace = FALSE)"); - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count) + frontend.execute_request_error("1 +", |error_msg| { + assert_eq!(error_msg, "Error:\nCan't parse incomplete input"); + }); } #[test] -fn test_execute_request_incomplete() { +fn test_execute_request_incomplete_multiple_lines() { let frontend = DummyArkFrontend::lock(); - let code = "1 +"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); + frontend.execute_request_error("1 +\n2 +", |error_msg| { + assert!(error_msg.contains("Can't parse incomplete input")); + }); +} - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); +#[test] +fn test_execute_request_invalid() { + // Set RUST_BACKTRACE to ensure backtraces are captured. We used to leak + // backtraces in syntax error messages, and this shouldn't happen even when + // `RUST_BACKTRACE` is set. + std::env::set_var("RUST_BACKTRACE", "1"); - assert!(frontend - .recv_iopub_execute_error() - .contains("Can't execute incomplete input")); + let frontend = DummyArkFrontend::lock(); - frontend.recv_iopub_idle(); + frontend.execute_request_error("1 + )", |error_msg| { + assert!(error_msg.contains("Syntax error")); + assert!(!error_msg.contains("Stack backtrace:") && !error_msg.contains("std::backtrace")); + }); - assert_eq!( - frontend.recv_shell_execute_reply_exception(), - input.execution_count - ) + // https://github.com/posit-dev/ark/issues/598 + frontend.execute_request_error("``", |error_msg| { + assert!(error_msg.contains("Syntax error")); + assert!(!error_msg.contains("Stack backtrace:") && !error_msg.contains("std::backtrace")); + }); + + // https://github.com/posit-dev/ark/issues/722 + frontend.execute_request_error("_ + _()", |error_msg| { + assert!(error_msg.contains("Syntax error")); + assert!(!error_msg.contains("Stack backtrace:") && !error_msg.contains("std::backtrace")); + }); } #[test] -fn test_execute_request_incomplete_multiple_lines() { +fn test_execute_request_browser() { let frontend = DummyArkFrontend::lock(); - let code = "1 +\n2 +"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); + frontend.execute_request("browser()", |result| { + assert!(result.contains("Called from: top level")); + }); - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); + frontend.execute_request_invisibly("Q"); +} - assert!(frontend - .recv_iopub_execute_error() - .contains("Can't execute incomplete input")); +#[test] +fn test_execute_request_browser_continue() { + let frontend = DummyArkFrontend::lock(); - frontend.recv_iopub_idle(); + frontend.execute_request("browser()", |result| { + assert!(result.contains("Called from: top level")); + }); - assert_eq!( - frontend.recv_shell_execute_reply_exception(), - input.execution_count - ) + frontend.execute_request_invisibly("n"); } #[test] -fn test_execute_request_browser() { +fn test_execute_request_browser_nested() { + // Test nested browser() calls - entering a browser within a browser let frontend = DummyArkFrontend::lock(); - let code = "browser()"; + // Start first browser + frontend.execute_request("browser()", |result| { + assert!(result.contains("Called from: top level")); + }); + + // Evaluate a value in the outer browser + frontend.execute_request("42", |result| assert!(result.contains("[1] 42"))); + + // Start nested browser from within the first browser + // Nested browser() produces execute_result output + frontend.execute_request("browser()", |_result| {}); + + // Evaluate a command in the nested browser + frontend.execute_request("1", |result| assert!(result.contains("[1] 1"))); + + // Evaluate another value in the nested browser + frontend.execute_request("\"hello\"", |result| assert!(result.contains("hello"))); + + // Throw an error in the nested browser + let code = "stop('error in nested')"; frontend.send_execute_request(code, ExecuteRequestOptions::default()); frontend.recv_iopub_busy(); let input = frontend.recv_iopub_execute_input(); assert_eq!(input.code, code); - assert!(frontend - .recv_iopub_execute_result() - .contains("Called from: top level")); - + frontend.recv_iopub_stream_stderr("Error: error in nested\n"); frontend.recv_iopub_idle(); assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); - let code = "Q"; + // Continue to exit the nested browser and return to parent + frontend.execute_request_invisibly("c"); + + // Back in the parent browser, evaluate another value + frontend.execute_request("3.14", |result| assert!(result.contains("[1] 3.14"))); + + // Throw an error in the outer browser + let code = "stop('error in parent')"; frontend.send_execute_request(code, ExecuteRequestOptions::default()); frontend.recv_iopub_busy(); let input = frontend.recv_iopub_execute_input(); assert_eq!(input.code, code); + frontend.recv_iopub_stream_stderr("Error: error in parent\n"); frontend.recv_iopub_idle(); assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + frontend.execute_request("NA", |result| assert!(result.contains("[1] NA"))); + // Quit the outer browser + frontend.execute_request_invisibly("Q"); } #[test] @@ -169,20 +188,9 @@ fn test_execute_request_browser_error() { let frontend = DummyArkFrontend::lock(); - let code = "browser()"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert!(frontend - .recv_iopub_execute_result() - .contains("Called from: top level")); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request("browser()", |result| { + assert!(result.contains("Called from: top level")); + }); frontend.send_execute_request("stop('foobar')", ExecuteRequestOptions::default()); frontend.recv_iopub_busy(); @@ -195,36 +203,21 @@ fn test_execute_request_browser_error() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); - let code = "Q"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request_invisibly("Q"); } #[test] fn test_execute_request_browser_incomplete() { - let frontend = DummyArkFrontend::lock(); - - let code = "browser()"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); + // Set RUST_BACKTRACE to ensure backtraces are captured. We used to leak + // backtraces in syntax error messages, and this shouldn't happen even when + // `RUST_BACKTRACE` is set. + std::env::set_var("RUST_BACKTRACE", "1"); - assert!(frontend - .recv_iopub_execute_result() - .contains("Called from: top level")); - - frontend.recv_iopub_idle(); + let frontend = DummyArkFrontend::lock(); - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request("browser()", |result| { + assert!(result.contains("Called from: top level")); + }); let code = "1 +"; frontend.send_execute_request(code, ExecuteRequestOptions::default()); @@ -233,7 +226,7 @@ fn test_execute_request_browser_incomplete() { let input = frontend.recv_iopub_execute_input(); assert_eq!(input.code, code); - frontend.recv_iopub_stream_stderr("Error: \nCan't execute incomplete input:\n1 +\n"); + frontend.recv_iopub_stream_stderr("Error: Can't parse incomplete input\n"); frontend.recv_iopub_idle(); assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); @@ -290,54 +283,86 @@ fn()"; assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); - let code = "Q"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.execute_request_invisibly("Q"); +} + +#[test] +fn test_execute_request_browser_stdin() { + let frontend = DummyArkFrontend::lock(); + + frontend.execute_request("browser()", |result| { + assert!(result.contains("Called from: top level")); + }); + + let options = ExecuteRequestOptions { allow_stdin: true }; + let code = "readline('prompt>')"; + frontend.send_execute_request(code, options); frontend.recv_iopub_busy(); let input = frontend.recv_iopub_execute_input(); assert_eq!(input.code, code); - frontend.recv_iopub_idle(); + let prompt = frontend.recv_stdin_input_request(); + assert_eq!(prompt, String::from("prompt>")); + + frontend.send_stdin_input_reply(String::from("hi")); + assert_eq!(frontend.recv_iopub_execute_result(), "[1] \"hi\""); + frontend.recv_iopub_idle(); assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + frontend.execute_request_invisibly("Q"); } #[test] -fn test_execute_request_browser_stdin() { +fn test_execute_request_browser_multiple_expressions() { let frontend = DummyArkFrontend::lock(); - let code = "browser()"; + // Ideally the evaluation of `1` would be cancelled + let code = "browser()\n1"; frontend.send_execute_request(code, ExecuteRequestOptions::default()); frontend.recv_iopub_busy(); let input = frontend.recv_iopub_execute_input(); assert_eq!(input.code, code); - assert!(frontend - .recv_iopub_execute_result() - .contains("Called from: top level")); + frontend.recv_iopub_stream_stdout("Called from: top level \n"); + assert_eq!(frontend.recv_iopub_execute_result(), "[1] 1"); frontend.recv_iopub_idle(); - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); - let options = ExecuteRequestOptions { allow_stdin: true }; - let code = "readline('prompt>')"; - frontend.send_execute_request(code, options); + // Even if we could cancel pending expressions, it would still be possible + // to run multiple expressions in a debugger prompt + let code = "1\n2"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); frontend.recv_iopub_busy(); let input = frontend.recv_iopub_execute_input(); assert_eq!(input.code, code); - let prompt = frontend.recv_stdin_input_request(); - assert_eq!(prompt, String::from("prompt>")); + frontend.recv_iopub_stream_stdout("[1] 1\n"); - frontend.send_stdin_input_reply(String::from("hi")); + assert_eq!(frontend.recv_iopub_execute_result(), "[1] 2"); + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); - assert_eq!(frontend.recv_iopub_execute_result(), "[1] \"hi\""); + // But getting in a nested browser session with a pending expression would + // cancel it (not the case currently) + let code = "browser()\n1"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_stream_stdout("Called from: top level \n"); + + assert_eq!(frontend.recv_iopub_execute_result(), "[1] 1"); frontend.recv_iopub_idle(); assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + // Quit session let code = "Q"; frontend.send_execute_request(code, ExecuteRequestOptions::default()); frontend.recv_iopub_busy(); @@ -350,17 +375,75 @@ fn test_execute_request_browser_stdin() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); } +#[test] +fn test_execute_request_browser_local_variable() { + let frontend = DummyArkFrontend::lock(); + + let code = "local({\n local_foo <- 1\n browser()\n})"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_stream_stdout( + "Called from: eval(quote({\n local_foo <- 1\n browser()\n}), new.env())\n", + ); + + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + let code = "local_foo"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + // Should ideally be `recv_iopub_execute_result()`, but auto-printing + // detection currently does not work reliably in debug REPLs + frontend.recv_iopub_stream_stdout("[1] 1\n"); + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + frontend.execute_request_invisibly("Q"); +} + #[test] fn test_execute_request_error() { let frontend = DummyArkFrontend::lock(); - frontend.send_execute_request("stop('foobar')", ExecuteRequestOptions::default()); + frontend.execute_request_error("stop('foobar')", |error_msg| { + assert!(error_msg.contains("foobar")); + }); +} + +#[test] +fn test_execute_request_error_with_accumulated_output() { + // Test that when the very last input throws an error after producing + // output, the accumulated output is flushed before the error is reported. + // This tests the autoprint buffer flush logic in error handling. + let frontend = DummyArkFrontend::lock(); + + let code = "{ + print.foo <- function(x) { + print(unclass(x)) + stop(\"foo\") + } + structure(42, class = \"foo\") + }"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); frontend.recv_iopub_busy(); let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, "stop('foobar')"); - assert!(frontend.recv_iopub_execute_error().contains("foobar")); + assert_eq!(input.code, code); + + // The output from print(1) should be flushed to stdout + frontend.recv_iopub_stream_stdout("[1] 42\n"); + // Then the error should be reported on stderr + assert!(frontend.recv_iopub_execute_error().contains("foo")); frontend.recv_iopub_idle(); assert_eq!( @@ -369,15 +452,58 @@ fn test_execute_request_error() { ); } +#[test] +fn test_execute_request_error_expressions_overflow() { + let frontend = DummyArkFrontend::lock(); + + // Deterministically produce an "evaluation too deeply nested" error + frontend.execute_request_error( + "options(expressions = 100); f <- function(x) if (x > 0 ) f(x - 1); f(100)", + |error_msg| { + assert!(error_msg.contains("evaluation nested too deeply")); + }, + ); + + // Check we can still evaluate without causing another too deeply nested error + frontend.execute_request_invisibly("f(10)"); +} + +#[test] +fn test_execute_request_error_expressions_overflow_last_value() { + let frontend = DummyArkFrontend::lock(); + + // Set state and last value + frontend.execute_request_invisibly( + "options(expressions = 100); f <- function(x) if (x > 0 ) f(x - 1); invisible('hello')", + ); + + // Check last value is set + frontend.execute_request(".Last.value", |result| { + assert_eq!(result, "[1] \"hello\""); + }); + + // Deterministically produce an "evaluation too deeply nested" error + frontend.execute_request_error("f(100)", |error_msg| { + assert!(error_msg.contains("evaluation nested too deeply")); + }); + + // Check last value is still set + frontend.execute_request(".Last.value", |result| { + assert_eq!(result, "[1] \"hello\""); + }); +} + #[test] fn test_execute_request_error_multiple_expressions() { let frontend = DummyArkFrontend::lock(); - frontend.send_execute_request("1\nstop('foobar')\n2", ExecuteRequestOptions::default()); + // `print(2)` and `3` are never evaluated + let code = "1\nstop('foobar')\nprint(2)\n3"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); frontend.recv_iopub_busy(); let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, "1\nstop('foobar')\n2"); + assert_eq!(input.code, code); frontend.recv_iopub_stream_stdout("[1] 1\n"); assert!(frontend.recv_iopub_execute_error().contains("foobar")); @@ -418,28 +544,16 @@ fn test_execute_request_multiple_expressions() { fn test_execute_request_single_line_buffer_overflow() { let frontend = DummyArkFrontend::lock(); + // This used to fail back when we were passing inputs down to the REPL from + // our `ReadConsole` handler. Below is the old test description for posterity. + // The newlines do matter for what we are testing here, // due to how we internally split by newlines. We want // to test that the `aaa`s result in an immediate R error, // not in text written to the R buffer that calls `stop()`. let aaa = "a".repeat(4096); let code = format!("quote(\n{aaa}\n)"); - frontend.send_execute_request(code.as_str(), ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert!(frontend - .recv_iopub_execute_error() - .contains("Can't pass console input on to R")); - - frontend.recv_iopub_idle(); - - assert_eq!( - frontend.recv_shell_execute_reply_exception(), - input.execution_count - ); + frontend.execute_request(code.as_str(), |result| assert!(result.contains(&aaa))); } #[test] @@ -590,6 +704,98 @@ fn test_stdin_from_menu() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); } +// Can debug the base environment (parent is the empty environment) +#[test] +fn test_browser_in_base_env() { + let frontend = DummyArkFrontend::lock(); + + let code = "evalq(browser(), baseenv())"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + // Inside `evalq()` we aren't at top level, so this comes as an iopub stream + // and not an execute result + frontend.recv_iopub_stream_stdout("Called from: evalq(browser(), baseenv())\n"); + + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // While paused in the debugger, evaluate a simple expression + let code = "1 + 1"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_stream_stdout("[1] 2\n"); + + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + let code = "Q"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); +} + +// The minimal environment we can debug in: access to base via `::`. This might +// be a problem for very specialised sandboxing environment, but they can +// temporarily add `::` while debugging. +#[test] +fn test_browser_in_sandboxing_environment() { + let frontend = DummyArkFrontend::lock(); + + let code = " +env <- new.env(parent = emptyenv()) +env$`::` <- `::` +evalq(base::browser(), env)"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + // Inside `evalq()` we aren't at top level, so this comes as an iopub stream + // and not an execute result + frontend.recv_iopub_stream_stdout("Called from: evalq(base::browser(), env)\n"); + + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // While paused in the debugger, evaluate a simple expression that only + // requires `::` + let code = "base::list"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_stream_stdout("function (...) .Primitive(\"list\")\n"); + + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + let code = "Q"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); +} + #[test] fn test_env_vars() { // These environment variables are set by R's shell script frontend. @@ -610,3 +816,256 @@ fn test_env_vars() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); } + +#[test] +fn test_execute_request_error_handler_failure() { + let frontend = DummyArkFrontend::lock(); + + let code = r#" +f <- function() g() +g <- function() h() +h <- function() stop("foo") +options(error = function() stop("ouch")) +"#; + frontend.execute_request_invisibly(code); + + frontend.send_execute_request("f()", ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, "f()"); + + frontend.recv_iopub_stream_stderr( + r#"The `getOption("error")` handler failed. +This option was unset to avoid cascading errors. +Caused by: +ouch +"#, + ); + + assert!(frontend.recv_iopub_execute_error().contains("foo")); + + frontend.recv_iopub_idle(); + assert_eq!( + frontend.recv_shell_execute_reply_exception(), + input.execution_count + ); +} + +#[test] +fn test_execute_request_error_handler_readline() { + let frontend = DummyArkFrontend::lock(); + + let code = r#" +f <- function() g() +g <- function() h() +h <- function() stop("foo") +options(error = function() menu("ouch")) +"#; + frontend.execute_request_invisibly(code); + + frontend.send_execute_request("f()", ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, "f()"); + + frontend.recv_iopub_stream_stdout("Enter an item from the menu, or 0 to exit\n"); + + frontend.recv_iopub_stream_stderr( + r#"The `getOption("error")` handler failed. +This option was unset to avoid cascading errors. +Caused by: +Can't request input from the user at this time. +Are you calling `readline()` or `menu()` from `options(error = )`? +"#, + ); + + assert!(frontend.recv_iopub_execute_error().contains("foo")); + frontend.recv_iopub_idle(); + + assert_eq!( + frontend.recv_shell_execute_reply_exception(), + input.execution_count + ); +} + +#[test] +fn test_execute_request_error_recover() { + let frontend = DummyArkFrontend::lock(); + + let code = r#" +f <- function() g() +g <- function() h() +h <- function() stop("foo") +options(error = recover) +"#; + frontend.execute_request_invisibly(code); + + frontend.send_execute_request("f()", ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, "f()"); + + // We set up the call stack to show a simple `error_handler()` + frontend.recv_iopub_stream_stdout("Called from: ark_recover()\n"); + + assert!(frontend.recv_iopub_execute_error().contains("foo")); + + frontend.recv_iopub_idle(); + assert_eq!( + frontend.recv_shell_execute_reply_exception(), + input.execution_count + ); +} + +/// Install a SIGINT handler for shutdown tests. This overrides the test runner +/// handler so it doesn't cancel our test. +fn install_sigint_handler() { + extern "C" fn sigint_handler(_: libc::c_int) {} + #[cfg(unix)] + unsafe { + use nix::sys::signal::signal; + use nix::sys::signal::SigHandler; + use nix::sys::signal::Signal; + + signal(Signal::SIGINT, SigHandler::Handler(sigint_handler)).unwrap(); + } +} + +// Note that because of these shutdown tests you _have_ to use `cargo nextest` +// instead of `cargo test`, so that each test has its own process and R thread. +#[test] +#[cfg(unix)] +fn test_shutdown_request() { + install_sigint_handler(); + let frontend = DummyArkFrontend::lock(); + + frontend.send_shutdown_request(false); + frontend.recv_iopub_busy(); + + let reply = frontend.recv_control_shutdown_reply(); + assert_eq!(reply.status, Status::Ok); + assert_eq!(reply.restart, false); + + frontend.recv_iopub_idle(); + + DummyArkFrontend::wait_for_cleanup(); +} + +#[test] +#[cfg(unix)] +fn test_shutdown_request_with_restart() { + install_sigint_handler(); + let frontend = DummyArkFrontend::lock(); + + frontend.send_shutdown_request(true); + frontend.recv_iopub_busy(); + + let reply = frontend.recv_control_shutdown_reply(); + assert_eq!(reply.status, Status::Ok); + assert_eq!(reply.restart, true); + + frontend.recv_iopub_idle(); + + DummyArkFrontend::wait_for_cleanup(); +} + +static SHUTDOWN_TESTS_ENABLED: bool = false; + +// Can shut down Ark when running a nested debug console +// https://github.com/posit-dev/positron/issues/6553 +#[test] +#[cfg(unix)] +fn test_shutdown_request_browser() { + if !SHUTDOWN_TESTS_ENABLED { + return; + } + + install_sigint_handler(); + let frontend = DummyArkFrontend::lock(); + + frontend.execute_request("browser()", |result| { + assert!(result.contains("Called from: top level")); + }); + + frontend.send_shutdown_request(true); + frontend.recv_iopub_busy(); + + // There is a race condition between the Control thread and the Shell + // threads. Ideally we'd wait for both the Shutdown reply and the IOPub Idle + // messages concurrently instead of sequentially. + let reply = frontend.recv_control_shutdown_reply(); + assert_eq!(reply.status, Status::Ok); + assert_eq!(reply.restart, true); + + frontend.recv_iopub_idle(); + + DummyArkFrontend::wait_for_cleanup(); +} + +#[test] +#[cfg(unix)] +fn test_shutdown_request_while_busy() { + if !SHUTDOWN_TESTS_ENABLED { + return; + } + + install_sigint_handler(); + let frontend = DummyArkFrontend::lock(); + + let code = "Sys.sleep(10)"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.send_shutdown_request(false); + frontend.recv_iopub_busy(); + + let reply = frontend.recv_control_shutdown_reply(); + assert_eq!(reply.status, Status::Ok); + assert_eq!(reply.restart, false); + + // It seems this isn't emitted on older R versions + frontend.recv_iopub_stream_stderr("\n"); + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.recv_iopub_idle(); + + DummyArkFrontend::wait_for_cleanup(); +} + +#[test] +fn test_execute_request_source_references() { + let frontend = DummyArkFrontend::lock(); + + // Test that our parser attaches source references when global option is set + frontend.execute_request_invisibly("options(keep.source = TRUE)"); + frontend.execute_request_invisibly("f <- function() {}"); + + frontend.execute_request( + "srcref <- attr(f, 'srcref'); inherits(srcref, 'srcref')", + |result| { + assert_eq!(result, "[1] TRUE"); + }, + ); + + frontend.execute_request( + "srcfile <- attr(srcref, 'srcfile'); inherits(srcfile, 'srcfile')", + |result| { + assert_eq!(result, "[1] TRUE"); + }, + ); + + // When global option is unset, we don't attach source references + frontend.execute_request_invisibly("options(keep.source = FALSE)"); + frontend.execute_request_invisibly("g <- function() {}"); + + frontend.execute_request( + "srcref <- attr(g, 'srcref'); identical(srcref, NULL)", + |result| { + assert_eq!(result, "[1] TRUE"); + }, + ); +} diff --git a/crates/harp/.zed/settings.json b/crates/harp/.zed/settings.json new file mode 120000 index 000000000..5de98924d --- /dev/null +++ b/crates/harp/.zed/settings.json @@ -0,0 +1 @@ +../../../.zed/settings.json \ No newline at end of file diff --git a/crates/harp/src/environment.rs b/crates/harp/src/environment.rs index 8666369d6..e65705d62 100644 --- a/crates/harp/src/environment.rs +++ b/crates/harp/src/environment.rs @@ -56,10 +56,12 @@ impl Environment { Self::new_filtered(env, EnvironmentFilter::default()) } - pub fn new_empty() -> anyhow::Result { - Ok(Self::new(harp::parse_eval_base( - "new.env(parent = emptyenv())", - )?)) + /// Creates hashed environment of default size inheriting from the empty + /// environment + pub fn new_empty() -> Self { + // Passing `size = 0` causes default size to be picked up + let env = unsafe { libr::R_NewEnv(R_ENVS.empty, 1, 0) }; + Self::new(RObject::new(env)) } pub fn new_filtered(env: RObject, filter: EnvironmentFilter) -> Self { diff --git a/crates/harp/src/environment_iter.rs b/crates/harp/src/environment_iter.rs index a1fc8f04b..41c2b3e01 100644 --- a/crates/harp/src/environment_iter.rs +++ b/crates/harp/src/environment_iter.rs @@ -189,7 +189,7 @@ mod tests { #[allow(non_snake_case)] fn test_binding_eq() { r_task(|| { - let env: Environment = Environment::new_empty().unwrap(); + let env: Environment = Environment::new_empty(); let obj = harp::parse_eval_base("1").unwrap(); env.bind(RSymbol::from("a"), &obj); diff --git a/crates/harp/src/error.rs b/crates/harp/src/error.rs index 5b4f2a5f5..5989986f0 100644 --- a/crates/harp/src/error.rs +++ b/crates/harp/src/error.rs @@ -46,7 +46,6 @@ pub enum Error { InvalidUtf8(Utf8Error), ParseSyntaxError { message: String, - line: i32, }, MissingValueError, MissingColumnError { @@ -200,8 +199,8 @@ impl fmt::Display for Error { write!(f, "Invalid UTF-8 in string: {}", error) }, - Error::ParseSyntaxError { message, line } => { - write!(f, "Syntax error on line {} when parsing: {}", line, message) + Error::ParseSyntaxError { message } => { + write!(f, "Syntax error: {}", message) }, Error::MissingValueError => { diff --git a/crates/harp/src/exec.rs b/crates/harp/src/exec.rs index 808a954da..672fcaec8 100644 --- a/crates/harp/src/exec.rs +++ b/crates/harp/src/exec.rs @@ -367,6 +367,101 @@ where } } +/// Execute a function with a cleanup handler using R's cleanup mechanism. +/// +/// This wraps `R_ExecWithCleanup` to provide execution with guaranteed cleanup, +/// even in case of an R longjump. +/// +/// In case of longjump, `cleanup()` runs but `exec_with_cleanup()` does not +/// return, the lonjump propagates. +/// +/// Note that `fun` and `cleanup` must be longjump-safe: +/// - Only POD types without Drop destructors on the stack +/// - Or protects itself from longjumps via e.g. `try_catch()` +/// ``` +pub fn exec_with_cleanup<'env, F, C, T>(fun: F, cleanup: C) -> T +where + F: FnOnce() -> T, + F: 'env, + C: FnOnce(), + C: 'env, + T: 'env, +{ + struct CleanupData<'a, F, C, T> + where + F: FnOnce() -> T + 'a, + C: FnOnce() + 'a, + { + // slot for the result of the closure + result: &'a mut Option, + closure: Option, + cleanup: Option, + } + + // Allocate stack memory for the result + let mut result: Option = None; + + // Move closures to the payload + let mut callback_data = CleanupData { + result: &mut result, + closure: Some(fun), + cleanup: Some(cleanup), + }; + let payload = &mut callback_data as *mut _ as *mut c_void; + + extern "C-unwind" fn exec_callback<'env, F, C, T>(data: *mut c_void) -> SEXP + where + F: FnOnce() -> T, + F: 'env, + C: FnOnce(), + C: 'env, + T: 'env, + { + // SAFETY: `data` points to a `CleanupData` allocated on the caller's stack. + let data: &mut CleanupData = unsafe { &mut *(data as *mut CleanupData) }; + + // Move closure here so it can be called. Required since that's an `FnOnce`. + let closure = take(&mut data.closure).unwrap(); + + // Call closure and store the result in the payload + let result = closure(); + *(data.result) = Some(result); + + // Always return R_NilValue to R_ExecWithCleanup; the real result is in `payload`. + unsafe { R_NilValue } + } + + extern "C-unwind" fn cleanup_callback<'env, F, C, T>(data: *mut c_void) + where + F: FnOnce() -> T, + F: 'env, + C: FnOnce(), + C: 'env, + T: 'env, + { + // SAFETY: `data` points to a `CleanupData` allocated on the caller's stack. + let data: &mut CleanupData = unsafe { &mut *(data as *mut CleanupData) }; + + // Move cleanup closure here so it can be called + if let Some(cleanup) = take(&mut data.cleanup) { + cleanup(); + } + } + + // Call into R; the callbacks will populate `res` and always return R_NilValue. + unsafe { + R_ExecWithCleanup( + Some(exec_callback::), + payload, + Some(cleanup_callback::), + payload, + ) + }; + + // Unwrap Safety: If we get here, we're in the happy path and the result is Some + result.unwrap() +} + pub fn r_peek_error_buffer() -> String { // SAFETY: Returns pointer to static memory buffer owned by R. let buffer = unsafe { R_curErrorBuf() }; @@ -495,6 +590,8 @@ pub fn r_check_stack(size: Option) -> Result<()> { #[cfg(test)] mod tests { use std::ffi::CString; + use std::sync::Arc; + use std::sync::Mutex; use stdext::assert_match; @@ -644,4 +741,52 @@ mod tests { }); }) } + + #[test] + fn test_exec_with_cleanup() { + crate::r_task(|| { + let cleanup_called = Arc::new(Mutex::new(false)); + let cleanup_called_clone = cleanup_called.clone(); + + let result = exec_with_cleanup( + || { + // Create a simple R object and return it directly (T = RObject) + let obj = RObject::from(unsafe { Rf_ScalarInteger(42) }); + obj + }, + || { + *cleanup_called_clone.lock().unwrap() = true; + }, + ); + + assert_eq!(unsafe { Rf_asInteger(*result) }, 42); + assert!( + *cleanup_called.lock().unwrap(), + "Cleanup should have been called" + ); + + // Test error case - cleanup should still be called. + let cleanup_called_error = Arc::new(Mutex::new(false)); + let cleanup_called_error_clone = cleanup_called_error.clone(); + + let result = try_catch(|| { + exec_with_cleanup( + || -> RObject { + let msg = CString::new("ouch").unwrap(); // This leaks + unsafe { Rf_error(msg.as_ptr()) }; + }, + || { + *cleanup_called_error_clone.lock().unwrap() = true; + }, + ) + }); + + assert!(result.is_err()); + + assert!( + *cleanup_called_error.lock().unwrap(), + "Cleanup should have been called on error" + ); + }) + } } diff --git a/crates/harp/src/lib.rs b/crates/harp/src/lib.rs index ea4d66e9f..c505e48ec 100644 --- a/crates/harp/src/lib.rs +++ b/crates/harp/src/lib.rs @@ -1,9 +1,10 @@ // // lib.rs // -// Copyright (C) 2023 Posit Software, PBC. All rights reserved. +// Copyright (C) 2025 Posit Software, PBC. All rights reserved. // // + pub mod attrib; pub mod call; mod column_names; @@ -24,6 +25,7 @@ pub mod line_ending; mod matrix; pub mod modules; pub mod object; +pub mod options; pub mod parse; pub mod parser; pub mod polled_events; @@ -61,6 +63,7 @@ pub use vector::list::*; // resolve to the correct symbols extern crate self as harp; +pub use harp::environment::*; pub use harp::error::as_result; pub use harp::exec::top_level_exec; pub use harp::exec::try_catch; @@ -70,8 +73,9 @@ pub(crate) use harp::fixtures::r_task; pub use harp::object::list_get; pub use harp::object::list_poke; pub use harp::object::RObject; +pub use harp::options::*; +pub use harp::session::*; pub use harp::symbol::RSymbol; -pub use harp::utils::get_option; pub use harp::weak_ref::RWeakRef; pub use harp_macros::register; @@ -254,6 +258,28 @@ macro_rules! push_rds { }; } +/// Allocate global variable for the R thread with lazy init +/// +/// Uses thread_local storage to avoid issues with SEXP being non-Sync. +/// Usage: +/// +/// ``` +/// harp::once! { +/// static NAME: Type = initialization_expression; +/// } +/// NAME.with(|x| foo(x)); +/// ``` +/// +/// Expands to a thread-local static initialized on first access in the thread. +#[macro_export] +macro_rules! once { + ( $( static $name:ident : $ty:ty = $init:expr );* $(;)? ) => { + thread_local! { + $( static $name: $ty = $init; )* + } + }; +} + #[cfg(test)] mod tests { use libr::*; diff --git a/crates/harp/src/options.rs b/crates/harp/src/options.rs new file mode 100644 index 000000000..f2eb83003 --- /dev/null +++ b/crates/harp/src/options.rs @@ -0,0 +1,16 @@ +// +// options.rs +// +// Copyright (C) 2025 Posit Software, PBC. All rights reserved. +// +// + +use crate::{r_symbol, RObject}; + +pub fn get_option(name: &str) -> RObject { + unsafe { libr::Rf_GetOption1(r_symbol!(name)).into() } +} + +pub fn get_option_bool(name: &str) -> bool { + harp::get_option(name).try_into().unwrap_or(false) +} diff --git a/crates/harp/src/parse.rs b/crates/harp/src/parse.rs index 7e819a8ca..43a141afc 100644 --- a/crates/harp/src/parse.rs +++ b/crates/harp/src/parse.rs @@ -28,9 +28,10 @@ pub struct RParseOptions { pub enum ParseResult { Complete(RObject), Incomplete, - SyntaxError { message: String, line: i32 }, + SyntaxError { message: String }, } +#[derive(Clone, Debug)] pub enum ParseInput<'a> { Text(&'a str), SrcFile(&'a srcref::SrcFile), @@ -67,7 +68,7 @@ pub fn parse_exprs(text: &str) -> crate::Result { /// Same but creates srcrefs pub fn parse_exprs_with_srcrefs(text: &str) -> crate::Result { - let srcfile = srcref::SrcFile::try_from(text)?; + let srcfile = srcref::SrcFile::from(text); parse_exprs_ext(&ParseInput::SrcFile(&srcfile)) } @@ -79,14 +80,12 @@ pub fn parse_exprs_ext<'a>(input: &ParseInput<'a>) -> crate::Result { code: parse_input_as_string(input).unwrap_or(String::from("Conversion error")), message: String::from("Incomplete code"), }), - ParseResult::SyntaxError { message, line } => { - Err(crate::Error::ParseSyntaxError { message, line }) - }, + ParseResult::SyntaxError { message } => Err(crate::Error::ParseSyntaxError { message }), } } pub fn parse_with_parse_data(text: &str) -> crate::Result<(ParseResult, ParseData)> { - let srcfile = srcref::SrcFile::try_from(text)?; + let srcfile = srcref::SrcFile::from(text); // Fill parse data in `srcfile` by side effect let status = parse_status(&ParseInput::SrcFile(&srcfile))?; @@ -109,17 +108,34 @@ pub fn parse_status<'a>(input: &ParseInput<'a>) -> crate::Result { ParseInput::SrcFile(srcfile) => (srcfile.lines()?, srcfile.inner.clone()), }; - let result: RObject = - try_catch(|| libr::R_ParseVector(text.sexp, -1, &mut status, srcfile.sexp).into())?; + let result = + try_catch(|| libr::R_ParseVector(text.sexp, -1, &mut status, srcfile.sexp).into()); + + let value = match result { + Ok(value) => value, + Err(err) => match err { + // The parser sometimes throws errors instead of returning an + // error flag. Convert these errors to proper syntax errors so + // we don't leak a backtrace making it seem like an internal + // error. + // https://github.com/posit-dev/ark/issues/598 + // https://github.com/posit-dev/ark/issues/722 + crate::Error::TryCatchError { message, .. } => { + return Ok(ParseResult::SyntaxError { message }); + }, + _ => { + return Err(err); + }, + }, + }; match status { - libr::ParseStatus_PARSE_OK => Ok(ParseResult::Complete(result)), + libr::ParseStatus_PARSE_OK => Ok(ParseResult::Complete(value)), libr::ParseStatus_PARSE_INCOMPLETE => Ok(ParseResult::Incomplete), libr::ParseStatus_PARSE_ERROR => Ok(ParseResult::SyntaxError { message: CStr::from_ptr(libr::get(libr::R_ParseErrorMsg).as_ptr()) .to_string_lossy() .to_string(), - line: libr::get(libr::R_ParseError) as i32, }), _ => { // Should not get here @@ -204,18 +220,19 @@ mod tests { Ok(ParseResult::Incomplete) ); - // Error + // Syntax error (error longjump thrown by parser) assert_match!( parse_status(&ParseInput::Text("42 + _")), - Err(_) => {} + Ok(ParseResult::SyntaxError { message }) => { + assert!(message.contains("invalid use of pipe placeholder")); + } ); - // "normal" syntax error + // Syntax error (error code returned by parser) assert_match!( parse_status(&ParseInput::Text("1+1\n*42")), - Ok(ParseResult::SyntaxError {message, line}) => { + Ok(ParseResult::SyntaxError { message }) => { assert!(message.contains("unexpected")); - assert_eq!(line, 2); } ); diff --git a/crates/harp/src/parser/srcref.rs b/crates/harp/src/parser/srcref.rs index 91562fd94..d7b76782d 100644 --- a/crates/harp/src/parser/srcref.rs +++ b/crates/harp/src/parser/srcref.rs @@ -31,7 +31,7 @@ pub struct SrcRef { pub column_byte: std::ops::Range, } -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct SrcFile { pub inner: RObject, } @@ -118,13 +118,33 @@ impl TryFrom for SrcRef { /// Creates the same sort of srcfile object as with `parse(text = )`. /// Takes code as an R string containing newlines, or as a R vector of lines. impl SrcFile { - fn new_virtual(text: RObject) -> harp::Result { + // Created by the R function `parse()` + pub fn new_virtual(text: RObject) -> Self { let inner = RFunction::new("base", "srcfilecopy") .param("filename", "") .param("lines", text) - .call()?; + .call(); + + // Unwrap safety: Should never fail, unless something is seriously wrong + let inner = inner.unwrap(); + + Self { inner } + } + + // Created by the C-level parser + pub fn new_virtual_empty_filename(text: RObject) -> Self { + let inner = harp::Environment::new_empty(); + inner.bind("filename".into(), &RObject::from("")); + inner.bind("lines".into(), &text); + + let inner: RObject = inner.into(); + + harp::once! { + static CLASS: RObject = crate::CharacterVector::create(vec!["srcfile", "srcfilecopy"]).into(); + } + CLASS.with(|c| inner.set_attribute("class", c.sexp)); - Ok(Self { inner }) + Self { inner } } pub fn lines(&self) -> harp::Result { @@ -136,23 +156,58 @@ impl SrcFile { } } -impl TryFrom<&str> for SrcFile { - type Error = harp::Error; - - fn try_from(value: &str) -> harp::Result { +impl From<&str> for SrcFile { + fn from(value: &str) -> Self { let input = crate::as_parse_text(value); SrcFile::new_virtual(input) } } -impl TryFrom<&harp::CharacterVector> for SrcFile { - type Error = harp::Error; - - fn try_from(value: &harp::CharacterVector) -> harp::Result { +impl From<&harp::CharacterVector> for SrcFile { + fn from(value: &harp::CharacterVector) -> Self { SrcFile::new_virtual(value.object.clone()) } } +pub fn srcref_list_get(srcrefs: libr::SEXP, ind: isize) -> RObject { + if crate::r_is_null(srcrefs) { + return RObject::null(); + } + + if harp::r_length(srcrefs) <= ind { + return RObject::null(); + } + + let result = harp::list_get(srcrefs, ind); + + if crate::r_is_null(result) { + return RObject::null(); + } + + if unsafe { libr::TYPEOF(result) as u32 } != libr::INTSXP { + return RObject::null(); + } + + if harp::r_length(result) < 6 { + return RObject::null(); + } + + RObject::new(result) +} + +// Some objects, such as calls to `{` and expression vectors returned by +// `parse()`, have a list of `srcref` objects attached as `srcref` attribute. +// This helper retrieves them if they exist. +pub fn get_srcref_list(call: libr::SEXP) -> Option { + let srcrefs = unsafe { libr::Rf_getAttrib(call, libr::R_SrcrefSymbol) }; + + if unsafe { libr::TYPEOF(srcrefs) as u32 } == libr::VECSXP { + return Some(RObject::new(srcrefs)); + } + + None +} + #[cfg(test)] mod tests { use std::ops::Range; diff --git a/crates/harp/src/session.rs b/crates/harp/src/session.rs index 6d75e53b0..261ab0164 100644 --- a/crates/harp/src/session.rs +++ b/crates/harp/src/session.rs @@ -27,6 +27,7 @@ static SESSION_INIT: Once = Once::new(); static mut NFRAME_CALL: Option = None; static mut SYS_CALLS_CALL: Option = None; static mut SYS_FRAMES_CALL: Option = None; +static mut CURRENT_ENV_CALL: Option = None; pub fn r_n_frame() -> crate::Result { SESSION_INIT.call_once(init_interface); @@ -60,6 +61,11 @@ pub fn r_sys_frames() -> crate::Result { } } +pub fn r_current_frame() -> RObject { + SESSION_INIT.call_once(init_interface); + unsafe { libr::Rf_eval(CURRENT_ENV_CALL.unwrap_unchecked(), R_BaseEnv) }.into() +} + pub fn r_sys_functions() -> crate::Result { unsafe { let mut protect = RProtect::new(); @@ -150,5 +156,16 @@ fn init_interface() { let sys_frames_call = r_lang!(r_symbol!("sys.frames")); R_PreserveObject(sys_frames_call); SYS_FRAMES_CALL = Some(sys_frames_call); + + // Create a closure that calls `sys.frame(-1)` to get the current + // evaluation environment. We use `sys.frame(-1)` from within a closure + // because `sys.nframe()` returns the frame number where evaluation + // occurs, not the number of frames on the stack. By calling from a + // closure, we push a new frame and use negative indexing to get the + // previous frame (the actual current environment). + let closure = harp::parse_eval_base("function() sys.frame(-1)").unwrap(); + let current_env_call = r_lang!(closure.sexp); + R_PreserveObject(current_env_call); + CURRENT_ENV_CALL = Some(current_env_call); } } diff --git a/crates/harp/src/utils.rs b/crates/harp/src/utils.rs index e9b40796f..951752f20 100644 --- a/crates/harp/src/utils.rs +++ b/crates/harp/src/utils.rs @@ -310,10 +310,6 @@ pub fn r_type2char>(kind: T) -> String { } } -pub fn get_option(name: &str) -> RObject { - unsafe { Rf_GetOption1(r_symbol!(name)).into() } -} - pub fn r_inherits(object: SEXP, class: &str) -> bool { let class = CString::new(class).unwrap(); unsafe { libr::Rf_inherits(object, class.as_ptr()) != 0 } diff --git a/crates/harp/src/vector/character_vector.rs b/crates/harp/src/vector/character_vector.rs index a6fea148a..35dc1c0c6 100644 --- a/crates/harp/src/vector/character_vector.rs +++ b/crates/harp/src/vector/character_vector.rs @@ -134,6 +134,12 @@ impl TryFrom<&CharacterVector> for Vec { } } +impl From for RObject { + fn from(value: CharacterVector) -> Self { + value.object + } +} + #[cfg(test)] mod test { use libr::STRSXP; diff --git a/crates/libr/src/r.rs b/crates/libr/src/r.rs index eb927e48c..54a9c0f1f 100644 --- a/crates/libr/src/r.rs +++ b/crates/libr/src/r.rs @@ -19,6 +19,8 @@ use crate::types::*; // Functions and globals functions::generate! { + pub fn R_NewEnv(enclos: SEXP, hash: std::ffi::c_int, size: std::ffi::c_int) -> SEXP; + pub fn Rf_initialize_R(ac: std::ffi::c_int, av: *mut *mut std::ffi::c_char) -> std::ffi::c_int; pub fn run_Rmainloop(); @@ -98,6 +100,13 @@ functions::generate! { data: *mut std::ffi::c_void ) -> Rboolean; + pub fn R_ExecWithCleanup( + fun: Option SEXP>, + data: *mut std::ffi::c_void, + cleanfun: Option, + cleandata: *mut std::ffi::c_void + ) -> SEXP; + pub fn R_withCallingErrorHandler( body: Option SEXP>, bdata: *mut std::ffi::c_void, @@ -135,7 +144,7 @@ functions::generate! { pub fn Rf_cons(arg1: SEXP, arg2: SEXP) -> SEXP; - pub fn Rf_defineVar(arg1: SEXP, arg2: SEXP, arg3: SEXP); + pub fn Rf_defineVar(sym: SEXP, value: SEXP, env: SEXP); pub fn Rf_eval(arg1: SEXP, arg2: SEXP) -> SEXP; @@ -617,6 +626,14 @@ constant_globals::generate! { #[default = std::ptr::null_mut()] pub static R_TripleColonSymbol: SEXP; + #[doc = "\"srcfile\""] + #[default = std::ptr::null_mut()] + pub static R_SrcfileSymbol: SEXP; + + #[doc = "\"srcref\""] + #[default = std::ptr::null_mut()] + pub static R_SrcrefSymbol: SEXP; + #[doc = "\"tsp\""] #[default = std::ptr::null_mut()] pub static R_TspSymbol: SEXP; @@ -689,6 +706,8 @@ mutable_globals::generate! { pub static mut R_Srcref: SEXP; + pub static mut R_Visible: Rboolean; + // ----------------------------------------------------------------------------------- // Unix @@ -743,6 +762,9 @@ mutable_globals::generate! { #[cfg(target_family = "unix")] pub static mut ptr_R_Suicide: Option; + #[cfg(target_family = "unix")] + pub static mut ptr_R_CleanUp: Option; + // ----------------------------------------------------------------------------------- // Windows