Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .changeset/improve_region_error_messages.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
livekit-api: patch
---

fix: surface full error chain in region fetch failures for better TLS error diagnosis.

When connecting to LiveKit Cloud from containers without CA certificates installed, the error message now includes the full error chain (e.g., "invalid peer certificate: UnknownIssuer") instead of just "error sending request for url (...)". This makes TLS certificate issues self-diagnosing.

Also added documentation for TLS features in Cargo.toml, highlighting `rustls-tls-webpki-roots` as the recommended option for container deployments.
28 changes: 26 additions & 2 deletions livekit-api/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,24 +47,48 @@ services-async = ["dep:isahc"]
access-token = ["dep:jsonwebtoken"]
webhooks = ["access-token", "dep:serde_json", "dep:base64"]

# Note that the following features only change the behavior of tokio-tungstenite.
# It doesn't change the behavior of libwebrtc/webrtc-sys
# TLS Configuration
# -----------------
# These features control TLS behavior for WebSocket and HTTP connections.
# Note: These features only change the behavior of tokio-tungstenite and reqwest.
# They don't change the behavior of libwebrtc/webrtc-sys.
#
# IMPORTANT FOR CONTAINER DEPLOYMENTS:
# When using `rustls-tls-native-roots`, the SDK relies on the operating system's
# CA certificate store. In container environments using slim/minimal base images,
# this store may be empty, causing TLS errors like "invalid peer certificate: UnknownIssuer".
#
# Solutions:
# 1. Install CA certificates in your Dockerfile:
# - Debian/Ubuntu: RUN apt-get update && apt-get install -y ca-certificates
# - Alpine: RUN apk add --no-cache ca-certificates
#
# 2. Use `rustls-tls-webpki-roots` instead, which bundles Mozilla's root
# certificates and doesn't require system CA certificates. This is the
# recommended option for containerized deployments.

# Uses the platform's native TLS implementation (OpenSSL on Linux, Secure Transport on macOS, SChannel on Windows)
native-tls = [
"tokio-tungstenite?/native-tls",
"async-tungstenite?/async-native-tls",
"reqwest?/native-tls"
]
# Same as native-tls but compiles OpenSSL from source (useful for cross-compilation)
native-tls-vendored = [
"tokio-tungstenite?/native-tls-vendored",
"reqwest?/native-tls-vendored",
]
# Uses rustls with the operating system's CA certificate store.
# Requires ca-certificates to be installed in container environments.
rustls-tls-native-roots = [
"tokio-tungstenite?/rustls-tls-native-roots",
"reqwest?/rustls-tls-native-roots",
"tokio-tungstenite?/__rustls-tls",
"dep:tokio-rustls",
"dep:rustls-native-certs"
]
# Uses rustls with Mozilla's bundled root certificates.
# RECOMMENDED for container deployments - no system CA certificates required.
rustls-tls-webpki-roots = [
"tokio-tungstenite?/rustls-tls-webpki-roots",
"reqwest?/rustls-tls-webpki-roots",
Expand Down
106 changes: 106 additions & 0 deletions livekit-api/src/signal_client/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,29 @@ pub enum SignalError {
Timeout(String),
#[error("failed to send message to the server")]
SendError,
/// Failed to retrieve region information from LiveKit Cloud.
///
/// This error occurs when the SDK cannot fetch the `/settings/regions` endpoint
/// from LiveKit Cloud. The error message includes the full error chain to help
/// diagnose the root cause.
///
/// # Common Causes
///
/// - **Missing CA certificates**: When deploying in containers using slim base images
/// (e.g., `node:*-slim`, `debian:*-slim`, Alpine), the system CA certificate store
/// may be empty. The error will include "invalid peer certificate: UnknownIssuer".
///
/// **Fix**: Install the `ca-certificates` package in your Dockerfile:
/// ```dockerfile
/// RUN apt-get update && apt-get install -y ca-certificates
/// ```
///
/// **Alternative**: Use the `rustls-tls-webpki-roots` feature instead of
/// `rustls-tls-native-roots` to bundle Mozilla's root certificates.
///
/// - **Network connectivity issues**: The container cannot reach LiveKit Cloud endpoints.
///
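/// - **Malformed response body**: The endpoint answered with a success status, but the
///   body could not be decoded as a regions response.
///
/// Note: when the endpoint answers with a non-success HTTP status (for example because
/// the access token is invalid or expired), the region fetch reports it as
/// `SignalError::Client` rather than as this variant.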
#[error("failed to retrieve region info: {0}")]
RegionError(String),
#[error("server sent leave during reconnect: reason={reason:?}, action={action:?}")]
Expand Down Expand Up @@ -1249,4 +1272,87 @@ mod tests {
err
);
}

/// Checks that a network-level failure during the region fetch is surfaced as a
/// `RegionError` with a descriptive message.
/// Descriptive messages matter when diagnosing TLS certificate issues in containers.
#[cfg(feature = "signal-client-tokio")]
#[tokio::test]
async fn region_fetch_connection_refused_includes_error_chain() {
    // Port 1 on loopback is expected to have no listener, so the request should fail
    // before any HTTP exchange takes place.
    let outcome =
        region::fetch_from_endpoint("http://127.0.0.1:1/settings/regions", "fake-token").await;
    assert!(outcome.is_err());

    // Any other variant means the failure was classified incorrectly.
    let msg = match outcome.unwrap_err() {
        SignalError::RegionError(msg) => msg,
        other => panic!("expected RegionError, got: {:?}", other),
    };

    // The exact wording is platform dependent, so only require that the message talks
    // about the failed request or the connection.
    let mentions_failure = msg.contains("error sending request") || msg.contains("connection");
    assert!(mentions_failure, "Error should mention the request failure, got: {}", msg);

    // The chain format is "outer: middle: inner", but its depth varies by platform.
    // This is therefore only a lower bound on message length, not a structural check.
    assert!(
        msg.len() > 20,
        "Error message should be descriptive with chain info, got: {}",
        msg
    );
}

/// Checks that a response body which cannot be decoded as a regions response is
/// surfaced as a `RegionError` whose message mentions the decoding failure.
#[cfg(feature = "signal-client-tokio")]
#[tokio::test]
async fn region_fetch_invalid_json_includes_error_chain() {
    use tokio::io::{AsyncReadExt, AsyncWriteExt};
    use tokio::net::TcpListener;

    // Bind to an ephemeral port so the test never collides with a real service.
    let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
    let port = listener.local_addr().unwrap().port();

    // One-shot HTTP server: accept a single connection, read the request once, and
    // answer 200 with JSON that does not match the expected regions schema.
    tokio::spawn(async move {
        let (mut conn, _) = listener.accept().await.unwrap();

        let mut request = [0u8; 4096];
        let _ = conn.read(&mut request).await;

        let body = r#"{"invalid": "not a regions response"}"#;
        let response = format!(
            "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\n\r\n{}",
            body.len(),
            body
        );
        conn.write_all(response.as_bytes()).await.unwrap();
    });

    let endpoint = format!("http://127.0.0.1:{}/settings/regions", port);
    let outcome = region::fetch_from_endpoint(&endpoint, "fake-token").await;
    assert!(outcome.is_err());

    let msg = match outcome.unwrap_err() {
        SignalError::RegionError(msg) => msg,
        other => panic!("expected RegionError, got: {:?}", other),
    };

    // Accept any of the wordings a decoding failure may produce.
    let mentions_decoding = ["missing field", "error decoding", "JSON"]
        .iter()
        .any(|needle| msg.contains(needle));
    assert!(mentions_decoding, "Error should mention JSON parsing failure, got: {}", msg);
}
}
165 changes: 163 additions & 2 deletions livekit-api/src/signal_client/region.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,30 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::error::Error as StdError;

use http::header::{HeaderMap, HeaderValue, AUTHORIZATION};
use serde::Deserialize;

use crate::http_client;

use super::{SignalError, SignalResult, REGION_FETCH_TIMEOUT};

/// Converts an error into a string that includes the full error chain.
/// This is important for debugging TLS errors, where the root cause
/// (e.g., "invalid peer certificate: UnknownIssuer") is often buried
/// in the source chain.
fn error_with_chain<E: StdError>(err: E) -> String {
let mut msg = err.to_string();
let mut source = err.source();
while let Some(err) = source {
msg.push_str(": ");
msg.push_str(&err.to_string());
source = err.source();
}
msg
}

/// Unit struct that carries no data.
// NOTE(review): its associated functions are outside this excerpt; presumably they
// resolve region URLs for a LiveKit Cloud project. Confirm against the impl block.
pub struct RegionUrlProvider;

#[derive(Deserialize)]
Expand Down Expand Up @@ -57,15 +74,15 @@ pub(crate) async fn fetch_from_endpoint(
.headers(headers)
.send()
.await
.map_err(|e| SignalError::RegionError(e.to_string()))?;
.map_err(|e| SignalError::RegionError(error_with_chain(e)))?;

if !res.status().is_success() {
return Err(SignalError::Client(res.status(), res.text().await.unwrap_or_default()));
}
let res = res
.json::<RegionUrlResponse>()
.await
.map_err(|e| SignalError::RegionError(e.to_string()))?;
.map_err(|e| SignalError::RegionError(error_with_chain(e)))?;
Ok(res.regions.into_iter().map(|i| i.url).collect())
};

Expand Down Expand Up @@ -101,6 +118,150 @@ fn region_endpoint(url: &str) -> SignalResult<String> {
#[cfg(test)]
mod tests {
use super::*;
use std::fmt;
use std::io;

// Mock error types used to check that error chains are preserved.
//
// `RootCauseError` is the innermost link: a bare message with no source.
#[derive(Debug)]
struct RootCauseError {
    message: String,
}

impl fmt::Display for RootCauseError {
    fn fmt(&self, out: &mut fmt::Formatter<'_>) -> fmt::Result {
        out.write_str(&self.message)
    }
}

// Relies on the default `source()`, which reports no underlying cause.
impl std::error::Error for RootCauseError {}

// Middle link of the mock chain: its own message plus a `RootCauseError` as source.
#[derive(Debug)]
struct MiddleError {
    message: String,
    source: RootCauseError,
}

impl fmt::Display for MiddleError {
    fn fmt(&self, out: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Only this layer's message is printed; the source is exposed via `source()`.
        out.write_str(&self.message)
    }
}

impl std::error::Error for MiddleError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = &self.source;
        Some(cause)
    }
}

// Outermost link of the mock chain: its own message plus a `MiddleError` as source.
#[derive(Debug)]
struct OuterError {
    message: String,
    source: MiddleError,
}

impl fmt::Display for OuterError {
    fn fmt(&self, out: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Only this layer's message is printed; the source is exposed via `source()`.
        out.write_str(&self.message)
    }
}

impl std::error::Error for OuterError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = &self.source;
        Some(cause)
    }
}

#[test]
fn test_error_with_chain_single_error() {
    // An error without a source renders as just its own message, with no separator.
    let rendered = error_with_chain(RootCauseError { message: String::from("root cause") });
    assert_eq!(rendered, "root cause");
}

#[test]
fn test_error_with_chain_two_level_chain() {
    // One wrapper around a root cause: both messages appear, joined by ": ".
    let chain = MiddleError {
        message: String::from("error trying to connect"),
        source: RootCauseError {
            message: String::from("invalid peer certificate: UnknownIssuer"),
        },
    };

    assert_eq!(
        error_with_chain(chain),
        "error trying to connect: invalid peer certificate: UnknownIssuer"
    );
}

#[test]
fn test_error_with_chain_three_level_chain() {
    // Models the layering of a real failure: request error -> connect error -> TLS error.
    let chain = OuterError {
        message: "error sending request for url (https://example.livekit.cloud/settings/regions)"
            .to_string(),
        source: MiddleError {
            message: "error trying to connect".to_string(),
            source: RootCauseError {
                message: "invalid peer certificate: UnknownIssuer".to_string(),
            },
        },
    };

    let expected = "error sending request for url (https://example.livekit.cloud/settings/regions): error trying to connect: invalid peer certificate: UnknownIssuer";
    assert_eq!(error_with_chain(chain), expected);
}

#[test]
fn test_error_with_chain_preserves_tls_error_info() {
    // A TLS-specific root cause must survive being wrapped by an outer error.
    let wrapped = MiddleError {
        message: "TLS connection error".to_string(),
        source: RootCauseError {
            message: "invalid peer certificate: UnknownIssuer".to_string(),
        },
    };
    let result = error_with_chain(wrapped);

    // Both the wrapper's message and the pieces of the root cause must be present.
    for fragment in ["TLS connection error", "UnknownIssuer", "invalid peer certificate"] {
        assert!(result.contains(fragment));
    }
}

#[test]
fn test_region_error_includes_full_chain() {
    // Build a three-level chain and wrap its rendering in `SignalError::RegionError`,
    // the same way the region fetch does on failure.
    let chain = OuterError {
        message: "error sending request".to_string(),
        source: MiddleError {
            message: "error trying to connect".to_string(),
            source: RootCauseError {
                message: "invalid peer certificate: UnknownIssuer".to_string(),
            },
        },
    };
    let error_string = SignalError::RegionError(error_with_chain(chain)).to_string();

    // Every layer must be visible in the final message, innermost cause included.
    let has_root = error_string.contains("UnknownIssuer");
    assert!(has_root, "Error should contain root cause 'UnknownIssuer', got: {}", error_string);

    let has_middle = error_string.contains("error trying to connect");
    assert!(has_middle, "Error should contain middle error, got: {}", error_string);

    let has_outer = error_string.contains("error sending request");
    assert!(has_outer, "Error should contain outer error, got: {}", error_string);
}

#[test]
fn test_error_with_chain_io_error() {
    // Use real `std::io::Error` values instead of the mock types: one error wrapped
    // inside another.
    let wrapped = io::Error::new(
        io::ErrorKind::Other,
        io::Error::new(io::ErrorKind::ConnectionRefused, "connection refused"),
    );

    let rendered = error_with_chain(wrapped);
    assert!(
        rendered.contains("connection refused"),
        "Should contain the inner error message, got: {}",
        rendered
    );
}

#[test]
fn test_is_cloud_url() {
Expand Down
Loading