From 4e7006df631780c0494a53d41fb20bd94a6136cc Mon Sep 17 00:00:00 2001 From: Christian Date: Tue, 17 Feb 2026 20:04:17 -0600 Subject: [PATCH 1/9] wip --- Cargo.lock | 118 +++++ Cargo.toml | 1 + crates/common/Cargo.toml | 1 + crates/common/src/auction/types.rs | 10 +- crates/common/src/consent/extraction.rs | 180 ++++++++ crates/common/src/consent/gpp.rs | 212 +++++++++ crates/common/src/consent/mod.rs | 151 +++++++ crates/common/src/consent/tcf.rs | 531 +++++++++++++++++++++++ crates/common/src/consent/types.rs | 437 +++++++++++++++++++ crates/common/src/consent/us_privacy.rs | 164 +++++++ crates/common/src/constants.rs | 9 + crates/common/src/integrations/prebid.rs | 50 ++- crates/common/src/lib.rs | 3 +- crates/common/src/openrtb.rs | 91 +++- crates/common/src/publisher.rs | 16 +- 15 files changed, 1957 insertions(+), 17 deletions(-) create mode 100644 crates/common/src/consent/extraction.rs create mode 100644 crates/common/src/consent/gpp.rs create mode 100644 crates/common/src/consent/mod.rs create mode 100644 crates/common/src/consent/tcf.rs create mode 100644 crates/common/src/consent/types.rs create mode 100644 crates/common/src/consent/us_privacy.rs diff --git a/Cargo.lock b/Cargo.lock index 94ec4c24..9811c270 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -119,6 +119,15 @@ dependencies = [ "serde_core", ] +[[package]] +name = "bitstream-io" +version = "4.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60d4bd9d1db2c6bdf285e223a7fa369d5ce98ec767dec949c6ca62863ce61757" +dependencies = [ + "core2", +] + [[package]] name = "block-buffer" version = "0.9.0" @@ -311,6 +320,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core2" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" 
+dependencies = [ + "memchr", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -1007,6 +1025,12 @@ dependencies = [ "hashbrown 0.15.5", ] +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "hex" version = "0.4.3" @@ -1032,6 +1056,38 @@ dependencies = [ "itoa", ] +[[package]] +name = "iab_gpp" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3be2d0191a3376e0176bb3df53b2754c644ead6edd50d9494ee8fa376a70e02" +dependencies = [ + "bitstream-io", + "fnv", + "iab_gpp_derive", + "num-derive", + "num-iter", + "num-traits", + "prettyplease", + "proc-macro2", + "quote", + "strum_macros", + "syn 2.0.111", + "thiserror 2.0.17", + "walkdir", +] + +[[package]] +name = "iab_gpp_derive" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5acda598b043c6386d20fffe86c600b63c7ca4980ee9a28f7e9aaa15d749747" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "iana-time-zone" version = "0.1.64" @@ -1389,6 +1445,17 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" +[[package]] +name = "num-derive" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "num-integer" version = "0.1.46" @@ -1675,6 +1742,16 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn 2.0.111", +] + [[package]] name = "primeorder" version = "0.13.6" @@ -1882,6 +1959,15 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -2114,6 +2200,18 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum_macros" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "subtle" version = "2.6.1" @@ -2357,6 +2455,7 @@ dependencies = [ "hex", "hmac", "http", + "iab_gpp", "jose-jwk", "log", "log-fastly", @@ -2524,6 +2623,16 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" @@ -2595,6 +2704,15 @@ dependencies = [ "winsafe", ] +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + [[package]] name = "windows-core" version = "0.62.2" diff --git a/Cargo.toml b/Cargo.toml index 032cbaa7..54fbbefa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,6 +58,7 @@ handlebars = "6.4.0" hex = "0.4.3" hmac = "0.12.1" http = "1.4.0" +iab_gpp = "0.1" jose-jwk = "0.1.2" log = "0.4.29" log-fastly = "0.11.12" diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml index c360474e..bcb916b4 100644 --- a/crates/common/Cargo.toml +++ b/crates/common/Cargo.toml @@ -29,6 +29,7 @@ handlebars = { workspace = true } hex = { workspace = true } hmac = { workspace = true } http = { workspace = true } +iab_gpp = { workspace = true } jose-jwk = { workspace = true } log = { workspace = true } rand = { workspace = true } diff --git a/crates/common/src/auction/types.rs b/crates/common/src/auction/types.rs index 7e30761c..0838ec2b 100644 --- a/crates/common/src/auction/types.rs +++ b/crates/common/src/auction/types.rs @@ -73,8 +73,14 @@ pub struct UserInfo { pub id: String, /// Fresh ID for this session pub fresh_id: String, - /// GDPR consent string if applicable - pub consent: Option, + /// Decoded consent context for this request. + /// + /// Carries both raw consent strings (for `OpenRTB` forwarding) and decoded + /// structured data (for TS-level enforcement and observability). + /// Skipped during serde since it is populated at runtime from request + /// cookies/headers, not from stored data. + #[serde(skip)] + pub consent: Option, } /// Device information from request. diff --git a/crates/common/src/consent/extraction.rs b/crates/common/src/consent/extraction.rs new file mode 100644 index 00000000..d5b420bf --- /dev/null +++ b/crates/common/src/consent/extraction.rs @@ -0,0 +1,180 @@ +//! Consent signal extraction from cookies and headers. +//! +//! Reads raw consent strings from the [`CookieJar`] and HTTP headers without +//! 
performing any decoding or validation. This is the first step in the consent +//! pipeline described in the [Consent Forwarding Architecture Design]. + +use cookie::CookieJar; +use fastly::Request; + +use crate::constants::{ + COOKIE_EUCONSENT_V2, COOKIE_GPP, COOKIE_GPP_SID, COOKIE_US_PRIVACY, HEADER_SEC_GPC, +}; + +use super::types::RawConsentSignals; + +/// Extracts raw consent signals from a [`CookieJar`] and a [`Request`]. +/// +/// Reads the following consent cookies (if present): +/// - `euconsent-v2` — IAB TCF v2 consent string +/// - `__gpp` — IAB Global Privacy Platform string +/// - `__gpp_sid` — GPP section IDs (comma-separated) +/// - `us_privacy` — IAB US Privacy / CCPA string +/// +/// Also reads the `Sec-GPC` header for Global Privacy Control. +/// +/// No decoding or validation is performed — values are captured as-is. +pub fn extract_consent_signals(jar: Option<&CookieJar>, req: &Request) -> RawConsentSignals { + let raw_tc_string = jar + .and_then(|j| j.get(COOKIE_EUCONSENT_V2)) + .map(|c| c.value().to_owned()); + + let raw_gpp_string = jar + .and_then(|j| j.get(COOKIE_GPP)) + .map(|c| c.value().to_owned()); + + let raw_gpp_sid = jar + .and_then(|j| j.get(COOKIE_GPP_SID)) + .map(|c| c.value().to_owned()); + + let raw_us_privacy = jar + .and_then(|j| j.get(COOKIE_US_PRIVACY)) + .map(|c| c.value().to_owned()); + + let gpc = req + .get_header(HEADER_SEC_GPC) + .and_then(|v| v.to_str().ok()) + .map(|v| v.trim() == "1") + .unwrap_or(false); + + RawConsentSignals { + raw_tc_string, + raw_gpp_string, + raw_gpp_sid, + raw_us_privacy, + gpc, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cookies::parse_cookies_to_jar; + + #[test] + fn no_cookies_no_headers() { + let req = Request::get("https://example.com"); + let signals = extract_consent_signals(None, &req); + assert!(signals.is_empty(), "should produce empty signals"); + } + + #[test] + fn extracts_euconsent_v2() { + let jar = 
parse_cookies_to_jar("euconsent-v2=CPXxGfAPXxGfAAHABBENBCCsAP_AAH_AAAAAHftf"); + let req = Request::get("https://example.com"); + let signals = extract_consent_signals(Some(&jar), &req); + + assert_eq!( + signals.raw_tc_string.as_deref(), + Some("CPXxGfAPXxGfAAHABBENBCCsAP_AAH_AAAAAHftf"), + "should extract euconsent-v2 cookie value" + ); + } + + #[test] + fn extracts_gpp_cookies() { + let jar = parse_cookies_to_jar("__gpp=DBACNYA~CPXxGfA; __gpp_sid=2,6"); + let req = Request::get("https://example.com"); + let signals = extract_consent_signals(Some(&jar), &req); + + assert_eq!( + signals.raw_gpp_string.as_deref(), + Some("DBACNYA~CPXxGfA"), + "should extract __gpp cookie value" + ); + assert_eq!( + signals.raw_gpp_sid.as_deref(), + Some("2,6"), + "should extract __gpp_sid cookie value" + ); + } + + #[test] + fn extracts_us_privacy() { + let jar = parse_cookies_to_jar("us_privacy=1YNN"); + let req = Request::get("https://example.com"); + let signals = extract_consent_signals(Some(&jar), &req); + + assert_eq!( + signals.raw_us_privacy.as_deref(), + Some("1YNN"), + "should extract us_privacy cookie value" + ); + } + + #[test] + fn extracts_sec_gpc_header() { + let req = Request::get("https://example.com").with_header("sec-gpc", "1"); + let signals = extract_consent_signals(None, &req); + + assert!(signals.gpc, "should detect Sec-GPC: 1 header"); + } + + #[test] + fn sec_gpc_absent_when_not_set() { + let req = Request::get("https://example.com"); + let signals = extract_consent_signals(None, &req); + + assert!(!signals.gpc, "should default gpc to false"); + } + + #[test] + fn sec_gpc_absent_when_not_one() { + let req = Request::get("https://example.com").with_header("sec-gpc", "0"); + let signals = extract_consent_signals(None, &req); + + assert!(!signals.gpc, "should not treat Sec-GPC: 0 as opt-out"); + } + + #[test] + fn extracts_all_signals() { + let jar = + parse_cookies_to_jar("euconsent-v2=CPXxGf; __gpp=DBAC; __gpp_sid=2,6; us_privacy=1YNN"); + let req = 
Request::get("https://example.com").with_header("sec-gpc", "1"); + let signals = extract_consent_signals(Some(&jar), &req); + + assert!(signals.raw_tc_string.is_some(), "should have tc_string"); + assert!(signals.raw_gpp_string.is_some(), "should have gpp_string"); + assert!(signals.raw_gpp_sid.is_some(), "should have gpp_sid"); + assert!(signals.raw_us_privacy.is_some(), "should have us_privacy"); + assert!(signals.gpc, "should have gpc"); + } + + #[test] + fn empty_jar_produces_no_cookie_signals() { + let jar = parse_cookies_to_jar(""); + let req = Request::get("https://example.com"); + let signals = extract_consent_signals(Some(&jar), &req); + + assert!( + signals.raw_tc_string.is_none(), + "should have no tc_string from empty jar" + ); + assert!( + signals.raw_gpp_string.is_none(), + "should have no gpp_string from empty jar" + ); + } + + #[test] + fn unrelated_cookies_ignored() { + let jar = parse_cookies_to_jar("session_id=abc123; theme=dark"); + let req = Request::get("https://example.com"); + let signals = extract_consent_signals(Some(&jar), &req); + + assert!( + signals.is_empty(), + "should produce empty signals for unrelated cookies" + ); + } +} diff --git a/crates/common/src/consent/gpp.rs b/crates/common/src/consent/gpp.rs new file mode 100644 index 00000000..5032a104 --- /dev/null +++ b/crates/common/src/consent/gpp.rs @@ -0,0 +1,212 @@ +//! GPP (Global Privacy Platform) string decoder. +//! +//! Thin wrapper around the [`iab_gpp`] crate that maps decoded GPP data into +//! our [`GppConsent`] domain type. The `iab_gpp` crate handles the heavy +//! lifting of GPP v1 string parsing and section decoding; this module: +//! +//! 1. Parses the raw `__gpp` cookie value via [`iab_gpp::v1::GPPString`]. +//! 2. Extracts the header-level section IDs. +//! 3. If the EU TCF v2.2 section is present, decodes it via our own +//! [`super::tcf::decode_tc_string`] (for consistency with standalone +//! `euconsent-v2` decoding). +//! +//! # Why wrap `iab_gpp`? +//! +//! 
- Isolates external dependency behind our own types. +//! - Allows fallback/replacement without touching callers. +//! - Maps `iab_gpp` errors into our [`ConsentDecodeError`] hierarchy. +//! +//! # References +//! +//! - [IAB GPP specification](https://github.com/InteractiveAdvertisingBureau/Global-Privacy-Platform) +//! - [`iab_gpp` crate docs](https://docs.rs/iab_gpp) + +use error_stack::Report; + +use super::types::{ConsentDecodeError, GppConsent, TcfConsent}; + +/// Decodes a GPP string into a [`GppConsent`] struct. +/// +/// Parses the raw `__gpp` cookie value, extracts section IDs, and optionally +/// decodes the EU TCF v2.2 section if present. +/// +/// # Arguments +/// +/// * `gpp_string` — the raw GPP string from the `__gpp` cookie. +/// +/// # Errors +/// +/// - [`ConsentDecodeError::InvalidGppString`] if the `iab_gpp` parser fails. +pub fn decode_gpp_string(gpp_string: &str) -> Result> { + let parsed = iab_gpp::v1::GPPString::parse_str(gpp_string).map_err(|e| { + Report::new(ConsentDecodeError::InvalidGppString { + reason: format!("{e}"), + }) + })?; + + // Extract section IDs as u16 values. + let section_ids: Vec = parsed.section_ids().map(|id| *id as u16).collect(); + + // Attempt to extract and decode the EU TCF v2.2 section. + // Section ID 2 = TcfEuV2 in the GPP spec. + let eu_tcf = decode_tcf_from_gpp(&parsed); + + // The GPP header version is always 1 for current spec. + Ok(GppConsent { + version: 1, + section_ids, + eu_tcf, + }) +} + +/// Attempts to decode the EU TCF v2.2 section from a parsed GPP string. +/// +/// Uses our own TCF decoder on the raw section string (rather than +/// `iab_gpp`'s TCF decoder) to ensure consistency with standalone +/// `euconsent-v2` decoding. +/// +/// Returns `None` if the TCF section is not present or cannot be decoded. +fn decode_tcf_from_gpp(parsed: &iab_gpp::v1::GPPString) -> Option { + // iab_gpp::sections::SectionId::TcfEuV2 corresponds to section ID 2. 
+    let tcf_section_str = parsed.section(iab_gpp::sections::SectionId::TcfEuV2)?;
+
+    // Delegate to our own TCF decoder for consistency.
+    match super::tcf::decode_tc_string(tcf_section_str) {
+        Ok(tcf) => Some(tcf),
+        Err(e) => {
+            log::warn!("GPP contains TCF EU v2 section but decoding failed: {e}");
+            None
+        }
+    }
+}
+
+/// Parses a `__gpp_sid` cookie value into a vector of section IDs.
+///
+/// The cookie is a comma-separated list of integer section IDs, e.g. `"2,6"`.
+/// Surrounding whitespace is ignored, both around the whole value and around
+/// each entry. Entries that do not parse as `u16` are skipped and logged at
+/// debug level, because the cookie is only a transport hint.
+///
+/// Returns `None` if the input is empty (or whitespace only) or contains no
+/// valid IDs; otherwise returns the valid IDs in their original order.
+#[must_use]
+pub fn parse_gpp_sid_cookie(raw: &str) -> Option<Vec<u16>> {
+    let trimmed = raw.trim();
+    if trimmed.is_empty() {
+        return None;
+    }
+
+    let ids: Vec<u16> = trimmed
+        .split(',')
+        .map(str::trim)
+        .filter_map(|entry| {
+            let parsed = entry.parse::<u16>().ok();
+            if parsed.is_none() {
+                log::debug!("Ignoring invalid __gpp_sid entry: {entry:?}");
+            }
+            parsed
+        })
+        .collect();
+
+    // An all-invalid cookie is treated the same as an absent one.
+    (!ids.is_empty()).then_some(ids)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // A known-good GPP string with US Privacy section (section ID 6).
+    // Header "DBABTA" encodes: version=1, section IDs=[6] (UspV1).
+    // Section string: "1YNN" (US Privacy).
+    const GPP_USP_ONLY: &str = "DBABTA~1YNN";
+
+    // A GPP string with both TCF EU v2 and US Privacy sections.
+    // Header "DBACNY" encodes: version=1, section IDs=[2, 6].
+    // First section: TCF EU v2 consent string.
+    // Second section: US Privacy string.
+ const GPP_TCF_AND_USP: &str = "DBACNY~CPXxRfAPXxRfAAfKABENB-CgAAAAAAAAAAYgAAAAAAAA~1YNN"; + + #[test] + fn decodes_usp_only_gpp_string() { + let result = decode_gpp_string(GPP_USP_ONLY).expect("should decode USP-only GPP"); + assert_eq!(result.version, 1); + assert!(!result.section_ids.is_empty(), "should have section IDs"); + assert!( + result.eu_tcf.is_none(), + "should not have TCF section in USP-only string" + ); + } + + #[test] + fn decodes_gpp_with_tcf_section() { + let result = decode_gpp_string(GPP_TCF_AND_USP).expect("should decode GPP with TCF"); + assert_eq!(result.version, 1); + // Section IDs should include 2 (TCF EU v2) and 6 (USP v1). + assert!( + result.section_ids.contains(&2), + "should contain TCF EU v2 section ID (2)" + ); + // TCF section should be decoded (may or may not succeed depending + // on whether the section string is a valid base64-encoded TC String). + // The GPP TCF section format differs from standalone euconsent-v2, + // so eu_tcf might be None if our decoder can't parse the GPP-encoded + // TCF format. That's acceptable — we log and continue. 
+ } + + #[test] + fn rejects_invalid_gpp_string() { + let result = decode_gpp_string("totally-invalid"); + assert!(result.is_err(), "should reject invalid GPP string"); + } + + #[test] + fn rejects_empty_string() { + let result = decode_gpp_string(""); + assert!(result.is_err(), "should reject empty GPP string"); + } + + #[test] + fn parse_gpp_sid_simple() { + let ids = parse_gpp_sid_cookie("2,6").expect("should parse 2,6"); + assert_eq!(ids, vec![2, 6]); + } + + #[test] + fn parse_gpp_sid_single() { + let ids = parse_gpp_sid_cookie("2").expect("should parse single ID"); + assert_eq!(ids, vec![2]); + } + + #[test] + fn parse_gpp_sid_with_whitespace() { + let ids = parse_gpp_sid_cookie(" 2 , 6 , 8 ").expect("should handle whitespace"); + assert_eq!(ids, vec![2, 6, 8]); + } + + #[test] + fn parse_gpp_sid_empty_returns_none() { + assert!(parse_gpp_sid_cookie("").is_none(), "empty should be None"); + assert!( + parse_gpp_sid_cookie(" ").is_none(), + "whitespace should be None" + ); + } + + #[test] + fn parse_gpp_sid_skips_invalid_entries() { + let ids = parse_gpp_sid_cookie("2,abc,6").expect("should skip invalid"); + assert_eq!(ids, vec![2, 6]); + } + + #[test] + fn parse_gpp_sid_all_invalid_returns_none() { + assert!( + parse_gpp_sid_cookie("abc,def").is_none(), + "all-invalid should be None" + ); + } +} diff --git a/crates/common/src/consent/mod.rs b/crates/common/src/consent/mod.rs new file mode 100644 index 00000000..ae40074f --- /dev/null +++ b/crates/common/src/consent/mod.rs @@ -0,0 +1,151 @@ +//! Consent signal extraction, decoding, and normalization. +//! +//! This module implements the consent forwarding pipeline: +//! +//! 1. **Extract** raw consent strings from cookies and HTTP headers. +//! 2. **Decode** each signal into structured data (TCF v2, GPP, US Privacy). +//! 3. **Build** a normalized [`ConsentContext`] that flows through the auction +//! pipeline and populates `OpenRTB` bid requests. +//! +//! # Supported signals +//! +//! 
- **TCF v2** — `euconsent-v2` cookie (IAB Transparency & Consent Framework) +//! - **GPP** — `__gpp` and `__gpp_sid` cookies (IAB Global Privacy Platform) +//! - **US Privacy** — `us_privacy` cookie (IAB US Privacy / CCPA) +//! - **GPC** — `Sec-GPC` header (Global Privacy Control) +//! +//! # Usage +//! +//! ```ignore +//! let consent = consent::build_consent_context(jar.as_ref(), &req); +//! ``` + +mod extraction; +pub mod gpp; +pub mod tcf; +pub mod types; +pub mod us_privacy; + +pub use extraction::extract_consent_signals; +pub use types::{ConsentContext, ConsentSource, RawConsentSignals}; + +use cookie::CookieJar; +use fastly::Request; + +/// Extracts, decodes, and normalizes consent signals from a request. +/// +/// This is the primary entry point for the consent pipeline. It: +/// +/// 1. Reads raw consent strings from cookies and headers. +/// 2. Decodes each signal (TCF v2, GPP, US Privacy). +/// 3. Builds a [`ConsentContext`] with both raw and decoded data. +/// 4. Logs a summary for observability. +/// +/// Decoding failures are logged and the corresponding decoded field is set to +/// `None` — the raw string is still preserved for proxy-mode forwarding. +pub fn build_consent_context(jar: Option<&CookieJar>, req: &Request) -> ConsentContext { + let signals = extract_consent_signals(jar, req); + log_consent_signals(&signals); + build_context_from_signals(&signals) +} + +/// Extracts raw consent signals and logs them (without decoding). +/// +/// Use this when you need the raw signals but don't need decoded data. +/// Prefer [`build_consent_context`] for the full pipeline. +pub fn extract_and_log_consent(jar: Option<&CookieJar>, req: &Request) -> RawConsentSignals { + let signals = extract_consent_signals(jar, req); + log_consent_signals(&signals); + signals +} + +/// Builds a [`ConsentContext`] from previously extracted raw signals. +/// +/// This is the decode + normalize stage of the pipeline. 
Each signal is +/// decoded independently; failures are logged at `warn` level and the +/// corresponding decoded field is left as `None`. +#[must_use] +pub fn build_context_from_signals(signals: &RawConsentSignals) -> ConsentContext { + // Decode US Privacy + let decoded_us_privacy = + signals + .raw_us_privacy + .as_deref() + .and_then(|s| match us_privacy::decode_us_privacy(s) { + Ok(usp) => Some(usp), + Err(e) => { + log::warn!("Failed to decode US Privacy string: {e}"); + None + } + }); + + // Decode TCF v2 + let decoded_tcf = + signals + .raw_tc_string + .as_deref() + .and_then(|s| match tcf::decode_tc_string(s) { + Ok(tcf) => Some(tcf), + Err(e) => { + log::warn!("Failed to decode TC String: {e}"); + None + } + }); + + // Decode GPP + let decoded_gpp = + signals + .raw_gpp_string + .as_deref() + .and_then(|s| match gpp::decode_gpp_string(s) { + Ok(gpp_consent) => Some(gpp_consent), + Err(e) => { + log::warn!("Failed to decode GPP string: {e}"); + None + } + }); + + // Resolve GPP section IDs: + // - Prefer decoded GPP section IDs (authoritative). + // - Fall back to __gpp_sid cookie (transport hint). + let gpp_section_ids = decoded_gpp + .as_ref() + .map(|g| g.section_ids.clone()) + .or_else(|| { + signals + .raw_gpp_sid + .as_deref() + .and_then(gpp::parse_gpp_sid_cookie) + }); + + // GDPR applies if we have a TCF string (standalone or from GPP). + let gdpr_applies = + decoded_tcf.is_some() || decoded_gpp.as_ref().is_some_and(|g| g.eu_tcf.is_some()); + + ConsentContext { + raw_tc_string: signals.raw_tc_string.clone(), + raw_gpp_string: signals.raw_gpp_string.clone(), + gpp_section_ids, + raw_us_privacy: signals.raw_us_privacy.clone(), + + gdpr_applies, + tcf: decoded_tcf, + gpp: decoded_gpp, + us_privacy: decoded_us_privacy, + + gpc: signals.gpc, + source: ConsentSource::Cookie, + } +} + +/// Logs a summary of the extracted consent signals. 
+/// +/// Emits an `info`-level log line when at least one consent signal is present, +/// or a `debug`-level line when no signals were found. +pub fn log_consent_signals(signals: &RawConsentSignals) { + if signals.is_empty() { + log::debug!("No consent signals found on request"); + } else { + log::info!("Consent signals: {}", signals); + } +} diff --git a/crates/common/src/consent/tcf.rs b/crates/common/src/consent/tcf.rs new file mode 100644 index 00000000..cba7ec8a --- /dev/null +++ b/crates/common/src/consent/tcf.rs @@ -0,0 +1,531 @@ +//! TCF v2 consent string decoder (core segment only). +//! +//! Decodes the IAB Transparency & Consent Framework v2 consent string from the +//! `euconsent-v2` cookie. Only the core segment (segment type 0) is decoded; +//! publisher restrictions, disclosed vendors, and allowed vendors segments are +//! not yet supported. +//! +//! # Binary format +//! +//! The TC String is a web-safe base64-encoded binary bitfield. The core segment +//! layout (after base64 decoding) is: +//! +//! | Field | Bits | Offset | +//! |-------|------|--------| +//! | Version | 6 | 0 | +//! | Created | 36 | 6 | +//! | `LastUpdated` | 36 | 42 | +//! | `CmpId` | 12 | 78 | +//! | `CmpVersion` | 12 | 90 | +//! | `ConsentScreen` | 6 | 102 | +//! | `ConsentLanguage` | 12 | 108 | +//! | `VendorListVersion` | 12 | 120 | +//! | `TcfPolicyVersion` | 6 | 132 | +//! | `IsServiceSpecific` | 1 | 138 | +//! | `UseNonStandardTexts` | 1 | 139 | +//! | `SpecialFeatureOptIns` | 12 | 140 | +//! | `PurposesConsent` | 24 | 152 | +//! | `PurposesLITransparency` | 24 | 176 | +//! | `PurposeOneTreatment` | 1 | 200 | +//! | `PublisherCC` | 12 | 201 | +//! | `MaxVendorConsentId` | 16 | 213 | +//! | `IsRangeEncoding` | 1 | 229 | +//! | ...vendor consents... | variable | 230 | +//! +//! Segments in a TC String are separated by `.` characters. The first segment +//! is always the core segment; additional segments carry supplementary data. +//! +//! # References +//! +//! 
- [IAB TCF v2.0 specification](https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/TCFv2/IAB%20Tech%20Lab%20-%20Consent%20string%20and%20vendor%20list%20formats%20v2.md) + +use base64::engine::general_purpose::URL_SAFE_NO_PAD; +use base64::Engine as _; +use error_stack::Report; + +use super::types::{ConsentDecodeError, TcfConsent}; + +/// Decodes a TC String v2 into a [`TcfConsent`] struct. +/// +/// Only the core segment is decoded. Additional segments (separated by `.`) +/// are ignored. +/// +/// # Errors +/// +/// - [`ConsentDecodeError::InvalidTcString`] if base64 decoding fails, the +/// version is not 2, or the bitfield is too short. +pub fn decode_tc_string(tc_string: &str) -> Result> { + // TC String may have multiple segments separated by '.' + // The first segment is always the core segment. + let core_segment = tc_string.split('.').next().unwrap_or(tc_string); + + let bytes = URL_SAFE_NO_PAD + .decode(core_segment) + .or_else(|_| { + // Some CMPs use standard base64 with padding + use base64::engine::general_purpose::STANDARD; + STANDARD.decode(core_segment) + }) + .map_err(|e| { + Report::new(ConsentDecodeError::InvalidTcString { + reason: format!("base64 decode failed: {e}"), + }) + })?; + + let reader = BitReader::new(&bytes); + + // Minimum size: 230 bits for core fields up to IsRangeEncoding + if reader.bit_len() < 230 { + return Err(Report::new(ConsentDecodeError::InvalidTcString { + reason: format!( + "bitfield too short: {} bits, need at least 230", + reader.bit_len() + ), + })); + } + + let version = reader.read_u8(0, 6); + if version != 2 { + return Err(Report::new(ConsentDecodeError::InvalidTcString { + reason: format!("unsupported version {version}, expected 2"), + })); + } + + let created_ds = reader.read_u64(6, 36); + let last_updated_ds = reader.read_u64(42, 36); + let cmp_id = reader.read_u16(78, 12); + let cmp_version = reader.read_u16(90, 12); + let consent_screen = reader.read_u8(102, 
6); + + // Consent language: two 6-bit values, each offset by 'A' (65) + let lang_a = reader.read_u8(108, 6); + let lang_b = reader.read_u8(114, 6); + let consent_language = format!("{}{}", char::from(b'A' + lang_a), char::from(b'A' + lang_b),); + + let vendor_list_version = reader.read_u16(120, 12); + let tcf_policy_version = reader.read_u8(132, 6); + // Skip: IsServiceSpecific (138, 1), UseNonStandardTexts (139, 1) + + let special_feature_opt_ins = reader.read_bool_vec(140, 12); + let purpose_consents = reader.read_bool_vec(152, 24); + let purpose_legitimate_interests = reader.read_bool_vec(176, 24); + // Skip: PurposeOneTreatment (200, 1), PublisherCC (201, 12) + + // Vendor consents + let vendor_consents = decode_vendor_section(&reader, 213)?; + + // Vendor legitimate interests follow after vendor consents + let vendor_li_offset = vendor_section_end_offset(&reader, 213)?; + let vendor_legitimate_interests = if vendor_li_offset + 17 <= reader.bit_len() { + decode_vendor_section(&reader, vendor_li_offset).unwrap_or_default() + } else { + Vec::new() + }; + + Ok(TcfConsent { + version, + cmp_id, + cmp_version, + consent_screen, + consent_language, + vendor_list_version, + tcf_policy_version, + created_ds, + last_updated_ds, + purpose_consents, + purpose_legitimate_interests, + vendor_consents, + vendor_legitimate_interests, + special_feature_opt_ins, + }) +} + +/// Decodes a vendor section (consents or legitimate interests). +/// +/// The section starts with: +/// - `MaxVendorId` (16 bits) +/// - `IsRangeEncoding` (1 bit) +/// +/// If bitfield encoding: one bit per vendor up to `MaxVendorId`. +/// If range encoding: `NumEntries` (12 bits), then entries. 
+fn decode_vendor_section(
+    reader: &BitReader<'_>,
+    offset: usize,
+) -> Result<Vec<u16>, Report<ConsentDecodeError>> {
+    // The TC String comes from a client-controlled cookie, so the amount of
+    // work and memory spent here must be bounded by the section's own
+    // `MaxVendorId` (at most 65535), never by how many (possibly overlapping
+    // or oversized) range entries the string claims to contain.
+
+    // Section header: MaxVendorId (16 bits) + IsRangeEncoding (1 bit).
+    const HEADER_BITS: usize = 17;
+    const NUM_ENTRIES_BITS: usize = 12;
+    const VENDOR_ID_BITS: usize = 16;
+
+    // A truncated header is treated as "no vendors" rather than an error.
+    if offset + HEADER_BITS > reader.bit_len() {
+        return Ok(Vec::new());
+    }
+
+    let max_vendor_id = reader.read_u16(offset, VENDOR_ID_BITS);
+    let is_range = reader.read_bool(offset + VENDOR_ID_BITS);
+    let body_start = offset + HEADER_BITS;
+
+    if !is_range {
+        // Bitfield encoding: bit `i` (0-based) carries the flag for vendor
+        // `i + 1`. Bits past the end of the buffer count as "not set".
+        let vendors: Vec<u16> = (1..=max_vendor_id)
+            .filter(|&id| {
+                let bit_pos = body_start + usize::from(id) - 1;
+                bit_pos < reader.bit_len() && reader.read_bool(bit_pos)
+            })
+            .collect();
+        return Ok(vendors);
+    }
+
+    // Range encoding: NumEntries (12 bits), then `NumEntries` entries.
+    if body_start + NUM_ENTRIES_BITS > reader.bit_len() {
+        return Ok(Vec::new());
+    }
+    let num_entries = reader.read_u16(body_start, NUM_ENTRIES_BITS);
+    let mut pos = body_start + NUM_ENTRIES_BITS;
+
+    // `granted[id]` is true once vendor `id` has been listed. Index 0 is
+    // unused because vendor IDs are 1-based. Marking into a fixed-size bitmap
+    // instead of pushing every ID keeps the output de-duplicated and capped at
+    // `max_vendor_id` elements even for hostile inputs.
+    let mut granted = vec![false; usize::from(max_vendor_id) + 1];
+
+    for _ in 0..num_entries {
+        if pos >= reader.bit_len() {
+            break;
+        }
+        let is_range_entry = reader.read_bool(pos);
+        pos += 1;
+
+        // A range entry is StartVendorId (16) + EndVendorId (16);
+        // a single entry is just VendorId (16).
+        let id_bits = if is_range_entry {
+            2 * VENDOR_ID_BITS
+        } else {
+            VENDOR_ID_BITS
+        };
+        if pos + id_bits > reader.bit_len() {
+            // Truncated entry: keep what was decoded so far.
+            break;
+        }
+
+        let start = reader.read_u16(pos, VENDOR_ID_BITS);
+        let end = if is_range_entry {
+            reader.read_u16(pos + VENDOR_ID_BITS, VENDOR_ID_BITS)
+        } else {
+            start
+        };
+        pos += id_bits;
+
+        // Clamp to the valid ID space [1, max_vendor_id]. Inverted ranges
+        // (start > end) and IDs outside that space contribute nothing.
+        let first = usize::from(start.max(1));
+        let last = usize::from(end.min(max_vendor_id));
+        if first <= last {
+            granted[first..=last].fill(true);
+        }
+    }
+
+    // Emit the marked IDs in ascending order.
+    let vendors: Vec<u16> = (1..=max_vendor_id)
+        .filter(|&id| granted[usize::from(id)])
+        .collect();
+    Ok(vendors)
+}
+
+/// Calculates the bit offset after a vendor section ends.
+fn vendor_section_end_offset(
+    reader: &BitReader<'_>,
+    offset: usize,
+) -> Result<usize, Report<ConsentDecodeError>> {
+    // Walks the section that starts at `offset` without collecting vendor
+    // IDs and returns the bit position just past it. The current
+    // implementation never produces an `Err`.
+
+    // Section header: MaxVendorId (16 bits) + IsRangeEncoding (1 bit).
+    let body_start = offset + 17;
+    if body_start > reader.bit_len() {
+        // Header is truncated: the section occupies no bits.
+        return Ok(offset);
+    }
+
+    let max_vendor_id = reader.read_u16(offset, 16);
+    if !reader.read_bool(offset + 16) {
+        // Bitfield encoding: exactly one bit per vendor ID.
+        return Ok(body_start + usize::from(max_vendor_id));
+    }
+
+    // Range encoding: NumEntries (12 bits) followed by the entries.
+    let mut pos = body_start + 12;
+    if pos > reader.bit_len() {
+        return Ok(body_start);
+    }
+    let num_entries = reader.read_u16(body_start, 12);
+
+    for _ in 0..num_entries {
+        if pos >= reader.bit_len() {
+            break;
+        }
+        // One flag bit, then StartVendorId (16) + EndVendorId (16) for a
+        // range entry or a single VendorId (16) otherwise.
+        let id_bits = if reader.read_bool(pos) { 32 } else { 16 };
+        pos += 1 + id_bits;
+    }
+    Ok(pos)
+}
+
+// ---------------------------------------------------------------------------
+// Bit reader utility
+// ---------------------------------------------------------------------------
+
+/// A simple bit-level reader over a byte slice.
+///
+/// All reads are specified as (`bit_offset`, `num_bits`) from the start of the
+/// buffer. No internal cursor is maintained — callers manage offsets explicitly.
+/// Bits are numbered most-significant-first within each byte.
+struct BitReader<'a> {
+    bytes: &'a [u8],
+}
+
+impl<'a> BitReader<'a> {
+    /// Wraps `bytes` without copying.
+    const fn new(bytes: &'a [u8]) -> Self {
+        Self { bytes }
+    }
+
+    /// Total number of addressable bits in the buffer.
+    const fn bit_len(&self) -> usize {
+        self.bytes.len() * 8
+    }
+
+    /// Reads a single bit as a boolean.
+    ///
+    /// Offsets past the end of the buffer read as `false` instead of panicking.
+    fn read_bool(&self, bit_offset: usize) -> bool {
+        let shift = 7 - (bit_offset % 8);
+        self.bytes
+            .get(bit_offset / 8)
+            .is_some_and(|&byte| (byte >> shift) & 1 == 1)
+    }
+
+    /// Reads up to 8 bits as a [`u8`].
+    fn read_u8(&self, bit_offset: usize, num_bits: usize) -> u8 {
+        debug_assert!(num_bits <= 8);
+        // Truncation is intentional: at most 8 bits were read.
+        self.read_u64(bit_offset, num_bits) as u8
+    }
+
+    /// Reads up to 16 bits as a [`u16`].
+ fn read_u16(&self, bit_offset: usize, num_bits: usize) -> u16 { + debug_assert!(num_bits <= 16); + self.read_u64(bit_offset, num_bits) as u16 + } + + /// Reads up to 64 bits as a [`u64`]. + fn read_u64(&self, bit_offset: usize, num_bits: usize) -> u64 { + debug_assert!(num_bits <= 64); + let mut value: u64 = 0; + for i in 0..num_bits { + if self.read_bool(bit_offset + i) { + value |= 1 << (num_bits - 1 - i); + } + } + value + } + + /// Reads a sequence of bits as a [`Vec`]. + fn read_bool_vec(&self, bit_offset: usize, num_bits: usize) -> Vec { + (0..num_bits) + .map(|i| self.read_bool(bit_offset + i)) + .collect() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // A known-good TC String v2 generated by the IAB reference implementation. + // This encodes: version=2, cmpId=7, cmpVersion=1, consent for purposes 1-4, + // vendor consents for vendors 1-10 (bitfield encoding). + // + // To generate test strings: https://iabeurope.github.io/TCF-v2-consent-string-editor/ + // Or use the IAB reference implementation in JavaScript. + + #[test] + fn decodes_minimal_tc_string() { + // This is a minimal valid TC String v2 core segment. + // Generated with: version=2, created=1970-01-01, lastUpdated=1970-01-01, + // cmpId=1, cmpVersion=1, consentScreen=0, language=EN, + // vendorListVersion=1, policyVersion=2, purposes=none, vendors=none (max=0) + // + // Bitfield construction: + // version(6)=2, created(36)=0, lastUpdated(36)=0, cmpId(12)=1, + // cmpVersion(12)=1, consentScreen(6)=0, language(12)=EN, + // vendorListVersion(12)=1, policyVersion(6)=2, + // isServiceSpecific(1)=0, useNonStandard(1)=0, + // specialFeatures(12)=0, purposeConsents(24)=0, + // purposeLI(24)=0, purposeOneTreatment(1)=0, publisherCC(12)=EN, + // maxVendorId(16)=0, isRange(1)=0 + // + // We'll build this manually. 
+ let bytes = build_minimal_tc_bytes(1, 1, b"EN", 1, &[], &[]); + let encoded = URL_SAFE_NO_PAD.encode(&bytes); + + let result = decode_tc_string(&encoded).expect("should decode minimal TC string"); + assert_eq!(result.version, 2); + assert_eq!(result.cmp_id, 1); + assert_eq!(result.cmp_version, 1); + assert_eq!(result.consent_language, "EN"); + assert_eq!(result.vendor_list_version, 1); + assert!(result.vendor_consents.is_empty(), "should have no vendors"); + } + + #[test] + fn decodes_purpose_consents() { + let purposes = vec![true, true, false, true]; // purposes 1,2,4 + let bytes = build_minimal_tc_bytes(1, 1, b"EN", 1, &purposes, &[]); + let encoded = URL_SAFE_NO_PAD.encode(&bytes); + + let result = decode_tc_string(&encoded).expect("should decode purposes"); + assert!(result.purpose_consents[0], "purpose 1 should be consented"); + assert!(result.purpose_consents[1], "purpose 2 should be consented"); + assert!( + !result.purpose_consents[2], + "purpose 3 should not be consented" + ); + assert!(result.purpose_consents[3], "purpose 4 should be consented"); + } + + #[test] + fn decodes_vendor_consents_bitfield() { + // Vendors 1, 3, 5 consented (bitfield encoding, max=5) + let vendor_bits = vec![true, false, true, false, true]; + let bytes = build_minimal_tc_bytes(1, 1, b"EN", 1, &[], &vendor_bits); + let encoded = URL_SAFE_NO_PAD.encode(&bytes); + + let result = decode_tc_string(&encoded).expect("should decode vendor bitfield"); + assert_eq!( + result.vendor_consents, + vec![1, 3, 5], + "should have vendors 1, 3, 5" + ); + } + + #[test] + fn rejects_version_1() { + // Build bytes with version=1 + let mut bytes = build_minimal_tc_bytes(1, 1, b"EN", 1, &[], &[]); + // Clear version bits (first 6 bits) and set to 1 + bytes[0] = (bytes[0] & 0x03) | (1 << 2); // version=1 in first 6 bits + let encoded = URL_SAFE_NO_PAD.encode(&bytes); + + let result = decode_tc_string(&encoded); + assert!(result.is_err(), "should reject version 1"); + } + + #[test] + fn 
rejects_too_short() { + let encoded = URL_SAFE_NO_PAD.encode([0u8; 10]); // only 80 bits + let result = decode_tc_string(&encoded); + assert!(result.is_err(), "should reject short bitfield"); + } + + #[test] + fn rejects_invalid_base64() { + let result = decode_tc_string("!!!invalid!!!"); + assert!(result.is_err(), "should reject invalid base64"); + } + + #[test] + fn handles_segmented_tc_string() { + // TC Strings can have multiple segments separated by '.' + let bytes = build_minimal_tc_bytes(1, 1, b"EN", 1, &[], &[]); + let encoded = format!("{}.extra-segment", URL_SAFE_NO_PAD.encode(&bytes)); + + let result = decode_tc_string(&encoded).expect("should decode first segment"); + assert_eq!(result.version, 2); + } + + #[test] + fn decodes_consent_language() { + let bytes = build_minimal_tc_bytes(1, 1, b"FR", 1, &[], &[]); + let encoded = URL_SAFE_NO_PAD.encode(&bytes); + + let result = decode_tc_string(&encoded).expect("should decode language"); + assert_eq!(result.consent_language, "FR"); + } + + // ----------------------------------------------------------------------- + // Test helper: builds a minimal TC String v2 byte buffer + // ----------------------------------------------------------------------- + + fn build_minimal_tc_bytes( + cmp_id: u16, + cmp_version: u16, + language: &[u8; 2], + vendor_list_version: u16, + purpose_consents: &[bool], + vendor_consent_bits: &[bool], + ) -> Vec { + let max_vendor_id = vendor_consent_bits.len() as u16; + // Calculate total bits needed + // Core fields: 213 bits + 16 (maxVendorId) + 1 (isRange) + max_vendor_id (bitfield) + let total_bits = 213 + 17 + usize::from(max_vendor_id); + let total_bytes = total_bits.div_ceil(8); + let mut buf = vec![0u8; total_bytes]; + + let mut writer = BitWriter::new(&mut buf); + + // Version (6 bits) = 2 + writer.write(0, 6, 2); + // Created (36 bits) = 0 + writer.write(6, 36, 0); + // LastUpdated (36 bits) = 0 + writer.write(42, 36, 0); + // CmpId (12 bits) + writer.write(78, 12, 
u64::from(cmp_id)); + // CmpVersion (12 bits) + writer.write(90, 12, u64::from(cmp_version)); + // ConsentScreen (6 bits) = 0 + writer.write(102, 6, 0); + // ConsentLanguage (12 bits) - two 6-bit chars offset by 'A' + writer.write(108, 6, u64::from(language[0] - b'A')); + writer.write(114, 6, u64::from(language[1] - b'A')); + // VendorListVersion (12 bits) + writer.write(120, 12, u64::from(vendor_list_version)); + // TcfPolicyVersion (6 bits) = 2 + writer.write(132, 6, 2); + // IsServiceSpecific (1 bit) = 0 + // UseNonStandardTexts (1 bit) = 0 + // SpecialFeatureOptIns (12 bits) = 0 + // PurposesConsent (24 bits) + for (i, &consented) in purpose_consents.iter().enumerate() { + if consented && i < 24 { + writer.write_bool(152 + i, true); + } + } + // PurposesLITransparency (24 bits) = 0 + // PurposeOneTreatment (1 bit) = 0 + // PublisherCC (12 bits) - same as language + writer.write(201, 6, u64::from(language[0] - b'A')); + writer.write(207, 6, u64::from(language[1] - b'A')); + // MaxVendorConsentId (16 bits) + writer.write(213, 16, u64::from(max_vendor_id)); + // IsRangeEncoding (1 bit) = 0 (bitfield) + writer.write_bool(229, false); + // Vendor consent bits + for (i, &consented) in vendor_consent_bits.iter().enumerate() { + if consented { + writer.write_bool(230 + i, true); + } + } + + buf + } + + /// Simple bit writer for test data construction. 
+ struct BitWriter<'a> { + bytes: &'a mut [u8], + } + + impl<'a> BitWriter<'a> { + fn new(bytes: &'a mut [u8]) -> Self { + Self { bytes } + } + + fn write_bool(&mut self, bit_offset: usize, value: bool) { + if value { + let byte_idx = bit_offset / 8; + let bit_idx = 7 - (bit_offset % 8); + if byte_idx < self.bytes.len() { + self.bytes[byte_idx] |= 1 << bit_idx; + } + } + } + + fn write(&mut self, bit_offset: usize, num_bits: usize, value: u64) { + for i in 0..num_bits { + let bit = (value >> (num_bits - 1 - i)) & 1 == 1; + self.write_bool(bit_offset + i, bit); + } + } + } +} diff --git a/crates/common/src/consent/types.rs b/crates/common/src/consent/types.rs new file mode 100644 index 00000000..d6436d42 --- /dev/null +++ b/crates/common/src/consent/types.rs @@ -0,0 +1,437 @@ +//! Consent signal types. +//! +//! This module defines the full consent type hierarchy: +//! +//! - [`RawConsentSignals`] — raw (undecoded) strings extracted from cookies/headers +//! - [`ConsentContext`] — the normalized output carrying both raw and decoded data +//! - [`UsPrivacy`] / [`PrivacyFlag`] — decoded US Privacy (CCPA) 4-char string +//! - [`TcfConsent`] — decoded TCF v2 core consent data +//! - [`GppConsent`] — decoded GPP consent data +//! - [`Jurisdiction`] — the privacy regime applicable to the request +//! - [`ConsentSource`] — how consent was sourced (cookie, KV store, etc.) + +use core::fmt; + +// --------------------------------------------------------------------------- +// Raw extraction layer +// --------------------------------------------------------------------------- + +/// Raw consent signals extracted from cookies and HTTP headers. +/// +/// All fields are optional because any combination of consent mechanisms may be +/// present (or absent) on a given request. No decoding or validation is +/// performed at this stage — the values are preserved exactly as received. 
+/// +/// # Consent sources +/// +/// | Field | Source | Standard | +/// |---|---|---| +/// | [`raw_tc_string`](Self::raw_tc_string) | `euconsent-v2` cookie | IAB TCF v2 | +/// | [`raw_gpp_string`](Self::raw_gpp_string) | `__gpp` cookie | IAB GPP | +/// | [`raw_gpp_sid`](Self::raw_gpp_sid) | `__gpp_sid` cookie | IAB GPP | +/// | [`raw_us_privacy`](Self::raw_us_privacy) | `us_privacy` cookie | IAB US Privacy (CCPA) | +/// | [`gpc`](Self::gpc) | `Sec-GPC` header | Global Privacy Control | +#[derive(Debug, Clone, Default)] +pub struct RawConsentSignals { + /// TCF v2 consent string from the `euconsent-v2` cookie. + pub raw_tc_string: Option, + /// GPP consent string from the `__gpp` cookie. + pub raw_gpp_string: Option, + /// GPP section IDs from the `__gpp_sid` cookie (raw comma-separated string). + pub raw_gpp_sid: Option, + /// US Privacy string from the `us_privacy` cookie (4-character format). + pub raw_us_privacy: Option, + /// Global Privacy Control signal from the `Sec-GPC` header. + /// + /// When `true`, the browser has signaled the user's opt-out preference. + pub gpc: bool, +} + +impl RawConsentSignals { + /// Returns `true` when no consent signals were found on the request. 
+ #[must_use] + pub fn is_empty(&self) -> bool { + self.raw_tc_string.is_none() + && self.raw_gpp_string.is_none() + && self.raw_gpp_sid.is_none() + && self.raw_us_privacy.is_none() + && !self.gpc + } +} + +impl fmt::Display for RawConsentSignals { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "euconsent-v2=")?; + match &self.raw_tc_string { + Some(s) => write!(f, "present ({} chars)", s.len())?, + None => write!(f, "absent")?, + } + + write!(f, ", __gpp=")?; + match &self.raw_gpp_string { + Some(s) => write!(f, "present ({} chars)", s.len())?, + None => write!(f, "absent")?, + } + + write!(f, ", __gpp_sid=")?; + match &self.raw_gpp_sid { + Some(s) => write!(f, "\"{}\"", s)?, + None => write!(f, "absent")?, + } + + write!(f, ", us_privacy=")?; + match &self.raw_us_privacy { + Some(s) => write!(f, "\"{}\"", s)?, + None => write!(f, "absent")?, + } + + write!(f, ", Sec-GPC={}", if self.gpc { "1" } else { "absent" }) + } +} + +// --------------------------------------------------------------------------- +// Decoded consent types +// --------------------------------------------------------------------------- + +/// Normalized consent context extracted from cookies and headers. +/// +/// Carries both raw consent strings (for `OpenRTB` forwarding) and decoded +/// structured data (for TS-level enforcement and observability). This is the +/// central type that flows through the entire request lifecycle. +/// +/// Built from [`RawConsentSignals`] by the decoding pipeline in +/// [`super::build_consent_context`]. +#[derive(Debug, Clone, Default)] +pub struct ConsentContext { + /// Raw TC String from `euconsent-v2` cookie, passed as-is in `user.consent`. + pub raw_tc_string: Option, + /// Raw GPP string from `__gpp` cookie, passed as-is in `regs.gpp`. + pub raw_gpp_string: Option, + /// GPP section IDs derived from decoded `__gpp` data. 
+ /// + /// The `__gpp_sid` cookie is treated as a transport hint and validated + /// against decoded section IDs when both are present. + pub gpp_section_ids: Option>, + /// Raw US Privacy string from `us_privacy` cookie. + pub raw_us_privacy: Option, + + /// Whether GDPR applies to this request (derived from TCF presence). + pub gdpr_applies: bool, + /// Decoded TCF v2 consent data. + pub tcf: Option, + /// Decoded GPP consent data. + pub gpp: Option, + /// Decoded US Privacy signal. + pub us_privacy: Option, + + /// Global Privacy Control signal from `Sec-GPC` header. + pub gpc: bool, + /// Source of the consent data (for debugging). + pub source: ConsentSource, +} + +impl ConsentContext { + /// Returns `true` when no consent signals are present. + #[must_use] + pub fn is_empty(&self) -> bool { + self.raw_tc_string.is_none() + && self.raw_gpp_string.is_none() + && self.raw_us_privacy.is_none() + && self.tcf.is_none() + && self.gpp.is_none() + && self.us_privacy.is_none() + && !self.gpc + } +} + +// --------------------------------------------------------------------------- +// TCF v2 +// --------------------------------------------------------------------------- + +/// Decoded TCF v2.x consent data. +/// +/// Extracted from either a standalone TC String (`euconsent-v2` cookie) +/// or from the EU TCF v2.2 section within a GPP string. +/// +/// Only the core segment (segment type 0) is decoded. Publisher restrictions, +/// disclosed vendors, and allowed vendors segments are not yet supported. +#[derive(Debug, Clone)] +pub struct TcfConsent { + /// TCF version (2). + pub version: u8, + /// CMP ID that collected this consent. + pub cmp_id: u16, + /// CMP version. + pub cmp_version: u16, + /// Consent screen number. + pub consent_screen: u8, + /// CMP language (ISO 639-1, two uppercase letters). + pub consent_language: String, + /// Vendor list version used. + pub vendor_list_version: u16, + /// TCF policy version. 
+ pub tcf_policy_version: u8, + /// Timestamp when consent was created (deciseconds since epoch). + pub created_ds: u64, + /// Timestamp when consent was last updated (deciseconds since epoch). + pub last_updated_ds: u64, + + /// Purpose consents (24 bits, 1-indexed). + /// + /// `true` at index 0 means purpose 1 is consented, etc. + pub purpose_consents: Vec, + /// Purpose legitimate interests (24 bits, 1-indexed). + pub purpose_legitimate_interests: Vec, + + /// Vendor IDs with consent granted. + pub vendor_consents: Vec, + /// Vendor IDs with legitimate interest established. + pub vendor_legitimate_interests: Vec, + + /// Special feature opt-ins (12 bits). + pub special_feature_opt_ins: Vec, +} + +// --------------------------------------------------------------------------- +// GPP +// --------------------------------------------------------------------------- + +/// Decoded GPP (Global Privacy Platform) consent data. +/// +/// Wraps the `iab_gpp` crate's decoded output with our domain types. +#[derive(Debug, Clone)] +pub struct GppConsent { + /// GPP header version. + pub version: u8, + /// Active section IDs present in the GPP string. + pub section_ids: Vec, + /// Decoded EU TCF v2.2 section (if present in GPP, section ID 2). + pub eu_tcf: Option, +} + +// --------------------------------------------------------------------------- +// US Privacy (CCPA) +// --------------------------------------------------------------------------- + +/// Decoded US Privacy string (legacy 4-character format). +/// +/// Format: `1YNN` where: +/// - Char 1: Version (always `1`) +/// - Char 2: Notice given (`Y`/`N`/`-`) +/// - Char 3: Opt-out of sale (`Y`/`N`/`-`) +/// - Char 4: LSPA covered (`Y`/`N`/`-`) +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct UsPrivacy { + /// Specification version (currently always 1). + pub version: u8, + /// Whether explicit notice has been given to the consumer. 
+ pub notice_given: PrivacyFlag, + /// Whether the consumer has opted out of the sale of personal information. + pub opt_out_sale: PrivacyFlag, + /// Whether the transaction is covered by the Limited Service Provider Agreement. + pub lspa_covered: PrivacyFlag, +} + +/// A tri-state flag used in the US Privacy string. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PrivacyFlag { + /// `Y` — yes / affirmative. + Yes, + /// `N` — no / negative. + No, + /// `-` — not applicable or unknown. + NotApplicable, +} + +impl fmt::Display for PrivacyFlag { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Yes => write!(f, "Y"), + Self::No => write!(f, "N"), + Self::NotApplicable => write!(f, "-"), + } + } +} + +impl fmt::Display for UsPrivacy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}{}{}{}", + self.version, self.notice_given, self.opt_out_sale, self.lspa_covered, + ) + } +} + +// --------------------------------------------------------------------------- +// Metadata types +// --------------------------------------------------------------------------- + +/// How consent was sourced for this request. +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub enum ConsentSource { + /// Read from cookies on the incoming request. + Cookie, + /// Loaded from KV store via `SyntheticID` lookup. + KvStore, + /// Applied from explicit publisher policy defaults. + PolicyDefault, + /// No consent data available. + #[default] + None, +} + +// --------------------------------------------------------------------------- +// Consent error +// --------------------------------------------------------------------------- + +/// Errors that can occur during consent string decoding. +#[derive(Debug, derive_more::Display)] +pub enum ConsentDecodeError { + /// The US Privacy string has an invalid format. 
+ #[display("invalid US Privacy string: {reason}")] + InvalidUsPrivacy { reason: String }, + /// The TC String could not be decoded. + #[display("invalid TC String: {reason}")] + InvalidTcString { reason: String }, + /// The GPP string could not be decoded. + #[display("invalid GPP string: {reason}")] + InvalidGppString { reason: String }, +} + +impl core::error::Error for ConsentDecodeError {} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty_signals() { + let signals = RawConsentSignals::default(); + assert!(signals.is_empty(), "default signals should be empty"); + } + + #[test] + fn not_empty_with_tc_string() { + let signals = RawConsentSignals { + raw_tc_string: Some("CPXxGfAPXxGfA".to_owned()), + ..Default::default() + }; + assert!(!signals.is_empty(), "should not be empty with tc_string"); + } + + #[test] + fn not_empty_with_gpc() { + let signals = RawConsentSignals { + gpc: true, + ..Default::default() + }; + assert!(!signals.is_empty(), "should not be empty with gpc=true"); + } + + #[test] + fn display_all_absent() { + let signals = RawConsentSignals::default(); + let output = signals.to_string(); + assert!( + output.contains("euconsent-v2=absent"), + "should show euconsent-v2 absent" + ); + assert!(output.contains("__gpp=absent"), "should show __gpp absent"); + assert!( + output.contains("us_privacy=absent"), + "should show us_privacy absent" + ); + assert!( + output.contains("Sec-GPC=absent"), + "should show Sec-GPC absent" + ); + } + + #[test] + fn display_with_values() { + let signals = RawConsentSignals { + raw_tc_string: Some("CPXxGfAPXxGfA".to_owned()), + raw_gpp_string: Some("DBACNYA~CPXxGfA".to_owned()), + raw_gpp_sid: Some("2,6".to_owned()), + raw_us_privacy: Some("1YNN".to_owned()), + gpc: true, + }; + let output = signals.to_string(); + assert!( + 
output.contains("euconsent-v2=present (13 chars)"), + "should show tc_string length" + ); + assert!( + output.contains("__gpp=present (15 chars)"), + "should show gpp length" + ); + assert!( + output.contains("__gpp_sid=\"2,6\""), + "should show gpp_sid value" + ); + assert!( + output.contains("us_privacy=\"1YNN\""), + "should show us_privacy value" + ); + assert!(output.contains("Sec-GPC=1"), "should show Sec-GPC as 1"); + } + + #[test] + fn consent_context_empty_by_default() { + let ctx = ConsentContext::default(); + assert!(ctx.is_empty(), "default ConsentContext should be empty"); + } + + #[test] + fn consent_context_not_empty_with_tc_string() { + let ctx = ConsentContext { + raw_tc_string: Some("CPXx".to_owned()), + ..Default::default() + }; + assert!( + !ctx.is_empty(), + "should not be empty with raw_tc_string present" + ); + } + + #[test] + fn consent_context_not_empty_with_gpc() { + let ctx = ConsentContext { + gpc: true, + ..Default::default() + }; + assert!(!ctx.is_empty(), "should not be empty with gpc=true"); + } + + #[test] + fn us_privacy_display() { + let usp = UsPrivacy { + version: 1, + notice_given: PrivacyFlag::Yes, + opt_out_sale: PrivacyFlag::No, + lspa_covered: PrivacyFlag::NotApplicable, + }; + assert_eq!(usp.to_string(), "1YN-", "should format as 1YN-"); + } + + #[test] + fn privacy_flag_display() { + assert_eq!(PrivacyFlag::Yes.to_string(), "Y"); + assert_eq!(PrivacyFlag::No.to_string(), "N"); + assert_eq!(PrivacyFlag::NotApplicable.to_string(), "-"); + } + + #[test] + fn consent_source_default_is_none() { + assert_eq!( + ConsentSource::default(), + ConsentSource::None, + "default source should be None" + ); + } +} diff --git a/crates/common/src/consent/us_privacy.rs b/crates/common/src/consent/us_privacy.rs new file mode 100644 index 00000000..eea27e40 --- /dev/null +++ b/crates/common/src/consent/us_privacy.rs @@ -0,0 +1,164 @@ +//! US Privacy string decoder. +//! +//! Parses the legacy 4-character IAB US Privacy string (CCPA format). 
+//! +//! # Format +//! +//! The string is exactly 4 characters: `VNOL` where: +//! - **V** (version): always `1` +//! - **N** (notice): `Y` = given, `N` = not given, `-` = N/A +//! - **O** (opt-out of sale): `Y` = opted out, `N` = not opted out, `-` = N/A +//! - **L** (LSPA covered): `Y` = yes, `N` = no, `-` = N/A +//! +//! # References +//! +//! - [IAB US Privacy String specification](https://github.com/InteractiveAdvertisingBureau/USPrivacy/blob/master/CCPA/US%20Privacy%20String.md) + +use error_stack::{Report, ResultExt}; + +use super::types::{ConsentDecodeError, PrivacyFlag, UsPrivacy}; + +/// Decodes a US Privacy string into a [`UsPrivacy`] struct. +/// +/// # Errors +/// +/// - [`ConsentDecodeError::InvalidUsPrivacy`] if the string is not exactly +/// 4 characters, has an unsupported version, or contains invalid flag values. +pub fn decode_us_privacy(s: &str) -> Result> { + let chars: Vec = s.chars().collect(); + + if chars.len() != 4 { + return Err(Report::new(ConsentDecodeError::InvalidUsPrivacy { + reason: format!("expected 4 characters, got {}", chars.len()), + })); + } + + let version = match chars[0] { + '1' => 1u8, + other => { + return Err(Report::new(ConsentDecodeError::InvalidUsPrivacy { + reason: format!("unsupported version '{}', expected '1'", other), + })); + } + }; + + let notice_given = + parse_flag(chars[1]).change_context(ConsentDecodeError::InvalidUsPrivacy { + reason: format!("invalid notice flag '{}'", chars[1]), + })?; + + let opt_out_sale = + parse_flag(chars[2]).change_context(ConsentDecodeError::InvalidUsPrivacy { + reason: format!("invalid opt-out flag '{}'", chars[2]), + })?; + + let lspa_covered = + parse_flag(chars[3]).change_context(ConsentDecodeError::InvalidUsPrivacy { + reason: format!("invalid LSPA flag '{}'", chars[3]), + })?; + + Ok(UsPrivacy { + version, + notice_given, + opt_out_sale, + lspa_covered, + }) +} + +/// Parses a single US Privacy flag character. 
+fn parse_flag(c: char) -> Result> { + match c { + 'Y' | 'y' => Ok(PrivacyFlag::Yes), + 'N' | 'n' => Ok(PrivacyFlag::No), + '-' => Ok(PrivacyFlag::NotApplicable), + other => Err(Report::new(ConsentDecodeError::InvalidUsPrivacy { + reason: format!("invalid flag character '{other}'"), + })), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn decodes_standard_string() { + let result = decode_us_privacy("1YNN").expect("should decode 1YNN"); + assert_eq!(result.version, 1); + assert_eq!(result.notice_given, PrivacyFlag::Yes); + assert_eq!(result.opt_out_sale, PrivacyFlag::No); + assert_eq!(result.lspa_covered, PrivacyFlag::No); + } + + #[test] + fn decodes_all_yes() { + let result = decode_us_privacy("1YYY").expect("should decode 1YYY"); + assert_eq!(result.notice_given, PrivacyFlag::Yes); + assert_eq!(result.opt_out_sale, PrivacyFlag::Yes); + assert_eq!(result.lspa_covered, PrivacyFlag::Yes); + } + + #[test] + fn decodes_all_not_applicable() { + let result = decode_us_privacy("1---").expect("should decode 1---"); + assert_eq!(result.notice_given, PrivacyFlag::NotApplicable); + assert_eq!(result.opt_out_sale, PrivacyFlag::NotApplicable); + assert_eq!(result.lspa_covered, PrivacyFlag::NotApplicable); + } + + #[test] + fn decodes_mixed_flags() { + let result = decode_us_privacy("1NYN").expect("should decode 1NYN"); + assert_eq!(result.notice_given, PrivacyFlag::No); + assert_eq!(result.opt_out_sale, PrivacyFlag::Yes); + assert_eq!(result.lspa_covered, PrivacyFlag::No); + } + + #[test] + fn roundtrips_through_display() { + let result = decode_us_privacy("1YNN").expect("should decode"); + assert_eq!( + result.to_string(), + "1YNN", + "should roundtrip through Display" + ); + } + + #[test] + fn rejects_too_short() { + let result = decode_us_privacy("1YN"); + assert!(result.is_err(), "should reject 3-char string"); + } + + #[test] + fn rejects_too_long() { + let result = decode_us_privacy("1YNNN"); + assert!(result.is_err(), "should reject 5-char string"); + 
} + + #[test] + fn rejects_empty() { + let result = decode_us_privacy(""); + assert!(result.is_err(), "should reject empty string"); + } + + #[test] + fn rejects_bad_version() { + let result = decode_us_privacy("2YNN"); + assert!(result.is_err(), "should reject version 2"); + } + + #[test] + fn rejects_invalid_flag() { + let result = decode_us_privacy("1XNN"); + assert!(result.is_err(), "should reject invalid flag 'X'"); + } + + #[test] + fn accepts_lowercase_flags() { + let result = decode_us_privacy("1ynn").expect("should accept lowercase"); + assert_eq!(result.notice_given, PrivacyFlag::Yes); + assert_eq!(result.opt_out_sale, PrivacyFlag::No); + assert_eq!(result.lspa_covered, PrivacyFlag::No); + } +} diff --git a/crates/common/src/constants.rs b/crates/common/src/constants.rs index 5e57e6aa..d5b8c7f6 100644 --- a/crates/common/src/constants.rs +++ b/crates/common/src/constants.rs @@ -59,3 +59,12 @@ pub const INTERNAL_HEADERS: &[&str] = &[ "x-compress-hint", "x-debug-fastly-pop", ]; + +// Consent-related cookie names +pub const COOKIE_EUCONSENT_V2: &str = "euconsent-v2"; +pub const COOKIE_GPP: &str = "__gpp"; +pub const COOKIE_GPP_SID: &str = "__gpp_sid"; +pub const COOKIE_US_PRIVACY: &str = "us_privacy"; + +// Consent-related header names +pub const HEADER_SEC_GPC: HeaderName = HeaderName::from_static("sec-gpc"); diff --git a/crates/common/src/integrations/prebid.rs b/crates/common/src/integrations/prebid.rs index fb3fccc8..e0caaf56 100644 --- a/crates/common/src/integrations/prebid.rs +++ b/crates/common/src/integrations/prebid.rs @@ -24,7 +24,7 @@ use crate::integrations::{ }; use crate::openrtb::{ Banner, Device, Format, Geo, Imp, ImpExt, OpenRtbRequest, PrebidExt, PrebidImpExt, Regs, - RegsExt, RequestExt, Site, TrustedServerExt, User, UserExt, + RequestExt, Site, TrustedServerExt, User, UserExt, }; use crate::request_signing::{RequestSigner, SigningParams, SIGNING_VERSION}; use crate::settings::{IntegrationConfig, Settings}; @@ -571,9 +571,11 @@ impl 
PrebidAuctionProvider { } }); - // Build user object + // Build user object — populate consent from ConsentContext if available. + let consent_ctx = request.user.consent.as_ref(); let user = Some(User { id: Some(request.user.id.clone()), + consent: consent_ctx.and_then(|c| c.raw_tc_string.clone()), ext: Some(UserExt { synthetic_fresh: Some(request.user.fresh_id.clone()), }), @@ -594,16 +596,14 @@ impl PrebidAuctionProvider { }), }); - // Build regs object if Sec-GPC header is present - let regs = if context.request.get_header("Sec-GPC").is_some() { - Some(Regs { - ext: Some(RegsExt { - us_privacy: Some("1YYN".to_string()), - }), - }) - } else { - None - }; + // Build regs object from ConsentContext. + // + // Populates OpenRTB 2.6 canonical fields: + // - regs.gdpr: 1 if GDPR applies (TCF string present) + // - regs.us_privacy: raw US Privacy string + // - regs.gpp: raw GPP string + // - regs.gpp_sid: active GPP section IDs + let regs = Self::build_regs(consent_ctx); // Build ext object let request_info = RequestInfo::from_request(context.request); @@ -650,6 +650,32 @@ impl PrebidAuctionProvider { } } + /// Builds the `regs` object from a [`ConsentContext`]. + /// + /// Returns `None` if no consent-relevant data is present (avoids sending + /// an empty `regs` object to Prebid Server). + fn build_regs(consent_ctx: Option<&crate::consent::ConsentContext>) -> Option { + let ctx = consent_ctx?; + + // Only emit regs if there's something to say + let has_data = ctx.gdpr_applies + || ctx.raw_us_privacy.is_some() + || ctx.raw_gpp_string.is_some() + || ctx.gpc; + + if !has_data { + return None; + } + + Some(Regs { + gdpr: if ctx.gdpr_applies { Some(1) } else { Some(0) }, + us_privacy: ctx.raw_us_privacy.clone(), + gpp: ctx.raw_gpp_string.clone(), + gpp_sid: ctx.gpp_section_ids.clone(), + ext: None, + }) + } + /// Parse `OpenRTB` response into auction response. 
fn parse_openrtb_response(&self, json: &Json, response_time_ms: u64) -> AuctionResponse { let mut bids = Vec::new(); diff --git a/crates/common/src/lib.rs b/crates/common/src/lib.rs index a01865f6..12a9f685 100644 --- a/crates/common/src/lib.rs +++ b/crates/common/src/lib.rs @@ -9,7 +9,7 @@ //! - [`constants`]: Application-wide constants and configuration values //! - [`cookies`]: Cookie parsing and generation utilities //! - [`error`]: Error types and error handling utilities -//! - [`gdpr`]: GDPR consent management and TCF string parsing +//! - [`consent`]: Consent signal extraction and logging //! - [`geo`]: Geographic location utilities and DMA code extraction //! - [`models`]: Data models for ad serving and callbacks //! - [`prebid`]: Prebid integration and real-time bidding support @@ -36,6 +36,7 @@ pub mod auction; pub mod auction_config_types; pub mod auth; pub mod backend; +pub mod consent; pub mod constants; pub mod cookies; pub mod creative; diff --git a/crates/common/src/openrtb.rs b/crates/common/src/openrtb.rs index 9af3be69..9fa1585b 100644 --- a/crates/common/src/openrtb.rs +++ b/crates/common/src/openrtb.rs @@ -56,6 +56,11 @@ pub struct Site { pub struct User { #[serde(skip_serializing_if = "Option::is_none")] pub id: Option, + /// TCF v2 consent string (raw TC String from `euconsent-v2` cookie). + /// + /// `OpenRTB` 2.6 canonical field for GDPR consent. + #[serde(skip_serializing_if = "Option::is_none")] + pub consent: Option, #[serde(skip_serializing_if = "Option::is_none")] pub ext: Option, } @@ -91,10 +96,35 @@ pub struct Geo { #[derive(Debug, Serialize, Default)] pub struct Regs { + /// GDPR applicability flag (1 = GDPR applies, 0 = does not apply). + /// + /// `OpenRTB` 2.6 canonical field. Set based on TCF consent presence. + #[serde(skip_serializing_if = "Option::is_none")] + pub gdpr: Option, + /// US Privacy string (4-character IAB CCPA format). + /// + /// `OpenRTB` 2.6 top-level field (migrated from `regs.ext.us_privacy`). 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub us_privacy: Option, + /// GPP consent string (raw `__gpp` cookie value). + /// + /// `OpenRTB` 2.6 canonical field for IAB Global Privacy Platform. + #[serde(skip_serializing_if = "Option::is_none")] + pub gpp: Option, + /// GPP section ID list (active sections in the GPP string). + /// + /// `OpenRTB` 2.6 canonical field, derived from decoded GPP data. + #[serde(skip_serializing_if = "Option::is_none")] + pub gpp_sid: Option>, #[serde(skip_serializing_if = "Option::is_none")] pub ext: Option, } +/// Legacy `regs.ext` fields. +/// +/// Retained for backward compatibility with partners that read +/// `regs.ext.us_privacy` instead of the top-level `regs.us_privacy`. +/// Will be removed once all downstream partners migrate. #[derive(Debug, Serialize, Default)] pub struct RegsExt { #[serde(skip_serializing_if = "Option::is_none")] @@ -185,7 +215,7 @@ pub struct ResponseExt { #[cfg(test)] mod tests { - use super::{OpenRtbBid, OpenRtbResponse, ResponseExt, SeatBid}; + use super::*; use crate::auction::types::OrchestratorExt; #[test] @@ -245,4 +275,63 @@ mod tests { assert_eq!(serialized, expected); } + + #[test] + fn regs_serializes_consent_fields() { + let regs = Regs { + gdpr: Some(1), + us_privacy: Some("1YNN".to_string()), + gpp: Some("DBACNY~CPXxRfA".to_string()), + gpp_sid: Some(vec![2, 6]), + ext: None, + }; + + let serialized = serde_json::to_value(®s).expect("should serialize"); + assert_eq!(serialized["gdpr"], 1); + assert_eq!(serialized["us_privacy"], "1YNN"); + assert_eq!(serialized["gpp"], "DBACNY~CPXxRfA"); + assert_eq!(serialized["gpp_sid"], serde_json::json!([2, 6])); + assert!( + serialized.get("ext").is_none(), + "ext should be omitted when None" + ); + } + + #[test] + fn regs_omits_none_fields() { + let regs = Regs::default(); + let serialized = serde_json::to_value(®s).expect("should serialize"); + let obj = serialized.as_object().expect("should be object"); + assert!( + obj.is_empty(), + 
"all-None regs should serialize as empty object" + ); + } + + #[test] + fn user_serializes_consent_field() { + let user = User { + id: Some("user-1".to_string()), + consent: Some("CPXxGfAPXxGfA".to_string()), + ext: None, + }; + + let serialized = serde_json::to_value(&user).expect("should serialize"); + assert_eq!(serialized["consent"], "CPXxGfAPXxGfA"); + } + + #[test] + fn user_omits_consent_when_none() { + let user = User { + id: Some("user-1".to_string()), + consent: None, + ext: None, + }; + + let serialized = serde_json::to_value(&user).expect("should serialize"); + assert!( + serialized.get("consent").is_none(), + "consent should be omitted when None" + ); + } } diff --git a/crates/common/src/publisher.rs b/crates/common/src/publisher.rs index 78489d2e..1af7a56f 100644 --- a/crates/common/src/publisher.rs +++ b/crates/common/src/publisher.rs @@ -3,9 +3,11 @@ use fastly::http::{header, StatusCode}; use fastly::{Body, Request, Response}; use crate::backend::BackendConfig; +use crate::consent::build_consent_context; +use crate::cookies::{create_synthetic_cookie, handle_request_cookies}; use crate::http_util::{serve_static_with_etag, RequestInfo}; -use crate::constants::{HEADER_X_COMPRESS_HINT, HEADER_X_SYNTHETIC_ID}; +use crate::constants::{COOKIE_SYNTHETIC_ID, HEADER_X_COMPRESS_HINT, HEADER_X_SYNTHETIC_ID}; use crate::cookies::set_synthetic_cookie; use crate::error::TrustedServerError; use crate::integrations::IntegrationRegistry; @@ -202,8 +204,20 @@ pub fn handle_publisher_request( req.get_header("x-forwarded-proto"), ); + // Parse cookies once for reuse by both consent extraction and synthetic ID logic. + let cookie_jar = handle_request_cookies(&req)?; + + // Extract, decode, and log consent signals (TCF, GPP, US Privacy, GPC) + // from the incoming request. The ConsentContext carries both raw strings + // (for OpenRTB forwarding) and decoded data (for observability). 
+ let _consent_context = build_consent_context(cookie_jar.as_ref(), &req); + // Generate synthetic identifiers before the request body is consumed. let synthetic_id = get_or_generate_synthetic_id(settings, &req)?; + let has_synthetic_cookie = cookie_jar + .as_ref() + .and_then(|jar| jar.get(COOKIE_SYNTHETIC_ID)) + .is_some(); log::debug!("Proxy synthetic IDs - trusted: {}", synthetic_id); From 893517da2f92a058a3fc278330e74e5180e5d77e Mon Sep 17 00:00:00 2001 From: Christian Date: Wed, 18 Feb 2026 11:40:38 -0600 Subject: [PATCH 2/9] wip --- Cargo.lock | 199 +++++++++++++++++++- Cargo.toml | 1 + crates/common/Cargo.toml | 5 + crates/common/benches/consent_decode.rs | 236 ++++++++++++++++++++++++ 4 files changed, 440 insertions(+), 1 deletion(-) create mode 100644 crates/common/benches/consent_decode.rs diff --git a/Cargo.lock b/Cargo.lock index 9811c270..5df9f529 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -57,6 +57,18 @@ dependencies = [ "libc", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + [[package]] name = "anyhow" version = "1.0.100" @@ -185,6 +197,12 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.2.48" @@ -238,6 +256,33 @@ dependencies = [ "windows-link", ] +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "cipher" version = "0.4.4" @@ -249,6 +294,31 @@ dependencies = [ "zeroize", ] +[[package]] +name = "clap" +version = "4.5.59" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5caf74d17c3aec5495110c34cc3f78644bfa89af6c8993ed4de2790e49b6499" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.59" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "370daa45065b80218950227371916a1633217ae42b2715b2287b606dcd618e24" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" + [[package]] name = "config" version = "0.15.19" @@ -347,6 +417,41 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = 
"0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools 0.10.5", +] + [[package]] name = "crunchy" version = "0.2.4" @@ -720,7 +825,7 @@ dependencies = [ "fastly-shared", "fastly-sys", "http", - "itertools", + "itertools 0.13.0", "lazy_static", "mime", "serde", @@ -974,6 +1079,17 @@ dependencies = [ "subtle", ] +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + [[package]] name = "handlebars" version = "6.4.0" @@ -1031,6 +1147,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hex" version = "0.4.3" @@ -1239,6 +1361,26 @@ dependencies = [ "generic-array", ] +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -1507,6 +1649,12 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = 
"oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + [[package]] name = "opaque-debug" version = "0.3.1" @@ -1701,6 +1849,34 @@ dependencies = [ "spki", ] +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "poly1305" version = "0.8.0" @@ -2350,6 +2526,16 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tokio" version = "1.49.0" @@ -2445,6 +2631,7 @@ dependencies = [ "chrono", "config", "cookie", + "criterion", "derive_more", "ed25519-dalek", "error-stack", @@ -2693,6 +2880,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "web-sys" +version = "0.3.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "which" version = "8.0.0" diff --git a/Cargo.toml b/Cargo.toml index 54fbbefa..f9ba47c6 100644 --- 
a/Cargo.toml +++ b/Cargo.toml @@ -80,3 +80,4 @@ urlencoding = "2.1" uuid = { version = "1.18", features = ["v4"] } validator = { version = "0.20", features = ["derive"] } which = "8" +criterion = { version = "0.5", default-features = false, features = ["plotters", "cargo_bench_support"] } diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml index bcb916b4..b15f16bb 100644 --- a/crates/common/Cargo.toml +++ b/crates/common/Cargo.toml @@ -68,5 +68,10 @@ validator = { workspace = true } default = [] [dev-dependencies] +criterion = { workspace = true } temp-env = { workspace = true } tokio-test = { workspace = true } + +[[bench]] +name = "consent_decode" +harness = false diff --git a/crates/common/benches/consent_decode.rs b/crates/common/benches/consent_decode.rs new file mode 100644 index 00000000..32550a3c --- /dev/null +++ b/crates/common/benches/consent_decode.rs @@ -0,0 +1,236 @@ +//! Benchmarks for the consent decoding pipeline. +//! +//! Measures the computational cost of decoding consent signals (TCF v2, GPP, +//! US Privacy) to determine whether wiring decoding into the auction hot path +//! introduces unacceptable latency. +//! +//! Run with: `cargo bench -p trusted-server-common` + +use base64::engine::general_purpose::URL_SAFE_NO_PAD; +use base64::Engine as _; +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; + +use trusted_server_common::consent::tcf::decode_tc_string; +use trusted_server_common::consent::types::RawConsentSignals; +use trusted_server_common::consent::us_privacy::decode_us_privacy; +use trusted_server_common::consent::{build_context_from_signals, gpp}; + +// --------------------------------------------------------------------------- +// Test data +// --------------------------------------------------------------------------- + +/// Known-good GPP string with US Privacy section only (section ID 6). 
+const GPP_USP_ONLY: &str = "DBABTA~1YNN"; + +/// GPP string with both TCF EU v2 and US Privacy sections. +const GPP_TCF_AND_USP: &str = "DBACNY~CPXxRfAPXxRfAAfKABENB-CgAAAAAAAAAAYgAAAAAAAA~1YNN"; + +/// Builds a minimal TC String v2 byte buffer for benchmarking. +/// +/// This duplicates the test helper from `tcf.rs` since `#[cfg(test)]` helpers +/// are not available in bench targets. +fn build_tc_bytes(vendor_count: u16, use_range_encoding: bool) -> Vec { + let total_bits = if use_range_encoding { + // Core fields (213) + maxVendorId (16) + isRange (1) + numEntries (12) + // + one range entry per vendor group: isRange(1) + start(16) + end(16) + // We'll encode as one big range: vendors 1..=vendor_count + 213 + 17 + 12 + 1 + 32 + } else { + // Bitfield: core fields + maxVendorId + isRange + one bit per vendor + 213 + 17 + usize::from(vendor_count) + }; + let total_bytes = total_bits.div_ceil(8); + let mut buf = vec![0u8; total_bytes]; + + // Version (6 bits) = 2 + write_bits(&mut buf, 0, 6, 2); + // Created (36 bits) = 100000 (arbitrary) + write_bits(&mut buf, 6, 36, 100_000); + // LastUpdated (36 bits) = 200000 + write_bits(&mut buf, 42, 36, 200_000); + // CmpId (12 bits) = 7 + write_bits(&mut buf, 78, 12, 7); + // CmpVersion (12 bits) = 1 + write_bits(&mut buf, 90, 12, 1); + // ConsentScreen (6 bits) = 1 + write_bits(&mut buf, 102, 6, 1); + // ConsentLanguage (12 bits) = EN + write_bits(&mut buf, 108, 6, u64::from(b'E' - b'A')); + write_bits(&mut buf, 114, 6, u64::from(b'N' - b'A')); + // VendorListVersion (12 bits) = 42 + write_bits(&mut buf, 120, 12, 42); + // TcfPolicyVersion (6 bits) = 2 + write_bits(&mut buf, 132, 6, 2); + // IsServiceSpecific (1) = 0, UseNonStandardTexts (1) = 0 + // SpecialFeatureOptIns (12) = 0b000000000011 (features 11, 12) + write_bits(&mut buf, 140, 12, 0b0000_0000_0011); + // PurposesConsent (24) = purposes 1-4 consented + write_bits(&mut buf, 152, 24, 0b1111_0000_0000_0000_0000_0000); + // PurposesLITransparency (24) = purposes 1-2 
+ write_bits(&mut buf, 176, 24, 0b1100_0000_0000_0000_0000_0000); + // PurposeOneTreatment (1) = 0 + // PublisherCC (12) = EN + write_bits(&mut buf, 201, 6, u64::from(b'E' - b'A')); + write_bits(&mut buf, 207, 6, u64::from(b'N' - b'A')); + + // MaxVendorConsentId (16) + write_bits(&mut buf, 213, 16, u64::from(vendor_count)); + + if use_range_encoding { + // IsRangeEncoding (1) = 1 + write_bit(&mut buf, 229, true); + // NumEntries (12) = 1 (one range covering all vendors) + write_bits(&mut buf, 230, 12, 1); + // Entry: IsRangeEntry (1) = 1 + write_bit(&mut buf, 242, true); + // StartVendorId (16) = 1 + write_bits(&mut buf, 243, 16, 1); + // EndVendorId (16) = vendor_count + write_bits(&mut buf, 259, 16, u64::from(vendor_count)); + } else { + // IsRangeEncoding (1) = 0 (bitfield) + write_bit(&mut buf, 229, false); + // Set every other vendor as consented (realistic pattern) + for i in 0..usize::from(vendor_count) { + if i % 2 == 0 { + write_bit(&mut buf, 230 + i, true); + } + } + } + + buf +} + +fn write_bit(buf: &mut [u8], bit_offset: usize, value: bool) { + if value { + let byte_idx = bit_offset / 8; + let bit_idx = 7 - (bit_offset % 8); + if byte_idx < buf.len() { + buf[byte_idx] |= 1 << bit_idx; + } + } +} + +fn write_bits(buf: &mut [u8], bit_offset: usize, num_bits: usize, value: u64) { + for i in 0..num_bits { + let bit = (value >> (num_bits - 1 - i)) & 1 == 1; + write_bit(buf, bit_offset + i, bit); + } +} + +fn encode_tc_string(vendor_count: u16, use_range: bool) -> String { + let bytes = build_tc_bytes(vendor_count, use_range); + URL_SAFE_NO_PAD.encode(&bytes) +} + +// --------------------------------------------------------------------------- +// Benchmarks +// --------------------------------------------------------------------------- + +fn bench_us_privacy(c: &mut Criterion) { + c.bench_function("us_privacy_decode", |b| { + b.iter(|| decode_us_privacy(black_box("1YNN"))); + }); +} + +fn bench_tcf_decode(c: &mut Criterion) { + let small_tc = 
encode_tc_string(10, false); + let medium_tc = encode_tc_string(100, false); + let large_tc_bitfield = encode_tc_string(500, false); + let large_tc_range = encode_tc_string(500, true); + + let mut group = c.benchmark_group("tcf_decode"); + + group.bench_with_input( + BenchmarkId::new("bitfield", "10_vendors"), + &small_tc, + |b, tc| { + b.iter(|| decode_tc_string(black_box(tc))); + }, + ); + + group.bench_with_input( + BenchmarkId::new("bitfield", "100_vendors"), + &medium_tc, + |b, tc| { + b.iter(|| decode_tc_string(black_box(tc))); + }, + ); + + group.bench_with_input( + BenchmarkId::new("bitfield", "500_vendors"), + &large_tc_bitfield, + |b, tc| { + b.iter(|| decode_tc_string(black_box(tc))); + }, + ); + + group.bench_with_input( + BenchmarkId::new("range", "500_vendors"), + &large_tc_range, + |b, tc| { + b.iter(|| decode_tc_string(black_box(tc))); + }, + ); + + group.finish(); +} + +fn bench_gpp_decode(c: &mut Criterion) { + let mut group = c.benchmark_group("gpp_decode"); + + group.bench_function("usp_only", |b| { + b.iter(|| gpp::decode_gpp_string(black_box(GPP_USP_ONLY))); + }); + + group.bench_function("with_tcf", |b| { + b.iter(|| gpp::decode_gpp_string(black_box(GPP_TCF_AND_USP))); + }); + + group.finish(); +} + +fn bench_full_pipeline(c: &mut Criterion) { + // Build a realistic TC string (500 vendors, range encoding) + let tc_string = encode_tc_string(500, true); + + let all_signals = RawConsentSignals { + raw_tc_string: Some(tc_string), + raw_gpp_string: Some(GPP_USP_ONLY.to_owned()), + raw_gpp_sid: Some("6".to_owned()), + raw_us_privacy: Some("1YNN".to_owned()), + gpc: true, + }; + + let empty_signals = RawConsentSignals::default(); + + let tc_only = RawConsentSignals { + raw_tc_string: Some(encode_tc_string(500, true)), + ..Default::default() + }; + + let mut group = c.benchmark_group("full_pipeline"); + + group.bench_function("all_signals", |b| { + b.iter(|| build_context_from_signals(black_box(&all_signals))); + }); + + 
group.bench_function("empty_signals", |b| { + b.iter(|| build_context_from_signals(black_box(&empty_signals))); + }); + + group.bench_function("tcf_only", |b| { + b.iter(|| build_context_from_signals(black_box(&tc_only))); + }); + + group.finish(); +} + +criterion_group!( + benches, + bench_us_privacy, + bench_tcf_decode, + bench_gpp_decode, + bench_full_pipeline, +); +criterion_main!(benches); From 4b0d66820435b535ae9906717307e6f969a3b2f8 Mon Sep 17 00:00:00 2001 From: Christian Date: Wed, 25 Feb 2026 16:27:54 -0600 Subject: [PATCH 3/9] wip --- crates/common/src/auction/endpoints.rs | 8 +- crates/common/src/auction/formats.rs | 12 +- crates/common/src/consent/mod.rs | 122 ++++++++++----- crates/common/src/consent/types.rs | 165 +++++++++++++++++++++ crates/common/src/integrations/prebid.rs | 44 ++++-- crates/common/src/openrtb.rs | 181 ++++++++++++++++++++--- 6 files changed, 463 insertions(+), 69 deletions(-) diff --git a/crates/common/src/auction/endpoints.rs b/crates/common/src/auction/endpoints.rs index c2175caf..9470e1f7 100644 --- a/crates/common/src/auction/endpoints.rs +++ b/crates/common/src/auction/endpoints.rs @@ -4,6 +4,8 @@ use error_stack::{Report, ResultExt}; use fastly::{Request, Response}; use crate::auction::formats::AdRequest; +use crate::consent; +use crate::cookies::handle_request_cookies; use crate::error::TrustedServerError; use crate::settings::Settings; @@ -41,8 +43,12 @@ pub async fn handle_auction( body.ad_units.len() ); + // Extract consent from request cookies and headers. 
+ let cookie_jar = handle_request_cookies(&req)?; + let consent_context = consent::build_consent_context(cookie_jar.as_ref(), &req); + // Convert tsjs request format to auction request - let auction_request = convert_tsjs_to_auction_request(&body, settings, &req)?; + let auction_request = convert_tsjs_to_auction_request(&body, settings, &req, consent_context)?; // Create auction context let context = AuctionContext { diff --git a/crates/common/src/auction/formats.rs b/crates/common/src/auction/formats.rs index 3eb4d843..c91ea62c 100644 --- a/crates/common/src/auction/formats.rs +++ b/crates/common/src/auction/formats.rs @@ -13,6 +13,8 @@ use std::collections::HashMap; use uuid::Uuid; use crate::auction::context::ContextValue; +use crate::auction::types::OrchestratorExt; +use crate::consent::ConsentContext; use crate::creative; use crate::error::TrustedServerError; use crate::geo::GeoInfo; @@ -63,7 +65,12 @@ pub struct BannerUnit { pub sizes: Vec>, } -/// Convert tsjs/Prebid.js request format to internal `AuctionRequest`. +/// Convert tsjs/Prebid.js request format to internal [`AuctionRequest`]. +/// +/// The `consent` parameter carries decoded consent signals extracted from the +/// incoming request's cookies and headers. It is populated by the caller +/// (the `/auction` endpoint handler) and forwarded through to the +/// [`OpenRTB`][`crate::openrtb::OpenRtbRequest`] bid request. 
/// /// # Errors /// @@ -74,6 +81,7 @@ pub fn convert_tsjs_to_auction_request( body: &AdRequest, settings: &Settings, req: &Request, + consent: ConsentContext, ) -> Result> { // Generate synthetic ID let synthetic_id = get_or_generate_synthetic_id(settings, req).change_context( @@ -179,7 +187,7 @@ pub fn convert_tsjs_to_auction_request( user: UserInfo { id: synthetic_id, fresh_id, - consent: None, + consent: Some(consent), }, device, site: Some(SiteInfo { diff --git a/crates/common/src/consent/mod.rs b/crates/common/src/consent/mod.rs index ae40074f..54414529 100644 --- a/crates/common/src/consent/mod.rs +++ b/crates/common/src/consent/mod.rs @@ -27,7 +27,7 @@ pub mod types; pub mod us_privacy; pub use extraction::extract_consent_signals; -pub use types::{ConsentContext, ConsentSource, RawConsentSignals}; +pub use types::{ConsentContext, ConsentSource, RawConsentSignals, TcfConsent}; use cookie::CookieJar; use fastly::Request; @@ -59,6 +59,24 @@ pub fn extract_and_log_consent(jar: Option<&CookieJar>, req: &Request) -> RawCon signals } +/// Decodes a raw consent string, logging a warning on failure. +/// +/// Returns [`None`] and logs at `warn` level if decoding fails, preserving +/// the raw string for proxy-mode forwarding. +fn decode_or_warn( + raw: Option<&str>, + label: &str, + decode: fn(&str) -> Result, +) -> Option { + raw.and_then(|s| match decode(s) { + Ok(value) => Some(value), + Err(e) => { + log::warn!("Failed to decode {label}: {e}"); + None + } + }) +} + /// Builds a [`ConsentContext`] from previously extracted raw signals. /// /// This is the decode + normalize stage of the pipeline. Each signal is @@ -66,44 +84,21 @@ pub fn extract_and_log_consent(jar: Option<&CookieJar>, req: &Request) -> RawCon /// corresponding decoded field is left as `None`. 
#[must_use] pub fn build_context_from_signals(signals: &RawConsentSignals) -> ConsentContext { - // Decode US Privacy - let decoded_us_privacy = - signals - .raw_us_privacy - .as_deref() - .and_then(|s| match us_privacy::decode_us_privacy(s) { - Ok(usp) => Some(usp), - Err(e) => { - log::warn!("Failed to decode US Privacy string: {e}"); - None - } - }); - - // Decode TCF v2 - let decoded_tcf = - signals - .raw_tc_string - .as_deref() - .and_then(|s| match tcf::decode_tc_string(s) { - Ok(tcf) => Some(tcf), - Err(e) => { - log::warn!("Failed to decode TC String: {e}"); - None - } - }); - - // Decode GPP - let decoded_gpp = - signals - .raw_gpp_string - .as_deref() - .and_then(|s| match gpp::decode_gpp_string(s) { - Ok(gpp_consent) => Some(gpp_consent), - Err(e) => { - log::warn!("Failed to decode GPP string: {e}"); - None - } - }); + let decoded_us_privacy = decode_or_warn( + signals.raw_us_privacy.as_deref(), + "US Privacy string", + us_privacy::decode_us_privacy, + ); + let decoded_tcf = decode_or_warn( + signals.raw_tc_string.as_deref(), + "TC String", + tcf::decode_tc_string, + ); + let decoded_gpp = decode_or_warn( + signals.raw_gpp_string.as_deref(), + "GPP string", + gpp::decode_gpp_string, + ); // Resolve GPP section IDs: // - Prefer decoded GPP section IDs (authoritative). @@ -127,6 +122,9 @@ pub fn build_context_from_signals(signals: &RawConsentSignals) -> ConsentContext raw_gpp_string: signals.raw_gpp_string.clone(), gpp_section_ids, raw_us_privacy: signals.raw_us_privacy.clone(), + // AC string extraction not yet implemented — will be added when + // the CMP-specific cookie source is determined (Phase 1a). + raw_ac_string: None, gdpr_applies, tcf: decoded_tcf, @@ -138,6 +136,52 @@ pub fn build_context_from_signals(signals: &RawConsentSignals) -> ConsentContext } } +/// Filters Extended User IDs based on TCF consent. 
+/// +/// Per Prebid's tcfControl enforcement: +/// - **Purpose 1** (Store/access information on a device) must be consented +/// for any EID to exist (identifiers require cookie/localStorage access). +/// - **Purpose 4** (Personalized ads) must be consented for EIDs to be +/// transmitted in the bid request. +/// +/// Returns [`None`] if consent is missing or insufficient, stripping all EIDs +/// from the outgoing bid request. +#[must_use] +pub fn gate_eids_by_consent( + eids: Option>, + consent_ctx: Option<&ConsentContext>, +) -> Option> { + let eids = eids?; + if eids.is_empty() { + return None; + } + + // Resolve the effective TCF consent — standalone or from GPP. + let tcf = consent_ctx.and_then(|ctx| { + ctx.tcf + .as_ref() + .or_else(|| ctx.gpp.as_ref().and_then(|g| g.eu_tcf.as_ref())) + }); + + match tcf { + Some(tcf) if tcf.has_storage_consent() && tcf.has_personalized_ads_consent() => Some(eids), + Some(_) => { + log::info!("EIDs stripped: TCF Purpose 1 or 4 consent missing"); + None + } + None => { + // No TCF data — if GDPR applies, block EIDs as a precaution. + if consent_ctx.is_some_and(|c| c.gdpr_applies) { + log::info!("EIDs stripped: GDPR applies but no TCF consent available"); + None + } else { + // Non-GDPR context with no TCF — pass through. + Some(eids) + } + } + } +} + /// Logs a summary of the extracted consent signals. /// /// Emits an `info`-level log line when at least one consent signal is present, diff --git a/crates/common/src/consent/types.rs b/crates/common/src/consent/types.rs index d6436d42..095e8864 100644 --- a/crates/common/src/consent/types.rs +++ b/crates/common/src/consent/types.rs @@ -114,6 +114,11 @@ pub struct ConsentContext { pub gpp_section_ids: Option>, /// Raw US Privacy string from `us_privacy` cookie. pub raw_us_privacy: Option, + /// Raw Google Additional Consent (AC) string. + /// + /// Covers ad tech providers not in the IAB Global Vendor List but + /// participating in the Google ecosystem. 
Format: `{version}~{ids}~dv.` + pub raw_ac_string: Option, /// Whether GDPR applies to this request (derived from TCF presence). pub gdpr_applies: bool, @@ -137,6 +142,7 @@ impl ConsentContext { self.raw_tc_string.is_none() && self.raw_gpp_string.is_none() && self.raw_us_privacy.is_none() + && self.raw_ac_string.is_none() && self.tcf.is_none() && self.gpp.is_none() && self.us_privacy.is_none() @@ -192,6 +198,72 @@ pub struct TcfConsent { pub special_feature_opt_ins: Vec, } +impl TcfConsent { + /// Looks up a 1-indexed purpose in a TCF bitfield. + /// + /// Returns `false` for purpose 0 (invalid) and out-of-range indices. + fn purpose_bit(bits: &[bool], purpose: usize) -> bool { + purpose + .checked_sub(1) + .and_then(|idx| bits.get(idx).copied()) + .unwrap_or(false) + } + + /// Checks whether consent was granted for a specific TCF purpose. + /// + /// Purposes are 1-indexed per the TCF specification (Purpose 1 = index 0). + /// Returns `false` if the purpose is out of range. + #[must_use] + pub fn has_purpose_consent(&self, purpose: usize) -> bool { + Self::purpose_bit(&self.purpose_consents, purpose) + } + + /// Checks whether legitimate interest was established for a specific TCF purpose. + /// + /// Purposes are 1-indexed per the TCF specification. + /// Returns `false` if the purpose is out of range. + #[must_use] + pub fn has_purpose_li(&self, purpose: usize) -> bool { + Self::purpose_bit(&self.purpose_legitimate_interests, purpose) + } + + /// Checks whether a specific vendor has been granted consent. + #[must_use] + pub fn has_vendor_consent(&self, vendor_id: u16) -> bool { + self.vendor_consents.contains(&vendor_id) + } + + /// Checks whether a specific vendor has established legitimate interest. + #[must_use] + pub fn has_vendor_li(&self, vendor_id: u16) -> bool { + self.vendor_legitimate_interests.contains(&vendor_id) + } + + /// Whether Purpose 1 (Store/access information on a device) is consented. 
+ /// + /// Required for any EID or cookie-based identifier to be set. + #[must_use] + pub fn has_storage_consent(&self) -> bool { + self.has_purpose_consent(1) + } + + /// Whether Purpose 2 (Basic ads) is consented. + /// + /// Required for bid adapters to participate in the auction. + #[must_use] + pub fn has_basic_ads_consent(&self) -> bool { + self.has_purpose_consent(2) + } + + /// Whether Purpose 4 (Personalized ads) is consented. + /// + /// Controls whether user first-party data and EIDs are transmitted. + #[must_use] + pub fn has_personalized_ads_consent(&self) -> bool { + self.has_purpose_consent(4) + } +} + // --------------------------------------------------------------------------- // GPP // --------------------------------------------------------------------------- @@ -434,4 +506,97 @@ mod tests { "default source should be None" ); } + + fn make_tcf_consent() -> TcfConsent { + TcfConsent { + version: 2, + cmp_id: 1, + cmp_version: 1, + consent_screen: 1, + consent_language: "EN".to_owned(), + vendor_list_version: 42, + tcf_policy_version: 4, + created_ds: 0, + last_updated_ds: 0, + // Purposes 1, 2, 4 consented (indices 0, 1, 3) + purpose_consents: vec![ + true, true, false, true, false, false, false, false, false, false, false, false, + ], + // Purpose 7 LI (index 6) + purpose_legitimate_interests: vec![ + false, false, false, false, false, false, true, false, false, false, false, false, + ], + vendor_consents: vec![10, 32, 755], + vendor_legitimate_interests: vec![32], + special_feature_opt_ins: vec![false; 12], + } + } + + #[test] + fn tcf_has_purpose_consent() { + let tcf = make_tcf_consent(); + assert!(tcf.has_purpose_consent(1), "should have Purpose 1 consent"); + assert!(tcf.has_purpose_consent(2), "should have Purpose 2 consent"); + assert!( + !tcf.has_purpose_consent(3), + "should not have Purpose 3 consent" + ); + assert!(tcf.has_purpose_consent(4), "should have Purpose 4 consent"); + } + + #[test] + fn tcf_purpose_consent_out_of_range() { 
+ let tcf = make_tcf_consent(); + assert!( + !tcf.has_purpose_consent(0), + "purpose 0 should return false (1-indexed)" + ); + assert!( + !tcf.has_purpose_consent(99), + "out-of-range purpose should return false" + ); + } + + #[test] + fn tcf_has_purpose_li() { + let tcf = make_tcf_consent(); + assert!( + tcf.has_purpose_li(7), + "should have Purpose 7 legitimate interest" + ); + assert!( + !tcf.has_purpose_li(1), + "should not have Purpose 1 legitimate interest" + ); + } + + #[test] + fn tcf_has_vendor_consent() { + let tcf = make_tcf_consent(); + assert!( + tcf.has_vendor_consent(755), + "should have consent for vendor 755" + ); + assert!( + !tcf.has_vendor_consent(999), + "should not have consent for vendor 999" + ); + } + + #[test] + fn tcf_convenience_methods() { + let tcf = make_tcf_consent(); + assert!( + tcf.has_storage_consent(), + "should have storage consent (P1)" + ); + assert!( + tcf.has_basic_ads_consent(), + "should have basic ads consent (P2)" + ); + assert!( + tcf.has_personalized_ads_consent(), + "should have personalized ads consent (P4)" + ); + } } diff --git a/crates/common/src/integrations/prebid.rs b/crates/common/src/integrations/prebid.rs index e0caaf56..3e89aa5a 100644 --- a/crates/common/src/integrations/prebid.rs +++ b/crates/common/src/integrations/prebid.rs @@ -23,8 +23,8 @@ use crate::integrations::{ IntegrationRegistration, }; use crate::openrtb::{ - Banner, Device, Format, Geo, Imp, ImpExt, OpenRtbRequest, PrebidExt, PrebidImpExt, Regs, - RequestExt, Site, TrustedServerExt, User, UserExt, + Banner, ConsentedProvidersSettings, Device, Format, Geo, Imp, ImpExt, OpenRtbRequest, + PrebidExt, PrebidImpExt, Regs, RegsExt, RequestExt, Site, TrustedServerExt, User, UserExt, }; use crate::request_signing::{RequestSigner, SigningParams, SIGNING_VERSION}; use crate::settings::{IntegrationConfig, Settings}; @@ -571,12 +571,25 @@ impl PrebidAuctionProvider { } }); - // Build user object — populate consent from ConsentContext if available. 
+ // Build user object — populate consent at both OpenRTB 2.6 top-level + // and Prebid ext-based locations (dual placement). let consent_ctx = request.user.consent.as_ref(); + let raw_tc = consent_ctx.and_then(|c| c.raw_tc_string.clone()); let user = Some(User { id: Some(request.user.id.clone()), - consent: consent_ctx.and_then(|c| c.raw_tc_string.clone()), + // OpenRTB 2.6 top-level consent field + consent: raw_tc.clone(), ext: Some(UserExt { + // Prebid ext-based consent field + consent: raw_tc, + consented_providers_settings: consent_ctx + .and_then(|c| c.raw_ac_string.as_ref()) + .map(|ac| ConsentedProvidersSettings { + consented_providers: Some(ac.clone()), + }), + // EIDs will be populated by identity providers; consent gating + // is applied via `gate_eids_by_consent` before they are set here. + eids: None, synthetic_fresh: Some(request.user.fresh_id.clone()), }), }); @@ -652,12 +665,14 @@ impl PrebidAuctionProvider { /// Builds the `regs` object from a [`ConsentContext`]. /// - /// Returns `None` if no consent-relevant data is present (avoids sending + /// Populates consent fields at **both** `OpenRTB` 2.6 top-level locations + /// and the `regs.ext.*` locations that Prebid Server reads today. + /// + /// Returns [`None`] if no consent-relevant data is present (avoids sending /// an empty `regs` object to Prebid Server). fn build_regs(consent_ctx: Option<&crate::consent::ConsentContext>) -> Option { let ctx = consent_ctx?; - // Only emit regs if there's something to say let has_data = ctx.gdpr_applies || ctx.raw_us_privacy.is_some() || ctx.raw_gpp_string.is_some() @@ -667,12 +682,23 @@ impl PrebidAuctionProvider { return None; } - Some(Regs { - gdpr: if ctx.gdpr_applies { Some(1) } else { Some(0) }, + let gdpr = if ctx.gdpr_applies { Some(1) } else { Some(0) }; + + // Build ext first so the dual-placement fields are cloned once from + // ConsentContext (into ext), then once more into Regs top-level. 
+ let ext = RegsExt { + gdpr, us_privacy: ctx.raw_us_privacy.clone(), gpp: ctx.raw_gpp_string.clone(), gpp_sid: ctx.gpp_section_ids.clone(), - ext: None, + }; + + Some(Regs { + gdpr: ext.gdpr, + us_privacy: ext.us_privacy.clone(), + gpp: ext.gpp.clone(), + gpp_sid: ext.gpp_sid.clone(), + ext: Some(ext), }) } diff --git a/crates/common/src/openrtb.rs b/crates/common/src/openrtb.rs index 9fa1585b..0844d682 100644 --- a/crates/common/src/openrtb.rs +++ b/crates/common/src/openrtb.rs @@ -67,10 +67,61 @@ pub struct User { #[derive(Debug, Serialize, Default)] pub struct UserExt { + /// TCF v2 consent string (Prebid reads `user.ext.consent`). + #[serde(skip_serializing_if = "Option::is_none")] + pub consent: Option, + /// Google Additional Consent settings for Ad Manager / `AdX` demand. + #[serde( + rename = "ConsentedProvidersSettings", + skip_serializing_if = "Option::is_none" + )] + pub consented_providers_settings: Option, + /// Extended User IDs from identity providers. + /// + /// Gated by TCF Purpose 1 (storage) and Purpose 4 (personalized ads). + #[serde(skip_serializing_if = "Option::is_none")] + pub eids: Option>, #[serde(skip_serializing_if = "Option::is_none")] pub synthetic_fresh: Option, } +/// Google Additional Consent (AC) string container. +/// +/// Covers ad tech providers not in the IAB Global Vendor List but +/// participating in the Google ecosystem. Required for Google Ad Manager +/// and `AdX` demand. +/// +/// Format: `{version}~{provider_ids}~dv.` where provider IDs are +/// dot-separated Google ATP IDs. +#[derive(Debug, Serialize, Default)] +pub struct ConsentedProvidersSettings { + /// The AC string value (e.g. `"2~2628.2316.3119~dv."`). + #[serde(skip_serializing_if = "Option::is_none")] + pub consented_providers: Option, +} + +/// An Extended User ID entry from an identity provider. +#[derive(Debug, Serialize)] +pub struct Eid { + /// Identity provider domain (e.g. `"id5-sync.com"`). 
+ pub source: String, + /// One or more user IDs from this provider. + pub uids: Vec, +} + +/// A single user identifier within an [`Eid`] entry. +#[derive(Debug, Serialize)] +pub struct Uid { + /// The identifier value. + pub id: String, + /// Agent type: 1 = cookie/device, 2 = person, 3 = user-provided. + #[serde(skip_serializing_if = "Option::is_none")] + pub atype: Option, + /// Provider-specific extension data. + #[serde(skip_serializing_if = "Option::is_none")] + pub ext: Option, +} + #[derive(Debug, Serialize, Default)] pub struct Device { #[serde(skip_serializing_if = "Option::is_none")] @@ -120,15 +171,25 @@ pub struct Regs { pub ext: Option, } -/// Legacy `regs.ext` fields. +/// Prebid-compatible `regs.ext` consent fields. /// -/// Retained for backward compatibility with partners that read -/// `regs.ext.us_privacy` instead of the top-level `regs.us_privacy`. -/// Will be removed once all downstream partners migrate. -#[derive(Debug, Serialize, Default)] +/// Prebid Server reads consent signals from `regs.ext.*` rather than the +/// `OpenRTB` 2.6 top-level locations. We populate both to maximise +/// compatibility (see proposal Key Decision #2 — Dual-Placement). +#[derive(Debug, Clone, Serialize, Default)] pub struct RegsExt { + /// GDPR applicability flag (mirrors `regs.gdpr`). + #[serde(skip_serializing_if = "Option::is_none")] + pub gdpr: Option, + /// US Privacy string (mirrors `regs.us_privacy`). #[serde(skip_serializing_if = "Option::is_none")] pub us_privacy: Option, + /// GPP consent string (mirrors `regs.gpp`). + #[serde(skip_serializing_if = "Option::is_none")] + pub gpp: Option, + /// GPP section ID list (mirrors `regs.gpp_sid`). 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub gpp_sid: Option>, } #[derive(Debug, Serialize, Default)] @@ -277,23 +338,53 @@ mod tests { } #[test] - fn regs_serializes_consent_fields() { - let regs = Regs { + fn regs_serializes_dual_placement_consent_fields() { + // Mirror the production pattern: build ext, then duplicate into top-level. + let ext = RegsExt { gdpr: Some(1), us_privacy: Some("1YNN".to_string()), gpp: Some("DBACNY~CPXxRfA".to_string()), gpp_sid: Some(vec![2, 6]), - ext: None, + }; + let regs = Regs { + gdpr: ext.gdpr, + us_privacy: ext.us_privacy.clone(), + gpp: ext.gpp.clone(), + gpp_sid: ext.gpp_sid.clone(), + ext: Some(ext), }; let serialized = serde_json::to_value(®s).expect("should serialize"); - assert_eq!(serialized["gdpr"], 1); - assert_eq!(serialized["us_privacy"], "1YNN"); - assert_eq!(serialized["gpp"], "DBACNY~CPXxRfA"); - assert_eq!(serialized["gpp_sid"], serde_json::json!([2, 6])); - assert!( - serialized.get("ext").is_none(), - "ext should be omitted when None" + // Top-level fields + assert_eq!(serialized["gdpr"], 1, "top-level gdpr should be 1"); + assert_eq!( + serialized["us_privacy"], "1YNN", + "top-level us_privacy should match" + ); + assert_eq!( + serialized["gpp"], "DBACNY~CPXxRfA", + "top-level gpp should match" + ); + assert_eq!( + serialized["gpp_sid"], + serde_json::json!([2, 6]), + "top-level gpp_sid should match" + ); + // ext-based fields (Prebid reads these) + let ext = &serialized["ext"]; + assert_eq!(ext["gdpr"], 1, "ext gdpr should mirror top-level"); + assert_eq!( + ext["us_privacy"], "1YNN", + "ext us_privacy should mirror top-level" + ); + assert_eq!( + ext["gpp"], "DBACNY~CPXxRfA", + "ext gpp should mirror top-level" + ); + assert_eq!( + ext["gpp_sid"], + serde_json::json!([2, 6]), + "ext gpp_sid should mirror top-level" ); } @@ -309,15 +400,45 @@ mod tests { } #[test] - fn user_serializes_consent_field() { + fn regs_ext_omits_none_fields() { + let ext = RegsExt::default(); + let serialized = 
serde_json::to_value(&ext).expect("should serialize"); + let obj = serialized.as_object().expect("should be object"); + assert!( + obj.is_empty(), + "all-None RegsExt should serialize as empty object" + ); + } + + #[test] + fn user_serializes_dual_placement_consent() { let user = User { id: Some("user-1".to_string()), consent: Some("CPXxGfAPXxGfA".to_string()), - ext: None, + ext: Some(UserExt { + consent: Some("CPXxGfAPXxGfA".to_string()), + consented_providers_settings: Some(ConsentedProvidersSettings { + consented_providers: Some("2~2628.2316~dv.".to_string()), + }), + eids: None, + synthetic_fresh: None, + }), }; let serialized = serde_json::to_value(&user).expect("should serialize"); - assert_eq!(serialized["consent"], "CPXxGfAPXxGfA"); + assert_eq!( + serialized["consent"], "CPXxGfAPXxGfA", + "top-level user.consent should be set" + ); + assert_eq!( + serialized["ext"]["consent"], "CPXxGfAPXxGfA", + "user.ext.consent should mirror top-level" + ); + assert_eq!( + serialized["ext"]["ConsentedProvidersSettings"]["consented_providers"], + "2~2628.2316~dv.", + "AC string should be present" + ); } #[test] @@ -334,4 +455,28 @@ mod tests { "consent should be omitted when None" ); } + + #[test] + fn eid_serializes_correctly() { + let eid = Eid { + source: "id5-sync.com".to_string(), + uids: vec![Uid { + id: "ID5-abc123".to_string(), + atype: Some(1), + ext: None, + }], + }; + + let serialized = serde_json::to_value(&eid).expect("should serialize"); + assert_eq!(serialized["source"], "id5-sync.com", "source should match"); + assert_eq!( + serialized["uids"][0]["id"], "ID5-abc123", + "uid id should match" + ); + assert_eq!(serialized["uids"][0]["atype"], 1, "atype should be 1"); + assert!( + serialized["uids"][0].get("ext").is_none(), + "ext should be omitted when None" + ); + } } From 05b1bc307c69235d482a37c919153a8fb225d38d Mon Sep 17 00:00:00 2001 From: Christian Date: Wed, 25 Feb 2026 18:35:50 -0600 Subject: [PATCH 4/9] feat: implement consent forwarding pipeline 
(Phases 2-5) Wire consent signals into OpenRTB bid requests, add per-partner forwarding modes, and persist consent to KV Store for returning users. Phase 2 - OpenRTB integration: populate regs/user consent fields with dual-placement (top-level 2.6 + ext), add EID consent gating, AC string forwarding, and new Eid/Uid/ConsentedProvidersSettings structs. Phase 3 - Configuration + observability: add [consent] config section with jurisdiction detection, expiration checking, GPC-to-US-Privacy construction, and structured logging. Phase 4 - Partner integrations: cookie stripping via ConsentForwardingMode, Prebid/Lockr consent cookie filtering, APS consent fields, adserver mock consent summary. Phase 5 - KV Store persistence: consent/kv.rs with KvConsentEntry and ConsentKvMetadata types, SHA-256 fingerprint change detection, read fallback when cookies absent, write-on-change via Fastly KV Store API. --- crates/common/build.rs | 5 +- crates/common/src/auction/endpoints.rs | 12 +- crates/common/src/consent/jurisdiction.rs | 208 +++++++ crates/common/src/consent/kv.rs | 549 ++++++++++++++++++ crates/common/src/consent/mod.rs | 362 +++++++++++- crates/common/src/consent/types.rs | 60 +- crates/common/src/consent_config.rs | 472 +++++++++++++++ crates/common/src/cookies.rs | 108 +++- .../common/src/integrations/adserver_mock.rs | 63 ++ crates/common/src/integrations/aps.rs | 141 +++++ crates/common/src/integrations/lockr.rs | 11 +- crates/common/src/integrations/prebid.rs | 38 +- crates/common/src/lib.rs | 1 + crates/common/src/publisher.rs | 28 +- crates/common/src/settings.rs | 3 + fastly.toml | 4 + trusted-server.toml | 26 + 17 files changed, 2055 insertions(+), 36 deletions(-) create mode 100644 crates/common/src/consent/jurisdiction.rs create mode 100644 crates/common/src/consent/kv.rs create mode 100644 crates/common/src/consent_config.rs diff --git a/crates/common/build.rs b/crates/common/build.rs index cb1e60ae..1c478a53 100644 --- a/crates/common/build.rs +++ 
b/crates/common/build.rs @@ -1,4 +1,4 @@ -#![allow(clippy::unwrap_used, clippy::panic)] +#![allow(clippy::unwrap_used, clippy::panic, dead_code)] #[path = "src/error.rs"] mod error; @@ -6,6 +6,9 @@ mod error; #[path = "src/auction_config_types.rs"] mod auction_config_types; +#[path = "src/consent_config.rs"] +mod consent_config; + #[path = "src/settings.rs"] mod settings; diff --git a/crates/common/src/auction/endpoints.rs b/crates/common/src/auction/endpoints.rs index 9470e1f7..c155181e 100644 --- a/crates/common/src/auction/endpoints.rs +++ b/crates/common/src/auction/endpoints.rs @@ -7,6 +7,7 @@ use crate::auction::formats::AdRequest; use crate::consent; use crate::cookies::handle_request_cookies; use crate::error::TrustedServerError; +use crate::geo::GeoInfo; use crate::settings::Settings; use super::formats::{convert_to_openrtb_response, convert_tsjs_to_auction_request}; @@ -43,9 +44,16 @@ pub async fn handle_auction( body.ad_units.len() ); - // Extract consent from request cookies and headers. + // Extract consent from request cookies, headers, and geo. let cookie_jar = handle_request_cookies(&req)?; - let consent_context = consent::build_consent_context(cookie_jar.as_ref(), &req); + let geo = GeoInfo::from_request(&req); + let consent_context = consent::build_consent_context(&consent::ConsentPipelineInput { + jar: cookie_jar.as_ref(), + req: &req, + config: &settings.consent, + geo: geo.as_ref(), + synthetic_id: None, // Auction requests don't carry a Synthetic ID yet. + }); // Convert tsjs request format to auction request let auction_request = convert_tsjs_to_auction_request(&body, settings, &req, consent_context)?; diff --git a/crates/common/src/consent/jurisdiction.rs b/crates/common/src/consent/jurisdiction.rs new file mode 100644 index 00000000..b91313c7 --- /dev/null +++ b/crates/common/src/consent/jurisdiction.rs @@ -0,0 +1,208 @@ +//! Jurisdiction detection for consent observability. +//! +//! 
Determines the applicable privacy regime based on geolocation data and +//! publisher configuration. Used for **logging and monitoring only** — the +//! detected jurisdiction never causes consent to be synthesized (see proposal +//! Key Decision #3). + +use core::fmt; + +use crate::consent_config::ConsentConfig; +use crate::geo::GeoInfo; + +/// The privacy jurisdiction applicable to a request. +/// +/// Derived from the user's geolocation and the publisher's configured +/// country/state lists. Used for observability — not for consent synthesis. +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub enum Jurisdiction { + /// GDPR applies (EU/EEA/UK per `consent.gdpr.applies_in`). + Gdpr, + /// A US state with an active comprehensive privacy law. + UsState(String), + /// Geolocation is known but no matching regulation was found. + NonRegulated, + /// No geolocation data available — jurisdiction cannot be determined. + #[default] + Unknown, +} + +impl fmt::Display for Jurisdiction { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Gdpr => write!(f, "GDPR"), + Self::UsState(state) => write!(f, "US-{state}"), + Self::NonRegulated => write!(f, "non-regulated"), + Self::Unknown => write!(f, "unknown"), + } + } +} + +/// Detects the privacy jurisdiction for a request based on geolocation. +/// +/// Checks the user's country against `config.gdpr.applies_in`, and for US +/// users checks the region against `config.us_states.privacy_states`. +/// +/// Returns [`Jurisdiction::Unknown`] when no geo data is available. +#[must_use] +pub fn detect_jurisdiction(geo: Option<&GeoInfo>, config: &ConsentConfig) -> Jurisdiction { + let geo = match geo { + Some(g) => g, + None => return Jurisdiction::Unknown, + }; + + // Check GDPR countries first (EU/EEA/UK). 
+ if config + .gdpr + .applies_in + .iter() + .any(|code| code.eq_ignore_ascii_case(&geo.country)) + { + return Jurisdiction::Gdpr; + } + + // For US users, check if the region is a state with a privacy law. + if geo.country.eq_ignore_ascii_case("US") { + if let Some(region) = &geo.region { + if config + .us_states + .privacy_states + .iter() + .any(|state| state.eq_ignore_ascii_case(region)) + { + return Jurisdiction::UsState(region.to_uppercase()); + } + } + } + + Jurisdiction::NonRegulated +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::{detect_jurisdiction, Jurisdiction}; + use crate::consent_config::ConsentConfig; + use crate::geo::GeoInfo; + + fn make_geo(country: &str, region: Option<&str>) -> GeoInfo { + GeoInfo { + city: "Test".to_owned(), + country: country.to_owned(), + continent: "Test".to_owned(), + latitude: 0.0, + longitude: 0.0, + metro_code: 0, + region: region.map(str::to_owned), + } + } + + #[test] + fn gdpr_detected_for_eu_country() { + let config = ConsentConfig::default(); + let geo = make_geo("DE", None); + assert_eq!( + detect_jurisdiction(Some(&geo), &config), + Jurisdiction::Gdpr, + "Germany should trigger GDPR" + ); + } + + #[test] + fn gdpr_detected_for_eea_country() { + let config = ConsentConfig::default(); + let geo = make_geo("NO", None); + assert_eq!( + detect_jurisdiction(Some(&geo), &config), + Jurisdiction::Gdpr, + "Norway (EEA) should trigger GDPR" + ); + } + + #[test] + fn gdpr_detected_for_uk() { + let config = ConsentConfig::default(); + let geo = make_geo("GB", None); + assert_eq!( + detect_jurisdiction(Some(&geo), &config), + Jurisdiction::Gdpr, + "UK should trigger GDPR" + ); + } + + #[test] + fn us_state_detected_for_california() { + let config = ConsentConfig::default(); + let geo = make_geo("US", Some("CA")); + assert_eq!( + 
detect_jurisdiction(Some(&geo), &config), + Jurisdiction::UsState("CA".to_owned()), + "California should trigger US state privacy" + ); + } + + #[test] + fn us_non_privacy_state_is_non_regulated() { + let config = ConsentConfig::default(); + let geo = make_geo("US", Some("WY")); + assert_eq!( + detect_jurisdiction(Some(&geo), &config), + Jurisdiction::NonRegulated, + "Wyoming should be non-regulated" + ); + } + + #[test] + fn us_no_region_is_non_regulated() { + let config = ConsentConfig::default(); + let geo = make_geo("US", None); + assert_eq!( + detect_jurisdiction(Some(&geo), &config), + Jurisdiction::NonRegulated, + "US without region should be non-regulated" + ); + } + + #[test] + fn non_gdpr_non_us_is_non_regulated() { + let config = ConsentConfig::default(); + let geo = make_geo("JP", None); + assert_eq!( + detect_jurisdiction(Some(&geo), &config), + Jurisdiction::NonRegulated, + "Japan should be non-regulated" + ); + } + + #[test] + fn no_geo_returns_unknown() { + let config = ConsentConfig::default(); + assert_eq!( + detect_jurisdiction(None, &config), + Jurisdiction::Unknown, + "missing geo should return unknown" + ); + } + + #[test] + fn case_insensitive_country_matching() { + let config = ConsentConfig::default(); + let geo = make_geo("de", None); + assert_eq!( + detect_jurisdiction(Some(&geo), &config), + Jurisdiction::Gdpr, + "lowercase country code should still match" + ); + } + + #[test] + fn display_formatting() { + assert_eq!(Jurisdiction::Gdpr.to_string(), "GDPR"); + assert_eq!(Jurisdiction::UsState("CA".to_owned()).to_string(), "US-CA"); + assert_eq!(Jurisdiction::NonRegulated.to_string(), "non-regulated"); + assert_eq!(Jurisdiction::Unknown.to_string(), "unknown"); + } +} diff --git a/crates/common/src/consent/kv.rs b/crates/common/src/consent/kv.rs new file mode 100644 index 00000000..2cef966d --- /dev/null +++ b/crates/common/src/consent/kv.rs @@ -0,0 +1,549 @@ +//! KV Store consent persistence. +//! +//! 
Stores and retrieves consent data from a Fastly KV Store, keyed by +//! Synthetic ID. This provides consent continuity for returning users +//! whose browsers may not have consent cookies on every request. +//! +//! # Storage layout +//! +//! Each entry uses: +//! - **Body** ([`KvConsentEntry`]) — JSON with raw consent strings and context. +//! - **Metadata** ([`ConsentKvMetadata`]) — compact JSON summary for fast +//! consent status checks and change detection (max 2000 bytes). +//! +//! # Change detection +//! +//! Writes only occur when consent signals have actually changed. +//! [`consent_fingerprint`] hashes the raw strings into a compact fingerprint +//! stored in metadata. On the next request, the existing fingerprint is +//! compared before writing. + +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; + +use super::jurisdiction::Jurisdiction; +use super::types::{ConsentContext, ConsentSource}; + +// --------------------------------------------------------------------------- +// KV body (JSON, stored as value) +// --------------------------------------------------------------------------- + +/// Consent data stored in the KV Store body. +/// +/// Contains the raw consent strings needed to reconstruct a [`ConsentContext`]. +/// Decoded data (TCF, GPP, US Privacy) is not stored — it is re-decoded on +/// read to avoid stale decoded state. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct KvConsentEntry { + /// Raw TC String from `euconsent-v2` cookie. + #[serde(skip_serializing_if = "Option::is_none")] + pub raw_tc_string: Option, + /// Raw GPP string from `__gpp` cookie. + #[serde(skip_serializing_if = "Option::is_none")] + pub raw_gpp_string: Option, + /// GPP section IDs (decoded or from `__gpp_sid` cookie). + #[serde(skip_serializing_if = "Option::is_none")] + pub gpp_section_ids: Option>, + /// Raw US Privacy string from `us_privacy` cookie. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub raw_us_privacy: Option, + /// Raw Google Additional Consent (AC) string. + #[serde(skip_serializing_if = "Option::is_none")] + pub raw_ac_string: Option, + + /// Whether GDPR applies to this request. + pub gdpr_applies: bool, + /// Global Privacy Control signal. + pub gpc: bool, + /// Serialized jurisdiction (e.g. `"GDPR"`, `"US-CA"`, `"unknown"`). + pub jurisdiction: String, + + /// When this entry was stored (deciseconds since Unix epoch). + pub stored_at_ds: u64, +} + +// --------------------------------------------------------------------------- +// KV metadata (compact JSON, max 2000 bytes) +// --------------------------------------------------------------------------- + +/// Compact consent summary stored in KV Store metadata. +/// +/// Used for fast consent status checks without reading the full body, +/// and for change detection via the `fingerprint` field. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ConsentKvMetadata { + /// SHA-256 fingerprint (first 16 hex chars) of all raw consent strings. + /// + /// Used for write-on-change detection. If the fingerprint matches the + /// current request's consent signals, no write is needed. + pub fp: String, + /// Whether GDPR applies. + pub gdpr: bool, + /// Whether GPC is set. + pub gpc: bool, + /// Whether a US Privacy string is present. + pub usp: bool, + /// Whether a TCF string is present. + pub tcf: bool, +} + +// --------------------------------------------------------------------------- +// Conversions +// --------------------------------------------------------------------------- + +/// Builds a [`KvConsentEntry`] from a [`ConsentContext`]. +/// +/// Captures only the raw strings and contextual flags. Decoded data is +/// intentionally omitted — it will be re-decoded on read. 
+#[must_use] +pub fn entry_from_context(ctx: &ConsentContext, now_ds: u64) -> KvConsentEntry { + KvConsentEntry { + raw_tc_string: ctx.raw_tc_string.clone(), + raw_gpp_string: ctx.raw_gpp_string.clone(), + gpp_section_ids: ctx.gpp_section_ids.clone(), + raw_us_privacy: ctx.raw_us_privacy.clone(), + raw_ac_string: ctx.raw_ac_string.clone(), + gdpr_applies: ctx.gdpr_applies, + gpc: ctx.gpc, + jurisdiction: ctx.jurisdiction.to_string(), + stored_at_ds: now_ds, + } +} + +/// Builds a [`ConsentKvMetadata`] from a [`ConsentContext`]. +#[must_use] +pub fn metadata_from_context(ctx: &ConsentContext) -> ConsentKvMetadata { + ConsentKvMetadata { + fp: consent_fingerprint(ctx), + gdpr: ctx.gdpr_applies, + gpc: ctx.gpc, + usp: ctx.raw_us_privacy.is_some(), + tcf: ctx.raw_tc_string.is_some(), + } +} + +/// Converts a [`KvConsentEntry`] into [`super::types::RawConsentSignals`] +/// suitable for re-decoding via [`super::build_context_from_signals`]. +#[must_use] +pub fn signals_from_entry(entry: &KvConsentEntry) -> super::types::RawConsentSignals { + super::types::RawConsentSignals { + raw_tc_string: entry.raw_tc_string.clone(), + raw_gpp_string: entry.raw_gpp_string.clone(), + raw_gpp_sid: entry.gpp_section_ids.as_ref().map(|ids| { + ids.iter() + .map(ToString::to_string) + .collect::>() + .join(",") + }), + raw_us_privacy: entry.raw_us_privacy.clone(), + gpc: entry.gpc, + } +} + +/// Reconstructs a [`ConsentContext`] from a KV Store entry. +/// +/// Re-decodes the raw strings to populate structured fields (TCF, GPP, US +/// Privacy). The `source` is set to [`ConsentSource::KvStore`] and the +/// `jurisdiction` is parsed from the stored string representation. +#[must_use] +pub fn context_from_entry(entry: &KvConsentEntry) -> ConsentContext { + let signals = signals_from_entry(entry); + let mut ctx = super::build_context_from_signals(&signals); + + // Restore context fields that aren't derived from raw signals. 
+ ctx.gdpr_applies = entry.gdpr_applies; + ctx.gpc = entry.gpc; + ctx.raw_ac_string = entry.raw_ac_string.clone(); + ctx.jurisdiction = parse_jurisdiction(&entry.jurisdiction); + ctx.source = ConsentSource::KvStore; + + ctx +} + +// --------------------------------------------------------------------------- +// Fingerprinting +// --------------------------------------------------------------------------- + +/// Computes a compact fingerprint of the consent signals for change detection. +/// +/// Returns the first 16 hex characters of a SHA-256 hash computed over all +/// raw consent strings and the GPC flag. This is sufficient for detecting +/// changes without storing full hashes. +#[must_use] +pub fn consent_fingerprint(ctx: &ConsentContext) -> String { + let mut hasher = Sha256::new(); + + // Feed each signal into the hash, separated by a sentinel byte to + // prevent ambiguity (e.g., None+Some("x") vs Some("x")+None). + hash_optional(&mut hasher, ctx.raw_tc_string.as_deref()); + hash_optional(&mut hasher, ctx.raw_gpp_string.as_deref()); + hash_optional(&mut hasher, ctx.raw_us_privacy.as_deref()); + hash_optional(&mut hasher, ctx.raw_ac_string.as_deref()); + hasher.update(if ctx.gpc { b"1" } else { b"0" }); + + let result = hasher.finalize(); + hex::encode(&result[..8]) // 16 hex chars = 8 bytes = 64 bits +} + +/// Feeds an optional string into the hasher with sentinel bytes. +fn hash_optional(hasher: &mut Sha256, value: Option<&str>) { + match value { + Some(s) => { + hasher.update(b"\x01"); + hasher.update(s.as_bytes()); + } + None => hasher.update(b"\x00"), + } +} + +/// Parses a jurisdiction string back into a [`Jurisdiction`] enum. 
+fn parse_jurisdiction(s: &str) -> Jurisdiction { + match s { + "GDPR" => Jurisdiction::Gdpr, + "non-regulated" => Jurisdiction::NonRegulated, + "unknown" => Jurisdiction::Unknown, + s if s.starts_with("US-") => Jurisdiction::UsState(s[3..].to_owned()), + _ => Jurisdiction::Unknown, + } +} + +// --------------------------------------------------------------------------- +// KV Store operations +// --------------------------------------------------------------------------- + +/// Opens a Fastly KV Store by name, logging a warning on failure. +/// +/// Returns [`None`] if the store does not exist or cannot be opened. +fn open_store(store_name: &str) -> Option { + match fastly::kv_store::KVStore::open(store_name) { + Ok(Some(store)) => Some(store), + Ok(None) => { + log::warn!("Consent KV store '{store_name}' not found"); + None + } + Err(e) => { + log::warn!("Failed to open consent KV store '{store_name}': {e}"); + None + } + } +} + +/// Checks whether the stored consent fingerprint matches the current one. +/// +/// Returns `true` when the stored metadata fingerprint equals `new_fp`, +/// meaning no write is needed. +fn fingerprint_unchanged( + store: &fastly::kv_store::KVStore, + synthetic_id: &str, + new_fp: &str, +) -> bool { + let stored_fp = store + .lookup(synthetic_id) + .ok() + .and_then(|resp| resp.metadata()) + .and_then(|bytes| serde_json::from_slice::(&bytes).ok()) + .map(|meta| meta.fp); + + stored_fp.as_deref() == Some(new_fp) +} + +/// Loads consent data from the KV Store for a given Synthetic ID. +/// +/// Returns `Some(ConsentContext)` if a valid entry is found, [`None`] if the +/// key does not exist or deserialization fails. Errors are logged but never +/// propagated — KV Store failures must not break the request pipeline. +/// +/// # Arguments +/// +/// * `store_name` — The KV Store name (from `consent.consent_store` config). +/// * `synthetic_id` — The Synthetic ID used as the KV Store key. 
+#[must_use] +pub fn load_consent_from_kv(store_name: &str, synthetic_id: &str) -> Option { + let store = open_store(store_name)?; + + let mut response = match store.lookup(synthetic_id) { + Ok(resp) => resp, + Err(e) => { + log::debug!("Consent KV lookup miss for '{synthetic_id}': {e}"); + return None; + } + }; + + let body_bytes = response.take_body_bytes(); + match serde_json::from_slice::(&body_bytes) { + Ok(entry) => { + log::info!( + "Loaded consent from KV store for '{synthetic_id}' (stored_at_ds={})", + entry.stored_at_ds + ); + Some(context_from_entry(&entry)) + } + Err(e) => { + log::warn!("Failed to deserialize consent KV entry for '{synthetic_id}': {e}"); + None + } + } +} + +/// Saves consent data to the KV Store, writing only when signals have changed. +/// +/// Compares the fingerprint of the current consent signals against the +/// stored metadata. If they match, the write is skipped. Otherwise, the +/// entry is written with the configured TTL. +/// +/// # Arguments +/// +/// * `store_name` — The KV Store name (from `consent.consent_store` config). +/// * `synthetic_id` — The Synthetic ID used as the KV Store key. +/// * `ctx` — The current request's consent context. +/// * `max_age_days` — TTL for the entry, matching `max_consent_age_days`. 
+pub fn save_consent_to_kv( + store_name: &str, + synthetic_id: &str, + ctx: &ConsentContext, + max_age_days: u32, +) { + if ctx.is_empty() { + log::debug!("Skipping consent KV write: consent is empty"); + return; + } + + let Some(store) = open_store(store_name) else { + return; + }; + + let metadata = metadata_from_context(ctx); + + if fingerprint_unchanged(&store, synthetic_id, &metadata.fp) { + log::debug!( + "Consent unchanged for '{synthetic_id}' (fp={}), skipping write", + metadata.fp + ); + return; + } + + let entry = entry_from_context(ctx, super::now_deciseconds()); + + let Ok(body) = serde_json::to_string(&entry) else { + log::warn!("Failed to serialize consent entry for '{synthetic_id}'"); + return; + }; + let Ok(meta_str) = serde_json::to_string(&metadata) else { + log::warn!("Failed to serialize consent metadata for '{synthetic_id}'"); + return; + }; + + let ttl = std::time::Duration::from_secs(u64::from(max_age_days) * 86_400); + + match store + .build_insert() + .metadata(&meta_str) + .time_to_live(ttl) + .execute(synthetic_id, body) + { + Ok(()) => { + log::info!( + "Saved consent to KV store for '{synthetic_id}' (fp={}, ttl={max_age_days}d)", + metadata.fp + ); + } + Err(e) => { + log::warn!("Failed to write consent to KV store for '{synthetic_id}': {e}"); + } + } +} +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use crate::consent::jurisdiction::Jurisdiction; + use crate::consent::types::{ConsentContext, ConsentSource}; + + fn make_test_context() -> ConsentContext { + ConsentContext { + raw_tc_string: Some("CPXxGfAPXxGfA".to_owned()), + raw_gpp_string: Some("DBACNYA~CPXxGfA".to_owned()), + gpp_section_ids: Some(vec![2, 6]), + raw_us_privacy: Some("1YNN".to_owned()), + raw_ac_string: None, + gdpr_applies: true, + tcf: None, + gpp: None, + us_privacy: None, + expired: false, + gpc: 
false, + jurisdiction: Jurisdiction::Gdpr, + source: ConsentSource::Cookie, + } + } + + #[test] + fn entry_roundtrip() { + let ctx = make_test_context(); + let entry = entry_from_context(&ctx, 1_000_000); + let json = serde_json::to_string(&entry).expect("should serialize"); + let restored: KvConsentEntry = serde_json::from_str(&json).expect("should deserialize"); + + assert_eq!(restored.raw_tc_string, ctx.raw_tc_string); + assert_eq!(restored.raw_gpp_string, ctx.raw_gpp_string); + assert_eq!(restored.gpp_section_ids, ctx.gpp_section_ids); + assert_eq!(restored.raw_us_privacy, ctx.raw_us_privacy); + assert_eq!(restored.gdpr_applies, ctx.gdpr_applies); + assert_eq!(restored.gpc, ctx.gpc); + assert_eq!(restored.jurisdiction, "GDPR"); + assert_eq!(restored.stored_at_ds, 1_000_000); + } + + #[test] + fn metadata_roundtrip() { + let ctx = make_test_context(); + let meta = metadata_from_context(&ctx); + let json = serde_json::to_string(&meta).expect("should serialize"); + let restored: ConsentKvMetadata = serde_json::from_str(&json).expect("should deserialize"); + + assert_eq!(restored.fp, meta.fp); + assert!(restored.gdpr); + assert!(!restored.gpc); + assert!(restored.usp); + assert!(restored.tcf); + } + + #[test] + fn metadata_fits_in_2000_bytes() { + let ctx = make_test_context(); + let meta = metadata_from_context(&ctx); + let json = serde_json::to_string(&meta).expect("should serialize"); + assert!( + json.len() <= 2000, + "metadata JSON must fit in 2000 bytes, was {} bytes", + json.len() + ); + } + + #[test] + fn context_roundtrip_via_entry() { + let original = make_test_context(); + let entry = entry_from_context(&original, 1_000_000); + let restored = context_from_entry(&entry); + + assert_eq!(restored.raw_tc_string, original.raw_tc_string); + assert_eq!(restored.raw_gpp_string, original.raw_gpp_string); + assert_eq!(restored.raw_us_privacy, original.raw_us_privacy); + assert_eq!(restored.gdpr_applies, original.gdpr_applies); + assert_eq!(restored.gpc, 
original.gpc); + assert_eq!(restored.jurisdiction, original.jurisdiction); + assert_eq!(restored.source, ConsentSource::KvStore); + } + + #[test] + fn fingerprint_deterministic() { + let ctx = make_test_context(); + let fp1 = consent_fingerprint(&ctx); + let fp2 = consent_fingerprint(&ctx); + assert_eq!(fp1, fp2, "fingerprint should be deterministic"); + assert_eq!(fp1.len(), 16, "fingerprint should be 16 hex chars"); + } + + #[test] + fn fingerprint_changes_with_different_signals() { + let ctx1 = make_test_context(); + let mut ctx2 = make_test_context(); + ctx2.raw_tc_string = Some("DIFFERENT_TC_STRING".to_owned()); + + assert_ne!( + consent_fingerprint(&ctx1), + consent_fingerprint(&ctx2), + "different TC strings should produce different fingerprints" + ); + } + + #[test] + fn fingerprint_changes_with_gpc() { + let mut ctx1 = make_test_context(); + ctx1.gpc = false; + let mut ctx2 = make_test_context(); + ctx2.gpc = true; + + assert_ne!( + consent_fingerprint(&ctx1), + consent_fingerprint(&ctx2), + "different GPC values should produce different fingerprints" + ); + } + + #[test] + fn fingerprint_distinguishes_none_from_empty() { + let mut ctx_none = make_test_context(); + ctx_none.raw_tc_string = None; + let mut ctx_empty = make_test_context(); + ctx_empty.raw_tc_string = Some(String::new()); + + assert_ne!( + consent_fingerprint(&ctx_none), + consent_fingerprint(&ctx_empty), + "None vs empty string should produce different fingerprints" + ); + } + + #[test] + fn signals_from_entry_roundtrip() { + let ctx = make_test_context(); + let entry = entry_from_context(&ctx, 1_000_000); + let signals = signals_from_entry(&entry); + + assert_eq!(signals.raw_tc_string, ctx.raw_tc_string); + assert_eq!(signals.raw_gpp_string, ctx.raw_gpp_string); + assert_eq!(signals.raw_us_privacy, ctx.raw_us_privacy); + assert_eq!(signals.gpc, ctx.gpc); + // gpp_sid is serialized as "2,6" from the section IDs + assert_eq!(signals.raw_gpp_sid, Some("2,6".to_owned())); + } + + #[test] + fn 
parse_jurisdiction_roundtrip() { + assert_eq!(parse_jurisdiction("GDPR"), Jurisdiction::Gdpr); + assert_eq!( + parse_jurisdiction("US-CA"), + Jurisdiction::UsState("CA".to_owned()) + ); + assert_eq!( + parse_jurisdiction("non-regulated"), + Jurisdiction::NonRegulated + ); + assert_eq!(parse_jurisdiction("unknown"), Jurisdiction::Unknown); + assert_eq!( + parse_jurisdiction("something-else"), + Jurisdiction::Unknown, + "unrecognized jurisdiction should default to Unknown" + ); + } + + #[test] + fn empty_entry_serializes_compact() { + let ctx = ConsentContext::default(); + let entry = entry_from_context(&ctx, 0); + let json = serde_json::to_string(&entry).expect("should serialize"); + // With skip_serializing_if = "Option::is_none", omitted fields keep it small. + assert!( + !json.contains("raw_tc_string"), + "None fields should be omitted from JSON" + ); + } + + #[test] + fn entry_preserves_ac_string() { + let mut ctx = make_test_context(); + ctx.raw_ac_string = Some("2~1234.5678~dv.".to_owned()); + let entry = entry_from_context(&ctx, 0); + let restored = context_from_entry(&entry); + + assert_eq!( + restored.raw_ac_string, + Some("2~1234.5678~dv.".to_owned()), + "AC string should survive roundtrip" + ); + } +} diff --git a/crates/common/src/consent/mod.rs b/crates/common/src/consent/mod.rs index 54414529..3636bbb5 100644 --- a/crates/common/src/consent/mod.rs +++ b/crates/common/src/consent/mod.rs @@ -17,36 +17,168 @@ //! # Usage //! //! ```ignore -//! let consent = consent::build_consent_context(jar.as_ref(), &req); +//! let consent = consent::build_consent_context(&consent::ConsentPipelineInput { +//! jar: cookie_jar.as_ref(), +//! req: &req, +//! config: &settings.consent, +//! geo: geo.as_ref(), +//! synthetic_id: Some("sid_abc123"), +//! }); //! 
``` mod extraction; pub mod gpp; +pub mod jurisdiction; +pub mod kv; pub mod tcf; pub mod types; pub mod us_privacy; pub use extraction::extract_consent_signals; -pub use types::{ConsentContext, ConsentSource, RawConsentSignals, TcfConsent}; +pub use types::{ConsentContext, ConsentSource, PrivacyFlag, RawConsentSignals, TcfConsent}; + +use std::time::{SystemTime, UNIX_EPOCH}; use cookie::CookieJar; use fastly::Request; +use crate::consent_config::{ConsentConfig, ConsentMode}; +use crate::geo::GeoInfo; + +/// Number of deciseconds in one day (86 400 seconds × 10). +const DECISECONDS_PER_DAY: u64 = 86_400 * 10; + +/// Inputs to the consent processing pipeline. +/// +/// Bundles all data needed to extract, decode, classify, and validate +/// consent signals from a single request. +pub struct ConsentPipelineInput<'a> { + /// Parsed cookie jar from the incoming request. + pub jar: Option<&'a CookieJar>, + /// The incoming HTTP request (for header access). + pub req: &'a Request, + /// Publisher consent configuration. + pub config: &'a ConsentConfig, + /// Geolocation data from the request (for jurisdiction detection). + pub geo: Option<&'a GeoInfo>, + /// Synthetic ID for KV Store consent persistence. + /// + /// When set along with `config.consent_store`, enables: + /// - **Read fallback**: loads consent from KV when cookies are absent. + /// - **Write-on-change**: persists cookie-sourced consent to KV. + pub synthetic_id: Option<&'a str>, +} + /// Extracts, decodes, and normalizes consent signals from a request. /// /// This is the primary entry point for the consent pipeline. It: /// /// 1. Reads raw consent strings from cookies and headers. /// 2. Decodes each signal (TCF v2, GPP, US Privacy). -/// 3. Builds a [`ConsentContext`] with both raw and decoded data. -/// 4. Logs a summary for observability. +/// 3. Detects the privacy jurisdiction from geolocation. +/// 4. Checks consent expiration (if enabled). +/// 5. 
Constructs a US Privacy string from GPC when appropriate. +/// 6. Builds a [`ConsentContext`] with both raw and decoded data. +/// 7. Logs a summary for observability. /// /// Decoding failures are logged and the corresponding decoded field is set to /// `None` — the raw string is still preserved for proxy-mode forwarding. -pub fn build_consent_context(jar: Option<&CookieJar>, req: &Request) -> ConsentContext { - let signals = extract_consent_signals(jar, req); +pub fn build_consent_context(input: &ConsentPipelineInput<'_>) -> ConsentContext { + let signals = extract_consent_signals(input.jar, input.req); log_consent_signals(&signals); - build_context_from_signals(&signals) + + // In proxy mode, skip decoding entirely. + if input.config.mode == ConsentMode::Proxy { + let jur = jurisdiction::detect_jurisdiction(input.geo, input.config); + let gdpr_applies = signals.raw_tc_string.is_some(); + log::debug!("Consent proxy mode: jurisdiction={jur}, skipping decode"); + return ConsentContext { + raw_tc_string: signals.raw_tc_string, + raw_gpp_string: signals.raw_gpp_string, + gpp_section_ids: signals + .raw_gpp_sid + .as_deref() + .and_then(gpp::parse_gpp_sid_cookie), + raw_us_privacy: signals.raw_us_privacy, + raw_ac_string: None, + gdpr_applies, + tcf: None, + gpp: None, + us_privacy: None, + expired: false, + gpc: signals.gpc, + jurisdiction: jur, + source: ConsentSource::Cookie, + }; + } + + // KV Store fallback: if no cookie-based signals exist, try loading + // persisted consent from the KV Store. + if should_try_kv_fallback(&signals) { + if let Some(ctx) = try_kv_fallback(input) { + log_consent_context(&ctx); + return ctx; + } + } + + let mut ctx = build_context_from_signals(&signals); + ctx.jurisdiction = jurisdiction::detect_jurisdiction(input.geo, input.config); + apply_expiration_check(&mut ctx, input.config); + apply_gpc_us_privacy(&mut ctx, input.config); + + // KV Store write: persist cookie-sourced consent for future requests. 
+ try_kv_write(input, &ctx); + + log_consent_context(&ctx); + ctx +} + +/// Marks TCF consent as expired when it exceeds the configured maximum age. +/// +/// Clears the decoded `tcf` field (treated as no consent) but preserves the +/// raw string for proxy-mode forwarding. Re-evaluates `gdpr_applies` based +/// on whether a GPP EU TCF section is still available. +fn apply_expiration_check(ctx: &mut ConsentContext, config: &ConsentConfig) { + if !config.check_expiration { + return; + } + + let tcf = match &ctx.tcf { + Some(tcf) => tcf, + None => return, + }; + + if !is_consent_expired(tcf, config.max_consent_age_days) { + return; + } + + let age_days = consent_age_days(tcf); + log::warn!( + "TCF consent expired (age: {age_days}d, max: {}d)", + config.max_consent_age_days + ); + ctx.expired = true; + ctx.tcf = None; + // Re-evaluate: GDPR may still apply if GPP has a TCF section. + ctx.gdpr_applies = ctx.gpp.as_ref().is_some_and(|g| g.eu_tcf.is_some()); +} + +/// Constructs a US Privacy string from GPC when no explicit cookie exists +/// and the user is in a US state with a privacy law. +fn apply_gpc_us_privacy(ctx: &mut ConsentContext, config: &ConsentConfig) { + if !ctx.gpc || ctx.us_privacy.is_some() { + return; + } + if !matches!(&ctx.jurisdiction, jurisdiction::Jurisdiction::UsState(_)) { + return; + } + + if let Some(usp) = build_us_privacy_from_gpc(config) { + log::info!("Constructed US Privacy string from GPC: {usp}"); + ctx.raw_us_privacy = Some(usp.to_string()); + ctx.us_privacy = Some(usp); + ctx.source = ConsentSource::PolicyDefault; + } } /// Extracts raw consent signals and logs them (without decoding). @@ -131,11 +263,61 @@ pub fn build_context_from_signals(signals: &RawConsentSignals) -> ConsentContext gpp: decoded_gpp, us_privacy: decoded_us_privacy, + expired: false, + gpc: signals.gpc, + jurisdiction: jurisdiction::Jurisdiction::default(), source: ConsentSource::Cookie, } } +/// Returns the current time in deciseconds since the Unix epoch. 
+pub(crate) fn now_deciseconds() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64 + / 100 +} + +/// Returns the age of a TCF consent string in days. +fn consent_age_days(tcf: &types::TcfConsent) -> u64 { + now_deciseconds().saturating_sub(tcf.last_updated_ds) / DECISECONDS_PER_DAY +} + +/// Checks whether a TCF consent string has expired. +/// +/// Compares `last_updated_ds` (deciseconds since epoch) against the current +/// time and the configured maximum age. Returns `true` if the consent is +/// older than `max_age_days`. +#[must_use] +pub fn is_consent_expired(tcf: &types::TcfConsent, max_age_days: u32) -> bool { + let max_age_ds = u64::from(max_age_days) * DECISECONDS_PER_DAY; + now_deciseconds().saturating_sub(tcf.last_updated_ds) > max_age_ds +} + +/// Constructs a US Privacy string from `Sec-GPC` and publisher config defaults. +/// +/// Called when `gpc = true` but no explicit `us_privacy` cookie exists and the +/// user is in a US state with a privacy law. The resulting string reflects the +/// publisher's configured compliance posture, not a protocol assertion. +/// +/// Returns [`None`] if the config says GPC should not imply opt-out. +#[must_use] +pub fn build_us_privacy_from_gpc(config: &ConsentConfig) -> Option { + let defaults = &config.us_privacy_defaults; + if !defaults.gpc_implies_optout { + return None; + } + + Some(types::UsPrivacy { + version: 1, + notice_given: PrivacyFlag::from(defaults.notice_given), + opt_out_sale: PrivacyFlag::Yes, + lspa_covered: PrivacyFlag::from(defaults.lspa_covered), + }) +} + /// Filters Extended User IDs based on TCF consent. /// /// Per Prebid's tcfControl enforcement: @@ -175,21 +357,183 @@ pub fn gate_eids_by_consent( log::info!("EIDs stripped: GDPR applies but no TCF consent available"); None } else { - // Non-GDPR context with no TCF — pass through. 
Some(eids) } } } } +// --------------------------------------------------------------------------- +// KV Store integration helpers +// --------------------------------------------------------------------------- + +/// Returns whether KV fallback should be attempted for this request. +/// +/// KV fallback is used only when cookie-based consent signals are absent. +/// A standalone `Sec-GPC` header should not suppress fallback reads. +#[must_use] +fn should_try_kv_fallback(signals: &RawConsentSignals) -> bool { + !signals.has_cookie_signals() +} + +/// Attempts to load consent from the KV Store when cookie signals are empty. +/// +/// Returns `Some(ConsentContext)` if a valid entry was found and decoded, +/// `None` otherwise. Requires both `consent_store` and `synthetic_id` to +/// be configured. +fn try_kv_fallback(input: &ConsentPipelineInput<'_>) -> Option { + let store_name = input.config.consent_store.as_deref()?; + let synthetic_id = input.synthetic_id?; + + log::debug!("No cookie consent signals, trying KV fallback for '{synthetic_id}'"); + let mut ctx = kv::load_consent_from_kv(store_name, synthetic_id)?; + + // Re-detect jurisdiction from current geo (may differ from stored value). + ctx.jurisdiction = jurisdiction::detect_jurisdiction(input.geo, input.config); + apply_expiration_check(&mut ctx, input.config); + apply_gpc_us_privacy(&mut ctx, input.config); + + Some(ctx) +} + +/// Persists cookie-sourced consent to the KV Store when configured. +/// +/// Only writes when consent signals are non-empty and have changed since +/// the last write (fingerprint comparison). 
+fn try_kv_write(input: &ConsentPipelineInput<'_>, ctx: &ConsentContext) { + let Some(store_name) = input.config.consent_store.as_deref() else { + return; + }; + let Some(synthetic_id) = input.synthetic_id else { + return; + }; + + kv::save_consent_to_kv( + store_name, + synthetic_id, + ctx, + input.config.max_consent_age_days, + ); +} + +// --------------------------------------------------------------------------- +// Logging helpers +// --------------------------------------------------------------------------- + /// Logs a summary of the extracted consent signals. /// /// Emits an `info`-level log line when at least one consent signal is present, /// or a `debug`-level line when no signals were found. -pub fn log_consent_signals(signals: &RawConsentSignals) { +fn log_consent_signals(signals: &RawConsentSignals) { if signals.is_empty() { log::debug!("No consent signals found on request"); } else { log::info!("Consent signals: {}", signals); } } + +/// Derives a human-readable status label for a decoded signal. +/// +/// Returns `"present"` when decoded data exists, `"decode-failed"` when only +/// the raw string exists, or `"absent"` when neither is available. +fn signal_status(decoded: bool, raw: bool) -> &'static str { + if decoded { + "present" + } else if raw { + "decode-failed" + } else { + "absent" + } +} + +/// Logs a structured summary of the fully-processed consent context. 
+fn log_consent_context(ctx: &ConsentContext) { + if ctx.is_empty() { + return; + } + + let tcf_status = match (&ctx.tcf, ctx.expired) { + (Some(_), _) => "present", + (None, true) => "expired", + (None, false) if ctx.raw_tc_string.is_some() => "decode-failed", + _ => "absent", + }; + + let gpp_status = signal_status(ctx.gpp.is_some(), ctx.raw_gpp_string.is_some()); + let usp_status = signal_status(ctx.us_privacy.is_some(), false); + + log::info!( + "Consent context: jurisdiction={}, tcf={tcf_status}, gpp={gpp_status}, \ + us_privacy={usp_status}, gpc={}, gdpr_applies={}, source={:?}", + ctx.jurisdiction, + ctx.gpc, + ctx.gdpr_applies, + ctx.source, + ); +} + +#[cfg(test)] +mod tests { + use fastly::Request; + + use super::{build_consent_context, should_try_kv_fallback, ConsentPipelineInput}; + use crate::consent::types::RawConsentSignals; + use crate::consent_config::{ConsentConfig, ConsentMode}; + use crate::cookies::parse_cookies_to_jar; + + #[test] + fn kv_fallback_allowed_when_only_gpc_present() { + let signals = RawConsentSignals { + gpc: true, + ..RawConsentSignals::default() + }; + + assert!( + should_try_kv_fallback(&signals), + "should allow KV fallback when only Sec-GPC is present" + ); + } + + #[test] + fn kv_fallback_skipped_when_cookie_signal_present() { + let signals = RawConsentSignals { + raw_tc_string: Some("CPXxGfAPXxGfA".to_owned()), + gpc: true, + ..RawConsentSignals::default() + }; + + assert!( + !should_try_kv_fallback(&signals), + "should skip KV fallback when cookie signals are present" + ); + } + + #[test] + fn proxy_mode_marks_gdpr_when_raw_tc_exists() { + let jar = parse_cookies_to_jar("euconsent-v2=CPXxGfAPXxGfA"); + let req = Request::get("https://example.com"); + let config = ConsentConfig { + mode: ConsentMode::Proxy, + ..ConsentConfig::default() + }; + + let ctx = build_consent_context(&ConsentPipelineInput { + jar: Some(&jar), + req: &req, + config: &config, + geo: None, + synthetic_id: None, + }); + + assert!( + ctx.gdpr_applies, 
+ "should set gdpr_applies when raw TC string is present in proxy mode" + ); + assert_eq!( + ctx.raw_tc_string.as_deref(), + Some("CPXxGfAPXxGfA"), + "should preserve raw TC string in proxy mode" + ); + assert!(ctx.tcf.is_none(), "should skip TCF decoding in proxy mode"); + } +} diff --git a/crates/common/src/consent/types.rs b/crates/common/src/consent/types.rs index 095e8864..9098d78b 100644 --- a/crates/common/src/consent/types.rs +++ b/crates/common/src/consent/types.rs @@ -48,14 +48,19 @@ pub struct RawConsentSignals { } impl RawConsentSignals { + /// Returns `true` when at least one consent cookie signal is present. + #[must_use] + pub fn has_cookie_signals(&self) -> bool { + self.raw_tc_string.is_some() + || self.raw_gpp_string.is_some() + || self.raw_gpp_sid.is_some() + || self.raw_us_privacy.is_some() + } + /// Returns `true` when no consent signals were found on the request. #[must_use] pub fn is_empty(&self) -> bool { - self.raw_tc_string.is_none() - && self.raw_gpp_string.is_none() - && self.raw_gpp_sid.is_none() - && self.raw_us_privacy.is_none() - && !self.gpc + !self.has_cookie_signals() && !self.gpc } } @@ -129,8 +134,17 @@ pub struct ConsentContext { /// Decoded US Privacy signal. pub us_privacy: Option, + /// Whether the TCF consent string has expired (age exceeds configured max). + /// + /// When `true` and `check_expiration` is enabled, the decoded `tcf` field + /// is cleared (treated as no consent) but the raw string is preserved for + /// proxy-mode forwarding. + pub expired: bool, + /// Global Privacy Control signal from `Sec-GPC` header. pub gpc: bool, + /// Detected privacy jurisdiction for this request. + pub jurisdiction: super::jurisdiction::Jurisdiction, /// Source of the consent data (for debugging). 
pub source: ConsentSource, } @@ -325,6 +339,16 @@ impl fmt::Display for PrivacyFlag { } } +impl From for PrivacyFlag { + fn from(value: bool) -> Self { + if value { + Self::Yes + } else { + Self::No + } + } +} + impl fmt::Display for UsPrivacy { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( @@ -405,6 +429,32 @@ mod tests { assert!(!signals.is_empty(), "should not be empty with gpc=true"); } + #[test] + fn has_no_cookie_signals_with_only_gpc() { + let signals = RawConsentSignals { + gpc: true, + ..Default::default() + }; + + assert!( + !signals.has_cookie_signals(), + "should not report cookie signals when only gpc is present" + ); + } + + #[test] + fn has_cookie_signals_with_tc_string() { + let signals = RawConsentSignals { + raw_tc_string: Some("CPXxGfAPXxGfA".to_owned()), + ..Default::default() + }; + + assert!( + signals.has_cookie_signals(), + "should report cookie signals when tc string is present" + ); + } + #[test] fn display_all_absent() { let signals = RawConsentSignals::default(); diff --git a/crates/common/src/consent_config.rs b/crates/common/src/consent_config.rs new file mode 100644 index 00000000..8a6d6036 --- /dev/null +++ b/crates/common/src/consent_config.rs @@ -0,0 +1,472 @@ +//! Consent forwarding configuration types. +//! +//! Defines the `[consent]` TOML section and its nested sub-sections for +//! controlling how Trusted Server interprets, validates, and forwards +//! privacy consent signals to advertising partners. + +use serde::{Deserialize, Serialize}; + +/// TCF spec recommends 13 months (≈395 days). +const MAX_CONSENT_AGE_DAYS: u32 = 395; + +/// How many days newer one string must be to win under the `newest` strategy. +const FRESHNESS_THRESHOLD_DAYS: u32 = 30; + +/// EU member states (27) + EEA non-EU (3) + UK GDPR (1). 
+const GDPR_COUNTRIES: &[&str] = &[ + "AT", "BE", "BG", "HR", "CY", "CZ", "DK", "EE", "FI", "FR", "DE", "GR", "HU", "IE", "IT", "LV", + "LT", "LU", "MT", "NL", "PL", "PT", "RO", "SK", "SI", "ES", "SE", "IS", "LI", "NO", "GB", +]; + +/// US states with active comprehensive privacy laws (as of 2026). +const US_PRIVACY_STATES: &[&str] = &[ + "CA", "VA", "CO", "CT", "UT", "MT", "OR", "TX", "FL", "DE", "IA", "NE", "NH", "NJ", "TN", "MN", + "MD", "IN", "KY", "RI", +]; + +/// Converts a static `&[&str]` slice to an owned `Vec`. +fn str_vec(codes: &[&str]) -> Vec { + codes.iter().copied().map(String::from).collect() +} + +/// Top-level consent configuration (`[consent]` in TOML). +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct ConsentConfig { + /// Operating mode for consent handling. + /// + /// - `"interpreter"` — decode consent strings and forward structured data + /// (recommended; enables observability and enforcement). + /// - `"proxy"` — forward raw strings without decoding. + #[serde(default = "default_consent_mode")] + pub mode: ConsentMode, + + /// Whether to check consent expiration based on TCF timestamps. + #[serde(default = "default_true")] + pub check_expiration: bool, + + /// Maximum consent age in days before it is considered expired. + /// + /// TCF spec recommends 13 months (≈395 days). + #[serde(default = "default_max_consent_age_days")] + pub max_consent_age_days: u32, + + /// GDPR jurisdiction configuration. + #[serde(default)] + pub gdpr: GdprConfig, + + /// US state privacy law configuration. + #[serde(default)] + pub us_states: UsStatesConfig, + + /// Defaults for constructing a US Privacy string when only `Sec-GPC` + /// is present and no explicit `us_privacy` cookie exists. + #[serde(default)] + pub us_privacy_defaults: UsPrivacyDefaultsConfig, + + /// How to resolve conflicts when both TCF and GPP strings are present + /// but disagree on consent status. 
+ #[serde(default)] + pub conflict_resolution: ConflictResolutionConfig, + + /// Name of the KV Store used for consent persistence. + /// + /// When set, consent data is persisted per Synthetic ID so that + /// returning users without consent cookies can still have their + /// consent preferences applied. Set to `None` to disable. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub consent_store: Option, +} + +impl Default for ConsentConfig { + fn default() -> Self { + Self { + mode: ConsentMode::Interpreter, + check_expiration: true, + max_consent_age_days: MAX_CONSENT_AGE_DAYS, + gdpr: GdprConfig::default(), + us_states: UsStatesConfig::default(), + us_privacy_defaults: UsPrivacyDefaultsConfig::default(), + conflict_resolution: ConflictResolutionConfig::default(), + consent_store: None, + } + } +} + +// --------------------------------------------------------------------------- +// Consent mode +// --------------------------------------------------------------------------- + +/// Operating mode for the consent pipeline. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum ConsentMode { + /// Decode consent strings and forward structured data. + Interpreter, + /// Forward raw strings without decoding. + Proxy, +} + +// --------------------------------------------------------------------------- +// Consent forwarding mode (per-partner) +// --------------------------------------------------------------------------- + +/// How consent signals are forwarded to a specific partner integration. +/// +/// Controls whether consent travels through the `OpenRTB` body, raw `Cookie` +/// headers, or both. The default (`Both`) preserves backward compatibility. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize, Default)] +#[serde(rename_all = "snake_case")] +pub enum ConsentForwardingMode { + /// Forward consent in the `OpenRTB` body only; strip consent cookies. 
+ OpenrtbOnly, + /// Forward consent cookies only; omit consent fields from the body. + CookiesOnly, + /// Forward consent in both cookies and body (default). + #[default] + Both, +} + +impl ConsentForwardingMode { + /// Whether consent cookies should be stripped from forwarded requests. + /// + /// Returns `true` for [`OpenrtbOnly`](Self::OpenrtbOnly) since consent + /// travels exclusively through the request body in that mode. + #[must_use] + pub const fn strips_consent_cookies(self) -> bool { + matches!(self, Self::OpenrtbOnly) + } + + /// Whether consent fields should be included in the request body. + /// + /// Returns `true` for [`OpenrtbOnly`](Self::OpenrtbOnly) and + /// [`Both`](Self::Both); `false` for [`CookiesOnly`](Self::CookiesOnly). + #[must_use] + pub const fn includes_body_consent(self) -> bool { + !matches!(self, Self::CookiesOnly) + } +} + +// --------------------------------------------------------------------------- +// GDPR +// --------------------------------------------------------------------------- + +/// GDPR jurisdiction configuration (`[consent.gdpr]`). +/// +/// The `applies_in` list is used for **observability and logging only** — it +/// does NOT cause consent to be synthesized. When a user's country appears in +/// this list, the system logs that GDPR applies, enabling publishers to +/// monitor compliance coverage. +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct GdprConfig { + /// ISO 3166-1 alpha-2 country codes where GDPR applies. + #[serde(default = "default_gdpr_countries")] + pub applies_in: Vec, +} + +impl Default for GdprConfig { + fn default() -> Self { + Self { + applies_in: str_vec(GDPR_COUNTRIES), + } + } +} + +// --------------------------------------------------------------------------- +// US States +// --------------------------------------------------------------------------- + +/// US state privacy law configuration (`[consent.us_states]`). 
+/// +/// Config-driven to avoid recompilation when new state laws take effect. +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct UsStatesConfig { + /// US state codes with active comprehensive privacy laws. + #[serde(default = "default_us_privacy_states")] + pub privacy_states: Vec, +} + +impl Default for UsStatesConfig { + fn default() -> Self { + Self { + privacy_states: str_vec(US_PRIVACY_STATES), + } + } +} + +// --------------------------------------------------------------------------- +// US Privacy defaults (GPC handling) +// --------------------------------------------------------------------------- + +/// Publisher-configurable defaults for constructing a US Privacy string +/// when only the `Sec-GPC` header is present (`[consent.us_privacy_defaults]`). +/// +/// These reflect the publisher's actual compliance posture — they are +/// **publisher policy**, not protocol requirements. +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct UsPrivacyDefaultsConfig { + /// Whether the publisher has actually shown a CCPA notice to the user. + #[serde(default = "default_true")] + pub notice_given: bool, + + /// Whether the publisher is subject to the Limited Service Provider + /// Agreement. + #[serde(default)] + pub lspa_covered: bool, + + /// Whether a `Sec-GPC: 1` header should be interpreted as an opt-out + /// of sale. + #[serde(default = "default_true")] + pub gpc_implies_optout: bool, +} + +impl Default for UsPrivacyDefaultsConfig { + fn default() -> Self { + Self { + notice_given: true, + lspa_covered: false, + gpc_implies_optout: true, + } + } +} + +// --------------------------------------------------------------------------- +// Conflict resolution +// --------------------------------------------------------------------------- + +/// How to resolve disagreements between GPP and TC String when both are +/// present (`[consent.conflict_resolution]`). 
+#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct ConflictResolutionConfig { + /// Resolution strategy. + #[serde(default = "default_conflict_mode")] + pub mode: ConflictMode, + + /// How many days newer one string must be to win under the `newest` + /// strategy. + #[serde(default = "default_freshness_threshold_days")] + pub freshness_threshold_days: u32, +} + +impl Default for ConflictResolutionConfig { + fn default() -> Self { + Self { + mode: ConflictMode::Restrictive, + freshness_threshold_days: FRESHNESS_THRESHOLD_DAYS, + } + } +} + +/// Conflict resolution strategy. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum ConflictMode { + /// Deny consent when signals disagree (most privacy-safe). + Restrictive, + /// Use the newer signal based on timestamps. + Newest, + /// Grant consent when signals disagree (requires legal review). + Permissive, +} + +// --------------------------------------------------------------------------- +// Serde default value functions +// --------------------------------------------------------------------------- + +const fn default_consent_mode() -> ConsentMode { + ConsentMode::Interpreter +} + +const fn default_true() -> bool { + true +} + +const fn default_max_consent_age_days() -> u32 { + MAX_CONSENT_AGE_DAYS +} + +const fn default_conflict_mode() -> ConflictMode { + ConflictMode::Restrictive +} + +const fn default_freshness_threshold_days() -> u32 { + FRESHNESS_THRESHOLD_DAYS +} + +fn default_gdpr_countries() -> Vec { + str_vec(GDPR_COUNTRIES) +} + +fn default_us_privacy_states() -> Vec { + str_vec(US_PRIVACY_STATES) +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::{ConflictMode, ConsentConfig, ConsentMode}; + + #[test] + fn default_config_uses_interpreter_mode() { + let config = 
ConsentConfig::default(); + assert_eq!( + config.mode, + ConsentMode::Interpreter, + "default mode should be interpreter" + ); + } + + #[test] + fn default_config_enables_expiration_checking() { + let config = ConsentConfig::default(); + assert!( + config.check_expiration, + "expiration checking should be enabled by default" + ); + assert_eq!( + config.max_consent_age_days, 395, + "default max age should be 395 days" + ); + } + + #[test] + fn default_gdpr_countries_includes_eu_eea_uk() { + let config = ConsentConfig::default(); + let countries = &config.gdpr.applies_in; + assert!( + countries.contains(&"DE".to_owned()), + "should include Germany" + ); + assert!( + countries.contains(&"NO".to_owned()), + "should include Norway (EEA)" + ); + assert!(countries.contains(&"GB".to_owned()), "should include UK"); + assert_eq!( + countries.len(), + 31, + "should have 31 countries (27 EU + 3 EEA + 1 UK)" + ); + } + + #[test] + fn default_us_privacy_states_includes_california() { + let config = ConsentConfig::default(); + assert!( + config.us_states.privacy_states.contains(&"CA".to_owned()), + "should include California" + ); + } + + #[test] + fn default_us_privacy_defaults_reflect_common_posture() { + let config = ConsentConfig::default(); + let defaults = &config.us_privacy_defaults; + assert!(defaults.notice_given, "notice_given should default to true"); + assert!( + !defaults.lspa_covered, + "lspa_covered should default to false" + ); + assert!( + defaults.gpc_implies_optout, + "gpc_implies_optout should default to true" + ); + } + + #[test] + fn default_conflict_resolution_is_restrictive() { + let config = ConsentConfig::default(); + assert_eq!( + config.conflict_resolution.mode, + ConflictMode::Restrictive, + "default conflict mode should be restrictive" + ); + assert_eq!( + config.conflict_resolution.freshness_threshold_days, 30, + "default freshness threshold should be 30 days" + ); + } + + #[test] + fn deserializes_from_empty_json() { + let config: ConsentConfig = + 
serde_json::from_str("{}").expect("should deserialize empty JSON with defaults"); + assert_eq!(config.mode, ConsentMode::Interpreter); + assert!(config.check_expiration); + } + + #[test] + fn deserializes_proxy_mode() { + let config: ConsentConfig = + serde_json::from_str(r#"{"mode": "proxy"}"#).expect("should deserialize proxy mode"); + assert_eq!(config.mode, ConsentMode::Proxy, "should parse proxy mode"); + } + + #[test] + fn consent_forwarding_mode_strips_cookies_only_for_openrtb() { + use super::ConsentForwardingMode; + + assert!( + ConsentForwardingMode::OpenrtbOnly.strips_consent_cookies(), + "openrtb_only should strip consent cookies" + ); + assert!( + !ConsentForwardingMode::CookiesOnly.strips_consent_cookies(), + "cookies_only should not strip consent cookies" + ); + assert!( + !ConsentForwardingMode::Both.strips_consent_cookies(), + "both should not strip consent cookies" + ); + } + + #[test] + fn consent_forwarding_mode_includes_body_consent_except_cookies_only() { + use super::ConsentForwardingMode; + + assert!( + ConsentForwardingMode::OpenrtbOnly.includes_body_consent(), + "openrtb_only should include body consent" + ); + assert!( + !ConsentForwardingMode::CookiesOnly.includes_body_consent(), + "cookies_only should not include body consent" + ); + assert!( + ConsentForwardingMode::Both.includes_body_consent(), + "both should include body consent" + ); + } + + #[test] + fn deserializes_full_config() { + let json = serde_json::json!({ + "mode": "interpreter", + "check_expiration": false, + "max_consent_age_days": 180, + "gdpr": { "applies_in": ["DE", "FR"] }, + "us_states": { "privacy_states": ["CA"] }, + "us_privacy_defaults": { + "notice_given": false, + "lspa_covered": true, + "gpc_implies_optout": false + }, + "conflict_resolution": { + "mode": "newest", + "freshness_threshold_days": 15 + } + }); + let config: ConsentConfig = + serde_json::from_value(json).expect("should deserialize full config"); + assert!(!config.check_expiration); + 
assert_eq!(config.max_consent_age_days, 180); + assert_eq!(config.gdpr.applies_in, vec!["DE", "FR"]); + assert_eq!(config.us_states.privacy_states, vec!["CA"]); + assert!(!config.us_privacy_defaults.notice_given); + assert!(config.us_privacy_defaults.lspa_covered); + assert_eq!(config.conflict_resolution.mode, ConflictMode::Newest); + assert_eq!(config.conflict_resolution.freshness_threshold_days, 15); + } +} diff --git a/crates/common/src/cookies.rs b/crates/common/src/cookies.rs index 1b33d99b..0ab90422 100644 --- a/crates/common/src/cookies.rs +++ b/crates/common/src/cookies.rs @@ -8,10 +8,24 @@ use error_stack::{Report, ResultExt}; use fastly::http::header; use fastly::Request; -use crate::constants::COOKIE_SYNTHETIC_ID; +use crate::constants::{ + COOKIE_EUCONSENT_V2, COOKIE_GPP, COOKIE_GPP_SID, COOKIE_SYNTHETIC_ID, COOKIE_US_PRIVACY, +}; use crate::error::TrustedServerError; use crate::settings::Settings; +/// Cookie names carrying privacy consent signals. +/// +/// Used by [`strip_cookies`] to remove consent signals from a `Cookie` header +/// before forwarding requests to partners that receive consent through the +/// `OpenRTB` body instead. +pub const CONSENT_COOKIE_NAMES: &[&str] = &[ + COOKIE_EUCONSENT_V2, + COOKIE_GPP, + COOKIE_GPP_SID, + COOKIE_US_PRIVACY, +]; + const COOKIE_MAX_AGE: i32 = 365 * 24 * 60 * 60; // 1 year /// Parses a cookie string into a [`CookieJar`]. @@ -59,6 +73,59 @@ pub fn handle_request_cookies( } } +/// Strips named cookies from a `Cookie` header value string. +/// +/// Parses the semicolon-separated cookie pairs, filters out any whose name +/// matches one of `cookie_names`, and reconstructs the header string. +/// +/// Returns an empty string if all cookies were stripped or the input was empty. 
+#[must_use] +pub fn strip_cookies(cookie_header: &str, cookie_names: &[&str]) -> String { + cookie_header + .split(';') + .map(str::trim) + .filter(|pair| { + if let Some(name) = pair.split('=').next() { + !cookie_names.contains(&name.trim()) + } else { + true + } + }) + .filter(|s| !s.is_empty()) + .collect::>() + .join("; ") +} + +/// Copies the `Cookie` header from one request to another, optionally +/// stripping consent cookies. +/// +/// When `strip_consent` is `true`, cookies listed in [`CONSENT_COOKIE_NAMES`] +/// are removed before forwarding. If stripping leaves no cookies, the header +/// is omitted entirely. Non-UTF-8 cookie headers are forwarded unchanged. +pub fn forward_cookie_header(from: &Request, to: &mut Request, strip_consent: bool) { + let Some(cookie_value) = from.get_header(header::COOKIE) else { + return; + }; + + if !strip_consent { + to.set_header(header::COOKIE, cookie_value); + return; + } + + match cookie_value.to_str() { + Ok(s) => { + let stripped = strip_cookies(s, CONSENT_COOKIE_NAMES); + if !stripped.is_empty() { + to.set_header(header::COOKIE, &stripped); + } + } + Err(_) => { + // Non-UTF-8 Cookie header — forward as-is + to.set_header(header::COOKIE, cookie_value); + } + } +} + /// Creates a synthetic ID cookie string. 
/// /// Generates a properly formatted cookie with security attributes @@ -199,4 +266,43 @@ mod tests { "Set-Cookie header should match create_synthetic_cookie output" ); } + + // --------------------------------------------------------------- + // strip_cookies tests + // --------------------------------------------------------------- + + #[test] + fn test_strip_cookies_removes_consent() { + let header = "euconsent-v2=BOE; __gpp=DBAC; session=abc123; us_privacy=1YNN"; + let stripped = strip_cookies(header, CONSENT_COOKIE_NAMES); + assert_eq!(stripped, "session=abc123"); + } + + #[test] + fn test_strip_cookies_preserves_non_consent() { + let header = "session=abc123; theme=dark"; + let stripped = strip_cookies(header, CONSENT_COOKIE_NAMES); + assert_eq!(stripped, "session=abc123; theme=dark"); + } + + #[test] + fn test_strip_cookies_empty_input() { + let stripped = strip_cookies("", CONSENT_COOKIE_NAMES); + assert_eq!(stripped, ""); + } + + #[test] + fn test_strip_cookies_all_stripped() { + let header = "euconsent-v2=BOE; __gpp=DBAC; __gpp_sid=2,6; us_privacy=1YNN"; + let stripped = strip_cookies(header, CONSENT_COOKIE_NAMES); + assert_eq!(stripped, ""); + } + + #[test] + fn test_strip_cookies_with_complex_values() { + // Cookie values can contain '=' characters + let header = "euconsent-v2=BOE=xyz; session=abc=123=def"; + let stripped = strip_cookies(header, CONSENT_COOKIE_NAMES); + assert_eq!(stripped, "session=abc=123=def"); + } } diff --git a/crates/common/src/integrations/adserver_mock.rs b/crates/common/src/integrations/adserver_mock.rs index 0d5422e6..c8529188 100644 --- a/crates/common/src/integrations/adserver_mock.rs +++ b/crates/common/src/integrations/adserver_mock.rs @@ -200,6 +200,17 @@ impl AdServerMockProvider { json!(null) }; + // Build consent summary from ConsentContext + let consent_json = request.user.consent.as_ref().map(|ctx| { + json!({ + "gdpr": if ctx.gdpr_applies { 1 } else { 0 }, + "consent": ctx.raw_tc_string, + "us_privacy": 
ctx.raw_us_privacy, + "gpp": ctx.raw_gpp_string, + "gpp_sid": ctx.gpp_section_ids, + }) + }); + // Build full mediation request Ok(json!({ "id": request.id, @@ -207,6 +218,7 @@ impl AdServerMockProvider { "ext": { "bidder_responses": bidder_responses_json, "config": config_json, + "consent": consent_json, }, })) } @@ -686,6 +698,57 @@ mod tests { assert!(!provider.supports_media_type(&MediaType::Native)); } + #[test] + fn test_mediation_request_includes_consent() { + use crate::consent::ConsentContext; + + let config = AdServerMockConfig { + enabled: true, + endpoint: "http://localhost:6767/adserver/mediate".to_string(), + timeout_ms: 500, + price_floor: None, + }; + + let provider = AdServerMockProvider::new(config); + + let mut request = create_test_auction_request(); + request.user.consent = Some(ConsentContext { + raw_tc_string: Some("BOEFEAyO".to_string()), + gdpr_applies: true, + raw_us_privacy: Some("1YNN".to_string()), + raw_gpp_string: Some("DBACNYA~CPXxRfAPXxRfA".to_string()), + gpp_section_ids: Some(vec![2, 6]), + ..Default::default() + }); + + let mediation_req = provider + .build_mediation_request(&request, &[]) + .expect("should build request"); + + let consent = &mediation_req["ext"]["consent"]; + assert_eq!(consent["gdpr"], 1); + assert_eq!(consent["consent"], "BOEFEAyO"); + assert_eq!(consent["us_privacy"], "1YNN"); + assert_eq!(consent["gpp"], "DBACNYA~CPXxRfAPXxRfA"); + assert_eq!(consent["gpp_sid"], json!([2, 6])); + } + + #[test] + fn test_mediation_request_no_consent() { + let config = AdServerMockConfig::default(); + let provider = AdServerMockProvider::new(config); + let request = create_test_auction_request(); // consent is None + + let mediation_req = provider + .build_mediation_request(&request, &[]) + .expect("should build request"); + + assert!( + mediation_req["ext"]["consent"].is_null(), + "consent should be null when no consent context" + ); + } + #[test] fn test_parse_mediation_response_with_missing_prices() { // Test that mediator 
response with missing price fields returns None prices diff --git a/crates/common/src/integrations/aps.rs b/crates/common/src/integrations/aps.rs index bdd9c25b..f2c13371 100644 --- a/crates/common/src/integrations/aps.rs +++ b/crates/common/src/integrations/aps.rs @@ -41,6 +41,32 @@ struct ApsBidRequest { /// Timeout in milliseconds #[serde(skip_serializing_if = "Option::is_none")] timeout: Option, + + /// GDPR consent information. + #[serde(skip_serializing_if = "Option::is_none")] + gdpr: Option, + + /// US Privacy (CCPA) string. + #[serde(rename = "usPrivacy", skip_serializing_if = "Option::is_none")] + us_privacy: Option, + + /// GPP consent string. + #[serde(skip_serializing_if = "Option::is_none")] + gpp: Option, + + /// GPP section IDs as comma-separated string. + #[serde(rename = "gppSid", skip_serializing_if = "Option::is_none")] + gpp_sid: Option, +} + +/// GDPR consent information for APS requests. +#[derive(Debug, Clone, Serialize, Deserialize)] +struct ApsGdprConsent { + /// Whether GDPR applies to this request. + enabled: bool, + /// TCF v2 consent string. + #[serde(skip_serializing_if = "Option::is_none")] + consent: Option, } /// APS slot configuration. @@ -268,6 +294,9 @@ impl ApsAuctionProvider { } /// Convert unified `AuctionRequest` to APS TAM bid request format. + /// + /// Populates consent fields (GDPR, US Privacy, GPP) from the + /// [`ConsentContext`](crate::consent::ConsentContext) attached to the request. 
fn to_aps_request(&self, request: &AuctionRequest) -> ApsBidRequest { let slots: Vec = request .slots @@ -289,12 +318,33 @@ impl ApsAuctionProvider { }) .collect(); + // Build consent fields from ConsentContext + let consent_ctx = request.user.consent.as_ref(); + let gdpr = consent_ctx.map(|ctx| ApsGdprConsent { + enabled: ctx.gdpr_applies, + consent: ctx.raw_tc_string.clone(), + }); + let us_privacy = consent_ctx.and_then(|ctx| ctx.raw_us_privacy.clone()); + let gpp = consent_ctx.and_then(|ctx| ctx.raw_gpp_string.clone()); + let gpp_sid = consent_ctx.and_then(|ctx| { + ctx.gpp_section_ids.as_ref().map(|ids| { + ids.iter() + .map(std::string::ToString::to_string) + .collect::>() + .join(",") + }) + }); + ApsBidRequest { pub_id: self.config.pub_id.clone(), slots, page_url: request.publisher.page_url.clone(), user_agent: request.device.as_ref().and_then(|d| d.user_agent.clone()), timeout: Some(self.config.timeout_ms), + gdpr, + us_privacy, + gpp, + gpp_sid, } } @@ -862,6 +912,97 @@ mod tests { assert!(!provider.supports_media_type(&MediaType::Native)); } + #[test] + fn test_aps_request_includes_consent_fields() { + use crate::consent::ConsentContext; + + let config = ApsConfig { + enabled: true, + pub_id: "5128".to_string(), + endpoint: default_endpoint(), + timeout_ms: 800, + }; + let provider = ApsAuctionProvider::new(config); + + let mut request = create_test_auction_request(); + request.user.consent = Some(ConsentContext { + raw_tc_string: Some("BOEFEAyOEFEAyAHABDENAI4AAAB9vABAASA".to_string()), + gdpr_applies: true, + raw_us_privacy: Some("1YNN".to_string()), + raw_gpp_string: Some("DBACNYA~CPXxRfAPXxRfA".to_string()), + gpp_section_ids: Some(vec![2, 6]), + ..Default::default() + }); + + let aps_request = provider.to_aps_request(&request); + + // Verify GDPR consent + let gdpr = aps_request.gdpr.expect("should have gdpr"); + assert!(gdpr.enabled); + assert_eq!( + gdpr.consent.as_deref(), + Some("BOEFEAyOEFEAyAHABDENAI4AAAB9vABAASA") + ); + + // Verify US Privacy 
+ assert_eq!(aps_request.us_privacy.as_deref(), Some("1YNN")); + + // Verify GPP + assert_eq!(aps_request.gpp.as_deref(), Some("DBACNYA~CPXxRfAPXxRfA")); + assert_eq!(aps_request.gpp_sid.as_deref(), Some("2,6")); + } + + #[test] + fn test_aps_request_no_consent() { + let config = ApsConfig { + enabled: true, + pub_id: "5128".to_string(), + endpoint: default_endpoint(), + timeout_ms: 800, + }; + let provider = ApsAuctionProvider::new(config); + let request = create_test_auction_request(); // consent is None + + let aps_request = provider.to_aps_request(&request); + + assert!(aps_request.gdpr.is_none()); + assert!(aps_request.us_privacy.is_none()); + assert!(aps_request.gpp.is_none()); + assert!(aps_request.gpp_sid.is_none()); + } + + #[test] + fn test_aps_request_consent_serialization() { + use crate::consent::ConsentContext; + + let config = ApsConfig { + enabled: true, + pub_id: "5128".to_string(), + endpoint: default_endpoint(), + timeout_ms: 800, + }; + let provider = ApsAuctionProvider::new(config); + + let mut request = create_test_auction_request(); + request.user.consent = Some(ConsentContext { + raw_tc_string: Some("BOE".to_string()), + gdpr_applies: true, + ..Default::default() + }); + + let aps_request = provider.to_aps_request(&request); + let json = serde_json::to_value(&aps_request).expect("should serialize"); + + // GDPR fields present + assert_eq!(json["gdpr"]["enabled"], true); + assert_eq!(json["gdpr"]["consent"], "BOE"); + + // Absent fields should not appear (skip_serializing_if) + assert!(json.get("usPrivacy").is_none()); + assert!(json.get("gpp").is_none()); + assert!(json.get("gppSid").is_none()); + } + #[test] fn test_aps_bids_have_no_creative_and_no_decoded_price() { // APS doesn't provide creative HTML - it only provides targeting keys diff --git a/crates/common/src/integrations/lockr.rs b/crates/common/src/integrations/lockr.rs index 29576e01..a9fc8007 100644 --- a/crates/common/src/integrations/lockr.rs +++ 
b/crates/common/src/integrations/lockr.rs @@ -24,6 +24,7 @@ use serde::Deserialize; use validator::Validate; use crate::backend::BackendConfig; +use crate::cookies::forward_cookie_header; use crate::error::TrustedServerError; use crate::http_util::copy_custom_headers; use crate::integrations::{ @@ -261,6 +262,10 @@ impl LockrIntegration { } /// Copy relevant request headers for proxying. + /// + /// Consent cookies are always stripped — consent signals are forwarded + /// through the `OpenRTB` body by the Prebid integration, not through + /// Lockr's cookie-based API calls. fn copy_request_headers(&self, from: &Request, to: &mut Request) { let headers_to_copy = [ header::CONTENT_TYPE, @@ -269,7 +274,6 @@ impl LockrIntegration { header::AUTHORIZATION, header::ACCEPT_LANGUAGE, header::ACCEPT_ENCODING, - header::COOKIE, ]; for header_name in &headers_to_copy { @@ -278,7 +282,10 @@ impl LockrIntegration { } } - // Handle Origin header - use override if configured, otherwise forward original + // Always strip consent cookies — consent travels through the OpenRTB body + forward_cookie_header(from, to, true); + + // Handle Origin header — use override if configured, otherwise forward original let origin = self .config .origin_override diff --git a/crates/common/src/integrations/prebid.rs b/crates/common/src/integrations/prebid.rs index 3e89aa5a..67ab13ce 100644 --- a/crates/common/src/integrations/prebid.rs +++ b/crates/common/src/integrations/prebid.rs @@ -15,6 +15,8 @@ use crate::auction::types::{ AuctionContext, AuctionRequest, AuctionResponse, Bid as AuctionBid, MediaType, }; use crate::backend::BackendConfig; +use crate::consent_config::ConsentForwardingMode; +use crate::cookies::forward_cookie_header; use crate::error::TrustedServerError; use crate::http_util::RequestInfo; use crate::integrations::{ @@ -83,6 +85,13 @@ pub struct PrebidIntegrationConfig { /// ``` #[serde(default)] pub bid_param_zone_overrides: HashMap>, + /// How consent signals are forwarded to 
Prebid Server. + /// + /// - `openrtb_only` — consent in `OpenRTB` body only, consent cookies stripped + /// - `cookies_only` — consent cookies forwarded, body consent fields omitted + /// - `both` — consent in both cookies and body (default) + #[serde(default)] + pub consent_forwarding: ConsentForwardingMode, } impl IntegrationConfig for PrebidIntegrationConfig { @@ -436,9 +445,17 @@ fn make_first_party_proxy_url( ) } -fn copy_request_headers(from: &Request, to: &mut Request) { +/// Copies browser headers to the outgoing Prebid Server request. +/// +/// In [`ConsentForwardingMode::OpenrtbOnly`] mode, consent cookies are +/// stripped from the `Cookie` header since consent travels exclusively +/// through the `OpenRTB` body. +fn copy_request_headers( + from: &Request, + to: &mut Request, + consent_forwarding: ConsentForwardingMode, +) { let headers_to_copy = [ - header::COOKIE, header::USER_AGENT, header::HeaderName::from_static("x-forwarded-for"), header::REFERER, @@ -450,6 +467,8 @@ fn copy_request_headers(from: &Request, to: &mut Request) { to.set_header(header_name, value); } } + + forward_cookie_header(from, to, consent_forwarding.strips_consent_cookies()); } /// Appends query parameters to a URL, handling both URLs with and without existing query strings. @@ -573,7 +592,13 @@ impl PrebidAuctionProvider { // Build user object — populate consent at both OpenRTB 2.6 top-level // and Prebid ext-based locations (dual placement). - let consent_ctx = request.user.consent.as_ref(); + // In cookies_only mode, body consent fields are omitted — consent + // travels exclusively through the forwarded Cookie header. 
+ let consent_ctx = if self.config.consent_forwarding.includes_body_consent() { + request.user.consent.as_ref() + } else { + None + }; let raw_tc = consent_ctx.and_then(|c| c.raw_tc_string.clone()); let user = Some(User { id: Some(request.user.id.clone()), @@ -898,7 +923,11 @@ impl AuctionProvider for PrebidAuctionProvider { Method::POST, format!("{}/openrtb2/auction", self.config.server_url), ); - copy_request_headers(context.request, &mut pbs_req); + copy_request_headers( + context.request, + &mut pbs_req, + self.config.consent_forwarding, + ); pbs_req .set_body_json(&openrtb) @@ -1074,6 +1103,7 @@ mod tests { debug_query_params: None, script_patterns: default_script_patterns(), bid_param_zone_overrides: HashMap::new(), + consent_forwarding: ConsentForwardingMode::Both, } } diff --git a/crates/common/src/lib.rs b/crates/common/src/lib.rs index 12a9f685..67527f6a 100644 --- a/crates/common/src/lib.rs +++ b/crates/common/src/lib.rs @@ -37,6 +37,7 @@ pub mod auction_config_types; pub mod auth; pub mod backend; pub mod consent; +pub mod consent_config; pub mod constants; pub mod cookies; pub mod creative; diff --git a/crates/common/src/publisher.rs b/crates/common/src/publisher.rs index 1af7a56f..5c2d9e49 100644 --- a/crates/common/src/publisher.rs +++ b/crates/common/src/publisher.rs @@ -3,11 +3,11 @@ use fastly::http::{header, StatusCode}; use fastly::{Body, Request, Response}; use crate::backend::BackendConfig; -use crate::consent::build_consent_context; -use crate::cookies::{create_synthetic_cookie, handle_request_cookies}; +use crate::consent::{build_consent_context, ConsentPipelineInput}; +use crate::cookies::handle_request_cookies; use crate::http_util::{serve_static_with_etag, RequestInfo}; -use crate::constants::{COOKIE_SYNTHETIC_ID, HEADER_X_COMPRESS_HINT, HEADER_X_SYNTHETIC_ID}; +use crate::constants::{HEADER_X_COMPRESS_HINT, HEADER_X_SYNTHETIC_ID}; use crate::cookies::set_synthetic_cookie; use crate::error::TrustedServerError; use 
crate::integrations::IntegrationRegistry; @@ -207,18 +207,22 @@ pub fn handle_publisher_request( // Parse cookies once for reuse by both consent extraction and synthetic ID logic. let cookie_jar = handle_request_cookies(&req)?; - // Extract, decode, and log consent signals (TCF, GPP, US Privacy, GPC) - // from the incoming request. The ConsentContext carries both raw strings - // (for OpenRTB forwarding) and decoded data (for observability). - let _consent_context = build_consent_context(cookie_jar.as_ref(), &req); - // Generate synthetic identifiers before the request body is consumed. let synthetic_id = get_or_generate_synthetic_id(settings, &req)?; - let has_synthetic_cookie = cookie_jar - .as_ref() - .and_then(|jar| jar.get(COOKIE_SYNTHETIC_ID)) - .is_some(); + // Extract, decode, and log consent signals (TCF, GPP, US Privacy, GPC) + // from the incoming request. The ConsentContext carries both raw strings + // (for OpenRTB forwarding) and decoded data (for observability). + // When a consent_store is configured, this also persists consent to KV + // and falls back to stored consent when cookies are absent. 
+ let geo = crate::geo::GeoInfo::from_request(&req); + let _consent_context = build_consent_context(&ConsentPipelineInput { + jar: cookie_jar.as_ref(), + req: &req, + config: &settings.consent, + geo: geo.as_ref(), + synthetic_id: Some(synthetic_id.as_str()), + }); log::debug!("Proxy synthetic IDs - trusted: {}", synthetic_id); let backend_name = BackendConfig::from_url( diff --git a/crates/common/src/settings.rs b/crates/common/src/settings.rs index eff23004..d7a67941 100644 --- a/crates/common/src/settings.rs +++ b/crates/common/src/settings.rs @@ -10,6 +10,7 @@ use url::Url; use validator::{Validate, ValidationError}; use crate::auction_config_types::AuctionConfig; +use crate::consent_config::ConsentConfig; use crate::error::TrustedServerError; pub const ENVIRONMENT_VARIABLE_PREFIX: &str = "TRUSTED_SERVER"; @@ -315,6 +316,8 @@ pub struct Settings { #[serde(default)] pub auction: AuctionConfig, #[serde(default)] + pub consent: ConsentConfig, + #[serde(default)] pub proxy: Proxy, } diff --git a/fastly.toml b/fastly.toml index d63815d9..4897169e 100644 --- a/fastly.toml +++ b/fastly.toml @@ -34,6 +34,10 @@ build = """ [[local_server.kv_stores.creative_store]] key = "placeholder" data = "placeholder" + + [[local_server.kv_stores.consent_store]] + key = "placeholder" + data = "placeholder" [local_server.secret_stores] [[local_server.secret_stores.signing_keys]] key = "ts-2025-10-A" diff --git a/trusted-server.toml b/trusted-server.toml index 0c0a6f7e..06811700 100644 --- a/trusted-server.toml +++ b/trusted-server.toml @@ -102,6 +102,32 @@ script_url = "https://securepubads.g.doubleclick.net/tag/js/gpt.js" cache_ttl_seconds = 3600 rewrite_script = true +# Consent forwarding configuration +# Controls how Trusted Server interprets and forwards privacy consent signals. +# All values shown below are the defaults — uncomment to override. 
+# [consent] +# mode = "interpreter" # "interpreter" (decode + forward) or "proxy" (raw passthrough) +# check_expiration = true # Check TCF consent freshness +# max_consent_age_days = 395 # Max age before consent is treated as expired (~13 months) + +# [consent.gdpr] +# applies_in = ["AT","BE","BG","HR","CY","CZ","DK","EE","FI","FR","DE","GR","HU","IE","IT","LV","LT","LU","MT","NL","PL","PT","RO","SK","SI","ES","SE","IS","LI","NO","GB"] + +# [consent.us_states] +# privacy_states = ["CA","VA","CO","CT","UT","MT","OR","TX","FL","DE","IA","NE","NH","NJ","TN","MN","MD","IN","KY","RI"] + +# [consent.us_privacy_defaults] +# notice_given = true # Has publisher actually shown CCPA notice? +# lspa_covered = false # Is publisher subject to LSPA? +# gpc_implies_optout = true # Should Sec-GPC: 1 trigger opt-out? + +# [consent.conflict_resolution] +# mode = "restrictive" # "restrictive" | "newest" | "permissive" +# freshness_threshold_days = 30 + +# KV Store consent persistence (requires a KV store named "consent_store" in fastly.toml) +# consent_store = "consent_store" + # Rewrite configuration for creative HTML/CSS processing # [rewrite] # Domains to exclude from first-party rewriting (supports wildcards like "*.example.com") From 801af9999764d11d3b9fc58068559eba2216cbf7 Mon Sep 17 00:00:00 2001 From: Christian Date: Mon, 2 Mar 2026 09:25:06 -0500 Subject: [PATCH 5/9] Harden consent handling across TCF and GPP signals --- crates/common/src/consent/mod.rs | 343 ++++++++++++++++++++++++++++--- 1 file changed, 313 insertions(+), 30 deletions(-) diff --git a/crates/common/src/consent/mod.rs b/crates/common/src/consent/mod.rs index 3636bbb5..5d1e84d5 100644 --- a/crates/common/src/consent/mod.rs +++ b/crates/common/src/consent/mod.rs @@ -42,12 +42,15 @@ use std::time::{SystemTime, UNIX_EPOCH}; use cookie::CookieJar; use fastly::Request; -use crate::consent_config::{ConsentConfig, ConsentMode}; +use crate::consent_config::{ConflictMode, ConsentConfig, ConsentMode}; use 
crate::geo::GeoInfo; /// Number of deciseconds in one day (86 400 seconds × 10). const DECISECONDS_PER_DAY: u64 = 86_400 * 10; +/// GPP section ID for EU TCF v2. +const GPP_SECTION_ID_TCF_EU_V2: u16 = 2; + /// Inputs to the consent processing pipeline. /// /// Bundles all data needed to extract, decode, classify, and validate @@ -90,15 +93,17 @@ pub fn build_consent_context(input: &ConsentPipelineInput<'_>) -> ConsentContext // In proxy mode, skip decoding entirely. if input.config.mode == ConsentMode::Proxy { let jur = jurisdiction::detect_jurisdiction(input.geo, input.config); - let gdpr_applies = signals.raw_tc_string.is_some(); + let gpp_section_ids = signals + .raw_gpp_sid + .as_deref() + .and_then(gpp::parse_gpp_sid_cookie); + let gdpr_applies = + has_eu_tcf_signal(signals.raw_tc_string.is_some(), gpp_section_ids.as_deref()); log::debug!("Consent proxy mode: jurisdiction={jur}, skipping decode"); return ConsentContext { raw_tc_string: signals.raw_tc_string, raw_gpp_string: signals.raw_gpp_string, - gpp_section_ids: signals - .raw_gpp_sid - .as_deref() - .and_then(gpp::parse_gpp_sid_cookie), + gpp_section_ids, raw_us_privacy: signals.raw_us_privacy, raw_ac_string: None, gdpr_applies, @@ -123,6 +128,7 @@ pub fn build_consent_context(input: &ConsentPipelineInput<'_>) -> ConsentContext let mut ctx = build_context_from_signals(&signals); ctx.jurisdiction = jurisdiction::detect_jurisdiction(input.geo, input.config); + apply_tcf_conflict_resolution(&mut ctx, input.config); apply_expiration_check(&mut ctx, input.config); apply_gpc_us_privacy(&mut ctx, input.config); @@ -135,32 +141,41 @@ pub fn build_consent_context(input: &ConsentPipelineInput<'_>) -> ConsentContext /// Marks TCF consent as expired when it exceeds the configured maximum age. /// -/// Clears the decoded `tcf` field (treated as no consent) but preserves the -/// raw string for proxy-mode forwarding. Re-evaluates `gdpr_applies` based -/// on whether a GPP EU TCF section is still available. 
+/// Clears whichever decoded TCF source is active (`tcf` or `gpp.eu_tcf`) but +/// preserves raw strings for proxy-mode forwarding. fn apply_expiration_check(ctx: &mut ConsentContext, config: &ConsentConfig) { if !config.check_expiration { return; } - let tcf = match &ctx.tcf { - Some(tcf) => tcf, - None => return, + let (is_expired, age_days) = { + let Some(tcf) = effective_tcf(ctx) else { + return; + }; + ( + is_consent_expired(tcf, config.max_consent_age_days), + consent_age_days(tcf), + ) }; - if !is_consent_expired(tcf, config.max_consent_age_days) { + if !is_expired { return; } - let age_days = consent_age_days(tcf); log::warn!( "TCF consent expired (age: {age_days}d, max: {}d)", config.max_consent_age_days ); ctx.expired = true; - ctx.tcf = None; - // Re-evaluate: GDPR may still apply if GPP has a TCF section. - ctx.gdpr_applies = ctx.gpp.as_ref().is_some_and(|g| g.eu_tcf.is_some()); + + if ctx.tcf.is_some() { + ctx.tcf = None; + } else if let Some(gpp) = &mut ctx.gpp { + gpp.eu_tcf = None; + } + + ctx.gdpr_applies = + has_eu_tcf_signal(ctx.raw_tc_string.is_some(), ctx.gpp_section_ids.as_deref()); } /// Constructs a US Privacy string from GPC when no explicit cookie exists @@ -245,9 +260,10 @@ pub fn build_context_from_signals(signals: &RawConsentSignals) -> ConsentContext .and_then(gpp::parse_gpp_sid_cookie) }); - // GDPR applies if we have a TCF string (standalone or from GPP). + // GDPR applies when an EU TCF signal is present via standalone TC string + // or via GPP section ID 2. let gdpr_applies = - decoded_tcf.is_some() || decoded_gpp.as_ref().is_some_and(|g| g.eu_tcf.is_some()); + has_eu_tcf_signal(signals.raw_tc_string.is_some(), gpp_section_ids.as_deref()); ConsentContext { raw_tc_string: signals.raw_tc_string.clone(), @@ -271,6 +287,88 @@ pub fn build_context_from_signals(signals: &RawConsentSignals) -> ConsentContext } } +/// Resolves whether an EU TCF signal is present from raw signal hints. 
+#[must_use] +fn has_eu_tcf_signal(raw_tc_present: bool, gpp_section_ids: Option<&[u16]>) -> bool { + raw_tc_present || gpp_section_ids.is_some_and(|ids| ids.contains(&GPP_SECTION_ID_TCF_EU_V2)) +} + +/// Returns the effective decoded TCF consent for enforcement decisions. +#[must_use] +fn effective_tcf(ctx: &ConsentContext) -> Option<&types::TcfConsent> { + ctx.tcf + .as_ref() + .or_else(|| ctx.gpp.as_ref().and_then(|g| g.eu_tcf.as_ref())) +} + +/// Returns whether TCF consent allows EID transmission. +#[must_use] +fn allows_eid_transmission(tcf: &types::TcfConsent) -> bool { + tcf.has_storage_consent() && tcf.has_personalized_ads_consent() +} + +/// Resolves conflicts between standalone TC and GPP EU TCF consents. +fn apply_tcf_conflict_resolution(ctx: &mut ConsentContext, config: &ConsentConfig) { + let Some(standalone_tcf) = ctx.tcf.clone() else { + return; + }; + let Some(gpp_tcf) = ctx.gpp.as_ref().and_then(|g| g.eu_tcf.as_ref()).cloned() else { + return; + }; + + let standalone_allows = allows_eid_transmission(&standalone_tcf); + let gpp_allows = allows_eid_transmission(&gpp_tcf); + + if standalone_allows == gpp_allows { + return; + } + + let select_gpp = match config.conflict_resolution.mode { + ConflictMode::Restrictive => !gpp_allows, + ConflictMode::Permissive => gpp_allows, + ConflictMode::Newest => select_newest_signal( + &standalone_tcf, + &gpp_tcf, + config.conflict_resolution.freshness_threshold_days, + ) + .unwrap_or(!gpp_allows), + }; + + let source = if select_gpp { "gpp" } else { "standalone" }; + log::info!( + "TCF conflict detected; mode={:?}, selected={source}", + config.conflict_resolution.mode + ); + + ctx.tcf = Some(if select_gpp { gpp_tcf } else { standalone_tcf }); +} + +/// Returns whether GPP should win under the `newest` strategy. 
+#[must_use] +fn select_newest_signal( + standalone_tcf: &types::TcfConsent, + gpp_tcf: &types::TcfConsent, + freshness_threshold_days: u32, +) -> Option { + let threshold_ds = u64::from(freshness_threshold_days) * DECISECONDS_PER_DAY; + + let standalone_delta = standalone_tcf + .last_updated_ds + .saturating_sub(gpp_tcf.last_updated_ds); + if standalone_delta > threshold_ds { + return Some(false); + } + + let gpp_delta = gpp_tcf + .last_updated_ds + .saturating_sub(standalone_tcf.last_updated_ds); + if gpp_delta > threshold_ds { + return Some(true); + } + + None +} + /// Returns the current time in deciseconds since the Unix epoch. pub(crate) fn now_deciseconds() -> u64 { SystemTime::now() @@ -338,15 +436,10 @@ pub fn gate_eids_by_consent( return None; } - // Resolve the effective TCF consent — standalone or from GPP. - let tcf = consent_ctx.and_then(|ctx| { - ctx.tcf - .as_ref() - .or_else(|| ctx.gpp.as_ref().and_then(|g| g.eu_tcf.as_ref())) - }); + let tcf = consent_ctx.and_then(effective_tcf); match tcf { - Some(tcf) if tcf.has_storage_consent() && tcf.has_personalized_ads_consent() => Some(eids), + Some(tcf) if allows_eid_transmission(tcf) => Some(eids), Some(_) => { log::info!("EIDs stripped: TCF Purpose 1 or 4 consent missing"); None @@ -390,6 +483,7 @@ fn try_kv_fallback(input: &ConsentPipelineInput<'_>) -> Option { // Re-detect jurisdiction from current geo (may differ from stored value). 
ctx.jurisdiction = jurisdiction::detect_jurisdiction(input.geo, input.config); + apply_tcf_conflict_resolution(&mut ctx, input.config); apply_expiration_check(&mut ctx, input.config); apply_gpc_us_privacy(&mut ctx, input.config); @@ -460,7 +554,7 @@ fn log_consent_context(ctx: &ConsentContext) { }; let gpp_status = signal_status(ctx.gpp.is_some(), ctx.raw_gpp_string.is_some()); - let usp_status = signal_status(ctx.us_privacy.is_some(), false); + let usp_status = signal_status(ctx.us_privacy.is_some(), ctx.raw_us_privacy.is_some()); log::info!( "Consent context: jurisdiction={}, tcf={tcf_status}, gpp={gpp_status}, \ @@ -476,11 +570,79 @@ fn log_consent_context(ctx: &ConsentContext) { mod tests { use fastly::Request; - use super::{build_consent_context, should_try_kv_fallback, ConsentPipelineInput}; - use crate::consent::types::RawConsentSignals; - use crate::consent_config::{ConsentConfig, ConsentMode}; + use super::{ + apply_expiration_check, apply_tcf_conflict_resolution, build_consent_context, + build_context_from_signals, should_try_kv_fallback, ConsentPipelineInput, + }; + use crate::consent::types::{ConsentContext, GppConsent, RawConsentSignals, TcfConsent}; + use crate::consent_config::{ConflictMode, ConsentConfig, ConsentMode}; use crate::cookies::parse_cookies_to_jar; + fn make_tcf(last_updated_ds: u64, allows_eids: bool) -> TcfConsent { + TcfConsent { + version: 2, + cmp_id: 1, + cmp_version: 1, + consent_screen: 0, + consent_language: "EN".to_owned(), + vendor_list_version: 1, + tcf_policy_version: 4, + created_ds: last_updated_ds, + last_updated_ds, + purpose_consents: vec![ + true, + false, + false, + allows_eids, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + ], + purpose_legitimate_interests: vec![false; 24], + vendor_consents: Vec::new(), + vendor_legitimate_interests: Vec::new(), + special_feature_opt_ins: 
vec![false; 12], + } + } + + fn make_conflicting_context( + standalone_last_updated_ds: u64, + standalone_allows_eids: bool, + gpp_last_updated_ds: u64, + gpp_allows_eids: bool, + ) -> ConsentContext { + ConsentContext { + raw_tc_string: Some("standalone".to_owned()), + raw_gpp_string: Some("gpp".to_owned()), + gpp_section_ids: Some(vec![2]), + gdpr_applies: true, + tcf: Some(make_tcf(standalone_last_updated_ds, standalone_allows_eids)), + gpp: Some(GppConsent { + version: 1, + section_ids: vec![2], + eu_tcf: Some(make_tcf(gpp_last_updated_ds, gpp_allows_eids)), + }), + ..ConsentContext::default() + } + } + #[test] fn kv_fallback_allowed_when_only_gpc_present() { let signals = RawConsentSignals { @@ -536,4 +698,125 @@ mod tests { ); assert!(ctx.tcf.is_none(), "should skip TCF decoding in proxy mode"); } + + #[test] + fn proxy_mode_marks_gdpr_when_gpp_sid_contains_tcf_section() { + let jar = parse_cookies_to_jar("__gpp_sid=2,6"); + let req = Request::get("https://example.com"); + let config = ConsentConfig { + mode: ConsentMode::Proxy, + ..ConsentConfig::default() + }; + + let ctx = build_consent_context(&ConsentPipelineInput { + jar: Some(&jar), + req: &req, + config: &config, + geo: None, + synthetic_id: None, + }); + + assert!( + ctx.gdpr_applies, + "should set gdpr_applies when __gpp_sid includes section 2" + ); + } + + #[test] + fn marks_gdpr_when_gpp_sid_contains_tcf_section_even_if_gpp_decode_fails() { + let signals = RawConsentSignals { + raw_gpp_string: Some("invalid-gpp".to_owned()), + raw_gpp_sid: Some("2,6".to_owned()), + ..RawConsentSignals::default() + }; + + let ctx = build_context_from_signals(&signals); + + assert!( + ctx.gdpr_applies, + "should set gdpr_applies when section 2 exists even if __gpp decode fails" + ); + } + + #[test] + fn conflict_resolution_restrictive_prefers_denial() { + let mut ctx = make_conflicting_context(10, true, 20, false); + let config = ConsentConfig::default(); + + apply_tcf_conflict_resolution(&mut ctx, &config); + + let 
tcf = ctx + .tcf + .expect("should keep an effective TCF after resolution"); + assert!( + !tcf.has_personalized_ads_consent(), + "restrictive mode should choose the denying signal" + ); + } + + #[test] + fn conflict_resolution_permissive_prefers_grant() { + let mut ctx = make_conflicting_context(10, true, 20, false); + let mut config = ConsentConfig::default(); + config.conflict_resolution.mode = ConflictMode::Permissive; + + apply_tcf_conflict_resolution(&mut ctx, &config); + + let tcf = ctx + .tcf + .expect("should keep an effective TCF after resolution"); + assert!( + tcf.has_personalized_ads_consent(), + "permissive mode should choose the granting signal" + ); + } + + #[test] + fn conflict_resolution_newest_prefers_fresher_signal() { + let mut ctx = make_conflicting_context(2 * super::DECISECONDS_PER_DAY, true, 0, false); + let mut config = ConsentConfig::default(); + config.conflict_resolution.mode = ConflictMode::Newest; + config.conflict_resolution.freshness_threshold_days = 1; + + apply_tcf_conflict_resolution(&mut ctx, &config); + + let tcf = ctx + .tcf + .expect("should keep an effective TCF after resolution"); + assert!( + tcf.has_personalized_ads_consent(), + "newest mode should pick the signal that is newer than threshold" + ); + } + + #[test] + fn expiration_clears_gpp_embedded_tcf() { + let mut ctx = ConsentContext { + raw_gpp_string: Some("DBACNY".to_owned()), + gpp_section_ids: Some(vec![2]), + gdpr_applies: true, + gpp: Some(GppConsent { + version: 1, + section_ids: vec![2], + eu_tcf: Some(make_tcf(0, true)), + }), + ..ConsentContext::default() + }; + let config = ConsentConfig { + max_consent_age_days: 0, + ..ConsentConfig::default() + }; + + apply_expiration_check(&mut ctx, &config); + + assert!(ctx.expired, "should mark embedded TCF as expired"); + assert!( + ctx.gpp.as_ref().is_some_and(|g| g.eu_tcf.is_none()), + "should clear decoded GPP embedded TCF when expired" + ); + assert!( + ctx.gdpr_applies, + "should keep gdpr_applies true when raw 
EU TCF signal is still present" + ); + } } From 2b32926534fb7e00a92b37f63f0b5732f24d8ebb Mon Sep 17 00:00:00 2001 From: Christian Date: Fri, 6 Mar 2026 12:36:59 -0500 Subject: [PATCH 6/9] Address PR review feedback on consent pipeline Fix expired TCF leaking through GPP fallback by clearing both sources. Add gpp_section_ids to is_empty() check and KV fingerprint hash. Generate synthetic ID before consent pipeline in auction endpoint so KV fallback and write operations work correctly. Add max-length guard on TC strings before base64 decoding. Use jurisdiction to inform regs.gdpr instead of falsely emitting 0. Defer cloning in TCF conflict resolution and remove dead code. --- crates/common/src/auction/endpoints.rs | 14 ++++++-- crates/common/src/auction/formats.rs | 15 ++++----- crates/common/src/consent/kv.rs | 12 +++++++ crates/common/src/consent/mod.rs | 41 ++++++++++-------------- crates/common/src/consent/tcf.rs | 20 ++++++++++-- crates/common/src/consent/types.rs | 1 + crates/common/src/integrations/prebid.rs | 21 +++++++++++- 7 files changed, 87 insertions(+), 37 deletions(-) diff --git a/crates/common/src/auction/endpoints.rs b/crates/common/src/auction/endpoints.rs index c155181e..bf039358 100644 --- a/crates/common/src/auction/endpoints.rs +++ b/crates/common/src/auction/endpoints.rs @@ -9,6 +9,7 @@ use crate::cookies::handle_request_cookies; use crate::error::TrustedServerError; use crate::geo::GeoInfo; use crate::settings::Settings; +use crate::synthetic::get_or_generate_synthetic_id; use super::formats::{convert_to_openrtb_response, convert_tsjs_to_auction_request}; use super::types::AuctionContext; @@ -44,6 +45,14 @@ pub async fn handle_auction( body.ad_units.len() ); + // Generate synthetic ID early so the consent pipeline can use it for + // KV Store fallback/write operations. 
+ let synthetic_id = get_or_generate_synthetic_id(settings, &req).change_context( + TrustedServerError::Auction { + message: "Failed to generate synthetic ID".to_string(), + }, + )?; + // Extract consent from request cookies, headers, and geo. let cookie_jar = handle_request_cookies(&req)?; let geo = GeoInfo::from_request(&req); @@ -52,11 +61,12 @@ pub async fn handle_auction( req: &req, config: &settings.consent, geo: geo.as_ref(), - synthetic_id: None, // Auction requests don't carry a Synthetic ID yet. + synthetic_id: Some(synthetic_id.as_str()), }); // Convert tsjs request format to auction request - let auction_request = convert_tsjs_to_auction_request(&body, settings, &req, consent_context)?; + let auction_request = + convert_tsjs_to_auction_request(&body, settings, &req, consent_context, &synthetic_id)?; // Create auction context let context = AuctionContext { diff --git a/crates/common/src/auction/formats.rs b/crates/common/src/auction/formats.rs index c91ea62c..4849b580 100644 --- a/crates/common/src/auction/formats.rs +++ b/crates/common/src/auction/formats.rs @@ -20,7 +20,7 @@ use crate::error::TrustedServerError; use crate::geo::GeoInfo; use crate::openrtb::{OpenRtbBid, OpenRtbResponse, ResponseExt, SeatBid}; use crate::settings::Settings; -use crate::synthetic::{generate_synthetic_id, get_or_generate_synthetic_id}; +use crate::synthetic::generate_synthetic_id; use super::orchestrator::OrchestrationResult; use super::types::{ @@ -72,23 +72,22 @@ pub struct BannerUnit { /// (the `/auction` endpoint handler) and forwarded through to the /// [`OpenRTB`][`crate::openrtb::OpenRtbRequest`] bid request. /// +/// The `synthetic_id` is generated by the caller before the consent pipeline +/// runs, so that KV Store operations can use it as a key. 
+/// /// # Errors /// /// Returns an error if: -/// - Synthetic ID generation fails +/// - Fresh synthetic ID generation fails /// - Request contains invalid banner sizes (must be [width, height]) pub fn convert_tsjs_to_auction_request( body: &AdRequest, settings: &Settings, req: &Request, consent: ConsentContext, + synthetic_id: &str, ) -> Result> { - // Generate synthetic ID - let synthetic_id = get_or_generate_synthetic_id(settings, req).change_context( - TrustedServerError::Auction { - message: "Failed to generate synthetic ID".to_string(), - }, - )?; + let synthetic_id = synthetic_id.to_owned(); let fresh_id = generate_synthetic_id(settings, req).change_context(TrustedServerError::Auction { message: "Failed to generate fresh ID".to_string(), diff --git a/crates/common/src/consent/kv.rs b/crates/common/src/consent/kv.rs index 2cef966d..9a4fb50a 100644 --- a/crates/common/src/consent/kv.rs +++ b/crates/common/src/consent/kv.rs @@ -181,6 +181,18 @@ pub fn consent_fingerprint(ctx: &ConsentContext) -> String { hash_optional(&mut hasher, ctx.raw_ac_string.as_deref()); hasher.update(if ctx.gpc { b"1" } else { b"0" }); + // Include GPP section IDs so SID-only changes trigger a KV write. 
+ if let Some(sids) = &ctx.gpp_section_ids { + let mut sorted = sids.clone(); + sorted.sort_unstable(); + for sid in &sorted { + hasher.update(sid.to_string().as_bytes()); + hasher.update(b"\xFF"); + } + } else { + hasher.update(b"\x00"); + } + let result = hasher.finalize(); hex::encode(&result[..8]) // 16 hex chars = 8 bytes = 64 bits } diff --git a/crates/common/src/consent/mod.rs b/crates/common/src/consent/mod.rs index 5d1e84d5..4064c816 100644 --- a/crates/common/src/consent/mod.rs +++ b/crates/common/src/consent/mod.rs @@ -168,9 +168,8 @@ fn apply_expiration_check(ctx: &mut ConsentContext, config: &ConsentConfig) { ); ctx.expired = true; - if ctx.tcf.is_some() { - ctx.tcf = None; - } else if let Some(gpp) = &mut ctx.gpp { + ctx.tcf = None; + if let Some(gpp) = &mut ctx.gpp { gpp.eu_tcf = None; } @@ -196,16 +195,6 @@ fn apply_gpc_us_privacy(ctx: &mut ConsentContext, config: &ConsentConfig) { } } -/// Extracts raw consent signals and logs them (without decoding). -/// -/// Use this when you need the raw signals but don't need decoded data. -/// Prefer [`build_consent_context`] for the full pipeline. -pub fn extract_and_log_consent(jar: Option<&CookieJar>, req: &Request) -> RawConsentSignals { - let signals = extract_consent_signals(jar, req); - log_consent_signals(&signals); - signals -} - /// Decodes a raw consent string, logging a warning on failure. /// /// Returns [`None`] and logs at `warn` level if decoding fails, preserving @@ -309,15 +298,15 @@ fn allows_eid_transmission(tcf: &types::TcfConsent) -> bool { /// Resolves conflicts between standalone TC and GPP EU TCF consents. 
fn apply_tcf_conflict_resolution(ctx: &mut ConsentContext, config: &ConsentConfig) { - let Some(standalone_tcf) = ctx.tcf.clone() else { + let Some(standalone_tcf) = ctx.tcf.as_ref() else { return; }; - let Some(gpp_tcf) = ctx.gpp.as_ref().and_then(|g| g.eu_tcf.as_ref()).cloned() else { + let Some(gpp_tcf) = ctx.gpp.as_ref().and_then(|g| g.eu_tcf.as_ref()) else { return; }; - let standalone_allows = allows_eid_transmission(&standalone_tcf); - let gpp_allows = allows_eid_transmission(&gpp_tcf); + let standalone_allows = allows_eid_transmission(standalone_tcf); + let gpp_allows = allows_eid_transmission(gpp_tcf); if standalone_allows == gpp_allows { return; @@ -327,8 +316,8 @@ fn apply_tcf_conflict_resolution(ctx: &mut ConsentContext, config: &ConsentConfi ConflictMode::Restrictive => !gpp_allows, ConflictMode::Permissive => gpp_allows, ConflictMode::Newest => select_newest_signal( - &standalone_tcf, - &gpp_tcf, + standalone_tcf, + gpp_tcf, config.conflict_resolution.freshness_threshold_days, ) .unwrap_or(!gpp_allows), @@ -340,7 +329,12 @@ fn apply_tcf_conflict_resolution(ctx: &mut ConsentContext, config: &ConsentConfi config.conflict_resolution.mode ); - ctx.tcf = Some(if select_gpp { gpp_tcf } else { standalone_tcf }); + // Clone only the winner after the decision is made. + ctx.tcf = Some(if select_gpp { + gpp_tcf.clone() + } else { + return; // Standalone is already in ctx.tcf — nothing to do. + }); } /// Returns whether GPP should win under the `newest` strategy. @@ -371,11 +365,10 @@ fn select_newest_signal( /// Returns the current time in deciseconds since the Unix epoch. pub(crate) fn now_deciseconds() -> u64 { - SystemTime::now() + let dur = SystemTime::now() .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_millis() as u64 - / 100 + .unwrap_or_default(); + dur.as_secs() * 10 + u64::from(dur.subsec_millis()) / 100 } /// Returns the age of a TCF consent string in days. 
diff --git a/crates/common/src/consent/tcf.rs b/crates/common/src/consent/tcf.rs index cba7ec8a..6a7114a5 100644 --- a/crates/common/src/consent/tcf.rs +++ b/crates/common/src/consent/tcf.rs @@ -45,6 +45,12 @@ use error_stack::Report; use super::types::{ConsentDecodeError, TcfConsent}; +/// Maximum length of a raw TC String before decoding. +/// +/// Real TC Strings are typically under 1 KB. This limit prevents malicious +/// cookies from triggering large heap allocations during base64 decoding. +const MAX_TC_STRING_LEN: usize = 4096; + /// Decodes a TC String v2 into a [`TcfConsent`] struct. /// /// Only the core segment is decoded. Additional segments (separated by `.`) @@ -52,9 +58,19 @@ use super::types::{ConsentDecodeError, TcfConsent}; /// /// # Errors /// -/// - [`ConsentDecodeError::InvalidTcString`] if base64 decoding fails, the -/// version is not 2, or the bitfield is too short. +/// - [`ConsentDecodeError::InvalidTcString`] if the string exceeds +/// [`MAX_TC_STRING_LEN`], base64 decoding fails, the version is not 2, or +/// the bitfield is too short. pub fn decode_tc_string(tc_string: &str) -> Result> { + if tc_string.len() > MAX_TC_STRING_LEN { + return Err(Report::new(ConsentDecodeError::InvalidTcString { + reason: format!( + "TC string too long: {} bytes, max {MAX_TC_STRING_LEN}", + tc_string.len() + ), + })); + } + // TC String may have multiple segments separated by '.' // The first segment is always the core segment. 
let core_segment = tc_string.split('.').next().unwrap_or(tc_string); diff --git a/crates/common/src/consent/types.rs b/crates/common/src/consent/types.rs index 9098d78b..6b1a4dac 100644 --- a/crates/common/src/consent/types.rs +++ b/crates/common/src/consent/types.rs @@ -155,6 +155,7 @@ impl ConsentContext { pub fn is_empty(&self) -> bool { self.raw_tc_string.is_none() && self.raw_gpp_string.is_none() + && self.gpp_section_ids.is_none() && self.raw_us_privacy.is_none() && self.raw_ac_string.is_none() && self.tcf.is_none() diff --git a/crates/common/src/integrations/prebid.rs b/crates/common/src/integrations/prebid.rs index 67ab13ce..a4f762c0 100644 --- a/crates/common/src/integrations/prebid.rs +++ b/crates/common/src/integrations/prebid.rs @@ -701,13 +701,32 @@ impl PrebidAuctionProvider { let has_data = ctx.gdpr_applies || ctx.raw_us_privacy.is_some() || ctx.raw_gpp_string.is_some() + || ctx.gpp_section_ids.is_some() || ctx.gpc; if !has_data { return None; } - let gdpr = if ctx.gdpr_applies { Some(1) } else { Some(0) }; + // Use jurisdiction to inform the GDPR flag: if we know the user is in + // a GDPR jurisdiction, signal that even without a TCF string (e.g., + // GPC-only requests from EU users). When jurisdiction is unknown and + // no TCF signal exists, omit the field rather than falsely signaling + // "GDPR does not apply." + let in_gdpr_jurisdiction = matches!( + ctx.jurisdiction, + crate::consent::jurisdiction::Jurisdiction::Gdpr + ); + let gdpr = if ctx.gdpr_applies || in_gdpr_jurisdiction { + Some(1) + } else if matches!( + ctx.jurisdiction, + crate::consent::jurisdiction::Jurisdiction::Unknown + ) { + None + } else { + Some(0) + }; // Build ext first so the dual-placement fields are cloned once from // ConsentContext (into ext), then once more into Regs top-level. 
From 2eff746cf9c9cc14c4d5060207acc78ca8df3124 Mon Sep 17 00:00:00 2001 From: Christian Date: Mon, 9 Mar 2026 13:26:15 -0500 Subject: [PATCH 7/9] Gate SSC creation on consent and handle revocation Check TCF/GPP consent signals before setting the Synthetic Session Cookie. EU/UK users require explicit opt-in (TCF Purpose 1), US users are allowed unless they opt out via US Privacy or GPC, and non-regulated jurisdictions are unaffected. When consent is absent but an existing SSC cookie is present, the cookie is expired and the corresponding KV store entry is deleted. Revocation targets the cookie value directly (not the x-synthetic-id header) to avoid mismatched deletions. Closes #464, closes #465, closes #466. --- crates/common/src/consent/kv.rs | 23 +++ crates/common/src/consent/mod.rs | 269 ++++++++++++++++++++++++++++++- crates/common/src/cookies.rs | 12 ++ crates/common/src/publisher.rs | 83 ++++++++-- 4 files changed, 372 insertions(+), 15 deletions(-) diff --git a/crates/common/src/consent/kv.rs b/crates/common/src/consent/kv.rs index 9a4fb50a..4fabd79c 100644 --- a/crates/common/src/consent/kv.rs +++ b/crates/common/src/consent/kv.rs @@ -364,6 +364,29 @@ pub fn save_consent_to_kv( } } } + +/// Deletes a consent entry from the KV Store for a given Synthetic ID. +/// +/// Used when a user revokes consent — the existing SSC cookie is being +/// expired, so the persisted consent data must also be removed. +/// +/// Errors are logged but never propagated — KV Store failures must not +/// break the request pipeline. 
+pub fn delete_consent_from_kv(store_name: &str, synthetic_id: &str) { + let Some(store) = open_store(store_name) else { + return; + }; + + match store.delete(synthetic_id) { + Ok(()) => { + log::info!("Deleted consent KV entry for '{synthetic_id}' (consent revoked)"); + } + Err(e) => { + log::warn!("Failed to delete consent KV entry for '{synthetic_id}': {e}"); + } + } +} + // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- diff --git a/crates/common/src/consent/mod.rs b/crates/common/src/consent/mod.rs index 4064c816..cc768e08 100644 --- a/crates/common/src/consent/mod.rs +++ b/crates/common/src/consent/mod.rs @@ -35,7 +35,9 @@ pub mod types; pub mod us_privacy; pub use extraction::extract_consent_signals; -pub use types::{ConsentContext, ConsentSource, PrivacyFlag, RawConsentSignals, TcfConsent}; +pub use types::{ + ConsentContext, ConsentSource, PrivacyFlag, RawConsentSignals, TcfConsent, UsPrivacy, +}; use std::time::{SystemTime, UNIX_EPOCH}; @@ -449,6 +451,44 @@ pub fn gate_eids_by_consent( } } +// --------------------------------------------------------------------------- +// SSC consent gating +// --------------------------------------------------------------------------- + +/// Determines whether SSC (Synthetic Session Cookie) creation is permitted +/// based on the user's consent and detected jurisdiction. +/// +/// The decision follows the jurisdiction's consent model: +/// +/// - **GDPR (EU/UK)**: opt-in required — TCF Purpose 1 (store/access +/// information on a device) must be explicitly consented. If no TCF data is +/// available under GDPR, consent is assumed absent and SSC is blocked. +/// - **US state privacy**: opt-out model — SSC is allowed unless the user has +/// explicitly opted out via the US Privacy string or Global Privacy Control. +/// - **Non-regulated / Unknown**: SSC is allowed (no consent requirement). 
+#[must_use] +pub fn allows_ssc_creation(ctx: &ConsentContext) -> bool { + match &ctx.jurisdiction { + jurisdiction::Jurisdiction::Gdpr => { + // EU/UK: explicit opt-in required (TCF Purpose 1 = store/access device). + match effective_tcf(ctx) { + Some(tcf) => tcf.has_storage_consent(), + None => false, + } + } + jurisdiction::Jurisdiction::UsState(_) => { + // US: opt-out model — allow unless user explicitly opted out. + if let Some(usp) = &ctx.us_privacy { + usp.opt_out_sale != PrivacyFlag::Yes + } else { + // No US Privacy string — fall back to GPC signal. + !ctx.gpc + } + } + jurisdiction::Jurisdiction::NonRegulated | jurisdiction::Jurisdiction::Unknown => true, + } +} + // --------------------------------------------------------------------------- // KV Store integration helpers // --------------------------------------------------------------------------- @@ -564,10 +604,14 @@ mod tests { use fastly::Request; use super::{ - apply_expiration_check, apply_tcf_conflict_resolution, build_consent_context, - build_context_from_signals, should_try_kv_fallback, ConsentPipelineInput, + allows_ssc_creation, apply_expiration_check, apply_tcf_conflict_resolution, + build_consent_context, build_context_from_signals, should_try_kv_fallback, + ConsentPipelineInput, + }; + use crate::consent::jurisdiction::Jurisdiction; + use crate::consent::types::{ + ConsentContext, GppConsent, PrivacyFlag, RawConsentSignals, TcfConsent, UsPrivacy, }; - use crate::consent::types::{ConsentContext, GppConsent, RawConsentSignals, TcfConsent}; use crate::consent_config::{ConflictMode, ConsentConfig, ConsentMode}; use crate::cookies::parse_cookies_to_jar; @@ -812,4 +856,221 @@ mod tests { "should keep gdpr_applies true when raw EU TCF signal is still present" ); } + + // ----------------------------------------------------------------------- + // allows_ssc_creation tests + // ----------------------------------------------------------------------- + + /// Helper: builds a TCF consent with 
configurable Purpose 1 (storage). + fn make_tcf_with_storage(has_storage: bool) -> TcfConsent { + TcfConsent { + version: 2, + cmp_id: 1, + cmp_version: 1, + consent_screen: 0, + consent_language: "EN".to_owned(), + vendor_list_version: 1, + tcf_policy_version: 4, + created_ds: 0, + last_updated_ds: 0, + purpose_consents: vec![ + has_storage, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + ], + purpose_legitimate_interests: vec![false; 24], + vendor_consents: Vec::new(), + vendor_legitimate_interests: Vec::new(), + special_feature_opt_ins: vec![false; 12], + } + } + + #[test] + fn ssc_allowed_gdpr_with_storage_consent() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::Gdpr, + tcf: Some(make_tcf_with_storage(true)), + gdpr_applies: true, + ..ConsentContext::default() + }; + assert!( + allows_ssc_creation(&ctx), + "GDPR + TCF Purpose 1 consented should allow SSC" + ); + } + + #[test] + fn ssc_blocked_gdpr_without_storage_consent() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::Gdpr, + tcf: Some(make_tcf_with_storage(false)), + gdpr_applies: true, + ..ConsentContext::default() + }; + assert!( + !allows_ssc_creation(&ctx), + "GDPR + TCF Purpose 1 not consented should block SSC" + ); + } + + #[test] + fn ssc_blocked_gdpr_no_tcf_data() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::Gdpr, + tcf: None, + gpp: None, + gdpr_applies: true, + ..ConsentContext::default() + }; + assert!( + !allows_ssc_creation(&ctx), + "GDPR with no TCF data should block SSC" + ); + } + + #[test] + fn ssc_allowed_gdpr_via_gpp_embedded_tcf() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::Gdpr, + tcf: None, + gpp: Some(GppConsent { + version: 1, + section_ids: vec![2], + eu_tcf: Some(make_tcf_with_storage(true)), + }), + gdpr_applies: true, + ..ConsentContext::default() + }; + assert!( 
+ allows_ssc_creation(&ctx), + "GDPR + GPP embedded TCF with P1 consent should allow SSC" + ); + } + + #[test] + fn ssc_allowed_us_state_no_optout() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("CA".to_owned()), + us_privacy: Some(UsPrivacy { + version: 1, + notice_given: PrivacyFlag::Yes, + opt_out_sale: PrivacyFlag::No, + lspa_covered: PrivacyFlag::NotApplicable, + }), + ..ConsentContext::default() + }; + assert!( + allows_ssc_creation(&ctx), + "US state + no opt-out should allow SSC" + ); + } + + #[test] + fn ssc_blocked_us_state_opted_out() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("CA".to_owned()), + us_privacy: Some(UsPrivacy { + version: 1, + notice_given: PrivacyFlag::Yes, + opt_out_sale: PrivacyFlag::Yes, + lspa_covered: PrivacyFlag::NotApplicable, + }), + ..ConsentContext::default() + }; + assert!( + !allows_ssc_creation(&ctx), + "US state + opt-out should block SSC" + ); + } + + #[test] + fn ssc_blocked_us_state_gpc_implies_optout() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("CA".to_owned()), + us_privacy: None, + gpc: true, + ..ConsentContext::default() + }; + assert!( + !allows_ssc_creation(&ctx), + "US state + GPC=true with no US Privacy string should block SSC" + ); + } + + #[test] + fn ssc_allowed_us_state_no_signals() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("CA".to_owned()), + us_privacy: None, + gpc: false, + ..ConsentContext::default() + }; + assert!( + allows_ssc_creation(&ctx), + "US state + no opt-out signals should allow SSC (opt-out model)" + ); + } + + #[test] + fn ssc_allowed_non_regulated() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::NonRegulated, + ..ConsentContext::default() + }; + assert!( + allows_ssc_creation(&ctx), + "non-regulated jurisdiction should always allow SSC" + ); + } + + #[test] + fn ssc_allowed_unknown_jurisdiction() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::Unknown, + 
..ConsentContext::default() + }; + assert!( + allows_ssc_creation(&ctx), + "unknown jurisdiction should allow SSC (no geo data available)" + ); + } + + #[test] + fn ssc_us_privacy_not_applicable_allows_ssc() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("VA".to_owned()), + us_privacy: Some(UsPrivacy { + version: 1, + notice_given: PrivacyFlag::NotApplicable, + opt_out_sale: PrivacyFlag::NotApplicable, + lspa_covered: PrivacyFlag::NotApplicable, + }), + ..ConsentContext::default() + }; + assert!( + allows_ssc_creation(&ctx), + "US Privacy with opt_out=N/A should allow SSC" + ); + } } diff --git a/crates/common/src/cookies.rs b/crates/common/src/cookies.rs index 0ab90422..8757cae5 100644 --- a/crates/common/src/cookies.rs +++ b/crates/common/src/cookies.rs @@ -153,6 +153,18 @@ pub fn set_synthetic_cookie( ); } +/// Expires the synthetic ID cookie by setting `Max-Age=0`. +/// +/// Used when a user revokes consent — the browser will delete the cookie +/// on receipt of this header. 
+pub fn expire_synthetic_cookie(settings: &Settings, response: &mut fastly::Response) { + let cookie = format!( + "{}=; Domain={}; Path=/; Secure; SameSite=Lax; Max-Age=0", + COOKIE_SYNTHETIC_ID, settings.publisher.cookie_domain, + ); + response.append_header(header::SET_COOKIE, cookie); +} + #[cfg(test)] mod tests { use crate::test_support::tests::create_test_settings; diff --git a/crates/common/src/publisher.rs b/crates/common/src/publisher.rs index 5c2d9e49..33d514ee 100644 --- a/crates/common/src/publisher.rs +++ b/crates/common/src/publisher.rs @@ -3,13 +3,11 @@ use fastly::http::{header, StatusCode}; use fastly::{Body, Request, Response}; use crate::backend::BackendConfig; -use crate::consent::{build_consent_context, ConsentPipelineInput}; -use crate::cookies::handle_request_cookies; -use crate::http_util::{serve_static_with_etag, RequestInfo}; - -use crate::constants::{HEADER_X_COMPRESS_HINT, HEADER_X_SYNTHETIC_ID}; -use crate::cookies::set_synthetic_cookie; +use crate::consent::{allows_ssc_creation, build_consent_context, ConsentPipelineInput}; +use crate::constants::{COOKIE_SYNTHETIC_ID, HEADER_X_COMPRESS_HINT, HEADER_X_SYNTHETIC_ID}; +use crate::cookies::{expire_synthetic_cookie, handle_request_cookies, set_synthetic_cookie}; use crate::error::TrustedServerError; +use crate::http_util::{serve_static_with_etag, RequestInfo}; use crate::integrations::IntegrationRegistry; use crate::rsc_flight::RscFlightUrlRewriter; use crate::settings::Settings; @@ -207,23 +205,38 @@ pub fn handle_publisher_request( // Parse cookies once for reuse by both consent extraction and synthetic ID logic. let cookie_jar = handle_request_cookies(&req)?; + // Capture the current SSC cookie value for revocation handling. + // This must come from the cookie itself (not the x-synthetic-id header) + // to ensure KV deletion targets the same identifier being revoked. 
+ let existing_ssc_cookie = cookie_jar + .as_ref() + .and_then(|jar| jar.get(COOKIE_SYNTHETIC_ID)) + .map(|cookie| cookie.value().to_owned()); + // Generate synthetic identifiers before the request body is consumed. + // Always generated for internal use (KV lookups, logging) even when + // consent is absent — the cookie is only *set* when consent allows it. let synthetic_id = get_or_generate_synthetic_id(settings, &req)?; // Extract, decode, and log consent signals (TCF, GPP, US Privacy, GPC) // from the incoming request. The ConsentContext carries both raw strings - // (for OpenRTB forwarding) and decoded data (for observability). + // (for OpenRTB forwarding) and decoded data (for enforcement). // When a consent_store is configured, this also persists consent to KV // and falls back to stored consent when cookies are absent. let geo = crate::geo::GeoInfo::from_request(&req); - let _consent_context = build_consent_context(&ConsentPipelineInput { + let consent_context = build_consent_context(&ConsentPipelineInput { jar: cookie_jar.as_ref(), req: &req, config: &settings.consent, geo: geo.as_ref(), synthetic_id: Some(synthetic_id.as_str()), }); - log::debug!("Proxy synthetic IDs - trusted: {}", synthetic_id); + let ssc_allowed = allows_ssc_creation(&consent_context); + log::debug!( + "Proxy synthetic IDs - trusted: {}, ssc_allowed: {}", + synthetic_id, + ssc_allowed, + ); let backend_name = BackendConfig::from_url( &settings.publisher.origin_url, @@ -319,8 +332,29 @@ pub fn handle_publisher_request( ); } - response.set_header(HEADER_X_SYNTHETIC_ID, synthetic_id.as_str()); - set_synthetic_cookie(settings, &mut response, synthetic_id.as_str()); + // Consent-gated SSC creation: + // - Consent given → set synthetic ID header + cookie. + // - Consent absent + existing cookie → revoke (expire cookie + delete KV entry). + // - Consent absent + no cookie → do nothing. 
+ if ssc_allowed { + response.set_header(HEADER_X_SYNTHETIC_ID, synthetic_id.as_str()); + set_synthetic_cookie(settings, &mut response, synthetic_id.as_str()); + } else if let Some(cookie_synthetic_id) = existing_ssc_cookie.as_deref() { + log::info!( + "SSC revoked for '{}': consent withdrawn (jurisdiction={})", + cookie_synthetic_id, + consent_context.jurisdiction, + ); + expire_synthetic_cookie(settings, &mut response); + if let Some(store_name) = &settings.consent.consent_store { + crate::consent::kv::delete_consent_from_kv(store_name, cookie_synthetic_id); + } + } else { + log::debug!( + "SSC skipped: no consent and no existing cookie (jurisdiction={})", + consent_context.jurisdiction, + ); + } Ok(response) } @@ -444,6 +478,33 @@ mod tests { } } + #[test] + fn revocation_targets_cookie_synthetic_id_not_header() { + let settings = create_test_settings(); + let mut req = Request::new(Method::GET, "https://test.example.com/page"); + req.set_header("x-synthetic-id", "header_id"); + req.set_header("cookie", "synthetic_id=cookie_id; other=value"); + + let cookie_jar = handle_request_cookies(&req).expect("should parse cookies"); + let existing_ssc_cookie = cookie_jar + .as_ref() + .and_then(|jar| jar.get(COOKIE_SYNTHETIC_ID)) + .map(|cookie| cookie.value().to_owned()); + + let resolved_synthetic_id = + get_or_generate_synthetic_id(&settings, &req).expect("should resolve synthetic id"); + + assert_eq!( + existing_ssc_cookie.as_deref(), + Some("cookie_id"), + "should read revocation target from cookie value" + ); + assert_eq!( + resolved_synthetic_id, "header_id", + "should still resolve request synthetic ID from header precedence" + ); + } + #[test] fn tsjs_dynamic_returns_not_found_for_unknown_filename() { let settings = create_test_settings(); From ee84bba91f4b49d6f1be45090949ac7c86173b6e Mon Sep 17 00:00:00 2001 From: Christian Date: Mon, 9 Mar 2026 14:48:58 -0500 Subject: [PATCH 8/9] Harden consent decoders: vendor cap, GPP length guard, and cleanup Address PR #380 
review findings: - Cap TCF vendor range expansion to MAX_VENDOR_ID (10,000) to prevent DoS - Add GPP string length guard (8,192 bytes) before parsing - Replace unwrap_or_default() with expect() in now_deciseconds - Document gate_eids_by_consent as deferred until EID wiring - Drop plotters feature from criterion to reduce compile-time deps - Add explanatory comment for dead_code allow in build.rs --- Cargo.lock | 39 ------------ Cargo.toml | 2 +- crates/common/build.rs | 3 + crates/common/src/consent/gpp.rs | 29 ++++++++- crates/common/src/consent/mod.rs | 6 +- crates/common/src/consent/tcf.rs | 106 ++++++++++++++++++++++++++++++- 6 files changed, 140 insertions(+), 45 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5df9f529..47b00df9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -433,7 +433,6 @@ dependencies = [ "num-traits", "once_cell", "oorandom", - "plotters", "regex", "serde", "serde_derive", @@ -1849,34 +1848,6 @@ dependencies = [ "spki", ] -[[package]] -name = "plotters" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" -dependencies = [ - "num-traits", - "plotters-backend", - "plotters-svg", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "plotters-backend" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" - -[[package]] -name = "plotters-svg" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" -dependencies = [ - "plotters-backend", -] - [[package]] name = "poly1305" version = "0.8.0" @@ -2880,16 +2851,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "web-sys" -version = "0.3.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - [[package]] name = "which" version = "8.0.0" diff --git a/Cargo.toml b/Cargo.toml index f9ba47c6..be789229 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -80,4 +80,4 @@ urlencoding = "2.1" uuid = { version = "1.18", features = ["v4"] } validator = { version = "0.20", features = ["derive"] } which = "8" -criterion = { version = "0.5", default-features = false, features = ["plotters", "cargo_bench_support"] } +criterion = { version = "0.5", default-features = false, features = ["cargo_bench_support"] } diff --git a/crates/common/build.rs b/crates/common/build.rs index 1c478a53..e40c63c2 100644 --- a/crates/common/build.rs +++ b/crates/common/build.rs @@ -1,3 +1,6 @@ +// Build script includes source modules (`error`, `auction_config_types`, etc.) +// for compile-time config validation. Not all items from those modules are used +// in the build context, so `dead_code` is expected. #![allow(clippy::unwrap_used, clippy::panic, dead_code)] #[path = "src/error.rs"] diff --git a/crates/common/src/consent/gpp.rs b/crates/common/src/consent/gpp.rs index 5032a104..6b0cba35 100644 --- a/crates/common/src/consent/gpp.rs +++ b/crates/common/src/consent/gpp.rs @@ -25,6 +25,13 @@ use error_stack::Report; use super::types::{ConsentDecodeError, GppConsent, TcfConsent}; +/// Maximum length of a raw GPP string before parsing. +/// +/// GPP strings are typically larger than standalone TC strings because they +/// encode multiple sections. This limit prevents malicious cookies from +/// triggering large allocations in the `iab_gpp` parser. +const MAX_GPP_STRING_LEN: usize = 8192; + /// Decodes a GPP string into a [`GppConsent`] struct. 
/// /// Parses the raw `__gpp` cookie value, extracts section IDs, and optionally @@ -36,8 +43,18 @@ use super::types::{ConsentDecodeError, GppConsent, TcfConsent}; /// /// # Errors /// -/// - [`ConsentDecodeError::InvalidGppString`] if the `iab_gpp` parser fails. +/// - [`ConsentDecodeError::InvalidGppString`] if the string exceeds +/// [`MAX_GPP_STRING_LEN`] or the `iab_gpp` parser fails. pub fn decode_gpp_string(gpp_string: &str) -> Result> { + if gpp_string.len() > MAX_GPP_STRING_LEN { + return Err(Report::new(ConsentDecodeError::InvalidGppString { + reason: format!( + "GPP string too long: {} bytes, max {MAX_GPP_STRING_LEN}", + gpp_string.len() + ), + })); + } + let parsed = iab_gpp::v1::GPPString::parse_str(gpp_string).map_err(|e| { Report::new(ConsentDecodeError::InvalidGppString { reason: format!("{e}"), @@ -169,6 +186,16 @@ mod tests { assert!(result.is_err(), "should reject empty GPP string"); } + #[test] + fn rejects_oversized_gpp_string() { + let oversized = "D".repeat(MAX_GPP_STRING_LEN + 1); + let result = decode_gpp_string(&oversized); + assert!( + result.is_err(), + "should reject GPP string exceeding max length" + ); + } + #[test] fn parse_gpp_sid_simple() { let ids = parse_gpp_sid_cookie("2,6").expect("should parse 2,6"); diff --git a/crates/common/src/consent/mod.rs b/crates/common/src/consent/mod.rs index cc768e08..6f8430d3 100644 --- a/crates/common/src/consent/mod.rs +++ b/crates/common/src/consent/mod.rs @@ -369,7 +369,7 @@ fn select_newest_signal( pub(crate) fn now_deciseconds() -> u64 { let dur = SystemTime::now() .duration_since(UNIX_EPOCH) - .unwrap_or_default(); + .expect("should have system time after Unix epoch"); dur.as_secs() * 10 + u64::from(dur.subsec_millis()) / 100 } @@ -421,6 +421,10 @@ pub fn build_us_privacy_from_gpc(config: &ConsentConfig) -> Option( eids: Option>, diff --git a/crates/common/src/consent/tcf.rs b/crates/common/src/consent/tcf.rs index 6a7114a5..369dc80d 100644 --- a/crates/common/src/consent/tcf.rs +++ 
b/crates/common/src/consent/tcf.rs @@ -51,6 +51,14 @@ use super::types::{ConsentDecodeError, TcfConsent}; /// cookies from triggering large heap allocations during base64 decoding. const MAX_TC_STRING_LEN: usize = 4096; +/// Maximum vendor ID we will process from a TC String. +/// +/// The IAB vendor list currently has ~1 200 entries. This cap prevents +/// malicious range-encoded entries (e.g. `start=1, end=65535`) from +/// expanding into enormous `Vec`s. Vendors beyond this limit are silently +/// ignored. +const MAX_VENDOR_ID: u16 = 10_000; + /// Decodes a TC String v2 into a [`TcfConsent`] struct. /// /// Only the core segment is decoded. Additional segments (separated by `.`) @@ -172,7 +180,8 @@ fn decode_vendor_section( return Ok(Vec::new()); } - let max_vendor_id = reader.read_u16(offset, 16); + let raw_max_vendor_id = reader.read_u16(offset, 16); + let max_vendor_id = raw_max_vendor_id.min(MAX_VENDOR_ID); let is_range = reader.read_bool(offset + 16); if !is_range { @@ -215,7 +224,13 @@ fn decode_vendor_section( let start = reader.read_u16(pos, 16); let end = reader.read_u16(pos + 16, 16); pos += 32; - for id in start..=end { + + // Silently clamp to MAX_VENDOR_ID to prevent amplification + // from malicious range entries. + if start == 0 || start > end { + continue; + } + for id in start..=end.min(MAX_VENDOR_ID) { vendors.push(id); } } else { @@ -225,7 +240,9 @@ fn decode_vendor_section( } let id = reader.read_u16(pos, 16); pos += 16; - vendors.push(id); + if id > 0 && id <= MAX_VENDOR_ID { + vendors.push(id); + } } } Ok(vendors) @@ -449,6 +466,89 @@ mod tests { assert_eq!(result.consent_language, "FR"); } + #[test] + fn clamps_range_encoded_vendors_to_max() { + // Build a TC String with range encoding where end > MAX_VENDOR_ID. + // The decoder should silently clamp to MAX_VENDOR_ID. 
+ let core_bits: usize = 213; + // Vendor section: maxVendorId(16) + isRange(1) + numEntries(12) + // + entry: isRange(1) + start(16) + end(16) = 62 bits + let total_bits = core_bits + 16 + 1 + 12 + 1 + 16 + 16; + let total_bytes = total_bits.div_ceil(8); + let mut buf = vec![0u8; total_bytes]; + let mut writer = BitWriter::new(&mut buf); + + // Write minimal core fields (version=2, rest zeroed/defaults) + writer.write(0, 6, 2); // version + writer.write(6, 36, 0); // created + writer.write(42, 36, 0); // lastUpdated + writer.write(78, 12, 1); // cmpId + writer.write(90, 12, 1); // cmpVersion + writer.write(102, 6, 0); // consentScreen + writer.write(108, 6, 4); // language E + writer.write(114, 6, 13); // language N + writer.write(120, 12, 1); // vendorListVersion + writer.write(132, 6, 2); // policyVersion + writer.write(201, 6, 4); // publisherCC E + writer.write(207, 6, 13); // publisherCC N + + // Vendor consent section: range encoding with start=9990, end=65535 + writer.write(213, 16, 65535); // maxVendorId = 65535 + writer.write_bool(229, true); // isRangeEncoding = true + writer.write(230, 12, 1); // numEntries = 1 + writer.write_bool(242, true); // entry is a range + writer.write(243, 16, 9990); // startVendorId + writer.write(259, 16, 65535); // endVendorId + + let encoded = URL_SAFE_NO_PAD.encode(&buf); + let result = decode_tc_string(&encoded).expect("should decode with clamped vendors"); + + // Should contain vendors 9990..=10000 (clamped from 9990..=65535) + assert_eq!(result.vendor_consents.len(), 11); + assert_eq!( + *result.vendor_consents.first().expect("should have first"), + 9990 + ); + assert_eq!( + *result.vendor_consents.last().expect("should have last"), + 10_000 + ); + } + + #[test] + fn skips_invalid_range_start_gt_end() { + // Range entry where start > end should be silently skipped.
+ let core_bits: usize = 213; + let total_bits = core_bits + 16 + 1 + 12 + 1 + 16 + 16; + let total_bytes = total_bits.div_ceil(8); + let mut buf = vec![0u8; total_bytes]; + let mut writer = BitWriter::new(&mut buf); + + writer.write(0, 6, 2); + writer.write(78, 12, 1); + writer.write(90, 12, 1); + writer.write(108, 6, 4); + writer.write(114, 6, 13); + writer.write(120, 12, 1); + writer.write(132, 6, 2); + writer.write(201, 6, 4); + writer.write(207, 6, 13); + + writer.write(213, 16, 100); // maxVendorId + writer.write_bool(229, true); // isRangeEncoding + writer.write(230, 12, 1); // numEntries = 1 + writer.write_bool(242, true); // entry is a range + writer.write(243, 16, 50); // startVendorId = 50 + writer.write(259, 16, 10); // endVendorId = 10 (invalid: start > end) + + let encoded = URL_SAFE_NO_PAD.encode(&buf); + let result = decode_tc_string(&encoded).expect("should decode with skipped range"); + assert!( + result.vendor_consents.is_empty(), + "should skip range where start > end" + ); + } + // ----------------------------------------------------------------------- // Test helper: builds a minimal TC String v2 byte buffer // ----------------------------------------------------------------------- From ba8eb3cfc48705e9514e851e87867a03afb0ee0b Mon Sep 17 00:00:00 2001 From: Christian Date: Mon, 9 Mar 2026 14:56:05 -0500 Subject: [PATCH 9/9] Fix rebase merge artifacts: duplicate import and missing field --- crates/common/src/auction/formats.rs | 1 - crates/common/src/integrations/adserver_mock.rs | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/common/src/auction/formats.rs b/crates/common/src/auction/formats.rs index 4849b580..b52e7348 100644 --- a/crates/common/src/auction/formats.rs +++ b/crates/common/src/auction/formats.rs @@ -13,7 +13,6 @@ use std::collections::HashMap; use uuid::Uuid; use crate::auction::context::ContextValue; -use crate::auction::types::OrchestratorExt; use crate::consent::ConsentContext; use crate::creative; use 
crate::error::TrustedServerError; diff --git a/crates/common/src/integrations/adserver_mock.rs b/crates/common/src/integrations/adserver_mock.rs index c8529188..71cba275 100644 --- a/crates/common/src/integrations/adserver_mock.rs +++ b/crates/common/src/integrations/adserver_mock.rs @@ -707,6 +707,7 @@ mod tests { endpoint: "http://localhost:6767/adserver/mediate".to_string(), timeout_ms: 500, price_floor: None, + context_query_params: BTreeMap::new(), }; let provider = AdServerMockProvider::new(config);