diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f6e45043..fd06ea2d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,9 +2,9 @@ name: CI on: push: - branches: [main] + branches: [main, "rc-*"] pull_request: - branches: [main] + branches: [main, "rc-*"] env: CARGO_TERM_COLOR: always diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3c3e8996..b49063a5 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -82,13 +82,16 @@ jobs: fail-fast: false matrix: include: - # Use ubuntu-22.04 for GLIBC 2.35 compatibility with server deployments - - target: x86_64-unknown-linux-gnu + # Linux builds use musl for portability across glibc and musl distros + # (e.g. Alpine). Built via `cross` so the musl toolchain is provided + # by the cross-rs container image. + - target: x86_64-unknown-linux-musl os: ubuntu-22.04 binary: ant-node archive: tar.gz + cross: true friendly_name: linux-x64 - - target: aarch64-unknown-linux-gnu + - target: aarch64-unknown-linux-musl os: ubuntu-22.04 binary: ant-node archive: tar.gz diff --git a/Cargo.lock b/Cargo.lock index 3cf0f2a3..13b5520b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -809,7 +809,7 @@ dependencies = [ [[package]] name = "ant-node" -version = "0.11.4" +version = "0.11.5" dependencies = [ "alloy", "ant-protocol", @@ -825,7 +825,9 @@ dependencies = [ "futures", "heed", "hex", + "libc", "lru", + "mimalloc", "objc2", "objc2-foundation", "page_size", @@ -1204,9 +1206,9 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" [[package]] name = "aws-lc-rs" @@ -1298,15 +1300,15 @@ dependencies = [ [[package]] name = "bitcoin-io" -version = "0.1.4" +version = "0.1.100" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dee39a0ee5b4095224a0cfc6bf4cc1baf0f9624b96b367e53b66d974e51d953" +checksum = "11301df0b06f22dea7bb1916403fdd88a371031e495c49b8f96931b28189e175" [[package]] name = "bitcoin_hashes" -version = "0.14.1" +version = "0.14.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26ec84b80c482df901772e931a9a681e26a1b9ee2302edeff23cb30328745c8b" +checksum = "0c9901a56e133a1fc86eeb1113e2591f45f4682451ca893bff494d2f88918e3f" dependencies = [ "bitcoin-io", "hex-conservative", @@ -1421,9 +1423,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.20.2" +version = "3.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649" [[package]] name = "byte-slice-cast" @@ -1679,9 +1681,9 @@ dependencies = [ [[package]] name = "const-hex" -version = "1.19.0" +version = "1.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20d9a563d167a9cce0f94153382b33cb6eded6dfabff03c69ad65a28ea1514e0" +checksum = "33e2a781ebdf4467d1428dc4593067825fb646f6871475098d8577421af73558" dependencies = [ "cfg-if", "cpufeatures 0.2.17", @@ -2162,9 +2164,9 @@ dependencies = [ [[package]] name = "displaydoc" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +checksum = "1ac70aa55017e108007fbaf5aa0f54b021c98f92ff8af59d42eda9da96e3dd4f" dependencies = [ "proc-macro2", "quote", @@ -2911,9 +2913,9 @@ dependencies = [ [[package]] name = "http" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +checksum = 
"8be7462df143984c4598a256ef469b251d7d7f9e271135073e78fc535414f3d0" dependencies = [ "bytes", "itoa", @@ -2959,9 +2961,9 @@ dependencies = [ [[package]] name = "hyper" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" +checksum = "eb92f162bf56536459fc83c79b974bb12837acfed43d6bc370a7916d0ae15ecc" dependencies = [ "atomic-waker", "bytes", @@ -3364,9 +3366,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.98" +version = "0.3.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67df7112613f8bfd9150013a0314e196f4800d3201ae742489d999db2f979f08" +checksum = "142bc4740e452c1e57ade0cbc129f139c9093e354346f0872ef985f4f5cf5f11" dependencies = [ "cfg-if", "futures-util", @@ -3466,11 +3468,20 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" +[[package]] +name = "libmimalloc-sys" +version = "0.1.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a45a52f43e1c16f667ccfe4dd8c85b7f7c204fd5e3bf46c5b0db9a5c3c0b8e9" +dependencies = [ + "cc", +] + [[package]] name = "libredox" -version = "0.1.16" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e02f3bb43d335493c96bf3fd3a321600bf6bd07ed34bc64118e9293bdffea46c" +checksum = "f02ab6bace2054fb888a3c16f990117b579d14a3088e472d63c6011fa185c9d3" dependencies = [ "libc", ] @@ -3509,9 +3520,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.29" +version = "0.4.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +checksum = "616ec5685824bcc94416c6d4a7a446eea774a31efd7062c8480ba6fd06d7a6e5" [[package]] name = "lru" @@ -3571,9 +3582,9 @@ dependencies = [ [[package]] name = 
"memchr" -version = "2.8.0" +version = "2.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8" [[package]] name = "memoffset" @@ -3584,6 +3595,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "mimalloc" +version = "0.1.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d4139bb28d14ad1facf21d5eb8825051b326e172d216b39f6d31df53cc97862" +dependencies = [ + "libmimalloc-sys", +] + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -4137,7 +4157,7 @@ version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" dependencies = [ - "toml_edit 0.25.11+spec-1.1.0", + "toml_edit 0.25.12+spec-1.1.0", ] [[package]] @@ -4505,9 +4525,9 @@ checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "reqwest" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62e0021ea2c22aed41653bc7e1419abb2c97e038ff2c33d0e1309e49a97deec0" +checksum = "219c5811de6525e5416c7d5d53bb656d3afdbc6c5af816e0802bcfa42dbdc1c3" dependencies = [ "base64", "bytes", @@ -5219,9 +5239,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.149" +version = "1.0.150" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" dependencies = [ "itoa", "memchr", @@ -5909,9 +5929,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.25.11+spec-1.1.0" +version = "0.25.12+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b" +checksum = "d2153edc6955a6c354fad8f5efd38b6a8769bdccf9fe50f8e1329f81b0baa5d7" dependencies = [ "indexmap 2.14.0", "toml_datetime 1.1.1+spec-1.1.0", @@ -6263,9 +6283,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.121" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49ace1d07c165b0864824eee619580c4689389afa9dc9ed3a4c75040d82e6790" +checksum = "3ed04576f974d2b2fba0f38c51dbc5518011e38c36bf1143164be765528fd409" dependencies = [ "cfg-if", "once_cell", @@ -6276,9 +6296,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.71" +version = "0.4.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96492d0d3ffba25305a7dc88720d250b1401d7edca02cc3bcd50633b424673b8" +checksum = "9473dbd2991ae90b6291c3c32c30c6187ac49aa32f9905d1cce280ec1e110b0f" dependencies = [ "js-sys", "wasm-bindgen", @@ -6286,9 +6306,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.121" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e68e6f4afd367a562002c05637acb8578ff2dea1943df76afb9e83d177c8578" +checksum = "916151b09da36bd82f6615cbf3a419e2f0ba23a03c6160e8e92eb6bd4aa1dec6" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -6296,9 +6316,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.121" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d95a9ec35c64b2a7cb35d3fead40c4238d0940c86d107136999567a4703259f2" +checksum = "299047362ccbfce148b67ab7e73349f77748e00c8296f9542adfad2ad82c5c5e" dependencies = [ "bumpalo", "proc-macro2", @@ -6309,9 +6329,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.121" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c4e0100b01e9f0d03189a92b96772a1fb998639d981193d7dbab487302513441" +checksum = "9a929b2c61f11ba3e9bc35b50c1f25cb38e0e892c0c231ae2b8cf78d5dad4437" dependencies = [ "unicode-ident", ] @@ -6366,9 +6386,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.98" +version = "0.3.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b572dff8bcf38bad0fa19729c89bb5748b2b9b1d8be70cf90df697e3a8f32aa" +checksum = "6d621441cfc37b84979402712047321980c178f299193a3589d05b99e8763436" dependencies = [ "js-sys", "wasm-bindgen", @@ -7085,18 +7105,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.48" +version = "0.8.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +checksum = "bce33a6288fa3f072a8c2c7d0f2fdbb90e28298f0135c1f99b96c3db2efcc60b" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.48" +version = "0.8.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +checksum = "8fd425244944f4ab65ccff928e7323354c5a018c75838362fdce749dfad2ee1e" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index ab3f24ac..0ef01ea7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ant-node" -version = "0.11.4" +version = "0.11.5" edition = "2021" authors = ["David Irvine "] description = "Pure quantum-proof network node for the Autonomi decentralized network" @@ -23,6 +23,12 @@ name = "ant-devnet" path = "src/bin/ant-devnet/main.rs" [dependencies] +# Global allocator. musl's default malloc is significantly slower than +# glibc's under concurrent allocation churn, which matches the node's +# steady-state workload. mimalloc neutralises that regression for the +# musl Linux builds (and tends to beat glibc's allocator too). 
+mimalloc = "0.1" + # Wire protocol — the single version-pin shared with ant-client. # Bumping ant-protocol's `evmlib`/`saorsa-core`/`saorsa-pqc` pins ripples # through here automatically; we keep a direct saorsa-core dep for @@ -106,6 +112,9 @@ page_size = "0.6" # Protocol serialization postcard = { version = "1.1.3", features = ["use-std"] } +[target.'cfg(unix)'.dependencies] +libc = "0.2" + [target.'cfg(windows)'.dependencies] self-replace = "1" diff --git a/src/bin/ant-devnet/main.rs b/src/bin/ant-devnet/main.rs index 1117f7de..44d85b7b 100644 --- a/src/bin/ant-devnet/main.rs +++ b/src/bin/ant-devnet/main.rs @@ -2,6 +2,9 @@ #![cfg_attr(not(feature = "logging"), allow(unused_variables))] +#[global_allocator] +static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; + mod cli; use ant_node::devnet::{Devnet, DevnetConfig, DevnetEvmInfo, DevnetManifest}; diff --git a/src/bin/ant-node/main.rs b/src/bin/ant-node/main.rs index 3af62dc9..6849103d 100644 --- a/src/bin/ant-node/main.rs +++ b/src/bin/ant-node/main.rs @@ -2,6 +2,9 @@ #![cfg_attr(not(feature = "logging"), allow(unused_variables))] +#[global_allocator] +static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; + mod cli; mod platform; diff --git a/src/payment/verifier.rs b/src/payment/verifier.rs index 8f63916e..56328fad 100644 --- a/src/payment/verifier.rs +++ b/src/payment/verifier.rs @@ -10,7 +10,7 @@ use crate::payment::cache::{CacheStats, VerifiedCache, XorName}; use crate::payment::proof::{ deserialize_merkle_proof, deserialize_proof, detect_proof_type, ProofType, }; -use crate::payment::single_node::{QuotePaymentInfo, SingleNodePayment}; +use crate::payment::single_node::SingleNodePayment; use ant_protocol::payment::verify::{verify_quote_content, verify_quote_signature}; use evmlib::common::Amount; use evmlib::contract::payment_vault; @@ -23,7 +23,6 @@ use parking_lot::{Mutex, RwLock}; use saorsa_core::identity::node_identity::peer_id_from_public_key_bytes; use saorsa_core::identity::PeerId; use 
saorsa_core::P2PNode; -use std::collections::HashSet; use std::num::NonZeroUsize; use std::sync::Arc; use std::time::{Duration, SystemTime}; @@ -53,30 +52,6 @@ const QUOTE_MAX_AGE_SECS: u64 = 86_400; /// future direction; past-dated quotes are governed by `QUOTE_MAX_AGE_SECS`. const QUOTE_FUTURE_SKEW_TOLERANCE_SECS: u64 = 300; -/// Single-node payments pay one valid quote at three times its quoted price. -const SINGLE_NODE_PRICE_MULTIPLIER: u64 = 3; - -/// Median index after sorting exactly `CLOSE_GROUP_SIZE` single-node quotes by price. -const SINGLE_NODE_MEDIAN_INDEX: usize = CLOSE_GROUP_SIZE / 2; - -/// `PaymentVaultV2.completedPayments` stores the first 16 bytes of the -/// 20-byte rewards address alongside the amount. -const COMPLETED_PAYMENT_REWARDS_PREFIX_LEN: usize = 16; - -/// Single-node close-group validation tolerates up to two quoted peers missing -/// from the local routing-table view to absorb normal DHT view skew. -const SINGLE_NODE_UNKNOWN_PEER_TOLERANCE: usize = 2; - -/// Minimum quoted peers that must appear in this node's local close-group view. -const SINGLE_NODE_CLOSE_GROUP_MATCH_REQUIRED: usize = - CLOSE_GROUP_SIZE - SINGLE_NODE_UNKNOWN_PEER_TOLERANCE; - -const _: () = assert!( - SINGLE_NODE_CLOSE_GROUP_MATCH_REQUIRED > 0 - && SINGLE_NODE_CLOSE_GROUP_MATCH_REQUIRED <= CLOSE_GROUP_SIZE, - "single-node close-group match threshold must be within 1..=CLOSE_GROUP_SIZE", -); - /// Configuration for EVM payment verification. /// /// EVM verification is always on. All new data requires on-chain @@ -105,12 +80,8 @@ pub struct PaymentVerifierConfig { pub evm: EvmVerifierConfig, /// Cache capacity (number of `XorName` values to cache). pub cache_capacity: usize, - /// Local node's configured rewards address. - /// - /// For single-node proofs, the verifier requires the quote signed by this - /// node's peer identity to name this rewards address. 
The paid quote is - separately checked against the on-chain recipient prefix recorded for - that quote hash. + /// Local node's rewards address. + /// The verifier rejects payments that don't include this node as a recipient. pub local_rewards_address: RewardsAddress, } @@ -471,12 +442,9 @@ impl PaymentVerifier { /// 2. All quotes target the correct content address (xorname binding) /// 3. Quote timestamps are fresh (not expired or future-dated) /// 4. Peer ID bindings match the ML-DSA-65 public keys - /// 5. This node's peer ID is among the quoted peers + /// 5. This node is among the quoted recipients /// 6. All ML-DSA-65 signatures are valid (offloaded to `spawn_blocking`) - /// 7. This node's quote is bound to the configured local rewards address - /// 8. At least 5 of 7 quoted peers are in this node's local close-group view - /// 9. A median-priced quote from that valid quote set was paid at least - /// 3x its price on-chain to the same rewards address prefix + /// 7. The median-priced quote was paid at least 3x its price on-chain /// (looked up via `completedPayments(quoteHash)` on the payment vault) /// /// For unit tests that don't need on-chain verification, pre-populate @@ -492,7 +460,8 @@ impl PaymentVerifier { Self::validate_quote_structure(payment)?; Self::validate_quote_content(payment, xorname)?; Self::validate_quote_timestamps(payment)?; - let quoted_peer_ids = Self::validate_peer_bindings(payment)?; + Self::validate_peer_bindings(payment)?; + self.validate_local_recipient(payment)?; // Verify quote signatures (CPU-bound, run off async runtime) let peer_quotes = payment.peer_quotes.clone(); @@ -509,9 +478,6 @@ impl PaymentVerifier { .await .map_err(|e| Error::Payment(format!("Signature verification task failed: {e}")))??; - self.validate_single_node_close_group(xorname, payment, &quoted_peer_ids) - .await?; - // Reconstruct the SingleNodePayment to identify the median quote. // from_quotes() sorts by price and marks the median for 3x payment.
let quotes_with_prices: Vec<_> = payment @@ -525,9 +491,17 @@ impl PaymentVerifier { )) })?; - let verified_amount = self - .verify_single_node_on_chain_payment(&single_payment) - .await?; + // Verify the median quote was paid at least 3x its price on-chain + // via completedPayments(quoteHash) on the payment vault contract. + let verified_amount = single_payment + .verify(&self.config.evm.network) + .await + .map_err(|e| { + let xorname_hex = hex::encode(xorname); + Error::Payment(format!( + "Median quote payment verification failed for {xorname_hex}: {e}" + )) + })?; if crate::logging::enabled!(crate::logging::Level::INFO) { let xorname_hex = hex::encode(xorname); @@ -613,8 +587,7 @@ impl PaymentVerifier { } /// Verify each quote's `pub_key` matches the claimed peer ID via BLAKE3. - fn validate_peer_bindings(payment: &ProofOfPayment) -> Result<Vec<PeerId>> { - let mut peer_ids = Vec::with_capacity(payment.peer_quotes.len()); + fn validate_peer_bindings(payment: &ProofOfPayment) -> Result<()> { for (encoded_peer_id, quote) in &payment.peer_quotes { let expected_peer_id = peer_id_from_public_key_bytes(&quote.pub_key) .map_err(|e| Error::Payment(format!("Invalid ML-DSA public key in quote: {e}")))?; @@ -627,290 +600,10 @@ impl PaymentVerifier { BLAKE3(pub_key) = {expected_hex}, peer_id = {actual_hex}" ))); } - peer_ids.push(expected_peer_id); - } - Ok(peer_ids) - } - - /// Verify enough single-node quotes came from this node's local close-group - /// view for the content address to tolerate bounded routing-table skew. - async fn validate_single_node_close_group( - &self, - xorname: &XorName, - payment: &ProofOfPayment, - quoted_peer_ids: &[PeerId], - ) -> Result<()> { - // Release the RwLock guard before awaiting the local DHT lookup. - let attached = self.p2p_node.read().as_ref().map(Arc::clone); - let Some(p2p_node) = attached else { - crate::logging::error!( - "PaymentVerifier: no P2PNode attached; rejecting single-node \ - payment.
PaymentVerifier::attach_p2p_node must be called \ - before any PUT handler runs." - ); - return Err(Error::Payment( - "Single-node payment rejected: verifier is not wired to the \ - P2P layer; cannot verify quoted peer close-group membership." - .into(), - )); - }; - - let local_peer_id = *p2p_node.peer_id(); - Self::validate_local_quoted_peer(local_peer_id, quoted_peer_ids)?; - Self::validate_local_quote_rewards_address( - local_peer_id, - quoted_peer_ids, - payment, - &self.config.local_rewards_address, - )?; - - let close_group = p2p_node - .dht_manager() - .find_closest_nodes_local_with_self(xorname, CLOSE_GROUP_SIZE) - .await; - let close_group_peer_ids: Vec = - close_group.iter().map(|node| node.peer_id).collect(); - - Self::check_single_node_close_group_match(quoted_peer_ids, &close_group_peer_ids, xorname) - } - - /// Pure set-membership check for single-node close-group validation. - fn check_single_node_close_group_match( - quoted_peer_ids: &[PeerId], - close_group_peer_ids: &[PeerId], - xorname: &XorName, - ) -> Result<()> { - let quote_count = quoted_peer_ids.len(); - if quote_count != CLOSE_GROUP_SIZE { - return Err(Error::Payment(format!( - "Single-node payment must have exactly {CLOSE_GROUP_SIZE} quoted peers, got {quote_count}" - ))); - } - - let mut quoted_set = HashSet::with_capacity(quote_count); - for peer_id in quoted_peer_ids { - if !quoted_set.insert(*peer_id) { - return Err(Error::Payment(format!( - "Single-node payment contains duplicate quoted peer {}", - peer_id.to_hex() - ))); - } - } - - let close_group_count = close_group_peer_ids.len(); - if close_group_count < SINGLE_NODE_CLOSE_GROUP_MATCH_REQUIRED { - return Err(Error::Payment(format!( - "Single-node payment rejected: local close-group view for {} has only \ - {close_group_count} peer(s), need at least \ - {SINGLE_NODE_CLOSE_GROUP_MATCH_REQUIRED} to verify quotes", - hex::encode(xorname), - ))); - } - - let close_group_set: HashSet = close_group_peer_ids.iter().copied().collect(); - 
if close_group_set.len() != close_group_peer_ids.len() { - return Err(Error::Payment( - "Single-node payment rejected: local close-group view contains duplicate peer IDs" - .into(), - )); - } - - let unknown_peer_ids: Vec = quoted_peer_ids - .iter() - .filter(|peer_id| !close_group_set.contains(peer_id)) - .map(PeerId::to_hex) - .collect(); - let matched_count = quote_count.saturating_sub(unknown_peer_ids.len()); - - if matched_count < SINGLE_NODE_CLOSE_GROUP_MATCH_REQUIRED { - return Err(Error::Payment(format!( - "Single-node payment rejected: only {matched_count}/{CLOSE_GROUP_SIZE} quoted \ - peer(s) are present in this node's local close-group view for {}, need at \ - least {SINGLE_NODE_CLOSE_GROUP_MATCH_REQUIRED}; unknown peer(s): {}", - hex::encode(xorname), - unknown_peer_ids.join(", "), - ))); - } - - Ok(()) - } - - /// Verify the local node's peer identity signed one of the quotes. - fn validate_local_quoted_peer(local_peer_id: PeerId, quoted_peer_ids: &[PeerId]) -> Result<()> { - if !quoted_peer_ids.contains(&local_peer_id) { - return Err(Error::Payment(format!( - "Payment proof does not include this node's peer ID as a quoted peer: {}", - local_peer_id.to_hex() - ))); } Ok(()) } - /// Verify this node's signed quote is bound to the configured local rewards address. 
- fn validate_local_quote_rewards_address( - local_peer_id: PeerId, - quoted_peer_ids: &[PeerId], - payment: &ProofOfPayment, - local_rewards_address: &RewardsAddress, - ) -> Result<()> { - if quoted_peer_ids.len() != payment.peer_quotes.len() { - return Err(Error::Payment( - "internal error: quoted peer IDs and payment quotes have different lengths".into(), - )); - } - - let Some((_, (_, quote))) = quoted_peer_ids - .iter() - .zip(payment.peer_quotes.iter()) - .find(|(peer_id, _)| **peer_id == local_peer_id) - else { - return Err(Error::Payment(format!( - "Payment proof does not include this node's peer ID as a quoted peer: {}", - local_peer_id.to_hex() - ))); - }; - - if quote.rewards_address != *local_rewards_address { - return Err(Error::Payment(format!( - "Payment proof includes this node's peer ID but its quote rewards address {} \ - does not match the configured local rewards address {}", - quote.rewards_address, local_rewards_address - ))); - } - - Ok(()) - } - - /// Verify a paid single-node quote against `completedPayments(quote.hash())`. 
- async fn verify_single_node_on_chain_payment( - &self, - single_payment: &SingleNodePayment, - ) -> Result { - let median = single_payment.paid_quote().ok_or_else(|| { - Error::Payment(format!( - "Missing median quote at index {}: quotes array has only {} elements", - SINGLE_NODE_MEDIAN_INDEX, - single_payment.quotes.len() - )) - })?; - let median_price = median.price; - let tied_quotes = Self::ordered_median_tied_quotes(single_payment, median); - - debug!( - "Verifying single-node quote payment: median price {median_price}, {} quote(s) tied", - tied_quotes.len() - ); - - let provider = evmlib::utils::http_provider(self.config.evm.network.rpc_url().clone()); - let vault_address = *self.config.evm.network.payment_vault_address(); - let contract = payment_vault::interface::IPaymentVault::new(vault_address, provider); - - let mut last_rejection = None; - for candidate in &tied_quotes { - let result = contract - .completedPayments(candidate.quote_hash) - .call() - .await - .map_err(|e| Error::Payment(format!("completedPayments lookup failed: {e}")))?; - let on_chain_amount = Amount::from(result.amount); - - match Self::validate_completed_single_node_payment( - candidate, - result.rewardsAddress.as_slice(), - on_chain_amount, - ) { - Ok(verified_amount) => { - debug!( - "Single-node payment verified: {verified_amount} atto paid for quote {}", - candidate.quote_hash - ); - return Ok(verified_amount); - } - Err(e) => { - last_rejection = Some(e.to_string()); - } - } - } - - let detail = last_rejection - .map(|reason| format!(" Last rejection: {reason}")) - .unwrap_or_default(); - let expected_amount = Self::expected_single_node_payment_amount(median_price)?; - Err(Error::Payment(format!( - "No median-priced quote was paid enough to the quoted rewards address: \ - expected at least {expected_amount}, checked {} tied quote(s).{detail}", - tied_quotes.len() - ))) - } - - /// Return median-price quotes with the selected median first, so the - /// normal single-node path 
needs one on-chain lookup. Other median-price - /// ties are kept as fallback for deterministic tie-order drift. - fn ordered_median_tied_quotes<'a>( - single_payment: &'a SingleNodePayment, - median: &'a QuotePaymentInfo, - ) -> Vec<&'a QuotePaymentInfo> { - let mut tied_quotes = Vec::with_capacity(CLOSE_GROUP_SIZE); - tied_quotes.push(median); - tied_quotes.extend( - single_payment - .quotes - .iter() - .filter(|quote| quote.price == median.price && !std::ptr::eq(*quote, median)), - ); - tied_quotes - } - - /// Validate the contract record for a single quote against amount and recipient. - fn validate_completed_single_node_payment( - quote: &QuotePaymentInfo, - on_chain_rewards_prefix: &[u8], - on_chain_amount: Amount, - ) -> Result<Amount> { - if quote.price == Amount::ZERO { - return Err(Error::Payment(format!( - "Median quote has zero price for quote {}; refusing to verify as paid", - quote.quote_hash - ))); - } - - let expected_amount = Self::expected_single_node_payment_amount(quote.price)?; - if on_chain_amount < expected_amount { - return Err(Error::Payment(format!( - "Underpayment for quote {}: paid {on_chain_amount}, expected at least {expected_amount}", - quote.quote_hash - ))); - } - - let expected_rewards_prefix = - Self::completed_payment_rewards_prefix(&quote.rewards_address); - if on_chain_rewards_prefix != expected_rewards_prefix { - return Err(Error::Payment(format!( - "Recipient mismatch for quote {}: completedPayments recipient prefix 0x{} \ - does not match quote rewards address {}", - quote.quote_hash, - hex::encode(on_chain_rewards_prefix), - quote.rewards_address - ))); - } - - Ok(on_chain_amount) - } - - fn expected_single_node_payment_amount(price: Amount) -> Result<Amount> { - price - .checked_mul(Amount::from(SINGLE_NODE_PRICE_MULTIPLIER)) - .ok_or_else(|| { - Error::Payment(format!( - "Price overflow when calculating {SINGLE_NODE_PRICE_MULTIPLIER}x quote price" - )) - }) - } - - fn completed_payment_rewards_prefix(rewards_address: &RewardsAddress) -> 
&[u8] { - &rewards_address.as_slice()[..COMPLETED_PAYMENT_REWARDS_PREFIX_LEN] - } - /// Minimum number of candidate `pub_keys` (out of 16) whose derived `PeerId` /// must match the DHT's actual closest peers to the pool midpoint address. /// @@ -1183,7 +876,7 @@ impl PaymentVerifier { pool: &evmlib::merkle_payments::MerklePaymentCandidatePool, ) -> Result> { let mut candidate_peer_ids = Vec::with_capacity(pool.candidate_nodes.len()); - let mut seen = HashSet::with_capacity(pool.candidate_nodes.len()); + let mut seen = std::collections::HashSet::with_capacity(pool.candidate_nodes.len()); for candidate in &pool.candidate_nodes { let pid = peer_id_from_public_key_bytes(&candidate.pub_key).map_err(|e| { Error::Payment(format!( @@ -1244,7 +937,8 @@ impl PaymentVerifier { // Set-membership check against the returned closest-peers list. // Candidate `PeerId`s are deduplicated upstream, so each match // corresponds to a distinct peer. - let network_set: HashSet = network_peer_ids.iter().copied().collect(); + let network_set: std::collections::HashSet = + network_peer_ids.iter().copied().collect(); let matched = candidate_peer_ids .iter() .filter(|pid| network_set.contains(pid)) @@ -1594,13 +1288,27 @@ impl PaymentVerifier { Ok(()) } + + /// Verify this node is among the paid recipients. + fn validate_local_recipient(&self, payment: &ProofOfPayment) -> Result<()> { + let local_addr = &self.config.local_rewards_address; + let is_recipient = payment + .peer_quotes + .iter() + .any(|(_, quote)| quote.rewards_address == *local_addr); + if !is_recipient { + return Err(Error::Payment( + "Payment proof does not include this node as a recipient".to_string(), + )); + } + Ok(()) + } } #[cfg(test)] #[allow(clippy::expect_used, clippy::panic)] mod tests { use super::*; - use alloy::primitives::FixedBytes; use evmlib::merkle_payments::MerklePaymentCandidatePool; /// Create a verifier for unit tests. 
EVM is always on, but tests can @@ -2131,149 +1839,55 @@ mod tests { ); } - /// Build a deterministic `PeerId` from a single byte tag. - fn synthetic_peer_id(tag: u8) -> PeerId { - let mut bytes = [0u8; 32]; - bytes[0] = tag; - PeerId::from_bytes(bytes) - } - - /// Build a vector of synthetic `PeerId`s tagged with bytes 1..=n. - fn synthetic_peer_ids(n: u8) -> Vec { - (1..=n).map(synthetic_peer_id).collect() - } - - fn encoded_peer_id(peer_id: PeerId) -> evmlib::EncodedPeerId { - let mut bytes = [0u8; 32]; - bytes.copy_from_slice(peer_id.as_bytes()); - evmlib::EncodedPeerId::new(bytes) - } - - fn single_node_peer_ids() -> Vec { - (1..=CLOSE_GROUP_SIZE) - .map(|idx| { - let tag = u8::try_from(idx).expect("CLOSE_GROUP_SIZE fits in u8"); - synthetic_peer_id(tag) - }) - .collect() - } - - fn proof_of_payment_for_peer_ids( - xorname: XorName, - quoted_peer_ids: &[PeerId], - local_peer_id: PeerId, - local_rewards_address: RewardsAddress, - ) -> ProofOfPayment { - let peer_quotes = quoted_peer_ids - .iter() - .enumerate() - .map(|(idx, peer_id)| { - let tag = u8::try_from(idx).expect("CLOSE_GROUP_SIZE fits in u8"); - let rewards_address = if *peer_id == local_peer_id { - local_rewards_address - } else { - RewardsAddress::new([tag; 20]) - }; - ( - encoded_peer_id(*peer_id), - make_fake_quote(xorname, SystemTime::now(), rewards_address), - ) - }) - .collect(); - - ProofOfPayment { peer_quotes } + /// Helper: build an `EncodedPeerId` that matches the BLAKE3 hash of an ML-DSA public key. 
+ fn encoded_peer_id_for_pub_key(pub_key: &[u8]) -> evmlib::EncodedPeerId { + let ant_peer_id = peer_id_from_public_key_bytes(pub_key).expect("valid ML-DSA pub key"); + evmlib::EncodedPeerId::new(*ant_peer_id.as_bytes()) } - fn make_quote_payment_info(price: Amount, rewards_address: RewardsAddress) -> QuotePaymentInfo { - QuotePaymentInfo { - quote_hash: FixedBytes::from([0xABu8; 32]), - rewards_address, - amount: PaymentVerifier::expected_single_node_payment_amount(price) - .expect("expected amount"), - price, - } - } - - #[test] - fn local_quoted_peer_accepts_when_local_peer_id_was_quoted() { - let quoted_peer_ids = single_node_peer_ids(); - let local_peer_id = quoted_peer_ids[0]; - - let result = PaymentVerifier::validate_local_quoted_peer(local_peer_id, &quoted_peer_ids); - - assert!( - result.is_ok(), - "local peer ID in quoted peer set must pass: {result:?}" - ); - } + #[tokio::test] + async fn test_local_not_in_paid_set_rejected() { + use evmlib::RewardsAddress; + use saorsa_core::MlDsa65; + use saorsa_pqc::pqc::MlDsaOperations; - #[test] - fn local_quoted_peer_rejects_when_local_peer_id_missing() { - let quoted_peer_ids = single_node_peer_ids(); - let local_peer_id = synthetic_peer_id(0xF0); + // Verifier with a local rewards address set + let local_addr = RewardsAddress::new([0xAAu8; 20]); + let config = PaymentVerifierConfig { + evm: EvmVerifierConfig { + network: EvmNetwork::ArbitrumOne, + }, + cache_capacity: 100, + local_rewards_address: local_addr, + }; + let verifier = PaymentVerifier::new(config); - let result = PaymentVerifier::validate_local_quoted_peer(local_peer_id, &quoted_peer_ids); + let xorname = [0xEEu8; 32]; + // Quotes pay a DIFFERENT rewards address + let other_addr = RewardsAddress::new([0xBBu8; 20]); - let err = result.expect_err("local peer ID outside quoted set must be rejected"); - assert!( - err.to_string() - .contains("does not include this node's peer ID"), - "expected local peer ID rejection, got: {err}" - ); - } + // Use real ML-DSA 
keys so the pub_key→peer_id binding check passes + let ml_dsa = MlDsa65::new(); + let mut peer_quotes = Vec::new(); + for _ in 0..CLOSE_GROUP_SIZE { + let (public_key, _secret_key) = ml_dsa.generate_keypair().expect("keygen"); + let pub_key_bytes = public_key.as_bytes().to_vec(); + let encoded = encoded_peer_id_for_pub_key(&pub_key_bytes); - #[test] - fn local_quote_rewards_address_accepts_matching_config() { - let xorname = [0x44u8; 32]; - let quoted_peer_ids = single_node_peer_ids(); - let local_peer_id = quoted_peer_ids[0]; - let local_rewards_address = RewardsAddress::new([0xA0; 20]); - let payment = proof_of_payment_for_peer_ids( - xorname, - &quoted_peer_ids, - local_peer_id, - local_rewards_address, - ); + let mut quote = make_fake_quote(xorname, SystemTime::now(), other_addr); + quote.pub_key = pub_key_bytes; - let result = PaymentVerifier::validate_local_quote_rewards_address( - local_peer_id, - &quoted_peer_ids, - &payment, - &local_rewards_address, - ); + peer_quotes.push((encoded, quote)); + } - assert!( - result.is_ok(), - "local quote rewards address must pass when it matches config: {result:?}" - ); - } + let proof_bytes = serialize_proof(peer_quotes); + let result = verifier.verify_payment(&xorname, Some(&proof_bytes)).await; - #[test] - fn local_quote_rewards_address_rejects_config_mismatch() { - let xorname = [0x45u8; 32]; - let quoted_peer_ids = single_node_peer_ids(); - let local_peer_id = quoted_peer_ids[0]; - let quoted_rewards_address = RewardsAddress::new([0xA1; 20]); - let configured_rewards_address = RewardsAddress::new([0xA2; 20]); - let payment = proof_of_payment_for_peer_ids( - xorname, - &quoted_peer_ids, - local_peer_id, - quoted_rewards_address, - ); - - let result = PaymentVerifier::validate_local_quote_rewards_address( - local_peer_id, - &quoted_peer_ids, - &payment, - &configured_rewards_address, - ); - - let err = result.expect_err("local rewards address mismatch must be rejected"); + assert!(result.is_err(), "Should reject 
payment not addressed 
to us"); + let err_msg = format!("{}", result.expect_err("should fail")); assert!( - err.to_string() - .contains("does not match the configured local rewards address"), - "expected local rewards mismatch rejection, got: {err}" + err_msg.contains("does not include this node as a recipient"), + "Error should mention recipient rejection: {err_msg}" ); } @@ -2314,180 +1928,6 @@ mod tests { ); } - #[test] - fn single_node_close_group_accepts_valid_quote_set() { - let xorname = [0x11u8; 32]; - let quoted_peer_ids = single_node_peer_ids(); - let close_group_peer_ids = quoted_peer_ids.clone(); - - let result = PaymentVerifier::check_single_node_close_group_match( - &quoted_peer_ids, - &close_group_peer_ids, - &xorname, - ); - - assert!( - result.is_ok(), - "all quoted peers in the close-group view must pass: {result:?}" - ); - } - - #[test] - fn single_node_close_group_accepts_tolerated_unknown_peers() { - let xorname = [0x22u8; 32]; - let quoted_peer_ids = single_node_peer_ids(); - let close_group_peer_ids = - quoted_peer_ids[..SINGLE_NODE_CLOSE_GROUP_MATCH_REQUIRED].to_vec(); - - let result = PaymentVerifier::check_single_node_close_group_match( - &quoted_peer_ids, - &close_group_peer_ids, - &xorname, - ); - - assert!( - result.is_ok(), - "up to {SINGLE_NODE_UNKNOWN_PEER_TOLERANCE} unknown quoted peers must pass: {result:?}" - ); - } - - #[test] - fn single_node_close_group_rejects_sparse_local_view() { - let xorname = [0x33u8; 32]; - let quoted_peer_ids = single_node_peer_ids(); - let sparse_count = SINGLE_NODE_CLOSE_GROUP_MATCH_REQUIRED - .checked_sub(1) - .expect("close-group match threshold is non-zero"); - let close_group_peer_ids = quoted_peer_ids[..sparse_count].to_vec(); - - let result = PaymentVerifier::check_single_node_close_group_match( - &quoted_peer_ids, - &close_group_peer_ids, - &xorname, - ); - - let err = result.expect_err("sparse close-group view must be rejected"); - assert!( - err.to_string().contains("has only"), - "expected sparse-view 
rejection, got: {err}" - 
); - } - - #[test] - fn single_node_close_group_rejects_unknown_peer_tolerance_exceeded() { - let xorname = [0x34u8; 32]; - let quoted_peer_ids = single_node_peer_ids(); - let matched_count = SINGLE_NODE_CLOSE_GROUP_MATCH_REQUIRED - .checked_sub(1) - .expect("close-group match threshold is non-zero"); - let mut close_group_peer_ids = quoted_peer_ids[..matched_count].to_vec(); - close_group_peer_ids.push(synthetic_peer_id(0xF0)); - - let result = PaymentVerifier::check_single_node_close_group_match( - &quoted_peer_ids, - &close_group_peer_ids, - &xorname, - ); - - let err = result.expect_err("excess unknown quoted peers must be rejected"); - assert!( - err.to_string().contains(&format!( - "need at least {SINGLE_NODE_CLOSE_GROUP_MATCH_REQUIRED}" - )), - "expected close-group match threshold rejection, got: {err}" - ); - } - - #[test] - fn completed_single_node_payment_accepts_matching_recipient_and_overpayment() { - let rewards_address = RewardsAddress::new([0x44u8; 20]); - let quote = make_quote_payment_info(Amount::from(10u64), rewards_address); - let rewards_prefix = - PaymentVerifier::completed_payment_rewards_prefix(&quote.rewards_address).to_vec(); - let on_chain_amount = Amount::from(31u64); - - let result = PaymentVerifier::validate_completed_single_node_payment( - &quote, - &rewards_prefix, - on_chain_amount, - ); - - assert_eq!(result.expect("valid completed payment"), on_chain_amount); - } - - #[test] - fn ordered_median_tied_quotes_checks_selected_median_first() { - const TIED_PRICE_ATTO: u64 = 10; - - let quotes = std::array::from_fn(|idx| { - let tag = u8::try_from(idx).expect("CLOSE_GROUP_SIZE fits in u8"); - let price = Amount::from(TIED_PRICE_ATTO); - QuotePaymentInfo { - quote_hash: FixedBytes::from([tag; 32]), - rewards_address: RewardsAddress::new([tag; 20]), - amount: PaymentVerifier::expected_single_node_payment_amount(price) - .expect("expected amount"), - price, - } - }); - let single_payment = SingleNodePayment { quotes }; - let median = 
single_payment.paid_quote().expect("median quote"); - - let ordered = PaymentVerifier::ordered_median_tied_quotes(&single_payment, median); - - assert_eq!(ordered.len(), CLOSE_GROUP_SIZE); - assert_eq!( - ordered.first().expect("first tied quote").quote_hash, - median.quote_hash - ); - let selected_median_count = ordered - .iter() - .filter(|quote| quote.quote_hash == median.quote_hash) - .count(); - assert_eq!(selected_median_count, 1); - } - - #[test] - fn completed_single_node_payment_rejects_underpayment() { - let rewards_address = RewardsAddress::new([0x55u8; 20]); - let quote = make_quote_payment_info(Amount::from(10u64), rewards_address); - let rewards_prefix = - PaymentVerifier::completed_payment_rewards_prefix(&quote.rewards_address).to_vec(); - let on_chain_amount = Amount::from(29u64); - - let result = PaymentVerifier::validate_completed_single_node_payment( - &quote, - &rewards_prefix, - on_chain_amount, - ); - - let err = result.expect_err("underpayment must be rejected"); - assert!( - err.to_string().contains("Underpayment"), - "expected underpayment rejection, got: {err}" - ); - } - - #[test] - fn completed_single_node_payment_rejects_recipient_mismatch() { - let rewards_address = RewardsAddress::new([0x66u8; 20]); - let quote = make_quote_payment_info(Amount::from(10u64), rewards_address); - let wrong_rewards_prefix = [0x77u8; COMPLETED_PAYMENT_REWARDS_PREFIX_LEN]; - let on_chain_amount = Amount::from(30u64); - - let result = PaymentVerifier::validate_completed_single_node_payment( - &quote, - &wrong_rewards_prefix, - on_chain_amount, - ); - - let err = result.expect_err("recipient mismatch must be rejected"); - assert!( - err.to_string().contains("Recipient mismatch"), - "expected recipient mismatch rejection, got: {err}" - ); - } - // ========================================================================= // Merkle-tagged proof tests // ========================================================================= @@ -3381,6 +2821,18 @@ mod tests { 
// can reason 
about which IDs are "in the network's top-K" vs not. // ========================================================================= + /// Build a deterministic `PeerId` from a single byte tag. + fn synthetic_peer_id(tag: u8) -> PeerId { + let mut bytes = [0u8; 32]; + bytes[0] = tag; + PeerId::from_bytes(bytes) + } + + /// Build a vector of synthetic `PeerId`s tagged with bytes 1..=n. + fn synthetic_peer_ids(n: u8) -> Vec<PeerId> { + (1..=n).map(synthetic_peer_id).collect() + } + #[test] fn closeness_match_passes_when_all_16_candidates_in_top_16() { // Trivial case: every candidate is in the network's top-16. diff --git a/src/upgrade/apply.rs b/src/upgrade/apply.rs index 9d19870a..7af8c7d7 100644 --- a/src/upgrade/apply.rs +++ b/src/upgrade/apply.rs @@ -20,7 +20,7 @@ use std::path::{Path, PathBuf}; use tar::Archive; /// Maximum allowed upgrade archive size (200 MiB). -const MAX_ARCHIVE_SIZE_BYTES: usize = 200 * 1024 * 1024; +pub(super) const MAX_ARCHIVE_SIZE_BYTES: usize = 200 * 1024 * 1024; /// Exit code that signals the service manager to restart the process. /// @@ -176,9 +176,24 @@ impl AutoApplyUpgrader { .parent() .ok_or_else(|| Error::Upgrade("Cannot determine binary directory".to_string()))?; - // Create temp directory for upgrade - let temp_dir = tempfile::Builder::new() - .prefix("ant-upgrade-") + // Create temp directory for upgrade. + // + // On Unix, create it with 0700 so a same-host attacker on a different + // UID cannot read/write the staging area between when the cache + // re-verifies the ML-DSA signature on a private copy and when + // `extract_binary` reads it (closes a verify-vs-extract TOCTOU on + // the staging directory). The `tempfile::Builder::permissions` + // path is supported on tempfile 3 — on platforms that don't honour + // it the call is a no-op and the ML-DSA verification on the + // private copy still bounds the residual. 
+ let mut tempdir_builder = tempfile::Builder::new(); + tempdir_builder.prefix("ant-upgrade-"); + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + tempdir_builder.permissions(std::fs::Permissions::from_mode(0o700)); + } + let temp_dir = tempdir_builder .tempdir_in(binary_dir) .map_err(|e| Error::Upgrade(format!("Failed to create temp dir: {e}")))?; @@ -317,21 +332,26 @@ impl AutoApplyUpgrader { version_str: &str, ) -> Result { if let Some(ref cache) = self.binary_cache { - // Fast path — cache hit without locking - if let Some(cached_path) = cache.get_verified(version_str) { - info!("Cached binary verified for version {}", version_str); - let dest = dest_dir.join( - cached_path - .file_name() - .unwrap_or_else(|| std::ffi::OsStr::new("ant-node")), - ); - if let Err(e) = fs::copy(&cached_path, &dest) { - warn!("Failed to copy from cache, will re-download: {e}"); - return self - .download_verify_extract(info, dest_dir, Some(cache)) - .await; + // Fast path — cache hit without locking. The cache re-verifies + // the ML-DSA signature over the archive on every call, so a + // tampered cache entry returns None here and we fall through to + // a fresh, fully verified download. + // `dest_dir` is this upgrade's process-private temp dir, so the + // cache stages + verifies the archive there; extraction then + // reads exactly the verified bytes (no shared-file TOCTOU). 
+ if let Some(verified_archive) = cache.get_verified_archive(version_str, dest_dir) { + match Self::extract_binary(&verified_archive, dest_dir) { + Ok(binary) => { + info!("Reused signature-verified cached archive for {version_str}"); + return Ok(binary); + } + Err(e) => { + warn!("Failed to extract from cached archive, will re-download: {e}"); + return self + .download_verify_extract(info, dest_dir, Some(cache)) + .await; + } } - return Ok(dest); } // Cache miss — acquire exclusive download lock via spawn_blocking @@ -345,19 +365,15 @@ impl AutoApplyUpgrader { .await .map_err(|e| Error::Upgrade(format!("Lock task failed: {e}")))??; - // Re-check cache under the lock — another node may have populated it - if let Some(cached_path) = cache.get_verified(version_str) { - info!( - "Cached binary became available under lock for version {}", - version_str - ); - let dest = dest_dir.join( - cached_path - .file_name() - .unwrap_or_else(|| std::ffi::OsStr::new("ant-node")), - ); - fs::copy(&cached_path, &dest)?; - return Ok(dest); + // Re-check cache under the lock — another node may have populated + // it. Same re-verification guarantee as the fast path. + if let Some(verified_archive) = cache.get_verified_archive(version_str, dest_dir) { + if let Ok(binary) = Self::extract_binary(&verified_archive, dest_dir) { + info!( + "Signature-verified cached archive became available under lock for {version_str}" + ); + return Ok(binary); + } } // Still missing — download while holding the lock @@ -400,15 +416,22 @@ impl AutoApplyUpgrader { signature::verify_from_file(&archive_path, &sig_path)?; info!("Archive signature verified successfully"); - // Step 4: Extract binary from verified archive + // Step 4: Extract binary from the just-verified archive. 
info!("Extracting binary from archive..."); let extracted_binary = Self::extract_binary(&archive_path, dest_dir)?; - // Store in binary cache if available + // Step 5: Cache the signature-verified ARCHIVE (+ its signature) + // AFTER successful extraction. We cache the signed artifact, never + // the extracted binary, so every later cache hit can re-verify the + // signature. Caching only after extract proves the archive is + // actually usable on this platform avoids turning a + // validly-signed-but-malformed release into a shared cache poison + // pill (every later node would hit cache, fail extract, and + // re-download). if let Some(c) = cache { let version_str = info.version.to_string(); - if let Err(e) = c.store(&version_str, &extracted_binary) { - warn!("Failed to store binary in cache: {e}"); + if let Err(e) = c.store_archive(&version_str, &archive_path, &sig_path) { + warn!("Failed to store verified archive in cache: {e}"); } } diff --git a/src/upgrade/binary_cache.rs b/src/upgrade/binary_cache.rs index 43aeb2ff..0fb8035d 100644 --- a/src/upgrade/binary_cache.rs +++ b/src/upgrade/binary_cache.rs @@ -1,124 +1,444 @@ -//! Disk cache for downloaded upgrade binaries. +//! Disk cache for downloaded upgrade archives. //! //! When multiple ant-node instances detect the same upgrade, only the first -//! one needs to download and verify the archive. `BinaryCache` stores the -//! extracted binary alongside a SHA-256 integrity metadata file so that -//! subsequent nodes can copy it directly. +//! one needs to download the archive. `BinaryCache` stores the **signed +//! archive together with its detached ML-DSA-65 signature** so that +//! subsequent nodes can reuse it. //! -//! **Security note:** SHA-256 is used only for cache integrity (detecting -//! corruption or partial writes). The actual security gate remains the -//! ML-DSA-65 signature verification performed during the initial download. +//! ## Security model +//! +//! 
The ML-DSA-65 signature is the security gate, and it covers the *archive* +//! bytes — not the extracted binary. A previous version cached the extracted +//! binary and, on a cache hit, returned it after only a SHA-256 check against +//! a sibling metadata file. SHA-256 is not a security control: anyone able to +//! write to the shared cache directory (a co-located process, a shared +//! container volume, a low-privilege foothold) could replace the cached +//! binary and its `.meta.json` with a matching hash, and the next node would +//! execute it **without any signature verification** — persistent RCE. +//! +//! This module now caches the *archive + signature* and, on **every** cache +//! hit, re-runs ML-DSA-65 verification over the cached archive before it is +//! used. A tampered archive fails verification (the release key is pinned in +//! the binary and cannot be forged); a tampered or missing signature fails +//! likewise. The extracted binary is always derived fresh from the +//! just-verified archive by the caller, so a poisoned cache entry can never +//! be executed. The SHA-256 metadata is retained only as a fast corruption +//! pre-check, never as the trust decision. +//! +//! ## Residual: cache entries are not bound to a specific release version +//! +//! `signature::SIGNING_CONTEXT = "ant-node-release-v1"` is constant across +//! versions, so the ML-DSA signature attests to "this archive is a valid +//! ant-node release", not "this archive is release X.Y.Z". An attacker with +//! cache-dir write access who possesses any past validly-signed release can +//! plant it under a newer version's cache key; the next node performing +//! that upgrade accepts it and runs it as the newer version. Net effect: +//! forced downgrade or wrong-arch crash loop, not arbitrary RCE. +//! +//! This is out of scope of the cache-poisoning RCE class this module +//! addresses (which trusted SHA-256 alone on cache hits): the `cache_dir` +//! 
is `0o700` (defence in depth, see `cache_dir.rs`) and the attacker +//! already needs same-UID write to exploit this — they can replace the +//! running binary directly. Closing the gap properly requires upstream +//! release-signing changes (the signing context must include the version +//! string, e.g. `b"ant-node-release-v1:1.2.3"`) and is tracked as a +//! follow-up. use crate::error::{Error, Result}; use crate::logging::{debug, warn}; +use crate::upgrade::signature; use fs2::FileExt; +use saorsa_pqc::api::sig::MlDsaPublicKey; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; -use std::fs::{self, File}; -use std::io::{Read, Write}; -use std::path::PathBuf; +use std::fs::{self, File, OpenOptions}; +use std::io::{self, Read, Write}; +use std::path::{Path, PathBuf}; + +/// Maximum size accepted for the `.meta.json` sidecar. +/// +/// A well-formed `CachedArchiveMeta` serialises to roughly 120 bytes; the +/// 4 KiB cap is comfortably above any legitimate payload and tight enough +/// that an attacker who plants a metadata file the size of `/dev/zero` +/// cannot stall the metadata read into a hang or OOM. +const MAX_META_BYTES: u64 = 4 * 1024; -/// On-disk cache for downloaded upgrade binaries. +/// On-disk cache for downloaded, signature-verified upgrade archives. #[derive(Clone)] pub struct BinaryCache { - /// Directory that holds cached binaries and metadata. + /// Directory that holds cached archives, signatures, and metadata. cache_dir: PathBuf, + /// Verification key override. `None` in production → the pinned release + /// key embedded in [`signature`] is used (the real, unforgeable gate). + /// Only ever `Some` via the `#[cfg(test)]` constructor, so test builds + /// can exercise the cache with a generated keypair without weakening the + /// production trust anchor in any way. + verify_key: Option, } -/// Metadata written alongside each cached binary. +/// Metadata written alongside each cached archive. 
+/// +/// The SHA-256 here is a fast integrity/corruption pre-check only. It is +/// **not** a security control: the ML-DSA-65 signature over the archive is +/// re-verified on every cache hit regardless of this value. #[derive(Serialize, Deserialize)] -struct CachedBinaryMeta { +struct CachedArchiveMeta { /// Semantic version string (e.g. "1.2.3"). version: String, - /// Hex-encoded SHA-256 digest of the cached binary. - sha256: String, - /// When the binary was cached (seconds since UNIX epoch). + /// Hex-encoded SHA-256 digest of the cached archive (corruption check). + archive_sha256: String, + /// When the archive was cached (seconds since UNIX epoch). cached_at_epoch_secs: u64, } impl BinaryCache { /// Create a new binary cache backed by the given directory. + /// + /// Production constructor: the cache verifies cached archives against the + /// pinned release public key embedded in the binary. #[must_use] pub fn new(cache_dir: PathBuf) -> Self { - Self { cache_dir } + Self { + cache_dir, + verify_key: None, + } } - /// Return the path where a cached binary for `version` would be stored. + /// Test-only constructor that verifies against an explicit public key + /// instead of the pinned release key (the production trust anchor is + /// unchanged; this only exists so unit tests can produce verifiable + /// signatures with a generated keypair). + #[cfg(test)] #[must_use] - pub fn cached_binary_path(&self, version: &str) -> PathBuf { - let name = if cfg!(windows) { - format!("ant-node-{version}.exe") - } else { - format!("ant-node-{version}") - }; - self.cache_dir.join(name) + pub fn new_with_verify_key(cache_dir: PathBuf, verify_key: MlDsaPublicKey) -> Self { + Self { + cache_dir, + verify_key: Some(verify_key), + } } - /// Return the cached binary path if it exists and its SHA-256 matches - /// the stored metadata. Returns `None` on any mismatch or error. + /// Path of the cached archive for `version`. 
#[must_use] - pub fn get_verified(&self, version: &str) -> Option { - let bin_path = self.cached_binary_path(version); + pub fn cached_archive_path(&self, version: &str) -> PathBuf { + self.cache_dir.join(format!("ant-node-{version}.archive")) + } + + /// Path of the cached detached signature for `version`. + #[must_use] + fn cached_signature_path(&self, version: &str) -> PathBuf { + self.cache_dir.join(format!("ant-node-{version}.sig")) + } + + /// Verify `archive` against `sig` using the pinned release key in + /// production, or the injected test key under `#[cfg(test)]`. + fn verify_archive(&self, archive: &Path, sig: &Path) -> Result<()> { + self.verify_key.as_ref().map_or_else( + || signature::verify_from_file(archive, sig), + |key| signature::verify_from_file_with_key(archive, sig, key), + ) + } + + /// Copy the cached archive into the caller-private `private_dir`, + /// **cryptographically re-verify that private copy**, and return its + /// path — or `None` if there is no usable, trusted cache entry. + /// + /// On every call this: + /// 1. loads the sibling metadata and checks the version matches, + /// 2. copies the cached archive + signature into `private_dir` (a + /// location only this process writes, e.g. the per-upgrade temp dir), + /// 3. SHA-256 pre-checks the private copy against the metadata (fast + /// corruption check), then + /// 4. **re-verifies the ML-DSA-65 signature over the private copy** with + /// the pinned release key — the actual security gate. + /// + /// Verifying the *private copy* (not the shared cache file) closes the + /// TOCTOU window: an attacker with write access to the shared cache dir + /// cannot swap the bytes between verification and extraction, because the + /// caller extracts from the returned private path, which is the exact + /// byte sequence that was verified and is unreachable to the attacker. 
+ /// + /// Any failure (missing/corrupt metadata, copy error, hash mismatch, + /// missing signature, or — critically — a signature that does not verify + /// against the pinned release key) returns `None`, forcing a fresh, + /// fully verified download. + /// + /// The caller MUST extract the binary from the returned (private) archive + /// path, so the executed bytes always derive from signature-verified + /// input that no other principal could have modified post-verification. + /// + /// `private_dir` is a load-bearing security invariant: it MUST be a + /// process-private, mode-`0o700` directory that no other principal + /// can write to. The caller in `apply.rs` creates it via + /// `tempfile::Builder::permissions(0o700).tempdir_in(binary_dir)` — + /// any future caller MUST uphold the same invariant, otherwise the + /// reopens by path in `sha256_file` and `verify_archive` would re- + /// introduce a TOCTOU window. + // The verifier-side cache-hit gate is read top-to-bottom by anyone + // auditing the security model. Splitting it into smaller helpers just + // to placate clippy's line limit would scatter the threat model across + // call sites without improving safety. + #[allow(clippy::too_many_lines)] + #[must_use] + pub fn get_verified_archive(&self, version: &str, private_dir: &Path) -> Option { + let cached_archive = self.cached_archive_path(version); + let cached_sig = self.cached_signature_path(version); let meta_path = self.meta_path(version); - let meta_data = fs::read_to_string(&meta_path).ok()?; - let meta: CachedBinaryMeta = serde_json::from_str(&meta_data).ok()?; + // Read the metadata sidecar with a small, opened-handle size cap so + // an attacker with cache-dir write cannot plant `meta.json` as a + // symlink to `/dev/zero` (or any large/special file) and force a + // hang/OOM here before the archive/sig hardening runs. 
+ let meta_data = { + let (mut meta_file, meta_len) = match open_regular_capped(&meta_path, MAX_META_BYTES) { + Ok(pair) => pair, + Err(e) => { + debug!("Rejecting cache metadata for {version}: {e}"); + return None; + } + }; + // `meta_len` is capped at MAX_META_BYTES (4 KiB), so the + // `usize` conversion cannot fail in practice; `try_from` + + // `unwrap_or` just avoids a lossy `as` cast for clippy on 32-bit targets. + let cap = usize::try_from(meta_len).unwrap_or(usize::MAX); + let mut buf = String::with_capacity(cap); + if let Err(e) = meta_file.read_to_string(&mut buf) { + debug!("Failed to read cache metadata for {version}: {e}"); + return None; + } + buf + }; + let meta: CachedArchiveMeta = serde_json::from_str(&meta_data).ok()?; if meta.version != version { debug!("Binary cache version mismatch in metadata"); return None; } - let actual_hash = sha256_file(&bin_path).ok()?; - if actual_hash != meta.sha256 { + // Open archive + signature ONCE each with size and file-type + // validation on the opened handles. The staging copies below read + // from the FDs opened here — the shared cache-dir paths are never + // stat'ed or opened a second time, so an attacker who races a swap + // on those paths (symlink, FIFO, device, oversized file) after + // these validations cannot redirect what gets staged into the + // private dir; hashing and verification then use the private copy. + // + // Memory pressure note: `signature::verify_from_file*` reads the + // archive into memory in full (it is the FIPS-204 verifier's + // contract — message must be provided as a slice). `sha256_file` + // streams in 8 KiB chunks and is not an OOM vector. The + // `MAX_ARCHIVE_SIZE_BYTES` cap bounds the in-memory load and the + // staging-dir disk footprint together. 
+ let (mut archive_file, archive_len) = match open_regular_capped( + &cached_archive, + crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64, + ) { + Ok(pair) => pair, + Err(e) => { + warn!("Rejecting cached archive for {version}: {e}"); + return None; + } + }; + let (mut sig_file, sig_len) = + match open_regular_capped(&cached_sig, signature::SIGNATURE_SIZE as u64) { + Ok(pair) => pair, + Err(e) => { + warn!("Rejecting cached signature for {version}: {e}"); + return None; + } + }; + if sig_len != signature::SIGNATURE_SIZE as u64 { + // open_regular_capped enforces ≤ max; we additionally require + // EXACTLY SIGNATURE_SIZE (a shorter sig is not valid ML-DSA-65). warn!( - "Binary cache SHA-256 mismatch for version {version} (expected {}, got {})", - meta.sha256, actual_hash + "Cached signature for {version} has wrong size ({sig_len} bytes, \ + expected {})", + signature::SIGNATURE_SIZE ); return None; } - Some(bin_path) + // Stream the validated archive + signature into the caller-private + // directory FROM THE ALREADY-OPEN HANDLES (not from the path), so + // the bytes the verifier reads are the exact bytes the open-handle + // metadata checks were performed against. `take()` is belt-and- + // braces against an attacker who extends the file after open. 
+ let private_archive = private_dir.join(format!("cached-{version}.archive")); + let private_sig = private_dir.join(format!("cached-{version}.sig")); + + let cleanup = |reason: &str| { + debug!("Cleaning staged cache copy for {version}: {reason}"); + let _ = fs::remove_file(&private_archive); + let _ = fs::remove_file(&private_sig); + }; + + if let Err(e) = (|| -> io::Result<()> { + let mut dest = File::create(&private_archive)?; + io::copy(&mut (&mut archive_file).take(archive_len), &mut dest)?; + Ok(()) + })() { + debug!("Could not stage cached archive for {version}: {e}"); + cleanup("archive copy failed"); + return None; + } + if let Err(e) = (|| -> io::Result<()> { + let mut dest = File::create(&private_sig)?; + io::copy(&mut (&mut sig_file).take(sig_len), &mut dest)?; + Ok(()) + })() { + debug!("Could not stage cached signature for {version}: {e}"); + cleanup("signature copy failed"); + return None; + } + + // Fast corruption pre-check on the PRIVATE copy (NOT the security + // decision). A copy error or truncation surfaces here. + let actual_hash = match sha256_file(&private_archive) { + Ok(h) => h, + Err(e) => { + cleanup(&format!("sha256 read failed: {e}")); + return None; + } + }; + if actual_hash != meta.archive_sha256 { + warn!( + "Binary cache SHA-256 mismatch for version {version} \ + (expected {}, got {actual_hash}) — ignoring cache entry", + meta.archive_sha256 + ); + cleanup("sha256 mismatch"); + return None; + } + + // THE SECURITY GATE: re-verify the ML-DSA-65 signature over the + // PRIVATE archive copy on every hit. The returned path is this same + // private copy, so the caller extracts exactly the bytes that were + // verified — a cache entry tampered with on disk (binary/archive + // swap, forged metadata, or a post-verify swap attempt) cannot + // produce a private copy whose signature verifies against the + // pinned release key. 
+ if let Err(e) = self.verify_archive(&private_archive, &private_sig) { + warn!( + "Cached archive for version {version} FAILED ML-DSA signature \ + re-verification ({e}); discarding cache entry (possible \ + on-disk tampering). A fresh verified download will run." + ); + cleanup("signature re-verification failed"); + return None; + } + + debug!("Cached archive for version {version} passed ML-DSA re-verification"); + Some(private_archive) } - /// Store a binary in the cache. + /// Store a signature-verified archive in the cache. + /// + /// Both files are persisted (via write-to-temp-then-rename so readers + /// never observe partial writes); the metadata file is written last so + /// [`get_verified_archive`](Self::get_verified_archive) only succeeds + /// once every file is complete. /// - /// Uses a write-to-temp-then-rename strategy so that readers never - /// observe partially written files. The metadata file is written last - /// so that `get_verified` only succeeds once both files are complete. + /// Defence in depth: this re-verifies the archive against its signature + /// before caching, so a poisoned entry cannot be created through the + /// supported path even if a caller forgot to verify first. /// /// # Errors /// - /// Returns an error if the binary cannot be read or the cache files - /// cannot be written. - pub fn store(&self, version: &str, source_path: &std::path::Path) -> Result<()> { - let hash = sha256_file(source_path)?; + /// Returns an error if the signature does not verify, the inputs cannot + /// be read, or the cache files cannot be written. + pub fn store_archive( + &self, + version: &str, + archive_path: &Path, + signature_path: &Path, + ) -> Result<()> { + // Defence in depth: refuse to persist a non-regular file, an + // oversize archive, or a misshapen signature — mirroring the + // `get_verified_archive` cache-hit policy. `symlink_metadata` + // refuses to chase a symlink the caller may have planted. 
+ // + // Note the intentional asymmetry with `open_regular_capped` + // (which uses `fs::metadata` and DOES follow symlinks): on the + // store path the source file is supplied by the caller (typically + // a path under our control after download), so a symlink there is + // surprising and worth rejecting. On the read path the cache dir + // is shared and an attacker may have planted a symlink — but the + // attacker already has write access, so chasing a symlink-to- + // regular is no worse than them editing the regular file + // directly, while still letting the post-open `is_file()` reject + // symlink-to-special. + let archive_meta = fs::symlink_metadata(archive_path)?; + if !archive_meta.file_type().is_file() { + return Err(Error::Upgrade(format!( + "Refusing to cache archive for {version}: source is not a \ + regular file (symlink/special)" + ))); + } + let archive_len = archive_meta.len(); + if archive_len > crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64 { + return Err(Error::Upgrade(format!( + "Refusing to cache archive for {version}: size {archive_len} bytes \ + exceeds MAX_ARCHIVE_SIZE_BYTES" + ))); + } + let sig_meta = fs::symlink_metadata(signature_path)?; + if !sig_meta.file_type().is_file() { + return Err(Error::Upgrade(format!( + "Refusing to cache archive for {version}: signature is not a \ + regular file (symlink/special)" + ))); + } + let sig_len = sig_meta.len(); + if sig_len != signature::SIGNATURE_SIZE as u64 { + return Err(Error::Upgrade(format!( + "Refusing to cache archive for {version}: signature size {sig_len} \ + bytes, expected {}", + signature::SIGNATURE_SIZE + ))); + } - let dest = self.cached_binary_path(version); + self.verify_archive(archive_path, signature_path) + .map_err(|e| { + Error::Upgrade(format!( + "Refusing to cache archive for {version}: signature does not verify ({e})" + )) + })?; + + let archive_hash = sha256_file(archive_path)?; + + let dest_archive = self.cached_archive_path(version); + let dest_sig = 
self.cached_signature_path(version); let meta_path = self.meta_path(version); - // Write binary to a temp file then rename into place. - // Remove dest first on Windows where rename fails if it exists. - let tmp_bin = self.cache_dir.join(format!(".ant-node-{version}.tmp")); - fs::copy(source_path, &tmp_bin)?; - let _ = fs::remove_file(&dest); - fs::rename(&tmp_bin, &dest)?; + Self::atomic_copy( + archive_path, + &dest_archive, + &self + .cache_dir + .join(format!(".ant-node-{version}.archive.tmp")), + )?; + Self::atomic_copy( + signature_path, + &dest_sig, + &self.cache_dir.join(format!(".ant-node-{version}.sig.tmp")), + )?; let now = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .map_err(|e| Error::Upgrade(format!("System clock error: {e}")))? .as_secs(); - let meta = CachedBinaryMeta { + let meta = CachedArchiveMeta { version: version.to_string(), - sha256: hash, + archive_sha256: archive_hash, cached_at_epoch_secs: now, }; - let meta_json = serde_json::to_string(&meta) - .map_err(|e| Error::Upgrade(format!("Failed to serialize binary cache meta: {e}")))?; + let meta_json = serde_json::to_string(&meta).map_err(|e| { + Error::Upgrade(format!("Failed to serialize cached archive metadata: {e}")) + })?; - // Write metadata to a temp file then rename into place + // Metadata written last so a reader never sees a complete meta file + // pointing at an incomplete archive/signature pair. 
let tmp_meta = self.cache_dir.join(format!(".ant-node-{version}.meta.tmp")); let mut f = File::create(&tmp_meta)?; f.write_all(meta_json.as_bytes())?; @@ -127,7 +447,10 @@ impl BinaryCache { let _ = fs::remove_file(&meta_path); fs::rename(&tmp_meta, &meta_path)?; - debug!("Cached binary for version {version} at {}", dest.display()); + debug!( + "Cached verified archive for version {version} at {}", + dest_archive.display() + ); Ok(()) } @@ -135,11 +458,11 @@ impl BinaryCache { /// /// This prevents multiple nodes from downloading the same archive /// concurrently — the first acquires the lock and downloads, the rest - /// wait and then find the binary already cached. + /// wait and then find the archive already cached. /// /// The lock is released when the returned guard is dropped. /// - /// **Note:** `lock_exclusive()` blocks the calling thread. Callers in + /// **Note:** `lock_exclusive()` blocks the calling thread. Callers in /// async contexts should wrap this call in `tokio::task::spawn_blocking`. /// /// # Errors @@ -156,13 +479,17 @@ impl BinaryCache { // -- private helpers ----------------------------------------------------- + /// Copy `src` to `dest` atomically via a temp file + rename. + fn atomic_copy(src: &Path, dest: &Path, tmp: &Path) -> Result<()> { + fs::copy(src, tmp)?; + // Remove dest first on Windows where rename fails if it exists. 
+ let _ = fs::remove_file(dest); + fs::rename(tmp, dest)?; + Ok(()) + } + fn meta_path(&self, version: &str) -> PathBuf { - let name = if cfg!(windows) { - format!("ant-node-{version}.exe.meta.json") - } else { - format!("ant-node-{version}.meta.json") - }; - self.cache_dir.join(name) + self.cache_dir.join(format!("ant-node-{version}.meta.json")) } } @@ -173,8 +500,85 @@ pub struct DownloadLockGuard { _file: File, } +/// Open `path` as a regular file with size at most `max_len`, validating +/// the metadata on the **opened handle** so a race between any prior stat +/// and the read cannot substitute a special file (FIFO/device/socket) or +/// an oversized payload. A symlink whose target is a regular file is +/// accepted (it's just an indirect path to a regular file — the attacker +/// who placed the link already needed write access to the cache dir, the +/// same access level as directly editing the regular file); a symlink +/// whose target is a special file is rejected by the `is_file()` check on +/// the opened handle. +/// +/// On Unix, `open()` of a FIFO/named-pipe for reading blocks until a +/// writer connects, so a cache-dir attacker could otherwise hang the +/// upgrade indefinitely by planting a FIFO at the cache entry's path. We +/// (a) reject non-regular files via a `fs::metadata()` pre-check (follows +/// symlinks, so a symlink-to-regular is still accepted), and (b) on Unix +/// also open with `O_NONBLOCK` as a belt-and-braces defence in case the +/// pre-check races a swap. The post-open `is_file()` on the opened handle +/// remains the TOCTOU-safe gate. +/// +/// Returns `(File, len)` on success; the returned `File` is positioned at +/// offset 0 and may be `io::copy`'d into a destination — callers should +/// wrap with `Read::take(max_len)` so an attacker who extends the file +/// after the metadata read cannot stream beyond the cap. 
+fn open_regular_capped(path: &Path, max_len: u64) -> io::Result<(File, u64)> { + // Pre-check: refuse to even open a non-regular file. This is the + // first line of defence against an attacker who planted a FIFO at + // `path` — opening a FIFO for reading on Unix blocks until a writer + // connects, hanging the upgrade indefinitely. `fs::metadata` follows + // symlinks, so a symlink whose target is a regular file is accepted + // here and a symlink whose target is a FIFO/device/socket is rejected. + let pre_meta = fs::metadata(path)?; + if !pre_meta.file_type().is_file() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "not a regular file (FIFO/device/socket/dir)", + )); + } + + // Belt-and-braces against a pre-check vs open() race: on Unix also + // open with O_NONBLOCK, so even if an attacker swaps the regular file + // for a FIFO between the metadata read and open(), the open() returns + // immediately instead of blocking on a writer. Reads on a regular file + // ignore O_NONBLOCK, so this is a no-op for the happy path. The + // post-open is_file() check below still catches the swap. + let file = { + let mut opts = OpenOptions::new(); + opts.read(true); + #[cfg(unix)] + { + use std::os::unix::fs::OpenOptionsExt; + // `O_NONBLOCK` is per-arch on Linux (0o4000 on x86/arm/aarch64 + // /riscv, 0o200 on mips, 0x4000 on sparc, etc.). Use `libc` + // so we always pick the right constant for the target arch + // instead of silently setting a different flag. Reads on a + // regular file ignore `O_NONBLOCK` on all our supported + // platforms, so this is a no-op for the happy path. + opts.custom_flags(libc::O_NONBLOCK); + } + opts.open(path)? 
+ }; + let meta = file.metadata()?; + if !meta.file_type().is_file() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "not a regular file (FIFO/device/socket/dir)", + )); + } + let len = meta.len(); + if len > max_len { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!("file exceeds size cap ({len} > {max_len})"), + )); + } + Ok((file, len)) +} + /// Compute the hex-encoded SHA-256 digest of a file. -fn sha256_file(path: &std::path::Path) -> Result { +fn sha256_file(path: &Path) -> Result { let mut file = File::open(path)?; let mut hasher = Sha256::new(); let mut buf = [0u8; 8192]; @@ -198,58 +602,376 @@ fn sha256_file(path: &std::path::Path) -> Result { #[allow(clippy::unwrap_used, clippy::expect_used)] mod tests { use super::*; + use saorsa_pqc::api::sig::{ml_dsa_65, MlDsaPublicKey, MlDsaSecretKey}; + use std::sync::OnceLock; use tempfile::TempDir; + /// One generated keypair for the whole test module (keygen is expensive). + fn test_keypair() -> &'static (MlDsaPublicKey, MlDsaSecretKey) { + static KP: OnceLock<(MlDsaPublicKey, MlDsaSecretKey)> = OnceLock::new(); + KP.get_or_init(|| ml_dsa_65().generate_keypair().unwrap()) + } + + fn cache_with_test_key(dir: &Path) -> BinaryCache { + BinaryCache::new_with_verify_key(dir.to_path_buf(), test_keypair().0.clone()) + } + + /// A caller-private staging directory (the per-upgrade temp dir in + /// production). Returned so it outlives the call. + fn priv_dir() -> TempDir { + TempDir::new().unwrap() + } + + /// Write an archive + a valid detached signature over it. 
+ fn make_signed_archive(dir: &Path, contents: &[u8]) -> (PathBuf, PathBuf) { + let archive = dir.join("src-archive"); + fs::write(&archive, contents).unwrap(); + let sig = ml_dsa_65() + .sign_with_context(&test_keypair().1, contents, signature::SIGNING_CONTEXT) + .unwrap(); + let sig_path = dir.join("src-archive.sig"); + fs::write(&sig_path, sig.to_bytes()).unwrap(); + (archive, sig_path) + } + #[test] fn test_miss_returns_none() { let tmp = TempDir::new().unwrap(); - let cache = BinaryCache::new(tmp.path().to_path_buf()); - assert!(cache.get_verified("1.0.0").is_none()); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + assert!(cache.get_verified_archive("1.0.0", pd.path()).is_none()); } #[test] - fn test_store_and_get_verified() { + fn test_store_and_get_verified_archive() { let tmp = TempDir::new().unwrap(); - let cache = BinaryCache::new(tmp.path().to_path_buf()); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + + let (archive, sig) = make_signed_archive(tmp.path(), b"signed archive bytes"); + cache.store_archive("1.2.3", &archive, &sig).unwrap(); + + let got = cache + .get_verified_archive("1.2.3", pd.path()) + .expect("cache hit"); + assert_eq!(fs::read(&got).unwrap(), b"signed archive bytes"); + // The returned path must be the PRIVATE copy, not the shared cache + // file (that is what closes the verify/extract TOCTOU). 
+ assert!( + got.starts_with(pd.path()), + "returned archive must be the caller-private copy, got {got:?}" + ); + assert_ne!(got, cache.cached_archive_path("1.2.3")); + } - // Create a fake binary - let src = tmp.path().join("source-bin"); - fs::write(&src, b"hello world binary").unwrap(); + #[test] + fn test_store_rejects_unsigned_archive() { + let tmp = TempDir::new().unwrap(); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); - cache.store("1.2.3", &src).unwrap(); + let archive = tmp.path().join("a"); + fs::write(&archive, b"unsigned").unwrap(); + let bad_sig = tmp.path().join("a.sig"); + fs::write(&bad_sig, vec![0u8; signature::SIGNATURE_SIZE]).unwrap(); - let result = cache.get_verified("1.2.3"); - assert!(result.is_some()); - let cached_path = result.unwrap(); - assert_eq!(fs::read(&cached_path).unwrap(), b"hello world binary"); + assert!(cache.store_archive("1.0.0", &archive, &bad_sig).is_err()); + assert!(cache.get_verified_archive("1.0.0", pd.path()).is_none()); } + /// An attacker who swaps the cached archive on disk (and even forges a + /// matching SHA-256 in the metadata) cannot get it trusted, because + /// the ML-DSA signature is re-verified on every hit. #[test] - fn test_sha256_mismatch_returns_none() { + fn test_tampered_cached_archive_is_rejected() { let tmp = TempDir::new().unwrap(); - let cache = BinaryCache::new(tmp.path().to_path_buf()); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + + let (archive, sig) = make_signed_archive(tmp.path(), b"legit release archive"); + cache.store_archive("2.0.0", &archive, &sig).unwrap(); + assert!(cache.get_verified_archive("2.0.0", pd.path()).is_some()); + + // Attacker overwrites the cached archive with a malicious payload... + let cached_archive = cache.cached_archive_path("2.0.0"); + fs::write(&cached_archive, b"malicious payload").unwrap(); + + // ...and forges the metadata SHA-256 so the corruption pre-check passes. 
+ let forged_hash = { + let mut h = Sha256::new(); + h.update(b"malicious payload"); + hex::encode(h.finalize()) + }; + let meta = CachedArchiveMeta { + version: "2.0.0".to_string(), + archive_sha256: forged_hash, + cached_at_epoch_secs: 0, + }; + fs::write( + cache.meta_path("2.0.0"), + serde_json::to_string(&meta).unwrap(), + ) + .unwrap(); + + // The SHA-256 pre-check now passes, but ML-DSA re-verification of the + // swapped archive against the key fails → entry rejected. + assert!( + cache.get_verified_archive("2.0.0", pd.path()).is_none(), + "tampered cache entry must NOT be trusted even with a forged \ + matching SHA-256 — the signature gate runs on every hit" + ); + } - // Store a valid binary - let src = tmp.path().join("source-bin"); - fs::write(&src, b"original content").unwrap(); - cache.store("1.0.0", &src).unwrap(); + /// TOCTOU defence: even if an attacker swaps the *shared* cache archive + /// for malicious bytes immediately after a hit, the previously returned + /// path (a caller-private copy) still contains the verified bytes, so + /// what gets extracted/executed is exactly what was signature-verified. + #[test] + fn test_returned_archive_is_private_copy_immune_to_post_hit_swap() { + let tmp = TempDir::new().unwrap(); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + + let (archive, sig) = make_signed_archive(tmp.path(), b"the real signed release"); + cache.store_archive("3.0.0", &archive, &sig).unwrap(); + + let verified = cache + .get_verified_archive("3.0.0", pd.path()) + .expect("cache hit"); + + // Attacker swaps the SHARED cache archive right after verification. + fs::write( + cache.cached_archive_path("3.0.0"), + b"post-verify malicious swap", + ) + .unwrap(); + + // The path the caller will extract from is the private copy and is + // unaffected by the shared-file swap. 
+ assert_eq!( + fs::read(&verified).unwrap(), + b"the real signed release", + "extraction must read the verified private bytes, not the \ + attacker's post-verification swap" + ); + } - // Corrupt the cached binary - let cached = cache.cached_binary_path("1.0.0"); - fs::write(&cached, b"corrupted content").unwrap(); + #[test] + fn test_missing_signature_returns_none() { + let tmp = TempDir::new().unwrap(); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + + let (archive, sig) = make_signed_archive(tmp.path(), b"data"); + cache.store_archive("1.0.0", &archive, &sig).unwrap(); - assert!(cache.get_verified("1.0.0").is_none()); + // Attacker deletes the signature to try to skip verification. + fs::remove_file(cache.cached_signature_path("1.0.0")).unwrap(); + assert!(cache.get_verified_archive("1.0.0", pd.path()).is_none()); } #[test] fn test_missing_meta_returns_none() { let tmp = TempDir::new().unwrap(); - let cache = BinaryCache::new(tmp.path().to_path_buf()); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + let (archive, sig) = make_signed_archive(tmp.path(), b"data"); + cache.store_archive("1.0.0", &archive, &sig).unwrap(); + fs::remove_file(cache.meta_path("1.0.0")).unwrap(); + assert!(cache.get_verified_archive("1.0.0", pd.path()).is_none()); + } + + /// Size policy: an attacker with cache-dir write cannot OOM/disk-exhaust + /// the verifier by dropping a multi-GB archive — `get_verified_archive` + /// stat-checks the cached archive against `MAX_ARCHIVE_SIZE_BYTES` BEFORE + /// any copy or `fs::read` reaches `signature::verify_from_file`. 
+ #[test] + fn test_oversize_cached_archive_is_rejected_before_copy() { + let tmp = TempDir::new().unwrap(); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + + // Plant a real signed entry so the meta/sig pass earlier checks… + let (archive, sig) = make_signed_archive(tmp.path(), b"legit"); + cache.store_archive("3.1.0", &archive, &sig).unwrap(); + // …then truncate-grow the cached archive past the limit. + let cached_archive = cache.cached_archive_path("3.1.0"); + let oversize = crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64 + 1; + { + let f = File::create(&cached_archive).unwrap(); + f.set_len(oversize).unwrap(); + } + + // The size gate rejects pre-copy → no private archive ever staged. + assert!(cache.get_verified_archive("3.1.0", pd.path()).is_none()); + let private_archive = pd.path().join("cached-3.1.0.archive"); + assert!( + !private_archive.exists(), + "oversize entry must NOT be staged into private dir" + ); + } + + #[test] + fn test_wrong_size_signature_is_rejected_before_copy() { + let tmp = TempDir::new().unwrap(); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + + let (archive, sig) = make_signed_archive(tmp.path(), b"legit"); + cache.store_archive("3.2.0", &archive, &sig).unwrap(); + // Replace the cached signature with the wrong size. + fs::write(cache.cached_signature_path("3.2.0"), b"too-short").unwrap(); + + assert!(cache.get_verified_archive("3.2.0", pd.path()).is_none()); + } + + /// `store_archive` itself refuses to persist an oversize archive — even + /// from a (hypothetically) misbehaving caller that bypassed the + /// download-time size cap. + #[test] + fn test_store_archive_rejects_oversize() { + let tmp = TempDir::new().unwrap(); + let cache = cache_with_test_key(tmp.path()); + + // Make a sparse "archive" past the limit and any signature. 
+ let big = tmp.path().join("big.archive"); + { + let f = File::create(&big).unwrap(); + f.set_len(crate::upgrade::apply::MAX_ARCHIVE_SIZE_BYTES as u64 + 1) + .unwrap(); + } + let any_sig = tmp.path().join("any.sig"); + fs::write(&any_sig, vec![0u8; signature::SIGNATURE_SIZE]).unwrap(); + + assert!(cache.store_archive("9.9.9", &big, &any_sig).is_err()); + } + + /// Round-3 regression: a cache-dir writer cannot bypass the size gate + /// by planting a symlink whose `stat(2)` size is small but whose + /// target reads indefinitely (e.g. `/dev/zero`). `symlink_metadata` + /// + `is_file()` rejects the entry before any `fs::copy` reads it. + #[cfg(unix)] + #[test] + fn test_symlink_cached_archive_is_rejected_before_copy() { + let tmp = TempDir::new().unwrap(); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + + // Plant a legit signed entry so meta/version/sig-size are good… + let (archive, sig) = make_signed_archive(tmp.path(), b"legit"); + cache.store_archive("4.0.0", &archive, &sig).unwrap(); + // …then replace the cached archive with a symlink to /dev/zero. + let cached_archive = cache.cached_archive_path("4.0.0"); + fs::remove_file(&cached_archive).unwrap(); + std::os::unix::fs::symlink("/dev/zero", &cached_archive).unwrap(); + + assert!( + cache.get_verified_archive("4.0.0", pd.path()).is_none(), + "a symlinked cached archive must be rejected pre-copy, \ + not chased into /dev/zero" + ); + // Nothing should have been staged. + assert!(!pd.path().join("cached-4.0.0.archive").exists()); + } + + /// `.meta.json` is read through the same size/file-type gate as the + /// archive and signature: planting a multi-MB metadata file (or a + /// metadata symlink to a special file) is rejected pre-parse without + /// risking a hang or large allocation. 
+ #[test] + fn test_oversized_meta_is_rejected() { + let tmp = TempDir::new().unwrap(); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + + // Establish a valid entry so archive/sig are well-formed. + let (archive, sig) = make_signed_archive(tmp.path(), b"legit"); + cache.store_archive("5.0.0", &archive, &sig).unwrap(); + + // Overwrite meta with a file well above MAX_META_BYTES of garbage. + let meta_path = cache.meta_path("5.0.0"); + let huge = vec![b'a'; usize::try_from(MAX_META_BYTES).unwrap_or(usize::MAX) + 1024]; + fs::write(&meta_path, &huge).unwrap(); + + assert!( + cache.get_verified_archive("5.0.0", pd.path()).is_none(), + "oversized metadata file must be rejected before parsing" + ); + } + + /// A cache-dir attacker who replaces the cached archive with a FIFO + /// must not be able to hang `get_verified_archive` waiting for a + /// writer to connect. The pre-check + `O_NONBLOCK` belt-and-braces + /// returns immediately with an error, the cache hit is abandoned, and + /// the caller falls back to a fresh verified download. + #[cfg(unix)] + #[test] + fn test_fifo_cached_archive_does_not_hang() { + use std::time::{Duration, Instant}; + let tmp = TempDir::new().unwrap(); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + + // Plant a legit signed entry so meta/version/sig-size are good, + // then replace the cached archive with a FIFO. Without the + // pre-check + O_NONBLOCK, opening the FIFO for reading would + // block until a writer connected. + let (archive, sig) = make_signed_archive(tmp.path(), b"legit"); + cache.store_archive("6.0.0", &archive, &sig).unwrap(); + let cached_archive = cache.cached_archive_path("6.0.0"); + fs::remove_file(&cached_archive).unwrap(); + + // Use libc::mkfifo directly so a CI image that drops coreutils + // can't silently skip this test (an earlier shell-out version + // would hide a packaging regression). 
The unsafe block is scoped + // to the single FFI call — `mkfifo(2)` takes a NUL-terminated + // path, returns 0 on success and -1 on error with errno set. + let cstr = std::ffi::CString::new(cached_archive.as_os_str().as_encoded_bytes()).unwrap(); + #[allow(unsafe_code)] + let rc = unsafe { libc::mkfifo(cstr.as_ptr(), 0o600) }; + assert_eq!(rc, 0, "mkfifo failed: {}", std::io::Error::last_os_error()); + + // Measure only the cache-hit path so cold-process startup or + // unrelated test parallelism don't blow the budget. + let start = Instant::now(); + let got = cache.get_verified_archive("6.0.0", pd.path()); + let elapsed = start.elapsed(); + + assert!( + got.is_none(), + "a FIFO planted at the cached archive path must be rejected" + ); + // 5s gives generous headroom on a contended CI macOS runner + // while still catching a real "open is blocking on the FIFO". + assert!( + elapsed < Duration::from_secs(5), + "open of FIFO returned in {elapsed:?}, expected ≪ 5s — \ + pre-check or O_NONBLOCK is not catching this" + ); + // Nothing should have been staged. + assert!(!pd.path().join("cached-6.0.0.archive").exists()); + } + + /// `.meta.json` planted as a symlink to a special file (e.g. + /// `/dev/zero`) is rejected by the open-handle file-type check, + /// without hanging or OOM'ing on the read. 
+ #[cfg(unix)] + #[test] + fn test_meta_symlink_to_special_file_is_rejected() { + let tmp = TempDir::new().unwrap(); + let cache = cache_with_test_key(tmp.path()); + let pd = priv_dir(); + + let (archive, sig) = make_signed_archive(tmp.path(), b"legit"); + cache.store_archive("5.1.0", &archive, &sig).unwrap(); - // Write a binary but no meta file - let cached = cache.cached_binary_path("1.0.0"); - fs::write(&cached, b"binary data").unwrap(); + let meta_path = cache.meta_path("5.1.0"); + fs::remove_file(&meta_path).unwrap(); + std::os::unix::fs::symlink("/dev/zero", &meta_path).unwrap(); - assert!(cache.get_verified("1.0.0").is_none()); + assert!( + cache.get_verified_archive("5.1.0", pd.path()).is_none(), + "metadata symlink to a special file must be rejected" + ); } } diff --git a/src/upgrade/cache_dir.rs b/src/upgrade/cache_dir.rs index 75458e96..aa099ddb 100644 --- a/src/upgrade/cache_dir.rs +++ b/src/upgrade/cache_dir.rs @@ -26,6 +26,27 @@ pub fn upgrade_cache_dir() -> Result { let cache_dir = project_dirs.data_dir().join("upgrades"); fs::create_dir_all(&cache_dir)?; + // Defence in depth: restrict the shared upgrade cache to the owning + // user (0700) so a co-located low-privilege process cannot + // write/tamper with cached archives in the first place. The ML-DSA + // re-verification on every cache hit is the primary control; this just + // shrinks the attack surface. Best-effort on Unix; a failure to tighten + // permissions must not break upgrades (the crypto gate still holds). + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + if let Ok(meta) = fs::metadata(&cache_dir) { + let mut perms = meta.permissions(); + perms.set_mode(0o700); + if let Err(e) = fs::set_permissions(&cache_dir, perms) { + crate::logging::warn!( + "Could not tighten upgrade cache dir permissions to 0700 ({e}); \ + ML-DSA re-verification still protects cached archives" + ); + } + } + } + Ok(cache_dir) }