From 479bb4a7ff0c4dc722b442bca6824e39fd7d451a Mon Sep 17 00:00:00 2001 From: Sergey Timoshin Date: Sat, 14 Feb 2026 18:20:30 +0000 Subject: [PATCH 01/15] fix: update account data handling to strip discriminator prefix and include discriminator length --- external/photon | 2 +- sdk-libs/client/src/indexer/types/account.rs | 16 ++++++--- .../client/src/indexer/types/interface.rs | 9 +++-- sdk-libs/photon-api/src/codegen.rs | 35 +++++++++++++++++-- 4 files changed, 53 insertions(+), 9 deletions(-) diff --git a/external/photon b/external/photon index 84ddfc0f58..49064168e6 160000 --- a/external/photon +++ b/external/photon @@ -1 +1 @@ -Subproject commit 84ddfc0f586806373567faf75f45158076a4f133 +Subproject commit 49064168e6a546ef8c7408b5d0f5ee835fc7b9a7 diff --git a/sdk-libs/client/src/indexer/types/account.rs b/sdk-libs/client/src/indexer/types/account.rs index 3c0ab5eaa8..584eb4812b 100644 --- a/sdk-libs/client/src/indexer/types/account.rs +++ b/sdk-libs/client/src/indexer/types/account.rs @@ -92,10 +92,15 @@ impl TryFrom<&photon_api::types::AccountV2> for CompressedAccount { fn try_from(account: &photon_api::types::AccountV2) -> Result { let data = if let Some(data) = &account.data { + let disc_len = *data.discriminator_length as usize; + let full_data = base64::decode_config(&*data.data, base64::STANDARD_NO_PAD) + .map_err(|e| IndexerError::decode_error("data", e))?; + // Strip the discriminator prefix — photon now sends combined + // (disc + payload) but CompressedAccountData.data is payload-only. 
+ let payload = full_data[disc_len..].to_vec(); Ok::, IndexerError>(Some(CompressedAccountData { discriminator: (*data.discriminator).to_le_bytes(), - data: base64::decode_config(&*data.data, base64::STANDARD_NO_PAD) - .map_err(|e| IndexerError::decode_error("data", e))?, + data: payload, data_hash: decode_base58_to_fixed_array(&data.data_hash)?, })) } else { @@ -149,10 +154,13 @@ impl TryFrom<&photon_api::types::Account> for CompressedAccount { fn try_from(account: &photon_api::types::Account) -> Result { let data = if let Some(data) = &account.data { + let disc_len = *data.discriminator_length as usize; + let full_data = base64::decode_config(&*data.data, base64::STANDARD_NO_PAD) + .map_err(|e| IndexerError::decode_error("data", e))?; + let payload = full_data[disc_len..].to_vec(); Ok::, IndexerError>(Some(CompressedAccountData { discriminator: (*data.discriminator).to_le_bytes(), - data: base64::decode_config(&*data.data, base64::STANDARD_NO_PAD) - .map_err(|e| IndexerError::decode_error("data", e))?, + data: payload, data_hash: decode_base58_to_fixed_array(&data.data_hash)?, })) } else { diff --git a/sdk-libs/client/src/indexer/types/interface.rs b/sdk-libs/client/src/indexer/types/interface.rs index 61d8d109aa..8cef0170a0 100644 --- a/sdk-libs/client/src/indexer/types/interface.rs +++ b/sdk-libs/client/src/indexer/types/interface.rs @@ -57,13 +57,18 @@ fn decode_tree_info_v2( } /// Decode cold data from photon_api AccountData format. +/// Photon sends combined (disc + payload) in the `data` field; +/// strip the discriminator prefix so `ColdData.data` is payload-only. 
fn decode_account_data(data: &photon_api::types::AccountData) -> Result { let disc_val = *data.discriminator; let discriminator = disc_val.to_le_bytes(); + let disc_len = *data.discriminator_length as usize; + let full_data = base64::decode_config(&*data.data, base64::STANDARD_NO_PAD) + .map_err(|e| IndexerError::decode_error("data", e))?; + let payload = full_data[disc_len..].to_vec(); Ok(ColdData { discriminator, - data: base64::decode_config(&*data.data, base64::STANDARD_NO_PAD) - .map_err(|e| IndexerError::decode_error("data", e))?, + data: payload, data_hash: decode_base58_to_fixed_array(&data.data_hash)?, }) } diff --git a/sdk-libs/photon-api/src/codegen.rs b/sdk-libs/photon-api/src/codegen.rs index 4dd88bda05..2f50543ca7 100644 --- a/sdk-libs/photon-api/src/codegen.rs +++ b/sdk-libs/photon-api/src/codegen.rs @@ -200,7 +200,8 @@ All endpoints return AccountV2.*/ /// "required": [ /// "data", /// "dataHash", - /// "discriminator" + /// "discriminator", + /// "discriminatorLength" /// ], /// "properties": { /// "data": { @@ -211,6 +212,9 @@ All endpoints return AccountV2.*/ /// }, /// "discriminator": { /// "$ref": "#/components/schemas/UnsignedInteger" + /// }, + /// "discriminatorLength": { + /// "$ref": "#/components/schemas/UnsignedInteger" /// } /// }, /// "additionalProperties": false @@ -224,6 +228,8 @@ All endpoints return AccountV2.*/ #[serde(rename = "dataHash")] pub data_hash: Hash, pub discriminator: UnsignedInteger, + #[serde(rename = "discriminatorLength")] + pub discriminator_length: UnsignedInteger, } impl AccountData { pub fn builder() -> builder::AccountData { @@ -28763,13 +28769,22 @@ All endpoints return AccountV2.*/ super::UnsignedInteger, ::std::string::String, >, + discriminator_length: ::std::result::Result< + super::UnsignedInteger, + ::std::string::String, + >, } impl ::std::default::Default for AccountData { fn default() -> Self { Self { data: Err("no value supplied for data".to_string()), data_hash: Err("no value supplied for 
data_hash".to_string()), - discriminator: Err("no value supplied for discriminator".to_string()), + discriminator: Err( + "no value supplied for discriminator".to_string(), + ), + discriminator_length: Err( + "no value supplied for discriminator_length".to_string(), + ), } } } @@ -28810,6 +28825,20 @@ All endpoints return AccountV2.*/ }); self } + pub fn discriminator_length(mut self, value: T) -> Self + where + T: ::std::convert::TryInto, + T::Error: ::std::fmt::Display, + { + self.discriminator_length = value + .try_into() + .map_err(|e| { + format!( + "error converting supplied value for discriminator_length: {e}" + ) + }); + self + } } impl ::std::convert::TryFrom for super::AccountData { type Error = super::error::ConversionError; @@ -28820,6 +28849,7 @@ All endpoints return AccountV2.*/ data: value.data?, data_hash: value.data_hash?, discriminator: value.discriminator?, + discriminator_length: value.discriminator_length?, }) } } @@ -28829,6 +28859,7 @@ All endpoints return AccountV2.*/ data: Ok(value.data), data_hash: Ok(value.data_hash), discriminator: Ok(value.discriminator), + discriminator_length: Ok(value.discriminator_length), } } } From 9d0d2b320ba97aa1369453fe3b79305677c4fe1b Mon Sep 17 00:00:00 2001 From: Sergey Timoshin Date: Sun, 15 Feb 2026 21:34:08 +0000 Subject: [PATCH 02/15] chore: update photon subproject and refactor account data handling to remove discriminator length --- external/photon | 2 +- sdk-libs/client/src/indexer/types/account.rs | 16 +++------ .../client/src/indexer/types/interface.rs | 9 ++--- sdk-libs/photon-api/src/codegen.rs | 35 ++----------------- 4 files changed, 9 insertions(+), 53 deletions(-) diff --git a/external/photon b/external/photon index 49064168e6..84ddfc0f58 160000 --- a/external/photon +++ b/external/photon @@ -1 +1 @@ -Subproject commit 49064168e6a546ef8c7408b5d0f5ee835fc7b9a7 +Subproject commit 84ddfc0f586806373567faf75f45158076a4f133 diff --git a/sdk-libs/client/src/indexer/types/account.rs 
b/sdk-libs/client/src/indexer/types/account.rs index 584eb4812b..3c0ab5eaa8 100644 --- a/sdk-libs/client/src/indexer/types/account.rs +++ b/sdk-libs/client/src/indexer/types/account.rs @@ -92,15 +92,10 @@ impl TryFrom<&photon_api::types::AccountV2> for CompressedAccount { fn try_from(account: &photon_api::types::AccountV2) -> Result { let data = if let Some(data) = &account.data { - let disc_len = *data.discriminator_length as usize; - let full_data = base64::decode_config(&*data.data, base64::STANDARD_NO_PAD) - .map_err(|e| IndexerError::decode_error("data", e))?; - // Strip the discriminator prefix — photon now sends combined - // (disc + payload) but CompressedAccountData.data is payload-only. - let payload = full_data[disc_len..].to_vec(); Ok::, IndexerError>(Some(CompressedAccountData { discriminator: (*data.discriminator).to_le_bytes(), - data: payload, + data: base64::decode_config(&*data.data, base64::STANDARD_NO_PAD) + .map_err(|e| IndexerError::decode_error("data", e))?, data_hash: decode_base58_to_fixed_array(&data.data_hash)?, })) } else { @@ -154,13 +149,10 @@ impl TryFrom<&photon_api::types::Account> for CompressedAccount { fn try_from(account: &photon_api::types::Account) -> Result { let data = if let Some(data) = &account.data { - let disc_len = *data.discriminator_length as usize; - let full_data = base64::decode_config(&*data.data, base64::STANDARD_NO_PAD) - .map_err(|e| IndexerError::decode_error("data", e))?; - let payload = full_data[disc_len..].to_vec(); Ok::, IndexerError>(Some(CompressedAccountData { discriminator: (*data.discriminator).to_le_bytes(), - data: payload, + data: base64::decode_config(&*data.data, base64::STANDARD_NO_PAD) + .map_err(|e| IndexerError::decode_error("data", e))?, data_hash: decode_base58_to_fixed_array(&data.data_hash)?, })) } else { diff --git a/sdk-libs/client/src/indexer/types/interface.rs b/sdk-libs/client/src/indexer/types/interface.rs index 8cef0170a0..61d8d109aa 100644 --- 
a/sdk-libs/client/src/indexer/types/interface.rs +++ b/sdk-libs/client/src/indexer/types/interface.rs @@ -57,18 +57,13 @@ fn decode_tree_info_v2( } /// Decode cold data from photon_api AccountData format. -/// Photon sends combined (disc + payload) in the `data` field; -/// strip the discriminator prefix so `ColdData.data` is payload-only. fn decode_account_data(data: &photon_api::types::AccountData) -> Result { let disc_val = *data.discriminator; let discriminator = disc_val.to_le_bytes(); - let disc_len = *data.discriminator_length as usize; - let full_data = base64::decode_config(&*data.data, base64::STANDARD_NO_PAD) - .map_err(|e| IndexerError::decode_error("data", e))?; - let payload = full_data[disc_len..].to_vec(); Ok(ColdData { discriminator, - data: payload, + data: base64::decode_config(&*data.data, base64::STANDARD_NO_PAD) + .map_err(|e| IndexerError::decode_error("data", e))?, data_hash: decode_base58_to_fixed_array(&data.data_hash)?, }) } diff --git a/sdk-libs/photon-api/src/codegen.rs b/sdk-libs/photon-api/src/codegen.rs index 2f50543ca7..4dd88bda05 100644 --- a/sdk-libs/photon-api/src/codegen.rs +++ b/sdk-libs/photon-api/src/codegen.rs @@ -200,8 +200,7 @@ All endpoints return AccountV2.*/ /// "required": [ /// "data", /// "dataHash", - /// "discriminator", - /// "discriminatorLength" + /// "discriminator" /// ], /// "properties": { /// "data": { @@ -212,9 +211,6 @@ All endpoints return AccountV2.*/ /// }, /// "discriminator": { /// "$ref": "#/components/schemas/UnsignedInteger" - /// }, - /// "discriminatorLength": { - /// "$ref": "#/components/schemas/UnsignedInteger" /// } /// }, /// "additionalProperties": false @@ -228,8 +224,6 @@ All endpoints return AccountV2.*/ #[serde(rename = "dataHash")] pub data_hash: Hash, pub discriminator: UnsignedInteger, - #[serde(rename = "discriminatorLength")] - pub discriminator_length: UnsignedInteger, } impl AccountData { pub fn builder() -> builder::AccountData { @@ -28769,22 +28763,13 @@ All endpoints return 
AccountV2.*/ super::UnsignedInteger, ::std::string::String, >, - discriminator_length: ::std::result::Result< - super::UnsignedInteger, - ::std::string::String, - >, } impl ::std::default::Default for AccountData { fn default() -> Self { Self { data: Err("no value supplied for data".to_string()), data_hash: Err("no value supplied for data_hash".to_string()), - discriminator: Err( - "no value supplied for discriminator".to_string(), - ), - discriminator_length: Err( - "no value supplied for discriminator_length".to_string(), - ), + discriminator: Err("no value supplied for discriminator".to_string()), } } } @@ -28825,20 +28810,6 @@ All endpoints return AccountV2.*/ }); self } - pub fn discriminator_length(mut self, value: T) -> Self - where - T: ::std::convert::TryInto, - T::Error: ::std::fmt::Display, - { - self.discriminator_length = value - .try_into() - .map_err(|e| { - format!( - "error converting supplied value for discriminator_length: {e}" - ) - }); - self - } } impl ::std::convert::TryFrom for super::AccountData { type Error = super::error::ConversionError; @@ -28849,7 +28820,6 @@ All endpoints return AccountV2.*/ data: value.data?, data_hash: value.data_hash?, discriminator: value.discriminator?, - discriminator_length: value.discriminator_length?, }) } } @@ -28859,7 +28829,6 @@ All endpoints return AccountV2.*/ data: Ok(value.data), data_hash: Ok(value.data_hash), discriminator: Ok(value.discriminator), - discriminator_length: Ok(value.discriminator_length), } } } From a91aaf9d2959f59c942539d910d7a46432199787 Mon Sep 17 00:00:00 2001 From: Sergey Timoshin Date: Tue, 17 Feb 2026 17:52:24 +0000 Subject: [PATCH 03/15] chore: update photon subproject to latest commit --- external/photon | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/photon b/external/photon index 84ddfc0f58..00f79f0188 160000 --- a/external/photon +++ b/external/photon @@ -1 +1 @@ -Subproject commit 84ddfc0f586806373567faf75f45158076a4f133 +Subproject commit 
00f79f018865eba291bbbdad107a844bc63ca30c From f4c1c105adb3a997415d47edff7a5d531ab008b4 Mon Sep 17 00:00:00 2001 From: Sergey Timoshin Date: Wed, 18 Feb 2026 11:20:07 +0000 Subject: [PATCH 04/15] chore: update photon subproject to latest commit --- external/photon | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/photon b/external/photon index 00f79f0188..d002be8d72 160000 --- a/external/photon +++ b/external/photon @@ -1 +1 @@ -Subproject commit 00f79f018865eba291bbbdad107a844bc63ca30c +Subproject commit d002be8d72d7f2d01e00589adfd0577a3c57dadf From 0183b9dd257b31358ff31b44c446ed864817131b Mon Sep 17 00:00:00 2001 From: Sergey Timoshin Date: Thu, 19 Feb 2026 11:13:02 +0000 Subject: [PATCH 05/15] chore: update photon subproject to latest commit --- external/photon | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/photon b/external/photon index d002be8d72..6562a7d2f2 160000 --- a/external/photon +++ b/external/photon @@ -1 +1 @@ -Subproject commit d002be8d72d7f2d01e00589adfd0577a3c57dadf +Subproject commit 6562a7d2f250d802c8e43986f37802f98ba175de From 57247cd6baaafb7402998a043a107378e6c7cb25 Mon Sep 17 00:00:00 2001 From: Sergey Timoshin Date: Fri, 20 Feb 2026 00:55:11 +0000 Subject: [PATCH 06/15] feat: add --helius-rpc cli flag feat: add support for getProgramAccounts standard rpc calls for compression feat: structured error logging --- .../dashboard/src/app/api/[...path]/route.ts | 22 + forester/dashboard/src/lib/api.ts | 2 +- forester/src/api_server.rs | 120 +- forester/src/cli.rs | 14 +- .../src/compressible/bootstrap_helpers.rs | 24 +- forester/src/compressible/ctoken/bootstrap.rs | 7 +- forester/src/compressible/mint/bootstrap.rs | 2 + forester/src/compressible/pda/bootstrap.rs | 17 +- forester/src/config.rs | 33 +- forester/src/epoch_manager.rs | 1494 +++++++++++++---- forester/src/errors.rs | 7 + forester/src/forester_status.rs | 2 +- forester/src/lib.rs | 47 +- forester/src/logging.rs | 121 ++ 
forester/src/main.rs | 28 +- forester/src/processor/v1/helpers.rs | 179 +- forester/src/processor/v2/common.rs | 2 + forester/src/processor/v2/helpers.rs | 6 +- forester/src/processor/v2/processor.rs | 97 +- forester/src/telemetry.rs | 11 +- forester/src/tree_data_sync.rs | 31 +- forester/tests/e2e_test.rs | 1 + forester/tests/legacy/priority_fee_test.rs | 6 +- forester/tests/priority_fee_test.rs | 5 +- forester/tests/test_compressible_ctoken.rs | 1 + forester/tests/test_compressible_mint.rs | 1 + forester/tests/test_compressible_pda.rs | 1 + forester/tests/test_utils.rs | 1 + 28 files changed, 1762 insertions(+), 520 deletions(-) create mode 100644 forester/dashboard/src/app/api/[...path]/route.ts create mode 100644 forester/src/logging.rs diff --git a/forester/dashboard/src/app/api/[...path]/route.ts b/forester/dashboard/src/app/api/[...path]/route.ts new file mode 100644 index 0000000000..3c53696752 --- /dev/null +++ b/forester/dashboard/src/app/api/[...path]/route.ts @@ -0,0 +1,22 @@ +import { NextResponse } from "next/server"; + +const BACKEND_URL = + process.env.FORESTER_API_URL || "http://localhost:8080"; + +export async function GET( + request: Request, + { params }: { params: Promise<{ path: string[] }> } +) { + const { path } = await params; + const backendPath = path.join("/"); + try { + const response = await fetch(`${BACKEND_URL}/${backendPath}`); + const data = await response.json(); + return NextResponse.json(data, { status: response.status }); + } catch { + return NextResponse.json( + { error: "Backend unavailable" }, + { status: 502 } + ); + } +} diff --git a/forester/dashboard/src/lib/api.ts b/forester/dashboard/src/lib/api.ts index 186359c71f..7e1ee6e8e9 100644 --- a/forester/dashboard/src/lib/api.ts +++ b/forester/dashboard/src/lib/api.ts @@ -1,5 +1,5 @@ const API_URL = - process.env.NEXT_PUBLIC_FORESTER_API_URL || "http://localhost:8080"; + process.env.NEXT_PUBLIC_FORESTER_API_URL ?? 
"/api"; export class ApiError extends Error { constructor( diff --git a/forester/src/api_server.rs b/forester/src/api_server.rs index e83b0f04e4..d08e3b5cfa 100644 --- a/forester/src/api_server.rs +++ b/forester/src/api_server.rs @@ -98,11 +98,13 @@ pub struct CompressibleDashboardState { /// Configuration for the HTTP API server. pub struct ApiServerConfig { + pub run_id: Arc, pub rpc_url: String, pub port: u16, pub allow_public_bind: bool, pub compressible_state: Option, pub prometheus_url: Option, + pub helius_rpc: bool, } /// Default timeout for status endpoint in seconds @@ -120,16 +122,23 @@ pub struct ApiServerHandle { pub thread_handle: JoinHandle<()>, /// Sender to trigger graceful shutdown pub shutdown_tx: oneshot::Sender<()>, + pub run_id: Arc, } impl ApiServerHandle { /// Trigger graceful shutdown and wait for the server to stop pub fn shutdown(self) { + let run_id = self.run_id.clone(); // Send shutdown signal (ignore error if receiver already dropped) let _ = self.shutdown_tx.send(()); // Wait for the thread to finish if let Err(e) = self.thread_handle.join() { - error!("API server thread panicked: {:?}", e); + error!( + event = "api_server_thread_panicked", + run_id = %run_id, + error = ?e, + "API server thread panicked" + ); } } } @@ -142,6 +151,7 @@ impl ApiServerHandle { pub(crate) async fn fetch_metrics_snapshot( client: &reqwest::Client, prometheus_url: &Option, + run_id: &str, ) -> MetricsSnapshot { let now = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) @@ -170,7 +180,12 @@ pub(crate) async fn fetch_metrics_snapshot( }; } Err(e) => { - warn!("Prometheus query failed: {}", e); + warn!( + event = "api_server_prometheus_query_failed", + run_id = %run_id, + error = %e, + "Prometheus query failed" + ); } } } @@ -187,6 +202,8 @@ pub(crate) async fn fetch_metrics_snapshot( pub(crate) async fn fetch_compressible_snapshot( trackers: &Option, rpc_url: &str, + helius_rpc: bool, + run_id: &str, ) -> CompressibleSnapshot { use 
crate::compressible::traits::CompressibleTracker; @@ -211,7 +228,7 @@ pub(crate) async fn fetch_compressible_snapshot( // Standalone mode: RPC with timeout let fetch_result = tokio::time::timeout( COMPRESSIBLE_FETCH_TIMEOUT, - crate::compressible::count_compressible_accounts(rpc_url), + crate::compressible::count_compressible_accounts(rpc_url, helius_rpc), ) .await; @@ -227,7 +244,12 @@ pub(crate) async fn fetch_compressible_snapshot( cached_at: now, }, Ok(Err(e)) => { - warn!("RPC compressible count failed: {}", e); + warn!( + event = "api_server_compressible_count_failed", + run_id = %run_id, + error = %e, + "RPC compressible count failed" + ); CompressibleSnapshot { data: CompressibleResponse { enabled: false, @@ -241,8 +263,10 @@ pub(crate) async fn fetch_compressible_snapshot( } Err(_) => { warn!( - "Compressible count timed out after {}s", - COMPRESSIBLE_FETCH_TIMEOUT.as_secs() + event = "api_server_compressible_count_timeout", + run_id = %run_id, + timeout_seconds = COMPRESSIBLE_FETCH_TIMEOUT.as_secs(), + "Compressible count timed out" ); CompressibleSnapshot { data: CompressibleResponse { @@ -268,9 +292,10 @@ async fn run_metrics_provider( client: reqwest::Client, prometheus_url: Option, mut shutdown: broadcast::Receiver<()>, + run_id: Arc, ) { loop { - let snapshot = fetch_metrics_snapshot(&client, &prometheus_url).await; + let snapshot = fetch_metrics_snapshot(&client, &prometheus_url, run_id.as_ref()).await; if tx.send(snapshot).is_err() { break; // all receivers dropped } @@ -279,7 +304,11 @@ async fn run_metrics_provider( _ = shutdown.recv() => break, } } - info!("Metrics provider stopped"); + info!( + event = "api_server_metrics_provider_stopped", + run_id = %run_id, + "Metrics provider stopped" + ); } /// Periodically fetches compressible counts and publishes via watch channel. 
@@ -288,6 +317,8 @@ async fn run_compressible_provider( trackers: Option, rpc_url: String, mut shutdown: broadcast::Receiver<()>, + helius_rpc: bool, + run_id: Arc, ) { // In-memory trackers are cheap (.len()); RPC is expensive (getProgramAccounts) let interval = if trackers.is_some() { @@ -297,7 +328,8 @@ async fn run_compressible_provider( }; loop { - let snapshot = fetch_compressible_snapshot(&trackers, &rpc_url).await; + let snapshot = + fetch_compressible_snapshot(&trackers, &rpc_url, helius_rpc, run_id.as_ref()).await; if tx.send(snapshot).is_err() { break; } @@ -306,7 +338,11 @@ async fn run_compressible_provider( _ = shutdown.recv() => break, } } - info!("Compressible provider stopped"); + info!( + event = "api_server_compressible_provider_stopped", + run_id = %run_id, + "Compressible provider stopped" + ); } // --------------------------------------------------------------------------- @@ -319,26 +355,40 @@ async fn run_compressible_provider( /// An `ApiServerHandle` that can be used to trigger graceful shutdown pub fn spawn_api_server(config: ApiServerConfig) -> ApiServerHandle { let (shutdown_tx, shutdown_rx) = oneshot::channel::<()>(); + let run_id_for_handle = config.run_id.clone(); let thread_handle = std::thread::spawn(move || { + let run_id = config.run_id.clone(); let rt = match tokio::runtime::Runtime::new() { Ok(rt) => rt, Err(e) => { - error!("Failed to create tokio runtime for API server: {}", e); + error!( + event = "api_server_runtime_create_failed", + run_id = %run_id, + error = %e, + "Failed to create tokio runtime for API server" + ); return; } }; rt.block_on(async move { let addr = if config.allow_public_bind { warn!( - "API server binding to 0.0.0.0:{} - endpoints will be publicly accessible", - config.port + event = "api_server_public_bind_enabled", + run_id = %run_id, + port = config.port, + "API server binding to 0.0.0.0; endpoints will be publicly accessible" ); SocketAddr::from(([0, 0, 0, 0], config.port)) } else { 
SocketAddr::from(([127, 0, 0, 1], config.port)) }; - info!("Starting HTTP API server on {}", addr); + info!( + event = "api_server_started", + run_id = %run_id, + address = %addr, + "Starting HTTP API server" + ); // Shared HTTP client with timeout for external requests (Prometheus) let http_client = reqwest::Client::builder() @@ -369,6 +419,7 @@ pub fn spawn_api_server(config: ApiServerConfig) -> ApiServerHandle { http_client.clone(), config.prometheus_url.clone(), provider_shutdown_tx.subscribe(), + run_id.clone(), )); tokio::spawn(run_compressible_provider( @@ -376,6 +427,8 @@ pub fn spawn_api_server(config: ApiServerConfig) -> ApiServerHandle { trackers, config.rpc_url.clone(), provider_shutdown_tx.subscribe(), + config.helius_rpc, + run_id.clone(), )); let cors = warp::cors() @@ -391,8 +444,10 @@ pub fn spawn_api_server(config: ApiServerConfig) -> ApiServerHandle { // --- Status route (unchanged — per-request RPC call) --- let rpc_url_for_status = config.rpc_url.clone(); + let run_id_for_status = run_id.clone(); let status_route = warp::path("status").and(warp::get()).and_then(move || { let rpc_url = rpc_url_for_status.clone(); + let run_id = run_id_for_status.clone(); async move { let timeout_duration = Duration::from_secs(STATUS_TIMEOUT_SECS); match tokio::time::timeout(timeout_duration, get_forester_status(&rpc_url)) @@ -403,7 +458,12 @@ pub fn spawn_api_server(config: ApiServerConfig) -> ApiServerHandle { warp::http::StatusCode::OK, )), Ok(Err(e)) => { - error!("Failed to get forester status: {:?}", e); + error!( + event = "api_server_status_fetch_failed", + run_id = %run_id, + error = ?e, + "Failed to get forester status" + ); let error_response = ErrorResponse { error: format!("Failed to get forester status: {}", e), }; @@ -414,8 +474,10 @@ pub fn spawn_api_server(config: ApiServerConfig) -> ApiServerHandle { } Err(_elapsed) => { error!( - "Forester status request timed out after {}s", - STATUS_TIMEOUT_SECS + event = "api_server_status_timeout", + run_id 
= %run_id, + timeout_seconds = STATUS_TIMEOUT_SECS, + "Forester status request timed out" ); let error_response = ErrorResponse { error: format!( @@ -453,21 +515,33 @@ pub fn spawn_api_server(config: ApiServerConfig) -> ApiServerHandle { warp::serve(routes) .bind(addr) .await - .graceful(async move { - let _ = shutdown_rx.await; - info!("API server received shutdown signal"); - // Signal providers to stop - let _ = provider_shutdown_tx.send(()); + .graceful({ + let run_id_for_shutdown = run_id.clone(); + async move { + let _ = shutdown_rx.await; + info!( + event = "api_server_shutdown_signal_received", + run_id = %run_id_for_shutdown, + "API server received shutdown signal" + ); + // Signal providers to stop + let _ = provider_shutdown_tx.send(()); + } }) .run() .await; - info!("API server shut down gracefully"); + info!( + event = "api_server_stopped", + run_id = %run_id, + "API server shut down gracefully" + ); }); }); ApiServerHandle { thread_handle, shutdown_tx, + run_id: run_id_for_handle, } } diff --git a/forester/src/cli.rs b/forester/src/cli.rs index 8da994cc17..05aef7fc2c 100644 --- a/forester/src/cli.rs +++ b/forester/src/cli.rs @@ -36,7 +36,7 @@ pub struct StartArgs { env = "INDEXER_URL", help = "Photon indexer URL. 
API key can be included as query param: https://host?api-key=KEY" )] - pub indexer_url: Option, + pub indexer_url: String, #[arg(long, env = "PROVER_URL")] pub prover_url: Option, @@ -270,6 +270,14 @@ pub struct StartArgs { )] pub api_server_public_bind: bool, + #[arg( + long, + env = "HELIUS_RPC", + help = "Use Helius getProgramAccountsV2 for compressible account queries (default: standard getProgramAccounts)", + default_value = "false" + )] + pub helius_rpc: bool, + #[arg( long, env = "GROUP_AUTHORITY", @@ -438,6 +446,7 @@ mod tests { "forester", "--processor-mode", "v1", "--rpc-url", "http://test.com", + "--indexer-url", "http://indexer.test.com", "--payer", "[1,2,3]", "--derivation", "[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32]" ]).unwrap(); @@ -448,6 +457,7 @@ mod tests { "forester", "--processor-mode", "v2", "--rpc-url", "http://test.com", + "--indexer-url", "http://indexer.test.com", "--payer", "[1,2,3]", "--derivation", "[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32]" ]).unwrap(); @@ -457,6 +467,7 @@ mod tests { let args = StartArgs::try_parse_from([ "forester", "--rpc-url", "http://test.com", + "--indexer-url", "http://indexer.test.com", "--payer", "[1,2,3]", "--derivation", "[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32]" ]).unwrap(); @@ -467,6 +478,7 @@ mod tests { "forester", "--processor-mode", "invalid-mode", "--rpc-url", "http://test.com", + "--indexer-url", "http://indexer.test.com", "--payer", "[1,2,3]", "--derivation", "[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32]" ]); diff --git a/forester/src/compressible/bootstrap_helpers.rs b/forester/src/compressible/bootstrap_helpers.rs index 625aa97bf7..e6f622f915 100644 --- a/forester/src/compressible/bootstrap_helpers.rs +++ b/forester/src/compressible/bootstrap_helpers.rs @@ -227,6 +227,13 @@ pub fn is_localhost(rpc_url: &str) -> bool 
{ rpc_url.contains("localhost") || rpc_url.contains("127.0.0.1") } +/// Whether to use Helius `getProgramAccountsV2` instead of standard +/// `getProgramAccounts`. Returns `true` only when `--helius-rpc` CLI flag +/// is set **and** the URL is not localhost. +pub fn use_helius_rpc(rpc_url: &str, helius_rpc_flag: bool) -> bool { + helius_rpc_flag && !is_localhost(rpc_url) +} + /// Generic bootstrap using standard getProgramAccounts API /// /// Calls `process_fn` for each account that passes initial extraction. @@ -390,6 +397,7 @@ pub async fn run_bootstrap( shutdown_rx: Option>, process_fn: F, label: &str, + helius_rpc: bool, ) -> Result where F: FnMut(RawAccountData) -> bool, @@ -415,8 +423,8 @@ where label, program_id ); - let result = if is_localhost(rpc_url) { - debug!("Detected localhost, using standard getProgramAccounts"); + let result = if !use_helius_rpc(rpc_url, helius_rpc) { + debug!("Using standard getProgramAccounts"); let api_result = bootstrap_standard_api( &client, rpc_url, @@ -489,13 +497,14 @@ pub async fn count_program_accounts( rpc_url: &str, program_id: &Pubkey, filters: Option>, + helius_rpc: bool, ) -> Result { let client = reqwest::Client::builder() .timeout(Duration::from_secs(30)) .build() .map_err(|e| anyhow::anyhow!("Failed to create HTTP client: {}", e))?; - if is_localhost(rpc_url) { + if !use_helius_rpc(rpc_url, helius_rpc) { let mut params = json!({ "encoding": "base64", "commitment": "confirmed", @@ -563,7 +572,10 @@ pub async fn count_program_accounts( /// Uses `count_program_accounts` with `dataSlice` to minimize /// bandwidth. Both queries run concurrently. Errors are propagated so callers /// can distinguish "0 accounts" from "RPC failure". 
-pub async fn count_compressible_accounts(rpc_url: &str) -> Result<(usize, usize)> { +pub async fn count_compressible_accounts( + rpc_url: &str, + helius_rpc: bool, +) -> Result<(usize, usize)> { let program_id = Pubkey::new_from_array(light_token_interface::LIGHT_TOKEN_PROGRAM_ID); let ctoken_filters = vec![json!({"memcmp": { @@ -579,8 +591,8 @@ pub async fn count_compressible_accounts(rpc_url: &str) -> Result<(usize, usize) }})]; let (ctoken_result, mint_result) = tokio::join!( - count_program_accounts(rpc_url, &program_id, Some(ctoken_filters)), - count_program_accounts(rpc_url, &program_id, Some(mint_filters)), + count_program_accounts(rpc_url, &program_id, Some(ctoken_filters), helius_rpc), + count_program_accounts(rpc_url, &program_id, Some(mint_filters), helius_rpc), ); Ok((ctoken_result?, mint_result?)) diff --git a/forester/src/compressible/ctoken/bootstrap.rs b/forester/src/compressible/ctoken/bootstrap.rs index caf03b79d9..a85e3e28b2 100644 --- a/forester/src/compressible/ctoken/bootstrap.rs +++ b/forester/src/compressible/ctoken/bootstrap.rs @@ -11,7 +11,7 @@ use super::state::CTokenAccountTracker; use crate::{ compressible::{ bootstrap_helpers::{ - bootstrap_standard_api, bootstrap_v2_api, is_localhost, RawAccountData, + bootstrap_standard_api, bootstrap_v2_api, use_helius_rpc, RawAccountData, }, config::{ACCOUNT_TYPE_OFFSET, CTOKEN_ACCOUNT_TYPE_FILTER}, }, @@ -24,6 +24,7 @@ pub async fn bootstrap_ctoken_accounts( rpc_url: String, tracker: Arc, shutdown_rx: Option>, + helius_rpc: bool, ) -> Result<()> { info!("Starting bootstrap of CToken accounts"); @@ -88,8 +89,8 @@ pub async fn bootstrap_ctoken_accounts( true }; - if is_localhost(&rpc_url) { - info!("Detected localhost, using standard getProgramAccounts"); + if !use_helius_rpc(&rpc_url, helius_rpc) { + info!("Using standard getProgramAccounts"); let (total_fetched, total_inserted) = bootstrap_standard_api( &client, &rpc_url, diff --git a/forester/src/compressible/mint/bootstrap.rs 
b/forester/src/compressible/mint/bootstrap.rs index 104c8dd00c..27a55b059e 100644 --- a/forester/src/compressible/mint/bootstrap.rs +++ b/forester/src/compressible/mint/bootstrap.rs @@ -18,6 +18,7 @@ pub async fn bootstrap_mint_accounts( rpc_url: String, tracker: Arc, shutdown_rx: Option>, + helius_rpc: bool, ) -> Result<()> { // Light Token Program ID let program_id = @@ -50,6 +51,7 @@ pub async fn bootstrap_mint_accounts( shutdown_rx, process_account, "Mint", + helius_rpc, ) .await?; diff --git a/forester/src/compressible/pda/bootstrap.rs b/forester/src/compressible/pda/bootstrap.rs index 0142b56570..c2000070b8 100644 --- a/forester/src/compressible/pda/bootstrap.rs +++ b/forester/src/compressible/pda/bootstrap.rs @@ -7,7 +7,7 @@ use super::state::PdaAccountTracker; use crate::{ compressible::{ bootstrap_helpers::{ - bootstrap_standard_api, bootstrap_v2_api, is_localhost, RawAccountData, + bootstrap_standard_api, bootstrap_v2_api, use_helius_rpc, RawAccountData, }, config::PdaProgramConfig, traits::CompressibleTracker, @@ -20,6 +20,7 @@ pub async fn bootstrap_pda_accounts( rpc_url: String, tracker: Arc, shutdown_rx: Option>, + helius_rpc: bool, ) -> Result<()> { info!("Starting bootstrap of compressible PDA accounts"); @@ -54,8 +55,15 @@ pub async fn bootstrap_pda_accounts( program_config.program_id ); - let result = - bootstrap_program(&client, &rpc_url, &tracker, &program_config, &shutdown_flag).await; + let result = bootstrap_program( + &client, + &rpc_url, + &tracker, + &program_config, + &shutdown_flag, + helius_rpc, + ) + .await; if let Err(e) = result { error!( @@ -81,6 +89,7 @@ async fn bootstrap_program( tracker: &PdaAccountTracker, program_config: &PdaProgramConfig, shutdown_flag: &std::sync::atomic::AtomicBool, + helius_rpc: bool, ) -> Result<()> { let program_id = &program_config.program_id; @@ -108,7 +117,7 @@ async fn bootstrap_program( } })]); - if is_localhost(rpc_url) { + if !use_helius_rpc(rpc_url, helius_rpc) { let (total_fetched, 
total_inserted) = bootstrap_standard_api( client, rpc_url, diff --git a/forester/src/config.rs b/forester/src/config.rs index 0dc22b9227..42eee8d2ee 100644 --- a/forester/src/config.rs +++ b/forester/src/config.rs @@ -97,6 +97,8 @@ pub struct GeneralConfig { pub sleep_when_idle_ms: u64, pub queue_polling_mode: QueuePollingMode, pub group_authority: Option, + /// Use Helius getProgramAccountsV2 instead of standard getProgramAccounts + pub helius_rpc: bool, } impl Default for GeneralConfig { @@ -114,6 +116,7 @@ impl Default for GeneralConfig { sleep_when_idle_ms: 45_000, queue_polling_mode: QueuePollingMode::Indexer, group_authority: None, + helius_rpc: false, } } } @@ -121,35 +124,23 @@ impl Default for GeneralConfig { impl GeneralConfig { pub fn test_address_v2() -> Self { GeneralConfig { - slot_update_interval_seconds: 10, - tree_discovery_interval_seconds: 1, - enable_metrics: true, skip_v1_state_trees: true, skip_v1_address_trees: true, skip_v2_state_trees: true, - skip_v2_address_trees: false, - tree_ids: vec![], sleep_after_processing_ms: 50, sleep_when_idle_ms: 100, - queue_polling_mode: QueuePollingMode::Indexer, - group_authority: None, + ..Default::default() } } pub fn test_state_v2() -> Self { GeneralConfig { - slot_update_interval_seconds: 10, - tree_discovery_interval_seconds: 1, - enable_metrics: true, skip_v1_state_trees: true, skip_v1_address_trees: true, - skip_v2_state_trees: false, skip_v2_address_trees: true, - tree_ids: vec![], sleep_after_processing_ms: 50, sleep_when_idle_ms: 100, - queue_polling_mode: QueuePollingMode::Indexer, - group_authority: None, + ..Default::default() } } } @@ -245,12 +236,13 @@ impl ForesterConfig { .rpc_url .clone() .ok_or(ConfigError::MissingField { field: "rpc_url" })?; + let indexer_url = args.indexer_url.clone(); Ok(Self { external_services: ExternalServicesConfig { rpc_url, ws_rpc_url: args.ws_rpc_url.clone(), - indexer_url: args.indexer_url.clone(), + indexer_url: Some(indexer_url), prover_url: 
args.prover_url.clone(), prover_append_url: args .prover_append_url @@ -341,6 +333,7 @@ impl ForesterConfig { }) }) .transpose()?, + helius_rpc: args.helius_rpc, }, rpc_pool_config: RpcPoolConfig { max_size: args.rpc_pool_size, @@ -436,18 +429,10 @@ impl ForesterConfig { indexer_config: IndexerConfig::default(), transaction_config: TransactionConfig::default(), general_config: GeneralConfig { - slot_update_interval_seconds: 10, tree_discovery_interval_seconds: 60, enable_metrics: args.enable_metrics(), - skip_v1_state_trees: false, - skip_v2_state_trees: false, - skip_v1_address_trees: false, - skip_v2_address_trees: false, - tree_ids: vec![], - sleep_after_processing_ms: 10_000, - sleep_when_idle_ms: 45_000, queue_polling_mode: QueuePollingMode::OnChain, // Status uses on-chain reads - group_authority: None, + ..Default::default() }, rpc_pool_config: RpcPoolConfig { max_size: 10, diff --git a/forester/src/epoch_manager.rs b/forester/src/epoch_manager.rs index 49da980118..b58cf20223 100644 --- a/forester/src/epoch_manager.rs +++ b/forester/src/epoch_manager.rs @@ -4,12 +4,12 @@ use std::{ atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}, Arc, }, - time::Duration, + time::{Duration, SystemTime, UNIX_EPOCH}, }; use anyhow::{anyhow, Context}; use borsh::BorshSerialize; -use dashmap::{mapref::entry::Entry, DashMap}; +use dashmap::DashMap; use forester_utils::{ forester_epoch::{get_epoch_phases, Epoch, ForesterSlot, TreeAccounts, TreeForesterSchedule}, rpc_pool::SolanaRpcPool, @@ -41,7 +41,7 @@ use solana_sdk::{ use tokio::{ sync::{broadcast, broadcast::error::RecvError, mpsc, oneshot, Mutex}, task::JoinHandle, - time::{sleep, Instant}, + time::{sleep, Instant, MissedTickBehavior}, }; use tracing::{debug, error, info, info_span, instrument, trace, warn}; @@ -50,6 +50,7 @@ use crate::{ errors::{ ChannelError, ForesterError, InitializationError, RegistrationError, WorkReportError, }, + logging::{should_emit_rate_limited_warning, ServiceHeartbeat}, 
metrics::{push_metrics, queue_metric_update, update_forester_sol_balance}, pagerduty::send_pagerduty_alert, processor::{ @@ -84,6 +85,7 @@ type StateBatchProcessorMap = Arc>>)>>; type AddressBatchProcessorMap = Arc>>)>>; +type ProcessorInitLockMap = Arc>>>; /// Timing for a single circuit type (circuit inputs + proof generation) #[derive(Copy, Clone, Debug, Default)] @@ -207,12 +209,16 @@ pub struct EpochManager { proof_caches: Arc>>, state_processors: StateBatchProcessorMap, address_processors: AddressBatchProcessorMap, + state_processor_init_locks: ProcessorInitLockMap, + address_processor_init_locks: ProcessorInitLockMap, compressible_tracker: Option>, pda_tracker: Option>, mint_tracker: Option>, /// Cached zkp_batch_size per tree to filter queue updates below threshold zkp_batch_sizes: Arc>, address_lookup_tables: Arc>, + heartbeat: Arc, + run_id: Arc, } impl Clone for EpochManager { @@ -234,11 +240,15 @@ impl Clone for EpochManager { proof_caches: self.proof_caches.clone(), state_processors: self.state_processors.clone(), address_processors: self.address_processors.clone(), + state_processor_init_locks: self.state_processor_init_locks.clone(), + address_processor_init_locks: self.address_processor_init_locks.clone(), compressible_tracker: self.compressible_tracker.clone(), pda_tracker: self.pda_tracker.clone(), mint_tracker: self.mint_tracker.clone(), zkp_batch_sizes: self.zkp_batch_sizes.clone(), address_lookup_tables: self.address_lookup_tables.clone(), + heartbeat: self.heartbeat.clone(), + run_id: self.run_id.clone(), } } } @@ -259,6 +269,8 @@ impl EpochManager { pda_tracker: Option>, mint_tracker: Option>, address_lookup_tables: Arc>, + heartbeat: Arc, + run_id: String, ) -> Result { let authority = Arc::new(config.payer_keypair.insecure_clone()); Ok(Self { @@ -278,11 +290,15 @@ impl EpochManager { proof_caches: Arc::new(DashMap::new()), state_processors: Arc::new(DashMap::new()), address_processors: Arc::new(DashMap::new()), + state_processor_init_locks: 
Arc::new(DashMap::new()), + address_processor_init_locks: Arc::new(DashMap::new()), compressible_tracker, pda_tracker, mint_tracker, zkp_batch_sizes: Arc::new(DashMap::new()), address_lookup_tables, + heartbeat, + run_id: Arc::::from(run_id), }) } @@ -324,7 +340,11 @@ impl EpochManager { balance_check_handle, ), |(h2, h3, h4)| { - info!("Aborting EpochManager background tasks"); + info!( + event = "background_tasks_aborting", + run_id = %self.run_id, + "Aborting EpochManager background tasks" + ); h2.abort(); h3.abort(); h4.abort(); @@ -336,16 +356,49 @@ impl EpochManager { epoch_opt = rx.recv() => { match epoch_opt { Some(epoch) => { - debug!("Received new epoch: {}", epoch); + debug!( + event = "epoch_queued_for_processing", + run_id = %self.run_id, + epoch, + "Received epoch from monitor" + ); let self_clone = Arc::clone(&self); tokio::spawn(async move { if let Err(e) = self_clone.process_epoch(epoch).await { - error!("Error processing epoch {}: {:?}", epoch, e); + if let Some(ForesterError::Registration( + RegistrationError::FinalizeRegistrationPhaseEnded { + epoch, + current_slot, + active_phase_end_slot, + }, + )) = e.downcast_ref::() + { + debug!( + event = "epoch_processing_skipped_finalize_registration_phase_ended", + run_id = %self_clone.run_id, + epoch = *epoch, + current_slot = *current_slot, + active_phase_end_slot = *active_phase_end_slot, + "Skipping epoch processing because FinalizeRegistration is no longer possible" + ); + } else { + error!( + event = "epoch_processing_failed", + run_id = %self_clone.run_id, + epoch, + error = ?e, + "Error processing epoch" + ); + } } }); } None => { - error!("Epoch monitor channel closed unexpectedly!"); + error!( + event = "epoch_monitor_channel_closed", + run_id = %self.run_id, + "Epoch monitor channel closed unexpectedly" + ); break Err(anyhow!( "Epoch monitor channel closed - forester cannot function without it" )); @@ -355,13 +408,27 @@ impl EpochManager { result = &mut monitor_handle => { match result { 
Ok(Ok(())) => { - error!("Epoch monitor exited unexpectedly with Ok(())"); + error!( + event = "epoch_monitor_exited_unexpected_ok", + run_id = %self.run_id, + "Epoch monitor exited unexpectedly with Ok(())" + ); } Ok(Err(e)) => { - error!("Epoch monitor exited with error: {:?}", e); + error!( + event = "epoch_monitor_exited_with_error", + run_id = %self.run_id, + error = ?e, + "Epoch monitor exited with error" + ); } Err(e) => { - error!("Epoch monitor task panicked or was cancelled: {:?}", e); + error!( + event = "epoch_monitor_task_failed", + run_id = %self.run_id, + error = ?e, + "Epoch monitor task panicked or was cancelled" + ); } } if let Some(pagerduty_key) = &self.config.external_services.pagerduty_routing_key { @@ -396,11 +463,26 @@ impl EpochManager { &self.config.payer_keypair.pubkey().to_string(), balance_in_sol, ); - debug!("Current SOL balance: {} SOL", balance_in_sol); + debug!( + event = "forester_balance_updated", + run_id = %self.run_id, + balance_sol = balance_in_sol, + "Current SOL balance updated" + ); } - Err(e) => error!("Failed to get balance: {:?}", e), + Err(e) => error!( + event = "forester_balance_fetch_failed", + run_id = %self.run_id, + error = ?e, + "Failed to get balance" + ), }, - Err(e) => error!("Failed to get RPC connection for balance check: {:?}", e), + Err(e) => error!( + event = "forester_balance_rpc_connection_failed", + run_id = %self.run_id, + error = ?e, + "Failed to get RPC connection for balance check" + ), } } } @@ -410,18 +492,37 @@ impl EpochManager { loop { match receiver.recv().await { Ok(new_tree) => { - info!("Received new tree: {:?}", new_tree); + info!( + event = "new_tree_received", + run_id = %self.run_id, + tree = %new_tree.merkle_tree, + tree_type = ?new_tree.tree_type, + "Received new tree" + ); if let Err(e) = self.add_new_tree(new_tree).await { - error!("Failed to add new tree: {:?}", e); + error!( + event = "new_tree_add_failed", + run_id = %self.run_id, + error = ?e, + "Failed to add new tree" + ); // 
Continue processing other trees instead of crashing } } Err(e) => match e { RecvError::Lagged(lag) => { - warn!("Lagged in receiving new trees: {:?}", lag); + warn!( + event = "new_tree_receiver_lagged", + run_id = %self.run_id, + lag, "Lagged while receiving new trees" + ); } RecvError::Closed => { - info!("New tree receiver closed"); + info!( + event = "new_tree_receiver_closed", + run_id = %self.run_id, + "New tree receiver closed" + ); break; } }, @@ -431,23 +532,54 @@ impl EpochManager { } async fn add_new_tree(&self, new_tree: TreeAccounts) -> Result<()> { - info!("Adding new tree: {:?}", new_tree); + info!( + event = "new_tree_add_started", + run_id = %self.run_id, + tree = %new_tree.merkle_tree, + tree_type = ?new_tree.tree_type, + "Adding new tree" + ); let mut trees = self.trees.lock().await; trees.push(new_tree); drop(trees); - info!("New tree added to the list of trees"); + info!( + event = "new_tree_added", + run_id = %self.run_id, + tree = %new_tree.merkle_tree, + "New tree added to tracked list" + ); let (current_slot, current_epoch) = self.get_current_slot_and_epoch().await?; let phases = get_epoch_phases(&self.protocol_config, current_epoch); // Check if we're currently in the active phase if current_slot >= phases.active.start && current_slot < phases.active.end { - info!("Currently in active phase. 
Attempting to process the new tree immediately."); - info!("Recovering registration info..."); + info!( + event = "new_tree_active_phase_injection", + run_id = %self.run_id, + tree = %new_tree.merkle_tree, + current_slot, + active_phase_start_slot = phases.active.start, + active_phase_end_slot = phases.active.end, + "In active phase; attempting immediate processing for new tree" + ); + info!( + event = "new_tree_recover_registration_started", + run_id = %self.run_id, + tree = %new_tree.merkle_tree, + epoch = current_epoch, + "Recovering registration info for new tree" + ); match self.recover_registration_info(current_epoch).await { Ok(mut epoch_info) => { - info!("Recovered registration info for current epoch"); + info!( + event = "new_tree_recover_registration_succeeded", + run_id = %self.run_id, + tree = %new_tree.merkle_tree, + epoch = current_epoch, + "Recovered registration info for current epoch" + ); let tree_schedule = TreeForesterSchedule::new_with_schedule( &new_tree, current_slot, @@ -458,8 +590,15 @@ impl EpochManager { let self_clone = Arc::new(self.clone()); - info!("Spawning task to process new tree in current epoch"); + info!( + event = "new_tree_processing_task_spawned", + run_id = %self.run_id, + tree = %new_tree.merkle_tree, + epoch = current_epoch, + "Spawning task to process new tree in current epoch" + ); tokio::spawn(async move { + let tree_pubkey = tree_schedule.tree_accounts.merkle_tree; if let Err(e) = self_clone .process_queue( &epoch_info.epoch, @@ -468,9 +607,20 @@ impl EpochManager { ) .await { - error!("Error processing queue for new tree: {:?}", e); + error!( + event = "new_tree_process_queue_failed", + run_id = %self_clone.run_id, + tree = %tree_pubkey, + error = ?e, + "Error processing queue for new tree" + ); } else { - info!("Successfully processed new tree in current epoch"); + info!( + event = "new_tree_process_queue_succeeded", + run_id = %self_clone.run_id, + tree = %tree_pubkey, + "Successfully processed new tree in current 
epoch" + ); } }); } @@ -482,19 +632,33 @@ impl EpochManager { ) { debug!("Not registered for current epoch yet, new tree will be picked up during next registration"); } else { - warn!("Failed to recover registration info for new tree: {:?}", e); + warn!( + event = "new_tree_recover_registration_failed", + run_id = %self.run_id, + tree = %new_tree.merkle_tree, + epoch = current_epoch, + error = ?e, + "Failed to recover registration info for new tree" + ); } } } info!( - "Injected new tree into current epoch {}: {:?}", - current_epoch, new_tree + event = "new_tree_injected_into_current_epoch", + run_id = %self.run_id, + tree = %new_tree.merkle_tree, + epoch = current_epoch, + "Injected new tree into current epoch" ); } else { info!( - "Not in active phase (current slot: {}, active start: {}). Tree will be picked up in next registration.", - current_slot, phases.active.start + event = "new_tree_queued_for_next_registration", + run_id = %self.run_id, + tree = %new_tree.merkle_tree, + current_slot, + active_phase_start_slot = phases.active.start, + "Not in active phase; new tree will be picked up in next registration" ); } @@ -507,15 +671,20 @@ impl EpochManager { let mut consecutive_failures = 0u32; const MAX_BACKOFF_SECS: u64 = 60; - info!("Starting epoch monitor"); + info!( + event = "epoch_monitor_started", + run_id = %self.run_id, + "Starting epoch monitor" + ); loop { let (slot, current_epoch) = match self.get_current_slot_and_epoch().await { Ok(result) => { if consecutive_failures > 0 { info!( - "Epoch monitor recovered after {} consecutive failures", - consecutive_failures + event = "epoch_monitor_recovered", + run_id = %self.run_id, + consecutive_failures, "Epoch monitor recovered after failures" ); } consecutive_failures = 0; @@ -528,13 +697,21 @@ impl EpochManager { if consecutive_failures == 1 { warn!( - "Epoch monitor: failed to get slot/epoch: {:?}. 
Retrying in {:?}", - e, backoff + event = "epoch_monitor_slot_epoch_failed", + run_id = %self.run_id, + consecutive_failures, + error = ?e, + backoff_ms = backoff.as_millis() as u64, + "Epoch monitor failed to get slot/epoch; retrying" ); } else if consecutive_failures.is_multiple_of(10) { error!( - "Epoch monitor: {} consecutive failures, last error: {:?}. Still retrying every {:?}", - consecutive_failures, e, backoff + event = "epoch_monitor_slot_epoch_failed_repeated", + run_id = %self.run_id, + consecutive_failures, + error = ?e, + backoff_ms = backoff.as_millis() as u64, + "Epoch monitor still failing repeatedly" ); } @@ -544,19 +721,36 @@ impl EpochManager { }; debug!( - "last_epoch: {:?}, current_epoch: {:?}, slot: {:?}", - last_epoch, current_epoch, slot + event = "epoch_monitor_tick", + run_id = %self.run_id, + last_epoch = ?last_epoch, + current_epoch, + slot, + "Epoch monitor tick" ); if last_epoch.is_none_or(|last| current_epoch > last) { - debug!("New epoch detected: {}", current_epoch); + debug!( + event = "epoch_monitor_new_epoch_detected", + run_id = %self.run_id, + epoch = current_epoch, + "New epoch detected" + ); let phases = get_epoch_phases(&self.protocol_config, current_epoch); if slot < phases.registration.end { - debug!("Sending current epoch {} for processing", current_epoch); + debug!( + event = "epoch_monitor_send_current_epoch", + run_id = %self.run_id, + epoch = current_epoch, + "Sending current epoch for processing" + ); if let Err(e) = tx.send(current_epoch).await { error!( - "Failed to send current epoch {} for processing: {:?}. 
Channel closed, exiting.", - current_epoch, e + event = "epoch_monitor_send_current_epoch_failed", + run_id = %self.run_id, + epoch = current_epoch, + error = ?e, + "Failed to send current epoch for processing; channel closed" ); return Err(anyhow!("Epoch channel closed: {}", e)); } @@ -577,7 +771,13 @@ impl EpochManager { let mut rpc = match self.rpc_pool.get_connection().await { Ok(rpc) => rpc, Err(e) => { - warn!("Failed to get RPC connection for slot waiting: {:?}", e); + warn!( + event = "epoch_monitor_wait_rpc_connection_failed", + run_id = %self.run_id, + target_epoch, + error = ?e, + "Failed to get RPC connection while waiting for registration slot" + ); tokio::time::sleep(Duration::from_secs(1)).await; break; } @@ -591,36 +791,59 @@ impl EpochManager { let slots_to_wait = wait_target.saturating_sub(slot); debug!( - "Waiting for epoch {} registration phase. Current slot: {}, Wait target: {} (registration starts at {}), Slots to wait: {}", - target_epoch, slot, wait_target, target_phases.registration.start, slots_to_wait + event = "epoch_monitor_wait_for_registration", + run_id = %self.run_id, + target_epoch, + current_slot = slot, + wait_target_slot = wait_target, + registration_start_slot = target_phases.registration.start, + slots_to_wait, + "Waiting for target epoch registration phase" ); if let Err(e) = wait_until_slot_reached(&mut *rpc, &self.slot_tracker, wait_target) .await { - error!("Error waiting for registration phase: {:?}", e); + error!( + event = "epoch_monitor_wait_for_registration_failed", + run_id = %self.run_id, + target_epoch, + error = ?e, + "Error waiting for registration phase" + ); break; } let current_slot = self.slot_tracker.estimated_current_slot(); if current_slot >= target_phases.registration.end { debug!( - "Epoch {} registration ended while waiting (current slot {} >= end {}), trying next epoch", - target_epoch, current_slot, target_phases.registration.end + event = "epoch_monitor_registration_ended_while_waiting", + run_id = 
%self.run_id, + target_epoch, + current_slot, + registration_end_slot = target_phases.registration.end, + "Target epoch registration ended while waiting; trying next epoch" ); target_epoch += 1; continue; } debug!( - "Epoch {} registration phase ready, sending for processing (current slot: {}, registration end: {})", - target_epoch, current_slot, target_phases.registration.end + event = "epoch_monitor_send_target_epoch_after_wait", + run_id = %self.run_id, + target_epoch, + current_slot, + registration_end_slot = target_phases.registration.end, + "Target epoch registration phase ready; sending for processing" ); if let Err(e) = tx.send(target_epoch).await { error!( - "Failed to send epoch {} for processing: {:?}", - target_epoch, e + event = "epoch_monitor_send_target_epoch_failed", + run_id = %self.run_id, + target_epoch, + error = ?e, + "Failed to send target epoch for processing" ); break; } @@ -631,13 +854,20 @@ impl EpochManager { // If we're within the registration window, send it if slot < target_phases.registration.end { debug!( - "Epoch {} registration phase is open (slot {} < end {}), sending for processing", - target_epoch, slot, target_phases.registration.end + event = "epoch_monitor_send_target_epoch_window_open", + run_id = %self.run_id, + target_epoch, + slot, + registration_end_slot = target_phases.registration.end, + "Target epoch registration window is open; sending for processing" ); if let Err(e) = tx.send(target_epoch).await { error!( - "Failed to send epoch {} for processing: {:?}", - target_epoch, e + event = "epoch_monitor_send_target_epoch_failed", + run_id = %self.run_id, + target_epoch, + error = ?e, + "Failed to send target epoch for processing" ); break; } @@ -647,8 +877,12 @@ impl EpochManager { // Registration already ended, try next epoch debug!( - "Epoch {} registration already ended (slot {} >= end {}), checking next epoch", - target_epoch, slot, target_phases.registration.end + event = 
"epoch_monitor_target_epoch_registration_closed", + run_id = %self.run_id, + target_epoch, + slot, + registration_end_slot = target_phases.registration.end, + "Target epoch registration already ended; checking next epoch" ); target_epoch += 1; } @@ -718,7 +952,13 @@ impl EpochManager { if slot > current_phases.registration.start { debug!("Processing previous epoch: {}", previous_epoch); if let Err(e) = tx.send(previous_epoch).await { - error!("Failed to send previous epoch for processing: {:?}", e); + error!( + event = "initial_epoch_send_previous_failed", + run_id = %self.run_id, + epoch = previous_epoch, + error = ?e, + "Failed to send previous epoch for processing" + ); return Ok(()); } } @@ -731,7 +971,13 @@ impl EpochManager { current_epoch ); if let Err(e) = tx.send(current_epoch).await { - error!("Failed to send current epoch for processing: {:?}", e); + error!( + event = "initial_epoch_send_current_failed", + run_id = %self.run_id, + epoch = current_epoch, + error = ?e, + "Failed to send current epoch for processing" + ); return Ok(()); // Channel closed, exit gracefully } } else { @@ -752,20 +998,32 @@ impl EpochManager { current_epoch ); if let Err(e) = tx.send(current_epoch).await { - error!("Failed to send current epoch for processing: {:?}", e); + error!( + event = "initial_epoch_send_current_registered_failed", + run_id = %self.run_id, + epoch = current_epoch, + error = ?e, + "Failed to send current epoch for processing" + ); return Ok(()); // Channel closed, exit gracefully } } else { - warn!( - "Skipping current epoch {} - registration ended at slot {} (current slot: {})", - current_epoch, current_phases.registration.end, slot + info!( + event = "skip_current_epoch_registration_closed", + run_id = %self.run_id, + epoch = current_epoch, + registration_end_slot = current_phases.registration.end, + current_slot = slot, + "Skipping current epoch because registration has ended" ); } } Err(e) => { warn!( - "Failed to get RPC connection to check 
registration, skipping: {:?}", - e + event = "registration_check_rpc_failed", + run_id = %self.run_id, + error = ?e, + "Failed to get RPC connection to check registration, skipping" ); } } @@ -818,7 +1076,13 @@ impl EpochManager { epoch ); } else { - warn!("Failed to recover registration info: {:?}", e); + warn!( + event = "recover_registration_info_failed", + run_id = %self.run_id, + epoch, + error = ?e, + "Failed to recover registration info" + ); } // Attempt to register match self @@ -839,9 +1103,16 @@ impl EpochManager { next_phases.registration.start.saturating_sub(current_slot); info!( - "Too late to register for epoch {} (registration ended at slot {}, current slot: {}). Next available epoch: {}. Registration opens at slot {} ({} slots to wait).", - failed_epoch, registration_end, current_slot, next_epoch, next_phases.registration.start, slots_to_wait - ); + event = "registration_window_missed", + run_id = %self.run_id, + failed_epoch, + registration_end_slot = registration_end, + current_slot, + next_epoch, + next_registration_start_slot = next_phases.registration.start, + slots_to_wait, + "Too late to register for requested epoch; next epoch will be used" + ); return Ok(()); } Err(e) => return Err(e.into()), @@ -870,16 +1141,25 @@ impl EpochManager { if self.sync_slot().await? 
< phases.report_work.end { self.report_work_onchain(®istration_info).await?; } else { + let current_slot = self.slot_tracker.estimated_current_slot(); info!( - "Skipping on-chain work report for epoch {} (report_work phase ended)", - registration_info.epoch.epoch + event = "skip_onchain_work_report_phase_ended", + run_id = %self.run_id, + epoch = registration_info.epoch.epoch, + current_slot, + report_work_end_slot = phases.report_work.end, + "Skipping on-chain work report because report_work phase has ended" ); } // TODO: implement // self.claim(®istration_info).await?; - info!("Exiting process_epoch"); + info!( + event = "process_epoch_completed", + run_id = %self.run_id, + epoch, "Exiting process_epoch" + ); Ok(()) } @@ -920,8 +1200,13 @@ impl EpochManager { if slot < phases.registration.start { let slots_to_wait = phases.registration.start.saturating_sub(slot); info!( - "Registration for epoch {} hasn't started yet (current slot: {}, starts at: {}). Waiting {} slots...", - epoch, slot, phases.registration.start, slots_to_wait + event = "registration_wait_for_window", + run_id = %self.run_id, + epoch, + current_slot = slot, + registration_start_slot = phases.registration.start, + slots_to_wait, + "Registration window not open yet; waiting" ); let wait_duration = slot_duration() * slots_to_wait as u32; sleep(wait_duration).await; @@ -932,10 +1217,13 @@ impl EpochManager { Ok(registration_info) => return Ok(registration_info), Err(e) => { warn!( - "Failed to register for epoch {} (attempt {}): {:?}", + event = "registration_attempt_failed", + run_id = %self.run_id, epoch, - attempt + 1, - e + attempt = attempt + 1, + max_attempts = max_retries, + error = ?e, + "Failed to register for epoch; retrying" ); if attempt < max_retries - 1 { sleep(retry_delay).await; @@ -954,7 +1242,13 @@ impl EpochManager { ) .await { - error!("Failed to send PagerDuty alert: {:?}", alert_err); + error!( + event = "pagerduty_alert_failed", + run_id = %self.run_id, + epoch, + error = 
?alert_err, + "Failed to send PagerDuty alert" + ); } } return Err(ForesterError::Other(e)); @@ -972,7 +1266,11 @@ impl EpochManager { #[instrument(level = "debug", skip(self), fields(forester = %self.config.payer_keypair.pubkey(), epoch = epoch ))] async fn register_for_epoch(&self, epoch: u64) -> Result { - info!("Registering for epoch: {}", epoch); + info!( + event = "registration_attempt_started", + run_id = %self.run_id, + epoch, "Registering for epoch" + ); let mut rpc = LightClient::new(LightClientConfig { url: self.config.external_services.rpc_url.to_string(), photon_url: self.config.external_services.indexer_url.clone(), @@ -992,8 +1290,9 @@ impl EpochManager { if let Some(existing_pda) = existing_registration { info!( - "Already registered for epoch {}. Recovering registration info.", - epoch + event = "registration_already_exists", + run_id = %self.run_id, + epoch, "Already registered for epoch; recovering registration info" ); let registration_info = self .recover_registration_info_internal( @@ -1067,8 +1366,12 @@ impl EpochManager { Ok(registration_info) } else if slot < phases.registration.start { warn!( - "Too early to register for epoch {}. Current slot: {}, Registration starts: {}", - epoch, slot, phases.registration.start + event = "registration_too_early", + run_id = %self.run_id, + epoch, + current_slot = slot, + registration_start_slot = phases.registration.start, + "Too early to register for epoch" ); Err(RegistrationError::RegistrationPhaseNotStarted { epoch, @@ -1078,8 +1381,12 @@ impl EpochManager { .into()) } else { warn!( - "Too late to register for epoch {}. 
Current slot: {}, Registration end: {}", - epoch, slot, phases.registration.end + event = "registration_too_late", + run_id = %self.run_id, + epoch, + current_slot = slot, + registration_end_slot = phases.registration.end, + "Too late to register for epoch" ); Err(RegistrationError::RegistrationPhaseEnded { epoch, @@ -1144,16 +1451,26 @@ impl EpochManager { if current_slot >= active_phase_start_slot { info!( - "Active phase has already started. Current slot: {}. Active phase start slot: {}. Slots left: {}.", - current_slot, active_phase_start_slot, active_phase_end_slot.saturating_sub(current_slot) + event = "active_phase_already_started", + run_id = %self.run_id, + current_slot, + active_phase_start_slot, + active_phase_end_slot, + slots_left = active_phase_end_slot.saturating_sub(current_slot), + "Active phase has already started" ); } else { let waiting_slots = active_phase_start_slot - current_slot; let waiting_secs = waiting_slots / 2; - info!("Waiting for active phase to start. Current slot: {}. Active phase start slot: {}. 
Waiting time: ~ {} seconds", + info!( + event = "wait_for_active_phase", + run_id = %self.run_id, current_slot, active_phase_start_slot, - waiting_secs); + waiting_slots, + approx_wait_seconds = waiting_secs, + "Waiting for active phase to start" + ); } self.prewarm_all_trees_during_wait(epoch_info, active_phase_start_slot) @@ -1175,13 +1492,19 @@ impl EpochManager { let current_slot = rpc.get_slot().await?; if current_slot > epoch_info.epoch.phases.active.end { info!( - "Skipping FinalizeRegistration for epoch {}: active phase ended (current slot: {}, end: {})", - epoch_info.epoch.epoch, current_slot, epoch_info.epoch.phases.active.end + event = "skip_finalize_registration_phase_ended", + run_id = %self.run_id, + epoch = epoch_info.epoch.epoch, + current_slot, + active_phase_end_slot = epoch_info.epoch.phases.active.end, + "Skipping FinalizeRegistration because active phase ended" ); - return Err(anyhow::anyhow!( - "Epoch {} active phase has ended, cannot finalize registration", - epoch_info.epoch.epoch - )); + return Err(RegistrationError::FinalizeRegistrationPhaseEnded { + epoch: epoch_info.epoch.epoch, + current_slot, + active_phase_end_slot: epoch_info.epoch.phases.active.end, + } + .into()); } // TODO: we can put this ix into every tx of the first batch of the current active phase @@ -1237,7 +1560,12 @@ impl EpochManager { debug!("Added compression tree to epoch {}", epoch_info.epoch.epoch); } - info!("Finished waiting for active phase"); + info!( + event = "active_phase_ready", + run_id = %self.run_id, + epoch = epoch_info.epoch.epoch, + "Finished waiting for active phase" + ); Ok(epoch_info) } @@ -1249,13 +1577,19 @@ impl EpochManager { fields(forester = %self.config.payer_keypair.pubkey(), epoch = epoch_info.epoch.epoch ))] async fn perform_active_work(&self, epoch_info: &ForesterEpochInfo) -> Result<()> { - info!("Performing active work"); + self.heartbeat.increment_active_cycle(); let current_slot = self.slot_tracker.estimated_current_slot(); let 
active_phase_end = epoch_info.epoch.phases.active.end; if !self.is_in_active_phase(current_slot, epoch_info)? { - info!("No longer in active phase. Skipping work."); + info!( + event = "active_work_skipped_not_in_phase", + run_id = %self.run_id, + current_slot, + active_phase_end, + "No longer in active phase. Skipping work." + ); return Ok(()); } @@ -1268,16 +1602,29 @@ impl EpochManager { .cloned() .collect(); + info!( + event = "active_work_cycle_started", + run_id = %self.run_id, + current_slot, + active_phase_end, + tree_count = trees_to_process.len(), + "Starting active work cycle" + ); + let self_arc = Arc::new(self.clone()); let epoch_info_arc = Arc::new(epoch_info.clone()); let mut handles: Vec>> = Vec::with_capacity(trees_to_process.len()); for tree in trees_to_process { - info!( - "Creating thread for tree {} (type: {:?})", - tree.tree_accounts.merkle_tree, tree.tree_accounts.tree_type + debug!( + event = "tree_processing_task_spawned", + run_id = %self.run_id, + tree = %tree.tree_accounts.merkle_tree, + tree_type = ?tree.tree_accounts.tree_type, + "Spawning tree processing task" ); + self.heartbeat.add_tree_tasks_spawned(1); let self_clone = self_arc.clone(); let epoch_info_clone = epoch_info_arc.clone(); @@ -1295,17 +1642,43 @@ impl EpochManager { handles.push(handle); } - info!("Waiting for {} tree processing tasks", handles.len()); + debug!("Waiting for {} tree processing tasks", handles.len()); let results = join_all(handles).await; + let mut success_count = 0usize; + let mut error_count = 0usize; + let mut panic_count = 0usize; for result in results { match result { - Ok(Ok(())) => { - debug!("Queue processed successfully"); + Ok(Ok(())) => success_count += 1, + Ok(Err(e)) => { + error_count += 1; + error!( + event = "tree_processing_task_failed", + run_id = %self.run_id, + error = ?e, + "Error processing queue" + ); + } + Err(e) => { + panic_count += 1; + error!( + event = "tree_processing_task_panicked", + run_id = %self.run_id, + error = ?e, + 
"Tree processing task panicked" + ); } - Ok(Err(e)) => error!("Error processing queue: {:?}", e), - Err(e) => error!("Task panicked: {:?}", e), } } + info!( + event = "active_work_cycle_completed", + run_id = %self.run_id, + tree_tasks = success_count + error_count + panic_count, + succeeded = success_count, + failed = error_count, + panicked = panic_count, + "Active work cycle completed" + ); debug!("Waiting for active phase to end"); let mut rpc = self.rpc_pool.get_connection().await?; @@ -1332,19 +1705,23 @@ impl EpochManager { epoch_pda: &ForesterEpochPda, mut tree_schedule: TreeForesterSchedule, ) -> Result<()> { + self.heartbeat.increment_queue_started(); let mut current_slot = self.slot_tracker.estimated_current_slot(); let total_slots = tree_schedule.slots.len(); let eligible_slots = tree_schedule.slots.iter().filter(|s| s.is_some()).count(); let tree_type = tree_schedule.tree_accounts.tree_type; - info!( - "process_queue tree={}, total_slots={}, eligible_slots={}, current_slot={}, active_phase_end={}", - tree_schedule.tree_accounts.merkle_tree, + debug!( + event = "process_queue_started", + run_id = %self.run_id, + tree = %tree_schedule.tree_accounts.merkle_tree, + tree_type = ?tree_type, total_slots, eligible_slots, current_slot, - epoch_info.phases.active.end + active_phase_end = epoch_info.phases.active.end, + "Processing queue for tree" ); 'outer_slot_loop: while current_slot < epoch_info.phases.active.end { @@ -1389,16 +1766,21 @@ impl EpochManager { } Err(e) => { error!( - "Error processing light slot {:?}: {:?}", - light_slot_details.slot, e + event = "light_slot_processing_error", + run_id = %self.run_id, + light_slot = light_slot_details.slot, + error = ?e, + "Error processing light slot" ); } } tree_schedule.slots[slot_idx] = None; } else { - info!( - "No further eligible slots in schedule for tree {}", - tree_schedule.tree_accounts.merkle_tree + debug!( + event = "process_queue_no_eligible_slots", + run_id = %self.run_id, + tree = 
%tree_schedule.tree_accounts.merkle_tree, + "No further eligible slots in schedule" ); break 'outer_slot_loop; } @@ -1406,9 +1788,12 @@ impl EpochManager { current_slot = self.slot_tracker.estimated_current_slot(); } - info!( - "Exiting process_queue for tree {}", - tree_schedule.tree_accounts.merkle_tree + self.heartbeat.increment_queue_finished(); + debug!( + event = "process_queue_finished", + run_id = %self.run_id, + tree = %tree_schedule.tree_accounts.merkle_tree, + "Exiting process_queue" ); Ok(()) } @@ -1426,12 +1811,15 @@ impl EpochManager { tree_accounts: &TreeAccounts, forester_slot_details: &ForesterSlot, ) -> Result<()> { - info!( - "Processing slot {} ({}-{}) epoch {}", - forester_slot_details.slot, - forester_slot_details.start_solana_slot, - forester_slot_details.end_solana_slot, - epoch_info.epoch + debug!( + event = "light_slot_processing_started", + run_id = %self.run_id, + tree = %tree_accounts.merkle_tree, + epoch = epoch_info.epoch, + light_slot = forester_slot_details.slot, + slot_start = forester_slot_details.start_solana_slot, + slot_end = forester_slot_details.end_solana_slot, + "Processing light slot" ); let mut rpc = self.rpc_pool.get_connection().await?; wait_until_slot_reached( @@ -1454,7 +1842,15 @@ impl EpochManager { let current_light_slot = (estimated_slot - epoch_info.phases.active.start) / epoch_pda.protocol_config.slot_length; if current_light_slot != forester_slot_details.slot { - warn!("Light slot mismatch. 
Exiting processing for this slot."); + warn!( + event = "light_slot_mismatch", + run_id = %self.run_id, + tree = %tree_accounts.merkle_tree, + expected_light_slot = forester_slot_details.slot, + actual_light_slot = current_light_slot, + estimated_slot, + "Light slot mismatch; exiting processing for this slot" + ); break 'inner_processing_loop; } @@ -1486,16 +1882,23 @@ impl EpochManager { Ok(count) => count, Err(e) => { error!( - "Failed processing in slot {:?}: {:?}", - forester_slot_details.slot, e + event = "light_slot_processing_failed", + run_id = %self.run_id, + tree = %tree_accounts.merkle_tree, + light_slot = forester_slot_details.slot, + error = ?e, + "Failed processing in light slot" ); break 'inner_processing_loop; } }; if items_processed_this_iteration > 0 { debug!( - "Processed {} items in slot {:?}", - items_processed_this_iteration, forester_slot_details.slot + event = "light_slot_items_processed", + run_id = %self.run_id, + light_slot = forester_slot_details.slot, + items = items_processed_this_iteration, + "Processed items in light slot" ); } @@ -1507,7 +1910,21 @@ impl EpochManager { .await; if let Err(e) = push_metrics(&self.config.external_services.pushgateway_url).await { - warn!("Failed to push metrics: {:?}", e); + if should_emit_rate_limited_warning("push_metrics_v1", Duration::from_secs(30)) { + warn!( + event = "metrics_push_failed", + run_id = %self.run_id, + error = ?e, + "Failed to push metrics" + ); + } else { + debug!( + event = "metrics_push_failed_suppressed", + run_id = %self.run_id, + error = ?e, + "Suppressing repeated metrics push failure" + ); + } } estimated_slot = self.slot_tracker.estimated_current_slot(); @@ -1535,12 +1952,15 @@ impl EpochManager { forester_slot_details: &ForesterSlot, consecutive_eligibility_end: u64, ) -> Result<()> { - info!( - "Processing V2 light slot {} ({}-{}, consecutive_end={})", - forester_slot_details.slot, - forester_slot_details.start_solana_slot, - forester_slot_details.end_solana_slot, - 
consecutive_eligibility_end + debug!( + event = "v2_light_slot_processing_started", + run_id = %self.run_id, + tree = %tree_accounts.merkle_tree, + light_slot = forester_slot_details.slot, + slot_start = forester_slot_details.start_solana_slot, + slot_end = forester_slot_details.end_solana_slot, + consecutive_eligibility_end_slot = consecutive_eligibility_end, + "Processing V2 light slot" ); let tree_pubkey = tree_accounts.merkle_tree; @@ -1562,8 +1982,12 @@ impl EpochManager { if items_sent > 0 { let cached_send_duration = cached_send_start.elapsed(); info!( - "Sent {} items from cache for tree {} in {:?}", - items_sent, tree_pubkey, cached_send_duration + event = "cached_proofs_sent", + run_id = %self.run_id, + tree = %tree_pubkey, + items = items_sent, + duration_ms = cached_send_duration.as_millis() as u64, + "Sent items from proof cache" ); self.update_metrics_and_counts(epoch_info.epoch, items_sent, cached_send_duration) .await; @@ -1587,7 +2011,15 @@ impl EpochManager { let current_light_slot = (estimated_slot - epoch_info.phases.active.start) / epoch_pda.protocol_config.slot_length; if current_light_slot != forester_slot_details.slot { - warn!("V2 slot mismatch. 
Exiting processing."); + warn!( + event = "v2_light_slot_mismatch", + run_id = %self.run_id, + tree = %tree_pubkey, + expected_light_slot = forester_slot_details.slot, + actual_light_slot = current_light_slot, + estimated_slot, + "V2 slot mismatch; exiting processing" + ); break 'inner_processing_loop; } @@ -1619,7 +2051,14 @@ impl EpochManager { { Ok(count) => { if count > 0 { - info!("V2 processed {} items for tree {}", count, tree_pubkey); + info!( + event = "v2_tree_processed_items", + run_id = %self.run_id, + tree = %tree_pubkey, + items = count, + epoch = epoch_info.epoch, + "V2 processed items for tree" + ); self.update_metrics_and_counts( epoch_info.epoch, count, @@ -1632,13 +2071,33 @@ impl EpochManager { } } Err(e) => { - error!("V2 processing failed for tree {}: {:?}", tree_pubkey, e); + error!( + event = "v2_tree_processing_failed", + run_id = %self.run_id, + tree = %tree_pubkey, + error = ?e, + "V2 processing failed for tree" + ); tokio::time::sleep(POLL_INTERVAL).await; } } if let Err(e) = push_metrics(&self.config.external_services.pushgateway_url).await { - warn!("Failed to push metrics: {:?}", e); + if should_emit_rate_limited_warning("push_metrics_v2", Duration::from_secs(30)) { + warn!( + event = "metrics_push_failed", + run_id = %self.run_id, + error = ?e, + "Failed to push metrics" + ); + } else { + debug!( + event = "metrics_push_failed_suppressed", + run_id = %self.run_id, + error = ?e, + "Suppressing repeated metrics push failure" + ); + } } estimated_slot = self.slot_tracker.estimated_current_slot(); } @@ -1680,16 +2139,26 @@ impl EpochManager { current_epoch_num, ) .map_err(|e| { - error!("Failed to calculate eligible forester index: {:?}", e); + error!( + event = "eligibility_index_calculation_failed", + run_id = %self.run_id, + queue = %queue_pubkey, + epoch = current_epoch_num, + light_slot = current_light_slot, + error = ?e, + "Failed to calculate eligible forester index" + ); anyhow::anyhow!("Eligibility calculation failed: {}", e) 
})?; if !epoch_pda.is_eligible(eligible_forester_slot_index) { warn!( - "Forester {} is no longer eligible to process tree {} in light slot {}.", - self.config.payer_keypair.pubkey(), - queue_pubkey, - current_light_slot + event = "forester_not_eligible_for_slot", + run_id = %self.run_id, + forester = %self.config.payer_keypair.pubkey(), + queue = %queue_pubkey, + light_slot = current_light_slot, + "Forester is no longer eligible to process this queue in current light slot" ); return Ok(false); } @@ -1781,10 +2250,12 @@ impl EpochManager { let num_batches = accounts.len().div_ceil(config.batch_size); info!( - "Processing {} compressible accounts in {} batches (batch_size={})", - accounts.len(), - num_batches, - config.batch_size + event = "compression_ctoken_started", + run_id = %self.run_id, + accounts = accounts.len(), + batches = num_batches, + batch_size = config.batch_size, + "Starting ctoken compression batches" ); let compressor = CTokenCompressor::new( @@ -1835,11 +2306,17 @@ impl EpochManager { // Signal cancellation to all other futures cancelled.store(true, Ordering::Relaxed); warn!( - "Cancelling compression: forester no longer eligible (current_slot={}, eligibility_end={})", + event = "compression_ctoken_cancelled_not_eligible", + run_id = %self.run_id, current_slot, - consecutive_eligibility_end + eligibility_end_slot = consecutive_eligibility_end, + "Cancelling compression because forester is no longer eligible" ); - return Err((batch_idx, batch.len(), anyhow!("Forester no longer eligible"))); + return Err(( + batch_idx, + batch.len(), + anyhow!("Forester no longer eligible"), + )); } debug!( @@ -1864,10 +2341,12 @@ impl EpochManager { } Err(e) => { error!( - "Compression batch {}/{} failed: {:?}", - batch_idx + 1, - num_batches, - e + event = "compression_ctoken_batch_failed", + run_id = %self.run_id, + batch = batch_idx + 1, + total_batches = num_batches, + error = ?e, + "Compression batch failed" ); Err((batch_idx, batch.len(), e)) } @@ -1887,29 
+2366,36 @@ impl EpochManager { match result { Ok((batch_idx, count, sig)) => { info!( - "Successfully compressed {} accounts in batch {}/{}: {}", - count, - batch_idx + 1, - num_batches, - sig + event = "compression_ctoken_batch_succeeded", + run_id = %self.run_id, + batch = batch_idx + 1, + total_batches = num_batches, + accounts = count, + signature = %sig, + "Compression batch succeeded" ); total_compressed += count; } Err((batch_idx, count, e)) => { error!( - "Compression batch {}/{} ({} accounts) failed: {:?}", - batch_idx + 1, - num_batches, - count, - e + event = "compression_ctoken_batch_failed_final", + run_id = %self.run_id, + batch = batch_idx + 1, + total_batches = num_batches, + accounts = count, + error = ?e, + "Compression batch failed" ); } } } info!( - "Completed ctoken compression for epoch {}: compressed {} accounts", - epoch_info.epoch, total_compressed + event = "compression_ctoken_completed", + run_id = %self.run_id, + epoch = epoch_info.epoch, + compressed_accounts = total_compressed, + "Completed ctoken compression" ); // Process PDA compression if configured @@ -1917,7 +2403,12 @@ impl EpochManager { .dispatch_pda_compression(consecutive_eligibility_end) .await .unwrap_or_else(|e| { - error!("PDA compression failed: {:?}", e); + error!( + event = "compression_pda_dispatch_failed", + run_id = %self.run_id, + error = ?e, + "PDA compression failed" + ); 0 }); @@ -1926,14 +2417,25 @@ impl EpochManager { .dispatch_mint_compression(consecutive_eligibility_end) .await .unwrap_or_else(|e| { - error!("Mint compression failed: {:?}", e); + error!( + event = "compression_mint_dispatch_failed", + run_id = %self.run_id, + error = ?e, + "Mint compression failed" + ); 0 }); let total = total_compressed + pda_compressed + mint_compressed; info!( - "Completed all compression for epoch {}: {} ctoken + {} PDA + {} Mint = {} total", - epoch_info.epoch, total_compressed, pda_compressed, mint_compressed, total + event = "compression_all_completed", + run_id = 
%self.run_id, + epoch = epoch_info.epoch, + ctoken_compressed = total_compressed, + pda_compressed, + mint_compressed, + total_compressed = total, + "Completed all compression" ); Ok(total) } @@ -1986,9 +2488,11 @@ impl EpochManager { } info!( - "Processing {} compressible PDA accounts for program {}", - accounts.len(), - program_config.program_id + event = "compression_pda_program_started", + run_id = %self.run_id, + program = %program_config.program_id, + accounts = accounts.len(), + "Processing compressible PDA accounts for program" ); let pda_compressor = crate::compressible::pda::PdaCompressor::new( @@ -2002,8 +2506,11 @@ impl EpochManager { Ok(cfg) => cfg, Err(e) => { error!( - "Failed to fetch config for program {}: {:?}", - program_config.program_id, e + event = "compression_pda_program_config_failed", + run_id = %self.run_id, + program = %program_config.program_id, + error = ?e, + "Failed to fetch config for PDA program" ); continue; } @@ -2014,8 +2521,11 @@ impl EpochManager { if current_slot >= consecutive_eligibility_end { cancelled.store(true, Ordering::Relaxed); warn!( - "Stopping PDA compression: forester no longer eligible (current_slot={}, eligibility_end={})", - current_slot, consecutive_eligibility_end + event = "compression_pda_cancelled_not_eligible", + run_id = %self.run_id, + current_slot, + eligibility_end_slot = consecutive_eligibility_end, + "Stopping PDA compression because forester is no longer eligible" ); break; } @@ -2044,8 +2554,12 @@ impl EpochManager { Err((account_state, e)) => { if e.to_string() != "Cancelled" { error!( - "Failed to compress PDA {} for program {}: {:?}", - account_state.pubkey, program_config.program_id, e + event = "compression_pda_account_failed", + run_id = %self.run_id, + account = %account_state.pubkey, + program = %program_config.program_id, + error = ?e, + "Failed to compress PDA account" ); } } @@ -2053,7 +2567,12 @@ impl EpochManager { } } - info!("Completed PDA compression: {} accounts", 
total_compressed); + info!( + event = "compression_pda_completed", + run_id = %self.run_id, + compressed_accounts = total_compressed, + "Completed PDA compression" + ); Ok(total_compressed) } @@ -2085,9 +2604,11 @@ impl EpochManager { } info!( - "Processing {} compressible Mint accounts concurrently (max_concurrent={})", - accounts.len(), - config.max_concurrent_batches + event = "compression_mint_started", + run_id = %self.run_id, + accounts = accounts.len(), + max_concurrent = config.max_concurrent_batches, + "Processing compressible Mint accounts" ); let mint_compressor = crate::compressible::mint::MintCompressor::new( @@ -2114,13 +2635,24 @@ impl EpochManager { } Err((mint_state, e)) => { if e.to_string() != "Cancelled" { - error!("Failed to compress Mint {}: {:?}", mint_state.pubkey, e); + error!( + event = "compression_mint_account_failed", + run_id = %self.run_id, + mint = %mint_state.pubkey, + error = ?e, + "Failed to compress mint account" + ); } } } } - info!("Completed Mint compression: {} accounts", total_compressed); + info!( + event = "compression_mint_completed", + run_id = %self.run_id, + compressed_accounts = total_compressed, + "Completed Mint compression" + ); Ok(total_compressed) } @@ -2174,15 +2706,25 @@ impl EpochManager { if num_sent > 0 { debug!( - "processed {} items v1 tree {}", - num_sent, tree_accounts.merkle_tree + event = "v1_tree_items_processed", + run_id = %self.run_id, + tree = %tree_accounts.merkle_tree, + items = num_sent, + "Processed items for V1 tree" ); } match self.rollover_if_needed(tree_accounts).await { Ok(_) => Ok(num_sent), Err(e) => { - error!("Failed to rollover tree: {:?}", e); + error!( + event = "tree_rollover_failed", + run_id = %self.run_id, + tree = %tree_accounts.merkle_tree, + tree_type = ?tree_accounts.tree_type, + error = ?e, + "Failed to rollover tree" + ); Err(e) } } @@ -2201,6 +2743,7 @@ impl EpochManager { BatchContext { rpc_pool: self.rpc_pool.clone(), authority: self.authority.clone(), + run_id: 
self.run_id.clone(), derivation: self.config.derivation_pubkey, epoch: epoch_info.epoch, merkle_tree: tree_accounts.merkle_tree, @@ -2257,6 +2800,14 @@ impl EpochManager { epoch_info: &Epoch, tree_accounts: &TreeAccounts, ) -> Result>>> { + // Serialize initialization per tree to avoid duplicate expensive processor construction. + let init_lock = self + .state_processor_init_locks + .entry(tree_accounts.merkle_tree) + .or_insert_with(|| Arc::new(Mutex::new(()))) + .clone(); + let _init_guard = init_lock.lock().await; + // First check if we already have a processor for this tree // We REUSE processors across epochs to preserve cached state for optimistic processing if let Some(entry) = self.state_processors.get(&tree_accounts.merkle_tree) { @@ -2295,17 +2846,11 @@ impl EpochManager { self.zkp_batch_sizes .insert(tree_accounts.merkle_tree, batch_size); - // Insert the new processor (or get existing if another task beat us to it) - match self.state_processors.entry(tree_accounts.merkle_tree) { - Entry::Occupied(occupied) => { - // Another task already inserted - use theirs (they may have cached state) - Ok(occupied.get().1.clone()) - } - Entry::Vacant(vacant) => { - vacant.insert((epoch_info.epoch, processor.clone())); - Ok(processor) - } - } + self.state_processors.insert( + tree_accounts.merkle_tree, + (epoch_info.epoch, processor.clone()), + ); + Ok(processor) } async fn get_or_create_address_processor( @@ -2313,6 +2858,14 @@ impl EpochManager { epoch_info: &Epoch, tree_accounts: &TreeAccounts, ) -> Result>>> { + // Serialize initialization per tree to avoid duplicate expensive processor construction. 
+ let init_lock = self + .address_processor_init_locks + .entry(tree_accounts.merkle_tree) + .or_insert_with(|| Arc::new(Mutex::new(()))) + .clone(); + let _init_guard = init_lock.lock().await; + if let Some(entry) = self.address_processors.get(&tree_accounts.merkle_tree) { let (stored_epoch, processor_ref) = entry.value(); let processor_clone = processor_ref.clone(); @@ -2347,14 +2900,11 @@ impl EpochManager { self.zkp_batch_sizes .insert(tree_accounts.merkle_tree, batch_size); - // Insert the new processor (or get existing if another task beat us to it) - match self.address_processors.entry(tree_accounts.merkle_tree) { - Entry::Occupied(occupied) => Ok(occupied.get().1.clone()), - Entry::Vacant(vacant) => { - vacant.insert((epoch_info.epoch, processor.clone())); - Ok(processor) - } - } + self.address_processors.insert( + tree_accounts.merkle_tree, + (epoch_info.epoch, processor.clone()), + ); + Ok(processor) } async fn process_v2( @@ -2387,28 +2937,52 @@ impl EpochManager { Err(e) => { if is_v2_error(&e, V2Error::is_constraint) { warn!( - "State processing hit constraint error for tree {}: {}. Dropping processor to flush cache.", - tree_accounts.merkle_tree, - e + event = "v2_state_constraint_error", + run_id = %self.run_id, + tree = %tree_accounts.merkle_tree, + error = %e, + "State processing hit constraint error. Dropping processor to flush cache." ); drop(proc); // Release lock before removing self.state_processors.remove(&tree_accounts.merkle_tree); self.proof_caches.remove(&tree_accounts.merkle_tree); Err(e) } else if is_v2_error(&e, V2Error::is_hashchain_mismatch) { - warn!( - "State processing hit hashchain mismatch for tree {}: {}. 
Clearing cache and retrying.", - tree_accounts.merkle_tree, - e + let warning_key = format!( + "v2_state_hashchain_mismatch:{}", + tree_accounts.merkle_tree ); + if should_emit_rate_limited_warning( + warning_key, + Duration::from_secs(15), + ) { + warn!( + event = "v2_state_hashchain_mismatch", + run_id = %self.run_id, + tree = %tree_accounts.merkle_tree, + error = %e, + "State processing hit hashchain mismatch. Clearing cache and retrying." + ); + } + self.heartbeat.increment_v2_recoverable_error(); proc.clear_cache().await; Ok(ProcessingResult::default()) } else { - warn!( - "Failed to process state queue for tree {}: {}. Will retry next tick without dropping processor.", - tree_accounts.merkle_tree, - e - ); + let warning_key = + format!("v2_state_process_failed:{}", tree_accounts.merkle_tree); + if should_emit_rate_limited_warning( + warning_key, + Duration::from_secs(10), + ) { + warn!( + event = "v2_state_process_failed_retrying", + run_id = %self.run_id, + tree = %tree_accounts.merkle_tree, + error = %e, + "Failed to process state queue. Will retry next tick without dropping processor." + ); + } + self.heartbeat.increment_v2_recoverable_error(); Ok(ProcessingResult::default()) } } @@ -2437,28 +3011,52 @@ impl EpochManager { Err(e) => { if is_v2_error(&e, V2Error::is_constraint) { warn!( - "Address processing hit constraint error for tree {}: {}. Dropping processor to flush cache.", - tree_accounts.merkle_tree, - e + event = "v2_address_constraint_error", + run_id = %self.run_id, + tree = %tree_accounts.merkle_tree, + error = %e, + "Address processing hit constraint error. Dropping processor to flush cache." ); drop(proc); self.address_processors.remove(&tree_accounts.merkle_tree); self.proof_caches.remove(&tree_accounts.merkle_tree); Err(e) } else if is_v2_error(&e, V2Error::is_hashchain_mismatch) { - warn!( - "Address processing hit hashchain mismatch for tree {}: {}. 
Clearing cache and retrying.", - tree_accounts.merkle_tree, - e + let warning_key = format!( + "v2_address_hashchain_mismatch:{}", + tree_accounts.merkle_tree ); + if should_emit_rate_limited_warning( + warning_key, + Duration::from_secs(15), + ) { + warn!( + event = "v2_address_hashchain_mismatch", + run_id = %self.run_id, + tree = %tree_accounts.merkle_tree, + error = %e, + "Address processing hit hashchain mismatch. Clearing cache and retrying." + ); + } + self.heartbeat.increment_v2_recoverable_error(); proc.clear_cache().await; Ok(ProcessingResult::default()) } else { - warn!( - "Failed to process address queue for tree {}: {}. Will retry next tick without dropping processor.", - tree_accounts.merkle_tree, - e - ); + let warning_key = + format!("v2_address_process_failed:{}", tree_accounts.merkle_tree); + if should_emit_rate_limited_warning( + warning_key, + Duration::from_secs(10), + ) { + warn!( + event = "v2_address_process_failed_retrying", + run_id = %self.run_id, + tree = %tree_accounts.merkle_tree, + error = %e, + "Failed to process address queue. Will retry next tick without dropping processor." 
+ ); + } + self.heartbeat.increment_v2_recoverable_error(); Ok(ProcessingResult::default()) } } @@ -2466,8 +3064,10 @@ impl EpochManager { } _ => { warn!( - "Unsupported tree type for V2 processing: {:?}", - tree_accounts.tree_type + event = "v2_unsupported_tree_type", + run_id = %self.run_id, + tree_type = ?tree_accounts.tree_type, + "Unsupported tree type for V2 processing" ); Ok(ProcessingResult::default()) } @@ -2489,6 +3089,7 @@ impl EpochManager { queue_metric_update(epoch_num, items_processed, duration); self.increment_processed_items_count(epoch_num, items_processed) .await; + self.heartbeat.add_items_processed(items_processed); } } @@ -2519,8 +3120,10 @@ impl EpochManager { if v2_state_trees.is_empty() { if skipped_count > 0 { info!( - "No trees to pre-warm: {} StateV2 trees skipped by config", - skipped_count + event = "prewarm_skipped_all_trees_filtered", + run_id = %self.run_id, + skipped_trees = skipped_count, + "No trees to pre-warm; all StateV2 trees skipped by config" ); } return; @@ -2528,8 +3131,11 @@ impl EpochManager { if slots_until_active < 15 { info!( - "Skipping pre-warming: only {} slots until active phase, not enough time", - slots_until_active + event = "prewarm_skipped_not_enough_time", + run_id = %self.run_id, + slots_until_active, + min_required_slots = 15, + "Skipping pre-warming; not enough slots until active phase" ); return; } @@ -2554,7 +3160,13 @@ impl EpochManager { let mut rpc = match self_clone.rpc_pool.get_connection().await { Ok(r) => r, Err(e) => { - warn!("Failed to get RPC for cache validation: {:?}", e); + warn!( + event = "prewarm_cache_validation_rpc_failed", + run_id = %self_clone.run_id, + tree = %tree_pubkey, + error = ?e, + "Failed to get RPC for cache validation" + ); return; } }; @@ -2562,8 +3174,12 @@ impl EpochManager { self_clone.fetch_current_root(&mut *rpc, &tree_accounts).await { info!( - "Tree {} has {} cached proofs from previous epoch (root: {:?}), skipping pre-warm", - tree_pubkey, cache_len, 
&current_root[..4]
+                                event = "prewarm_skipped_cache_already_warm",
+                                run_id = %self_clone.run_id,
+                                tree = %tree_pubkey,
+                                cached_proofs = cache_len,
+                                root_prefix = ?&current_root[..4],
+                                "Tree already has cached proofs from previous epoch; skipping pre-warm"
                             );
                             return;
                         }
@@ -2576,8 +3192,11 @@ impl EpochManager {
                         Ok(p) => p,
                         Err(e) => {
                             warn!(
-                                "Failed to create processor for pre-warming tree {}: {:?}",
-                                tree_pubkey, e
+                                event = "prewarm_processor_create_failed",
+                                run_id = %self_clone.run_id,
+                                tree = %tree_pubkey,
+                                error = ?e,
+                                "Failed to create processor for pre-warming tree"
                             );
                             return;
                         }
                     };
@@ -2596,8 +3215,11 @@ impl EpochManager {
                     Ok(result) => {
                         if result.items_processed > 0 {
                             info!(
-                                "Pre-warmed {} items for tree {} during wait (metrics: {:?})",
-                                result.items_processed, tree_pubkey, result.metrics
+                                event = "prewarm_tree_completed",
+                                run_id = %self_clone.run_id,
+                                tree = %tree_pubkey,
+                                items = result.items_processed,
+                                "Pre-warmed items for tree during wait"
                             );
                             self_clone
                                 .add_processing_metrics(epoch_info.epoch.epoch, result.metrics)
                                 .await;
@@ -2621,20 +3243,32 @@ impl EpochManager {
             (slot_duration() * timeout_slots as u32).min(Duration::from_secs(30));

         info!(
-            "Starting pre-warming for {} trees ({} skipped by config) with {}ms timeout",
-            v2_state_trees.len(),
-            skipped_count,
-            timeout_duration.as_millis()
+            event = "prewarm_started",
+            run_id = %self.run_id,
+            trees = v2_state_trees.len(),
+            skipped_trees = skipped_count,
+            timeout_ms = timeout_duration.as_millis() as u64,
+            "Starting pre-warming"
         );

         match tokio::time::timeout(timeout_duration, futures::future::join_all(prewarm_futures))
             .await
         {
             Ok(_) => {
-                info!("Completed pre-warming for all trees");
+                info!(
+                    event = "prewarm_completed",
+                    run_id = %self.run_id,
+                    trees = v2_state_trees.len(),
+                    "Completed pre-warming for all trees"
+                );
             }
             Err(_) => {
-                info!("Pre-warming timed out after {:?}", timeout_duration);
+                info!(
+                    event = "prewarm_timed_out",
+                    run_id = %self.run_id,
+                    timeout_ms = 
timeout_duration.as_millis() as u64,
+                    "Pre-warming timed out"
+                );
             }
         }
     }
 }
@@ -2651,8 +3285,12 @@ impl EpochManager {
         let current_slot = self.slot_tracker.estimated_current_slot();
         if current_slot >= consecutive_eligibility_end {
             debug!(
-                "Skipping cached proof send for tree {}: past eligibility window (slot {} >= {})",
-                tree_pubkey, current_slot, consecutive_eligibility_end
+                event = "cached_proofs_skipped_outside_eligibility",
+                run_id = %self.run_id,
+                tree = %tree_pubkey,
+                current_slot,
+                eligibility_end_slot = consecutive_eligibility_end,
+                "Skipping cached proof send because eligibility window has ended"
             );
             return Ok(None);
         }
@@ -2663,7 +3301,12 @@ impl EpochManager {
         };

         if cache.is_warming().await {
-            debug!("Cache still warming for tree {}, skipping", tree_pubkey);
+            debug!(
+                event = "cached_proofs_skipped_cache_warming",
+                run_id = %self.run_id,
+                tree = %tree_pubkey,
+                "Skipping cached proofs because cache is still warming"
+            );
             return Ok(None);
         }

@@ -2672,8 +3315,11 @@ impl EpochManager {
             Ok(root) => root,
             Err(e) => {
                 warn!(
-                    "Failed to fetch current root for tree {}: {:?}",
-                    tree_pubkey, e
+                    event = "cached_proofs_root_fetch_failed",
+                    run_id = %self.run_id,
+                    tree = %tree_pubkey,
+                    error = ?e,
+                    "Failed to fetch current root for tree"
                 );
                 return Ok(None);
             }
@@ -2683,9 +3329,11 @@ impl EpochManager {
             Some(proofs) => proofs,
             None => {
                 debug!(
-                    "No valid cached proofs for tree {} (root: {:?})",
-                    tree_pubkey,
-                    &current_root[..4]
+                    event = "cached_proofs_not_available",
+                    run_id = %self.run_id,
+                    tree = %tree_pubkey,
+                    root_prefix = ?&current_root[..4],
+                    "No valid cached proofs for tree"
                 );
                 return Ok(None);
             }
@@ -2696,10 +3344,12 @@ impl EpochManager {
         }

         info!(
-            "Sending {} cached proofs for tree {} (root: {:?})",
-            cached_proofs.len(),
-            tree_pubkey,
-            &current_root[..4]
+            event = "cached_proofs_send_started",
+            run_id = %self.run_id,
+            tree = %tree_pubkey,
+            proofs = cached_proofs.len(),
+            root_prefix = ?&current_root[..4],
+            "Sending cached proofs for tree"
         );

         let 
items_sent = self @@ -2813,14 +3463,21 @@ impl EpochManager { { Ok(sig) => { info!( - "Sent cached proofs tx: {} ({} instructions)", - sig, - instructions.len() + event = "cached_proofs_tx_sent", + run_id = %self.run_id, + signature = %sig, + instruction_count = instructions.len(), + "Sent cached proofs transaction" ); total_items += chunk_items; } Err(e) => { - warn!("Failed to send cached proofs tx: {:?}", e); + warn!( + event = "cached_proofs_tx_send_failed", + run_id = %self.run_id, + error = ?e, + "Failed to send cached proofs transaction" + ); } } } @@ -2834,7 +3491,13 @@ impl EpochManager { if is_tree_ready_for_rollover(&mut *rpc, tree_account.merkle_tree, tree_account.tree_type) .await? { - info!("Starting {} rollover.", tree_account.merkle_tree); + info!( + event = "tree_rollover_started", + run_id = %self.run_id, + tree = %tree_account.merkle_tree, + tree_type = ?tree_account.tree_type, + "Starting tree rollover" + ); self.perform_rollover(tree_account).await?; } Ok(()) @@ -2855,12 +3518,23 @@ impl EpochManager { #[instrument(level = "debug", skip(self, epoch_info), fields(forester = %self.config.payer_keypair.pubkey(), epoch = epoch_info.epoch.epoch ))] async fn wait_for_report_work_phase(&self, epoch_info: &ForesterEpochInfo) -> Result<()> { - info!("Waiting for report work phase"); + info!( + event = "wait_for_report_work_phase", + run_id = %self.run_id, + epoch = epoch_info.epoch.epoch, + report_work_start_slot = epoch_info.epoch.phases.report_work.start, + "Waiting for report work phase" + ); let mut rpc = self.rpc_pool.get_connection().await?; let report_work_start_slot = epoch_info.epoch.phases.report_work.start; wait_until_slot_reached(&mut *rpc, &self.slot_tracker, report_work_start_slot).await?; - info!("Finished waiting for report work phase"); + info!( + event = "report_work_phase_ready", + run_id = %self.run_id, + epoch = epoch_info.epoch.epoch, + "Finished waiting for report work phase" + ); Ok(()) } @@ -2874,8 +3548,15 @@ impl EpochManager 
{ }; info!( - "Sending work report: epoch={} items={} metrics={:?}", - report.epoch, report.processed_items, report.metrics + event = "work_report_sent_to_channel", + run_id = %self.run_id, + epoch = report.epoch, + items = report.processed_items, + total_circuit_inputs_ms = report.metrics.total_circuit_inputs().as_millis() as u64, + total_proof_generation_ms = report.metrics.total_proof_generation().as_millis() as u64, + total_round_trip_ms = report.metrics.total_round_trip().as_millis() as u64, + tx_sending_ms = report.metrics.tx_sending_duration.as_millis() as u64, + "Sending work report to channel" ); self.work_report_sender @@ -2885,6 +3566,7 @@ impl EpochManager { epoch: report.epoch, error: e.to_string(), })?; + self.heartbeat.increment_work_report_sent(); Ok(()) } @@ -2892,7 +3574,12 @@ impl EpochManager { #[instrument(level = "debug", skip(self, epoch_info), fields(forester = %self.config.payer_keypair.pubkey(), epoch = epoch_info.epoch.epoch ))] async fn report_work_onchain(&self, epoch_info: &ForesterEpochInfo) -> Result<()> { - info!("Reporting work on-chain"); + info!( + event = "work_report_onchain_started", + run_id = %self.run_id, + epoch = epoch_info.epoch.epoch, + "Reporting work on-chain" + ); let mut rpc = LightClient::new(LightClientConfig { url: self.config.external_services.rpc_url.to_string(), photon_url: self.config.external_services.indexer_url.clone(), @@ -2935,11 +3622,21 @@ impl EpochManager { .await { Ok(_) => { - info!("Work reported on-chain"); + info!( + event = "work_report_onchain_succeeded", + run_id = %self.run_id, + epoch = epoch_info.epoch.epoch, + "Work reported on-chain" + ); } Err(e) => { if e.to_string().contains("already been processed") { - info!("Work already reported for epoch {}", epoch_info.epoch.epoch); + info!( + event = "work_report_onchain_already_reported", + run_id = %self.run_id, + epoch = epoch_info.epoch.epoch, + "Work already reported on-chain for epoch" + ); return Ok(()); } if let 
RpcError::ClientError(client_error) = &e { @@ -2985,7 +3682,13 @@ impl EpochManager { ) .await?; - info!("Address rollover signature: {:?}", rollover_signature); + info!( + event = "address_tree_rollover_succeeded", + run_id = %self.run_id, + tree = %tree_account.merkle_tree, + signature = %rollover_signature, + "Address tree rollover succeeded" + ); Ok(()) } TreeType::StateV1 => { @@ -3007,7 +3710,13 @@ impl EpochManager { ) .await?; - info!("State rollover signature: {:?}", rollover_signature); + info!( + event = "state_tree_rollover_succeeded", + run_id = %self.run_id, + tree = %tree_account.merkle_tree, + signature = %rollover_signature, + "State tree rollover succeeded" + ); Ok(()) } @@ -3052,9 +3761,73 @@ fn should_skip_tree(config: &ForesterConfig, tree_type: &TreeType) -> bool { } } +pub fn generate_run_id() -> String { + let epoch_ms = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis(); + format!("{}-{}", std::process::id(), epoch_ms) +} + +fn spawn_heartbeat_task( + heartbeat: Arc, + slot_tracker: Arc, + protocol_config: Arc, + run_id: String, +) -> JoinHandle<()> { + tokio::spawn(async move { + let mut interval = tokio::time::interval(Duration::from_secs(20)); + interval.set_missed_tick_behavior(MissedTickBehavior::Skip); + let mut previous = heartbeat.snapshot(); + + loop { + interval.tick().await; + + let slot = slot_tracker.estimated_current_slot(); + let epoch = protocol_config.get_current_active_epoch(slot).ok(); + let epoch_known = epoch.is_some(); + let epoch_value = epoch.unwrap_or_default(); + let current = heartbeat.snapshot(); + let delta = current.delta_since(&previous); + previous = current; + + info!( + event = "service_heartbeat", + run_id = %run_id, + slot, + epoch = epoch_value, + epoch_known, + cycle_delta = delta.active_cycles, + tree_tasks_delta = delta.tree_tasks_spawned, + queues_started_delta = delta.queues_started, + queues_finished_delta = delta.queues_finished, + items_processed_delta = 
delta.items_processed, + work_reports_delta = delta.work_reports_sent, + recoverable_v2_errors_delta = delta.v2_recoverable_errors, + cycle_total = current.active_cycles, + items_processed_total = current.items_processed, + "Forester heartbeat" + ); + } + }) +} + #[instrument( level = "info", - skip(config, protocol_config, rpc_pool, shutdown, work_report_sender, slot_tracker), + skip( + config, + protocol_config, + rpc_pool, + shutdown, + work_report_sender, + slot_tracker, + tx_cache, + ops_cache, + compressible_tracker, + pda_tracker, + mint_tracker, + run_id + ), fields(forester = %config.payer_keypair.pubkey()) )] #[allow(clippy::too_many_arguments)] @@ -3070,9 +3843,22 @@ pub async fn run_service( compressible_tracker: Option>, pda_tracker: Option>, mint_tracker: Option>, + run_id: String, ) -> Result<()> { - info_span!("run_service", forester = %config.payer_keypair.pubkey()) - .in_scope(|| async { + let heartbeat = Arc::new(ServiceHeartbeat::default()); + let heartbeat_handle = spawn_heartbeat_task( + heartbeat.clone(), + slot_tracker.clone(), + protocol_config.clone(), + run_id.clone(), + ); + + let run_id_for_logs = run_id.clone(); + let result = info_span!( + "run_service", + forester = %config.payer_keypair.pubkey() + ) + .in_scope(|| async move { let processor_mode_str = match ( config.general_config.skip_v1_state_trees && config.general_config.skip_v1_address_trees, @@ -3084,7 +3870,12 @@ pub async fn run_service( (false, false) => "all", _ => "unknown", }; - info!("Starting forester in {} mode", processor_mode_str); + info!( + event = "forester_starting", + run_id = %run_id_for_logs, + processor_mode = processor_mode_str, + "Starting forester" + ); const INITIAL_RETRY_DELAY: Duration = Duration::from_secs(1); const MAX_RETRY_DELAY: Duration = Duration::from_secs(30); @@ -3102,7 +3893,12 @@ pub async fn run_service( tokio::select! { biased; _ = &mut shutdown => { - info!("Received shutdown signal during tree fetch. 
Stopping."); + info!( + event = "shutdown_received", + run_id = %run_id_for_logs, + phase = "tree_fetch", + "Received shutdown signal during tree fetch. Stopping." + ); return Ok(()); } result = rpc_pool.get_connection() => { @@ -3111,7 +3907,12 @@ pub async fn run_service( tokio::select! { biased; _ = &mut shutdown => { - info!("Received shutdown signal during tree fetch. Stopping."); + info!( + event = "shutdown_received", + run_id = %run_id_for_logs, + phase = "tree_fetch", + "Received shutdown signal during tree fetch. Stopping." + ); return Ok(()); } fetch_result = fetch_trees(&*rpc) => { @@ -3120,15 +3921,22 @@ pub async fn run_service( let group_authority = match config.general_config.group_authority { Some(ga) => Some(ga), None => { - match fetch_protocol_group_authority(&*rpc).await { + match fetch_protocol_group_authority(&*rpc, run_id_for_logs.as_str()).await { Ok(ga) => { - info!("Using protocol default group authority: {}", ga); + info!( + event = "group_authority_default_fetched", + run_id = %run_id_for_logs, + group_authority = %ga, + "Using protocol default group authority" + ); Some(ga) } Err(e) => { warn!( - "Failed to fetch protocol group authority, processing all trees: {:?}", - e + event = "group_authority_fetch_failed", + run_id = %run_id_for_logs, + error = ?e, + "Failed to fetch protocol group authority; processing all trees" ); None } @@ -3140,10 +3948,12 @@ pub async fn run_service( let before_count = fetched_trees.len(); fetched_trees.retain(|tree| tree.owner == group_authority); info!( - "Filtered trees by group authority {}: {} -> {} trees", - group_authority, - before_count, - fetched_trees.len() + event = "trees_filtered_by_group_authority", + run_id = %run_id_for_logs, + group_authority = %group_authority, + trees_before = before_count, + trees_after = fetched_trees.len(), + "Filtered trees by group authority" ); } @@ -3151,13 +3961,24 @@ pub async fn run_service( let tree_ids = &config.general_config.tree_ids; 
fetched_trees.retain(|tree| tree_ids.contains(&tree.merkle_tree)); if fetched_trees.is_empty() { - error!("None of the specified trees found: {:?}", tree_ids); + error!( + event = "trees_filter_explicit_ids_empty", + run_id = %run_id_for_logs, + requested_tree_count = tree_ids.len(), + requested_trees = ?tree_ids, + "None of the specified trees were found" + ); return Err(anyhow::anyhow!( "None of the specified trees found: {:?}", tree_ids )); } - info!("Processing only trees: {:?}", tree_ids); + info!( + event = "trees_filter_explicit_ids", + run_id = %run_id_for_logs, + tree_count = tree_ids.len(), + "Processing only explicitly requested trees" + ); } break fetched_trees; } @@ -3171,8 +3992,13 @@ pub async fn run_service( )); } warn!( - "Failed to fetch trees (attempt {}/{}), retrying in {:?}: {:?}", - attempts, max_attempts, delay, e + event = "fetch_trees_failed_retrying", + run_id = %run_id_for_logs, + attempt = attempts, + max_attempts, + retry_delay_ms = delay.as_millis() as u64, + error = ?e, + "Failed to fetch trees; retrying" ); } } @@ -3189,8 +4015,13 @@ pub async fn run_service( )); } warn!( - "Failed to get RPC connection (attempt {}/{}), retrying in {:?}: {:?}", - attempts, max_attempts, delay, e + event = "rpc_connection_failed_retrying", + run_id = %run_id_for_logs, + attempt = attempts, + max_attempts, + retry_delay_ms = delay.as_millis() as u64, + error = ?e, + "Failed to get RPC connection; retrying" ); } } @@ -3200,7 +4031,12 @@ pub async fn run_service( tokio::select! { biased; _ = &mut shutdown => { - info!("Received shutdown signal during retry wait. Stopping."); + info!( + event = "shutdown_received", + run_id = %run_id_for_logs, + phase = "tree_fetch_retry_wait", + "Received shutdown signal during retry wait. Stopping." 
+ ); return Ok(()); } _ = sleep(delay) => { @@ -3214,7 +4050,12 @@ pub async fn run_service( let (new_tree_sender, _) = broadcast::channel(100); if !config.general_config.tree_ids.is_empty() { - info!("Processing specific trees, tree discovery will be limited"); + info!( + event = "tree_discovery_limited_to_explicit_ids", + run_id = %run_id_for_logs, + tree_count = config.general_config.tree_ids.len(), + "Processing specific trees; tree discovery will be limited" + ); } while retry_count < config.retry_config.max_retries { @@ -3226,9 +4067,11 @@ pub async fn run_service( match load_lookup_table_async(&*rpc, lut_address).await { Ok(lut) => { info!( - "Loaded lookup table {} with {} addresses", - lut_address, - lut.addresses.len() + event = "lookup_table_loaded", + run_id = %run_id_for_logs, + lookup_table = %lut_address, + address_count = lut.addresses.len(), + "Loaded lookup table" ); Arc::new(vec![lut]) } @@ -3260,6 +4103,8 @@ pub async fn run_service( pda_tracker.clone(), mint_tracker.clone(), address_lookup_tables, + heartbeat.clone(), + run_id.clone(), ) .await { @@ -3273,7 +4118,12 @@ pub async fn run_service( let result = tokio::select! { result = epoch_manager.run() => result, _ = shutdown => { - info!("Received shutdown signal. Stopping the service."); + info!( + event = "shutdown_received", + run_id = %run_id_for_logs, + phase = "service_run", + "Received shutdown signal. Stopping the service." 
+ ); Ok(()) } }; @@ -3282,9 +4132,11 @@ pub async fn run_service( } Err(e) => { warn!( - "Failed to create EpochManager (attempt {}): {:?}", - retry_count + 1, - e + event = "epoch_manager_create_failed", + run_id = %run_id_for_logs, + attempt = retry_count + 1, + error = ?e, + "Failed to create EpochManager" ); retry_count += 1; if retry_count < config.retry_config.max_retries { @@ -3293,9 +4145,12 @@ pub async fn run_service( retry_delay = std::cmp::min(retry_delay * 2, MAX_RETRY_DELAY); } else { error!( - "Failed to start forester after {} attempts over {:?}", - config.retry_config.max_retries, - start_time.elapsed() + event = "forester_start_failed_max_retries", + run_id = %run_id_for_logs, + attempts = config.retry_config.max_retries, + elapsed_ms = start_time.elapsed().as_millis() as u64, + error = ?e, + "Failed to start forester after max retries" ); return Err(InitializationError::MaxRetriesExceeded { attempts: config.retry_config.max_retries, @@ -3312,7 +4167,10 @@ pub async fn run_service( .into(), ) }) - .await + .await; + + heartbeat_handle.abort(); + result } /// Async version of load_lookup_table that works with the Rpc trait @@ -3379,18 +4237,14 @@ mod tests { indexer_config: Default::default(), transaction_config: Default::default(), general_config: GeneralConfig { - slot_update_interval_seconds: 10, - tree_discovery_interval_seconds: 1, enable_metrics: false, skip_v1_state_trees: skip_v1_state, skip_v1_address_trees: skip_v1_address, skip_v2_state_trees: skip_v2_state, skip_v2_address_trees: skip_v2_address, - tree_ids: vec![], sleep_after_processing_ms: 50, sleep_when_idle_ms: 100, - queue_polling_mode: crate::cli::QueuePollingMode::Indexer, - group_authority: None, + ..Default::default() }, rpc_pool_config: Default::default(), registry_pubkey: Pubkey::default(), diff --git a/forester/src/errors.rs b/forester/src/errors.rs index 250c1f684c..f88c6e1f21 100644 --- a/forester/src/errors.rs +++ b/forester/src/errors.rs @@ -81,6 +81,13 @@ pub enum 
RegistrationError { registration_end: u64, }, + #[error("Cannot finalize registration for epoch {epoch}. Current slot: {current_slot}, active phase ended: {active_phase_end_slot}")] + FinalizeRegistrationPhaseEnded { + epoch: u64, + current_slot: u64, + active_phase_end_slot: u64, + }, + #[error("Epoch registration returned no result")] EmptyRegistration, diff --git a/forester/src/forester_status.rs b/forester/src/forester_status.rs index 62812710f5..801d6a1799 100644 --- a/forester/src/forester_status.rs +++ b/forester/src/forester_status.rs @@ -250,7 +250,7 @@ pub async fn get_forester_status_with_options( // Filter trees by protocol group authority if enabled if filter_by_group_authority { - match fetch_protocol_group_authority(&rpc).await { + match fetch_protocol_group_authority(&rpc, "status").await { Ok(group_authority) => { let before_count = trees.len(); trees.retain(|tree| tree.owner == group_authority); diff --git a/forester/src/lib.rs b/forester/src/lib.rs index 203015ddbd..eb224a1e4e 100644 --- a/forester/src/lib.rs +++ b/forester/src/lib.rs @@ -9,6 +9,7 @@ pub mod errors; pub mod forester_status; pub mod health_check; pub mod helius_priority_fee_types; +pub mod logging; pub mod metrics; pub mod pagerduty; pub mod processor; @@ -38,7 +39,7 @@ use tokio::sync::{mpsc, oneshot, Mutex}; use tracing::debug; use crate::{ - epoch_manager::{run_service, WorkReport}, + epoch_manager::{generate_run_id, run_service, WorkReport}, metrics::QUEUE_LENGTH, processor::tx_cache::ProcessedHashCache, queue_helpers::{ @@ -148,6 +149,30 @@ pub async fn run_pipeline( shutdown_compressible: Option>, shutdown_bootstrap: Option>, work_report_sender: mpsc::Sender, +) -> Result<()> { + run_pipeline_with_run_id::( + config, + rpc_rate_limiter, + send_tx_rate_limiter, + shutdown_service, + shutdown_compressible, + shutdown_bootstrap, + work_report_sender, + generate_run_id(), + ) + .await +} + +#[allow(clippy::too_many_arguments)] +pub async fn run_pipeline_with_run_id( + config: 
Arc, + rpc_rate_limiter: Option, + send_tx_rate_limiter: Option, + shutdown_service: oneshot::Receiver<()>, + shutdown_compressible: Option>, + shutdown_bootstrap: Option>, + work_report_sender: mpsc::Sender, + run_id: String, ) -> Result<()> { let mut builder = SolanaRpcPoolBuilder::::default() .url(config.external_services.rpc_url.to_string()) @@ -240,6 +265,9 @@ pub async fn run_pipeline( } }); + // Extract flag before async closures to avoid moving config Arc + let helius_rpc = config.general_config.helius_rpc; + // Spawn bootstrap task for ctokens with shutdown support if let Some(mut shutdown_bootstrap_rx) = shutdown_bootstrap { let tracker_clone = ctoken_tracker.clone(); @@ -254,7 +282,10 @@ pub async fn run_pipeline( let rpc_url = rpc_url.clone(); let tracker = tracker_clone.clone(); async move { - compressible::bootstrap_ctoken_accounts(rpc_url, tracker, None).await + compressible::bootstrap_ctoken_accounts( + rpc_url, tracker, None, helius_rpc, + ) + .await } }); @@ -321,7 +352,10 @@ pub async fn run_pipeline( let rpc_url = rpc_url.clone(); let tracker = pda_tracker_clone.clone(); async move { - compressible::pda::bootstrap_pda_accounts(rpc_url, tracker, None).await + compressible::pda::bootstrap_pda_accounts( + rpc_url, tracker, None, helius_rpc, + ) + .await } }); @@ -377,8 +411,10 @@ pub async fn run_pipeline( let rpc_url = rpc_url.clone(); let tracker = mint_tracker_clone.clone(); async move { - compressible::mint::bootstrap_mint_accounts(rpc_url, tracker, None) - .await + compressible::mint::bootstrap_mint_accounts( + rpc_url, tracker, None, helius_rpc, + ) + .await } }); @@ -420,6 +456,7 @@ pub async fn run_pipeline( compressible_tracker, pda_tracker, mint_tracker, + run_id, ) .await; diff --git a/forester/src/logging.rs b/forester/src/logging.rs new file mode 100644 index 0000000000..22ef24f049 --- /dev/null +++ b/forester/src/logging.rs @@ -0,0 +1,121 @@ +use std::{ + collections::HashMap, + sync::{ + atomic::{AtomicU64, Ordering}, + Mutex, 
OnceLock, + }, + time::{Duration, Instant}, +}; + +#[derive(Debug, Default)] +pub struct ServiceHeartbeat { + active_cycles: AtomicU64, + tree_tasks_spawned: AtomicU64, + queues_started: AtomicU64, + queues_finished: AtomicU64, + items_processed: AtomicU64, + work_reports_sent: AtomicU64, + v2_recoverable_errors: AtomicU64, +} + +#[derive(Debug, Clone, Copy, Default)] +pub struct HeartbeatSnapshot { + pub active_cycles: u64, + pub tree_tasks_spawned: u64, + pub queues_started: u64, + pub queues_finished: u64, + pub items_processed: u64, + pub work_reports_sent: u64, + pub v2_recoverable_errors: u64, +} + +impl HeartbeatSnapshot { + pub fn delta_since(&self, previous: &Self) -> Self { + Self { + active_cycles: self.active_cycles.saturating_sub(previous.active_cycles), + tree_tasks_spawned: self + .tree_tasks_spawned + .saturating_sub(previous.tree_tasks_spawned), + queues_started: self.queues_started.saturating_sub(previous.queues_started), + queues_finished: self + .queues_finished + .saturating_sub(previous.queues_finished), + items_processed: self + .items_processed + .saturating_sub(previous.items_processed), + work_reports_sent: self + .work_reports_sent + .saturating_sub(previous.work_reports_sent), + v2_recoverable_errors: self + .v2_recoverable_errors + .saturating_sub(previous.v2_recoverable_errors), + } + } +} + +impl ServiceHeartbeat { + pub fn increment_active_cycle(&self) { + self.active_cycles.fetch_add(1, Ordering::Relaxed); + } + + pub fn add_tree_tasks_spawned(&self, count: usize) { + self.tree_tasks_spawned + .fetch_add(count as u64, Ordering::Relaxed); + } + + pub fn increment_queue_started(&self) { + self.queues_started.fetch_add(1, Ordering::Relaxed); + } + + pub fn increment_queue_finished(&self) { + self.queues_finished.fetch_add(1, Ordering::Relaxed); + } + + pub fn add_items_processed(&self, count: usize) { + self.items_processed + .fetch_add(count as u64, Ordering::Relaxed); + } + + pub fn increment_work_report_sent(&self) { + 
self.work_reports_sent.fetch_add(1, Ordering::Relaxed); + } + + pub fn increment_v2_recoverable_error(&self) { + self.v2_recoverable_errors.fetch_add(1, Ordering::Relaxed); + } + + pub fn snapshot(&self) -> HeartbeatSnapshot { + HeartbeatSnapshot { + active_cycles: self.active_cycles.load(Ordering::Relaxed), + tree_tasks_spawned: self.tree_tasks_spawned.load(Ordering::Relaxed), + queues_started: self.queues_started.load(Ordering::Relaxed), + queues_finished: self.queues_finished.load(Ordering::Relaxed), + items_processed: self.items_processed.load(Ordering::Relaxed), + work_reports_sent: self.work_reports_sent.load(Ordering::Relaxed), + v2_recoverable_errors: self.v2_recoverable_errors.load(Ordering::Relaxed), + } + } +} + +pub fn should_emit_rate_limited_warning(key: impl Into, interval: Duration) -> bool { + static LAST_EMIT_AT: OnceLock>> = OnceLock::new(); + const MAX_KEYS: usize = 2_048; + + let key = key.into(); + let now = Instant::now(); + let map = LAST_EMIT_AT.get_or_init(|| Mutex::new(HashMap::new())); + let mut map = map.lock().expect("rate limiter mutex poisoned"); + + if map.len() > MAX_KEYS { + let stale_after = interval.saturating_mul(4); + map.retain(|_, ts| now.duration_since(*ts) < stale_after); + } + + match map.get(&key) { + Some(last_emit) if now.duration_since(*last_emit) < interval => false, + _ => { + map.insert(key, now); + true + } + } +} diff --git a/forester/src/main.rs b/forester/src/main.rs index 91cdb9d506..fe99ac90e0 100644 --- a/forester/src/main.rs +++ b/forester/src/main.rs @@ -4,11 +4,12 @@ use clap::Parser; use forester::{ api_server::{spawn_api_server, ApiServerConfig}, cli::{Cli, Commands}, + epoch_manager::generate_run_id, errors::ForesterError, forester_status, health_check::run_health_check, metrics::register_metrics, - run_pipeline, + run_pipeline_with_run_id, telemetry::setup_telemetry, ForesterConfig, }; @@ -87,12 +88,15 @@ async fn main() -> Result<(), ForesterError> { .map(RateLimiter::new); let rpc_url_for_api: 
String = config.external_services.rpc_url.to_string(); + let run_id = generate_run_id(); let api_server_handle = spawn_api_server(ApiServerConfig { + run_id: Arc::::from(run_id.clone()), rpc_url: rpc_url_for_api, port: args.api_server_port, allow_public_bind: args.api_server_public_bind, compressible_state: None, prometheus_url: args.prometheus_url.clone(), + helius_rpc: args.helius_rpc, }); // Create compressible shutdown channels if compressible is enabled @@ -124,7 +128,7 @@ async fn main() -> Result<(), ForesterError> { (None, None) }; - run_pipeline::( + run_pipeline_with_run_id::( config, rpc_rate_limiter, send_tx_limiter, @@ -132,6 +136,7 @@ async fn main() -> Result<(), ForesterError> { shutdown_receiver_compressible, shutdown_receiver_bootstrap, work_report_sender, + run_id, ) .await? } @@ -146,22 +151,33 @@ async fn main() -> Result<(), ForesterError> { } Commands::Dashboard(args) => { tracing::info!( - "Starting standalone dashboard API server on port {}", - args.port + event = "dashboard_server_starting", + port = args.port, + "Starting standalone dashboard API server" ); + let run_id = generate_run_id(); let api_server_handle = spawn_api_server(ApiServerConfig { + run_id: Arc::::from(run_id), rpc_url: args.rpc_url.clone(), port: args.port, allow_public_bind: args.public_bind, compressible_state: None, prometheus_url: args.prometheus_url.clone(), + helius_rpc: false, }); // Block until Ctrl+C if let Err(e) = ctrl_c().await { - tracing::error!("Failed to listen for Ctrl+C: {}", e); + tracing::error!( + event = "dashboard_ctrlc_listener_failed", + error = %e, + "Failed to listen for Ctrl+C" + ); } - tracing::info!("Received Ctrl+C, shutting down dashboard API server..."); + tracing::info!( + event = "dashboard_shutdown_signal_received", + "Received Ctrl+C, shutting down dashboard API server" + ); api_server_handle.shutdown(); } } diff --git a/forester/src/processor/v1/helpers.rs b/forester/src/processor/v1/helpers.rs index 0d00d82c4b..5999fc0dc5 100644 
--- a/forester/src/processor/v1/helpers.rs +++ b/forester/src/processor/v1/helpers.rs @@ -1,4 +1,4 @@ -use std::sync::Arc; +use std::{sync::Arc, time::Duration}; use account_compression::{ processor::initialize_address_merkle_tree::Pubkey, @@ -19,7 +19,10 @@ use solana_program::instruction::Instruction; use tokio::time::Instant; use tracing::{info, warn}; -use crate::metrics::{update_indexer_proof_count, update_indexer_response_time}; +use crate::{ + logging::should_emit_rate_limited_warning, + metrics::{update_indexer_proof_count, update_indexer_response_time}, +}; const ADDRESS_PROOF_BATCH_SIZE: usize = 100; const ADDRESS_PROOF_MAX_RETRIES: u32 = 3; @@ -53,8 +56,9 @@ pub async fn fetch_proofs_and_create_instructions( for item in state_items.iter() { if item.tree_account.tree_type != TreeType::StateV1 { warn!( - "State item has unexpected tree type: {:?}", - item.tree_account.tree_type + event = "v1_state_item_unexpected_tree_type", + tree_type = ?item.tree_account.tree_type, + "State item has unexpected tree type" ); } } @@ -93,14 +97,22 @@ pub async fn fetch_proofs_and_create_instructions( let rpc = pool.get_connection().await?; if let Err(e) = wait_for_indexer(&*rpc).await { - warn!("Indexer not fully caught up, but proceeding anyway: {}", e); + if should_emit_rate_limited_warning("v1_wait_for_indexer", Duration::from_secs(30)) { + warn!( + event = "v1_wait_for_indexer_error", + error = %e, + "Indexer not fully caught up, but proceeding anyway" + ); + } } let address_proofs = if let Some((merkle_tree, addresses)) = address_data { let total_addresses = addresses.len(); info!( - "Fetching {} address proofs in batches of {}", - total_addresses, ADDRESS_PROOF_BATCH_SIZE + event = "v1_address_proofs_fetch_started", + requested = total_addresses, + batch_size = ADDRESS_PROOF_BATCH_SIZE, + "Fetching address proofs in batches" ); let start_time = Instant::now(); @@ -119,11 +131,12 @@ pub async fn fetch_proofs_and_create_instructions( // Exponential backoff: 500ms, 
1000ms, 2000ms let delay_ms = ADDRESS_PROOF_RETRY_BASE_DELAY_MS * (1 << (attempt - 1)); warn!( - "Retrying address proof batch {} (attempt {}/{}), waiting {}ms", - batch_idx, - attempt + 1, - ADDRESS_PROOF_MAX_RETRIES + 1, - delay_ms + event = "v1_address_proof_batch_retrying", + batch_index = batch_idx, + attempt = attempt + 1, + max_attempts = ADDRESS_PROOF_MAX_RETRIES + 1, + delay_ms, + "Retrying address proof batch" ); tokio::time::sleep(std::time::Duration::from_millis(delay_ms)).await; } @@ -138,22 +151,22 @@ pub async fn fetch_proofs_and_create_instructions( let proofs_received = response.value.items.len(); info!( - "Address proof batch {}: requested={}, received={}, duration={:.3}s{}", - batch_idx, - batch_size, - proofs_received, - batch_duration.as_secs_f64(), - if attempt > 0 { - format!(" (after {} retries)", attempt) - } else { - String::new() - } + event = "v1_address_proof_batch_completed", + batch_index = batch_idx, + requested = batch_size, + received = proofs_received, + duration_s = batch_duration.as_secs_f64(), + retries = attempt, + "Address proof batch completed" ); if proofs_received != batch_size { warn!( - "Address proof count mismatch in batch {}: requested={}, received={}", - batch_idx, batch_size, proofs_received + event = "v1_address_proof_batch_count_mismatch", + batch_index = batch_idx, + requested = batch_size, + received = proofs_received, + "Address proof count mismatch in batch" ); } @@ -171,11 +184,12 @@ pub async fn fetch_proofs_and_create_instructions( if let Some(e) = last_error { let batch_duration = batch_start.elapsed(); warn!( - "Failed to get address proofs for batch {} after {} attempts ({:.3}s): {}", - batch_idx, - ADDRESS_PROOF_MAX_RETRIES + 1, - batch_duration.as_secs_f64(), - e + event = "v1_address_proof_batch_failed", + batch_index = batch_idx, + attempts = ADDRESS_PROOF_MAX_RETRIES + 1, + duration_s = batch_duration.as_secs_f64(), + error = %e, + "Failed to get address proofs for batch" ); return 
Err(anyhow::anyhow!( "Failed to get address proofs for batch {} after {} retries: {}", @@ -188,10 +202,11 @@ pub async fn fetch_proofs_and_create_instructions( let total_duration = start_time.elapsed(); info!( - "Address proofs complete: requested={}, received={}, total_duration={:.3}s", - total_addresses, - all_proofs.len(), - total_duration.as_secs_f64() + event = "v1_address_proofs_fetch_completed", + requested = total_addresses, + received = all_proofs.len(), + duration_s = total_duration.as_secs_f64(), + "Address proofs fetch completed" ); update_indexer_response_time( @@ -208,7 +223,11 @@ pub async fn fetch_proofs_and_create_instructions( let state_proofs = if let Some(states) = state_data { let total_states = states.len(); - info!("Fetching {} state proofs", total_states); + info!( + event = "v1_state_proofs_fetch_started", + requested = total_states, + "Fetching state proofs" + ); let start_time = Instant::now(); @@ -221,10 +240,11 @@ pub async fn fetch_proofs_and_create_instructions( // Exponential backoff: 500ms, 1000ms, 2000ms let delay_ms = ADDRESS_PROOF_RETRY_BASE_DELAY_MS * (1 << (attempt - 1)); warn!( - "Retrying state proofs (attempt {}/{}), waiting {}ms", - attempt + 1, - ADDRESS_PROOF_MAX_RETRIES + 1, - delay_ms + event = "v1_state_proofs_retrying", + attempt = attempt + 1, + max_attempts = ADDRESS_PROOF_MAX_RETRIES + 1, + delay_ms, + "Retrying state proofs" ); tokio::time::sleep(std::time::Duration::from_millis(delay_ms)).await; } @@ -239,21 +259,20 @@ pub async fn fetch_proofs_and_create_instructions( let proofs_received = response.value.items.len(); info!( - "State proofs complete: requested={}, received={}, duration={:.3}s{}", - total_states, - proofs_received, - duration.as_secs_f64(), - if attempt > 0 { - format!(" (after {} retries)", attempt) - } else { - String::new() - } + event = "v1_state_proofs_fetch_completed", + requested = total_states, + received = proofs_received, + duration_s = duration.as_secs_f64(), + retries = attempt, + 
"State proofs fetch completed" ); if proofs_received != total_states { warn!( - "State proof count mismatch: requested={}, received={}", - total_states, proofs_received + event = "v1_state_proof_count_mismatch", + requested = total_states, + received = proofs_received, + "State proof count mismatch" ); } @@ -282,10 +301,11 @@ pub async fn fetch_proofs_and_create_instructions( if let Some(e) = last_error { let duration = start_time.elapsed(); warn!( - "Failed to get state proofs after {} attempts ({:.3}s): {}", - ADDRESS_PROOF_MAX_RETRIES + 1, - duration.as_secs_f64(), - e + event = "v1_state_proofs_fetch_failed", + attempts = ADDRESS_PROOF_MAX_RETRIES + 1, + duration_s = duration.as_secs_f64(), + error = %e, + "Failed to get state proofs" ); return Err(anyhow::anyhow!( "Failed to get state proofs after {} retries: {}", @@ -361,9 +381,10 @@ pub async fn fetch_proofs_and_create_instructions( Ok(tree) => tree, Err(e) => { tracing::error!( - "Failed to deserialize onchain tree {}: {}", - item.tree_account.merkle_tree, - e + event = "v1_onchain_tree_deserialize_failed", + tree = %item.tree_account.merkle_tree, + error = %e, + "Failed to deserialize onchain tree" ); return Err(anyhow::anyhow!("Failed to deserialize onchain tree: {}", e)); } @@ -374,21 +395,23 @@ pub async fn fetch_proofs_and_create_instructions( let onchain_changelog_index = onchain_tree.changelog_index(); tracing::info!( - "Creating nullify instruction for tree {}: hash={}, leaf_index={}, root_seq={}, changelog_index={}, indexer_root={}", - item.tree_account.merkle_tree, - bs58::encode(&item.queue_item_data.hash).into_string(), - proof.leaf_index, - proof.root_seq, - proof.root_seq % STATE_MERKLE_TREE_CHANGELOG, - bs58::encode(&proof.root).into_string() + event = "v1_debug_nullify_instruction", + tree = %item.tree_account.merkle_tree, + hash = %bs58::encode(&item.queue_item_data.hash).into_string(), + leaf_index = proof.leaf_index, + root_seq = proof.root_seq, + changelog_index = proof.root_seq % 
STATE_MERKLE_TREE_CHANGELOG, + indexer_root = %bs58::encode(&proof.root).into_string(), + "Creating nullify instruction" ); tracing::info!( - "Onchain tree {} state: current_root={}, root_index={}, changelog_index={}", - item.tree_account.merkle_tree, - bs58::encode(&onchain_root).into_string(), - onchain_root_index, - onchain_changelog_index + event = "v1_debug_onchain_tree_state", + tree = %item.tree_account.merkle_tree, + current_root = %bs58::encode(&onchain_root).into_string(), + root_index = onchain_root_index, + changelog_index = onchain_changelog_index, + "Onchain tree state" ); let capacity = onchain_tree.roots.capacity(); @@ -405,10 +428,11 @@ pub async fn fetch_proofs_and_create_instructions( .collect(); tracing::info!( - "Onchain root history (len={}, capacity={}): {:?}", - onchain_tree.roots.len(), + event = "v1_debug_onchain_root_history", + history_len = onchain_tree.roots.len(), capacity, - root_history, + root_history = ?root_history, + "Onchain root history" ); let indexer_root_position = @@ -421,9 +445,10 @@ pub async fn fetch_proofs_and_create_instructions( }); tracing::info!( - "Indexer root {} present_at_buffer_index={:?}", - bs58::encode(&proof.root).into_string(), - indexer_root_position, + event = "v1_debug_indexer_root_position", + indexer_root = %bs58::encode(&proof.root).into_string(), + present_at_buffer_index = ?indexer_root_position, + "Indexer root position in onchain buffer" ); if indexer_root_position.is_none() { @@ -527,8 +552,10 @@ pub fn calculate_compute_unit_price(target_lamports: u64, compute_units: u64) -> pub fn get_capped_priority_fee(cap_config: CapConfig) -> u64 { if cap_config.max_fee_lamports < cap_config.min_fee_lamports { warn!( - "Invalid priority fee cap config: max_fee_lamports ({}) < min_fee_lamports ({}); clamping max to min", - cap_config.max_fee_lamports, cap_config.min_fee_lamports + event = "v1_priority_fee_cap_invalid", + max_fee_lamports = cap_config.max_fee_lamports, + min_fee_lamports = 
cap_config.min_fee_lamports, + "Invalid priority fee cap config; clamping max to min" ); } let max_fee_lamports = cap_config.max_fee_lamports.max(cap_config.min_fee_lamports); diff --git a/forester/src/processor/v2/common.rs b/forester/src/processor/v2/common.rs index 21703db081..9917e5a9eb 100644 --- a/forester/src/processor/v2/common.rs +++ b/forester/src/processor/v2/common.rs @@ -78,6 +78,7 @@ pub struct ProverConfig { pub struct BatchContext { pub rpc_pool: Arc>, pub authority: Arc, + pub run_id: Arc, pub derivation: Pubkey, pub epoch: u64, pub merkle_tree: Pubkey, @@ -104,6 +105,7 @@ impl Clone for BatchContext { Self { rpc_pool: self.rpc_pool.clone(), authority: self.authority.clone(), + run_id: self.run_id.clone(), derivation: self.derivation, epoch: self.epoch, merkle_tree: self.merkle_tree, diff --git a/forester/src/processor/v2/helpers.rs b/forester/src/processor/v2/helpers.rs index 22dd157aad..ed135cb6a4 100644 --- a/forester/src/processor/v2/helpers.rs +++ b/forester/src/processor/v2/helpers.rs @@ -134,7 +134,7 @@ pub async fn fetch_paginated_batches( let page_size_elements = PAGE_SIZE_BATCHES * zkp_batch_size; if total_elements <= page_size_elements { - tracing::info!( + tracing::debug!( "fetch_paginated_batches: single page fetch with start_index=None, total_elements={}, page_size={}", total_elements, page_size_elements ); @@ -302,7 +302,7 @@ pub async fn fetch_batches( fetch_len: u64, zkp_batch_size: u64, ) -> crate::Result> { - tracing::info!( + tracing::debug!( "fetch_batches: tree={}, output_start={:?}, input_start={:?}, fetch_len={}, zkp_batch_size={}", context.merkle_tree, output_start_index, input_start_index, fetch_len, zkp_batch_size ); @@ -574,7 +574,7 @@ pub async fn fetch_streaming_address_batches( let page_size_elements = ADDRESS_PAGE_SIZE_BATCHES * zkp_batch_size; let num_pages = total_elements.div_ceil(page_size_elements) as usize; - tracing::info!( + tracing::debug!( "address fetch: {} elements ({} batches) in {} pages of {} batches 
each", total_elements, total_elements / zkp_batch_size, diff --git a/forester/src/processor/v2/processor.rs b/forester/src/processor/v2/processor.rs index 4df6ed4eae..1d166efa35 100644 --- a/forester/src/processor/v2/processor.rs +++ b/forester/src/processor/v2/processor.rs @@ -13,6 +13,7 @@ use tracing::{debug, info, warn}; use crate::{ epoch_manager::{CircuitMetrics, ProcessingMetrics}, + logging::should_emit_rate_limited_warning, processor::v2::{ batch_job_builder::BatchJobBuilder, common::WorkerPool, @@ -90,10 +91,13 @@ where let zkp_batch_size = strategy.fetch_zkp_batch_size(&context).await?; let current_root = strategy.fetch_onchain_root(&context).await?; info!( - "Initializing {} processor for tree {} with on-chain root {:?}[..4]", - strategy.name(), - context.merkle_tree, - ¤t_root[..4] + event = "v2_processor_initialized", + run_id = %context.run_id, + processor = strategy.name(), + tree = %context.merkle_tree, + zkp_batch_size, + root_prefix = ?¤t_root[..4], + "Initializing V2 processor" ); Ok(Self { context, @@ -142,9 +146,13 @@ where let remaining = total_batches.saturating_sub(cached.batches_processed); if remaining > 0 { info!( - "Using cached state: {} remaining batches (processed {}/{}, actual available: {})", - remaining, cached.batches_processed, total_batches, - if actual_available == usize::MAX { "max".to_string() } else { actual_available.to_string() } + event = "v2_cached_state_reused", + tree = %self.context.merkle_tree, + remaining_batches = remaining, + processed_batches = cached.batches_processed, + total_batches, + actual_available = if actual_available == usize::MAX { "max".to_string() } else { actual_available.to_string() }, + "Using cached queue state" ); let batches_to_process = remaining.min(self.context.max_batches_per_tree); @@ -180,7 +188,20 @@ where { let rpc = self.context.rpc_pool.get_connection().await?; if let Err(e) = wait_for_indexer(&*rpc).await { - warn!("wait_for_indexer error (proceeding anyway): {}", e); + if 
should_emit_rate_limited_warning("v2_wait_for_indexer", Duration::from_secs(30)) + { + warn!( + event = "wait_for_indexer_error", + error = %e, + "wait_for_indexer error (proceeding anyway)" + ); + } else { + debug!( + event = "wait_for_indexer_error_suppressed", + error = %e, + "Suppressing repeated wait_for_indexer warning" + ); + } } } @@ -232,10 +253,12 @@ where } RootReconcileDecision::ResetToOnchainAndStop(root) => { warn!( - "Root divergence: expected {:?}[..4], indexer {:?}[..4], on-chain {:?}[..4]. Resetting.", - &self.current_root[..4], - &queue_data.initial_root[..4], - &root[..4] + event = "v2_root_divergence_reset", + tree = %self.context.merkle_tree, + expected_root_prefix = ?&self.current_root[..4], + indexer_root_prefix = ?&queue_data.initial_root[..4], + onchain_root_prefix = ?&root[..4], + "Root divergence detected; resetting to on-chain root" ); self.current_root = root; self.cached_state = None; @@ -337,8 +360,10 @@ where if let Some(v2) = e.downcast_ref::() { if v2.is_constraint() { warn!( - "Tx sender constraint error for tree {}: {}", - self.context.merkle_tree, e + event = "v2_tx_sender_constraint_error", + tree = %self.context.merkle_tree, + error = %e, + "Tx sender constraint error" ); return Err(tx_result.unwrap_err()); } @@ -353,8 +378,10 @@ where ), Err(e) => { warn!( - "Tx sender error for tree {}: {}", - self.context.merkle_tree, e + event = "v2_tx_sender_error", + tree = %self.context.merkle_tree, + error = %e, + "Tx sender error" ); (0, Default::default(), Duration::ZERO) } @@ -395,8 +422,10 @@ where if let Err(e) = tx_result { warn!( - "Returning partial metrics despite error for tree {}: {}", - self.context.merkle_tree, e + event = "v2_partial_metrics_after_error", + tree = %self.context.merkle_tree, + error = %e, + "Returning partial metrics despite processing error" ); } @@ -572,10 +601,11 @@ where .clone(); info!( - "Pre-warming {} proofs for tree {} with root {:?}", - num_batches, - self.context.merkle_tree, - 
&initial_root[..4] + event = "v2_prewarm_started", + tree = %self.context.merkle_tree, + proofs = num_batches, + root_prefix = ?&initial_root[..4], + "Pre-warming proofs for tree" ); let (jobs_sent, timings, _staging_tree) = self @@ -612,8 +642,11 @@ where } Err(e) => { warn!( - "Proof generation failed during pre-warm for seq={}: {}", - result.seq, e + event = "v2_prewarm_proof_generation_failed", + tree = %self.context.merkle_tree, + seq = result.seq, + error = %e, + "Proof generation failed during pre-warm" ); } } @@ -623,16 +656,20 @@ where if proofs_cached < jobs_sent { warn!( - "Pre-warmed {} proofs but expected {} for tree {}", - proofs_cached, jobs_sent, self.context.merkle_tree + event = "v2_prewarm_partial", + tree = %self.context.merkle_tree, + proofs_cached, + expected_proofs = jobs_sent, + "Pre-warm completed with fewer proofs than expected" ); } else { info!( - "Pre-warmed {} proofs for tree {} (zkp_batch_size={}, items={})", + event = "v2_prewarm_completed", + tree = %self.context.merkle_tree, proofs_cached, - self.context.merkle_tree, - self.zkp_batch_size, - proofs_cached * self.zkp_batch_size as usize + zkp_batch_size = self.zkp_batch_size, + items = proofs_cached * self.zkp_batch_size as usize, + "Pre-warm completed" ); } diff --git a/forester/src/telemetry.rs b/forester/src/telemetry.rs index 2d881406b2..ae006d6c03 100644 --- a/forester/src/telemetry.rs +++ b/forester/src/telemetry.rs @@ -31,15 +31,22 @@ pub fn setup_telemetry() { let env_filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")); - let stdout_layer = fmt::Layer::new() + let stdout_layer = fmt::layer() .with_writer(std::io::stdout) + .compact() .with_ansi(true); if let Some(file_appender) = file_appender { let (non_blocking, guard) = tracing_appender::non_blocking(file_appender); let _ = LOG_GUARD.set(guard); - let file_layer = fmt::Layer::new().with_writer(non_blocking); + let file_layer = fmt::layer() + .json() + .with_current_span(true) + 
.with_span_list(true) + .flatten_event(true) + .with_ansi(false) + .with_writer(non_blocking); tracing_subscriber::registry() .with(stdout_layer) diff --git a/forester/src/tree_data_sync.rs b/forester/src/tree_data_sync.rs index 7d452e4da9..f21187d5e8 100644 --- a/forester/src/tree_data_sync.rs +++ b/forester/src/tree_data_sync.rs @@ -35,8 +35,9 @@ pub async fn fetch_trees(rpc: &R) -> Result> { } Err(e) => { warn!( - "Filtered tree fetch failed, falling back to unfiltered: {:?}", - e + event = "filtered_tree_fetch_failed_fallback_unfiltered", + error = ?e, + "Filtered tree fetch failed; falling back to unfiltered fetch" ); fetch_trees_unfiltered(rpc).await } @@ -92,7 +93,11 @@ pub async fn fetch_trees_filtered(rpc_url: &str) -> Result> { } } Err(e) => { - warn!("Failed to fetch batched trees: {:?}", e); + warn!( + event = "fetch_batched_trees_failed", + error = ?e, + "Failed to fetch batched trees" + ); errors.push(format!("batched: {}", e)); } } @@ -108,7 +113,11 @@ pub async fn fetch_trees_filtered(rpc_url: &str) -> Result> { } } Err(e) => { - warn!("Failed to fetch state V1 trees: {:?}", e); + warn!( + event = "fetch_state_v1_trees_failed", + error = ?e, + "Failed to fetch StateV1 trees" + ); errors.push(format!("state_v1: {}", e)); } } @@ -124,7 +133,11 @@ pub async fn fetch_trees_filtered(rpc_url: &str) -> Result> { } } Err(e) => { - warn!("Failed to fetch address V1 trees: {:?}", e); + warn!( + event = "fetch_address_v1_trees_failed", + error = ?e, + "Failed to fetch AddressV1 trees" + ); errors.push(format!("address_v1: {}", e)); } } @@ -316,7 +329,7 @@ fn create_tree_accounts( tree_accounts } -pub async fn fetch_protocol_group_authority(rpc: &R) -> Result { +pub async fn fetch_protocol_group_authority(rpc: &R, run_id: &str) -> Result { let registered_program_pda = light_registry::account_compression_cpi::sdk::get_registered_program_pda( &light_registry::ID, @@ -336,8 +349,10 @@ pub async fn fetch_protocol_group_authority(rpc: &R) -> Result { .map_err(|e| 
anyhow::anyhow!("Failed to deserialize RegisteredProgram: {}", e))?; info!( - "Fetched protocol group authority: {}", - registered_program.group_authority_pda + event = "protocol_group_authority_fetched", + run_id = %run_id, + group_authority = %registered_program.group_authority_pda, + "Fetched protocol group authority" ); Ok(registered_program.group_authority_pda) diff --git a/forester/tests/e2e_test.rs b/forester/tests/e2e_test.rs index afca1dc78f..855b765908 100644 --- a/forester/tests/e2e_test.rs +++ b/forester/tests/e2e_test.rs @@ -234,6 +234,7 @@ async fn e2e_test() { sleep_when_idle_ms: 100, queue_polling_mode: Default::default(), group_authority: None, + helius_rpc: false, }, rpc_pool_config: RpcPoolConfig { max_size: 50, diff --git a/forester/tests/legacy/priority_fee_test.rs b/forester/tests/legacy/priority_fee_test.rs index f7c805db30..875d20c5fc 100644 --- a/forester/tests/legacy/priority_fee_test.rs +++ b/forester/tests/legacy/priority_fee_test.rs @@ -30,10 +30,8 @@ async fn test_priority_fee_request() { std::env::var("FORESTER_WS_RPC_URL") .expect("FORESTER_WS_RPC_URL must be set in environment"), ), - indexer_url: Some( - std::env::var("FORESTER_INDEXER_URL") - .expect("FORESTER_INDEXER_URL must be set in environment"), - ), + indexer_url: std::env::var("FORESTER_INDEXER_URL") + .expect("FORESTER_INDEXER_URL must be set in environment"), prover_url: Some( std::env::var("FORESTER_PROVER_URL") .expect("FORESTER_PROVER_URL must be set in environment"), diff --git a/forester/tests/priority_fee_test.rs b/forester/tests/priority_fee_test.rs index 3772539302..dddb76a737 100644 --- a/forester/tests/priority_fee_test.rs +++ b/forester/tests/priority_fee_test.rs @@ -27,9 +27,7 @@ async fn test_priority_fee_request() { ws_rpc_url: Some( std::env::var("WS_RPC_URL").expect("WS_RPC_URL must be set in environment"), ), - indexer_url: Some( - std::env::var("INDEXER_URL").expect("INDEXER_URL must be set in environment"), - ), + indexer_url: 
std::env::var("INDEXER_URL").expect("INDEXER_URL must be set in environment"), prover_url: Some( std::env::var("PROVER_URL").expect("PROVER_URL must be set in environment"), ), @@ -84,6 +82,7 @@ async fn test_priority_fee_request() { api_server_port: 8080, group_authority: None, light_pda_programs: vec![], + helius_rpc: false, prometheus_url: None, }; diff --git a/forester/tests/test_compressible_ctoken.rs b/forester/tests/test_compressible_ctoken.rs index 3342fb5cb8..080e1c93a4 100644 --- a/forester/tests/test_compressible_ctoken.rs +++ b/forester/tests/test_compressible_ctoken.rs @@ -539,6 +539,7 @@ async fn run_bootstrap_test( rpc_url_clone, tracker_clone, Some(shutdown_rx), + false, ) .await { diff --git a/forester/tests/test_compressible_mint.rs b/forester/tests/test_compressible_mint.rs index 688f6c3c47..0e73f773e7 100644 --- a/forester/tests/test_compressible_mint.rs +++ b/forester/tests/test_compressible_mint.rs @@ -214,6 +214,7 @@ async fn test_compressible_mint_bootstrap() { rpc_url, tracker_clone, Some(shutdown_rx), + false, ) .await { diff --git a/forester/tests/test_compressible_pda.rs b/forester/tests/test_compressible_pda.rs index e04f630432..f3a447fe23 100644 --- a/forester/tests/test_compressible_pda.rs +++ b/forester/tests/test_compressible_pda.rs @@ -398,6 +398,7 @@ async fn test_compressible_pda_bootstrap() { rpc_url, tracker_clone, Some(shutdown_rx), + false, ) .await { diff --git a/forester/tests/test_utils.rs b/forester/tests/test_utils.rs index d9aaa2f0c2..1a1d3f18f6 100644 --- a/forester/tests/test_utils.rs +++ b/forester/tests/test_utils.rs @@ -115,6 +115,7 @@ pub fn forester_config() -> ForesterConfig { sleep_when_idle_ms: 100, queue_polling_mode: QueuePollingMode::OnChain, group_authority: None, + helius_rpc: false, }, rpc_pool_config: RpcPoolConfig { max_size: 50, From 263e9304d7fa3d52b382ec9017b5d23b1c038227 Mon Sep 17 00:00:00 2001 From: Sergey Timoshin Date: Fri, 20 Feb 2026 17:30:54 +0000 Subject: [PATCH 07/15] feat: enhance 
compressible data tracking - Introduced `forester_api_urls` argument in `DashboardArgs` for specifying multiple API base URLs. - Enhanced `EpochManager` to handle non-retryable registration errors gracefully. - Implemented `CompressibleTrackerHandles` struct to manage multiple trackers for compressible data. - Refactored `initialize_compressible_trackers` to streamline tracker initialization and bootstrap processes. - Updated `run_pipeline_with_run_id` to accept preconfigured tracker handles, improving modularity. - Modified `main` function to initialize compressible trackers and manage shutdown signals effectively. --- external/photon | 2 +- .../dashboard/src/app/api/[...path]/route.ts | 75 +- .../dashboard/src/app/compressible/page.tsx | 8 +- .../src/components/CompressiblePanel.tsx | 360 ++++++++- .../dashboard/src/hooks/useCompressible.ts | 2 +- forester/dashboard/src/lib/api.ts | 37 +- forester/dashboard/src/lib/utils.ts | 26 +- forester/dashboard/src/types/forester.ts | 40 + forester/src/api_server.rs | 705 +++++++++++++++++- forester/src/cli.rs | 8 + forester/src/epoch_manager.rs | 24 + forester/src/lib.rs | 438 ++++++----- forester/src/main.rs | 87 ++- 13 files changed, 1512 insertions(+), 300 deletions(-) diff --git a/external/photon b/external/photon index 6562a7d2f2..54ead0a77e 160000 --- a/external/photon +++ b/external/photon @@ -1 +1 @@ -Subproject commit 6562a7d2f250d802c8e43986f37802f98ba175de +Subproject commit 54ead0a77e50eed0f7beb704868ce6cb0b914b8f diff --git a/forester/dashboard/src/app/api/[...path]/route.ts b/forester/dashboard/src/app/api/[...path]/route.ts index 3c53696752..0fa4cf84fc 100644 --- a/forester/dashboard/src/app/api/[...path]/route.ts +++ b/forester/dashboard/src/app/api/[...path]/route.ts @@ -1,22 +1,83 @@ import { NextResponse } from "next/server"; +export const dynamic = "force-dynamic"; + const BACKEND_URL = - process.env.FORESTER_API_URL || "http://localhost:8080"; + process.env.FORESTER_API_URL || "http://127.0.0.1:8080"; 
+ +const BACKEND_TIMEOUT_MS = Number( + process.env.FORESTER_API_TIMEOUT_MS ?? 8000 +); + +function isAbortError(error: unknown): boolean { + return ( + typeof error === "object" && + error !== null && + "name" in error && + (error as { name?: string }).name === "AbortError" + ); +} + +function joinBackendUrl(path: string): string { + const base = BACKEND_URL.replace(/\/+$/, ""); + return `${base}/${path}`; +} export async function GET( - request: Request, + _request: Request, { params }: { params: Promise<{ path: string[] }> } ) { const { path } = await params; const backendPath = path.join("/"); + const upstream = joinBackendUrl(backendPath); + + const timeoutMs = + Number.isFinite(BACKEND_TIMEOUT_MS) && BACKEND_TIMEOUT_MS > 0 + ? BACKEND_TIMEOUT_MS + : 8000; + + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), timeoutMs); + try { - const response = await fetch(`${BACKEND_URL}/${backendPath}`); - const data = await response.json(); - return NextResponse.json(data, { status: response.status }); - } catch { + const response = await fetch(upstream, { + cache: "no-store", + signal: controller.signal, + }); + + const contentType = response.headers.get("content-type") ?? ""; + const payload = contentType.includes("application/json") + ? 
await response.json() + : { message: await response.text() }; + + if (!response.ok) { + return NextResponse.json( + { + error: `Forester backend returned ${response.status}`, + upstream, + details: payload, + }, + { status: response.status } + ); + } + + return NextResponse.json(payload, { status: response.status }); + } catch (error) { + if (isAbortError(error)) { + return NextResponse.json( + { + error: `Backend request timed out after ${timeoutMs}ms`, + upstream, + }, + { status: 504 } + ); + } + return NextResponse.json( - { error: "Backend unavailable" }, + { error: "Backend unavailable", upstream }, { status: 502 } ); + } finally { + clearTimeout(timer); } } diff --git a/forester/dashboard/src/app/compressible/page.tsx b/forester/dashboard/src/app/compressible/page.tsx index 8ea4fc3274..72f857cfcf 100644 --- a/forester/dashboard/src/app/compressible/page.tsx +++ b/forester/dashboard/src/app/compressible/page.tsx @@ -23,7 +23,13 @@ export default function CompressiblePage() { return (
-

Compressible Accounts

+
+

Compressible Accounts

+

+ Track what is currently compressible, what is waiting on rent + windows, and how fresh this view is. +

+
); diff --git a/forester/dashboard/src/components/CompressiblePanel.tsx b/forester/dashboard/src/components/CompressiblePanel.tsx index b753e99c41..e4a511b323 100644 --- a/forester/dashboard/src/components/CompressiblePanel.tsx +++ b/forester/dashboard/src/components/CompressiblePanel.tsx @@ -1,59 +1,363 @@ -import type { CompressibleResponse } from "@/types/forester"; -import { formatNumber } from "@/lib/utils"; +import type { + CompressibleResponse, + CompressibleTypeStats, +} from "@/types/forester"; +import { + formatAgeFromUnixSeconds, + formatNumber, + formatSlotCountdown, + truncateAddress, +} from "@/lib/utils"; interface CompressiblePanelProps { data: CompressibleResponse; } export function CompressiblePanel({ data }: CompressiblePanelProps) { - if (!data.enabled) { - return ( -
-

- No compressible account data available. -

-

- The dashboard will query on-chain data automatically. If this - persists, check the RPC connection. -

-
- ); - } + const source = data.source ?? "none"; + const sourceBadge = + source === "tracker" + ? "bg-emerald-100 text-emerald-800" + : source === "forester-apis" + ? "bg-sky-100 text-sky-800" + : source === "rpc" + ? "bg-blue-100 text-blue-800" + : "bg-gray-100 text-gray-700"; + + const sourceLabel = + source === "tracker" + ? "In-memory tracker" + : source === "forester-apis" + ? "Forester API aggregate" + : source === "rpc" + ? "RPC scan" + : "Unavailable"; - const cards = [ + const rows: { + key: string; + label: string; + description: string; + stats?: CompressibleTypeStats; + fallbackCount?: number; + contributesToTotal?: boolean; + }[] = [ + { + key: "ctoken", + label: "Light Token", + description: "Fungible decompressed token accounts (includes ATA)", + stats: data.ctoken, + fallbackCount: data.ctoken_count, + contributesToTotal: true, + }, { - label: "CToken Accounts", - value: data.ctoken_count, - desc: "Compressed token accounts tracked", + key: "ata", + label: "ATA", + description: "Associated token accounts", + stats: data.ata, + fallbackCount: data.ata_count, + contributesToTotal: false, }, { - label: "PDA Accounts", - value: data.pda_count, - desc: "Program-derived accounts tracked", + key: "pda", + label: "PDA", + description: "Program-derived account records", + stats: data.pda, + fallbackCount: data.pda_count, + contributesToTotal: true, }, { - label: "Mint Accounts", - value: data.mint_count, - desc: "Mint accounts tracked", + key: "mint", + label: "Mint", + description: "Non-fungible mint accounts", + stats: data.mint, + fallbackCount: data.mint_count, + contributesToTotal: true, }, ]; + const inferredTracked = rows.reduce((acc, row) => { + if (row.contributesToTotal === false) { + return acc; + } + const tracked = row.stats?.tracked ?? row.fallbackCount ?? 0; + return acc + tracked; + }, 0); + + const totalTracked = data.total_tracked ?? 
inferredTracked; + const totalReady = data.total_ready; + const totalWaiting = data.total_waiting; + const totalUnknown = + totalReady != null && totalWaiting != null + ? Math.max(0, totalTracked - totalReady - totalWaiting) + : totalTracked; + + const noData = + !data.enabled && + rows.every((row) => (row.stats?.tracked ?? row.fallbackCount ?? 0) === 0); + + const upstreamTotal = data.upstreams?.length ?? 0; + const upstreamHealthy = data.upstreams?.filter((u) => u.ok).length ?? 0; + const nextGlobalReadySlot = rows + .map((row) => row.stats?.next_ready_slot) + .filter((slot): slot is number => slot != null) + .reduce((min, slot) => (min == null ? slot : Math.min(min, slot)), null); + + const notices: { level: "info" | "warn" | "error"; message: string }[] = []; + if (totalTracked === 0) { + notices.push({ + level: "info", + message: "No compressible accounts are currently tracked.", + }); + } + if ((totalReady ?? 0) > 0) { + notices.push({ + level: "info", + message: + "Funding status: some tracked accounts are currently eligible for compression.", + }); + } else if ((totalWaiting ?? 0) > 0) { + notices.push({ + level: "info", + message: `Funding status: tracked accounts are currently funded. Next eligibility: ${formatSlotCountdown( + data.current_slot, + nextGlobalReadySlot + )}.`, + }); + } + if (upstreamTotal > 0 && upstreamHealthy < upstreamTotal) { + notices.push({ + level: "error", + message: `Only ${upstreamHealthy}/${upstreamTotal} upstream forester API endpoints are healthy.`, + }); + } + return (
-
- {cards.map((card) => ( +
+
+ + Source: {sourceLabel} + + + Updated: {formatAgeFromUnixSeconds(data.cached_at)} + + + Slot:{" "} + {data.current_slot != null ? formatNumber(data.current_slot) : "unknown"} + + {data.refresh_interval_secs != null && ( + + Refresh: {data.refresh_interval_secs}s + + )} + {upstreamTotal > 0 && ( + + Upstreams: {upstreamHealthy}/{upstreamTotal} + + )} +
+ + {noData && ( +

+ No compressible account data available yet. +

+ )} + + {data.error && ( +
+ {data.error} +
+ )} + + {data.note && ( +
+ {data.note} +
+ )} + + {notices.map((notice, idx) => ( +
+ {notice.message} +
+ ))} + + {data.upstreams && data.upstreams.length > 0 && ( +
+

+ Upstream Forester APIs +

+
+ {data.upstreams.map((upstream) => ( +
+ + {truncateAddress(upstream.base_url, 12)} + + + {upstream.ok ? "ok" : "down"} + +
+ ))} +
+
+ )} +
+ +
+ {[ + { + label: "Tracked Accounts", + value: formatNumber(totalTracked), + desc: "Total visible across account types", + }, + { + label: "Ready Now", + value: totalReady != null ? formatNumber(totalReady) : "unknown", + desc: "Eligible for compression at current slot", + }, + { + label: "Waiting", + value: totalWaiting != null ? formatNumber(totalWaiting) : "unknown", + desc: "Not yet at compressible slot", + }, + { + label: "Unknown", + value: totalUnknown != null ? formatNumber(totalUnknown) : "unknown", + desc: "Tracked with missing readiness context", + }, + ].map((card) => (
{card.label}
- {card.value != null ? formatNumber(card.value) : "-"} + {card.value}
{card.desc}
))}
+

+ ATA counts are included in Light Token totals. +

+ +
+ {rows.map((row) => { + const tracked = row.stats?.tracked ?? row.fallbackCount; + const ready = row.stats?.ready; + const waiting = row.stats?.waiting; + const unknown = + tracked != null && ready != null && waiting != null + ? Math.max(0, tracked - ready - waiting) + : tracked; + + return ( +
+
{row.label}
+
{row.description}
+
+
+ Tracked + + {tracked != null ? formatNumber(tracked) : "unknown"} + +
+
+ Ready now + + {ready != null ? formatNumber(ready) : "unknown"} + +
+
+ Waiting + + {waiting != null ? formatNumber(waiting) : "unknown"} + +
+
+ Unknown + + {unknown != null ? formatNumber(unknown) : "unknown"} + +
+
+ Next ready + + {formatSlotCountdown( + data.current_slot, + row.stats?.next_ready_slot + )} + +
+
+
+ ); + })} +
+ + {data.pda_programs && data.pda_programs.length > 0 && ( +
+

+ PDA Program Breakdown +

+
+ {data.pda_programs.map((program) => ( +
+
+ + {truncateAddress(program.program_id, 6)} + + + tracked {formatNumber(program.tracked)} + +
+
+ + ready{" "} + {program.ready != null ? formatNumber(program.ready) : "unknown"} + + + waiting{" "} + {program.waiting != null + ? formatNumber(program.waiting) + : "unknown"} + + + next{" "} + {formatSlotCountdown(data.current_slot, program.next_ready_slot)} + +
+
+ ))} +
+
+ )}
); } diff --git a/forester/dashboard/src/hooks/useCompressible.ts b/forester/dashboard/src/hooks/useCompressible.ts index 1158185f79..364ceded13 100644 --- a/forester/dashboard/src/hooks/useCompressible.ts +++ b/forester/dashboard/src/hooks/useCompressible.ts @@ -4,7 +4,7 @@ import type { CompressibleResponse } from "@/types/forester"; export function useCompressible() { return useSWR("/compressible", fetcher, { - refreshInterval: 15000, + refreshInterval: 5000, revalidateOnFocus: true, }); } diff --git a/forester/dashboard/src/lib/api.ts b/forester/dashboard/src/lib/api.ts index 7e1ee6e8e9..ff9084b9dd 100644 --- a/forester/dashboard/src/lib/api.ts +++ b/forester/dashboard/src/lib/api.ts @@ -1,6 +1,10 @@ const API_URL = process.env.NEXT_PUBLIC_FORESTER_API_URL ?? "/api"; +const REQUEST_TIMEOUT_MS = Number( + process.env.NEXT_PUBLIC_FORESTER_API_TIMEOUT_MS ?? 8000 +); + export class ApiError extends Error { constructor( message: string, @@ -12,16 +16,44 @@ export class ApiError extends Error { } } +function isAbortError(error: unknown): boolean { + return ( + typeof error === "object" && + error !== null && + "name" in error && + (error as { name?: string }).name === "AbortError" + ); +} + export async function fetchApi(path: string): Promise { + const url = `${API_URL}${path}`; + const timeoutMs = + Number.isFinite(REQUEST_TIMEOUT_MS) && REQUEST_TIMEOUT_MS > 0 + ? REQUEST_TIMEOUT_MS + : 8000; + + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), timeoutMs); + let res: Response; try { - res = await fetch(`${API_URL}${path}`); + res = await fetch(url, { + cache: "no-store", + signal: controller.signal, + }); } catch (e) { - // Network error — server not reachable + if (isAbortError(e)) { + throw new Error( + `Request to forester API timed out after ${timeoutMs}ms (${url}).` + ); + } throw new Error( `Cannot connect to forester API at ${API_URL}. 
Make sure the API server is running.` ); + } finally { + clearTimeout(timer); } + if (!res.ok) { let serverMsg = ""; try { @@ -36,6 +68,7 @@ export async function fetchApi(path: string): Promise { serverMsg ); } + return res.json(); } diff --git a/forester/dashboard/src/lib/utils.ts b/forester/dashboard/src/lib/utils.ts index 0249948133..72c581fb68 100644 --- a/forester/dashboard/src/lib/utils.ts +++ b/forester/dashboard/src/lib/utils.ts @@ -16,8 +16,10 @@ export function formatPercentage(n: number, decimals = 2): string { return `${n.toFixed(decimals)}%`; } +const DEFAULT_SLOT_DURATION_MS = 400; + export function slotsToTime(slots: number): string { - const seconds = Math.round(slots * 0.46); + const seconds = Math.round((slots * DEFAULT_SLOT_DURATION_MS) / 1000); if (seconds < 60) return `${seconds}s`; if (seconds < 3600) return `${Math.round(seconds / 60)}m`; const hours = Math.floor(seconds / 3600); @@ -25,6 +27,28 @@ export function slotsToTime(slots: number): string { return mins > 0 ? `${hours}h ${mins}m` : `${hours}h`; } +export function formatAgeFromUnixSeconds(unixTs: number | null | undefined): string { + if (unixTs == null || unixTs <= 0) return "unknown"; + const ageSec = Math.max(0, Math.floor(Date.now() / 1000) - unixTs); + if (ageSec < 5) return "just now"; + if (ageSec < 60) return `${ageSec}s ago`; + if (ageSec < 3600) return `${Math.round(ageSec / 60)}m ago`; + const hours = Math.floor(ageSec / 3600); + const mins = Math.round((ageSec % 3600) / 60); + return mins > 0 ? 
`${hours}h ${mins}m ago` : `${hours}h ago`; +} + +export function formatSlotCountdown( + currentSlot: number | null | undefined, + nextReadySlot: number | null | undefined +): string { + if (nextReadySlot == null) return "n/a"; + if (currentSlot == null) return `slot ${nextReadySlot.toLocaleString()}`; + if (currentSlot > nextReadySlot) return "ready now"; + const remaining = nextReadySlot - currentSlot; + return `${remaining.toLocaleString()} slots (~${slotsToTime(remaining)})`; +} + export function batchStateLabel(state: number): string { switch (state) { case 0: diff --git a/forester/dashboard/src/types/forester.ts b/forester/dashboard/src/types/forester.ts index c9f047dfe4..8f932fbd42 100644 --- a/forester/dashboard/src/types/forester.ts +++ b/forester/dashboard/src/types/forester.ts @@ -83,6 +83,46 @@ export interface MetricsResponse { export interface CompressibleResponse { enabled: boolean; ctoken_count?: number; + ata_count?: number; pda_count?: number; mint_count?: number; + current_slot?: number; + total_tracked?: number; + total_ready?: number; + total_waiting?: number; + ctoken?: CompressibleTypeStats; + ata?: CompressibleTypeStats; + pda?: CompressibleTypeStats; + mint?: CompressibleTypeStats; + pda_programs?: PdaProgramStats[]; + upstreams?: CompressibleUpstreamStatus[]; + note?: string; + error?: string; + refresh_interval_secs?: number; + source?: string; + cached_at?: number; +} + +export interface CompressibleTypeStats { + tracked: number; + ready?: number; + waiting?: number; + next_ready_slot?: number; +} + +export interface PdaProgramStats { + program_id: string; + tracked: number; + ready?: number; + waiting?: number; + next_ready_slot?: number; +} + + +export interface CompressibleUpstreamStatus { + base_url: string; + ok: boolean; + source?: string; + cached_at?: number; + error?: string; } diff --git a/forester/src/api_server.rs b/forester/src/api_server.rs index d08e3b5cfa..2305bc306e 100644 --- a/forester/src/api_server.rs +++ 
b/forester/src/api_server.rs @@ -6,7 +6,10 @@ use tracing::{error, info, warn}; use warp::Filter; use crate::{ - compressible::{CTokenAccountTracker, MintAccountTracker, PdaAccountTracker}, + compressible::{ + traits::{CompressibleState, CompressibleTracker}, + CTokenAccountState, CTokenAccountTracker, MintAccountTracker, PdaAccountTracker, + }, forester_status::get_forester_status, metrics::REGISTRY, }; @@ -36,9 +39,72 @@ pub struct CompressibleResponse { #[serde(skip_serializing_if = "Option::is_none")] pub ctoken_count: Option, #[serde(skip_serializing_if = "Option::is_none")] + pub ata_count: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub pda_count: Option, #[serde(skip_serializing_if = "Option::is_none")] pub mint_count: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub current_slot: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub total_tracked: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub total_ready: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub total_waiting: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub ctoken: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub ata: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub pda: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub mint: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub pda_programs: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub upstreams: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub note: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub refresh_interval_secs: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CompressibleTypeStats { + pub tracked: usize, + #[serde(skip_serializing_if = "Option::is_none")] + pub ready: Option, + #[serde(skip_serializing_if = "Option::is_none")] 
+ pub waiting: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub next_ready_slot: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PdaProgramStats { + pub program_id: String, + pub tracked: usize, + #[serde(skip_serializing_if = "Option::is_none")] + pub ready: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub waiting: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub next_ready_slot: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CompressibleUpstreamStatus { + pub base_url: String, + pub ok: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub source: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub cached_at: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -73,8 +139,22 @@ impl CompressibleSnapshot { data: CompressibleResponse { enabled: false, ctoken_count: None, + ata_count: None, pda_count: None, mint_count: None, + current_slot: None, + total_tracked: None, + total_ready: None, + total_waiting: None, + ctoken: None, + ata: None, + pda: None, + mint: None, + pda_programs: None, + upstreams: None, + note: None, + error: None, + refresh_interval_secs: None, }, source: "none".to_string(), cached_at: 0, @@ -82,6 +162,396 @@ impl CompressibleSnapshot { } } +fn summarize_slots(slots: I, current_slot: Option) -> CompressibleTypeStats +where + I: IntoIterator, +{ + let all_slots: Vec = slots.into_iter().collect(); + let tracked = all_slots.len(); + + if let Some(slot) = current_slot { + let mut ready = 0usize; + let mut next_ready_slot: Option = None; + for compressible_slot in all_slots { + if slot > compressible_slot { + ready += 1; + } else { + next_ready_slot = Some( + next_ready_slot + .map(|current_min| current_min.min(compressible_slot)) + .unwrap_or(compressible_slot), + ); + } + } + CompressibleTypeStats { + tracked, + ready: Some(ready), + waiting: 
Some(tracked.saturating_sub(ready)), + next_ready_slot, + } + } else { + CompressibleTypeStats { + tracked, + ready: None, + waiting: None, + next_ready_slot: None, + } + } +} + +fn is_ctoken_ata(state: &CTokenAccountState) -> bool { + let owner = solana_sdk::pubkey::Pubkey::new_from_array(state.account.owner.to_bytes()); + let mint = solana_sdk::pubkey::Pubkey::new_from_array(state.account.mint.to_bytes()); + let light_token_program_id = + solana_sdk::pubkey::Pubkey::new_from_array(light_token_interface::LIGHT_TOKEN_PROGRAM_ID); + + let expected_ata = solana_sdk::pubkey::Pubkey::find_program_address( + &[ + owner.as_ref(), + light_token_program_id.as_ref(), + mint.as_ref(), + ], + &light_token_program_id, + ) + .0; + + state.pubkey == expected_ata +} + +fn summarize_ctoken_and_ata_slots( + tracker: &CTokenAccountTracker, + current_slot: Option, +) -> (CompressibleTypeStats, CompressibleTypeStats) { + let mut ctoken_slots: Vec = Vec::new(); + let mut ata_slots: Vec = Vec::new(); + + for entry in tracker.accounts().iter() { + let state = entry.value(); + let slot = state.compressible_slot(); + ctoken_slots.push(slot); + if is_ctoken_ata(state) { + ata_slots.push(slot); + } + } + + ( + summarize_slots(ctoken_slots, current_slot), + summarize_slots(ata_slots, current_slot), + ) +} + +fn aggregate_optional_sum(values: impl Iterator>) -> Option { + let mut sum = 0usize; + let mut seen = false; + for value in values { + if let Some(v) = value { + sum = sum.saturating_add(v); + seen = true; + } + } + if seen { + Some(sum) + } else { + None + } +} + +fn aggregate_type_stats( + stats: impl Iterator>, +) -> Option { + let mut tracked = 0usize; + let mut ready = 0usize; + let mut waiting = 0usize; + let mut ready_seen = false; + let mut waiting_seen = false; + let mut next_ready_slot: Option = None; + let mut any = false; + + for stat in stats.flatten() { + any = true; + tracked = tracked.saturating_add(stat.tracked); + if let Some(v) = stat.ready { + ready = 
ready.saturating_add(v); + ready_seen = true; + } + if let Some(v) = stat.waiting { + waiting = waiting.saturating_add(v); + waiting_seen = true; + } + if let Some(slot) = stat.next_ready_slot { + next_ready_slot = Some( + next_ready_slot + .map(|current| current.min(slot)) + .unwrap_or(slot), + ); + } + } + + if !any { + return None; + } + + Some(CompressibleTypeStats { + tracked, + ready: ready_seen.then_some(ready), + waiting: waiting_seen.then_some(waiting), + next_ready_slot, + }) +} + +async fn fetch_upstream_compressible( + client: &reqwest::Client, + base_url: String, +) -> (CompressibleUpstreamStatus, Option) { + let normalized = base_url.trim_end_matches('/').to_string(); + let endpoint = format!("{}/compressible", normalized); + + let response = + match tokio::time::timeout(Duration::from_secs(6), client.get(&endpoint).send()).await { + Ok(Ok(resp)) => resp, + Ok(Err(e)) => { + return ( + CompressibleUpstreamStatus { + base_url: normalized, + ok: false, + source: None, + cached_at: None, + error: Some(format!("request failed: {}", e)), + }, + None, + ); + } + Err(_) => { + return ( + CompressibleUpstreamStatus { + base_url: normalized, + ok: false, + source: None, + cached_at: None, + error: Some("request timed out after 6s".to_string()), + }, + None, + ); + } + }; + + if !response.status().is_success() { + let status = response.status(); + return ( + CompressibleUpstreamStatus { + base_url: normalized, + ok: false, + source: None, + cached_at: None, + error: Some(format!("upstream returned {}", status)), + }, + None, + ); + } + + let snapshot = match response.json::().await { + Ok(snapshot) => snapshot, + Err(e) => { + return ( + CompressibleUpstreamStatus { + base_url: normalized, + ok: false, + source: None, + cached_at: None, + error: Some(format!("invalid json payload: {}", e)), + }, + None, + ); + } + }; + + ( + CompressibleUpstreamStatus { + base_url: normalized, + ok: true, + source: Some(snapshot.source.clone()), + cached_at: 
Some(snapshot.cached_at), + error: None, + }, + Some(snapshot), + ) +} + +async fn fetch_compressible_snapshot_from_foresters( + forester_api_urls: &[String], + run_id: &str, +) -> Option { + if forester_api_urls.is_empty() { + return None; + } + + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs() as i64; + + let client = reqwest::Client::new(); + let calls = forester_api_urls + .iter() + .cloned() + .map(|url| fetch_upstream_compressible(&client, url)); + + let responses = futures::future::join_all(calls).await; + let mut upstreams: Vec = Vec::with_capacity(responses.len()); + let mut snapshots: Vec = Vec::new(); + + for (status, snapshot) in responses { + upstreams.push(status); + if let Some(snapshot) = snapshot { + snapshots.push(snapshot); + } + } + + let ok_count = snapshots.len(); + let fail_count = upstreams.len().saturating_sub(ok_count); + + if snapshots.is_empty() { + warn!( + event = "api_server_upstream_compressible_all_failed", + run_id = %run_id, + total_upstreams = upstreams.len(), + "All upstream forester compressible endpoints failed" + ); + return Some(CompressibleSnapshot { + data: CompressibleResponse { + enabled: false, + ctoken_count: None, + ata_count: None, + pda_count: None, + mint_count: None, + current_slot: None, + total_tracked: None, + total_ready: None, + total_waiting: None, + ctoken: None, + ata: None, + pda: None, + mint: None, + pda_programs: None, + upstreams: Some(upstreams), + note: None, + error: Some( + "Failed to fetch /compressible from all configured forester APIs".to_string(), + ), + refresh_interval_secs: Some(5), + }, + source: "forester-apis".to_string(), + cached_at: now, + }); + } + + let ctoken_count = aggregate_optional_sum(snapshots.iter().map(|s| s.data.ctoken_count)); + let ata_count = aggregate_optional_sum(snapshots.iter().map(|s| s.data.ata_count)); + let pda_count = aggregate_optional_sum(snapshots.iter().map(|s| s.data.pda_count)); + let 
mint_count = aggregate_optional_sum(snapshots.iter().map(|s| s.data.mint_count)); + + let ctoken = aggregate_type_stats(snapshots.iter().map(|s| s.data.ctoken.clone())); + let ata = aggregate_type_stats(snapshots.iter().map(|s| s.data.ata.clone())); + let pda = aggregate_type_stats(snapshots.iter().map(|s| s.data.pda.clone())); + let mint = aggregate_type_stats(snapshots.iter().map(|s| s.data.mint.clone())); + + let mut pda_program_map: HashMap = HashMap::new(); + for snapshot in &snapshots { + if let Some(programs) = &snapshot.data.pda_programs { + for row in programs { + let entry = pda_program_map.entry(row.program_id.clone()).or_insert( + CompressibleTypeStats { + tracked: 0, + ready: Some(0), + waiting: Some(0), + next_ready_slot: None, + }, + ); + entry.tracked = entry.tracked.saturating_add(row.tracked); + entry.ready = Some( + entry + .ready + .unwrap_or(0) + .saturating_add(row.ready.unwrap_or(0)), + ); + entry.waiting = Some( + entry + .waiting + .unwrap_or(0) + .saturating_add(row.waiting.unwrap_or(0)), + ); + if let Some(slot) = row.next_ready_slot { + entry.next_ready_slot = Some( + entry + .next_ready_slot + .map(|current| current.min(slot)) + .unwrap_or(slot), + ); + } + } + } + } + + let mut pda_programs: Vec = pda_program_map + .into_iter() + .map(|(program_id, stats)| PdaProgramStats { + program_id, + tracked: stats.tracked, + ready: stats.ready, + waiting: stats.waiting, + next_ready_slot: stats.next_ready_slot, + }) + .collect(); + pda_programs.sort_by(|a, b| a.program_id.cmp(&b.program_id)); + + let current_slot = snapshots.iter().filter_map(|s| s.data.current_slot).max(); + let total_tracked = aggregate_optional_sum(snapshots.iter().map(|s| s.data.total_tracked)) + .or_else(|| { + Some(ctoken_count.unwrap_or(0) + pda_count.unwrap_or(0) + mint_count.unwrap_or(0)) + }); + let total_ready = aggregate_optional_sum(snapshots.iter().map(|s| s.data.total_ready)); + let total_waiting = aggregate_optional_sum(snapshots.iter().map(|s| 
s.data.total_waiting)); + + let note = if fail_count > 0 { + Some(format!( + "Aggregated from {}/{} forester API endpoints ({} unavailable).", + ok_count, + upstreams.len(), + fail_count + )) + } else { + Some(format!( + "Aggregated from {} forester API endpoint(s).", + ok_count + )) + }; + + Some(CompressibleSnapshot { + data: CompressibleResponse { + enabled: true, + ctoken_count, + ata_count, + pda_count, + mint_count, + current_slot, + total_tracked, + total_ready, + total_waiting, + ctoken, + ata, + pda, + mint, + pda_programs: (!pda_programs.is_empty()).then_some(pda_programs), + upstreams: Some(upstreams), + note, + error: None, + refresh_interval_secs: Some(5), + }, + source: "forester-apis".to_string(), + cached_at: now, + }) +} + #[derive(Clone)] pub(crate) struct CompressibleTrackers { pub ctoken: Option>, @@ -105,6 +575,7 @@ pub struct ApiServerConfig { pub compressible_state: Option, pub prometheus_url: Option, pub helius_rpc: bool, + pub forester_api_urls: Vec, } /// Default timeout for status endpoint in seconds @@ -201,30 +672,149 @@ pub(crate) async fn fetch_metrics_snapshot( /// Fetch compressible counts: try in-memory trackers first, then RPC. 
pub(crate) async fn fetch_compressible_snapshot( trackers: &Option, + forester_api_urls: &[String], rpc_url: &str, helius_rpc: bool, run_id: &str, ) -> CompressibleSnapshot { - use crate::compressible::traits::CompressibleTracker; - let now = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .unwrap_or_default() .as_secs() as i64; if let Some(ref t) = trackers { + let client = reqwest::Client::new(); + let current_slot = match crate::compressible::bootstrap_helpers::get_current_slot( + &client, rpc_url, + ) + .await + { + Ok(slot) => Some(slot), + Err(e) => { + warn!( + event = "api_server_compressible_slot_fetch_failed", + run_id = %run_id, + error = %e, + "Failed to fetch current slot for compressible readiness breakdown" + ); + None + } + }; + let (ctoken, ata) = if let Some(tracker) = t.ctoken.as_ref() { + let (ctoken_stats, ata_stats) = summarize_ctoken_and_ata_slots(tracker, current_slot); + (Some(ctoken_stats), Some(ata_stats)) + } else { + (None, None) + }; + + let pda = t.pda.as_ref().map(|tracker| { + summarize_slots( + tracker + .accounts() + .iter() + .map(|entry| entry.value().compressible_slot()), + current_slot, + ) + }); + + let mint = t.mint.as_ref().map(|tracker| { + summarize_slots( + tracker + .accounts() + .iter() + .map(|entry| entry.value().compressible_slot()), + current_slot, + ) + }); + + let mut pda_programs: Option> = None; + if let Some(pda_tracker) = t.pda.as_ref() { + let mut by_program: HashMap> = HashMap::new(); + for entry in pda_tracker.accounts().iter() { + let state = entry.value(); + by_program + .entry(state.program_id.to_string()) + .or_default() + .push(state.compressible_slot()); + } + let mut rows: Vec = by_program + .into_iter() + .map(|(program_id, slots)| { + let stats = summarize_slots(slots, current_slot); + PdaProgramStats { + program_id, + tracked: stats.tracked, + ready: stats.ready, + waiting: stats.waiting, + next_ready_slot: stats.next_ready_slot, + } + }) + .collect(); + rows.sort_by(|a, b| 
a.program_id.cmp(&b.program_id)); + pda_programs = Some(rows); + } + + let total_tracked = ctoken.as_ref().map(|s| s.tracked).unwrap_or(0) + + pda.as_ref().map(|s| s.tracked).unwrap_or(0) + + mint.as_ref().map(|s| s.tracked).unwrap_or(0); + + let (total_ready, total_waiting, note) = if current_slot.is_some() { + ( + Some( + ctoken.as_ref().and_then(|s| s.ready).unwrap_or(0) + + pda.as_ref().and_then(|s| s.ready).unwrap_or(0) + + mint.as_ref().and_then(|s| s.ready).unwrap_or(0), + ), + Some( + ctoken.as_ref().and_then(|s| s.waiting).unwrap_or(0) + + pda.as_ref().and_then(|s| s.waiting).unwrap_or(0) + + mint.as_ref().and_then(|s| s.waiting).unwrap_or(0), + ), + None, + ) + } else { + ( + None, + None, + Some( + "Current slot is unavailable; readiness breakdown is temporarily unknown." + .to_string(), + ), + ) + }; + return CompressibleSnapshot { data: CompressibleResponse { enabled: true, - ctoken_count: t.ctoken.as_ref().map(|tr| tr.len()), - pda_count: t.pda.as_ref().map(|tr| tr.len()), - mint_count: t.mint.as_ref().map(|tr| tr.len()), + ctoken_count: ctoken.as_ref().map(|s| s.tracked), + ata_count: ata.as_ref().map(|s| s.tracked), + pda_count: pda.as_ref().map(|s| s.tracked), + mint_count: mint.as_ref().map(|s| s.tracked), + current_slot, + total_tracked: Some(total_tracked), + total_ready, + total_waiting, + ctoken, + ata, + pda, + mint, + pda_programs, + upstreams: None, + note, + error: None, + refresh_interval_secs: Some(5), }, source: "tracker".to_string(), cached_at: now, }; } + if let Some(snapshot) = + fetch_compressible_snapshot_from_foresters(forester_api_urls, run_id).await + { + return snapshot; + } + // Standalone mode: RPC with timeout let fetch_result = tokio::time::timeout( COMPRESSIBLE_FETCH_TIMEOUT, @@ -233,16 +823,52 @@ pub(crate) async fn fetch_compressible_snapshot( .await; match fetch_result { - Ok(Ok((ctoken_count, mint_count))) => CompressibleSnapshot { - data: CompressibleResponse { - enabled: true, - ctoken_count: Some(ctoken_count), - 
pda_count: None, - mint_count: Some(mint_count), - }, - source: "rpc".to_string(), - cached_at: now, - }, + Ok(Ok((ctoken_count, mint_count))) => { + let ctoken = CompressibleTypeStats { + tracked: ctoken_count, + ready: None, + waiting: None, + next_ready_slot: None, + }; + let mint = CompressibleTypeStats { + tracked: mint_count, + ready: None, + waiting: None, + next_ready_slot: None, + }; + let client = reqwest::Client::new(); + let current_slot = + crate::compressible::bootstrap_helpers::get_current_slot(&client, rpc_url) + .await + .ok(); + + CompressibleSnapshot { + data: CompressibleResponse { + enabled: true, + ctoken_count: Some(ctoken_count), + ata_count: None, + pda_count: None, + mint_count: Some(mint_count), + current_slot, + total_tracked: Some(ctoken_count + mint_count), + total_ready: None, + total_waiting: None, + ctoken: Some(ctoken), + ata: None, + pda: None, + mint: Some(mint), + pda_programs: None, + upstreams: None, + note: Some( + "Standalone API mode: readiness and PDA breakdown require in-memory trackers inside this process, or --forester-api-url upstream(s).".to_string(), + ), + error: None, + refresh_interval_secs: Some(30), + }, + source: "rpc".to_string(), + cached_at: now, + } + } Ok(Err(e)) => { warn!( event = "api_server_compressible_count_failed", @@ -254,8 +880,22 @@ pub(crate) async fn fetch_compressible_snapshot( data: CompressibleResponse { enabled: false, ctoken_count: None, + ata_count: None, pda_count: None, mint_count: None, + current_slot: None, + total_tracked: None, + total_ready: None, + total_waiting: None, + ctoken: None, + ata: None, + pda: None, + mint: None, + pda_programs: None, + upstreams: None, + note: None, + error: Some(format!("RPC compressible count failed: {}", e)), + refresh_interval_secs: Some(30), }, source: "none".to_string(), cached_at: now, @@ -272,8 +912,25 @@ pub(crate) async fn fetch_compressible_snapshot( data: CompressibleResponse { enabled: false, ctoken_count: None, + ata_count: None, 
pda_count: None, mint_count: None, + current_slot: None, + total_tracked: None, + total_ready: None, + total_waiting: None, + ctoken: None, + ata: None, + pda: None, + mint: None, + pda_programs: None, + upstreams: None, + note: None, + error: Some(format!( + "Compressible count timed out after {} seconds", + COMPRESSIBLE_FETCH_TIMEOUT.as_secs() + )), + refresh_interval_secs: Some(30), }, source: "none".to_string(), cached_at: now, @@ -315,21 +972,28 @@ async fn run_metrics_provider( async fn run_compressible_provider( tx: watch::Sender, trackers: Option, + forester_api_urls: Vec, rpc_url: String, mut shutdown: broadcast::Receiver<()>, helius_rpc: bool, run_id: Arc, ) { - // In-memory trackers are cheap (.len()); RPC is expensive (getProgramAccounts) - let interval = if trackers.is_some() { + // In-memory trackers and upstream forester APIs are cheap compared to full RPC scans. + let interval = if trackers.is_some() || !forester_api_urls.is_empty() { Duration::from_secs(5) } else { Duration::from_secs(30) }; loop { - let snapshot = - fetch_compressible_snapshot(&trackers, &rpc_url, helius_rpc, run_id.as_ref()).await; + let snapshot = fetch_compressible_snapshot( + &trackers, + &forester_api_urls, + &rpc_url, + helius_rpc, + run_id.as_ref(), + ) + .await; if tx.send(snapshot).is_err() { break; } @@ -425,6 +1089,7 @@ pub fn spawn_api_server(config: ApiServerConfig) -> ApiServerHandle { tokio::spawn(run_compressible_provider( compressible_tx, trackers, + config.forester_api_urls.clone(), config.rpc_url.clone(), provider_shutdown_tx.subscribe(), config.helius_rpc, diff --git a/forester/src/cli.rs b/forester/src/cli.rs index 05aef7fc2c..41da7cf56b 100644 --- a/forester/src/cli.rs +++ b/forester/src/cli.rs @@ -389,6 +389,14 @@ pub struct DashboardArgs { help = "Prometheus server URL for querying metrics (e.g. 
http://prometheus:9090)" )] pub prometheus_url: Option, + + #[arg( + long = "forester-api-url", + env = "FORESTER_API_URLS", + value_delimiter = ',', + help = "Forester API base URL(s) to aggregate compressible data from (e.g. http://forester-a:8080,http://forester-b:8080)" + )] + pub forester_api_urls: Vec, } #[derive(Default, Debug, Clone, Copy, PartialEq, Eq, ValueEnum)] diff --git a/forester/src/epoch_manager.rs b/forester/src/epoch_manager.rs index b58cf20223..d3b3141495 100644 --- a/forester/src/epoch_manager.rs +++ b/forester/src/epoch_manager.rs @@ -1216,6 +1216,30 @@ impl EpochManager { match self.register_for_epoch(epoch).await { Ok(registration_info) => return Ok(registration_info), Err(e) => { + if let Some(RegistrationError::RegistrationPhaseEnded { + epoch: ended_epoch, + current_slot, + registration_end, + }) = e.downcast_ref::() + { + warn!( + event = "registration_attempt_non_retryable", + run_id = %self.run_id, + epoch, + attempt = attempt + 1, + max_attempts = max_retries, + error = ?e, + "Registration phase ended; stopping retries for this epoch" + ); + return Err(ForesterError::Registration( + RegistrationError::RegistrationPhaseEnded { + epoch: *ended_epoch, + current_slot: *current_slot, + registration_end: *registration_end, + }, + )); + } + warn!( event = "registration_attempt_failed", run_id = %self.run_id, diff --git a/forester/src/lib.rs b/forester/src/lib.rs index eb224a1e4e..7d6125ea4b 100644 --- a/forester/src/lib.rs +++ b/forester/src/lib.rs @@ -141,6 +141,225 @@ pub async fn run_queue_info( Ok(()) } +#[derive(Clone, Default)] +pub struct CompressibleTrackerHandles { + pub ctoken_tracker: Option>, + pub pda_tracker: Option>, + pub mint_tracker: Option>, +} + +pub async fn initialize_compressible_trackers( + config: Arc, + shutdown_compressible: Option>, + shutdown_bootstrap: Option>, +) -> Result { + let Some(compressible_config) = &config.compressible_config else { + return Ok(CompressibleTrackerHandles::default()); + }; + + // 
Validate on-chain CompressibleConfig at startup (fail fast on misconfiguration) + compressible::validate_compressible_config(&config.external_services.rpc_url).await?; + + let Some(shutdown_rx) = shutdown_compressible else { + tracing::warn!("Compressible config enabled but no shutdown receiver provided"); + return Ok(CompressibleTrackerHandles::default()); + }; + + // Create all shutdown receivers upfront (before any are moved) + let shutdown_rx_ctoken = shutdown_rx.resubscribe(); + let shutdown_rx_mint = shutdown_rx.resubscribe(); + let shutdown_rx_mint_bootstrap = shutdown_rx.resubscribe(); + // Keep original for PDA subscriptions (will resubscribe per-program) + let shutdown_rx_pda_base = shutdown_rx; + + // Create ctoken tracker + let ctoken_tracker = Arc::new(compressible::CTokenAccountTracker::new()); + let tracker_clone = ctoken_tracker.clone(); + let ws_url = compressible_config.ws_url.clone(); + + // Spawn account subscriber for ctokens + tokio::spawn(async move { + let mut subscriber = compressible::AccountSubscriber::new( + ws_url, + tracker_clone, + compressible::SubscriptionConfig::ctoken(), + shutdown_rx_ctoken, + ); + if let Err(e) = subscriber.run().await { + tracing::error!("Compressible subscriber error: {:?}", e); + } + }); + + // Extract flag before async closures to avoid moving config Arc + let helius_rpc = config.general_config.helius_rpc; + + // Spawn bootstrap task for ctokens with shutdown support + if let Some(mut shutdown_bootstrap_rx) = shutdown_bootstrap { + let tracker_clone = ctoken_tracker.clone(); + let rpc_url = config.external_services.rpc_url.clone(); + + tokio::spawn(async move { + let retry_config = RetryConfig::new("CToken bootstrap") + .with_max_attempts(3) + .with_initial_delay(Duration::from_secs(5)); + + let bootstrap_future = retry_with_backoff(retry_config, || { + let rpc_url = rpc_url.clone(); + let tracker = tracker_clone.clone(); + async move { + compressible::bootstrap_ctoken_accounts(rpc_url, tracker, None, 
helius_rpc) + .await + } + }); + + tokio::select! { + result = bootstrap_future => { + match result { + Ok(()) => tracing::info!("CToken bootstrap complete"), + Err(e) => tracing::error!("CToken bootstrap failed after retries: {:?}", e), + } + } + _ = &mut shutdown_bootstrap_rx => { + tracing::info!("CToken bootstrap interrupted by shutdown signal"); + } + } + }); + } + + // Create PDA tracker if there are PDA programs configured + let pda_tracker = if !compressible_config.pda_programs.is_empty() { + let pda_tracker = Arc::new(compressible::pda::PdaAccountTracker::new( + compressible_config.pda_programs.clone(), + )); + + // Spawn account subscribers for each PDA program + for pda_config in &compressible_config.pda_programs { + let pda_tracker_sub = pda_tracker.clone(); + let ws_url_pda = compressible_config.ws_url.clone(); + let shutdown_rx_pda = shutdown_rx_pda_base.resubscribe(); + let program_id = pda_config.program_id; + let discriminator = pda_config.discriminator; + let program_name = format!( + "pda-{}", + program_id.to_string().chars().take(8).collect::() + ); + + tokio::spawn(async move { + let mut subscriber = compressible::AccountSubscriber::new( + ws_url_pda, + pda_tracker_sub, + compressible::SubscriptionConfig::pda( + program_id, + discriminator, + program_name.clone(), + ), + shutdown_rx_pda, + ); + if let Err(e) = subscriber.run().await { + tracing::error!("PDA subscriber error for {}: {:?}", program_name, e); + } + }); + } + + // Spawn bootstrap task for PDAs with shutdown support + let pda_tracker_clone = pda_tracker.clone(); + let rpc_url = config.external_services.rpc_url.clone(); + let mut shutdown_rx_pda_bootstrap = shutdown_rx_pda_base.resubscribe(); + + tokio::spawn(async move { + let retry_config = RetryConfig::new("PDA bootstrap") + .with_max_attempts(3) + .with_initial_delay(Duration::from_secs(5)); + + let bootstrap_future = retry_with_backoff(retry_config, || { + let rpc_url = rpc_url.clone(); + let tracker = pda_tracker_clone.clone(); 
+ async move { + compressible::pda::bootstrap_pda_accounts(rpc_url, tracker, None, helius_rpc) + .await + } + }); + + tokio::select! { + result = bootstrap_future => { + match result { + Ok(()) => tracing::info!("PDA bootstrap complete"), + Err(e) => tracing::error!("PDA bootstrap failed after retries: {:?}", e), + } + } + _ = shutdown_rx_pda_bootstrap.recv() => { + tracing::info!("PDA bootstrap interrupted by shutdown signal"); + } + } + }); + + Some(pda_tracker) + } else { + None + }; + + // Create Mint tracker and spawn subscriptions + bootstrap + let mint_tracker = { + let mint_tracker = Arc::new(compressible::mint::MintAccountTracker::new()); + + // Spawn account subscriber for mints + let mint_tracker_sub = mint_tracker.clone(); + let ws_url_mint = compressible_config.ws_url.clone(); + + tokio::spawn(async move { + let mut subscriber = compressible::AccountSubscriber::new( + ws_url_mint, + mint_tracker_sub, + compressible::SubscriptionConfig::mint(), + shutdown_rx_mint, + ); + if let Err(e) = subscriber.run().await { + tracing::error!("Mint subscriber error: {:?}", e); + } + }); + + // Spawn bootstrap task for Mints with shutdown support + let mint_tracker_clone = mint_tracker.clone(); + let rpc_url = config.external_services.rpc_url.clone(); + let mut shutdown_rx_mint_bootstrap = shutdown_rx_mint_bootstrap; + + tokio::spawn(async move { + let retry_config = RetryConfig::new("Mint bootstrap") + .with_max_attempts(3) + .with_initial_delay(Duration::from_secs(5)); + + let bootstrap_future = retry_with_backoff(retry_config, || { + let rpc_url = rpc_url.clone(); + let tracker = mint_tracker_clone.clone(); + async move { + compressible::mint::bootstrap_mint_accounts(rpc_url, tracker, None, helius_rpc) + .await + } + }); + + tokio::select! 
{ + result = bootstrap_future => { + match result { + Ok(()) => tracing::info!("Mint bootstrap complete"), + Err(e) => tracing::error!("Mint bootstrap failed after retries: {:?}", e), + } + } + _ = shutdown_rx_mint_bootstrap.recv() => { + tracing::info!("Mint bootstrap interrupted by shutdown signal"); + } + } + }); + + Some(mint_tracker) + }; + + Ok(CompressibleTrackerHandles { + ctoken_tracker: Some(ctoken_tracker), + pda_tracker, + mint_tracker, + }) +} + pub async fn run_pipeline( config: Arc, rpc_rate_limiter: Option, @@ -159,6 +378,7 @@ pub async fn run_pipeline( shutdown_bootstrap, work_report_sender, generate_run_id(), + None, ) .await } @@ -173,6 +393,7 @@ pub async fn run_pipeline_with_run_id( shutdown_bootstrap: Option>, work_report_sender: mpsc::Sender, run_id: String, + preconfigured_trackers: Option, ) -> Result<()> { let mut builder = SolanaRpcPoolBuilder::::default() .url(config.external_services.rpc_url.to_string()) @@ -233,216 +454,19 @@ pub async fn run_pipeline_with_run_id( config.transaction_config.ops_cache_ttl_seconds, ))); - let (compressible_tracker, pda_tracker, mint_tracker) = if let Some(compressible_config) = - &config.compressible_config - { - // Validate on-chain CompressibleConfig at startup (fail fast on misconfiguration) - compressible::validate_compressible_config(&config.external_services.rpc_url).await?; - - if let Some(shutdown_rx) = shutdown_compressible { - // Create all shutdown receivers upfront (before any are moved) - let shutdown_rx_ctoken = shutdown_rx.resubscribe(); - let shutdown_rx_mint = shutdown_rx.resubscribe(); - let shutdown_rx_mint_bootstrap = shutdown_rx.resubscribe(); - // Keep original for PDA subscriptions (will resubscribe per-program) - let shutdown_rx_pda_base = shutdown_rx; - - // Create ctoken tracker - let ctoken_tracker = Arc::new(compressible::CTokenAccountTracker::new()); - let tracker_clone = ctoken_tracker.clone(); - let ws_url = compressible_config.ws_url.clone(); - - // Spawn account subscriber 
for ctokens - tokio::spawn(async move { - let mut subscriber = compressible::AccountSubscriber::new( - ws_url, - tracker_clone, - compressible::SubscriptionConfig::ctoken(), - shutdown_rx_ctoken, - ); - if let Err(e) = subscriber.run().await { - tracing::error!("Compressible subscriber error: {:?}", e); - } - }); - - // Extract flag before async closures to avoid moving config Arc - let helius_rpc = config.general_config.helius_rpc; - - // Spawn bootstrap task for ctokens with shutdown support - if let Some(mut shutdown_bootstrap_rx) = shutdown_bootstrap { - let tracker_clone = ctoken_tracker.clone(); - let rpc_url = config.external_services.rpc_url.clone(); - - tokio::spawn(async move { - let retry_config = RetryConfig::new("CToken bootstrap") - .with_max_attempts(3) - .with_initial_delay(Duration::from_secs(5)); - - let bootstrap_future = retry_with_backoff(retry_config, || { - let rpc_url = rpc_url.clone(); - let tracker = tracker_clone.clone(); - async move { - compressible::bootstrap_ctoken_accounts( - rpc_url, tracker, None, helius_rpc, - ) - .await - } - }); - - tokio::select! 
{ - result = bootstrap_future => { - match result { - Ok(()) => tracing::info!("CToken bootstrap complete"), - Err(e) => tracing::error!("CToken bootstrap failed after retries: {:?}", e), - } - } - _ = &mut shutdown_bootstrap_rx => { - tracing::info!("CToken bootstrap interrupted by shutdown signal"); - } - } - }); - } - - // Create PDA tracker if there are PDA programs configured - let pda_tracker = if !compressible_config.pda_programs.is_empty() { - let pda_tracker = Arc::new(compressible::pda::PdaAccountTracker::new( - compressible_config.pda_programs.clone(), - )); - - // Spawn account subscribers for each PDA program - for pda_config in &compressible_config.pda_programs { - let pda_tracker_sub = pda_tracker.clone(); - let ws_url_pda = compressible_config.ws_url.clone(); - let shutdown_rx_pda = shutdown_rx_pda_base.resubscribe(); - let program_id = pda_config.program_id; - let discriminator = pda_config.discriminator; - let program_name = format!( - "pda-{}", - program_id.to_string().chars().take(8).collect::() - ); - - tokio::spawn(async move { - let mut subscriber = compressible::AccountSubscriber::new( - ws_url_pda, - pda_tracker_sub, - compressible::SubscriptionConfig::pda( - program_id, - discriminator, - program_name.clone(), - ), - shutdown_rx_pda, - ); - if let Err(e) = subscriber.run().await { - tracing::error!("PDA subscriber error for {}: {:?}", program_name, e); - } - }); - } - - // Spawn bootstrap task for PDAs with shutdown support - let pda_tracker_clone = pda_tracker.clone(); - let rpc_url = config.external_services.rpc_url.clone(); - let mut shutdown_rx_pda_bootstrap = shutdown_rx_pda_base.resubscribe(); - - tokio::spawn(async move { - let retry_config = RetryConfig::new("PDA bootstrap") - .with_max_attempts(3) - .with_initial_delay(Duration::from_secs(5)); - - let bootstrap_future = retry_with_backoff(retry_config, || { - let rpc_url = rpc_url.clone(); - let tracker = pda_tracker_clone.clone(); - async move { - 
compressible::pda::bootstrap_pda_accounts( - rpc_url, tracker, None, helius_rpc, - ) - .await - } - }); - - tokio::select! { - result = bootstrap_future => { - match result { - Ok(()) => tracing::info!("PDA bootstrap complete"), - Err(e) => tracing::error!("PDA bootstrap failed after retries: {:?}", e), - } - } - _ = shutdown_rx_pda_bootstrap.recv() => { - tracing::info!("PDA bootstrap interrupted by shutdown signal"); - } - } - }); - - Some(pda_tracker) - } else { - None - }; - - // Create Mint tracker and spawn subscriptions + bootstrap - let mint_tracker = { - let mint_tracker = Arc::new(compressible::mint::MintAccountTracker::new()); - - // Spawn account subscriber for mints - let mint_tracker_sub = mint_tracker.clone(); - let ws_url_mint = compressible_config.ws_url.clone(); - - tokio::spawn(async move { - let mut subscriber = compressible::AccountSubscriber::new( - ws_url_mint, - mint_tracker_sub, - compressible::SubscriptionConfig::mint(), - shutdown_rx_mint, - ); - if let Err(e) = subscriber.run().await { - tracing::error!("Mint subscriber error: {:?}", e); - } - }); - - // Spawn bootstrap task for Mints with shutdown support - let mint_tracker_clone = mint_tracker.clone(); - let rpc_url = config.external_services.rpc_url.clone(); - let mut shutdown_rx_mint_bootstrap = shutdown_rx_mint_bootstrap; - - tokio::spawn(async move { - let retry_config = RetryConfig::new("Mint bootstrap") - .with_max_attempts(3) - .with_initial_delay(Duration::from_secs(5)); - - let bootstrap_future = retry_with_backoff(retry_config, || { - let rpc_url = rpc_url.clone(); - let tracker = mint_tracker_clone.clone(); - async move { - compressible::mint::bootstrap_mint_accounts( - rpc_url, tracker, None, helius_rpc, - ) - .await - } - }); - - tokio::select! 
{ - result = bootstrap_future => { - match result { - Ok(()) => tracing::info!("Mint bootstrap complete"), - Err(e) => tracing::error!("Mint bootstrap failed after retries: {:?}", e), - } - } - _ = shutdown_rx_mint_bootstrap.recv() => { - tracing::info!("Mint bootstrap interrupted by shutdown signal"); - } - } - }); - - Some(mint_tracker) - }; - - (Some(ctoken_tracker), pda_tracker, mint_tracker) - } else { - tracing::warn!("Compressible config enabled but no shutdown receiver provided"); - (None, None, None) - } + let tracker_handles = if let Some(trackers) = preconfigured_trackers { + trackers } else { - (None, None, None) + initialize_compressible_trackers(config.clone(), shutdown_compressible, shutdown_bootstrap) + .await? }; + let CompressibleTrackerHandles { + ctoken_tracker: compressible_tracker, + pda_tracker, + mint_tracker, + } = tracker_handles; + debug!("Starting Forester pipeline"); let result = run_service( config, diff --git a/forester/src/main.rs b/forester/src/main.rs index fe99ac90e0..9669b2d05d 100644 --- a/forester/src/main.rs +++ b/forester/src/main.rs @@ -2,12 +2,13 @@ use std::sync::Arc; use clap::Parser; use forester::{ - api_server::{spawn_api_server, ApiServerConfig}, + api_server::{spawn_api_server, ApiServerConfig, CompressibleDashboardState}, cli::{Cli, Commands}, epoch_manager::generate_run_id, errors::ForesterError, forester_status, health_check::run_health_check, + initialize_compressible_trackers, metrics::register_metrics, run_pipeline_with_run_id, telemetry::setup_telemetry, @@ -87,6 +88,41 @@ async fn main() -> Result<(), ForesterError> { .send_tx_rate_limit .map(RateLimiter::new); + let mut shutdown_sender_compressible: Option> = None; + let mut shutdown_sender_bootstrap: Option> = None; + + let preconfigured_trackers = if config.compressible_config.is_some() { + let (sender, receiver) = tokio::sync::broadcast::channel(1); + let (bootstrap_sender, bootstrap_receiver) = oneshot::channel(); + shutdown_sender_compressible = 
Some(sender); + shutdown_sender_bootstrap = Some(bootstrap_sender); + Some( + initialize_compressible_trackers( + config.clone(), + Some(receiver), + Some(bootstrap_receiver), + ) + .await?, + ) + } else { + None + }; + + let compressible_state = preconfigured_trackers.as_ref().and_then(|trackers| { + if trackers.ctoken_tracker.is_none() + && trackers.pda_tracker.is_none() + && trackers.mint_tracker.is_none() + { + None + } else { + Some(CompressibleDashboardState { + ctoken_tracker: trackers.ctoken_tracker.clone(), + pda_tracker: trackers.pda_tracker.clone(), + mint_tracker: trackers.mint_tracker.clone(), + }) + } + }); + let rpc_url_for_api: String = config.external_services.rpc_url.to_string(); let run_id = generate_run_id(); let api_server_handle = spawn_api_server(ApiServerConfig { @@ -94,49 +130,35 @@ async fn main() -> Result<(), ForesterError> { rpc_url: rpc_url_for_api, port: args.api_server_port, allow_public_bind: args.api_server_public_bind, - compressible_state: None, + compressible_state, prometheus_url: args.prometheus_url.clone(), helius_rpc: args.helius_rpc, + forester_api_urls: Vec::new(), }); - // Create compressible shutdown channels if compressible is enabled - let (shutdown_receiver_compressible, shutdown_receiver_bootstrap) = - if config.compressible_config.is_some() { - let (shutdown_sender_compressible, shutdown_receiver_compressible) = - tokio::sync::broadcast::channel(1); - let (shutdown_sender_bootstrap, shutdown_receiver_bootstrap) = - oneshot::channel(); - spawn_shutdown_handler( - shutdown_sender_service, - Some(move || { - let _ = shutdown_sender_compressible.send(()); - let _ = shutdown_sender_bootstrap.send(()); - api_server_handle.shutdown(); - }), - ); - ( - Some(shutdown_receiver_compressible), - Some(shutdown_receiver_bootstrap), - ) - } else { - spawn_shutdown_handler( - shutdown_sender_service, - Some(move || { - api_server_handle.shutdown(); - }), - ); - (None, None) - }; + spawn_shutdown_handler( + shutdown_sender_service, 
+ Some(move || { + if let Some(sender) = shutdown_sender_compressible { + let _ = sender.send(()); + } + if let Some(sender) = shutdown_sender_bootstrap { + let _ = sender.send(()); + } + api_server_handle.shutdown(); + }), + ); run_pipeline_with_run_id::( config, rpc_rate_limiter, send_tx_limiter, shutdown_receiver_service, - shutdown_receiver_compressible, - shutdown_receiver_bootstrap, + None, + None, work_report_sender, run_id, + preconfigured_trackers, ) .await? } @@ -164,6 +186,7 @@ async fn main() -> Result<(), ForesterError> { compressible_state: None, prometheus_url: args.prometheus_url.clone(), helius_rpc: false, + forester_api_urls: args.forester_api_urls.clone(), }); // Block until Ctrl+C From 93c98c4ad03395194cf3b25fec8ff2e2b76e9ec0 Mon Sep 17 00:00:00 2001 From: Sergey Timoshin Date: Tue, 24 Feb 2026 15:49:34 +0000 Subject: [PATCH 08/15] fix: improve transaction handling in compressors - Enhanced error handling in `CTokenCompressor`, `MintCompressor`, and `PdaCompressor` to manage pending states more effectively. - Added checks to ensure accounts are marked as pending during transaction processing. chore: update epoch manager to check eligibility for compression - Modified `dispatch_compression`, `dispatch_pda_compression`, and `dispatch_mint_compression` to include eligibility checks based on the current light slot. refactor: improve account tracking with atomic counters - Introduced `compressed_count` and `pending` sets in account trackers for better management of compression states. - Updated `CompressibleTracker` trait to include methods for managing pending accounts and counting compressed accounts. fix: ensure proper handling of closed accounts in trackers - Added logic to remove closed accounts from trackers in `CTokenAccountTracker`, `MintAccountTracker`, and `PdaAccountTracker`. feat: add usePhotonStats hook for fetching photon statistics - Implemented a new hook `usePhotonStats` using SWR for fetching photon statistics from the API. 
- Introduced error handling for API responses. refactor: enhance utility functions for address exploration - Added `explorerUrl` function to generate Solana explorer URLs based on the current network. - Improved `formatSlotCountdown` to handle additional parameters for better status reporting. feat: extend forester types with new statistics - Updated `AggregateQueueStats`, `ForesterStatus`, and related interfaces to include new fields for batch processing statistics. - Introduced `PhotonStats` interface for tracking photon-related metrics. --- external/photon | 2 +- forester/dashboard/package.json | 2 + .../src/app/api/photon-stats/route.ts | 91 ++++ .../dashboard/src/app/compressible/page.tsx | 36 -- forester/dashboard/src/app/layout.tsx | 8 +- forester/dashboard/src/app/metrics/page.tsx | 44 -- forester/dashboard/src/app/page.tsx | 186 ++++++-- forester/dashboard/src/app/trees/page.tsx | 32 -- .../src/components/CompressiblePanel.tsx | 418 ++++-------------- .../dashboard/src/components/EpochCard.tsx | 109 +++-- .../dashboard/src/components/ForesterList.tsx | 177 ++++++-- .../dashboard/src/components/MetricsPanel.tsx | 125 +----- .../src/components/PhotonStatsPanel.tsx | 106 +++++ .../src/components/QueuePressureChart.tsx | 65 ++- forester/dashboard/src/components/Sidebar.tsx | 42 -- .../dashboard/src/components/TreeTable.tsx | 145 +++++- .../dashboard/src/hooks/useBalanceHistory.ts | 99 +++++ .../dashboard/src/hooks/usePhotonStats.ts | 18 + forester/dashboard/src/lib/utils.ts | 22 +- forester/dashboard/src/types/forester.ts | 18 + forester/src/api_server.rs | 35 +- .../src/compressible/ctoken/compressor.rs | 26 +- forester/src/compressible/ctoken/state.rs | 44 +- forester/src/compressible/mint/compressor.rs | 63 ++- forester/src/compressible/mint/state.rs | 37 +- forester/src/compressible/pda/compressor.rs | 57 ++- forester/src/compressible/pda/state.rs | 38 +- forester/src/compressible/traits.rs | 49 +- forester/src/epoch_manager.rs | 79 +++- 
forester/src/forester_status.rs | 26 +- sdk-libs/client/src/rpc/client.rs | 14 +- .../macros/src/light_pdas/program/compress.rs | 26 +- 32 files changed, 1438 insertions(+), 801 deletions(-) create mode 100644 forester/dashboard/src/app/api/photon-stats/route.ts delete mode 100644 forester/dashboard/src/app/compressible/page.tsx delete mode 100644 forester/dashboard/src/app/metrics/page.tsx delete mode 100644 forester/dashboard/src/app/trees/page.tsx create mode 100644 forester/dashboard/src/components/PhotonStatsPanel.tsx delete mode 100644 forester/dashboard/src/components/Sidebar.tsx create mode 100644 forester/dashboard/src/hooks/useBalanceHistory.ts create mode 100644 forester/dashboard/src/hooks/usePhotonStats.ts diff --git a/external/photon b/external/photon index 54ead0a77e..f5d793e721 160000 --- a/external/photon +++ b/external/photon @@ -1 +1 @@ -Subproject commit 54ead0a77e50eed0f7beb704868ce6cb0b914b8f +Subproject commit f5d793e7217c48cb57b0f42892c12466b66c1830 diff --git a/forester/dashboard/package.json b/forester/dashboard/package.json index 2c70cedc7c..7a1b2ab90c 100644 --- a/forester/dashboard/package.json +++ b/forester/dashboard/package.json @@ -10,12 +10,14 @@ }, "dependencies": { "next": "^14.2.0", + "pg": "^8.18.0", "react": "^18.3.0", "react-dom": "^18.3.0", "swr": "^2.2.0" }, "devDependencies": { "@types/node": "^20.0.0", + "@types/pg": "^8.16.0", "@types/react": "^18.3.0", "@types/react-dom": "^18.3.0", "autoprefixer": "^10.4.0", diff --git a/forester/dashboard/src/app/api/photon-stats/route.ts b/forester/dashboard/src/app/api/photon-stats/route.ts new file mode 100644 index 0000000000..f3bd618019 --- /dev/null +++ b/forester/dashboard/src/app/api/photon-stats/route.ts @@ -0,0 +1,91 @@ +import { NextResponse } from "next/server"; +import { Pool } from "pg"; + +export const dynamic = "force-dynamic"; + +let pool: Pool | null = null; + +function getPool(): Pool | null { + const url = process.env.PHOTON_DATABASE_URL; + if (!url) return null; + 
if (!pool) { + pool = new Pool({ connectionString: url, max: 2, idleTimeoutMillis: 30_000 }); + } + return pool; +} + +export async function GET() { + const db = getPool(); + if (!db) { + return NextResponse.json( + { error: "PHOTON_DATABASE_URL not configured" }, + { status: 503 } + ); + } + + try { + const client = await db.connect(); + try { + const [accounts, tokens, compressed] = await Promise.all([ + client.query(` + SELECT + COUNT(*) AS total, + COUNT(*) FILTER (WHERE NOT spent) AS active + FROM accounts + `), + client.query(` + SELECT + COUNT(*) AS total, + COUNT(*) FILTER (WHERE NOT spent) AS active + FROM token_accounts + `), + client.query(` + SELECT + encode(a.owner, 'base64') AS owner_b64, + COUNT(*) AS total, + COUNT(*) FILTER (WHERE NOT a.spent) AS active + FROM accounts a + WHERE a.onchain_pubkey IS NOT NULL + GROUP BY a.owner + ORDER BY COUNT(*) DESC + `), + ]); + + const totalAccounts = Number(accounts.rows[0].total); + const activeAccounts = Number(accounts.rows[0].active); + const totalTokens = Number(tokens.rows[0].total); + const activeTokens = Number(tokens.rows[0].active); + + const compressedFromOnchain = compressed.rows.map((r: { owner_b64: string; total: string; active: string }) => ({ + owner: Buffer.from(r.owner_b64, "base64").toString("hex"), + total: Number(r.total), + active: Number(r.active), + })); + + const totalCompressedFromOnchain = compressedFromOnchain.reduce( + (sum: number, r: { total: number }) => sum + r.total, + 0 + ); + const activeCompressedFromOnchain = compressedFromOnchain.reduce( + (sum: number, r: { active: number }) => sum + r.active, + 0 + ); + + return NextResponse.json({ + accounts: { total: totalAccounts, active: activeAccounts }, + token_accounts: { total: totalTokens, active: activeTokens }, + compressed_from_onchain: { + total: totalCompressedFromOnchain, + active: activeCompressedFromOnchain, + by_owner: compressedFromOnchain, + }, + timestamp: Math.floor(Date.now() / 1000), + }); + } finally { + 
client.release(); + } + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + return NextResponse.json({ error: message }, { status: 500 }); + } +} diff --git a/forester/dashboard/src/app/compressible/page.tsx b/forester/dashboard/src/app/compressible/page.tsx deleted file mode 100644 index 72f857cfcf..0000000000 --- a/forester/dashboard/src/app/compressible/page.tsx +++ /dev/null @@ -1,36 +0,0 @@ -"use client"; - -import { useCompressible } from "@/hooks/useCompressible"; -import { ErrorState } from "@/components/ErrorState"; -import { CompressiblePanel } from "@/components/CompressiblePanel"; - -export default function CompressiblePage() { - const { data, error, isLoading } = useCompressible(); - - if (isLoading) { - return ( -
-
- Loading compressible status... -
-
- ); - } - - if (error || !data) { - return ; - } - - return ( -
-
-

Compressible Accounts

-

- Track what is currently compressible, what is waiting on rent - windows, and how fresh this view is. -

-
- -
- ); -} diff --git a/forester/dashboard/src/app/layout.tsx b/forester/dashboard/src/app/layout.tsx index 9a3889aff9..d6881a179b 100644 --- a/forester/dashboard/src/app/layout.tsx +++ b/forester/dashboard/src/app/layout.tsx @@ -1,5 +1,4 @@ import type { Metadata } from "next"; -import { Sidebar } from "@/components/Sidebar"; import "./globals.css"; export const metadata: Metadata = { @@ -14,11 +13,8 @@ export default function RootLayout({ }) { return ( - -
- -
{children}
-
+ +
{children}
); diff --git a/forester/dashboard/src/app/metrics/page.tsx b/forester/dashboard/src/app/metrics/page.tsx deleted file mode 100644 index 6d2ea64f14..0000000000 --- a/forester/dashboard/src/app/metrics/page.tsx +++ /dev/null @@ -1,44 +0,0 @@ -"use client"; - -import { useMetrics } from "@/hooks/useMetrics"; -import { ErrorState } from "@/components/ErrorState"; -import { MetricsPanel } from "@/components/MetricsPanel"; - -export default function MetricsPage() { - const { data: metrics, error, isLoading } = useMetrics(); - - if (isLoading) { - return ( -
-
Loading metrics...
-
- ); - } - - if (error || !metrics) { - return ; - } - - const isEmpty = - Object.keys(metrics.transactions_processed_total).length === 0 && - Object.keys(metrics.forester_balances).length === 0; - - return ( -
-

Metrics

- {isEmpty ? ( -
-

- No metrics data available. -

-

- Configure --prometheus-url to query aggregated metrics, or connect - to a running forester instance. -

-
- ) : ( - - )} -
- ); -} diff --git a/forester/dashboard/src/app/page.tsx b/forester/dashboard/src/app/page.tsx index 72ae9da8ae..a34384f919 100644 --- a/forester/dashboard/src/app/page.tsx +++ b/forester/dashboard/src/app/page.tsx @@ -1,27 +1,66 @@ "use client"; +import { useState, useEffect, useRef } from "react"; import { useForesterStatus } from "@/hooks/useForesterStatus"; -import { EpochCard } from "@/components/EpochCard"; +import { useMetrics } from "@/hooks/useMetrics"; +import { useCompressible } from "@/hooks/useCompressible"; +import { usePhotonStats } from "@/hooks/usePhotonStats"; +import { useBalanceHistory } from "@/hooks/useBalanceHistory"; import { ErrorState } from "@/components/ErrorState"; -import { ForesterList } from "@/components/ForesterList"; +import { EpochCard } from "@/components/EpochCard"; import { QueuePressureChart } from "@/components/QueuePressureChart"; +import { ForesterList } from "@/components/ForesterList"; +import { TreeTable } from "@/components/TreeTable"; +import { CompressiblePanel } from "@/components/CompressiblePanel"; +import { PhotonStatsPanel } from "@/components/PhotonStatsPanel"; +import { MetricsPanel } from "@/components/MetricsPanel"; import { formatNumber } from "@/lib/utils"; -export default function OverviewPage() { - const { data: status, error, isLoading } = useForesterStatus(); +export default function Dashboard() { + const { data: status, error: statusError, isLoading: statusLoading } = useForesterStatus(); + const { data: metrics } = useMetrics(); + const { data: compressible } = useCompressible(); + const { data: photonStats } = usePhotonStats(); + + // Track balance history for all foresters + const allForesters = [ + ...(status?.active_epoch_foresters ?? []), + ...(status?.registration_epoch_foresters ?? 
[]), + ]; + const { getTrend } = useBalanceHistory(allForesters); - if (isLoading) { + // Auto-refresh indicator + const [lastUpdated, setLastUpdated] = useState(Date.now()); + const [secondsAgo, setSecondsAgo] = useState(0); + const prevSlot = useRef(null); + + useEffect(() => { + if (status && status.slot !== prevSlot.current) { + prevSlot.current = status.slot; + setLastUpdated(Date.now()); + } + }, [status]); + + useEffect(() => { + const interval = setInterval(() => { + setSecondsAgo(Math.floor((Date.now() - lastUpdated) / 1000)); + }, 1000); + return () => clearInterval(interval); + }, [lastUpdated]); + + if (statusLoading) { return (
-
Loading forester status...
+
Loading...
); } - if (error || !status) { - return ; + if (statusError || !status) { + return ; } + // Alerts const warnings: string[] = []; if (status.active_epoch_foresters.length === 0) { warnings.push("No foresters registered for the active epoch"); @@ -32,16 +71,33 @@ export default function OverviewPage() { ) { warnings.push("Registration closing soon with no foresters registered"); } + // Check for foresters not re-registered + const notReRegistered = status.active_epoch_foresters.filter( + (f) => !status.registration_epoch_foresters.some((r) => r.authority === f.authority) + ); + if (notReRegistered.length > 0 && status.active_epoch_progress_percentage > 50) { + warnings.push( + `${notReRegistered.length} active forester${notReRegistered.length !== 1 ? "s" : ""} not registered for next epoch` + ); + } + + const hasMetrics = metrics && + Object.keys(metrics.transaction_rate).length > 0; return ( -
+
+ {/* Header */}
-

Overview

- - Slot {formatNumber(status.slot)} - +

Forester

+
+ + Slot {formatNumber(status.slot)} + + +
+ {/* Warnings */} {warnings.map((w, i) => (
))} + {/* Stats row */}
- - - 0} + label="Trees" + value={status.active_trees} + sub={status.rolled_over_trees > 0 ? `${status.rolled_over_trees} rolled over` : undefined} /> + 0} + sub={`${formatNumber(status.total_pending_items)} items`} + /> + {compressible && compressible.total_tracked != null && ( + 0 + ? `${formatNumber(compressible.total_ready)} ready` + : undefined} + /> + )} + {hasMetrics && ( + a + b, 0)} + /> + )}
+ {/* Epoch + Queue Pressure */}
-
- + + {/* Compressible */} + {compressible && compressible.enabled && ( +
+ +
+ )} + + {/* Photon Stats */} + {photonStats && !photonStats.error && ( +
+ +
+ )} + + {/* Trees */} +
+ - -
+ + + {/* Metrics */} + {hasMetrics && ( +
+ +
+ )} +
+ ); +} + +function RefreshIndicator({ secondsAgo }: { secondsAgo: number }) { + const stale = secondsAgo > 15; + return ( +
+
+ + {secondsAgo < 2 ? "live" : `${secondsAgo}s ago`} +
); } @@ -85,10 +205,12 @@ function StatCard({ label, value, highlight, + sub, }: { label: string; value: number; highlight?: boolean; + sub?: string; }) { return (
@@ -98,6 +220,16 @@ function StatCard({ > {formatNumber(value)}
+ {sub &&
{sub}
} +
+ ); +} + +function Section({ title, children }: { title: string; children: React.ReactNode }) { + return ( +
+

{title}

+ {children}
); } diff --git a/forester/dashboard/src/app/trees/page.tsx b/forester/dashboard/src/app/trees/page.tsx deleted file mode 100644 index 2c21bbca75..0000000000 --- a/forester/dashboard/src/app/trees/page.tsx +++ /dev/null @@ -1,32 +0,0 @@ -"use client"; - -import { useForesterStatus } from "@/hooks/useForesterStatus"; -import { ErrorState } from "@/components/ErrorState"; -import { TreeTable } from "@/components/TreeTable"; - -export default function TreesPage() { - const { data: status, error, isLoading } = useForesterStatus(); - - if (isLoading) { - return ( -
-
Loading trees...
-
- ); - } - - if (error || !status) { - return ; - } - - return ( -
-

Trees

- -
- ); -} diff --git a/forester/dashboard/src/components/CompressiblePanel.tsx b/forester/dashboard/src/components/CompressiblePanel.tsx index e4a511b323..745e89b194 100644 --- a/forester/dashboard/src/components/CompressiblePanel.tsx +++ b/forester/dashboard/src/components/CompressiblePanel.tsx @@ -1,363 +1,103 @@ -import type { - CompressibleResponse, - CompressibleTypeStats, -} from "@/types/forester"; -import { - formatAgeFromUnixSeconds, - formatNumber, - formatSlotCountdown, - truncateAddress, -} from "@/lib/utils"; +import type { CompressibleResponse } from "@/types/forester"; +import { formatAgeFromUnixSeconds, formatNumber, formatSlotCountdown } from "@/lib/utils"; interface CompressiblePanelProps { data: CompressibleResponse; } export function CompressiblePanel({ data }: CompressiblePanelProps) { - const source = data.source ?? "none"; - const sourceBadge = - source === "tracker" - ? "bg-emerald-100 text-emerald-800" - : source === "forester-apis" - ? "bg-sky-100 text-sky-800" - : source === "rpc" - ? "bg-blue-100 text-blue-800" - : "bg-gray-100 text-gray-700"; - - const sourceLabel = - source === "tracker" - ? "In-memory tracker" - : source === "forester-apis" - ? "Forester API aggregate" - : source === "rpc" - ? 
"RPC scan" - : "Unavailable"; - - const rows: { - key: string; - label: string; - description: string; - stats?: CompressibleTypeStats; - fallbackCount?: number; - contributesToTotal?: boolean; - }[] = [ - { - key: "ctoken", - label: "Light Token", - description: "Fungible decompressed token accounts (includes ATA)", - stats: data.ctoken, - fallbackCount: data.ctoken_count, - contributesToTotal: true, - }, - { - key: "ata", - label: "ATA", - description: "Associated token accounts", - stats: data.ata, - fallbackCount: data.ata_count, - contributesToTotal: false, - }, - { - key: "pda", - label: "PDA", - description: "Program-derived account records", - stats: data.pda, - fallbackCount: data.pda_count, - contributesToTotal: true, - }, - { - key: "mint", - label: "Mint", - description: "Non-fungible mint accounts", - stats: data.mint, - fallbackCount: data.mint_count, - contributesToTotal: true, - }, + const rows = [ + { label: "Token", stats: data.ctoken, fallback: data.ctoken_count }, + { label: "Mint", stats: data.mint, fallback: data.mint_count }, + { label: "PDA", stats: data.pda, fallback: data.pda_count }, ]; - const inferredTracked = rows.reduce((acc, row) => { - if (row.contributesToTotal === false) { - return acc; - } - const tracked = row.stats?.tracked ?? row.fallbackCount ?? 0; - return acc + tracked; - }, 0); - - const totalTracked = data.total_tracked ?? inferredTracked; - const totalReady = data.total_ready; - const totalWaiting = data.total_waiting; - const totalUnknown = - totalReady != null && totalWaiting != null - ? Math.max(0, totalTracked - totalReady - totalWaiting) - : totalTracked; - - const noData = - !data.enabled && - rows.every((row) => (row.stats?.tracked ?? row.fallbackCount ?? 0) === 0); + const hasReadiness = rows.some((r) => r.stats?.ready != null); + const hasCompressed = rows.some((r) => r.stats?.compressed != null); - const upstreamTotal = data.upstreams?.length ?? 
0; - const upstreamHealthy = data.upstreams?.filter((u) => u.ok).length ?? 0; - const nextGlobalReadySlot = rows - .map((row) => row.stats?.next_ready_slot) - .filter((slot): slot is number => slot != null) - .reduce((min, slot) => (min == null ? slot : Math.min(min, slot)), null); - - const notices: { level: "info" | "warn" | "error"; message: string }[] = []; - if (totalTracked === 0) { - notices.push({ - level: "info", - message: "No compressible accounts are currently tracked.", - }); - } - if ((totalReady ?? 0) > 0) { - notices.push({ - level: "info", - message: - "Funding status: some tracked accounts are currently eligible for compression.", - }); - } else if ((totalWaiting ?? 0) > 0) { - notices.push({ - level: "info", - message: `Funding status: tracked accounts are currently funded. Next eligibility: ${formatSlotCountdown( - data.current_slot, - nextGlobalReadySlot - )}.`, - }); - } - if (upstreamTotal > 0 && upstreamHealthy < upstreamTotal) { - notices.push({ - level: "error", - message: `Only ${upstreamHealthy}/${upstreamTotal} upstream forester API endpoints are healthy.`, - }); + if (data.error) { + return ( +
+

{data.error}

+
+ ); } return ( -
-
-
- - Source: {sourceLabel} - - - Updated: {formatAgeFromUnixSeconds(data.cached_at)} - - - Slot:{" "} - {data.current_slot != null ? formatNumber(data.current_slot) : "unknown"} - - {data.refresh_interval_secs != null && ( - - Refresh: {data.refresh_interval_secs}s - - )} - {upstreamTotal > 0 && ( - - Upstreams: {upstreamHealthy}/{upstreamTotal} - - )} -
- - {noData && ( -

- No compressible account data available yet. -

- )} - - {data.error && ( -
- {data.error} -
- )} - - {data.note && ( -
- {data.note} -
- )} - - {notices.map((notice, idx) => ( -
- {notice.message} -
- ))} - - {data.upstreams && data.upstreams.length > 0 && ( -
-

- Upstream Forester APIs -

-
- {data.upstreams.map((upstream) => ( -
- - {truncateAddress(upstream.base_url, 12)} - - - {upstream.ok ? "ok" : "down"} - -
- ))} -
-
- )} -
- -
- {[ - { - label: "Tracked Accounts", - value: formatNumber(totalTracked), - desc: "Total visible across account types", - }, - { - label: "Ready Now", - value: totalReady != null ? formatNumber(totalReady) : "unknown", - desc: "Eligible for compression at current slot", - }, - { - label: "Waiting", - value: totalWaiting != null ? formatNumber(totalWaiting) : "unknown", - desc: "Not yet at compressible slot", - }, - { - label: "Unknown", - value: totalUnknown != null ? formatNumber(totalUnknown) : "unknown", - desc: "Tracked with missing readiness context", - }, - ].map((card) => ( -
-
{card.label}
-
- {card.value} -
-
{card.desc}
-
- ))} -
-

- ATA counts are included in Light Token totals. -

- -
- {rows.map((row) => { - const tracked = row.stats?.tracked ?? row.fallbackCount; - const ready = row.stats?.ready; - const waiting = row.stats?.waiting; - const unknown = - tracked != null && ready != null && waiting != null - ? Math.max(0, tracked - ready - waiting) - : tracked; - - return ( -
-
{row.label}
-
{row.description}
-
-
- Tracked - - {tracked != null ? formatNumber(tracked) : "unknown"} - -
-
- Ready now - - {ready != null ? formatNumber(ready) : "unknown"} - -
-
- Waiting - - {waiting != null ? formatNumber(waiting) : "unknown"} - -
-
- Unknown - - {unknown != null ? formatNumber(unknown) : "unknown"} - -
-
- Next ready - - {formatSlotCountdown( - data.current_slot, - row.stats?.next_ready_slot - )} - -
-
-
- ); - })} +
+
+ + + + + {hasCompressed && ( + + )} + + {hasReadiness && ( + <> + + + + + )} + + + + {rows.map((row) => { + const tracked = row.stats?.tracked ?? row.fallback; + const compressed = row.stats?.compressed; + if ((tracked == null || tracked === 0) && (compressed == null || compressed === 0)) return null; + return ( + + + {hasCompressed && ( + + )} + + {hasReadiness && ( + <> + + + + + )} + + ); + })} + +
TypeCompressedPendingCompressibleCooling downNext ready
{row.label} + {compressed != null ? formatNumber(compressed) : "-"} + {formatNumber(tracked ?? 0)} + {row.stats?.ready != null ? formatNumber(row.stats.ready) : "-"} + + {row.stats?.waiting != null ? formatNumber(row.stats.waiting) : "-"} + + {formatSlotCountdown(data.current_slot, row.stats?.next_ready_slot, row.stats?.ready, row.stats?.waiting)} +
{data.pda_programs && data.pda_programs.length > 0 && (
-

- PDA Program Breakdown -

-
- {data.pda_programs.map((program) => ( -
-
- - {truncateAddress(program.program_id, 6)} - - - tracked {formatNumber(program.tracked)} - -
-
- - ready{" "} - {program.ready != null ? formatNumber(program.ready) : "unknown"} - - - waiting{" "} - {program.waiting != null - ? formatNumber(program.waiting) - : "unknown"} - - - next{" "} - {formatSlotCountdown(data.current_slot, program.next_ready_slot)} - -
+

PDA Programs

+
+ {data.pda_programs.map((p) => ( +
+ + {p.program_id.slice(0, 8)}...{p.program_id.slice(-4)} + + {formatNumber(p.tracked)} pending
))}
)} + +

+ Source: {data.source ?? "unknown"} + {data.cached_at ? ` \u00b7 ${formatAgeFromUnixSeconds(data.cached_at)}` : ""} + {data.note ? ` \u00b7 ${data.note}` : ""} +

); } diff --git a/forester/dashboard/src/components/EpochCard.tsx b/forester/dashboard/src/components/EpochCard.tsx index 78d9e11022..58fd848583 100644 --- a/forester/dashboard/src/components/EpochCard.tsx +++ b/forester/dashboard/src/components/EpochCard.tsx @@ -1,59 +1,112 @@ +"use client"; + +import { useState, useEffect, useRef } from "react"; import type { ForesterStatus } from "@/types/forester"; import { ProgressBar } from "./ProgressBar"; -import { formatPercentage, slotsToTime } from "@/lib/utils"; +import { formatPercentage, formatNumber } from "@/lib/utils"; interface EpochCardProps { status: ForesterStatus; } +const SLOT_MS = 400; + +function useCountdown(slots: number): string { + const [totalMs, setTotalMs] = useState(slots * SLOT_MS); + const slotsRef = useRef(slots); + + // Reset when slot count changes significantly (new data from API) + useEffect(() => { + if (Math.abs(slots - slotsRef.current) > 5) { + setTotalMs(slots * SLOT_MS); + slotsRef.current = slots; + } + }, [slots]); + + useEffect(() => { + const interval = setInterval(() => { + setTotalMs((prev) => Math.max(0, prev - 1000)); + }, 1000); + return () => clearInterval(interval); + }, []); + + const totalSec = Math.max(0, Math.floor(totalMs / 1000)); + const hours = Math.floor(totalSec / 3600); + const mins = Math.floor((totalSec % 3600) / 60); + const secs = totalSec % 60; + + if (hours > 0) return `${hours}h ${mins}m ${secs}s`; + if (mins > 0) return `${mins}m ${secs}s`; + return `${secs}s`; +} + export function EpochCard({ status }: EpochCardProps) { + const registrationClosingSoon = + status.slots_until_next_registration < 5000 && + status.registration_epoch_foresters.length === 0; + + const remainingSlots = status.active_phase_length - status.active_epoch_progress; + const epochCountdown = useCountdown(remainingSlots); + const regCountdown = useCountdown(status.slots_until_next_registration); + return (
-

- Epoch Status -

-
+

Epoch Timeline

+
+ {/* Active epoch */}
- Active Epoch {status.current_active_epoch} - - {formatPercentage(status.active_epoch_progress_percentage)} + + + Active Epoch {status.current_active_epoch} + {formatPercentage(status.active_epoch_progress_percentage)}
+
+ {formatNumber(status.active_epoch_progress)} / {formatNumber(status.active_phase_length)} slots + {epochCountdown} +
-
-
- Next Epoch -

{status.hours_until_next_epoch}h

+ + {/* Registration epoch */} +
+
+ + + Registration Epoch {status.current_registration_epoch} + + + {regCountdown} +
-
- Registration Epoch -

{status.current_registration_epoch}

+
+ {status.registration_epoch_foresters.length} forester{status.registration_epoch_foresters.length !== 1 ? "s" : ""} registered + {registrationClosingSoon && ( + — closing soon, no foresters! + )}
+
+ + {/* Stats grid */} +
- Next Registration -

- {slotsToTime(status.slots_until_next_registration)} -

+ Next epoch +

{epochCountdown}

- Light Slot + Light slot

{status.current_light_slot != null ? `${status.current_light_slot} / ${status.total_light_slots}` : "N/A"}

+
+ Slot length +

{formatNumber(status.light_slot_length)} slots

+
- {status.current_light_slot != null && - status.slots_until_next_light_slot != null && ( -
- Next light slot in{" "} - {slotsToTime(status.slots_until_next_light_slot)} ( - {status.slots_until_next_light_slot} slots) -
- )}
); diff --git a/forester/dashboard/src/components/ForesterList.tsx b/forester/dashboard/src/components/ForesterList.tsx index b6a56736aa..bb9f4c09e2 100644 --- a/forester/dashboard/src/components/ForesterList.tsx +++ b/forester/dashboard/src/components/ForesterList.tsx @@ -1,38 +1,163 @@ import type { ForesterInfo } from "@/types/forester"; -import { truncateAddress, formatSol } from "@/lib/utils"; +import { truncateAddress, formatSol, explorerUrl } from "@/lib/utils"; +import type { BalanceTrend } from "@/hooks/useBalanceHistory"; interface ForesterListProps { + active: ForesterInfo[]; + registering: ForesterInfo[]; + activeEpoch: number; + registrationEpoch: number; + getTrend?: (authority: string, hours?: number) => BalanceTrend | null; +} + +function BalanceTrendBadge({ trend }: { trend: BalanceTrend }) { + const ratePerHour = trend.hourlyRate; + const burning = ratePerHour < -0.005; // more than 0.005 SOL/hr + const fast = ratePerHour < -0.05; // more than 0.05 SOL/hr + + if (!burning) return null; + + const hoursLeft = + ratePerHour < 0 ? Math.abs(trend.current / ratePerHour) : Infinity; + const rateStr = Math.abs(ratePerHour).toFixed(3); + + return ( + + -{rateStr}/hr + {hoursLeft < 48 && ( + (~{hoursLeft.toFixed(0)}h left) + )} + + ); +} + +function ForesterRow({ + info, + trend, +}: { + info: ForesterInfo; + trend: BalanceTrend | null; +}) { + const low = info.balance_sol != null && info.balance_sol < 0.1; + return ( +
+ + {truncateAddress(info.authority, 6)} + +
+ + {formatSol(info.balance_sol)} + + {trend && } +
+
+ ); +} + +function ForesterSection({ + title, + epoch, + foresters, + badgeClass, + emptyMessage, + getTrend, +}: { title: string; + epoch: number; foresters: ForesterInfo[]; + badgeClass: string; + emptyMessage: string; + getTrend?: (authority: string, hours?: number) => BalanceTrend | null; +}) { + return ( +
+
+

{title}

+ + epoch {epoch} + + + {foresters.length} forester{foresters.length !== 1 ? "s" : ""} + +
+ {foresters.length === 0 ? ( +

{emptyMessage}

+ ) : ( +
+ {foresters.map((f, i) => ( + + ))} +
+ )} +
+ ); } -export function ForesterList({ title, foresters }: ForesterListProps) { +export function ForesterList({ + active, + registering, + activeEpoch, + registrationEpoch, + getTrend, +}: ForesterListProps) { + // Foresters registered for next epoch but not in current + const newNextEpoch = registering.filter( + (f) => !active.some((a) => a.authority === f.authority) + ); + // Foresters in current epoch that haven't re-registered for next + const notReRegistered = active.filter( + (f) => !registering.some((r) => r.authority === f.authority) + ); + return (
-

- {title}{" "} - ({foresters.length}) -

- {foresters.length === 0 ? ( -

No foresters registered

- ) : ( -
- {foresters.map((f, i) => { - const low = f.balance_sol != null && f.balance_sol < 0.1; - return ( -
- - {truncateAddress(f.authority, 6)} - - - {formatSol(f.balance_sol)} - -
- ); - })} +

Foresters

+
+ + +
+ {/* Continuity warnings */} + {(notReRegistered.length > 0 || newNextEpoch.length > 0) && ( +
+ {notReRegistered.length > 0 && ( +

+ {notReRegistered.length} active forester{notReRegistered.length !== 1 ? "s" : ""} not yet registered for next epoch:{" "} + {notReRegistered.map((f) => truncateAddress(f.authority, 4)).join(", ")} +

+ )} + {newNextEpoch.length > 0 && ( +

+ {newNextEpoch.length} new forester{newNextEpoch.length !== 1 ? "s" : ""} joining next epoch:{" "} + {newNextEpoch.map((f) => truncateAddress(f.authority, 4)).join(", ")} +

+ )}
)}
diff --git a/forester/dashboard/src/components/MetricsPanel.tsx b/forester/dashboard/src/components/MetricsPanel.tsx index 7ce372b42c..571ce29e20 100644 --- a/forester/dashboard/src/components/MetricsPanel.tsx +++ b/forester/dashboard/src/components/MetricsPanel.tsx @@ -1,125 +1,30 @@ import type { MetricsResponse } from "@/types/forester"; -import { formatNumber, formatSol } from "@/lib/utils"; +import { formatNumber } from "@/lib/utils"; interface MetricsPanelProps { metrics: MetricsResponse; } export function MetricsPanel({ metrics }: MetricsPanelProps) { - const totalTx = Object.values(metrics.transactions_processed_total).reduce( - (a, b) => a + b, - 0 - ); const rates = Object.entries(metrics.transaction_rate); - const balances = Object.entries(metrics.forester_balances); - const queues = Object.entries(metrics.queue_lengths); - - return ( -
-
- - 0 - ? new Date(metrics.last_run_timestamp * 1000).toLocaleString() - : "N/A" - } - /> - -
- - {rates.length > 0 && ( -
-
- {rates.map(([epoch, rate]) => ( -
-
Epoch {epoch}
-
- {rate.toFixed(2)} tx/s -
-
- {formatNumber( - metrics.transactions_processed_total[epoch] ?? 0 - )}{" "} - total -
-
- ))} -
-
- )} - - {balances.length > 0 && ( -
-
- {balances.map(([pubkey, balance]) => ( -
- - {pubkey.slice(0, 8)}...{pubkey.slice(-4)} - - - {formatSol(balance)} - -
- ))} -
-
- )} - - {queues.length > 0 && ( -
-
- {queues.map(([tree, length]) => ( -
- - {tree.slice(0, 8)}...{tree.slice(-4)} - - - {formatNumber(length)} - -
- ))} -
-
- )} -
- ); -} -function StatCard({ label, value }: { label: string; value: string }) { - return ( -
-
{label}
-
{value}
-
- ); -} + if (rates.length === 0) return null; -function Section({ - title, - children, -}: { - title: string; - children: React.ReactNode; -}) { return (
-

{title}

- {children} +
+ {rates.map(([epoch, rate]) => ( +
+
Epoch {epoch}
+
+ {rate.toFixed(2)} tx/s +
+
+ {formatNumber(metrics.transactions_processed_total[epoch] ?? 0)} total +
+
+ ))} +
); } diff --git a/forester/dashboard/src/components/PhotonStatsPanel.tsx b/forester/dashboard/src/components/PhotonStatsPanel.tsx new file mode 100644 index 0000000000..52ea820914 --- /dev/null +++ b/forester/dashboard/src/components/PhotonStatsPanel.tsx @@ -0,0 +1,106 @@ +import type { PhotonStats } from "@/types/forester"; +import { formatNumber, formatAgeFromUnixSeconds } from "@/lib/utils"; + +interface PhotonStatsPanelProps { + data: PhotonStats; +} + +function compactNumber(n: number): string { + if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`; + if (n >= 1_000) return `${(n / 1_000).toFixed(1)}K`; + return formatNumber(n); +} + +export function PhotonStatsPanel({ data }: PhotonStatsPanelProps) { + if (data.error) { + return ( +
+

{data.error}

+
+ ); + } + + const rows = [ + { + label: "Compressed Accounts", + total: data.accounts.total, + active: data.accounts.active, + }, + { + label: "Token Accounts", + total: data.token_accounts.total, + active: data.token_accounts.active, + }, + { + label: "Compressed by Forester", + total: data.compressed_from_onchain.total, + active: data.compressed_from_onchain.active, + }, + ]; + + return ( +
+ {/* Summary cards */} +
+ + + +
+ + {/* Detail table */} +
+ + + + + + + + + + + {rows.map((row) => ( + + + + + + + ))} + +
CategoryTotalActiveSpent
+ {row.label} + + {formatNumber(row.total)} + + {formatNumber(row.active)} + + {formatNumber(row.total - row.active)} +
+
+ +

+ Source: photon DB + {data.timestamp + ? ` · ${formatAgeFromUnixSeconds(data.timestamp)}` + : ""} +

+
+ ); +} + +function MiniStat({ label, value }: { label: string; value: number }) { + return ( +
+
+ {label} +
+
+ {compactNumber(value)} +
+
+ ); +} diff --git a/forester/dashboard/src/components/QueuePressureChart.tsx b/forester/dashboard/src/components/QueuePressureChart.tsx index 5b6d15eefa..d06b8e6943 100644 --- a/forester/dashboard/src/components/QueuePressureChart.tsx +++ b/forester/dashboard/src/components/QueuePressureChart.tsx @@ -6,42 +6,44 @@ interface QueuePressureChartProps { } const entries: { - key: keyof AggregateQueueStats; + batchKey: keyof AggregateQueueStats; + itemKey: keyof AggregateQueueStats; label: string; color: string; }[] = [ { - key: "state_v1_total_pending", - label: "State V1", - color: "bg-purple-500", - }, - { - key: "state_v2_input_pending", + batchKey: "state_v2_input_pending_batches", + itemKey: "state_v2_input_pending", label: "State V2 Input", color: "bg-indigo-500", }, { - key: "state_v2_output_pending", + batchKey: "state_v2_output_pending_batches", + itemKey: "state_v2_output_pending", label: "State V2 Output", color: "bg-indigo-300", }, { - key: "address_v1_total_pending", - label: "Addr V1", - color: "bg-teal-500", - }, - { - key: "address_v2_input_pending", - label: "Addr V2 Input", + batchKey: "address_v2_input_pending_batches", + itemKey: "address_v2_input_pending", + label: "Addr V2", color: "bg-cyan-500", }, ]; +const v1Entries: { + key: keyof AggregateQueueStats; + label: string; + color: string; +}[] = [ + { key: "state_v1_total_pending", label: "State V1", color: "bg-purple-500" }, + { key: "address_v1_total_pending", label: "Addr V1", color: "bg-teal-500" }, +]; + export function QueuePressureChart({ stats }: QueuePressureChartProps) { - const maxVal = Math.max( - ...entries.map((e) => stats[e.key]), - 1 - ); + const allBatches = entries.map((e) => stats[e.batchKey]); + const allV1 = v1Entries.map((e) => stats[e.key]); + const maxVal = Math.max(...allBatches, ...allV1, 1); return (
@@ -50,13 +52,36 @@ export function QueuePressureChart({ stats }: QueuePressureChartProps) {
{entries.map((e) => { + const batches = stats[e.batchKey]; + const items = stats[e.itemKey]; + const pct = (batches / maxVal) * 100; + return ( +
+
+ {e.label} + + {formatNumber(batches)}{batches !== 1 ? " batches" : " batch"} + ({formatNumber(items)} items) + +
+
+
+
+
+ ); + })} + {v1Entries.map((e) => { const val = stats[e.key]; + if (val === 0) return null; const pct = (val / maxVal) * 100; return (
{e.label} - {formatNumber(val)} + {formatNumber(val)} items
-
-

Light Protocol

-

Forester Dashboard

-
- - - ); -} diff --git a/forester/dashboard/src/components/TreeTable.tsx b/forester/dashboard/src/components/TreeTable.tsx index c49d365c99..2a82faaa8f 100644 --- a/forester/dashboard/src/components/TreeTable.tsx +++ b/forester/dashboard/src/components/TreeTable.tsx @@ -10,6 +10,7 @@ import { formatNumber, formatPercentage, treeTypeColor, + explorerUrl, } from "@/lib/utils"; interface TreeTableProps { @@ -18,6 +19,20 @@ interface TreeTableProps { currentLightSlot: number | null; } +function pendingColor(count: number, isBatches: boolean): string { + if (count === 0) return "text-gray-400"; + if (isBatches) { + // V2: batches — 1 is normal, 2+ is busy, 3+ is hot + if (count >= 3) return "text-red-600 font-medium"; + if (count >= 2) return "text-amber-600 font-medium"; + return "text-gray-700"; + } + // V1: item count + if (count >= 500) return "text-red-600 font-medium"; + if (count >= 100) return "text-amber-600 font-medium"; + return "text-gray-700"; +} + type SortKey = "type" | "fullness" | "pending"; type FilterType = "all" | "StateV1" | "StateV2" | "AddressV1" | "AddressV2"; @@ -99,7 +114,9 @@ export function TreeTable({
-
+ + +
@@ -131,8 +148,17 @@ export function TreeTable({ color={treeTypeColor(tree.tree_type)} /> - @@ -206,38 +241,106 @@ export function TreeTable({ import { Fragment } from "react"; +// Distinct colors for up to 8 foresters; cycles if more +const FORESTER_COLORS = [ + "bg-emerald-400", + "bg-blue-400", + "bg-amber-400", + "bg-rose-400", + "bg-violet-400", + "bg-cyan-400", + "bg-orange-400", + "bg-pink-400", +]; + +const FORESTER_HEX = [ + "#34d399", + "#60a5fa", + "#fbbf24", + "#fb7185", + "#a78bfa", + "#22d3ee", + "#fb923c", + "#f472b6", +]; + +function foresterColor(index: number): string { + return FORESTER_COLORS[index % FORESTER_COLORS.length]; +} + function ScheduleGrid({ schedule, currentSlot, + foresters, }: { schedule: (number | null)[]; currentSlot: number | null; + foresters: ForesterInfo[]; }) { if (schedule.length === 0) return -; // Show a compact view: only around the current slot const start = currentSlot != null ? Math.max(0, currentSlot - 2) : 0; - const visible = schedule.slice(start, start + 8); + const visible = schedule.slice(start, start + 12); return ( -
+
{visible.map((slot, i) => { const idx = start + i; const isCurrent = idx === currentSlot; + const foresterName = + slot != null && foresters[slot] + ? truncateAddress(foresters[slot].authority, 3) + : slot != null + ? `#${slot}` + : "unassigned"; return (
); })} - {schedule.length > 8 && ( - +{schedule.length - 8} + {schedule.length > start + 12 && ( + +{schedule.length - start - 12} )}
); } + +export function ForesterScheduleLegend({ foresters }: { foresters: ForesterInfo[] }) { + if (foresters.length === 0) return null; + return ( +
+ Schedule: + {foresters.map((f, i) => ( +
+ + ))} +
+
+ unassigned +
+
+
+ current +
+
+ ); +} diff --git a/forester/dashboard/src/hooks/useBalanceHistory.ts b/forester/dashboard/src/hooks/useBalanceHistory.ts new file mode 100644 index 0000000000..1ed0773e26 --- /dev/null +++ b/forester/dashboard/src/hooks/useBalanceHistory.ts @@ -0,0 +1,99 @@ +import { useEffect, useRef, useCallback } from "react"; +import type { ForesterInfo } from "@/types/forester"; + +interface BalanceSnapshot { + timestamp: number; // unix ms + balances: Record; // authority -> balance_sol +} + +const STORAGE_KEY = "forester_balance_history"; +const MAX_HISTORY_HOURS = 24; +const SNAPSHOT_INTERVAL_MS = 60_000; // store at most once per minute + +function loadHistory(): BalanceSnapshot[] { + try { + const raw = localStorage.getItem(STORAGE_KEY); + if (!raw) return []; + const data = JSON.parse(raw) as BalanceSnapshot[]; + const cutoff = Date.now() - MAX_HISTORY_HOURS * 3600_000; + return data.filter((s) => s.timestamp > cutoff); + } catch { + return []; + } +} + +function saveHistory(history: BalanceSnapshot[]) { + try { + const cutoff = Date.now() - MAX_HISTORY_HOURS * 3600_000; + const trimmed = history.filter((s) => s.timestamp > cutoff); + localStorage.setItem(STORAGE_KEY, JSON.stringify(trimmed)); + } catch { + // storage full or unavailable + } +} + +export interface BalanceTrend { + current: number; + hourlyRate: number; // SOL per hour (negative = burning) + hoursTracked: number; + oldest: number; +} + +export function useBalanceHistory(foresters: ForesterInfo[]) { + const lastSnapshot = useRef(0); + + // Record a snapshot if enough time has passed + useEffect(() => { + if (foresters.length === 0) return; + const now = Date.now(); + if (now - lastSnapshot.current < SNAPSHOT_INTERVAL_MS) return; + lastSnapshot.current = now; + + const balances: Record = {}; + for (const f of foresters) { + if (f.balance_sol != null) { + balances[f.authority] = f.balance_sol; + } + } + if (Object.keys(balances).length === 0) return; + + const history = loadHistory(); + history.push({ 
timestamp: now, balances }); + saveHistory(history); + }, [foresters]); + + const getTrend = useCallback( + (authority: string, hours: number = 6): BalanceTrend | null => { + const forester = foresters.find((f) => f.authority === authority); + if (!forester || forester.balance_sol == null) return null; + + const history = loadHistory(); + const cutoff = Date.now() - hours * 3600_000; + const relevant = history + .filter((s) => s.timestamp > cutoff && s.balances[authority] != null) + .sort((a, b) => a.timestamp - b.timestamp); + + if (relevant.length < 2) return null; + + const oldest = relevant[0]; + const newest = relevant[relevant.length - 1]; + const timeDiffHours = + (newest.timestamp - oldest.timestamp) / 3600_000; + if (timeDiffHours < 0.05) return null; // less than 3 minutes + + const balanceDiff = + newest.balances[authority] - oldest.balances[authority]; + const hourlyRate = balanceDiff / timeDiffHours; + + return { + current: forester.balance_sol, + hourlyRate, + hoursTracked: timeDiffHours, + oldest: oldest.balances[authority], + }; + }, + [foresters] + ); + + return { getTrend }; +} diff --git a/forester/dashboard/src/hooks/usePhotonStats.ts b/forester/dashboard/src/hooks/usePhotonStats.ts new file mode 100644 index 0000000000..14207ae0b9 --- /dev/null +++ b/forester/dashboard/src/hooks/usePhotonStats.ts @@ -0,0 +1,18 @@ +import useSWR from "swr"; +import type { PhotonStats } from "@/types/forester"; + +async function fetchPhotonStats(): Promise { + const res = await fetch("/api/photon-stats", { cache: "no-store" }); + if (!res.ok) { + const body = await res.json().catch(() => ({})); + throw new Error(body.error || `HTTP ${res.status}`); + } + return res.json(); +} + +export function usePhotonStats() { + return useSWR("photon-stats", fetchPhotonStats, { + refreshInterval: 30_000, + revalidateOnFocus: true, + }); +} diff --git a/forester/dashboard/src/lib/utils.ts b/forester/dashboard/src/lib/utils.ts index 72c581fb68..a505fe2fea 100644 --- 
a/forester/dashboard/src/lib/utils.ts +++ b/forester/dashboard/src/lib/utils.ts @@ -3,6 +3,18 @@ export function truncateAddress(addr: string, chars = 4): string { return `${addr.slice(0, chars)}...${addr.slice(-chars)}`; } +export function explorerUrl(address: string): string { + let network = "mainnet"; + if (typeof window !== "undefined") { + const host = window.location.hostname; + if (host.includes("-devnet")) network = "devnet"; + else if (host.includes("-mainnet")) network = "mainnet"; + else network = process.env.NEXT_PUBLIC_SOLANA_NETWORK ?? "mainnet"; + } + const cluster = network === "mainnet" ? "" : `?cluster=${network}`; + return `https://explorer.solana.com/address/${address}${cluster}`; +} + export function formatSol(lamports: number | null | undefined): string { if (lamports == null) return "-"; return `${lamports.toFixed(4)} SOL`; @@ -40,9 +52,15 @@ export function formatAgeFromUnixSeconds(unixTs: number | null | undefined): str export function formatSlotCountdown( currentSlot: number | null | undefined, - nextReadySlot: number | null | undefined + nextReadySlot: number | null | undefined, + ready?: number, + waiting?: number ): string { - if (nextReadySlot == null) return "n/a"; + if (nextReadySlot == null) { + // No waiting accounts — distinguish "all caught up" from "nothing tracked" + if (ready != null && ready > 0 && waiting === 0) return "caught up"; + return "—"; + } if (currentSlot == null) return `slot ${nextReadySlot.toLocaleString()}`; if (currentSlot > nextReadySlot) return "ready now"; const remaining = nextReadySlot - currentSlot; diff --git a/forester/dashboard/src/types/forester.ts b/forester/dashboard/src/types/forester.ts index 8f932fbd42..fc094aec26 100644 --- a/forester/dashboard/src/types/forester.ts +++ b/forester/dashboard/src/types/forester.ts @@ -46,6 +46,9 @@ export interface AggregateQueueStats { state_v2_output_pending: number; address_v1_total_pending: number; address_v2_input_pending: number; + 
state_v2_input_pending_batches: number; + state_v2_output_pending_batches: number; + address_v2_input_pending_batches: number; } export interface ForesterStatus { @@ -68,6 +71,7 @@ export interface ForesterStatus { total_trees: number; active_trees: number; rolled_over_trees: number; + total_pending_batches: number; total_pending_items: number; aggregate_queue_stats: AggregateQueueStats; } @@ -105,6 +109,7 @@ export interface CompressibleResponse { export interface CompressibleTypeStats { tracked: number; + compressed?: number; ready?: number; waiting?: number; next_ready_slot?: number; @@ -113,6 +118,7 @@ export interface CompressibleTypeStats { export interface PdaProgramStats { program_id: string; tracked: number; + compressed?: number; ready?: number; waiting?: number; next_ready_slot?: number; @@ -126,3 +132,15 @@ export interface CompressibleUpstreamStatus { cached_at?: number; error?: string; } + +export interface PhotonStats { + accounts: { total: number; active: number }; + token_accounts: { total: number; active: number }; + compressed_from_onchain: { + total: number; + active: number; + by_owner: { owner: string; total: number; active: number }[]; + }; + timestamp: number; + error?: string; +} diff --git a/forester/src/api_server.rs b/forester/src/api_server.rs index 2305bc306e..bbe74657a2 100644 --- a/forester/src/api_server.rs +++ b/forester/src/api_server.rs @@ -76,6 +76,8 @@ pub struct CompressibleResponse { pub struct CompressibleTypeStats { pub tracked: usize, #[serde(skip_serializing_if = "Option::is_none")] + pub compressed: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub ready: Option, #[serde(skip_serializing_if = "Option::is_none")] pub waiting: Option, @@ -88,6 +90,8 @@ pub struct PdaProgramStats { pub program_id: String, pub tracked: usize, #[serde(skip_serializing_if = "Option::is_none")] + pub compressed: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub ready: Option, #[serde(skip_serializing_if = 
"Option::is_none")] pub waiting: Option, @@ -162,7 +166,11 @@ impl CompressibleSnapshot { } } -fn summarize_slots(slots: I, current_slot: Option) -> CompressibleTypeStats +fn summarize_slots( + slots: I, + current_slot: Option, + compressed: Option, +) -> CompressibleTypeStats where I: IntoIterator, { @@ -185,6 +193,7 @@ where } CompressibleTypeStats { tracked, + compressed, ready: Some(ready), waiting: Some(tracked.saturating_sub(ready)), next_ready_slot, @@ -192,6 +201,7 @@ where } else { CompressibleTypeStats { tracked, + compressed, ready: None, waiting: None, next_ready_slot: None, @@ -234,20 +244,19 @@ fn summarize_ctoken_and_ata_slots( } } + let compressed = Some(tracker.total_compressed()); ( - summarize_slots(ctoken_slots, current_slot), - summarize_slots(ata_slots, current_slot), + summarize_slots(ctoken_slots, current_slot, compressed), + summarize_slots(ata_slots, current_slot, None), ) } fn aggregate_optional_sum(values: impl Iterator>) -> Option { let mut sum = 0usize; let mut seen = false; - for value in values { - if let Some(v) = value { - sum = sum.saturating_add(v); - seen = true; - } + for v in values.flatten() { + sum = sum.saturating_add(v); + seen = true; } if seen { Some(sum) @@ -293,6 +302,7 @@ fn aggregate_type_stats( Some(CompressibleTypeStats { tracked, + compressed: None, ready: ready_seen.then_some(ready), waiting: waiting_seen.then_some(waiting), next_ready_slot, @@ -462,6 +472,7 @@ async fn fetch_compressible_snapshot_from_foresters( let entry = pda_program_map.entry(row.program_id.clone()).or_insert( CompressibleTypeStats { tracked: 0, + compressed: None, ready: Some(0), waiting: Some(0), next_ready_slot: None, @@ -497,6 +508,7 @@ async fn fetch_compressible_snapshot_from_foresters( .map(|(program_id, stats)| PdaProgramStats { program_id, tracked: stats.tracked, + compressed: stats.compressed, ready: stats.ready, waiting: stats.waiting, next_ready_slot: stats.next_ready_slot, @@ -714,6 +726,7 @@ pub(crate) async fn 
fetch_compressible_snapshot( .iter() .map(|entry| entry.value().compressible_slot()), current_slot, + Some(tracker.total_compressed()), ) }); @@ -724,6 +737,7 @@ pub(crate) async fn fetch_compressible_snapshot( .iter() .map(|entry| entry.value().compressible_slot()), current_slot, + Some(tracker.total_compressed()), ) }); @@ -740,10 +754,11 @@ pub(crate) async fn fetch_compressible_snapshot( let mut rows: Vec = by_program .into_iter() .map(|(program_id, slots)| { - let stats = summarize_slots(slots, current_slot); + let stats = summarize_slots(slots, current_slot, None); PdaProgramStats { program_id, tracked: stats.tracked, + compressed: None, ready: stats.ready, waiting: stats.waiting, next_ready_slot: stats.next_ready_slot, @@ -826,12 +841,14 @@ pub(crate) async fn fetch_compressible_snapshot( Ok(Ok((ctoken_count, mint_count))) => { let ctoken = CompressibleTypeStats { tracked: ctoken_count, + compressed: None, ready: None, waiting: None, next_ready_slot: None, }; let mint = CompressibleTypeStats { tracked: mint_count, + compressed: None, ready: None, waiting: None, next_ready_slot: None, diff --git a/forester/src/compressible/ctoken/compressor.rs b/forester/src/compressible/ctoken/compressor.rs index 79d9898927..54be0ddc6f 100644 --- a/forester/src/compressible/ctoken/compressor.rs +++ b/forester/src/compressible/ctoken/compressor.rs @@ -211,15 +211,27 @@ impl CTokenCompressor { data: instruction.data(), }; + // Mark accounts as pending before sending to prevent other cycles from + // picking them up while the tx is in-flight. 
+ let pubkeys: Vec = account_states.iter().map(|a| a.pubkey).collect(); + self.tracker.mark_pending(&pubkeys); + // Send transaction - let signature = rpc + let signature = match rpc .create_and_send_transaction( &[ix], &self.payer_keypair.pubkey(), &[&self.payer_keypair], ) .await - .map_err(|e| anyhow::anyhow!("Failed to send transaction: {}", e))?; + { + Ok(sig) => sig, + Err(e) => { + // Tx failed to send — return accounts to work pool + self.tracker.unmark_pending(&pubkeys); + return Err(anyhow::anyhow!("Failed to send transaction: {}", e)); + } + }; info!( "compress_and_close tx with ({:?}) accounts sent {}", @@ -227,23 +239,23 @@ impl CTokenCompressor { signature ); - // Wait for confirmation before removing from tracker + // Wait for confirmation let confirmed = rpc .confirm_transaction(signature) .await .map_err(|e| anyhow::anyhow!("Failed to confirm transaction: {}", e))?; if confirmed { - // Only remove from tracker after confirmed for account_state in account_states { - self.tracker.remove(&account_state.pubkey); + self.tracker.remove_compressed(&account_state.pubkey); } info!("compress_and_close tx confirmed: {}", signature); Ok(signature) } else { - // Transaction not confirmed - keep accounts in tracker for retry + // Transaction not confirmed — return accounts to work pool for retry + self.tracker.unmark_pending(&pubkeys); Err(anyhow::anyhow!( - "compress_and_close tx not confirmed: {} - accounts kept in tracker for retry", + "compress_and_close tx not confirmed: {} - accounts returned to work pool", signature )) } diff --git a/forester/src/compressible/ctoken/state.rs b/forester/src/compressible/ctoken/state.rs index 5dbc5b9961..b9e2c0d112 100644 --- a/forester/src/compressible/ctoken/state.rs +++ b/forester/src/compressible/ctoken/state.rs @@ -1,5 +1,7 @@ +use std::sync::atomic::AtomicU64; + use borsh::BorshDeserialize; -use dashmap::DashMap; +use dashmap::{DashMap, DashSet}; use light_compressible::rent::{get_rent_exemption_lamports, 
SLOTS_PER_EPOCH}; use light_token_interface::state::Token; use solana_sdk::pubkey::Pubkey; @@ -44,12 +46,16 @@ fn calculate_compressible_slot(account: &Token, lamports: u64, account_size: usi #[derive(Debug)] pub struct CTokenAccountTracker { accounts: DashMap, + compressed_count: AtomicU64, + pending: DashSet, } impl CTokenAccountTracker { pub fn new() -> Self { Self { accounts: DashMap::new(), + compressed_count: AtomicU64::new(0), + pending: DashSet::new(), } } @@ -126,11 +132,23 @@ impl CompressibleTracker for CTokenAccountTracker { fn accounts(&self) -> &DashMap { &self.accounts } + + fn compressed_counter(&self) -> &AtomicU64 { + &self.compressed_count + } + + fn pending(&self) -> &DashSet { + &self.pending + } } impl Default for CTokenAccountTracker { fn default() -> Self { - Self::new() + Self { + accounts: DashMap::new(), + compressed_count: AtomicU64::new(0), + pending: DashSet::new(), + } } } @@ -142,7 +160,27 @@ impl SubscriptionHandler for CTokenAccountTracker { data: &[u8], lamports: u64, ) -> Result<()> { - self.update_from_account(pubkey, data, lamports) + // If account data is empty (account was closed), remove from tracker + if data.is_empty() { + if self.remove(&pubkey).is_some() { + debug!("Removed closed ctoken account {} from tracker", pubkey); + } + return Ok(()); + } + match self.update_from_account(pubkey, data, lamports) { + Ok(()) => Ok(()), + Err(e) => { + // Deserialization failed — account is no longer a valid cToken, + // remove stale entry from tracker + if self.remove(&pubkey).is_some() { + warn!( + "Removed invalid ctoken account {} from tracker: {}", + pubkey, e + ); + } + Ok(()) + } + } } fn handle_removal(&self, pubkey: &Pubkey) { diff --git a/forester/src/compressible/mint/compressor.rs b/forester/src/compressible/mint/compressor.rs index 1c2cd6e317..d4409580bf 100644 --- a/forester/src/compressible/mint/compressor.rs +++ b/forester/src/compressible/mint/compressor.rs @@ -10,6 +10,7 @@ use futures::StreamExt; use 
light_client::{indexer::Indexer, rpc::Rpc}; use solana_sdk::{ instruction::Instruction, + pubkey::Pubkey, signature::{Keypair, Signature}, signer::Signer, }; @@ -99,21 +100,29 @@ impl MintCompressor { instructions.len() ); + // Mark as pending before sending + let pubkeys: Vec = mint_states.iter().map(|s| s.pubkey).collect(); + self.tracker.mark_pending(&pubkeys); + // Send all instructions in a single transaction let mut rpc = self.rpc_pool.get_connection().await?; - let signature = rpc + let signature = match rpc .create_and_send_transaction( &instructions, &self.payer_keypair.pubkey(), &[&self.payer_keypair], ) .await - .map_err(|e| { - anyhow::anyhow!( + { + Ok(sig) => sig, + Err(e) => { + self.tracker.unmark_pending(&pubkeys); + return Err(anyhow::anyhow!( "Failed to send batched CompressAndCloseMint transaction: {:?}", e - ) - })?; + )); + } + }; info!( "Batched CompressAndCloseMint tx for {} mints sent: {}", @@ -121,22 +130,22 @@ impl MintCompressor { signature ); - // Wait for confirmation before removing from tracker + // Wait for confirmation let confirmed = rpc .confirm_transaction(signature) .await .map_err(|e| anyhow::anyhow!("Failed to confirm transaction: {:?}", e))?; if confirmed { - // Only remove from tracker after confirmed for mint_state in mint_states { - self.tracker.remove(&mint_state.pubkey); + self.tracker.remove_compressed(&mint_state.pubkey); } info!("Batched CompressAndCloseMint tx confirmed: {}", signature); Ok(signature) } else { + self.tracker.unmark_pending(&pubkeys); tracing::warn!( - "Batch CompressAndCloseMint tx not confirmed: {} - accounts kept in tracker for retry", + "Batch CompressAndCloseMint tx not confirmed: {} - returned to work pool", signature ); Err(anyhow::anyhow!( @@ -171,6 +180,10 @@ impl MintCompressor { .collect(); } + // Mark all as pending upfront + let all_pubkeys: Vec = mint_states.iter().map(|s| s.pubkey).collect(); + self.tracker.mark_pending(&all_pubkeys); + // Create futures for each mint let 
compression_futures = mint_states.iter().cloned().map(|mint_state| { let compressor = self.clone(); @@ -178,6 +191,7 @@ impl MintCompressor { async move { // Check cancellation before processing if cancelled.load(Ordering::Relaxed) { + compressor.tracker.unmark_pending(&[mint_state.pubkey]); return Err((mint_state, anyhow::anyhow!("Cancelled"))); } @@ -194,9 +208,16 @@ impl MintCompressor { .collect() .await; - // Remove successfully compressed mints from tracker - for (_, mint_state) in results.iter().flatten() { - self.tracker.remove(&mint_state.pubkey); + // Remove successfully compressed mints; unmark failed ones + for result in &results { + match result { + Ok((_, mint_state)) => { + self.tracker.remove_compressed(&mint_state.pubkey); + } + Err((mint_state, _)) => { + self.tracker.unmark_pending(&[mint_state.pubkey]); + } + } } results @@ -215,8 +236,24 @@ impl MintCompressor { let mut rpc = self.rpc_pool.get_connection().await?; + // Pre-check: verify the Mint PDA still exists on-chain to avoid no-op txs + let account_info = rpc + .get_account(*mint_pda) + .await + .map_err(|e| anyhow::anyhow!("Failed to check Mint PDA {}: {:?}", mint_pda, e))?; + if account_info.is_none() { + debug!( + "Mint PDA {} no longer exists on-chain, removing from tracker", + mint_pda + ); + self.tracker.remove_compressed(mint_pda); + return Err(anyhow::anyhow!( + "Mint PDA {} already closed, skipping", + mint_pda + )); + } + // Build the CompressAndCloseMint instruction - // This is idempotent - succeeds silently if mint doesn't exist or is already compressed let ix = create_compress_and_close_mint_instruction( &mut *rpc, self.payer_keypair.pubkey(), diff --git a/forester/src/compressible/mint/state.rs b/forester/src/compressible/mint/state.rs index 4ddebb4847..616e756a35 100644 --- a/forester/src/compressible/mint/state.rs +++ b/forester/src/compressible/mint/state.rs @@ -1,5 +1,7 @@ +use std::sync::atomic::AtomicU64; + use borsh::BorshDeserialize; -use dashmap::DashMap; +use 
dashmap::{DashMap, DashSet}; use light_compressible::rent::{ get_last_funded_epoch, get_rent_exemption_lamports, SLOTS_PER_EPOCH, }; @@ -35,12 +37,16 @@ fn calculate_compressible_slot(mint: &Mint, lamports: u64, account_size: usize) #[derive(Debug)] pub struct MintAccountTracker { accounts: DashMap, + compressed_count: AtomicU64, + pending: DashSet, } impl MintAccountTracker { pub fn new() -> Self { Self { accounts: DashMap::new(), + compressed_count: AtomicU64::new(0), + pending: DashSet::new(), } } @@ -69,7 +75,13 @@ impl MintAccountTracker { }; if !mint.metadata.mint_decompressed { - debug!("Mint {} is not decompressed, skipping", pubkey); + // Mint was compressed — remove stale entry from tracker if present + if self.accounts.remove(&pubkey).is_some() { + debug!( + "Mint {} no longer decompressed, removed from tracker", + pubkey + ); + } return Ok(()); } @@ -120,11 +132,23 @@ impl CompressibleTracker for MintAccountTracker { fn accounts(&self) -> &DashMap { &self.accounts } + + fn compressed_counter(&self) -> &AtomicU64 { + &self.compressed_count + } + + fn pending(&self) -> &DashSet { + &self.pending + } } impl Default for MintAccountTracker { fn default() -> Self { - Self::new() + Self { + accounts: DashMap::new(), + compressed_count: AtomicU64::new(0), + pending: DashSet::new(), + } } } @@ -136,6 +160,13 @@ impl SubscriptionHandler for MintAccountTracker { data: &[u8], lamports: u64, ) -> Result<()> { + // If account data is empty (account was closed), remove from tracker + if data.is_empty() { + if self.accounts.remove(&pubkey).is_some() { + debug!("Removed closed Mint account {} from tracker", pubkey); + } + return Ok(()); + } self.update_from_account(pubkey, data, lamports) } diff --git a/forester/src/compressible/pda/compressor.rs b/forester/src/compressible/pda/compressor.rs index 3b5664f857..a77c99e0ba 100644 --- a/forester/src/compressible/pda/compressor.rs +++ b/forester/src/compressible/pda/compressor.rs @@ -153,6 +153,10 @@ impl PdaCompressor { 
return Vec::new(); } + // Mark all accounts as pending upfront so concurrent cycles skip them + let all_pubkeys: Vec = account_states.iter().map(|s| s.pubkey).collect(); + self.tracker.mark_pending(&all_pubkeys); + // Create futures for each account let compression_futures = account_states.iter().cloned().map(|account_state| { let compressor = self.clone(); @@ -163,6 +167,8 @@ impl PdaCompressor { async move { // Check cancellation before processing if cancelled.load(Ordering::Relaxed) { + // Unmark since we won't process this account + compressor.tracker.unmark_pending(&[account_state.pubkey]); return Err((account_state, anyhow::anyhow!("Cancelled"))); } @@ -182,9 +188,16 @@ impl PdaCompressor { .collect() .await; - // Remove successfully compressed PDAs from tracker - for (_, pda_state) in results.iter().flatten() { - self.tracker.remove(&pda_state.pubkey); + // Remove successfully compressed PDAs; unmark failed ones + for result in &results { + match result { + Ok((_, pda_state)) => { + self.tracker.remove_compressed(&pda_state.pubkey); + } + Err((pda_state, _)) => { + self.tracker.unmark_pending(&[pda_state.pubkey]); + } + } } results @@ -279,15 +292,25 @@ impl PdaCompressor { program_id ); + // Mark as pending before sending + let pubkeys: Vec = account_states.iter().map(|s| s.pubkey).collect(); + self.tracker.mark_pending(&pubkeys); + // Send single transaction - let signature = rpc + let signature = match rpc .create_and_send_transaction( &[ix], &self.payer_keypair.pubkey(), &[&self.payer_keypair], ) .await - .map_err(|e| anyhow::anyhow!("Failed to send transaction: {:?}", e))?; + { + Ok(sig) => sig, + Err(e) => { + self.tracker.unmark_pending(&pubkeys); + return Err(anyhow::anyhow!("Failed to send transaction: {:?}", e)); + } + }; info!( "Batched compress_accounts_idempotent tx for {} PDAs sent: {}", @@ -295,16 +318,15 @@ impl PdaCompressor { signature ); - // Wait for confirmation before removing from tracker + // Wait for confirmation let confirmed = rpc 
.confirm_transaction(signature) .await .map_err(|e| anyhow::anyhow!("Failed to confirm transaction: {:?}", e))?; if confirmed { - // Only remove from tracker after confirmed for state in account_states { - self.tracker.remove(&state.pubkey); + self.tracker.remove_compressed(&state.pubkey); } info!( "Batched compress_accounts_idempotent tx confirmed: {}", @@ -312,8 +334,9 @@ impl PdaCompressor { ); Ok(signature) } else { + self.tracker.unmark_pending(&pubkeys); tracing::warn!( - "compress_accounts_idempotent tx not confirmed: {} - accounts kept in tracker for retry", + "compress_accounts_idempotent tx not confirmed: {} - returned to work pool", signature ); Err(anyhow::anyhow!( @@ -342,6 +365,20 @@ impl PdaCompressor { let mut rpc = self.rpc_pool.get_connection().await?; + // Pre-check: verify the PDA still exists on-chain to avoid no-op txs + let account_info = rpc + .get_account(*pda) + .await + .map_err(|e| anyhow::anyhow!("Failed to check PDA {}: {:?}", pda, e))?; + if account_info.is_none() { + debug!( + "PDA {} no longer exists on-chain, removing from tracker", + pda + ); + self.tracker.remove_compressed(pda); + return Err(anyhow::anyhow!("PDA {} already closed, skipping", pda)); + } + // Get the compressed account let compressed_account = rpc .get_compressed_account(compressed_address, None) @@ -378,7 +415,7 @@ impl PdaCompressor { pda, program_id ); - // Send transaction + // Send transaction (pending is managed by the caller) let signature = rpc .create_and_send_transaction( &[ix], diff --git a/forester/src/compressible/pda/state.rs b/forester/src/compressible/pda/state.rs index af00bc806e..6b0aab0181 100644 --- a/forester/src/compressible/pda/state.rs +++ b/forester/src/compressible/pda/state.rs @@ -1,5 +1,7 @@ +use std::sync::atomic::AtomicU64; + use borsh::BorshDeserialize; -use dashmap::DashMap; +use dashmap::{DashMap, DashSet}; use light_account::CompressionInfo; use light_compressible::rent::{ get_last_funded_epoch, get_rent_exemption_lamports, 
SLOTS_PER_EPOCH, @@ -48,6 +50,8 @@ fn calculate_compressible_slot( pub struct PdaAccountTracker { accounts: DashMap, programs: Vec, + compressed_count: AtomicU64, + pending: DashSet, } impl PdaAccountTracker { @@ -55,6 +59,8 @@ impl PdaAccountTracker { Self { accounts: DashMap::new(), programs, + compressed_count: AtomicU64::new(0), + pending: DashSet::new(), } } @@ -110,10 +116,10 @@ impl PdaAccountTracker { }; if compression_info.is_compressed() { - debug!( - "Account {} is already compressed; skipping re-compression", - pubkey - ); + // Account was compressed — remove stale entry from tracker if present + if self.accounts.remove(&pubkey).is_some() { + debug!("PDA {} already compressed, removed from tracker", pubkey); + } return Ok(()); } @@ -150,11 +156,24 @@ impl CompressibleTracker for PdaAccountTracker { fn accounts(&self) -> &DashMap { &self.accounts } + + fn compressed_counter(&self) -> &AtomicU64 { + &self.compressed_count + } + + fn pending(&self) -> &DashSet { + &self.pending + } } impl Default for PdaAccountTracker { fn default() -> Self { - Self::new(Vec::new()) + Self { + accounts: DashMap::new(), + programs: Vec::new(), + compressed_count: AtomicU64::new(0), + pending: DashSet::new(), + } } } @@ -166,6 +185,13 @@ impl SubscriptionHandler for PdaAccountTracker { data: &[u8], lamports: u64, ) -> Result<()> { + // If account data is empty (account was closed), remove from tracker + if data.is_empty() { + if self.accounts.remove(&pubkey).is_some() { + debug!("Removed closed PDA account {} from tracker", pubkey); + } + return Ok(()); + } self.update_from_account(pubkey, program_id, data, lamports) } diff --git a/forester/src/compressible/traits.rs b/forester/src/compressible/traits.rs index d7df33abb3..a432305826 100644 --- a/forester/src/compressible/traits.rs +++ b/forester/src/compressible/traits.rs @@ -1,6 +1,8 @@ //! Shared traits for compressible account tracking. 
-use dashmap::DashMap; +use std::sync::atomic::{AtomicU64, Ordering}; + +use dashmap::{DashMap, DashSet}; use solana_sdk::pubkey::Pubkey; use crate::Result; @@ -15,18 +17,56 @@ pub trait CompressibleState: Clone + Send + Sync { } } -/// Implementors only need to provide `accounts()` - all other methods have default implementations. +/// Implementors only need to provide `accounts()`, `compressed_counter()`, +/// and `pending()` — all other methods have default implementations. pub trait CompressibleTracker: Send + Sync { fn accounts(&self) -> &DashMap; + /// Counter for total accounts successfully compressed and removed from the tracker. + fn compressed_counter(&self) -> &AtomicU64; + + /// Set of account pubkeys with in-flight compression transactions. + /// Accounts in this set are skipped by `get_ready_to_compress()`. + fn pending(&self) -> &DashSet; + fn insert(&self, state: S) { self.accounts().insert(*state.pubkey(), state); } fn remove(&self, pubkey: &Pubkey) -> Option { + self.pending().remove(pubkey); self.accounts().remove(pubkey).map(|(_, v)| v) } + /// Remove an account after successful compression, incrementing the compressed counter. + fn remove_compressed(&self, pubkey: &Pubkey) -> Option { + let removed = self.remove(pubkey); + if removed.is_some() { + self.compressed_counter().fetch_add(1, Ordering::Relaxed); + } + removed + } + + /// Total number of accounts successfully compressed since startup. + fn total_compressed(&self) -> u64 { + self.compressed_counter().load(Ordering::Relaxed) + } + + /// Mark accounts as pending (in-flight tx). They will be skipped by + /// `get_ready_to_compress()` until confirmed or returned to the pool. + fn mark_pending(&self, pubkeys: &[Pubkey]) { + for pk in pubkeys { + self.pending().insert(*pk); + } + } + + /// Return accounts to the work pool after a failed transaction. 
+ fn unmark_pending(&self, pubkeys: &[Pubkey]) { + for pk in pubkeys { + self.pending().remove(pk); + } + } + fn len(&self) -> usize { self.accounts().len() } @@ -36,9 +76,12 @@ pub trait CompressibleTracker: Send + Sync { } fn get_ready_to_compress(&self, current_slot: u64) -> Vec { + let pending = self.pending(); self.accounts() .iter() - .filter(|entry| entry.value().is_ready_to_compress(current_slot)) + .filter(|entry| { + entry.value().is_ready_to_compress(current_slot) && !pending.contains(entry.key()) + }) .map(|entry| entry.value().clone()) .collect() } diff --git a/forester/src/epoch_manager.rs b/forester/src/epoch_manager.rs index d3b3141495..10e1bb3271 100644 --- a/forester/src/epoch_manager.rs +++ b/forester/src/epoch_manager.rs @@ -2203,6 +2203,7 @@ impl EpochManager { TreeType::Unknown => { self.dispatch_compression( epoch_info, + epoch_pda, forester_slot_details, consecutive_eligibility_end, ) @@ -2233,6 +2234,7 @@ impl EpochManager { async fn dispatch_compression( &self, epoch_info: &Epoch, + epoch_pda: &ForesterEpochPda, forester_slot_details: &ForesterSlot, consecutive_eligibility_end: u64, ) -> Result { @@ -2253,6 +2255,25 @@ impl EpochManager { return Ok(0); } + let current_light_slot = current_slot.saturating_sub(epoch_info.phases.active.start) + / epoch_pda.protocol_config.slot_length; + if !self + .check_forester_eligibility( + epoch_pda, + current_light_slot, + &Pubkey::default(), + epoch_info.epoch, + epoch_info, + ) + .await? 
+ { + debug!( + "Skipping compression: forester not eligible for current light slot {}", + current_light_slot + ); + return Ok(0); + } + debug!("Dispatching compression for epoch {}", epoch_info.epoch); let tracker = self @@ -2424,7 +2445,7 @@ impl EpochManager { // Process PDA compression if configured let pda_compressed = self - .dispatch_pda_compression(consecutive_eligibility_end) + .dispatch_pda_compression(epoch_info, epoch_pda, consecutive_eligibility_end) .await .unwrap_or_else(|e| { error!( @@ -2438,7 +2459,7 @@ impl EpochManager { // Process Mint compression let mint_compressed = self - .dispatch_mint_compression(consecutive_eligibility_end) + .dispatch_mint_compression(epoch_info, epoch_pda, consecutive_eligibility_end) .await .unwrap_or_else(|e| { error!( @@ -2464,7 +2485,12 @@ impl EpochManager { Ok(total) } - async fn dispatch_pda_compression(&self, consecutive_eligibility_end: u64) -> Result { + async fn dispatch_pda_compression( + &self, + epoch_info: &Epoch, + epoch_pda: &ForesterEpochPda, + consecutive_eligibility_end: u64, + ) -> Result { let pda_tracker = match &self.pda_tracker { Some(tracker) => tracker, None => return Ok(0), @@ -2488,6 +2514,26 @@ impl EpochManager { return Ok(0); } + // Check forester eligibility before PDA compression + let current_light_slot = current_slot.saturating_sub(epoch_info.phases.active.start) + / epoch_pda.protocol_config.slot_length; + if !self + .check_forester_eligibility( + epoch_pda, + current_light_slot, + &Pubkey::default(), + epoch_info.epoch, + epoch_info, + ) + .await? 
+ { + debug!( + "Skipping PDA compression: forester not eligible for current light slot {}", + current_light_slot + ); + return Ok(0); + } + let mut total_compressed = 0; // Shared cancellation flag across all programs @@ -2600,7 +2646,12 @@ impl EpochManager { Ok(total_compressed) } - async fn dispatch_mint_compression(&self, consecutive_eligibility_end: u64) -> Result { + async fn dispatch_mint_compression( + &self, + epoch_info: &Epoch, + epoch_pda: &ForesterEpochPda, + consecutive_eligibility_end: u64, + ) -> Result { let mint_tracker = match &self.mint_tracker { Some(tracker) => tracker, None => return Ok(0), @@ -2620,6 +2671,26 @@ impl EpochManager { return Ok(0); } + // Check forester eligibility before Mint compression + let current_light_slot = current_slot.saturating_sub(epoch_info.phases.active.start) + / epoch_pda.protocol_config.slot_length; + if !self + .check_forester_eligibility( + epoch_pda, + current_light_slot, + &Pubkey::default(), + epoch_info.epoch, + epoch_info, + ) + .await? 
+ { + debug!( + "Skipping Mint compression: forester not eligible for current light slot {}", + current_light_slot + ); + return Ok(0); + } + let accounts = mint_tracker.get_ready_to_compress(current_slot); if accounts.is_empty() { diff --git a/forester/src/forester_status.rs b/forester/src/forester_status.rs index 801d6a1799..9c4feb44f6 100644 --- a/forester/src/forester_status.rs +++ b/forester/src/forester_status.rs @@ -74,7 +74,9 @@ pub struct ForesterStatus { pub active_trees: usize, /// Number of rolled-over trees pub rolled_over_trees: usize, - /// Total pending items across all trees + /// Total pending batches across all trees (each batch = 1 instruction) + pub total_pending_batches: u64, + /// Total pending items across all trees (batches × batch_size) pub total_pending_items: u64, /// Aggregate queue statistics by tree type pub aggregate_queue_stats: AggregateQueueStats, @@ -87,6 +89,10 @@ pub struct AggregateQueueStats { pub state_v2_output_pending: u64, pub address_v1_total_pending: u64, pub address_v2_input_pending: u64, + /// Batch counts (each batch = 1 instruction/tx) + pub state_v2_input_pending_batches: u64, + pub state_v2_output_pending_batches: u64, + pub address_v2_input_pending_batches: u64, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -236,6 +242,7 @@ pub async fn get_forester_status_with_options( total_trees: 0, active_trees: 0, rolled_over_trees: 0, + total_pending_batches: 0, total_pending_items: 0, aggregate_queue_stats: AggregateQueueStats { state_v1_total_pending: 0, @@ -243,6 +250,9 @@ pub async fn get_forester_status_with_options( state_v2_output_pending: 0, address_v1_total_pending: 0, address_v2_input_pending: 0, + state_v2_input_pending_batches: 0, + state_v2_output_pending_batches: 0, + address_v2_input_pending_batches: 0, }, }); } @@ -389,6 +399,9 @@ pub async fn get_forester_status_with_options( state_v2_output_pending: 0, address_v1_total_pending: 0, address_v2_input_pending: 0, + state_v2_input_pending_batches: 0, + 
state_v2_output_pending_batches: 0, + address_v2_input_pending_batches: 0, }; for t in &tree_statuses { @@ -402,6 +415,10 @@ pub async fn get_forester_status_with_options( info.input_pending_batches * info.zkp_batch_size; aggregate_queue_stats.state_v2_output_pending += info.output_pending_batches * info.zkp_batch_size; + aggregate_queue_stats.state_v2_input_pending_batches += + info.input_pending_batches; + aggregate_queue_stats.state_v2_output_pending_batches += + info.output_pending_batches; } } "AddressV1" => { @@ -411,12 +428,18 @@ pub async fn get_forester_status_with_options( if let Some(ref info) = t.v2_queue_info { aggregate_queue_stats.address_v2_input_pending += info.input_pending_batches * info.zkp_batch_size; + aggregate_queue_stats.address_v2_input_pending_batches += + info.input_pending_batches; } } _ => {} } } + let total_pending_batches = aggregate_queue_stats.state_v2_input_pending_batches + + aggregate_queue_stats.state_v2_output_pending_batches + + aggregate_queue_stats.address_v2_input_pending_batches; + Ok(ForesterStatus { slot, current_active_epoch, @@ -437,6 +460,7 @@ pub async fn get_forester_status_with_options( total_trees, active_trees, rolled_over_trees, + total_pending_batches, total_pending_items, aggregate_queue_stats, }) diff --git a/sdk-libs/client/src/rpc/client.rs b/sdk-libs/client/src/rpc/client.rs index 82d543c002..bbbbe710f1 100644 --- a/sdk-libs/client/src/rpc/client.rs +++ b/sdk-libs/client/src/rpc/client.rs @@ -587,7 +587,6 @@ fn convert_token_account_interface( indexer_tai.account.account.lamports, indexer_tai.account.account.owner, ); - // Extract token owner before moving token into CompressedTokenAccount let token_owner = indexer_tai.token.owner; let compressed_token = CompressedTokenAccount { token: indexer_tai.token, @@ -596,7 +595,7 @@ fn convert_token_account_interface( Ok(TokenAccountInterface::cold( indexer_tai.account.key, compressed_token, - token_owner, // owner_override: use token owner, not account key + 
token_owner, indexer_tai.account.account.owner, )) } @@ -1108,7 +1107,16 @@ impl Rpc for LightClient { .map_err(|e| RpcError::CustomError(format!("Indexer error: {e}")))?; let value = match resp.value { - Some(tai) => Some(convert_token_account_interface(tai)?), + Some(tai) => { + let mut iface = convert_token_account_interface(tai)?; + // For cold ATAs, the compressed token stores token.owner = + // ATA pubkey (for hash verification). Override parsed.owner + // with the wallet owner so ata_bump() derivation succeeds. + if iface.is_cold() { + iface.parsed.owner = *owner; + } + Some(iface) + } None => None, }; diff --git a/sdk-libs/macros/src/light_pdas/program/compress.rs b/sdk-libs/macros/src/light_pdas/program/compress.rs index bd4c563d1d..ca8f57e08f 100644 --- a/sdk-libs/macros/src/light_pdas/program/compress.rs +++ b/sdk-libs/macros/src/light_pdas/program/compress.rs @@ -134,7 +134,9 @@ impl CompressBuilder { use light_account::LightDiscriminator; use borsh::BorshDeserialize; let data = account_info.try_borrow_data()?; - let discriminator: [u8; 8] = data[..8] + let discriminator: [u8; 8] = data + .get(..8) + .ok_or(light_account::LightSdkTypesError::InvalidInstructionData)? .try_into() .map_err(|_| light_account::LightSdkTypesError::InvalidInstructionData)?; match discriminator { @@ -382,8 +384,15 @@ impl CompressBuilder { ) -> std::result::Result<(), #sdk_error> { use borsh::BorshDeserialize; let data = account_info.try_borrow_data()#borrow_error; - #(#compress_arms)* - Ok(()) + let discriminator: [u8; 8] = data + .get(..8) + .ok_or(#sdk_error::InvalidInstructionData)? 
+ .try_into() + .map_err(|_| #sdk_error::InvalidInstructionData)?; + match discriminator { + #(#compress_arms)* + _ => Ok(()), + } } } }) @@ -398,8 +407,15 @@ impl CompressBuilder { ) -> std::result::Result<(), #sdk_error> { use borsh::BorshDeserialize; let data = account_info.try_borrow_data()#borrow_error; - #(#compress_arms)* - Ok(()) + let discriminator: [u8; 8] = data + .get(..8) + .ok_or(#sdk_error::InvalidInstructionData)? + .try_into() + .map_err(|_| #sdk_error::InvalidInstructionData)?; + match discriminator { + #(#compress_arms)* + _ => Ok(()), + } } } }) From 4f2aa344e3f5617ddeb1278ba13e5e0a931bf212 Mon Sep 17 00:00:00 2001 From: Sergey Timoshin Date: Wed, 25 Feb 2026 00:22:27 +0000 Subject: [PATCH 09/15] fix: improve error handling and method consistency in compressors and state management --- .../src/compressible/ctoken/compressor.rs | 11 +- forester/src/compressible/mint/compressor.rs | 2 +- forester/src/compressible/mint/state.rs | 4 +- forester/src/compressible/pda/state.rs | 4 +- .../macros/src/light_pdas/program/compress.rs | 202 +++++++++--------- .../src/light_pdas/program/instructions.rs | 2 +- 6 files changed, 118 insertions(+), 107 deletions(-) diff --git a/forester/src/compressible/ctoken/compressor.rs b/forester/src/compressible/ctoken/compressor.rs index 54be0ddc6f..ecaf826321 100644 --- a/forester/src/compressible/ctoken/compressor.rs +++ b/forester/src/compressible/ctoken/compressor.rs @@ -240,10 +240,13 @@ impl CTokenCompressor { ); // Wait for confirmation - let confirmed = rpc - .confirm_transaction(signature) - .await - .map_err(|e| anyhow::anyhow!("Failed to confirm transaction: {}", e))?; + let confirmed = match rpc.confirm_transaction(signature).await { + Ok(confirmed) => confirmed, + Err(e) => { + self.tracker.unmark_pending(&pubkeys); + return Err(anyhow::anyhow!("Failed to confirm transaction: {}", e)); + } + }; if confirmed { for account_state in account_states { diff --git a/forester/src/compressible/mint/compressor.rs 
b/forester/src/compressible/mint/compressor.rs index d4409580bf..f39c04e869 100644 --- a/forester/src/compressible/mint/compressor.rs +++ b/forester/src/compressible/mint/compressor.rs @@ -246,7 +246,7 @@ impl MintCompressor { "Mint PDA {} no longer exists on-chain, removing from tracker", mint_pda ); - self.tracker.remove_compressed(mint_pda); + self.tracker.remove(mint_pda); return Err(anyhow::anyhow!( "Mint PDA {} already closed, skipping", mint_pda diff --git a/forester/src/compressible/mint/state.rs b/forester/src/compressible/mint/state.rs index 616e756a35..1111edef51 100644 --- a/forester/src/compressible/mint/state.rs +++ b/forester/src/compressible/mint/state.rs @@ -76,7 +76,7 @@ impl MintAccountTracker { if !mint.metadata.mint_decompressed { // Mint was compressed — remove stale entry from tracker if present - if self.accounts.remove(&pubkey).is_some() { + if self.remove(&pubkey).is_some() { debug!( "Mint {} no longer decompressed, removed from tracker", pubkey @@ -162,7 +162,7 @@ impl SubscriptionHandler for MintAccountTracker { ) -> Result<()> { // If account data is empty (account was closed), remove from tracker if data.is_empty() { - if self.accounts.remove(&pubkey).is_some() { + if self.remove(&pubkey).is_some() { debug!("Removed closed Mint account {} from tracker", pubkey); } return Ok(()); diff --git a/forester/src/compressible/pda/state.rs b/forester/src/compressible/pda/state.rs index 6b0aab0181..92f9fe0a61 100644 --- a/forester/src/compressible/pda/state.rs +++ b/forester/src/compressible/pda/state.rs @@ -117,7 +117,7 @@ impl PdaAccountTracker { if compression_info.is_compressed() { // Account was compressed — remove stale entry from tracker if present - if self.accounts.remove(&pubkey).is_some() { + if self.remove(&pubkey).is_some() { debug!("PDA {} already compressed, removed from tracker", pubkey); } return Ok(()); @@ -187,7 +187,7 @@ impl SubscriptionHandler for PdaAccountTracker { ) -> Result<()> { // If account data is empty (account was 
closed), remove from tracker if data.is_empty() { - if self.accounts.remove(&pubkey).is_some() { + if self.remove(&pubkey).is_some() { debug!("Removed closed PDA account {} from tracker", pubkey); } return Ok(()); diff --git a/sdk-libs/macros/src/light_pdas/program/compress.rs b/sdk-libs/macros/src/light_pdas/program/compress.rs index ca8f57e08f..915228a1c4 100644 --- a/sdk-libs/macros/src/light_pdas/program/compress.rs +++ b/sdk-libs/macros/src/light_pdas/program/compress.rs @@ -85,60 +85,117 @@ impl CompressBuilder { // Code Generation Methods // ------------------------------------------------------------------------- - /// Generate the compress dispatch function. + /// Generate per-account compress dispatch arms appropriate for the backend. /// - /// Creates a function matching `CompressDispatchFn` signature that handles - /// discriminator-based deserialization and compression dispatch. - /// This function is placed inside the processor module. - pub fn generate_dispatch_fn(&self) -> Result { - let compress_arms: Vec<_> = self.accounts.iter().map(|info| { - let name = qualify_type_with_crate(&info.account_type); + /// - Anchor: match arms with fixed 8-byte `LIGHT_DISCRIMINATOR` + /// (`d if d == Type::LIGHT_DISCRIMINATOR => { ... }`) + /// - Pinocchio: if-chain blocks with variable-length `LIGHT_DISCRIMINATOR_SLICE` + /// (`{ if &data[..disc_len] == disc_slice { return ...; } }`) + fn generate_compress_arms(&self, backend: &dyn CodegenBackend) -> Vec { + let account_crate = backend.account_crate(); + let sdk_error = backend.sdk_error_type(); - if info.is_zero_copy { - // Pod (zero-copy) path: use bytemuck - quote! 
{ - d if d == #name::LIGHT_DISCRIMINATOR => { - let pod_bytes = &data[8..8 + core::mem::size_of::<#name>()]; - let mut account_data: #name = *bytemuck::from_bytes(pod_bytes); - drop(data); - light_account::prepare_account_for_compression( - account_info, &mut account_data, meta, index, ctx, - ) + self.accounts + .iter() + .map(|info| { + let name = qualify_type_with_crate(&info.account_type); + + if backend.is_pinocchio() { + // Pinocchio: variable-length discriminator if-chain blocks. + // Uses LIGHT_DISCRIMINATOR_SLICE (&[u8]) and dynamic disc_len. + if info.is_zero_copy { + quote! { + { + let __disc_slice = <#name as #account_crate::LightDiscriminator>::LIGHT_DISCRIMINATOR_SLICE; + let __disc_len = __disc_slice.len(); + let __expected_len = __disc_len + core::mem::size_of::<#name>(); + if data.len() >= __expected_len && &data[..__disc_len] == __disc_slice { + let pod_bytes = &data[__disc_len..__expected_len]; + let mut account_data: #name = *bytemuck::from_bytes(pod_bytes); + drop(data); + return #account_crate::prepare_account_for_compression( + account_info, &mut account_data, meta, index, ctx, + ); + } + } + } + } else { + quote! { + { + let __disc_slice = <#name as #account_crate::LightDiscriminator>::LIGHT_DISCRIMINATOR_SLICE; + let __disc_len = __disc_slice.len(); + if data.len() >= __disc_len && &data[..__disc_len] == __disc_slice { + let mut reader = &data[__disc_len..]; + let mut account_data = #name::deserialize(&mut reader) + .map_err(|_| #sdk_error::InvalidInstructionData)?; + drop(data); + return #account_crate::prepare_account_for_compression( + account_info, &mut account_data, meta, index, ctx, + ); + } + } + } } - } - } else { - // Borsh path: use deserialize (not try_from_slice which requires all bytes consumed) - // Anchor allocates INIT_SPACE (max size) but actual Borsh data may be shorter - // due to variable-length fields (String, Vec), leaving trailing bytes. - quote! 
{ - d if d == #name::LIGHT_DISCRIMINATOR => { - let mut reader = &data[8..]; - let mut account_data = #name::deserialize(&mut reader) - .map_err(|_| light_account::LightSdkTypesError::InvalidInstructionData)?; - drop(data); - light_account::prepare_account_for_compression( - account_info, &mut account_data, meta, index, ctx, - ) + } else { + // Anchor: fixed 8-byte discriminator match arms. + // Uses LIGHT_DISCRIMINATOR ([u8; 8]) with hardcoded offset 8. + if info.is_zero_copy { + quote! { + d if d == #name::LIGHT_DISCRIMINATOR => { + let pod_bytes = &data[8..8 + core::mem::size_of::<#name>()]; + let mut account_data: #name = *bytemuck::from_bytes(pod_bytes); + drop(data); + #account_crate::prepare_account_for_compression( + account_info, &mut account_data, meta, index, ctx, + ) + } + } + } else { + // Borsh: use deserialize (not try_from_slice which requires all bytes consumed). + // Anchor allocates INIT_SPACE (max size) but actual Borsh data may be shorter + // due to variable-length fields (String, Vec), leaving trailing bytes. + quote! { + d if d == #name::LIGHT_DISCRIMINATOR => { + let mut reader = &data[8..]; + let mut account_data = #name::deserialize(&mut reader) + .map_err(|_| #sdk_error::InvalidInstructionData)?; + drop(data); + #account_crate::prepare_account_for_compression( + account_info, &mut account_data, meta, index, ctx, + ) + } + } } } - } - }).collect(); + }) + .collect() + } + + /// Generate the compress dispatch function (Anchor only). + /// + /// Creates a function matching `CompressDispatchFn` signature that handles + /// discriminator-based deserialization and compression dispatch. + /// This function is placed inside the processor module. + pub fn generate_dispatch_fn(&self, backend: &dyn CodegenBackend) -> Result { + let account_crate = backend.account_crate(); + let sdk_error = backend.sdk_error_type(); + let compress_arms = self.generate_compress_arms(backend); Ok(syn::parse_quote! 
{ fn __compress_dispatch<'info>( account_info: &anchor_lang::prelude::AccountInfo<'info>, - meta: &light_account::account_meta::CompressedAccountMetaNoLamportsNoAddress, + meta: &#account_crate::account_meta::CompressedAccountMetaNoLamportsNoAddress, index: usize, - ctx: &mut light_account::CompressCtx<'_, 'info>, - ) -> std::result::Result<(), light_account::LightSdkTypesError> { - use light_account::LightDiscriminator; + ctx: &mut #account_crate::CompressCtx<'_, 'info>, + ) -> std::result::Result<(), #sdk_error> { + use #account_crate::LightDiscriminator; use borsh::BorshDeserialize; let data = account_info.try_borrow_data()?; let discriminator: [u8; 8] = data .get(..8) - .ok_or(light_account::LightSdkTypesError::InvalidInstructionData)? + .ok_or(#sdk_error::InvalidInstructionData)? .try_into() - .map_err(|_| light_account::LightSdkTypesError::InvalidInstructionData)?; + .map_err(|_| #sdk_error::InvalidInstructionData)?; match discriminator { #(#compress_arms)* _ => Ok(()), @@ -314,13 +371,11 @@ impl CompressBuilder { /// Generate compress dispatch as an associated function on the enum using the specified backend. /// - /// # Discriminator uniqueness invariant - /// - /// The dispatch uses a sequential if-chain keyed on `LIGHT_DISCRIMINATOR_SLICE`. No - /// discriminator may be a prefix of another — including exact duplicates. Violating this - /// causes silent incorrect dispatch. The `LightProgramPinocchio` derive enforces this at - /// compile time via `generate_discriminator_collision_checks`; if the check fires, change - /// the discriminator bytes so that no pair shares a prefix. + /// - Anchor: uses a `match` on fixed 8-byte discriminators (same as `generate_dispatch_fn`) + /// - Pinocchio: uses a sequential if-chain keyed on `LIGHT_DISCRIMINATOR_SLICE` (variable + /// length). No discriminator may be a prefix of another — including exact duplicates. + /// Violating this causes silent incorrect dispatch. 
The `LightProgramPinocchio` derive + /// enforces this at compile time via `generate_discriminator_collision_checks`. pub fn generate_enum_dispatch_method_with_backend( &self, enum_name: &syn::Ident, @@ -330,48 +385,7 @@ impl CompressBuilder { let account_info_type = backend.account_info_type(); let sdk_error = backend.sdk_error_type(); let borrow_error = backend.borrow_error(); - - let compress_arms: Vec<_> = self - .accounts - .iter() - .map(|info| { - let name = qualify_type_with_crate(&info.account_type); - - if info.is_zero_copy { - quote! { - { - let __disc_slice = <#name as #account_crate::LightDiscriminator>::LIGHT_DISCRIMINATOR_SLICE; - let __disc_len = __disc_slice.len(); - let __expected_len = __disc_len + core::mem::size_of::<#name>(); - if data.len() >= __expected_len && &data[..__disc_len] == __disc_slice { - let pod_bytes = &data[__disc_len..__expected_len]; - let mut account_data: #name = *bytemuck::from_bytes(pod_bytes); - drop(data); - return #account_crate::prepare_account_for_compression( - account_info, &mut account_data, meta, index, ctx, - ); - } - } - } - } else { - quote! { - { - let __disc_slice = <#name as #account_crate::LightDiscriminator>::LIGHT_DISCRIMINATOR_SLICE; - let __disc_len = __disc_slice.len(); - if data.len() >= __disc_len && &data[..__disc_len] == __disc_slice { - let mut reader = &data[__disc_len..]; - let mut account_data = #name::deserialize(&mut reader) - .map_err(|_| #sdk_error::InvalidInstructionData)?; - drop(data); - return #account_crate::prepare_account_for_compression( - account_info, &mut account_data, meta, index, ctx, - ); - } - } - } - } - }) - .collect(); + let compress_arms = self.generate_compress_arms(backend); if backend.is_pinocchio() { Ok(quote! 
{ @@ -384,15 +398,8 @@ impl CompressBuilder { ) -> std::result::Result<(), #sdk_error> { use borsh::BorshDeserialize; let data = account_info.try_borrow_data()#borrow_error; - let discriminator: [u8; 8] = data - .get(..8) - .ok_or(#sdk_error::InvalidInstructionData)? - .try_into() - .map_err(|_| #sdk_error::InvalidInstructionData)?; - match discriminator { - #(#compress_arms)* - _ => Ok(()), - } + #(#compress_arms)* + Ok(()) } } }) @@ -405,6 +412,7 @@ impl CompressBuilder { index: usize, ctx: &mut #account_crate::CompressCtx<'_, 'info>, ) -> std::result::Result<(), #sdk_error> { + use #account_crate::LightDiscriminator; use borsh::BorshDeserialize; let data = account_info.try_borrow_data()#borrow_error; let discriminator: [u8; 8] = data diff --git a/sdk-libs/macros/src/light_pdas/program/instructions.rs b/sdk-libs/macros/src/light_pdas/program/instructions.rs index 686ee2c2f7..556d876eb9 100644 --- a/sdk-libs/macros/src/light_pdas/program/instructions.rs +++ b/sdk-libs/macros/src/light_pdas/program/instructions.rs @@ -562,7 +562,7 @@ pub(crate) fn generate_light_program_items_with_backend( update_config_instruction, ) = if !backend.is_pinocchio() { let compress_accounts = compress_builder.generate_accounts_struct()?; - let compress_dispatch_fn = compress_builder.generate_dispatch_fn()?; + let compress_dispatch_fn = compress_builder.generate_dispatch_fn(backend)?; let compress_processor_fn = compress_builder.generate_processor()?; let compress_instruction = compress_builder.generate_entrypoint()?; From 7fe15c1bbd3d15999eb42ecaad4444c3ae53cb71 Mon Sep 17 00:00:00 2001 From: Sergey Timoshin Date: Wed, 25 Feb 2026 09:44:24 +0000 Subject: [PATCH 10/15] feat: add transaction verification to compressors and refactor MintAccountTracker initialization --- .../src/compressible/ctoken/compressor.rs | 10 ++++++++- forester/src/compressible/mint/compressor.rs | 12 +++++++++- forester/src/compressible/mint/state.rs | 6 +---- forester/src/compressible/pda/compressor.rs | 12 
+++++++++- forester/src/compressible/traits.rs | 22 ++++++++++++++++++- 5 files changed, 53 insertions(+), 9 deletions(-) diff --git a/forester/src/compressible/ctoken/compressor.rs b/forester/src/compressible/ctoken/compressor.rs index ecaf826321..269fb0322f 100644 --- a/forester/src/compressible/ctoken/compressor.rs +++ b/forester/src/compressible/ctoken/compressor.rs @@ -20,7 +20,10 @@ use solana_sdk::{ use tracing::{debug, info}; use super::{state::CTokenAccountTracker, types::CTokenAccountState}; -use crate::{compressible::traits::CompressibleTracker, Result}; +use crate::{ + compressible::traits::{verify_transaction_execution, CompressibleTracker}, + Result, +}; /// Compression executor for CToken accounts via the registry program's compress_and_close instruction. pub struct CTokenCompressor { @@ -249,6 +252,11 @@ impl CTokenCompressor { }; if confirmed { + if let Err(e) = verify_transaction_execution(&*rpc, signature).await { + self.tracker.unmark_pending(&pubkeys); + return Err(e); + } + for account_state in account_states { self.tracker.remove_compressed(&account_state.pubkey); } diff --git a/forester/src/compressible/mint/compressor.rs b/forester/src/compressible/mint/compressor.rs index f39c04e869..79d40c2c70 100644 --- a/forester/src/compressible/mint/compressor.rs +++ b/forester/src/compressible/mint/compressor.rs @@ -17,7 +17,10 @@ use solana_sdk::{ use tracing::{debug, info}; use super::{state::MintAccountTracker, types::MintAccountState}; -use crate::{compressible::traits::CompressibleTracker, Result}; +use crate::{ + compressible::traits::{verify_transaction_execution, CompressibleTracker}, + Result, +}; /// Compressor for decompressed Mint accounts - builds and sends CompressAndCloseMint transactions. 
pub struct MintCompressor { @@ -137,6 +140,11 @@ impl MintCompressor { .map_err(|e| anyhow::anyhow!("Failed to confirm transaction: {:?}", e))?; if confirmed { + if let Err(e) = verify_transaction_execution(&*rpc, signature).await { + self.tracker.unmark_pending(&pubkeys); + return Err(e); + } + for mint_state in mint_states { self.tracker.remove_compressed(&mint_state.pubkey); } @@ -295,6 +303,8 @@ impl MintCompressor { .map_err(|e| anyhow::anyhow!("Failed to confirm transaction: {:?}", e))?; if confirmed { + verify_transaction_execution(&*rpc, signature).await?; + info!("CompressAndCloseMint tx for Mint {} confirmed", mint_pda); Ok(signature) } else { diff --git a/forester/src/compressible/mint/state.rs b/forester/src/compressible/mint/state.rs index 1111edef51..cca5a0b6fe 100644 --- a/forester/src/compressible/mint/state.rs +++ b/forester/src/compressible/mint/state.rs @@ -43,11 +43,7 @@ pub struct MintAccountTracker { impl MintAccountTracker { pub fn new() -> Self { - Self { - accounts: DashMap::new(), - compressed_count: AtomicU64::new(0), - pending: DashSet::new(), - } + Self::default() } pub fn update_from_account( diff --git a/forester/src/compressible/pda/compressor.rs b/forester/src/compressible/pda/compressor.rs index a77c99e0ba..3366a903b8 100644 --- a/forester/src/compressible/pda/compressor.rs +++ b/forester/src/compressible/pda/compressor.rs @@ -26,7 +26,10 @@ use tracing::{debug, info}; use super::{state::PdaAccountTracker, types::PdaAccountState}; use crate::{ - compressible::{config::PdaProgramConfig, traits::CompressibleTracker}, + compressible::{ + config::PdaProgramConfig, + traits::{verify_transaction_execution, CompressibleTracker}, + }, Result, }; @@ -325,6 +328,11 @@ impl PdaCompressor { .map_err(|e| anyhow::anyhow!("Failed to confirm transaction: {:?}", e))?; if confirmed { + if let Err(e) = verify_transaction_execution(&*rpc, signature).await { + self.tracker.unmark_pending(&pubkeys); + return Err(e); + } + for state in account_states { 
self.tracker.remove_compressed(&state.pubkey); } @@ -437,6 +445,8 @@ impl PdaCompressor { .map_err(|e| anyhow::anyhow!("Failed to confirm transaction: {:?}", e))?; if confirmed { + verify_transaction_execution(&*rpc, signature).await?; + info!("compress_accounts_idempotent tx for PDA {} confirmed", pda); Ok(signature) } else { diff --git a/forester/src/compressible/traits.rs b/forester/src/compressible/traits.rs index a432305826..c4362d85dc 100644 --- a/forester/src/compressible/traits.rs +++ b/forester/src/compressible/traits.rs @@ -3,7 +3,8 @@ use std::sync::atomic::{AtomicU64, Ordering}; use dashmap::{DashMap, DashSet}; -use solana_sdk::pubkey::Pubkey; +use light_client::rpc::Rpc; +use solana_sdk::{pubkey::Pubkey, signature::Signature}; use crate::Result; @@ -99,3 +100,22 @@ pub trait SubscriptionHandler: Send + Sync { fn handle_removal(&self, pubkey: &Pubkey); } + +pub async fn verify_transaction_execution(rpc: &impl Rpc, signature: Signature) -> Result<()> { + let statuses = rpc + .get_signature_statuses(&[signature]) + .await + .map_err(|e| { + anyhow::anyhow!("Failed to get signature status for {}: {:?}", signature, e) + })?; + if let Some(Some(status)) = statuses.first() { + if let Some(err) = &status.err { + return Err(anyhow::anyhow!( + "Transaction {} confirmed but execution failed: {:?}", + signature, + err + )); + } + } + Ok(()) +} From 6120ced9a0e1da4ba825e2fb67d49184ca927ace Mon Sep 17 00:00:00 2001 From: Sergey Timoshin Date: Wed, 25 Feb 2026 11:41:31 +0000 Subject: [PATCH 11/15] fix: improve transaction confirmation handling and error reporting in compressors --- forester/src/compressible/mint/compressor.rs | 11 ++++++---- forester/src/compressible/pda/compressor.rs | 13 ++++++----- forester/src/compressible/traits.rs | 23 +++++++++++++------- 3 files changed, 30 insertions(+), 17 deletions(-) diff --git a/forester/src/compressible/mint/compressor.rs b/forester/src/compressible/mint/compressor.rs index 79d40c2c70..1cd12d388c 100644 --- 
a/forester/src/compressible/mint/compressor.rs +++ b/forester/src/compressible/mint/compressor.rs @@ -134,10 +134,13 @@ impl MintCompressor { ); // Wait for confirmation - let confirmed = rpc - .confirm_transaction(signature) - .await - .map_err(|e| anyhow::anyhow!("Failed to confirm transaction: {:?}", e))?; + let confirmed = match rpc.confirm_transaction(signature).await { + Ok(confirmed) => confirmed, + Err(e) => { + self.tracker.unmark_pending(&pubkeys); + return Err(anyhow::anyhow!("Failed to confirm transaction: {:?}", e)); + } + }; if confirmed { if let Err(e) = verify_transaction_execution(&*rpc, signature).await { diff --git a/forester/src/compressible/pda/compressor.rs b/forester/src/compressible/pda/compressor.rs index 3366a903b8..2503d28758 100644 --- a/forester/src/compressible/pda/compressor.rs +++ b/forester/src/compressible/pda/compressor.rs @@ -322,10 +322,13 @@ impl PdaCompressor { ); // Wait for confirmation - let confirmed = rpc - .confirm_transaction(signature) - .await - .map_err(|e| anyhow::anyhow!("Failed to confirm transaction: {:?}", e))?; + let confirmed = match rpc.confirm_transaction(signature).await { + Ok(confirmed) => confirmed, + Err(e) => { + self.tracker.unmark_pending(&pubkeys); + return Err(anyhow::anyhow!("Failed to confirm transaction: {:?}", e)); + } + }; if confirmed { if let Err(e) = verify_transaction_execution(&*rpc, signature).await { @@ -383,7 +386,7 @@ impl PdaCompressor { "PDA {} no longer exists on-chain, removing from tracker", pda ); - self.tracker.remove_compressed(pda); + self.tracker.remove(pda); return Err(anyhow::anyhow!("PDA {} already closed, skipping", pda)); } diff --git a/forester/src/compressible/traits.rs b/forester/src/compressible/traits.rs index c4362d85dc..c0b2e7c37a 100644 --- a/forester/src/compressible/traits.rs +++ b/forester/src/compressible/traits.rs @@ -108,14 +108,21 @@ pub async fn verify_transaction_execution(rpc: &impl Rpc, signature: Signature) .map_err(|e| { anyhow::anyhow!("Failed to 
get signature status for {}: {:?}", signature, e) })?; - if let Some(Some(status)) = statuses.first() { - if let Some(err) = &status.err { - return Err(anyhow::anyhow!( - "Transaction {} confirmed but execution failed: {:?}", - signature, - err - )); + + match statuses.first() { + Some(Some(status)) => { + if let Some(err) = &status.err { + return Err(anyhow::anyhow!( + "Transaction {} confirmed but execution failed: {:?}", + signature, + err + )); + } + Ok(()) } + _ => Err(anyhow::anyhow!( + "Transaction {} status unavailable", + signature + )), } - Ok(()) } From 858960a77e0ba569ae787f0bc9952a2fa3a147be Mon Sep 17 00:00:00 2001 From: Sergey Timoshin Date: Wed, 25 Feb 2026 13:08:59 +0000 Subject: [PATCH 12/15] fix: remove unnecessary pubkey collection before marking accounts as pending --- forester/src/compressible/pda/compressor.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/forester/src/compressible/pda/compressor.rs b/forester/src/compressible/pda/compressor.rs index 2503d28758..bcfe1bb961 100644 --- a/forester/src/compressible/pda/compressor.rs +++ b/forester/src/compressible/pda/compressor.rs @@ -296,7 +296,6 @@ impl PdaCompressor { ); // Mark as pending before sending - let pubkeys: Vec = account_states.iter().map(|s| s.pubkey).collect(); self.tracker.mark_pending(&pubkeys); // Send single transaction From 5cd2da66aa6e35054170d8c5f708d869492b5e33 Mon Sep 17 00:00:00 2001 From: Sergey Timoshin Date: Wed, 25 Feb 2026 15:25:54 +0000 Subject: [PATCH 13/15] chore: update subproject commit for photon dependency --- external/photon | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/photon b/external/photon index f5d793e721..a328175cbe 160000 --- a/external/photon +++ b/external/photon @@ -1 +1 @@ -Subproject commit f5d793e7217c48cb57b0f42892c12466b66c1830 +Subproject commit a328175cbe65c3cf7158684b4b1d752e7ffb5c37 From 2e9f0db7534e66ae92a65eb74236675b5e160983 Mon Sep 17 00:00:00 2001 From: Sergey Timoshin Date: Wed, 25 Feb 2026 15:34:47 
+0000 Subject: [PATCH 14/15] fix: implement retry logic for transaction status verification --- forester/src/compressible/traits.rs | 55 +++++++++++++++++++---------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/forester/src/compressible/traits.rs b/forester/src/compressible/traits.rs index c0b2e7c37a..1e1b18ff3d 100644 --- a/forester/src/compressible/traits.rs +++ b/forester/src/compressible/traits.rs @@ -102,27 +102,44 @@ pub trait SubscriptionHandler: Send + Sync { } pub async fn verify_transaction_execution(rpc: &impl Rpc, signature: Signature) -> Result<()> { - let statuses = rpc - .get_signature_statuses(&[signature]) - .await - .map_err(|e| { - anyhow::anyhow!("Failed to get signature status for {}: {:?}", signature, e) - })?; - - match statuses.first() { - Some(Some(status)) => { - if let Some(err) = &status.err { - return Err(anyhow::anyhow!( - "Transaction {} confirmed but execution failed: {:?}", + const MAX_RETRIES: u32 = 3; + const RETRY_DELAY: std::time::Duration = std::time::Duration::from_millis(500); + + for attempt in 0..MAX_RETRIES { + let statuses = rpc + .get_signature_statuses(&[signature]) + .await + .map_err(|e| { + anyhow::anyhow!("Failed to get signature status for {}: {:?}", signature, e) + })?; + + match statuses.first() { + Some(Some(status)) => { + if let Some(err) = &status.err { + return Err(anyhow::anyhow!( + "Transaction {} confirmed but execution failed: {:?}", + signature, + err + )); + } + return Ok(()); + } + _ if attempt < MAX_RETRIES - 1 => { + tracing::debug!( + "Transaction {} status not yet available, retrying ({}/{})", signature, - err - )); + attempt + 1, + MAX_RETRIES + ); + tokio::time::sleep(RETRY_DELAY).await; } - Ok(()) + _ => {} } - _ => Err(anyhow::anyhow!( - "Transaction {} status unavailable", - signature - )), } + + Err(anyhow::anyhow!( + "Transaction {} status unavailable after {} retries", + signature, + MAX_RETRIES + )) } From e7242655097dc3084378d89951ff1f7d0f7e9b87 Mon Sep 17 00:00:00 
2001 From: Sergey Timoshin Date: Wed, 25 Feb 2026 16:10:50 +0000 Subject: [PATCH 15/15] refactor transaction handling --- external/photon | 2 +- forester/src/api_server.rs | 25 ++--- .../src/compressible/ctoken/compressor.rs | 100 ++++++++---------- forester/src/compressible/ctoken/state.rs | 18 ++++ forester/src/compressible/ctoken/types.rs | 2 + forester/src/compressible/mint/compressor.rs | 74 +++---------- forester/src/compressible/pda/compressor.rs | 75 +++---------- forester/src/compressible/traits.rs | 88 ++++++++++++++- forester/src/epoch_manager.rs | 11 +- 9 files changed, 192 insertions(+), 203 deletions(-) diff --git a/external/photon b/external/photon index a328175cbe..a252934447 160000 --- a/external/photon +++ b/external/photon @@ -1 +1 @@ -Subproject commit a328175cbe65c3cf7158684b4b1d752e7ffb5c37 +Subproject commit a2529344477d889b6c23c121f915a91f3678d861 diff --git a/forester/src/api_server.rs b/forester/src/api_server.rs index bbe74657a2..4ea4b091b4 100644 --- a/forester/src/api_server.rs +++ b/forester/src/api_server.rs @@ -210,22 +210,7 @@ where } fn is_ctoken_ata(state: &CTokenAccountState) -> bool { - let owner = solana_sdk::pubkey::Pubkey::new_from_array(state.account.owner.to_bytes()); - let mint = solana_sdk::pubkey::Pubkey::new_from_array(state.account.mint.to_bytes()); - let light_token_program_id = - solana_sdk::pubkey::Pubkey::new_from_array(light_token_interface::LIGHT_TOKEN_PROGRAM_ID); - - let expected_ata = solana_sdk::pubkey::Pubkey::find_program_address( - &[ - owner.as_ref(), - light_token_program_id.as_ref(), - mint.as_ref(), - ], - &light_token_program_id, - ) - .0; - - state.pubkey == expected_ata + state.is_ata } fn summarize_ctoken_and_ata_slots( @@ -269,6 +254,8 @@ fn aggregate_type_stats( stats: impl Iterator>, ) -> Option { let mut tracked = 0usize; + let mut compressed = 0u64; + let mut compressed_seen = false; let mut ready = 0usize; let mut waiting = 0usize; let mut ready_seen = false; @@ -279,6 +266,10 @@ fn 
aggregate_type_stats( for stat in stats.flatten() { any = true; tracked = tracked.saturating_add(stat.tracked); + if let Some(v) = stat.compressed { + compressed = compressed.saturating_add(v); + compressed_seen = true; + } if let Some(v) = stat.ready { ready = ready.saturating_add(v); ready_seen = true; @@ -302,7 +293,7 @@ fn aggregate_type_stats( Some(CompressibleTypeStats { tracked, - compressed: None, + compressed: compressed_seen.then_some(compressed), ready: ready_seen.then_some(ready), waiting: waiting_seen.then_some(waiting), next_ready_slot, diff --git a/forester/src/compressible/ctoken/compressor.rs b/forester/src/compressible/ctoken/compressor.rs index 269fb0322f..1aa8a864d4 100644 --- a/forester/src/compressible/ctoken/compressor.rs +++ b/forester/src/compressible/ctoken/compressor.rs @@ -17,11 +17,11 @@ use solana_sdk::{ signature::{Keypair, Signature}, signer::Signer, }; -use tracing::{debug, info}; +use tracing::debug; use super::{state::CTokenAccountTracker, types::CTokenAccountState}; use crate::{ - compressible::traits::{verify_transaction_execution, CompressibleTracker}, + compressible::traits::{send_and_confirm_with_tracking, CompressibleTracker}, Result, }; @@ -82,9 +82,38 @@ impl CTokenCompressor { debug!("Compressible config: {}", compressible_config); - // Get output tree from RPC let mut rpc = self.rpc_pool.get_connection().await?; + // Pre-check: filter out accounts that no longer exist on-chain + let all_pubkeys: Vec = account_states.iter().map(|a| a.pubkey).collect(); + let on_chain_accounts = rpc + .get_multiple_accounts(&all_pubkeys) + .await + .map_err(|e| anyhow::anyhow!("Failed to pre-check accounts: {:?}", e))?; + + let account_states: Vec<&CTokenAccountState> = account_states + .iter() + .zip(on_chain_accounts.iter()) + .filter_map(|(state, on_chain)| { + if on_chain.is_some() { + Some(state) + } else { + debug!( + "CToken account {} no longer exists on-chain, removing from tracker", + state.pubkey + ); + 
self.tracker.remove(&state.pubkey); + None + } + }) + .collect(); + + if account_states.is_empty() { + return Err(anyhow::anyhow!( + "All accounts already closed, nothing to compress" + )); + } + // Fetch latest active state trees and get a random one rpc.get_latest_active_state_trees() .await @@ -105,7 +134,7 @@ impl CTokenCompressor { let mut indices_vec = Vec::with_capacity(account_states.len()); - for account_state in account_states { + for account_state in &account_states { let source_index = packed_accounts.insert_or_get(account_state.pubkey); // Convert mint from light_compressed_account::Pubkey to solana_sdk::Pubkey @@ -214,61 +243,16 @@ impl CTokenCompressor { data: instruction.data(), }; - // Mark accounts as pending before sending to prevent other cycles from - // picking them up while the tx is in-flight. let pubkeys: Vec = account_states.iter().map(|a| a.pubkey).collect(); - self.tracker.mark_pending(&pubkeys); - - // Send transaction - let signature = match rpc - .create_and_send_transaction( - &[ix], - &self.payer_keypair.pubkey(), - &[&self.payer_keypair], - ) - .await - { - Ok(sig) => sig, - Err(e) => { - // Tx failed to send — return accounts to work pool - self.tracker.unmark_pending(&pubkeys); - return Err(anyhow::anyhow!("Failed to send transaction: {}", e)); - } - }; - info!( - "compress_and_close tx with ({:?}) accounts sent {}", - account_states.iter().map(|a| a.pubkey.to_string()), - signature - ); - - // Wait for confirmation - let confirmed = match rpc.confirm_transaction(signature).await { - Ok(confirmed) => confirmed, - Err(e) => { - self.tracker.unmark_pending(&pubkeys); - return Err(anyhow::anyhow!("Failed to confirm transaction: {}", e)); - } - }; - - if confirmed { - if let Err(e) = verify_transaction_execution(&*rpc, signature).await { - self.tracker.unmark_pending(&pubkeys); - return Err(e); - } - - for account_state in account_states { - self.tracker.remove_compressed(&account_state.pubkey); - } - info!("compress_and_close tx 
confirmed: {}", signature); - Ok(signature) - } else { - // Transaction not confirmed — return accounts to work pool for retry - self.tracker.unmark_pending(&pubkeys); - Err(anyhow::anyhow!( - "compress_and_close tx not confirmed: {} - accounts returned to work pool", - signature - )) - } + send_and_confirm_with_tracking( + &mut *rpc, + &[ix], + &self.payer_keypair, + &*self.tracker, + &pubkeys, + "compress_and_close", + ) + .await } } diff --git a/forester/src/compressible/ctoken/state.rs b/forester/src/compressible/ctoken/state.rs index b9e2c0d112..1c71e785f3 100644 --- a/forester/src/compressible/ctoken/state.rs +++ b/forester/src/compressible/ctoken/state.rs @@ -107,11 +107,29 @@ impl CTokenAccountTracker { } }; + let is_ata = { + let owner = Pubkey::new_from_array(ctoken.owner.to_bytes()); + let mint = Pubkey::new_from_array(ctoken.mint.to_bytes()); + let light_token_program_id = + Pubkey::new_from_array(light_token_interface::LIGHT_TOKEN_PROGRAM_ID); + let expected_ata = Pubkey::find_program_address( + &[ + owner.as_ref(), + light_token_program_id.as_ref(), + mint.as_ref(), + ], + &light_token_program_id, + ) + .0; + pubkey == expected_ata + }; + let state = CTokenAccountState { pubkey, account: ctoken, lamports, compressible_slot, + is_ata, }; debug!( diff --git a/forester/src/compressible/ctoken/types.rs b/forester/src/compressible/ctoken/types.rs index 21ab03d3d1..5eb9de4132 100644 --- a/forester/src/compressible/ctoken/types.rs +++ b/forester/src/compressible/ctoken/types.rs @@ -10,6 +10,8 @@ pub struct CTokenAccountState { pub lamports: u64, /// Ready to compress when current_slot > compressible_slot pub compressible_slot: u64, + /// Whether this account is an ATA (computed once at insert time). 
+ pub is_ata: bool, } impl CompressibleState for CTokenAccountState { diff --git a/forester/src/compressible/mint/compressor.rs b/forester/src/compressible/mint/compressor.rs index 1cd12d388c..8fb65d4605 100644 --- a/forester/src/compressible/mint/compressor.rs +++ b/forester/src/compressible/mint/compressor.rs @@ -18,7 +18,10 @@ use tracing::{debug, info}; use super::{state::MintAccountTracker, types::MintAccountState}; use crate::{ - compressible::traits::{verify_transaction_execution, CompressibleTracker}, + compressible::traits::{ + send_and_confirm_with_tracking, verify_transaction_execution, Cancelled, + CompressibleTracker, + }, Result, }; @@ -103,67 +106,18 @@ impl MintCompressor { instructions.len() ); - // Mark as pending before sending let pubkeys: Vec = mint_states.iter().map(|s| s.pubkey).collect(); - self.tracker.mark_pending(&pubkeys); - // Send all instructions in a single transaction let mut rpc = self.rpc_pool.get_connection().await?; - let signature = match rpc - .create_and_send_transaction( - &instructions, - &self.payer_keypair.pubkey(), - &[&self.payer_keypair], - ) - .await - { - Ok(sig) => sig, - Err(e) => { - self.tracker.unmark_pending(&pubkeys); - return Err(anyhow::anyhow!( - "Failed to send batched CompressAndCloseMint transaction: {:?}", - e - )); - } - }; - - info!( - "Batched CompressAndCloseMint tx for {} mints sent: {}", - mint_states.len(), - signature - ); - - // Wait for confirmation - let confirmed = match rpc.confirm_transaction(signature).await { - Ok(confirmed) => confirmed, - Err(e) => { - self.tracker.unmark_pending(&pubkeys); - return Err(anyhow::anyhow!("Failed to confirm transaction: {:?}", e)); - } - }; - - if confirmed { - if let Err(e) = verify_transaction_execution(&*rpc, signature).await { - self.tracker.unmark_pending(&pubkeys); - return Err(e); - } - - for mint_state in mint_states { - self.tracker.remove_compressed(&mint_state.pubkey); - } - info!("Batched CompressAndCloseMint tx confirmed: {}", signature); - 
Ok(signature) - } else { - self.tracker.unmark_pending(&pubkeys); - tracing::warn!( - "Batch CompressAndCloseMint tx not confirmed: {} - returned to work pool", - signature - ); - Err(anyhow::anyhow!( - "Batch CompressAndCloseMint tx not confirmed: {}", - signature - )) - } + send_and_confirm_with_tracking( + &mut *rpc, + &instructions, + &self.payer_keypair, + &*self.tracker, + &pubkeys, + "CompressAndCloseMint", + ) + .await } /// Compress a batch of decompressed Mint accounts with concurrent execution. @@ -203,7 +157,7 @@ impl MintCompressor { // Check cancellation before processing if cancelled.load(Ordering::Relaxed) { compressor.tracker.unmark_pending(&[mint_state.pubkey]); - return Err((mint_state, anyhow::anyhow!("Cancelled"))); + return Err((mint_state, Cancelled.into())); } match compressor.compress(&mint_state).await { diff --git a/forester/src/compressible/pda/compressor.rs b/forester/src/compressible/pda/compressor.rs index bcfe1bb961..c364d1c5d4 100644 --- a/forester/src/compressible/pda/compressor.rs +++ b/forester/src/compressible/pda/compressor.rs @@ -28,7 +28,10 @@ use super::{state::PdaAccountTracker, types::PdaAccountState}; use crate::{ compressible::{ config::PdaProgramConfig, - traits::{verify_transaction_execution, CompressibleTracker}, + traits::{ + send_and_confirm_with_tracking, verify_transaction_execution, Cancelled, + CompressibleTracker, + }, }, Result, }; @@ -172,7 +175,7 @@ impl PdaCompressor { if cancelled.load(Ordering::Relaxed) { // Unmark since we won't process this account compressor.tracker.unmark_pending(&[account_state.pubkey]); - return Err((account_state, anyhow::anyhow!("Cancelled"))); + return Err((account_state, Cancelled.into())); } match compressor @@ -295,65 +298,15 @@ impl PdaCompressor { program_id ); - // Mark as pending before sending - self.tracker.mark_pending(&pubkeys); - - // Send single transaction - let signature = match rpc - .create_and_send_transaction( - &[ix], - &self.payer_keypair.pubkey(), - 
&[&self.payer_keypair], - ) - .await - { - Ok(sig) => sig, - Err(e) => { - self.tracker.unmark_pending(&pubkeys); - return Err(anyhow::anyhow!("Failed to send transaction: {:?}", e)); - } - }; - - info!( - "Batched compress_accounts_idempotent tx for {} PDAs sent: {}", - account_states.len(), - signature - ); - - // Wait for confirmation - let confirmed = match rpc.confirm_transaction(signature).await { - Ok(confirmed) => confirmed, - Err(e) => { - self.tracker.unmark_pending(&pubkeys); - return Err(anyhow::anyhow!("Failed to confirm transaction: {:?}", e)); - } - }; - - if confirmed { - if let Err(e) = verify_transaction_execution(&*rpc, signature).await { - self.tracker.unmark_pending(&pubkeys); - return Err(e); - } - - for state in account_states { - self.tracker.remove_compressed(&state.pubkey); - } - info!( - "Batched compress_accounts_idempotent tx confirmed: {}", - signature - ); - Ok(signature) - } else { - self.tracker.unmark_pending(&pubkeys); - tracing::warn!( - "compress_accounts_idempotent tx not confirmed: {} - returned to work pool", - signature - ); - Err(anyhow::anyhow!( - "Batch transaction not confirmed: {}", - signature - )) - } + send_and_confirm_with_tracking( + &mut *rpc, + &[ix], + &self.payer_keypair, + &*self.tracker, + &pubkeys, + "compress_accounts_idempotent", + ) + .await } /// Compress a single PDA account using cached config diff --git a/forester/src/compressible/traits.rs b/forester/src/compressible/traits.rs index 1e1b18ff3d..0255d89458 100644 --- a/forester/src/compressible/traits.rs +++ b/forester/src/compressible/traits.rs @@ -1,13 +1,34 @@ //! Shared traits for compressible account tracking. 
-use std::sync::atomic::{AtomicU64, Ordering}; +use std::{ + fmt, + sync::atomic::{AtomicU64, Ordering}, +}; use dashmap::{DashMap, DashSet}; use light_client::rpc::Rpc; -use solana_sdk::{pubkey::Pubkey, signature::Signature}; +use solana_sdk::{ + instruction::Instruction, + pubkey::Pubkey, + signature::{Keypair, Signature}, + signer::Signer, +}; +use tracing::info; use crate::Result; +/// Typed error for compressor cancellation, used instead of string matching. +#[derive(Debug)] +pub struct Cancelled; + +impl fmt::Display for Cancelled { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Cancelled") + } +} + +impl std::error::Error for Cancelled {} + pub trait CompressibleState: Clone + Send + Sync { fn pubkey(&self) -> &Pubkey; fn lamports(&self) -> u64; @@ -143,3 +164,66 @@ pub async fn verify_transaction_execution(rpc: &impl Rpc, signature: Signature) MAX_RETRIES )) } + +/// Marks `pubkeys` as pending, sends the transaction, waits for confirmation, +/// verifies execution, and either marks accounts as compressed or unmarks pending +/// on any failure. 
+pub async fn send_and_confirm_with_tracking( + rpc: &mut impl Rpc, + instructions: &[Instruction], + payer: &Keypair, + tracker: &impl CompressibleTracker, + pubkeys: &[Pubkey], + tx_label: &str, +) -> Result { + tracker.mark_pending(pubkeys); + + let signature = match rpc + .create_and_send_transaction(instructions, &payer.pubkey(), &[payer]) + .await + { + Ok(sig) => sig, + Err(e) => { + tracker.unmark_pending(pubkeys); + return Err(anyhow::anyhow!( + "Failed to send {} transaction: {:?}", + tx_label, + e + )); + } + }; + + info!("{} tx sent: {}", tx_label, signature); + + let confirmed = match rpc.confirm_transaction(signature).await { + Ok(confirmed) => confirmed, + Err(e) => { + tracker.unmark_pending(pubkeys); + return Err(anyhow::anyhow!( + "Failed to confirm {} transaction: {:?}", + tx_label, + e + )); + } + }; + + if confirmed { + if let Err(e) = verify_transaction_execution(rpc, signature).await { + tracker.unmark_pending(pubkeys); + return Err(e); + } + + for pubkey in pubkeys { + tracker.remove_compressed(pubkey); + } + info!("{} tx confirmed: {}", tx_label, signature); + Ok(signature) + } else { + tracker.unmark_pending(pubkeys); + Err(anyhow::anyhow!( + "{} tx not confirmed: {} - accounts returned to work pool", + tx_label, + signature + )) + } +} diff --git a/forester/src/epoch_manager.rs b/forester/src/epoch_manager.rs index 10e1bb3271..6a18e2a059 100644 --- a/forester/src/epoch_manager.rs +++ b/forester/src/epoch_manager.rs @@ -46,7 +46,10 @@ use tokio::{ use tracing::{debug, error, info, info_span, instrument, trace, warn}; use crate::{ - compressible::{traits::CompressibleTracker, CTokenAccountTracker, CTokenCompressor}, + compressible::{ + traits::{Cancelled, CompressibleTracker}, + CTokenAccountTracker, CTokenCompressor, + }, errors::{ ChannelError, ForesterError, InitializationError, RegistrationError, WorkReportError, }, @@ -2342,7 +2345,7 @@ impl EpochManager { batch_idx + 1, num_batches ); - return Err((batch_idx, batch.len(), 
anyhow!("Cancelled"))); + return Err((batch_idx, batch.len(), Cancelled.into())); } // Check forester is still eligible before processing this batch @@ -2622,7 +2625,7 @@ impl EpochManager { total_compressed += 1; } Err((account_state, e)) => { - if e.to_string() != "Cancelled" { + if e.downcast_ref::().is_none() { error!( event = "compression_pda_account_failed", run_id = %self.run_id, @@ -2729,7 +2732,7 @@ impl EpochManager { total_compressed += 1; } Err((mint_state, e)) => { - if e.to_string() != "Cancelled" { + if e.downcast_ref::().is_none() { error!( event = "compression_mint_account_failed", run_id = %self.run_id,
- {truncateAddress(tree.merkle_tree, 6)} + + e.stopPropagation()} + > + {truncateAddress(tree.merkle_tree, 6)} +
@@ -151,19 +177,18 @@ export function TreeTable({
{tree.v2_queue_info ? ( - - I:{tree.v2_queue_info.input_pending_batches * - tree.v2_queue_info.zkp_batch_size}{" "} - {tree.tree_type === "StateV2" && ( + + {tree.tree_type === "StateV2" ? ( <> - O: - {tree.v2_queue_info.output_pending_batches * - tree.v2_queue_info.zkp_batch_size} + I:{tree.v2_queue_info.input_pending_batches}{" "} + O:{tree.v2_queue_info.output_pending_batches} + ) : ( + tree.v2_queue_info.input_pending_batches )} ) : ( - + {tree.queue_length != null ? formatNumber(tree.queue_length) : "-"} @@ -171,14 +196,24 @@ export function TreeTable({ )} - {tree.assigned_forester - ? truncateAddress(tree.assigned_forester, 4) - : "-"} + {tree.assigned_forester ? ( + e.stopPropagation()} + > + {truncateAddress(tree.assigned_forester, 4)} + + ) : "-"}