Skip to content

Commit 3395547

Browse files
pzhan9 and meta-codesync[bot]
authored and committed
Log events for host mesh (#1905)
Summary:
Pull Request resolved: #1905
As title.
Reviewed By: mariusae
Differential Revision: D87111325
fbshipit-source-id: 0fe1ab4617421e3dd20de10f8d0b0ee6a02d9bcf
1 parent deeaee9 commit 3395547

File tree

1 file changed

+29
-4
lines changed

1 file changed

+29
-4
lines changed

hyperactor_mesh/src/v1/host_mesh.rs

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -360,11 +360,22 @@ impl HostMesh {
360360
name: &str,
361361
bootstrap_params: Option<BootstrapCommand>,
362362
) -> v1::Result<Self> {
363+
Self::allocate_inner(cx, alloc, Name::new(name), bootstrap_params).await
364+
}
365+
366+
// Allocation is delegated to `allocate_inner` so that its tracing span can record the
// already-constructed `Name` as the `mesh_name` field.
367+
#[hyperactor::instrument(fields(mesh_name=name.to_string()))]
368+
async fn allocate_inner(
369+
cx: &impl context::Actor,
370+
alloc: Box<dyn Alloc + Send + Sync>,
371+
name: Name,
372+
bootstrap_params: Option<BootstrapCommand>,
373+
) -> v1::Result<Self> {
374+
tracing::info!(name = "HostMeshStatus", status = "Allocate::Attempt");
363375
let transport = alloc.transport();
364376
let extent = alloc.extent().clone();
365377
let is_local = alloc.is_local();
366-
let proc_mesh = ProcMesh::allocate(cx, alloc, name).await?;
367-
let name = Name::new(name);
378+
let proc_mesh = ProcMesh::allocate(cx, alloc, name.name()).await?;
368379

369380
// TODO: figure out how to deal with MAST allocs. It requires an extra dimension,
370381
// into which it launches multiple procs, so we need to always specify an additional
@@ -405,7 +416,7 @@ impl HostMesh {
405416
}
406417

407418
let proc_mesh_ref = proc_mesh.clone();
408-
Ok(Self {
419+
let mesh = Self {
409420
name: name.clone(),
410421
extent: extent.clone(),
411422
allocation: HostMeshAllocation::ProcMesh {
@@ -414,7 +425,9 @@ impl HostMesh {
414425
hosts: hosts.clone(),
415426
},
416427
current_ref: HostMeshRef::new(name, extent.into(), hosts).unwrap(),
417-
})
428+
};
429+
tracing::info!(name = "HostMeshStatus", status = "Allocate::Created");
430+
Ok(mesh)
418431
}
419432

420433
/// Take ownership of an existing host mesh reference.
@@ -490,6 +503,11 @@ impl Drop for HostMesh {
490503
/// only provides opportunistic cleanup to prevent process leaks
491504
/// if shutdown is skipped.
492505
fn drop(&mut self) {
506+
tracing::info!(
507+
name = "HostMeshStatus",
508+
mesh_name = %self.name,
509+
status = "Dropping",
510+
);
493511
// Snapshot the owned hosts we're responsible for.
494512
let hosts: Vec<HostRef> = match &self.allocation {
495513
HostMeshAllocation::ProcMesh { hosts, .. } | HostMeshAllocation::Owned { hosts } => {
@@ -572,10 +590,17 @@ impl Drop for HostMesh {
572590
// No runtime here; PDEATHSIG and manager Drop remain the
573591
// last-resort safety net.
574592
tracing::warn!(
593+
mesh_name = %self.name,
575594
hosts = hosts.len(),
576595
"HostMesh dropped without a tokio runtime; skipping best-effort shutdown"
577596
);
578597
}
598+
599+
tracing::info!(
600+
name = "HostMeshStatus",
601+
mesh_name = %self.name,
602+
status = "Dropped",
603+
);
579604
}
580605
}
581606

0 commit comments

Comments
 (0)