Skip to content

Commit 3bf4be5

Browse files
committed
torch-sys --> torch-sys2, monarch no longer links torch
Pull Request resolved: #2046 This replaces torch-sys with torch-sys2, which is a much simpler set of bindings to torch that routes through pyo3. This allows us to drop the linking against torch. There are significant advantages now: * importing monarch drops from ~3 seconds to 70ms !!! * importing monarch was the primary reason why things felt slow: every time we started a new process it had to import this stuff, and a lot of those process starts got serialized. Now that they are fast, iteration will feel much faster. * The monarch library can work with any somewhat recent version of torch. No need to match monarch to torch. This should make packaging orders of magnitude easier. * Monarch itself is now smallish at 70MB and can easily be distributed in many places. Torch does not have to be installed in those places for it to work. * It offers the possibility to statically link libcudart and rdma, making monarch no longer have any library dependencies. It is much more likely we can "inject" the monarch packages into various python distributions to make monarch work in them. * Less critical need for "torch vs no-torch" bindings. ghstack-source-id: 326996723 @exported-using-ghexport Differential Revision: [D88338645](https://our.internmc.facebook.com/intern/diff/D88338645/)
1 parent 7c86587 commit 3bf4be5

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+1152
-6857
lines changed

.github/workflows/test-gpu-rust.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ jobs:
6666
timeout 12m cargo nextest run --workspace --profile ci \
6767
--exclude monarch_messages \
6868
--exclude monarch_tensor_worker \
69-
--exclude torch-sys \
7069
--exclude torch-sys-cuda
7170
# Copy the test results to the expected location
7271
# TODO: error in pytest-results-action, TypeError: results.testsuites.testsuite.testcase is not iterable

Cargo.toml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,4 @@ members = [
2525
"rdmaxcel-sys",
2626
"serde_multipart",
2727
"timed_test",
28-
"torch-sys",
29-
"torch-sys-cuda",
3028
]

docs/source/rust-api.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ The Monarch project consists of several Rust crates, each with specialized funct
1818
### CUDA and GPU Computing
1919
- <a id="link-cuda-sys" href="rust-api/cuda_sys/index.html">**cuda-sys**</a><span id="desc-cuda-sys"> - Low-level CUDA FFI bindings</span>
2020
- <a id="link-nccl-sys" href="rust-api/nccl_sys/index.html">**nccl-sys**</a><span id="desc-nccl-sys"> - NCCL (NVIDIA Collective Communications Library) bindings</span>
21-
- <a id="link-torch-sys" href="rust-api/torch_sys/index.html">**torch-sys**</a><span id="desc-torch-sys"> - PyTorch C++ API bindings for Rust</span>
21+
- <a id="link-torch-sys2" href="rust-api/torch_sys2/index.html">**torch-sys2**</a><span id="desc-torch-sys2"> - Simplified PyTorch Python API bindings for Rust</span>
2222
- <a id="link-monarch_tensor_worker" href="rust-api/monarch_tensor_worker/index.html">**monarch_tensor_worker**</a><span id="desc-monarch_tensor_worker"> - High-performance tensor processing worker</span>
2323

2424
### RDMA and High-Performance Networking

monarch_extension/Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,9 @@ pyo3 = { version = "0.24", features = ["anyhow", "multiple-pymethods", "py-clone
3535
rdmaxcel-sys = { path = "../rdmaxcel-sys", optional = true }
3636
serde = { version = "1.0.219", features = ["derive", "rc"] }
3737
tokio = { version = "1.47.1", features = ["full", "test-util", "tracing"] }
38-
torch-sys = { version = "0.0.0", path = "../torch-sys", optional = true }
3938
torch-sys-cuda = { version = "0.0.0", path = "../torch-sys-cuda", optional = true }
4039
tracing = { version = "0.1.41", features = ["attributes", "valuable"] }
4140

4241
[features]
4342
default = ["tensor_engine"]
44-
tensor_engine = ["dep:monarch_messages", "dep:monarch_rdma_extension", "dep:monarch_tensor_worker", "dep:nccl-sys", "dep:rdmaxcel-sys", "dep:torch-sys", "dep:torch-sys-cuda"]
43+
tensor_engine = ["dep:monarch_messages", "dep:monarch_rdma_extension", "dep:monarch_tensor_worker", "dep:nccl-sys", "dep:rdmaxcel-sys", "dep:torch-sys-cuda"]

monarch_extension/build.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,6 @@ fn main() {
1010
// Only set torch-related rpaths if tensor_engine feature is enabled
1111
#[cfg(feature = "tensor_engine")]
1212
{
13-
// `torch-sys` will set this env var through Cargo `links` metadata.
14-
let lib_path = std::env::var("DEP_TORCH_LIB_PATH").expect("DEP_TORCH_LIB_PATH to be set");
15-
// Set the rpath so that the dynamic linker can find libtorch and friends.
16-
println!("cargo::rustc-link-arg=-Wl,-rpath,{lib_path}");
17-
1813
if let Ok(path) = std::env::var("DEP_NCCL_LIB_PATH") {
1914
println!("cargo::rustc-link-arg=-Wl,-rpath,{path}");
2015
}

monarch_extension/src/client.rs

Lines changed: 0 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ use pyo3::types::PyDict;
5050
use pyo3::types::PyList;
5151
use pyo3::types::PyNone;
5252
use tokio::sync::Mutex;
53-
use torch_sys::RValue;
5453

5554
use crate::convert::convert;
5655

@@ -74,57 +73,13 @@ impl WorkerResponse {
7473

7574
#[pymethods]
7675
impl WorkerResponse {
77-
#[staticmethod]
78-
fn new_for_unit_test(py: Python<'_>, seq: u64, response: PyObject) -> PyResult<Self> {
79-
if let Ok(exc) = response.downcast_bound::<PyException>(py) {
80-
Ok(Self {
81-
seq: seq.into(),
82-
result: Some(Err(exc.borrow().inner.clone())),
83-
})
84-
} else {
85-
Ok(Self {
86-
seq: seq.into(),
87-
result: Some(Ok(Serialized::serialize(
88-
&response.extract::<PyTree<RValue>>(py)?,
89-
)
90-
.map_err(|err| {
91-
PyRuntimeError::new_err(format!("Failed to deserialize: {:?}", err))
92-
})?)),
93-
})
94-
}
95-
}
96-
9776
// For now lets treat Seq as just an int with an opaque alias on python side.
9877
// We can expose the rust version later if desired.
9978
#[getter]
10079
fn seq(&self) -> u64 {
10180
self.seq.into()
10281
}
10382

104-
// TODO: result() cannot yet be called within a device mesh.
105-
// Fake tensors, which are not on the intended devices, will cause the deserialization to fail.
106-
fn result(&self, py: Python<'_>) -> PyResult<PyObject> {
107-
if let Some(result) = &self.result {
108-
if result.is_err() {
109-
PyNone::get(py).into_py_any(py)
110-
} else {
111-
// TODO: Use better shared error class
112-
let rvalue = result
113-
.clone()
114-
.unwrap()
115-
.deserialized::<PyTree<RValue>>()
116-
.map_err(|err| {
117-
PyRuntimeError::new_err(format!("Failed to deserialize: {:?}", err))
118-
})?;
119-
// SAFETY: Safety requirements are propagated via the `unsafe` tag
120-
// on this method.
121-
Ok(unsafe { rvalue.try_to_object_unsafe(py)?.unbind() })
122-
}
123-
} else {
124-
PyNone::get(py).into_py_any(py)
125-
}
126-
}
127-
12883
fn exception(&self, py: Python<'_>) -> PyResult<PyObject> {
12984
match self.result.as_ref() {
13085
Some(Ok(_)) => PyNone::get(py).into_py_any(py),

monarch_messages/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ pyo3 = { version = "0.24", features = ["anyhow", "multiple-pymethods", "py-clone
1818
serde = { version = "1.0.219", features = ["derive", "rc"] }
1919
serde_bytes = "0.11"
2020
thiserror = "2.0.12"
21-
torch-sys = { version = "0.0.0", path = "../torch-sys" }
21+
torch-sys2 = { version = "0.0.0", path = "../torch-sys2" }
2222
torch-sys-cuda = { version = "0.0.0", path = "../torch-sys-cuda" }
2323
tracing = { version = "0.1.41", features = ["attributes", "valuable"] }
2424

monarch_messages/build.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,5 @@
77
*/
88

99
fn main() {
10-
// `torch-sys` will set this env var through Cargo `links` metadata.
11-
let lib_path = std::env::var("DEP_TORCH_LIB_PATH").expect("DEP_TORCH_LIB_PATH to be set");
12-
// Set the rpath so that the dynamic linker can find libtorch and friends.
13-
println!("cargo::rustc-link-arg=-Wl,-rpath,{lib_path}");
10+
// Build script for monarch_messages
1411
}

monarch_messages/src/wire_value.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@ use pyo3::prelude::*;
1616
use pyo3::types::PyNone;
1717
use serde::Deserialize;
1818
use serde::Serialize;
19-
use torch_sys::Device;
20-
use torch_sys::Layout;
21-
use torch_sys::MemoryFormat;
22-
use torch_sys::ScalarType;
19+
use torch_sys2::Device;
20+
use torch_sys2::Layout;
21+
use torch_sys2::MemoryFormat;
22+
use torch_sys2::ScalarType;
2323

2424
use crate::worker::Ref;
2525

@@ -40,9 +40,9 @@ pub enum WireValue {
4040
IntList(Vec<i64>),
4141
RefList(Vec<Ref>),
4242
Device(Device),
43-
Layout(#[serde(with = "torch_sys::LayoutDef")] Layout),
44-
ScalarType(#[serde(with = "torch_sys::ScalarTypeDef")] ScalarType),
45-
MemoryFormat(#[serde(with = "torch_sys::MemoryFormatDef")] MemoryFormat),
43+
Layout(#[serde(with = "torch_sys2::LayoutDef")] Layout),
44+
ScalarType(#[serde(with = "torch_sys2::ScalarTypeDef")] ScalarType),
45+
MemoryFormat(#[serde(with = "torch_sys2::MemoryFormatDef")] MemoryFormat),
4646
// Make this wrap the unit type, as `pyo3::FromPyObject` doesn't work with
4747
// empty enum variants.
4848
None(()),

monarch_messages/src/worker.rs

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,13 @@ use pyo3::types::PyTuple;
3838
use serde::Deserialize;
3939
use serde::Serialize;
4040
use thiserror::Error;
41-
use torch_sys::BorrowError;
42-
use torch_sys::Device;
43-
use torch_sys::Layout;
44-
use torch_sys::ScalarType;
45-
use torch_sys::call_op::CallOpError;
4641
use torch_sys_cuda::nccl::NcclConfig;
4742
use torch_sys_cuda::nccl::ReduceOp;
4843
use torch_sys_cuda::nccl::UniqueId;
44+
use torch_sys2::BorrowError;
45+
use torch_sys2::Device;
46+
use torch_sys2::Layout;
47+
use torch_sys2::ScalarType;
4948

5049
use crate::controller::ControllerActor;
5150
use crate::controller::Seq;
@@ -483,9 +482,9 @@ pub enum Reduction {
483482
)]
484483
pub struct Factory {
485484
pub size: Vec<i64>,
486-
#[serde(with = "torch_sys::ScalarTypeDef")]
485+
#[serde(with = "torch_sys2::ScalarTypeDef")]
487486
pub dtype: ScalarType,
488-
#[serde(with = "torch_sys::LayoutDef")]
487+
#[serde(with = "torch_sys2::LayoutDef")]
489488
pub layout: Layout,
490489
pub device: Device,
491490
}
@@ -619,11 +618,6 @@ impl CallFunctionError {
619618
Self::Error(anyhow::anyhow!("borrow failed: {}", err))
620619
}
621620

622-
#[allow(non_snake_case)]
623-
pub fn OperatorFailed(err: CallOpError) -> Self {
624-
Self::Error(anyhow::anyhow!("torch operator failed: {}", err))
625-
}
626-
627621
#[allow(non_snake_case)]
628622
pub fn UnexpectedNumberOfReturns(expected: usize, actual: usize) -> Self {
629623
Self::Error(anyhow::anyhow!(
@@ -660,12 +654,6 @@ impl From<BorrowError> for CallFunctionError {
660654
}
661655
}
662656

663-
impl From<CallOpError> for CallFunctionError {
664-
fn from(v: CallOpError) -> CallFunctionError {
665-
CallFunctionError::Error(v.into())
666-
}
667-
}
668-
669657
/// Worker messages. These define the observable behavior of the worker, so the
670658
/// documentations here
671659
#[derive(

0 commit comments

Comments
 (0)