Skip to content

Commit 3bf4be5

Browse files
committed
torch-sys --> torch-sys2, monarch no longer links torch
Pull Request resolved: #2046 This replaces torch-sys with torch-sys2, which is a much simpler set of bindings to torch that routes through pyo3. This allows us to drop the linking against torch. There are significant advantages now: * importing monarch drops from ~3 seconds to 70ms !!! * importing monarch was the primary reason why things felt slow: every time we started a new process it had to import this stuff, and a lot of those process starts got serialized. Now that they are fast, iteration will feel much faster. * The monarch library can work with any somewhat recent version of torch. No need to match monarch to torch. This should make packaging orders of magnitude easier. * Monarch itself is now smallish at 70MB and can easily be distributed in many places. Torch does not have to be installed in those places for it to work. * It offers the possibility to statically link libcudart and rdma, making monarch no longer have any library dependencies. It is much more likely we can "inject" the monarch packages into various python distributions to make monarch work in them. * Less critical need for "torch vs no-torch" bindings. ghstack-source-id: 326996723 @exported-using-ghexport Differential Revision: [D88338645](https://our.internmc.facebook.com/intern/diff/D88338645/)
1 parent 7c86587 commit 3bf4be5

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+1152
-6857
lines changed

.github/workflows/test-gpu-rust.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ jobs:
6666
timeout 12m cargo nextest run --workspace --profile ci \
6767
--exclude monarch_messages \
6868
--exclude monarch_tensor_worker \
69-
--exclude torch-sys \
7069
--exclude torch-sys-cuda
7170
# Copy the test results to the expected location
7271
# TODO: error in pytest-results-action, TypeError: results.testsuites.testsuite.testcase is not iterable

Cargo.toml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,4 @@ members = [
2525
"rdmaxcel-sys",
2626
"serde_multipart",
2727
"timed_test",
28-
"torch-sys",
29-
"torch-sys-cuda",
3028
]

docs/source/rust-api.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ The Monarch project consists of several Rust crates, each with specialized funct
1818
### CUDA and GPU Computing
1919
- <a id="link-cuda-sys" href="rust-api/cuda_sys/index.html">**cuda-sys**</a><span id="desc-cuda-sys"> - Low-level CUDA FFI bindings</span>
2020
- <a id="link-nccl-sys" href="rust-api/nccl_sys/index.html">**nccl-sys**</a><span id="desc-nccl-sys"> - NCCL (NVIDIA Collective Communications Library) bindings</span>
21-
- <a id="link-torch-sys" href="rust-api/torch_sys/index.html">**torch-sys**</a><span id="desc-torch-sys"> - PyTorch C++ API bindings for Rust</span>
21+
- <a id="link-torch-sys2" href="rust-api/torch_sys2/index.html">**torch-sys2**</a><span id="desc-torch-sys2"> - Simplified PyTorch Python API bindings for Rust</span>
2222
- <a id="link-monarch_tensor_worker" href="rust-api/monarch_tensor_worker/index.html">**monarch_tensor_worker**</a><span id="desc-monarch_tensor_worker"> - High-performance tensor processing worker</span>
2323

2424
### RDMA and High-Performance Networking

monarch_extension/Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,9 @@ pyo3 = { version = "0.24", features = ["anyhow", "multiple-pymethods", "py-clone
3535
rdmaxcel-sys = { path = "../rdmaxcel-sys", optional = true }
3636
serde = { version = "1.0.219", features = ["derive", "rc"] }
3737
tokio = { version = "1.47.1", features = ["full", "test-util", "tracing"] }
38-
torch-sys = { version = "0.0.0", path = "../torch-sys", optional = true }
3938
torch-sys-cuda = { version = "0.0.0", path = "../torch-sys-cuda", optional = true }
4039
tracing = { version = "0.1.41", features = ["attributes", "valuable"] }
4140

4241
[features]
4342
default = ["tensor_engine"]
44-
tensor_engine = ["dep:monarch_messages", "dep:monarch_rdma_extension", "dep:monarch_tensor_worker", "dep:nccl-sys", "dep:rdmaxcel-sys", "dep:torch-sys", "dep:torch-sys-cuda"]
43+
tensor_engine = ["dep:monarch_messages", "dep:monarch_rdma_extension", "dep:monarch_tensor_worker", "dep:nccl-sys", "dep:rdmaxcel-sys", "dep:torch-sys-cuda"]

monarch_extension/build.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,6 @@ fn main() {
1010
// Only set torch-related rpaths if tensor_engine feature is enabled
1111
#[cfg(feature = "tensor_engine")]
1212
{
13-
// `torch-sys` will set this env var through Cargo `links` metadata.
14-
let lib_path = std::env::var("DEP_TORCH_LIB_PATH").expect("DEP_TORCH_LIB_PATH to be set");
15-
// Set the rpath so that the dynamic linker can find libtorch and friends.
16-
println!("cargo::rustc-link-arg=-Wl,-rpath,{lib_path}");
17-
1813
if let Ok(path) = std::env::var("DEP_NCCL_LIB_PATH") {
1914
println!("cargo::rustc-link-arg=-Wl,-rpath,{path}");
2015
}

monarch_extension/src/client.rs

Lines changed: 0 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ use pyo3::types::PyDict;
5050
use pyo3::types::PyList;
5151
use pyo3::types::PyNone;
5252
use tokio::sync::Mutex;
53-
use torch_sys::RValue;
5453

5554
use crate::convert::convert;
5655

@@ -74,57 +73,13 @@ impl WorkerResponse {
7473

7574
#[pymethods]
7675
impl WorkerResponse {
77-
#[staticmethod]
78-
fn new_for_unit_test(py: Python<'_>, seq: u64, response: PyObject) -> PyResult<Self> {
79-
if let Ok(exc) = response.downcast_bound::<PyException>(py) {
80-
Ok(Self {
81-
seq: seq.into(),
82-
result: Some(Err(exc.borrow().inner.clone())),
83-
})
84-
} else {
85-
Ok(Self {
86-
seq: seq.into(),
87-
result: Some(Ok(Serialized::serialize(
88-
&response.extract::<PyTree<RValue>>(py)?,
89-
)
90-
.map_err(|err| {
91-
PyRuntimeError::new_err(format!("Failed to deserialize: {:?}", err))
92-
})?)),
93-
})
94-
}
95-
}
96-
9776
// For now lets treat Seq as just an int with an opaque alias on python side.
9877
// We can expose the rust version later if desired.
9978
#[getter]
10079
fn seq(&self) -> u64 {
10180
self.seq.into()
10281
}
10382

104-
// TODO: result() cannot yet be called within a device mesh.
105-
// Fake tensors, which are not on the intended devices, will cause the deserialization to fail.
106-
fn result(&self, py: Python<'_>) -> PyResult<PyObject> {
107-
if let Some(result) = &self.result {
108-
if result.is_err() {
109-
PyNone::get(py).into_py_any(py)
110-
} else {
111-
// TODO: Use better shared error class
112-
let rvalue = result
113-
.clone()
114-
.unwrap()
115-
.deserialized::<PyTree<RValue>>()
116-
.map_err(|err| {
117-
PyRuntimeError::new_err(format!("Failed to deserialize: {:?}", err))
118-
})?;
119-
// SAFETY: Safety requirements are propagated via the `unsafe` tag
120-
// on this method.
121-
Ok(unsafe { rvalue.try_to_object_unsafe(py)?.unbind() })
122-
}
123-
} else {
124-
PyNone::get(py).into_py_any(py)
125-
}
126-
}
127-
12883
fn exception(&self, py: Python<'_>) -> PyResult<PyObject> {
12984
match self.result.as_ref() {
13085
Some(Ok(_)) => PyNone::get(py).into_py_any(py),

monarch_messages/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ pyo3 = { version = "0.24", features = ["anyhow", "multiple-pymethods", "py-clone
1818
serde = { version = "1.0.219", features = ["derive", "rc"] }
1919
serde_bytes = "0.11"
2020
thiserror = "2.0.12"
21-
torch-sys = { version = "0.0.0", path = "../torch-sys" }
21+
torch-sys2 = { version = "0.0.0", path = "../torch-sys2" }
2222
torch-sys-cuda = { version = "0.0.0", path = "../torch-sys-cuda" }
2323
tracing = { version = "0.1.41", features = ["attributes", "valuable"] }
2424

monarch_messages/build.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,5 @@
77
*/
88

99
fn main() {
10-
// `torch-sys` will set this env var through Cargo `links` metadata.
11-
let lib_path = std::env::var("DEP_TORCH_LIB_PATH").expect("DEP_TORCH_LIB_PATH to be set");
12-
// Set the rpath so that the dynamic linker can find libtorch and friends.
13-
println!("cargo::rustc-link-arg=-Wl,-rpath,{lib_path}");
10+
// Build script for monarch_messages
1411
}

monarch_messages/src/wire_value.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@ use pyo3::prelude::*;
1616
use pyo3::types::PyNone;
1717
use serde::Deserialize;
1818
use serde::Serialize;
19-
use torch_sys::Device;
20-
use torch_sys::Layout;
21-
use torch_sys::MemoryFormat;
22-
use torch_sys::ScalarType;
19+
use torch_sys2::Device;
20+
use torch_sys2::Layout;
21+
use torch_sys2::MemoryFormat;
22+
use torch_sys2::ScalarType;
2323

2424
use crate::worker::Ref;
2525

@@ -40,9 +40,9 @@ pub enum WireValue {
4040
IntList(Vec<i64>),
4141
RefList(Vec<Ref>),
4242
Device(Device),
43-
Layout(#[serde(with = "torch_sys::LayoutDef")] Layout),
44-
ScalarType(#[serde(with = "torch_sys::ScalarTypeDef")] ScalarType),
45-
MemoryFormat(#[serde(with = "torch_sys::MemoryFormatDef")] MemoryFormat),
43+
Layout(#[serde(with = "torch_sys2::LayoutDef")] Layout),
44+
ScalarType(#[serde(with = "torch_sys2::ScalarTypeDef")] ScalarType),
45+
MemoryFormat(#[serde(with = "torch_sys2::MemoryFormatDef")] MemoryFormat),
4646
// Make this wrap the unit type, as `pyo3::FromPyObject` doesn't work with
4747
// empty enum variants.
4848
None(()),

monarch_messages/src/worker.rs

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,13 @@ use pyo3::types::PyTuple;
3838
use serde::Deserialize;
3939
use serde::Serialize;
4040
use thiserror::Error;
41-
use torch_sys::BorrowError;
42-
use torch_sys::Device;
43-
use torch_sys::Layout;
44-
use torch_sys::ScalarType;
45-
use torch_sys::call_op::CallOpError;
4641
use torch_sys_cuda::nccl::NcclConfig;
4742
use torch_sys_cuda::nccl::ReduceOp;
4843
use torch_sys_cuda::nccl::UniqueId;
44+
use torch_sys2::BorrowError;
45+
use torch_sys2::Device;
46+
use torch_sys2::Layout;
47+
use torch_sys2::ScalarType;
4948

5049
use crate::controller::ControllerActor;
5150
use crate::controller::Seq;
@@ -483,9 +482,9 @@ pub enum Reduction {
483482
)]
484483
pub struct Factory {
485484
pub size: Vec<i64>,
486-
#[serde(with = "torch_sys::ScalarTypeDef")]
485+
#[serde(with = "torch_sys2::ScalarTypeDef")]
487486
pub dtype: ScalarType,
488-
#[serde(with = "torch_sys::LayoutDef")]
487+
#[serde(with = "torch_sys2::LayoutDef")]
489488
pub layout: Layout,
490489
pub device: Device,
491490
}
@@ -619,11 +618,6 @@ impl CallFunctionError {
619618
Self::Error(anyhow::anyhow!("borrow failed: {}", err))
620619
}
621620

622-
#[allow(non_snake_case)]
623-
pub fn OperatorFailed(err: CallOpError) -> Self {
624-
Self::Error(anyhow::anyhow!("torch operator failed: {}", err))
625-
}
626-
627621
#[allow(non_snake_case)]
628622
pub fn UnexpectedNumberOfReturns(expected: usize, actual: usize) -> Self {
629623
Self::Error(anyhow::anyhow!(
@@ -660,12 +654,6 @@ impl From<BorrowError> for CallFunctionError {
660654
}
661655
}
662656

663-
impl From<CallOpError> for CallFunctionError {
664-
fn from(v: CallOpError) -> CallFunctionError {
665-
CallFunctionError::Error(v.into())
666-
}
667-
}
668-
669657
/// Worker messages. These define the observable behavior of the worker, so the
670658
/// documentations here
671659
#[derive(

0 commit comments

Comments
 (0)