diff --git a/HISTORY.rst b/HISTORY.rst index 1f82028..e9773d4 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,4 +1,15 @@ +6.2.0 / 2026-06-05 +================== + + * feat: reduce peak serializer memory in the pure Python rooted-output path + * feat: reduce Rust extension peak serializer memory by writing directly into Python bytes + * fix: split PyO3 extension-module linking so ``cargo test`` works on macOS + * chore: release ``json2xml-rs`` 0.3.0 and make ``json2xml[fast]`` require the memory-saving Rust package + * chore: update the Rust extension crate to Rust 2024 with rust-version 1.96 + * docs: add reproducible Rust memory benchmark results showing about 49% lower serializer RSS delta + + 6.1.0 / 2026-05-04 ================== diff --git a/Makefile b/Makefile index f9dbfd9..899aeb0 100644 --- a/Makefile +++ b/Makefile @@ -66,7 +66,7 @@ test-simple: ## run tests without coverage pytest -vv tests test-rust: ## run Rust tests - cd rust && cargo test --no-default-features + cd rust && cargo test test-all: test test-rust ## run all tests (Python and Rust) @@ -100,4 +100,3 @@ dist: clean ## builds source and wheel package install: clean ## install the package to the active Python's site-packages python setup.py install - diff --git a/benchmark_memory_rust.py b/benchmark_memory_rust.py new file mode 100644 index 0000000..7363761 --- /dev/null +++ b/benchmark_memory_rust.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +"""Measure peak RSS for the Rust json2xml extension on a large payload.""" +from __future__ import annotations + +import argparse +import gc +import json +import platform +import resource +import sys +import time +from typing import Any + +from json2xml_rs import dicttoxml + + +def max_rss_bytes() -> int: + """Return current process max RSS in bytes on macOS/Linux.""" + value = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + if sys.platform == "darwin": + return int(value) + return int(value) * 1024 + + +def make_payload(records: int) -> list[dict[str, Any]]: + """Build deterministic nested data that stays on the Rust fast path.""" + payload: list[dict[str, Any]] = [] + for i in range(records): + suffix = f"{i:08d}" + payload.append( + { + "id": i, + "name": f"customer-{suffix}-" + ("name" * 8), + "email": f"user-{suffix}@example.com", + "active": i % 2 == 0, + "score": (i % 10_000) / 17.0, + "tags": [ + f"tag-{i % 17}", + f"region-{i % 23}", + f"cohort-{i % 31}", + "xml-safe", + "memory-benchmark", + ], + "metadata": { + "created": "2026-06-05T10:30:00Z", + "updated": "2026-06-05T12:45:00Z", + "version": i % 101, + "nested": { + "level1": { + "level2": { + "value": f"value-{suffix}-" + ("payload" * 6), + "checksum": f"{(i * 2654435761) & 0xFFFFFFFF:08x}", + } + } + }, + }, + } + ) + return payload + + +def mib(value: int) -> float: + return value / (1024 * 1024) + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("--records", type=int, default=100_000) + parser.add_argument("--label", default="unknown") + args = parser.parse_args() + + # Load and initialize the extension before the baseline payload RSS. + dicttoxml([{"warmup": "ok"}], attr_type=True) + + payload = make_payload(args.records) + gc.collect() + baseline_rss = max_rss_bytes() + + start = time.perf_counter() + xml = dicttoxml(payload, attr_type=True) + elapsed = time.perf_counter() - start + peak_rss = max_rss_bytes() + + result = { + "label": args.label, + "records": args.records, + "python": platform.python_version(), + "platform": platform.platform(), + "baseline_rss_mib": round(mib(baseline_rss), 2), + "peak_rss_mib": round(mib(peak_rss), 2), + "serializer_delta_mib": round(mib(max(0, peak_rss - baseline_rss)), 2), + "xml_size_mib": round(mib(len(xml)), 2), + "elapsed_seconds": round(elapsed, 3), + "output_type": type(xml).__name__, + } + print(json.dumps(result, sort_keys=True)) + + +if __name__ == "__main__": + main() diff --git a/docs/benchmarks.rst b/docs/benchmarks.rst index a35e13c..2bd175e 100644 --- a/docs/benchmarks.rst +++ b/docs/benchmarks.rst @@ -145,6 +145,8 @@ Recent pure Python improvements substantially reduced conversion time. Medium an Go and Zig remain useful for native CLI workflows. They are slower for small and medium inputs because startup dominates, but both beat Python on the 323KB workload when full CLI process time is measured. +See ``docs/rust_memory_benchmark.rst`` for the June 2026 Rust peak-memory comparison between the previous extension-boundary copy and the Python bytes-writer implementation. + When to Use Each Implementation ------------------------------- diff --git a/docs/rust_memory_benchmark.rst b/docs/rust_memory_benchmark.rst new file mode 100644 index 0000000..383928d --- /dev/null +++ b/docs/rust_memory_benchmark.rst @@ -0,0 +1,110 @@ +Rust Memory Benchmark +===================== + +This benchmark compares peak memory usage before and after the Rust serializer started writing directly into Python ``bytes``. + +Summary +------- + +The current branch reduces serializer peak RSS by about **77.44 MiB** on a 100,000-record payload that produces **78.17 MiB** of XML. That is a **49.1% reduction** in serializer memory delta compared with the previous PR commit. + +.. list-table:: + :header-rows: 1 + :widths: 24 22 22 22 22 + + * - Version + - Commit + - Avg peak RSS + - Avg serializer delta + - Avg time + * - Previous + - ``7dd86b0`` + - 349.15 MiB + - 157.70 MiB + - 0.180s + * - Current + - ``07d840f`` + - 271.65 MiB + - 80.26 MiB + - 0.265s + +The memory result matches the implementation change: the previous version held roughly one final XML payload in Rust plus one Python ``bytes`` payload, while the current version writes into the Python ``bytes`` object directly. + +Methodology +----------- + +The benchmark uses ``benchmark_memory_rust.py`` with a deterministic generated payload so the Rust fast path can be measured without file parsing or pure-Python fallback behavior. + +* Machine: Apple Silicon arm64 +* OS: macOS 26.5 +* Python: 3.14.0 +* Build: ``python3 -m maturin develop --release --offline`` +* Payload: 100,000 nested records +* Input JSON size: 44.31 MiB +* Output XML size: 78.17 MiB +* Measurement: process ``ru_maxrss`` after payload creation versus peak after ``json2xml_rs.dicttoxml(payload, attr_type=True)`` +* Sampling: three fresh Python processes per version + +The baseline RSS is captured after the large Python payload is already built. The reported serializer delta is ``peak_rss - baseline_rss``, which focuses the comparison on output construction rather than payload allocation. + +Raw Samples +----------- + +.. list-table:: + :header-rows: 1 + :widths: 24 18 18 18 18 + + * - Run + - Baseline RSS + - Peak RSS + - Serializer delta + - Time + * - previous-release-1 + - 191.53 MiB + - 349.22 MiB + - 157.69 MiB + - 0.182s + * - previous-release-2 + - 191.39 MiB + - 349.09 MiB + - 157.70 MiB + - 0.179s + * - previous-release-3 + - 191.44 MiB + - 349.14 MiB + - 157.70 MiB + - 0.178s + * - current-release-1 + - 191.45 MiB + - 271.77 MiB + - 80.31 MiB + - 0.265s + * - current-release-2 + - 191.48 MiB + - 271.64 MiB + - 80.16 MiB + - 0.272s + * - current-release-3 + - 191.25 MiB + - 271.55 MiB + - 80.30 MiB + - 0.258s + +Tradeoff +-------- + +The memory improvement comes with a throughput cost in this release benchmark. Average conversion time increased from 0.180s to 0.265s, about **47.5% slower** for this payload. + +That cost is likely from routing every XML write through ``std::io::Write`` and PyO3's bytes writer. The memory win is substantial for large outputs, but latency-sensitive callers may want more timing data before treating the bytes-writer path as a universal improvement. + +Reproduction +------------ + +Run each version in a fresh process after installing the desired Rust extension build. + +.. code-block:: bash + + python3 -m maturin develop --release --offline + python3 benchmark_memory_rust.py --records 100000 --label current-release-1 + +For the previous comparison, install commit ``7dd86b0`` in a temporary worktree, then run the same command from the main checkout so the benchmark script stays identical. diff --git a/json2xml/__init__.py b/json2xml/__init__.py index 9c5472e..92d2228 100644 --- a/json2xml/__init__.py +++ b/json2xml/__init__.py @@ -2,4 +2,4 @@ __author__ = """Vinit Kumar""" __email__ = "mail@vinitkumar.me" -__version__ = "6.1.0" +__version__ = "6.2.0" diff --git a/json2xml/dicttoxml.py b/json2xml/dicttoxml.py index 2e2321c..dcc3fc7 100644 --- a/json2xml/dicttoxml.py +++ b/json2xml/dicttoxml.py @@ -903,7 +903,6 @@ def dicttoxml( ] return "".join(output).encode("utf-8") - output = [] namespace_str = "" if xml_namespaces is None: xml_namespaces = {} @@ -926,15 +925,18 @@ def dicttoxml( ns = xml_namespaces[prefix] namespace_str += f' xmlns:{prefix}="{ns}"' if root: - output.append('') custom_root, root_attr = make_valid_xml_name(custom_root, {}) output_elem = convert( obj, ids, attr_type, item_func, cdata, item_wrap, parent=custom_root, list_headers=list_headers ) - output.append(f"<{custom_root}{make_attrstring(root_attr)}{namespace_str}>{output_elem}") - else: - output.append( - convert(obj, ids, attr_type, item_func, cdata, item_wrap, parent="", list_headers=list_headers) + output = ( + f'' + f"<{custom_root}{make_attrstring(root_attr)}{namespace_str}>" + f"{output_elem}" ) - - return "".join(output).encode("utf-8") + del output_elem + return output.encode("utf-8") + else: + return convert( + obj, ids, attr_type, item_func, cdata, item_wrap, parent="", list_headers=list_headers + ).encode("utf-8") diff --git a/json2xml/json2xml.py b/json2xml/json2xml.py index 0842283..14e339d 100644 --- a/json2xml/json2xml.py +++ b/json2xml/json2xml.py @@ -57,7 +57,7 @@ def to_xml(self) -> Any | None: from defusedxml.minidom import parseString try: - result = parseString(xml_data.decode("utf-8")).toprettyxml(encoding="UTF-8").decode() + result = parseString(xml_data).toprettyxml(encoding="UTF-8").decode() except ExpatError: raise InvalidDataError return result diff --git a/lat.md/architecture.md b/lat.md/architecture.md index ce60b57..12ef6b0 100644 --- a/lat.md/architecture.md +++ b/lat.md/architecture.md @@ -14,12 +14,26 @@ The pure Python serializer recursively maps Python values to XML elements, attri [[json2xml/dicttoxml.py#dicttoxml]] is the public serializer. It handles the XML declaration, root wrapper, namespace emission, XPath mode, and then routes nested values through helper functions such as [[json2xml/dicttoxml.py#convert]], [[json2xml/dicttoxml.py#convert_dict]], and [[json2xml/dicttoxml.py#convert_list]]. [[json2xml/dicttoxml.py#get_xml_type]] and [[json2xml/dicttoxml.py#convert]] accept broad caller input and classify unsupported values at runtime, so tests can probe failure paths without lying to the type checker. Invalid XML names are normalized by [[json2xml/dicttoxml.py#make_valid_xml_name]] instead of crashing immediately on user keys; common ASCII names use cached fast validation, while parser validation remains available for non-ASCII or unusual names. Dict and list scalar paths reuse validated element names and specialize generated type attributes so common payloads avoid repeated normalization and escaping work. Special `@attrs`/`@val` handling avoids mutating caller data. +The root wrapper path releases the unwrapped XML body before UTF-8 encoding the final document. That keeps peak memory closer to the returned byte size without changing the recursive serializer contract. + ## Backend selection The fast-path module prefers the Rust extension when it can preserve Python semantics, and falls back to the Python serializer for unsupported features. [[json2xml/dicttoxml_fast.py#dicttoxml]] uses the Rust backend only when optional features such as `ids`, custom `item_func`, XML namespaces, XPath mode, root scalar payloads, or special `@` keys are not involved. A local stub for the optional `json2xml_rs` module keeps static analysis aligned with that fallback design, so type checking still passes when the extension is not installed. This keeps fast installs fast without letting the optimized path silently change behavior. +The Rust backend writes serializer output into Python's bytes writer instead of building a Rust string and copying it across the extension boundary. This keeps the fast path's peak output memory closer to the final `bytes` object. + +The Rust extension crate targets the Rust 2024 edition and pins `rust-version` to the current stable toolchain so native builds fail clearly on older compilers. + +The Cargo feature layout separates normal Rust/PyO3 tests from extension-module builds. `cargo test` uses the default `python` feature without extension-module linking, while maturin enables the `extension-module` feature for wheel builds. + +## Release packaging + +Package releases keep the Python wrapper and Rust accelerator versioned together so optional fast installs receive compatible wheels. + +The Python package version lives in `pyproject.toml` and `json2xml/__init__.py`. The Rust accelerator version lives in both `rust/Cargo.toml` and `rust/pyproject.toml`, and the Python `fast` extra should require the Rust package version that contains any expected accelerator behavior. + ## Performance benchmarks The benchmark docs record measured implementation tradeoffs so users can choose between Python, Rust, Go, and Zig without guessing. @@ -28,6 +42,8 @@ The May 2026 benchmark on Apple Silicon shows the Rust extension as the best opt Reproduction docs require contributors to record machine, OS, Python, and tool availability before comparing results. `benchmark_all.py` mixes library calls and CLI subprocesses intentionally, so its Go and Zig rows include process startup overhead. +The June 2026 Rust memory benchmark uses [[benchmark_memory_rust.py#main]] to compare release builds in fresh Python processes. The bytes-writer implementation cuts serializer peak RSS by about half for large outputs, with a documented throughput tradeoff. + ## Dependency security Dependency floors and lockfiles keep known vulnerable packages out of runtime and development environments. diff --git a/pyproject.toml b/pyproject.toml index 4c54910..2d5a154 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "json2xml" -version = "6.1.0" +version = "6.2.0" description = "Simple Python Library to convert JSON to XML" readme = "README.rst" requires-python = ">=3.10" @@ -50,7 +50,7 @@ dev = [ "pygments>=2.20.0", "xmltodict>=0.12.0", ] -fast = ["json2xml-rs>=0.1.0"] +fast = ["json2xml-rs>=0.3.0"] [tool.pytest.ini_options] testpaths = ["tests"] diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 3121889..fe6f0fd 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -1,7 +1,8 @@ [package] name = "json2xml_rs" -version = "0.2.0" -edition = "2021" +version = "0.3.0" +edition = "2024" +rust-version = "1.96" description = "Fast native JSON to XML conversion for Python" license = "Apache-2.0" @@ -11,7 +12,8 @@ crate-type = ["cdylib", "rlib"] [features] default = ["python"] -python = ["pyo3/extension-module", "dep:pyo3"] +python = ["dep:pyo3"] +extension-module = ["python", "pyo3/extension-module"] [dependencies] pyo3 = { version = "0.28.2", optional = true } diff --git a/rust/README.md b/rust/README.md index 58953b1..be67eda 100644 --- a/rust/README.md +++ b/rust/README.md @@ -6,7 +6,7 @@ A high-performance Rust implementation of the dicttoxml module using PyO3. ### Prerequisites -- Rust (1.70+) +- Rust (1.96+) - Python (3.9+) - maturin (`pip install maturin`) @@ -76,6 +76,10 @@ The Rust implementation is expected to be 5-15x faster than pure Python for: - Type dispatch (compiled match statements vs. `isinstance()` chains) - String building (pre-allocated buffers vs. f-string concatenation) +Version 0.3.0 writes serializer output directly into Python bytes, reducing the +measured serializer RSS delta for a 100,000-record benchmark by about 49% +compared with the previous Rust implementation. + ## Limitations The Rust implementation currently does not support: diff --git a/rust/pyproject.toml b/rust/pyproject.toml index 56329e6..d12c9a4 100644 --- a/rust/pyproject.toml +++ b/rust/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "json2xml_rs" -version = "0.2.0" +version = "0.3.0" description = "Fast native JSON to XML conversion - Rust extension for json2xml" readme = "README.md" requires-python = ">=3.9" @@ -16,5 +16,5 @@ classifiers = [ ] [tool.maturin] -features = ["pyo3/extension-module"] +features = ["extension-module"] module-name = "json2xml_rs" diff --git a/rust/src/lib.rs b/rust/src/lib.rs index bb73436..22ce33e 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -8,7 +8,9 @@ use pyo3::exceptions::PyValueError; #[cfg(feature = "python")] use pyo3::prelude::*; #[cfg(feature = "python")] -use pyo3::types::{PyBool, PyDict, PyFloat, PyInt, PyList, PyString}; +use pyo3::types::{PyBool, PyBytes, PyDict, PyFloat, PyInt, PyList, PyString}; +#[cfg(feature = "python")] +use std::io::Write; /// Escape special XML characters in a string (allocating convenience wrapper). #[inline] pub fn escape_xml(s: &str) -> String { @@ -58,6 +60,61 @@ pub fn push_escaped_attr(out: &mut String, s: &str) { out.push_str(&s[last..]); } +#[cfg(feature = "python")] +#[inline] +fn write_str(out: &mut dyn Write, s: &str) -> PyResult<()> { + out.write_all(s.as_bytes())?; + Ok(()) +} + +#[cfg(feature = "python")] +#[inline] +fn write_byte(out: &mut dyn Write, b: u8) -> PyResult<()> { + out.write_all(&[b])?; + Ok(()) +} + +#[cfg(feature = "python")] +#[inline] +fn write_escaped_text(out: &mut dyn Write, s: &str) -> PyResult<()> { + let mut last = 0; + for (i, b) in s.bytes().enumerate() { + let repl = match b { + b'&' => "&", + b'"' => """, + b'\'' => "'", + b'<' => "<", + b'>' => ">", + _ => continue, + }; + write_str(out, &s[last..i])?; + write_str(out, repl)?; + last = i + 1; + } + write_str(out, &s[last..]) +} + +#[cfg(feature = "python")] +#[inline] +fn write_escaped_attr(out: &mut dyn Write, s: &str) -> PyResult<()> { + write_escaped_text(out, s) +} + +#[cfg(feature = "python")] +#[inline] +fn write_cdata(out: &mut dyn Write, s: &str) -> PyResult<()> { + write_str(out, "") { + let abs = start + i; + write_str(out, &s[start..abs])?; + write_str(out, "]]]]>")?; + start = abs + 3; + } + write_str(out, &s[start..])?; + write_str(out, "]]>") +} + /// Wrap content in CDATA section (allocating convenience wrapper). #[inline] pub fn wrap_cdata(s: &str) -> String { @@ -161,29 +218,34 @@ fn push_attrs(out: &mut String, attrs: &[(String, String)]) { /// Write opening tag with optional name and type attributes directly to buffer. #[cfg(feature = "python")] #[inline] -fn write_open_tag(out: &mut String, tag: &str, name_attr: Option<&str>, type_attr: Option<&str>) { - out.push('<'); - out.push_str(tag); +fn write_open_tag( + out: &mut dyn Write, + tag: &str, + name_attr: Option<&str>, + type_attr: Option<&str>, +) -> PyResult<()> { + write_byte(out, b'<')?; + write_str(out, tag)?; if let Some(name) = name_attr { - out.push_str(" name=\""); - push_escaped_attr(out, name); - out.push('"'); + write_str(out, " name=\"")?; + write_escaped_attr(out, name)?; + write_byte(out, b'"')?; } if let Some(ty) = type_attr { - out.push_str(" type=\""); - out.push_str(ty); - out.push('"'); + write_str(out, " type=\"")?; + write_str(out, ty)?; + write_byte(out, b'"')?; } - out.push('>'); + write_byte(out, b'>') } /// Write a closing tag directly to buffer. #[cfg(feature = "python")] #[inline] -fn write_close_tag(out: &mut String, tag: &str) { - out.push_str("'); +fn write_close_tag(out: &mut dyn Write, tag: &str) -> PyResult<()> { + write_str(out, "') } /// Configuration for XML conversion @@ -203,11 +265,7 @@ use pyo3::PyResult; #[cfg(feature = "python")] #[inline] fn type_attr<'a>(cfg: &ConvertConfig, ty: &'a str) -> Option<&'a str> { - if cfg.attr_type { - Some(ty) - } else { - None - } + if cfg.attr_type { Some(ty) } else { None } } /// Single unified type-dispatch writer. Every Python value goes through here @@ -215,7 +273,7 @@ fn type_attr<'a>(cfg: &ConvertConfig, ty: &'a str) -> Option<&'a str> { #[cfg(feature = "python")] fn write_value( py: Python<'_>, - out: &mut String, + out: &mut dyn Write, obj: &Bound<'_, PyAny>, tag: &str, name_attr: Option<&str>, @@ -224,64 +282,64 @@ fn write_value( ) -> PyResult<()> { // None if obj.is_none() { - write_open_tag(out, tag, name_attr, type_attr(cfg, "null")); - write_close_tag(out, tag); + write_open_tag(out, tag, name_attr, type_attr(cfg, "null"))?; + write_close_tag(out, tag)?; return Ok(()); } // Bool (must check before int since bool is subclass of int in Python) if obj.is_instance_of::() { let v: bool = obj.extract()?; - write_open_tag(out, tag, name_attr, type_attr(cfg, "bool")); - out.push_str(if v { "true" } else { "false" }); - write_close_tag(out, tag); + write_open_tag(out, tag, name_attr, type_attr(cfg, "bool"))?; + write_str(out, if v { "true" } else { "false" })?; + write_close_tag(out, tag)?; return Ok(()); } // Int - try i64 first, fall back to string for large integers if obj.is_instance_of::() { - write_open_tag(out, tag, name_attr, type_attr(cfg, "int")); + write_open_tag(out, tag, name_attr, type_attr(cfg, "int"))?; match obj.extract::() { Ok(v) => { - out.push_str(&v.to_string()); + write_str(out, &v.to_string())?; } Err(_) => { - out.push_str(obj.str()?.to_str()?); + write_str(out, obj.str()?.to_str()?)?; } } - write_close_tag(out, tag); + write_close_tag(out, tag)?; return Ok(()); } // Float - use Python's str() for parity (Rust renders 1.0 as "1") if obj.is_instance_of::() { - write_open_tag(out, tag, name_attr, type_attr(cfg, "float")); - out.push_str(obj.str()?.to_str()?); - write_close_tag(out, tag); + write_open_tag(out, tag, name_attr, type_attr(cfg, "float"))?; + write_str(out, obj.str()?.to_str()?)?; + write_close_tag(out, tag)?; return Ok(()); } // String if let Ok(py_str) = obj.cast::() { let s = py_str.to_str()?; - write_open_tag(out, tag, name_attr, type_attr(cfg, "str")); + write_open_tag(out, tag, name_attr, type_attr(cfg, "str"))?; if cfg.cdata { - push_cdata(out, s); + write_cdata(out, s)?; } else { - push_escaped_text(out, s); + write_escaped_text(out, s)?; } - write_close_tag(out, tag); + write_close_tag(out, tag)?; return Ok(()); } // Dict if let Ok(dict) = obj.cast::() { if wrap_container { - write_open_tag(out, tag, name_attr, type_attr(cfg, "dict")); + write_open_tag(out, tag, name_attr, type_attr(cfg, "dict"))?; } write_dict_contents(py, out, dict, cfg)?; if wrap_container { - write_close_tag(out, tag); + write_close_tag(out, tag)?; } return Ok(()); } @@ -289,11 +347,11 @@ fn write_value( // List if let Ok(list) = obj.cast::() { if wrap_container { - write_open_tag(out, tag, name_attr, type_attr(cfg, "list")); + write_open_tag(out, tag, name_attr, type_attr(cfg, "list"))?; } write_list_contents(py, out, list, tag, cfg)?; if wrap_container { - write_close_tag(out, tag); + write_close_tag(out, tag)?; } return Ok(()); } @@ -303,11 +361,11 @@ fn write_value( let items: Vec> = iter.collect::>()?; let list = PyList::new(py, &items)?; if wrap_container { - write_open_tag(out, tag, name_attr, type_attr(cfg, "list")); + write_open_tag(out, tag, name_attr, type_attr(cfg, "list"))?; } write_list_contents(py, out, &list, tag, cfg)?; if wrap_container { - write_close_tag(out, tag); + write_close_tag(out, tag)?; } return Ok(()); } @@ -315,13 +373,13 @@ fn write_value( // Fallback: convert to string via Python's str() let py_str = obj.str()?; let s = py_str.to_str()?; - write_open_tag(out, tag, name_attr, type_attr(cfg, "str")); + write_open_tag(out, tag, name_attr, type_attr(cfg, "str"))?; if cfg.cdata { - push_cdata(out, s); + write_cdata(out, s)?; } else { - push_escaped_text(out, s); + write_escaped_text(out, s)?; } - write_close_tag(out, tag); + write_close_tag(out, tag)?; Ok(()) } @@ -329,7 +387,7 @@ fn write_value( #[cfg(feature = "python")] fn write_dict_contents( py: Python<'_>, - out: &mut String, + out: &mut dyn Write, dict: &Bound<'_, PyDict>, cfg: &ConvertConfig, ) -> PyResult<()> { @@ -348,9 +406,9 @@ fn write_dict_contents( let wrap_list_container = (cfg.item_wrap || !first_is_scalar) && !cfg.list_headers; if wrap_list_container { - write_open_tag(out, &xml_key, name_attr, type_attr(cfg, "list")); + write_open_tag(out, &xml_key, name_attr, type_attr(cfg, "list"))?; write_list_contents(py, out, list, &xml_key, cfg)?; - write_close_tag(out, &xml_key); + write_close_tag(out, &xml_key)?; } else { write_list_contents(py, out, list, &xml_key, cfg)?; } @@ -377,7 +435,7 @@ fn is_python_scalar(obj: &Bound<'_, PyAny>) -> bool { #[cfg(feature = "python")] fn write_list_contents( py: Python<'_>, - out: &mut String, + out: &mut dyn Write, list: &Bound<'_, PyList>, parent: &str, cfg: &ConvertConfig, @@ -400,9 +458,9 @@ fn write_list_contents( } else { type_attr(cfg, "dict") }; - write_open_tag(out, dict_tag_name, None, dict_type_attr); + write_open_tag(out, dict_tag_name, None, dict_type_attr)?; write_dict_contents(py, out, dict, cfg)?; - write_close_tag(out, dict_tag_name); + write_close_tag(out, dict_tag_name)?; } else { write_dict_contents(py, out, dict, cfg)?; } @@ -441,7 +499,7 @@ fn dicttoxml( item_wrap: bool, cdata: bool, list_headers: bool, -) -> PyResult> { +) -> PyResult> { if !is_valid_xml_name(custom_root) { return Err(PyValueError::new_err(format!( "Invalid XML root element name: '{}'", @@ -456,30 +514,31 @@ fn dicttoxml( list_headers, }; - let mut out = String::new(); - - if root { - out.push_str(""); - out.push('<'); - out.push_str(custom_root); - out.push('>'); - } + PyBytes::new_with_writer(py, 0, |out| { + if root { + write_str(out, "")?; + write_byte(out, b'<')?; + write_str(out, custom_root)?; + write_byte(out, b'>')?; + } - if let Ok(dict) = obj.cast::() { - write_dict_contents(py, &mut out, dict, &config)?; - } else if let Ok(list) = obj.cast::() { - write_list_contents(py, &mut out, list, custom_root, &config)?; - } else { - write_value(py, &mut out, obj, custom_root, None, &config, true)?; - } + if let Ok(dict) = obj.cast::() { + write_dict_contents(py, out, dict, &config)?; + } else if let Ok(list) = obj.cast::() { + write_list_contents(py, out, list, custom_root, &config)?; + } else { + write_value(py, out, obj, custom_root, None, &config, true)?; + } - if root { - out.push_str("'); - } + if root { + write_str(out, "')?; + } - Ok(out.into_bytes()) + Ok(()) + }) + .map(Bound::unbind) } /// Fast XML string escaping.