diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index d0ddd2c..c7db597 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -26,13 +26,18 @@ jobs: strategy: fail-fast: false matrix: - python-version: [pypy-3.10, pypy-3.11, '3.10', '3.11', '3.12', '3.13', '3.14', '3.14t', '3.15.0-beta.2'] + # Pin PyPy 3.11 to the 7.3.23 build so CI keeps exercising the + # CPython 3.11.15-compatible runtime documented in BENCHMARKS.md. + python-version: [pypy-3.10, pypy-3.11-v7.3.23, '3.10', '3.11', '3.12', '3.13', '3.14', '3.14t', '3.15.0-beta.3'] os: [ ubuntu-latest, windows-latest, macos-latest, "ubuntu-24.04-arm" ] + exclude: + - os: ubuntu-latest + python-version: '3.14t' steps: - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6 with: diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index 2058767..4ad5443 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -4,12 +4,20 @@ on: push: branches: [master, main] paths: + - 'json2xml/backend_selector.py' + - 'json2xml/dicttoxml.py' + - 'json2xml/dicttoxml_fast.py' + - 'json2xml/json2xml.py' - 'rust/**' - 'tests/test_rust_dicttoxml.py' - '.github/workflows/rust-ci.yml' pull_request: branches: [master, main] paths: + - 'json2xml/backend_selector.py' + - 'json2xml/dicttoxml.py' + - 'json2xml/dicttoxml_fast.py' + - 'json2xml/json2xml.py' - 'rust/**' - 'tests/test_rust_dicttoxml.py' - '.github/workflows/rust-ci.yml' diff --git a/BENCHMARKS.md b/BENCHMARKS.md index 9082ed8..f2ba1a2 100644 --- a/BENCHMARKS.md +++ b/BENCHMARKS.md @@ -8,7 +8,7 @@ Comprehensive performance comparison between all json2xml implementations. 
- **OS**: macOS 26.5 (Darwin 25.5.0) - **Python**: 3.14.4 - **Date**: May 27, 2026 -- **CLI tools**: `json2xml-go` and `json2xml-zig` from `/Users/vinitkumar/.local/bin` +- **CLI tools**: `json2xml-go` and `json2xml-zig` on `PATH` (the published run used a local `~/.local/bin` install) To make new runs comparable, record the same fields for your machine before publishing results: @@ -63,6 +63,34 @@ which json2xml-go json2xml-zig 2>/dev/null || true *CLI tools have process spawn overhead (~3-6ms) which dominates for small inputs. +### Multi-Python CLI Benchmark (June 25, 2026) + +This rerun compares the same CLI workload across uv-managed CPython 3.14.6, CPython 3.15.0b3, PyPy 3.11.15, and `json2xml-go`. The listed environment is the recorded machine for this run, not a requirement for contributors on other platforms. + +#### Environment + +- **Machine**: Apple Silicon (arm64) +- **OS**: macOS 26.5.1 (Darwin 25.5.0) +- **Interpreters**: CPython 3.14.6, CPython 3.15.0b3, PyPy 3.11.15 +- **Date**: June 25, 2026 +- **Go CLI**: `json2xml-go` on `PATH` (the recorded run used a local `~/.local/bin` install) + +#### Results + +| Test Case | CPython 3.14.6 | CPython 3.15.0b3 | PyPy 3.11.15 | Go | +|-----------|----------------|------------------|--------------|----| +| Small (47B) | 61.86ms | 47.61ms | 98.45ms | 4.46ms | +| Medium (2.6KB) | 62.85ms | 43.41ms | 97.96ms | 4.88ms | +| Large (323KB, 1K records) | 174.11ms | 146.74ms | 271.24ms | 62.23ms | +| Very Large (1.62MB, 5K records) | 759.26ms | 691.53ms | 526.96ms | 269.96ms | + +#### Takeaways + +- **CPython 3.15.0b3 beat CPython 3.14.6 in every test**, and was **1.14x faster on average** across the four cases. +- **PyPy 3.11.15 still lagged on smaller inputs** because startup cost dominates, but it **overtook both CPython builds on the 5K-record case**. +- **Go remained the fastest CLI path in every case**: its process startup is a small fraction of any Python interpreter's on small inputs, and it stays ahead once conversion work dominates process startup on large payloads. 
+- These numbers are **end-to-end subprocess timings**, not isolated serializer throughput, so interpreter startup and environment activation costs are part of the result by design. + ## Key Observations ### 1. Rust Extension is the Best Choice for Python Users 🦀 @@ -194,10 +222,14 @@ python benchmark_rust.py ### Multi-Python Version Benchmark Creates per-interpreter virtual environments under `.benchmark_venvs/` and -compares the hard-coded Python paths in `benchmark_multi_python.py`. Edit -`PYTHON_VERSIONS` in that script or install the listed interpreters before -running it. Set `JSON2XML_GO_CLI=/path/to/json2xml-go` if the Go binary is not -named `json2xml-go` on `PATH`. +compares the configured Python interpreter paths in `benchmark_multi_python.py`. +By default the script looks for uv-managed CPython 3.14.6, CPython 3.15.0b3, +and PyPy 3.11.15 under `JSON2XML_UV_PYTHON_DIR` (default: +`~/.local/share/uv/python`). Override individual interpreter paths with +`JSON2XML_PYTHON_CPYTHON_314_6`, `JSON2XML_PYTHON_CPYTHON_315_0B3`, or +`JSON2XML_PYTHON_PYPY_311_15` if your layout differs. Set +`JSON2XML_GO_CLI=/path/to/json2xml-go` if the Go binary is not named +`json2xml-go` on `PATH`. ```bash python benchmark_multi_python.py diff --git a/benchmark_multi_python.py b/benchmark_multi_python.py index c22117c..cb36b92 100755 --- a/benchmark_multi_python.py +++ b/benchmark_multi_python.py @@ -3,9 +3,9 @@ Multi-Python Benchmark: Compare json2xml performance across Python implementations. Compares: -- CPython 3.14.2 (homebrew) -- CPython 3.15.0a4 (latest alpha) -- PyPy 3.10.16 +- CPython 3.14.6 +- CPython 3.15.0b3 +- PyPy 3.11.15 - Go (json2xml-go) Each Python version gets its own virtual environment with json2xml installed. 
@@ -24,28 +24,45 @@ from dataclasses import dataclass from pathlib import Path - # Configuration BASE_DIR = Path(__file__).resolve().parent VENVS_DIR = BASE_DIR / ".benchmark_venvs" GO_CLI = Path(os.environ.get("JSON2XML_GO_CLI", "json2xml-go")) +UV_PYTHON_DIR = Path( + os.environ.get("JSON2XML_UV_PYTHON_DIR", str(Path.home() / ".local/share/uv/python")) +) + + +def _uv_python_path(distribution: str, executable: str) -> str: + """Build a uv-managed interpreter path from a configurable base directory.""" + return str(UV_PYTHON_DIR / distribution / "bin" / executable) + # Python implementations to benchmark PYTHON_VERSIONS = [ { - "name": "CPython 3.14.2", - "python": "/opt/homebrew/bin/python3.14", - "venv_name": "venv_cpython314_2", + "name": "CPython 3.14.6", + "python": os.environ.get( + "JSON2XML_PYTHON_CPYTHON_314_6", + _uv_python_path("cpython-3.14.6-macos-aarch64-none", "python3.14"), + ), + "venv_name": "venv_cpython314_6", }, { - "name": "CPython 3.15.0a4", - "python": str(Path.home() / ".local/share/uv/python/cpython-3.15.0a4-macos-aarch64-none/bin/python3.15"), - "venv_name": "venv_cpython315a4", + "name": "CPython 3.15.0b3", + "python": os.environ.get( + "JSON2XML_PYTHON_CPYTHON_315_0B3", + _uv_python_path("cpython-3.15.0b3-macos-aarch64-none", "python3.15"), + ), + "venv_name": "venv_cpython315b3", }, { - "name": "PyPy 3.10.16", - "python": str(Path.home() / ".local/share/uv/python/pypy-3.10.19-macos-aarch64-none/bin/pypy3.10"), - "venv_name": "venv_pypy310", + "name": "PyPy 3.11.15", + "python": os.environ.get( + "JSON2XML_PYTHON_PYPY_311_15", + _uv_python_path("pypy-3.11.15-macos-aarch64-none", "pypy3.11"), + ), + "venv_name": "venv_pypy311", }, ] @@ -129,7 +146,7 @@ def setup_venv(python_path: str, venv_path: Path) -> bool: return False # Install json2xml in the venv - print(f" Installing json2xml...") + print(" Installing json2xml...") pip_path = venv_path / "bin" / "pip" result = subprocess.run( [str(pip_path), "install", "-e", str(BASE_DIR), 
"-q"], @@ -276,7 +293,7 @@ def main() -> int: print(f" Go (json2xml-go): {colorize('✓', Colors.GREEN)} Ready") else: print(f" Go (json2xml-go): {colorize('✗', Colors.RED)} Not found at {GO_CLI}") - print(f" Set JSON2XML_GO_CLI env var or ensure json2xml-go is in PATH") + print(" Set JSON2XML_GO_CLI env var or ensure json2xml-go is in PATH") print() if not active_pythons: @@ -377,10 +394,10 @@ def main() -> int: avg_times[name].append(result.avg_ms) # Print summary table - print(f" {'Implementation':<35} {'Avg Time':>12} {'vs CPython 3.14.2':>20}") + print(f" {'Implementation':<35} {'Avg Time':>12} {'vs CPython 3.14.6':>20}") print(f" {'-' * 35} {'-' * 12} {'-' * 20}") - baseline_name = "CPython 3.14.2" + baseline_name = "CPython 3.14.6" baseline_avg = sum(avg_times.get(baseline_name, [0])) / len(avg_times.get(baseline_name, [1])) sorted_impls = sorted(avg_times.items(), key=lambda x: sum(x[1]) / len(x[1])) diff --git a/json2xml/backend_selector.py b/json2xml/backend_selector.py new file mode 100644 index 0000000..7e3cb98 --- /dev/null +++ b/json2xml/backend_selector.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Protocol + + +@dataclass(frozen=True, slots=True) +class ConversionRequest: + """Normalized conversion request shared across backend adapters.""" + + obj: Any + root: bool + custom_root: str + ids: list[int] | None + attr_type: bool + item_wrap: bool + item_func: Any + cdata: bool + xml_namespaces: dict[str, Any] | None + list_headers: bool + xpath_format: bool + + +class BackendAdapter(Protocol): + """Small adapter seam for conversion backends.""" + + name: str + + def can_handle(self, request: ConversionRequest) -> bool: + raise NotImplementedError # pragma: no cover + + def render(self, request: ConversionRequest) -> bytes: + raise NotImplementedError # pragma: no cover + + +class BackendSelector: + """Pick the first backend that can preserve request semantics.""" + + def 
__init__(self, *backends: BackendAdapter) -> None: + self._backends = backends + + def render(self, request: ConversionRequest) -> bytes: + for backend in self._backends: + if backend.can_handle(request): + return backend.render(request) + raise RuntimeError("No XML backend can handle the requested conversion") + + +def has_special_keys(obj: Any) -> bool: + """Return True when the payload uses Python-only special key semantics.""" + if isinstance(obj, dict): + for key, value in obj.items(): + if isinstance(key, str) and (key.startswith("@") or key.endswith("@flat")): + return True + if has_special_keys(value): + return True + return False + + if isinstance(obj, list): + for item in obj: + if has_special_keys(item): + return True + + return False diff --git a/json2xml/cli.py b/json2xml/cli.py index a7c48cc..a00db2f 100644 --- a/json2xml/cli.py +++ b/json2xml/cli.py @@ -43,6 +43,7 @@ import argparse import sys +from dataclasses import dataclass from pathlib import Path from typing import NoReturn @@ -62,12 +63,142 @@ EMAIL = "mail@vinitkumar.me" +@dataclass(frozen=True, slots=True) +class CLIConversionOptions: + """Parsed CLI options normalized for the conversion workflow.""" + + input_file: str | None + url: str | None + string: str | None + output: str | None + wrapper: str + root: bool + pretty: bool + attr_type: bool + item_wrap: bool + xpath_format: bool + cdata: bool + list_headers: bool + + @classmethod + def from_namespace(cls, args: argparse.Namespace) -> "CLIConversionOptions": + return cls( + input_file=args.input_file, + url=args.url, + string=args.string, + output=args.output, + wrapper=args.wrapper, + root=args.root, + pretty=args.pretty, + attr_type=args.attr_type, + item_wrap=args.item_wrap, + xpath_format=args.xpath_format, + cdata=args.cdata, + list_headers=args.list_headers, + ) + + def exit_with_error(message: str) -> NoReturn: """Print an error message and terminate CLI processing.""" print(message, file=sys.stderr) raise SystemExit(1) +class 
CLIApplication: + """Thin command adapter around input resolution, conversion, and output.""" + + def read_input(self, options: CLIConversionOptions) -> JSONValue: + if options.url: + try: + return readfromurl(options.url) + except URLReadError as error: + exit_with_error(f"Error reading from URL: {error}") + + if options.string: + try: + return readfromstring(options.string) + except StringReadError as error: + exit_with_error( + "Error: Invalid JSON in --string input. " + "Pass a valid JSON object, array, string, number, boolean, or null. " + f"({error})" + ) + + if options.input_file: + if options.input_file == "-": + return read_from_stdin() + if not Path(options.input_file).is_file(): + exit_with_error( + f"Error: JSON file not found: {options.input_file}. " + "Check the path or use - to read JSON from stdin." + ) + try: + return readfromjson(options.input_file) + except JSONReadError as error: + exit_with_error( + f"Error: Could not parse JSON file: {options.input_file}. " + f"Check that the file contains valid JSON. ({error})" + ) + + if not sys.stdin.isatty(): + return read_from_stdin() + + exit_with_error( + "Error: No input provided. Pass a JSON file, use - for stdin, " + "or provide --string/--url." + ) + raise AssertionError("unreachable") + + def read_from_stdin(self) -> JSONValue: + try: + json_str = sys.stdin.read().strip() + if not json_str: + exit_with_error( + "Error: Empty stdin. Pipe JSON into stdin or pass a file/--string." + ) + return readfromstring(json_str) + except StringReadError as error: + exit_with_error( + "Error: Invalid JSON from stdin. Pipe valid JSON into stdin " + f"or pass a file/--string. 
({error})" + ) + + def convert(self, data: JSONValue, options: CLIConversionOptions) -> str | bytes: + converter = Json2xml( + data=data, + wrapper=options.wrapper, + root=options.root, + pretty=options.pretty, + attr_type=options.attr_type, + item_wrap=options.item_wrap, + xpath_format=options.xpath_format, + cdata=options.cdata, + list_headers=options.list_headers, + ) + xml_output = converter.to_xml() + if xml_output is None: + raise ValueError("Empty data, no XML generated") + return xml_output + + def write_output(self, output: str | bytes, output_file: str | None) -> None: + if isinstance(output, bytes): + output = output.decode("utf-8") + + if output_file: + try: + with open(output_file, "w", encoding="utf-8") as file_obj: + file_obj.write(output) + except OSError as error: + print(f"Error writing to file: {error}", file=sys.stderr) + sys.exit(1) + return + + print(output) + + +_APP = CLIApplication() + + # @lat: [[architecture#CLI entrypoint]] def create_parser() -> argparse.ArgumentParser: """Create and configure the argument parser.""" @@ -239,152 +370,37 @@ def create_parser() -> argparse.ArgumentParser: # @lat: [[behavior#Input readers]] def read_input(args: argparse.Namespace) -> JSONValue: - """ - Read JSON input from the specified source. - - Priority: URL > String > File > Stdin - - Args: - args: Parsed command line arguments. - - Returns: - Parsed JSON data as dict or list. - - Raises: - SystemExit: When no input is provided or reading fails. - """ - # Priority: URL > String > File > Stdin - if args.url: - try: - return readfromurl(args.url) - except URLReadError as e: - exit_with_error(f"Error reading from URL: {e}") - - if args.string: - try: - return readfromstring(args.string) - except StringReadError as e: - exit_with_error( - "Error: Invalid JSON in --string input. " - f"Pass a valid JSON object, array, string, number, boolean, or null. 
({e})" - ) - - if args.input_file: - if args.input_file == "-": - # Read from stdin - return read_from_stdin() - if not Path(args.input_file).is_file(): - exit_with_error( - f"Error: JSON file not found: {args.input_file}. " - "Check the path or use - to read JSON from stdin." - ) - try: - return readfromjson(args.input_file) - except JSONReadError as e: - exit_with_error( - f"Error: Could not parse JSON file: {args.input_file}. " - f"Check that the file contains valid JSON. ({e})" - ) - - # Check if there's data on stdin - if not sys.stdin.isatty(): - return read_from_stdin() - - exit_with_error( - "Error: No input provided. Pass a JSON file, use - for stdin, " - "or provide --string/--url." - ) + """Read JSON input from the specified source.""" + return _APP.read_input(CLIConversionOptions.from_namespace(args)) def read_from_stdin() -> JSONValue: - """ - Read JSON from standard input. - - Returns: - Parsed JSON data. - - Raises: - SystemExit: When stdin is empty or contains invalid JSON. - """ - try: - json_str = sys.stdin.read().strip() - if not json_str: - exit_with_error( - "Error: Empty stdin. Pipe JSON into stdin or pass a file/--string." - ) - return readfromstring(json_str) - except StringReadError as e: - exit_with_error( - "Error: Invalid JSON from stdin. Pipe valid JSON into stdin " - f"or pass a file/--string. ({e})" - ) + """Read JSON from standard input.""" + return _APP.read_from_stdin() def write_output(output: str | bytes, output_file: str | None) -> None: - """ - Write XML output to the specified destination. - - Args: - output: XML content to write. - output_file: Path to output file, or None for stdout. 
- """ - if isinstance(output, bytes): - output = output.decode("utf-8") - - if output_file: - try: - with open(output_file, "w", encoding="utf-8") as f: - f.write(output) - except OSError as e: - print(f"Error writing to file: {e}", file=sys.stderr) - sys.exit(1) - else: - print(output) + """Write XML output to the specified destination.""" + _APP.write_output(output, output_file) def main(argv: list[str] | None = None) -> int: - """ - Main entry point for the CLI. - - Args: - argv: Command line arguments (defaults to sys.argv[1:]). - - Returns: - Exit code (0 for success, 1 for error). - """ + """Main entry point for the CLI.""" parser = create_parser() args = parser.parse_args(argv) - # Read input data try: data = read_input(args) - except Exception as e: - print(f"Error reading input: {e}", file=sys.stderr) + except Exception as error: + print(f"Error reading input: {error}", file=sys.stderr) return 1 - # Convert to XML try: - converter = Json2xml( - data=data, - wrapper=args.wrapper, - root=args.root, - pretty=args.pretty, - attr_type=args.attr_type, - item_wrap=args.item_wrap, - xpath_format=args.xpath_format, - cdata=args.cdata, - list_headers=args.list_headers, - ) - xml_output = converter.to_xml() - - if xml_output is None: - print("Error: Empty data, no XML generated", file=sys.stderr) - return 1 - - write_output(xml_output, args.output) - - except Exception as e: - print(f"Error converting to XML: {e}", file=sys.stderr) + options = CLIConversionOptions.from_namespace(args) + xml_output = _APP.convert(data, options) + write_output(xml_output, options.output) + except Exception as error: + print(f"Error converting to XML: {error}", file=sys.stderr) return 1 return 0 diff --git a/json2xml/dicttoxml.py b/json2xml/dicttoxml.py index f136ec3..5f54343 100644 --- a/json2xml/dicttoxml.py +++ b/json2xml/dicttoxml.py @@ -4,6 +4,7 @@ import logging import numbers from collections.abc import Callable, Sequence +from dataclasses import dataclass from decimal import 
Decimal from fractions import Fraction from functools import lru_cache @@ -951,6 +952,127 @@ def convert_none_valid_name( return f"<{key}{attr_string}>" +@dataclass(frozen=True, slots=True) +class SerializerConfig: + """Normalized options for the pure Python serializer engine.""" + + obj: ELEMENT + root: bool + custom_root: str + ids: list[int] | None + attr_type: bool + item_wrap: bool + item_func: Callable[[str], str] + cdata: bool + xml_namespaces: dict[str, Any] | None + list_headers: bool + xpath_format: bool + + +class _XPathDocumentRenderer: + """Render the W3C XPath 3.1 JSON-to-XML document shape.""" + + def __init__(self, config: SerializerConfig) -> None: + self._config = config + + def render(self) -> bytes: + output = _XMLWriter() + output.write('') + tag_name = get_xpath31_tag_name(self._config.obj) + if tag_name in {"map", "array"}: + _append_xpath31(output, self._config.obj, namespace=True) + else: + output.write(f'') + _append_xpath31(output, self._config.obj) + output.write("") + return output.to_bytes() + + +class _NamespaceFormatter: + """Keep namespace emission and schema-attribute quirks in one place.""" + + @staticmethod + def format(xml_namespaces: dict[str, Any] | None) -> str: + if xml_namespaces is None: + return "" + + namespace_parts: list[str] = [] + for prefix in xml_namespaces: + if prefix == "xsi": + for schema_att in xml_namespaces[prefix]: + if schema_att == "schemaInstance": + namespace_parts.append( + f' xmlns:{prefix}="{xml_namespaces[prefix]["schemaInstance"]}"' + ) + elif schema_att == "schemaLocation": + namespace_parts.append( + f' xsi:{schema_att}="{xml_namespaces[prefix][schema_att]}"' + ) + elif prefix == "xmlns": + namespace_parts.append(f' xmlns="{xml_namespaces[prefix]}"') + else: + namespace_parts.append(f' xmlns:{prefix}="{xml_namespaces[prefix]}"') + return "".join(namespace_parts) + + +class _StandardDocumentRenderer: + """Render the project-specific XML document shape.""" + + def __init__(self, config: 
SerializerConfig) -> None: + self._config = config + + def render(self) -> bytes: + output = _XMLWriter() + if self._config.root: + self._render_with_root(output) + else: + self._render_fragment(output) + return output.to_bytes() + + def _render_with_root(self, output: _XMLWriter) -> None: + custom_root, root_attr = make_valid_xml_name(self._config.custom_root, {}) + namespace_str = _NamespaceFormatter.format(self._config.xml_namespaces) + output.write('') + output.write(f"<{custom_root}{make_attrstring(root_attr)}{namespace_str}>") + _append_convert( + output, + self._config.obj, + self._config.ids, + self._config.attr_type, + self._config.item_func, + self._config.cdata, + self._config.item_wrap, + parent=custom_root, + list_headers=self._config.list_headers, + ) + output.write(f"") + + def _render_fragment(self, output: _XMLWriter) -> None: + _append_convert( + output, + self._config.obj, + self._config.ids, + self._config.attr_type, + self._config.item_func, + self._config.cdata, + self._config.item_wrap, + parent="", + list_headers=self._config.list_headers, + ) + + +class _SerializerEngine: + """Choose the document renderer while keeping helper semantics local.""" + + def __init__(self, config: SerializerConfig) -> None: + self._config = config + + def render(self) -> bytes: + if self._config.xpath_format: + return _XPathDocumentRenderer(self._config).render() + return _StandardDocumentRenderer(self._config).render() + + # @lat: [[architecture#Conversion engine]] def dicttoxml( obj: ELEMENT, @@ -1103,62 +1225,17 @@ def dicttoxml( 456 """ - if xpath_format: - output = _XMLWriter() - output.write('') - tag_name = get_xpath31_tag_name(obj) - if tag_name in {"map", "array"}: - _append_xpath31(output, obj, namespace=True) - else: - output.write(f'') - _append_xpath31(output, obj) - output.write("") - return output.to_bytes() - - namespace_parts: list[str] = [] - if xml_namespaces is None: - xml_namespaces = {} - for prefix in xml_namespaces: - if prefix == 'xsi': 
- for schema_att in xml_namespaces[prefix]: - if schema_att == 'schemaInstance': - ns = xml_namespaces[prefix]['schemaInstance'] - namespace_parts.append(f' xmlns:{prefix}="{ns}"') - elif schema_att == 'schemaLocation': - ns = xml_namespaces[prefix][schema_att] - namespace_parts.append(f' xsi:{schema_att}="{ns}"') - - elif prefix == 'xmlns': - # xmns needs no prefix - ns = xml_namespaces[prefix] - namespace_parts.append(f' xmlns="{ns}"') - - else: - ns = xml_namespaces[prefix] - namespace_parts.append(f' xmlns:{prefix}="{ns}"') - namespace_str = "".join(namespace_parts) - if root: - custom_root, root_attr = make_valid_xml_name(custom_root, {}) - output = _XMLWriter() - output.write('') - output.write(f"<{custom_root}{make_attrstring(root_attr)}{namespace_str}>") - _append_convert( - output, - obj, - ids, - attr_type, - item_func, - cdata, - item_wrap, - parent=custom_root, - list_headers=list_headers, - ) - output.write(f"") - return output.to_bytes() - - output = _XMLWriter() - _append_convert( - output, - obj, ids, attr_type, item_func, cdata, item_wrap, parent="", list_headers=list_headers + config = SerializerConfig( + obj=obj, + root=root, + custom_root=custom_root, + ids=ids, + attr_type=attr_type, + item_wrap=item_wrap, + item_func=item_func, + cdata=cdata, + xml_namespaces=xml_namespaces, + list_headers=list_headers, + xpath_format=xpath_format, ) - return output.to_bytes() + return _SerializerEngine(config).render() diff --git a/json2xml/dicttoxml_fast.py b/json2xml/dicttoxml_fast.py index 1ad5b36..836dbc8 100644 --- a/json2xml/dicttoxml_fast.py +++ b/json2xml/dicttoxml_fast.py @@ -15,8 +15,11 @@ import logging from collections.abc import Callable +from dataclasses import dataclass from typing import Any +from .backend_selector import BackendSelector, ConversionRequest, has_special_keys + RustStringTransform = Callable[[str], str] LOG = logging.getLogger("dicttoxml_fast") @@ -49,6 +52,67 @@ def get_backend() -> str: """Return the name of the current 
backend ('rust' or 'python').""" return "rust" if _use_rust else "python" +@dataclass(frozen=True, slots=True) +class _RustBackendAdapter: + """Adapter for the optional Rust backend.""" + + rust_dicttoxml: Callable[..., bytes] | None + + name: str = "rust" + + def can_handle(self, request: ConversionRequest) -> bool: + if not _use_rust or self.rust_dicttoxml is None: + return False + + return not ( + request.ids is not None + or request.item_func is not None + or request.xml_namespaces + or request.xpath_format + or not isinstance(request.obj, (dict, list)) + or has_special_keys(request.obj) + ) + + def render(self, request: ConversionRequest) -> bytes: + assert self.rust_dicttoxml is not None + return self.rust_dicttoxml( + request.obj, + root=request.root, + custom_root=request.custom_root, + attr_type=request.attr_type, + item_wrap=request.item_wrap, + cdata=request.cdata, + list_headers=request.list_headers, + ) + + +@dataclass(frozen=True, slots=True) +class _PythonBackendAdapter: + """Adapter for the compatibility-preserving Python backend.""" + + python_dicttoxml: Callable[..., bytes] + default_item_func: Callable[[str], str] + + name: str = "python" + + def can_handle(self, request: ConversionRequest) -> bool: + return True + + def render(self, request: ConversionRequest) -> bytes: + return self.python_dicttoxml( + request.obj, + root=request.root, + custom_root=request.custom_root, + ids=request.ids, + attr_type=request.attr_type, + item_wrap=request.item_wrap, + item_func=request.item_func or self.default_item_func, + cdata=request.cdata, + xml_namespaces=request.xml_namespaces, + list_headers=request.list_headers, + xpath_format=request.xpath_format, + ) + # @lat: [[architecture#Backend selection]] def dicttoxml( @@ -86,62 +150,24 @@ def dicttoxml( Returns: UTF-8 encoded XML as bytes """ - # Features that require Python fallback - needs_python = ( - ids is not None - or item_func is not None - or xml_namespaces - or xpath_format - or not isinstance(obj, 
(dict, list)) + request = ConversionRequest( + obj=obj, + root=root, + custom_root=custom_root, + ids=ids, + attr_type=attr_type, + item_wrap=item_wrap, + item_func=item_func, + cdata=cdata, + xml_namespaces=xml_namespaces, + list_headers=list_headers, + xpath_format=xpath_format, ) - - # Check for special dict keys that require Python - if not needs_python and isinstance(obj, dict): - needs_python = _has_special_keys(obj) - - if _use_rust and not needs_python and _rust_dicttoxml is not None: # pragma: no cover - # Use fast Rust implementation - return _rust_dicttoxml( - obj, - root=root, - custom_root=custom_root, - attr_type=attr_type, - item_wrap=item_wrap, - cdata=cdata, - list_headers=list_headers, - ) - else: - # Fall back to pure Python - return _py_dicttoxml.dicttoxml( - obj, - root=root, - custom_root=custom_root, - ids=ids, - attr_type=attr_type, - item_wrap=item_wrap, - item_func=item_func or _py_dicttoxml.default_item_func, - cdata=cdata, - xml_namespaces=xml_namespaces, - list_headers=list_headers, - xpath_format=xpath_format, - ) - - -def _has_special_keys(obj: Any) -> bool: - """Check if a dict contains special keys that require Python processing.""" - if isinstance(obj, dict): - for key, val in obj.items(): - if isinstance(key, str) and ( - key.startswith("@") or key.endswith("@flat") - ): - return True - if _has_special_keys(val): - return True - elif isinstance(obj, list): - for item in obj: - if _has_special_keys(item): - return True - return False + selector = BackendSelector( + _RustBackendAdapter(_rust_dicttoxml), + _PythonBackendAdapter(_py_dicttoxml.dicttoxml, _py_dicttoxml.default_item_func), + ) + return selector.render(request) # Re-export commonly used functions diff --git a/lat.md/architecture.md b/lat.md/architecture.md index 558ff88..706f64e 100644 --- a/lat.md/architecture.md +++ b/lat.md/architecture.md @@ -14,13 +14,17 @@ The pure Python serializer recursively maps Python values to XML elements, attri 
[[json2xml/dicttoxml.py#dicttoxml]] is the public serializer. It handles the XML declaration, root wrapper, namespace emission, XPath mode, and then routes nested values through helper functions such as [[json2xml/dicttoxml.py#convert]], [[json2xml/dicttoxml.py#convert_dict]], and [[json2xml/dicttoxml.py#convert_list]]. [[json2xml/dicttoxml.py#get_xml_type]] and [[json2xml/dicttoxml.py#convert]] accept broad caller input and classify unsupported values at runtime, so tests can probe failure paths without lying to the type checker. Invalid XML names are normalized by [[json2xml/dicttoxml.py#make_valid_xml_name]] instead of crashing immediately on user keys; common ASCII names use cached fast validation, while parser validation remains available for non-ASCII or unusual names. Dict and list scalar paths reuse validated element names and specialize generated type attributes so common payloads avoid repeated normalization and escaping work. Special `@attrs`/`@val` handling avoids mutating caller data. -The `dicttoxml()` entry point streams normal and XPath serialization through [[json2xml/dicttoxml.py#_XMLWriter]] so recursive dict and list payloads do not allocate a complete string for each nested subtree. Public helpers such as `convert_dict()` still return strings for compatibility by delegating to the same append path, while library and CLI conversions write UTF-8 bytes incrementally and return the final `bytes` object. Attribute formatting stays centralized through `make_attrstring()`, and `@attrs`/`@val` normalization stays local to dict element handling so caller-owned metadata is never mutated. +The `dicttoxml()` entry point now normalizes options into `SerializerConfig` and delegates document shaping to a small renderer seam inside [[json2xml/dicttoxml.py#dicttoxml]]. That keeps XPath document framing, namespace emission, and root wrapping separate from the recursive element walkers. 
+ +The recursive serializer still streams normal and XPath serialization through [[json2xml/dicttoxml.py#_XMLWriter]] so dict and list payloads do not allocate a complete string for each nested subtree. Public helpers such as `convert_dict()` still return strings for compatibility by delegating to the same append path, while library and CLI conversions write UTF-8 bytes incrementally and return the final `bytes` object. Attribute formatting stays centralized through `make_attrstring()`, and `@attrs`/`@val` normalization stays local to dict element handling so caller-owned metadata is never mutated. ## Backend selection The fast-path module prefers the Rust extension when it can preserve Python semantics, and falls back to the Python serializer for unsupported features. -[[json2xml/dicttoxml_fast.py#dicttoxml]] uses the Rust backend only when optional features such as `ids`, custom `item_func`, XML namespaces, XPath mode, root scalar payloads, or special `@` keys are not involved. A local stub for the optional `json2xml_rs` module keeps static analysis aligned with that fallback design, so type checking still passes when the extension is not installed. This keeps fast installs fast without letting the optimized path silently change behavior. +[[json2xml/dicttoxml_fast.py#dicttoxml]] now normalizes each call into a shared conversion request and asks a tiny backend selector seam to choose Rust or Python. The Rust adapter accepts only requests whose semantics it can preserve, namely no `ids`, custom `item_func`, XML namespaces, XPath mode, root scalar payloads, or special `@` keys. + +A local stub for the optional `json2xml_rs` module keeps static analysis aligned with that fallback design, so type checking still passes when the extension is not installed. This keeps fast installs fast without letting the optimized path silently change behavior. 
The Rust backend writes serializer output into Python's bytes writer instead of building a Rust string and copying it across the extension boundary. This keeps the fast path's peak output memory closer to the final `bytes` object. @@ -46,6 +50,10 @@ Reproduction docs require contributors to record machine, OS, Python, and tool a The June 2026 Rust memory benchmark uses [[benchmark_memory_rust.py#main]] under hyperfine to compare release builds in fresh Python processes. The bytes-writer implementation cuts serializer peak RSS by about half for large outputs, with a documented throughput tradeoff. +The June 2026 multi-interpreter CLI rerun uses [[benchmark_multi_python.py#main]] with per-interpreter virtual environments. On the recorded Apple Silicon run, CPython 3.15.0b3 beat CPython 3.14.6 on every case, PyPy 3.11.15 beat both CPython builds only on the largest case, and Go remained the fastest end-to-end CLI path overall. + +The benchmark script now tracks uv-managed current-series interpreters through a configurable `JSON2XML_UV_PYTHON_DIR` base path plus per-interpreter overrides, with the documented defaults targeting CPython 3.14.6, CPython 3.15.0b3, and PyPy 3.11.15. That keeps the published setup reproducible without hard-coding one contributor's home directory. + ## Dependency security Dependency floors and lockfiles keep known vulnerable packages out of runtime and development environments. @@ -56,10 +64,12 @@ Runtime dependencies are declared in `pyproject.toml` and mirrored by `uv.lock`; GitHub Actions workflows run with read-only tokens by default and use full SHA pins so third-party action updates are explicit. -The `.github/workflows/` files declare the minimum `permissions:` scopes needed by each workflow, with CodeQL retaining `security-events: write` for result upload.
Action references are pinned to immutable commits with the upstream tag retained in comments for reviewability, and `.github/dependabot.yml` checks the `github-actions` ecosystem weekly so those pins do not silently age. +The `.github/workflows/` files declare the minimum `permissions:` scopes needed by each workflow, with CodeQL retaining `security-events: write` for result upload. Action references are pinned to immutable commits with the upstream tag retained in comments for reviewability, and `.github/dependabot.yml` checks the `github-actions` ecosystem weekly so those pins do not silently age. The Python test matrix pins its PyPy 3.11 job to an explicit PyPy release so CI keeps exercising the intended CPython 3.11.15-compatible runtime instead of silently drifting with runner cache updates. It also exercises regular CPython 3.15.0b3 while leaving that beta's free-threaded builds out of CI until the runner support is less brittle. + +Rust extension CI triggers on Rust sources, Rust integration tests, and Python fast-path files such as [[json2xml/backend_selector.py]] and [[json2xml/dicttoxml_fast.py]]. That keeps native backend tests attached to the Python dispatch code that decides whether the accelerator is used. ## CLI entrypoint The CLI is a thin adapter that parses options, resolves one input source, and forwards those options into the same converter used by the library API. -[[json2xml/cli.py#create_parser]] defines the user-facing flags. [[json2xml/cli.py#read_input]] enforces the source priority rules, and [[json2xml/cli.py#main]] constructs [[json2xml/json2xml.py#Json2xml]] so command-line use and library use stay aligned. +[[json2xml/cli.py#create_parser]] defines the user-facing flags. A small `CLIApplication` seam now owns source resolution, stdin parsing, conversion, and output writing, while [[json2xml/cli.py#read_input]] and [[json2xml/cli.py#main]] remain the stable wrapper functions used by tests and callers. 
Command-line use and library use still meet at [[json2xml/json2xml.py#Json2xml]]. diff --git a/lat.md/tests.md b/lat.md/tests.md index 3530832..c3ac0aa 100644 --- a/lat.md/tests.md +++ b/lat.md/tests.md @@ -38,6 +38,22 @@ Running the CLI without JSON should fail with a message that tells users to pass Malformed JSON read from an existing file should mention that file path so users can distinguish file parsing failures from missing-file, string, stdin, or conversion failures. +### No-input guard stays total if exit helper is bypassed + +The CLI no-input branch should still fail loudly with its internal assertion if the exit helper is replaced during tests or embedding, so the control flow cannot fall through silently. + +## Performance benchmarks + +These tests pin the benchmark script configuration rules so contributors can rerun published measurements without inheriting one machine's filesystem layout. + +### Benchmark script derives interpreter paths from configurable uv base dir + +The multi-interpreter benchmark should derive default interpreter paths from `JSON2XML_UV_PYTHON_DIR` so the documented uv layout stays portable across machines. + +### Benchmark script lets explicit interpreter env vars override uv defaults + +The multi-interpreter benchmark should let per-interpreter environment variables override uv-derived defaults so unusual local layouts remain runnable without editing the script. + ## Conversion behavior These tests pin the XML shapes that matter most for interoperability, especially the modes that intentionally diverge from the default serializer. @@ -110,6 +126,14 @@ Backend metadata helpers should report whether Rust is active and name the selec Helper exports for XML escaping and CDATA wrapping should preserve Python behavior when Rust helper callables are unavailable. 
+### Backend selector detects Python-only payload markers + +The backend selector should recognize nested `@attrs`, `@val`, and `@flat` markers so Rust is skipped before semantics drift. + +### Backend selector fails loudly with no compatible backend + +If every backend rejects a conversion request, the selector should raise a clear error instead of silently returning bad output. + ### Json2xml uses fast backend selection The public `Json2xml` wrapper should delegate through the fast backend selector so regular library and CLI conversions can use the Rust accelerator when installed. diff --git a/tests/test_backend_selector.py b/tests/test_backend_selector.py new file mode 100644 index 0000000..18d6f0b --- /dev/null +++ b/tests/test_backend_selector.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +import pytest + +from json2xml.backend_selector import BackendSelector, ConversionRequest, has_special_keys + + +class _NeverBackend: + name = "never" + + def can_handle(self, request: ConversionRequest) -> bool: + return False + + def render(self, request: ConversionRequest) -> bytes: + raise AssertionError("render should not be called") + + +# @lat: [[tests#Conversion behavior#Backend selector detects Python-only payload markers]] +def test_has_special_keys_detects_nested_python_only_markers() -> None: + assert has_special_keys({"items": [{"record": {"@attrs": {"id": "7"}}}]}) is True + assert has_special_keys({"items": [{"record@flat": [1, 2, 3]}]}) is True + assert has_special_keys({"items": [{"record": {"name": "Ada"}}]}) is False + + +# @lat: [[tests#Conversion behavior#Backend selector fails loudly with no compatible backend]] +def test_backend_selector_raises_when_no_backend_can_handle_request() -> None: + request = ConversionRequest( + obj={"name": "Ada"}, + root=True, + custom_root="root", + ids=None, + attr_type=True, + item_wrap=True, + item_func=None, + cdata=False, + xml_namespaces=None, + list_headers=False, + xpath_format=False, + ) + + selector = 
BackendSelector(_NeverBackend()) + + with pytest.raises(RuntimeError, match="No XML backend can handle"): + selector.render(request) diff --git a/tests/test_benchmark_multi_python.py b/tests/test_benchmark_multi_python.py new file mode 100644 index 0000000..6879572 --- /dev/null +++ b/tests/test_benchmark_multi_python.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +import importlib +from pathlib import Path + + +# @lat: [[tests#Performance benchmarks#Benchmark script derives interpreter paths from configurable uv base dir]] +def test_benchmark_multi_python_uses_configurable_uv_base_dir(monkeypatch) -> None: + monkeypatch.setenv("JSON2XML_UV_PYTHON_DIR", "/tmp/uv-python") + monkeypatch.delenv("JSON2XML_PYTHON_CPYTHON_314_6", raising=False) + monkeypatch.delenv("JSON2XML_PYTHON_CPYTHON_315_0B3", raising=False) + monkeypatch.delenv("JSON2XML_PYTHON_PYPY_311_15", raising=False) + + module = importlib.import_module("benchmark_multi_python") + module = importlib.reload(module) + + assert module.UV_PYTHON_DIR.as_posix() == "/tmp/uv-python" + assert Path(module.PYTHON_VERSIONS[0]["python"]).as_posix() == "/tmp/uv-python/cpython-3.14.6-macos-aarch64-none/bin/python3.14" + assert Path(module.PYTHON_VERSIONS[1]["python"]).as_posix() == "/tmp/uv-python/cpython-3.15.0b3-macos-aarch64-none/bin/python3.15" + assert Path(module.PYTHON_VERSIONS[2]["python"]).as_posix() == "/tmp/uv-python/pypy-3.11.15-macos-aarch64-none/bin/pypy3.11" + + +# @lat: [[tests#Performance benchmarks#Benchmark script lets explicit interpreter env vars override uv defaults]] +def test_benchmark_multi_python_env_overrides_interpreter_paths(monkeypatch) -> None: + monkeypatch.setenv("JSON2XML_UV_PYTHON_DIR", "/tmp/uv-python") + monkeypatch.setenv("JSON2XML_PYTHON_CPYTHON_314_6", "/custom/python314") + monkeypatch.setenv("JSON2XML_PYTHON_CPYTHON_315_0B3", "/custom/python315") + monkeypatch.setenv("JSON2XML_PYTHON_PYPY_311_15", "/custom/pypy311") + + module = 
importlib.import_module("benchmark_multi_python") + module = importlib.reload(module) + + assert Path(module.PYTHON_VERSIONS[0]["python"]).as_posix() == "/custom/python314" + assert Path(module.PYTHON_VERSIONS[1]["python"]).as_posix() == "/custom/python315" + assert Path(module.PYTHON_VERSIONS[2]["python"]).as_posix() == "/custom/pypy311" diff --git a/tests/test_cli.py b/tests/test_cli.py index 8c9cacd..7796d51 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -11,7 +11,15 @@ import pytest -from json2xml.cli import create_parser, main, read_from_stdin, read_input, write_output +from json2xml.cli import ( + CLIApplication, + CLIConversionOptions, + create_parser, + main, + read_from_stdin, + read_input, + write_output, +) if TYPE_CHECKING: from pytest import CaptureFixture @@ -601,6 +609,32 @@ def test_read_input_no_input_tty(self, capsys: CaptureFixture[str]) -> None: captured = capsys.readouterr() assert "No input provided" in captured.err + # @lat: [[tests#CLI failure messages#No-input guard stays total if exit helper is bypassed]] + def test_read_input_no_input_guard_raises_assertion_if_exit_helper_returns(self) -> None: + """Test the no-input branch keeps a hard failure if the exit helper is bypassed.""" + app = CLIApplication() + options = CLIConversionOptions( + input_file=None, + url=None, + string=None, + output=None, + wrapper="all", + root=True, + pretty=True, + attr_type=True, + item_wrap=True, + xpath_format=False, + cdata=False, + list_headers=True, + ) + + with ( + patch("sys.stdin.isatty", return_value=True), + patch("json2xml.cli.exit_with_error", return_value=None), + ): + with pytest.raises(AssertionError, match="unreachable"): + app.read_input(options) + def test_read_input_stdin_when_not_tty(self) -> None: """Test read_input reads from stdin when not a tty.""" with (