From 5d814d06341d6d33ee9b78ec050db334a3afbab9 Mon Sep 17 00:00:00 2001 From: Vinit Kumar Date: Tue, 9 Jun 2026 12:38:48 +0530 Subject: [PATCH 1/3] feat: do another round of memory optimisations --- json2xml/dicttoxml.py | 435 ++++++++++++++++++++++++++++++++++++++--- lat.md/architecture.md | 2 +- 2 files changed, 414 insertions(+), 23 deletions(-) diff --git a/json2xml/dicttoxml.py b/json2xml/dicttoxml.py index dcc3fc7..02a14c5 100644 --- a/json2xml/dicttoxml.py +++ b/json2xml/dicttoxml.py @@ -7,6 +7,7 @@ from decimal import Decimal from fractions import Fraction from functools import lru_cache +from io import BytesIO from random import SystemRandom from typing import Any, Union, cast @@ -20,6 +21,21 @@ _XML_ESCAPE_CHARS = frozenset("&\"'<>") +class _XMLWriter: + """Small UTF-8 byte writer used by the internal streaming serializer.""" + + __slots__ = ("_buffer",) + + def __init__(self) -> None: + self._buffer = BytesIO() + + def write(self, value: str) -> None: + self._buffer.write(value.encode("utf-8")) + + def to_bytes(self) -> bytes: + return self._buffer.getvalue() + + def make_id(element: str, start: int = 100000, end: int = 999999) -> str: """ Generate a random ID for a given element. @@ -313,6 +329,39 @@ def convert_to_xpath31(obj: Any, parent_key: str | None = None) -> str: return f"{escape_xml(str(obj))}" +def _append_xpath31( + output: _XMLWriter, + obj: Any, + parent_key: str | None = None, + namespace: bool = False, +) -> None: + """Append XPath 3.1 json-to-xml output without building child strings.""" + key_attr = f' key="{escape_xml(parent_key)}"' if parent_key is not None else "" + namespace_attr = f' xmlns="{XPATH_FUNCTIONS_NS}"' if namespace else "" + tag_name = get_xpath31_tag_name(obj) + + if tag_name == "null": + output.write(f"") + elif tag_name == "boolean": + output.write(f"{str(obj).lower()}") + elif tag_name == "number": + output.write(f"{obj}") + elif tag_name == "string": + output.write(f"{escape_xml(str(obj))}") + elif tag_name == "map": + output.write(f"") + for key, val in obj.items(): + _append_xpath31(output, val, key) + output.write("") + elif tag_name == "array": + output.write(f"") + for item in obj: + _append_xpath31(output, item) + output.write("") + else: + output.write(f"{escape_xml(str(obj))}") + + def convert( obj: Any, ids: Any, @@ -646,6 +695,339 @@ def convert_list( return "".join(output) +def _append_convert( + output: _XMLWriter, + obj: Any, + ids: Any, + attr_type: bool, + item_func: Callable[[str], str], + cdata: bool, + item_wrap: bool, + parent: str = "root", + list_headers: bool = False, +) -> None: + """Append converted XML directly into output without building subtree strings.""" + item_name = item_func(parent) + + if isinstance(obj, bool): + output.write(convert_bool(key=item_name, val=obj, attr_type=attr_type, cdata=cdata)) + elif isinstance(obj, numbers.Number): + output.write(convert_kv(key=item_name, val=obj, attr_type=attr_type, attr={}, cdata=cdata)) + elif isinstance(obj, str): + output.write(convert_kv(key=item_name, val=obj, attr_type=attr_type, attr={}, cdata=cdata)) + elif hasattr(obj, "isoformat") and isinstance(obj, (datetime.datetime, datetime.date)): + output.write( + convert_kv( + key=item_name, + val=obj.isoformat(), + attr_type=attr_type, + attr={}, + cdata=cdata, + ) + ) + elif obj is None: + output.write(convert_none(key=item_name, attr_type=attr_type, cdata=cdata)) + elif isinstance(obj, dict): + _append_convert_dict( + output, + cast("dict[str, Any]", obj), + ids, + parent, + attr_type, + item_func, + cdata, + item_wrap, + list_headers=list_headers, + ) + elif isinstance(obj, Sequence): + _append_convert_list( + output, + obj, + ids, + parent, + attr_type, + item_func, + cdata, + item_wrap, + list_headers=list_headers, + ) + else: + raise TypeError(f"Unsupported data type: {obj} ({type(obj).__name__})") + + +def _append_dict2xml_str( + output: _XMLWriter, + attr_type: bool, + attr: dict[str, Any], + item: dict[str, Any], + item_func: Callable[[str], str], + cdata: bool, + item_name: str, + item_wrap: bool, + parentIsList: bool, + parent: str = "", + list_headers: bool = False, +) -> None: + """Append a dict element using the same shape as dict2xml_str.""" + ids: list[str] = [] + + if attr_type: + attr["type"] = get_xml_type(item) + val_attr = dict(item["@attrs"]) if "@attrs" in item else dict(attr) + if "@val" in item: + rawitem = item["@val"] + elif "@attrs" in item: + rawitem = {key: value for key, value in item.items() if key != "@attrs"} + else: + rawitem = item + + if parentIsList and list_headers: + if len(val_attr) > 0 and not item_wrap: + output.write(f"<{parent}{make_attrstring(val_attr)}>") + else: + output.write(f"<{parent}>") + _append_rawitem(output, rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name, list_headers) + output.write(f"") + elif item.get("@flat", False) or (parentIsList and not item_wrap): + _append_rawitem(output, rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name, list_headers) + else: + output.write(f"<{item_name}{make_attrstring(val_attr)}>") + _append_rawitem(output, rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name, list_headers) + output.write(f"") + + +def _append_rawitem( + output: _XMLWriter, + rawitem: Any, + ids: list[str], + attr_type: bool, + item_func: Callable[[str], str], + cdata: bool, + item_wrap: bool, + item_name: str, + list_headers: bool, +) -> None: + if rawitem is None: + return + if isinstance(rawitem, bool): + output.write(str(rawitem).lower()) + elif isinstance(rawitem, (str, numbers.Number)): + output.write(escape_xml(str(rawitem))) + else: + _append_convert( + output, + rawitem, + ids, + attr_type, + item_func, + cdata, + item_wrap, + item_name, + list_headers=list_headers, + ) + + +def _append_list2xml_str( + output: _XMLWriter, + attr_type: bool, + attr: dict[str, Any], + item: Sequence[Any], + item_func: Callable[[str], str], + cdata: bool, + item_name: str, + item_wrap: bool, + list_headers: bool = False, +) -> None: + ids: list[str] = [] + if attr_type: + attr["type"] = get_xml_type(item) + flat = False + if item_name.endswith("@flat"): + item_name = item_name[0:-5] + flat = True + + if flat or (len(item) > 0 and is_primitive_type(item[0]) and not item_wrap) or list_headers: + _append_convert_list( + output, + item, + ids, + item_name, + attr_type, + item_func, + cdata, + item_wrap, + list_headers=list_headers, + ) + return + + output.write(f"<{item_name}{make_attrstring(attr)}>") + _append_convert_list( + output, + item, + ids, + item_name, + attr_type, + item_func, + cdata, + item_wrap, + list_headers=list_headers, + ) + output.write(f"") + + +def _append_convert_dict( + output: _XMLWriter, + obj: dict[str, Any], + ids: list[str], + parent: str, + attr_type: bool, + item_func: Callable[[str], str], + cdata: bool, + item_wrap: bool, + list_headers: bool = False, +) -> None: + """Append a dict as XML without allocating a joined child subtree.""" + for key, val in obj.items(): + attr = {} if not ids else {"id": f"{get_unique_id(parent)}"} + key_is_flat = isinstance(key, str) and key.endswith("@flat") + xml_key = key[:-5] if key_is_flat else key + + key, attr = make_valid_xml_name(xml_key, attr) + + if isinstance(val, bool): + output.write(convert_bool_valid_name(key, val, attr_type, attr)) + elif isinstance(val, (numbers.Number, str)): + output.write( + convert_kv_valid_name( + key=key, val=val, attr_type=attr_type, attr=attr, cdata=cdata + ) + ) + elif hasattr(val, "isoformat"): + output.write( + convert_kv_valid_name( + key=key, + val=val.isoformat(), + attr_type=attr_type, + attr=attr, + cdata=cdata, + ) + ) + elif isinstance(val, dict): + _append_dict2xml_str( + output, + attr_type, + attr, + val, + item_func, + cdata, + key, + item_wrap, + False, + list_headers=list_headers, + ) + elif isinstance(val, Sequence): + _append_list2xml_str( + output, + attr_type=attr_type, + attr=attr, + item=val, + item_func=item_func, + cdata=cdata, + item_name=f"{key}@flat" if key_is_flat else key, + item_wrap=item_wrap, + list_headers=list_headers, + ) + elif not val: + output.write(convert_none_valid_name(key, attr_type, attr)) + else: + raise TypeError(f"Unsupported data type: {val} ({type(val).__name__})") + + +def _append_convert_list( + output: _XMLWriter, + items: Sequence[Any], + ids: list[str] | None, + parent: str, + attr_type: bool, + item_func: Callable[[str], str], + cdata: bool, + item_wrap: bool, + list_headers: bool = False, +) -> None: + """Append a list as XML without allocating a joined child subtree.""" + item_name = item_func(parent) + if item_name.endswith("@flat"): + item_name = item_name[:-5] + item_name, item_name_attr = make_valid_xml_name(item_name, {}) + scalar_key = item_name if item_wrap else parent + scalar_key, scalar_key_attr = make_valid_xml_name(scalar_key, {}) + this_id = get_unique_id(parent) if ids else None + + for i, item in enumerate(items): + attr = {} if not ids else {"id": f"{this_id}_{i + 1}"} + + if isinstance(item, bool): + if item_name_attr: + attr.update(item_name_attr) + output.write(convert_bool_valid_name(item_name, item, attr_type, attr)) + elif isinstance(item, (numbers.Number, str)): + if scalar_key_attr: + attr.update(scalar_key_attr) + output.write( + convert_kv_valid_name( + key=scalar_key, + val=item, + attr_type=attr_type, + attr=attr, + cdata=cdata, + ) + ) + elif hasattr(item, "isoformat"): + if item_name_attr: + attr.update(item_name_attr) + output.write( + convert_kv_valid_name( + key=item_name, + val=item.isoformat(), + attr_type=attr_type, + attr=attr, + cdata=cdata, + ) + ) + elif isinstance(item, dict): + _append_dict2xml_str( + output, + attr_type=attr_type, + attr=attr, + item=item, + item_func=item_func, + cdata=cdata, + item_name=item_name, + item_wrap=item_wrap, + parentIsList=True, + parent=parent, + list_headers=list_headers, + ) + elif isinstance(item, Sequence): + _append_list2xml_str( + output, + attr_type=attr_type, + attr=attr, + item=item, + item_func=item_func, + cdata=cdata, + item_name=item_name, + item_wrap=item_wrap, + list_headers=list_headers, + ) + elif item is None: + if item_name_attr: + attr.update(item_name_attr) + output.write(convert_none_valid_name(item_name, attr_type, attr)) + else: + raise TypeError(f"Unsupported data type: {item} ({type(item).__name__})") + + def convert_kv( key: str, val: str | int | float | numbers.Number | datetime.datetime | datetime.date, @@ -892,16 +1274,16 @@ def dicttoxml( """ if xpath_format: - xml_content = convert_to_xpath31(obj) - output = [ - '', - xml_content.replace("{xml_content}', - ] - return "".join(output).encode("utf-8") + output = _XMLWriter() + output.write('') + tag_name = get_xpath31_tag_name(obj) + if tag_name in {"map", "array"}: + _append_xpath31(output, obj, namespace=True) + else: + output.write(f'') + _append_xpath31(output, obj) + output.write("") + return output.to_bytes() namespace_str = "" if xml_namespaces is None: @@ -926,17 +1308,26 @@ def dicttoxml( namespace_str += f' xmlns:{prefix}="{ns}"' if root: custom_root, root_attr = make_valid_xml_name(custom_root, {}) - output_elem = convert( - obj, ids, attr_type, item_func, cdata, item_wrap, parent=custom_root, list_headers=list_headers - ) - output = ( - f'' - f"<{custom_root}{make_attrstring(root_attr)}{namespace_str}>" - f"{output_elem}" + output = _XMLWriter() + output.write('') + output.write(f"<{custom_root}{make_attrstring(root_attr)}{namespace_str}>") + _append_convert( + output, + obj, + ids, + attr_type, + item_func, + cdata, + item_wrap, + parent=custom_root, + list_headers=list_headers, ) - del output_elem - return output.encode("utf-8") - else: - return convert( + output.write(f"") + return output.to_bytes() + + output = _XMLWriter() + _append_convert( + output, obj, ids, attr_type, item_func, cdata, item_wrap, parent="", list_headers=list_headers - ).encode("utf-8") + ) + return output.to_bytes() diff --git a/lat.md/architecture.md b/lat.md/architecture.md index f27bedf..5435ac5 100644 --- a/lat.md/architecture.md +++ b/lat.md/architecture.md @@ -14,7 +14,7 @@ The pure Python serializer recursively maps Python values to XML elements, attri [[json2xml/dicttoxml.py#dicttoxml]] is the public serializer. It handles the XML declaration, root wrapper, namespace emission, XPath mode, and then routes nested values through helper functions such as [[json2xml/dicttoxml.py#convert]], [[json2xml/dicttoxml.py#convert_dict]], and [[json2xml/dicttoxml.py#convert_list]]. [[json2xml/dicttoxml.py#get_xml_type]] and [[json2xml/dicttoxml.py#convert]] accept broad caller input and classify unsupported values at runtime, so tests can probe failure paths without lying to the type checker. Invalid XML names are normalized by [[json2xml/dicttoxml.py#make_valid_xml_name]] instead of crashing immediately on user keys; common ASCII names use cached fast validation, while parser validation remains available for non-ASCII or unusual names. Dict and list scalar paths reuse validated element names and specialize generated type attributes so common payloads avoid repeated normalization and escaping work. Special `@attrs`/`@val` handling avoids mutating caller data. -The root wrapper path releases the unwrapped XML body before UTF-8 encoding the final document. That keeps peak memory closer to the returned byte size without changing the recursive serializer contract. +The `dicttoxml()` entry point streams normal and XPath serialization through [[json2xml/dicttoxml.py#_XMLWriter]] so recursive dict and list payloads do not allocate a complete string for each nested subtree. Public helpers such as `convert_dict()` still return strings for compatibility, but library and CLI conversions write UTF-8 bytes incrementally and return the final `bytes` object. ## Backend selection From 8cd34d691dc015f80036cd2adaa72dceefce4fd1 Mon Sep 17 00:00:00 2001 From: Vinit Kumar Date: Tue, 9 Jun 2026 16:17:54 +0530 Subject: [PATCH 2/3] refactor: align serializer helper paths --- json2xml/dicttoxml.py | 354 +++++++---------------------------- lat.md/architecture.md | 2 +- lat.md/tests.md | 4 + tests/test_dicttoxml_unit.py | 29 +++ 4 files changed, 105 insertions(+), 284 deletions(-) diff --git a/json2xml/dicttoxml.py b/json2xml/dicttoxml.py index 02a14c5..8444080 100644 --- a/json2xml/dicttoxml.py +++ b/json2xml/dicttoxml.py @@ -303,30 +303,9 @@ def convert_to_xpath31(obj: Any, parent_key: str | None = None) -> str: Returns: str: XML string in XPath 3.1 format. """ - key_attr = f' key="{escape_xml(parent_key)}"' if parent_key is not None else "" - tag_name = get_xpath31_tag_name(obj) - - if tag_name == "null": - return f"" - - if tag_name == "boolean": - return f"{str(obj).lower()}" - - if tag_name == "number": - return f"{obj}" - - if tag_name == "string": - return f"{escape_xml(str(obj))}" - - if tag_name == "map": - children = "".join(convert_to_xpath31(v, k) for k, v in obj.items()) - return f"{children}" - - if tag_name == "array": - children = "".join(convert_to_xpath31(item) for item in obj) - return f"{children}" - - return f"{escape_xml(str(obj))}" + output = _XMLWriter() + _append_xpath31(output, obj, parent_key) + return output.to_bytes().decode("utf-8") def _append_xpath31( @@ -374,45 +353,19 @@ def convert( ) -> str: """Routes the elements of an object to the right function to convert them based on their data type""" - item_name = item_func(parent) - # since bool is also a subtype of number.Number and int, the check for bool - # never comes and hence we get wrong value for the xml type bool - # here, we just change order and check for bool first, because no other - # type other than bool can be true for bool check - if isinstance(obj, bool): - return convert_bool(key=item_name, val=obj, attr_type=attr_type, cdata=cdata) - - if isinstance(obj, numbers.Number): - return convert_kv( - key=item_name, val=obj, attr_type=attr_type, attr={}, cdata=cdata - ) - - if isinstance(obj, str): - return convert_kv( - key=item_name, val=obj, attr_type=attr_type, attr={}, cdata=cdata - ) - - if hasattr(obj, "isoformat") and isinstance( - obj, (datetime.datetime, datetime.date) - ): - return convert_kv( - key=item_name, - val=obj.isoformat(), - attr_type=attr_type, - attr={}, - cdata=cdata, - ) - - if obj is None: - return convert_none(key=item_name, attr_type=attr_type, cdata=cdata) - - if isinstance(obj, dict): - return convert_dict(cast("dict[str, Any]", obj), ids, parent, attr_type, item_func, cdata, item_wrap, list_headers=list_headers) - - if isinstance(obj, Sequence): - return convert_list(obj, ids, parent, attr_type, item_func, cdata, item_wrap, list_headers=list_headers) - - raise TypeError(f"Unsupported data type: {obj} ({type(obj).__name__})") + output = _XMLWriter() + _append_convert( + output, + obj, + ids, + attr_type, + item_func, + cdata, + item_wrap, + parent, + list_headers=list_headers, + ) + return output.to_bytes().decode("utf-8") def is_primitive_type(val: Any) -> bool: @@ -434,42 +387,21 @@ def dict2xml_str( """ parse dict2xml """ - ids: list[str] = [] # initialize list of unique ids - subtree = "" # Initialize subtree with default empty string - - if attr_type: - attr["type"] = get_xml_type(item) - val_attr = dict(item["@attrs"]) if "@attrs" in item else dict(attr) - if "@val" in item: - rawitem = item["@val"] - elif "@attrs" in item: - rawitem = {key: value for key, value in item.items() if key != "@attrs"} - else: - rawitem = item - if is_primitive_type(rawitem): - if rawitem is None: - subtree = "" - elif isinstance(rawitem, bool): - subtree = str(rawitem).lower() - else: - subtree = escape_xml(str(rawitem)) - else: - # we can not use convert_dict, because rawitem could be non-dict - subtree = convert( - rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name, list_headers=list_headers - ) - - if parentIsList and list_headers: - if len(val_attr) > 0 and not item_wrap: - attrstring = make_attrstring(val_attr) - return f"<{parent}{attrstring}>{subtree}" - return f"<{parent}>{subtree}" - elif item.get("@flat", False) or (parentIsList and not item_wrap): - return subtree - - attrstring = make_attrstring(val_attr) - - return f"<{item_name}{attrstring}>{subtree}" + output = _XMLWriter() + _append_dict2xml_str( + output, + attr_type, + attr, + item, + item_func, + cdata, + item_name, + item_wrap, + parentIsList, + parent, + list_headers=list_headers, + ) + return output.to_bytes().decode("utf-8") def list2xml_str( @@ -482,30 +414,19 @@ def list2xml_str( item_wrap: bool, list_headers: bool = False, ) -> str: - ids: list[str] = [] # initialize list of unique ids - if attr_type: - attr["type"] = get_xml_type(item) - flat = False - subtree = "" # Initialize subtree with default empty string - if item_name.endswith("@flat"): - item_name = item_name[0:-5] - flat = True - subtree = convert_list( - items=item, - ids=ids, - parent=item_name, - attr_type=attr_type, - item_func=item_func, - cdata=cdata, - item_wrap=item_wrap, - list_headers=list_headers + output = _XMLWriter() + _append_list2xml_str( + output, + attr_type, + attr, + item, + item_func, + cdata, + item_name, + item_wrap, + list_headers=list_headers, ) - if flat or (len(item) > 0 and is_primitive_type(item[0]) and not item_wrap): - return subtree - elif list_headers: - return subtree - attrstring = make_attrstring(attr) - return f"<{item_name}{attrstring}>{subtree}" + return output.to_bytes().decode("utf-8") def convert_dict( @@ -519,71 +440,19 @@ def convert_dict( list_headers: bool = False ) -> str: """Converts a dict into an XML string.""" - output: list[str] = [] - addline = output.append - - for key, val in obj.items(): - attr = {} if not ids else {"id": f"{get_unique_id(parent)}"} - key_is_flat = isinstance(key, str) and key.endswith("@flat") - xml_key = key[:-5] if key_is_flat else key - - key, attr = make_valid_xml_name(xml_key, attr) - - # since bool is also a subtype of number.Number and int, the check for bool - # never comes and hence we get wrong value for the xml type bool - # here, we just change order and check for bool first, because no other - # type other than bool can be true for bool check - if isinstance(val, bool): - addline(convert_bool_valid_name(key, val, attr_type, attr)) - - elif isinstance(val, (numbers.Number, str)): - addline( - convert_kv_valid_name( - key=key, val=val, attr_type=attr_type, attr=attr, cdata=cdata - ) - ) - - elif hasattr(val, "isoformat"): # datetime - addline( - convert_kv_valid_name( - key=key, - val=val.isoformat(), - attr_type=attr_type, - attr=attr, - cdata=cdata, - ) - ) - - elif isinstance(val, dict): - addline( - dict2xml_str( - attr_type, attr, val, item_func, cdata, key, item_wrap, - False, - list_headers=list_headers - ) - ) - - elif isinstance(val, Sequence): - addline( - list2xml_str( - attr_type=attr_type, - attr=attr, - item=val, - item_func=item_func, - cdata=cdata, - item_name=f"{key}@flat" if key_is_flat else key, - item_wrap=item_wrap, - list_headers=list_headers - ) - ) - - elif not val: - addline(convert_none_valid_name(key, attr_type, attr)) - - else: - raise TypeError(f"Unsupported data type: {val} ({type(val).__name__})") - - return "".join(output) + output = _XMLWriter() + _append_convert_dict( + output, + obj, + ids, + parent, + attr_type, + item_func, + cdata, + item_wrap, + list_headers=list_headers, + ) + return output.to_bytes().decode("utf-8") def convert_list( @@ -597,102 +466,19 @@ def convert_list( list_headers: bool = False, ) -> str: """Converts a list into an XML string.""" - output: list[str] = [] - addline = output.append - - item_name = item_func(parent) # Is item_name still relevant if item_wrap is false - if item_name.endswith("@flat"): - item_name = item_name[:-5] - item_name, item_name_attr = make_valid_xml_name(item_name, {}) - scalar_key = item_name if item_wrap else parent - scalar_key, scalar_key_attr = make_valid_xml_name(scalar_key, {}) - this_id = None - if ids: - this_id = get_unique_id(parent) - - for i, item in enumerate(items): - attr = {} if not ids else {"id": f"{this_id}_{i + 1}"} - - if isinstance(item, bool): - if item_name_attr: - attr.update(item_name_attr) - addline(convert_bool_valid_name(item_name, item, attr_type, attr)) - - elif isinstance(item, (numbers.Number, str)): - if scalar_key_attr: - attr.update(scalar_key_attr) - if item_wrap: - addline( - convert_kv_valid_name( - key=scalar_key, - val=item, - attr_type=attr_type, - attr=attr, - cdata=cdata, - ) - ) - else: - addline( - convert_kv_valid_name( - key=scalar_key, - val=item, - attr_type=attr_type, - attr=attr, - cdata=cdata, - ) - ) - - elif hasattr(item, "isoformat"): # datetime - if item_name_attr: - attr.update(item_name_attr) - addline( - convert_kv_valid_name( - key=item_name, - val=item.isoformat(), - attr_type=attr_type, - attr=attr, - cdata=cdata, - ) - ) - - elif isinstance(item, dict): - addline( - dict2xml_str( - attr_type=attr_type, - attr=attr, - item=item, - item_func=item_func, - cdata=cdata, - item_name=item_name, - item_wrap=item_wrap, - parentIsList=True, - parent=parent, - list_headers=list_headers - ) - ) - - elif isinstance(item, Sequence): - addline( - list2xml_str( - attr_type=attr_type, - attr=attr, - item=item, - item_func=item_func, - cdata=cdata, - item_name=item_name, - item_wrap=item_wrap, - list_headers=list_headers - ) - ) - - elif item is None: - if item_name_attr: - attr.update(item_name_attr) - addline(convert_none_valid_name(item_name, attr_type, attr)) - - else: - raise TypeError(f"Unsupported data type: {item} ({type(item).__name__})") - return "".join(output) + output = _XMLWriter() + _append_convert_list( + output, + items, + ids, + parent, + attr_type, + item_func, + cdata, + item_wrap, + list_headers=list_headers, + ) + return output.to_bytes().decode("utf-8") def _append_convert( @@ -770,6 +556,7 @@ def _append_dict2xml_str( ) -> None: """Append a dict element using the same shape as dict2xml_str.""" ids: list[str] = [] + attr = dict(attr) if attr_type: attr["type"] = get_xml_type(item) @@ -839,6 +626,7 @@ def _append_list2xml_str( list_headers: bool = False, ) -> None: ids: list[str] = [] + attr = dict(attr) if attr_type: attr["type"] = get_xml_type(item) flat = False diff --git a/lat.md/architecture.md b/lat.md/architecture.md index 5435ac5..05f9f07 100644 --- a/lat.md/architecture.md +++ b/lat.md/architecture.md @@ -14,7 +14,7 @@ The pure Python serializer recursively maps Python values to XML elements, attri [[json2xml/dicttoxml.py#dicttoxml]] is the public serializer. It handles the XML declaration, root wrapper, namespace emission, XPath mode, and then routes nested values through helper functions such as [[json2xml/dicttoxml.py#convert]], [[json2xml/dicttoxml.py#convert_dict]], and [[json2xml/dicttoxml.py#convert_list]]. [[json2xml/dicttoxml.py#get_xml_type]] and [[json2xml/dicttoxml.py#convert]] accept broad caller input and classify unsupported values at runtime, so tests can probe failure paths without lying to the type checker. Invalid XML names are normalized by [[json2xml/dicttoxml.py#make_valid_xml_name]] instead of crashing immediately on user keys; common ASCII names use cached fast validation, while parser validation remains available for non-ASCII or unusual names. Dict and list scalar paths reuse validated element names and specialize generated type attributes so common payloads avoid repeated normalization and escaping work. Special `@attrs`/`@val` handling avoids mutating caller data. -The `dicttoxml()` entry point streams normal and XPath serialization through [[json2xml/dicttoxml.py#_XMLWriter]] so recursive dict and list payloads do not allocate a complete string for each nested subtree. Public helpers such as `convert_dict()` still return strings for compatibility, but library and CLI conversions write UTF-8 bytes incrementally and return the final `bytes` object. +The `dicttoxml()` entry point streams normal and XPath serialization through [[json2xml/dicttoxml.py#_XMLWriter]] so recursive dict and list payloads do not allocate a complete string for each nested subtree. Public helpers such as `convert_dict()` still return strings for compatibility by delegating to the same append path, while library and CLI conversions write UTF-8 bytes incrementally and return the final `bytes` object. ## Backend selection diff --git a/lat.md/tests.md b/lat.md/tests.md index 86e3f6c..8d784c6 100644 --- a/lat.md/tests.md +++ b/lat.md/tests.md @@ -130,6 +130,10 @@ These tests pin low-level XML helper contracts so performance refactors keep the Helpers that receive prevalidated XML names should add type metadata only to the emitted element and must not mutate caller-owned attribute dictionaries. +### Container helpers preserve caller attrs + +Dict and list element helpers should add container type metadata only to emitted XML and must not mutate caller-owned attribute dictionaries. + ### XML name validity fast and cached paths XML name validation should agree across the ASCII fast path, parser-backed path, and repeated cached calls so optimization does not change accepted names. diff --git a/tests/test_dicttoxml_unit.py b/tests/test_dicttoxml_unit.py index 91d0a40..e047e8b 100644 --- a/tests/test_dicttoxml_unit.py +++ b/tests/test_dicttoxml_unit.py @@ -113,6 +113,35 @@ def test_valid_name_helpers_keep_existing_attrs_without_attr_type() -> None: assert base_attrs == {"name": "invalid key"} +# @lat: [[tests#XML helper behavior#Container helpers preserve caller attrs]] +def test_container_helpers_set_type_without_mutating_caller_attrs() -> None: + dict_attrs = {"id": "shared"} + list_attrs = {"id": "shared"} + + assert dicttoxml.dict2xml_str( + attr_type=True, + attr=dict_attrs, + item={"name": "Bike"}, + item_func=lambda _parent: "item", + cdata=False, + item_name="product", + item_wrap=True, + parentIsList=False, + ) == 'Bike' + assert dict_attrs == {"id": "shared"} + + assert dicttoxml.list2xml_str( + attr_type=True, + attr=list_attrs, + item=["Bike"], + item_func=lambda _parent: "item", + cdata=False, + item_name="products", + item_wrap=True, + ) == 'Bike' + assert list_attrs == {"id": "shared"} + + # @lat: [[tests#XML helper behavior#XML name validity fast and cached paths]] def test_key_is_valid_xml_fast_and_parse_paths_are_stable_under_cache() -> None: dicttoxml.key_is_valid_xml.cache_clear() From b06ca10f47af6ea9d7ca81149579339331136a54 Mon Sep 17 00:00:00 2001 From: Vinit Kumar Date: Tue, 9 Jun 2026 16:52:04 +0530 Subject: [PATCH 3/3] test: cover xpath root scalar output --- lat.md/tests.md | 4 ++++ tests/test_dict2xml.py | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/lat.md/tests.md b/lat.md/tests.md index 8d784c6..78f1201 100644 --- a/lat.md/tests.md +++ b/lat.md/tests.md @@ -46,6 +46,10 @@ These tests pin the XML shapes that matter most for interoperability, especially XPath mode should emit the W3C XPath functions namespace and typed child elements so downstream consumers receive standards-shaped XML. +### XPath format wraps root scalars + +XPath mode should wrap root scalar payloads in a namespace-qualified map so the output remains a single well-formed XML document. + ### Item-wrap false repeats parent tag Disabling item wrapping should repeat the parent element name for primitive list items instead of producing nested `` tags. diff --git a/tests/test_dict2xml.py b/tests/test_dict2xml.py index 13d9277..f54422e 100644 --- a/tests/test_dict2xml.py +++ b/tests/test_dict2xml.py @@ -95,6 +95,18 @@ def test_dict2xml_xsi_xmlns(self) -> None: "blue" == result ) + # @lat: [[tests#Conversion behavior#XPath format wraps root scalars]] + def test_xpath_format_root_scalar_wraps_in_namespace_map(self) -> None: + """Test XPath root scalar output remains one namespace-qualified document.""" + result = dicttoxml.dicttoxml("Bike", xpath_format=True) + + assert result == ( + b'' + b'' + b"Bike" + b"" + ) + def test_item_wrap_true(self) -> None: """Test dicttoxml with item_wrap=True.""" data = {"bike": ["blue", "green"]}