jkobject · LiudengZhang · May 14, 2026
diff --git a/scdataloader/utils.py b/scdataloader/utils.py
@@ -740,6 +740,32 @@ def length_normalize(adata: AnnData, gene_lengths: list) -> AnnData:
     return adata
 
 
+def _lookup_name(obj, ontology_id):
+    """Resolve an ontology term's name from its ontology id.
+
+    Handles comma-joined compound ids (e.g. CellxGene's
+    ``self_reported_ethnicity_ontology_term_id`` for multi-ethnic donors) by
+    resolving each part and rejoining the names. Returns None if any part is
+    absent from the database, so callers can skip unmappable terms instead of
+    raising.
+
+    Args:
+        obj: the bionty registry to look the id up in (e.g. bt.Ethnicity).
+        ontology_id (str): a single ontology id, or several joined by commas.
+
+    Returns:
+        str or None: the resolved name(s), comma-joined for compound ids, or
+            None if any part could not be found.
+    """
+    names = []
+    for part in str(ontology_id).split(","):
+        record = obj.filter(ontology_id=part.strip()).one_or_none()
+        if record is None:
+            return None
+        names.append(record.name)
+    return ",".join(names)
+
+
 def translate(
     val: Union[str, list, set, Counter, dict], t: str = "cell_type_ontology_term_id"
 ) -> dict:
@@ -776,11 +802,11 @@ def translate(
     else:
         return None
     if type(val) is str:
-        return {val: obj.filter(ontology_id=val).one().name}
+        return {val: _lookup_name(obj, val)}
     elif type(val) is dict or type(val) is Counter:
-        return {obj.filter(ontology_id=k).one().name: v for k, v in val.items()}
+        return {_lookup_name(obj, k): v for k, v in val.items()}
     elif type(val) is set:
-        return {i: obj.filter(ontology_id=i).one().name for i in val}
+        return {i: _lookup_name(obj, i) for i in val}
     else:
-        rl = {i: obj.filter(ontology_id=i).one().name for i in set(val)}
+        rl = {i: _lookup_name(obj, i) for i in set(val)}
         return [rl.get(i, None) for i in val]
diff --git a/tests/test_translate.py b/tests/test_translate.py
@@ -0,0 +1,69 @@
+"""Unit tests for ``scdataloader.utils.translate`` and its ``_lookup_name`` helper.
+
+These use a fake bionty registry so they exercise the id-splitting / name-joining
+logic without needing a populated ontology database.
+"""
+
+import bionty as bt
+
+from scdataloader.utils import _lookup_name, translate
+
+
+class _Record:
+    def __init__(self, name):
+        self.name = name
+
+
+class _Filtered:
+    def __init__(self, record):
+        self._record = record
+
+    def one_or_none(self):
+        return self._record
+
+
+class _FakeRegistry:
+    """Mimics a bionty registry: ``.filter(ontology_id=...).one_or_none()``."""
+
+    _DB = {"HANCESTRO:0005": "European", "HANCESTRO:0008": "African"}
+
+    def filter(self, ontology_id):
+        name = self._DB.get(ontology_id)
+        return _Filtered(_Record(name) if name is not None else None)
+
+
+def test_lookup_name_single_id():
+    assert _lookup_name(_FakeRegistry(), "HANCESTRO:0005") == "European"
+
+
+def test_lookup_name_compound_id():
+    # CellxGene records multi-ethnic donors as comma-joined ids; this used to
+    # raise because the whole string was looked up as a single term.
+    assert (
+        _lookup_name(_FakeRegistry(), "HANCESTRO:0005,HANCESTRO:0008")
+        == "European,African"
+    )
+
+
+def test_lookup_name_unknown_id_returns_none():
+    assert _lookup_name(_FakeRegistry(), "HANCESTRO:9999") is None
+
+
+def test_lookup_name_compound_with_unknown_part_returns_none():
+    assert _lookup_name(_FakeRegistry(), "HANCESTRO:0005,HANCESTRO:9999") is None
+
+
+def test_lookup_name_tolerates_whitespace():
+    assert (
+        _lookup_name(_FakeRegistry(), "HANCESTRO:0005, HANCESTRO:0008")
+        == "European,African"
+    )
+
+
+def test_translate_list_with_compound_id(monkeypatch):
+    monkeypatch.setattr(bt, "Ethnicity", _FakeRegistry())
+    out = translate(
+        ["HANCESTRO:0005", "HANCESTRO:0005,HANCESTRO:0008"],
+        "self_reported_ethnicity_ontology_term_id",
+    )
+    assert out == ["European", "European,African"]