Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ In development

- Add ``TreeSequence.ld_matrix`` stats method and documentation, for computing
two-locus statistics in site and branch mode.
(:user:`lkirk`, :user:`apragsdale`, :pr:`3416`)
(:user:`lkirk`, :user:`apragsdale`, :pr:`3416`)
- Add ``node_labels`` parameter to ``write_nexus``. (:user:`kaathewisegit`, :pr:`3442`)

--------------------
[1.0.2] - 2026-03-06
Expand Down
92 changes: 92 additions & 0 deletions python/tests/test_phylo_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

import functools
import io
import random
import textwrap

import dendropy
Expand Down Expand Up @@ -334,6 +335,97 @@ def test_nexus_no_trees_or_alignments(self):
)


class TestNexusNodeLabels:
@tests.cached_example
def balanced_tree(self):
    """
    Return the balanced three-leaf tree used by the node-label tests
    in this class.
    """
    # NOTE(review): presumably memoised by ``tests.cached_example`` so the
    # tree is only built once -- confirm against the decorator.
    #
    # Topology (node IDs):
    #   4
    # ┏━┻┓
    # ┃  3
    # ┃ ┏┻┓
    # 0 1 2
    return tskit.Tree.generate_balanced(3)

def test_as_nexus_labels_basic(self):
ts = self.balanced_tree().tree_sequence
labels = {0: "human", 1: "chimp", 2: "bonobo"}
expected = textwrap.dedent(
"""\
#NEXUS
BEGIN TAXA;
DIMENSIONS NTAX=3;
TAXLABELS human chimp bonobo;
END;
BEGIN TREES;
TRANSLATE n0 human, n1 chimp, n2 bonobo;
TREE t0^1 = [&R] (n0:2,(n1:1,n2:1):1);
END;
"""
)
assert expected == ts.as_nexus(include_alignments=False, node_labels=labels)

def test_as_nexus_labels_partial(self):
ts = self.balanced_tree().tree_sequence
labels = {0: "human", 2: "bonobo"}
expected = textwrap.dedent(
"""\
#NEXUS
BEGIN TAXA;
DIMENSIONS NTAX=3;
TAXLABELS human n1 bonobo;
END;
BEGIN TREES;
TRANSLATE n0 human, n2 bonobo;
TREE t0^1 = [&R] (n0:2,(n1:1,n2:1):1);
END;
"""
)
assert expected == ts.as_nexus(include_alignments=False, node_labels=labels)

def test_as_nexus_labels_none(self):
ts = self.balanced_tree().tree_sequence
expected = textwrap.dedent(
"""\
#NEXUS
BEGIN TAXA;
DIMENSIONS NTAX=3;
TAXLABELS n0 n1 n2;
END;
BEGIN TREES;
TREE t0^1 = [&R] (n0:2,(n1:1,n2:1):1);
END;
"""
)
assert expected == ts.as_nexus(include_alignments=False, node_labels=None)

@pytest.mark.parametrize("ts", get_example_tree_sequences())
def test_parseable(self, ts):
def all_samples_are_leaves(ts):
internal_nodes = np.unique(ts.edges_parent)
is_internal_sample = np.isin(ts.samples(), internal_nodes)
return not np.any(is_internal_sample)

if not all_samples_are_leaves(ts):
# TRANSLATE doesn't support translating internal nodes
return

for tree in ts.trees():
if not tree.has_single_root:
return

labels = {}
samples = ts.samples()
k = random.randint(1, len(samples))
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think the random does much here, I'd just do something like

u = ts.samples()[0]
labels[u] = f"new_node_which_was_{u}"

And then after just check that labels[u] is recognised as a taxon ID by dendropy?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea was to make sure that the parsers support translations given in arbitrary orders, because I wasn't 100% sure that'd be supported for non-integer labels.

The TRANSLATE statement maps arbitrary labels in the tree specification to valid taxon names. If the arbitrary labels are integers, they are mapped onto the valid taxon names as dictated by the TRANSLATE command without any consideration of the order of the taxa in the matrix

I've added additional asserts which actually check the labels. This surfaced a bug because dendropy doesn't support samples being internal nodes. There are 3 tree sequences like that in get_example_tree_sequences (all_nodes_samples, internal_nodes_samples, mixed_internal_leaf_samples), so I filtered them out.

for node in random.sample(list(samples), k):
labels[node] = f"new_node_which_was_{node}"

nexus = ts.as_nexus(include_alignments=False, node_labels=labels)
ds = dendropy.DataSet.get(data=nexus, schema="nexus")
tree = ds.tree_lists[0][0]
dendropy_labels = [node.taxon.label for node in tree.nodes() if node.taxon]
for label in labels.values():
assert label.replace("_", " ") in dendropy_labels


class TestNewickCodePaths:
"""
Test that the different code paths we use under the hood lead to
Expand Down
14 changes: 13 additions & 1 deletion python/tskit/text_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ def write_nexus(
include_alignments,
reference_sequence,
missing_data_character,
node_labels,
isolated_as_missing=None,
):
# See TreeSequence.write_nexus for documentation on parameters.
Expand All @@ -134,7 +135,13 @@ def write_nexus(
print("#NEXUS", file=out)
print("BEGIN TAXA;", file=out)
print("", f"DIMENSIONS NTAX={ts.num_samples};", sep=indent, file=out)
taxlabels = " ".join(f"n{u}" for u in ts.samples())

if node_labels is not None:
taxlabels = " ".join(
node_labels[u] if u in node_labels else f"n{u}" for u in ts.samples()
)
else:
taxlabels = " ".join(f"n{u}" for u in ts.samples())
print("", f"TAXLABELS {taxlabels};", sep=indent, file=out)
print("END;", file=out)

Expand Down Expand Up @@ -166,6 +173,11 @@ def write_nexus(
include_trees = True if include_trees is None else include_trees
if include_trees:
print("BEGIN TREES;", file=out)

if node_labels is not None:
translations = ", ".join(f"n{u} {name}" for u, name in node_labels.items())
print(f" TRANSLATE {translations};", file=out)

for tree in ts.trees():
start_interval = "{0:.{1}f}".format(tree.interval.left, pos_precision)
end_interval = "{0:.{1}f}".format(tree.interval.right, pos_precision)
Expand Down
6 changes: 6 additions & 0 deletions python/tskit/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -6797,6 +6797,7 @@ def write_nexus(
reference_sequence=None,
missing_data_character=None,
isolated_as_missing=None,
node_labels=None,
):
"""
Returns a `nexus encoding <https://en.wikipedia.org/wiki/Nexus_file>`_
Expand Down Expand Up @@ -6896,6 +6897,10 @@ def write_nexus(
:param str missing_data_character: As for the :meth:`.alignments` method,
but defaults to "?".
:param bool isolated_as_missing: As for the :meth:`.alignments` method.
:param node_labels: A map of type ``{node_id: name}``. Samples present
in the map will have the given name instead of ``n{node_id}``. Note
that the names must not have whitespace (spaces should be replaced
by underscores) or punctuation in them.
:return: A nexus representation of this :class:`TreeSequence`
:rtype: str
"""
Expand All @@ -6908,6 +6913,7 @@ def write_nexus(
reference_sequence=reference_sequence,
missing_data_character=missing_data_character,
isolated_as_missing=isolated_as_missing,
node_labels=node_labels,
)

def as_nexus(self, **kwargs):
Expand Down