Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
85ed17d
perf: session-scope blobs fixtures to cut setup time by ~10s
LucaMarconato May 5, 2026
9f1378d
perf: use blobs(length=128) in test_vectorize to cut to_circles cost
LucaMarconato May 5, 2026
149bfdf
chore: remove benchmark CSVs from repo
LucaMarconato May 5, 2026
73f4387
chore: remove benchmark stats csv
LucaMarconato May 5, 2026
7726556
perf: eliminate double-validation in __setitem__ and use fast fixture…
LucaMarconato May 5, 2026
ac97136
perf: add validate=False option to get_model, clarify setitem validat…
LucaMarconato May 5, 2026
acb25b1
perf: skip re-validation when building SpatialData from already-valid…
LucaMarconato May 5, 2026
4113706
Revert "perf: skip re-validation when building SpatialData from alrea…
LucaMarconato May 6, 2026
89a2202
perf: vectorize label centroid computation, 30x speedup
LucaMarconato May 6, 2026
b9d9b57
perf: session-scope blobs fixtures in concatenate and get_attrs tests
LucaMarconato May 6, 2026
b5b09e3
perf: reduce categories and partitions in test_categories_on_partitio…
LucaMarconato May 6, 2026
031ba56
perf: shrink test_categories_on_partitioned_dataframe to N=10, local …
LucaMarconato May 6, 2026
51674b9
ci: parallelize test suite with pytest-xdist worksteal
LucaMarconato May 6, 2026
4da1bc5
refactor: clean up _get_centroids_for_labels docstring and clarify in…
LucaMarconato May 6, 2026
61553c7
refactor: minor code swap for improved clarity
LucaMarconato May 6, 2026
0448312
perf: speed up test suite — subset sdata, disable numba JIT, promote …
LucaMarconato May 6, 2026
b66a41b
fix+perf: faster test suite — subset fixtures, disable numba JIT, fix…
LucaMarconato May 6, 2026
58b64c8
refactor: update comments and refine test_consolidated_metadata subset
LucaMarconato May 6, 2026
8f1f49d
fix: prevent numba JIT errors during test collection on runner envs
LucaMarconato May 6, 2026
c7759a4
Merge branch 'main' into fix/numba-jit-config
LucaMarconato May 6, 2026
de3ed36
refactor: consolidate numba JIT disable into root conftest only
LucaMarconato May 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/tutorials/notebooks
Submodule notebooks updated 160 files
8 changes: 6 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
from __future__ import annotations

import os
import sys

# Disable numba JIT for the test suite (the test data is small so initializing the JIT is slower than using plain
# Python)
os.environ.setdefault("NUMBA_DISABLE_JIT", "1")
# Python). Force-set (not setdefault) so the runner environment cannot accidentally override with "0".
os.environ["NUMBA_DISABLE_JIT"] = "1"
# If a pytest plugin already imported numba before this conftest ran, patch the cached config value too.
if "numba.core.config" in sys.modules:
sys.modules["numba.core.config"].NUMBA_DISABLE_JIT = 1

import copy as _copy
from collections.abc import Callable, Sequence
Expand Down
7 changes: 0 additions & 7 deletions tests/core/operations/conftest.py
Original file line number Diff line number Diff line change
@@ -1,8 +1 @@
from __future__ import annotations

import os

# Disable numba JIT compilation for rasterize tests. Datashader (used by rasterize) triggers
# numba JIT on first call, costing ~1.4s per worker. Python-mode gives identical results for
# the small test data here — unlike real data, there is no throughput advantage from JIT.
os.environ.setdefault("NUMBA_DISABLE_JIT", "1")
29 changes: 16 additions & 13 deletions tests/core/operations/test_vectorize.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,16 @@
from spatialdata.models.models import ShapesModel
from spatialdata.testing import assert_elements_are_identical


# each of the tests operates on different elements, hence we can initialize the data once without conflicts
sdata = blobs(length=128)
@pytest.fixture(scope="module")
def sdata():
return blobs(length=128)


# conversion from labels
@pytest.mark.parametrize("is_multiscale", [False, True])
def test_labels_2d_to_circles(is_multiscale: bool) -> None:
def test_labels_2d_to_circles(sdata, is_multiscale: bool) -> None:
key = "blobs" + ("_multiscale" if is_multiscale else "") + "_labels"
element = sdata[key]
new_circles = to_circles(element)
Expand All @@ -36,7 +39,7 @@ def test_labels_2d_to_circles(is_multiscale: bool) -> None:


@pytest.mark.parametrize("is_multiscale", [False, True])
def test_labels_2d_to_polygons(is_multiscale: bool) -> None:
def test_labels_2d_to_polygons(sdata, is_multiscale: bool) -> None:
key = "blobs" + ("_multiscale" if is_multiscale else "") + "_labels"
element = sdata[key]
new_polygons = to_polygons(element)
Expand All @@ -49,7 +52,7 @@ def test_labels_2d_to_polygons(is_multiscale: bool) -> None:
assert ((new_polygons.area - new_polygons.pixel_count) / new_polygons.pixel_count < 0.01).all()


def test_chunked_labels_2d_to_polygons() -> None:
def test_chunked_labels_2d_to_polygons(sdata) -> None:
no_chunks_polygons = to_polygons(sdata["blobs_labels"])

sdata["blobs_labels_chunked"] = sdata["blobs_labels"].copy()
Expand All @@ -63,21 +66,21 @@ def test_chunked_labels_2d_to_polygons() -> None:


# conversion from circles
def test_circles_to_circles() -> None:
def test_circles_to_circles(sdata) -> None:
element = sdata["blobs_circles"]
new_circles = to_circles(element)
assert_elements_are_identical(element, new_circles)


def test_circles_to_polygons() -> None:
def test_circles_to_polygons(sdata) -> None:
element = sdata["blobs_circles"]
polygons = to_polygons(element, buffer_resolution=1000)
areas = element.radius**2 * math.pi
assert np.allclose(polygons.area, areas)


# conversion from polygons/multipolygons
def test_polygons_to_circles() -> None:
def test_polygons_to_circles(sdata) -> None:
element = sdata["blobs_polygons"].iloc[:2]
new_circles = to_circles(element)

Expand All @@ -93,7 +96,7 @@ def test_polygons_to_circles() -> None:
assert_elements_are_identical(new_circles, expected)


def test_multipolygons_to_circles() -> None:
def test_multipolygons_to_circles(sdata) -> None:
element = sdata["blobs_multipolygons"]
new_circles = to_circles(element)

Expand All @@ -108,13 +111,13 @@ def test_multipolygons_to_circles() -> None:
assert_elements_are_identical(new_circles, expected)


def test_polygons_multipolygons_to_polygons() -> None:
def test_polygons_multipolygons_to_polygons(sdata) -> None:
polygons = sdata["blobs_multipolygons"]
assert polygons is to_polygons(polygons)


# conversion from points
def test_points_to_circles() -> None:
def test_points_to_circles(sdata) -> None:
element = sdata["blobs_points"]
with pytest.raises(RuntimeError, match="`radius` must either be provided, either be a column"):
to_circles(element)
Expand All @@ -126,18 +129,18 @@ def test_points_to_circles() -> None:
assert np.array_equal(np.ones_like(x), circles["radius"])


def test_points_to_polygons() -> None:
def test_points_to_polygons(sdata) -> None:
with pytest.raises(RuntimeError, match="Cannot convert points to polygons"):
to_polygons(sdata["blobs_points"])


# conversion from images (invalid)
def test_images_to_circles() -> None:
def test_images_to_circles(sdata) -> None:
with pytest.raises(RuntimeError, match=r"Cannot apply to_circles\(\) to images"):
to_circles(sdata["blobs_image"])


def test_images_to_polygons() -> None:
def test_images_to_polygons(sdata) -> None:
with pytest.raises(RuntimeError, match=r"Cannot apply to_polygons\(\) to images"):
to_polygons(sdata["blobs_image"])

Expand Down
17 changes: 9 additions & 8 deletions tests/core/query/test_relational_query_match_sdata_to_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@ def _make_test_data() -> SpatialData:
return sdata


# constructing the example data; let's use a global variable as we can reuse the same object on most tests
# without having to recreate it
sdata = _make_test_data()
# Construct the example data once; most tests reuse the same object instead of recreating it.
@pytest.fixture(scope="module")
def sdata():
return _make_test_data()


def test_match_sdata_to_table_filter_specific_instances():
def test_match_sdata_to_table_filter_specific_instances(sdata):
"""
Filter to keep only specific instances. Note that it works even when the table annotates multiple elements.
"""
Expand All @@ -33,7 +34,7 @@ def test_match_sdata_to_table_filter_specific_instances():
assert "blobs_polygons-sdata2" in matched


def test_match_sdata_to_table_filter_specific_instances_element():
def test_match_sdata_to_table_filter_specific_instances_element(sdata):
"""
Filter to keep only specific instances, in a specific element.
"""
Expand All @@ -49,7 +50,7 @@ def test_match_sdata_to_table_filter_specific_instances_element():
assert "blobs_polygons-sdata2" not in matched


def test_match_sdata_to_table_filter_by_threshold():
def test_match_sdata_to_table_filter_by_threshold(sdata):
"""
Filter by a threshold on a value column, in a specific element.
"""
Expand All @@ -63,7 +64,7 @@ def test_match_sdata_to_table_filter_by_threshold():
assert "blobs_polygons-sdata2" not in matched


def test_match_sdata_to_table_subset_certain_obs():
def test_match_sdata_to_table_subset_certain_obs(sdata):
"""
Subset to certain obs (we could also subset to certain var or layer).
"""
Expand Down Expand Up @@ -135,7 +136,7 @@ def test_match_sdata_to_table_match_labels_error():
assert "blobs_points-sdata1" not in matched


def test_match_sdata_to_table_no_table_argument():
def test_match_sdata_to_table_no_table_argument(sdata):
"""
If no table argument is passed, the table_name argument will be used to match the table.
"""
Expand Down
22 changes: 13 additions & 9 deletions tests/core/test_data_extent.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,15 @@
from spatialdata.models import Image2DModel, PointsModel, ShapesModel
from spatialdata.transformations import Affine, Translation, remove_transformation, set_transformation

# for faster tests; we will pay attention not to modify the original data
sdata = blobs()
RNG = default_rng(seed=0)


# Built once per module for faster tests; tests must take care not to modify the original data.
@pytest.fixture(scope="module")
def sdata():
return blobs()


def check_test_results0(extent, min_coordinates, max_coordinates, axes):
for i, ax in enumerate(axes):
assert np.isclose(extent[ax][0], min_coordinates[i])
Expand All @@ -36,7 +40,7 @@ def check_test_results1(extent0, extent1):


@pytest.mark.parametrize("shape_type", ["circles", "polygons", "multipolygons"])
def test_get_extent_shapes(shape_type):
def test_get_extent_shapes(sdata, shape_type):
extent = get_extent(sdata[f"blobs_{shape_type}"])
if shape_type == "circles":
min_coordinates = np.array([98.92618679, 137.62348969])
Expand All @@ -58,7 +62,7 @@ def test_get_extent_shapes(shape_type):


@pytest.mark.parametrize("exact", [True, False])
def test_get_extent_points(exact: bool):
def test_get_extent_points(sdata, exact: bool):
# 2d case
extent = get_extent(sdata["blobs_points"], exact=exact)
check_test_results0(
Expand All @@ -83,7 +87,7 @@ def test_get_extent_points(exact: bool):

@pytest.mark.parametrize("raster_type", ["image", "labels"])
@pytest.mark.parametrize("multiscale", [False, True])
def test_get_extent_raster(raster_type, multiscale):
def test_get_extent_raster(sdata, raster_type, multiscale):
raster = sdata[f"blobs_multiscale_{raster_type}"] if multiscale else sdata[f"blobs_{raster_type}"]

extent = get_extent(raster)
Expand All @@ -95,7 +99,7 @@ def test_get_extent_raster(raster_type, multiscale):
)


def test_get_extent_spatialdata():
def test_get_extent_spatialdata(sdata):
sdata2 = SpatialData(shapes={"circles": sdata["blobs_circles"], "polygons": sdata["blobs_polygons"]})
extent = get_extent(sdata2)
check_test_results0(
Expand All @@ -106,7 +110,7 @@ def test_get_extent_spatialdata():
)


def test_get_extent_invalid_coordinate_system():
def test_get_extent_invalid_coordinate_system(sdata):
# element without the coordinate system
with pytest.raises(ValueError):
_ = get_extent(sdata["blobs_circles"], coordinate_system="invalid")
Expand Down Expand Up @@ -231,7 +235,7 @@ def test_rotate_vector_data(exact):
check_test_results1(extent, expected)


def test_get_extent_affine_circles():
def test_get_extent_affine_circles(sdata):
"""
Verify that the extent of the transformed circles, computed with exact = False, gives the same result as
transforming the bounding box of the original circles
Expand Down Expand Up @@ -304,7 +308,7 @@ def test_get_extent_affine_points3d():
assert np.allclose(transformed_extent_3d["z"], extent_3d["z"])


def test_get_extent_affine_sdata():
def test_get_extent_affine_sdata(sdata):
# let's make a copy since we don't want to modify the original data
sdata2 = SpatialData(
shapes={
Expand Down
7 changes: 0 additions & 7 deletions tests/dataloader/conftest.py
Original file line number Diff line number Diff line change
@@ -1,8 +1 @@
from __future__ import annotations

import os

# Disable numba JIT compilation for dataloader tests. Datashader (used by rasterize) triggers
# numba JIT on first call, costing ~1.4s per worker. Python-mode gives identical results for
# the small test data here — unlike real data, there is no throughput advantage from JIT.
os.environ.setdefault("NUMBA_DISABLE_JIT", "1")
7 changes: 5 additions & 2 deletions tests/io/test_readwrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -957,7 +957,9 @@ def test_incremental_io_attrs(points: SpatialData, sdata_container_format: Spati
assert sdata2.attrs["c"] == 3


cached_sdata_blobs = blobs()
@pytest.fixture(scope="module")
def _cached_sdata_blobs():
return blobs()


@pytest.mark.filterwarnings("ignore:SpatialData is not stored in the most current format:UserWarning")
Expand Down Expand Up @@ -1020,6 +1022,7 @@ def test_delete_element_from_disk(
@pytest.mark.parametrize("sdata_container_format", SDATA_FORMATS)
def test_element_already_on_disk_different_type(
full_sdata,
_cached_sdata_blobs,
element_name: str,
sdata_container_format: SpatialDataContainerFormatType,
) -> None:
Expand All @@ -1037,7 +1040,7 @@ def test_element_already_on_disk_different_type(
wrong_group = "images" if element_type == "tables" else "tables"
del getattr(full_sdata, element_type)[element_name]
getattr(full_sdata, wrong_group)[element_name] = (
getattr(cached_sdata_blobs, wrong_group).values().__iter__().__next__()
getattr(_cached_sdata_blobs, wrong_group).values().__iter__().__next__()
)
ERROR_MSG = "The in-memory object should have a different name."

Expand Down
Loading