Skip to content

Commit 9011cfd

Browse files
Merge pull request #373 from scverse/faster-imports
Faster imports
2 parents 6cc3afc + 0dd816a commit 9011cfd

File tree

14 files changed

+279
-97
lines changed

14 files changed

+279
-97
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ _version.py
3636
node_modules/
3737
.code-workspace
3838

39+
# memray report
40+
*.bin
41+
3942
# test datasets (e.g. Xenium ones)
4043
# symlinks
4144
data

asv.conf.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
"project": "spatialdata-io",
44
"project_url": "https://github.com/scverse/spatialdata-io",
55
"repo": ".",
6-
"branches": ["image-reader-chunkwise"],
6+
"branches": ["faster-imports", "main"],
77
"dvcs": "git",
88
"environment_type": "virtualenv",
9-
"pythons": ["3.12"],
9+
"pythons": ["3.13"],
1010
"build_command": [],
1111
"install_command": ["python -m pip install {build_dir}[test]"],
1212
"uninstall_command": ["python -m pip uninstall -y {project}"],
@@ -17,7 +17,7 @@
1717
"hash_length": 8,
1818
"build_cache_size": 2,
1919
"install_timeout": 600,
20-
"repeat": 3,
20+
"repeat": 5,
2121
"processes": 1,
2222
"attribute_selection": ["time_*", "peakmem_*"]
2323
}

benchmarks/benchmark_image.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from spatialdata._logging import logger
1717
from xarray import DataArray
1818

19-
from spatialdata_io import image # type: ignore[attr-defined]
19+
from spatialdata_io import image
2020

2121
# =============================================================================
2222
# CONFIGURATION - Edit these values to match your setup

benchmarks/benchmark_imports.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
"""ASV benchmarks for spatialdata-io import times.
2+
3+
Measures how long it takes to import the package and individual readers
4+
in a fresh subprocess, isolating import overhead from runtime work.
5+
6+
Running (with the current environment, no virtualenv rebuild):
7+
# Quick sanity check (single iteration):
8+
asv run --python=same --quick --show-stderr -v -b ImportBenchmark
9+
10+
# Full benchmark on current commit:
11+
asv run --python=same --show-stderr -v -b ImportBenchmark
12+
13+
# Compare two branches (using --python=same, one-liner):
14+
git stash && git checkout main && pip install -e . -q \
15+
&& asv run --python=same -v -b ImportBenchmark \
16+
&& git checkout faster-imports && git stash pop && pip install -e . -q \
17+
&& asv run --python=same -v -b ImportBenchmark
18+
# Then view the comparison:
19+
asv compare $(git rev-parse main) $(git rev-parse faster-imports)
20+
21+
# Compare two branches (let ASV build virtualenvs, slower first run):
22+
asv continuous --show-stderr -v -b ImportBenchmark main faster-imports
23+
24+
# Generate an HTML report:
25+
asv publish && asv preview
26+
"""
27+
28+
import subprocess
29+
import sys
30+
31+
32+
def _import_time(statement: str) -> float:
33+
"""Time an import in a fresh subprocess. Returns seconds."""
34+
code = f"import time; t0=time.perf_counter(); {statement}; print(time.perf_counter()-t0)"
35+
result = subprocess.run(
36+
[sys.executable, "-c", code],
37+
capture_output=True,
38+
text=True,
39+
)
40+
if result.returncode != 0:
41+
raise RuntimeError(result.stderr)
42+
return float(result.stdout.strip())
43+
44+
45+
class ImportBenchmark:
    """Import-time benchmarks for spatialdata-io.

    Each ``time_*`` method is a separate ASV benchmark.
    They run in isolated subprocesses so that one import
    does not warm the cache for the next.

    NOTE(review): for ``time_*`` benchmarks ASV records the wall time of the
    whole method call, which here includes starting the child interpreter;
    the float returned by ``_import_time`` (import time measured inside the
    child) is presumably not what ASV reports. Confirm against the ASV docs
    before comparing absolute numbers.
    """

    # ASV settings tuned for subprocess-based import timing:
    timeout = 120  # seconds before ASV kills a benchmark; generous since each
    # call spawns a subprocess (~2s each × 5 repeats = ~10s worst case)
    repeat = 5  # number of timing samples ASV collects; more than one because
    # import times have variance from OS caching / disk I/O / background load
    number = 1  # calls per sample; kept at 1 so every sample is exactly one
    # fresh-subprocess import
    warmup_time = 0  # seconds of warm-up iterations before timing; disabled because
    # each call is already a cold subprocess — warming up the parent
    # process is meaningless
    processes = 1  # number of processes ASV launches to run this benchmark; one is
    # enough because every sample already runs in its own fresh interpreter
    # (NOTE(review): wording based on the ASV benchmark-attribute docs — confirm)

    # -- top-level package -------------------------------------------------

    def time_import_spatialdata_io(self) -> float:
        """Wall time: ``import spatialdata_io`` (lazy, no readers loaded)."""
        return _import_time("import spatialdata_io")

    # -- single reader via the public API ----------------------------------

    def time_from_spatialdata_io_import_xenium(self) -> float:
        """Wall time: ``from spatialdata_io import xenium``."""
        return _import_time("from spatialdata_io import xenium")

    def time_from_spatialdata_io_import_visium(self) -> float:
        """Wall time: ``from spatialdata_io import visium``."""
        return _import_time("from spatialdata_io import visium")

    def time_from_spatialdata_io_import_visium_hd(self) -> float:
        """Wall time: ``from spatialdata_io import visium_hd``."""
        return _import_time("from spatialdata_io import visium_hd")

    def time_from_spatialdata_io_import_merscope(self) -> float:
        """Wall time: ``from spatialdata_io import merscope``."""
        return _import_time("from spatialdata_io import merscope")

    def time_from_spatialdata_io_import_cosmx(self) -> float:
        """Wall time: ``from spatialdata_io import cosmx``."""
        return _import_time("from spatialdata_io import cosmx")

    # -- key dependencies (reference) --------------------------------------

    def time_import_spatialdata(self) -> float:
        """Wall time: ``import spatialdata`` (reference)."""
        return _import_time("import spatialdata")

    def time_import_anndata(self) -> float:
        """Wall time: ``import anndata`` (reference)."""
        return _import_time("import anndata")

benchmarks/benchmark_xenium.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939

4040
from spatialdata import SpatialData
4141

42-
from spatialdata_io import xenium # type: ignore[attr-defined]
42+
from spatialdata_io import xenium
4343

4444
# =============================================================================
4545
# CONFIGURATION - Edit these paths to match your setup

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,8 @@ lint.ignore = [
185185
# Unused imports
186186
"F401",
187187
]
188+
[tool.ruff.lint.per-file-ignores]
189+
"src/spatialdata_io/__init__.py" = ["I001"]
188190

189191
[tool.jupytext]
190192
formats = "ipynb,md"

src/spatialdata_io/__init__.py

Lines changed: 77 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,36 @@
1+
from importlib import import_module
12
from importlib.metadata import version
3+
from typing import Any, TYPE_CHECKING
24

3-
from spatialdata_io.converters.generic_to_zarr import generic_to_zarr
4-
from spatialdata_io.readers.codex import codex
5-
from spatialdata_io.readers.cosmx import cosmx
6-
from spatialdata_io.readers.curio import curio
7-
from spatialdata_io.readers.dbit import dbit
8-
from spatialdata_io.readers.generic import generic, geojson, image
9-
from spatialdata_io.readers.macsima import macsima
10-
from spatialdata_io.readers.mcmicro import mcmicro
11-
from spatialdata_io.readers.merscope import merscope
12-
from spatialdata_io.readers.seqfish import seqfish
13-
from spatialdata_io.readers.steinbock import steinbock
14-
from spatialdata_io.readers.stereoseq import stereoseq
15-
from spatialdata_io.readers.visium import visium
16-
from spatialdata_io.readers.visium_hd import visium_hd
17-
from spatialdata_io.readers.xenium import (
18-
xenium,
19-
xenium_aligned_image,
20-
xenium_explorer_selection,
21-
)
22-
23-
_readers_technologies = [
5+
__version__ = version("spatialdata-io")
6+
7+
_LAZY_IMPORTS: dict[str, str] = {
8+
# readers
9+
"codex": "spatialdata_io.readers.codex",
10+
"cosmx": "spatialdata_io.readers.cosmx",
11+
"curio": "spatialdata_io.readers.curio",
12+
"dbit": "spatialdata_io.readers.dbit",
13+
"macsima": "spatialdata_io.readers.macsima",
14+
"mcmicro": "spatialdata_io.readers.mcmicro",
15+
"merscope": "spatialdata_io.readers.merscope",
16+
"seqfish": "spatialdata_io.readers.seqfish",
17+
"steinbock": "spatialdata_io.readers.steinbock",
18+
"stereoseq": "spatialdata_io.readers.stereoseq",
19+
"visium": "spatialdata_io.readers.visium",
20+
"visium_hd": "spatialdata_io.readers.visium_hd",
21+
"xenium": "spatialdata_io.readers.xenium",
22+
"xenium_aligned_image": "spatialdata_io.readers.xenium",
23+
"xenium_explorer_selection": "spatialdata_io.readers.xenium",
24+
# readers file types
25+
"generic": "spatialdata_io.readers.generic",
26+
"geojson": "spatialdata_io.readers.generic",
27+
"image": "spatialdata_io.readers.generic",
28+
# converters
29+
"generic_to_zarr": "spatialdata_io.converters.generic_to_zarr",
30+
}
31+
32+
__all__ = [
33+
# readers
2434
"codex",
2535
"cosmx",
2636
"curio",
@@ -34,28 +44,57 @@
3444
"visium",
3545
"visium_hd",
3646
"xenium",
37-
]
38-
39-
_readers_file_types = [
47+
"xenium_aligned_image",
48+
"xenium_explorer_selection",
49+
# readers file types
4050
"generic",
41-
"image",
4251
"geojson",
43-
]
44-
45-
_converters = [
52+
"image",
53+
# converters
4654
"generic_to_zarr",
4755
]
4856

4957

50-
__all__ = (
51-
[
52-
"xenium_aligned_image",
53-
"xenium_explorer_selection",
54-
]
55-
+ _readers_technologies
56-
+ _readers_file_types
57-
+ _converters
58-
)
58+
def __getattr__(name: str) -> Any:
    """Lazily resolve a public reader/converter on first attribute access.

    Python calls this module-level hook (PEP 562) only after the normal
    lookup in the module namespace has failed. Names listed in
    ``_LAZY_IMPORTS`` are imported from their defining submodule on demand.

    Parameters
    ----------
    name
        Attribute requested on the ``spatialdata_io`` module.

    Returns
    -------
    The object called ``name`` taken from the module ``_LAZY_IMPORTS[name]``.

    Raises
    ------
    AttributeError
        If ``name`` is not one of the lazily importable names.
    """
    module_path = _LAZY_IMPORTS.get(name)
    if module_path is None:
        # The hook is only reached when ``name`` is absent from the module
        # namespace, so there is nothing else to fall back on.
        raise AttributeError(f"Module 'spatialdata_io' has no attribute '{name}'")
    val = getattr(import_module(module_path), name)
    # Cache in the module namespace so later lookups bypass this hook.
    globals()[name] = val
    return val
5970

6071

61-
__version__ = version("spatialdata-io")
72+
def __dir__() -> list[str]:
    """Return the names reported by ``dir()`` on this module: ``__all__`` plus ``__version__``."""
    return [*__all__, "__version__"]
74+
75+
76+
if TYPE_CHECKING:
77+
# readers
78+
from spatialdata_io.readers.codex import codex
79+
from spatialdata_io.readers.cosmx import cosmx
80+
from spatialdata_io.readers.curio import curio
81+
from spatialdata_io.readers.dbit import dbit
82+
from spatialdata_io.readers.macsima import macsima
83+
from spatialdata_io.readers.mcmicro import mcmicro
84+
from spatialdata_io.readers.merscope import merscope
85+
from spatialdata_io.readers.seqfish import seqfish
86+
from spatialdata_io.readers.steinbock import steinbock
87+
from spatialdata_io.readers.stereoseq import stereoseq
88+
from spatialdata_io.readers.visium import visium
89+
from spatialdata_io.readers.visium_hd import visium_hd
90+
from spatialdata_io.readers.xenium import (
91+
xenium,
92+
xenium_aligned_image,
93+
xenium_explorer_selection,
94+
)
95+
96+
# readers file types
97+
from spatialdata_io.readers.generic import generic, geojson, image
98+
99+
# converters
100+
from spatialdata_io.converters.generic_to_zarr import generic_to_zarr

0 commit comments

Comments
 (0)