Skip to content

Commit 271fb04

Browse files
Copiloteeholmes
andcommitted
remove h5py from show_variables: use xarray lazy path for speed and portability
Co-authored-by: eeholmes <2545978+eeholmes@users.noreply.github.com>
1 parent f1f95e5 commit 271fb04

2 files changed

Lines changed: 13 additions & 122 deletions

File tree

src/point_collocation/core/plan.py

Lines changed: 1 addition & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -466,10 +466,8 @@ def show_variables(
466466
If the plan contains no granules.
467467
"""
468468
from point_collocation.core._open_method import (
469-
_GEOLOC_PAIRS,
470469
_apply_coords,
471470
_build_effective_open_kwargs,
472-
_h5py_file_info,
473471
_merge_datatree_with_spec,
474472
_normalize_open_method,
475473
_open_and_merge_dataset_groups,
@@ -521,120 +519,9 @@ def show_variables(
521519
display_spec.setdefault("merge", None)
522520
print(f"open_method: {display_spec!r}")
523521

524-
def _seek_back() -> None:
525-
if hasattr(file_obj, "seek"):
526-
try:
527-
file_obj.seek(0) # type: ignore[attr-defined]
528-
except Exception:
529-
pass
530-
531-
# ---------------------------------------------------------------
532-
# Fast path: use h5py for per-group metadata inspection.
533-
# ---------------------------------------------------------------
534-
h5_info = _h5py_file_info(file_obj)
535-
_seek_back() # ensure file_obj is at start for subsequent xarray opens
536-
if h5_info is not None:
537-
# Determine which groups to include based on merge spec.
538-
merge = spec.get("merge")
539-
if merge == "root":
540-
relevant_groups: set[str] | None = {"/"}
541-
elif isinstance(merge, list):
542-
relevant_groups = set(merge)
543-
else:
544-
# "all" or no merge key: include all discovered groups
545-
relevant_groups = None
546-
547-
all_var_names: set[str] = set()
548-
merged_dims: dict[str, int] = {}
549-
merged_vars: list[str] = []
550-
551-
for group_path, vars_dict in h5_info:
552-
if relevant_groups is not None and group_path not in relevant_groups:
553-
continue
554-
555-
# Collect dimension sizes from variable shapes.
556-
for vinfo in vars_dict.values():
557-
for dim, size in zip(vinfo["dims"], vinfo["shape"]):
558-
if dim and dim != "?":
559-
merged_dims.setdefault(dim, size)
560-
561-
all_var_names.update(vars_dict.keys())
562-
for vname in vars_dict:
563-
if vname not in merged_vars:
564-
merged_vars.append(vname)
565-
566-
# Flat merged summary across all relevant groups.
567-
print(f"\nDimensions: {merged_dims}")
568-
print(f"\nVariables: {merged_vars}")
569-
570-
# Geolocation detection — handles coords as dict, list, or 'auto'.
571-
coords = spec.get("coords", "auto")
572-
set_coords_val = spec.get("set_coords", True)
573-
if isinstance(coords, dict):
574-
lat_name = coords.get("lat")
575-
lon_name = coords.get("lon")
576-
if (
577-
lat_name and lon_name
578-
and lat_name in all_var_names
579-
and lon_name in all_var_names
580-
):
581-
print(f"\nGeolocation: ({lon_name!r}, {lat_name!r}) detected")
582-
else:
583-
print(
584-
f"\nGeolocation: NONE detected with "
585-
f"'coords': {coords!r}, 'set_coords': {set_coords_val!r}."
586-
)
587-
elif isinstance(coords, list):
588-
missing = [n for n in coords if n not in all_var_names]
589-
if not missing:
590-
geo_pairs = [
591-
(lon_n, lat_n)
592-
for lon_n, lat_n in _GEOLOC_PAIRS
593-
if lon_n in coords and lat_n in coords
594-
]
595-
if geo_pairs:
596-
lon_n, lat_n = geo_pairs[0]
597-
print(f"\nGeolocation: ({lon_n!r}, {lat_n!r}) detected")
598-
else:
599-
print(f"\nGeolocation: {coords!r} found in dataset")
600-
else:
601-
print(
602-
f"\nGeolocation: NONE — specified coords {missing!r} not found."
603-
)
604-
else:
605-
# 'auto': check _GEOLOC_PAIRS
606-
geo_pairs_found = [
607-
(lon_n, lat_n)
608-
for lon_n, lat_n in _GEOLOC_PAIRS
609-
if lon_n in all_var_names and lat_n in all_var_names
610-
]
611-
if geo_pairs_found:
612-
lon_n, lat_n = geo_pairs_found[0]
613-
print(f"\nGeolocation: ({lon_n!r}, {lat_n!r}) detected")
614-
else:
615-
print(
616-
f"\nGeolocation: NONE detected with "
617-
f"'coords': {coords!r}, 'set_coords': {set_coords_val!r}. "
618-
"Try open_method='datatree-merge' or specify "
619-
"open_method={'coords': {'lat': '...', 'lon': '...'}}."
620-
)
621-
622-
return
623-
624522
# ---------------------------------------------------------------
625-
# Fallback: use xarray to inspect the file.
523+
# Use xarray to inspect the file (same lazy path as open_dataset).
626524
# ---------------------------------------------------------------
627-
# For "auto" mode, probe the first granule to resolve which open
628-
# path to use (dataset vs. datatree).
629-
if xarray_open == "auto":
630-
try:
631-
spec = _resolve_auto_spec(file_obj, spec)
632-
xarray_open = spec["xarray_open"]
633-
effective_kwargs = _build_effective_open_kwargs(spec.get("open_kwargs", {}))
634-
except ValueError:
635-
xarray_open = "dataset"
636-
spec = {**spec, "xarray_open": "dataset"}
637-
638525
dt = None
639526
merge = spec.get("merge")
640527
if xarray_open == "datatree":

tests/test_plan.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3504,12 +3504,12 @@ def test_open_dataset_lazy_access_after_merge_does_not_raise(
35043504

35053505

35063506
# ---------------------------------------------------------------------------
3507-
# Tests for show_variables with h5py fast path (Task 3)
3507+
# Tests for show_variables (xarray path)
35083508
# ---------------------------------------------------------------------------
35093509

35103510

3511-
class TestShowVariablesH5py:
3512-
"""Tests for show_variables() using h5py fast path."""
3511+
class TestShowVariables:
3512+
"""Tests for show_variables() using the xarray path (no h5py)."""
35133513

35143514
def _make_plan(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch, nc_path: str) -> Plan:
35153515
mock_ea = MagicMock()
@@ -3577,24 +3577,28 @@ def test_show_variables_open_method_includes_merge_key(
35773577
assert first_line.startswith("open_method:")
35783578
assert "'merge'" in first_line
35793579

3580-
def test_show_variables_grouped_file_prints_flat_summary_and_sst(
3580+
def test_show_variables_grouped_file_auto_mode_shows_root_group(
35813581
self,
35823582
tmp_path: pathlib.Path,
35833583
monkeypatch: pytest.MonkeyPatch,
35843584
capsys: pytest.CaptureFixture,
35853585
) -> None:
3586-
"""show_variables() with grouped HDF5 prints flat summary (no Group blocks)."""
3586+
"""show_variables() with grouped HDF5 in auto mode shows root group (no Group blocks).
3587+
3588+
In auto mode xarray opens only the root group, so subgroup variables
3589+
such as 'sst' are not listed. Users should pass
3590+
open_method='datatree-merge' to inspect all groups.
3591+
"""
35873592
nc_path = str(tmp_path / "grouped.nc")
35883593
_make_grouped_nc(nc_path)
35893594
p = self._make_plan(tmp_path, monkeypatch, nc_path)
35903595
p.show_variables()
35913596
captured = capsys.readouterr()
3592-
# Per-group blocks are no longer printed
3597+
# Per-group blocks are not printed
35933598
assert "Group /" not in captured.out
3594-
assert "sst" in captured.out
3599+
# Root group geolocation and summary lines are present
35953600
assert "Geolocation" in captured.out
35963601
assert "lon" in captured.out
3597-
# The flat merged summary should be present
35983602
assert "Dimensions:" in captured.out
35993603
assert "Variables:" in captured.out
36003604
# Dataset Detail section should NOT appear

0 commit comments

Comments
 (0)