Skip to content

Commit 23b4f79

Browse files
authored
Merge pull request #110 from fish-pace/copilot/clean-up-plan-show-variables
show_variables/open_dataset/open_mfdataset: always print `merge` key, remove per-group output blocks, use xarray instead of h5py
2 parents e135041 + fbfb7f6 commit 23b4f79

2 files changed

Lines changed: 37 additions & 135 deletions

File tree

src/point_collocation/core/plan.py

Lines changed: 6 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,7 @@ def open_dataset(
293293

294294
if not silent:
295295
display_spec = {**spec, "open_kwargs": effective_kwargs}
296+
display_spec.setdefault("merge", None)
296297
print(f"open_method: {display_spec!r}")
297298

298299
if xarray_open == "datatree":
@@ -390,6 +391,7 @@ def open_mfdataset(
390391

391392
if not silent:
392393
display_spec = {**spec, "open_kwargs": effective_kwargs}
394+
display_spec.setdefault("merge", None)
393395
print(f"open_method: {display_spec!r}")
394396

395397
result_list = results.results if isinstance(results, Plan) else list(results)
@@ -466,10 +468,8 @@ def show_variables(
466468
If the plan contains no granules.
467469
"""
468470
from point_collocation.core._open_method import (
469-
_GEOLOC_PAIRS,
470471
_apply_coords,
471472
_build_effective_open_kwargs,
472-
_h5py_file_info,
473473
_merge_datatree_with_spec,
474474
_normalize_open_method,
475475
_open_and_merge_dataset_groups,
@@ -515,136 +515,15 @@ def show_variables(
515515
spec = {**spec, "xarray_open": "dataset", "merge": None}
516516
effective_kwargs = _build_effective_open_kwargs(spec.get("open_kwargs", {}))
517517

518-
# Print the spec summary.
518+
# Print the spec summary. Ensure "merge" is always present so the
519+
# printed spec is unambiguous (it may be absent for the "auto" preset).
519520
display_spec = {**spec, "open_kwargs": effective_kwargs}
521+
display_spec.setdefault("merge", None)
520522
print(f"open_method: {display_spec!r}")
521523

522-
def _seek_back() -> None:
523-
if hasattr(file_obj, "seek"):
524-
try:
525-
file_obj.seek(0) # type: ignore[attr-defined]
526-
except Exception:
527-
pass
528-
529-
# ---------------------------------------------------------------
530-
# Fast path: use h5py for per-group metadata inspection.
531-
# ---------------------------------------------------------------
532-
h5_info = _h5py_file_info(file_obj)
533-
_seek_back() # ensure file_obj is at start for subsequent xarray opens
534-
if h5_info is not None:
535-
# Determine which groups to include based on merge spec.
536-
merge = spec.get("merge")
537-
if merge == "root":
538-
relevant_groups: set[str] | None = {"/"}
539-
elif isinstance(merge, list):
540-
relevant_groups = set(merge)
541-
else:
542-
# "all" or no merge key: include all discovered groups
543-
relevant_groups = None
544-
545-
all_var_names: set[str] = set()
546-
merged_dims: dict[str, int] = {}
547-
merged_vars: list[str] = []
548-
549-
for group_path, vars_dict in h5_info:
550-
if relevant_groups is not None and group_path not in relevant_groups:
551-
continue
552-
553-
print(f"\nGroup {group_path}")
554-
# Collect dimension sizes from variable shapes.
555-
dim_sizes: dict[str, int] = {}
556-
for vinfo in vars_dict.values():
557-
for dim, size in zip(vinfo["dims"], vinfo["shape"]):
558-
if dim and dim != "?":
559-
dim_sizes.setdefault(dim, size)
560-
merged_dims.setdefault(dim, size)
561-
if dim_sizes:
562-
print(f" Dimensions: {dim_sizes}")
563-
564-
if vars_dict:
565-
var_strs = [
566-
f"{vname}{vinfo['dims']}"
567-
for vname, vinfo in vars_dict.items()
568-
]
569-
print(f" Variables: {', '.join(var_strs)}")
570-
571-
all_var_names.update(vars_dict.keys())
572-
for vname in vars_dict:
573-
if vname not in merged_vars:
574-
merged_vars.append(vname)
575-
576-
# Flat merged summary across all relevant groups.
577-
print(f"\nDimensions: {merged_dims}")
578-
print(f"\nVariables: {merged_vars}")
579-
580-
# Geolocation detection — handles coords as dict, list, or 'auto'.
581-
coords = spec.get("coords", "auto")
582-
set_coords_val = spec.get("set_coords", True)
583-
if isinstance(coords, dict):
584-
lat_name = coords.get("lat")
585-
lon_name = coords.get("lon")
586-
if (
587-
lat_name and lon_name
588-
and lat_name in all_var_names
589-
and lon_name in all_var_names
590-
):
591-
print(f"\nGeolocation: ({lon_name!r}, {lat_name!r}) detected")
592-
else:
593-
print(
594-
f"\nGeolocation: NONE detected with "
595-
f"'coords': {coords!r}, 'set_coords': {set_coords_val!r}."
596-
)
597-
elif isinstance(coords, list):
598-
missing = [n for n in coords if n not in all_var_names]
599-
if not missing:
600-
geo_pairs = [
601-
(lon_n, lat_n)
602-
for lon_n, lat_n in _GEOLOC_PAIRS
603-
if lon_n in coords and lat_n in coords
604-
]
605-
if geo_pairs:
606-
lon_n, lat_n = geo_pairs[0]
607-
print(f"\nGeolocation: ({lon_n!r}, {lat_n!r}) detected")
608-
else:
609-
print(f"\nGeolocation: {coords!r} found in dataset")
610-
else:
611-
print(
612-
f"\nGeolocation: NONE — specified coords {missing!r} not found."
613-
)
614-
else:
615-
# 'auto': check _GEOLOC_PAIRS
616-
geo_pairs_found = [
617-
(lon_n, lat_n)
618-
for lon_n, lat_n in _GEOLOC_PAIRS
619-
if lon_n in all_var_names and lat_n in all_var_names
620-
]
621-
if geo_pairs_found:
622-
lon_n, lat_n = geo_pairs_found[0]
623-
print(f"\nGeolocation: ({lon_n!r}, {lat_n!r}) detected")
624-
else:
625-
print(
626-
f"\nGeolocation: NONE detected with "
627-
f"'coords': {coords!r}, 'set_coords': {set_coords_val!r}. "
628-
"Try open_method='datatree-merge' or specify "
629-
"open_method={'coords': {'lat': '...', 'lon': '...'}}."
630-
)
631-
632-
return
633-
634524
# ---------------------------------------------------------------
635-
# Fallback: use xarray to inspect the file.
525+
# Use xarray to inspect the file (same lazy path as open_dataset).
636526
# ---------------------------------------------------------------
637-
# For "auto" mode, probe the first granule to resolve which open
638-
# path to use (dataset vs. datatree).
639-
if xarray_open == "auto":
640-
try:
641-
spec = _resolve_auto_spec(file_obj, spec)
642-
xarray_open = spec["xarray_open"]
643-
effective_kwargs = _build_effective_open_kwargs(spec.get("open_kwargs", {}))
644-
except ValueError:
645-
xarray_open = "dataset"
646-
spec = {**spec, "xarray_open": "dataset"}
647-
648527
dt = None
649528
merge = spec.get("merge")
650529
if xarray_open == "datatree":

tests/test_plan.py

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3504,12 +3504,12 @@ def test_open_dataset_lazy_access_after_merge_does_not_raise(
35043504

35053505

35063506
# ---------------------------------------------------------------------------
3507-
# Tests for show_variables with h5py fast path (Task 3)
3507+
# Tests for show_variables (xarray path)
35083508
# ---------------------------------------------------------------------------
35093509

35103510

3511-
class TestShowVariablesH5py:
3512-
"""Tests for show_variables() using h5py fast path."""
3511+
class TestShowVariables:
3512+
"""Tests for show_variables() using the xarray path (no h5py)."""
35133513

35143514
def _make_plan(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch, nc_path: str) -> Plan:
35153515
mock_ea = MagicMock()
@@ -3559,23 +3559,46 @@ def test_show_variables_flat_file_prints_dims_vars_geo(
35593559
assert "lon" in captured.out
35603560
assert "Dataset Detail" not in captured.out
35613561

3562-
def test_show_variables_grouped_file_prints_groups_and_sst(
3562+
def test_show_variables_open_method_includes_merge_key(
35633563
self,
35643564
tmp_path: pathlib.Path,
35653565
monkeypatch: pytest.MonkeyPatch,
35663566
capsys: pytest.CaptureFixture,
35673567
) -> None:
3568-
"""show_variables() with grouped HDF5 prints group info and sst variable."""
3568+
"""open_method line always includes 'merge' key, even when it is None."""
3569+
nc_path = str(tmp_path / "flat.nc")
3570+
_make_l3_dataset([-90.0, 0.0, 90.0], [-180.0, 0.0, 180.0]).to_netcdf(
3571+
nc_path, engine="netcdf4"
3572+
)
3573+
p = self._make_plan(tmp_path, monkeypatch, nc_path)
3574+
p.show_variables()
3575+
captured = capsys.readouterr()
3576+
first_line = captured.out.splitlines()[0]
3577+
assert first_line.startswith("open_method:")
3578+
assert "'merge'" in first_line
3579+
3580+
def test_show_variables_grouped_file_auto_mode_shows_root_group(
3581+
self,
3582+
tmp_path: pathlib.Path,
3583+
monkeypatch: pytest.MonkeyPatch,
3584+
capsys: pytest.CaptureFixture,
3585+
) -> None:
3586+
"""show_variables() with grouped HDF5 in auto mode shows root group (no Group blocks).
3587+
3588+
In auto mode xarray opens only the root group, so subgroup variables
3589+
such as 'sst' are not listed. Users should pass
3590+
open_method='datatree-merge' to inspect all groups.
3591+
"""
35693592
nc_path = str(tmp_path / "grouped.nc")
35703593
_make_grouped_nc(nc_path)
35713594
p = self._make_plan(tmp_path, monkeypatch, nc_path)
35723595
p.show_variables()
35733596
captured = capsys.readouterr()
3574-
assert "Group /" in captured.out
3575-
assert "sst" in captured.out
3597+
# Per-group blocks are not printed
3598+
assert "Group /" not in captured.out
3599+
# Root group geolocation and summary lines are present
35763600
assert "Geolocation" in captured.out
35773601
assert "lon" in captured.out
3578-
# The flat merged summary should also be present
35793602
assert "Dimensions:" in captured.out
35803603
assert "Variables:" in captured.out
35813604
# Dataset Detail section should NOT appear

0 commit comments

Comments
 (0)