Skip to content

Commit cc0f91a

Browse files
authored
Merge pull request #98 from fish-pace/copilot/fix-open-dataset-respecting-method
open_dataset: remove open_dataset_kwargs, add silent param, integer index, datatree preset, and fix show_variables auto-resolution
2 parents d2b0597 + aff590d commit cc0f91a

4 files changed

Lines changed: 384 additions & 63 deletions

File tree

src/point_collocation/core/_open_method.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
)
6464

6565
_VALID_XARRAY_OPEN: frozenset[str] = frozenset({"dataset", "datatree"})
66-
_VALID_PRESETS: frozenset[str] = frozenset({"dataset", "datatree-merge", "auto"})
66+
_VALID_PRESETS: frozenset[str] = frozenset({"dataset", "datatree", "datatree-merge", "auto"})
6767

6868
# Default open kwargs applied to both xr.open_dataset and xr.open_datatree
6969
# unless explicitly overridden by the user.
@@ -163,7 +163,8 @@ def _expand_preset(preset: str) -> dict:
163163
Parameters
164164
----------
165165
preset:
166-
One of ``"dataset"``, ``"datatree-merge"``, or ``"auto"``.
166+
One of ``"dataset"``, ``"datatree"``, ``"datatree-merge"``, or
167+
``"auto"``.
167168
168169
Returns
169170
-------
@@ -179,6 +180,17 @@ def _expand_preset(preset: str) -> dict:
179180
return {
180181
"xarray_open": "dataset",
181182
"open_kwargs": {},
183+
"merge": None,
184+
"coords": "auto",
185+
"set_coords": True,
186+
"dim_renames": None,
187+
"auto_align_phony_dims": None,
188+
}
189+
if preset == "datatree":
190+
return {
191+
"xarray_open": "datatree",
192+
"open_kwargs": {},
193+
"merge": None,
182194
"coords": "auto",
183195
"set_coords": True,
184196
"dim_renames": None,
@@ -252,11 +264,13 @@ def _validate_and_fill_spec(spec: dict) -> dict:
252264
)
253265

254266
if xarray_open == "datatree":
255-
result.setdefault("merge", "all")
256-
result.setdefault("merge_kwargs", {})
257-
elif xarray_open == "dataset" and "merge" in result:
258-
# Allow merge_kwargs alongside merge for the dataset path too.
259-
result.setdefault("merge_kwargs", {})
267+
result.setdefault("merge", None)
268+
if result.get("merge") is not None:
269+
result.setdefault("merge_kwargs", {})
270+
elif xarray_open == "dataset":
271+
result.setdefault("merge", None)
272+
if result.get("merge") is not None:
273+
result.setdefault("merge_kwargs", {})
260274

261275
return result
262276

src/point_collocation/core/engine.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ def matchup(
8888
8989
* ``"dataset"`` — open with ``xarray.open_dataset`` (fast path for
9090
typical flat NetCDF files).
91+
* ``"datatree"`` — open as a raw DataTree without merging groups.
9192
* ``"datatree-merge"`` — open as DataTree and merge all groups into
9293
a flat Dataset (for grouped/HDF5-ish files).
9394
* ``"auto"`` *(default)* — try the fast ``"dataset"`` path first; if
@@ -101,7 +102,7 @@ def matchup(
101102
open_method = {
102103
"xarray_open": "dataset" | "datatree",
103104
"open_kwargs": {},
104-
"merge": "all" | "root" | ["/path/a"],
105+
"merge": None | "all" | "root" | ["/path/a"],
105106
"merge_kwargs": {},
106107
"coords": "auto" | ["Lat", "Lon"] | {"lat": "...", "lon": "..."},
107108
"set_coords": True,

src/point_collocation/core/plan.py

Lines changed: 108 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -188,39 +188,72 @@ def __getitem__(self, idx: int | slice) -> "Plan | Any":
188188

189189
def open_dataset(
190190
self,
191-
result: Any,
192-
open_method: str | dict | None = None,
193-
open_dataset_kwargs: dict[str, Any] | None = None,
194-
) -> "xr.Dataset":
195-
"""Open a single granule result as an :class:`xarray.Dataset`.
191+
result: "int | Any",
192+
open_method: "str | dict | None" = None,
193+
*,
194+
silent: bool = False,
195+
) -> "Any":
196+
"""Open a single granule result as an :class:`xarray.Dataset` or DataTree.
196197
197198
Parameters
198199
----------
199200
result:
200-
A single earthaccess result object, typically obtained via
201-
``plan[n]``.
201+
An integer index into ``plan.results`` (e.g. ``0``), or a
202+
single earthaccess result object obtained via ``plan[n]``.
203+
Using an integer is preferred: ``plan.open_dataset(0)`` is
204+
equivalent to ``plan.open_dataset(plan[0])``.
202205
open_method:
203206
How to open the granule. Accepts the same string presets or
204207
dict spec as :func:`~point_collocation.matchup`. Defaults to
205208
``"auto"`` (try dataset first, fall back to datatree merge).
206-
open_dataset_kwargs:
207-
Keyword arguments forwarded to the xarray open function.
208-
``chunks`` defaults to ``{}`` (lazy/dask loading). ``engine``
209-
defaults to ``"h5netcdf"`` when not specified.
209+
210+
**String presets:**
211+
212+
* ``"dataset"`` — open with ``xarray.open_dataset`` (flat NetCDF).
213+
* ``"datatree"`` — open as a DataTree with all groups; returns the
214+
raw :class:`xarray.DataTree` (or ``datatree.DataTree``) without
215+
merging groups. Equivalent to ``xarray.open_datatree(f)``.
216+
* ``"datatree-merge"`` — open as DataTree and merge all groups into
217+
a flat Dataset.
218+
* ``"auto"`` *(default)* — probe the file first; if lat/lon can be
219+
detected via ``xr.open_dataset``, use that; otherwise fall back to
220+
``"datatree-merge"``. The printed spec shows the **resolved** mode.
221+
222+
Pass open-function kwargs via the ``"open_kwargs"`` key of a
223+
dict spec, e.g.
224+
``open_method={"open_kwargs": {"engine": "netcdf4"}}``.
225+
silent:
226+
When ``False`` (default), print the effective open_method spec
227+
actually used (after normalization and auto-resolution).
228+
Set to ``True`` to suppress this output.
210229
211230
Returns
212231
-------
213-
xarray.Dataset
214-
The caller is responsible for closing the dataset when finished
215-
(e.g. ``ds.close()`` or ``with plan.open_dataset(...) as ds:``).
232+
xarray.Dataset or xarray.DataTree
233+
A flat :class:`xarray.Dataset` for all modes except
234+
``open_method="datatree"`` (or a dict spec with
235+
``xarray_open="datatree"`` and ``merge=None``), which returns the
236+
raw DataTree.
237+
The caller is responsible for closing the returned object when
238+
finished (e.g. ``ds.close()``).
216239
"""
240+
if isinstance(result, int):
241+
n = len(self.results)
242+
if result < 0 or result >= n:
243+
raise IndexError(
244+
f"result index {result} is out of range for a plan with {n} result(s). "
245+
f"Valid indices are 0 to {n - 1}."
246+
)
247+
result = self.results[result]
248+
217249
from point_collocation.core._open_method import (
218250
_apply_coords,
219251
_build_effective_open_kwargs,
220252
_merge_datatree_with_spec,
221253
_normalize_open_method,
222254
_open_and_merge_dataset_groups,
223255
_open_datatree_fn,
256+
_resolve_auto_spec,
224257
)
225258

226259
try:
@@ -234,7 +267,10 @@ def open_dataset(
234267
import xarray as xr
235268

236269
effective_open_method = "auto" if open_method is None else open_method
237-
spec = _normalize_open_method(effective_open_method, open_dataset_kwargs)
270+
spec = _normalize_open_method(effective_open_method)
271+
272+
xarray_open = spec.get("xarray_open", "dataset")
273+
effective_kwargs = _build_effective_open_kwargs(spec.get("open_kwargs", {}))
238274

239275
file_objs = earthaccess.open([result], pqdm_kwargs={"disable": True})
240276
if len(file_objs) != 1:
@@ -243,10 +279,28 @@ def open_dataset(
243279
)
244280
file_obj = file_objs[0]
245281

246-
xarray_open = spec.get("xarray_open", "dataset")
247-
effective_kwargs = _build_effective_open_kwargs(spec.get("open_kwargs", {}))
282+
# For "auto" mode, probe the file first so that the printed spec shows
283+
# the actual resolved mode (e.g. "dataset" or "datatree"), not "auto".
284+
if xarray_open == "auto":
285+
try:
286+
spec = _resolve_auto_spec(file_obj, spec)
287+
xarray_open = spec["xarray_open"]
288+
effective_kwargs = _build_effective_open_kwargs(spec.get("open_kwargs", {}))
289+
except ValueError:
290+
xarray_open = "dataset"
291+
spec = {**spec, "xarray_open": "dataset", "merge": None}
292+
effective_kwargs = _build_effective_open_kwargs(spec.get("open_kwargs", {}))
293+
294+
if not silent:
295+
display_spec = {**spec, "open_kwargs": effective_kwargs}
296+
print(f"open_method: {display_spec!r}")
248297

249298
if xarray_open == "datatree":
299+
merge = spec.get("merge")
300+
if merge is None:
301+
# Return the raw DataTree without merging — like open_datatree(f).
302+
return _open_datatree_fn(file_obj, effective_kwargs)
303+
# merge is "all", "root", or a list: merge groups into a flat Dataset.
250304
dt = _open_datatree_fn(file_obj, effective_kwargs)
251305
try:
252306
ds = _merge_datatree_with_spec(dt, spec)
@@ -259,11 +313,7 @@ def open_dataset(
259313
pass # coords not found; return merged dataset as-is
260314
return ds
261315

262-
if xarray_open in ("dataset", "auto"):
263-
# For the dataset and auto paths, return an open dataset whose
264-
# lifecycle is managed by the caller. Auto tries the fast path
265-
# only; if the caller needs datatree fallback they should use
266-
# open_method="datatree-merge" explicitly.
316+
if xarray_open == "dataset":
267317
merge = spec.get("merge")
268318
if merge is not None:
269319
# Dataset-based group merge: open each group and merge.
@@ -283,8 +333,9 @@ def open_dataset(
283333
def open_mfdataset(
284334
self,
285335
results: "list[Any] | Plan",
286-
open_method: str | dict | None = None,
287-
open_dataset_kwargs: dict[str, Any] | None = None,
336+
open_method: "str | dict | None" = None,
337+
*,
338+
silent: bool = False,
288339
) -> "xr.Dataset":
289340
"""Open multiple granule results as a single :class:`xarray.Dataset`.
290341
@@ -300,10 +351,13 @@ def open_mfdataset(
300351
``"datatree-merge"`` opens each granule as a DataTree, merges
301352
its groups into a flat dataset, then concatenates all granules
302353
along a new ``granule`` dimension. Defaults to ``"auto"``.
303-
open_dataset_kwargs:
304-
Keyword arguments forwarded to the xarray open function.
305-
``chunks`` defaults to ``{}`` (lazy/dask loading). ``engine``
306-
defaults to ``"h5netcdf"`` when not specified.
354+
Pass open-function kwargs via the ``"open_kwargs"`` key of a
355+
dict spec, e.g.
356+
``open_method={"open_kwargs": {"engine": "netcdf4"}}``.
357+
silent:
358+
When ``False`` (default), print the effective open_method spec
359+
actually used (after normalization and defaults are applied).
360+
Set to ``True`` to suppress this output.
307361
308362
Returns
309363
-------
@@ -329,17 +383,21 @@ def open_mfdataset(
329383
import xarray as xr
330384

331385
effective_open_method = "auto" if open_method is None else open_method
332-
spec = _normalize_open_method(effective_open_method, open_dataset_kwargs)
386+
spec = _normalize_open_method(effective_open_method)
387+
388+
xarray_open = spec.get("xarray_open", "dataset")
389+
effective_kwargs = _build_effective_open_kwargs(spec.get("open_kwargs", {}))
390+
391+
if not silent:
392+
display_spec = {**spec, "open_kwargs": effective_kwargs}
393+
print(f"open_method: {display_spec!r}")
333394

334395
result_list = results.results if isinstance(results, Plan) else list(results)
335396
file_objs = earthaccess.open(result_list, pqdm_kwargs={"disable": True})
336397

337-
xarray_open = spec.get("xarray_open", "dataset")
338-
339398
if xarray_open == "datatree":
340399
# Open each granule as a DataTree, merge its groups, then
341400
# concatenate all granule datasets along a new "granule" dim.
342-
effective_kwargs = _build_effective_open_kwargs(spec.get("open_kwargs", {}))
343401
merged_datasets: list[xr.Dataset] = []
344402
for file_obj in file_objs:
345403
dt = _open_datatree_fn(file_obj, effective_kwargs)
@@ -357,7 +415,6 @@ def open_mfdataset(
357415
# separate datasets and merge them, then concatenate all granules
358416
# along a new "granule" dimension.
359417
# Without merge, use xr.open_mfdataset for simplicity.
360-
effective_kwargs = _build_effective_open_kwargs(spec.get("open_kwargs", {}))
361418
merge = spec.get("merge")
362419
if merge is not None:
363420
merged_datasets = []
@@ -380,8 +437,7 @@ def open_mfdataset(
380437

381438
def show_variables(
382439
self,
383-
open_method: str | dict | None = None,
384-
open_dataset_kwargs: dict[str, Any] | None = None,
440+
open_method: "str | dict | None" = None,
385441
) -> None:
386442
"""Print the first granule's groups, dimensions, and variables.
387443
@@ -395,19 +451,16 @@ def show_variables(
395451
the merged flat dataset.
396452
397453
To obtain the full xarray representation programmatically use
398-
``plan.open_dataset(plan[0])`` instead.
454+
``plan.open_dataset(0)`` instead.
399455
400456
Parameters
401457
----------
402458
open_method:
403459
How to open the granule. Accepts the same string presets or
404460
dict spec as :func:`~point_collocation.matchup`. Defaults to
405-
``"auto"``.
406-
open_dataset_kwargs:
407-
Keyword arguments forwarded to the xarray open function when
408-
the h5py fast path is unavailable.
409-
``chunks`` defaults to ``{}`` (lazy/dask loading). ``engine``
410-
defaults to ``"h5netcdf"`` when not specified.
461+
``"auto"``. Pass open-function kwargs via the ``"open_kwargs"``
462+
key of a dict spec, e.g.
463+
``open_method={"open_kwargs": {"engine": "netcdf4"}}``.
411464
412465
Raises
413466
------
@@ -430,7 +483,7 @@ def show_variables(
430483
raise ValueError("No granules in plan — cannot show variables.")
431484

432485
effective_open_method = "auto" if open_method is None else open_method
433-
spec = _normalize_open_method(effective_open_method, open_dataset_kwargs)
486+
spec = _normalize_open_method(effective_open_method)
434487
xarray_open = spec.get("xarray_open", "dataset")
435488

436489
try:
@@ -452,6 +505,18 @@ def show_variables(
452505

453506
effective_kwargs = _build_effective_open_kwargs(spec.get("open_kwargs", {}))
454507

508+
# For "auto" mode, probe the file first so the printed spec shows the
509+
# actual resolved mode (e.g. "dataset"), not "auto".
510+
if xarray_open == "auto":
511+
try:
512+
spec = _resolve_auto_spec(file_obj, spec)
513+
xarray_open = spec["xarray_open"]
514+
effective_kwargs = _build_effective_open_kwargs(spec.get("open_kwargs", {}))
515+
except ValueError:
516+
xarray_open = "dataset"
517+
spec = {**spec, "xarray_open": "dataset", "merge": None}
518+
effective_kwargs = _build_effective_open_kwargs(spec.get("open_kwargs", {}))
519+
455520
# Print the spec summary.
456521
display_spec = {**spec, "open_kwargs": effective_kwargs}
457522
print(f"open_method: {display_spec!r}")

0 commit comments

Comments
 (0)