Commit 8d4b71f

Add IFS Humidity Data (#27)
* Add IFS uncompressed data
* Add IFS humidity data set
* Break down datasets into 2 chunks
1 parent 960d5dd commit 8d4b71f

File tree: 4 files changed (+112 / -14 lines)


README.md (1 addition, 0 deletions)

```diff
@@ -19,6 +19,7 @@ To download all the data used for the benchmark run the following commands:
 uv run python -m climatebenchpress.data_loader.datasets.esa_biomass_cci
 uv run python -m climatebenchpress.data_loader.datasets.cams
 uv run python -m climatebenchpress.data_loader.datasets.ifs_uncompressed
+uv run python -m climatebenchpress.data_loader.datasets.ifs_humidity
 uv run python -m climatebenchpress.data_loader.datasets.nextgems
 uv run python -m climatebenchpress.data_loader.datasets.cmip6.access_ta
 uv run python -m climatebenchpress.data_loader.datasets.cmip6.access_tos
```
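Alongside the CLI entry point added to the README, the dataset class introduced in this commit (shown in full further below) can also be exercised programmatically. This is only a hedged sketch, not part of the commit: the download location is a placeholder, and it assumes access to the IFS hplp data and the earthkit regridding dependencies is already configured.

```python
from pathlib import Path

from climatebenchpress.data_loader.datasets.ifs_humidity import IFSHumidityDataset

# Placeholder location; any writable directory works.
download_path = Path("data/ifs-humidity")
download_path.mkdir(parents=True, exist_ok=True)

# Fetch the humidity field, regrid it to a regular 0.25 degree grid,
# and write it to a Zarr store inside download_path.
IFSHumidityDataset.download(download_path, progress=True)

# Re-open the Zarr store as a chunked, CF-annotated xarray.Dataset.
ds = IFSHumidityDataset.open(download_path)
print(ds["q"].dims)  # expected: ("time", "level", "latitude", "longitude")
```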

src/climatebenchpress/data_loader/datasets/all.py (1 addition, 0 deletions)

```diff
@@ -4,5 +4,6 @@
 from .cmip6.all import *
 from .era5 import *
 from .esa_biomass_cci import *
+from .ifs_humidity import *
 from .ifs_uncompressed import *
 from .nextgems import *
```
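Because the new module defines `__all__ = ["IFSHumidityDataset"]`, this star import re-exports the dataset class from the aggregate `all` module next to the existing datasets. A small illustrative check (the import path follows the repository layout shown above):

```python
# Illustrative only: the new dataset is reachable through the aggregate module.
from climatebenchpress.data_loader.datasets.all import IFSHumidityDataset

print(IFSHumidityDataset.name)  # "ifs-humidity"
```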
src/climatebenchpress/data_loader/datasets/ifs_humidity.py (new file, 80 additions, 0 deletions)

```python
__all__ = ["IFSHumidityDataset"]


import argparse
from pathlib import Path

import xarray as xr

from .. import (
    monitor,
    open_downloaded_canonicalized_dataset,
    open_downloaded_tiny_canonicalized_dataset,
)
from .abc import Dataset
from .ifs_uncompressed import load_hplp_data, regrid_to_regular


class IFSHumidityDataset(Dataset):
    """Dataset for the humidity field of the uncompressed IFS data.

    Contains data from the [hplp](https://apps.ecmwf.int/ifs-experiments/rd/hplp/)
    experiment from the Integrated Forecasting System (IFS) model. Crucially,
    this dataset contains uncompressed 64-bit floating point data.
    """

    name = "ifs-humidity"

    @staticmethod
    def download(download_path: Path, progress: bool = True):
        donefile = download_path / "download.done"
        if donefile.exists():
            return

        ds = load_hplp_data(leveltype="ml", gridtype="reduced_gg", step=0)
        ds = ds[["q"]]
        ds_regridded = regrid_to_regular(
            ds,
            in_grid={"grid": "O400"},
            out_grid={"grid": [0.25, 0.25]},
        )
        downloadfile = download_path / "ifs_humidity.zarr"
        with monitor.progress_bar(progress):
            ds_regridded.to_zarr(
                downloadfile, mode="w", encoding=dict(), compute=False
            ).compute()

    @staticmethod
    def open(download_path: Path) -> xr.Dataset:
        ds = xr.open_dataset(download_path / "ifs_humidity.zarr")
        num_levels = ds["level"].size
        ds = ds.isel(time=slice(0, 1)).chunk(
            {
                "latitude": -1,
                "longitude": -1,
                "time": -1,
                "level": (num_levels // 2) + 1,
            }
        )

        # Needed to make the dataset CF-compliant.
        ds.longitude.attrs["axis"] = "X"
        ds.latitude.attrs["axis"] = "Y"
        ds.level.attrs["axis"] = "Z"
        ds.time.attrs["standard_name"] = "time"
        return ds


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--basepath", type=Path, default=Path())
    args = parser.parse_args()

    ds = open_downloaded_canonicalized_dataset(
        IFSHumidityDataset, basepath=args.basepath
    )
    open_downloaded_tiny_canonicalized_dataset(
        IFSHumidityDataset, basepath=args.basepath
    )

    for v, da in ds.items():
        print(f"- {v}: {da.dims}")
```

src/climatebenchpress/data_loader/datasets/ifs_uncompressed.py (30 additions, 14 deletions)

```diff
@@ -126,14 +126,27 @@ def regrid_to_regular(ds, in_grid, out_grid):
     """
     out_data = {var: [] for var in ds.data_vars}
     for var in ds.data_vars:
+        var_has_level = "level" in ds[var].dims
         for time in ds.time:
-            r = earthkit.regrid.interpolate(
-                ds[var].sel(time=time).values,
-                in_grid=in_grid,
-                out_grid=out_grid,
-                method="linear",
-            )
-            out_data[var].append(r)
+            if var_has_level:
+                level_data = []
+                for level in ds[var].level:
+                    r = earthkit.regrid.interpolate(
+                        ds[var].sel(time=time, level=level).values,
+                        in_grid=in_grid,
+                        out_grid=out_grid,
+                        method="linear",
+                    )
+                    level_data.append(r)
+                out_data[var].append(level_data)
+            else:
+                r = earthkit.regrid.interpolate(
+                    ds[var].sel(time=time).values,
+                    in_grid=in_grid,
+                    out_grid=out_grid,
+                    method="linear",
+                )
+                out_data[var].append(r)
 
     dx = out_grid["grid"][0]
     assert (
@@ -146,13 +159,16 @@ def regrid_to_regular(ds, in_grid, out_grid):
         "latitude": lats,
         "longitude": lons,
     }
-    out_ds = xr.Dataset(
-        {
-            var: (("time", "latitude", "longitude"), out_data[var])
-            for var in ds.data_vars
-        },
-        coords=coords,
-    )
+
+    data_vars = {}
+    for var in ds.data_vars:
+        if "level" in ds[var].dims:
+            coords["level"] = ds[var].level
+            data_vars[var] = (("time", "level", "latitude", "longitude"), out_data[var])
+        else:
+            data_vars[var] = (("time", "latitude", "longitude"), out_data[var])
+
+    out_ds = xr.Dataset(data_vars, coords=coords)
     return out_ds
```
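For a variable that carries a vertical dimension, the modified `regrid_to_regular` collects the regridded fields as a nested list (outer index: time, inner index: level) of 2-D latitude/longitude arrays, which `xr.Dataset` then stacks into a 4-D variable. A minimal sketch with NumPy stand-ins for the regridded fields; the sizes and variable name are illustrative, not taken from the commit:

```python
import numpy as np
import xarray as xr

# Toy sizes for illustration only.
n_time, n_level, n_lat, n_lon = 2, 3, 4, 5

# Mimic out_data[var] for a variable with a "level" dimension:
# a list over time of lists over level of 2-D (lat, lon) arrays.
out = [
    [np.random.rand(n_lat, n_lon) for _ in range(n_level)]
    for _ in range(n_time)
]

# xarray stacks the nested list into a single 4-D variable.
ds = xr.Dataset(
    {"q": (("time", "level", "latitude", "longitude"), out)},
    coords={
        "time": np.arange(n_time),
        "level": np.arange(n_level),
        "latitude": np.linspace(90.0, -90.0, n_lat),
        "longitude": np.linspace(0.0, 359.75, n_lon),
    },
)
print(ds["q"].shape)  # (2, 3, 4, 5)
```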