Skip to content

Commit 01eabb5

Browse files
authored
Merge pull request #466 from bouweandela/add-api-notebook
Add API demo notebook
2 parents 1993cb2 + f6fe0e8 commit 01eabb5

File tree

8 files changed

+482
-42
lines changed

8 files changed

+482
-42
lines changed

.readthedocs.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
# Required
66
version: 2
77

8-
98
# Set the version of Python and other tools you might need
109
build:
1110
os: ubuntu-24.04
@@ -20,4 +19,4 @@ build:
2019
- make fetch-test-data
2120
- uv run ref datasets ingest --source-type cmip6 $READTHEDOCS_REPOSITORY_PATH/tests/test-data/sample-data/CMIP6
2221
# Run a strict build
23-
- NO_COLOR=1 uv run mkdocs build --strict --site-dir $READTHEDOCS_OUTPUT/html
22+
- unset NO_COLOR; FORCE_COLOR=1 uv run mkdocs build --strict --site-dir $READTHEDOCS_OUTPUT/html

changelog/466.docs.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add a Jupyter notebook showing how to use the OpenAPI-described API of the CMIP7 Assessment Fast Track website.

docs/how-to-guides/dataset-selection.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# extension: .py
66
# format_name: percent
77
# format_version: '1.3'
8-
# jupytext_version: 1.16.4
8+
# jupytext_version: 1.17.1
99
# kernelspec:
1010
# display_name: Python 3 (ipykernel)
1111
# language: python
@@ -121,7 +121,7 @@ def display_groups(frames):
121121

122122

123123
# %% [markdown]
124-
124+
#
125125
# ### Facet filters
126126
# The simplest data request is a `FacetFilter`.
127127
# This filters the data catalog to include only the data required for a given diagnostic run.
@@ -141,7 +141,7 @@ def display_groups(frames):
141141
display_groups(groups)
142142

143143
# %% [markdown]
144-
144+
#
145145
# ### Group by
146146
# The `group_by` field can be used to split the filtered data into multiple groups,
147147
# each of which has a unique set of values in the specified facets.
@@ -166,15 +166,13 @@ def display_groups(frames):
166166

167167

168168
# %% [markdown]
169-
169+
#
170170
# ### Constraints
171171
# A data requirement can optionally specify `Constraint`s.
172172
# These constraints are applied to each group independently to modify a group or ignore it.
173-
# All constraints must hold for a group to be executed.
173+
# A group must not be empty after modification for it to be executed.
174174
#
175-
# One type of constraint is a `GroupOperation`.
176-
# This constraint allows for the manipulation of a given group.
177-
# This can be used to remove datasets or include additional datasets from the catalog,
175+
# Constraints can be used to remove datasets or include additional datasets from the catalog,
178176
# which is useful to select common datasets for all groups (e.g. cell areas).
179177
#
180178
# Below, an `IncludeTas` GroupOperation is included which adds the corresponding `tas` dataset to each group.
@@ -187,6 +185,8 @@ def apply(self, group: pd.DataFrame, data_catalog: pd.DataFrame) -> pd.DataFrame
187185
tas = data_catalog[
188186
(data_catalog["variable_id"] == "tas")
189187
& data_catalog["source_id"].isin(group["source_id"].unique())
188+
& data_catalog["experiment_id"].isin(group["experiment_id"].unique())
189+
& data_catalog["member_id"].isin(group["member_id"].unique())
190190
]
191191

192192
return pd.concat([group, tas])
@@ -195,7 +195,7 @@ def apply(self, group: pd.DataFrame, data_catalog: pd.DataFrame) -> pd.DataFrame
195195
data_requirement = DataRequirement(
196196
source_type=SourceDatasetType.CMIP6,
197197
filters=(FacetFilter(facets={"frequency": "mon"}),),
198-
group_by=("variable_id", "source_id", "member_id"),
198+
group_by=("variable_id", "source_id", "member_id", "experiment_id"),
199199
constraints=(IncludeTas(),),
200200
)
201201

@@ -205,26 +205,26 @@ def apply(self, group: pd.DataFrame, data_catalog: pd.DataFrame) -> pd.DataFrame
205205

206206

207207
# %% [markdown]
208-
# In addition to operations, a `GroupValidator` constraint can be specified.
209-
# This validator is used to determine if a group is valid or not.
210-
# If the validator does not return True, then the group is excluded from the list of groups for execution.
208+
# In addition to operations adding datasets, it is also possible to remove datasets.
211209

212210

213211
# %%
214212
class AtLeast2:
215-
def validate(self, group: pd.DataFrame) -> bool:
216-
return len(group["instance_id"].drop_duplicates()) >= 2
213+
def apply(self, group: pd.DataFrame, data_catalog: pd.DataFrame) -> pd.DataFrame:
214+
if len(group["variable_id"].drop_duplicates()) >= 2:
215+
return group
216+
return group.loc[[]]
217217

218218

219219
# %% [markdown]
220220
# Here we add a simple validator which ensures that at least 2 unique datasets are present.
221-
# This removes the tas-only group from above.
221+
# This removes the groups from above where tas was not available.
222222

223223
# %%
224224
data_requirement = DataRequirement(
225225
source_type=SourceDatasetType.CMIP6,
226226
filters=(FacetFilter(facets={"frequency": "mon"}),),
227-
group_by=("variable_id", "source_id", "member_id"),
227+
group_by=("variable_id", "source_id", "member_id", "experiment_id"),
228228
constraints=(IncludeTas(), AtLeast2()),
229229
)
230230

Lines changed: 285 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,285 @@
1+
# ---
2+
# jupyter:
3+
# jupytext:
4+
# text_representation:
5+
# extension: .py
6+
# format_name: percent
7+
# format_version: '1.3'
8+
# jupytext_version: 1.17.1
9+
# kernelspec:
10+
# display_name: Python 3 (ipykernel)
11+
# language: python
12+
# name: python3
13+
# ---
14+
15+
# %% [markdown]
16+
# # Using pre-computed results
17+
#
18+
# Results computed by the CMIP7 Assessment Fast Track Rapid Evaluation Framework are available from
19+
# the website: https://dashboard.climate-ref.org and the associated API: https://api.climate-ref.org.
20+
# This API provides an [OpenAPI](https://www.openapis.org) schema that documents what queries are available.
21+
# The API documentation can be viewed at: https://api.climate-ref.org/docs.
22+
#
23+
# This Jupyter notebook shows how to use this API to download pre-computed results and use those to do
24+
# your own analyses.
25+
26+
# %% [markdown]
27+
# ## Generate and install
28+
#
29+
# We start by generating and installing a Python package for interacting with the API
30+
# from the OpenAPI-compatible [schema](https://api.climate-ref.org/api/v1/openapi.json).
31+
32+
# %%
33+
# !uvx --quiet --from openapi-python-client openapi-python-client generate --url https://api.climate-ref.org/api/v1/openapi.json --meta setup --output-path climate_ref_client --overwrite
34+
35+
# %%
36+
# !pip install --quiet ./climate_ref_client
37+
38+
# %% [markdown]
39+
# ## Set up the notebook
40+
#
41+
# Import some libraries and load the [rich](https://rich.readthedocs.io/en/latest/introduction.html)
42+
# Jupyter notebook extension for conveniently viewing the large data structures produced by the client
43+
# package.
44+
45+
# %%
46+
from pathlib import Path
47+
48+
import cartopy.crs
49+
import matplotlib.pyplot as plt
50+
import pandas as pd
51+
import requests
52+
import seaborn as sns
53+
import xarray as xr
54+
from climate_rapid_evaluation_framework_client import Client
55+
from climate_rapid_evaluation_framework_client.api.diagnostics import (
56+
diagnostics_list,
57+
diagnostics_list_metric_values,
58+
)
59+
from climate_rapid_evaluation_framework_client.api.executions import executions_get
60+
from climate_rapid_evaluation_framework_client.models.metric_value_type import (
61+
MetricValueType,
62+
)
63+
from IPython.display import Markdown
64+
from pandas_indexing import formatlevel
65+
66+
# %%
67+
# %load_ext rich
68+
69+
# %% [markdown]
70+
# ## View the available diagnostics
71+
#
72+
# We start by setting up a client for interacting with the server:
73+
74+
# %%
75+
client = Client("https://api.climate-ref.org")
76+
77+
# %% [markdown]
78+
# Retrieve the available diagnostics from the server, and inspect the first one:
79+
80+
# %%
81+
diagnostics = diagnostics_list.sync(client=client).data
82+
diagnostics[0]
83+
84+
# %% [markdown]
85+
# To get an idea of what is available, we create a list of all diagnostics
86+
# with short descriptions (a full overview is available in Appendix C of
87+
# [Hoffman et al., 2025](https://doi.org/10.5194/egusphere-2025-2685)):
88+
89+
# %%
90+
txt = ""
91+
for diagnostic in sorted(diagnostics, key=lambda diagnostic: diagnostic.name):
92+
title = f"### {diagnostic.name}"
93+
description = diagnostic.description.strip()
94+
if not description.endswith("."):
95+
description += "."
96+
if diagnostic.aft_link:
97+
description += f" {diagnostic.aft_link.short_description.strip()}"
98+
if not description.endswith("."):
99+
description += "."
100+
if (aft_description := diagnostic.aft_link.description.strip()) != "nan":
101+
description += f" {aft_description}"
102+
if not description.endswith("."):
103+
description += "."
104+
txt += f"{title}\n{description}\n\n"
105+
Markdown(txt)
106+
107+
# %% [markdown]
108+
# ## Metrics
109+
#
110+
# Many of the diagnostics provide "metric" values, single values that describe some property
111+
# of a model. Here we show how to access these values and create a plot.
112+
113+
# %%
114+
# Select the "Atlantic Meridional Overturning Circulation (RAPID)"
115+
# diagnostic as an example
116+
diagnostic_name = "Atlantic Meridional Overturning Circulation (RAPID)"
117+
diagnostic = next(d for d in diagnostics if d.name == diagnostic_name)
118+
# Inspect an example value.
119+
diagnostics_list_metric_values.sync(
120+
diagnostic.provider.slug,
121+
diagnostic.slug,
122+
value_type=MetricValueType.SCALAR,
123+
client=client,
124+
).data[0]
125+
126+
# %% [markdown]
127+
# Read the metric values into a Pandas DataFrame:
128+
129+
# %%
130+
df = (
131+
pd.DataFrame(
132+
metric.dimensions.additional_properties | {"value": metric.value}
133+
for metric in diagnostics_list_metric_values.sync(
134+
diagnostic.provider.slug,
135+
diagnostic.slug,
136+
value_type=MetricValueType.SCALAR,
137+
client=client,
138+
).data
139+
)
140+
.replace("None", pd.NA)
141+
.drop_duplicates()
142+
)
143+
# Drop a few columns that appear to be the same for all entries of
144+
# particular diagnostic.
145+
df.drop(columns=["experiment_id", "metric", "region"], inplace=True)
146+
# Use the columns that do not contain the metric value for indexing
147+
df.set_index([c for c in df.columns if c != "value"], inplace=True)
148+
df
149+
150+
# %% [markdown]
151+
# and create a portrait diagram:
152+
153+
# %%
154+
# Use the median metric value for models with multiple ensemble
155+
# members to keep the figure readable.
156+
df = df.groupby(level=["source_id", "grid_label", "statistic"]).median()
157+
# Convert df to a "2D" dataframe for use with the seaborn heatmap plot
158+
df_2D = (
159+
formatlevel(df, model="{source_id}.{grid_label}", drop=True)
160+
.reset_index()
161+
.pivot(columns="statistic", index="model", values="value")
162+
)
163+
figure, ax = plt.subplots(figsize=(5, 8))
164+
_ = sns.heatmap(
165+
df_2D / df_2D.median(),
166+
annot=df_2D,
167+
cmap="viridis",
168+
linewidths=0.5,
169+
ax=ax,
170+
cbar_kws={"label": "Color indicates value relative to the median"},
171+
)
172+
# %% [markdown]
173+
# ## Series
174+
#
175+
# Many of the diagnostics provide "series" values, a range of values along with an index
176+
# that describe some property of a model. Here we show how to access these values and create a plot.
177+
178+
# %%
179+
# Select the "Sea Ice Area Basic Metrics" diagnostic as an example
180+
diagnostic_name = "Sea Ice Area Basic Metrics"
181+
diagnostic = next(d for d in diagnostics if d.name == diagnostic_name)
182+
# Inspect an example series value:
183+
diagnostics_list_metric_values.sync(
184+
diagnostic.provider.slug,
185+
diagnostic.slug,
186+
value_type=MetricValueType.SERIES,
187+
client=client,
188+
).data[0]
189+
190+
# %% [markdown]
191+
# Read the metric values into a Pandas DataFrame:
192+
193+
# %%
194+
statistic_name = "20-year average seasonal cycle"
195+
value_name = "sea ice area (1e6 km2)"
196+
df = pd.DataFrame(
197+
metric.dimensions.additional_properties | {value_name: value, "month": int(month)}
198+
for metric in diagnostics_list_metric_values.sync(
199+
diagnostic.provider.slug,
200+
diagnostic.slug,
201+
value_type=MetricValueType.SERIES,
202+
client=client,
203+
).data
204+
if metric.dimensions.additional_properties["statistic"].startswith(statistic_name)
205+
for value, month in zip(metric.values, metric.index)
206+
if value < 1e10 # Ignore some invalid values.
207+
)
208+
df
209+
210+
# %% [markdown]
211+
# and create a plot:
212+
213+
# %%
214+
_ = sns.relplot(
215+
data=df.sort_values("source_id"),
216+
x="month",
217+
y=value_name,
218+
col="region",
219+
hue="source_id",
220+
kind="line",
221+
)
222+
# %% [markdown]
223+
# ## Files
224+
#
225+
# Many of the diagnostics produce NetCDF files that can be used for further analysis or custom plotting.
226+
# We will look at the global warming levels diagnostic and create our own figure using the available data.
227+
#
228+
# Each diagnostic can be run (executed) multiple times with different input data. The global warming
229+
# levels diagnostic has been executed several times, leading to multiple "execution groups":
230+
231+
# %%
232+
diagnostic_name = "Climate at Global Warming Levels"
233+
diagnostic = next(d for d in diagnostics if d.name == diagnostic_name)
234+
[executions_get.sync(g, client=client).key for g in diagnostic.execution_groups]
235+
236+
# %% [markdown]
237+
# Let's select the "ssp585" scenario and look at the output files that were produced:
238+
239+
# %%
240+
for group in diagnostic.execution_groups:
241+
execution = executions_get.sync(group, client=client)
242+
if execution.key.endswith("ssp585"):
243+
ssp585_outputs = execution.latest_execution.outputs
244+
break
245+
else:
246+
msg = "Failed to find the ssp585 execution group"
247+
raise ValueError(msg)
248+
[o.filename for o in ssp585_outputs]
249+
250+
# %% [markdown]
251+
# Select one of the output files and inspect it:
252+
253+
# %%
254+
filename = "tas/plot_gwl_stats/CMIP6_mm_mean_2.0.nc"
255+
file = next(f for f in ssp585_outputs if f.filename.endswith(filename))
256+
file
257+
258+
# %% [markdown]
259+
# Download the file and open it with `xarray`:
260+
261+
# %%
262+
local_file = Path(Path(file.filename).name)
263+
local_file.write_bytes(requests.get(file.url, timeout=120).content)
264+
ds = xr.open_dataset(local_file).drop_vars("cube_label")
265+
ds
266+
267+
# %% [markdown]
268+
# Create our own plot:
269+
270+
# %%
271+
plot = ds.tas.plot.contourf(
272+
cmap="viridis",
273+
vmin=-30,
274+
vmax=30,
275+
levels=11,
276+
figsize=(12, 5),
277+
transform=cartopy.crs.PlateCarree(),
278+
subplot_kws={
279+
"projection": cartopy.crs.Orthographic(
280+
central_longitude=-100,
281+
central_latitude=40,
282+
),
283+
},
284+
)
285+
_ = plot.axes.coastlines()

0 commit comments

Comments
 (0)