Skip to content

Commit 168e2cb

Browse files
authored
#372: Revert "Implement parallel, parameterised REF/EVAL (multi-run) standardisation"
Revert "366 Implement parallel, parameterised REF/EVAL (multi-run) standardisation". This reverts commit a19822e.
1 parent 364fc1e commit 168e2cb

14 files changed

Lines changed: 144 additions & 400 deletions

File tree

CMEW/app/configure_recipe/bin/configure_recipe.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@ def main():
2727
# Write the updated configuration values to the file defined by
2828
# 'USER_CONFIG_PATH'.
2929
user_config_path = values["USER_CONFIG_PATH"]
30-
os.makedirs(os.path.dirname(user_config_path), exist_ok=True)
3130
write_yaml(user_config_path, user_config_file_contents)
3231

3332

CMEW/app/configure_standardise/bin/configure_standardise.sh

Lines changed: 76 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -5,27 +5,86 @@
55
BASH_XTRACEFD=1
66
set -xeuo pipefail
77

8+
echo "[INFO] Running configure_standardise for REF and EVAL runs"
9+
810
# ---------------------------------------------------------------------------
911
# 0. Defensive programming
1012
# ---------------------------------------------------------------------------
11-
: "${RUN_LABEL:?RUN_LABEL must be set (e.g. ref/eval)}"
12-
: "${REQUEST_PATH:?REQUEST_PATH must be set}"
13-
: "${MODEL_ID:?MODEL_ID must be set}"
14-
: "${SUITE_ID:?SUITE_ID must be set}"
15-
: "${CALENDAR:?CALENDAR must be set}"
16-
: "${VARIABLES_PATH:?VARIABLES_PATH must be set}"
13+
: "${REQUEST_PATH_REF:?REQUEST_PATH_REF must be set}"
14+
: "${REQUEST_PATH_EVAL:?REQUEST_PATH_EVAL must be set}"
15+
16+
echo "[INFO] Using REQUEST_PATH_REF=${REQUEST_PATH_REF}"
17+
echo "[INFO] Using REQUEST_PATH_EVAL=${REQUEST_PATH_EVAL}"
18+
19+
# Require REF_* and base MODEL_* metadata in the environment
20+
: "${REF_MODEL_ID:?REF_MODEL_ID must be set}"
21+
: "${REF_SUITE_ID:?REF_SUITE_ID must be set}"
22+
: "${REF_CALENDAR:?REF_CALENDAR must be set}"
23+
: "${MODEL_ID:?MODEL_ID (evaluation) must be set}"
24+
: "${SUITE_ID:?SUITE_ID (evaluation) must be set}"
25+
: "${CALENDAR:?CALENDAR (evaluation) must be set}"
26+
27+
# ---------------------------------------------------------------------------
28+
# 1. Create variables.txt once (shared by both runs)
29+
# ---------------------------------------------------------------------------
30+
echo "[INFO] Creating variables file from ESMValTool recipe"
31+
cmew-esmvaltool-env create_variables_file.py
32+
33+
# ---------------------------------------------------------------------------
34+
# 2. Helper: configure CDDS request + directory structure for a given run
35+
# ---------------------------------------------------------------------------
36+
create_for_run() {
37+
local RUN_LABEL="$1"
38+
39+
local run_model_id=""
40+
local run_suite_id=""
41+
local run_calendar=""
42+
local run_variant=""
43+
local run_request=""
44+
45+
case "${RUN_LABEL}" in
46+
REF)
47+
run_model_id="${REF_MODEL_ID}"
48+
run_suite_id="${REF_SUITE_ID}"
49+
run_calendar="${REF_CALENDAR}"
50+
run_variant="${REF_VARIANT_LABEL:-}"
51+
run_request="${REQUEST_PATH_REF}"
52+
;;
53+
EVAL)
54+
# Evaluation run uses the base MODEL_ID/SUITE_ID/CALENDAR/VARIANT_LABEL
55+
run_model_id="${MODEL_ID}"
56+
run_suite_id="${SUITE_ID}"
57+
run_calendar="${CALENDAR}"
58+
run_variant="${VARIANT_LABEL:-}"
59+
run_request="${REQUEST_PATH_EVAL}"
60+
;;
61+
*)
62+
echo "[ERROR] Unknown run label: ${RUN_LABEL}" >&2
63+
exit 1
64+
;;
65+
esac
1766

18-
echo "[INFO] RUN_LABEL=${RUN_LABEL}"
19-
echo "[INFO] MODEL_ID=${MODEL_ID}"
20-
echo "[INFO] SUITE_ID=${SUITE_ID}"
21-
echo "[INFO] CALENDAR=${CALENDAR}"
22-
echo "[INFO] VARIANT_LABEL=${VARIANT_LABEL}"
67+
(
68+
# Subshell: don't leak these exports back out into the caller.
69+
export MODEL_ID="${run_model_id}"
70+
export SUITE_ID="${run_suite_id}"
71+
export CALENDAR="${run_calendar}"
72+
export VARIANT_LABEL="${run_variant}"
73+
export REQUEST_PATH="${run_request}"
2374

24-
echo "[INFO] Running configure_standardise for RUN_LABEL=${RUN_LABEL}"
25-
echo "[INFO] REQUEST_PATH=${REQUEST_PATH}"
75+
echo "[INFO] Creating request for ${RUN_LABEL} run at: ${REQUEST_PATH}"
76+
cmew-esmvaltool-env create_request_file.py
2677

27-
test -f "${VARIABLES_PATH}" || { echo "[ERROR] variables file missing: ${VARIABLES_PATH}" >&2; exit 2; }
78+
echo "[INFO] Creating CDDS directory structure for ${RUN_LABEL} run"
79+
cmew-standardise-env create_cdds_directory_structure "${REQUEST_PATH}"
80+
cmew-standardise-env prepare_generate_variable_list "${REQUEST_PATH}"
81+
)
82+
}
83+
84+
# ---------------------------------------------------------------------------
85+
# 3. Configure both runs
86+
# ---------------------------------------------------------------------------
87+
create_for_run REF
88+
create_for_run EVAL
2889

29-
cmew-esmvaltool-env create_request_file.py
30-
cmew-standardise-env create_cdds_directory_structure "${REQUEST_PATH}"
31-
cmew-standardise-env prepare_generate_variable_list "${REQUEST_PATH}"
90+
echo "[INFO] configure_standardise completed for REF and EVAL runs"

CMEW/app/configure_standardise/bin/create_request_file.py

Lines changed: 37 additions & 193 deletions
Original file line number | Diff line number | Diff line change
@@ -1,203 +1,40 @@
11
#!/usr/bin/env python
22
# (C) Crown Copyright 2024-2026, Met Office.
33
# The LICENSE.md file contains full licensing details.
4-
54
"""
65
Generates the request configuration file from the ESMValTool recipe.
7-
8-
Supports per-run metadata via RUNS_CONFIG_PATH + RUN_LABEL,
9-
while keeping backward compatibility with legacy env vars
10-
MODEL_ID/SUITE_ID/CALENDAR/VARIANT_LABEL.
11-
12-
Naming requirement:
13-
- In ALL modes (legacy and multi-run), set workflow_basename = suite_id
14-
so CDDS paths are cdds_<suite_id>.
156
"""
16-
177
import configparser
18-
import json
198
import os
209
from pathlib import Path
21-
from typing import Any, Dict, Optional
22-
23-
24-
def _resolve_runs_config_path() -> Optional[Path]:
25-
raw = os.environ.get("RUNS_CONFIG_PATH", "").strip()
26-
if not raw:
27-
return None
28-
29-
candidate = Path(os.path.expandvars(os.path.expanduser(raw)))
30-
31-
if candidate.is_absolute() and candidate.exists():
32-
return candidate
33-
34-
share_dir = os.environ.get("CYLC_WORKFLOW_SHARE_DIR", "").strip()
35-
if share_dir:
36-
p = Path(share_dir) / candidate
37-
if p.exists():
38-
return p
39-
40-
try:
41-
repo_root = Path(__file__).resolve().parents[3]
42-
p = repo_root / candidate
43-
if p.exists():
44-
return p
45-
except Exception:
46-
pass
47-
48-
if not candidate.is_absolute():
49-
candidate = (Path.cwd() / candidate).resolve()
50-
return candidate
51-
52-
53-
def _load_runs_config_file() -> Dict[str, Any]:
54-
path = _resolve_runs_config_path()
55-
if path is None:
56-
return {}
57-
58-
if not path.exists():
59-
raise FileNotFoundError(
60-
f"RUNS_CONFIG_PATH points to missing file: {path}"
61-
)
62-
63-
raw = path.read_text(encoding="utf-8")
64-
runs = json.loads(raw)
65-
66-
if not isinstance(runs, dict):
67-
raise ValueError(
68-
f"Runs config in {path} must be a JSON object, got {type(runs)}"
69-
)
70-
71-
normalized: Dict[str, Any] = {}
72-
for k, v in runs.items():
73-
if not isinstance(k, str):
74-
raise ValueError(
75-
f"Runs config keys must be strings, got key={k!r}"
76-
)
77-
normalized[k.strip().lower()] = v
78-
79-
return normalized
80-
81-
82-
def _get_required_env(name: str) -> str:
83-
val = os.environ.get(name, "").strip()
84-
if not val:
85-
raise KeyError(f"{name} must be set")
86-
return val
87-
88-
89-
def _normalize_run_entry(run_key: str, cfg: Any) -> Dict[str, str]:
90-
if not isinstance(cfg, dict):
91-
raise ValueError(
92-
f"Runs config entry for '{run_key}' must be an object, \
93-
got {type(cfg)}"
94-
)
95-
96-
model_id = cfg.get("model_id") or cfg.get("MODEL_ID")
97-
suite_id = cfg.get("suite_id") or cfg.get("SUITE_ID")
98-
calendar = cfg.get("calendar") or cfg.get("CALENDAR")
99-
variant_label = cfg.get("variant_label") or cfg.get("VARIANT_LABEL")
100-
101-
missing = [
102-
k
103-
for k, v in {
104-
"model_id": model_id,
105-
"suite_id": suite_id,
106-
"calendar": calendar,
107-
"variant_label": variant_label,
108-
}.items()
109-
if not (isinstance(v, str) and v.strip())
110-
]
111-
if missing:
112-
raise KeyError(
113-
f"Missing keys for run '{run_key}' \
114-
in runs config: {missing}"
115-
)
116-
117-
return {
118-
"model_id": str(model_id).strip(),
119-
"suite_id": str(suite_id).strip(),
120-
"calendar": str(calendar).strip(),
121-
"variant_label": str(variant_label).strip(),
122-
}
12310

12411

125-
def _resolve_run_metadata(run_label: str) -> Dict[str, str]:
126-
"""
127-
Resolve per-run metadata in priority order:
128-
1) runs.json (RUNS_CONFIG_PATH) if configured
129-
2) Legacy env vars (MODEL_ID/SUITE_ID/CALENDAR/VARIANT_LABEL)
12+
def create_request():
13+
"""Retrieve CDDS request information from Rose suite configuration.
13014
131-
RUN_LABEL may be:
132-
- a key in runs.json ("ref", "eval", ...)
133-
- a suite_id value ("u-xxxxx") in runs.json entries
15+
Returns
16+
-------
17+
configparser.ConfigParser()
18+
CDDS request configuration.
13419
"""
135-
runs_cfg = _load_runs_config_file()
136-
137-
if runs_cfg:
138-
if run_label in runs_cfg:
139-
return _normalize_run_entry(run_label, runs_cfg[run_label])
140-
141-
for key, cfg in runs_cfg.items():
142-
if not isinstance(cfg, dict):
143-
continue
144-
suite_id = cfg.get("suite_id") or cfg.get("SUITE_ID")
145-
if isinstance(suite_id, str) and suite_id.strip() == run_label:
146-
return _normalize_run_entry(key, cfg)
147-
148-
raise KeyError(
149-
f"RUN_LABEL='{run_label}' not found as a key in runs config "
150-
f"and did not match any suite_id. Available keys: \
151-
{sorted(runs_cfg.keys())}"
152-
)
153-
154-
# Legacy fallback
155-
return {
156-
"model_id": _get_required_env("MODEL_ID"),
157-
"suite_id": _get_required_env("SUITE_ID"),
158-
"calendar": _get_required_env("CALENDAR"),
159-
"variant_label": _get_required_env("VARIANT_LABEL"),
160-
}
161-
162-
163-
def create_request() -> configparser.ConfigParser:
164-
start_year = int(_get_required_env("START_YEAR"))
165-
number_of_years = int(_get_required_env("NUMBER_OF_YEARS"))
166-
end_year = start_year + number_of_years
167-
168-
run_label = os.environ.get("RUN_LABEL", "").strip().lower()
169-
170-
if run_label:
171-
meta = _resolve_run_metadata(run_label)
172-
else:
173-
# Legacy mode: do NOT require RUN_LABEL (unit tests rely on this)
174-
meta = {
175-
"model_id": _get_required_env("MODEL_ID"),
176-
"suite_id": _get_required_env("SUITE_ID"),
177-
"calendar": _get_required_env("CALENDAR"),
178-
"variant_label": _get_required_env("VARIANT_LABEL"),
179-
}
180-
181-
# REQUIREMENT: always use suite_id for basename
182-
workflow_basename = meta["suite_id"]
183-
20+
end_year = int(os.environ["START_YEAR"]) + int(
21+
os.environ["NUMBER_OF_YEARS"]
22+
)
18423
request = configparser.ConfigParser()
185-
18624
request["metadata"] = {
18725
"base_date": "1850-01-01T00:00:00",
18826
"branch_method": "no parent",
189-
"calendar": meta["calendar"],
27+
"calendar": os.environ["CALENDAR"],
19028
"experiment_id": "amip",
191-
"institution_id": _get_required_env("INSTITUTION_ID"),
29+
"institution_id": os.environ["INSTITUTION_ID"],
19230
"license": "GCModelDev model data is licensed under the Open Government License v3 (https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/)", # noqa: E501
19331
"mip": "ESMVal",
19432
"mip_era": "GCModelDev",
195-
"model_id": meta["model_id"],
33+
"model_id": os.environ["MODEL_ID"],
19634
"model_type": "AGCM AER",
19735
"sub_experiment_id": "none",
198-
"variant_label": meta["variant_label"],
36+
"variant_label": os.environ["VARIANT_LABEL"],
19937
}
200-
20138
request["common"] = {
20239
"external_plugin": "",
20340
"external_plugin_location": "",
@@ -206,42 +43,49 @@ def create_request() -> configparser.ConfigParser:
20643
),
20744
"mode": "relaxed",
20845
"package": "round-1",
209-
"root_proc_dir": _get_required_env("ROOT_PROC_DIR"),
210-
"root_data_dir": _get_required_env("ROOT_DATA_DIR"),
211-
"workflow_basename": workflow_basename,
46+
"root_proc_dir": os.environ["ROOT_PROC_DIR"],
47+
"root_data_dir": os.environ["ROOT_DATA_DIR"],
48+
"workflow_basename": os.environ["SUITE_ID"],
21249
}
213-
21450
request["data"] = {
21551
"end_date": f"{end_year}-01-01T00:00:00",
21652
"mass_data_class": "crum",
21753
"model_workflow_branch": "trunk",
218-
"model_workflow_id": meta["suite_id"],
54+
"model_workflow_id": os.environ["SUITE_ID"],
21955
"model_workflow_revision": "not used except with data request",
220-
"start_date": f"{start_year}-01-01T00:00:00",
56+
"start_date": f"{os.environ['START_YEAR']}-01-01T00:00:00",
22157
"streams": "apm",
222-
"variable_list_file": _get_required_env("VARIABLES_PATH"),
58+
"variable_list_file": os.environ["VARIABLES_PATH"],
59+
}
60+
request["misc"] = {
61+
"atmos_timestep": "1200",
22362
}
224-
225-
request["misc"] = {"atmos_timestep": "1200"}
226-
22763
request["conversion"] = {
22864
"mip_convert_plugin": "UKESM1",
22965
"skip_archive": "True",
23066
"cylc_args": "--no-detach -v",
23167
}
232-
23368
return request
23469

23570

236-
def write_request(
237-
request: configparser.ConfigParser, target_path: Path
238-
) -> None:
239-
with open(target_path, mode="w", encoding="utf-8") as file_handle:
71+
def write_request(request, target_path):
72+
"""Write the request configuration to a file at ``target_path``.
73+
74+
Parameters
75+
----------
76+
request : configparser.ConfigParser()
77+
The request configuration.
78+
79+
target_path: Path
80+
The full path to the file
81+
where the request configuration will be written.
82+
"""
83+
with open(target_path, mode="w") as file_handle:
24084
request.write(file_handle)
24185

24286

243-
def main() -> None:
244-
target_path = Path(_get_required_env("REQUEST_PATH"))
87+
def main():
88+
target_path = Path(os.environ["REQUEST_PATH"])
24589
request = create_request()
24690
write_request(request, target_path)
24791

0 commit comments

Comments
 (0)