Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
88a88bd
Did show as table with clone, now doesn't
NParsonsMO Feb 4, 2026
be1f690
Clone and table show while it's a namelist
NParsonsMO Feb 4, 2026
0bb6cc4
namelist file copied to share directory
NParsonsMO Feb 4, 2026
e887832
Reading dates from environment variable
NParsonsMO Feb 4, 2026
0ee8982
Merge branch 'main' into 130-add-cmip-runs-in-addition-using-rose-sec…
NParsonsMO Feb 5, 2026
8053d8e
Sorting incorrect merge
NParsonsMO Feb 5, 2026
9b1576d
Removing "duplicated" dataset
NParsonsMO Feb 5, 2026
c15ab85
Renaming app and adding .py file with executable permissions
NParsonsMO Feb 6, 2026
956fed6
Removing incorrect way of handling metadata
NParsonsMO Feb 6, 2026
19eab63
Removing incorrect way of handling metadata
NParsonsMO Feb 9, 2026
054ba7b
Rename and adding comments
NParsonsMO Feb 10, 2026
2266765
Renames
NParsonsMO Feb 10, 2026
3874deb
Start on unit testing
NParsonsMO Feb 10, 2026
24e1940
Struggling with env vars pulling through
NParsonsMO Feb 10, 2026
d710ecd
Missed change
NParsonsMO Feb 10, 2026
3013e76
Unit test failing but running
NParsonsMO Feb 10, 2026
e8e872c
Unit tests passing
NParsonsMO Feb 11, 2026
3873dbf
Adding comment to unit test
NParsonsMO Feb 11, 2026
4f4aa86
One more unit test
NParsonsMO Feb 11, 2026
7021040
One more unit test
NParsonsMO Feb 11, 2026
87dc408
Comment
NParsonsMO Feb 11, 2026
4218d96
Yet another unit test
NParsonsMO Feb 11, 2026
3a2a6f0
Eugh
NParsonsMO Feb 11, 2026
40f939e
Final unit test
NParsonsMO Feb 11, 2026
18d184a
Changes from pre-commit
NParsonsMO Feb 11, 2026
74ce3ad
Lines too long
NParsonsMO Feb 11, 2026
983e86b
too much indent
NParsonsMO Feb 11, 2026
c20eba2
Merge branch 'main' into 130-add-cmip-runs-in-addition-using-rose-sec…
NParsonsMO Feb 11, 2026
5d92596
renames
NParsonsMO Feb 11, 2026
fdbe20c
Looking in correct directory
NParsonsMO Feb 13, 2026
b320c7b
Unused imports
NParsonsMO Feb 13, 2026
d0fe188
Improved comment
NParsonsMO Feb 13, 2026
d6c65ef
Now inheriting seems fine
NParsonsMO Feb 13, 2026
a30ecc4
Using strings in rose config to enable help
NParsonsMO Feb 13, 2026
e5714cf
Adding documentation
NParsonsMO Feb 13, 2026
f67ab0d
Copyrights
NParsonsMO Feb 13, 2026
7ed1e0c
Merge branch 'main' into 130-add-cmip-runs-in-addition-using-rose-sec…
NParsonsMO Feb 13, 2026
f6e99e3
Documentation tweaks
NParsonsMO Feb 13, 2026
e39a7cb
Documentation tweaks
NParsonsMO Feb 13, 2026
717e02a
Whitespace
NParsonsMO Feb 13, 2026
1e28611
Rose reorder
NParsonsMO Feb 13, 2026
bca36b0
Merge branch 'main' into 130-add-cmip-runs-in-addition-using-rose-sec…
NParsonsMO Feb 17, 2026
ac18e55
Adding experiment
NParsonsMO Feb 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
242 changes: 242 additions & 0 deletions CMEW/app/add_datasets/bin/add_datasets_to_share.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
#!/usr/bin/env python
# (C) Crown Copyright 2026, Met Office.
# The LICENSE.md file contains full licensing details.
"""
Process and copy the dataset namelist files to a shared directory.

Namelist files are created by rose from the sections in
CMEW/app/add_datasets/rose-app.conf. These may be edited in the GUI.
This application reads the namelist files,
converts the contents to a dictionary of datasets and their facets,
then writes those dictionaries to YAML files in the share directory.
"""
import os
import yaml


def extract_sections_from_naml(naml_fp):
    """
    Read sections from a namelist file and return them as a list of strings.

    Each section is expected to start with a header line (``&<name>``) and
    to be terminated by a line containing only ``/``. The remaining lines
    of a section are joined together with their newlines removed.

    Parameters
    ----------
    naml_fp: str
        The file path to the namelist file containing the datasets.

    Returns
    -------
    datasets: list of str
        A list of strings, each containing the content of a section in the
        namelist file minus the headers and separating characters.
        Sections without any content are omitted.
    """

    # Read the namelist file
    with open(naml_fp, "r") as file:
        content = file.read()

    # Initialise a list to hold the extracted datasets
    extracted_datasets = []

    # Lines collected so far for the section currently being read
    section_lines = []

    # Whether the header of the current section is still to be found
    header_pending = True

    # The extra "/" flushes a final section that has no terminator of its
    # own, or whose terminator is not followed by a newline
    for line in content.split("\n") + ["/"]:
        stripped = line.strip()

        if stripped == "/":
            # End of a section: joining the lines drops the newlines
            dataset = "".join(section_lines)

            # Nothing but whitespace is left after the final terminator
            if dataset.strip():
                extracted_datasets.append(dataset)

            section_lines = []
            header_pending = True

        elif header_pending and stripped.startswith("&"):
            # Drop the header, whatever name this particular section has
            header_pending = False

        else:
            # Blank lines before the header do not count as content
            if stripped:
                header_pending = False
            section_lines.append(line)

    return extracted_datasets


def convert_str_to_facets(section):
    """
    Converts a section of a naml file to a dictionary of its facets.

    Parameters
    ----------
    section: str
        A string containing the amended content of a section of namelist file.
        The content is expected to be in the format of key=value pairs,
        without a header and separated by commas.

    Returns
    -------
    section_dict: dict
        A dictionary containing the facets of the dataset. Keys and values
        are stripped of surrounding whitespace, and double quotes are
        removed from the values.

    Raises
    ------
    ValueError
        If a non-empty facet does not contain an "=".
    """

    # Initialise a dictionary to hold the facets of the dataset
    section_dict = {}

    # Separate the facets in the string to loop over
    for facet in section.split(","):

        # The trailing comma leaves an empty (or whitespace only) facet
        if not facet.strip():
            continue

        # Only split on the first "=" so that values may contain "="
        key, separator, value = facet.partition("=")
        if not separator:
            raise ValueError(
                f"Expected a key=value pair but found {facet!r}"
            )

        # Values are output with quotes around them
        value = value.replace('"', "")

        # Add the key: value pair to the dictionary
        section_dict[key.strip()] = value.strip()

    return section_dict


def add_common_facets(dataset_dict, project="CMIP6"):
    """
    Add the start year, end year and project facets to a dataset.

    The time window is read from the ``START_YEAR`` and ``NUMBER_OF_YEARS``
    environment variables; the end year is the last year of that window.
    The dictionary passed in is modified in place.

    Parameters
    ----------
    dataset_dict: dict
        A dictionary containing the facets of a dataset.
    project: str
        A string indicating the project to which the dataset belongs.
        Default is "CMIP6".

    Returns
    -------
    dataset_dict: dict
        The same dictionary that was passed in, with the common facets added.
    """
    # The time window is defined through the environment
    first_year = int(os.environ["START_YEAR"])
    year_count = int(os.environ["NUMBER_OF_YEARS"])

    # The window is inclusive, hence the "- 1" for the final year
    dataset_dict.update(
        {
            "start_year": first_year,
            "end_year": first_year + year_count - 1,
            "project": project,
        }
    )

    return dataset_dict


def process_naml_file(naml_fp):
    """
    Build the list of datasets described by a namelist file.

    Every section of the file is converted to a dictionary of facets,
    to which the common facets are then added.

    Parameters
    ----------
    naml_fp: str
        The file path to the namelist file containing the datasets.

    Returns
    -------
    datasets: list of dict
        A list of dictionaries, each containing the facets of one dataset.
    """
    # One dictionary per section, in the order the sections were extracted
    return [
        add_common_facets(convert_str_to_facets(section))
        for section in extract_sections_from_naml(naml_fp)
    ]


# NOTE: This function is adapted (with a slight rename) from
# update_recipe_file.py. The plan is to eventually move it to a common
# directory.
def write_dict_to_yaml(dict_to_write, target_path):
    """Dump ``dict_to_write`` as YAML into the file at ``target_path``.

    The file is opened for writing, and the content is written in block
    style with sorted keys.

    Parameters
    ----------
    dict_to_write: dict
        Dictionary containing the content to write.

    target_path: str
        Location at which to write the content.
    """
    # Block style output, with keys in sorted order
    dump_options = {"default_flow_style": False, "sort_keys": True}

    with open(target_path, "w") as yaml_file:
        yaml.dump(dict_to_write, yaml_file, **dump_options)


# NOTE: If write_dict_to_yaml stays in this module, this wrapper could be
# merged into it, as it only builds the target path before delegating.
def write_datasets_to_yaml(datasets, name, target_dir):
    """
    Write the datasets to the file ``<name>.yml`` in the target directory.

    Parameters
    ----------
    datasets: list of dict
        A list of dictionaries, each containing the facets of a dataset.
    name: str
        The name of the YAML file to which the datasets are to be written,
        without the ".yml" extension.
    target_dir: str
        The directory in which the YAML file is to be written.
    """
    # Build the file name first, then its location in the target directory
    yaml_filename = "{}.yml".format(name)
    yaml_path = os.path.join(target_dir, yaml_filename)

    write_dict_to_yaml(datasets, yaml_path)


def dict_namelists_in_work_dir():
    """
    Looks for namelist files in the work directory of the current app.

    The work directory is read from the ``CYLC_TASK_WORK_DIR`` environment
    variable.

    Returns
    -------
    filepaths: dict
        A dictionary of namelist file basenames and their file paths
        based on the filenames ending ".nl". Entries are in sorted
        filename order.

    Raises
    ------
    KeyError
        If ``CYLC_TASK_WORK_DIR`` is not set in the environment.
    """
    suffix = ".nl"

    # Namelist files are written to the work directory of the add_datasets
    # app. Fail straight away if the variable is not set: os.listdir(None)
    # would otherwise silently list the current directory instead.
    work_dir = os.environ["CYLC_TASK_WORK_DIR"]

    # Grab all the namelist files, in case we add more in future.
    # The key is the filename minus ".nl", the value is the full filepath.
    # Sorting makes the order independent of the file system.
    return {
        file[: -len(suffix)]: os.path.join(work_dir, file)
        for file in sorted(os.listdir(work_dir))
        if file.endswith(suffix)
    }


if __name__ == "__main__":
    # The shared directory to write to is given by the environment
    share_dir = os.environ["DATASETS_LIST_DIR"]

    # Make sure the shared directory is there before anything is written
    os.makedirs(share_dir, exist_ok=True)

    # One YAML file is written per namelist file found in the work directory
    namelists = dict_namelists_in_work_dir()
    for yaml_name, namelist_path in namelists.items():
        write_datasets_to_yaml(
            process_naml_file(namelist_path), yaml_name, share_dir
        )
Loading