-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy path__main__.py
More file actions
55 lines (46 loc) · 1.85 KB
/
__main__.py
File metadata and controls
55 lines (46 loc) · 1.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import logging
from pathlib import Path
import hydra
from omegaconf import DictConfig
from .. import DATASETS
from ..utils import run_in_env
from . import CFG_YAML, TASKS
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
@hydra.main(version_base=None, config_path=str(CFG_YAML.parent), config_name=CFG_YAML.stem)
def main(cfg: DictConfig):
    """Assemble an ``aces-cli --multirun`` command for ``cfg.task`` and hand it to ``run_in_env``.

    The task criteria file is looked up in ``TASKS``. The predicates file is taken from
    ``cfg.dataset_predicates_path`` when that key is set to a truthy value, and otherwise looked up in
    ``DATASETS`` under ``cfg.dataset``.

    Args:
        cfg: Hydra configuration. Keys read here: ``task``, ``dataset``, ``dataset_dir``, ``output_dir``,
            ``do_overwrite``, and the optional ``dataset_predicates_path``.

    Raises:
        ValueError: If ``cfg.task`` is not a key of ``TASKS``, or if no ``dataset_predicates_path`` is
            provided and ``cfg.dataset`` is not a key of ``DATASETS``.
    """
    if cfg.task not in TASKS:
        raise ValueError(f"Task {cfg.task} not currently configured. Configured tasks: {TASKS.keys()}")
    criteria_fp = TASKS[cfg.task]["criteria_fp"]

    # An explicitly supplied predicates file takes precedence over the per-dataset registry.
    local_predicates = cfg.get("dataset_predicates_path", None)
    if local_predicates:
        logger.info(f"Using provided (local) predicates path: {local_predicates}")
        predicates_fp = Path(local_predicates)
    elif cfg.dataset in DATASETS:
        predicates_fp = DATASETS[cfg.dataset]["predicates"]
    else:
        raise ValueError(
            f"Dataset {cfg.dataset} not currently configured! Available datasets: {DATASETS.keys()}"
        )

    logger.info(f"Running task {cfg.task} on dataset {cfg.dataset}")

    data_root = f"{cfg.dataset_dir}/data"
    out_dir = cfg.output_dir

    # The braces and dollar sign are backslash-escaped so the literal text is emitted into the command
    # rather than being formatted here -- presumably it is resolved per shard downstream; TODO confirm.
    per_shard_suffix = r"/\$\{data._prefix\}.parquet"

    # NOTE(review): values are interpolated into the command string without any quoting, and the
    # ``$(expand_shards ...)`` fragment looks like shell command substitution -- confirm how
    # ``run_in_env`` executes ``cmd`` and whether paths containing spaces need to be supported.
    aces_args = (
        "aces-cli",
        "--multirun",
        f"cohort_name={cfg.task}",
        "data=sharded",
        "data.standard=meds",
        f"data.root={data_root}",
        f"data.shard=$(expand_shards {data_root})",
        f"config_path={criteria_fp}",
        f"predicates_path={predicates_fp}",
        f"output_filepath={out_dir}" + per_shard_suffix,
        f"log_dir={out_dir}/.logs",
    )
    cmd = " ".join(aces_args)

    logger.info(f"Running ACES: {cmd}")
    run_in_env(cmd=cmd, output_dir=out_dir, do_overwrite=cfg.do_overwrite, run_as_script=False)
    logger.info(f"Extract {cfg.task} for {cfg.dataset} command {cmd} finished successfully.")