Skip to content

Commit 695d434

Browse files
authored
Merge pull request #12 from VACLab/add-progress-bar
Added progress bar to create_cohort() and get_concept_hierarchy()
2 parents 6b4b356 + fde6da1 commit 695d434

File tree

4 files changed: 60 additions and 6 deletions.

biasanalyzer/cohort.py

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
import duckdb
33
import pandas as pd
44
from datetime import datetime
5+
from tqdm.auto import tqdm
56
from pydantic import ValidationError
67
from biasanalyzer.models import CohortDefinition, Cohort
78
from biasanalyzer.config import load_cohort_creation_config
@@ -79,10 +80,18 @@ def create_cohort(self, cohort_name: str, description: str, query_or_yaml_file:
7980
and storing the result in BiasDatabase. The query can be passed in directly
8081
or built from a yaml file using a corresponding SQL query template
8182
"""
83+
stages = [
84+
"Built query",
85+
"Executed query on OMOP database to get cohort data",
86+
"Inserted cohort data into DuckDB - Done"
87+
]
88+
progress = tqdm(total=len(stages), desc="Cohort creation", unit="stage", dynamic_ncols=True, leave=True)
89+
90+
progress.set_postfix_str(stages[0])
8291
if query_or_yaml_file.endswith('.yaml') or query_or_yaml_file.endswith('.yml'):
8392
try:
8493
cohort_config = load_cohort_creation_config(query_or_yaml_file)
85-
print(f'configuration specified in {query_or_yaml_file} loaded successfully')
94+
tqdm.write(f'configuration specified in {query_or_yaml_file} loaded successfully')
8695
except FileNotFoundError:
8796
print('specified cohort creation configuration file does not exist. Make sure '
8897
'the configuration file name with path is specified correctly.')
@@ -95,7 +104,9 @@ def create_cohort(self, cohort_name: str, description: str, query_or_yaml_file:
95104
query = self._query_builder.build_query(cohort_config)
96105
else:
97106
query = clean_string(query_or_yaml_file)
107+
progress.update(1)
98108

109+
progress.set_postfix_str(stages[1])
99110
omop_session = self.omop_db.get_session()
100111
try:
101112
# Execute read-only query from OMOP CDM database
@@ -108,14 +119,18 @@ def create_cohort(self, cohort_name: str, description: str, query_or_yaml_file:
108119
creation_info=clean_string(query),
109120
created_by=created_by
110121
)
111-
cohort_def_id = self.bias_db.create_cohort_definition(cohort_def)
122+
cohort_def_id = self.bias_db.create_cohort_definition(cohort_def, progress_obj=tqdm)
123+
progress.update(1)
112124

125+
progress.set_postfix_str(stages[2])
113126
# Store cohort_definition and cohort data into BiasDatabase
114127
cohort_df = pd.DataFrame(result)
115128
cohort_df['cohort_definition_id'] = cohort_def_id
116129
cohort_df = cohort_df.rename(columns={"person_id": "subject_id"})
117130
self.bias_db.create_cohort_in_bulk(cohort_df)
118-
print(f"Cohort {cohort_name} successfully created.")
131+
progress.update(1)
132+
133+
tqdm.write(f"Cohort {cohort_name} successfully created.")
119134
return CohortData(cohort_id=cohort_def_id, bias_db=self.bias_db, omop_db=self.omop_db)
120135
except duckdb.Error as e:
121136
print(f"Error executing query: {e}")

biasanalyzer/database.py

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
import pandas as pd
33
from typing import Optional
44
from datetime import datetime
5+
from tqdm.auto import tqdm
56
from sqlalchemy.orm import sessionmaker
67
from sqlalchemy.exc import SQLAlchemyError
78
from sqlalchemy import create_engine, text
@@ -91,7 +92,7 @@ def load_postgres_extension(self):
9192
self.conn.execute("INSTALL postgres_scanner;")
9293
self.conn.execute("LOAD postgres_scanner;")
9394

94-
def create_cohort_definition(self, cohort_definition: CohortDefinition):
95+
def create_cohort_definition(self, cohort_definition: CohortDefinition, progress_obj=None):
9596
self.conn.execute('''
9697
INSERT INTO cohort_definition (name, description, created_date, creation_info, created_by)
9798
VALUES (?, ?, ?, ?, ?)
@@ -102,7 +103,10 @@ def create_cohort_definition(self, cohort_definition: CohortDefinition):
102103
cohort_definition.creation_info,
103104
cohort_definition.created_by
104105
))
105-
print("Cohort definition inserted successfully.")
106+
if progress_obj is None:
107+
print("Cohort definition inserted successfully.")
108+
else:
109+
progress_obj.write("Cohort definition inserted successfully.")
106110
self.conn.execute("SELECT id from cohort_definition ORDER BY id DESC LIMIT 1")
107111
created_cohort_id = self.conn.fetchone()[0]
108112
return created_cohort_id
@@ -407,6 +411,14 @@ def get_concept_hierarchy(self, concept_id: int):
407411
Retrieves the full concept hierarchy (ancestors and descendants) for a given concept_id
408412
and organizes it into a nested dictionary to represent the tree structure.
409413
"""
414+
stages = [
415+
"Queried concept hierarchy",
416+
"Fetched concept details",
417+
"Built hierarchy tree"
418+
]
419+
progress = tqdm(total=len(stages), desc="Concept Hierarchy", unit="stage")
420+
421+
progress.set_postfix_str(stages[0])
410422
query = """
411423
WITH RECURSIVE concept_hierarchy AS (
412424
SELECT ancestor_concept_id, descendant_concept_id, min_levels_of_separation
@@ -425,7 +437,9 @@ def get_concept_hierarchy(self, concept_id: int):
425437
"""
426438

427439
results = self.execute_query(query, params={"concept_id": concept_id})
440+
progress.update(1)
428441

442+
progress.set_postfix_str(stages[1])
429443
# Collect all unique concept IDs involved in the hierarchy using set comprehension
430444
concept_ids = {row['ancestor_concept_id'] for row in results} | {row['descendant_concept_id'] for row in results}
431445
# Fetch details of each concept
@@ -439,7 +453,9 @@ def get_concept_hierarchy(self, concept_id: int):
439453

440454
result = self.execute_query(query, params={"concept_ids": tuple(concept_ids)})
441455
concept_details = {row['concept_id']: row for row in result}
456+
progress.update(1)
442457

458+
progress.set_postfix_str(stages[2])
443459
# Build the hierarchy tree using a dictionary
444460
hierarchy = {}
445461
reverse_hierarchy = {}
@@ -458,6 +474,7 @@ def get_concept_hierarchy(self, concept_id: int):
458474
ancestor_entry_rev = reverse_hierarchy.setdefault(
459475
ancestor_id, {"details": concept_details[ancestor_id], "parents": []})
460476
desc_entry_rev["parents"].append(ancestor_entry_rev)
477+
progress.update(1)
461478

462479
# Return the parent hierarchy and children hierarchy of the specified concept
463480
return reverse_hierarchy[concept_id], hierarchy[concept_id]

poetry.lock

Lines changed: 22 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ psycopg2 = "^2.9.1"
2121
ipytree = "^0.2.2"
2222
ipywidgets = "^8.1.5"
2323
jinja2 = "3.1.6"
24+
tqdm = "4.67.1"
2425

2526
[tool.poetry.dev-dependencies]
2627
pytest = "^8.3.3"

0 commit comments

Comments
 (0)