Skip to content

Commit e53992d

Browse files
author
AI Agent
committed
Fix amalgkit taxdump fetching
1 parent 4288e50 commit e53992d

1 file changed

Lines changed: 5 additions & 6 deletions

File tree

src/metainformant/rna/engine/streaming_orchestrator.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -469,13 +469,12 @@ def discover_species_tasks(self, config_name: str, max_gb: float, threads: int)
469469
ete_dir = work_dir / "downloads" / "ete4"
470470
try:
471471
ete_dir.mkdir(parents=True, exist_ok=True)
472-
# Seed from the newly hosted persistent location
473-
taxdump_src = Path("/app/output/taxdump.tar.gz")
474472
taxdump_dest = ete_dir / "taxdump.tar.gz"
475-
if taxdump_src.exists() and not taxdump_dest.exists():
476-
import shutil
477-
shutil.copy2(taxdump_src, taxdump_dest)
478-
logger.info(f"Seeded NCBI Taxdump locally at {taxdump_dest}")
473+
if not taxdump_dest.exists():
474+
import urllib.request
475+
logger.info(f"Downloading NCBI Taxdump directly to {taxdump_dest}...")
476+
urllib.request.urlretrieve("https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz", taxdump_dest)
477+
logger.info(f"Successfully seeded NCBI Taxdump locally!")
479478
except Exception as e:
480479
logger.warning(f"Failed to seed taxdump.tar.gz: {e}")
481480

0 commit comments

Comments
 (0)