Skip to content

Commit ab9bf5a

Browse files
committed
test: make tests idempotent and CI-compatible
1 parent 75f1b33 commit ab9bf5a

File tree

11 files changed

+400
-170
lines changed

11 files changed

+400
-170
lines changed

pixi.lock

Lines changed: 362 additions & 117 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pixi.toml

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,14 @@ patchelf = "*"
3838
commitizen = "*"
3939
typer = "*"
4040
uv = "*"
41+
samtools = "*"
42+
bcftools = "*"
4143

4244
[pypi-dependencies]
4345
genvarloader = { path = ".", editable = true }
44-
seqpro = "==0.3.1"
4546
hirola = "==0.3"
46-
genoray = "==0.10.1"
47+
seqpro = "==0.3.2"
48+
genoray = "==0.10.2"
4749

4850
[feature.docs.dependencies]
4951
sphinx = ">=7.4.7"
@@ -79,21 +81,21 @@ python = "3.12.*"
7981

8082
[environments]
8183
dev = { features = ["pytorch-cpu", "basenji2", "py310"] }
82-
docs = { features = ["docs", "pytorch-cpu", "basenji2", "py310"] }
84+
docs = { features = ["docs", "pytorch-cpu", "basenji2", "py312"] }
8385
py310 = { features = ["pytorch-cpu", "py310"] }
8486
py311 = { features = ["pytorch-cpu", "py311"] }
8587
py312 = { features = ["pytorch-cpu", "py312"] }
8688
# docs-gpu = { features = ["docs", "pytorch-gpu", "basenji2", "py310"] }
8789

8890
[tasks]
89-
install = "uv pip install -e /cellar/users/dlaub/projects/genoray -e ."
91+
install = "uv pip install -e /cellar/users/dlaub/projects/ML4GLand/SeqPro -e /cellar/users/dlaub/projects/genoray -e ."
9092
pre-commit = "pre-commit install --hook-type commit-msg"
9193
gen = "python tests/data/generate_ground_truth.py"
9294
test = { cmd = "pytest tests && cargo test --release", depends-on = ["gen"] }
9395
publish = "maturin publish"
9496

9597
[feature.docs.tasks]
96-
install = "uv pip install -e /cellar/users/dlaub/projects/genoray -e ."
98+
install = "uv pip install -e /cellar/users/dlaub/projects/ML4GLand/SeqPro -e /cellar/users/dlaub/projects/genoray -e ."
9799
i-kernel = "ipython kernel install --user --name 'gvl-docs' --display-name 'GVL Docs'"
98100
i-kernel-gpu = "ipython kernel install --user --name 'gvl-docs-gpu' --display-name 'GVL Docs GPU'"
99101
doc = "cd docs && make clean && make html"

pyproject.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@ dependencies = [
2727
"tbb",
2828
"joblib",
2929
"pooch",
30-
"seqpro>=0.3.1",
3130
"awkward",
3231
"hirola>=0.3,<0.4",
33-
"genoray>=0.10.1",
32+
"seqpro>=0.3.2",
33+
"genoray>=0.10.2",
3434
]
3535

3636
[project.urls]
@@ -54,6 +54,7 @@ filterwarnings = [
5454
"ignore::DeprecationWarning:ray.*",
5555
"ignore::DeprecationWarning:pkg_resources.*",
5656
"ignore::DeprecationWarning:jupyter_client.*",
57+
"ignore::DeprecationWarning:sorted_nearest.*",
5758
]
5859
markers = ["slow: mark test as slow (deselect with '-m \"not slow\"')"]
5960

python/genvarloader/_dataset/_write.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,7 @@ def _write_from_svar(
594594
)
595595
# this is fine if there aren't any overlapping variants that could make a v_idx < -1
596596
# have a further end than v_idx == -1
597-
#! calling ak.max() means v_idxs is not a view of svar.genos.data
597+
#* calling ak.max() means v_idxs is not a view of svar.genos.data
598598
# (r s p ~v) -> (r)
599599
v_idxs = ak.max(sp_genos.to_awkward(), -1).to_numpy().max((1, 2)) # type: ignore
600600
c_max_ends = max_ends[contig_offset : contig_offset + df.height]

tests/data/source.vcf

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
##fileDate=20230929
44
##source=myImputationProgramV3.1
55
##reference=Homo_sapiens.GRCh38.p13_v109.dna.toplevel.fa.gz
6-
##contig=<ID=19>
7-
##contig=<ID=20>
6+
##contig=<ID=chr19>
7+
##contig=<ID=chr20>
88
##phasing=partial
99
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
1010
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
@@ -24,17 +24,17 @@
2424
##ALT=<ID=DEL:ME:ALU,Description="Deletion of ALU element">
2525
##ALT=<ID=CNV,Description="Copy number variable region">
2626
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003
27-
19 111 . N C 9.6 . . GT:VAF:HQ 0|0:.:10,15 0|0:.:10,10 0/1:.3:3,3
28-
19 1010696 . GAGA G 10 . . GT:VAF:HQ 1|0:.3:10,10 0|0:.1:10,15 0/0:.:3,3
29-
19 1010696 . GAGACGG G 10 . . GT:VAF:HQ 0|0:.:10,10 0|0:.5:10,15 0/1:.15:3,3
30-
19 1010696 . GAGACGGGGCC G 10 . . GT:VAF:HQ 0|1:.2:10,10 1|1:.6:10,15 0/0:.:3,3
31-
19 1110696 . A TTT 10 . . GT:VAF:HQ 0|1:.25:10,10 1|1:.45:10,15 0/0:.:3,3
32-
19 1110696 . A G 10 . . GT:VAF:HQ 0|0:.:10,10 0|0:.:10,10 0/1:.2:3,3
33-
19 1210696 . C G 10 . . GT:VAF:HQ 1|.:0.1:10,10 0/1:0.23:10,10 1|1:0.2:3,3
34-
19 1210696 . C G 10 . . GT:VAF:HQ .|1:0.15:10,10 0|0:.:10,10 0/0:.:3,3
35-
19 1210697 . T G 10 . . GT:VAF:HQ 0/0:.:10,10 1|0:0.05:10,10 0/1:0.2:3,3
36-
19 1210697 . T A 10 . . GT:VAF:HQ 0/0:.:10,10 1|0:0.4:10,10 0/1:0.3:3,3
37-
20 14370 rs6054257 N A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:VAF:GQ:DP:HQ 0|0:.:48:1:51,51 1|0:.15:48:8:51,51 1/1:.65:43:5:.,.
38-
20 17330 . N A 3 q10 NS=3;DP=11;AF=0.017 GT:VAF:GQ:DP:HQ 0|0:.:49:3:58,50 0|1:.3:3:5:65,3 0/0:.:41:3:.,.
39-
20 1110696 rs6040355 G A,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:VAF:GQ:DP:HQ 1|2:.55:21:6:23,27 2|1:.45:2:0:18,2 2/2:.6:35:4:.,.
40-
20 1234567 microsat1 A GA,AC 50 PASS NS=3;DP=9;AA=G;AN=6;AC=3,1 GT:VAF:GQ:DP 0/1:.2:.:4 0/2:.15:17:2 ./.:.:40:3
27+
chr19 111 . N C 9.6 . . GT:VAF:HQ 0|0:.:10,15 0|0:.:10,10 0/1:.3:3,3
28+
chr19 1010696 . GAGA G 10 . . GT:VAF:HQ 1|0:.3:10,10 0|0:.1:10,15 0/0:.:3,3
29+
chr19 1010696 . GAGACGG G 10 . . GT:VAF:HQ 0|0:.:10,10 0|0:.5:10,15 0/1:.15:3,3
30+
chr19 1010696 . GAGACGGGGCC G 10 . . GT:VAF:HQ 0|1:.2:10,10 1|1:.6:10,15 0/0:.:3,3
31+
chr19 1110696 . A TTT 10 . . GT:VAF:HQ 0|1:.25:10,10 1|1:.45:10,15 0/0:.:3,3
32+
chr19 1110696 . A G 10 . . GT:VAF:HQ 0|0:.:10,10 0|0:.:10,10 0/1:.2:3,3
33+
chr19 1210696 . C G 10 . . GT:VAF:HQ 1|.:0.1:10,10 0/1:0.23:10,10 1|1:0.2:3,3
34+
chr19 1210696 . C G 10 . . GT:VAF:HQ .|1:0.15:10,10 0|0:.:10,10 0/0:.:3,3
35+
chr19 1210697 . T G 10 . . GT:VAF:HQ 0/0:.:10,10 1|0:0.05:10,10 0/1:0.2:3,3
36+
chr19 1210697 . T A 10 . . GT:VAF:HQ 0/0:.:10,10 1|0:0.4:10,10 0/1:0.3:3,3
37+
chr20 14370 rs6054257 N A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:VAF:GQ:DP:HQ 0|0:.:48:1:51,51 1|0:.15:48:8:51,51 1/1:.65:43:5:.,.
38+
chr20 17330 . N A 3 q10 NS=3;DP=11;AF=0.017 GT:VAF:GQ:DP:HQ 0|0:.:49:3:58,50 0|1:.3:3:5:65,3 0/0:.:41:3:.,.
39+
chr20 1110696 rs6040355 G A,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:VAF:GQ:DP:HQ 1|2:.55:21:6:23,27 2|1:.45:2:0:18,2 2/2:.6:35:4:.,.
40+
chr20 1234567 microsat1 A GA,AC 50 PASS NS=3;DP=9;AA=G;AN=6;AC=3,1 GT:VAF:GQ:DP 0/1:.2:.:4 0/2:.15:17:2 ./.:.:40:3

tests/dataset/test_dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from pytest_cases import fixture, parametrize_with_cases
66

77
data_dir = Path(__file__).resolve().parents[1] / "data"
8-
ref = data_dir / "fasta" / "Homo_sapiens.GRCh38.dna.primary_assembly.fa.bgz"
8+
ref = data_dir / "fasta" / "hg38.fa.bgz"
99

1010
def ds_phased():
1111
return gvl.Dataset.open(data_dir / "phased_dataset.vcf.gvl", ref)

tests/dataset/test_ds_haps.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from pytest_cases import parametrize_with_cases
99

1010
data_dir = Path(__file__).resolve().parents[1] / "data"
11-
ref = data_dir / "fasta" / "Homo_sapiens.GRCh38.dna.primary_assembly.fa.bgz"
11+
ref = data_dir / "fasta" / "hg38.fa.bgz"
1212
cons_dir = data_dir / "consensus"
1313

1414

@@ -40,7 +40,7 @@ def test_ds_haps(dataset: gvl.RaggedDataset[gvl.Ragged[np.bytes_], None, None, N
4040
actual = haps[h]
4141
fpath = f"source_{sample}_nr{region}_h{h}.fa"
4242
with pysam.FastaFile(str(cons_dir / fpath)) as f:
43-
desired = sp.cast_seqs(f.fetch(f.references[0]))
43+
desired = sp.cast_seqs(f.fetch(f.references[0]).upper())
4444
np.testing.assert_equal(
4545
actual,
4646
desired,

tests/dataset/test_subset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from pytest_cases import parametrize_with_cases
88

99
DATA_DIR = Path(__file__).resolve().parents[1] / "data"
10-
REF = DATA_DIR / "fasta" / "Homo_sapiens.GRCh38.dna.primary_assembly.fa.bgz"
10+
REF = DATA_DIR / "fasta" / "hg38.fa.bgz"
1111
DATASET = gvl.Dataset.open(DATA_DIR / "phased_dataset.vcf.gvl", REF)
1212

1313

tests/dataset/test_write.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def bed():
3636

3737
@fixture
3838
def ref():
39-
return ddir / "fasta" / "Homo_sapiens.GRCh38.dna.primary_assembly.fa.bgz"
39+
return ddir / "fasta" / "hg38.fa.bgz"
4040

4141

4242
@mark.skip
@@ -101,4 +101,4 @@ def test_write(reader: Reader, bed: pl.DataFrame, ref: Path, tmp_path):
101101
max_len = lengths.max()
102102
for len_ in range(1, max_len + 1):
103103
mask = ak.num(desired, -1) == len_
104-
assert ak.all(actual[mask][:, :len_] == desired[mask][:, :len_])
104+
assert ak.all(actual[mask][:, :len_] == desired[mask][:, :len_]) # type: ignore

tests/test_fasta.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,13 @@ def fasta_path():
1313
Path(__file__).parent
1414
/ "data"
1515
/ "fasta"
16-
/ "Homo_sapiens.GRCh38.dna.primary_assembly.fa.bgz"
16+
/ "hg38.fa.bgz"
1717
)
1818

1919

2020
def test_pad_right(fasta_path):
2121
fasta = Fasta("ref", fasta_path, pad="N")
22-
contig = "1"
22+
contig = "chr1"
2323
with FastaFile(fasta_path) as f:
2424
end_of_contig = f.get_reference_length(contig)
2525
start = end_of_contig - 5
@@ -36,7 +36,7 @@ def test_pad_right(fasta_path):
3636

3737
def test_pad_left(fasta_path):
3838
fasta = Fasta("ref", fasta_path, pad="N")
39-
contig = "1"
39+
contig = "chr1"
4040
start = -5
4141
end = start + 10
4242
seq = fasta.read(contig, start, end)
@@ -50,13 +50,13 @@ def test_pad_left(fasta_path):
5050
def test_no_pad(fasta_path):
5151
fasta = Fasta("ref", fasta_path)
5252
end_of_contig_1 = 248956422
53-
contig = "1"
53+
contig = "chr1"
5454
start = end_of_contig_1 - 5
5555
end = start + 10
5656
with pytest.raises(NoPadError):
5757
fasta.read(contig, start, end)
5858

59-
contig = "1"
59+
contig = "chr1"
6060
start = -5
6161
end = start + 10
6262
with pytest.raises(NoPadError):

0 commit comments

Comments
 (0)