Skip to content

Commit 3015dc1

Browse files
authored
Merge pull request #19 from hdashnow/ci
Introduce Travis CI testing
2 parents b4dae20 + 7f0fd58 commit 3015dc1

9 files changed

Lines changed: 205 additions & 12 deletions

File tree

.testing/STRs.benchmark.tsv

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
chrom start end sample repeatunit reflen locuscoverage outlier p_adj bpInsertion repeatUnits
2+
chr13 70713515 70713561 11 AGC 15.3 35 1.88685648944145 0.0295898153640992 316.950118548597 120.950039516199
3+
chr13 70713515 70713561 69 AGC 15.3 8 0.4297790752454 0.333678177749521 76.1210174457791 40.6736724819264
4+
chr13 70713515 70713561 1 AGC 15.3 3 -0.426744171073212 0.665217162914456 32.9115187238725 26.2705062412908
5+
chr13 70713515 70713561 54 AGC 15.3 2 -0.730726313557163 0.7675268301731 24.4403744973217 23.4467914991072
6+
chr13 70713515 70713561 49 AGC 15.3 1 -1.15916508005647 0.876805548823274 16.0677184130193 20.6559061376731

.testing/install-ci.sh

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
#!/bin/bash
2+
3+
## This script will install the tools required for the STRetch pipeline.
4+
## It will fetch each tool from the web and place it into the tools/ subdirectory.
5+
## Paths to all installed tools can be found in pipelines/pipeline_config.groovy at the
6+
## end of execution of this script. These paths can be changed if a different
7+
## version of software is required. Note that R must be installed manually
8+
##
9+
10+
# Absolute locations used throughout the script, all anchored at the
# directory the script is launched from.
installdir=$PWD
refdir=$PWD/reference-data
toolspec=$PWD/pipelines/pipeline_config.groovy
# NOTE(review): template is not referenced by the rest of the script shown here.
template=$PWD/pipelines/config-examples/pipeline_config_template.groovy

# Tools are unpacked under tools/ and symlinked into tools/bin. Everything
# below runs from inside tools/, so stop right away if it cannot be entered
# rather than downloading into the wrong directory.
mkdir -p tools/bin || exit 1
cd tools || exit 1

#a list of which programs need to be installed
commands="bpipe python goleft bedtools bwa samtools"
20+
21+
#installation method
22+
# Download the bpipe 0.9.9.2 release, unpack it in the current directory and
# symlink its executables into ./bin.
# Returns non-zero as soon as the download or the unpack step fails, so a
# broken download is never linked into bin/.
function bpipe_install {
  local version=0.9.9.2
  local tarball="bpipe-${version}.tar.gz"

  wget -O "$tarball" \
    "https://github.com/ssadedin/bpipe/releases/download/${version}/${tarball}" \
    || return 1
  tar -zxvf "$tarball" || return 1
  rm -- "$tarball"
  # The glob is kept outside the quotes so it still expands to every file in
  # bpipe's bin/ directory, while the directory part survives spaces.
  ln -s "$PWD/bpipe-${version}/bin/"* "$PWD/bin/"
}
27+
28+
# Installs miniconda, Python 3 + required packages, BedTools and goleft
29+
# (and any other dependencies listed in environment.yml)
30+
# Install Miniconda under ./miniconda, create the conda environment described
# by ../environment.yml, and symlink the executables of its "STR" environment
# into ./bin.
# Returns non-zero as soon as the download, the installer or the environment
# creation fails.
function python_install {
  local conda_prefix="$PWD/miniconda"

  wget -O miniconda.sh \
    https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
    || return 1
  # -b: batch (non-interactive) install, -p: install prefix
  bash miniconda.sh -b -p "$conda_prefix" || return 1
  rm -- miniconda.sh
  "$conda_prefix/bin/conda" env create -f ../environment.yml || return 1
  # Glob outside the quotes so every executable in the environment is linked.
  ln -s "$conda_prefix/envs/STR/bin/"* "$PWD/bin/"
  # source activate STR
}
38+
39+
# Download the bwakit 0.7.15 binary bundle, unpack it in the current
# directory (it extracts to bwa.kit/) and symlink its contents into ./bin.
# Returns non-zero as soon as the download or the unpack step fails.
function bwa_install {
  local tarball=bwakit-0.7.15_x64-linux.tar.bz2

  # NOTE(review): --no-check-certificate disables TLS verification of the
  # download; kept as in the original, consider dropping it.
  wget -O "$tarball" --no-check-certificate \
    "https://github.com/lh3/bwa/releases/download/v0.7.15/${tarball}" \
    || return 1
  tar -jxvf "$tarball" || return 1
  rm -- "$tarball"
  # Glob outside the quotes so every file in bwa.kit/ is linked.
  ln -s "$PWD/bwa.kit/"* "$PWD/bin/"
}
45+
46+
# Download the samtools 1.3.1 source tarball, unpack it and run
# `make install` with the current directory as prefix.
# Returns non-zero as soon as the download or the unpack step fails;
# otherwise returns the status of make.
function samtools_install {
  local version=1.3.1
  local tarball="samtools-${version}.tar.bz2"

  # An explicit -O keeps the file name predictable: without it wget saves a
  # re-download as "<name>.1" and the stale tarball would be unpacked.
  # NOTE(review): --no-check-certificate disables TLS verification of the
  # download; kept as in the original, consider dropping it.
  wget --no-check-certificate -O "$tarball" \
    "https://sourceforge.net/projects/samtools/files/samtools/${version}/${tarball}" \
    || return 1
  tar -jxvf "$tarball" || return 1
  rm -- "$tarball"
  make prefix="$PWD" install -C "samtools-${version}/"
}
52+
53+
# Download the reference-data archive into $refdir, unpack it there, then
# move the *.gz, *.bam and *.bai files into $installdir/test-data.
# Globals:   refdir (read), installdir (read)
# Returns:   non-zero if the download or the unpack fails, otherwise the
#            status of the final mv.
function download {
  local archive="$refdir/reference-data.zip"

  # The URL is quoted because it contains '?', which the shell would
  # otherwise treat as a glob character.
  # NOTE(review): --no-check-certificate disables TLS verification of the
  # download; kept as in the original, consider dropping it.
  wget --no-check-certificate -O "$archive" \
    'https://ndownloader.figshare.com/articles/5353399?private_link=be9bde235448e937e468' \
    || return 1
  # unzip exits with status 1 on mere warnings after a successful extract,
  # so only statuses above 1 are treated as fatal.
  unzip "$archive" -d "$refdir" || (( $? == 1 )) || return 1
  rm -- "$archive"

  # -p so a re-run with an existing test-data directory does not fail.
  mkdir -p "$installdir/test-data"
  mv "$refdir"/*.gz "$refdir"/*.bam "$refdir"/*.bai "$installdir/test-data"
}
61+
62+
#populate toolspec
63+
# Start a fresh pipeline config. The single '>' truncates any existing file,
# and the unquoted here-document lets $installdir expand, which sets the
# STRetch base directory. The redirect target is quoted so an install path
# containing spaces still works.
cat > "$toolspec" <<EOF
// Bpipe pipeline config file
// Paths are relative to the directory the pipeline is running in, so absolute
// paths are recommended.

// Adjust parameters
PLATFORM='illumina'

// Number of threads to use for BWA
threads=8

// For exome pipeline only ***Edit before running the exome pipeline***
EXOME_TARGET="SCA8_region.bed"

// STRetch installation location
STRETCH="$installdir"

// Paths to tools used by the pipeline
EOF
83+
84+
# Make sure every required tool is available as an executable file at
# ./bin/<name>, installing it on demand, and record its path in the
# pipeline config. An empty path is recorded if the tool is still missing.
# $commands is left unquoted on purpose: it is a space-separated list.
for c in $commands ; do
  c_path=""
  if [[ -f "$PWD/bin/$c" && -x "$PWD/bin/$c" ]] ; then
    c_path="$PWD/bin/$c"
  else
    echo "$c not found, fetching it"
    # Not every tool has its own installer: goleft and bedtools are expected
    # to come from python_install's conda environment.
    if declare -F "${c}_install" > /dev/null ; then
      "${c}_install"
    else
      echo "No ${c}_install function is defined; cannot fetch $c" >&2
    fi
    if [[ -f "$PWD/bin/$c" && -x "$PWD/bin/$c" ]] ; then
      c_path="$PWD/bin/$c"
    fi
  fi
  echo "$c=\"$c_path\"" >> "$toolspec"
done
93+
94+
# Fetch the reference and test data unless a *dedup.sorted.bed file is
# already present in $refdir. compgen -G succeeds when the glob matches at
# least one path; unlike `[ -f <glob> ]` it does not error out when the glob
# matches several files.
if ! compgen -G "$refdir/*dedup.sorted.bed" > /dev/null ; then
  mkdir -p "$refdir"
  echo "Downloading reference and test data"
  download
fi
99+
100+
# Append the reference-data settings to the pipeline config in one write.
# The here-document is unquoted so $refdir expands; the redirect target is
# quoted so an install path containing spaces still works.
# NOTE(review): the second CONTROL line is written active, not commented
# out, so it overrides CONTROL="" despite the "Uncomment the line below"
# text. Confirm this is intended for the CI run.
cat >> "$toolspec" <<EOF

// Path to reference data
refdir="$refdir"

// Decoy reference assumed to have matching .genome file in the same directory
REF="$refdir/hg19.chr13.STRdecoys.sorted.fasta"
STR_BED="$refdir/hg19.simpleRepeat_period1-6_dedup.sorted.bed"
DECOY_BED="$refdir/STRdecoys.sorted.bed"
// By default, uses other samples in the same batch as a control
CONTROL=""
// Uncomment the line below to use a set of WGS samples as controls, or specify your own
CONTROL="$refdir/PCRfreeWGS.controls.tsv"

EOF
114+
115+
116+
#loop through commands to check they are all installed
117+
# Report, for every required tool, whether an executable file exists at
# ./bin/<name>. Final_message starts out as the success text and is replaced
# by a warning as soon as one tool is missing; it is printed at the very end
# of the script.
echo "**********************************************************"
echo "Checking that all required tools were installed:"
Final_message="All commands installed successfully!"
for c in $commands ; do
  if [[ -f "$PWD/bin/$c" && -x "$PWD/bin/$c" ]] ; then
    echo "$c looks like it has been installed"
  else
    echo -n "WARNING: $c could not be found!!!! "
    echo "You will need to download and install $c manually, then add its path to $toolspec"
    Final_message="WARNING: One or more command did not install successfully. See warning messages above. You will need to correct this before running STRetch."
  fi
done

echo "**********************************************************"
132+
133+
#check that R is installed
134+
# Look for R on the PATH and record whatever was found (possibly an empty
# string) in the pipeline config. R is only checked here, never installed.
R_path=$(command -v R 2>/dev/null)
if [[ -z "$R_path" ]] ; then
  echo "R not found!"
  echo "Please go to http://www.r-project.org/ and follow the installation instructions."
  echo "Please also install the required R packages."
else
  echo "R seems to be available."
  echo "Make sure you are using the correct version of R and have installed all required packages."
fi
echo "R=\"$R_path\"" >> "$toolspec"

echo "**********************************************************"
146+
147+
#check for reference data
148+
# Final report: say whether a *dedup.sorted.bed reference file exists in
# $refdir, then print the overall tool status and the R package reminder.
# compgen -G succeeds when the glob matches at least one path and, unlike
# `[ -f <glob> ]`, copes with several matches.
if compgen -G "$refdir/*dedup.sorted.bed" > /dev/null ; then
  echo "It looks like the reference data has been downloaded"
else
  echo -n "WARNING: reference files could not be found!!!! "
  echo "You will need to download them manually, then add the path to $toolspec"
fi

echo "**********************************************************"
echo "$Final_message"
echo "Please make sure you have installed the required R packages:"
echo "install.packages(c('optparse','plyr','dplyr','tidyr','reshape2'))"

.testing/install-packages.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
## Create the personal library if it doesn't exist. Ignore a warning if the directory already exists.
## showWarnings = FALSE is what suppresses the "already exists" warning.
dir.create(Sys.getenv("R_LIBS_USER"), showWarnings = FALSE, recursive = TRUE)
## Install packages
install.packages(c('optparse','plyr','dplyr','tidyr','reshape2'), repos="http://cran.rstudio.com/")

.travis.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
language: python
cache: packages
install:
  - ./.testing/install-ci.sh
  # Install R packages
  - tools/bin/Rscript --verbose ./.testing/install-packages.R
  # Create working directory
  - mkdir test
  - cp reference-data/SCA8_region.bed test/
  - cd test/
# command to run tests
script:
  # Run the test data
  - ../tools/bin/bpipe run ../pipelines/STRetch_exome_pipeline.groovy ../test-data/*.fastq.gz
  # Compare against the benchmark. diff exits non-zero on any difference,
  # which fails the build (the previous form only echoed "exit 1").
  - diff STRs.tsv ../.testing/STRs.benchmark.tsv
after_script:
  - head *.locus_counts *.STR_counts *.median_cov
  - head *.tsv

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
[![Build Status](https://travis-ci.org/hdashnow/STRetch-paper.svg?branch=ci)](https://travis-ci.org/hdashnow/STRetch-paper)
2+
13
**Update:** the STRetch paper is now available!
24

35
If using STRetch, please cite:

environment.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
name: STR
22
channels:
3+
- conda-forge
34
- bioconda
45
dependencies:
56
- python=3*
7+
- R
68
- BioPython
79
- PyVCF
810
- pysam

install.sh

100644100755
File mode changed.

pipelines/STRetch_exome_pipeline.groovy

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ run {
1616
align_bwa + index_bam +
1717
median_cov_region +
1818
STR_coverage +
19-
STR_locus_counts
19+
STR_locus_counts
2020
] +
2121
estimate_size
2222
}

pipelines/pipeline_stages.groovy

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,13 @@ set_sample_info = {
2323

2424
if(!file(REF).exists())
2525
fail """
26-
The configured decoy reference file: $REF could not be found.
26+
The configured decoy reference file: $REF could not be found.
2727
2828
Please check pipelines/pipeline_config.groovy to make sure this is set correctly
2929
"""
3030

3131
[bwa,samtools,bedtools,goleft,python].each { tool ->
32-
if(!file(tool).exists())
32+
if(!file(tool).exists())
3333
fail """
3434
The location of tool $tool does not appear to exist.
3535
@@ -88,10 +88,13 @@ STR_coverage = {
8888
STR_locus_counts = {
8989
transform("bam") to ("locus_counts") {
9090
exec """
91+
STRPATH=$PATH;
92+
PATH=$STRETCH/tools/bin:$PATH;
9193
$python $STRETCH/scripts/identify_locus.py
9294
--bam $input.bam
9395
--bed $STR_BED
9496
--output $output.locus_counts
97+
;PATH=$STRPATH
9598
"""
9699
}
97100
}
@@ -100,13 +103,13 @@ estimate_size = {
100103
produce("STRs.tsv") {
101104
if(CONTROL=="") {
102105
exec """
103-
Rscript $STRETCH/scripts/estimateSTR.R
104-
--model $STRETCH/scripts/STRcov.model.csv
106+
$STRETCH/tools/bin/Rscript $STRETCH/scripts/estimateSTR.R
107+
--model $STRETCH/scripts/STRcov.model.csv
105108
"""
106109
} else {
107110
exec """
108-
Rscript $STRETCH/scripts/estimateSTR.R
109-
--model $STRETCH/scripts/STRcov.model.csv
111+
$STRETCH/tools/bin/Rscript $STRETCH/scripts/estimateSTR.R
112+
--model $STRETCH/scripts/STRcov.model.csv
110113
--control $CONTROL
111114
"""
112115
}
@@ -144,12 +147,12 @@ doc "Calculate the median coverage over the target region"
144147

145148
@filter('slop')
146149
str_targets = {
147-
150+
148151
doc "Create bed file of region likely to contain STR reads and their mates"
149152

150153
SLOP=800
151154

152-
//produce(STR_BED[0..-3] + 'slop.bed') {
155+
//produce(STR_BED[0..-3] + 'slop.bed') {
153156
exec """
154157
$bedtools slop -b $SLOP -i $input.bed -g ${REF}.genome | $bedtools merge > $output.bed
155158
"""
@@ -165,9 +168,9 @@ extract_reads_region = {
165168
produce(branch.sample + '_L001_R1.fastq.gz', branch.sample + '_L001_R2.fastq.gz') {
166169
exec """
167170
168-
cat <( $samtools view -hu -L $input.bed $input.bam )
169-
<( $samtools view -u -f 4 $input.bam ) |
170-
$samtools collate -Ou -n 128 - $output.prefix |
171+
cat <( $samtools view -hu -L $input.bed $input.bam )
172+
<( $samtools view -u -f 4 $input.bam ) |
173+
$samtools collate -Ou -n 128 - $output.prefix |
171174
$bedtools bamtofastq -i - -fq >(gzip -c > $output1.gz) -fq2 >(gzip -c > $output2.gz)
172175
"""
173176
}

0 commit comments

Comments
 (0)