Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 41 additions & 16 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,30 +48,55 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
- name: pip install
- name: Install Rust
uses: actions-rust-lang/setup-rust-toolchain@v1
- name: build deeptools
run: |
pip install .[actions]
- name: Test deepTools
python -m venv venv
source venv/bin/activate
pip install maturin
maturin develop --release --extras actions
- name: test deeptools
run: |
source venv/bin/activate
pytest -v
- name: Build wheel
- name: build wheel
run: |
source venv/bin/activate
python -m build
- name: Test wheel
- name: test wheel
run: |
source venv/bin/activate
pip install dist/*whl
build-osx:
name: Test on OSX
runs-on: macOS-latest
strategy:
matrix:
python-version: ['3.9','3.10', '3.11', '3.12']
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.12'
cache: 'pip'
- name: pip install
run: |
pip install .[actions]
- name: Test deepTools
run: |
pytest -v
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
- name: Install Rust
uses: actions-rust-lang/setup-rust-toolchain@v1
- name: build deeptools
run: |
python -m venv venv
source venv/bin/activate
pip install maturin
maturin develop --release --extras actions
- name: test deeptools
run: |
source venv/bin/activate
pytest -v
- name: build wheel
run: |
source venv/bin/activate
python -m build
- name: test wheel
run: |
source venv/bin/activate
pip install dist/*whl
6 changes: 5 additions & 1 deletion docs/content/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,13 @@ Core
- --quiet / -q option no longer exists.
- bed files in computeMatrix no longer support '#' to define groups.
- 'chromosome matching' i.e. chr1 <-> 1, chrMT <-> MT is no longer performed.

- metagene mode erroneously set the before and after values to 'nan' (if they fell outside of the feature). This is now fixed.
- Rounding behavior in matrix output is now limited to two decimals; the unscaled 5 prime and unscaled 3 prime regions are now strictly separated from the rest of the scaled region (for value calculation).

* normalization
- Exact scaling is no longer an option; it is always performed.
- SES option in bamCompare mode is no longer available.
- blackList filtering is now performed on a position-based level, meaning that reads which only partially overlap the blacklist can still contribute to the signal.

* alignmentSieve
- options label, smartLabels, genomeChunkLength are removed.
Expand Down
2 changes: 1 addition & 1 deletion pydeeptools/deeptools/bamCompare.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ def main(args=None):
extendReads=args.extendReads,
blackListFileName=args.blackListFileName,
minMappingQuality=args.minMappingQuality,
ignoreDuplicates=args.ignoreDuplicates,
ignoreDuplicates=False,
center_read=args.centerReads,
zerosToNans=args.skipNonCoveredRegions,
skipZeroOverZero=args.skipZeroOverZero,
Expand Down
98 changes: 31 additions & 67 deletions pydeeptools/deeptools/bamCompare2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from deeptools import parserCommon
from deeptools.hp import r_bamcompare
import signal
import sys

def parseArguments():
parentParser = parserCommon.getParentArgParse()
Expand Down Expand Up @@ -73,7 +74,7 @@ def getOptionalArgs():
'for sequencing depth differences between the samples. '
'As an alternative, this can be set to None and an option from '
'--normalizeUsing <method> can be used. (Default: %(default)s)',
choices=['readCount', 'SES', 'None'],
choices=['readCount', 'None'],
default='readCount')

optional.add_argument('--sampleLength', '-l',
Expand Down Expand Up @@ -126,7 +127,6 @@ def getOptionalArgs():
'values (the first value is used as the numerator '
'pseudocount and the second the denominator pseudocount). (Default: %(default)s)',
default=[1],
type=float,
nargs='+',
action=parserCommon.requiredLength(1, 2),
required=False)
Expand Down Expand Up @@ -158,70 +158,8 @@ def process_args(args=None):
if not args.ignoreForNormalization:
args.ignoreForNormalization = []

if not isinstance(args.pseudocount, list):
args.pseudocount = [args.pseudocount]

if len(args.pseudocount) == 1:
args.pseudocount *= 2

return args

# get_scale_factors function is used for scaling in bamCompare
# while get_scale_factor is used for depth normalization


def get_scale_factors(args, statsList, mappedList):

if args.scaleFactors:
scale_factors = list(map(float, args.scaleFactors.split(":")))
elif args.scaleFactorsMethod == 'SES':
scalefactors_dict = estimateScaleFactor(
[args.bamfile1, args.bamfile2],
args.sampleLength, args.numberOfSamples,
1,
mappingStatsList=mappedList,
blackListFileName=args.blackListFileName,
numberOfProcessors=args.numberOfProcessors,
verbose=args.verbose,
chrsToSkip=args.ignoreForNormalization)

scale_factors = scalefactors_dict['size_factors']

if args.verbose:
print("Size factors using SES: {}".format(scale_factors))
print("%s regions of size %s where used " %
(scalefactors_dict['sites_sampled'],
args.sampleLength))

print("ignoring filtering/blacklists, size factors if the number of mapped "
"reads would have been used:")
print(tuple(
float(min(mappedList)) / np.array(mappedList)))

elif args.scaleFactorsMethod == 'readCount':
# change the scaleFactor to 1.0
args.scaleFactor = 1.0
# get num of kept reads for bam file 1
args.bam = args.bamfile1
bam1_mapped, _ = get_num_kept_reads(args, statsList[0])
# get num of kept reads for bam file 2
args.bam = args.bamfile2
bam2_mapped, _ = get_num_kept_reads(args, statsList[1])

mapped_reads = [bam1_mapped, bam2_mapped]

# new scale_factors (relative to min of two bams)
scale_factors = float(min(bam1_mapped, bam2_mapped)) / np.array(mapped_reads)
if args.verbose:
print("Size factors using total number "
"of mapped reads: {}".format(scale_factors))

elif args.scaleFactorsMethod == 'None':
scale_factors = None

return scale_factors


def main(args=None):
"""
The algorithm is composed of two steps.
Expand Down Expand Up @@ -274,8 +212,29 @@ def main(args=None):
print("Please only provide one blacklist file.")
sys.exit()
args.blackListFileName = args.blackListFileName[0]

# Normalize user-supplied scale factors into per-sample floats (sf1, sf2).
# 0.0 signals "no manual scale factor supplied" to the downstream Rust code.
# NOTE: the stray `args.pseudocount = 1` that used to sit here clobbered the
# argparse list default and made `len(args.pseudocount)` below raise TypeError.
if args.scaleFactors:
    factors = args.scaleFactors.split(":")
    if len(factors) == 2:
        args.sf1 = float(factors[0])
        # Bug fix: the second factor was previously read from factors[0],
        # silently ignoring the user's denominator scale factor.
        args.sf2 = float(factors[1])
    elif len(factors) == 1:
        # A single value applies to both samples.
        args.sf1 = float(factors[0])
        args.sf2 = float(factors[0])
    else:
        print("Please provide one scale factor, or two by a ':'.")
        sys.exit()
else:
    args.sf1 = 0.0
    args.sf2 = 0.0
# Pseudocounts: one value applies to both numerator and denominator;
# two values map to (numerator, denominator) respectively. Values arrive as
# strings from argparse (no type= on the option), hence the float() casts.
if len(args.pseudocount) == 1:
    args.pseudocount1 = float(args.pseudocount[0])
    args.pseudocount2 = float(args.pseudocount[0])
elif len(args.pseudocount) == 2:
    args.pseudocount1 = float(args.pseudocount[0])
    args.pseudocount2 = float(args.pseudocount[1])
else:
    print(f"Pseudocounts should be either one or two values. Not {args.pseudocount}")
    sys.exit()
print(args)
signal.signal(signal.SIGINT, signal.SIG_DFL)
r_bamcompare(
args.bamfile1, # bam file 1
Expand All @@ -285,8 +244,11 @@ def main(args=None):
args.normalizeUsing, # normalization method
args.effectiveGenomeSize, # effective genome size
args.scaleFactorsMethod, # scaling method
args.sf1,
args.sf2,
args.operation,
args.pseudocount,
args.pseudocount1,
args.pseudocount2,
args.extendReads,
args.extendReadsLen,
args.centerReads,
Expand All @@ -298,6 +260,8 @@ def main(args=None):
args.maxFragmentLength,
args.numberOfProcessors, # threads
args.ignoreForNormalization,
args.skipNonCoveredRegions,
args.skipZeroOverZero,
args.binSize, # bin size
args.region, # regions
args.verbose, # verbose
Expand Down
6 changes: 3 additions & 3 deletions pydeeptools/deeptools/bamCoverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def main(args=None):
numberOfProcessors=args.numberOfProcessors,
extendReads=args.extendReads,
minMappingQuality=args.minMappingQuality,
ignoreDuplicates=args.ignoreDuplicates,
ignoreDuplicates=False, # ignore duplicates is no longer available.
center_read=args.centerReads,
zerosToNans=args.skipNonCoveredRegions,
samFlag_include=args.samFlagInclude,
Expand All @@ -222,7 +222,7 @@ def main(args=None):
numberOfProcessors=args.numberOfProcessors,
extendReads=args.extendReads,
minMappingQuality=args.minMappingQuality,
ignoreDuplicates=args.ignoreDuplicates,
ignoreDuplicates=False, # ignore duplicates is no longer available.
center_read=args.centerReads,
zerosToNans=args.skipNonCoveredRegions,
samFlag_include=args.samFlagInclude,
Expand All @@ -242,7 +242,7 @@ def main(args=None):
numberOfProcessors=args.numberOfProcessors,
extendReads=args.extendReads,
minMappingQuality=args.minMappingQuality,
ignoreDuplicates=args.ignoreDuplicates,
ignoreDuplicates=False, # ignore duplicates is no longer available.
center_read=args.centerReads,
zerosToNans=args.skipNonCoveredRegions,
samFlag_include=args.samFlagInclude,
Expand Down
3 changes: 2 additions & 1 deletion pydeeptools/deeptools/computeMatrix2.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from deeptools import parserCommon
from importlib.metadata import version
from deeptools.hp import r_computematrix
import signal

def parse_arguments(args=None):
parser = \
Expand Down Expand Up @@ -382,7 +383,7 @@ def main(args=None):
'unscaled 5 prime': args.unscaled5prime,
'unscaled 3 prime': args.unscaled3prime
}
# Assert all regions and scores exist
signal.signal(signal.SIGINT, signal.SIG_DFL)
r_computematrix(
args.command,
args.regionsFileName,
Expand Down
39 changes: 19 additions & 20 deletions pydeeptools/deeptools/getScaleFactor.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,24 +59,24 @@ def getFractionKept_worker(chrom, start, end, bamFile, args, offset):

# get rid of duplicate reads that have same position on each of the
# pairs
if args.ignoreDuplicates:
# Assuming more or less concordant reads, use the fragment bounds, otherwise the start positions
if tLen >= 0:
s = read.pos
e = s + tLen
else:
s = read.pnext
e = s - tLen
if read.reference_id != read.next_reference_id:
e = read.pnext
if lpos is not None and lpos == read.reference_start \
and (s, e, read.next_reference_id, read.is_reverse) in prev_pos:
filtered += 1
continue
if lpos != read.reference_start:
prev_pos.clear()
lpos = read.reference_start
prev_pos.add((s, e, read.next_reference_id, read.is_reverse))
# if args.ignoreDuplicates:
# # Assuming more or less concordant reads, use the fragment bounds, otherwise the start positions
# if tLen >= 0:
# s = read.pos
# e = s + tLen
# else:
# s = read.pnext
# e = s - tLen
# if read.reference_id != read.next_reference_id:
# e = read.pnext
# if lpos is not None and lpos == read.reference_start \
# and (s, e, read.next_reference_id, read.is_reverse) in prev_pos:
# filtered += 1
# continue
# if lpos != read.reference_start:
# prev_pos.clear()
# lpos = read.reference_start
# prev_pos.add((s, e, read.next_reference_id, read.is_reverse))

# If filterRNAstrand is in args, then filter accordingly
# This is very similar to what's used in the get_fragment_from_read function in the filterRnaStrand class
Expand Down Expand Up @@ -146,8 +146,7 @@ def fraction_kept(args, stats):
num_needed_to_sample = 0.1 * bam_mapped
else:
num_needed_to_sample = 1000000
if args.exactScaling:
num_needed_to_sample = bam_mapped
num_needed_to_sample = bam_mapped
if num_needed_to_sample == bam_mapped:
distanceBetweenBins = 55000
if args.ignoreForNormalization:
Expand Down
14 changes: 7 additions & 7 deletions pydeeptools/deeptools/parserCommon.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,13 @@ def read_options():
default=False,
metavar="INT bp")

group.add_argument('--ignoreDuplicates',
help='If set, reads that have the same orientation '
'and start position will be considered only '
'once. If reads are paired, the mate\'s position '
'also has to coincide to ignore a read.',
action='store_true'
)
# group.add_argument('--ignoreDuplicates',
# help='If set, reads that have the same orientation '
# 'and start position will be considered only '
# 'once. If reads are paired, the mate\'s position '
# 'also has to coincide to ignore a read.',
# action='store_true'
# )

group.add_argument('--minMappingQuality',
metavar='INT',
Expand Down
2 changes: 1 addition & 1 deletion pydeeptools/deeptools/plotCoverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def main(args=None):
blackListFileName=args.blackListFileName,
extendReads=args.extendReads,
minMappingQuality=args.minMappingQuality,
ignoreDuplicates=args.ignoreDuplicates,
ignoreDuplicates=False, # ignoreDuplicates is no longer supported.
center_read=args.centerReads,
samFlag_include=args.samFlagInclude,
samFlag_exclude=args.samFlagExclude,
Expand Down
Loading
Loading