Skip to content

Commit aba9bbe

Browse files
committed
drop length metric collection in mbs mode
1 parent 1a48153 commit aba9bbe

File tree

5 files changed

+94
-66
lines changed

5 files changed

+94
-66
lines changed

pydeeptools/deeptools/multiBamSummary2.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,6 @@ def main(args=None):
248248
"""
249249
args = process_args(args)
250250
print(f"args = {args}")
251-
print("running r_mbams")
252251
r_mbams(
253252
args.command,
254253
args.bamfiles,

src/bamcompare.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ pub fn r_bamcompare(
9696
bamfiles.par_iter()
9797
.map(|(bamfile, ispe)| {
9898
let (bg, mapped, unmapped, readlen, fraglen) = regionblocks.par_iter()
99-
.map(|i| bam_pileup(bamfile, &i, &binsize, &ispe, &ignorechr, &filters, false, false))
99+
.map(|i| bam_pileup(bamfile, &i, &binsize, &ispe, &ignorechr, &filters, false, false, true))
100100
.reduce(
101101
|| (vec![], 0, 0, vec![], vec![]),
102102
|(mut _bg, mut _mapped, mut _unmapped, mut _readlen, mut _fraglen), (bg, mapped, unmapped, readlen, fraglen)| {

src/bamcoverage.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ pub fn r_bamcoverage(
125125
let pool = ThreadPoolBuilder::new().num_threads(nproc).build().unwrap();
126126
let (bg, mapped, _unmapped, readlen, fraglen) = pool.install(|| {
127127
regionblocks.par_iter()
128-
.map(|i| bam_pileup(bamifile, &i, &binsize, &ispe, &ignorechr, &filters, collapse, false))
128+
.map(|i| bam_pileup(bamifile, &i, &binsize, &ispe, &ignorechr, &filters, collapse, false, true))
129129
.reduce(
130130
|| (vec![], 0, 0, vec![], vec![]),
131131
|(mut _bg, mut _mapped, mut _unmapped, mut _readlen, mut _fraglen), (bg, mapped, unmapped, readlen, fraglen)| {

src/covcalc.rs

Lines changed: 91 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -130,14 +130,15 @@ pub fn parse_regions(region: &str, bam_ifile: Vec<&str>) -> (Vec<Region>, HashMa
130130
#[allow(unused_variables)]
131131
#[allow(unused_mut)]
132132
pub fn bam_pileup<'a>(
133-
bam_ifile: &str,
134-
regionvec: &'a Vec<Region>,
135-
provbs: &u32,
136-
ispe: &bool,
137-
ignorechr: &Vec<String>,
138-
filters: &Alignmentfilters,
139-
collapse: bool,
140-
gene_mode: bool
133+
bam_ifile: &str, // the bam file to consider.
134+
regionvec: &'a Vec<Region>, // Vector with regions to calculate coverage from.
135+
provbs: &u32, // Provisional bin size, in case we have gene mode later on this gets omitted.
136+
ispe: &bool, // Wether or not the bam file is paired end or single end.
137+
ignorechr: &Vec<String>, // chromosomes to ignore for normalization calculation.
138+
filters: &Alignmentfilters, // a struct with filtering settings.
139+
collapse: bool, // collapse bins with same coverage, should be avoided in bamCompare scenario for example.
140+
gene_mode: bool, // Gene mode (BED / GTF) or not (bins)
141+
gather_lengths: bool, // Wether or not to collect frag/read lengths (blows up memory for big bam files in MBS, for example).
141142
) -> (
142143
Vec<TempPath>, // temp bedgraph file.
143144
u32, // mapped reads
@@ -197,21 +198,31 @@ pub fn bam_pileup<'a>(
197198
(Revalue::U(start), Revalue::U(end)) => {
198199
counts = vec![0.0; 1];
199200
for record in bam.records() {
200-
let record = record.expect("Error parsing record.");
201-
if filters.filter(&record) {
202-
continue;
201+
let mut record = record.expect("Error parsing record.");
202+
if filters.manipulate {
203+
filters.manipulate_record(&mut record);
203204
}
205+
if filters.filter {
206+
if filters.filter_record(&record) {
207+
continue;
208+
}
209+
}
210+
204211
if !ignorechr.contains(&region.0) {
205212
if record.is_unmapped() {
206213
unmapped_reads += 1;
207214
} else {
208215
mapped_reads += 1;
209216
if *ispe {
210217
if record.is_paired() && record.is_proper_pair() && (record.flags() & FREAD != 0) {
211-
fraglens.push(record.insert_size().abs() as u32);
218+
if gather_lengths {
219+
fraglens.push(record.insert_size().abs() as u32);
220+
}
212221
}
213222
}
214-
readlens.push(record.seq_len() as u32);
223+
if gather_lengths {
224+
readlens.push(record.seq_len() as u32);
225+
}
215226
}
216227
}
217228
counts[0] += 1.0;
@@ -230,9 +241,14 @@ pub fn bam_pileup<'a>(
230241
bam.fetch((regstruct.chrom.as_str(), exon.0, exon.1))
231242
.expect(&format!("Error fetching region: {}:{},{}", regstruct.chrom, exon.0, exon.1));
232243
for record in bam.records() {
233-
let record = record.expect("Error parsing record.");
234-
if filters.filter(&record) {
235-
continue;
244+
let mut record = record.expect("Error parsing record.");
245+
if filters.manipulate {
246+
filters.manipulate_record(&mut record);
247+
}
248+
if filters.filter {
249+
if filters.filter_record(&record) {
250+
continue;
251+
}
236252
}
237253
if !ignorechr.contains(&region.0) {
238254
if record.is_unmapped() {
@@ -241,10 +257,14 @@ pub fn bam_pileup<'a>(
241257
mapped_reads += 1;
242258
if *ispe {
243259
if record.is_paired() && record.is_proper_pair() && (record.flags() & FREAD != 0) {
244-
fraglens.push(record.insert_size().abs() as u32);
260+
if gather_lengths {
261+
fraglens.push(record.insert_size().abs() as u32);
262+
}
245263
}
246264
}
247-
readlens.push(record.seq_len() as u32);
265+
if gather_lengths {
266+
readlens.push(record.seq_len() as u32);
267+
}
248268
}
249269
}
250270
counts[0] += 1.0;
@@ -259,9 +279,14 @@ pub fn bam_pileup<'a>(
259279
// let mut binstart = region.1;
260280
let mut binix: u32 = 0;
261281
for record in bam.records() {
262-
let record = record.expect("Error parsing record.");
263-
if filters.filter(&record) {
264-
continue;
282+
let mut record = record.expect("Error parsing record.");
283+
if filters.manipulate {
284+
filters.manipulate_record(&mut record);
285+
}
286+
if filters.filter {
287+
if filters.filter_record(&record) {
288+
continue;
289+
}
265290
}
266291
if !ignorechr.contains(&region.0) {
267292
if record.is_unmapped() {
@@ -270,10 +295,14 @@ pub fn bam_pileup<'a>(
270295
mapped_reads += 1;
271296
if *ispe {
272297
if record.is_paired() && record.is_proper_pair() && (record.flags() & FREAD != 0) {
273-
fraglens.push(record.insert_size().abs() as u32);
298+
if gather_lengths {
299+
fraglens.push(record.insert_size().abs() as u32);
300+
}
274301
}
275302
}
276-
readlens.push(record.seq_len() as u32);
303+
if gather_lengths {
304+
readlens.push(record.seq_len() as u32);
305+
}
277306
}
278307
}
279308
let indices: HashSet<usize> = record
@@ -367,6 +396,7 @@ pub struct Alignmentfilters {
367396
pub extendreads: u32,
368397
pub centerreads: bool,
369398
pub filter: bool,
399+
pub manipulate: bool,
370400
}
371401
impl Alignmentfilters {
372402
pub fn new(
@@ -385,6 +415,7 @@ impl Alignmentfilters {
385415
// Go through the arguments, and if they are not set or have default values, we set a filter boolean to false.
386416
// Only when filtering needs to happen the filterrecord will be invoked, for performance.
387417
let mut filter: bool = false;
418+
let mut manipulate: bool = false;
388419
let _mmq = minmappingquality.unwrap_or(0);
389420
let _sfi = samflaginclude.unwrap_or(0);
390421
let _sfe = samflagexclude.unwrap_or(0);
@@ -396,12 +427,13 @@ impl Alignmentfilters {
396427
let _extend = extendreads.unwrap_or(0);
397428
let _center = centerreads.unwrap_or(false);
398429

399-
// Set the filter bool for a quick escape in case filtering is not needed.
400-
if _mmq > 0 || _sfi > 0 || _sfe > 0 || _mifl > 0 || _mafl > 0 || _mnase || _offset != (1, -1) || _frs != "None" || _extend > 0 || _center {
401-
filter = true;
430+
// Set the manipulate bool for a quick escape in case manipulation is not needed.
431+
if _offset != (1, -1) || _mnase || _extend > 0 || _center {
432+
manipulate = true;
402433
}
403-
// If blacklist is set, we also need to filter.
404-
if blacklist.is_some() {
434+
435+
// Set the filter bool for a quick escape in case filtering is not needed.
436+
if _mmq > 0 || _sfi > 0 || _sfe > 0 || _mifl > 0 || _mafl > 0 || _frs != "None" || blacklist.is_some() {
405437
filter = true;
406438
}
407439

@@ -418,9 +450,10 @@ impl Alignmentfilters {
418450
extendreads: _extend,
419451
centerreads: _center,
420452
filter: filter,
453+
manipulate: manipulate,
421454
}
422455
}
423-
pub fn filter(&self, rec: &Record) -> bool {
456+
pub fn filter_record(&self, rec: &Record) -> bool {
424457
// Decides filtering of a record. The bool return is used to 'continue', i.e. skip the record.
425458
if rec.is_unmapped() {
426459
return true;
@@ -493,12 +526,8 @@ impl Alignmentfilters {
493526
}
494527
}
495528

496-
pub fn mnase(&self, rec: &Record) -> Option<Record> {
497-
// Only retain records that are proper pairs, and not reverse strand.
498-
if rec.is_paired() && rec.is_proper_pair() && !rec.is_reverse() {
499-
500-
}
501-
return None;
529+
pub fn manipulate_record(&self, rec: &mut Record) {
530+
println!("Will manipulate the record, but not now lol");
502531
}
503532
}
504533

@@ -1440,33 +1469,33 @@ pub fn region_divider(regs: &Vec<Region>) -> Vec<Vec<Region>> {
14401469
tempregionvec = Vec::new();
14411470
bplen = 0
14421471
}
1443-
// our regions are rather large, so we can split these up (in case both start/end are Revalue:U)
1444-
match (&reg.start, &reg.end) {
1445-
(Revalue::U(start), Revalue::U(end)) => {
1446-
let mut start: u32 = *start;
1447-
let mut end: u32 = *end;
1448-
while start < end {
1449-
let newend = std::cmp::min(start + 10000000, end);
1450-
let mut entryname = format!("{}:{}-{}", reg.chrom, start, newend);
1451-
tempregionvec.push( Region {
1452-
chrom: reg.chrom.clone(),
1453-
start: Revalue::U(start),
1454-
end: Revalue::U(newend),
1455-
score: reg.score.clone(),
1456-
strand: reg.strand.clone(),
1457-
name: entryname,
1458-
regionlength: newend-start
1459-
} );
1460-
start = newend;
1461-
}
1462-
blocks.push(tempregionvec);
1463-
tempregionvec = Vec::new();
1464-
},
1465-
_ => {
1466-
blocks.push(vec![reg.clone()]);
1467-
}
1468-
//blocks.push(vec![reg.clone()]);
1469-
}
1472+
// // our regions are rather large, so we can split these up (in case both start/end are Revalue:U)
1473+
// match (&reg.start, &reg.end) {
1474+
// (Revalue::U(start), Revalue::U(end)) => {
1475+
// let mut start: u32 = *start;
1476+
// let mut end: u32 = *end;
1477+
// while start < end {
1478+
// let newend = std::cmp::min(start + 10000000, end);
1479+
// let mut entryname = format!("{}:{}-{}", reg.chrom, start, newend);
1480+
// tempregionvec.push( Region {
1481+
// chrom: reg.chrom.clone(),
1482+
// start: Revalue::U(start),
1483+
// end: Revalue::U(newend),
1484+
// score: reg.score.clone(),
1485+
// strand: reg.strand.clone(),
1486+
// name: entryname,
1487+
// regionlength: newend-start
1488+
// } );
1489+
// start = newend;
1490+
// }
1491+
// blocks.push(tempregionvec);
1492+
// tempregionvec = Vec::new();
1493+
// },
1494+
// _ => {
1495+
// blocks.push(vec![reg.clone()]);
1496+
// }
1497+
blocks.push(vec![reg.clone()]);
1498+
// }
14701499
} else {
14711500
tempregionvec.push(reg.clone());
14721501
bplen += reg.regionlength;

src/multibamsummary.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ pub fn r_mbams(
177177
bampfiles.par_iter()
178178
.map(|(bamfile, ispe)| {
179179
let (bg, _mapped, _unmapped, _readlen, _fraglen) = regionblocks.par_iter()
180-
.map(|i| bam_pileup(bamfile, &i, &binsize, &ispe, &ignorechr, &filters, false, gene_mode))
180+
.map(|i| bam_pileup(bamfile, &i, &binsize, &ispe, &ignorechr, &filters, false, gene_mode, false))
181181
.reduce(
182182
|| (vec![], 0, 0, vec![], vec![]),
183183
|(mut _bg, mut _mapped, mut _unmapped, mut _readlen, mut _fraglen), (bg, mapped, unmapped, readlen, fraglen)| {

0 commit comments

Comments
 (0)