@@ -130,14 +130,15 @@ pub fn parse_regions(region: &str, bam_ifile: Vec<&str>) -> (Vec<Region>, HashMa
130130#[ allow( unused_variables) ]
131131#[ allow( unused_mut) ]
132132pub fn bam_pileup < ' a > (
133- bam_ifile : & str ,
134- regionvec : & ' a Vec < Region > ,
135- provbs : & u32 ,
136- ispe : & bool ,
137- ignorechr : & Vec < String > ,
138- filters : & Alignmentfilters ,
139- collapse : bool ,
140- gene_mode : bool
133+ bam_ifile : & str , // the bam file to consider.
134+ regionvec : & ' a Vec < Region > , // Vector with regions to calculate coverage from.
135+ provbs : & u32 , // Provisional bin size, in case we have gene mode later on this gets omitted.
136+ ispe : & bool , // Wether or not the bam file is paired end or single end.
137+ ignorechr : & Vec < String > , // chromosomes to ignore for normalization calculation.
138+ filters : & Alignmentfilters , // a struct with filtering settings.
139+ collapse : bool , // collapse bins with same coverage, should be avoided in bamCompare scenario for example.
140+ gene_mode : bool , // Gene mode (BED / GTF) or not (bins)
141+ gather_lengths : bool , // Wether or not to collect frag/read lengths (blows up memory for big bam files in MBS, for example).
141142) -> (
142143 Vec < TempPath > , // temp bedgraph file.
143144 u32 , // mapped reads
@@ -197,21 +198,31 @@ pub fn bam_pileup<'a>(
197198 ( Revalue :: U ( start) , Revalue :: U ( end) ) => {
198199 counts = vec ! [ 0.0 ; 1 ] ;
199200 for record in bam. records ( ) {
200- let record = record. expect ( "Error parsing record." ) ;
201- if filters. filter ( & record ) {
202- continue ;
201+ let mut record = record. expect ( "Error parsing record." ) ;
202+ if filters. manipulate {
203+ filters . manipulate_record ( & mut record ) ;
203204 }
205+ if filters. filter {
206+ if filters. filter_record ( & record) {
207+ continue ;
208+ }
209+ }
210+
204211 if !ignorechr. contains ( & region. 0 ) {
205212 if record. is_unmapped ( ) {
206213 unmapped_reads += 1 ;
207214 } else {
208215 mapped_reads += 1 ;
209216 if * ispe {
210217 if record. is_paired ( ) && record. is_proper_pair ( ) && ( record. flags ( ) & FREAD != 0 ) {
211- fraglens. push ( record. insert_size ( ) . abs ( ) as u32 ) ;
218+ if gather_lengths {
219+ fraglens. push ( record. insert_size ( ) . abs ( ) as u32 ) ;
220+ }
212221 }
213222 }
214- readlens. push ( record. seq_len ( ) as u32 ) ;
223+ if gather_lengths {
224+ readlens. push ( record. seq_len ( ) as u32 ) ;
225+ }
215226 }
216227 }
217228 counts[ 0 ] += 1.0 ;
@@ -230,9 +241,14 @@ pub fn bam_pileup<'a>(
230241 bam. fetch ( ( regstruct. chrom . as_str ( ) , exon. 0 , exon. 1 ) )
231242 . expect ( & format ! ( "Error fetching region: {}:{},{}" , regstruct. chrom, exon. 0 , exon. 1 ) ) ;
232243 for record in bam. records ( ) {
233- let record = record. expect ( "Error parsing record." ) ;
234- if filters. filter ( & record) {
235- continue ;
244+ let mut record = record. expect ( "Error parsing record." ) ;
245+ if filters. manipulate {
246+ filters. manipulate_record ( & mut record) ;
247+ }
248+ if filters. filter {
249+ if filters. filter_record ( & record) {
250+ continue ;
251+ }
236252 }
237253 if !ignorechr. contains ( & region. 0 ) {
238254 if record. is_unmapped ( ) {
@@ -241,10 +257,14 @@ pub fn bam_pileup<'a>(
241257 mapped_reads += 1 ;
242258 if * ispe {
243259 if record. is_paired ( ) && record. is_proper_pair ( ) && ( record. flags ( ) & FREAD != 0 ) {
244- fraglens. push ( record. insert_size ( ) . abs ( ) as u32 ) ;
260+ if gather_lengths {
261+ fraglens. push ( record. insert_size ( ) . abs ( ) as u32 ) ;
262+ }
245263 }
246264 }
247- readlens. push ( record. seq_len ( ) as u32 ) ;
265+ if gather_lengths {
266+ readlens. push ( record. seq_len ( ) as u32 ) ;
267+ }
248268 }
249269 }
250270 counts[ 0 ] += 1.0 ;
@@ -259,9 +279,14 @@ pub fn bam_pileup<'a>(
259279 // let mut binstart = region.1;
260280 let mut binix: u32 = 0 ;
261281 for record in bam. records ( ) {
262- let record = record. expect ( "Error parsing record." ) ;
263- if filters. filter ( & record) {
264- continue ;
282+ let mut record = record. expect ( "Error parsing record." ) ;
283+ if filters. manipulate {
284+ filters. manipulate_record ( & mut record) ;
285+ }
286+ if filters. filter {
287+ if filters. filter_record ( & record) {
288+ continue ;
289+ }
265290 }
266291 if !ignorechr. contains ( & region. 0 ) {
267292 if record. is_unmapped ( ) {
@@ -270,10 +295,14 @@ pub fn bam_pileup<'a>(
270295 mapped_reads += 1 ;
271296 if * ispe {
272297 if record. is_paired ( ) && record. is_proper_pair ( ) && ( record. flags ( ) & FREAD != 0 ) {
273- fraglens. push ( record. insert_size ( ) . abs ( ) as u32 ) ;
298+ if gather_lengths {
299+ fraglens. push ( record. insert_size ( ) . abs ( ) as u32 ) ;
300+ }
274301 }
275302 }
276- readlens. push ( record. seq_len ( ) as u32 ) ;
303+ if gather_lengths {
304+ readlens. push ( record. seq_len ( ) as u32 ) ;
305+ }
277306 }
278307 }
279308 let indices: HashSet < usize > = record
@@ -367,6 +396,7 @@ pub struct Alignmentfilters {
367396 pub extendreads : u32 ,
368397 pub centerreads : bool ,
369398 pub filter : bool ,
399+ pub manipulate : bool ,
370400}
371401impl Alignmentfilters {
372402 pub fn new (
@@ -385,6 +415,7 @@ impl Alignmentfilters {
385415 // Go through the arguments, and if they are not set or have default values, we set a filter boolean to false.
386416 // Only when filtering needs to happen the filterrecord will be invoked, for performance.
387417 let mut filter: bool = false ;
418+ let mut manipulate: bool = false ;
388419 let _mmq = minmappingquality. unwrap_or ( 0 ) ;
389420 let _sfi = samflaginclude. unwrap_or ( 0 ) ;
390421 let _sfe = samflagexclude. unwrap_or ( 0 ) ;
@@ -396,12 +427,13 @@ impl Alignmentfilters {
396427 let _extend = extendreads. unwrap_or ( 0 ) ;
397428 let _center = centerreads. unwrap_or ( false ) ;
398429
399- // Set the filter bool for a quick escape in case filtering is not needed.
400- if _mmq > 0 || _sfi > 0 || _sfe > 0 || _mifl > 0 || _mafl > 0 || _mnase || _offset != ( 1 , -1 ) || _frs != "None" || _extend > 0 || _center {
401- filter = true ;
430+ // Set the manipulate bool for a quick escape in case manipulation is not needed.
431+ if _offset != ( 1 , -1 ) || _mnase || _extend > 0 || _center {
432+ manipulate = true ;
402433 }
403- // If blacklist is set, we also need to filter.
404- if blacklist. is_some ( ) {
434+
435+ // Set the filter bool for a quick escape in case filtering is not needed.
436+ if _mmq > 0 || _sfi > 0 || _sfe > 0 || _mifl > 0 || _mafl > 0 || _frs != "None" || blacklist. is_some ( ) {
405437 filter = true ;
406438 }
407439
@@ -418,9 +450,10 @@ impl Alignmentfilters {
418450 extendreads : _extend,
419451 centerreads : _center,
420452 filter : filter,
453+ manipulate : manipulate,
421454 }
422455 }
423- pub fn filter ( & self , rec : & Record ) -> bool {
456+ pub fn filter_record ( & self , rec : & Record ) -> bool {
424457 // Decides filtering of a record. The bool return is used to 'continue', i.e. skip the record.
425458 if rec. is_unmapped ( ) {
426459 return true ;
@@ -493,12 +526,8 @@ impl Alignmentfilters {
493526 }
494527 }
495528
496- pub fn mnase ( & self , rec : & Record ) -> Option < Record > {
497- // Only retain records that are proper pairs, and not reverse strand.
498- if rec. is_paired ( ) && rec. is_proper_pair ( ) && !rec. is_reverse ( ) {
499-
500- }
501- return None ;
529+ pub fn manipulate_record ( & self , rec : & mut Record ) {
530+ println ! ( "Will manipulate the record, but not now lol" ) ;
502531 }
503532}
504533
@@ -1440,33 +1469,33 @@ pub fn region_divider(regs: &Vec<Region>) -> Vec<Vec<Region>> {
14401469 tempregionvec = Vec :: new ( ) ;
14411470 bplen = 0
14421471 }
1443- // our regions are rather large, so we can split these up (in case both start/end are Revalue:U)
1444- match ( & reg. start , & reg. end ) {
1445- ( Revalue :: U ( start) , Revalue :: U ( end) ) => {
1446- let mut start: u32 = * start;
1447- let mut end: u32 = * end;
1448- while start < end {
1449- let newend = std:: cmp:: min ( start + 10000000 , end) ;
1450- let mut entryname = format ! ( "{}:{}-{}" , reg. chrom, start, newend) ;
1451- tempregionvec. push ( Region {
1452- chrom : reg. chrom . clone ( ) ,
1453- start : Revalue :: U ( start) ,
1454- end : Revalue :: U ( newend) ,
1455- score : reg. score . clone ( ) ,
1456- strand : reg. strand . clone ( ) ,
1457- name : entryname,
1458- regionlength : newend-start
1459- } ) ;
1460- start = newend;
1461- }
1462- blocks. push ( tempregionvec) ;
1463- tempregionvec = Vec :: new ( ) ;
1464- } ,
1465- _ => {
1466- blocks. push ( vec ! [ reg. clone( ) ] ) ;
1467- }
1468- // blocks.push(vec![reg.clone()]);
1469- }
1472+ // // our regions are rather large, so we can split these up (in case both start/end are Revalue:U)
1473+ // match (®.start, ®.end) {
1474+ // (Revalue::U(start), Revalue::U(end)) => {
1475+ // let mut start: u32 = *start;
1476+ // let mut end: u32 = *end;
1477+ // while start < end {
1478+ // let newend = std::cmp::min(start + 10000000, end);
1479+ // let mut entryname = format!("{}:{}-{}", reg.chrom, start, newend);
1480+ // tempregionvec.push( Region {
1481+ // chrom: reg.chrom.clone(),
1482+ // start: Revalue::U(start),
1483+ // end: Revalue::U(newend),
1484+ // score: reg.score.clone(),
1485+ // strand: reg.strand.clone(),
1486+ // name: entryname,
1487+ // regionlength: newend-start
1488+ // } );
1489+ // start = newend;
1490+ // }
1491+ // blocks.push(tempregionvec);
1492+ // tempregionvec = Vec::new();
1493+ // },
1494+ // _ => {
1495+ // blocks.push(vec![reg.clone()]);
1496+ // }
1497+ blocks. push ( vec ! [ reg. clone( ) ] ) ;
1498+ // }
14701499 } else {
14711500 tempregionvec. push ( reg. clone ( ) ) ;
14721501 bplen += reg. regionlength ;
0 commit comments