Skip to content

Commit 4b94935

Browse files
committed
Move randstrobes_query to seeding
1 parent 1efee06 commit 4b94935

File tree

5 files changed

+71
-72
lines changed

5 files changed

+71
-72
lines changed

src/chainer.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ use log::trace;
55
use crate::details::NamDetails;
66
use crate::hit::{Hit, HitsDetails, find_hits};
77
use crate::index::StrobemerIndex;
8-
use crate::mapper::QueryRandstrobe;
98
use crate::mcsstrategy::McsStrategy;
109
use crate::nam::Nam;
10+
use crate::seeding::QueryRandstrobe;
1111

1212
const N_PRECOMPUTED: usize = 1024;
1313

src/hit.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ use log::Level;
55
use log::trace;
66

77
use crate::index::StrobemerIndex;
8-
use crate::mapper::QueryRandstrobe;
98
use crate::mcsstrategy::McsStrategy;
9+
use crate::seeding::QueryRandstrobe;
1010

1111
#[derive(Debug)]
1212
pub struct Hit {

src/mapper.rs

Lines changed: 1 addition & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ use crate::nam::{Nam, get_nams_by_chaining, reverse_nam_if_needed};
2525
use crate::piecewisealigner::remove_spurious_anchors;
2626
use crate::read::Read;
2727
use crate::revcomp::reverse_complement;
28-
use crate::seeding::{RandstrobeIterator, SeedingParameters, SyncmerIterator};
28+
use crate::seeding::SeedingParameters;
2929

3030
const MAX_PAIR_NAMS: usize = 1000;
3131

@@ -76,73 +76,6 @@ impl Alignment {
7676
}
7777
}
7878

79-
#[derive(Debug)]
80-
pub struct QueryRandstrobe {
81-
pub hash: u64,
82-
pub hash_revcomp: u64,
83-
pub start: usize,
84-
pub end: usize,
85-
}
86-
87-
/// Generate randstrobes for a query sequence and its reverse complement.
88-
/// TODO move to strobes.rs?
89-
pub fn randstrobes_query(seq: &[u8], parameters: &SeedingParameters) -> [Vec<QueryRandstrobe>; 2] {
90-
let mut randstrobes = {
91-
let expected = seq.len() / (parameters.syncmer.k - parameters.syncmer.s + 1);
92-
[Vec::with_capacity(expected), Vec::with_capacity(expected)]
93-
};
94-
if seq.len() < parameters.randstrobe.w_max {
95-
return randstrobes;
96-
}
97-
98-
// Generate syncmers for the forward sequence
99-
let syncmer_iter = SyncmerIterator::new(
100-
seq,
101-
parameters.syncmer.k,
102-
parameters.syncmer.s,
103-
parameters.syncmer.t,
104-
);
105-
let mut syncmers: Vec<_> = syncmer_iter.collect();
106-
107-
// Generate randstrobes for the forward sequence
108-
let randstrobe_iter =
109-
RandstrobeIterator::new(syncmers.iter().cloned(), parameters.randstrobe.clone());
110-
111-
for randstrobe in randstrobe_iter {
112-
randstrobes[0].push(QueryRandstrobe {
113-
hash: randstrobe.hash,
114-
hash_revcomp: randstrobe.hash_revcomp,
115-
start: randstrobe.strobe1_pos,
116-
end: randstrobe.strobe2_pos + parameters.syncmer.k,
117-
});
118-
}
119-
120-
// For the reverse complement, we can re-use the syncmers of the forward
121-
// sequence because canonical syncmers are invariant under reverse
122-
// complementing. Only the coordinates need to be adjusted.
123-
syncmers.reverse();
124-
for i in 0..syncmers.len() {
125-
syncmers[i].position = seq.len() - syncmers[i].position - parameters.syncmer.k;
126-
}
127-
128-
// Randstrobes cannot be re-used for the reverse complement:
129-
// If in the forward direction, syncmer[i] and syncmer[j] were paired up, it
130-
// is not necessarily the case that syncmer[j] is going to be paired with
131-
// syncmer[i] in the reverse direction because i is fixed in the forward
132-
// direction and j is fixed in the reverse direction.
133-
let rc_randstrobe_iter =
134-
RandstrobeIterator::new(syncmers.into_iter(), parameters.randstrobe.clone());
135-
for randstrobe in rc_randstrobe_iter {
136-
randstrobes[1].push(QueryRandstrobe {
137-
hash: randstrobe.hash,
138-
hash_revcomp: randstrobe.hash_revcomp,
139-
start: randstrobe.strobe1_pos,
140-
end: randstrobe.strobe2_pos + parameters.syncmer.k,
141-
});
142-
}
143-
randstrobes
144-
}
145-
14679
/// Conversion of an Alignment into a SamRecord
14780
#[derive(Default)]
14881
pub struct SamOutput {

src/nam.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ use crate::chainer::Chainer;
1010
use crate::details::NamDetails;
1111
use crate::index::StrobemerIndex;
1212
use crate::io::fasta::RefSequence;
13-
use crate::mapper;
1413
use crate::mcsstrategy::McsStrategy;
1514
use crate::read::Read;
15+
use crate::seeding::randstrobes_query;
1616

1717
/// Non-overlapping approximate match
1818
#[derive(Clone, Debug)]
@@ -138,7 +138,7 @@ pub fn get_nams_by_chaining(
138138
rng: &mut Rng,
139139
) -> (NamDetails, Vec<Nam>) {
140140
let timer = Instant::now();
141-
let query_randstrobes = mapper::randstrobes_query(sequence, &index.parameters);
141+
let query_randstrobes = randstrobes_query(sequence, &index.parameters);
142142
let time_randstrobes = timer.elapsed().as_secs_f64();
143143

144144
trace!(

src/seeding/mod.rs

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,69 @@ pub mod syncmers;
66
pub use parameters::{InvalidSeedingParameter, SeedingParameters};
77
pub use strobes::{DEFAULT_AUX_LEN, RandstrobeIterator, RandstrobeParameters};
88
pub use syncmers::{Syncmer, SyncmerIterator, SyncmerParameters};
9+
10+
/// A randstrobe seed extracted from a query sequence.
///
/// Values are produced by `randstrobes_query`, which copies the hashes from
/// the underlying randstrobe and converts its strobe positions into a
/// `start..end` span on the sequence the seed was generated from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct QueryRandstrobe {
    /// Hash of the randstrobe, copied from the randstrobe's `hash` field.
    pub hash: u64,
    /// Copied from the randstrobe's `hash_revcomp` field.
    // NOTE(review): presumably the hash of the same seed as seen from the
    // opposite strand; confirm against the randstrobe implementation.
    pub hash_revcomp: u64,
    /// Position of the first strobe (the randstrobe's `strobe1_pos`).
    pub start: usize,
    /// Position of the second strobe plus the syncmer length `k`
    /// (`strobe2_pos + k`).
    pub end: usize,
}
17+
18+
/// Generate randstrobes for a query sequence and its reverse complement.
19+
pub fn randstrobes_query(seq: &[u8], parameters: &SeedingParameters) -> [Vec<QueryRandstrobe>; 2] {
20+
let mut randstrobes = {
21+
let expected = seq.len() / (parameters.syncmer.k - parameters.syncmer.s + 1);
22+
[Vec::with_capacity(expected), Vec::with_capacity(expected)]
23+
};
24+
if seq.len() < parameters.randstrobe.w_max {
25+
return randstrobes;
26+
}
27+
28+
// Generate syncmers for the forward sequence
29+
let syncmer_iter = SyncmerIterator::new(
30+
seq,
31+
parameters.syncmer.k,
32+
parameters.syncmer.s,
33+
parameters.syncmer.t,
34+
);
35+
let mut syncmers: Vec<_> = syncmer_iter.collect();
36+
37+
// Generate randstrobes for the forward sequence
38+
let randstrobe_iter =
39+
RandstrobeIterator::new(syncmers.iter().cloned(), parameters.randstrobe.clone());
40+
41+
for randstrobe in randstrobe_iter {
42+
randstrobes[0].push(QueryRandstrobe {
43+
hash: randstrobe.hash,
44+
hash_revcomp: randstrobe.hash_revcomp,
45+
start: randstrobe.strobe1_pos,
46+
end: randstrobe.strobe2_pos + parameters.syncmer.k,
47+
});
48+
}
49+
50+
// For the reverse complement, we can re-use the syncmers of the forward
51+
// sequence because canonical syncmers are invariant under reverse
52+
// complementing. Only the coordinates need to be adjusted.
53+
syncmers.reverse();
54+
for i in 0..syncmers.len() {
55+
syncmers[i].position = seq.len() - syncmers[i].position - parameters.syncmer.k;
56+
}
57+
58+
// Randstrobes cannot be re-used for the reverse complement:
59+
// If in the forward direction, syncmer[i] and syncmer[j] were paired up, it
60+
// is not necessarily the case that syncmer[j] is going to be paired with
61+
// syncmer[i] in the reverse direction because i is fixed in the forward
62+
// direction and j is fixed in the reverse direction.
63+
let rc_randstrobe_iter =
64+
RandstrobeIterator::new(syncmers.into_iter(), parameters.randstrobe.clone());
65+
for randstrobe in rc_randstrobe_iter {
66+
randstrobes[1].push(QueryRandstrobe {
67+
hash: randstrobe.hash,
68+
hash_revcomp: randstrobe.hash_revcomp,
69+
start: randstrobe.strobe1_pos,
70+
end: randstrobe.strobe2_pos + parameters.syncmer.k,
71+
});
72+
}
73+
randstrobes
74+
}

0 commit comments

Comments
 (0)