Skip to content

Commit 9e0d2c4

Browse files
authored
TIN with strandedness (#62)
* calculate TIN scores with strandedness
1 parent 573ff18 commit 9e0d2c4

File tree

4 files changed

+26
-10
lines changed

4 files changed

+26
-10
lines changed

include/tin.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ typedef std::unordered_map<std::string, std::tuple<std::string, int, int, double
1717
// (Reference: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-016-0922-z#Sec11)
1818
void calculateTIN(TINStats* tin_stats, const std::string& gene_bed, const std::string& bam_filepath, int min_cov, int sample_size, const std::string& output_folder, int thread_count);
1919

20-
std::unordered_map<int, int> getReadDepths(htsFile* bam_file, hts_idx_t* idx, bam_hdr_t* header, std::string chr, int start, int end);
20+
std::unordered_map<int, int> getReadDepths(htsFile* bam_file, hts_idx_t* idx, bam_hdr_t* header, std::string chr, int start, int end, bool transcript_strand);
2121

2222
bool checkMinReads(htsFile* bam_file, hts_idx_t* idx, bam_hdr_t* header, std::string chr, int start, int end, int min_reads);
2323

src/hts_reader.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,10 @@ int HTSReader::updateReadAndBaseCounts(bam1_t* record, Basic_Seq_Statistics& bas
8181
break;
8282
case 'N':
8383
basic_qc.total_n_cnt++;
84-
std::cerr << "Warning: N base found in read " << bam_get_qname(record) << std::endl;
84+
// std::cerr << "Warning: N base found in read " << bam_get_qname(record) << std::endl;
8585
break;
8686
default:
87-
printError("Invalid base: " + std::to_string(base));
87+
// printError("Invalid base: " + std::to_string(base));
8888
break;
8989
}
9090
}

src/tin.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
#include "tin_stats.h"
1515

16-
std::unordered_map<int, int> getReadDepths(htsFile* bam_file, hts_idx_t* idx, bam_hdr_t* header, std::string chr, int start, int end)
16+
std::unordered_map<int, int> getReadDepths(htsFile* bam_file, hts_idx_t* idx, bam_hdr_t* header, std::string chr, int start, int end, bool transcript_strand)
1717
{
1818
// Set up the region to fetch reads (1-based)
1919
std::string region = chr + ":" + std::to_string(start) + "-" + std::to_string(end);
@@ -46,6 +46,13 @@ std::unordered_map<int, int> getReadDepths(htsFile* bam_file, hts_idx_t* idx, ba
4646
continue;
4747
}
4848

49+
// Skip if on a different strand
50+
bool read_strand = (record->core.flag & BAM_FREVERSE) ? false : true;
51+
if (read_strand != transcript_strand) {
52+
skip_count++;
53+
continue;
54+
}
55+
4956
read_count++;
5057

5158
// Clear positions for each read
@@ -194,10 +201,13 @@ void calculateTIN(TINStats* tin_stats, const std::string& gene_bed, const std::s
194201

195202
// Get the index for the BAM file
196203
hts_idx_t* index = sam_index_load(bam_file, bam_filepath.c_str());
204+
if (index == NULL) {
205+
std::cerr << "Error loading BAM index" << std::endl;
206+
exit(1);
207+
}
197208

198209
// Read the gene BED file
199210
std::ifstream gene_bed_file(gene_bed);
200-
201211
if (!gene_bed_file.is_open()) {
202212
std::cerr << "Error opening gene BED file" << std::endl;
203213
exit(1);
@@ -242,6 +252,12 @@ void calculateTIN(TINStats* tin_stats, const std::string& gene_bed, const std::s
242252
continue;
243253
}
244254

255+
std::cout << "Calculating TIN for transcript " << name << " with strand " << strand << std::endl;
256+
bool strand_bool = true;
257+
if (strand == "-") {
258+
strand_bool = false;
259+
}
260+
245261
// Remove the last comma from the exon sizes and starts strings
246262
if (exon_sizes_str.back() == ',') {
247263
exon_sizes_str.pop_back();
@@ -278,7 +294,7 @@ void calculateTIN(TINStats* tin_stats, const std::string& gene_bed, const std::s
278294
int exon_end = exon_start + exon_sizes[i] - 1;
279295

280296
// Get the depths and cumulative depths for the region
281-
std::unordered_map<int, int> exon_depths = getReadDepths(bam_file, index, header, chrom, exon_start, exon_end);
297+
std::unordered_map<int, int> exon_depths = getReadDepths(bam_file, index, header, chrom, exon_start, exon_end, strand_bool);
282298
for (const auto& depth : exon_depths) {
283299
C[depth.first] = depth.second;
284300
}
@@ -287,7 +303,7 @@ void calculateTIN(TINStats* tin_stats, const std::string& gene_bed, const std::s
287303
// Get the read depths for the transcript start+1 and end
288304
std::vector<int> transcript_positions = {start + 1, end};
289305
for (const auto& position : transcript_positions) {
290-
std::unordered_map<int, int> transcript_depths = getReadDepths(bam_file, index, header, chrom, position, position);
306+
std::unordered_map<int, int> transcript_depths = getReadDepths(bam_file, index, header, chrom, position, position, strand_bool);
291307
for (const auto& depth : transcript_depths) {
292308
C[depth.first] = depth.second;
293309
}

tests/test_general.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -755,19 +755,19 @@ def test_tin_mean(self, rnaseq_bam_output):
755755
output_statistics = rnaseq_bam_output[1]
756756
input_file = rnaseq_bam_output[2]
757757
tin_mean = output_statistics.getTINMean(input_file)
758-
assert round(tin_mean, 1) == 63.6
758+
assert round(tin_mean, 1) == 60.2
759759

760760
@pytest.mark.dependency(depends=["TestRNASeqBAM::test_success"])
761761
def test_tin_median(self, rnaseq_bam_output):
762762
output_statistics = rnaseq_bam_output[1]
763763
input_file = rnaseq_bam_output[2]
764764
tin_median = output_statistics.getTINMedian(input_file)
765-
assert round(tin_median, 1) == 83.7
765+
assert round(tin_median, 1) == 70.0
766766

767767
@pytest.mark.dependency(depends=["TestRNASeqBAM::test_success"])
768768
def test_tin_stddev(self, rnaseq_bam_output):
769769
output_statistics = rnaseq_bam_output[1]
770770
input_file = rnaseq_bam_output[2]
771771
tin_stddev = output_statistics.getTINStdDev(input_file)
772-
assert round(tin_stddev, 1) == 32.6
772+
assert round(tin_stddev, 1) == 31.0
773773

0 commit comments

Comments
 (0)