-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparseWindow.py
More file actions
94 lines (81 loc) · 3.49 KB
/
parseWindow.py
File metadata and controls
94 lines (81 loc) · 3.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#! /usr/bin/env python3
'''
The purpose of this program is to read through the output from run_nt.py. run_nt.py takes a snapshot of a single window. Here, we combine all of those predictions to create a list
of all predictions at the nth position of an input sequence. It makes it easier to query predictions at the beginning or end of the sequence given to SegmentNT.
'''
import sys
import argparse
def parseArgs(argv=None):
    '''
    Parse the command-line options of this script.

    Args:
        argv: Optional list of argument strings to parse. When left as None,
            argparse reads the process command line, which is what the script
            entry point relies on.

    Returns:
        argparse.Namespace with two string attributes: `input` (path of the
        run_nt.py output file to read) and `output` (path of the text file to
        write).
    '''
    parser = argparse.ArgumentParser(description='Parse output from run_nt.py to make it easier to find predictions at any position in the input sequence for exons and introns')
    parser.add_argument('-i', '--input', type=str, required=True, help='(input) Required input file from run_nt.py output.')
    parser.add_argument('-o', '--output', type=str, required=True, help='(output) Required output text file')
    return parser.parse_args(argv)
def reformatRunNTResults(args):
    '''
    Regroup the intron/exon probabilities found in a run_nt.py output file by
    position and write them to a text file.

    Args:
        args: Object with string attributes `input` (file to read) and
            `output` (file to write), e.g. the namespace from parseArgs().

    Output file layout:
        line 1   "<number of intron positions> <number of exon positions>"
        "Intron" followed by one Python list per position
        blank line, "Exon", then one Python list per position
    Each list holds every value seen at that position, in file order.

    Raises:
        OSError: if either file cannot be opened.
        ValueError: if a token inside an intron/exon array is not a float.
    '''
    with open(args.input) as inputF:
        intron_pred, exon_pred = _collectPredictions(inputF)

    with open(args.output, 'w') as output:
        output.write(str(len(intron_pred)) + " " + str(len(exon_pred)) + "\n")
        output.write("Intron\n")
        for array in intron_pred:
            output.write(str(array) + "\n")
        output.write("\nExon\n")
        for array in exon_pred:
            output.write(str(array) + "\n")


def _collectPredictions(lines):
    '''
    Collect per-position intron and exon values from run_nt.py output lines.

    An array starts on a line beginning with "Probabilities, intron: [[" or
    "Probabilities, exon: [[" and ends on the line that finishes with "]]".
    Inside an array, a line starting with "[" opens a new row and a line ending
    with "]" closes one; a row may span several lines. Values are separated by
    whitespace (this looks like the printed form of a 2-D numpy array --
    TODO confirm against run_nt.py).

    Args:
        lines: Iterable of text lines.

    Returns:
        (intron_pred, exon_pred): two lists of lists, where element `p` holds
        every value found at column `p` of any row of that feature.
    '''
    by_label = {'intron': [], 'exon': []}
    array_open = ': [['
    current = None  # list being filled, or None while outside any array
    pos = 0         # column index of the next value within the current row

    for line in lines:
        line = line.strip()

        if current is None:
            # Outside an array only a "Probabilities," header is of interest.
            if not line.startswith("Probabilities,"):
                continue
            if line.startswith("Probabilities, "):
                line = line[15:]  # remove "Probabilities, " from the line
            label = next(
                (name for name in by_label if line.startswith(name + array_open)),
                None,
            )
            if label is None:
                # Header for a feature we do not track: skip it (and, because
                # `current` stays None, the rest of its array) instead of
                # failing on float() further down.
                continue
            current = by_label[label]
            line = line[len(label) + len(array_open):]
            pos = 0
        elif line.startswith('['):
            # A new row of the array we are already reading.
            line = line[1:]
            pos = 0

        # Strip the closing bracket(s) and remember what they closed.
        end_of_array = line.endswith("]]")
        end_of_row = not end_of_array and line.endswith("]")
        if end_of_array:
            line = line[:-2]
        elif end_of_row:
            line = line[:-1]

        for num in map(float, line.split()):
            if len(current) == pos:
                current.append([])  # first time this column is seen
            current[pos].append(num)
            pos += 1

        if end_of_row:
            pos = 0
        if end_of_array:
            current = None

    return by_label['intron'], by_label['exon']
def main():
    '''Script entry point: parse the command line, then convert the input file.'''
    args = parseArgs()
    reformatRunNTResults(args)


if __name__ == '__main__':
    main()