-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgetOnePosition.py
More file actions
65 lines (58 loc) · 2.57 KB
/
getOnePosition.py
File metadata and controls
65 lines (58 loc) · 2.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#! /usr/bin/env python3
import sys
import json
import argparse
'''
This program determines how the position of a nucleotide within a context window changes its prediction.
'''
def parseArgs(argv=None):
    """Define the command-line interface and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is the
            behavior of calling ``parseArgs()`` with no arguments.

    Returns:
        argparse.Namespace with the attributes ``input``, ``output``,
        ``number_of_tokens_per_seq``, ``token_size`` and ``position``.

    Raises:
        SystemExit: raised by argparse when a required option is missing or
            a value cannot be converted to the declared type.
    """
    parser = argparse.ArgumentParser(description='Get the prediction values at a single position within the sequence. Output values represent the prediction within the context window (i.e., is the nucleotide at the beginning of the prediction sequence or the end?). Requires output from parseWindow.py')
    parser.add_argument('-i', '--input', type=str, required=True, help='(input) Required input file from parseWindow.py output.')
    parser.add_argument('-o', '--output', type=str, required=True, help='(output) Required output text file')
    parser.add_argument('-n', '--number_of_tokens_per_seq', type=int, default=4096, required=False, help='Number of tokens per sequence')
    parser.add_argument('-t', '--token_size', type=int, default=6, required=False, help='Token size. Most models currently use a token size of 6. Do not change unless a model uses a different token length')
    parser.add_argument('-p', '--position', type=int, default=850, required=False, help='Position within the sequence to query')
    # Forwarding argv lets callers (and tests) supply arguments explicitly;
    # None keeps the original "read the real command line" behavior.
    return parser.parse_args(argv)
def getOnePosition(args):
    """Extract one value per data row from the input file and write the results.

    The input file is read line by line (after stripping whitespace):

    * a line equal to ``Exon`` or ``Intron`` starts a section and resets the
      row counter;
    * an empty line ends the current section;
    * every other line inside a section is parsed as JSON and indexed at
      ``window - row_number + position``, where ``row_number`` starts at 1
      for the first row after the section header;
    * lines outside any section are ignored.

    The selected values are written to the output file as two lines,
    ``exon=[...]`` and ``intron=[...]``.

    Args:
        args: Object exposing ``input``, ``output``,
            ``number_of_tokens_per_seq``, ``token_size`` and ``position``
            (for example the namespace returned by ``parseArgs``).

    Raises:
        IndexError: if the computed index is outside a parsed row.
        json.JSONDecodeError: if a row inside a section is not valid JSON.
    """
    # 4096*6 minus 1 since one nucleotide overlaps the window from the gene.
    window = (args.number_of_tokens_per_seq * args.token_size) - 1
    needed_pos = args.position  # The position being queried

    # Section header -> values collected for that section.
    collected = {"Exon": [], "Intron": []}
    current = None  # list for the section being read; None outside a section
    num = 0  # 1-based row number within the current section

    with open(args.input) as inputF:
        for line in inputF:
            line = line.strip()
            if not line:
                # A blank line closes the section.
                current = None
                continue
            if line in collected:
                # A header opens a section and restarts row numbering.
                current = collected[line]
                num = 0
                continue
            if current is None:
                continue
            num += 1
            # Each successive row shifts the queried index one step left.
            # NOTE(review): once num exceeds window + position the index goes
            # negative and Python indexes from the end of the row instead of
            # failing -- confirm sections never contain that many rows.
            current.append(json.loads(line)[window - num + needed_pos])

    with open(args.output, 'w') as output:
        output.write("exon=" + str(collected["Exon"]) + "\n")
        output.write("intron=" + str(collected["Intron"]) + "\n")
def main():
    """Script entry point: parse the command line, then run the extraction."""
    getOnePosition(parseArgs())
# Run the command-line workflow only when executed as a script, not on import.
if __name__ == '__main__':
    main()