-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathPilonCheck.py
executable file
·117 lines (103 loc) · 4.29 KB
/
PilonCheck.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Imports
#--------
from __future__ import division
from collections import defaultdict
import sys, os
import math
syntax = '''----------------------------------------------------------------------------
Description: This script generates a file in PacBio utilities format with the 'bad' positions
of PacBio utilities that have been generated by several options of correction.
The correct positions to change are extracted from the Pilon changes file (previously converted
to BED format with parser_pilon_bed.py)
It also generates a file with those possible changes to PacBio utilities that cannot be found in Pilon).
Usage: python3 PilonCheck.py <Pilon.changes.bed> <targets_onlybad.txt> \\
<pilon_common.txt> <pilon_not_common.txt> <not_warning_file.txt> <warning_file.txt>
-----------------------------------------------------------------------------'''
if len(sys.argv) != 7:
print (syntax)
sys.exit()
# Parameters catching
#--------------------
changesPilon_1 = sys.argv[1]
targetsPacBioutilities_1 = sys.argv[2]
output1_file = sys.argv[3]
output2_file = sys.argv[4]
output3_file = sys.argv[5]
output4_file = sys.argv[6]
# Main
#-----
print (" Reading & storing Pilon file")
inhandle1 = open(changesPilon_1 , 'r')
lines1 = inhandle1.readlines()
inhandle1.close()
pilon_dic = defaultdict(lambda: defaultdict(lambda: defaultdict()))
#header =
#outhandle.write(header)
for line in lines1:
if "#" in line: # Dicard header lines
continue
else:
line = line.rstrip("\n")
chunks = line.split("\t")
name = chunks[0]
start = int(chunks[1])-1
pos1 = chunks[5]
pos2 = chunks[6]
lengthBasePos1 = len(pos1)
lengthBasePos2 = len(pos2)
pilon_dic[name][start]['line'] = line
pilon_dic[name][start]['pos1'] = pos1
pilon_dic[name][start]['pos2'] = pos2
pilon_dic[name][start]['lengthBasePos1'] = lengthBasePos1
pilon_dic[name][start]['lengthBasePos2'] = lengthBasePos2
print (" Reading PacBio utilities file & writing ouputs")
inhandle2 = open(targetsPacBioutilities_1 , 'r')
outhandle1 = open(output1_file , 'w')
outhandle2 = open(output2_file , 'w')
outhandle3 = open(output3_file , 'w')
outhandle4 = open(output4_file , 'w')
lines2 = inhandle2.readlines()
inhandle2.close()
for line in lines2:
if "#" in line[0]:
outhandle1.write(line)
else:
line = line.rstrip("\n")
chunks = line.split("\t")
name = chunks[0]
start = int(chunks[1])
haplotype = chunks[8]
if "," in haplotype:
haplotype = haplotype
else: #aqui falta algo. Lo he vaciado porque para que entre en Outhandle3 no tiene que existir el haplotype
haplotype = ''
try:
if pilon_dic[name][start]['pos1'] == '.':
if haplotype:
outhandle4.write(line + "\t" + "Warning, you should check this target!!" + "\n")
else:
outhandle3.write(line + "\t" + "Non warning, good target!!" + "\n")
changeLength = str(pilon_dic[name][start]['lengthBasePos2'])
change = ("+" + str(pilon_dic[name][start]['lengthBasePos2']) + str(pilon_dic[name][start]['pos2']))
outhandle1.write(str(name) + "\t" + str(start) + "\t" + str(chunks[2]) + "\t" + str(chunks[3]) + "\t" + \
str(chunks[4]) + "\t" + str(chunks[5]) + "\t" + str(chunks[6]) + "\t" + str(changeLength) + "\t" + \
str(change) + "\t" + str(chunks[9]) + "\t" + str(chunks[10]) + "\n")
else:
if haplotype:
outhandle4.write(line + "\t" + "Warning, you should check this target!!" + "\n")
else:
outhandle3.write(line + "\t" + "Non warning, good target!!" + "\n")
changeLength = str(pilon_dic[name][start]['lengthBasePos1'])
change = ("-" + str(pilon_dic[name][start]['lengthBasePos1']) + str(pilon_dic[name][start]['pos1']))
outhandle1.write(str(name) + "\t" + str(start) + "\t" + str(chunks[2]) + "\t" + str(chunks[3]) + "\t" + \
str(chunks[4]) + "\t" + str(chunks[5]) + "\t" + str(chunks[6]) + "\t" + '-' + str(changeLength) + "\t" +\
str(change) + "\t" + str(chunks[9]) + "\t" + str(chunks[10]) + "\n")
except KeyError:
outhandle2.write(line + "\n")
outhandle1.close()
outhandle2.close()
outhandle3.close()
outhandle4.close()
print("ALL DONE!!!")