@@ -85,7 +85,7 @@ def replace_reads(origbamfile, mutbamfile, outbamfile, nameprefix=None, excludef
85
85
'''
86
86
targetbam = pysam .AlignmentFile (origbamfile )
87
87
donorbam = pysam .AlignmentFile (mutbamfile )
88
- write_mode = 'wc' if origbamfile .endswith ('.cram' ) else 'wb'
88
+ write_mode = 'wc' if outbamfile .endswith ('.cram' ) else 'wb'
89
89
outputbam = pysam .AlignmentFile (outbamfile , write_mode , template = targetbam )
90
90
91
91
if seed is not None : random .seed (int (seed ))
@@ -103,8 +103,7 @@ def replace_reads(origbamfile, mutbamfile, outbamfile, nameprefix=None, excludef
103
103
# load reads from donorbam into dict
104
104
logger .info ("loading donor reads into dictionary...\n " )
105
105
106
- #rdict = defaultdict(list)
107
- rdict = {}
106
+ rdict = defaultdict (lambda : [None , None , None ])
108
107
secondary = defaultdict (list ) # track secondary alignments, if specified
109
108
supplementary = defaultdict (list ) # track supplementary alignments, if specified
110
109
excount = 0 # number of excluded reads
@@ -124,7 +123,9 @@ def replace_reads(origbamfile, mutbamfile, outbamfile, nameprefix=None, excludef
124
123
read .qual = qual
125
124
extqname = read .qname
126
125
if not read .is_secondary and not read .is_supplementary :
127
- rdict [extqname ] = read
126
+ rlist = rdict [extqname ]
127
+ # slot index: 0 = read1 (first read of pair), 1 = read2 (second read of pair), 2 = unpaired
128
+ rlist [0 if read .is_read1 else 1 if read .is_read2 else 2 ] = read
128
129
nr += 1
129
130
elif keepsecondary and read .is_secondary :
130
131
secondary [extqname ].append (read )
@@ -157,13 +158,12 @@ def replace_reads(origbamfile, mutbamfile, outbamfile, nameprefix=None, excludef
157
158
read .qname = nameprefix + read .qname
158
159
read .qual = qual
159
160
extqname = read .qname
160
- #check if this read has been processed already. If so, skip to the next read
161
- if extqname in used : continue
162
161
newReads = []
163
162
if extqname in rdict :
163
+ newRead = rdict [extqname ][0 if read .is_read1 else 1 if read .is_read2 else 2 ]
164
164
if keepqual :
165
- rdict [ extqname ] .qual = read .qual
166
- newReads = [rdict [ extqname ] ]
165
+ newRead .qual = read .qual
166
+ newReads = [newRead ]
167
167
used .add (extqname )
168
168
recount += 1
169
169
if keepsecondary and extqname in secondary :
@@ -192,9 +192,11 @@ def replace_reads(origbamfile, mutbamfile, outbamfile, nameprefix=None, excludef
192
192
if allreads :
193
193
for extqname in rdict .keys ():
194
194
if extqname not in used and extqname not in exclude :
195
- rdict [extqname ] = cleanup (rdict [extqname ],None ,RG )
196
- outputbam .write (rdict [extqname ])
197
- nadded += 1
195
+ for read in rdict [extqname ]:
196
+ if read is None : continue
197
+ read = cleanup (read ,None ,RG )
198
+ outputbam .write (read )
199
+ nadded += 1
198
200
logger .info ("added " + str (nadded ) + " reads due to --all\n " )
199
201
200
202
targetbam .close ()
0 commit comments