Skip to content

Commit 3f30d30

Browse files
authored
Merge pull request #213 from adamewing/fix212
fix: Fix replace_reads.py only writing out one read of each pair
2 parents b315cc8 + 93bec2a commit 3f30d30

File tree

1 file changed

+13
-11
lines changed

1 file changed

+13
-11
lines changed

bin/bamsurgeon/replace_reads.py

+13-11
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,7 @@ def replace_reads(origbamfile, mutbamfile, outbamfile, nameprefix=None, excludef
8585
'''
8686
targetbam = pysam.AlignmentFile(origbamfile)
8787
donorbam = pysam.AlignmentFile(mutbamfile)
88-
write_mode = 'wc' if origbamfile.endswith('.cram') else 'wb'
88+
write_mode = 'wc' if outbamfile.endswith('.cram') else 'wb'
8989
outputbam = pysam.AlignmentFile(outbamfile, write_mode, template=targetbam)
9090

9191
if seed is not None: random.seed(int(seed))
@@ -103,8 +103,7 @@ def replace_reads(origbamfile, mutbamfile, outbamfile, nameprefix=None, excludef
103103
# load reads from donorbam into dict
104104
logger.info("loading donor reads into dictionary...\n")
105105

106-
#rdict = defaultdict(list)
107-
rdict = {}
106+
rdict = defaultdict(lambda: [None, None, None])
108107
secondary = defaultdict(list) # track secondary alignments, if specified
109108
supplementary = defaultdict(list) # track supplementary alignments, if specified
110109
excount = 0 # number of excluded reads
@@ -124,7 +123,9 @@ def replace_reads(origbamfile, mutbamfile, outbamfile, nameprefix=None, excludef
124123
read.qual = qual
125124
extqname = read.qname
126125
if not read.is_secondary and not read.is_supplementary:
127-
rdict[extqname] = read
126+
rlist = rdict[extqname]
127+
# 0: first in pair (read1), 1: second in pair (read2), 2: unpaired
128+
rlist[0 if read.is_read1 else 1 if read.is_read2 else 2] = read
128129
nr += 1
129130
elif keepsecondary and read.is_secondary:
130131
secondary[extqname].append(read)
@@ -157,13 +158,12 @@ def replace_reads(origbamfile, mutbamfile, outbamfile, nameprefix=None, excludef
157158
read.qname = nameprefix + read.qname
158159
read.qual = qual
159160
extqname = read.qname
160-
#check if this read has been processed already. If so, skip to the next read
161-
if extqname in used: continue
162161
newReads = []
163162
if extqname in rdict:
163+
newRead = rdict[extqname][0 if read.is_read1 else 1 if read.is_read2 else 2]
164164
if keepqual:
165-
rdict[extqname].qual = read.qual
166-
newReads = [rdict[extqname]]
165+
newRead.qual = read.qual
166+
newReads = [newRead]
167167
used.add(extqname)
168168
recount += 1
169169
if keepsecondary and extqname in secondary:
@@ -192,9 +192,11 @@ def replace_reads(origbamfile, mutbamfile, outbamfile, nameprefix=None, excludef
192192
if allreads:
193193
for extqname in rdict.keys():
194194
if extqname not in used and extqname not in exclude:
195-
rdict[extqname] = cleanup(rdict[extqname],None,RG)
196-
outputbam.write(rdict[extqname])
197-
nadded += 1
195+
for read in rdict[extqname]:
196+
if read is None: continue
197+
read = cleanup(read,None,RG)
198+
outputbam.write(read)
199+
nadded += 1
198200
logger.info("added " + str(nadded) + " reads due to --all\n")
199201

200202
targetbam.close()

0 commit comments

Comments
 (0)