It’s the moment you’ve been waiting for! How, exactly, do we go about assembling two nucleotide sequences? You might consider these steps en route to developing your final answer:
OUR SOLUTION:
str2 = 'ATGCG'
str1 = 'TGC'
def find_ind(seq1, seq2):
match_length = 0
max_match = 0
for i in range(0, len(seq2)):
for j in range(0, len(seq1)):
if seq2[i] == seq1[j]:
#print('match found' + str(i) + str(j))
match_length = len(seq1) - j
#print(match_length)
if seq1[j:] == seq2[i:i+match_length]:
#print(seq1[j:])
if match_length > max_match:
max_match = match_length
str1_start = j
str2_start = i
else:
continue
return (max_match, str1_start, str2_start)
def reassembler(seq1, seq2, start2):
return(seq1 + seq2[start2:])
def assemble(seq1, seq2):
str1_start = 0
str2_start = 0
##one seq is completely contained within the other
if seq1.find(seq2) > 0:
return seq1
elif seq2.find(seq1) > 0:
return seq2
soln1 = find_ind(seq1, seq2)
soln2 = find_ind(seq2, seq1)
print(soln1, soln2)
if soln1[0] > soln2[0]:
print(reassembler(seq1, seq2, soln2[2]))
else:
print(reassembler(seq2, seq1, soln1[2]))
assemble(str1, str2)
What if we wanted to assemble 3 fragments? How could we call on our function for two fragments to accomplish this?