Skip to content
Snippets Groups Projects
Commit 3fa5fa38 authored by max's avatar max
Browse files

fragmentGenome.py added to repository

parent 3342dc7a
No related branches found
No related tags found
No related merge requests found
import argparse, random
from Bio import SeqIO
parser = argparse.ArgumentParser(description="")
parser.add_argument("-ge", "--genome", help="Contigs in fasta format", type=str, required=True)
parser.add_argument("-out", "--o", help="output", type=str, required=True)
args = parser.parse_args()
faIn = open(args.genome)
faOut = open(args.o, "w")
minLen=2000
maxLen=50000
maxOL=-5000
maxDist=5000
nFragments=500
curStart=0
it=0
misjoins=20
ins=20
invs=20
dels=20
gap=0
for rec in SeqIO.parse(faIn, "fasta"):
maxLen=len(rec.seq)
curStart=0
adjusts=[]
randInts=[]
randGap=[]
for i in range(0,nFragments-1):
random.seed(i*5)
randInts.append(random.randint(0, maxLen))
randGap.append(random.randint(-5000, 5000))
sortedInts=sorted(randInts)
print(sortedInts)
print(randGap)
gap=0
start=0
it=0
mjCounter=0
insCounter=0
delCounter=0
invCounter=0
for i in range(0, len(randInts)):
end=sortedInts[i]
reverse=random.randint(0,1)
if reverse == 0:
frag=str(rec.seq[start:end])
fragID=">Frag%i_%i-%i_len(%i)"%(it, start, end, len(frag))
else:
frag=str(rec.seq[start:end].reverse_complement())
fragID=">Frag%i_%i-%i_len(%i)_rev"%(it, start, end, len(frag))
newMisjoin=random.randint(0,1)
if newMisjoin == 1 and mjCounter<misjoins:
print("add misjoin")
mjLen=(random.randint(1000, 20000))
mjstart=(random.randint(0, maxLen-mjLen))
mjID="MISJOIN_%i-%i_len(%i)"%(mjstart, mjstart+mjLen, mjLen)
mjSeq=str(rec.seq[mjstart:mjstart+mjLen])
fragID+=mjID
frag+=mjSeq
mjCounter+=1
if len(frag) > 10000:
newIns=random.randint(0,5)
if newIns == 1 and insCounter<ins:
print("add ins")
insLen=(random.randint(1000, 5000))
insstart=(random.randint(0, maxLen-insLen))
insseq=str(rec.seq[insstart:insstart+insLen])
inspoint=random.randint(1000, len(frag)-1000)
frag=frag[:inspoint] + insseq + frag[inspoint:]
fragID+="_INS_%i-%i"%(inspoint, inspoint+insLen)
insCounter+=1
else:
newDel=random.randint(0,5)
if newDel == 1 and delCounter<dels:
print("add del")
delLen=(random.randint(1000, 5000))
delpoint=random.randint(1000, len(frag)-1000)
frag=frag[:delpoint] + frag[delpoint+delLen:]
fragID+="_DEL_%i(%i)"%(delpoint, delLen)
delCounter+=1
gap=randGap[i]
faOut.write("%s\n%s\n"%(fragID, frag))
start=end+gap
it+=1
end=randInts[i]-gap
reverse=random.randint(0,1)
if reverse == 0:
frag=str(rec.seq[start:])
fragID=">Frag%i_%i-%i_len(%i)"%(it, start, len(rec.seq), len(frag))
else:
frag=str(rec.seq[start:].reverse_complement())
fragID=">Frag%i_%i-%i_len(%i)_rev"%(it, start, len(rec.seq), len(frag))
gap=randGap[i]
faOut.write("%s\n%s\n"%(fragID, frag))
'''
for rec in SeqIO.parse(faIn, "fasta"):
curLen=random.randint(minLen, maxLen)
print(rec.id)
inLen=len(rec.seq)-1
while curStart+curLen < inLen:
reverse=random.randint(0, 1)
misjoin=random.randint(0, 10)
nextGAP=random.randint(maxOL, maxDist)
if misjoin==0:
print("do misjoin")
misstart=random.randint(0, len(rec.seq)-5001)
misend=misstart+5000
misFrag=str(rec.seq[misstart:misend])
misID="_MISJOIN_Frag%i_%i-%i"%(it, misstart, misend)
else:
misFrag=""
misID=""
if reverse==0:
curSeq=str(rec.seq[curStart:curStart+curLen])+misFrag
curID=">Frag%i_%i-%i"%(it, curStart, curStart+curLen)+misID
elif reverse==1:
curSeq=str(rec.seq[curStart:curStart+curLen].reverse_complement())+misFrag
curID=">Frag%i_%i-%i_reversed"%(it, curStart, curStart+curLen)+misID
faOut.write("%s\n%s\n"%(curID, curSeq))
curStart+=curLen + nextGAP
curLen=random.randint(minLen, maxLen)
it+=1
faIn.close()
'''
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment