Skip to content
Snippets Groups Projects
Commit bbacf7a4 authored by max's avatar max
Browse files

comments added

parent a9768992
No related branches found
No related tags found
No related merge requests found
...@@ -8,7 +8,7 @@ def checkINSDELS(hit1, hit2): ...@@ -8,7 +8,7 @@ def checkINSDELS(hit1, hit2):
#print("check INS/DELS") #print("check INS/DELS")
hdist = ((hit2.split - hit1.split) - 1) * hit1.q_len hdist = ((hit2.split - hit1.split) - 1) * hit1.q_len
if hit1.strand == "+" and hit2.strand == "+": if hit1.strand == "+" and hit2.strand == "+": #both map on fwd strand
h1_end = min(hit1.t_end + (hit1.q_len - hit1.q_end), hit1.t_len-1) h1_end = min(hit1.t_end + (hit1.q_len - hit1.q_end), hit1.t_len-1)
h2_start = max(hit2.t_start - hit2.q_start, 0) h2_start = max(hit2.t_start - hit2.q_start, 0)
dist = h2_start - h1_end dist = h2_start - h1_end
...@@ -18,7 +18,7 @@ def checkINSDELS(hit1, hit2): ...@@ -18,7 +18,7 @@ def checkINSDELS(hit1, hit2):
cstart=hit1.t_start cstart=hit1.t_start
cend=hit2.t_end cend=hit2.t_end
elif hit1.strand == "-" and hit2.strand == "-": elif hit1.strand == "-" and hit2.strand == "-": #both map on rev strand
h1_end = min(hit1.t_start - (hit1.q_len - hit1.q_end), hit1.t_len-1) h1_end = min(hit1.t_start - (hit1.q_len - hit1.q_end), hit1.t_len-1)
h2_start = max(hit2.t_end + hit2.q_start, 0) h2_start = max(hit2.t_end + hit2.q_start, 0)
dist = h1_end - h2_start dist = h1_end - h2_start
...@@ -27,8 +27,8 @@ def checkINSDELS(hit1, hit2): ...@@ -27,8 +27,8 @@ def checkINSDELS(hit1, hit2):
eStart=hit2.t_end-1 eStart=hit2.t_end-1
cstart=hit1.t_end cstart=hit1.t_end
cend=hit2.t_start cend=hit2.t_start
#if dist error larger(pos or neg) than min SV size
if diffDIST >= globalVars.minSVL or diffDIST <= -1*globalVars.minSVL: if diffDIST >= globalVars.minSVL or diffDIST <= -1*globalVars.minSVL:
#print("Error detected") #print("Error detected")
#print(hit1) #print(hit1)
#print(hit2) #print(hit2)
...@@ -47,7 +47,7 @@ def checkINSDELS(hit1, hit2): ...@@ -47,7 +47,7 @@ def checkINSDELS(hit1, hit2):
return curError, curCorrect return curError, curCorrect
# identify inversions # fucntion to identify inversions e.g. compare hits in different orientations on same contig
def checkINVS(hit1, hit2, oriHASH): def checkINVS(hit1, hit2, oriHASH):
#print("check INVS") #print("check INVS")
# to store the hits/fragments of inversion --> start and end # to store the hits/fragments of inversion --> start and end
...@@ -63,6 +63,8 @@ def checkINVS(hit1, hit2, oriHASH): ...@@ -63,6 +63,8 @@ def checkINVS(hit1, hit2, oriHASH):
return curError return curError
# function to calculate the distance between to contigs
# if both splits map on rev strand
def calcHitContig_Dist_rev(hit1, hit2): def calcHitContig_Dist_rev(hit1, hit2):
err1=False err1=False
err2=False err2=False
...@@ -85,17 +87,7 @@ def calcHitContig_Dist_rev(hit1, hit2): ...@@ -85,17 +87,7 @@ def calcHitContig_Dist_rev(hit1, hit2):
#print(h2_start, hdist) #print(h2_start, hdist)
#print(hit2) #print(hit2)
err2=True err2=True
'''
if cdist<-1*globalVars.splitLen:
if hit1.t_start > globalVars.splitLen:
print("add error %s for cont1"%(hit1.target))
print(hit1)
err1=True
if globalVars.contLenDICT[hit2.target]-hit2.t_end > globalVars.splitLen:
print("add error %s for cont2"%(hit2.target))
print(hit2)
err2=True
'''
#print("RevRev") #print("RevRev")
#print(err1, err2) #print(err1, err2)
#print(hit1) #print(hit1)
...@@ -105,7 +97,8 @@ def calcHitContig_Dist_rev(hit1, hit2): ...@@ -105,7 +97,8 @@ def calcHitContig_Dist_rev(hit1, hit2):
#print("\t%i\n"%(cdist)) #print("\t%i\n"%(cdist))
return cdist, err1, err2 return cdist, err1, err2
# function to calculate the distance between to contigs
# if both splits map on fwd strand
def calcHitContig_Dist_pos(hit1, hit2): def calcHitContig_Dist_pos(hit1, hit2):
err1=False err1=False
err2=False err2=False
...@@ -138,7 +131,8 @@ def calcHitContig_Dist_pos(hit1, hit2): ...@@ -138,7 +131,8 @@ def calcHitContig_Dist_pos(hit1, hit2):
#print("\t%i\n"%(cdist)) #print("\t%i\n"%(cdist))
return cdist, err1, err2 return cdist, err1, err2
# function to calculate the distance between to contigs
# if hit1 on fwd strand and hit2 on rev strand
def calcHitContig_Distance_posrev(hit1, hit2): def calcHitContig_Distance_posrev(hit1, hit2):
err1=False err1=False
err2=False err2=False
...@@ -161,17 +155,7 @@ def calcHitContig_Distance_posrev(hit1, hit2): ...@@ -161,17 +155,7 @@ def calcHitContig_Distance_posrev(hit1, hit2):
#print(h2_start, hdist) #print(h2_start, hdist)
#print(hit2) #print(hit2)
err2=True err2=True
'''
if cdist<-1*globalVars.splitLen:
if globalVars.contLenDICT[hit1.target]-hit1.t_end > globalVars.splitLen:
print("add error %s for cont1"%(hit1.target))
print(hit1)
err1=True
if globalVars.contLenDICT[hit2.target]-hit2.t_end > globalVars.splitLen:
print("add error %s for cont2"%(hit2.target))
print(hit2)
err2=True
'''
#print("PosRev") #print("PosRev")
#print(err1, err2) #print(err1, err2)
#print(hit1) #print(hit1)
...@@ -181,7 +165,8 @@ def calcHitContig_Distance_posrev(hit1, hit2): ...@@ -181,7 +165,8 @@ def calcHitContig_Distance_posrev(hit1, hit2):
#print("\t%i\n"%(cdist)) #print("\t%i\n"%(cdist))
return cdist, err1, err2 return cdist, err1, err2
# function to calculate the distance between to contigs
# if hit1 on rev strand and hit2 on fwd strand
def calcHitContig_Distance_revpos(hit1, hit2): def calcHitContig_Distance_revpos(hit1, hit2):
err1=False err1=False
err2=False err2=False
...@@ -204,17 +189,6 @@ def calcHitContig_Distance_revpos(hit1, hit2): ...@@ -204,17 +189,6 @@ def calcHitContig_Distance_revpos(hit1, hit2):
#print(h2_start, hdist) #print(h2_start, hdist)
#print(hit2) #print(hit2)
err2=True err2=True
'''
if cdist<-1*globalVars.splitLen:
if hit1.t_start > globalVars.splitLen:
print("add error %s for cont1"%(hit1.target))
print(hit1)
err1=True
if hit2.t_start > globalVars.splitLen:
print("add error %s for cont2"%(hit2.target))
print(hit2)
err2=True
'''
#print("RevPos") #print("RevPos")
#print(err1, err2) #print(err1, err2)
...@@ -331,13 +305,7 @@ def checkHitlist(hitlist, edgeGRAPH, oriHASH): ...@@ -331,13 +305,7 @@ def checkHitlist(hitlist, edgeGRAPH, oriHASH):
# compare 2 hits following each other # compare 2 hits following each other
for curHits in hitlist: for curHits in hitlist:
#if curHit.target==globalVars.largestContig[0]: #if len(curHits) > 1 the hit maps repetitive ignore it
#print("hit on largest contig found --> add coverage")
# addLongestCOV(curHit)
#if len(curHits)>1:
# for curHit in curHits:
# globalVars.goodE[curHit.target].append((curHit.target, min(curHit.t_start, curHit.t_end), max(curHit.t_start, curHit.t_end)))
if len(curHits)==1: if len(curHits)==1:
curHit=curHits[0] curHit=curHits[0]
#globalVars.goodE[curHit.target].append((curHit.target, min(curHit.t_start, curHit.t_end), max(curHit.t_start, curHit.t_end))) #globalVars.goodE[curHit.target].append((curHit.target, min(curHit.t_start, curHit.t_end), max(curHit.t_start, curHit.t_end)))
......
...@@ -11,8 +11,6 @@ import copy ...@@ -11,8 +11,6 @@ import copy
# function to set edges in graph # function to set edges in graph
def setEdges(setEdge, agraph): def setEdges(setEdge, agraph):
if setEdge.direct!="": if setEdge.direct!="":
newEdgeStart = globalVars.myEdge(setEdge.start, setEdge.target, newEdgeStart = globalVars.myEdge(setEdge.start, setEdge.target,
setEdge.weight, setEdge.dist, setEdge.stdev, setEdge.direct) setEdge.weight, setEdge.dist, setEdge.stdev, setEdge.direct)
...@@ -39,6 +37,7 @@ def setEdges(setEdge, agraph): ...@@ -39,6 +37,7 @@ def setEdges(setEdge, agraph):
#print(oldEdge[1]) #print(oldEdge[1])
return oldEdge return oldEdge
# function to reset an old edge
def resetEdge(setEdge, agraph): def resetEdge(setEdge, agraph):
#print("resetting edge") #print("resetting edge")
#print(setEdge) #print(setEdge)
...@@ -85,6 +84,8 @@ def removeMergedNEW(rkey, newTar, addDist): ...@@ -85,6 +84,8 @@ def removeMergedNEW(rkey, newTar, addDist):
#print("Now pointing to %s with dist %i"%(newTar, addDist)) #print("Now pointing to %s with dist %i"%(newTar, addDist))
globalVars.adjustDICT[rkey]=(newTar, addDist) globalVars.adjustDICT[rkey]=(newTar, addDist)
# function to check for check for merging and calls the merging functions
# from merging.py
def checkMerge(curEdge, cHASH): def checkMerge(curEdge, cHASH):
#print(curEdge.dist, globalVars.minOverlap*-1) #print(curEdge.dist, globalVars.minOverlap*-1)
#print(curEdge.dist <= globalVars.minOverlap*-1) #print(curEdge.dist <= globalVars.minOverlap*-1)
...@@ -107,6 +108,7 @@ def checkMerge(curEdge, cHASH): ...@@ -107,6 +108,7 @@ def checkMerge(curEdge, cHASH):
return merged, mAdjust return merged, mAdjust
# function to merge paths as defined in the GPMA paper
def mergeNEW(nEdge, inGraph): def mergeNEW(nEdge, inGraph):
graph=inGraph.cGraph graph=inGraph.cGraph
sucess=True sucess=True
...@@ -304,7 +306,9 @@ def getNodePath(ggraph, anode): ...@@ -304,7 +306,9 @@ def getNodePath(ggraph, anode):
return ePath, pHash return ePath, pHash
# given list of edges this function returns the happy and
# unhappiness for all edges with both nodes in the path
# in regard to the currently selected path
def checkHappiness(pdict, cDICT): def checkHappiness(pdict, cDICT):
nH=0 nH=0
nUH=0 nUH=0
...@@ -349,8 +353,7 @@ def checkHappiness(pdict, cDICT): ...@@ -349,8 +353,7 @@ def checkHappiness(pdict, cDICT):
else: else:
#print("Incorrect orientation increasing UNhappy by %i"%(curEdge[0])) #print("Incorrect orientation increasing UNhappy by %i"%(curEdge[0]))
nUH+=curEdge[0] nUH+=curEdge[0]
continue continue
return nH, nUH return nH, nUH
...@@ -433,7 +436,7 @@ def gpma(newEdge, inGraph): ...@@ -433,7 +436,7 @@ def gpma(newEdge, inGraph):
#check happiness #check happiness
if newHP - (nHP1+nHP2) >= newUHP - (nUHP1+nUHP2): if newHP - (nHP1+nHP2) >= newUHP - (nUHP1+nUHP2):
#print("GPMA VERY HAPPY") #print("GPMA VERY HAPPY")
for remove in removed: for remove in removed:
removeMergedNEW(remove[0][:-2]+"_0", remove[1], remove[2]) removeMergedNEW(remove[0][:-2]+"_0", remove[1], remove[2])
removeMergedNEW(remove[0][:-2]+"_1", remove[3], remove[4]) removeMergedNEW(remove[0][:-2]+"_1", remove[3], remove[4])
else: else:
...@@ -444,11 +447,8 @@ def gpma(newEdge, inGraph): ...@@ -444,11 +447,8 @@ def gpma(newEdge, inGraph):
#print("Happiness p1: %i/%i(H/UH)"%(nHP1, nUHP1)) #print("Happiness p1: %i/%i(H/UH)"%(nHP1, nUHP1))
#[print(e) for e in ePath2] #[print(e) for e in ePath2]
#print("Happiness p2: %i/%i(H/UH)"%(nHP2, nUHP2)) #print("Happiness p2: %i/%i(H/UH)"%(nHP2, nUHP2))
for edge in oldEdges: for edge in oldEdges: # if GPMA unhappy reset old graph
resetEdge(edge, inGraph) resetEdge(edge, inGraph)
#else: #else:
# print("GPMA FAILED!!!") # print("GPMA FAILED!!!")
......
...@@ -36,38 +36,6 @@ def runBLAST(id1, id2, eOL, checkOL): ...@@ -36,38 +36,6 @@ def runBLAST(id1, id2, eOL, checkOL):
elif id1.endswith("0"): elif id1.endswith("0"):
seq1_trimmed=seq1[(eOL*-1)-len(seq2) : eOL*-1] seq1_trimmed=seq1[(eOL*-1)-len(seq2) : eOL*-1]
'''
if checkOL==True:
#only blast overlaps against each other
#if contig longer than overlap
if len(seq1) > estOL*(-1)+500:
if id1.endswith("_1"):
seq1_trimmed=seq1[int(len(seq1)+(estOL-500)):]
elif id1.endswith("_0"):
seq1_trimmed=seq1[:int(estOL*(-1)+500)]
else:
print("WTF is this name - %s"%(id1))
else:
seq1_trimmed=seq1
checkOL=False
if len(seq2) > estOL*(-1)+500:
if id2.endswith("_1"):
seq2_trimmed=seq2[int(len(seq2)+(estOL-500)):]
elif id2.endswith("_0"):
seq2_trimmed=seq2[:int(estOL*(-1)+500)]
else:
print("WTF is this name - %s"%(id2))
else:
seq2_trimmed=seq2
checkOL=False
else:
seq1_trimmed=seq1
seq2_trimmed=seq2
'''
tmpDIR = globalVars.outDir tmpDIR = globalVars.outDir
#print(len(seq1), len(seq1_trimmed)) #print(len(seq1), len(seq1_trimmed))
...@@ -281,7 +249,7 @@ def tryMERGE(blastPATH, cID1, cID2, len1, len2): ...@@ -281,7 +249,7 @@ def tryMERGE(blastPATH, cID1, cID2, len1, len2):
blastReader.close() blastReader.close()
return merged, alnLen*-1, mergeAdjust return merged, alnLen*-1, mergeAdjust
# function to save the coordinates of the exact overlap a contig
def saveMerge(nID, oID1, oID2, olap): def saveMerge(nID, oID1, oID2, olap):
if oID1<=oID2: if oID1<=oID2:
combi=oID1+"__"+oID2 combi=oID1+"__"+oID2
......
import globalVars import globalVars
class myFQ:
# small class defining the content of fastq file
class myFQ:
def __init__(self, iD, seq, opt, qual): def __init__(self, iD, seq, opt, qual):
self.iD = iD self.iD = iD
self.seq = seq self.seq = seq
self.opt = opt self.opt = opt
self.qual = qual self.qual = qual
# small class defining the content of fasta file
class myFA: class myFA:
def __init__(self, iD, seq): def __init__(self, iD, seq):
self.iD = iD self.iD = iD
self.seq = seq self.seq = seq
#function to split reads in fastq format
def splitReadsFQ(inFQ, oFQ, splitSize): def splitReadsFQ(inFQ, oFQ, splitSize):
if inFQ.endswith(".gz"): if inFQ.endswith(".gz"):
...@@ -75,7 +77,7 @@ def splitReadsFQ(inFQ, oFQ, splitSize): ...@@ -75,7 +77,7 @@ def splitReadsFQ(inFQ, oFQ, splitSize):
myWriter.close() myWriter.close()
fqParser.close() fqParser.close()
#function to split reads in fasta format
def splitReadsFA(inFA, oFA, splitSize): def splitReadsFA(inFA, oFA, splitSize):
if inFA.endswith(".gz"): if inFA.endswith(".gz"):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment