Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
masterTool
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
mdriller
masterTool
Commits
bbacf7a4
"git@git.imp.fu-berlin.de:podlesny/dune-tectonic.git" did not exist on "069cc9532e156c9570e085b7d1a31904666e69be"
Commit
bbacf7a4
authored
5 years ago
by
max
Browse files
Options
Downloads
Patches
Plain Diff
comments added
parent
a9768992
No related branches found
No related tags found
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
checkHitlist.py
+16
-48
16 additions, 48 deletions
checkHitlist.py
gpma.py
+10
-10
10 additions, 10 deletions
gpma.py
merging.py
+1
-33
1 addition, 33 deletions
merging.py
splitReads.py
+6
-4
6 additions, 4 deletions
splitReads.py
with
33 additions
and
95 deletions
checkHitlist.py
+
16
−
48
View file @
bbacf7a4
...
@@ -8,7 +8,7 @@ def checkINSDELS(hit1, hit2):
...
@@ -8,7 +8,7 @@ def checkINSDELS(hit1, hit2):
#print("check INS/DELS")
#print("check INS/DELS")
hdist
=
((
hit2
.
split
-
hit1
.
split
)
-
1
)
*
hit1
.
q_len
hdist
=
((
hit2
.
split
-
hit1
.
split
)
-
1
)
*
hit1
.
q_len
if
hit1
.
strand
==
"
+
"
and
hit2
.
strand
==
"
+
"
:
if
hit1
.
strand
==
"
+
"
and
hit2
.
strand
==
"
+
"
:
#both map on fwd strand
h1_end
=
min
(
hit1
.
t_end
+
(
hit1
.
q_len
-
hit1
.
q_end
),
hit1
.
t_len
-
1
)
h1_end
=
min
(
hit1
.
t_end
+
(
hit1
.
q_len
-
hit1
.
q_end
),
hit1
.
t_len
-
1
)
h2_start
=
max
(
hit2
.
t_start
-
hit2
.
q_start
,
0
)
h2_start
=
max
(
hit2
.
t_start
-
hit2
.
q_start
,
0
)
dist
=
h2_start
-
h1_end
dist
=
h2_start
-
h1_end
...
@@ -18,7 +18,7 @@ def checkINSDELS(hit1, hit2):
...
@@ -18,7 +18,7 @@ def checkINSDELS(hit1, hit2):
cstart
=
hit1
.
t_start
cstart
=
hit1
.
t_start
cend
=
hit2
.
t_end
cend
=
hit2
.
t_end
elif
hit1
.
strand
==
"
-
"
and
hit2
.
strand
==
"
-
"
:
elif
hit1
.
strand
==
"
-
"
and
hit2
.
strand
==
"
-
"
:
#both map on rev strand
h1_end
=
min
(
hit1
.
t_start
-
(
hit1
.
q_len
-
hit1
.
q_end
),
hit1
.
t_len
-
1
)
h1_end
=
min
(
hit1
.
t_start
-
(
hit1
.
q_len
-
hit1
.
q_end
),
hit1
.
t_len
-
1
)
h2_start
=
max
(
hit2
.
t_end
+
hit2
.
q_start
,
0
)
h2_start
=
max
(
hit2
.
t_end
+
hit2
.
q_start
,
0
)
dist
=
h1_end
-
h2_start
dist
=
h1_end
-
h2_start
...
@@ -27,8 +27,8 @@ def checkINSDELS(hit1, hit2):
...
@@ -27,8 +27,8 @@ def checkINSDELS(hit1, hit2):
eStart
=
hit2
.
t_end
-
1
eStart
=
hit2
.
t_end
-
1
cstart
=
hit1
.
t_end
cstart
=
hit1
.
t_end
cend
=
hit2
.
t_start
cend
=
hit2
.
t_start
#if dist error larger(pos or neg) than min SV size
if
diffDIST
>=
globalVars
.
minSVL
or
diffDIST
<=
-
1
*
globalVars
.
minSVL
:
if
diffDIST
>=
globalVars
.
minSVL
or
diffDIST
<=
-
1
*
globalVars
.
minSVL
:
#print("Error detected")
#print("Error detected")
#print(hit1)
#print(hit1)
#print(hit2)
#print(hit2)
...
@@ -47,7 +47,7 @@ def checkINSDELS(hit1, hit2):
...
@@ -47,7 +47,7 @@ def checkINSDELS(hit1, hit2):
return
curError
,
curCorrect
return
curError
,
curCorrect
# identify inversions
#
fucntion to
identify inversions
e.g. compare hits in different orientations on same contig
def
checkINVS
(
hit1
,
hit2
,
oriHASH
):
def
checkINVS
(
hit1
,
hit2
,
oriHASH
):
#print("check INVS")
#print("check INVS")
# to store the hits/fragments of inversion --> start and end
# to store the hits/fragments of inversion --> start and end
...
@@ -63,6 +63,8 @@ def checkINVS(hit1, hit2, oriHASH):
...
@@ -63,6 +63,8 @@ def checkINVS(hit1, hit2, oriHASH):
return
curError
return
curError
# function to calculate the distance between to contigs
# if both splits map on rev strand
def
calcHitContig_Dist_rev
(
hit1
,
hit2
):
def
calcHitContig_Dist_rev
(
hit1
,
hit2
):
err1
=
False
err1
=
False
err2
=
False
err2
=
False
...
@@ -85,17 +87,7 @@ def calcHitContig_Dist_rev(hit1, hit2):
...
@@ -85,17 +87,7 @@ def calcHitContig_Dist_rev(hit1, hit2):
#print(h2_start, hdist)
#print(h2_start, hdist)
#print(hit2)
#print(hit2)
err2
=
True
err2
=
True
'''
if cdist<-1*globalVars.splitLen:
if hit1.t_start > globalVars.splitLen:
print(
"
add error %s for cont1
"
%(hit1.target))
print(hit1)
err1=True
if globalVars.contLenDICT[hit2.target]-hit2.t_end > globalVars.splitLen:
print(
"
add error %s for cont2
"
%(hit2.target))
print(hit2)
err2=True
'''
#print("RevRev")
#print("RevRev")
#print(err1, err2)
#print(err1, err2)
#print(hit1)
#print(hit1)
...
@@ -105,7 +97,8 @@ def calcHitContig_Dist_rev(hit1, hit2):
...
@@ -105,7 +97,8 @@ def calcHitContig_Dist_rev(hit1, hit2):
#print("\t%i\n"%(cdist))
#print("\t%i\n"%(cdist))
return
cdist
,
err1
,
err2
return
cdist
,
err1
,
err2
# function to calculate the distance between to contigs
# if both splits map on fwd strand
def
calcHitContig_Dist_pos
(
hit1
,
hit2
):
def
calcHitContig_Dist_pos
(
hit1
,
hit2
):
err1
=
False
err1
=
False
err2
=
False
err2
=
False
...
@@ -138,7 +131,8 @@ def calcHitContig_Dist_pos(hit1, hit2):
...
@@ -138,7 +131,8 @@ def calcHitContig_Dist_pos(hit1, hit2):
#print("\t%i\n"%(cdist))
#print("\t%i\n"%(cdist))
return
cdist
,
err1
,
err2
return
cdist
,
err1
,
err2
# function to calculate the distance between to contigs
# if hit1 on fwd strand and hit2 on rev strand
def
calcHitContig_Distance_posrev
(
hit1
,
hit2
):
def
calcHitContig_Distance_posrev
(
hit1
,
hit2
):
err1
=
False
err1
=
False
err2
=
False
err2
=
False
...
@@ -161,17 +155,7 @@ def calcHitContig_Distance_posrev(hit1, hit2):
...
@@ -161,17 +155,7 @@ def calcHitContig_Distance_posrev(hit1, hit2):
#print(h2_start, hdist)
#print(h2_start, hdist)
#print(hit2)
#print(hit2)
err2
=
True
err2
=
True
'''
if cdist<-1*globalVars.splitLen:
if globalVars.contLenDICT[hit1.target]-hit1.t_end > globalVars.splitLen:
print(
"
add error %s for cont1
"
%(hit1.target))
print(hit1)
err1=True
if globalVars.contLenDICT[hit2.target]-hit2.t_end > globalVars.splitLen:
print(
"
add error %s for cont2
"
%(hit2.target))
print(hit2)
err2=True
'''
#print("PosRev")
#print("PosRev")
#print(err1, err2)
#print(err1, err2)
#print(hit1)
#print(hit1)
...
@@ -181,7 +165,8 @@ def calcHitContig_Distance_posrev(hit1, hit2):
...
@@ -181,7 +165,8 @@ def calcHitContig_Distance_posrev(hit1, hit2):
#print("\t%i\n"%(cdist))
#print("\t%i\n"%(cdist))
return
cdist
,
err1
,
err2
return
cdist
,
err1
,
err2
# function to calculate the distance between to contigs
# if hit1 on rev strand and hit2 on fwd strand
def
calcHitContig_Distance_revpos
(
hit1
,
hit2
):
def
calcHitContig_Distance_revpos
(
hit1
,
hit2
):
err1
=
False
err1
=
False
err2
=
False
err2
=
False
...
@@ -204,17 +189,6 @@ def calcHitContig_Distance_revpos(hit1, hit2):
...
@@ -204,17 +189,6 @@ def calcHitContig_Distance_revpos(hit1, hit2):
#print(h2_start, hdist)
#print(h2_start, hdist)
#print(hit2)
#print(hit2)
err2
=
True
err2
=
True
'''
if cdist<-1*globalVars.splitLen:
if hit1.t_start > globalVars.splitLen:
print(
"
add error %s for cont1
"
%(hit1.target))
print(hit1)
err1=True
if hit2.t_start > globalVars.splitLen:
print(
"
add error %s for cont2
"
%(hit2.target))
print(hit2)
err2=True
'''
#print("RevPos")
#print("RevPos")
#print(err1, err2)
#print(err1, err2)
...
@@ -331,13 +305,7 @@ def checkHitlist(hitlist, edgeGRAPH, oriHASH):
...
@@ -331,13 +305,7 @@ def checkHitlist(hitlist, edgeGRAPH, oriHASH):
# compare 2 hits following each other
# compare 2 hits following each other
for
curHits
in
hitlist
:
for
curHits
in
hitlist
:
#if curHit.target==globalVars.largestContig[0]:
#if len(curHits) > 1 the hit maps repetitive ignore it
#print("hit on largest contig found --> add coverage")
# addLongestCOV(curHit)
#if len(curHits)>1:
# for curHit in curHits:
# globalVars.goodE[curHit.target].append((curHit.target, min(curHit.t_start, curHit.t_end), max(curHit.t_start, curHit.t_end)))
if
len
(
curHits
)
==
1
:
if
len
(
curHits
)
==
1
:
curHit
=
curHits
[
0
]
curHit
=
curHits
[
0
]
#globalVars.goodE[curHit.target].append((curHit.target, min(curHit.t_start, curHit.t_end), max(curHit.t_start, curHit.t_end)))
#globalVars.goodE[curHit.target].append((curHit.target, min(curHit.t_start, curHit.t_end), max(curHit.t_start, curHit.t_end)))
...
...
This diff is collapsed.
Click to expand it.
gpma.py
+
10
−
10
View file @
bbacf7a4
...
@@ -11,8 +11,6 @@ import copy
...
@@ -11,8 +11,6 @@ import copy
# function to set edges in graph
# function to set edges in graph
def
setEdges
(
setEdge
,
agraph
):
def
setEdges
(
setEdge
,
agraph
):
if
setEdge
.
direct
!=
""
:
if
setEdge
.
direct
!=
""
:
newEdgeStart
=
globalVars
.
myEdge
(
setEdge
.
start
,
setEdge
.
target
,
newEdgeStart
=
globalVars
.
myEdge
(
setEdge
.
start
,
setEdge
.
target
,
setEdge
.
weight
,
setEdge
.
dist
,
setEdge
.
stdev
,
setEdge
.
direct
)
setEdge
.
weight
,
setEdge
.
dist
,
setEdge
.
stdev
,
setEdge
.
direct
)
...
@@ -39,6 +37,7 @@ def setEdges(setEdge, agraph):
...
@@ -39,6 +37,7 @@ def setEdges(setEdge, agraph):
#print(oldEdge[1])
#print(oldEdge[1])
return
oldEdge
return
oldEdge
# function to reset an old edge
def
resetEdge
(
setEdge
,
agraph
):
def
resetEdge
(
setEdge
,
agraph
):
#print("resetting edge")
#print("resetting edge")
#print(setEdge)
#print(setEdge)
...
@@ -85,6 +84,8 @@ def removeMergedNEW(rkey, newTar, addDist):
...
@@ -85,6 +84,8 @@ def removeMergedNEW(rkey, newTar, addDist):
#print("Now pointing to %s with dist %i"%(newTar, addDist))
#print("Now pointing to %s with dist %i"%(newTar, addDist))
globalVars
.
adjustDICT
[
rkey
]
=
(
newTar
,
addDist
)
globalVars
.
adjustDICT
[
rkey
]
=
(
newTar
,
addDist
)
# function to check for check for merging and calls the merging functions
# from merging.py
def
checkMerge
(
curEdge
,
cHASH
):
def
checkMerge
(
curEdge
,
cHASH
):
#print(curEdge.dist, globalVars.minOverlap*-1)
#print(curEdge.dist, globalVars.minOverlap*-1)
#print(curEdge.dist <= globalVars.minOverlap*-1)
#print(curEdge.dist <= globalVars.minOverlap*-1)
...
@@ -107,6 +108,7 @@ def checkMerge(curEdge, cHASH):
...
@@ -107,6 +108,7 @@ def checkMerge(curEdge, cHASH):
return
merged
,
mAdjust
return
merged
,
mAdjust
# function to merge paths as defined in the GPMA paper
def
mergeNEW
(
nEdge
,
inGraph
):
def
mergeNEW
(
nEdge
,
inGraph
):
graph
=
inGraph
.
cGraph
graph
=
inGraph
.
cGraph
sucess
=
True
sucess
=
True
...
@@ -304,7 +306,9 @@ def getNodePath(ggraph, anode):
...
@@ -304,7 +306,9 @@ def getNodePath(ggraph, anode):
return
ePath
,
pHash
return
ePath
,
pHash
# given list of edges this function returns the happy and
# unhappiness for all edges with both nodes in the path
# in regard to the currently selected path
def
checkHappiness
(
pdict
,
cDICT
):
def
checkHappiness
(
pdict
,
cDICT
):
nH
=
0
nH
=
0
nUH
=
0
nUH
=
0
...
@@ -349,8 +353,7 @@ def checkHappiness(pdict, cDICT):
...
@@ -349,8 +353,7 @@ def checkHappiness(pdict, cDICT):
else
:
else
:
#print("Incorrect orientation increasing UNhappy by %i"%(curEdge[0]))
#print("Incorrect orientation increasing UNhappy by %i"%(curEdge[0]))
nUH
+=
curEdge
[
0
]
nUH
+=
curEdge
[
0
]
continue
continue
return
nH
,
nUH
return
nH
,
nUH
...
@@ -433,7 +436,7 @@ def gpma(newEdge, inGraph):
...
@@ -433,7 +436,7 @@ def gpma(newEdge, inGraph):
#check happiness
#check happiness
if
newHP
-
(
nHP1
+
nHP2
)
>=
newUHP
-
(
nUHP1
+
nUHP2
):
if
newHP
-
(
nHP1
+
nHP2
)
>=
newUHP
-
(
nUHP1
+
nUHP2
):
#print("GPMA VERY HAPPY")
#print("GPMA VERY HAPPY")
for
remove
in
removed
:
for
remove
in
removed
:
removeMergedNEW
(
remove
[
0
][:
-
2
]
+
"
_0
"
,
remove
[
1
],
remove
[
2
])
removeMergedNEW
(
remove
[
0
][:
-
2
]
+
"
_0
"
,
remove
[
1
],
remove
[
2
])
removeMergedNEW
(
remove
[
0
][:
-
2
]
+
"
_1
"
,
remove
[
3
],
remove
[
4
])
removeMergedNEW
(
remove
[
0
][:
-
2
]
+
"
_1
"
,
remove
[
3
],
remove
[
4
])
else
:
else
:
...
@@ -444,11 +447,8 @@ def gpma(newEdge, inGraph):
...
@@ -444,11 +447,8 @@ def gpma(newEdge, inGraph):
#print("Happiness p1: %i/%i(H/UH)"%(nHP1, nUHP1))
#print("Happiness p1: %i/%i(H/UH)"%(nHP1, nUHP1))
#[print(e) for e in ePath2]
#[print(e) for e in ePath2]
#print("Happiness p2: %i/%i(H/UH)"%(nHP2, nUHP2))
#print("Happiness p2: %i/%i(H/UH)"%(nHP2, nUHP2))
for
edge
in
oldEdges
:
for
edge
in
oldEdges
:
# if GPMA unhappy reset old graph
resetEdge
(
edge
,
inGraph
)
resetEdge
(
edge
,
inGraph
)
#else:
#else:
# print("GPMA FAILED!!!")
# print("GPMA FAILED!!!")
...
...
This diff is collapsed.
Click to expand it.
merging.py
+
1
−
33
View file @
bbacf7a4
...
@@ -36,38 +36,6 @@ def runBLAST(id1, id2, eOL, checkOL):
...
@@ -36,38 +36,6 @@ def runBLAST(id1, id2, eOL, checkOL):
elif
id1
.
endswith
(
"
0
"
):
elif
id1
.
endswith
(
"
0
"
):
seq1_trimmed
=
seq1
[(
eOL
*-
1
)
-
len
(
seq2
)
:
eOL
*-
1
]
seq1_trimmed
=
seq1
[(
eOL
*-
1
)
-
len
(
seq2
)
:
eOL
*-
1
]
'''
if checkOL==True:
#only blast overlaps against each other
#if contig longer than overlap
if len(seq1) > estOL*(-1)+500:
if id1.endswith(
"
_1
"
):
seq1_trimmed=seq1[int(len(seq1)+(estOL-500)):]
elif id1.endswith(
"
_0
"
):
seq1_trimmed=seq1[:int(estOL*(-1)+500)]
else:
print(
"
WTF is this name - %s
"
%(id1))
else:
seq1_trimmed=seq1
checkOL=False
if len(seq2) > estOL*(-1)+500:
if id2.endswith(
"
_1
"
):
seq2_trimmed=seq2[int(len(seq2)+(estOL-500)):]
elif id2.endswith(
"
_0
"
):
seq2_trimmed=seq2[:int(estOL*(-1)+500)]
else:
print(
"
WTF is this name - %s
"
%(id2))
else:
seq2_trimmed=seq2
checkOL=False
else:
seq1_trimmed=seq1
seq2_trimmed=seq2
'''
tmpDIR
=
globalVars
.
outDir
tmpDIR
=
globalVars
.
outDir
#print(len(seq1), len(seq1_trimmed))
#print(len(seq1), len(seq1_trimmed))
...
@@ -281,7 +249,7 @@ def tryMERGE(blastPATH, cID1, cID2, len1, len2):
...
@@ -281,7 +249,7 @@ def tryMERGE(blastPATH, cID1, cID2, len1, len2):
blastReader
.
close
()
blastReader
.
close
()
return
merged
,
alnLen
*-
1
,
mergeAdjust
return
merged
,
alnLen
*-
1
,
mergeAdjust
# function to save the coordinates of the exact overlap a contig
def
saveMerge
(
nID
,
oID1
,
oID2
,
olap
):
def
saveMerge
(
nID
,
oID1
,
oID2
,
olap
):
if
oID1
<=
oID2
:
if
oID1
<=
oID2
:
combi
=
oID1
+
"
__
"
+
oID2
combi
=
oID1
+
"
__
"
+
oID2
...
...
This diff is collapsed.
Click to expand it.
splitReads.py
+
6
−
4
View file @
bbacf7a4
import
globalVars
import
globalVars
class
myFQ
:
# small class defining the content of fastq file
class
myFQ
:
def
__init__
(
self
,
iD
,
seq
,
opt
,
qual
):
def
__init__
(
self
,
iD
,
seq
,
opt
,
qual
):
self
.
iD
=
iD
self
.
iD
=
iD
self
.
seq
=
seq
self
.
seq
=
seq
self
.
opt
=
opt
self
.
opt
=
opt
self
.
qual
=
qual
self
.
qual
=
qual
# small class defining the content of fasta file
class
myFA
:
class
myFA
:
def
__init__
(
self
,
iD
,
seq
):
def
__init__
(
self
,
iD
,
seq
):
self
.
iD
=
iD
self
.
iD
=
iD
self
.
seq
=
seq
self
.
seq
=
seq
#function to split reads in fastq format
def
splitReadsFQ
(
inFQ
,
oFQ
,
splitSize
):
def
splitReadsFQ
(
inFQ
,
oFQ
,
splitSize
):
if
inFQ
.
endswith
(
"
.gz
"
):
if
inFQ
.
endswith
(
"
.gz
"
):
...
@@ -75,7 +77,7 @@ def splitReadsFQ(inFQ, oFQ, splitSize):
...
@@ -75,7 +77,7 @@ def splitReadsFQ(inFQ, oFQ, splitSize):
myWriter
.
close
()
myWriter
.
close
()
fqParser
.
close
()
fqParser
.
close
()
#function to split reads in fasta format
def
splitReadsFA
(
inFA
,
oFA
,
splitSize
):
def
splitReadsFA
(
inFA
,
oFA
,
splitSize
):
if
inFA
.
endswith
(
"
.gz
"
):
if
inFA
.
endswith
(
"
.gz
"
):
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment