diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000000000000000000000000000000000000..429e53a4cba07b74109b551238d57b2f1d8797a1
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,4 @@
+[submodule "minimap2"]
+	path = minimap2
+	url = https://github.com/lh3/minimap2
+	branch = master
diff --git a/README.md b/README.md
index 9463379dc30f8900695a513113e7a0eccefcf115..ff1375c7328f2340cd26d67bf2d2cac87821c346 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,73 @@
 # masterTool
+## Tool to Scaffold and Quality Control genome assemblies
+<p align="center">
+<img src="graphics/simple_workflow.svg">
+</p>
+
+### Dependencies/Prerequisites:
+1. Python 3 (tested with 3.6.6)
+→ the script is executable and uses the machine's default python3
+→ it should be backward compatible down to Python 2.6
+2. Biopython needs to be installed for the Python version in use
+→ pip/pip3 install biopython
+3. NCBI-BLAST+ package (blastn) needs to be set up GLOBALLY
+4. minimap2 set up within the script folder
+→ the bundled binary was built on Ubuntu 18.04 and may need to be recompiled on other machines (“make”)
+
+### Setup:
+Clone the repository including minimap2:
+git clone --recursive https://github.com/mdriller/masterTool.git
+Move into the minimap2 directory and build it:
+cd masterTool/minimap2/
+make
+
+### Usage:
+If python3 is in your PATH the script can be called as "./main.py", otherwise use "python main.py".
+The help text can be accessed via: ./main.py -h|--help
+To run, the script needs a genome assembly in fasta format and long reads (e.g. PacBio or ONT) in fastq or fasta format.
+>usage: main.py [-h] -ge GENOME -fq FASTQ -out OUTDIR [-m MODE] [-t THREADS]
+               [-mo MINOVERLAP] [-mi MINIDENT] [-it ITERATIONS] [-s SIZE]
+               [-fa]
+
+>>optional arguments:
+  -h, --help            show this help message and exit
+  -ge GENOME, --genome GENOME
+                        Contigs in fasta format
+  -fq FASTQ, --fastq FASTQ
+                        Reads in fastq format
+  -out OUTDIR, --outdir OUTDIR
+                        Directory for output files
+  -m MODE, --mode MODE  Mode for different types of input data. Options:
+                        pb/ont [default=pb]
+  -t THREADS, --threads THREADS
+                        Threads to use (currently only during mapping)
+                        [default=4]
+  -mo MINOVERLAP, --minoverlap MINOVERLAP
+                        Minimum overlap needed for two contigs to be merged
+                        [default=100]
+  -mi MINIDENT, --minident MINIDENT
+                        Minimum identity in overlap needed for two contigs to
+                        be merged [default=90.0]
+  -it ITERATIONS, --iterations ITERATIONS
+                        Maximum number of breaking iterations; set to 0 to
+                        perform NO breaking
+  -s SIZE, --size SIZE  Size of the chunks the input reads are split into
+                        [default=500]
+  -fa, --fasta          Enable if input reads are in fasta format
+
+Example Usage:
+
+./main.py -s 500 -ge /home/max/tests/genome.fasta -fq /home/max/tests/pacbioReads.fastq -out /home/max/tests/output -it 3
+
+### Output files:
+
+1. scaffolds.fasta → final scaffolded assembly
+2. haplotigs.fasta → removed “haplotigs” or (partially) duplicated contigs
+3. adjusted_contigs.fa → set of adjusted contigs (if broken in the first iteration)
+4. conts_scafs.ids → maps contig IDs back to the new scaffold IDs
+5. scaffolds_QC_it<i>.gff → GFF file with the GOODNESS track for each iteration, where <i> denotes the assembly used as reference in that iteration.
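+
+For orientation, the per-scaffold gff output (written by writeScaf in main.py) is tab-separated, with one feature line per placed contig or gap-filling read; the scaffold/contig/read IDs and coordinates below are invented for illustration:
+
+    scaf_0	.	contig(tig00000001)	1	15000
+    scaf_0	.	read(read_abc123)	15001	15098
+    scaf_0	.	contig(tig00000042)	15099	40210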
+ +### Workflow +<p align="center"> +<img src="graphics/workflow.svg"> +</p> diff --git a/checkHitlist.py b/checkHitlist.py new file mode 100644 index 0000000000000000000000000000000000000000..25221eeb10cb5ce2649c66c7fa6d5304be69a448 --- /dev/null +++ b/checkHitlist.py @@ -0,0 +1,384 @@ +import globalVars + +# function to compare to fragments on same scaf to check for insertions +# and deletions +def checkINSDELS(hit1, hit2): + curError=None + curCorrect=None + #print("check INS/DELS") + hdist = ((hit2.split - hit1.split) - 1) * hit1.q_len + + if hit1.strand == "+" and hit2.strand == "+": + h1_end = min(hit1.t_end + (hit1.q_len - hit1.q_end), hit1.t_len-1) + h2_start = max(hit2.t_start - hit2.q_start, 0) + dist = h2_start - h1_end + diffDIST = dist - hdist + eStart=hit1.t_end + eEnd=hit2.t_start + cstart=hit1.t_start + cend=hit2.t_end + + elif hit1.strand == "-" and hit2.strand == "-": + h1_end = min(hit1.t_start - (hit1.q_len - hit1.q_end), hit1.t_len-1) + h2_start = max(hit2.t_end + hit2.q_start, 0) + dist = h1_end - h2_start + diffDIST = dist - hdist + eEnd=hit1.t_start + eStart=hit2.t_end-1 + cstart=hit1.t_end + cend=hit2.t_start + + if diffDIST >= globalVars.minSVL or diffDIST <= -1*globalVars.minSVL: + #print("Error detected") + #print(hit1) + #print(hit2) + #print(eStart, eEnd, diffDIST) + curError=(hit1.target, min(eStart, eEnd), max(eStart, eEnd), diffDIST) + #if diffDIST < 0: + # print("DELETION!!!") + else: + #print("Correct mapping") + curCorrect=(min(cstart, cend), max(cstart, cend)) + #print(curCorrect) + # print(hit1) + # print(hit2) + # print(curCorrect) + # print(curError) + return curError, curCorrect + + +# identify inversions +def checkINVS(hit1, hit2, oriHASH): + #print("check INVS") + # to store the hits/fragments of inversion --> start and end + #hdist = ((hit2.split - hit1.split) - 1) * hit1.q_len + #readOri = oriHASH[hit1.query] + + curError=None + + #if hit2.strand != oriHASH[hit2.query]: + invStart=min(hit1.t_start, hit2.t_start) + invEnd=max(hit1.t_start, hit2.t_start) + curError=(hit1.target, invStart, invEnd, invEnd-invStart) + return curError + + +def calcHitContig_Dist_rev(hit1, hit2): + err1=False + err2=False + h1_end = hit1.t_start - (hit1.q_len - hit1.q_end) # rest of contig 1 after extending aln to theoretical end can be negative + h2_start = hit2.t_len - (hit2.t_end + hit2.q_start)# rest of contig 2 after extending aln to theoretical end can be negative + # hdist->=distance between splits + if hit1.split != hit2.split: + hdist = ((hit2.split - hit1.split) - 1) * hit1.q_len + else: + hdist = 0 + # calc contig distance + cdist = hdist - (h1_end + h2_start) + if h1_end > hdist and h1_end > globalVars.splitLen: + #print("add error %s for cont1"%(hit1.target)) + #print(h1_end, hdist) + #print(hit1) + err1=True + if h2_start > hdist and h2_start > globalVars.splitLen: + #print("add error %s for cont2"%(hit2.target)) + #print(h2_start, hdist) + #print(hit2) + err2=True + ''' + if cdist<-1*globalVars.splitLen: + if hit1.t_start > globalVars.splitLen: + print("add error %s for cont1"%(hit1.target)) + print(hit1) + err1=True + if globalVars.contLenDICT[hit2.target]-hit2.t_end > globalVars.splitLen: + print("add error %s for cont2"%(hit2.target)) + print(hit2) + err2=True + ''' + #print("RevRev") + #print(err1, err2) + #print(hit1) + #print(h1_end) + #print(hit2) + #print(h2_start) + #print("\t%i\n"%(cdist)) + return cdist, err1, err2 + + +def calcHitContig_Dist_pos(hit1, hit2): + err1=False + err2=False + h1_end = hit1.t_len - (hit1.t_end + (hit1.q_len - 
hit1.q_end)) + h2_start = hit2.t_start - hit2.q_start + # hdist->distance between splits + if hit1.split != hit2.split: + hdist = ((hit2.split - hit1.split - 1) * hit1.q_len) + else: + hdist = 0 + # calc contig distance + cdist = hdist - (h1_end + h2_start) + if h1_end > hdist and h1_end > globalVars.splitLen: + #print("add error %s for cont1"%(hit1.target)) + #print(h1_end, hdist) + #print(hit1) + err1=True + if h2_start > hdist and h2_start > globalVars.splitLen: + #print(h2_start, hdist) + #print("add error %s for cont2"%(hit2.target)) + #print(hit2) + err2=True + + #print("PosPos") + #print(err1, err2) + #print(hit1) + #print(h1_end) + #print(hit2) + #print(h2_start) + #print("\t%i\n"%(cdist)) + return cdist, err1, err2 + + +def calcHitContig_Distance_posrev(hit1, hit2): + err1=False + err2=False + h1_end = hit1.t_len - (hit1.t_end + (hit1.q_len - hit1.q_end)) + h2_start = hit2.t_len - (hit2.t_end + hit2.q_start) + # hdist->distance between splits + if hit1.split != hit2.split: + hdist = ((hit2.split - hit1.split - 1) * hit1.q_len) + else: + hdist = 0 + # calc contig distance + cdist = hdist - (h1_end + h2_start) + if h1_end > hdist and h1_end > globalVars.splitLen: + #print("add error %s for cont1"%(hit1.target)) + #print(h1_end, hdist) + #print(hit1) + err1=True + if h2_start > hdist and h2_start > globalVars.splitLen: + #print("add error %s for cont2"%(hit2.target)) + #print(h2_start, hdist) + #print(hit2) + err2=True + ''' + if cdist<-1*globalVars.splitLen: + if globalVars.contLenDICT[hit1.target]-hit1.t_end > globalVars.splitLen: + print("add error %s for cont1"%(hit1.target)) + print(hit1) + err1=True + if globalVars.contLenDICT[hit2.target]-hit2.t_end > globalVars.splitLen: + print("add error %s for cont2"%(hit2.target)) + print(hit2) + err2=True + ''' + #print("PosRev") + #print(err1, err2) + #print(hit1) + #print(h1_end) + #print(hit2) + #print(h2_start) + #print("\t%i\n"%(cdist)) + return cdist, err1, err2 + + +def calcHitContig_Distance_revpos(hit1, hit2): + err1=False + err2=False + h1_end = hit1.t_start - (hit1.q_len - hit1.q_end) + h2_start = hit2.t_start - hit2.q_start + # hdist->distance between splits + if hit1.split != hit2.split: + hdist = ((hit2.split - hit1.split - 1) * hit1.q_len) + else: + hdist = 0 + # calc contig distance + cdist = hdist - (h1_end + h2_start) + if h1_end > hdist and h1_end > globalVars.splitLen: + #print("add error %s for cont1"%(hit1.target)) + #print(h1_end, hdist) + #print(hit1) + err1=True + if h2_start > hdist and h2_start > globalVars.splitLen: + #print("add error %s for cont2"%(hit2.target)) + #print(h2_start, hdist) + #print(hit2) + err2=True + ''' + if cdist<-1*globalVars.splitLen: + if hit1.t_start > globalVars.splitLen: + print("add error %s for cont1"%(hit1.target)) + print(hit1) + err1=True + if hit2.t_start > globalVars.splitLen: + print("add error %s for cont2"%(hit2.target)) + print(hit2) + err2=True + ''' + + #print("RevPos") + #print(err1, err2) + #print(hit1) + #print(h1_end) + #print(hit2) + #print(h2_start) + #print("\t%i\n"%(cdist)) + return cdist, err1, err2 + + +# compares 2 hits on different contigs --> adds edges +def connectCONTS(prevhit, curhit, edgeHash): + #print("running connectCONTS") + #print(prevhit) + #print(curhit) + #if alns not at borders add errors for breaking of misassemblies + errors=[] + if prevhit.strand == '+' and curhit.strand == '+': + # nodes to set connections from oriHA + startNode = prevhit.target + "_1" + endNode = curhit.target + "_0" + hDist, e1, e2 = calcHitContig_Dist_pos(prevhit, curhit) 
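+        # note: curError1/curError2 below are single-position breakpoints
+        # obtained by extending each alignment to the ends of its read
+        # fragment; they are only recorded (via the e1/e2 flags) when a
+        # contig reaches well past the read evidence, i.e. a candidate misjoin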
+        #curError1=(prevhit.target, max(0,prevhit.t_end-100), min(prevhit.t_len-1, prevhit.t_end+100))
+        #curError2=(curhit.target, max(0,curhit.t_start-100), min(curhit.t_len-1,curhit.t_start+100))
+        curError1=(prevhit.target, prevhit.t_end+(prevhit.q_len-prevhit.q_end), prevhit.t_end+(prevhit.q_len-prevhit.q_end))
+        curError2=(curhit.target, curhit.t_start-curhit.q_start, curhit.t_start-curhit.q_start)
+
+    elif prevhit.strand == '-' and curhit.strand == '-':
+        # nodes to set connections from
+        startNode = prevhit.target + "_0"
+        endNode = curhit.target + "_1"
+        hDist, e1, e2 = calcHitContig_Dist_rev(prevhit, curhit)
+        #curError1=(prevhit.target, max(0,prevhit.t_start-100), min(prevhit.t_len-1,prevhit.t_start+100))
+        #curError2=(curhit.target, max(0,curhit.t_end-100), min(curhit.t_len-1,curhit.t_end+100))
+        curError1=(prevhit.target, prevhit.t_start+(prevhit.q_start), prevhit.t_start+(prevhit.q_start))
+        curError2=(curhit.target, curhit.t_end+(curhit.q_start), curhit.t_end+(curhit.q_start))
+
+    elif prevhit.strand == '+' and curhit.strand == '-':
+        # nodes to set connections from
+        startNode = prevhit.target + "_1"
+        endNode = curhit.target + "_1"
+        hDist, e1, e2 = calcHitContig_Distance_posrev(prevhit, curhit)
+        #curError1=(prevhit.target, max(0,prevhit.t_end-100), min(prevhit.t_len-1,prevhit.t_end+100))
+        #curError2=(curhit.target, max(0,curhit.t_end-100), min(curhit.t_len-1,curhit.t_end+100))
+        curError1=(prevhit.target, prevhit.t_end+(prevhit.q_len-prevhit.q_end), prevhit.t_end+(prevhit.q_len-prevhit.q_end))
+        curError2=(curhit.target, curhit.t_end+(curhit.q_start), curhit.t_end+(curhit.q_start))
+
+    elif prevhit.strand == '-' and curhit.strand == '+':
+        # nodes to set connections from
+        startNode = prevhit.target + "_0"
+        endNode = curhit.target + "_0"
+        hDist, e1, e2 = calcHitContig_Distance_revpos(prevhit, curhit)
+        #curError1=(prevhit.target, max(0,prevhit.t_start-100), min(prevhit.t_len-1,prevhit.t_start+100))
+        #curError2=(curhit.target, max(0,curhit.t_start-100), min(curhit.t_len-1,curhit.t_start+100))
+        curError1=(prevhit.target, prevhit.t_start+(prevhit.q_start), prevhit.t_start+(prevhit.q_start))
+        curError2=(curhit.target, curhit.t_start-curhit.q_start, curhit.t_start-curhit.q_start)
+
+    # if a contig extends far past the read evidence (long overhang),
+    # record the breakpoint as a misjoin error
+    #if e1==True and prevhit.split==curhit.split-1:
+    if e1==True:
+        #print("APPEND MISJOIN", curError1)
+        #print(prevhit)
+        #print(curhit)
+        errors.append(curError1)
+    #if e2==True and prevhit.split==curhit.split-1:
+    if e2==True:
+        #print("APPEND MISJOIN", curError2)
+        #print(prevhit)
+        #print(curhit)
+        errors.append(curError2)
+    # sort both nodes lexicographically to not have 2 different edges between
+    # the same nodes e.g. 
one a->b and b->a in the dict + if startNode < endNode: + node1 = startNode + node2 = endNode + else: + node1 = endNode + node2 = startNode + + + # add edge to "first" node + if not node1 in edgeHash: + edgeHash[node1] = {} + edgeHash[node1][node2] = [hDist] + #print("added edge with weight %i for conts %s and %s"%(hDist, node1, node2)) + #save readID spanning the gap + globalVars.gapDICT[node1]={} + globalVars.gapDICT[node1][node2]=[curhit.query] + #print("Read %s connects conts %s and %s with dist %i"%(curhit.query, node1, node2, hDist)) + #print(edgeHash[node1][node2]) + else: + if not node2 in edgeHash[node1]: + edgeHash[node1][node2] = [hDist] + #print("added edge with weight %i for conts %s and %s"%(hDist, node1, node2)) + #save readID spanning the gap + globalVars.gapDICT[node1][node2]=[curhit.query] + #print("Read %s connects conts %s and %s with dist %i"%(curhit.query, node1, node2, hDist)) + #print(edgeHash[node1][node2]) + else: + edgeHash[node1][node2].append(hDist) + #print("added edge with weight %i for conts %s and %s"%(hDist, node1, node2)) + #save readID spanning the gap + globalVars.gapDICT[node1][node2].append(curhit.query) + #print("Read %s connects conts %s and %s with dist %i"%(curhit.query, node1, node2, hDist)) + #print(edgeHash[node1][node2]) + return errors + + +# general function to loop over each hit list and compare each split to +# the next one --> calls connectCONTs and checkCONT +def checkHitlist(hitlist, edgeGRAPH, oriHASH): + lastHit = "" + # compare 2 hits following each other + for curHits in hitlist: + + #if curHit.target==globalVars.largestContig[0]: + #print("hit on largest contig found --> add coverage") + # addLongestCOV(curHit) + #if len(curHits)>1: + # for curHit in curHits: + # globalVars.goodE[curHit.target].append((curHit.target, min(curHit.t_start, curHit.t_end), max(curHit.t_start, curHit.t_end))) + + if len(curHits)==1: + curHit=curHits[0] + #globalVars.goodE[curHit.target].append((curHit.target, min(curHit.t_start, curHit.t_end), max(curHit.t_start, curHit.t_end))) + #compare two hits + if lastHit!="": + #print("Comparing 2 hits:") + #print(lastHit) + #print(curHit) + + # check hits on same contig --> e.g. 
continuity + if curHit.target == lastHit.target: + if curHit.strand == lastHit.strand: + #print("Check for INS/DEL") + # check for inseinsertErrorrtions and deletions + cError, cGood= checkINSDELS(lastHit, curHit) + #print("GOOD:", cGood) + #print("BAD:", cError) + + if cError!=None: + if cError[3] > 0: + globalVars.insE[cError[0]].append(cError) + else: + globalVars.delE[cError[0]].append(cError) + else: + globalVars.goodE[curHit.target].append((curHit.target, cGood[0], cGood[1])) + + else: + #print("Check for INV") + # add inversions + cError=checkINVS(lastHit, curHit, oriHASH) + if cError!=None: + globalVars.invE[cError[0]].append(cError) + + # check contig connections --> add edges to graph + else: + #print("Check for CONTIG CONNECTION\n") + cErrors=connectCONTS(lastHit, curHit, edgeGRAPH) + for e in cErrors: + #e[2]+=100 + globalVars.mjE[e[0]].append(e) + + lastHit=curHit + + return edgeGRAPH \ No newline at end of file diff --git a/globalVars.py b/globalVars.py new file mode 100644 index 0000000000000000000000000000000000000000..32c75caa0d54cff2b04db8895bb1019f783c314d --- /dev/null +++ b/globalVars.py @@ -0,0 +1,249 @@ +import sqlite3, subprocess +############################### +#######global variables######## +############################### + +################################### +###classes######################### +################################### + + +class coGRAPH: + + def __init__(self): + self.cGraph={} + + def addContigEdge(self, cID, sLen): + cEdge1 = myEdge(cID + "_0", cID + "_1", sLen, sLen, 0, 'c') + cEdge2 = myEdge(cID + "_1", cID + "_0", sLen, sLen, 0, 'c') + self.cGraph[cID + "_0"] = myCont(cEdge1, "", {}) + self.cGraph[cID + "_1"] = myCont(cEdge2, "", {}) + + def removeNode(self, cID): + del self.cGraph[cID] + print("%s had been removed"%(cID)) + + #function to return distance between node a and b if there is a path + def returnDist(self, a, b, circle=False): + visited=[] + restart=False + #print("getting distance") + #print(a,b) + #print(self.cGraph[a].active) + #print(self.cGraph[b].active) + #print(self.cGraph[a].cEdge) + #print(self.cGraph[b].cEdge) + if a in self.cGraph and b in self.cGraph: + #print("both in graph") + nextN=a + #print(nextN) + #print(self.cGraph[nextN].active) + dist=0 + while self.cGraph[nextN].active != "": + visited.append(nextN) + dist+=self.cGraph[nextN].active.dist + nextN=self.cGraph[nextN].active.target + #print(nextN, b) + if nextN==b: + #print("FOUND IT") + if dist==0: + dist=1 + return(dist) + nextN=self.cGraph[nextN].cEdge.target + #print(nextN) + if nextN in visited: + restart=True + break + visited.append(nextN) + if circle==True: + if nextN==b: + #print("FOUND IT") + if dist==0: + dist=1 + return(dist) + + if restart==True: + #print("CIRCLE DETECTED RESTART") + return self.returnDist(a, b, True) + + return False + + else: + print("wut not both in graph???") + return False + + + # function to find start of a path starting with a node in the path + def findStart(self, anode): + inPath=[] + #print("Finding start for node %s"%(anode)) + # just to make sure to alayws start at 0 end of contig and then go to the + # left from there + if anode.endswith("0"): + cN = anode + elif anode.endswith("1"): + cN = self.cGraph[anode].cEdge.target + else: + print("ERROR incorrect scaffold name found %s" %(anode)) + + inPath.append(cN[:-2]) + + while self.cGraph[cN].active != "": + cN = self.cGraph[cN].active.target + #print(cN) + if cN[:-2] in inPath: + print("CIRCLE FOUND!!!!") + #print(self.cGraph[cN].cEdge) + #print(self.cGraph[cN].active) + 
return cN + inPath.append(cN[:-2]) + # move to other end of the contig for possible other read edge --> + # otherwse endless loop + # print(cN) + cN = self.cGraph[cN].cEdge.target + #print("Start of path found: %s"%(cN)) + return cN + + def getPath(self, anode): + inPath=[] + startNode=self.findStart(anode) + curNode=startNode + while 1: + if curNode in inPath: + print("getPath - Circle found!!!") + exit() + + inPath.append(curNode) + curNode=self.cGraph[curNode].cEdge.target + inPath.append(curNode) + + if self.cGraph[curNode].active == "": + print("End of path reached") + return inPath + else: + curNode=self.cGraph[curNode].active.target + + + + + # returns an alternating path of edges + def returnEgdePath(self, anode): + inPath=[] + ePath=[] + pHash={} + curDist=0 + startNode = self.findStart(anode) + curNode=startNode + inPath.append(startNode[:-2]) + pHash[startNode]=(curDist, 'a') + ePath.append(self.cGraph[curNode].cEdge) + curDist+=self.cGraph[curNode].cEdge.dist + curNode=self.cGraph[curNode].cEdge.target + + while self.cGraph[curNode].active != "": + pHash[curNode]=(curDist, 'b') + ePath.append(self.cGraph[curNode].active) + curDist+=self.cGraph[curNode].active.dist + curNode=self.cGraph[curNode].active.target + pHash[curNode]=(curDist, 'a') + ePath.append(self.cGraph[curNode].cEdge) + curDist+=self.cGraph[curNode].cEdge.dist + curNode=self.cGraph[curNode].cEdge.target + + if not curNode[:-2] in inPath: + inPath.append(curNode[:-2]) + + return ePath, pHash + + + +class myCont: + + def __init__(self, cEdge, active, eList): + self.cEdge=cEdge + self.active=active + self.eList=eList + + +# class for an edge +class myEdge: + + def __init__(self, start, target, weight, dist, stdev, direct): + self.start = start + self.target = target + self.weight = weight + self.dist = dist + self.stdev = stdev + self.direct = direct + + # comparing edes will comapre their weights + def __lt__(self, other): + return self.weight < other.weight + + def __gt__(self, other): + return self.weight > other.weight + + # printing function + def __str__(self): + return("\t%s, %s, weight:%i, dist:%i, stdev:%f, %s" % + (self.start, self.target, self.weight, self.dist, self.stdev, self.direct)) + + +# class for outputting scaffolds in the end +class mySCAF: + + def __init__(self, names, seqs, dirs): + self.names = names + self.seqs = seqs + self.dirs = dirs + + +# function to initialize global variables +def init(out, sL, mL, mO, mI, t, mSV, bPATH, mPATH): + global largestContig, distError, outDir, splitLen, minOverlap, minLinks + global minIdent, threads, contsMerged, contLenDICT, contDICT, mergedDICT, readHash + global contigGR, adjustDICT, gapDICT, eDICT, edgeDICT + global blastCounter, poserrorHASH, minSVL + global blastP, minimap2P + + global insE, delE, invE, mjE, goodE + insE={} + delE={} + invE={} + mjE={} + goodE={} + + blastCounter=0 + poserrorHASH={} + readHash={} + + outDir = out + splitLen = sL + #variable for largest contig + largestContig=("", 0) + #largestMapped=[] + #distance error is margin of read mappings + distError=float(splitLen)*0.15*2 + minLinks = mL + minOverlap = mO + minIdent = mI + threads = t + contsMerged=0 + minSVL=mSV + + contLenDICT = {} + contDICT = {} + # saves the ids of merged contigs + mergedDICT = {} + # contig GRAPH + contigGR = {} + # adjust hash stores edges and saves the new edge to which they should be + # redirected and the new distance that needs to be added to it + adjustDICT = {} + #for errors --> key scafID --> returns reads that need to be mapped back + eDICT = 
{} + # store readIDs of read spanning gaps + gapDICT={} + # for contig edges implied by hits + edgeDICT={} + diff --git a/gpma.py b/gpma.py new file mode 100644 index 0000000000000000000000000000000000000000..e964d82282198b72ae3935e01d7c2af0e1a8ba76 --- /dev/null +++ b/gpma.py @@ -0,0 +1,456 @@ +import globalVars +import merging +import copy + +################################## +################################## +######functions################### +################################## +################################## + + +# function to set edges in graph +def setEdges(setEdge, agraph): + + + if setEdge.direct!="": + newEdgeStart = globalVars.myEdge(setEdge.start, setEdge.target, + setEdge.weight, setEdge.dist, setEdge.stdev, setEdge.direct) + newEdgeTarget = globalVars.myEdge(setEdge.target, setEdge.start, + setEdge.weight, setEdge.dist, setEdge.stdev, setEdge.direct) + else: + newEdgeStart="" + newEdgeTarget="" + #print(setEdge) + #print("\tLooks good --> Setting edge: %s" % (newEdgeStart)) + #print("\tLooks good --> Setting edge: %s" % (newEdgeTarget)) + + oldEdge1=copy.copy(agraph.cGraph[setEdge.start].active) + oldEdge2=copy.copy(agraph.cGraph[setEdge.target].active) + if oldEdge1=="": + oldEdge1=globalVars.myEdge(setEdge.start, setEdge.target, 0, 0, 0, "") + if oldEdge2=="": + oldEdge2=globalVars.myEdge(setEdge.target, setEdge.start, 0, 0, 0, "") + agraph.cGraph[setEdge.start].active = newEdgeStart + agraph.cGraph[setEdge.target].active = newEdgeTarget + oldEdge=[oldEdge1, oldEdge2] + #print("OLD EDGE") + #print(oldEdge[0]) + #print(oldEdge[1]) + return oldEdge + +def resetEdge(setEdge, agraph): + #print("resetting edge") + #print(setEdge) + if setEdge.direct!="": + newEdgeStart = globalVars.myEdge(setEdge.start, setEdge.target, + setEdge.weight, setEdge.dist, setEdge.stdev, setEdge.direct) + else: + newEdgeStart="" + + agraph.cGraph[setEdge.start].active = newEdgeStart + +#function to adjust the gap dictionary in case +def adjustGAPdict(new1, new2, old1, old2): + + #print("Adjusting gap dict from %s to %s by taking them from %s to %s"%(new1, new2, old1, old2)) + + if old1 < old2: + n1 = old1 + n2 = old2 + else: + n1 = old2 + n2 = old1 + #print(n1, n2) + gapReads=globalVars.gapDICT[n1][n2] + + if new1 < new2: + node1 = new1 + node2 = new2 + else: + node1 = new2 + node2 = new1 + if globalVars.gapDICT.get(node1, "nope")=="nope": + globalVars.gapDICT[node1]={} + globalVars.gapDICT[node1][node2]=[] + elif globalVars.gapDICT[node1].get(node2, "nope")=="nope": + globalVars.gapDICT[node1][node2]=gapReads + else: + globalVars.gapDICT[node1][node2]+=gapReads + +# function to properly remove key from a dictionary +def removeMergedNEW(rkey, newTar, addDist): + + #print("Removing %s from dict"%(rkey)) + #print("Now pointing to %s with dist %i"%(newTar, addDist)) + globalVars.adjustDICT[rkey]=(newTar, addDist) + +def checkMerge(curEdge, cHASH): + #print(curEdge.dist, globalVars.minOverlap*-1) + #print(curEdge.dist <= globalVars.minOverlap*-1) + #print("\tContigs are apparently overlapping - check for overlap for possible merging") + #check if merging is possible and get the real overlap + merged, realOL, mAdjust = merging.runBLAST(curEdge.start, curEdge.target, curEdge.dist, True) + #if cant be merged still set edge with distance 0 + #print("\t"+str(merged)) + if merged==True: + #print("\tsucessfully merged") + #print(curEdge.start, curEdge.target, curEdge.dist, realOL, curEdge.dist*(-1)-realOL*(-1)) + curEdge.dist=realOL + + else: + if merged=="Removed": + #removeMergedNEW(cHASH, 
mAdjust[0][:-2]+"_0", mAdjust[1], mAdjust[2]) + #removeMergedNEW(cHASH, mAdjust[0][:-2]+"_0", mAdjust[3], mAdjust[4]) + curEdge.dist=realOL + #print("Cont %s was part of the other -> abort for now"%(mAdjust[0][:-2])) + + return merged, mAdjust + +def mergeNEW(nEdge, inGraph): + graph=inGraph.cGraph + sucess=True + edgeList=[] + removal=[] + #print("Start Merging Paths") + inPath=[] + curEdge=copy.copy(nEdge) + end=False + zipper='r' + #zip right + firstL=False + while end==False: + #print("while") + #print(curEdge) + if curEdge.start[:-2] == curEdge.target[:-2]: + #print("\tEdge with itself - skip") + end=True + break + + #if original edge is supposed to overlap contigs check for overlap first + #abort if overlap not there + if curEdge.dist < 0: + merged, eAdjust=checkMerge(curEdge, inGraph) + if merged==False: + if curEdge.dist > (-2)*globalVars.distError: + #print("Merging failed but distance within range --> set to 1N for now") + curEdge.dist=1 + else: + #print("Merging failed!!!") + sucess=False + return sucess, [], [] + if merged == "Removed": + next1=copy.copy(graph[curEdge.start].active) + next2=copy.copy(graph[curEdge.target].cEdge) + next22=copy.copy(graph[next2.target].active) + #edgeList.append(copy.copy(curEdge)) + if curEdge.dist*(-1) >= globalVars.contLenDICT[curEdge.target[:-2]]-20: + #print("Duplicate case1: contig %s is part of %s"%(curEdge.target, curEdge.start)) + if next22=="": + #print("End of path reached") + removal.append(eAdjust) + if graph[curEdge.target].active!="": + curEdge=copy.copy(graph[curEdge.target].active) + #print("Trying to zipper left path from merged cont guide edge:") + #print(curEdge) + else: + #print("nothing left to merge done here") + break + else: + curEdge.start=curEdge.start + curEdge.target=next22.target + curEdge.dist=curEdge.dist + (next2.dist + next22.dist) + #print("Merged implied edge:") + #print(curEdge) + removal.append(eAdjust) + #next iteration + continue + elif curEdge.dist*(-1) >= globalVars.contLenDICT[curEdge.start[:-2]]-20: + #print("Duplicate case2: contig %s is part of %s"%(curEdge.start, curEdge.target)) + if next1=="": + #print("End of path reached") + removal.append(eAdjust) + otherEnd = graph[curEdge.start].cEdge.target + if graph[otherEnd].active!="": + curEdge=copy.copy(graph[otherEnd].active) + #print("Trying to zipper left path from merged cont guide edge:") + #print(curEdge) + else: + #print("nothing left to merge done here") + break + else: + curEdge.start=next2.target + curEdge.target=next1.target + curEdge.dist=next1.dist - curEdge.dist - next2.dist + #print("Merged implied edge:") + #print(curEdge) + removal.append(eAdjust) + #next iteration + continue + else: + print("WTF why removed???") + print(globalVars.contLenDICT[curEdge.start[:-2]]) + print(globalVars.contLenDICT[curEdge.target[:-2]]) + exit() + + #sucess=False + #return sucess, [] + + if firstL==True: + #print("skip first left") + next1 = globalVars.myEdge(edgeList[0].target, edgeList[0].start, edgeList[0].weight, edgeList[0].dist, edgeList[0].stdev, edgeList[0].direct) + firstL=False + else: + next1=copy.copy(graph[curEdge.start].active) + next2=copy.copy(graph[curEdge.target].cEdge) + if next1!="": + next11=copy.copy(graph[next1.target].cEdge) + #print(next1) + #print(next11) + #print(next2) + if curEdge.start == next1.start and curEdge.target == next1.target: + if abs(curEdge.dist - next1.dist) < 3*curEdge.stdev: + sucess=True + break + else: + #print("Egde already exists but distance failed") + next1="" + sucess=False + break + elif next1=="": + 
#print("End of %s-path reached edge looks good to set"%(zipper)) + edgeList.append(curEdge) + if zipper=='r': + zipper='l' + firstL=True + curEdge=copy.copy(graph[nEdge.target].active) + if curEdge!="": + #print("Checking left path starting with edge") + #print(curEdge) + continue + else: + #print("Original edge had nothing left to merge") + #print(nEdge) + #print(curEdge) + end=True + break + else: + end=True + break + + + #case curEdge+contigEdge of p2 fits in next read edge of p1 + #case1 + #if next1.dist >= curEdge.dist and next1.dist > 0: + if next1.dist >= curEdge.dist: + #adjust gapHASH if new dist > 0 + if (next1.dist-(curEdge.dist+next2.dist)) > (-2)*globalVars.distError: + #print("New dist %i > 0 adjust gapHASH"%(next1.dist-(curEdge.dist+next2.dist))) + adjustGAPdict(next2.target, next1.target, next1.start, next1.target) + #add curEdge to list because it would be set if merging is possible + edgeList.append(copy.copy(curEdge)) + #imply new edge + curEdge.start=next2.target # flip target and start here to continue + curEdge.target=next1.target # flip target and start here to continue + curEdge.dist=next1.dist-(curEdge.dist+next2.dist) + #print("\timplied edge1") + #print(curEdge) + continue + + #case readEdge+ next contigEdge of p1 fits in curEdge + #case2 + #elif curEdge.dist > next1.dist and curEdge.dist > 0: + elif curEdge.dist > next1.dist: + #adjust gapHASH if new dist > 0 + if (curEdge.dist-(next1.dist+next11.dist)) > (-2)*globalVars.distError: + #print("New dist %i > 0 adjust gapHASH"%(curEdge.dist-(next1.dist+next11.dist))) + adjustGAPdict(next11.target, curEdge.target, curEdge.start, curEdge.target) + #imply new edge + curEdge.start=next11.target # no flip needed + curEdge.target=curEdge.target # no flip needed + curEdge.dist=curEdge.dist-(next1.dist+next11.dist) + + #print("\timplied edge2") + #print(curEdge) + continue + + return sucess, edgeList, removal + + # returns an alternating path of edges +def getNodePath(ggraph, anode): + #print("Get node path") + ePath=[] + pHash=[] + curDist=0 + startNode = ggraph.findStart(anode) + curNode=startNode + pHash.append(startNode) + ePath.append(ggraph.cGraph[curNode].cEdge) + curDist+=ggraph.cGraph[curNode].cEdge.dist + curNode=ggraph.cGraph[curNode].cEdge.target + + while ggraph.cGraph[curNode].active != "" and curNode not in pHash: + #print(curNode) + pHash.append(curNode) + ePath.append(ggraph.cGraph[curNode].active) + curNode=ggraph.cGraph[curNode].active.target + #print(curNode) + pHash.append(curNode) + ePath.append(ggraph.cGraph[curNode].cEdge) + curNode=ggraph.cGraph[curNode].cEdge.target + + + pHash.append(curNode) + + return ePath, pHash + + +def checkHappiness(pdict, cDICT): + nH=0 + nUH=0 + #print("Checking happiness of path") + checked=[] + #print(len(pdict)) + for c1 in pdict: + checked.append(c1) + for c2 in pdict: + #print(c1, c2) + # sort both nodes lexicographically to not have 2 different edges between + # the same nodes e.g. 
one a->b and b->a in the dict + if c1 < c2: + node1 = c1 + node2 = c2 + else: + node1 = c2 + node2 = c1 + + if not c2 in checked: + if node1 in globalVars.edgeDICT: + if node2 in globalVars.edgeDICT[node1]: + dist=cDICT.returnDist(node1, node2) + #print("%s and %s both in list with dist: %i"%(node1, node2, dist)) + curEdges=globalVars.edgeDICT[node1][node2] + #curEdges+=globalVars.edgeDICT[node2][node1] + for curEdge in curEdges: + if curEdge[0] >= globalVars.minLinks: + #print(node1, node2) + #print("comparing distance %i with edge"%(dist)) + #print(curEdge) + if dist != False: + #print("Orientations fit set distance is %i predicted distance %i"%(dist, curEdge[1])) + if abs(curEdge[1] - dist) <= max(3*curEdge[2], 100): #check if within 3*stdev of bundeld edge + #print("Distance fits increasing happy by %i"%(curEdge[0])) + nH+=curEdge[0] + continue + else: + #print("Distance does not fit increasing UNhappy by %i"%(curEdge[0])) + nUH+=curEdge[0] + continue + else: + #print("Incorrect orientation increasing UNhappy by %i"%(curEdge[0])) + nUH+=curEdge[0] + continue + + + return nH, nUH + + +# main function to run gpma +def gpma(newEdge, inGraph): + #print("gpma") + inputEdge=copy.copy(newEdge) + #print(newEdge) + + # check if one edge points to a merged contig --> in that case needs to be + # adjusted + # while loop to go through multiple adjustments + while globalVars.adjustDICT.get(newEdge.start, "nope") != "nope": + #print(inputEdge) + #if globalVars.adjustDICT.get(newEdge.start, "nope") != "nope": + newEdge.dist -= globalVars.adjustDICT[newEdge.start][1] + newEdge.start = globalVars.adjustDICT[newEdge.start][0] + #print(newEdge.start, newEdge.target, inputEdge.start, inputEdge.target) + adjustGAPdict(newEdge.start, newEdge.target, inputEdge.start, inputEdge.target) + + #print("edge adjusted to merged edge") + #print(newEdge) + + while globalVars.adjustDICT.get(newEdge.target, "nope") != "nope": + #print(inputEdge) + #if globalVars.adjustDICT.get(newEdge.target, "nope") != "nope": + newEdge.dist -= globalVars.adjustDICT[newEdge.target][1] + newEdge.target = globalVars.adjustDICT[newEdge.target][0] + #print(newEdge.start, newEdge.target, inputEdge.start, inputEdge.target) + adjustGAPdict(newEdge.start, newEdge.target, inputEdge.start, inputEdge.target) + #print("edge adjusted to merged edge") + #print(newEdge) + + + inputEdge=copy.copy(newEdge) + #start directions + + #print("Path1:") + #ePath1, pHASH1 = inGraph.returnEgdePath(newEdge.start) + ePath1, pHASH1 = getNodePath(inGraph, newEdge.start) + #print(len(pHASH1)) + #for ele in ePath1: + # print(ele) + nHP1, nUHP1 = checkHappiness(pHASH1, inGraph) + #print("Happiness p1: %i/%i(H/UH)"%(nHP1, nUHP1)) + + #print("Path2:") + #ePath2, pHASH2 = inGraph.returnEgdePath(newEdge.target) + ePath2, pHASH2 = getNodePath(inGraph, newEdge.target) + #print(len(pHASH2)) + #for ele in ePath2: + # print(ele) + nHP2, nUHP2 = checkHappiness(pHASH2, inGraph) + #print("Happiness p2: %i/%i(H/UH)"%(nHP2, nUHP2)) + + result, setList, removed = mergeNEW(inputEdge, inGraph) + + if result==True: + oldEdges=[] + for edge in setList: + oE=setEdges(edge, inGraph) + oldEdges=oE+oldEdges + + #print("New path:") + t=True + for ele in removed: + if ele[0][:-2] == newEdge.start[:-2]: + t=False + if t== True: + newPath, newHASH = getNodePath(inGraph, newEdge.start) + else: + newPath, newHASH = getNodePath(inGraph, newEdge.target) + #print(len(newHASH)) + #for ele in newPath: + # print(ele) + newHP, newUHP = checkHappiness(newHASH, inGraph) + + + #check happiness + if newHP - 
(nHP1+nHP2) >= newUHP - (nUHP1+nUHP2): + #print("GPMA VERY HAPPY") + for remove in removed: + removeMergedNEW(remove[0][:-2]+"_0", remove[1], remove[2]) + removeMergedNEW(remove[0][:-2]+"_1", remove[3], remove[4]) + else: + #print("GPMA UNHAPPY EDGE SHOULD NOT HAVE BEEN SET") + #[print(e) for e in newPath] + #print("Happiness NEW PATH: %i/%i(H/UH)"%(newHP, newUHP)) + #[print(e) for e in ePath1] + #print("Happiness p1: %i/%i(H/UH)"%(nHP1, nUHP1)) + #[print(e) for e in ePath2] + #print("Happiness p2: %i/%i(H/UH)"%(nHP2, nUHP2)) + for edge in oldEdges: + resetEdge(edge, inGraph) + + + + #else: + # print("GPMA FAILED!!!") + + + diff --git a/main.py b/main.py new file mode 100755 index 0000000000000000000000000000000000000000..83ed55f67074140868c845666eeedf40d6a212c4 --- /dev/null +++ b/main.py @@ -0,0 +1,975 @@ +#!/usr/bin/env python3 + +import argparse, subprocess, sys +import statistics +import datetime +import copy +# import own scripts for stuff +import checkHitlist +from gpma import gpma +import globalVars +import splitReads +import merging + +try: + from subprocess import DEVNULL # py3k +except ImportError: + import os # for python2 + DEVNULL = open(os.devnull, 'wb') +# biopython for now --> for reverse complement function +#from Bio.Seq import Seq + + +############################### +#######global variables######## +############################### + + + +# key is readID(without split) --> save orientation for each read to check +# for inversions later +oriHASH = {} + +################################### +#############classes############### +################################### + + +# class for for each hit within the PAF file +class myHIT: + + def __init__(self, query, split, strand, target, t_len, t_start, t_end, q_len, q_start, q_end, mq): + self.query = query + self.split = split + self.strand = strand + self.target = target + self.t_len = t_len + self.t_start = t_start + self.t_end = t_end + self.q_len = q_len + self.q_start = q_start + self.q_end = q_end + self.mq = mq + + def __str__(self): + return("\t%s, %i, %s, %s, %s, %s, %s, %s, %s, %s, %i" % (self.query, self.split, self.strand, self.target, str(self.t_len), str(self.t_start), str(self.t_end), str(self.q_len), str(self.q_start), str(self.q_end), self.mq)) + # comparing edges will compare their weights + + def __lt__(self, other): + return self.q_start < other.q_start + + def __gt__(self, other): + return self.q_start > other.q_start + +# class for outputting scaffolds in the end +class mySCAF: + + def __init__(self, names, seqs): + self.names = names + self.seqs = seqs + + + +################################### +###functions####################### +################################### + +# function to parse the config file containing the paths to +# blastn and minimap2 +def parsePATHs(): + scriptPath=sys.argv[0].rstrip("main.py") + configReader=open("%s/config.txt"%scriptPath) + minimap2Path="" + blastnPath="" + for line in configReader: + if not line.startswith("#"): + if line.startswith("MINIMAP2"): + minimap2Path = line.strip().split("=")[-1] + elif line.startswith("BLASTN="): + blastnPath = line.strip().split("=")[-1] + if minimap2Path=="": + print("Error no entry for minimap2 path in config.txt found - exiting...") + exit() + if blastnPath=="": + print("Error no entry for blastn path in config.txt found - exiting...") + exit() + configReader.close() + #print("minimap path: %s"%(minimap2Path)) + #print("blastn path: %s"%(blastnPath)) + + return minimap2Path, blastnPath + +#function to reverse complement a sequence +def 
reverseComplement(seq): + old = "ACGT" + replace = "TGCA" + return seq.upper().translate(str.maketrans(old, replace))[::-1] + +# function to run minimap2 +def runMinimap2(nThreads, mapMode, inFA, inFQ, outPAF): + scriptPath=sys.argv[0].rstrip("main.py") + try: + subprocess.call("%s/minimap2/minimap2 -t %i -x %s %s %s> %s" % + (scriptPath, nThreads, mapMode, inFA, inFQ, outPAF), shell=True, stdout=DEVNULL, stderr=subprocess.STDOUT) + #subprocess.call("%s/minimap2/minimap2 -t %i -x %s %s %s> %s" % + # (scriptPath, nThreads, mapMode, inFA, inFQ, outPAF), shell=True) + except: + print("Problem running minimap2 - check if its properly set up") + +# function to parse contigs and initialize the contigGraph +def parseContigs(path, cograph): + if path.endswith(".gz"): + import gzip + faParser = gzip.open(path, mode='rt') + else: + faParser = open(path) + # for first iteration/line + seq = "" + for line in faParser: + if line.startswith(">"): + if seq != "": + seqLen = len(seq) + # initalize contigGraph + cograph.addContigEdge(contID, seqLen) + # initalize contLenDICT + globalVars.contLenDICT[contID] = seqLen + globalVars.contDICT[contID] = seq + globalVars.insE[contID]=[] # insertions + globalVars.invE[contID]=[] # inversions + globalVars.delE[contID]=[] # deletions + globalVars.mjE[contID]=[] # misjoins + globalVars.goodE[contID]=[] # continuous mapping hits + if seqLen>=globalVars.largestContig[1]: + globalVars.largestContig=(contID, seqLen) + + contID = line.strip().split(" ")[0][1:] + seq = "" + else: + seq += line.strip() + + faParser.close() + + # for last sequence not covered by loop + seqLen = len(seq) + # initalize contigGraph + cograph.addContigEdge(contID, seqLen) + # initalize globalVars.contLenDICT + globalVars.contLenDICT[contID] = seqLen + globalVars.contDICT[contID] = seq + globalVars.insE[contID]=[] + globalVars.invE[contID]=[] + globalVars.delE[contID]=[] + globalVars.mjE[contID]=[] + globalVars.goodE[contID]=[] + + + +#function to bundle a set of edges between two nodes +#as described in gpma +def bundleEdges(edgeSet, noMed=False): + #print("BUNDELING:") + #print(edgeSet) + nm=False + bundledList=[] + if noMed ==True: # if median picked before clusered nothing take first edge + #print("Taking median failed before so now just pick first edge") + medianEdgeLen=edgeSet[0] + else: + medianEdgeLen = statistics.median(edgeSet) #calc median + #stDev=numpy.std(edgeSet) + #print("Median edge chosen: %i"%(medianEdgeLen)) + #print(stDev, globalVars.distError*2) + newWeight=0 + fitDists=[] + #if edgeSet only has length one to avaoid unnecessary computations + if len(edgeSet) == 1 and globalVars.minLinks<=1: + bundledList=[(1, edgeSet[0], 0)] + edgeSet.remove(edgeSet[0]) + + else: + for dist in edgeSet: + #print(abs(medianEdgeLen - dist), 2*globalVars.distError) + if abs(medianEdgeLen - dist) < 2*globalVars.distError: + newWeight+=1 + fitDists.append(dist) + #print("dist %i fit and was added and removed!"%dist) + # important to remove after loop otherwise it causes the error of skipping some edges + for dist in fitDists: + edgeSet.remove(dist) + if len(fitDists) > 1: + # bundle edges as defined in GPMA + p=sum([dist/((globalVars.distError*2)**2) for dist in fitDists]) + q=sum([1/((globalVars.distError*2)**2) for dist in fitDists]) + newDist=int(p/q) + newSDev=statistics.stdev(fitDists) + newEdge=(newWeight, newDist, newSDev) + #print("bundeled new edge added:") + #print(newEdge) + #print(fitDists) + bundledList.append(newEdge) + elif len(fitDists) == 0: + #print("Bundeling failed") + nm=True 
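+            # nothing clustered around this median: nm is handed to the
+            # recursive bundleEdges call at the end as noMed, which then
+            # falls back to taking the first remaining edge as the seed
+            # instead of the median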
+ #if globalVars.minLinks<=1: + # bundledList+=[(1, edge, 0) for edge in edgeSet] + # edgeSet=[] + + elif len(fitDists) == 1 and globalVars.minLinks<=1: + newDist=fitDists[0] + bundledList.append((1, fitDists[0], 0)) + + + #newEdge = (edgeWeight, medianEdgeLen) + + + # repeat if still unbundled edges left + if len(edgeSet)>0: + #print("recall bundeling there is still some edges left") + bundledList+=bundleEdges(edgeSet, nm) + #print("\n\n") + return(bundledList) + + +# parses PAF gets hitlists and checks them --> builds edges and bundles them +# returns set of bundled edges +def parsePAF(path): + if path.endswith(".gz"): + import gzip + pafParser = gzip.open(path) + else: + pafParser = open(path) + + # list for all hit of one reads --> e.g. best hit per split part + hitList = [] + posCounts = 0 + revCounts = 0 + + for line in pafParser: + splitted = line.strip().split("\t") + query = splitted[0] + q_len = int(splitted[1]) + q_start = int(splitted[2]) + q_end = int(splitted[3])-1 + strand = splitted[4] + tar = splitted[5] + t_len = int(splitted[6]) + t_start = int(splitted[7]) + t_end = int(splitted[8])-1 + #matching = splitted[9] + #opts = splitted[10] + mq = int(splitted[11]) + #P for primary aln or S for secondary + #alnType=splitted[12].split(":")[-1] + + # get fragment ID + qID = query.split("_")[0] + splitID = int(query.split("_")[-1]) + + curHIT = myHIT(qID, splitID, strand, tar, t_len, + t_start, t_end, q_len, q_start, q_end, mq) + + # take only the best hit for each fragment --> replace if the last one was + + if q_end-q_start>q_len*0.3: + if len(hitList)==0: + hitList.append([curHIT]) + continue + if curHIT.query == hitList[-1][0].query: + #if curHIT.split == lastHit.split: + + if curHIT.split == hitList[-1][0].split: + #print("Mult hit found") + #print(lastHit) + #print(curHIT) + #identical hit + hitList[-1].append(curHIT) + else: + hitList.append([curHIT]) + if curHIT.strand == "+": + posCounts += 1 + elif curHIT.strand == "-": + revCounts += 1 + + else: + # more than 1 hit --> only then it makes sense to check the + # hitlist + if len(hitList) > 1: + #check the hitlist to identify SVs misjoins and edges + checkHitlist.checkHitlist(hitList, globalVars.edgeDICT, oriHASH) + + # start hit list for new read and reset repCounter + hitList = [] + if q_end-q_start>=q_len*0.3: + hitList.append([curHIT]) + if curHIT.strand == "+": + posCounts += 1 + elif curHIT.strand == "-": + revCounts += 1 + + #for last hitlist + checkHitlist.checkHitlist(hitList, globalVars.edgeDICT, oriHASH) + + pafParser.close() + + + print("\t2.1) Bundling Edges") + edgeLIST = [] + # bundleEdges + for cont in globalVars.edgeDICT: + for tCont in globalVars.edgeDICT[cont]: + bundeledEdges=bundleEdges(globalVars.edgeDICT[cont][tCont]) + #print(bundeledEdges) + globalVars.edgeDICT[cont][tCont] = bundeledEdges + + # ignore edges with only weight 1 + acceptedEdges=[] + for edge in globalVars.edgeDICT[cont][tCont]: + + #print(edge) + if edge[0] >= globalVars.minLinks: + + #check if overlap so large it indicates a merge + if edge[1]*-1 >= globalVars.contLenDICT[cont[:-2]] or edge[1]*-1 >= globalVars.contLenDICT[tCont[:-2]]: + #print("overlap so large it indicates a merge") + if not edge[1]*-1 > 50000: + merged, realOL, mAdjust = merging.runBLAST(cont, tCont, edge[1], True) + if merged=="Removed": + print("Merging passed") + #removeMergedNEW(remove[0][:-2]+"_0", remove[1], remove[2]) + #removeMergedNEW(remove[0][:-2]+"_1", remove[3], remove[4]) + #else: + # print("Merging failed") + else: + + newEdge = 
globalVars.myEdge(cont, tCont, edge[0], edge[1], edge[2], 'r') + acceptedEdges.append(edge) + ins = False + for i, e in enumerate(edgeLIST): + if e < newEdge: + edgeLIST.insert(i, newEdge) + ins = True + break + + if ins == False: + edgeLIST.append(newEdge) + + #print("Accepted edges between %s and %s"%(cont, tCont)) + #for e in acceptedEdges: + # print(e) + #print("\n") + globalVars.edgeDICT[cont][tCont]=acceptedEdges + + return edgeLIST + + +# running GPMA from gpma.py +def runGPMA(orderedEdges, cGRAPH): + writer=open(globalVars.outDir + "/tmp_edges", "w") + for i, edge in enumerate(orderedEdges): + #print("Running GPMA trying to set edge (%i/%i):"%(i, len(orderedEdges)-1)) + #print(edge) + gpma(edge, cGRAPH) + writer.write(str(edge)+"\n") + writer.close() + +#function to cluster variants per contig +def clusterSVs(svList, cMap, writer, ms, misJoin=False, deletion=False): + #to avoid unnecessary checking + if len(svList) < 2: + return [] + + breaks=[] + import networkx as nx + G = nx.Graph() + nINS=len(svList) + #print(svList) + for i, sv1 in enumerate(svList): + for j in range(i+1, nINS): + sv2 = svList[j] + #print(sv1, sv2) + sd = abs((sv1[2]-sv1[1]) - (sv2[2]-sv2[1])) / max((sv1[2]-sv1[1]), (sv2[2]-sv2[1]), 1) + pd = min(abs(sv1[1]-sv2[1]), abs(sv1[2]-sv2[2]), abs((sv1[2]+sv1[1])/2 - (sv2[2]+sv2[1])/2)) + #print(sd, pd) + spd=sd+pd/2000 + #print(spd) + if spd < 0.7: + #print("CLUSTER TOGETHER") + G.add_edge(i, j) + #print("\n") + insClusts=nx.find_cliques(G) + #print("nClusters found:") + for iClust in insClusts: + #print(len(iClust)) + #print(iClust) + if len(iClust) > 2: + svB=[] + svE=[] + svW=0 + scaf="" + for i in iClust: + #print(svList[i]) + ins = svList[i] + scaf=ins[0] + svB.append(ins[1]) + svE.append(ins[2]) + svW+=1 + #print(ins) + + + #print(svB, svE) + + if misJoin==True: + svB=int(statistics.mean(svB)) + svE=int(statistics.mean(svE)+100) + if svE >= len(cMap): + if svB >= len(cMap): + continue + svE=len(cMap)-1 + #print(svB, svE) + meanGood=round(statistics.mean(cMap[svB:svE+1])) + meanSTD=round(statistics.stdev(cMap[svB:svE+1])) + meanGood+=(-3)*meanSTD + else: + svB=int(statistics.mean(svB)) + svE=int(statistics.mean(svE)) + + meanGood=round(statistics.mean(cMap[svB:svE+1])) + if deletion==True: + svW+=meanGood + #print(svB, svE, svW, meanGood, svW/(svW+meanGood)) + if svW > 2 and svW/(svW+meanGood)>=0.7: + #print("ACCEPT SV!!!") + #print(cMap[svB:svE+1]) + #print(svB, svE, svW, meanGood, svW/(svW+meanGood)) + #breaks.append((svB, svE)) + breaks.append(svB) + breaks.append(svE) + writer.write("%s\t%i\t%i\t%i\t%i\t%f\n"%(scaf ,svB, svE, svW, meanGood, svW/(svW+meanGood))) + cMap[svB]-=svW + if svB!=svE: + cMap[svE]-=svW + + + G.clear() + + return breaks + +# function to break contigs given a +def breakConts(bHASH, cit): + oWriter=open(args.outdir+"/"+"adjusted_contigs_it%i.fa"%(cit), "w") + for scaf in globalVars.contDICT: + if scaf in bHASH: + curStart=0 + counter=1 + breakEMargin=500 + for error in bHASH[scaf]: + #print(curStart, error, error-breakEMargin) + #only output fragments larger than 1000bp + if len(globalVars.contDICT[scaf][curStart:error-breakEMargin]) > 1000: + oWriter.write(">%s_%i\n"%(scaf, counter)) + #print("Write splitted scaf: %s from: %i-%i" %(scaf, curStart, error-breakEMargin)) + oWriter.write("%s\n"%(globalVars.contDICT[scaf][curStart:error-breakEMargin])) + + curStart=error+breakEMargin + counter+=1 + if len(globalVars.contDICT[scaf][curStart:]) > 1000: + oWriter.write(">%s_%i\n"%(scaf, counter)) + #print("Write splitted scaf: %s from: %i-%i" 
%(scaf, curStart, len(globalVars.contDICT[scaf]))) + oWriter.write("%s\n"%(globalVars.contDICT[scaf][curStart:])) + else: + oWriter.write(">%s\n"%(scaf)) + oWriter.write("%s\n"%(globalVars.contDICT[scaf])) + oWriter.close() + +# function that generates the pos. coverage map per contig +# and then runs clusterSVs on the different SVs for that contig +# and then checks if any contigs must be broken (breakConts) +def checkContigsNEW(curIt, noBreak): + svbreak=False + covWriter=open(globalVars.outDir + "/coverage_map_it%i.txt"%(curIt), "w") + insWriter=open(globalVars.outDir + "/insertions_it%i.txt"%(curIt), "w") + invWriter=open(globalVars.outDir + "/inversions_it%i.txt"%(curIt), "w") + delWriter=open(globalVars.outDir + "/deletions_it%i.txt"%(curIt), "w") + mjWriter=open(globalVars.outDir + "/misjoins_it%i.txt"%(curIt), "w") + svBreaks={} + for scaf in globalVars.goodE: + #print("Checking scaf %s"%(scaf)) + covMap=[0]*globalVars.contLenDICT[scaf] + breakpoints=[] + for g in globalVars.goodE[scaf]: + for i in range(g[1],g[2]+1): + covMap[i]+=1 + meanSupport=statistics.median(covMap[50:-50]) + #print("cont %s has a mean goodness of scaf %f"%(scaf, meanSupport)) + #print(covMap) + #print("INSERTIONS:") + breakpoints+=clusterSVs(globalVars.insE[scaf], covMap, insWriter, meanSupport) + #print("DELETIONS:") + breakpoints+=clusterSVs(globalVars.delE[scaf], covMap, delWriter, meanSupport, deletion=True) + #print("INVERSIONS:") + breakpoints+=clusterSVs(globalVars.invE[scaf], covMap, invWriter, meanSupport) + #print("MISJOINS:") + breakpoints+=clusterSVs(globalVars.mjE[scaf], covMap, mjWriter, meanSupport, misJoin=True) + + if breakpoints!=[]: + svbreak=True + svBreaks[scaf]=breakpoints + + checkCovMap(scaf, covMap, covWriter) + + + if svbreak==True and noBreak==False: + breakConts(svBreaks, curIt) + + covWriter.close() + insWriter.close() + delWriter.close() + invWriter.close() + mjWriter.close() + + return svbreak + +# outputs the coverage map +def checkCovMap(curCont, cmap, oWriter): + oWriter.write(">%s\n"%(curCont)) + totalBps=0 + nGood=0 + nBad=0 + nUnknown=0 + curRange=["", "", 0] + + for pos, cov in enumerate(cmap): + totalBps+=1 + if cov>0: + nGood+=1 + elif cov==0: + nUnknown+=1 + else: + nBad+=1 + if pos==0: + curRange[0]=pos + curRange[1]=pos + curRange[2]=cov + else: + if curRange[2]==cov: + curRange[1]=pos + else: + oWriter.write("\t%s\t%i\t%i\t%i\n"%(curCont, curRange[0], curRange[1], curRange[2])) + curRange[0]=pos + curRange[1]=pos + curRange[2]=cov + oWriter.write("\t%s\t%i\t%i\t%i\n"%(curCont, curRange[0], curRange[1], curRange[2])) + + +# function to write a scaffold for final output +def writeScaf(sSeqs, sNames, oNR, sWriter, gffWriter): + #Write output + newSeq = "" + #print("writing out") + + + for i, name in enumerate(sNames): + seq = sSeqs[i] + start=len(newSeq)+1 + newSeq += seq + end=len(newSeq) + if name.startswith("read_"): + gffWriter.write("scaf_%i\t.\tread(%s)\t%i\t%i\n"%(oNR, name, start, end)) + else: + gffWriter.write("scaf_%i\t.\tcontig(%s)\t%i\t%i\n"%(oNR, name, start, end)) + + + scafLen = len(newSeq) + sWriter.write(">scaf_%i(len=%i)\n" % (oNR, scafLen)) + # print(newName) + # print(newSeq) + myIndex = 0 + # fasta format capped at 80bps per line + while (myIndex + 80) <= len(newSeq): + sWriter.write("%s\n" % (newSeq[myIndex:myIndex + 80])) + myIndex += 80 + sWriter.write("%s\n" % (newSeq[myIndex:])) + + return scafLen + + + +# function to output all contigs/scaffolds in the end --> gets the path +# for each and then puts them together before writing out +def 
getPATH(myGraph): + scafWriter = open(globalVars.outDir + "/scaffolds.fasta", "w") + scafCount = 0 + idWriter = open(globalVars.outDir + "/conts_scafs.ids", "w") + gffWriter = open(globalVars.outDir + "/scaffolds.gff", "w") + # hash to keep track of already visited contigs + cLens=[] + visited = {} + count=1 + #print(len(myGraph.cGraph)) + for cont in myGraph.cGraph: + #print("Checking cont %s - %i/%i"%(cont, count, len(myGraph.cGraph))) + count+=1 + if not cont[:-2] in visited: + startNode = myGraph.findStart(copy.copy(cont)) + #print(myGraph.getPath(copy.copy(cont))) + first=True + #print("Getting path for cont %s start of path is at: %s"%(cont, startNode)) + + newScaf = mySCAF([], []) # intitalize output scaf + idWriter.write(">scaf_%i\n"%(scafCount)) + curNode=copy.copy(startNode) + while 1: + #print("while curNode=%s"%(curNode)) + # check if already visited + if not curNode[:-2] in visited: + visited[curNode[:-2]]=1 + else: + #print("CIRCLE FOUND") + curLen=writeScaf(newScaf.seqs, newScaf.names, scafCount, scafWriter, gffWriter) + cLens.append(curLen) + scafCount+=1 + idWriter.write("\n") + break + visited[curNode[:-2]] = 1 + #print("visited %s"%(curNode[:-2])) + + #case for startNode set dist to 0 + if first==True: + first=False + offset=0 + else: + offset = myGraph.cGraph[curNode].active.dist *(-1) # change pos/neg to get offset for contigSeq + if myGraph.cGraph[curNode].active != "": + idWriter.write("\tdist=%i, links=%i\n"%(offset*-1, myGraph.cGraph[curNode].active.weight)) + else: + idWriter.write("\tdist=%i\n"%(offset*-1)) + + + + # get scaf seq in correctOrientation + if curNode[-1]=="0": + contSeq = globalVars.contDICT[curNode[:-2]] + elif curNode[-1]=="1": # reverse complement seq + #contSeq=str(Seq(globalVars.contDICT[curNode[:-2]]).reverse_complement()) + contSeq=reverseComplement(globalVars.contDICT[curNode[:-2]]) + + + idWriter.write("\t%s(len=%i)\n"%(curNode, globalVars.contLenDICT[curNode[:-2]])) + + #fill gap + if offset < 0: # gap that must be filled exists + #print("Filling gap between %s and %s"%(myGraph.cGraph[curNode].active.target, curNode)) + fillID, fillSeq = fillEDGEgap(myGraph.cGraph[curNode].active.target, curNode, newScaf.seqs[len(newScaf.seqs)-1], contSeq, offset*-1) + #print("Filler = %s len=%i"%(fillID, len(fillSeq))) + + if fillID=="100N": + #print("Dont set edge") + del visited[curNode[:-2]] + myGraph.cGraph[curNode].active="" + curLen=writeScaf(newScaf.seqs, newScaf.names, scafCount, scafWriter, gffWriter) + cLens.append(curLen) + scafCount+=1 + idWriter.write("\n") + break + else: + newScaf.names.append("read_"+fillID) + newScaf.seqs.append(fillSeq) + + offset = 0 + + #print("appending cont %s with offset %i"%(curNode, offset)) + newScaf.names.append(curNode) + #print("appending seq of len=%i"%(len(contSeq[offset:]))) + newScaf.seqs.append(contSeq[offset:]) + + curNode = myGraph.cGraph[curNode].cEdge.target + + if myGraph.cGraph[curNode].active=="": + #print("End of path reached") + curLen=writeScaf(newScaf.seqs, newScaf.names, scafCount, scafWriter, gffWriter) + cLens.append(curLen) + scafCount+=1 + idWriter.write("\n") + break + else: + #print(myGraph.cGraph[curNode].active) + curNode = myGraph.cGraph[curNode].active.target + #print("Next in path is %s"%(curNode)) + #print(myGraph.cGraph[curNode].active) + + scafWriter.close() + idWriter.close() + gffWriter.close() + return cLens + + +# function to fill the gap between to contigs with a pacbio read +def fillEDGEgap(cont1, cont2, cSeq1, cSeq2, gLen): + gapFilled=False + #print("Trying to fill gap 
+
+# function to fill the gap between two contigs with a PacBio read
+def fillEDGEgap(cont1, cont2, cSeq1, cSeq2, gLen):
+    gapFilled=False
+    # sort contigs to get the same order as before for the edge connection
+    if cont1 < cont2:
+        node1 = cont1
+        node2 = cont2
+    else:
+        node1 = cont2
+        node2 = cont1
+    gapReads=globalVars.gapDICT[node1][node2]
+
+    # only align the up-to-1000bp contig ends that face the gap
+    if len(cSeq1)>1000:
+        adjustedSeq1=cSeq1[-1000:]
+        adjustedLen1=1000
+    else:
+        adjustedSeq1=cSeq1
+        adjustedLen1=len(cSeq1)
+    if len(cSeq2)>1000:
+        adjustedSeq2=cSeq2[:1000]
+        adjustedLen2=1000
+    else:
+        adjustedSeq2=cSeq2
+        adjustedLen2=len(cSeq2)
+
+    for read in gapReads:
+        rSeq=globalVars.readHash[read]
+        gapFilled=merging.fillGAPS(cont1, cont2, adjustedLen1, adjustedLen2, adjustedSeq1, adjustedSeq2, read, len(rSeq), rSeq)
+
+        # if the gap could be filled, abort the gap-filling process
+        if gapFilled!=False:
+            break
+
+    if gapFilled==False:
+        # gap could not be filled --> bridge with 100 Ns instead
+        read="100N"
+        gapFilled="N"*100
+
+    return read, gapFilled
+
+# calculates the N-statistics (N10,..,N50,..,N100)
+def calcNstats(scafLens, totalLen):
+    Nvals=[(0,0)]*10
+    n=0
+    curSum=0
+    curN=0.1
+    curPos=0
+    for slen in scafLens:
+        curSum+=slen
+        n+=1
+        # one scaffold can cross several N-thresholds at once,
+        # so advance the threshold in a loop
+        while curPos < len(Nvals) and curSum >= totalLen*curN:
+            Nvals[curPos]=(slen, n)
+            curN=round(curN+0.1, 2)
+            curPos+=1
+
+    return Nvals
+
+# function to generate and output the scaffold statistics
+def scafStats(sLens):
+    writer=open(globalVars.outDir + "/scaffolds.stats", "w")
+    sortedLens=sorted(sLens, reverse=True)
+    totalBps=sum(sLens)
+    totalScafs=len(sLens)
+    avgLen=statistics.mean(sLens)
+
+    writer.write("Total number of basepairs in assembly: %i\n"%(totalBps))
+    writer.write("Total number of scaffolds: %i\n"%(totalScafs))
+    writer.write("Longest scaffold: %i\n"%(sortedLens[0]))
+    writer.write("Average scaffold length: %i\n"%(avgLen))
+
+    nVs=calcNstats(sortedLens, totalBps)
+
+    names=["N10","N20","N30","N40","N50","N60","N70","N80","N90","N100"]
+    for i, nv in enumerate(nVs):
+        writer.write("%s = %i, n = %i\n"%(names[i], nv[0], nv[1]))
+
+    writer.close()
+
+# function to remove all tmp files created while running the script
+def removeTMPfiles():
+    subprocess.call("rm %s/tmp*" % (globalVars.outDir), shell=True)
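+# Worked example for calcNstats above (hypothetical lengths, in kb): for
+# sorted scaffold lengths 50, 30, 20 (total 100), the N50 entry is the
+# length at which the running sum first reaches 50% of the total, together
+# with the number of scaffolds needed to get there:
+#   >>> calcNstats([50, 30, 20], 100)[4]
+#   (50, 1)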
"/tmp_it%i.paf"%(curIter) + runMinimap2(args.threads, mapMode, genome, splitFile, myPAF) + t1 = datetime.datetime.now() + maptime=t1-t0 + + print("\n2) Parsing PAF") + t0=datetime.datetime.now() + edgeSet = parsePAF(myPAF) + t1=datetime.datetime.now() + parseTime=t1-t0 + + #avgCOV=statistics.median(globalVars.largestMapped) + #print(globalVars.largestMapped) + #print("\tCalculated average coverage: %i"%(avgCOV)) + + print("\n3) Clustering and identification of SVs and misjoins") + t0 = datetime.datetime.now() + restart=checkContigsNEW(curIter, nobreak) + t1 = datetime.datetime.now() + svTime=t1-t0 + #restart, contStats = checkContigs(curIter, nobreak) + if restart==True and nobreak==False: + print("\tContigs were adjusted restart needed!!!") + return False + else: + + # check if contigs need to be readjusted if so --> readjust and rerun + # mapping before running GPMA + print("\n4) Running GPMA") + print("\tNumber of edges: %i" %(len(edgeSet))) + t0 = datetime.datetime.now() + runGPMA(edgeSet, contigGraph) + t1 = datetime.datetime.now() + gpmaTime=t1-t0 + + # get the paths between contigs for output + print("\n5) Outputting scaffolds and filling gaps") + t0 = datetime.datetime.now() + scafLengths=getPATH(contigGraph) + t1 = datetime.datetime.now() + getPathTime=t1-t0 + + haploWriter = open(globalVars.outDir + "/duplicates.fasta", "w") + print("\n6) Outputting unscaffolded duplicates") + #output haplotigs + visited={} + for ele in globalVars.adjustDICT: + sID=ele[:-2] + if visited.get(sID, "nope")==1: + continue + + contSeq = globalVars.contDICT[sID] + haploWriter.write(">%s\n%s\n"%(sID, contSeq)) + + visited[sID]=1 + haploWriter.close() + + + #get stats + scafStats(scafLengths) + + + # cleaning up: + print("\n7) Cleaning up - removing tmp files") + removeTMPfiles() + + + print("\n\n") + + etime = datetime.datetime.now() + totaltime = etime - stime + #print(globalVars.contsMerged) + print("Total elapsed time: " + str(totaltime) + " [h:min:sec:millisec]") + print("0) Splitting time: " + str(splittime) + " [h:min:sec:millisec]") + print("1) Mapping time(minimap2): " + + str(maptime) + " [h:min:sec:millisec]") + print("2) Parsing time: " + str(parseTime) + " [h:min:sec:millisec]") + print("2) SV clustering time: " + str(svTime) + " [h:min:sec:millisec]") + print("4) GPMA time: " + str(gpmaTime) + " [h:min:sec:millisec]") + #print("\tTotal BLASTS done: %i"%(globalVars.blastCounter)) + print("5) GetPath time: " + str(getPathTime) + " [h:min:sec:millisec]") + + + return True + + + +########################################################################## +########################################################################## +########################################################################## +parser = argparse.ArgumentParser(description="") +parser.add_argument( + "-ge", "--genome", help="Contigs in fasta format", type=str, required=True) +parser.add_argument( + "-fq", "--fastq", help="Reads in fastq format", type=str, required=True) +parser.add_argument( + "-out", "--outdir", help="Directory for output", type=str, required=True) +parser.add_argument( + "-m", "--mode", help="Mapping mode in minimap2 for different types of input data data. 
+parser = argparse.ArgumentParser(description="")
+parser.add_argument(
+    "-ge", "--genome", help="Contigs in fasta format", type=str, required=True)
+parser.add_argument(
+    "-fq", "--fastq", help="Reads in fastq format", type=str, required=True)
+parser.add_argument(
+    "-out", "--outdir", help="Directory for output", type=str, required=True)
+parser.add_argument(
+    "-m", "--mode", help="Mapping mode in minimap2 for different types of input data. Options: pb/ont [default=pb]", type=str, default="pb")
+parser.add_argument(
+    "-t", "--threads", help="Threads to use for minimap2 only [default=4]", type=int, default=4)
+parser.add_argument(
+    "-ml", "--minlinks", help="Minimum reads supporting a link needed to set an edge between two contigs [default=2]", type=int, default=2)
+parser.add_argument(
+    "-mo", "--minoverlap", help="Minimum overlap needed for two contigs to be merged [default=50]", type=int, default=50)
+parser.add_argument(
+    "-mi", "--minident", help="Minimum identity in overlap needed for two contigs to be merged [default=85.0]", type=float, default=85.0)
+parser.add_argument(
+    "-it", "--iterations", help="Maximal iterations performed for breaking; set to 0 to perform NO breaking", type=int, default=2)
+parser.add_argument(
+    "-msvl", "--minSVLen", help="Minimum length of a structural variant (insertion, deletion, inversion) to break a contig for [default=1000]", type=int, default=1000, choices=range(500, 10001), metavar="[500, 10000]")
+parser.add_argument(
+    "-s", "--size", help="Size to split the input reads into (in bps) [default=1000]", type=int, default=1000)
+parser.add_argument(
+    "-fa", "--fasta", help="Enable if input reads are in fasta format", action="store_true")
+args = parser.parse_args()
+
+
+##########################################################################
+if __name__ == "__main__":
+    # starttime
+    stime = datetime.datetime.now()
+    # get the right mapping mode
+    if args.mode == "pb":
+        mapMode = 'map-pb'
+    elif args.mode == "ont":
+        mapMode = 'map-ont'
+    else:
+        parser.error("Unknown mode '%s', options are: pb/ont" % (args.mode))
+
+    genome=args.genome
+    curIt=0
+    noBreak=False
+    while curIt <= args.iterations:
+        print("---------------------------------------------ITERATION %i---------------------------------------------"%(curIt))
+        if curIt == args.iterations:
+            noBreak=True
+        end=run(genome, noBreak, curIt)
+        genome=args.outdir+"/"+"adjusted_contigs_it%i.fa"%(curIt)
+        curIt+=1
+        if end == True:
+            curIt=args.iterations+1
+    print("\n\n")
\ No newline at end of file
diff --git a/merging.py b/merging.py
new file mode 100644
index 0000000000000000000000000000000000000000..ec236e706e8ba77924c317745bcd5863be570cac
--- /dev/null
+++ b/merging.py
@@ -0,0 +1,472 @@
+import globalVars
+import gpma
+import subprocess, datetime
+try:
+    from subprocess import DEVNULL  # py3k
+except ImportError:
+    import os  # for python2
+    DEVNULL = open(os.devnull, 'wb')
+
+
+# function to reverse complement a sequence
+def reverseComplement(seq):
+    old = "ACGT"
+    replace = "TGCA"
+    return seq.upper().translate(str.maketrans(old, replace))[::-1]
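+# doctest-style example for reverseComplement above (illustrative):
+# complement each base, then reverse the result
+#   >>> reverseComplement("ACCGT")
+#   'ACGGT'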
name - %s"%(id1)) + else: + seq1_trimmed=seq1 + checkOL=False + + if len(seq2) > estOL*(-1)+500: + if id2.endswith("_1"): + seq2_trimmed=seq2[int(len(seq2)+(estOL-500)):] + elif id2.endswith("_0"): + seq2_trimmed=seq2[:int(estOL*(-1)+500)] + else: + print("WTF is this name - %s"%(id2)) + else: + seq2_trimmed=seq2 + checkOL=False + else: + seq1_trimmed=seq1 + seq2_trimmed=seq2 + ''' + + tmpDIR = globalVars.outDir + + #print(len(seq1), len(seq1_trimmed)) + #print(len(seq2), len(seq2_trimmed)) + oWriter = open(tmpDIR + "/tmp1.fa", "w") + #oWriter.write(">%s\n%s\n" % (id1, seq1)) + oWriter.write(">%s\n%s\n" % (id1, seq1_trimmed)) + oWriter.close() + oWriter = open(tmpDIR + "/tmp2.fa", "w") + #oWriter.write(">%s\n%s\n" % (id2, seq2)) + oWriter.write(">%s\n%s\n" % (id2, seq2_trimmed)) + oWriter.close() + + # define blastn output columns + outfmt = "'6 qseqid sseqid pident length qstart qend sstart send sstrand evalue bitscore'" + stime = datetime.datetime.now() + #nod threads --> 'num_threads' is currently ignored when 'subject' is specified. + subprocess.call("blastn -word_size 15 -outfmt %s -subject %s -query %s -out %s" % (outfmt, tmpDIR + "/tmp1.fa", tmpDIR + "/tmp2.fa", tmpDIR + "/tmp.blast6"), + shell=True, stdout=DEVNULL, stderr=subprocess.STDOUT) + etime = datetime.datetime.now() + #print("Blast Done time: " + str(etime-stime) + " [h:min:sec:millisec]") + #increase BLAST Counter + globalVars.blastCounter+=1 + + + # length need to be this way around --> 2 first then 1 beacuse 1 was used + # as reference + sucess, rOL, mAdjust = tryMERGE(tmpDIR + "/tmp.blast6", id1, id2, len(seq1_trimmed), len(seq2_trimmed)) + + #if (rOL*(-1)==len(seq1_trimmed)-10 or rOL*(-1)==len(seq2_trimmed)-10) and checkOL==True: + # print("Full overlap rerun with more sequence!!!") + # return runBLAST(id1, id2, eOL, False) + + return sucess, rOL, mAdjust + + +# function for merging contigs by reading the blastn output +def tryMERGE(blastPATH, cID1, cID2, len1, len2): + blastReader = open(blastPATH) + + mergeAdjust="" + #just a initialization beacuse it needs a return value + alnLen=1 + + alllines = blastReader.readlines() + if len(alllines) == 0: + #print("\tERROR no hit found - NO MERGING CAN BE DONE!!!") + merged = False + else: + #print("Number of lines in blast comp=%i" %(len(alllines))) + for line in alllines: + #print(line) + splitted = line.strip().split("\t") + # s2 first beacause s1 was used as reference + s2 = splitted[0] + s1 = splitted[1] + ident = float(splitted[2]) + alnLen = int(splitted[3]) + # IMPORTANT BLAST INDICES START WITH 1 NOT WITH 0 --> -1 for all of + s2_start = int(splitted[4]) - 1 + s2_end = int(splitted[5]) - 1 + s1_start = int(splitted[6]) - 1 + s1_end = int(splitted[7]) - 1 + s1_strand = splitted[8] + + #print("\tc1:%s len=%i ostart=%i oend=%i" % + # (cID1, len1, s1_start, s1_end)) + #print("\tc2:%s len=%i ostart=%i oend=%i" % + # (cID2, len2, s2_start, s2_end)) + + # to merge as input argument + if alnLen >= globalVars.minOverlap and ident >= globalVars.minIdent: + + #first check if alnLen is equal to len of one contig which means its fully part of the other and can be removed + if alnLen >= len1-10: + #print("Contig %s is fully part of contig %s and will be removed!!!"%(cID1, cID2)) + if s1_start < s1_end: + if s2_start < s2_end: + nTar0=cID2[:-2]+"_0" + aDist0=s2_start + nTar1=cID2[:-2]+"_1" + aDist1=len2-s2_end + elif s2_start > s2_end: + nTar0=cID2[:-2]+"_1" + aDist0=len2-s2_end + nTar1=cID2[:-2]+"_0" + aDist1=s2_start + elif s1_start > s1_end: + if s2_start < s2_end: + 
+
+# function for merging contigs by reading the blastn output
+def tryMERGE(blastPATH, cID1, cID2, len1, len2):
+    blastReader = open(blastPATH)
+
+    mergeAdjust=""
+    # just an initialization because it needs a return value
+    alnLen=1
+
+    alllines = blastReader.readlines()
+    if len(alllines) == 0:
+        # no hit found - no merging can be done
+        merged = False
+    else:
+        for line in alllines:
+            splitted = line.strip().split("\t")
+            # s2 first because s1 was used as the reference
+            s2 = splitted[0]
+            s1 = splitted[1]
+            ident = float(splitted[2])
+            alnLen = int(splitted[3])
+            # IMPORTANT: BLAST indices start with 1, not with 0 --> -1 for all of them
+            s2_start = int(splitted[4]) - 1
+            s2_end = int(splitted[5]) - 1
+            s1_start = int(splitted[6]) - 1
+            s1_end = int(splitted[7]) - 1
+            s1_strand = splitted[8]
+
+            if alnLen >= globalVars.minOverlap and ident >= globalVars.minIdent:
+
+                # first check if alnLen is (almost) equal to the length of one
+                # contig, which means it is fully part of the other and can be removed
+                if alnLen >= len1-10:
+                    # contig cID1 is fully part of contig cID2 and will be removed
+                    if s1_start < s1_end:
+                        if s2_start < s2_end:
+                            nTar0=cID2[:-2]+"_0"
+                            aDist0=s2_start
+                            nTar1=cID2[:-2]+"_1"
+                            aDist1=len2-s2_end
+                        elif s2_start > s2_end:
+                            nTar0=cID2[:-2]+"_1"
+                            aDist0=len2-s2_end
+                            nTar1=cID2[:-2]+"_0"
+                            aDist1=s2_start
+                    elif s1_start > s1_end:
+                        if s2_start < s2_end:
+                            nTar0=cID2[:-2]+"_1"
+                            aDist0=len2-s2_end
+                            nTar1=cID2[:-2]+"_0"
+                            aDist1=s2_start
+                        elif s2_start > s2_end:
+                            nTar0=cID2[:-2]+"_0"
+                            aDist0=s2_start
+                            nTar1=cID2[:-2]+"_1"
+                            aDist1=len2-s2_end
+
+                    mergeAdjust=(cID1, nTar0, aDist0, nTar1, aDist1)
+                    merged="Removed"
+                    break
+
+                elif alnLen >= len2-10:
+                    # contig cID2 is fully part of contig cID1 and will be removed
+                    if s1_start < s1_end:
+                        if s2_start < s2_end:
+                            nTar0=cID1[:-2]+"_0"
+                            aDist0=s1_start
+                            nTar1=cID1[:-2]+"_1"
+                            aDist1=len1-s1_end
+                        elif s2_start > s2_end:
+                            nTar0=cID1[:-2]+"_1"
+                            aDist0=len1-s1_end
+                            nTar1=cID1[:-2]+"_0"
+                            aDist1=s1_start
+                    elif s1_start > s1_end:
+                        if s2_start < s2_end:
+                            nTar0=cID1[:-2]+"_1"
+                            aDist0=len1-s1_end
+                            nTar1=cID1[:-2]+"_0"
+                            aDist1=s1_start
+                        elif s2_start > s2_end:
+                            nTar0=cID1[:-2]+"_0"
+                            aDist0=s1_start
+                            nTar1=cID1[:-2]+"_1"
+                            aDist1=len1-s1_end
+
+                    mergeAdjust=(cID2, nTar0, aDist0, nTar1, aDist1)
+                    merged="Removed"
+                    break
+
+                # get orientations of the contigs, meaning which ends are overlapping
+                ori1 = cID1[-1]
+                ori2 = cID2[-1]
+
+                # case1 c1_1 --> c2_0
+                if ori1 == "1" and ori2 == "0":
+                    # example: c1:ctg7180000370328_1 len=1919 ostart=1418 oend=1918
+                    # example: c2:ctg7180000371159_0 len=5383 ostart=0 oend=500
+                    # make sure the alns are at the borders
+                    if s1_end + 10 >= len1 - 1 and s2_start - 10 <= 0:
+                        newSeqID = cID1 + "__" + cID2
+                        saveMerge(newSeqID, cID1, cID2, alnLen)
+                        merged = True
+                    else:
+                        # alns not at the borders
+                        merged = False
+
+                # case2 c1_1 --> c2_1
+                elif ori1 == "1" and ori2 == "1":
+                    # example: c1:ctg7180000370630_1 len=5524 ostart=5523 oend=5458
+                    # example: c2:jtg7180000371240f_7180000371241f_1 len=12170 ostart=12104 oend=12169
+                    if s1_start + 10 >= len1 - 1 and s2_end + 10 >= len2 - 1:
+                        newSeqID = cID1 + "__" + cID2
+                        saveMerge(newSeqID, cID1, cID2, alnLen)
+                        merged = True
+                    else:
+                        merged = False
+
+                # case3 c1_0 --> c2_1
+                elif ori1 == "0" and ori2 == "1":
+                    # example: c1:ctg7180000370268_0 len=3867 ostart=0 oend=419
+                    # example: c2:ctg7180000370715_1 len=8150 ostart=7730 oend=8149
+                    if s2_end + 10 >= len2 - 1 and s1_start - 10 <= 0:
+                        newSeqID = cID2 + "__" + cID1
+                        saveMerge(newSeqID, cID1, cID2, alnLen)
+                        merged = True
+                    else:
+                        merged = False
+
+                # case4 c1_0 --> c2_0
+                elif ori1 == "0" and ori2 == "0":
+                    # example: c1:ctg7180000370227_0 len=1568 ostart=415 oend=0
+                    # example: c2:ctg7180000371034_0 len=1684 ostart=0 oend=415
+                    if s1_end - 10 <= 0 and s2_start - 10 <= 0:
+                        newSeqID = cID2 + "__" + cID1
+                        saveMerge(newSeqID, cID1, cID2, alnLen)
+                        merged = True
+                    else:
+                        merged = False
+
+                if merged == True:
+                    # if merging was successful there is no reason to check more
+                    # lines; break out of the loop and ignore the remaining alns
+                    break
+            else:
+                # merging failed: either too little overlap or identity too low
+                merged = False
+
+    blastReader.close()
+    return merged, alnLen*-1, mergeAdjust
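+# Sketch (hypothetical helper, not used above): the four orientation cases
+# in tryMERGE all reduce to "does the alignment reach a contig border,
+# within a 10bp tolerance", e.g. for the right border:
+#   >>> def atRightBorder(alnEnd, contLen, tol=10):
+#   ...     return alnEnd + tol >= contLen - 1
+#   >>> atRightBorder(1918, 1919)
+#   True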
+
+
+def saveMerge(nID, oID1, oID2, olap):
+    # store merges under a canonical (sorted) key so the pair is found
+    # regardless of the order the two contigs come in
+    if oID1<=oID2:
+        combi=oID1+"__"+oID2
+    else:
+        combi=oID2+"__"+oID1
+
+    globalVars.mergedDICT[combi]=(nID, olap)
+
+
+# function to properly remove a merged contig from the graph dictionary
+def removeMerged(rdict, rkey, newTar, addDist):
+    try:
+        print("Removing %s from dict"%(rkey))
+        # part to adjust an existing edge
+        existingEdge=rdict[rkey].active
+        globalVars.adjustDICT[rkey]=(newTar, addDist)
+        if existingEdge!="":
+            existingTarget=existingEdge.target
+            rdict[existingTarget].active=""
+            # update the edge with the new target and the added distance
+            adjustedEdge=existingEdge
+            adjustedEdge.target=newTar
+            adjustedEdge.dist-=addDist  # must be subtracted to properly adjust
+        # remove the contig from the hash
+        del rdict[rkey]
+
+    except KeyError:
+        print("KeyError while removing %s"%(rkey))
+        pass
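+# doctest-style example for the canonical key in saveMerge above
+# (illustrative node names): the key is independent of argument order
+#   >>> "__".join(sorted(["ctg2_0", "ctg1_1"]))
+#   'ctg1_1__ctg2_0'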
+ subprocess.call("blastn -word_size 15 -outfmt %s -subject %s -query %s -out %s" % (outfmt, tmpDIR + "/tmp_cont1.fa", tmpDIR + "/tmp_read.fa", tmpDIR + "/tmp1.blast6"), + shell=True, stdout=DEVNULL, stderr=subprocess.STDOUT) + subprocess.call("blastn -word_size 15 -outfmt %s -subject %s -query %s -out %s" % (outfmt, tmpDIR + "/tmp_cont2.fa", tmpDIR + "/tmp_read.fa", tmpDIR + "/tmp2.blast6"), + shell=True, stdout=DEVNULL, stderr=subprocess.STDOUT) + + ori1=cID1[-1] + ori2=cID2[-1] + + + #print("TRYING TO MERGE C1:%s AND READ:%s..."%(cID1, rID)) + blastReader = open(tmpDIR + "/tmp1.blast6") + alllines = blastReader.readlines() + if len(alllines) == 0: + #print("\tERROR no hit found - NO C1 MERGING CAN BE DONE!!!!!!!") + merge1 = False + else: + #print("Number of lines in blast comp=%i" %(len(alllines))) + for line in alllines: + merge1=False + #print(line) + splitted = line.strip().split("\t") + rID = splitted[0] + cID = splitted[1] + ident = float(splitted[2]) + alnLen = int(splitted[3]) + # IMPORTANT BLAST INDICES START WITH 1 NOT WITH 0 --> -1 for all of + r_start = int(splitted[4]) - 1 + r_end = int(splitted[5]) - 1 + c_start = int(splitted[6]) - 1 + c_end = int(splitted[7]) - 1 + c_strand1 = splitted[8] + + #print("\tcontig:%s len=%i ostart=%i oend=%i cstrand=%s" % + # (cID1, cLen1, c_start, c_end, c_strand1)) + #print("\tread:%s len=%i ostart=%i oend=%i" % + # (rID, rLen, r_start, r_end)) + + if alnLen >= 0.3*globalVars.splitLen and ident >= 80: + end=max(c_start, c_end) + if end + 20 >= cLen1: + merge1=True + #print("\tGOOD C1 MERGING POSSIBLE FROM START!!!!!!!") + #get read starting position for concatenation + if c_strand1=="minus": + readStart=r_start + else: + readStart=r_end + #print(line) + break + + #else: + # print("\tERROR C1 MERGING FAILED ALN NOT AT BORDERS OF CONT!!!!!!!") + #else: + # print("\tC1 MERGING FAILED EITHER TOO LITTLE OVERLAP(%i) OR IDENTITY(%f) TOO LOW" % ( + # alnLen, ident)) + + # no need to test to merge C2 with read if C1 merge failed + if merge1==False: + #print("GAP FILLING FAILED!!!!!") + return False + + + blastReader.close() + + #print("TRYING TO MERGE C2:%s AND READ:%s..."%(cID2, rID)) + blastReader = open(tmpDIR + "/tmp2.blast6") + alllines = blastReader.readlines() + if len(alllines) == 0: + #print("\tERROR no hit found - NO C2 MERGING CAN BE DONE!!!!!!!") + merge2 = False + else: + #print("Number of lines in blast comp=%i" %(len(alllines))) + for line in alllines: + merge2=False + #print(line) + splitted = line.strip().split("\t") + # s2 first beacause s1 was used as reference + rID = splitted[0] + cID = splitted[1] + ident = float(splitted[2]) + alnLen = int(splitted[3]) + # IMPORTANT BLAST INDICES START WITH 1 NOT WITH 0 --> -1 for all of + r_start = int(splitted[4]) - 1 + r_end = int(splitted[5]) - 1 + c_start = int(splitted[6]) - 1 + c_end = int(splitted[7]) - 1 + c_strand2 = splitted[8] + + #print("\tcontig:%s len=%i ostart=%i oend=%i cstrand=%s" % + # (cID2, cLen2, c_start, c_end, c_strand2)) + #print("\tread:%s len=%i ostart=%i oend=%i" % + # (rID, rLen, r_start, r_end)) + + if alnLen >= 0.3*globalVars.splitLen and ident >= 80: + start=min(c_start, c_end) + if start-20 <= 0: + merge2=True + #print("\tGOOD C2 MERGING POSSIBLE FROM START!!!!!!!") + #get read starting position for concatenation + if c_strand2=="minus": + readEnd=r_end + else: + readEnd=r_start + #print(line) + break + #else: + # print("\tERROR C2 MERGING FAILED ALN NOT AT BORDERS OF CONT!!!!!!!") + #else: + # print("\tC2 MERGING FAILED EITHER TOO LITTLE OVERLAP(%i) OR 
+
+    blastReader.close()
+
+    if merge1==True and merge2==True:
+        if c_strand1 != c_strand2:
+            # gap filling failed: the two hits were in different directions
+            return False
+        # filling the gap is possible; return the read slice between the hits
+        if readStart<=readEnd:
+            return rSeq[readStart:readEnd]
+        else:
+            return reverseComplement(rSeq[readEnd:readStart])
+    else:
+        return False
+
diff --git a/minimap2 b/minimap2
new file mode 160000
index 0000000000000000000000000000000000000000..d90583b83cd81abd652abb025f39b8884937d59c
--- /dev/null
+++ b/minimap2
@@ -0,0 +1 @@
+Subproject commit d90583b83cd81abd652abb025f39b8884937d59c
diff --git a/splitReads.py b/splitReads.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe57fa2694e49605a0406ef791c4cae459391125
--- /dev/null
+++ b/splitReads.py
@@ -0,0 +1,131 @@
+import globalVars
+
+class myFQ:
+
+    def __init__(self, iD, seq, opt, qual):
+        self.iD = iD
+        self.seq = seq
+        self.opt = opt
+        self.qual = qual
+
+
+class myFA:
+
+    def __init__(self, iD, seq):
+        self.iD = iD
+        self.seq = seq
+
+def splitReadsFQ(inFQ, oFQ, splitSize):
+
+    if inFQ.endswith(".gz"):
+        import gzip
+        fqParser = gzip.open(inFQ, mode='rt')
+    else:
+        fqParser = open(inFQ)
+
+    myWriter = open(oFQ, "w")
+
+    lineCount = 0
+    seqCount = 0
+    writtenREAD_ID = 1
+    for line in fqParser:
+        lineCount += 1
+
+        if lineCount == 1:
+            rid=line[1:].rstrip().split("\t")[0].split(" ")[0]
+            curRead = myFQ(line.strip(), "", "", "")
+        elif lineCount == 2:
+            curRead.seq = line.strip()
+        elif lineCount == 3:
+            curRead.opt = line.strip()
+        elif lineCount == 4:
+            curRead.qual = line.strip()
+
+            curStart = 0
+            curEnd = splitSize
+            splitCount = 0
+
+            while curEnd < len(curRead.seq):
+                splitID=str(writtenREAD_ID) + "_" + str(splitCount)
+                #splitID=str(rid) + "_" + str(splitCount)
+                myWriter.write("@" + splitID + "\n")
+                myWriter.write(str(curRead.seq[curStart:curEnd]) + "\n")
+                myWriter.write(str(curRead.opt) + "\n")
+                myWriter.write(str(curRead.qual[curStart:curEnd]) + "\n")
+
+                splitCount += 1
+                curStart += splitSize
+                curEnd += splitSize
+
+            # write the trailing chunk (< splitSize)
+            splitID=str(writtenREAD_ID) + "_" + str(splitCount)
+            #splitID=str(rid) + "_" + str(splitCount)
+            myWriter.write("@" + splitID + "\n")
+            myWriter.write(str(curRead.seq[curStart:]) + "\n")
+            myWriter.write(str(curRead.opt) + "\n")
+            myWriter.write(str(curRead.qual[curStart:]) + "\n")
+
+            # keep the full-length sequence for gap filling later on
+            globalVars.readHash[str(writtenREAD_ID)]=curRead.seq
+            #globalVars.readHash[str(rid)]=curRead.seq
+
+            writtenREAD_ID += 1
+            seqCount += 1
+            lineCount = 0
+
+    myWriter.close()
+    fqParser.close()
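+# Worked example for splitReadsFQ above (hypothetical numbers): a 2350bp
+# read with splitSize=1000 is written as splits 1_0 and 1_1 of 1000bp plus
+# a trailing split 1_2 of 350bp, and its full-length sequence stays
+# available in globalVars.readHash under the key "1".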
+
+
+def splitReadsFA(inFA, oFA, splitSize):
+
+    if inFA.endswith(".gz"):
+        import gzip
+        faParser = gzip.open(inFA, mode='rt')
+    else:
+        faParser = open(inFA)
+
+    myWriter = open(oFA, "w")
+
+    # write one record as wrapped splits and keep the full sequence;
+    # also emits the trailing chunk (< splitSize), mirroring the fastq splitter
+    def writeRecord(curRead, readID):
+        curStart = 0
+        curEnd = splitSize
+        splitCount = 0
+        while curStart < len(curRead.seq):
+            splitID = str(readID) + "_" + str(splitCount)
+            curSeq = str(curRead.seq[curStart:curEnd])
+            myWriter.write(">" + splitID + "\n")
+            myIndex = 0
+            # fasta format capped at 80bps per line
+            while (myIndex + 80) <= len(curSeq):
+                myWriter.write("%s\n" % (curSeq[myIndex:myIndex + 80]))
+                myIndex += 80
+            myWriter.write("%s\n" % (curSeq[myIndex:]))
+            splitCount += 1
+            curStart += splitSize
+            curEnd += splitSize
+        globalVars.readHash[str(readID)] = curRead.seq
+
+    curID = ""
+    seqCount = 0
+    writtenREAD_ID = 1
+    for line in faParser:
+        if line.startswith(">"):
+            if curID != "":
+                writeRecord(curRead, writtenREAD_ID)
+                writtenREAD_ID += 1
+                seqCount += 1
+            curID = line.strip()
+            curRead = myFA(curID, "")
+        else:
+            curRead.seq += line.strip()
+
+    # flush the final record, which the header-triggered branch never reaches
+    if curID != "":
+        writeRecord(curRead, writtenREAD_ID)
+
+    myWriter.close()
+    faParser.close()
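+# Minimal usage sketch (illustrative; the file paths and the elided
+# globalVars.init arguments are assumptions, see the main script):
+#   import globalVars, splitReads
+#   globalVars.init(...)  # sets up globalVars.readHash among others
+#   splitReads.splitReadsFQ("reads.fastq", "out/tmp_1000bp_splits", 1000)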