diff --git a/envs/genomescope.yaml b/envs/genomescope.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e852490bd58cb8416bfeb7ca9dda514b3221d14e --- /dev/null +++ b/envs/genomescope.yaml @@ -0,0 +1,6 @@ +channels: + - bioconda + - conda-forge +dependencies: + - genomescope2=2.0 + - python=3.9.2 diff --git a/envs/merylMerq.yaml b/envs/merylMerq.yaml index a7d2a4e35e66b8fe7b04a37d6ea44a2bf16ae128..96c6aedf8e1ed57389eff28c2921f46dab7d18db 100644 --- a/envs/merylMerq.yaml +++ b/envs/merylMerq.yaml @@ -1,15 +1,12 @@ channels: - - bioconda - conda-forge - - r + - bioconda dependencies: - igvtools - openjdk=11.0.8 - - r=3 + - r-base=3.6.3 - r-ggplot2 - r-argparse - r-scales - - genomescope2=2.0 - samtools - bedtools - - python=3.9.2 diff --git a/programs/merqury-1.1/LICENSE b/programs/merqury-1.1/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..16968438d07f14775ea0b351619498049614976f --- /dev/null +++ b/programs/merqury-1.1/LICENSE @@ -0,0 +1,19 @@ + + PUBLIC DOMAIN NOTICE + + This software is "United States Government Work" under the terms of the United + States Copyright Act. It was written as part of the authors' official duties + for the United States Government and thus cannot be copyrighted. This software + is freely available to the public for use without a copyright + notice. Restrictions cannot be placed on its present or future use. + + Although all reasonable efforts have been taken to ensure the accuracy and + reliability of the software and associated data, the National Human Genome + Research Institute (NHGRI), National Institutes of Health (NIH) and the + U.S. Government do not and cannot warrant the performance or results that may + be obtained by using this software or data. NHGRI, NIH and the U.S. Government + disclaim all warranties as to performance, merchantability or fitness for any + particular purpose. + + Please cite the authors in any work or product based on this material. + diff --git a/programs/merqury-1.1/README.md b/programs/merqury-1.1/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4808a8cd8913408800e2289804e93c05b9b81d17 --- /dev/null +++ b/programs/merqury-1.1/README.md @@ -0,0 +1,191 @@ +# Merqury + +## Evaluate genome assemblies with k-mers and more + +Often, genome assembly projects have illumina whole genome sequencing reads available for the assembled individual. +The k-mer spectrum of this read set can be used for independently evaluating assembly quality without the need of a high quality reference. +Merqury provides a set of tools for this purpose. + +## Dependency +* gcc 4.8 or higher +* meryl +* Java run time environment (JRE) +* R with argparse, ggplot2, and scales (tested on R 3.6.1) +* bedtools +* samtools +* igvtools + +## Installation + +### Get a working meryl in your PATH +Download meryl release: https://github.com/marbl/meryl/releases/tag/v1.0 + +If the binary doesn't work, download the source and compile: +```shell +cd meryl/src +make -j 24 +export PATH=/path/to/meryl/…/bin:$PATH +``` +See if we get help message for `meryl`. + +### Add a path variable MERQURY +```shell +git clone https://github.com/marbl/merqury.git +cd merqury +export MERQURY=$PWD +``` +Add the “export” part to your environment (~/.bash_profile or ~/.profile). +Add installation dir paths for `bedtools`, `samtools` and `igvtools` to your enviroenment. +`source` it. + + +## Run + +* !! Merqury assumes all meryl dbs (dirs) are named with `.meryl`. !! + +On a single machine: +``` +ln -s $MERQURY/merqury.sh # Link merqury +./merqury.sh <read-db.meryl> [<mat.meryl> <pat.meryl>] <asm1.fasta> [asm2.fasta] <out> + +Usage: merqury.sh <read-db.meryl> [<mat.meryl> <pat.meryl>] <asm1.fasta> [asm2.fasta] <out> + <read-db.meryl> : k-mer counts of the read set + <mat.meryl> : k-mer counts of the maternal haplotype (ex. mat.only.meryl or mat.hapmer.meryl) + <pat.meryl> : k-mer counts of the paternal haplotype (ex. pat.only.meryl or pat.hapmer.meryl) + <asm1.fasta> : Assembly fasta file (ex. pri.fasta, hap1.fasta or maternal.fasta) + [asm2.fasta] : Additional fasta file (ex. alt.fasta, hap2.fasta or paternal.fasta) + *asm1.meryl and asm2.meryl will be generated. Avoid using the same names as the hap-mer dbs + <out> : Output prefix +``` +`< >` : required +`[ ]` : optional + +## Example + +Below is showing examples how to run Merqury using the prebuilt meryl dbs on a. thaliana F1 hybrid. +The fasta files are the trio-binned assemblies from [Koren et al](https://doi.org/10.1038/nbt.4277). + +```shell +### Download assemblies ### +wget https://gembox.cbcb.umd.edu/triobinning/athal_COL.fasta +wget https://gembox.cbcb.umd.edu/triobinning/athal_CVI.fasta + +### Download prebuilt meryl dbs ### +# read.meryl of the F1 hybrid between COL-0 and CVI-0 +wget https://obj.umiacs.umd.edu/marbl_publications/merqury/athal/a_thal.k18.meryl.tar.gz +# hap-mers for COL-0 haplotype +wget https://obj.umiacs.umd.edu/marbl_publications/merqury/athal/a_thal.col0.hapmer.meryl.tar.gz +# hap-mers for CVI-0 haplotype +wget https://obj.umiacs.umd.edu/marbl_publications/merqury/athal/a_thal.cvi0.hapmer.meryl.tar.gz + +# Untar +for gz in *.tar.gz +do + tar -zxf $gz +done + +# Run merqury +$MERQURY/merqury.sh F1.k18.meryl col0.hapmer.meryl cvi0.hapmer.meryl athal_COL.fasta athal_CVI.fasta test +``` + + +### 1. I have one assembly (pseudo-haplotype or mixed-haplotype) +```shell +# I don't have the hap-mers +$MERQURY/merqury.sh read-db.meryl asm1.fasta out_prefix +# Using the example above +$MERQURY/merqury.sh F1.k18.meryl athal_COL.fasta test-1 + +# I have the hap-mers +$MERQURY/merqury.sh read-db.meryl mat.meryl pat.meryl asm1.fasta out_prefix +# Using the example above +$MERQURY/merqury.sh F1.k18.meryl col0.hapmer.meryl cvi0.hapmer.meryl athal_COL.fasta test-1 +``` + +### 2. I have two assemblies (diploid) +```shell +# I don't have the hap-mers +$MERQURY/merqury.sh read-db.meryl asm1.fasta asm2.fasta out_prefix +# Using the example above +$MERQURY/merqury.sh F1.k18.meryl athal_COL.fasta athal_CVI.fasta test-2 + +# I have the hap-mers +$MERQURY/merqury.sh read-db.meryl mat.meryl pat.meryl asm1.fasta asm2.fasta out_prefix +# Using the example above +$MERQURY/merqury.sh F1.k18.meryl col0.hapmer.meryl cvi0.hapmer.meryl athal_COL.fasta athal_CVI.fasta test-2 +``` + +* Note there is no need to run merqury per-assemblies. Give two fasta files, Merqury generates stats for each and combined. + + + +### How to parallelize +Merqury starts with `eval/spectra_cn.sh`. +When hap-mers are provided, merqury runs modules under `trio/` in addition to `eval/spectra_cn.sh`. + + +The following can run at the same time. Modules with dependency are followed by arrows (->). +* `eval/spectra_cn.sh` -> `trio/spectra_hap.sh` +* `trio/hap_blob.sh` +* `trio/phase_block.sh` per assembly -> `trio/block_n_stats.sh` + + +Meryl, the k-mer counter inside, uses the maximum cpus available. +Set `OMP_NUM_THREADS=24` for example to use 24 threads. + +On slurm environment, simply run: +``` +ln -s $MERQURY/_submit_merqury.sh # Link merqury +./_submit_merqury.sh <read-db.meryl> [<mat.meryl> <pat.meryl>] <asm1.fasta> [asm2.fasta] <out> +``` +Change the `sbatch` to match your environment. (ex. partition) + + +## Outputs from each modules +* `eval/spectra_cn.sh`: k-mer completeness, qv, spectra-cn and spectra-asm plots, asm-only `.bed` and `.tdf` for tracking errors +* `eval/qv.sh`: just get the qv stats and quit. +* `trio/spectra_hap.sh`: hap-mer level spectra-cn plots, hap-mer completeness +* `trio/hap_blob.sh`: blob plots of the hap-mers in each contg/scaffold +* `trio/phase_block.sh`: phase block statistics, phase block N* plots, hap-mer tracks (`.bed` and `.tdf` files) +* `trio/block_n_stats.sh`: continuity plots (phase block N* or NG* plots, phase block vs. contig/scaffold plots) +* `trio/switch_error.sh`: this is run part of `phase_blck.sh`, however can be re-run with desired short-range switch parameters. Run `trio/block_n_stats.sh` along with it to get the associated plots. + +## Tips for helps +Run each script without any parameters if not sure what to do. +For example, `./trio/switch_error.sh` will give a help message and quit. + +Following wiki pages have more detailed examples. + +## 1. Prepare meryl dbs ([details](https://github.com/marbl/merqury/wiki/1.-Prepare-meryl-dbs)) +1. Get the right k size +2. Build k-mer dbs with meryl +3. Build hap-mers for trios + +## 2. Overall assembly evaluation ([details](https://github.com/marbl/merqury/wiki/2.-Overall-k-mer-evaluation)) +1. Reference free QV estimate +2. k-mer completeness (recovery rate) +3. Spectra copy number analysis +4. Track error bases in the assembly + +## 3. Phasing assessment with hap-mers ([details](https://github.com/marbl/merqury/wiki/3.-Phasing-assessment-with-hap-mers)) +1. Inherited hap-mer plots +2. Hap-mer blob plots +3. Hap-mer completeness (recovery rate) +4. Spectra copy number analysis per hap-mers +5. Phased block statistics and switch error rates +6. Track each haplotype block in the assembly + +## Available pre-built meryl dbs +Meryl dbs from Illumina WGS and hapmers are available [here](https://obj.umiacs.umd.edu/marbl_publications/merqury/index.html) for +* A. thaliana COL-0 x CVI-0 F1 +* NA12878 (HG001) +* HG002 + +## Citing merqury + +Please use the following [preprint](https://www.biorxiv.org/content/10.1101/2020.03.15.992941v1) to cite Merqury: + +Arang Rhie, Brian P. Walenz, Sergey Koren, Adam M. Phillippy, Merqury: reference-free quality and phasing assessment for genome assemblies, bioRxiv (2020). doi: https://doi.org/10.1101/2020.03.15.992941 + + + diff --git a/programs/merqury_June2020/_submit_build.sh b/programs/merqury-1.1/_submit_build.sh similarity index 96% rename from programs/merqury_June2020/_submit_build.sh rename to programs/merqury-1.1/_submit_build.sh index 988e26d1fd5c56f7f5ef73c2d03f4e5cc4a9adce..e44a8b8a7adc2bc162afdf19ce86f9d9af6a0ad0 100755 --- a/programs/merqury_June2020/_submit_build.sh +++ b/programs/merqury-1.1/_submit_build.sh @@ -45,11 +45,11 @@ for i in $(seq 1 $LEN) do fq=`sed -n ${i}p $input_fofn` GB=`du -k $fq | awk '{printf "%.0f", $1/1024/1024}'` - if [[ $GB -lt 12 ]]; then - echo "$fq is $GB, less than 12GB. Skip splitting." + if [[ $GB -lt 15 ]]; then + echo "$fq is $GB, less than 15GB. Skip splitting." echo $fq >> $input_fofn.$i else - echo "$fq is $GB, over 12GB. Will split and run meryl in parallel." + echo "$fq is $GB, over 15GB. Will split and run meryl in parallel." echo "Split files will be in $input_fofn.$i" args="$input_fofn" split_arrs="$split_arrs$i," diff --git a/programs/merqury_June2020/_submit_build_10x.sh b/programs/merqury-1.1/_submit_build_10x.sh similarity index 98% rename from programs/merqury_June2020/_submit_build_10x.sh rename to programs/merqury-1.1/_submit_build_10x.sh index 6c424db5743253a9cf70ba5c334cc9958869f58c..c1c9c4292d5da41e4ecbff188b511ce01e0057dd 100755 --- a/programs/merqury_June2020/_submit_build_10x.sh +++ b/programs/merqury-1.1/_submit_build_10x.sh @@ -1,6 +1,6 @@ #!/bin/bash -build=/srv/public/users/tomas/programs/merqury/build +build=$MERQURY/build if [ -z $1 ]; then echo "Usage: ./_submit_build.sh <k-size> <R1.fofn> <R2.fofn> <out_prefix> [mem=T]" diff --git a/programs/merqury_June2020/_submit_hapmers.sh b/programs/merqury-1.1/_submit_hapmers.sh similarity index 100% rename from programs/merqury_June2020/_submit_hapmers.sh rename to programs/merqury-1.1/_submit_hapmers.sh diff --git a/programs/merqury_June2020/_submit_merqury.sh b/programs/merqury-1.1/_submit_merqury.sh similarity index 100% rename from programs/merqury_June2020/_submit_merqury.sh rename to programs/merqury-1.1/_submit_merqury.sh diff --git a/programs/merqury_June2020/_submit_split.sh b/programs/merqury-1.1/_submit_split.sh similarity index 100% rename from programs/merqury_June2020/_submit_split.sh rename to programs/merqury-1.1/_submit_split.sh diff --git a/programs/merqury_June2020/best_k.sh b/programs/merqury-1.1/best_k.sh similarity index 100% rename from programs/merqury_June2020/best_k.sh rename to programs/merqury-1.1/best_k.sh diff --git a/programs/merqury_June2020/build/concat_splits.sh b/programs/merqury-1.1/build/concat_splits.sh similarity index 100% rename from programs/merqury_June2020/build/concat_splits.sh rename to programs/merqury-1.1/build/concat_splits.sh diff --git a/programs/merqury_June2020/build/count.sh b/programs/merqury-1.1/build/count.sh similarity index 100% rename from programs/merqury_June2020/build/count.sh rename to programs/merqury-1.1/build/count.sh diff --git a/programs/merqury_June2020/build/count_10x.sh b/programs/merqury-1.1/build/count_10x.sh similarity index 74% rename from programs/merqury_June2020/build/count_10x.sh rename to programs/merqury-1.1/build/count_10x.sh index 8ec5736d1ae44092fde0f7ea669c1032c78165b0..1f5e22600379bbc9a0933bda7ec6deafb9ebc8c0 100755 --- a/programs/merqury_June2020/build/count_10x.sh +++ b/programs/merqury-1.1/build/count_10x.sh @@ -28,9 +28,19 @@ else line_num=$SLURM_ARRAY_TASK_ID fi -# Note: Provide memory in Gb unit. SLURM provides $SLURM_MEM_PER_NODE in Mb. -# Give extra 4Gb to avoid 'Bus Error' form running out of memory. -mem=$(((SLURM_MEM_PER_NODE/1024)-4)) +if [[ ! -z $SLURM_CPUS_PER_TASK ]]; then + cpus="threads=$SLURM_CPUS_PER_TASK" +fi + +# If SLURM_MEM_PER_NODE exist; give extra 4Gb +# otherwise, let meryl determine +if [[ ! -z $SLURM_MEM_PER_NODE ]]; then + # Note: Provide memory in Gb unit. SLURM provides $SLURM_MEM_PER_NODE in Mb. + # Give extra 4Gb to avoid 'Bus Error' form running out of memory. + mem=$(((SLURM_MEM_PER_NODE/1024)-4)) + mem="memory=$mem" +fi + line_num=$(((offset * 1000) + $line_num)) # Read in the input path @@ -45,9 +55,9 @@ output=$name.$k.$line_num.meryl if [ ! -d $output ]; then # Run meryl count: Collect k-mer frequencies echo " - zcat $input | awk '{if (NR%2==1) {print $1} else {print substr($1,24)}}' | meryl k=$k threads=$SLURM_CPUS_PER_TASK memory=$mem count output $output - + zcat $input | awk '{if (NR%2==1) {print $1} else {print substr($1,24)}}' | meryl k=$k $cpus $mem count output $output - " - zcat $input | awk '{if (NR%2==1) {print $1} else {print substr($1,24)}}' | meryl k=$k threads=$SLURM_CPUS_PER_TASK memory=$mem count output $output - + zcat $input | awk '{if (NR%2==1) {print $1} else {print substr($1,24)}}' | meryl k=$k $cpus $mem count output $output - else echo "$output dir already exist. Nothing to do with $name." fi diff --git a/programs/merqury_June2020/build/diff.sh b/programs/merqury-1.1/build/diff.sh similarity index 100% rename from programs/merqury_June2020/build/diff.sh rename to programs/merqury-1.1/build/diff.sh diff --git a/programs/merqury_June2020/build/filt.sh b/programs/merqury-1.1/build/filt.sh similarity index 100% rename from programs/merqury_June2020/build/filt.sh rename to programs/merqury-1.1/build/filt.sh diff --git a/programs/merqury_June2020/build/intersect.sh b/programs/merqury-1.1/build/intersect.sh similarity index 100% rename from programs/merqury_June2020/build/intersect.sh rename to programs/merqury-1.1/build/intersect.sh diff --git a/programs/merqury_June2020/build/split.sh b/programs/merqury-1.1/build/split.sh similarity index 91% rename from programs/merqury_June2020/build/split.sh rename to programs/merqury-1.1/build/split.sh index 40e3980adb39b67da87c8dfcc4385674e9893c49..9dca1508bd7a60b1115966a4a4d4a2ca618683bd 100755 --- a/programs/merqury_June2020/build/split.sh +++ b/programs/merqury-1.1/build/split.sh @@ -3,7 +3,7 @@ echo "Usage: ./split.sh <input.fofn> [LINE_NUM]" echo -e "\t<input.fofn>: fastq.gz files we want to split by every 300 million lines." echo -e "\t<input.fofn>.LINE_NUM will be generated." -echo -e "\t\tUse it for building meryl dbs" +echo -e "\t\tUse it for building meryl dbs. pigz will use maximum of 8 processes by default." echo FOFN=$1 @@ -25,6 +25,9 @@ if [ -z $tid ]; then fi cpus=$SLURM_CPUS_PER_TASK +if [[ -z $cpus ]]; then + cpus=8 +fi fq=`sed -n ${tid}p $FOFN` fq_prefix=`echo $fq | sed 's/.fastq.gz$//g' | sed 's/.fq.gz$//g' | sed 's/.fastq$//g' | sed 's/.fq$//g'` diff --git a/programs/merqury_June2020/build/split_10x.sh b/programs/merqury-1.1/build/split_10x.sh similarity index 87% rename from programs/merqury_June2020/build/split_10x.sh rename to programs/merqury-1.1/build/split_10x.sh index 66540966f62f5f5e1fcaeb50a97b0c240de266ba..e1252338a0cee3eb01259d64284a655858ccbc12 100755 --- a/programs/merqury_June2020/build/split_10x.sh +++ b/programs/merqury-1.1/build/split_10x.sh @@ -3,7 +3,7 @@ echo "Usage: ./split_10x.sh <input.fofn> [LINE_NUM]" echo -e "\t<input.fofn>: 10XG fastq R1.gz files to split per 300 million lines." echo -e "\t<input.fofn>.LINE_NUM will be generated." -echo -e "\t\tUse it for building meryl dbs" +echo -e "\t\tUse it for building meryl dbs. pigz will use maximum of 8 processes by default." echo FOFN=$1 @@ -12,7 +12,7 @@ if [ -z $FOFN ]; then exit -1 fi -tid=300000000 # slurm environment variable for job arrays +tid=$SLURM_ARRAY_TASK_ID # slurm environment variable for job arrays LINE_NUM=$2 if [ -z $tid ]; then @@ -24,11 +24,13 @@ if [ -z $tid ]; then exit -1 fi -cpus=20 +cpus=$SLURM_CPUS_PER_TASK +if [[ -z $cpus ]]; then + cpus=8 +fi fq=`sed -n ${tid}p $FOFN` fq_prefix=`echo $fq | sed 's/.fastq.gz$//g' | sed 's/.fq.gz$//g' | sed 's/.fastq$//g' | sed 's/.fq$//g'` -echo $fq_prefix fq_prefix=`basename $fq_prefix` mkdir -p split diff --git a/programs/merqury_June2020/build/union_sum.sh b/programs/merqury-1.1/build/union_sum.sh similarity index 90% rename from programs/merqury_June2020/build/union_sum.sh rename to programs/merqury-1.1/build/union_sum.sh index dc21905b1a01f117df1762d83a368fd6fd1b0247..a01c15e7f0c0cc35efb2e211e4c8080982e2ec6a 100755 --- a/programs/merqury_June2020/build/union_sum.sh +++ b/programs/merqury-1.1/build/union_sum.sh @@ -16,8 +16,6 @@ output_prefix=$3.k$k LEN=`wc -l $input_fofn | awk '{print $1}'` NUM_DBS_TO_JOIN=100 # Join every $NUM_DBS_TO_JOIN as intermediates, then merge at the end JOIN_IDX=0 -CPU=$SLURM_CPUS_PER_TASK -MEM=$SLURM_MEM_PER_NODE echo "Set ulimit: ulimit -Sn 32000" ulimit -Sn 32000 @@ -44,8 +42,6 @@ do if [ ! -d $output ]; then echo " meryl \ - threads=$CPU \ - memory=$((MEM/1024)) \ k=$k \ union-sum \ output $output \ @@ -53,8 +49,6 @@ do " meryl \ - threads=$CPU \ - memory=$((MEM/1024)) \ k=$k \ union-sum \ output $output \ @@ -97,8 +91,6 @@ echo "union-sum of $output_prefix.[ 1 - $JOIN_IDX ] :" if [ ! -d $output_prefix ]; then echo " meryl \ - threads=$CPU \ - memory=$((MEM/1024)) \ k=$k \ union-sum \ output $output_prefix.meryl \ @@ -106,8 +98,6 @@ if [ ! -d $output_prefix ]; then " meryl \ - threads=$CPU \ - memory=$((MEM/1024)) \ k=$k \ union-sum \ output $output_prefix.meryl \ diff --git a/programs/merqury-1.1/eval/asm_multiplicity.sh b/programs/merqury-1.1/eval/asm_multiplicity.sh new file mode 100755 index 0000000000000000000000000000000000000000..51ed3ec40a9acd672d999004dfc8b4abdf373bd7 --- /dev/null +++ b/programs/merqury-1.1/eval/asm_multiplicity.sh @@ -0,0 +1,33 @@ +#!/bin/sh + +if [[ "$#" -lt 3 ]]; then + echo "Usage: ./asm_multiplicity.sh <asm.fasta> <asm.meryl> <out>" + echo -e "\t<asm.fasta>: assembly fasta file" + echo -e "\t<asm.meryl>: assembly meryl dir" + echo -e "\t<out>: output file prefix. <out>.copies.wig and <out>.asm_multiplicity.bigWig will be generated." + exit 0 +fi + +asm_fa=$1 +asm=$2 +out=$3 + +# Requirements: samtools, ucsc kent utils +module load samtools +module load ucsc/396 + +if [[ ! -e $asm_fa ]]; then + samtools faidx $asm_fa +fi + + +echo " +# Collect copy numbers in assembly" +meryl-lookup -dump -memory 12 -sequence $asm_fa -mers $asm | awk '$4=="T"' | java -jar -Xmx1g $MERQURY/util/merylDumpToWig.jar - > $out.copies.wig + +echo " +# Convert to bigwig" +wigToBigWig $out.copies.wig $asm_fa.fai $out.asm_multiplicity.bigWig + +echo "Done!" + diff --git a/programs/merqury_June2020/eval/bedCalcN50.jar b/programs/merqury-1.1/eval/bedCalcN50.jar similarity index 100% rename from programs/merqury_June2020/eval/bedCalcN50.jar rename to programs/merqury-1.1/eval/bedCalcN50.jar diff --git a/programs/merqury-1.1/eval/false_duplications.sh b/programs/merqury-1.1/eval/false_duplications.sh new file mode 100755 index 0000000000000000000000000000000000000000..ed7bf0a61ccccd3c18dffb3a7589a518d4a3fcff --- /dev/null +++ b/programs/merqury-1.1/eval/false_duplications.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +if [[ -z $1 ]]; then + echo "Usage: ./false_duplications.sh <name.asm.spectra-cn.hist>" + echo "Get the number of additional k-mers found in an assembly in the single and two copy k-mer peaks from the reads" + echo -e "<name.asm.spectra-cn.hist>: spectra-cn.hist generated with Merqury for a pseudo-haplotype or mixed haplotype assembly" + exit 0 +fi + +hist=$1 + +cutoff=`cat $hist | awk '$1==1 {print $2"\t"$3}' | awk -v max=0 'max<$2 {max=$2; mult=$1 } END {printf "%.0f\n", mult*(1.5)}'` +one_cp=`awk -v cutoff=$cutoff '$1==1 && $2<cutoff {sum+=$NF} END {print sum}' $hist` +two_cp=`awk -v cutoff=$cutoff '$1==2 && $2<cutoff {sum+=$NF} END {print sum}' $hist` +thr_cp=`awk -v cutoff=$cutoff '$1==3 && $2<cutoff {sum+=$NF} END {print sum}' $hist` +fou_cp=`awk -v cutoff=$cutoff '$1==4 && $2<cutoff {sum+=$NF} END {print sum}' $hist` +mor_cp=`awk -v cutoff=$cutoff '$1==">4" && $2<cutoff {sum+=$NF} END {print sum}' $hist` +DUPS_TOTAL=`echo "$one_cp $two_cp $thr_cp $fou_cp $mor_cp" | awk '{dup=$2+$3+$4+$5; all=dup+$1} END {print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"dup"\t"all"\t"(100*dup/all)}'` +echo -e "hist\tcutoff\t1\t2\t3\t4\t>4\tdup(>1)\tall\tdup%" +echo -e "$hist\t$cutoff\t$DUPS_TOTAL" + diff --git a/programs/merqury_June2020/eval/kmerHistToPloidyDepth.jar b/programs/merqury-1.1/eval/kmerHistToPloidyDepth.jar similarity index 100% rename from programs/merqury_June2020/eval/kmerHistToPloidyDepth.jar rename to programs/merqury-1.1/eval/kmerHistToPloidyDepth.jar diff --git a/programs/merqury-1.1/eval/per_seq_qv.sh b/programs/merqury-1.1/eval/per_seq_qv.sh new file mode 100755 index 0000000000000000000000000000000000000000..9f0125d97f54117759025ac68665318915c4184d --- /dev/null +++ b/programs/merqury-1.1/eval/per_seq_qv.sh @@ -0,0 +1,43 @@ +#!/bin/sh + +if [[ "$#" -lt 3 ]]; then + echo "Usage: ./per_seq_qv.sh seq.fasta read.meryl out" + echo "" + echo "Get QV per seqIDs in seq.fasta" + echo -e "seq.fasta:\tassembly, multi-fasta file" + echo -e "read.meryl:\tk-mer counts of read set" + echo -e "out:\toutput prefix" + echo + echo "Output will be generated as out.qv" + echo "Arang Rhie, 2020-06-15. arrhie@gmail.com" + echo + exit 0 +fi + +seq=$1 # asm.fasta +read=$2 # read.meryl +name=$3 # output prefix + +k=`meryl print $read | head -n 2 | tail -n 1 | awk '{print length($1)}'` +echo "Detected k-mer size $k" +echo + +seq_name=`echo $seq | sed 's/.fasta$//g' | sed 's/.fa$//g'` + + +if [[ ! -e $seq_name.0.meryl ]]; then + + if [[ ! -e $seq_name.meryl ]]; then + echo "# No $seq_name.meryl found. Counting ${k}-mers..." + meryl count k=$k output $seq_name.meryl $seq + echo + fi + echo "# Collect k-mers found in $seq_name only" + meryl difference $seq_name.meryl $read output $seq_name.0.meryl + echo +fi + +echo "QV per sequences" +meryl-lookup -existence -sequence $seq -mers $seq_name.0.meryl/ | \ + awk -v k=$k '{print $1"\t"$NF"\t"$(NF-2)"\t"(-10*log(1-(1-$NF/$(NF-2))^(1/k))/log(10))"\t"(1-(1-$NF/$(NF-2))^(1/k))}' > $name.qv + diff --git a/programs/merqury_June2020/eval/qv.sh b/programs/merqury-1.1/eval/qv.sh similarity index 87% rename from programs/merqury_June2020/eval/qv.sh rename to programs/merqury-1.1/eval/qv.sh index bc37f0087ea154459c2ff3061ffeb846e0c31668..c5ccdaff463506834ffa5b008920dd92ea693965 100755 --- a/programs/merqury_June2020/eval/qv.sh +++ b/programs/merqury-1.1/eval/qv.sh @@ -50,6 +50,9 @@ do QV=`echo "$ASM_ONLY $TOTAL" | awk -v k=$k '{print (-10*log(1-(1-$1/$2)^(1/k))/log(10))}'` echo -e "$asm\t$ASM_ONLY\t$TOTAL\t$QV\t$ERROR" >> $name.qv echo + + meryl-lookup -existence -sequence $asm_fa -mers $asm.0.meryl/ | \ + awk -v k=$k '{print $1"\t"$NF"\t"$(NF-2)"\t"(-10*log(1-(1-$NF/$(NF-2))^(1/k))/log(10))"\t"(1-(1-$NF/$(NF-2))^(1/k))}' > $name.$asm.qv done if [[ "$asm2_fa" == "" ]]; then @@ -62,8 +65,8 @@ asm2=`echo $asm2_fa | sed 's/.fasta.gz//g' | sed 's/.fa.gz//g' | sed 's/.fasta// asm="both" -meryl union output $asm.meryl $asm1.meryl $asm2.meryl -meryl union output $asm.0.meryl $asm1.0.meryl $asm2.0.meryl +meryl union-sum output $asm.meryl $asm1.meryl $asm2.meryl +meryl union-sum output $asm.0.meryl $asm1.0.meryl $asm2.0.meryl echo "# QV statistics for $asm" ASM_ONLY=`meryl statistics $asm.0.meryl | head -n4 | tail -n1 | awk '{print $2}'` @@ -75,3 +78,4 @@ echo rm -r $asm1.0.meryl $asm1.meryl $asm2.0.meryl $asm2.meryl $asm.0.meryl $asm.meryl echo "Done!" + diff --git a/programs/merqury-1.1/eval/read_multiplicity.sh b/programs/merqury-1.1/eval/read_multiplicity.sh new file mode 100755 index 0000000000000000000000000000000000000000..062add8a8e4179a66c91cef4cc79863311ab175d --- /dev/null +++ b/programs/merqury-1.1/eval/read_multiplicity.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +if [[ "$#" -lt 3 ]]; then + echo "Usage: ./read_multiplicity.sh <asm.fasta> <read.meryl> <out>" + echo -e "\t<asm.fasta>: assembly fasta file" + echo -e "\t<read.meryl>: k-mer counts of the reads" + echo -e "\t<out>: output file prefix. <out>.read.wig and <out>.read_multiplicity.bigWig will be generated." + exit 0 +fi + +asm_fa=$1 +read=$2 +out=$3 +asm=`echo $asm_fa | sed 's/.gz$//g' | sed 's/.fasta$//g' | sed 's/.fa$//g'` + +module load samtools +module load ucsc/396 + +if [[ ! -e $asm_fa.fai ]]; then + samtools faidx $asm_fa +fi + +echo " +# Collect k-mer multiplicity in reads" +# Adjust memory accordingly +meryl-lookup -dump -memory 68 -sequence $asm_fa -mers $read | awk '$4=="T"' | java -jar -Xmx1g $MERQURY/util/merylDumpToWig.jar - > $out.read.wig + +echo " +# Convert to bigwig" +wigToBigWig $out.read.wig $asm_fa.fai $out.read_multiplicity.bigwig + diff --git a/programs/merqury_June2020/eval/spectra-cn.sh b/programs/merqury-1.1/eval/spectra-cn.sh similarity index 92% rename from programs/merqury_June2020/eval/spectra-cn.sh rename to programs/merqury-1.1/eval/spectra-cn.sh index 03386999291257d2096a7d6737324d21392e2acb..f528c474c5f410e85e817e8dffe8f1596aa329c0 100755 --- a/programs/merqury_June2020/eval/spectra-cn.sh +++ b/programs/merqury-1.1/eval/spectra-cn.sh @@ -115,8 +115,8 @@ do echo "# Plot $hist" echo "\ - $MERQURY/plot/plot_spectra_cn.R -f $hist -o $name.spectra-cn -z $hist_asm_only" - $MERQURY/plot/plot_spectra_cn.R -f $hist -o $name.spectra-cn -z $hist_asm_only + Rscript $MERQURY/plot/plot_spectra_cn.R -f $hist -o $name.$asm.spectra-cn -z $hist_asm_only" + Rscript $MERQURY/plot/plot_spectra_cn.R -f $hist -o $name.$asm.spectra-cn -z $hist_asm_only echo echo "# QV statistics" @@ -127,6 +127,11 @@ do echo -e "$asm\t$ASM_ONLY\t$TOTAL\t$QV\t$ERROR" >> $name.qv echo + echo "# Per seq QV statistics" + meryl-lookup -existence -sequence $asm_fa -mers $asm.0.meryl/ | \ + awk -v k=$k '{print $1"\t"$NF"\t"$(NF-2)"\t"(-10*log(1-(1-$NF/$(NF-2))^(1/k))/log(10))"\t"(1-(1-$NF/$(NF-2))^(1/k))}' > $name.$asm.qv + echo + echo "# k-mer completeness (recoveray rate) with solid k-mers for $asm with > $filt counts" meryl intersect output $asm.solid.meryl $asm.meryl $read_solid TOTAL=`meryl statistics $read_solid | head -n3 | tail -n1 | awk '{print $2}'` @@ -177,8 +182,8 @@ if [[ "$asm2_fa" = "" ]]; then echo "# Plot $hist" echo "\ - $MERQURY/plot/plot_spectra_cn.R -f $hist -o $name.spectra-asm -z $hist_asm_dist_only" - $MERQURY/plot/plot_spectra_cn.R -f $hist -o $name.spectra-asm -z $hist_asm_dist_only + Rscript $MERQURY/plot/plot_spectra_cn.R -f $hist -o $name.spectra-asm -z $hist_asm_dist_only" + Rscript $MERQURY/plot/plot_spectra_cn.R -f $hist -o $name.spectra-asm -z $hist_asm_dist_only echo echo "# Clean up" @@ -265,8 +270,8 @@ echo echo "# Plot $hist" echo "\ -$MERQURY/plot/plot_spectra_cn.R -f $hist -o $name.spectra-cn -z $hist_asm_only" -$MERQURY/plot/plot_spectra_cn.R -f $hist -o $name.spectra-cn -z $hist_asm_only +Rscript $MERQURY/plot/plot_spectra_cn.R -f $hist -o $name.spectra-cn -z $hist_asm_only" +Rscript $MERQURY/plot/plot_spectra_cn.R -f $hist -o $name.spectra-cn -z $hist_asm_only echo echo "# QV" @@ -317,7 +322,7 @@ else fi echo "Plot $hist" -$MERQURY/plot/plot_spectra_cn.R -f $hist -o $name.spectra-asm -z $hist_asm_dist_only +Rscript $MERQURY/plot/plot_spectra_cn.R -f $hist -o $name.spectra-asm -z $hist_asm_dist_only echo echo "Clean up" diff --git a/programs/merqury_June2020/example/README.txt b/programs/merqury-1.1/example/README.txt similarity index 100% rename from programs/merqury_June2020/example/README.txt rename to programs/merqury-1.1/example/README.txt diff --git a/programs/merqury_June2020/example/inherited_hapmers.fl.png b/programs/merqury-1.1/example/inherited_hapmers.fl.png similarity index 100% rename from programs/merqury_June2020/example/inherited_hapmers.fl.png rename to programs/merqury-1.1/example/inherited_hapmers.fl.png diff --git a/programs/merqury_June2020/example/inherited_hapmers.ln.png b/programs/merqury-1.1/example/inherited_hapmers.ln.png similarity index 100% rename from programs/merqury_June2020/example/inherited_hapmers.ln.png rename to programs/merqury-1.1/example/inherited_hapmers.ln.png diff --git a/programs/merqury_June2020/example/inherited_hapmers.st.png b/programs/merqury-1.1/example/inherited_hapmers.st.png similarity index 100% rename from programs/merqury_June2020/example/inherited_hapmers.st.png rename to programs/merqury-1.1/example/inherited_hapmers.st.png diff --git a/programs/merqury_June2020/example/triocanu_clr.100_20000.col.block.N.png b/programs/merqury-1.1/example/triocanu_clr.100_20000.col.block.N.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.100_20000.col.block.N.png rename to programs/merqury-1.1/example/triocanu_clr.100_20000.col.block.N.png diff --git a/programs/merqury_June2020/example/triocanu_clr.100_20000.col.continuity.N.png b/programs/merqury-1.1/example/triocanu_clr.100_20000.col.continuity.N.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.100_20000.col.continuity.N.png rename to programs/merqury-1.1/example/triocanu_clr.100_20000.col.continuity.N.png diff --git a/programs/merqury_June2020/example/triocanu_clr.100_20000.cvi.block.N.png b/programs/merqury-1.1/example/triocanu_clr.100_20000.cvi.block.N.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.100_20000.cvi.block.N.png rename to programs/merqury-1.1/example/triocanu_clr.100_20000.cvi.block.N.png diff --git a/programs/merqury_June2020/example/triocanu_clr.100_20000.cvi.continuity.N.png b/programs/merqury-1.1/example/triocanu_clr.100_20000.cvi.continuity.N.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.100_20000.cvi.continuity.N.png rename to programs/merqury-1.1/example/triocanu_clr.100_20000.cvi.continuity.N.png diff --git a/programs/merqury_June2020/example/triocanu_clr.10_20000.col.block.N.png b/programs/merqury-1.1/example/triocanu_clr.10_20000.col.block.N.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.10_20000.col.block.N.png rename to programs/merqury-1.1/example/triocanu_clr.10_20000.col.block.N.png diff --git a/programs/merqury_June2020/example/triocanu_clr.10_20000.col.continuity.N.png b/programs/merqury-1.1/example/triocanu_clr.10_20000.col.continuity.N.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.10_20000.col.continuity.N.png rename to programs/merqury-1.1/example/triocanu_clr.10_20000.col.continuity.N.png diff --git a/programs/merqury_June2020/example/triocanu_clr.10_20000.cvi.block.N.png b/programs/merqury-1.1/example/triocanu_clr.10_20000.cvi.block.N.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.10_20000.cvi.block.N.png rename to programs/merqury-1.1/example/triocanu_clr.10_20000.cvi.block.N.png diff --git a/programs/merqury_June2020/example/triocanu_clr.10_20000.cvi.continuity.N.png b/programs/merqury-1.1/example/triocanu_clr.10_20000.cvi.continuity.N.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.10_20000.cvi.continuity.N.png rename to programs/merqury-1.1/example/triocanu_clr.10_20000.cvi.continuity.N.png diff --git a/programs/merqury_June2020/example/triocanu_clr.col.10_20000.phased_block.blob.png b/programs/merqury-1.1/example/triocanu_clr.col.10_20000.phased_block.blob.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.col.10_20000.phased_block.blob.png rename to programs/merqury-1.1/example/triocanu_clr.col.10_20000.phased_block.blob.png diff --git a/programs/merqury_June2020/example/triocanu_clr.col.block.N.png b/programs/merqury-1.1/example/triocanu_clr.col.block.N.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.col.block.N.png rename to programs/merqury-1.1/example/triocanu_clr.col.block.N.png diff --git a/programs/merqury_June2020/example/triocanu_clr.col.col0.hapmer.spectra-cn.fl.png b/programs/merqury-1.1/example/triocanu_clr.col.col0.hapmer.spectra-cn.fl.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.col.col0.hapmer.spectra-cn.fl.png rename to programs/merqury-1.1/example/triocanu_clr.col.col0.hapmer.spectra-cn.fl.png diff --git a/programs/merqury_June2020/example/triocanu_clr.col.col0.hapmer.spectra-cn.ln.png b/programs/merqury-1.1/example/triocanu_clr.col.col0.hapmer.spectra-cn.ln.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.col.col0.hapmer.spectra-cn.ln.png rename to programs/merqury-1.1/example/triocanu_clr.col.col0.hapmer.spectra-cn.ln.png diff --git a/programs/merqury_June2020/example/triocanu_clr.col.col0.hapmer.spectra-cn.st.png b/programs/merqury-1.1/example/triocanu_clr.col.col0.hapmer.spectra-cn.st.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.col.col0.hapmer.spectra-cn.st.png rename to programs/merqury-1.1/example/triocanu_clr.col.col0.hapmer.spectra-cn.st.png diff --git a/programs/merqury_June2020/example/triocanu_clr.col.continuity.N.png b/programs/merqury-1.1/example/triocanu_clr.col.continuity.N.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.col.continuity.N.png rename to programs/merqury-1.1/example/triocanu_clr.col.continuity.N.png diff --git a/programs/merqury_June2020/example/triocanu_clr.col.cvi0.hapmer.spectra-cn.fl.png b/programs/merqury-1.1/example/triocanu_clr.col.cvi0.hapmer.spectra-cn.fl.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.col.cvi0.hapmer.spectra-cn.fl.png rename to programs/merqury-1.1/example/triocanu_clr.col.cvi0.hapmer.spectra-cn.fl.png diff --git a/programs/merqury_June2020/example/triocanu_clr.col.cvi0.hapmer.spectra-cn.ln.png b/programs/merqury-1.1/example/triocanu_clr.col.cvi0.hapmer.spectra-cn.ln.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.col.cvi0.hapmer.spectra-cn.ln.png rename to programs/merqury-1.1/example/triocanu_clr.col.cvi0.hapmer.spectra-cn.ln.png diff --git a/programs/merqury_June2020/example/triocanu_clr.col.cvi0.hapmer.spectra-cn.st.png b/programs/merqury-1.1/example/triocanu_clr.col.cvi0.hapmer.spectra-cn.st.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.col.cvi0.hapmer.spectra-cn.st.png rename to programs/merqury-1.1/example/triocanu_clr.col.cvi0.hapmer.spectra-cn.st.png diff --git a/programs/merqury_June2020/example/triocanu_clr.col.spectra-cn.fl.png b/programs/merqury-1.1/example/triocanu_clr.col.spectra-cn.fl.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.col.spectra-cn.fl.png rename to programs/merqury-1.1/example/triocanu_clr.col.spectra-cn.fl.png diff --git a/programs/merqury_June2020/example/triocanu_clr.col.spectra-cn.ln.png b/programs/merqury-1.1/example/triocanu_clr.col.spectra-cn.ln.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.col.spectra-cn.ln.png rename to programs/merqury-1.1/example/triocanu_clr.col.spectra-cn.ln.png diff --git a/programs/merqury_June2020/example/triocanu_clr.col.spectra-cn.st.png b/programs/merqury-1.1/example/triocanu_clr.col.spectra-cn.st.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.col.spectra-cn.st.png rename to programs/merqury-1.1/example/triocanu_clr.col.spectra-cn.st.png diff --git a/programs/merqury_June2020/example/triocanu_clr.cvi.10_20000.phased_block.blob.png b/programs/merqury-1.1/example/triocanu_clr.cvi.10_20000.phased_block.blob.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.cvi.10_20000.phased_block.blob.png rename to programs/merqury-1.1/example/triocanu_clr.cvi.10_20000.phased_block.blob.png diff --git a/programs/merqury_June2020/example/triocanu_clr.cvi.block.N.png b/programs/merqury-1.1/example/triocanu_clr.cvi.block.N.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.cvi.block.N.png rename to programs/merqury-1.1/example/triocanu_clr.cvi.block.N.png diff --git a/programs/merqury_June2020/example/triocanu_clr.cvi.col0.hapmer.spectra-cn.fl.png b/programs/merqury-1.1/example/triocanu_clr.cvi.col0.hapmer.spectra-cn.fl.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.cvi.col0.hapmer.spectra-cn.fl.png rename to programs/merqury-1.1/example/triocanu_clr.cvi.col0.hapmer.spectra-cn.fl.png diff --git a/programs/merqury_June2020/example/triocanu_clr.cvi.col0.hapmer.spectra-cn.ln.png b/programs/merqury-1.1/example/triocanu_clr.cvi.col0.hapmer.spectra-cn.ln.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.cvi.col0.hapmer.spectra-cn.ln.png rename to programs/merqury-1.1/example/triocanu_clr.cvi.col0.hapmer.spectra-cn.ln.png diff --git a/programs/merqury_June2020/example/triocanu_clr.cvi.col0.hapmer.spectra-cn.st.png b/programs/merqury-1.1/example/triocanu_clr.cvi.col0.hapmer.spectra-cn.st.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.cvi.col0.hapmer.spectra-cn.st.png rename to programs/merqury-1.1/example/triocanu_clr.cvi.col0.hapmer.spectra-cn.st.png diff --git a/programs/merqury_June2020/example/triocanu_clr.cvi.continuity.N.png b/programs/merqury-1.1/example/triocanu_clr.cvi.continuity.N.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.cvi.continuity.N.png rename to programs/merqury-1.1/example/triocanu_clr.cvi.continuity.N.png diff --git a/programs/merqury_June2020/example/triocanu_clr.cvi.cvi0.hapmer.spectra-cn.fl.png b/programs/merqury-1.1/example/triocanu_clr.cvi.cvi0.hapmer.spectra-cn.fl.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.cvi.cvi0.hapmer.spectra-cn.fl.png rename to programs/merqury-1.1/example/triocanu_clr.cvi.cvi0.hapmer.spectra-cn.fl.png diff --git a/programs/merqury_June2020/example/triocanu_clr.cvi.cvi0.hapmer.spectra-cn.ln.png b/programs/merqury-1.1/example/triocanu_clr.cvi.cvi0.hapmer.spectra-cn.ln.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.cvi.cvi0.hapmer.spectra-cn.ln.png rename to programs/merqury-1.1/example/triocanu_clr.cvi.cvi0.hapmer.spectra-cn.ln.png diff --git a/programs/merqury_June2020/example/triocanu_clr.cvi.cvi0.hapmer.spectra-cn.st.png b/programs/merqury-1.1/example/triocanu_clr.cvi.cvi0.hapmer.spectra-cn.st.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.cvi.cvi0.hapmer.spectra-cn.st.png rename to programs/merqury-1.1/example/triocanu_clr.cvi.cvi0.hapmer.spectra-cn.st.png diff --git a/programs/merqury_June2020/example/triocanu_clr.cvi.spectra-cn.fl.png b/programs/merqury-1.1/example/triocanu_clr.cvi.spectra-cn.fl.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.cvi.spectra-cn.fl.png rename to programs/merqury-1.1/example/triocanu_clr.cvi.spectra-cn.fl.png diff --git a/programs/merqury_June2020/example/triocanu_clr.cvi.spectra-cn.ln.png b/programs/merqury-1.1/example/triocanu_clr.cvi.spectra-cn.ln.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.cvi.spectra-cn.ln.png rename to programs/merqury-1.1/example/triocanu_clr.cvi.spectra-cn.ln.png diff --git a/programs/merqury_June2020/example/triocanu_clr.cvi.spectra-cn.st.png b/programs/merqury-1.1/example/triocanu_clr.cvi.spectra-cn.st.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.cvi.spectra-cn.st.png rename to programs/merqury-1.1/example/triocanu_clr.cvi.spectra-cn.st.png diff --git a/programs/merqury_June2020/example/triocanu_clr.hapmers.blob.png b/programs/merqury-1.1/example/triocanu_clr.hapmers.blob.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.hapmers.blob.png rename to programs/merqury-1.1/example/triocanu_clr.hapmers.blob.png diff --git a/programs/merqury_June2020/example/triocanu_clr.spectra-asm.fl.png b/programs/merqury-1.1/example/triocanu_clr.spectra-asm.fl.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.spectra-asm.fl.png rename to programs/merqury-1.1/example/triocanu_clr.spectra-asm.fl.png diff --git a/programs/merqury_June2020/example/triocanu_clr.spectra-asm.ln.png b/programs/merqury-1.1/example/triocanu_clr.spectra-asm.ln.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.spectra-asm.ln.png rename to programs/merqury-1.1/example/triocanu_clr.spectra-asm.ln.png diff --git a/programs/merqury_June2020/example/triocanu_clr.spectra-asm.st.png b/programs/merqury-1.1/example/triocanu_clr.spectra-asm.st.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.spectra-asm.st.png rename to programs/merqury-1.1/example/triocanu_clr.spectra-asm.st.png diff --git a/programs/merqury_June2020/example/triocanu_clr.spectra-cn.fl.png b/programs/merqury-1.1/example/triocanu_clr.spectra-cn.fl.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.spectra-cn.fl.png rename to programs/merqury-1.1/example/triocanu_clr.spectra-cn.fl.png diff --git a/programs/merqury_June2020/example/triocanu_clr.spectra-cn.ln.png b/programs/merqury-1.1/example/triocanu_clr.spectra-cn.ln.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.spectra-cn.ln.png rename to programs/merqury-1.1/example/triocanu_clr.spectra-cn.ln.png diff --git a/programs/merqury_June2020/example/triocanu_clr.spectra-cn.st.png b/programs/merqury-1.1/example/triocanu_clr.spectra-cn.st.png similarity index 100% rename from programs/merqury_June2020/example/triocanu_clr.spectra-cn.st.png rename to programs/merqury-1.1/example/triocanu_clr.spectra-cn.st.png diff --git a/programs/merqury_June2020/merqury-mash.sh b/programs/merqury-1.1/merqury-mash.sh similarity index 81% rename from programs/merqury_June2020/merqury-mash.sh rename to programs/merqury-1.1/merqury-mash.sh index d7b4372e391c592adc86e1d168c47192ae870a0a..8788b464dc53f227719406d37a655e0cbf593757 100755 --- a/programs/merqury_June2020/merqury-mash.sh +++ b/programs/merqury-1.1/merqury-mash.sh @@ -15,7 +15,15 @@ input_fofn=$3 cpus=$4 name=`echo $asm | sed 's/.fasta$//g' | sed 's/.fa$//g' | sed 's/.fasta.gz//g' | sed 's/.fa.gz//g'` -module load mash + +source $MERQURY/util/util.sh + +has_module=$(check_module) +if [[ $has_module -gt 0 ]]; then + echo "No modules available.." +else + module load mash +fi mash sketch -s 1000000 -k $k $asm mash screen -p $cpus $asm.msh `cat $input_fofn | tr '\n' ' '` > $name.msh.idy diff --git a/programs/merqury_June2020/merqury.sh b/programs/merqury-1.1/merqury.sh similarity index 100% rename from programs/merqury_June2020/merqury.sh rename to programs/merqury-1.1/merqury.sh diff --git a/programs/merqury_June2020/plot/plot_blob.R b/programs/merqury-1.1/plot/plot_blob.R similarity index 100% rename from programs/merqury_June2020/plot/plot_blob.R rename to programs/merqury-1.1/plot/plot_blob.R diff --git a/programs/merqury_June2020/plot/plot_block_N.R b/programs/merqury-1.1/plot/plot_block_N.R similarity index 100% rename from programs/merqury_June2020/plot/plot_block_N.R rename to programs/merqury-1.1/plot/plot_block_N.R diff --git a/programs/merqury_June2020/plot/plot_spectra_asm.R b/programs/merqury-1.1/plot/plot_spectra_asm.R similarity index 100% rename from programs/merqury_June2020/plot/plot_spectra_asm.R rename to programs/merqury-1.1/plot/plot_spectra_asm.R diff --git a/programs/merqury_June2020/plot/plot_spectra_cn.R b/programs/merqury-1.1/plot/plot_spectra_cn.R similarity index 100% rename from programs/merqury_June2020/plot/plot_spectra_cn.R rename to programs/merqury-1.1/plot/plot_spectra_cn.R diff --git a/programs/merqury_June2020/trio/bedMerToPhaseBlock.jar b/programs/merqury-1.1/trio/bedMerToPhaseBlock.jar similarity index 100% rename from programs/merqury_June2020/trio/bedMerToPhaseBlock.jar rename to programs/merqury-1.1/trio/bedMerToPhaseBlock.jar diff --git a/programs/merqury_June2020/trio/block_n_stats.sh b/programs/merqury-1.1/trio/block_n_stats.sh similarity index 96% rename from programs/merqury_June2020/trio/block_n_stats.sh rename to programs/merqury-1.1/trio/block_n_stats.sh index ae7eadbc484dee0123ee03e0433af97eb1dcc5a8..af85ef5a1a7f0b6912af581f9a23f9db6f582370 100755 --- a/programs/merqury_June2020/trio/block_n_stats.sh +++ b/programs/merqury-1.1/trio/block_n_stats.sh @@ -65,7 +65,7 @@ do if [[ $num_gaps -gt 0 ]]; then echo "# Found $num_gaps. Generating stats for both scaffolds and contigs." awk -v asm=$asm '{print "scaffold\t"asm"\t"$2}' $asm.fasta.fai | sort -nr -k3 - > $out.$asm.scaff.sizes - awk '{print $1"\t0\t"$2}' $asm.fasta.fai | bedtools subtract -a - -b $asm.gaps.bed | awk '{print "contig\t"asm"\t"($NF-$(NF-1))}' | sort -nr -k3 - > $out.$asm.contig.sizes + awk '{print $1"\t0\t"$2}' $asm.fasta.fai | bedtools subtract -a - -b $asm.gaps.bed | awk -v asm=$asm '{print "contig\t"asm"\t"($NF-$(NF-1))}' | sort -nr -k3 - > $out.$asm.contig.sizes else echo "# No gaps found. This is a contig set." awk -v asm=$asm '{print "contig\t"asm"\t"$2}' $asm.fasta.fai | sort -nr -k3 - > $out.$asm.contig.sizes diff --git a/programs/merqury_June2020/trio/exclude_reads.sh b/programs/merqury-1.1/trio/exclude_reads.sh similarity index 100% rename from programs/merqury_June2020/trio/exclude_reads.sh rename to programs/merqury-1.1/trio/exclude_reads.sh diff --git a/programs/merqury_June2020/trio/fastaGetGaps.jar b/programs/merqury-1.1/trio/fastaGetGaps.jar similarity index 100% rename from programs/merqury_June2020/trio/fastaGetGaps.jar rename to programs/merqury-1.1/trio/fastaGetGaps.jar diff --git a/programs/merqury_June2020/trio/hap_blob.sh b/programs/merqury-1.1/trio/hap_blob.sh similarity index 100% rename from programs/merqury_June2020/trio/hap_blob.sh rename to programs/merqury-1.1/trio/hap_blob.sh diff --git a/programs/merqury_June2020/trio/hapmers.sh b/programs/merqury-1.1/trio/hapmers.sh similarity index 97% rename from programs/merqury_June2020/trio/hapmers.sh rename to programs/merqury-1.1/trio/hapmers.sh index 6e4551cce9a85c2f22753ef59c9fc67c1bb55b95..aeaa48f24817fbd575c6be5480d59331d281e770 100755 --- a/programs/merqury_June2020/trio/hapmers.sh +++ b/programs/merqury-1.1/trio/hapmers.sh @@ -22,9 +22,11 @@ if [[ "$#" -lt 2 ]]; then exit -1 fi -hap1_meryl=$1 -hap2_meryl=$2 -child_meryl=$3 +source $MERQURY/util/util.sh + +hap1_meryl=`link $1` +hap2_meryl=`link $2` +child_meryl=`link $3` hap1=${hap1_meryl%.meryl*} hap2=${hap2_meryl%.meryl*} diff --git a/programs/merqury-1.1/trio/hapmers_to_bigwig.sh b/programs/merqury-1.1/trio/hapmers_to_bigwig.sh new file mode 100755 index 0000000000000000000000000000000000000000..4b208ac816e5064a2c6d5e68acf8d4be48c3ca0f --- /dev/null +++ b/programs/merqury-1.1/trio/hapmers_to_bigwig.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +if [[ -z $1 ]]; then + echo "Usage: ./hapmers_to_bigWig.sh <in.hap.bed> <asm.fai> [convert_pipes]" + echo -e "\t<in.hap.bed>: ex. out.asm.mat.inherited.bed" + echo -e "\t<asm.fai>: generate with samtools faidx" + echo -e "\t[convert_pipes]: convert '|' to '_' in asm seq names by default. Set to F if not wanted." + exit 0 +fi + +bed=$1 +fai=$2 +convert=$3 + +module load bedtools +module load ucsc/396 + +bg=${bed/.bed/.bg} + +if [[ "$convert" = "F" ]]; then + echo "# keep pipes" + sizes=${fai/.fai/.sizes} + cat $fai | cut -f1-2 > $sizes + cat $bed | bedtools genomecov -bg -g $sizes -i - > $bg +else + sizes=${fai/.fai/.sizes} + cat $fai | cut -f1-2 | sed 's/|/_/g' > $sizes + cat $bed | sed 's/|/_/g' | bedtools genomecov -bg -g $sizes -i - > $bg +fi + +sort=${bg/.bg/.srt.bg} +bedSort $bg $sort + +bw=${bed/.bed/.bigwig} +bedGraphToBigWig $sort $sizes $bw + +rm $bg $sort + diff --git a/programs/merqury_June2020/trio/phase_block.sh b/programs/merqury-1.1/trio/phase_block.sh similarity index 100% rename from programs/merqury_June2020/trio/phase_block.sh rename to programs/merqury-1.1/trio/phase_block.sh diff --git a/programs/merqury_June2020/trio/phase_block/IO/Rwrapper.java b/programs/merqury-1.1/trio/phase_block/IO/Rwrapper.java similarity index 100% rename from programs/merqury_June2020/trio/phase_block/IO/Rwrapper.java rename to programs/merqury-1.1/trio/phase_block/IO/Rwrapper.java diff --git a/programs/merqury_June2020/trio/phase_block/IO/basic/BufferedFileReader.java b/programs/merqury-1.1/trio/phase_block/IO/basic/BufferedFileReader.java similarity index 100% rename from programs/merqury_June2020/trio/phase_block/IO/basic/BufferedFileReader.java rename to programs/merqury-1.1/trio/phase_block/IO/basic/BufferedFileReader.java diff --git a/programs/merqury_June2020/trio/phase_block/IO/basic/FileMaker.java b/programs/merqury-1.1/trio/phase_block/IO/basic/FileMaker.java similarity index 100% rename from programs/merqury_June2020/trio/phase_block/IO/basic/FileMaker.java rename to programs/merqury-1.1/trio/phase_block/IO/basic/FileMaker.java diff --git a/programs/merqury_June2020/trio/phase_block/IO/basic/FileReader.java b/programs/merqury-1.1/trio/phase_block/IO/basic/FileReader.java similarity index 100% rename from programs/merqury_June2020/trio/phase_block/IO/basic/FileReader.java rename to programs/merqury-1.1/trio/phase_block/IO/basic/FileReader.java diff --git a/programs/merqury_June2020/trio/phase_block/IO/basic/Format.java b/programs/merqury-1.1/trio/phase_block/IO/basic/Format.java similarity index 100% rename from programs/merqury_June2020/trio/phase_block/IO/basic/Format.java rename to programs/merqury-1.1/trio/phase_block/IO/basic/Format.java diff --git a/programs/merqury_June2020/trio/phase_block/IO/basic/IOUtil.java b/programs/merqury-1.1/trio/phase_block/IO/basic/IOUtil.java similarity index 100% rename from programs/merqury_June2020/trio/phase_block/IO/basic/IOUtil.java rename to programs/merqury-1.1/trio/phase_block/IO/basic/IOUtil.java diff --git a/programs/merqury_June2020/trio/phase_block/IO/basic/RegExp.java b/programs/merqury-1.1/trio/phase_block/IO/basic/RegExp.java similarity index 100% rename from programs/merqury_June2020/trio/phase_block/IO/basic/RegExp.java rename to programs/merqury-1.1/trio/phase_block/IO/basic/RegExp.java diff --git a/programs/merqury_June2020/trio/phase_block/IO/basic/Wrapper.java b/programs/merqury-1.1/trio/phase_block/IO/basic/Wrapper.java similarity index 100% rename from programs/merqury_June2020/trio/phase_block/IO/basic/Wrapper.java rename to programs/merqury-1.1/trio/phase_block/IO/basic/Wrapper.java diff --git a/programs/merqury_June2020/trio/phase_block/MerToPhaseBlock.java b/programs/merqury-1.1/trio/phase_block/MerToPhaseBlock.java similarity index 100% rename from programs/merqury_June2020/trio/phase_block/MerToPhaseBlock.java rename to programs/merqury-1.1/trio/phase_block/MerToPhaseBlock.java diff --git a/programs/merqury_June2020/trio/phase_block/README.md b/programs/merqury-1.1/trio/phase_block/README.md similarity index 100% rename from programs/merqury_June2020/trio/phase_block/README.md rename to programs/merqury-1.1/trio/phase_block/README.md diff --git a/programs/merqury_June2020/trio/phase_block/bed/util/Bed.java b/programs/merqury-1.1/trio/phase_block/bed/util/Bed.java similarity index 100% rename from programs/merqury_June2020/trio/phase_block/bed/util/Bed.java rename to programs/merqury-1.1/trio/phase_block/bed/util/Bed.java diff --git a/programs/merqury_June2020/trio/phase_block/bed/util/Region.java b/programs/merqury-1.1/trio/phase_block/bed/util/Region.java similarity index 100% rename from programs/merqury_June2020/trio/phase_block/bed/util/Region.java rename to programs/merqury-1.1/trio/phase_block/bed/util/Region.java diff --git a/programs/merqury_June2020/trio/phase_block/build_jar.sh b/programs/merqury-1.1/trio/phase_block/build_jar.sh similarity index 100% rename from programs/merqury_June2020/trio/phase_block/build_jar.sh rename to programs/merqury-1.1/trio/phase_block/build_jar.sh diff --git a/programs/merqury_June2020/trio/phase_block/genome/Chromosome.java b/programs/merqury-1.1/trio/phase_block/genome/Chromosome.java similarity index 100% rename from programs/merqury_June2020/trio/phase_block/genome/Chromosome.java rename to programs/merqury-1.1/trio/phase_block/genome/Chromosome.java diff --git a/programs/merqury_June2020/trio/phase_block/genome/ChromosomeComparator.java b/programs/merqury-1.1/trio/phase_block/genome/ChromosomeComparator.java similarity index 100% rename from programs/merqury_June2020/trio/phase_block/genome/ChromosomeComparator.java rename to programs/merqury-1.1/trio/phase_block/genome/ChromosomeComparator.java diff --git a/programs/merqury_June2020/trio/phase_block/genome/util/Util.java b/programs/merqury-1.1/trio/phase_block/genome/util/Util.java similarity index 100% rename from programs/merqury_June2020/trio/phase_block/genome/util/Util.java rename to programs/merqury-1.1/trio/phase_block/genome/util/Util.java diff --git a/programs/merqury_June2020/trio/spectra-hap.sh b/programs/merqury-1.1/trio/spectra-hap.sh similarity index 100% rename from programs/merqury_June2020/trio/spectra-hap.sh rename to programs/merqury-1.1/trio/spectra-hap.sh diff --git a/programs/merqury_June2020/trio/switch_error.sh b/programs/merqury-1.1/trio/switch_error.sh similarity index 100% rename from programs/merqury_June2020/trio/switch_error.sh rename to programs/merqury-1.1/trio/switch_error.sh diff --git a/programs/merqury-1.1/util/bed_to_bigwig.sh b/programs/merqury-1.1/util/bed_to_bigwig.sh new file mode 100755 index 0000000000000000000000000000000000000000..fcc554b508f6580b2a039c7a91b1278a3fa62b63 --- /dev/null +++ b/programs/merqury-1.1/util/bed_to_bigwig.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +if [[ -z $1 ]]; then + echo "Usage: ./bed_to_bigWig.sh <bed> <asm.fai> <convert_pipes>" + echo -e "\t<bed>: ex. out.asm.mat.inherited.bed" + echo -e "\t<asm.fai>: generate with samtools faidx" + echo -e "\t<convert_pipes>: convert | to _ by default. Set to F if not wanted." + exit 0 +fi + +bed=$1 +fai=$2 +convert=$3 + +module load bedtools +module load ucsc/396 + +bg=${bed/.bed/.bg} + +if [[ "$convert" = "F" ]]; then + echo "# keep pipes" + sizes=${fai/.fai/.sizes} + cat $fai | cut -f1-2 > $sizes + cat $bed | bedtools genomecov -bg -g $sizes -i - > $bg +else + sizes=${fai/.fai/.sizes} + cat $fai | cut -f1-2 | sed 's/|/_/g' > $sizes + cat $bed | sed 's/|/_/g' | bedtools genomecov -bg -g $sizes -i - > $bg +fi + +sort=${bg/.bg/.srt.bg} +bedSort $bg $sort + +bw=${bed/.bed/.bigwig} +bedGraphToBigWig $sort $sizes $bw + +rm $bg $sort + + diff --git a/programs/merqury-1.1/util/merylDumpToWig.jar b/programs/merqury-1.1/util/merylDumpToWig.jar new file mode 100644 index 0000000000000000000000000000000000000000..96b05c7206e38a4a03522e3e6c33d0d8f9543071 Binary files /dev/null and b/programs/merqury-1.1/util/merylDumpToWig.jar differ diff --git a/programs/merqury_June2020/util/util.sh b/programs/merqury-1.1/util/util.sh similarity index 100% rename from programs/merqury_June2020/util/util.sh rename to programs/merqury-1.1/util/util.sh diff --git a/programs/merqury_June2020/.DS_Store b/programs/merqury_June2020/.DS_Store deleted file mode 100644 index f30f8f11f1832ec8963d2f01db1c78ff59a93b04..0000000000000000000000000000000000000000 Binary files a/programs/merqury_June2020/.DS_Store and /dev/null differ diff --git a/programs/meryl-1.0/.DS_Store b/programs/meryl-1.0/.DS_Store deleted file mode 100644 index cc1ad41252f6ce4314ddfc1ef5f88a2bc6376f17..0000000000000000000000000000000000000000 Binary files a/programs/meryl-1.0/.DS_Store and /dev/null differ