Update getting_promoter_regions.sh

3a82f570 · vladijouan · c65a8b7e · 3a82f570
Commit 3a82f570 authored Aug 17, 2023 by vladijouan
--- a/getting_promoter_regions.sh
+++ b/getting_promoter_regions.sh
 #!/bin/bash
+# Input paths (placeholder values).
+genenames=/path/to/file
+annotation=/path/to/annotationfile
+genome=/path/to/genome/fastafile
+# Wrap each gene name (first column of $genenames) in double quotes so the
+# later grep only hits a name where it appears as a complete quoted value.
+# Blank lines are skipped: they would otherwise produce the pattern "" and
+# match every record that carries an empty quoted value.
+awk 'NF { print "\"" $1 "\"" }' "$genenames" > genenames_merged.txt
 #get the 2000 bp long region before the transcript starting position
-grep -f $posgenenames <(awk '{if ($3=="transcript") print}' $annotation) | awk '{OFS="\t"; split($10,a,";"); split(a[1],b,"\""); if ($7=="-") print $1,$5,$5+2000,b[2],"1000",$7; else print $1,$4-2000,$4,b[2],"1000",$7}' | awk '{OFS="\t"; if ($2<0) $2+0; print $0}' | awk '{if ($3>4) print}' | sort -k1,1 -k2,2n | uniq > $pospositions.bed6.bed
+# Keep the "transcript" records that mention one of the quoted gene names and
+# write one BED6 line per record: the 2000 bp window next to the transcript
+# start (after $5 on the "-" strand, before $4 otherwise), named after the
+# first quoted value found in field 10, with a fixed score of 1000.
+# grep -F matches the names literally, so a "." inside an identifier is not
+# treated as a regex wildcard.
+awk '$3 == "transcript"' "$annotation" \
+  | grep -F -f genenames_merged.txt \
+  | awk 'BEGIN { OFS = "\t" }
+      {
+        split($10, a, ";"); split(a[1], b, "\"")
+        if ($7 == "-") { lo = $5;        hi = $5 + 2000 }
+        else           { lo = $4 - 2000; hi = $4 }
+        # Clamp windows that run off the start of the sequence to 0.
+        if (lo < 0) lo = 0
+        # Only windows whose end coordinate exceeds 4 are kept.
+        if (hi > 4) print $1, lo, hi, b[2], "1000", $7
+      }' \
+  | sort -k1,1 -k2,2n \
+  | uniq > positions.bed6.bed
 #merge the promoter regions if they are overlapping 
-bedtools merge -d 100 -s -i $pospositions.bed6.bed  > $mergedbed.bed
+# Collapse promoter windows that lie within 100 bp of each other, merging only
+# features on the same strand (-s), into mergedbed.bed.
+# NOTE(review): which columns (name/strand) survive the merge depends on the
+# bedtools version and -c/-o options; confirm before using strand downstream.
+bedtools merge -s -d 100 -i positions.bed6.bed > mergedbed.bed
 #get the sequence 
-bedtools getfasta -fi $GENOME -bed $mergedbed.bed -fo $mergedbad.fa
+# Extract the genomic sequence of every merged region from the genome FASTA.
+# "$genome" is quoted so a path containing spaces is passed as one argument.
+# NOTE(review): the output name "mergedbad.fa" looks like a typo for
+# "mergedbed.fa"; it is kept as-is because other steps may read this file.
+bedtools getfasta -fi "$genome" -bed mergedbed.bed -fo mergedbad.fa