Skip to content
Snippets Groups Projects
Commit 3a82f570 authored by vladijouan's avatar vladijouan
Browse files

Update getting_promoter_regions.sh

parent c65a8b7e
Branches
No related tags found
No related merge requests found
#!/bin/bash
# Extract the 2000 bp regions upstream of the transcript start for a list of
# genes, merge nearby regions, and fetch their sequences from the genome.
#
# Inputs (edit the three paths below):
#   genenames   text file; the first whitespace-separated column of each line
#               is used as a gene name
#   annotation  annotation table whose 3rd column holds the feature type and
#               whose 10th whitespace-separated field holds a quoted
#               identifier (presumably a GTF with gene_id first — confirm)
#   genome      genome FASTA file handed to `bedtools getfasta -fi`
#
# Outputs (written to the current directory):
#   genenames_merged.txt  gene names wrapped in double quotes
#   positions.bed6.bed    one 6-column BED record per matching transcript
#   mergedbed.bed         regions merged by `bedtools merge`
#   mergedbad.fa          FASTA sequences of the merged regions
#
# Abort on the first failing command, on unset variables, and on a failure in
# any pipeline stage (including grep finding no matching gene at all).
set -euo pipefail

genenames=/path/to/file
annotation=/path/to/annotationfile
genome=/path/to/genome/fastafile

# Wrap each gene name in double quotes so the later grep only hits the quoted
# identifier and not a longer name that merely contains it. Blank lines are
# skipped so they cannot produce an empty "" pattern.
awk 'NF { printf "\"%s\"\n", $1 }' "$genenames" > genenames_merged.txt

# Build the 2000 bp window in front of every selected transcript:
#   "-" strand: [end, end + 2000]
#   otherwise : [start - 2000, start]
# NOTE(review): for the non-minus case the BED end equals the 1-based start
# column, so the window appears to include the first transcript base —
# confirm this is intended.
# grep -F matches the names literally (a "." in an ID is not a wildcard).
# Windows that would begin before the sequence start are clamped to 0 (the
# previous `$2+0` was a no-op, so negative starts were passed on), and
# records whose end coordinate is 4 or less are dropped.
awk '$3 == "transcript"' "$annotation" \
  | grep -F -f genenames_merged.txt \
  | awk 'BEGIN { OFS = "\t" }
      {
        split($10, attr, ";")
        split(attr[1], id, "\"")
        if ($7 == "-")
          print $1, $5, $5 + 2000, id[2], "1000", $7
        else
          print $1, $4 - 2000, $4, id[2], "1000", $7
      }' \
  | awk 'BEGIN { OFS = "\t" }
      { if ($2 < 0) $2 = 0 }
      $3 > 4' \
  | sort -k1,1 -k2,2n \
  | uniq > positions.bed6.bed

# Merge the promoter regions if they overlap or lie within 100 bp of each
# other; -s is the bedtools strand option (same-strand merging).
bedtools merge -d 100 -s -i positions.bed6.bed > mergedbed.bed

# Get the sequence of every merged region.
# NOTE(review): "mergedbad.fa" looks like a typo for "mergedbed.fa"; the name
# is kept unchanged so anything reading this file keeps working.
bedtools getfasta -fi "$genome" -bed mergedbed.bed -fo mergedbad.fa
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment