1
1
# make reference annotations (genes and gene symbols) for hg19, hg38 and mm10
2
2
# requires BEDOPS http://bedops.readthedocs.io/en/latest/content/reference/file-management/conversion/gtf2bed.html
3
3
# run every recipe with bash instead of make's default /bin/sh
SHELL:=/bin/bash
4
+ # look in ./bin (relative to the directory make runs in) before the inherited PATH
+ PATH:=$(CURDIR)/bin:$(PATH)
4
5
5
6
# no default action to take: `none` has neither prerequisites nor a recipe, and it
# appears to be the first rule in the file, so a bare `make` builds nothing
6
7
none :
7
8
9
+ # download the BEDOPS v2.4.41 Linux x86_64 release tarball and unpack it here (presumably yielding ./bin -- TODO confirm)
10
+ bin :
11
+ wget https://github.com/bedops/bedops/releases/download/v2.4.41/bedops_linux_x86_64-v2.4.41.tar.bz2 && \
12
+ tar xjvf bedops_linux_x86_64-v2.4.41.tar.bz2
13
+
8
14
# aggregate target: build every annotation set listed as a prerequisite below
# (GENCODE and Ensembl for hg19 and hg38, Ensembl for mm10)
9
15
all : gencode-hg19 ensembl-hg19 gencode-hg38 ensembl-hg38 ensembl-mm10
10
16
@@ -25,8 +31,8 @@ ensembl-mm10: Mus_musculus.GRCm38.91.chr.bed
25
31
26
32
# ~~~~~ GENCODE hg19 ~~~~~ #
27
33
# generate the Gencode hg19 annotations .bed file
28
- gencode.v19.annotation.gtf.gz :
29
- wget ftp ://ftp.sanger .ac.uk/pub/gencode/Gencode_human/release_19/gencode.v19.annotation.gtf.gz
34
+ # fetch the GENCODE release 19 annotation GTF (gzip-compressed) from the EBI server over HTTPS
+ gencode.v19.annotation.gtf.gz :
35
+ wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_19/gencode.v19.annotation.gtf.gz
30
36
31
37
# decompress the GTF, keep only lines containing the whole word `gene`, and let
# convert2bed turn them into BED; $< is the .gtf.gz prerequisite, $@ the .bed target
gencode.v19.annotation.genes.bed : gencode.v19.annotation.gtf.gz
32
38
zcat $< | grep -w gene | convert2bed --input=gtf - > $@
@@ -36,13 +42,13 @@ gencode.v19.annotation.genes.id4.bed: gencode.v19.annotation.genes.bed
36
42
37
43
# ~~~~~ GENCODE hg38 ~~~~~ #
38
44
# generate the Gencode hg38 annotations .bed file
39
- gencode.v27.annotation.gtf.gz :
45
# fetch the GENCODE release 27 annotation GTF (gzip-compressed) from the EBI FTP server
+ gencode.v27.annotation.gtf.gz :
40
46
wget ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_27/gencode.v27.annotation.gtf.gz
41
47
42
48
# decompress the GTF and keep only lines containing the whole word `gene`; the awk
# step appends an empty `transcript_id "";` attribute to any line that lacks one
# (presumably because convert2bed expects that attribute -- confirm) before the
# conversion to BED. $< is the .gtf.gz prerequisite, $@ the .bed target.
gencode.v27.annotation.genes.bed : gencode.v27.annotation.gtf.gz
43
49
zcat $< | grep -w gene | awk ' { if ($$0 ~ "transcript_id") print $$0; else print $$0" transcript_id \"\";"; }' | convert2bed --input=gtf - > $@
44
50
45
- gencode.v41.annotation.gtf.gz :
51
# fetch the GENCODE release 41 annotation GTF (gzip-compressed) from the EBI FTP server
+ gencode.v41.annotation.gtf.gz :
46
52
wget ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/gencode.v41.annotation.gtf.gz
47
53
48
54
gencode.v41.annotation.genes.bed : gencode.v41.annotation.gtf.gz
@@ -51,7 +57,7 @@ gencode.v41.annotation.genes.bed: gencode.v41.annotation.gtf.gz
51
57
52
58
# ~~~~~ ENSEMBL hg19 ~~~~~ #
53
59
# generate the Ensembl hg19 annotations .bed file
54
- Homo_sapiens.GRCh37.82.chr.gtf.gz :
60
# fetch the Ensembl GRCh37 chromosome-level GTF (gzip-compressed) from the Ensembl FTP server
# NOTE(review): the URL directory is release-84 while the file name says .82 -- confirm this is intentional
+ Homo_sapiens.GRCh37.82.chr.gtf.gz :
55
61
wget ftp://ftp.ensembl.org/pub/grch37/release-84/gtf/homo_sapiens/Homo_sapiens.GRCh37.82.chr.gtf.gz
56
62
57
63
# remove comment lines
@@ -116,6 +122,3 @@ Mus_musculus.GRCm38.91.chr.bed: Mus_musculus.GRCm38.91.chr.gtf
116
122
Homo_sapiens.GRCh37.82.chr.gtf.gz \
117
123
Mus_musculus.GRCm38.91.chr.gtf.gz \
118
124
Mus_musculus.GRCm38.91.chr.gtf
119
-
120
-
121
-
0 commit comments