Skip to content

Commit 0d5dd04

Browse files
committed
feat: CLIN-4027 missing normalized gnomad joint v4 and fix alignments
1 parent b2ebd6c commit 0d5dd04

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

Diff for: datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/publictables/PublicDatasets.scala

+2 -2
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ case class PublicDatasets(alias: String, tableDatabase: Option[String], viewData
13 13
DatasetConf("raw_clinvar" , alias, "/raw/landing/clinvar/clinvar.vcf.gz" , VCF , OverWrite , readoptions = Map("flattenInfoFields" -> "true", "split_multiallelics" -> "true")),
14 14
DatasetConf("raw_dbsnp" , alias, "/raw/landing/dbsnp/GCF_000001405.40.gz" , VCF , OverWrite , readoptions = Map("flattenInfoFields" -> "true", "split_multiallelics" -> "true")),
15 15
DatasetConf("raw_gnomad_genomes_v3" , alias, "/release/3.1/vcf/genomes/gnomad.genomes.v3.1.sites.chr[^M]*.vcf.bgz", VCF , OverWrite , readoptions = Map("flattenInfoFields" -> "true", "split_multiallelics" -> "true")).copy(storageid = gnomadStorageId),
16 -
DatasetConf("raw_gnomad_joint_v4" , alias, "/raw/landing/gnomad_v4/release/4.1/vcf/joint/gnomad.joint.v4.1.sites.chr[^M]*.vcf.bgz", VCF , OverWrite , readoptions = Map("flattenInfoFields" -> "true", "split_multiallelics" -> "true")),
16 +
DatasetConf("raw_gnomad_joint_v4" , alias, "/raw/landing/gnomad_v4/release/4.1/vcf/joint/gnomad.joint.v4.1.sites.chr[^M]*.vcf.bgz", VCF , OverWrite , readoptions = Map("flattenInfoFields" -> "true", "split_multiallelics" -> "true")),
17 17
DatasetConf("raw_gnomad_constraint_v2_1_1" , alias, "/raw/landing/gnomad_v2_1_1/gnomad.v2.1.1.lof_metrics.by_gene.txt.gz", CSV , OverWrite , readoptions = Map("header" -> "true", "sep" -> "\t")),
18 18
DatasetConf("raw_topmed_bravo" , alias, "/raw/landing/topmed/bravo-dbsnp-*.vcf.gz" , VCF , OverWrite , readoptions = Map("flattenInfoFields" -> "true", "split_multiallelics" -> "true")),
19 19
DatasetConf("raw_1000_genomes" , alias, "/raw/landing/1000Genomes/ALL.*.sites.vcf.gz" , VCF , OverWrite , readoptions = Map("flattenInfoFields" -> "true", "split_multiallelics" -> "true")),
@@ -51,7 +51,7 @@ case class PublicDatasets(alias: String, tableDatabase: Option[String], viewData
51 51
DatasetConf("normalized_gnomad_exomes_v2_1_1" , alias, "/public/gnomad_exomes_v2_1_1_liftover_grch38" , DELTA, OverWrite , partitionby = List("chromosome"), table = table("gnomad_exomes_v2_1_1") , view = view("gnomad_exomes_v2_1_1")),
52 52
DatasetConf("normalized_gnomad_constraint_v2_1_1", alias, "/public/gnomad_constraint_v2_1_1" , DELTA, OverWrite , partitionby = List("chromosome"), table = table("gnomad_constraint_v_2_1_1"), view = view("gnomad_constraint_v_2_1_1")),
53 53
DatasetConf("normalized_gnomad_genomes_v3" , alias, "/public/gnomad_genomes_v3" , DELTA, OverWrite , partitionby = List("chromosome"), table = table("gnomad_genomes_v3") , view = view("gnomad_genomes_v3")),
54 -
DatasetConf("normalized_gnomad_genomes_v4" , alias, "/public/gnomad_genomes_v4" , DELTA, OverWrite , partitionby = List("chromosome"), table = table("gnomad_genomes_v4") , view = view("gnomad_genomes_v4")),
54 +
DatasetConf("normalized_gnomad_joint_v4" , alias, "/public/gnomad_joint_v4" , DELTA, OverWrite , partitionby = List("chromosome"), table = table("gnomad_joint_v4") , view = view("gnomad_joint_v4")),
55 55
DatasetConf("normalized_human_genes" , alias, "/public/human_genes" , DELTA, OverWrite , partitionby = List() , table = table("human_genes") , view = view("human_genes")),
56 56
DatasetConf("normalized_hpo_gene_set" , alias, "/public/hpo_gene_set" , DELTA, OverWrite , partitionby = List() , table = table("hpo_gene_set") , view = view("hpo_gene_set")),
57 57
DatasetConf("normalized_omim_gene_set" , alias, "/public/omim_gene_set" , DELTA, OverWrite , partitionby = List() , table = table("omim_gene_set") , view = view("omim_gene_set")),

0 commit comments

Comments
 (0)