Skip to content

Commit e17a4b0

Browse files
authored
Merge pull request #264 from Ferlab-Ste-Justine/feat/clin-4060
feat: CLIN-4060 use gnomad 4 for hc_complement calc
2 parents 82eb935 + c4eca1b commit e17a4b0

File tree

3 files changed

+12
-11
lines changed

3 files changed

+12
-11
lines changed

datalake-spark3/src/main/scala/bio/ferlab/datalake/spark3/publictables/enriched/RareVariant.scala

+3-3
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ import java.time.LocalDateTime
1313
case class RareVariant(rc: RuntimeETLContext) extends SimpleSingleETL(rc) {
1414

1515
override val mainDestination: DatasetConf = conf.getDataset("enriched_rare_variant")
16-
val gnomad: DatasetConf = conf.getDataset("normalized_gnomad_genomes_v3")
16+
val gnomad: DatasetConf = conf.getDataset("normalized_gnomad_joint_v4")
1717

1818
override def extract(lastRunValue: LocalDateTime,
1919
currentRunValue: LocalDateTime): Map[String, DataFrame] = {
@@ -25,9 +25,9 @@ case class RareVariant(rc: RuntimeETLContext) extends SimpleSingleETL(rc) {
2525
lastRunValue: LocalDateTime,
2626
currentRunValue: LocalDateTime): DataFrame = {
2727
data(gnomad.id)
28-
.select(columns.locus :+ col("af"): _*)
28+
.select(columns.locus :+ col("af_joint"): _*)
2929
.groupByLocus()
30-
.agg(max("af") as "af")
30+
.agg(max("af_joint") as "af")
3131
.withColumn("is_rare", col("af") <= 0.01)
3232
}
3333

datalake-spark3/src/test/scala/bio/ferlab/datalake/spark3/publictables/enriched/RareVariantSpec.scala

+8-7
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
11
package bio.ferlab.datalake.spark3.publictables.enriched
22

33
import bio.ferlab.datalake.commons.config.DatasetConf
4-
import bio.ferlab.datalake.testutils.models.enriched.{EnrichedRareVariantInput, EnrichedRareVariantOutput}
4+
import bio.ferlab.datalake.testutils.models.enriched.EnrichedRareVariantOutput
55
import bio.ferlab.datalake.spark3.testutils.WithTestConfig
6+
import bio.ferlab.datalake.testutils.models.normalized.NormalizedGnomadJoint4
67
import bio.ferlab.datalake.testutils.{SparkSpec, TestETLContext}
78

89
class RareVariantSpec extends SparkSpec with WithTestConfig {
@@ -13,14 +14,14 @@ class RareVariantSpec extends SparkSpec with WithTestConfig {
1314

1415
val gnomad_df: DatasetConf = job.gnomad
1516

16-
"transformSingle" should "transform Gnomad v2 to rare variant" in {
17+
"transformSingle" should "transform Gnomad v4 to rare variant" in {
1718
val inputData = Map(
1819
gnomad_df.id -> Seq(
19-
EnrichedRareVariantInput(chromosome = "1", start = 1000, reference = "A", alternate = "T", af = 0.005),
20-
EnrichedRareVariantInput(chromosome = "1", start = 1000, reference = "A", alternate = "T", af = 0.03),
21-
EnrichedRareVariantInput(chromosome = "1", start = 2000, reference = "A", alternate = "T", af = 0.011),
22-
EnrichedRareVariantInput(chromosome = "2", start = 1000, reference = "A", alternate = "T", af = 0.005),
23-
EnrichedRareVariantInput(chromosome = "2", start = 1000, reference = "A", alternate = "T", af = 0.01)
20+
NormalizedGnomadJoint4(chromosome = "1", start = 1000, reference = "A", alternate = "T", af_joint = 0.005),
21+
NormalizedGnomadJoint4(chromosome = "1", start = 1000, reference = "A", alternate = "T", af_joint = 0.03),
22+
NormalizedGnomadJoint4(chromosome = "1", start = 2000, reference = "A", alternate = "T", af_joint = 0.011),
23+
NormalizedGnomadJoint4(chromosome = "2", start = 1000, reference = "A", alternate = "T", af_joint = 0.005),
24+
NormalizedGnomadJoint4(chromosome = "2", start = 1000, reference = "A", alternate = "T", af_joint = 0.01)
2425

2526
)
2627
.toDF()

datalake-test-utils/src/main/scala/bio/ferlab/datalake/testutils/models/enriched/EnrichedRareVariantInput.scala

+1-1
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ case class EnrichedRareVariantInput(chromosome: String = "1",
44
start: Long = 210862942,
55
reference: String = "GGCA",
66
alternate: String = "G",
7-
af: Double = 1.0
7+
af_joint: Double = 1.0
88
) {
99

1010
}

0 commit comments

Comments
 (0)