Skip to content

Commit b399784

Browse files
committed
Merge branch 'dev' of github.com:broadinstitute/seqr-loading-pipelines
2 parents b3708d4 + 0b87970 commit b399784

4 files changed

+89
-38
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import hail as hl
2+
import luigi
3+
import luigi.util
4+
5+
from v03_pipeline.lib.misc.allele_registry import register_alleles_in_chunks
6+
from v03_pipeline.lib.misc.io import checkpoint
7+
from v03_pipeline.lib.model import (
8+
Env,
9+
)
10+
from v03_pipeline.lib.paths import (
11+
new_variants_table_path,
12+
)
13+
from v03_pipeline.lib.tasks.base.base_loading_run_params import (
14+
BaseLoadingRunParams,
15+
)
16+
from v03_pipeline.lib.tasks.base.base_update import BaseUpdateTask
17+
from v03_pipeline.lib.tasks.files import GCSorLocalTarget
18+
from v03_pipeline.lib.tasks.write_new_variants_table import WriteNewVariantsTableTask
19+
20+
21+
@luigi.util.inherits(BaseLoadingRunParams)
22+
class UpdateNewVariantsWithCAIDsTask(BaseUpdateTask):
23+
def output(self) -> luigi.Target:
24+
return GCSorLocalTarget(
25+
new_variants_table_path(
26+
self.reference_genome,
27+
self.dataset_type,
28+
self.run_id,
29+
),
30+
)
31+
32+
def requires(self) -> list[luigi.Task]:
33+
return [
34+
self.clone(WriteNewVariantsTableTask),
35+
]
36+
37+
def complete(self) -> bool:
38+
return super().complete() and hasattr(hl.read_table(self.output().path), 'CAID')
39+
40+
def update_table(self, ht: hl.Table) -> hl.Table:
41+
# Register the new variant alleles to the Clingen Allele Registry
42+
# and annotate new_variants table with CAID.
43+
if not (
44+
Env.CLINGEN_ALLELE_REGISTRY_LOGIN and Env.CLINGEN_ALLELE_REGISTRY_PASSWORD
45+
):
46+
return ht.annotate(CAID=hl.missing(hl.tstr))
47+
ar_ht = hl.Table.parallelize(
48+
[],
49+
hl.tstruct(
50+
locus=hl.tlocus(self.reference_genome.value),
51+
alleles=hl.tarray(hl.tstr),
52+
CAID=hl.tstr,
53+
),
54+
key=('locus', 'alleles'),
55+
)
56+
for ar_ht_chunk in register_alleles_in_chunks(
57+
ht,
58+
self.reference_genome,
59+
):
60+
ar_ht = ar_ht.union(ar_ht_chunk)
61+
ar_ht, _ = checkpoint(ar_ht)
62+
return ht.join(ar_ht, 'left')

v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
from v03_pipeline.lib.tasks.base.base_update_variant_annotations_table import (
1616
BaseUpdateVariantAnnotationsTableTask,
1717
)
18+
from v03_pipeline.lib.tasks.update_new_variants_with_caids import (
19+
UpdateNewVariantsWithCAIDsTask,
20+
)
1821
from v03_pipeline.lib.tasks.write_new_variants_table import WriteNewVariantsTableTask
1922

2023

@@ -25,7 +28,9 @@ class UpdateVariantAnnotationsTableWithNewSamplesTask(
2528
def requires(self) -> list[luigi.Task]:
2629
return [
2730
*super().requires(),
28-
self.clone(WriteNewVariantsTableTask),
31+
self.clone(UpdateNewVariantsWithCAIDsTask)
32+
if self.dataset_type.should_send_to_allele_registry
33+
else self.clone(WriteNewVariantsTableTask),
2934
]
3035

3136
def complete(self) -> bool:

v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py

+20-8
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,10 @@ def test_missing_interval_reference_dataset(
140140
self.assertFalse(uvatwns_task.complete())
141141

142142
@responses.activate
143-
@patch('v03_pipeline.lib.tasks.write_new_variants_table.register_alleles_in_chunks')
143+
@patch(
144+
'v03_pipeline.lib.tasks.update_new_variants_with_caids.register_alleles_in_chunks',
145+
)
146+
@patch('v03_pipeline.lib.tasks.update_new_variants_with_caids.Env')
144147
@patch('v03_pipeline.lib.tasks.write_new_variants_table.Env')
145148
@patch(
146149
'v03_pipeline.lib.tasks.write_new_variants_table.UpdateVariantAnnotationsTableWithUpdatedReferenceDataset',
@@ -160,7 +163,8 @@ def test_multiple_update_vat(
160163
mock_vep: Mock,
161164
mock_standard_contigs: Mock,
162165
mock_update_vat_with_rd_task: Mock,
163-
mock_env: Mock,
166+
mock_env_new_variants: Mock,
167+
mock_env_caids: Mock,
164168
mock_register_alleles: Mock,
165169
) -> None:
166170
mock_update_vat_with_rd_task.return_value = (
@@ -174,9 +178,11 @@ def test_multiple_update_vat(
174178
{'ENST00000327044': 'NM_015658.4'},
175179
)
176180
# make register_alleles return CAIDs for 4 of 30 variants
177-
mock_env.GRCH38_TO_GRCH37_LIFTOVER_REF_PATH = GRCH38_TO_GRCH37_LIFTOVER_REF_PATH
178-
mock_env.CLINGEN_ALLELE_REGISTRY_LOGIN = 'login'
179-
mock_env.CLINGEN_ALLELE_REGISTRY_PASSWORD = 'password1' # noqa: S105
181+
mock_env_new_variants.GRCH38_TO_GRCH37_LIFTOVER_REF_PATH = (
182+
GRCH38_TO_GRCH37_LIFTOVER_REF_PATH
183+
)
184+
mock_env_caids.CLINGEN_ALLELE_REGISTRY_LOGIN = 'login'
185+
mock_env_caids.CLINGEN_ALLELE_REGISTRY_PASSWORD = 'password1' # noqa: S105
180186
mock_register_alleles.side_effect = [
181187
iter(
182188
[
@@ -543,7 +549,9 @@ def test_multiple_update_vat(
543549
)
544550

545551
@responses.activate
546-
@patch('v03_pipeline.lib.tasks.write_new_variants_table.register_alleles_in_chunks')
552+
@patch(
553+
'v03_pipeline.lib.tasks.update_new_variants_with_caids.register_alleles_in_chunks',
554+
)
547555
@patch(
548556
'v03_pipeline.lib.tasks.write_new_variants_table.UpdateVariantAnnotationsTableWithUpdatedReferenceDataset',
549557
)
@@ -698,7 +706,9 @@ def test_update_vat_grch37(
698706
)
699707

700708
@responses.activate
701-
@patch('v03_pipeline.lib.tasks.write_new_variants_table.register_alleles_in_chunks')
709+
@patch(
710+
'v03_pipeline.lib.tasks.update_new_variants_with_caids.register_alleles_in_chunks',
711+
)
702712
@patch(
703713
'v03_pipeline.lib.tasks.write_new_variants_table.UpdateVariantAnnotationsTableWithUpdatedReferenceDataset',
704714
)
@@ -771,7 +781,9 @@ def test_update_vat_without_accessing_private_datasets(
771781
)
772782

773783
@responses.activate
774-
@patch('v03_pipeline.lib.tasks.write_new_variants_table.register_alleles_in_chunks')
784+
@patch(
785+
'v03_pipeline.lib.tasks.update_new_variants_with_caids.register_alleles_in_chunks',
786+
)
775787
@patch(
776788
'v03_pipeline.lib.tasks.write_new_variants_table.UpdateVariantAnnotationsTableWithUpdatedReferenceDataset',
777789
)

v03_pipeline/lib/tasks/write_new_variants_table.py

+1-29
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@
55
import luigi.util
66

77
from v03_pipeline.lib.annotations.fields import get_fields
8-
from v03_pipeline.lib.misc.allele_registry import register_alleles_in_chunks
98
from v03_pipeline.lib.misc.callsets import get_callset_ht
10-
from v03_pipeline.lib.misc.io import checkpoint, remap_pedigree_hash
9+
from v03_pipeline.lib.misc.io import remap_pedigree_hash
1110
from v03_pipeline.lib.misc.math import constrain
1211
from v03_pipeline.lib.model import (
1312
Env,
@@ -191,33 +190,6 @@ def create_table(self) -> hl.Table:
191190
},
192191
)
193192
new_variants_ht = new_variants_ht.join(reference_dataset_ht, 'left')
194-
195-
# Register the new variant alleles to the Clingen Allele Registry
196-
# and annotate new_variants table with CAID.
197-
if (
198-
Env.CLINGEN_ALLELE_REGISTRY_LOGIN
199-
and Env.CLINGEN_ALLELE_REGISTRY_PASSWORD
200-
and self.dataset_type.should_send_to_allele_registry
201-
):
202-
ar_ht = hl.Table.parallelize(
203-
[],
204-
hl.tstruct(
205-
locus=hl.tlocus(self.reference_genome.value),
206-
alleles=hl.tarray(hl.tstr),
207-
CAID=hl.tstr,
208-
),
209-
key=('locus', 'alleles'),
210-
)
211-
for ar_ht_chunk in register_alleles_in_chunks(
212-
new_variants_ht,
213-
self.reference_genome,
214-
):
215-
ar_ht = ar_ht.union(ar_ht_chunk)
216-
ar_ht, _ = checkpoint(ar_ht)
217-
new_variants_ht = new_variants_ht.join(ar_ht, 'left')
218-
elif self.dataset_type.should_send_to_allele_registry:
219-
new_variants_ht = new_variants_ht.annotate(CAID=hl.missing(hl.tstr))
220-
221193
return new_variants_ht.select_globals(
222194
updates={
223195
hl.Struct(

0 commit comments

Comments
 (0)