Skip to content

Commit 1c305ab

Browse files
authored
chore: Remove code that supports expecting filters (#1072)
* Remove code that supports expecting filters
* kebab case test
1 parent 85bf51d commit 1c305ab

File tree

8 files changed, with 1 addition and 96 deletions

8 files changed, with 1 addition and 96 deletions

v03_pipeline/lib/model/dataset_type.py

+1 -7
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
import hail as hl
55

66
from v03_pipeline.lib.annotations import gcnv, mito, shared, snv_indel, sv
7-
from v03_pipeline.lib.model.definitions import ReferenceGenome, SampleType
7+
from v03_pipeline.lib.model.definitions import ReferenceGenome
88

99
MITO_MIN_HOM_THRESHOLD = 0.95
1010
ZERO = 0.0
@@ -183,12 +183,6 @@ def has_gencode_ensembl_to_refseq_id_mapping(
183183
self == DatasetType.SNV_INDEL and reference_genome == ReferenceGenome.GRCh38
184184
)
185185

186-
def expect_filters(
187-
self,
188-
sample_type: SampleType,
189-
) -> bool:
190-
return self == DatasetType.SNV_INDEL and sample_type == SampleType.WES
191-
192186
def expect_tdr_metrics(
193187
self,
194188
reference_genome: ReferenceGenome,

v03_pipeline/lib/model/feature_flag.py

-2
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@
77
)
88
CHECK_SEX_AND_RELATEDNESS = os.environ.get('CHECK_SEX_AND_RELATEDNESS') == '1'
99
EXPECT_TDR_METRICS = os.environ.get('EXPECT_TDR_METRICS') == '1'
10-
EXPECT_WES_FILTERS = os.environ.get('EXPECT_WES_FILTERS') == '1'
1110
INCLUDE_PIPELINE_VERSION_IN_PREFIX = (
1211
os.environ.get('INCLUDE_PIPELINE_VERSION_IN_PREFIX') == '1'
1312
)
@@ -22,7 +21,6 @@ class FeatureFlag:
2221
ACCESS_PRIVATE_REFERENCE_DATASETS: bool = ACCESS_PRIVATE_REFERENCE_DATASETS
2322
CHECK_SEX_AND_RELATEDNESS: bool = CHECK_SEX_AND_RELATEDNESS
2423
EXPECT_TDR_METRICS: bool = EXPECT_TDR_METRICS
25-
EXPECT_WES_FILTERS: bool = EXPECT_WES_FILTERS
2624
INCLUDE_PIPELINE_VERSION_IN_PREFIX: bool = INCLUDE_PIPELINE_VERSION_IN_PREFIX
2725
RUN_PIPELINE_ON_DATAPROC: bool = RUN_PIPELINE_ON_DATAPROC
2826
SHOULD_TRIGGER_HAIL_BACKEND_RELOAD: bool = SHOULD_TRIGGER_HAIL_BACKEND_RELOAD

v03_pipeline/lib/paths.py

-19
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
import hashlib
22
import os
3-
import re
43

54
import hailtop.fs as hfs
65

@@ -297,24 +296,6 @@ def sex_check_table_path(
297296
)
298297

299298

300-
def valid_filters_path(
301-
dataset_type: DatasetType,
302-
sample_type: SampleType,
303-
callset_path: str,
304-
) -> str | None:
305-
if (
306-
not FeatureFlag.EXPECT_WES_FILTERS
307-
or not dataset_type.expect_filters(sample_type)
308-
or 'part_one_outputs' not in callset_path
309-
):
310-
return None
311-
return re.sub(
312-
'part_one_outputs/.*$',
313-
'part_two_outputs/*.filtered.*.vcf.gz',
314-
callset_path,
315-
)
316-
317-
318299
def valid_reference_dataset_path(
319300
reference_genome: ReferenceGenome,
320301
reference_dataset: ReferenceDataset,

v03_pipeline/lib/paths_test.py

-21
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@
2020
remapped_and_subsetted_callset_path,
2121
sex_check_table_path,
2222
tdr_metrics_path,
23-
valid_filters_path,
2423
validation_errors_for_run_path,
2524
variant_annotations_table_path,
2625
)
@@ -66,26 +65,6 @@ def test_family_table_path(self) -> None:
6665
'/var/bucket/GRCh37/SNV_INDEL/families/WES/franklin.ht',
6766
)
6867

69-
def test_valid_filters_path(self) -> None:
70-
self.assertEqual(
71-
valid_filters_path(
72-
DatasetType.MITO,
73-
SampleType.WES,
74-
'/var/bucket/RDG_Broad_WES_Internal_Oct2023/part_one_outputs/chr*/*.vcf.gz',
75-
),
76-
None,
77-
)
78-
with patch('v03_pipeline.lib.paths.FeatureFlag') as mock_ff:
79-
mock_ff.EXPECT_WES_FILTERS = True
80-
self.assertEqual(
81-
valid_filters_path(
82-
DatasetType.SNV_INDEL,
83-
SampleType.WES,
84-
'/var/bucket/RDG_Broad_WES_Internal_Oct2023/part_one_outputs/chr*/*.vcf.gz',
85-
),
86-
'/var/bucket/RDG_Broad_WES_Internal_Oct2023/part_two_outputs/*.filtered.*.vcf.gz',
87-
)
88-
8968
def test_project_table_path(self) -> None:
9069
self.assertEqual(
9170
project_table_path(

v03_pipeline/lib/tasks/base/base_loading_run_params.py

-4
Original file line number | Diff line number | Diff line change
@@ -25,10 +25,6 @@ class BaseLoadingRunParams(luigi.Task):
2525
default=False,
2626
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
2727
)
28-
skip_expect_filters = luigi.BoolParameter(
29-
default=False,
30-
parsing=luigi.BoolParameter.EXPLICIT_PARSING,
31-
)
3228
skip_expect_tdr_metrics = luigi.BoolParameter(
3329
default=False,
3430
parsing=luigi.BoolParameter.EXPLICIT_PARSING,

v03_pipeline/lib/tasks/dataproc/create_dataproc_cluster.py

-3
Original file line number | Diff line number | Diff line change
@@ -105,9 +105,6 @@ def get_cluster_config(reference_genome: ReferenceGenome, run_id: str):
105105
'spark-env:EXPECT_TDR_METRICS': '1'
106106
if FeatureFlag.EXPECT_TDR_METRICS
107107
else '0',
108-
'spark-env:EXPECT_WES_FILTERS': '1'
109-
if FeatureFlag.EXPECT_WES_FILTERS
110-
else '0',
111108
'spark-env:HAIL_SEARCH_DATA_DIR': Env.HAIL_SEARCH_DATA_DIR,
112109
'spark-env:HAIL_TMP_DIR': Env.HAIL_TMP_DIR,
113110
'spark-env:INCLUDE_PIPELINE_VERSION_IN_PREFIX': '1'

v03_pipeline/lib/tasks/dataproc/misc_test.py

-2
Original file line number | Diff line number | Diff line change
@@ -41,8 +41,6 @@ def test_to_kebab_str_args(self, _: Mock):
4141
'["test_pedigree"]',
4242
'--skip-check-sex-and-relatedness',
4343
'False',
44-
'--skip-expect-filters',
45-
'False',
4644
'--skip-expect-tdr-metrics',
4745
'False',
4846
'--skip-validation',

v03_pipeline/lib/tasks/write_imported_callset.py

-38
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
from v03_pipeline.lib.misc.callsets import get_additional_row_fields
66
from v03_pipeline.lib.misc.io import (
77
import_callset,
8-
import_vcf,
98
select_relevant_fields,
109
split_multi_hts,
1110
)
@@ -14,10 +13,8 @@
1413
validate_imported_field_types,
1514
)
1615
from v03_pipeline.lib.misc.vets import annotate_vets
17-
from v03_pipeline.lib.model.feature_flag import FeatureFlag
1816
from v03_pipeline.lib.paths import (
1917
imported_callset_path,
20-
valid_filters_path,
2118
variant_annotations_table_path,
2219
)
2320
from v03_pipeline.lib.tasks.base.base_loading_run_params import BaseLoadingRunParams
@@ -43,26 +40,7 @@ def output(self) -> luigi.Target:
4340
)
4441

4542
def requires(self) -> list[luigi.Task]:
46-
requirements = []
47-
if (
48-
FeatureFlag.EXPECT_WES_FILTERS
49-
and not self.skip_expect_filters
50-
and self.dataset_type.expect_filters(
51-
self.sample_type,
52-
)
53-
):
54-
requirements = [
55-
*requirements,
56-
CallsetTask(
57-
valid_filters_path(
58-
self.dataset_type,
59-
self.sample_type,
60-
self.callset_path,
61-
),
62-
),
63-
]
6443
return [
65-
*requirements,
6644
CallsetTask(self.callset_path),
6745
]
6846

@@ -74,21 +52,6 @@ def create_table(self) -> hl.MatrixTable:
7452
self.reference_genome,
7553
self.dataset_type,
7654
)
77-
filters_path = None
78-
if (
79-
FeatureFlag.EXPECT_WES_FILTERS
80-
and not self.skip_expect_filters
81-
and self.dataset_type.expect_filters(
82-
self.sample_type,
83-
)
84-
):
85-
filters_path = valid_filters_path(
86-
self.dataset_type,
87-
self.sample_type,
88-
self.callset_path,
89-
)
90-
filters_ht = import_vcf(filters_path, self.reference_genome).rows()
91-
mt = mt.annotate_rows(filters=filters_ht[mt.row_key].filters)
9255
additional_row_fields = get_additional_row_fields(
9356
mt,
9457
self.reference_genome,
@@ -139,5 +102,4 @@ def create_table(self) -> hl.MatrixTable:
139102
mt = annotate_vets(mt)
140103
return mt.select_globals(
141104
callset_path=self.callset_path,
142-
filters_path=filters_path or hl.missing(hl.tstr),
143105
)

0 commit comments

Comments
 (0)