Skip to content

Commit af601ff

Browse files
Add support for all-missing GT variants
Closes #328
1 parent 5341735 commit af601ff

File tree

5 files changed

+29
-3
lines changed

5 files changed

+29
-3
lines changed

CHANGELOG.md

+4
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,7 @@
1+
# 0.1.4 2025-03-XX
2+
3+
- Fix bug in handling all-missing genotypes (#328)
4+
15
# 0.1.3 2025-03-04
26

37
- Fix missing dependency issue for packaging

bio2zarr/vcf2zarr/icf.py

+2 -3
Original file line number | Diff line number | Diff line change
@@ -1095,9 +1095,8 @@ def process_partition(self, partition_index):
10951095
for field in info_fields:
10961096
tcw.append(field.full_name, variant.INFO.get(field.name, None))
10971097
if has_gt:
1098-
if variant.genotype is None:
1099-
val = None
1100-
else:
1098+
val = None
1099+
if "GT" in variant.FORMAT and variant.genotype is not None:
11011100
val = variant.genotype.array()
11021101
tcw.append("FORMAT/GT", val)
11031102
for field in format_fields:
1.09 KB
Binary file not shown.
190 Bytes
Binary file not shown.

tests/test_vcf_examples.py

+23
Original file line number | Diff line number | Diff line change
@@ -449,6 +449,29 @@ def test_region_index(self, ds):
449449
)
450450
nt.assert_array_equal(ds["region_index"], region_index)
451451

452+
def test_small_example_all_missing_gts(self, ds, tmp_path_factory):
453+
data_path = "tests/data/vcf/sample_all_missing_gts.vcf.gz"
454+
out = tmp_path_factory.mktemp("data") / "example.vcf.zarr"
455+
vcf2zarr.convert([data_path], out, worker_processes=0)
456+
ds2 = sg.load_dataset(out)
457+
458+
assert_dataset_equal(
459+
ds,
460+
ds2,
461+
drop_vars=["call_genotype", "call_genotype_mask", "call_genotype_phased"],
462+
)
463+
gt1 = ds["call_genotype"].values
464+
gt1[1] = -1
465+
nt.assert_array_equal(gt1, ds2["call_genotype"].values)
466+
m1 = ds["call_genotype_mask"].values
467+
m1[1] = True
468+
nt.assert_array_equal(m1, ds2["call_genotype_mask"].values)
469+
p1 = ds["call_genotype_phased"].values
470+
# NOTE: Not sure this is the correct behaviour, but testing here anyway
471+
# to keep a record that this is what we're doing
472+
p1[1] = True
473+
nt.assert_array_equal(p1, ds2["call_genotype_phased"].values)
474+
452475

453476
class TestSmallExampleLocalAlleles:
454477
data_path = "tests/data/vcf/sample.vcf.gz"

0 commit comments

Comments
 (0)