Skip to content

Commit 143037d

Browse files
committed
Add tskit CLI
1 parent 598c95f commit 143037d

File tree

2 files changed

+80
-0
lines changed

2 files changed

+80
-0
lines changed

bio2zarr/cli.py

+79
Original file line numberDiff line numberDiff line change
@@ -630,3 +630,82 @@ def vcfpartition(vcfs, verbose, num_partitions, partition_size):
630630
)
631631
for region in regions:
632632
click.echo(f"{region}\t{vcf_path}")
633+
634+
635+
@click.command(name="convert")
@click.argument("ts_path", type=click.Path(exists=True))
@click.argument("zarr_path", type=click.Path())
@click.option(
    "--ploidy",
    # A ploidy below 1 cannot describe a grouping of sample nodes; reject it
    # while parsing arguments so the user gets a usage error up front.
    type=click.IntRange(min=1),
    default=2,
    help="Ploidy to use when tree sequence has no individuals (default: 2)",
)
@click.option(
    "--contig-id",
    type=str,
    # Explicit default so the value passed on matches what the help text
    # promises, rather than forwarding None when the option is omitted.
    default="1",
    help="Contig/chromosome ID (default: '1')",
)
@click.option(
    "--isolated-as-missing", is_flag=True, help="Treat isolated nodes as missing"
)
@variants_chunk_size
@samples_chunk_size
@verbose
@progress
@worker_processes
@force
def convert_tskit(
    ts_path,
    zarr_path,
    ploidy,
    contig_id,
    isolated_as_missing,
    variants_chunk_size,
    samples_chunk_size,
    verbose,
    progress,
    worker_processes,
    force,
):
    """
    Convert a tree sequence file to vcfzarr format.

    TS_PATH is the path to the tree sequence file.
    ZARR_PATH is the path for the output zarr directory.
    """
    # NOTE: the docstring above doubles as the command's --help text, so
    # maintainer-facing documentation lives in comments instead.
    #
    # ploidy is only consulted when the tree sequence has no individuals;
    # otherwise the individual-to-node mapping comes from the file itself.
    # The remaining options are forwarded unchanged to bio2zarr.tskit.convert.
    setup_logging(verbose)
    check_overwrite_dir(zarr_path, force)

    # Imported inside the command rather than at module level (presumably so
    # the other CLI entry points work without tskit installed -- confirm).
    import tskit

    from bio2zarr import tskit as tskit_mod

    # The tree sequence is opened here only to derive the individual/node
    # mapping; convert() is still given the path, not this loaded object.
    ts = tskit.load(ts_path)

    if ts.num_individuals == 0:
        individuals_nodes = ts.split_samples_by_ploidy(ploidy)
    else:
        individuals_nodes = ts.individuals_nodes

    tskit_mod.convert(
        ts_path,
        zarr_path,
        individuals_nodes,
        contig_id=contig_id,
        isolated_as_missing=isolated_as_missing,
        variants_chunk_size=variants_chunk_size,
        samples_chunk_size=samples_chunk_size,
        worker_processes=worker_processes,
        show_progress=progress,
    )
698+
699+
700+
# Top-level command group behind the ``tskit2zarr`` console script; the body
# is intentionally empty because the group only hosts subcommands.
@version
@click.group(cls=NaturalOrderGroup, name="tskit2zarr")
def tskit2zarr_main():
    """
    Convert tree sequence file(s) to the vcfzarr format.

    See the online documentation at https://sgkit-dev.github.io/bio2zarr/
    for more information.
    """


# Register the "convert" subcommand on the group.
tskit2zarr_main.add_command(convert_tskit)

pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ documentation = "https://sgkit-dev.github.io/bio2zarr/"
5252
[project.scripts]
5353
vcf2zarr = "bio2zarr.cli:vcf2zarr_main"
5454
vcfpartition = "bio2zarr.cli:vcfpartition"
55+
tskit2zarr = "bio2zarr.cli:tskit2zarr_main"
5556

5657
[project.optional-dependencies]
5758
dev = [

0 commit comments

Comments
 (0)