@@ -630,3 +630,82 @@ def vcfpartition(vcfs, verbose, num_partitions, partition_size):
630
630
)
631
631
for region in regions :
632
632
click .echo (f"{ region } \t { vcf_path } " )
633
+
634
+
635
@click.command(name="convert")
@click.argument("ts_path", type=click.Path(exists=True))
@click.argument("zarr_path", type=click.Path())
@click.option(
    "--ploidy",
    type=int,
    default=2,
    help="Ploidy to use when tree sequence has no individuals (default: 2)",
)
# NOTE(review): no default is set on this option, so ``contig_id`` is None
# when the flag is omitted; the '1' mentioned in the help text is presumably
# applied by the downstream converter -- confirm.
@click.option(
    "--contig-id",
    type=str,
    help="Contig/chromosome ID (default: '1')",
)
@click.option(
    "--isolated-as-missing",
    is_flag=True,
    help="Treat isolated nodes as missing",
)
@variants_chunk_size
@samples_chunk_size
@verbose
@progress
@worker_processes
@force
def convert_tskit(
    ts_path,
    zarr_path,
    ploidy,
    contig_id,
    isolated_as_missing,
    variants_chunk_size,
    samples_chunk_size,
    verbose,
    progress,
    worker_processes,
    force,
):
    """
    Convert a tree sequence file to vcfzarr format.

    TS_PATH is the path to the tree sequence file.
    ZARR_PATH is the path for the output zarr directory.
    """
    # The docstring above doubles as the click ``--help`` text for this
    # command, so it is user-facing output rather than internal documentation.
    setup_logging(verbose)
    check_overwrite_dir(zarr_path, force)

    # Imported inside the command body rather than at module import time.
    import tskit

    from bio2zarr import tskit as tskit_converter

    tree_sequence = tskit.load(ts_path)

    # Use the individuals recorded in the tree sequence when there are any;
    # otherwise fall back to grouping the samples according to ``--ploidy``.
    if tree_sequence.num_individuals != 0:
        individuals_nodes = tree_sequence.individuals_nodes
    else:
        individuals_nodes = tree_sequence.split_samples_by_ploidy(ploidy)

    # Keyword options forwarded unchanged to the converter.
    conversion_options = {
        "contig_id": contig_id,
        "isolated_as_missing": isolated_as_missing,
        "variants_chunk_size": variants_chunk_size,
        "samples_chunk_size": samples_chunk_size,
        "worker_processes": worker_processes,
        "show_progress": progress,
    }
    tskit_converter.convert(
        ts_path,
        zarr_path,
        individuals_nodes,
        **conversion_options,
    )
698
+
699
+
700
# Top-level click group for the ``tskit2zarr`` command line entry point.
# The docstring below is shown by click as the group's ``--help`` text.
@version
@click.group(cls=NaturalOrderGroup, name="tskit2zarr")
def tskit2zarr_main():
    """
    Convert tree sequence file(s) to the vcfzarr format.

    See the online documentation at https://sgkit-dev.github.io/bio2zarr/
    for more information.
    """


# Register the "convert" subcommand on the group.
tskit2zarr_main.add_command(convert_tskit)
0 commit comments