diff --git a/pycistopic-lib/Cargo.lock b/pycistopic-lib/Cargo.lock
new file mode 100644
index 0000000..17eeb5f
--- /dev/null
+++ b/pycistopic-lib/Cargo.lock
@@ -0,0 +1,357 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "adler2"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
+
+[[package]]
+name = "autocfg"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
+
+[[package]]
+name = "bit-vec"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
+
+[[package]]
+name = "bstr"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c"
+dependencies = [
+ "memchr",
+ "serde",
+]
+
+[[package]]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
+[[package]]
+name = "bytes"
+version = "1.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "crc32fast"
+version = "1.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "crossbeam-channel"
+version = "0.5.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
+[[package]]
+name = "equivalent"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
+
+[[package]]
+name = "flate2"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "11faaf5a5236997af9848be0bef4db95824b1d534ebc64d0f0c6cf3e67bd38dc"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb"
+
+[[package]]
+name = "heck"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+
+[[package]]
+name = "indexmap"
+version = "2.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da"
+dependencies = [
+ "equivalent",
+ "hashbrown",
+]
+
+[[package]]
+name = "indoc"
+version = "2.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd"
+
+[[package]]
+name = "libc"
+version = "0.2.170"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828"
+
+[[package]]
+name = "memchr"
+version = "2.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
+
+[[package]]
+name = "memoffset"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "miniz_oxide"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5"
+dependencies = [
+ "adler2",
+]
+
+[[package]]
+name = "noodles"
+version = "0.93.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "74eea88cacba7e16cb6aff1bc3e077f3f9e036e59b9107a8415bf9242d0c2d97"
+dependencies = [
+ "noodles-bgzf",
+ "noodles-core",
+ "noodles-csi",
+ "noodles-tabix",
+]
+
+[[package]]
+name = "noodles-bgzf"
+version = "0.36.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c10c4cb29950028823653395ebedfcaf5b652e504aacf816b510fb0268daa2eb"
+dependencies = [
+ "byteorder",
+ "bytes",
+ "crossbeam-channel",
+ "flate2",
+]
+
+[[package]]
+name = "noodles-core"
+version = "0.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "962b13b79312f773a12ffcb0cdaccab6327f8343b6f440a888eff10c749d52b0"
+dependencies = [
+ "bstr",
+]
+
+[[package]]
+name = "noodles-csi"
+version = "0.44.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "45b402963653cacd5df474889a1cfb969e2f66469023836ad3b0a2c75e2b4e7c"
+dependencies = [
+ "bit-vec",
+ "bstr",
+ "byteorder",
+ "indexmap",
+ "noodles-bgzf",
+ "noodles-core",
+]
+
+[[package]]
+name = "noodles-tabix"
+version = "0.50.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e46ee069d57770840554e3d8bd02e7415b3a2d972cdea7525bea02472032c234"
+dependencies = [
+ "byteorder",
+ "indexmap",
+ "noodles-bgzf",
+ "noodles-core",
+ "noodles-csi",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.20.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e"
+
+[[package]]
+name = "portable-atomic"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e"
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.94"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "pycistopic-lib"
+version = "0.1.0"
+dependencies = [
+ "noodles",
+ "pyo3",
+]
+
+[[package]]
+name = "pyo3"
+version = "0.23.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7778bffd85cf38175ac1f545509665d0b9b92a198ca7941f131f85f7a4f9a872"
+dependencies = [
+ "cfg-if",
+ "indoc",
+ "libc",
+ "memoffset",
+ "once_cell",
+ "portable-atomic",
+ "pyo3-build-config",
+ "pyo3-ffi",
+ "pyo3-macros",
+ "unindent",
+]
+
+[[package]]
+name = "pyo3-build-config"
+version = "0.23.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94f6cbe86ef3bf18998d9df6e0f3fc1050a8c5efa409bf712e661a4366e010fb"
+dependencies = [
+ "once_cell",
+ "target-lexicon",
+]
+
+[[package]]
+name = "pyo3-ffi"
+version = "0.23.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e9f1b4c431c0bb1c8fb0a338709859eed0d030ff6daa34368d3b152a63dfdd8d"
+dependencies = [
+ "libc",
+ "pyo3-build-config",
+]
+
+[[package]]
+name = "pyo3-macros"
+version = "0.23.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fbc2201328f63c4710f68abdf653c89d8dbc2858b88c5d88b0ff38a75288a9da"
+dependencies = [
+ "proc-macro2",
+ "pyo3-macros-backend",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "pyo3-macros-backend"
+version = "0.23.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fca6726ad0f3da9c9de093d6f116a93c1a38e417ed73bf138472cf4064f72028"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "pyo3-build-config",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.39"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1f1914ce909e1658d9907913b4b91947430c7d9be598b15a1912935b8c04801"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "serde"
+version = "1.0.213"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3ea7893ff5e2466df8d720bb615088341b295f849602c6956047f8f80f0e9bc1"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.213"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e85ad2009c50b58e87caa8cd6dac16bdf511bbfb7af6c33df902396aa480fa5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.99"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e02e925281e18ffd9d640e234264753c43edc62d64b2d4cf898f1bc5e75f3fc2"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "target-lexicon"
+version = "0.12.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
+
+[[package]]
+name = "unindent"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
diff --git a/pycistopic-lib/Cargo.toml b/pycistopic-lib/Cargo.toml
new file mode 100644
index 0000000..4dda0b7
--- /dev/null
+++ b/pycistopic-lib/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "pycistopic-lib"
+version = "0.1.0"
+edition = "2021"
+
+[lib]
+# The name of the native library. This is the name which will be used in Python to import the
+# library (i.e. `import string_sum`). If you change this, you must also change the name of the
+# `#[pymodule]` in `src/lib.rs`.
+name = "pycistopic_lib"
+# "cdylib" is necessary to produce a shared library for Python to import from.
+#
+# Downstream Rust code (including code in `bin/`, `examples/`, and `tests/`) will not be able
+# to `use string_sum;` unless the "rlib" or "lib" crate type is also included, e.g.:
+# crate-type = ["cdylib", "rlib"]
+crate-type = ["cdylib"]
+
+[dependencies]
+pyo3 = { version = "0.23.5", features = ["extension-module"] }
+noodles = { version = "0.93.0", features = ["tabix", "bgzf", "csi", "core"] }
diff --git a/pycistopic-lib/src/custom_errors.rs b/pycistopic-lib/src/custom_errors.rs
new file mode 100644
index 0000000..1653eb7
--- /dev/null
+++ b/pycistopic-lib/src/custom_errors.rs
@@ -0,0 +1,58 @@
+use pyo3::exceptions::{PyIOError, PyValueError};
+use std::fmt;
+use pyo3::PyErr;
+
+#[derive(Debug)]
+pub struct InvalidFragmentFileError {
+    pub filename: String,
+}
+
+impl InvalidFragmentFileError {
+    pub fn new(filename: &str) -> InvalidFragmentFileError {
+        InvalidFragmentFileError{
+            filename: filename.to_string()
+        }
+    }
+}
+
+impl std::error::Error for InvalidFragmentFileError{}
+
+impl fmt::Display for InvalidFragmentFileError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}: Invalid fragment file.", self.filename)
+    }
+}
+
+impl From<InvalidFragmentFileError> for PyErr {
+    fn from(err: InvalidFragmentFileError) -> PyErr {
+        PyIOError::new_err(err.to_string())
+    }
+}
+
+#[derive(Debug)]
+pub struct ValueError {
+    pub message: String,
+}
+
+impl ValueError {
+    pub fn new(message: String) -> ValueError {
+        ValueError {
+           message
+        }
+    }
+}
+
+impl std::error::Error for ValueError{}
+
+impl fmt::Display for ValueError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", self.message)
+    }
+}
+
+impl From<ValueError> for PyErr {
+    fn from(err: ValueError) -> PyErr {
+        PyValueError::new_err(err.to_string())
+    }
+}
+
diff --git a/pycistopic-lib/src/lib.rs b/pycistopic-lib/src/lib.rs
new file mode 100644
index 0000000..ee315e2
--- /dev/null
+++ b/pycistopic-lib/src/lib.rs
@@ -0,0 +1,14 @@
+mod pseudobulk;
+mod custom_errors;
+
+use pyo3::prelude::*;
+
+
+/// A Python module implemented in Rust. The name of this function must match
+/// the `lib.name` setting in the `Cargo.toml`, else Python will not be able to
+/// import the module.
+#[pymodule]
+fn pycistopic_lib(m: &Bound<'_, PyModule>) -> PyResult<()> {
+    m.add_function(wrap_pyfunction!(pseudobulk::split_fragment_files_by_cell_type, m)?)?;
+    Ok(())
+}
diff --git a/pycistopic-lib/src/pseudobulk.rs b/pycistopic-lib/src/pseudobulk.rs
new file mode 100644
index 0000000..280793e
--- /dev/null
+++ b/pycistopic-lib/src/pseudobulk.rs
@@ -0,0 +1,294 @@
+use super::custom_errors;
+use std::fmt;
+use std::fs::File;
+use std::path::Path;
+use std::collections::BinaryHeap;
+use std::collections::HashMap;
+use std::cmp::Reverse;
+use std::io::{BufRead, Write};
+use pyo3::prelude::*;
+use std::thread;
+use noodles::{tabix, bgzf};
+use noodles::csi::BinningIndex;
+use noodles::core::{region::Interval, Position};
+
+#[derive(Eq, PartialEq, Clone)]
+struct GenomicRange {
+    chromosome: String,
+    start: usize,
+    end: usize,
+    cell_barcode: String,
+    score: Option<usize>,
+    file_index: usize,
+    file_name: String
+}
+
+impl Ord for GenomicRange {
+    fn cmp(&self, other: &GenomicRange) -> std::cmp::Ordering {
+           self.start.cmp(&other.start)
+            .then(self.end.cmp(&other.end))
+    }
+}
+
+impl PartialOrd for GenomicRange {
+    fn partial_cmp(&self, other: &GenomicRange) -> Option<std::cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl GenomicRange {
+    fn new(line: &str, file_index: usize, filename: &str) -> Result<GenomicRange, custom_errors::InvalidFragmentFileError> {
+        let fields: Vec<&str> = line.split('\t').collect();
+        if fields.len() < 4 {
+            return Err(custom_errors::InvalidFragmentFileError::new(filename));
+        }
+        Ok(GenomicRange{
+            chromosome: fields[0].to_string(),
+            start: fields[1].parse::<usize>()
+                .map_err(|_| custom_errors::InvalidFragmentFileError::new(filename))?,
+            end: fields[2].parse::<usize>()
+                .map_err(|_| custom_errors::InvalidFragmentFileError::new(filename))?,
+            cell_barcode: fields[3].to_string(),
+            score: if fields.len() > 4 {
+                    Some(fields[4].parse::<usize>()
+                        .map_err(|_| custom_errors::InvalidFragmentFileError::new(filename))?
+                    )
+                } else {None},
+            file_index,
+            file_name: filename.to_string()
+        })
+    }
+}
+
+impl fmt::Display for GenomicRange {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self.score {
+            Some(score) => write!(
+                f,
+                "{}\t{}\t{}\t{}\t{}",
+                self.chromosome, self.start, self.end, self.cell_barcode, score
+            ),
+            None => write!(
+                f,
+                "{}\t{}\t{}\t{}",
+                self.chromosome, self.start, self.end, self.cell_barcode
+            ),
+        }
+    }
+}
+
+
+struct FragmentFileReader {
+    reader: bgzf::Reader<File>,
+    index: Option<tabix::Index>,
+    fragment: Result<GenomicRange, custom_errors::InvalidFragmentFileError>,
+    buffer: String,
+    valid_cell_barcodes: Vec<String>,
+    target_chrom: String,
+    file_index: usize,
+    file_name: String,
+    at_end_of_file: bool,
+}
+
+impl FragmentFileReader {
+    fn new(fragment_file_path: &str, valid_cell_barcodes: Vec<String>, target_chrom: String, file_index: usize) -> Result<FragmentFileReader, std::io::Error> {
+        let reader = bgzf::Reader::new(File::open(Path::new(fragment_file_path))?);
+        let mut index: Option<tabix::Index> = None;
+        if Path::new(&format!("{}.tbi", fragment_file_path)).exists() {
+            println!("Index found: {}", format!("{}.tbi", fragment_file_path));
+            index = Some(tabix::fs::read(format!("{}.tbi", fragment_file_path))?);
+        }
+        Ok(FragmentFileReader{
+            reader,
+            index,
+            fragment: GenomicRange::new("", file_index, fragment_file_path),
+            buffer: String::new(),
+            valid_cell_barcodes,
+            target_chrom,
+            file_index, 
+            file_name: fragment_file_path.to_string(),
+            at_end_of_file: false
+        })
+    }
+
+    fn at_chrom(&self) -> bool {
+        if let Ok(fragment) = &self.fragment{
+            fragment.chromosome == self.target_chrom
+        } else {
+            false
+        }
+    }
+
+    fn read_next(&mut self) -> Result<(), custom_errors::InvalidFragmentFileError>{
+        self.buffer.clear();
+        match self.reader.read_line(&mut self.buffer) {
+            Ok(bytes_read) => {
+                if bytes_read == 0 {
+                    self.at_end_of_file = true
+                }
+            },
+            Err(_) => {
+                return Err(custom_errors::InvalidFragmentFileError::new(&self.file_name))
+            }
+        }
+        self.buffer = self.buffer.trim().to_string();
+        self.fragment = GenomicRange::new(&self.buffer, self.file_index, &self.file_name);
+        Ok(())
+    }
+
+    fn skip_header(&mut self, pattern: &str) -> Result<(), custom_errors::InvalidFragmentFileError> {
+        self.buffer = pattern.to_string();
+        while self.buffer.starts_with(pattern) && !self.at_end_of_file {
+            self.read_next()?;
+        }
+        Ok(())
+    }
+
+    fn skip_to_chromosome(&mut self, chromosome: &str) -> Result<(), custom_errors::InvalidFragmentFileError> {
+        let mut using_index = false;
+        if let Some(index) = &self.index {
+            if let Some(header) = index.header() {
+                let seq_names = header.reference_sequence_names();
+                if let Some(chromosome_index) = seq_names.get_index_of(chromosome.as_bytes()) {
+                    if let Ok(q) = index.query(
+                        chromosome_index,
+                        Interval::from(Position::MIN..)
+                    ) {
+                        let first_chunk = q[0];
+                        self.reader.seek(first_chunk.start()).map_err(|_| custom_errors::InvalidFragmentFileError::new(&self.file_name))?;
+                        self.read_next()?;
+                        using_index = true;
+                    } 
+                }
+            }
+        } 
+        if !using_index {
+            // If no index, manually skip header lines.
+            // otherwise this is done automatically
+            self.skip_header("#")?;
+        }
+        while !self.at_end_of_file {
+            if let Ok(fragment) = &self.fragment {
+                if fragment.chromosome == chromosome {
+                    return Ok(());
+                }
+            };
+            self.read_next()?;
+        }
+        Ok(())
+    }
+
+    fn get_next_valid_fragment(&mut self) -> Result<Option<GenomicRange>, custom_errors::InvalidFragmentFileError> {
+        while !self.at_end_of_file {
+            // first check current fragment
+            // it could be that after skip_header and skip_to_chromosome we are already at a 
+            // valid fragment.
+            if let Ok(fragment) = &self.fragment {
+                if self.valid_cell_barcodes.contains(&fragment.cell_barcode) {
+                    let fragment = fragment.clone();
+                    if fragment.chromosome != self.target_chrom {
+                        return Ok(None)
+                    }
+                    // read next fragment so next time we enter this function we have a new frag,et
+                    self.read_next()?;
+                    return Ok(Some(fragment));
+                }
+            }
+            self.read_next()?;
+        }
+        Ok(None)
+    }
+}
+
+fn split_fragments_by_cell_barcodes_for_chromosome(
+    fragment_file_paths: &[&str],
+    fragment_file_to_cell_barcode: &HashMap<String, Vec<String>>,
+    chromosome: &str,
+    gz_output_file: &mut bgzf::Writer<File>
+) -> PyResult<()>{
+
+    // Open fragment files which are gzipped, and pos-sorted.
+    let mut readers: Vec<FragmentFileReader> = Vec::new();
+    for (file_index, fragment_file_path) in fragment_file_paths.iter().enumerate() {
+        if let Some(cell_barcodes) = fragment_file_to_cell_barcode.get(&fragment_file_path.to_string()) {
+            readers.push(
+                FragmentFileReader::new(
+                    fragment_file_path,
+                    cell_barcodes.to_vec(),
+                    chromosome.to_string(),
+                    file_index)?
+            );
+        }
+    }
+
+    // A binary heap will be used to write fragments in order from different files
+    let mut heap = BinaryHeap::new();
+    
+    // go to correct chromosome and push first valid fragment to binary heap
+    for reader in readers.iter_mut(){
+        reader.skip_to_chromosome(chromosome)?;
+        if !reader.at_end_of_file  && reader.at_chrom() {
+            if let Some(fragment) = reader.get_next_valid_fragment()? {
+                heap.push(Reverse(fragment));
+            }
+        }
+    }
+
+    while let Some(Reverse(fragment)) = heap.pop() {
+        gz_output_file.write_all(format!("{}\n", fragment).as_bytes())?;
+        // read from file that currently has the smallest genomic range
+        let reader = &mut readers[fragment.file_index];
+        if !reader.at_end_of_file && reader.at_chrom() {
+            if let Some(fragment) = reader.get_next_valid_fragment()? {
+                heap.push(Reverse(fragment));
+            }
+        }
+    }
+    Ok(())
+}
+
+#[pyfunction]
+pub fn split_fragment_files_by_cell_type(
+    fragment_file_paths: Vec<String>,
+    output_file_prefix: &str,
+    cell_type_to_fragment_file_to_cell_barcode: HashMap<String, HashMap<String, Vec<String>>>,
+    chromosomes: Vec<String>
+) -> PyResult<()> {
+    for cell_type in cell_type_to_fragment_file_to_cell_barcode.keys() {
+        let mut handles: Vec<thread::JoinHandle<_>> = Vec::new();
+        for chromosome in &chromosomes {
+            let output_file_name = format!("{}_{}.{}.tsv.gz", output_file_prefix, cell_type, chromosome);
+            let fragment_file_paths = fragment_file_paths.clone(); // Need to clone since threads take ownership
+            let fragment_file_to_cell_barcode = cell_type_to_fragment_file_to_cell_barcode
+                .get(cell_type)
+                .unwrap()
+                .clone();
+            let file = File::create(output_file_name)?;
+            let chromosome = chromosome.clone();
+            let handle = thread::spawn(move || {
+                let mut gz_output_file = bgzf::Writer::new(file);
+                split_fragments_by_cell_barcodes_for_chromosome(
+                    &fragment_file_paths.iter().map(|p| p.as_str()).collect::<Vec<_>>(),
+                    &fragment_file_to_cell_barcode,
+                    &chromosome,
+                    &mut gz_output_file
+                )
+            });
+            handles.push(handle);
+        }
+        for handle in handles {
+            handle.join().expect("Thread panicked")?;
+        }
+        // concat all chromosomes
+        let output_file_name = format!("{}_{}.tsv.gz", output_file_prefix, cell_type);
+        let output_file = File::create(&output_file_name)?;
+        let mut writer = std::io::BufWriter::new(output_file);
+        for chromosome in &chromosomes {
+            let input_file_name = format!("{}_{}.{}.tsv.gz", output_file_prefix, cell_type, chromosome);
+            let mut input_file = File::open(&input_file_name)?;
+            std::io::copy(&mut input_file, &mut writer)?;
+        }
+        writer.flush()?;
+    }
+    Ok(())
+}
diff --git a/pycistopic-lib/test/bcs_nextgem.txt b/pycistopic-lib/test/bcs_nextgem.txt
new file mode 100644
index 0000000..2441cc2
--- /dev/null
+++ b/pycistopic-lib/test/bcs_nextgem.txt
@@ -0,0 +1,10 @@
+CCACGTTCAGTAACTC-1
+TTCGATTCATCTCTCG-1
+TTGCCCACAAGGTTCT-1
+TTTGGCCAGTACAGAT-1
+CTAACTTAGAAATGGG-1
+TGAGTCACACCACGAC-1
+CGAGTTATCAAGTTGC-1
+TGCATTTGTAACATAG-1
+TGCATTTGTAACATAG-1
+GAAGTCTTCTACTTTG-1
diff --git a/pycistopic-lib/test/bcs_v1.txt b/pycistopic-lib/test/bcs_v1.txt
new file mode 100644
index 0000000..cc57001
--- /dev/null
+++ b/pycistopic-lib/test/bcs_v1.txt
@@ -0,0 +1,10 @@
+GGTGTCGTCAAGGCAG-1
+CTCAACCTCAGGTTTG-1
+GGTGTCGTCAAGGCAG-1
+AGACAAACACATCATG-1
+CATGCCTGTCCTTATT-1
+GGTGTCGTCAAGGCAG-1
+GCATTCCGTTAGAGAT-1
+CAGCTAATCTGATCCC-1
+GAGCATTAGCTTACCA-1
+GCATTCCGTTAGAGAT-1
diff --git a/pycistopic-lib/test/pseudobulk_test.py b/pycistopic-lib/test/pseudobulk_test.py
new file mode 100644
index 0000000..19d4353
--- /dev/null
+++ b/pycistopic-lib/test/pseudobulk_test.py
@@ -0,0 +1,154 @@
+import pytest
+import urllib.request
+import urllib
+import gzip
+import os
+import pathlib
+import shutil
+
+from pycistopic_lib import split_fragment_files_by_cell_type
+
+TEST_DIRECTORY = pathlib.Path(__file__).parent.absolute()
+
+FRAGMENT_FILEs = [
+    TEST_DIRECTORY / pathlib.Path("atac_pbmc_500_nextgem_fragments.tsv.gz"),
+    TEST_DIRECTORY / pathlib.Path("atac_pbmc_500_v1_fragments.tsv.gz")
+]
+
+INDEX_FILEs= [
+    TEST_DIRECTORY / pathlib.Path("atac_pbmc_500_nextgem_fragments.tsv.gz.tbi"),
+    TEST_DIRECTORY / pathlib.Path("atac_pbmc_500_v1_fragments.tsv.gz.tbi")
+]
+
+@pytest.fixture
+def fragment_file(tmp_path):
+    for f in FRAGMENT_FILEs:
+        shutil.copy(f, tmp_path)
+    return [tmp_path / f.name for f in FRAGMENT_FILEs]
+
+@pytest.fixture
+def fragment_file_and_index(tmp_path):
+    for f in FRAGMENT_FILEs:
+        shutil.copy(f, tmp_path)
+    for f in INDEX_FILEs:
+        shutil.copy(f, tmp_path)
+    return [tmp_path / f.name for f in FRAGMENT_FILEs]
+
+def count_number_of_fragments_per_chrom_for_cbs(file_name: str, cbs: list[str]):
+    n_fragment_per_cb_per_chrom = {
+        cb: {} for cb in cbs
+    }
+    with gzip.open(file_name) as f:
+        for fragment in f:
+            chrom, _, _, cb = fragment.decode().split()[0:4]
+            if cb in cbs:
+                if chrom not in n_fragment_per_cb_per_chrom[cb]:
+                    n_fragment_per_cb_per_chrom[cb][chrom] = 0
+                n_fragment_per_cb_per_chrom[cb][chrom] += 1
+    return n_fragment_per_cb_per_chrom
+
+def fragment_file_correct_order(file_name: str):
+    chrom_to_start = {}
+    with gzip.open(file_name) as f:
+        for l in f:
+            l = l.decode()
+            if l.startswith("#"):
+                continue
+            chrom, start = l.split()[0:2]
+            if chrom not in chrom_to_start:
+                chrom_to_start[chrom] = []
+            chrom_to_start[chrom].append(int(start))
+    def always_inc(l):
+        x = l[0]
+        for y in l[1:]:
+            if y < x:
+                return False
+        return True
+    return all([always_inc(chrom_to_start[chrom]) for chrom in chrom_to_start])
+
+def test_pseudobulk_indexed(fragment_file_and_index, tmp_path):
+    fragment_files = fragment_file_and_index
+    bcs_1 = []
+    bcs_2 = []
+    with open(TEST_DIRECTORY / pathlib.Path("bcs_nextgem.txt")) as f:
+        for l in f:
+            bcs_1.append(l.strip())
+
+    with open(TEST_DIRECTORY / pathlib.Path("bcs_v1.txt")) as f:
+        for l in f:
+            bcs_2.append(l.strip())
+    sample_to_cb = {
+        str(fragment_files[0]): bcs_1,
+        str(fragment_files[1]): bcs_2,
+    }
+    split_fragment_files_by_cell_type(
+        fragment_file_paths=[str(f) for f in fragment_files],
+        output_file_prefix=str(tmp_path) +  "/",
+        cell_type_to_fragment_file_to_cell_barcode={
+            "cell_type": sample_to_cb
+        },
+        chromosomes=[*[f"chr{x + 1}" for x in range(22)], "chrY", "chrX"]
+    )
+    assert(fragment_file_correct_order(tmp_path / "_cell_type.tsv.gz"))
+    n_fragment_per_cb_per_chrom_outf = count_number_of_fragments_per_chrom_for_cbs(
+        tmp_path / "_cell_type.tsv.gz",
+        [*list(sample_to_cb.values())[0], *list(sample_to_cb.values())[1]]
+    )
+    n_fragment_per_cb_per_chrom_inf = {}
+    for sample in fragment_files:
+        n_fragment_per_cb_per_chrom_sample = count_number_of_fragments_per_chrom_for_cbs(
+            sample, sample_to_cb[str(sample)])
+        for cb in n_fragment_per_cb_per_chrom_sample:
+            if cb not in n_fragment_per_cb_per_chrom_inf:
+                n_fragment_per_cb_per_chrom_inf[cb] = {}
+            for chrom in n_fragment_per_cb_per_chrom_sample[cb]:
+                if chrom not in n_fragment_per_cb_per_chrom_inf[cb]:
+                    n_fragment_per_cb_per_chrom_inf[cb][chrom] = 0
+                n_fragment_per_cb_per_chrom_inf[cb][chrom] += n_fragment_per_cb_per_chrom_sample[cb][chrom]
+    for cb in n_fragment_per_cb_per_chrom_inf:
+        for chrom in n_fragment_per_cb_per_chrom_inf[cb]:
+            assert(n_fragment_per_cb_per_chrom_inf[cb][chrom] == n_fragment_per_cb_per_chrom_outf[cb][chrom])
+
+
+def test_pseudobulk_not_indexed(fragment_file, tmp_path):
+    fragment_files = fragment_file
+    bcs_1 = []
+    bcs_2 = []
+    with open(TEST_DIRECTORY / pathlib.Path("bcs_nextgem.txt")) as f:
+        for l in f:
+            bcs_1.append(l.strip())
+
+    with open(TEST_DIRECTORY / pathlib.Path("bcs_v1.txt")) as f:
+        for l in f:
+            bcs_2.append(l.strip())
+    sample_to_cb = {
+        str(fragment_files[0]): bcs_1,
+        str(fragment_files[1]): bcs_2,
+    }
+    split_fragment_files_by_cell_type(
+        fragment_file_paths=[str(f) for f in fragment_files],
+        output_file_prefix=str(tmp_path) +  "/",
+        cell_type_to_fragment_file_to_cell_barcode={
+            "cell_type": sample_to_cb
+        },
+        chromosomes=[*[f"chr{x + 1}" for x in range(22)], "chrY", "chrX"]
+    )
+    assert(fragment_file_correct_order(tmp_path / "_cell_type.tsv.gz"))
+    n_fragment_per_cb_per_chrom_outf = count_number_of_fragments_per_chrom_for_cbs(
+        tmp_path / "_cell_type.tsv.gz",
+        [*list(sample_to_cb.values())[0], *list(sample_to_cb.values())[1]]
+    )
+    n_fragment_per_cb_per_chrom_inf = {}
+    for sample in fragment_files:
+        n_fragment_per_cb_per_chrom_sample = count_number_of_fragments_per_chrom_for_cbs(
+            sample, sample_to_cb[str(sample)])
+        for cb in n_fragment_per_cb_per_chrom_sample:
+            if cb not in n_fragment_per_cb_per_chrom_inf:
+                n_fragment_per_cb_per_chrom_inf[cb] = {}
+            for chrom in n_fragment_per_cb_per_chrom_sample[cb]:
+                if chrom not in n_fragment_per_cb_per_chrom_inf[cb]:
+                    n_fragment_per_cb_per_chrom_inf[cb][chrom] = 0
+                n_fragment_per_cb_per_chrom_inf[cb][chrom] += n_fragment_per_cb_per_chrom_sample[cb][chrom]
+    for cb in n_fragment_per_cb_per_chrom_inf:
+        for chrom in n_fragment_per_cb_per_chrom_inf[cb]:
+            assert(n_fragment_per_cb_per_chrom_inf[cb][chrom] == n_fragment_per_cb_per_chrom_outf[cb][chrom])
\ No newline at end of file