diff --git a/.gitignore b/.gitignore
index ea8c4bf..2a0038a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 /target
+.idea
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
index 7630133..5baf7b5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2,6 +2,15 @@
 # It is not intended for manual editing.
 version = 3
 
+[[package]]
+name = "addr2line"
+version = "0.22.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678"
+dependencies = [
+ "gimli",
+]
+
 [[package]]
 name = "adler"
 version = "1.0.2"
@@ -44,8 +53,23 @@ dependencies = [
  "anstyle",
  "anstyle-parse",
  "anstyle-query",
- "anstyle-wincon",
+ "anstyle-wincon 2.1.0",
+ "colorchoice",
+ "utf8parse",
+]
+
+[[package]]
+name = "anstream"
+version = "0.6.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b"
+dependencies = [
+ "anstyle",
+ "anstyle-parse",
+ "anstyle-query",
+ "anstyle-wincon 3.0.3",
  "colorchoice",
+ "is_terminal_polyfill",
  "utf8parse",
 ]
 
@@ -70,7 +94,7 @@ version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b"
 dependencies = [
- "windows-sys",
+ "windows-sys 0.48.0",
 ]
 
 [[package]]
@@ -80,7 +104,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "58f54d10c6dfa51283a066ceab3ec1ab78d13fae00aa49243a45e4571fb79dfd"
 dependencies = [
  "anstyle",
- "windows-sys",
+ "windows-sys 0.48.0",
+]
+
+[[package]]
+name = "anstyle-wincon"
+version = "3.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19"
+dependencies = [
+ "anstyle",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -98,6 +132,21 @@ version = "0.7.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
 
+[[package]]
+name = "backtrace"
+version = "0.3.72"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "17c6a35df3749d2e8bb1b7b21a976d82b15548788d2735b9d82f329268f71a11"
+dependencies = [
+ "addr2line",
+ "cc",
+ "cfg-if",
+ "libc",
+ "miniz_oxide",
+ "object",
+ "rustc-demangle",
+]
+
 [[package]]
 name = "bit-vec"
 version = "0.6.3"
@@ -179,12 +228,9 @@ dependencies = [
 
 [[package]]
 name = "cc"
-version = "1.0.83"
+version = "1.0.98"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
-dependencies = [
- "libc",
-]
+checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f"
 
 [[package]]
 name = "cfg-if"
@@ -199,6 +245,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b1d7b8d5ec32af0fadc644bf1fd509a688c2103b185644bb1e29d164e0703136"
 dependencies = [
  "clap_builder",
+ "clap_derive",
 ]
 
 [[package]]
@@ -207,12 +254,24 @@ version = "4.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5179bb514e4d7c2051749d8fcefa2ed6d06a9f4e6d69faf3805f5d80b8cf8d56"
 dependencies = [
- "anstream",
+ "anstream 0.5.0",
  "anstyle",
  "clap_lex",
  "strsim",
 ]
 
+[[package]]
+name = "clap_derive"
+version = "4.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0862016ff20d69b84ef8247369fabf5c008a7417002411897d40ee1f4532b873"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.60",
+]
+
 [[package]]
 name = "clap_lex"
 version = "0.5.1"
@@ -233,7 +292,7 @@ checksum = "2674ec482fbc38012cf31e6c42ba0177b431a0cb6f15fe40efa5aab1bda516f6"
 dependencies = [
  "is-terminal",
  "lazy_static",
- "windows-sys",
+ "windows-sys 0.48.0",
 ]
 
 [[package]]
@@ -362,6 +421,12 @@ dependencies = [
  "crypto-common",
 ]
 
+[[package]]
+name = "either"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b"
+
 [[package]]
 name = "enum-iterator"
 version = "1.5.0"
@@ -396,7 +461,7 @@ checksum = "136526188508e25c6fef639d7927dfb3e0e3084488bf202267829cf7fc23dbdd"
 dependencies = [
  "errno-dragonfly",
  "libc",
- "windows-sys",
+ "windows-sys 0.48.0",
 ]
 
 [[package]]
@@ -417,7 +482,10 @@ dependencies = [
  "colored",
  "compare",
  "csv",
+ "human-panic",
  "io",
+ "itertools",
+ "log",
  "noodles",
  "regex",
  "serde",
@@ -566,6 +634,23 @@ dependencies = [
  "version_check",
 ]
 
+[[package]]
+name = "getrandom"
+version = "0.2.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi",
+]
+
+[[package]]
+name = "gimli"
+version = "0.29.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd"
+
 [[package]]
 name = "gmeta"
 version = "1.3.0"
@@ -626,6 +711,12 @@ dependencies = [
  "allocator-api2",
 ]
 
+[[package]]
+name = "heck"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
+
 [[package]]
 name = "hermit-abi"
 version = "0.3.3"
@@ -638,6 +729,22 @@ version = "0.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
 
+[[package]]
+name = "human-panic"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4c5d0e9120f6bca6120d142c7ede1ba376dd6bf276d69dd3dbe6cbeb7824179"
+dependencies = [
+ "anstream 0.6.14",
+ "anstyle",
+ "backtrace",
+ "os_info",
+ "serde",
+ "serde_derive",
+ "toml",
+ "uuid",
+]
+
 [[package]]
 name = "impl-codec"
 version = "0.6.0"
@@ -688,7 +795,22 @@ checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
 dependencies = [
  "hermit-abi",
  "rustix",
- "windows-sys",
+ "windows-sys 0.48.0",
+]
+
+[[package]]
+name = "is_terminal_polyfill"
+version = "1.70.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800"
+
+[[package]]
+name = "itertools"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
+dependencies = [
+ "either",
 ]
 
 [[package]]
@@ -788,6 +910,12 @@ version = "0.4.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3852614a3bd9ca9804678ba6be5e3b8ce76dfc902cae004e3e0c44051b6e88db"
 
+[[package]]
+name = "log"
+version = "0.4.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
+
 [[package]]
 name = "lzma-sys"
 version = "0.1.20"
@@ -934,12 +1062,32 @@ dependencies = [
  "noodles-csi",
 ]
 
+[[package]]
+name = "object"
+version = "0.35.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "once_cell"
 version = "1.19.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
 
+[[package]]
+name = "os_info"
+version = "3.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae99c7fa6dd38c7cafe1ec085e804f8f555a2f8659b0dbe03f1f9963a9b51092"
+dependencies = [
+ "log",
+ "serde",
+ "windows-sys 0.52.0",
+]
+
 [[package]]
 name = "page_size"
 version = "0.6.0"
@@ -968,7 +1116,7 @@ version = "3.6.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "be30eaf4b0a9fba5336683b38de57bb86d179a35862ba6bfcf57625d006bde5b"
 dependencies = [
- "proc-macro-crate 2.0.2",
+ "proc-macro-crate 2.0.0",
  "proc-macro2",
  "quote",
  "syn 1.0.109",
@@ -1016,11 +1164,10 @@ dependencies = [
 
 [[package]]
 name = "proc-macro-crate"
-version = "2.0.2"
+version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b00f26d3400549137f92511a46ac1cd8ce37cb5598a96d382381458b992a5d24"
+checksum = "7e8366a6159044a37876a2b9817124296703c586a5c92e2c53751fa06d8d43e8"
 dependencies = [
- "toml_datetime",
  "toml_edit 0.20.2",
 ]
 
@@ -1080,6 +1227,12 @@ version = "0.7.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
 
+[[package]]
+name = "rustc-demangle"
+version = "0.1.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
+
 [[package]]
 name = "rustc_version"
 version = "0.4.0"
@@ -1099,7 +1252,7 @@ dependencies = [
  "errno",
  "libc",
  "linux-raw-sys",
- "windows-sys",
+ "windows-sys 0.48.0",
 ]
 
 [[package]]
@@ -1158,6 +1311,15 @@ dependencies = [
  "syn 2.0.60",
 ]
 
+[[package]]
+name = "serde_spanned"
+version = "0.6.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "79e674e01f999af37c49f70a6ede167a8a60b2503e56c5599532a65baa5969a0"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "serde_yaml"
 version = "0.9.25"
@@ -1239,11 +1401,26 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 
+[[package]]
+name = "toml"
+version = "0.8.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f49eb2ab21d2f26bd6db7bf383edc527a7ebaee412d17af4d40fdccd442f335"
+dependencies = [
+ "serde",
+ "serde_spanned",
+ "toml_datetime",
+ "toml_edit 0.22.14",
+]
+
 [[package]]
 name = "toml_datetime"
-version = "0.6.3"
+version = "0.6.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7cda73e2f1397b1262d6dfdcef8aafae14d1de7748d66822d3bfeeb6d03e5e4b"
+checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf"
+dependencies = [
+ "serde",
+]
 
 [[package]]
 name = "toml_edit"
@@ -1267,6 +1444,18 @@ dependencies = [
  "winnow",
 ]
 
+[[package]]
+name = "toml_edit"
+version = "0.22.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f21c7aaf97f1bd9ca9d4f9e73b0a6c74bd5afef56f2bc931943a6e1c37e04e38"
+dependencies = [
+ "indexmap",
+ "serde",
+ "serde_spanned",
+ "toml_datetime",
+]
+
 [[package]]
 name = "typenum"
 version = "1.17.0"
@@ -1309,12 +1498,27 @@ version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
 
+[[package]]
+name = "uuid"
+version = "1.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0"
+dependencies = [
+ "getrandom",
+]
+
 [[package]]
 name = "version_check"
 version = "0.9.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
 
+[[package]]
+name = "wasi"
+version = "0.11.0+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
+
 [[package]]
 name = "winapi"
 version = "0.3.9"
@@ -1343,7 +1547,16 @@ version = "0.48.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
 dependencies = [
- "windows-targets",
+ "windows-targets 0.48.5",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
+dependencies = [
+ "windows-targets 0.52.5",
 ]
 
 [[package]]
@@ -1352,13 +1565,29 @@ version = "0.48.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
 dependencies = [
- "windows_aarch64_gnullvm",
- "windows_aarch64_msvc",
- "windows_i686_gnu",
- "windows_i686_msvc",
- "windows_x86_64_gnu",
- "windows_x86_64_gnullvm",
- "windows_x86_64_msvc",
+ "windows_aarch64_gnullvm 0.48.5",
+ "windows_aarch64_msvc 0.48.5",
+ "windows_i686_gnu 0.48.5",
+ "windows_i686_msvc 0.48.5",
+ "windows_x86_64_gnu 0.48.5",
+ "windows_x86_64_gnullvm 0.48.5",
+ "windows_x86_64_msvc 0.48.5",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb"
+dependencies = [
+ "windows_aarch64_gnullvm 0.52.5",
+ "windows_aarch64_msvc 0.52.5",
+ "windows_i686_gnu 0.52.5",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc 0.52.5",
+ "windows_x86_64_gnu 0.52.5",
+ "windows_x86_64_gnullvm 0.52.5",
+ "windows_x86_64_msvc 0.52.5",
 ]
 
 [[package]]
@@ -1367,42 +1596,90 @@ version = "0.48.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
 
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263"
+
 [[package]]
 name = "windows_aarch64_msvc"
 version = "0.48.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
 
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6"
+
 [[package]]
 name = "windows_i686_gnu"
 version = "0.48.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
 
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9"
+
 [[package]]
 name = "windows_i686_msvc"
 version = "0.48.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
 
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf"
+
 [[package]]
 name = "windows_x86_64_gnu"
 version = "0.48.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
 
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9"
+
 [[package]]
 name = "windows_x86_64_gnullvm"
 version = "0.48.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
 
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596"
+
 [[package]]
 name = "windows_x86_64_msvc"
 version = "0.48.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
 
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"
+
 [[package]]
 name = "winnow"
 version = "0.5.40"
diff --git a/Cargo.toml b/Cargo.toml
index 4c55605..a523174 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,13 +6,16 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-clap = { version = "4.4.4", features = ["cargo"] }
+clap = { version = "4.4.4", features = ["cargo", "derive"] }
 colored = "2.0.4"
 compare = "0.1.0"
 csv = "1.3.0"
 io = "0.0.2"
+itertools = "0.13.0"
 noodles = { version = "0.52.0", features = ["fasta", "cram", "csi", "core"] }
 regex = "1.9.5"
 serde = { version = "1.0.188", features = ["derive"] }
 serde_yaml = "0.9.25"
 stacker = "0.1.15"
+log = "0.4.21"
+human-panic = "2.0.0"
diff --git a/README.md b/README.md
index 79c7001..6c478a2 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,7 @@
 # FastaManipulator
 
+![Release workflow status](https://github.com/Rust-Wellcome/FasMan/actions/workflows/release-repo.yml/badge.svg)
+
 This is a re-write of the current fasta manipulation scripts I've written whilst at ToL, as well as adding some functionality needed for future projects.
 
 Currently, this program has the following arguments:
diff --git a/src/cli/mod.rs b/src/cli/mod.rs
new file mode 100644
index 0000000..2638ee9
--- /dev/null
+++ b/src/cli/mod.rs
@@ -0,0 +1,181 @@
+use clap::{Parser, Subcommand};
+
+const SPLIT_OPTIONS: [&str; 5] = ["pep", "cds", "cdna", "rna", "other"];
+
+// Top-level command-line definition for the fasta-processing tool, parsed with clap's derive API.
+#[derive(Parser)]
+#[command(version="v1.0.0", about, long_about = None)]
+pub struct Cli {
+    // The subcommand is wrapped in `Option`, so an invocation without any subcommand still parses (TODO: make it mandatory).
+    // Reference: https://docs.rs/clap/latest/clap/_derive/_tutorial/chapter_2/index.html#defaults
+    #[command(subcommand)]
+    pub command: Option<Commands>,
+}
+
+// One variant per subcommand; `///` comments on fields become the `--help` text. Reference: https://docs.rs/clap/latest/clap/_derive/_tutorial/chapter_2/index.html
+#[derive(Subcommand)]
+pub enum Commands {
+    YamlValidator {
+        /// Path to the TreeVal yaml file generated by the user
+        #[arg(short, long)]
+        yaml: String,
+
+        /// Print explainers as to why validation fails, if it does fail
+        #[arg(short = 'v', long)]
+        verbose: bool,
+
+        /// Output the log to file
+        #[arg(short = 'o', long, default_value_t=String::from("./"))]
+        output: String,
+    },
+
+    SplitByCount {
+        /// A path to a valid fasta file.
+        #[arg(short = 'f', long)]
+        fasta_file: String,
+
+        /// The output directory that files will be placed in | outfile will be formatted like {input_file_prefix}_f{file_count}_c{requested_chunk_count}-a{actual_chunk_count}.fa
+        #[arg(short = 'o', long, default_value_t = String::from("./"))]
+        output_directory: String,
+
+        /// The data type of the input data
+        #[arg(short = 'd', value_parser = clap::builder::PossibleValuesParser::new(SPLIT_OPTIONS))]
+        data_type: String,
+
+        /// Do we need to sanitise the headers of the input fasta
+        #[arg(short = 's', value_parser = clap::value_parser!(bool))]
+        sanitise: bool,
+
+        /// How many sequences per file
+        #[arg(short = 'c', value_parser = clap::value_parser!(u16))]
+        count: u16,
+    },
+
+    SplitBySize {
+        /// A path to a valid fasta file.
+        #[arg(short = 'f', long)]
+        fasta_file: String,
+
+        /// Size in MB that a fasta file is to be chunked into
+        #[arg(short = 's', long = "mem-size")]
+        mem_size: u16,
+
+        /// The output directory that files will be placed in | outfile will be formatted like {input_file_prefix}_f{file_count}_c{requested_chunk_count}-a{actual_chunk_count}.fa
+        #[arg(short = 'o', long, default_value_t = String::from("./"))]
+        output_directory: String,
+    },
+
+    GenesetCSVS {
+        /// The path to the top level directory of your geneset directory.
+        #[arg(short = 'd')]
+        geneset_dir: String,
+
+        /// Specify the clade folder to refresh
+        #[arg(short = 'c', default_value_t = String::from("ALL"))]
+        specifiy_clade: String,
+    },
+
+    MapHeaders {
+        /// A path to a valid fasta file.
+        #[arg(short = 'f', long)]
+        fasta_file: String,
+
+        /// The output directory which will contain the mapped-heads.txt as well as the *mapped.fasta
+        #[arg(short = 'o', long, default_value_t = String::from("./"))]
+        output_directory: String,
+
+        #[arg(short = 'r', default_value_t = String::from("FMMH"))]
+        replace_with: String,
+    },
+
+    ReMapHeaders {
+        /// A path to a valid fasta file.
+        #[arg(short = 'f', long)]
+        fasta_file: String,
+
+        /// The output directory which will contain the mapped-heads.txt as well as the *mapped.fasta
+        #[arg(short = 'o', long, default_value_t = String::from("./new"))]
+        output_directory: String,
+
+        /// The original mapped header file, a TSV of old-header, new-header
+        #[arg(short = 'm', default_value_t = String::from("FMMH"))]
+        map_file: String,
+    },
+
+    #[command(version, about="Profile an input fasta file and return various statistics", long_about = None)]
+    Profile {
+        /// A path to a valid fasta file.
+        #[arg(short = 'f', long)]
+        fasta_file: String,
+
+        /// The output directory
+        #[arg(short = 'o', long, default_value_t = String::from("FasMan-out"))]
+        output_dir: String,
+    },
+
+    Curate {
+        /// The input fasta file for re-organising
+        #[arg(short = 'f', long)]
+        fasta: String,
+
+        /// The TPF file used to re-organise the input fasta
+        #[arg(short = 't', long)]
+        tpf: String,
+
+        /// Size sort the output or leave as order in AGP
+        #[arg(short = 's')]
+        sort: bool,
+
+        #[arg(short = 'o', default_value_t = String::from("new.fasta"))]
+        output: String,
+
+        /// Length that the N (gap) string should be.
+        #[arg(short, long, default_value_t = 200)]
+        n_length: usize,
+    },
+
+    Subset {
+        /// A path to a valid fasta file for profiling.
+        #[arg(short = 'f', long)]
+        fasta_file: String,
+
+        /// Random subset of input file. Default skims the first X given percent
+        #[arg(short = 'r', long)]
+        random: bool,
+
+        /// Percentage of the original file entries that should be retained
+        #[arg(short = 'p', long, default_value_t = 50)]
+        percent: u16,
+    },
+
+    FilterFasta {
+        /// A fasta file for processing.
+        #[arg(short = 'f', long)]
+        fasta: String,
+
+        /// The outfile naming
+        #[arg(short = 'o', default_value_t = String::from("FilteredFasta.fa"))]
+        output: String,
+
+        #[arg(short = 'l', long = "filter_list")]
+        filter_list: String,
+    },
+
+    Mergehaps {
+        /// The input fasta file for re-organising
+        #[arg(short = 'p', long)]
+        fasta_1: String,
+
+        /// The second input fasta file
+        #[arg(short = 's', long)]
+        fasta_2: String,
+
+        /// A '/' separated list with an item per file, these are the namings of the new scaffolds in the merged output
+        #[arg(short = 'n', long, default_value_t = String::from("PRI/HAP"))]
+        naming: String,
+
+        /// Output file prefix
+        #[arg(short = 'o', default_value_t = String::from("merged"))]
+        output: String,
+    },
+}
diff --git a/src/errors/file_error.rs b/src/errors/file_error.rs
new file mode 100644
index 0000000..77ec0f8
--- /dev/null
+++ b/src/errors/file_error.rs
@@ -0,0 +1,27 @@
+use std::fmt::{self};
+
+use std::io::Error;
+
+// Error type for file-handling failures. It currently stores only the text of an
+// underlying `std::io::Error` (see the `From` impl below); it can later be extended
+// to carry our own error cases, defer to an underlying error, or something in between.
+// Resource: https://doc.rust-lang.org/rust-by-example/error/multiple_error_types/define_error_type.html
+#[derive(Debug, Clone)]
+#[allow(dead_code)]
+pub struct FileError {
+    message: String, // rendered text of the error this value was created from
+}
+
+impl fmt::Display for FileError { // user-facing rendering of the error
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "Error in handling the file: {}", self.message) // include the stored cause so failures can be told apart
+    }
+}
+
+impl From<Error> for FileError {
+    fn from(error: Error) -> Self {
+        // Only the rendered text of the I/O error is kept.
+        let message = error.to_string();
+        Self { message }
+    }
+}
diff --git a/src/errors/mod.rs b/src/errors/mod.rs
new file mode 100644
index 0000000..6bf812b
--- /dev/null
+++ b/src/errors/mod.rs
@@ -0,0 +1 @@
+pub mod file_error;
diff --git a/src/exclude_seq.rs b/src/exclude_seq.rs
deleted file mode 100644
index ab82c4e..0000000
--- a/src/exclude_seq.rs
+++ /dev/null
@@ -1,44 +0,0 @@
-pub mod exclude_seq_mod {
-    use clap::ArgMatches;
-    use noodles::fasta;
-    use std::error::Error;
-    use std::{fs, io::BufRead, str};
-
-    fn open_fasta<'a>(
-        exclusions: Vec<&str>,
-        fasta: &'a str,
-        out_file: &str,
-    ) -> std::result::Result<&'a str, Box<dyn Error>> {
-        let reader: Result<fasta::Reader<Box<dyn BufRead>>, std::io::Error> =
-            fasta::reader::Builder.build_from_path(fasta);
-        let file = fs::OpenOptions::new()
-            .create(true)
-            .append(true)
-            .open(out_file)?;
-        let mut writer = fasta::Writer::new(file);
-
-        match reader {
-            Ok(fasta) => {
-                let mut binding = fasta;
-                for result in binding.records() {
-                    let record = result?;
-                    if !exclusions.contains(&record.name()) {
-                        writer.write_record(&record)?;
-                    } else {
-                        println!("Found record to exclude: {:?}", &record.name());
-                    }
-                }
-                Ok("Removed Exclusionary List")
-            }
-            Err(_) => Err("Error: Fasta is not valid check file!".into()),
-        }
-    }
-
-    pub fn filter_fasta(arguments: std::option::Option<&ArgMatches>) {
-        let fasta = arguments.unwrap().get_one::<String>("fasta").unwrap();
-        let exclude = arguments.unwrap().get_one::<String>("filter_list").unwrap();
-        let outfile = arguments.unwrap().get_one::<String>("output").unwrap();
-        let list_to_exclude = exclude.split(',').collect::<Vec<&str>>();
-        let _x = open_fasta(list_to_exclude, fasta, outfile);
-    }
-}
diff --git a/src/file_utils/file_utility.rs b/src/file_utils/file_utility.rs
new file mode 100644
index 0000000..8a30eaa
--- /dev/null
+++ b/src/file_utils/file_utility.rs
@@ -0,0 +1,136 @@
+use log::info;
+use std::fs::File;
+use std::io::{BufRead, BufReader};
+
+use crate::errors::file_error::FileError;
+use itertools::Itertools;
+
+#[allow(dead_code)] // NOTE(review): appears to be used only by this module's readers and tests so far — confirm
+struct Records<T> { // a batch of items handed back by the readers in this module
+    items: Vec<T>, // the readers below fill this with one entry per line, in file order
+}
+
+#[allow(dead_code)]
+impl Records<String> {
+    pub fn size(&self) -> usize { // number of items held in this batch
+        self.items.len()
+    }
+}
+
+#[allow(dead_code)]
+struct BatchFileReader {} // field-less reader; each method receives the file path as an argument
+
+#[allow(dead_code)] // NOTE(review): same shape as `std::default::Default` — consider using that trait instead
+pub trait DefaultReader {
+    fn default() -> Self; // builds a reader without taking any arguments
+}
+
+impl DefaultReader for BatchFileReader {
+    fn default() -> Self { // the reader has no fields, so there is nothing to initialise
+        Self {}
+    }
+}
+
+#[allow(dead_code)]
+impl BatchFileReader {
+    /// Reads at most `num_lines` lines from the top of the file at `file_path`.
+    ///
+    /// # Errors
+    /// Returns a `FileError` if the file cannot be opened or if reading a line
+    /// fails (for example on invalid UTF-8); read failures are returned, not panicked on.
+    pub fn read_lines(
+        &mut self,
+        file_path: &str,
+        num_lines: usize,
+    ) -> Result<Records<String>, FileError> {
+        info!("Reading lines in file.");
+        let file = File::open(file_path);
+
+        let result = match file {
+            Ok(file) => file,
+            Err(error) => {
+                info!("Error in file handler: {:?}", error);
+                return Err(error.into());
+            }
+        };
+
+        let reader = BufReader::new(result);
+
+        // `take` caps the number of lines read; collecting into a `Result`
+        // stops at the first read error, which `?` converts into a `FileError`.
+        // take() function https://tinyurl.com/6vx7m3k6
+        let items = reader
+            .lines()
+            .take(num_lines)
+            .collect::<Result<Vec<String>, std::io::Error>>()?;
+
+        Ok(Records { items })
+    }
+
+    /// Reads the file at `file_path` in batches of up to `batch_size` lines and
+    /// calls `f` once per batch, in file order.
+    ///
+    /// `f` receives each batch by value and returns nothing.
+    /// Closures reference: https://doc.rust-lang.org/book/ch13-01-closures.html#moving-captured-values-out-of-closures-and-the-fn-traits
+    ///
+    /// # Errors
+    /// Returns a `FileError` if `batch_size` is 0, the file cannot be opened, or a line cannot be read.
+    pub fn read_file_by_batch(
+        &mut self,
+        file_path: &str,
+        batch_size: usize,
+        f: &dyn Fn(Records<String>),
+    ) -> Result<(), FileError> {
+        info!("Reading file by chunk.");
+
+        // itertools' `chunks` panics on a size of 0, so reject it up front.
+        if batch_size == 0 {
+            let kind = std::io::ErrorKind::InvalidInput;
+            return Err(std::io::Error::new(kind, "batch_size must be greater than 0").into());
+        }
+
+        let file = File::open(file_path).map_err(|error| {
+            info!("Error in file handler: {:?}", error);
+            error
+        })?;
+        let reader = BufReader::new(file);
+
+        // Collect each chunk into a `Result` so that a failed read is reported to
+        // the caller instead of silently ending the iteration early.
+        for chunk in &reader.lines().chunks(batch_size) {
+            let items = chunk.collect::<Result<Vec<String>, std::io::Error>>()?;
+            f(Records { items });
+        }
+
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    use super::*;
+
+    const TEST_FILE_PATH: &str = "test_data/synthetic/tiny.fa"; // relative path of the fixture both tests read
+
+    #[test]
+    fn read_lines() {
+        let mut batch_file_reader = BatchFileReader::default();
+        let records = batch_file_reader.read_lines(TEST_FILE_PATH, 3).unwrap();
+        assert_eq!(3, records.items.len()); // only holds if the fixture has at least 3 lines
+    }
+
+    // Callback handed to `read_file_by_batch` below: checks that no batch exceeds the requested size of 3.
+    // Reference: https://doc.rust-lang.org/book/ch13-01-closures.html
+    fn assert_function(input: Records<String>) {
+        assert!(input.size() <= 3);
+    }
+
+    #[test]
+    fn read_file_batch() {
+        let mut batch_file_reader = BatchFileReader::default();
+        batch_file_reader
+            .read_file_by_batch(TEST_FILE_PATH, 3, &assert_function)
+            .unwrap_or_else(|e| panic!("Error: {:?}", e));
+    }
+}
diff --git a/src/file_utils/mod.rs b/src/file_utils/mod.rs
new file mode 100644
index 0000000..79b7217
--- /dev/null
+++ b/src/file_utils/mod.rs
@@ -0,0 +1 @@
+pub mod file_utility;
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..36ac37b
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,82 @@
+use clap::Parser;
+
+use cli::{Cli, Commands};
+use std::io::Error;
+
+// Reference: https://doc.rust-lang.org/book/ch07-02-defining-modules-to-control-scope-and-privacy.html
+use crate::processors::exclude_seq::filter_fasta;
+use crate::processors::map_headers::map_fasta_head;
+use crate::processors::remap_head::remapping_head;
+use crate::processors::split_by_count::split_file_by_count;
+use crate::processors::split_by_size::split_file_by_size;
+use crate::processors::tpf_fasta::curate_fasta;
+use crate::processors::yaml_validator::validate_yaml;
+
+mod cli;
+mod errors;
+mod file_utils;
+mod generics;
+
+mod processors;
+
+/// Parses the command line and dispatches to the selected subcommand.
+///
+/// Prints `No command provided` when no subcommand is given. `GenesetCSVS`, `Profile`,
+/// `Subset` and `Mergehaps` are not implemented yet and panic through `todo!()`.
+///
+/// # Errors
+/// Returns an [`Error`] carrying the message of any failure reported by `map_fasta_head`.
+pub fn run() -> Result<(), Error> {
+    let cli = Cli::parse();
+
+    match &cli.command {
+        Some(Commands::YamlValidator {
+            yaml,
+            verbose,
+            output,
+        }) => validate_yaml(yaml, verbose, output),
+        Some(Commands::SplitByCount {
+            fasta_file,
+            output_directory,
+            data_type,
+            sanitise,
+            count,
+        }) => split_file_by_count(fasta_file, output_directory, data_type, sanitise, count),
+        Some(Commands::SplitBySize {
+            fasta_file,
+            mem_size,
+            output_directory,
+        }) => split_file_by_size(fasta_file, mem_size, output_directory),
+        Some(Commands::MapHeaders {
+            fasta_file,
+            output_directory,
+            replace_with,
+        }) => {
+            // Propagate the failure to the caller instead of discarding the `Result`.
+            map_fasta_head(fasta_file, output_directory, replace_with)
+                .map_err(|e| Error::new(std::io::ErrorKind::Other, e.to_string()))?
+        }
+        Some(Commands::ReMapHeaders {
+            fasta_file,
+            output_directory,
+            map_file,
+        }) => remapping_head(fasta_file, output_directory, map_file),
+        Some(Commands::Curate {
+            fasta,
+            tpf,
+            sort,
+            output,
+            n_length,
+        }) => curate_fasta(fasta, tpf, sort, output, n_length),
+        Some(Commands::FilterFasta {
+            fasta,
+            output,
+            filter_list,
+        }) => filter_fasta(fasta, output, filter_list),
+        // Declared on the CLI but not implemented yet.
+        Some(
+            Commands::GenesetCSVS { .. }
+            | Commands::Profile { .. }
+            | Commands::Subset { .. }
+            | Commands::Mergehaps { .. },
+        ) => todo!(),
+        None => println!("No command provided"),
+    }
+    Ok(())
+}
diff --git a/src/main.rs b/src/main.rs
index 9e947f9..6163c35 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,327 +1,16 @@
 #![allow(non_snake_case)]
 
-use clap::{command, Arg, Command};
-use colored::Colorize;
-use std::env;
-use std::io::Error;
-
-mod yaml_validator;
-use crate::yaml_validator::yaml_validator_mod::validate_yaml;
-
-mod map_headers;
-use crate::map_headers::mapping_headers::map_fasta_head;
-
-mod remap_head;
-use crate::remap_head::remapping_headers::remapping_head;
-
-mod split_by_size;
-use crate::split_by_size::split_by_size_mod::split_file_by_size;
-
-mod split_by_count;
-use crate::split_by_count::split_by_count_mod::split_file_by_count;
-
-mod generics;
-//use crate::generics::validate_fasta;
-
-mod tpf_fasta;
-use crate::tpf_fasta::tpf_fasta_mod::curate_fasta;
-
-mod exclude_seq;
-use crate::exclude_seq::exclude_seq_mod::filter_fasta;
-
-fn main() -> Result<(), Error> {
-    let split_options = ["pep", "cds", "cdna", "rna", "other"];
-    let match_result = command!()
-    .about("A program for fasta manipulation and yaml validation ~ Used in TreeVal project")
-    .subcommand(
-        Command::new("validateyaml")
-            .about("Subcommand for validating the users TreeVal yaml file")
-            .arg(
-                Arg::new("yaml")
-                    .required(true)
-                    .help("Path to the TreeVal yaml file generated by the user")
-            )
-            .arg(
-                Arg::new("verbose")
-                    .short('v')
-                    .value_parser(clap::value_parser!(bool))
-                    .default_value("false")
-                    .help("Print explainers as to why validation fails, if it does fail")
-            )
-            .arg(
-                Arg::new("output")
-                    .short('o')
-                    .default_value("./")
-                    .help("Output the log to file")
-            )
-    )
-    .subcommand(
-        Command::new("splitbycount")
-            .about("Subcommand for splitting fasta files by number of sequence-header pairs, e.g., 100 pairs per file")
-            .arg(
-                Arg::new("fasta-file")
-                    .short('f')
-                    .required(true)
-                    .help("A path to a valid fasta file.")
-            )
-            .arg(
-                Arg::new("output-directory")
-                    .short('o')
-                    .default_value("./")
-                    .help("The output directory that files will be placed in | outfile will be formatted like {input_file_prefix}_f{file_count}_c{requested_chunk_count}-a{actual_chunk_count}.fa")
-            )
-            .arg(
-                Arg::new("data_type")
-                    .short('d')
-                    .value_parser(clap::builder::PossibleValuesParser::new(split_options))
-                    .help("The data type of the input data")
-            )
-            .arg(
-                Arg::new("sanitise")
-                    .short('s')
-                    .value_parser(clap::value_parser!(bool))
-                    .help("Do we need to sanitise the headers of the input fasta")
-            )
-            .arg(
-                Arg::new("count")
-                    .short('c')
-                    .value_parser(clap::value_parser!(u16))
-                    .help("How many sequences per file")
-            )
-    )
-    .subcommand(
-        Command::new("splitbysize")
-            .about("Subcommand for splitting fasta files by user given size (in MegaBytes) into n (fasta_size / user_given_size) files")
-            .arg(
-                Arg::new("fasta-file")
-                    .short('f')
-                    .required(true)
-                    .help("A path to a valid fasta file.")
-            )
-            .arg(
-                Arg::new("mem-size")
-                    .short('s')
-                    .required(true)
-                    .value_parser(clap::value_parser!(u16))
-                    .help("Size in MB that a fasta file is to be chunked into")
-            )
-            .arg(
-                Arg::new("output-directory")
-                    .short('o')
-                    .default_value("./")
-                    .help("The output directory that files will be placed in")
-            )
-    )
-    .subcommand(
-        Command::new("geneset_csvs")
-            .about("Subcommand to generate csv files that condense geneset directories generated by splitbycount/splitbysize. Mainly for use in TreeVal")
-            .arg(
-                Arg::new("geneset_dir")
-                    .short('d')
-                    .required(true)
-                    .help("The path to the top level directory of your geneset directory.")
-            )
-            .arg(
-                Arg::new("specifiy_clade")
-                    .short('c')
-                    .required(true)
-                    .default_value("ALL")
-                    .help("Specify the clade folder to refresh")
-            )
-    )
-    .subcommand(
-        Command::new("mapheaders")
-            .about("Subcommand for stripping out headers and replacing with a standardised automatic or user-given string, this also returns a dict of old:new headers")
-            .arg(
-                Arg::new("fasta-file")
-                    .short('f')
-                    .required(true)
-                    .help("A path to a valid fasta file.")
-            )
-            .arg(
-                Arg::new("output-directory")
-                    .short('o')
-                    .default_value("./")
-                    .help("The output directory which will contain the mapped-heads.txt as well as the *mapped.fasta")
-            )
-            .arg(
-                Arg::new("replace-with")
-                    .short('r')
-                    .default_value("FMMH")
-                    .help("The new header format, appended with a numerical value. Without being set the new header will default to 'FMMH_{numberical}'")
-            )
-    )
-    .subcommand(
-        Command::new("remapheaders")
-            .about("Subcommand for stripping out previously mapped headers and replacing with the old headers")
-            .arg(
-                Arg::new("fasta-file")
-                    .short('f')
-                    .required(true)
-                    .help("A path to a valid fasta file.")
-            )
-            .arg(
-                Arg::new("output-directory")
-                    .short('o')
-                    .default_value("./new")
-                    .help("The output directory which will contain the mapped-heads.txt as well as the *mapped.fasta")
-            )
-            .arg(
-                Arg::new("map-file")
-                    .short('m')
-                    .required(true)
-                    .help("The original mapped header field, a TSV of old-header, new-header")
-            )
-    )
-    .subcommand(
-        Command::new("profile")
-        .about("Profile an input fasta file and return various statistics")
-        .arg(
-            Arg::new("fasta-file")
-                .short('f')
-                .required(true)
-                .help("The input fasta file for profiling")
-        )
-        .arg(
-            Arg::new("output-dir")
-                .short('o')
-                .default_value("FasMan-out")
-                .help("The input fasta file for profiling")
-        )
-    )
-    .subcommand(
-        Command::new("curate")
-        .about("Convert an tpf file and original fasta file into a fasta file - useful for curation")
-        .arg(
-            Arg::new("fasta")
-                .short('f')
-                .required(true)
-                .help("The input fasta file for re-organising")
-        )
-        .arg(
-            Arg::new("tpf")
-                .short('t')
-                .required(true)
-                .help("The TPF file used to re-organise the input fasta")
-        )
-        .arg(
-            Arg::new("sort")
-                .short('s')
-                .value_parser(clap::value_parser!(bool))
-                .default_value("false")
-                .help("Size sort the output or leave as order in AGP")
-        )
-        .arg(
-            Arg::new("output")
-                .short('o')
-                .default_value("new.fasta")
-                .help("The output name of the new fasta file")
-        )
-        .arg(
-            Arg::new("n_length")
-                .value_parser(clap::value_parser!(usize))
-                .default_value("200")
-                .help("Length that the N (gap) string should be.")
-        )
-    )
-    .subcommand(
-        Command::new("subset")
-        .about("Subset a fasta file in a random manner by percentage of file")
-        .arg(
-            Arg::new("fasta-file")
-                .short('f')
-                .required(true)
-                .help("The input fasta file for profiling")
-        )
-        .arg(
-            Arg::new("random")
-                .short('r')
-                .value_parser(clap::value_parser!(bool))
-                .help("Random subset of input file. Default skims the first X given percent")
-        )
-        .arg(
-            Arg::new("percent")
-                .short('p')
-                .value_parser(clap::value_parser!(u16))
-                .default_value("50")
-                .help("Percentage of the original file entries that should be retained")
-        )
-    )
-    .subcommand(
-        Command::new("filterfasta")
-            .about("Filter a given list of sequences from fasta file")
-            .arg(
-                Arg::new("fasta")
-                    .short('f')
-                    .required(true)
-                    .help("A fasta file for processing")
-            )
-            .arg(
-                Arg::new("output")
-                    .short('o')
-                    .default_value("FiilteredFasta.fa")
-                    .help("The outfile naming")
-            )
-            .arg(
-                Arg::new("filter_list")
-                    .short('l')
-                    .help("A string comma-separated list of sequence names to exclude from the final fasta")
-            )
-    )
-    .subcommand(
-        Command::new("mergehaps")
-        .about("Merge haplotypes / multi fasta files together")
-        .arg(
-            Arg::new("fasta-1")
-                .short('p')
-                .required(true)
-                .help("The input fasta file for re-organising")
-        )
-        .arg(
-            Arg::new("fasta-2")
-                .short('s')
-                .required(true)
-                .help("The second input fasta file")
-        )
-        .arg(
-            Arg::new("naming")
-                .short('s')
-                .default_value("PRI/HAP")
-                .help("A '/' separated list with an item per file, these are the namings of the new scaffolds in the merged output")
-        )
-        .arg(
-            Arg::new("output")
-                .short('o')
-                .default_value("merged")
-                .help("Output file prefix")
-        )
-    )
-    .get_matches();
-
-    println! {
-        "{}\n{}\n{}\nRUNNING SUBCOMMAND: |\n-- {}\nRUNNING ON: |\n-- {}",
-        "WELCOME TO Fasta Manipulator".bold(),
-        "This has been made to help prep data for use in the Treeval and curationpretext pipelines".bold(),
-        "ONLY THE yamlvalidator IS SPECIFIC TO TREEVAL, THE OTHER COMMANDS CAN BE USED FOR ANY OTHER PURPOSE YOU WANT".purple(),
-        match_result.subcommand_name().unwrap(),
-        env::consts::OS
-    };
-
-    match match_result.subcommand_name() {
-        Some("splitbysize") => split_file_by_size(match_result.subcommand_matches("splitbysize")),
-        Some("splitbycount") => {
-            split_file_by_count(match_result.subcommand_matches("splitbycount"))
-        }
-        Some("mapheaders") => {
-            _ = map_fasta_head(match_result.subcommand_matches("mapheaders"));
-        }
-        Some("validateyaml") => validate_yaml(match_result.subcommand_matches("validateyaml")),
-        Some("remapheaders") => remapping_head(match_result.subcommand_matches("remapheaders")),
-        Some("curate") => curate_fasta(match_result.subcommand_matches("curate")),
-        Some("filterfasta") => filter_fasta(match_result.subcommand_matches("filterfasta")),
-        _ => {
-            unreachable!()
-        }
-    };
-    Ok(())
+use fasta_manipulation::run;
+use human_panic::setup_panic;
+
+// https://doc.rust-lang.org/book/ch12-03-improving-error-handling-and-modularity.html#separation-of-concerns-for-binary-projects
+/// Binary entry point: sets up `human_panic`, then hands over to the library's `run`.
+///
+/// Prints `Done!` on success; on failure prints the error to stderr and exits with status 1.
+fn main() {
+    // https://rust-cli.github.io/book/in-depth/human-communication.html
+    setup_panic!();
+    match run() {
+        Ok(()) => println!("Done!"),
+        Err(err) => {
+            eprintln!("Error: {}", err);
+            std::process::exit(1);
+        }
+    }
 }
diff --git a/src/map_headers.rs b/src/map_headers.rs
deleted file mode 100644
index 2b066b0..0000000
--- a/src/map_headers.rs
+++ /dev/null
@@ -1,142 +0,0 @@
-pub mod mapping_headers {
-
-    use clap::ArgMatches;
-    use colored::Colorize;
-    use std::error::Error;
-    use std::fmt;
-    use std::fs::File;
-    use std::io::{BufRead, BufReader, BufWriter, Write};
-    use std::iter::Zip;
-
-    use crate::generics::only_keys;
-    use crate::generics::validate_fasta;
-
-    #[allow(dead_code)]
-    #[derive(Debug, Clone)]
-    struct EmptyVec;
-    impl Error for EmptyVec {}
-
-    impl fmt::Display for EmptyVec {
-        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-            write!(f, "Can't Display Empty Vec")
-        }
-    }
-
-    #[allow(clippy::explicit_counter_loop)]
-    pub fn create_mapping(
-        name_vec: Vec<std::string::String>,
-        new_name: &str,
-    ) -> Zip<std::vec::IntoIter<std::string::String>, std::vec::IntoIter<std::string::String>> {
-        // Generate a new mapping for the Fasta
-        //
-        let mut new_heads: Vec<String> = Vec::new();
-        let mut head_counter: i32 = 0;
-        let name_vec_clone = name_vec.clone();
-
-        for _x in name_vec {
-            new_heads.push(format!("{}_{}", new_name, head_counter));
-            head_counter += 1;
-        }
-
-        let mapped_heads: Zip<std::vec::IntoIter<String>, std::vec::IntoIter<String>> =
-            name_vec_clone.into_iter().zip(new_heads);
-
-        mapped_heads
-    }
-
-    pub fn save_mapping(
-        output: &str,
-        mapped: Zip<
-            std::vec::IntoIter<std::string::String>,
-            std::vec::IntoIter<std::string::String>,
-        >,
-    ) {
-        let f: File = File::create(output).expect("Unable to create file");
-        let mut f: BufWriter<File> = BufWriter::new(f);
-        for map_pair in mapped {
-            let line: String = format!("{}\t{}\n", map_pair.0, map_pair.1);
-            f.write_all(&line.into_bytes())
-                .expect("Unable to write data");
-        }
-    }
-
-    #[allow(unused_mut)]
-    pub fn create_mapped_fasta(
-        input: &str,
-        output: &str,
-        mapped: Zip<
-            std::vec::IntoIter<std::string::String>,
-            std::vec::IntoIter<std::string::String>,
-        >,
-    ) {
-        let file_reader: File = File::open(input).expect("CAN'T OPEN FILE");
-        let buff_reader: BufReader<File> = BufReader::new(file_reader);
-        let mut new_fasta: File = File::create(output).unwrap();
-
-        for line in buff_reader.lines() {
-            let l: &str = &line.as_ref().unwrap()[..];
-            if l.starts_with('>') {
-                let mut to_replace = l.replace('>', "");
-                let mut mapped_heads: Zip<std::vec::IntoIter<String>, std::vec::IntoIter<String>> =
-                    mapped.clone();
-                let mut map: Option<(String, String)> =
-                    mapped_heads.find(|x: &(String, String)| x.0 == to_replace);
-                let mut new_head: String = map.expect("").1;
-                let fmt_head: String = format!(">{}\n", new_head);
-                let _ = new_fasta.write_all(&fmt_head.into_bytes());
-            } else {
-                let mut seq = line.expect("");
-                let fmt_seq = format!("{}\n", seq);
-                let _ = new_fasta.write_all(&fmt_seq.into_bytes());
-            }
-        }
-    }
-
-    pub fn map_fasta_head(
-        arguments: std::option::Option<&ArgMatches>,
-    ) -> Result<(), Box<dyn Error>> {
-        let file: &String = arguments.unwrap().get_one::<String>("fasta-file").unwrap();
-        let replacer: &String = arguments
-            .unwrap()
-            .get_one::<String>("replace-with")
-            .unwrap();
-        let output: &String = arguments
-            .unwrap()
-            .get_one::<String>("output-directory")
-            .unwrap();
-
-        println!("Mapping headers for file: {}", file);
-        println!("Replace headers with string: {:?}", &replacer);
-
-        match validate_fasta(file) {
-            Ok(names) => {
-                let new_names = Vec::from_iter(only_keys(names));
-
-                let new_map: Zip<std::vec::IntoIter<String>, std::vec::IntoIter<String>> =
-                    create_mapping(new_names, replacer);
-
-                let map_to_save: Zip<std::vec::IntoIter<String>, std::vec::IntoIter<String>> =
-                    new_map.clone();
-                let output_file = format!("{}mapped-heads.tsv", output);
-
-                save_mapping(&output_file, map_to_save);
-
-                let new_fasta: String = format!("{output}mapped.fasta");
-
-                create_mapped_fasta(file, &new_fasta, new_map);
-
-                println!(
-                    "{}\n{}\n\t{}\n\t{}",
-                    "FASTA HAS BEEN MAPPED AND REWRITTEN".green(),
-                    "FOUND HERE:".green(),
-                    &new_fasta.green(),
-                    &output_file.green()
-                );
-            }
-
-            Err(e) => panic!("Something is wrong with the file! | {}", e),
-        };
-
-        Ok(())
-    }
-}
diff --git a/src/processors/exclude_seq.rs b/src/processors/exclude_seq.rs
new file mode 100644
index 0000000..1b0b8c1
--- /dev/null
+++ b/src/processors/exclude_seq.rs
@@ -0,0 +1,48 @@
+use noodles::fasta;
+use std::error::Error;
+use std::{fs, io::BufRead, str};
+
+/// Copies every record of `fasta` whose name is not in `exclusions` into `out_file`.
+///
+/// Records are appended to `out_file`, which is created when missing. The name of each
+/// excluded record is printed to stdout.
+///
+/// # Errors
+/// Returns an error if `fasta` cannot be opened as FASTA, if `out_file` cannot be opened,
+/// or if reading or writing a record fails.
+fn open_fasta<'a>(
+    exclusions: Vec<&str>,
+    fasta: &'a str,
+    out_file: &str,
+) -> std::result::Result<&'a str, Box<dyn Error>> {
+    // Open the input first so that a bad input path does not leave an empty output behind.
+    let mut reader: fasta::Reader<Box<dyn BufRead>> = fasta::reader::Builder
+        .build_from_path(fasta)
+        .map_err(|_| "Error: Fasta is not valid check file!")?;
+
+    let file = fs::OpenOptions::new()
+        .create(true)
+        .append(true)
+        .open(out_file)?;
+    let mut writer = fasta::Writer::new(file);
+
+    for result in reader.records() {
+        let record = result?;
+        if exclusions.contains(&record.name()) {
+            println!("Found record to exclude: {:?}", &record.name());
+        } else {
+            writer.write_record(&record)?;
+        }
+    }
+
+    Ok("Removed Exclusionary List")
+}
+
+/// Writes the records of `fasta` to `outfile`, leaving out the names listed in `exclude`.
+///
+/// `exclude` is a comma-separated list of record names. A failure is reported on stderr
+/// instead of being silently dropped; the function itself still returns nothing.
+pub fn filter_fasta(fasta: &str, outfile: &str, exclude: &str) {
+    let list_to_exclude: Vec<&str> = exclude.split(',').collect();
+    if let Err(e) = open_fasta(list_to_exclude, fasta, outfile) {
+        eprintln!("Failed to filter {}: {}", fasta, e);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    /// Placeholder test: checks plain integer addition and exercises nothing from this module.
+    #[test]
+    fn it_works() {
+        let sum = 2 + 2;
+        assert_eq!(4, sum);
+    }
+}
diff --git a/src/processors/map_headers.rs b/src/processors/map_headers.rs
new file mode 100644
index 0000000..9c3390f
--- /dev/null
+++ b/src/processors/map_headers.rs
@@ -0,0 +1,134 @@
+use colored::Colorize;
+use std::error::Error;
+use std::fmt;
+use std::fs::File;
+use std::io::{BufRead, BufReader, BufWriter, Write};
+use std::iter::Zip;
+
+use crate::generics::only_keys;
+use crate::generics::validate_fasta;
+
+/// Unit error type whose display text is fixed.
+#[allow(dead_code)]
+#[derive(Debug, Clone)]
+struct EmptyVec;
+
+impl fmt::Display for EmptyVec {
+    /// Always writes the text `Can't Display Empty Vec`.
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        formatter.write_str("Can't Display Empty Vec")
+    }
+}
+
+impl Error for EmptyVec {}
+
+/// Pairs every original header with a generated replacement.
+///
+/// The name at position `i` of `name_vec` (counting from 0) is paired with
+/// `"{new_name}_{i}"`.
+///
+/// Returns an iterator of `(original, generated)` pairs in the order of `name_vec`.
+pub fn create_mapping(
+    name_vec: Vec<std::string::String>,
+    new_name: &str,
+) -> Zip<std::vec::IntoIter<std::string::String>, std::vec::IntoIter<std::string::String>> {
+    // The new names depend on the position only, so `name_vec` can be moved into the zip
+    // without being cloned first.
+    let new_heads: Vec<String> = (0..name_vec.len())
+        .map(|head_counter| format!("{}_{}", new_name, head_counter))
+        .collect();
+
+    name_vec.into_iter().zip(new_heads)
+}
+
+/// Writes the pairs of `mapped` to `output`, one `first<TAB>second` line per pair.
+///
+/// # Panics
+/// Panics if the file cannot be created or if writing (including the final flush) fails.
+pub fn save_mapping(
+    output: &str,
+    mapped: Zip<std::vec::IntoIter<std::string::String>, std::vec::IntoIter<std::string::String>>,
+) {
+    let file: File = File::create(output).expect("Unable to create file");
+    let mut writer: BufWriter<File> = BufWriter::new(file);
+    for (from, to) in mapped {
+        writeln!(writer, "{}\t{}", from, to).expect("Unable to write data");
+    }
+    // `BufWriter` ignores errors when it flushes on drop, so flush explicitly.
+    writer.flush().expect("Unable to write data");
+}
+
+/// Copies the FASTA at `input` to `output`, replacing each header through `mapped`.
+///
+/// Every line starting with `>` is looked up (with its `>` characters removed) among the
+/// first elements of the `mapped` pairs and rewritten as `>` plus the paired second
+/// element. All other lines are copied unchanged. If a key occurs more than once, the
+/// first pair wins.
+///
+/// # Panics
+/// Panics if either file cannot be opened or created, if a line cannot be read or
+/// written, or if a header has no entry in `mapped`.
+pub fn create_mapped_fasta(
+    input: &str,
+    output: &str,
+    mapped: Zip<std::vec::IntoIter<std::string::String>, std::vec::IntoIter<std::string::String>>,
+) {
+    let file_reader: File = File::open(input).expect("CAN'T OPEN FILE");
+    let buff_reader: BufReader<File> = BufReader::new(file_reader);
+    let out_file: File = File::create(output).expect("Unable to create file");
+    let mut new_fasta: BufWriter<File> = BufWriter::new(out_file);
+
+    // Index the mapping once instead of cloning and scanning it for every header line.
+    // `or_insert` keeps the first pair of a repeated key, like a front-to-back search.
+    let mut lookup = std::collections::HashMap::new();
+    for (from, to) in mapped {
+        lookup.entry(from).or_insert(to);
+    }
+
+    for line in buff_reader.lines() {
+        let line: String = line.unwrap();
+        if line.starts_with('>') {
+            // NOTE(review): this removes every '>' in the header, not only the leading
+            // one; kept so lookups behave as before. Confirm that this is intended.
+            let to_replace = line.replace('>', "");
+            let new_head: &String = lookup
+                .get(&to_replace)
+                .unwrap_or_else(|| panic!("No mapping found for header: {}", to_replace));
+            writeln!(new_fasta, ">{}", new_head).expect("Unable to write data");
+        } else {
+            writeln!(new_fasta, "{}", line).expect("Unable to write data");
+        }
+    }
+
+    // `BufWriter` ignores errors when it flushes on drop, so flush explicitly.
+    new_fasta.flush().expect("Unable to write data");
+}
+
+/// Replaces the headers of the FASTA `file` with `"{replacer}_{n}"` names and records the
+/// mapping.
+///
+/// Two paths are built by appending to `output`: `mapped-heads.tsv` receives the header
+/// mapping and `mapped.fasta` receives the rewritten FASTA.
+///
+/// # Panics
+/// Panics if `validate_fasta` rejects `file`.
+pub fn map_fasta_head(
+    file: &String,
+    output: &String,
+    replacer: &String,
+) -> Result<(), Box<dyn Error>> {
+    println!("Mapping headers for file: {}", file);
+    println!("Replace headers with string: {:?}", &replacer);
+
+    let names = match validate_fasta(file) {
+        Ok(names) => names,
+        Err(e) => panic!("Something is wrong with the file! | {}", e),
+    };
+
+    let tsv_path = format!("{}mapped-heads.tsv", output);
+    let fasta_path: String = format!("{output}mapped.fasta");
+
+    // The mapping is consumed twice: once for the TSV and once for the FASTA rewrite.
+    let mapping = create_mapping(Vec::from_iter(only_keys(names)), replacer);
+    save_mapping(&tsv_path, mapping.clone());
+    create_mapped_fasta(file, &fasta_path, mapping);
+
+    println!(
+        "{}\n{}\n\t{}\n\t{}",
+        "FASTA HAS BEEN MAPPED AND REWRITTEN".green(),
+        "FOUND HERE:".green(),
+        &fasta_path.green(),
+        &tsv_path.green()
+    );
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    /// Placeholder test: checks plain integer addition and exercises nothing from this module.
+    #[test]
+    fn it_works() {
+        let sum = 2 + 2;
+        assert_eq!(4, sum);
+    }
+}
diff --git a/src/processors/mod.rs b/src/processors/mod.rs
new file mode 100644
index 0000000..dc0f572
--- /dev/null
+++ b/src/processors/mod.rs
@@ -0,0 +1,7 @@
+pub mod exclude_seq;
+pub mod map_headers;
+pub mod remap_head;
+pub mod split_by_count;
+pub mod split_by_size;
+pub mod tpf_fasta;
+pub mod yaml_validator;
diff --git a/src/processors/remap_head.rs b/src/processors/remap_head.rs
new file mode 100644
index 0000000..ca30d10
--- /dev/null
+++ b/src/processors/remap_head.rs
@@ -0,0 +1,74 @@
+use std::fs::File;
+use std::io::{BufRead, BufReader};
+use std::iter::Zip;
+
+use colored::Colorize;
+
+use crate::generics::validate_fasta;
+use crate::processors::map_headers::create_mapped_fasta;
+
+/// Reads a tab-separated mapping file and returns its first two columns, swapped.
+///
+/// Every non-blank line contributes the pair `(second column, first column)`; columns
+/// after the second are ignored and blank lines are skipped.
+///
+/// # Panics
+/// Panics if the file cannot be opened or read, or if a non-blank line has no tab.
+pub fn pull_map_from_tsv(
+    map_file: &str,
+) -> Zip<std::vec::IntoIter<std::string::String>, std::vec::IntoIter<std::string::String>> {
+    let file_reader: File = File::open(map_file).expect("CAN'T OPEN FILE");
+    let buff_reader: BufReader<File> = BufReader::new(file_reader);
+
+    let mut old_head: Vec<String> = Vec::new();
+    let mut new_head: Vec<String> = Vec::new();
+
+    for line in buff_reader.lines() {
+        // Fail fast: skipping a read error could make `lines()` yield it again forever.
+        let line = line.expect("CAN'T READ FILE");
+        if line.trim().is_empty() {
+            continue;
+        }
+
+        let mut columns = line.split('\t');
+        match (columns.next(), columns.next()) {
+            (Some(old), Some(new)) => {
+                old_head.push(old.to_string());
+                new_head.push(new.to_string());
+            }
+            _ => panic!(
+                "Malformed mapping line (expected two tab-separated columns): {:?}",
+                line
+            ),
+        }
+    }
+
+    new_head.into_iter().zip(old_head)
+}
+
+/// Rewrites the headers of the FASTA `file` using the mapping stored in `map_file`.
+///
+/// The pairs come from `pull_map_from_tsv`, so a header equal to the second TSV column is
+/// replaced by the first column. The result is written to `"{output}_OH.fasta"`. When
+/// `validate_fasta` rejects `file`, `NOT A VALID FASTA` is printed and nothing is written.
+pub fn remapping_head(file: &String, output: &String, map_file: &String) {
+    println!("Mapping headers for file: {}", file);
+    println!("Replace headers with string: {}", map_file);
+
+    if validate_fasta(file).is_err() {
+        println!("NOT A VALID FASTA");
+        return;
+    }
+
+    let restored_map: Zip<std::vec::IntoIter<String>, std::vec::IntoIter<String>> =
+        pull_map_from_tsv(map_file);
+    let new_fasta: String = format!("{output}_OH.fasta");
+    create_mapped_fasta(file, &new_fasta, restored_map);
+
+    println!(
+        "{}\n{}\n\t{}\n",
+        "FASTA HAS BEEN RE-APPED AND REWRITTEN".green(),
+        "FOUND HERE:".green(),
+        &new_fasta.green()
+    );
+}
+
+#[cfg(test)]
+mod tests {
+    /// Placeholder test: checks plain integer addition and exercises nothing from this module.
+    #[test]
+    fn it_works() {
+        let sum = 2 + 2;
+        assert_eq!(4, sum);
+    }
+}
diff --git a/src/processors/split_by_count.rs b/src/processors/split_by_count.rs
new file mode 100644
index 0000000..35ba1fe
--- /dev/null
+++ b/src/processors/split_by_count.rs
@@ -0,0 +1,115 @@
+use crate::generics::sanitise_header;
+
+use compare::{natural, Compare};
+use noodles::fasta::{self, Record};
+use std::cmp::Ordering;
+use std::fs::OpenOptions;
+use std::{
+    fs::{create_dir_all, File},
+    io::BufReader,
+    path::Path,
+};
+
+/// Returns `records` with a sanitised header when `sanitise` is set, otherwise unchanged.
+///
+/// When sanitising, a new definition is built from the output of `sanitise_header` (with
+/// `None` as the second argument) and combined with a copy of the original sequence.
+fn fix_head(records: Record, sanitise: bool) -> Record {
+    if !sanitise {
+        // The record is already owned: hand it back as is instead of cloning it.
+        return records;
+    }
+
+    let header = sanitise_header(records.definition());
+    let definition = fasta::record::Definition::new(header, None);
+    fasta::Record::new(definition, records.sequence().to_owned())
+}
+
+/// Writes `fasta_record` to the file at `outdir`, creating it or truncating an existing one.
+///
+/// Despite its name, `outdir` is the full path of the output file; it is also printed to
+/// stdout.
+///
+/// # Panics
+/// Panics if the file cannot be opened or if a record cannot be written.
+fn write_fasta(outdir: &String, fasta_record: &Vec<Record>) {
+    println!("{}", outdir);
+
+    // A single open call replaces the earlier create-then-reopen sequence, whose first
+    // result was ignored.
+    let file = OpenOptions::new()
+        .write(true)
+        .create(true)
+        .truncate(true)
+        .open(outdir)
+        .expect("creation failed");
+
+    let mut writer = fasta::Writer::new(file);
+    for record in fasta_record {
+        writer.write_record(record).unwrap();
+    }
+}
+
+/// Splits `fasta_file` into several files of `fasta_count` records each.
+///
+/// Output goes to `{output_directory}/{name}/{data_type}/`, where `name` is the input file
+/// name up to its first `.`; the directory is created when missing. Files are named
+/// `{name}_f{file_number}_c{fasta_count}-a{records_in_file}.fa`, and every record passes
+/// through `fix_head` with `sanitise` before being written.
+///
+/// The last file is written only when records remain, so an input whose record count is
+/// an exact multiple of `fasta_count` (or an empty input) no longer produces a trailing
+/// empty `-a0` file.
+///
+/// # Panics
+/// Panics if `fasta_file` has no UTF-8 file name, if the output directory or the input
+/// file cannot be opened, or if a record cannot be read or written.
+pub fn split_file_by_count(
+    fasta_file: &String,
+    output_directory: &String,
+    data_type: &String,
+    sanitise: &bool,
+    fasta_count: &u16,
+) {
+    let path_obj = Path::new(fasta_file);
+    let grab_name = path_obj.file_name().unwrap().to_str().unwrap();
+    // Everything before the first '.' names both the output folder and the output files.
+    let actual_name = grab_name.split('.').next().unwrap_or(grab_name);
+
+    let new_outpath = format!("{}/{}/{}/", output_directory, actual_name, data_type);
+    create_dir_all(&new_outpath).unwrap();
+    println!(
+        "Fasta file for processing: {:?}\nNumber of records per file: {:?}",
+        fasta_file, fasta_count
+    );
+
+    // Builds the path of output file number `file_counter` holding `record_total` records.
+    let outpath_for = |file_counter: u16, record_total: usize| -> String {
+        format!(
+            "{}{}_f{}_c{}-a{}.fa",
+            new_outpath, actual_name, file_counter, fasta_count, record_total
+        )
+    };
+
+    let mut reader = File::open(fasta_file)
+        .map(BufReader::new)
+        .map(fasta::Reader::new)
+        .unwrap();
+
+    let cmp = natural();
+    let mut counter: u16 = 0;
+    let mut file_counter: u16 = 1;
+    let mut record_list: Vec<Record> = Vec::new();
+
+    for result in reader.records() {
+        let record = result.unwrap();
+        counter += 1;
+        record_list.push(fix_head(record, *sanitise));
+
+        // A full batch has been collected: flush it to its own file and start over.
+        if cmp.compare(&counter, fasta_count) == Ordering::Equal {
+            let full_outpath = outpath_for(file_counter, record_list.len());
+            write_fasta(&full_outpath, &record_list);
+            file_counter += 1;
+            counter = 0;
+            record_list.clear();
+        }
+    }
+
+    // Write the remainder, if any.
+    if !record_list.is_empty() {
+        let full_outpath = outpath_for(file_counter, record_list.len());
+        write_fasta(&full_outpath, &record_list);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    /// Placeholder test: checks plain integer addition and exercises nothing from this module.
+    #[test]
+    fn it_works() {
+        let sum = 2 + 2;
+        assert_eq!(4, sum);
+    }
+}
diff --git a/src/processors/split_by_size.rs b/src/processors/split_by_size.rs
new file mode 100644
index 0000000..6445afd
--- /dev/null
+++ b/src/processors/split_by_size.rs
@@ -0,0 +1,14 @@
+pub fn split_file_by_size(fasta_file: &String, mem_size: &u16, _output_directory: &str) {
+    println!("Fasta file for processing: {:?}", fasta_file); // stub: echoes the inputs only
+    println!("Size to chunk fasta into: {:?}", *mem_size); // splitting not implemented yet
+}
+
+#[cfg(test)]
+mod tests {
+    /// Placeholder: only proves the test harness compiles and runs for this module.
+    #[test]
+    fn it_works() {
+        let sum = 2 + 2;
+        assert_eq!(4, sum);
+    }
+}
diff --git a/src/processors/tpf_fasta.rs b/src/processors/tpf_fasta.rs
new file mode 100644
index 0000000..0453798
--- /dev/null
+++ b/src/processors/tpf_fasta.rs
@@ -0,0 +1,278 @@
+use std::fs::OpenOptions;
+use std::io::Write;
+use std::{fs::read_to_string, fs::File, str};
+
+use noodles::core::Position;
+use noodles::fasta;
+use noodles::fasta::record::Sequence;
+use noodles::fasta::repository::adapters::IndexedReader;
+
+use crate::generics::validate_fasta;
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+struct Tpf {
+    ori_scaffold: String, // scaffold name: text before the ':' in the second TPF column
+    start_coord: usize, // first number of the `start-end` pair that follows the ':'
+    end_coord: usize, // second number of the `start-end` pair
+    new_scaffold: String, // third TPF column, with "RL" rewritten to "SUPER"
+    orientation: String, // fourth TPF column; "MINUS" makes check_orientation reverse-complement
+}
+
+/// Text summary of a TPF record.
+impl std::fmt::Display for Tpf {
+    /// Writes a leading tab followed by `<ori_scaffold> -- <start_coord> -- <end_coord>`;
+    /// `new_scaffold` and `orientation` are not part of the rendered text.
+    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let Self { ori_scaffold, start_coord, end_coord, .. } = self;
+        write!(fmt, "\t{} -- {} -- {}", ori_scaffold, start_coord, end_coord)
+    }
+}
+
+#[derive(Debug, PartialEq, Eq)]
+struct NewFasta {
+    tpf: Tpf, // clone of the TPF record this piece was cut for
+    sequence: String, // bases of the piece, as returned by check_orientation
+}
+
+#[derive(Debug)]
+struct MyRecord {
+    name: String, // new scaffold name being assembled in save_to_fasta
+    sequence: Vec<String>, // pieces collected for that scaffold, later joined with N gaps
+}
+
+/// Parses every `?`-prefixed line of the TPF file at `path` into a [`Tpf`], in file order.
+/// Whitespace-separated columns 2-4 hold `scaffold:start-end`, the new scaffold name (with
+/// `RL` rewritten to `SUPER`) and the orientation. Panics on unreadable or malformed input.
+fn parse_tpf(path: &String) -> Vec<Tpf> {
+    let text = read_to_string(path).unwrap_or_else(|e| panic!("cannot read TPF {path}: {e}"));
+    let parse_line = |line: &str| -> Option<Tpf> {
+        let mut cols = line.split_whitespace().skip(1); // whitespace split also covers tabs
+        let mut scaff = cols.next()?.split(':');
+        let ori_scaffold = scaff.next()?.to_owned();
+        let mut coords = scaff.next()?.split('-');
+        let start_coord = coords.next()?.parse::<usize>().ok()?;
+        let end_coord = coords.next()?.parse::<usize>().ok()?;
+        let new_scaffold = cols.next()?.replace("RL", "SUPER");
+        let orientation = cols.next()?.to_owned();
+        Some(Tpf { ori_scaffold, start_coord, end_coord, new_scaffold, orientation })
+    };
+    let tpf_lines = text.lines().filter(|line| line.starts_with('?'));
+    let parsed = tpf_lines.map(|l| parse_line(l).unwrap_or_else(|| panic!("bad TPF line: {l}")));
+    parsed.collect()
+}
+
+/// Selects the TPF records whose `ori_scaffold` equals the scaffold name in `fasta.0`.
+///
+/// Matching records are returned as borrows of `tpf`, in their original order; the vector
+/// is empty when nothing matches.
+///
+/// The second tuple element (`fasta.1`) is accepted but never read.
+fn subset_vec_tpf<'a>(tpf: &'a Vec<Tpf>, fasta: (&std::string::String, &usize)) -> Vec<&'a Tpf> {
+    let (wanted_scaffold, _) = fasta;
+    tpf.iter()
+        .filter(|entry| entry.ori_scaffold == *wanted_scaffold)
+        .collect()
+}
+
+/// Converts a sliced sequence into the text to emit for one TPF record.
+///
+/// When `orientation` is exactly `"MINUS"` the slice is complemented and then reversed
+/// (i.e. reverse-complemented); any other value returns the bases unchanged.
+///
+/// # Panics
+/// Panics if `parsed` is `None` or any step (complementing, range lookup, UTF-8 decoding) fails.
+fn check_orientation(
+    parsed: std::option::Option<noodles::fasta::record::Sequence>,
+    orientation: String,
+) -> String {
+    let first_base = Position::try_from(1).unwrap();
+    let piece = parsed.unwrap();
+    if orientation != "MINUS" {
+        return str::from_utf8(piece.get(first_base..).unwrap()).unwrap().chars().collect();
+    }
+    let complemented: Sequence = piece.complement().collect::<Result<_, _>>().unwrap();
+    let bases = complemented.get(first_base..).unwrap();
+    str::from_utf8(bases).unwrap().chars().rev().collect()
+}
+
+/// Cuts one input scaffold into the pieces described by its TPF records.
+///
+/// For every record in `tpf` (in order) the inclusive `start_coord..=end_coord` range is
+/// sliced out of `sequence`, orientation-adjusted by `check_orientation`, and paired with a
+/// clone of the record.
+///
+/// # Panics
+/// Panics if `sequence` is `None`, or if a coordinate cannot be converted to a `Position`.
+fn parse_seq(
+    sequence: std::option::Option<noodles::fasta::record::Sequence>,
+    tpf: Vec<&Tpf>,
+) -> Vec<NewFasta> {
+    let scaffold_seq = sequence.unwrap();
+    tpf.iter()
+        .map(|&record| {
+            let first = Position::try_from(record.start_coord).unwrap();
+            let last = Position::try_from(record.end_coord).unwrap();
+            let piece = scaffold_seq.slice(first..=last);
+            // A failed slice (`None`) panics inside `check_orientation`, which unwraps it.
+            NewFasta {
+                sequence: check_orientation(piece, record.orientation.to_owned()),
+                tpf: record.to_owned(),
+            }
+        })
+        .collect()
+}
+
+/// Lists each distinct `new_scaffold` name once, in order of first appearance in `tpf_list`.
+fn get_uniques(tpf_list: &Vec<Tpf>) -> Vec<String> {
+    let mut seen: Vec<String> = Vec::new();
+    for name in tpf_list.iter().map(|entry| &entry.new_scaffold) {
+        if seen.iter().all(|known| known != name) {
+            seen.push(name.clone());
+        }
+    }
+    seen
+}
+
+/// Writes `sequence` to `out` as newline-terminated lines of at most `width` bytes.
+///
+/// An empty `sequence` writes nothing, not even a newline.
+fn write_wrapped(out: &mut impl Write, sequence: &[u8], width: usize) -> std::io::Result<()> {
+    for line in sequence.chunks(width) {
+        out.write_all(line)?;
+        out.write_all(b"\n")?;
+    }
+    Ok(())
+}
+
+/// Writes the curated FASTA to `output`, one record per distinct new scaffold name.
+///
+/// Scaffolds are written in order of first appearance in `tpf_data`. For each scaffold, every
+/// TPF record naming it is visited in TPF order and each `fasta_data` entry whose `tpf` equals
+/// that record contributes its sequence. The pieces are joined with `n_length` `N` characters
+/// and wrapped at 60 bytes per line.
+///
+/// `fasta_data` entries whose `tpf` matches no record in `tpf_data` are not written. If the
+/// same record occurs more than once in either input, its sequence is emitted once per match.
+///
+/// Side effects besides `output`: a trace of the pieces used per scaffold is written to
+/// `debug.txt` in the current working directory, and progress lines are printed to stdout.
+///
+/// # Panics
+/// Panics if either file cannot be created, written or flushed.
+fn save_to_fasta(fasta_data: Vec<NewFasta>, tpf_data: Vec<Tpf>, output: &String, n_length: usize) {
+    const LINE_LEN: usize = 60;
+    const WRITE_ERR: &str = "Unable to write to file";
+
+    // `File::create` already opens for writing and truncates, so one open per file is enough.
+    // Both outputs are buffered so that a 60-byte sequence line does not cost a write call.
+    let mut file = std::io::BufWriter::new(File::create(output).expect("creation failed"));
+    let mut file2 = std::io::BufWriter::new(File::create("debug.txt").expect("creation failed"));
+
+    // Gap inserted between consecutive pieces of one scaffold; identical for every scaffold.
+    let n_string = "N".repeat(n_length);
+
+    // Note: `fasta_data` is still scanned once per matching TPF record, so the total work
+    // grows with (TPF records x FASTA pieces).
+    let mut no_more: Vec<String> = Vec::new();
+    for x in get_uniques(&tpf_data) {
+        println!("NOW WRITING DATA FOR: {:?}", &x);
+        writeln!(file, ">{x}").expect(WRITE_ERR);
+        writeln!(file2, ">{x}").expect(WRITE_ERR);
+
+        // Collect this scaffold's pieces in TPF order, logging each one to the debug trace.
+        let mut data = MyRecord {
+            name: x.clone(),
+            sequence: Vec::new(),
+        };
+        for tpf in tpf_data.iter().filter(|tpf| tpf.new_scaffold == x) {
+            for fasta in fasta_data.iter().filter(|fasta| fasta.tpf == *tpf) {
+                // `Tpf`'s `Display` output starts with a tab itself, hence two tabs per line.
+                writeln!(file2, "\t{}", tpf).expect(WRITE_ERR);
+                data.sequence.push(fasta.sequence.to_owned());
+            }
+        }
+
+        // Raw bytes are written, so wrapping cannot fail on a multi-byte character boundary.
+        let joined = data.sequence.join(n_string.as_str());
+        write_wrapped(&mut file, joined.as_bytes(), LINE_LEN).expect(WRITE_ERR);
+
+        no_more.push(data.name);
+        println!("NO LONG SCANNING FOR: {:?}", &no_more)
+    }
+
+    // `BufWriter` discards errors when it flushes on drop, so flush explicitly.
+    file.flush().expect(WRITE_ERR);
+    file2.flush().expect(WRITE_ERR);
+}
+
+/// Builds a curated FASTA from `fasta_file` using the layout described by `tpf_file`.
+///
+/// The TPF (generated by Pretext and the agp_to_tpf script) lists, for each new scaffold, the
+/// coordinate ranges of the input scaffolds that make it up. Every scaffold reported by
+/// `validate_fasta` is fetched through an indexed reader and cut into its TPF pieces; the
+/// pieces are then handed to `save_to_fasta`, which joins them into `output`.
+/// Progress messages are printed to stdout along the way.
+///
+/// # Arguments
+/// * `fasta_file` - path of the input FASTA (read by `validate_fasta` and the indexed reader).
+/// * `tpf_file` - path of the TPF file parsed by `parse_tpf`.
+/// * `_sort` - accepted but currently unused.
+/// * `output` - path of the FASTA file to write.
+/// * `n_length` - number of `N` characters placed between joined pieces.
+///
+/// # Panics
+/// Panics if the FASTA fails validation, if the indexed reader cannot be built (the message
+/// carries the underlying error), if a sequence cannot be fetched, or if a later step panics.
+/// NOTE(review): the indexed reader presumably needs a `.fai` index next to the FASTA —
+/// confirm against the noodles documentation.
+pub fn curate_fasta(
+    fasta_file: &String,
+    tpf_file: &String,
+    _sort: &bool,
+    output: &String,
+    n_length: &usize,
+) {
+    println!("LET'S GET CURATING THAT FASTA!");
+
+    // The work runs under `stacker::maybe_grow`, called with a 32 KiB red zone and a 5 MiB
+    // stack size.
+    stacker::maybe_grow(32 * 1024, 1024 * 5120, || {
+        // Validation also yields the scaffold list that drives the loop below.
+        let fasta_d = match validate_fasta(fasta_file) {
+            Ok(fasta_d) => fasta_d,
+            Err(e) => panic!("Something is wrong with the file! | {}", e),
+        };
+        let tpf_data = parse_tpf(tpf_file);
+
+        // Indexed access to whole scaffolds by name. Building the reader is fallible, so
+        // fail with the path and the underlying error rather than an anonymous `todo!()`.
+        let reader = fasta::indexed_reader::Builder::default()
+            .build_from_path(fasta_file)
+            .unwrap_or_else(|e| panic!("Unable to open indexed FASTA {}: {}", fasta_file, e));
+        let fasta_repo = fasta::Repository::new(IndexedReader::new(reader));
+
+        // A scaffold may have several TPF entries: slice out the pieces of each scaffold
+        // in turn and collect them all.
+        let mut new_fasta_data: Vec<NewFasta> = Vec::new();
+        for i in fasta_d {
+            let subset_tpf = subset_vec_tpf(&tpf_data, (&i.0, &i.1));
+            let sequence = match fasta_repo.get(&i.0).transpose() {
+                Ok(sequence) => sequence,
+                Err(e) => panic!("{:?}", e),
+            };
+            new_fasta_data.extend(parse_seq(sequence, subset_tpf));
+        }
+
+        save_to_fasta(new_fasta_data, tpf_data, output, *n_length)
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    /// Placeholder: only proves the test harness compiles and runs for this module.
+    #[test]
+    fn it_works() {
+        let sum = 2 + 2;
+        assert_eq!(4, sum);
+    }
+}
diff --git a/src/processors/yaml_validator.rs b/src/processors/yaml_validator.rs
new file mode 100644
index 0000000..a0a595c
--- /dev/null
+++ b/src/processors/yaml_validator.rs
@@ -0,0 +1,272 @@
+use std::fs::{self, File};
+use std::io::ErrorKind;
+use std::path::PathBuf;
+
+use colored::Colorize;
+use csv::Error;
+use csv::ReaderBuilder;
+use noodles::fasta;
+use serde::{Deserialize, Serialize};
+
+// Would be nice if there was a simple format_check
+// use noodles::cram as cram;
+
+#[derive(Debug, Serialize, Deserialize)]
+struct TreeValYaml { // top-level document that validate_yaml deserialises with serde_yaml
+    assembly: Assembly,
+    reference_file: String, // path checked as the "REFERENCE" FASTA
+    assem_reads: AssemReads,
+    alignment: Alignment,
+    self_comp: SelfComp, // not inspected by validate_yaml
+    intron: Intron, // not inspected by validate_yaml
+    telomere: Telomere, // not inspected by validate_yaml
+    synteny: Synteny,
+    busco: Busco,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+struct Assembly {
+    level: String,
+    sample_id: String, // printed in the "RUNNING VALIDATE-YAML FOR SAMPLE" banner
+    latin_name: String,
+    class_t: String, // appended as a path component to the geneset and synteny roots
+    asm_version: u16,
+    geval_type: String,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+struct AssemReads {
+    pacbio: String, // directory checked as "PACBIO" and scanned by validate_data("pacbio")
+    hic: String, // directory checked as "HIC" and scanned by validate_data("hic")
+    supplementary: String, // not inspected by validate_yaml
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+struct Alignment {
+    data_dir: String, // root from which the geneset CSV paths are built
+    common_name: String,
+    geneset: String, // comma-separated geneset names; one CSV path is checked per name
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+struct SelfComp {
+    motif_len: u16,
+    mummer_chunk: u16,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+struct Intron {
+    size: String,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+struct Telomere {
+    teloseq: String,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+struct Synteny {
+    synteny_genome_path: String, // root directory; class_t + "/" is appended to find genomes
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+struct Busco {
+    lineages_path: String, // root directory; "/lineages/" + lineage is appended and checked
+    lineage: String, // final path component of the checked BUSCO database path
+}
+
+//
+// CSV STRUCT
+//
+//#[derive(Deserialize)]
+//struct Record {
+//    org: String,
+//    type: String,
+//    data_file: String
+//}
+
+/// Reports whether `path` exists and runs the content check registered for `field_id`.
+/// An existing path prints a green "PATH EXISTS" line; `"REFERENCE"` paths then go through
+/// `validate_fasta` and `"GENESET-CSV"` paths through `validate_csv`, whose error (if any) is
+/// printed. A path whose metadata cannot be read prints a red "CHECK YAML!" line instead.
+pub fn validate_paths(path: &str, field_id: &str) {
+    if fs::metadata(path).is_err() {
+        println!(
+            "{}{}   \t{}\t{}",
+            "<-".red().bold(),
+            field_id.red().bold(),
+            "| CHECK YAML!:".red().bold(),
+            path
+        );
+        return;
+    }
+    let exists_banner = "| PATH EXISTS: ".green();
+    println!("{}{}   \t{}\t{}", ">-".green(), field_id.green(), exists_banner, path.green());
+    match field_id {
+        "REFERENCE" => validate_fasta(path),
+        "GENESET-CSV" => {
+            if let Err(e) = validate_csv(path) {
+                println!("{}\t{}", "<-GENESET-CSV UNREADABLE:".red().bold(), e);
+            }
+        }
+        // Fields without a content check: the existence line above is the whole report.
+        _ => {}
+    }
+}
+
+/// Counts the entries yielded by a FASTA reader over `path` and prints the total.
+///
+/// Entries are only counted, never inspected, so records that fail to parse still count.
+///
+/// # Panics
+/// Panics with "NO VALID HEADER / SEQUENCE PAIRS" if the reader cannot be built for `path`.
+pub fn validate_fasta(path: &str) {
+    let mut fasta_reader = fasta::reader::Builder
+        .build_from_path(path)
+        .expect("NO VALID HEADER / SEQUENCE PAIRS");
+    let pair_total = fasta_reader.records().count();
+    println!("{} {} {}", ">- REFERENCE H/S PAIRS:".green(), pair_total, "H/S PAIRS".green())
+}
+
+/// Counts the data records of the comma-separated file at `path` and prints the total.
+///
+/// The reader is configured with a header row. Record results are only counted, never
+/// inspected, so rows that fail to parse are still included in the total.
+///
+/// # Errors
+/// Returns the error from opening `path`, converted into a `csv::Error`.
+pub fn validate_csv(path: &str) -> Result<(), Error> {
+    let source = File::open(path)?;
+    let mut builder = ReaderBuilder::new();
+    builder.has_headers(true).delimiter(b',');
+    let mut csv_reader = builder.from_reader(source);
+
+    let total = csv_reader.records().count();
+    let (label, closer) = (">-GENESET-RECORD-COUNT: >".green(), "<".green());
+    println!("{} {} {}", label, total, closer);
+    Ok(())
+}
+
+//
+// FUNCTION: Check if pacbio has fasta.gz files, cram has cram and crai and synteny has fasta
+//           could make this much easier and concise by passing in a list of file types to check
+//           validatedata(path, [fa, fna, fasta])
+//
+/// Lists the files in directory `path` that look like `dtype` data and prints the outcome.
+///
+/// Recognised `dtype` values and the file-name suffixes they accept:
+/// * `"pacbio"` - `.fasta.gz`
+/// * `"hic"` - `.cram`, `.crai`
+/// * `"synteny"` - `.fa`, `.fasta`, `.fna`
+///
+/// Any other `dtype` prints nothing, and a directory that is not found is skipped silently.
+///
+/// Suffixes are matched against the whole file name rather than `Path::extension`, because
+/// the extension is only the text after the final dot (`gz` for `reads.fasta.gz`) and can
+/// therefore never equal a two-part suffix such as `fasta.gz`.
+///
+/// # Panics
+/// Panics on any `read_dir` error other than `NotFound`.
+pub fn validate_data(path: &str, dtype: &str) {
+    let data_files = match fs::read_dir(path) {
+        Ok(entries) => entries,
+        Err(e) if e.kind() == ErrorKind::NotFound => return,
+        Err(e) => panic!("{} {e}", "<-DIRECTORY PATH DOESN'T EXIST: ".red().bold()),
+    };
+
+    // (accepted suffixes, message when nothing matches, label printed before the matches)
+    let (suffixes, none_found, found_label): (&[&str], &str, &str) = match dtype {
+        "pacbio" => (&[".fasta.gz"], "<-NO PACBIO DATA FILES", ">-YOUR FILES ARE:"),
+        "hic" => (&[".cram", ".crai"], "<-NO HIC DATA FILES", ">-YOUR FILES ARE:"),
+        "synteny" => (
+            &[".fa", ".fasta", ".fna"],
+            "<-NO SYNTENIC GENOMES",
+            ">-YOUR GENOMES ARE:",
+        ),
+        _ => return,
+    };
+
+    // Directory entries that cannot be read are skipped.
+    let files: Vec<PathBuf> = data_files
+        .filter_map(|f| f.ok())
+        .filter(|entry| {
+            let name = entry.file_name();
+            let name = name.to_string_lossy();
+            // Require something before the suffix, so a bare ".fa" is not counted as data.
+            suffixes
+                .iter()
+                .any(|suffix| name.len() > suffix.len() && name.ends_with(*suffix))
+        })
+        .map(|entry| entry.path())
+        .collect();
+
+    if files.is_empty() {
+        println!("{}", none_found.red())
+    } else {
+        println!("{} {:?}", found_label.green(), &files);
+    }
+}
+
+/// Validates the files and directories referenced by the TreeVal YAML at `file`.
+/// The YAML is deserialised into `TreeValYaml`; each configured path goes through
+/// `validate_paths`, and the PacBio, Hi-C and synteny directories are also scanned by
+/// `validate_data`. Results are printed to stdout; `_verbose` and `_output` are unused.
+/// Panics if `file` cannot be opened or does not deserialise into `TreeValYaml`.
+pub fn validate_yaml(file: &String, _verbose: &bool, _output: &str) {
+    println!("Validating Yaml: {}", file.purple());
+
+    let handle = fs::File::open(file).expect("Unable to read from file");
+    let config: TreeValYaml = serde_yaml::from_reader(handle).expect("Unable to read from file");
+    let class_t = config.assembly.class_t.as_str();
+
+    println!("RUNNING VALIDATE-YAML FOR SAMPLE: {}", config.assembly.sample_id.purple());
+
+    validate_paths(&config.reference_file, "REFERENCE");
+    validate_paths(&config.alignment.data_dir, "GENESET");
+    validate_paths(&config.synteny.synteny_genome_path, "SYNTENY");
+    validate_paths(&config.busco.lineages_path, "BUSCO");
+
+    // Read directories: check the path first, then look for data files inside it.
+    for (dir, label, dtype) in [
+        (&config.assem_reads.pacbio, "PACBIO", "pacbio"),
+        (&config.assem_reads.hic, "HIC", "hic"),
+    ] {
+        validate_paths(dir, label);
+        validate_data(dir, dtype);
+    }
+
+    println!("{}", "CHECKING GENESET DIRECTORY RESOLVES".blue());
+    let data_dir = config.alignment.data_dir.as_str();
+    for set in config.alignment.geneset.split(',') {
+        let csv_path = [data_dir, class_t, "/csv_data/", set, "-data.csv"].concat();
+        validate_paths(&csv_path, "GENESET-CSV");
+    }
+
+    println!("{}", "CHECKING SYNTENY DIRECTORY RESOLVES".blue());
+    let synteny_full = [config.synteny.synteny_genome_path.as_str(), class_t, "/"].concat();
+    validate_paths(&synteny_full, "SYNTENY-FASTA");
+    validate_data(&synteny_full, "synteny");
+
+    println!("{}", "CHECKING BUSCO DIRECTORY RESOLVES".blue());
+    let busco = &config.busco;
+    let busco_path = [busco.lineages_path.as_str(), "/lineages/", busco.lineage.as_str()].concat();
+    validate_paths(&busco_path, "BUSCO-DB");
+
+    println!(
+        "{}\n{}\n{}\n{}\n{}",
+        "VALIDATION COMPLETE".purple().bold(),
+        "GENERAL INFORMATION:".purple().bold(),
+        "Check the log to see what failed".bold(),
+        "FULL : ONLY synteny fails are permitted".purple(),
+        "RAPID: geneset, busco and synteny fails are permitted".purple()
+    );
+}
+
+#[cfg(test)]
+mod tests {
+    /// Placeholder: only proves the test harness compiles and runs for this module.
+    #[test]
+    fn it_works() {
+        let sum = 2 + 2;
+        assert_eq!(4, sum);
+    }
+}
diff --git a/src/remap_head.rs b/src/remap_head.rs
deleted file mode 100644
index 83e20ef..0000000
--- a/src/remap_head.rs
+++ /dev/null
@@ -1,73 +0,0 @@
-pub mod remapping_headers {
-    use crate::map_headers;
-    use clap::ArgMatches;
-    use colored::Colorize;
-    use std::fs::File;
-    use std::io::{BufRead, BufReader};
-    use std::iter::Zip;
-
-    use crate::generics::validate_fasta;
-
-    pub fn pull_map_from_tsv(
-        map_file: &str,
-    ) -> Zip<std::vec::IntoIter<std::string::String>, std::vec::IntoIter<std::string::String>> {
-        let file_reader: File = File::open(map_file).expect("CAN'T OPEN FILE");
-        let buff_reader: BufReader<File> = BufReader::new(file_reader);
-
-        let mut old_head: Vec<String> = Vec::new();
-        let mut new_head: Vec<String> = Vec::new();
-
-        for line in buff_reader.lines() {
-            match line {
-                Ok(string) => {
-                    let mut old_new = string.split('\t');
-                    let x = old_new.next().unwrap();
-                    let y = old_new.next().unwrap();
-                    old_head.push(x.to_string());
-                    new_head.push(y.to_string());
-                }
-                Err(_) => {
-                    print!("")
-                }
-            };
-        }
-
-        let mapped_heads: Zip<std::vec::IntoIter<String>, std::vec::IntoIter<String>> =
-            new_head.into_iter().zip(old_head);
-
-        mapped_heads
-    }
-
-    pub fn remapping_head(arguments: std::option::Option<&ArgMatches>) {
-        let file: &String = arguments.unwrap().get_one::<String>("fasta-file").unwrap();
-        let map_file: &String = arguments.unwrap().get_one::<String>("map-file").unwrap();
-        let output: &String = arguments
-            .unwrap()
-            .get_one::<String>("output-directory")
-            .unwrap();
-
-        println!("Mapping headers for file: {}", file);
-        println!("Replace headers with string: {}", map_file);
-
-        match validate_fasta(file) {
-            Ok(_thing) => {
-                let new_map: Zip<std::vec::IntoIter<String>, std::vec::IntoIter<String>> =
-                    pull_map_from_tsv(map_file);
-
-                let new_fasta: String = format!("{output}_OH.fasta");
-
-                map_headers::mapping_headers::create_mapped_fasta(file, &new_fasta, new_map);
-
-                println!(
-                    "{}\n{}\n\t{}\n",
-                    "FASTA HAS BEEN RE-APPED AND REWRITTEN".green(),
-                    "FOUND HERE:".green(),
-                    &new_fasta.green()
-                );
-            }
-            Err(_) => {
-                println!("NOT A VALID FASTA")
-            }
-        };
-    }
-}
diff --git a/src/split_by_count.rs b/src/split_by_count.rs
deleted file mode 100644
index 1396f00..0000000
--- a/src/split_by_count.rs
+++ /dev/null
@@ -1,111 +0,0 @@
-pub mod split_by_count_mod {
-    use crate::generics::sanitise_header;
-    use clap::ArgMatches;
-    use compare::{natural, Compare};
-    use noodles::fasta::{self, Record};
-    use std::cmp::Ordering;
-    use std::fs::OpenOptions;
-    use std::{
-        fs::{create_dir_all, File},
-        io::BufReader,
-        path::Path,
-    };
-
-    #[allow(clippy::needless_return)]
-    fn fix_head(records: Record, sanitise: bool) -> Record {
-        if sanitise {
-            let header = sanitise_header(records.definition());
-            let definition = fasta::record::Definition::new(header, None);
-            let seq = records.sequence().to_owned();
-            return fasta::Record::new(definition, seq);
-        } else {
-            return records.to_owned();
-        };
-    }
-
-    fn write_fasta(outdir: &String, fasta_record: &Vec<Record>) {
-        println!("{}", outdir);
-
-        let _data_file = File::create(outdir);
-        let file = OpenOptions::new()
-            .append(true)
-            .open(outdir)
-            .expect("creation failed");
-
-        let mut writer = fasta::Writer::new(file);
-        for i in fasta_record {
-            writer.write_record(i).unwrap();
-        }
-    }
-
-    pub fn split_file_by_count(arguments: std::option::Option<&ArgMatches>) {
-        let sanitise: &bool = arguments.unwrap().get_one::<bool>("sanitise").unwrap();
-        let fasta_file = arguments.unwrap().get_one::<String>("fasta-file").unwrap();
-        let path_obj = Path::new(fasta_file);
-        let grab_name = path_obj.file_name().unwrap();
-        let actual_list: Vec<&str> = grab_name.to_str().unwrap().split('.').collect();
-        let actual_name = actual_list[0];
-
-        let data_type = arguments.unwrap().get_one::<String>("data_type").unwrap();
-
-        let outpath = arguments
-            .unwrap()
-            .get_one::<String>("output-directory")
-            .unwrap();
-
-        let new_outpath = format!("{}/{}/{}/", outpath, actual_name, data_type);
-        create_dir_all(new_outpath.clone()).unwrap();
-        let fasta_count = arguments.unwrap().get_one::<u16>("count").unwrap();
-        println!(
-            "Fasta file for processing: {:?}\nNumber of records per file: {:?}",
-            fasta_file, fasta_count
-        );
-
-        let mut counter: u16 = 0;
-        let mut file_counter: u16 = 1;
-
-        let file_name: Vec<&str> = actual_name.split('.').collect();
-
-        let mut reader = File::open(fasta_file)
-            .map(BufReader::new)
-            .map(fasta::Reader::new)
-            .unwrap();
-
-        let mut record_list: Vec<Record> = Vec::new();
-        for result in reader.records() {
-            let record = result.unwrap();
-            counter += 1;
-
-            let final_rec = fix_head(record, *sanitise);
-            record_list.push(final_rec);
-
-            let cmp = natural();
-            let compared = cmp.compare(&counter, fasta_count);
-            if compared == Ordering::Equal {
-                let full_outpath = format!(
-                    "{}{}_f{}_c{}-a{}.fa",
-                    new_outpath,
-                    file_name[0],
-                    file_counter,
-                    &fasta_count,
-                    &record_list.len()
-                );
-
-                write_fasta(&full_outpath, &record_list);
-                file_counter += 1;
-                counter = 0;
-                record_list = Vec::new();
-            }
-        }
-
-        let full_outpath = format!(
-            "{}{}_f{}_c{}-a{}.fa",
-            new_outpath,
-            file_name[0],
-            file_counter,
-            &fasta_count,
-            &record_list.len()
-        );
-        write_fasta(&full_outpath, &record_list);
-    }
-}
diff --git a/src/split_by_size.rs b/src/split_by_size.rs
deleted file mode 100644
index f1b4a7b..0000000
--- a/src/split_by_size.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-pub mod split_by_size_mod {
-    use clap::ArgMatches;
-
-    pub fn split_file_by_size(arguments: std::option::Option<&ArgMatches>) {
-        let fasta_file: &String = arguments.unwrap().get_one::<String>("fasta-file").unwrap();
-        println!("Fasta file for processing: {:?}", &fasta_file);
-        println!(
-            "Size to chunk fasta into: {:?}",
-            arguments.unwrap().get_one::<u16>("mem-size").unwrap()
-        );
-    }
-}
diff --git a/src/tpf_fasta.rs b/src/tpf_fasta.rs
deleted file mode 100644
index fc5ec7e..0000000
--- a/src/tpf_fasta.rs
+++ /dev/null
@@ -1,277 +0,0 @@
-pub mod tpf_fasta_mod {
-    use clap::ArgMatches;
-    use noodles::core::Position;
-    use noodles::fasta;
-    use noodles::fasta::record::Sequence;
-    use noodles::fasta::repository::adapters::IndexedReader;
-    use std::fs::OpenOptions;
-    use std::io::Write;
-    use std::{fs::read_to_string, fs::File, str};
-
-    use crate::generics::validate_fasta;
-
-    #[derive(Debug, Clone, PartialEq, Eq)]
-    struct Tpf {
-        ori_scaffold: String,
-        start_coord: usize,
-        end_coord: usize,
-        new_scaffold: String,
-        orientation: String,
-    }
-
-    impl std::fmt::Display for Tpf {
-        fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
-            write!(
-                fmt,
-                "\t{} -- {} -- {}",
-                self.ori_scaffold, self.start_coord, self.end_coord
-            )
-        }
-    }
-
-    #[derive(Debug, PartialEq, Eq)]
-    struct NewFasta {
-        tpf: Tpf,
-        sequence: String,
-    }
-
-    #[derive(Debug)]
-    struct MyRecord {
-        name: String,
-        sequence: Vec<String>,
-    }
-
-    fn parse_tpf(path: &String) -> Vec<Tpf> {
-        let mut all_tpf: Vec<Tpf> = Vec::new();
-        for line in read_to_string(path).unwrap().lines() {
-            if line.starts_with('?') {
-                let line_replaced = line.replace('\t', " ");
-                let line_list: Vec<&str> = line_replaced.split_whitespace().collect();
-                let scaff_data: Vec<&str> = line_list[1].split(':').collect();
-                let scaff_coords: Vec<&str> = scaff_data[1].split('-').collect();
-                let data = Tpf {
-                    ori_scaffold: scaff_data[0].to_owned(),
-                    start_coord: scaff_coords[0].to_owned().parse::<usize>().unwrap(),
-                    end_coord: scaff_coords[1].to_owned().parse::<usize>().unwrap(),
-                    new_scaffold: line_list[2].to_owned().replace("RL", "SUPER"),
-                    orientation: line_list[3].to_owned(),
-                };
-                all_tpf.push(data);
-            }
-        }
-        all_tpf
-    }
-
-    fn subset_vec_tpf<'a>(
-        tpf: &'a Vec<Tpf>,
-        fasta: (&std::string::String, &usize),
-    ) -> Vec<&'a Tpf> {
-        //
-        // Subset the Vec<TPF> based on a search through the fasta
-        //
-        let mut subset_tpf: Vec<&Tpf> = Vec::new();
-        for i in tpf {
-            if i.ori_scaffold == *fasta.0 {
-                subset_tpf.push(i)
-            }
-        }
-        subset_tpf
-    }
-
-    fn check_orientation(
-        parsed: std::option::Option<noodles::fasta::record::Sequence>,
-        orientation: String,
-    ) -> String {
-        if orientation == "MINUS" {
-            let start = Position::try_from(1).unwrap();
-            let parse_orientation = parsed.unwrap();
-            let compliment: Sequence = parse_orientation
-                .complement()
-                .collect::<Result<_, _>>()
-                .unwrap();
-            let seq = compliment.get(start..).unwrap();
-            str::from_utf8(seq).unwrap().chars().rev().collect()
-        } else {
-            let start = Position::try_from(1).unwrap();
-            let parse_orientation = parsed.unwrap();
-            let seq = parse_orientation.get(start..).unwrap();
-            str::from_utf8(seq).unwrap().chars().collect()
-        }
-    }
-
-    fn parse_seq(
-        sequence: std::option::Option<noodles::fasta::record::Sequence>,
-        tpf: Vec<&Tpf>,
-    ) -> Vec<NewFasta> {
-        let mut subset_tpf: Vec<NewFasta> = Vec::new();
-        //
-        // Take the input sequence and scaffold name
-        // Parse the input sequence based on the data contained in
-        // the TPF. Which is already a subset based on scaff name
-        //
-
-        let new_seq = sequence.unwrap(); // Option(Sequence ()) -> Sequence ()
-        for &i in &tpf {
-            let start = Position::try_from(i.start_coord).unwrap();
-            let end = Position::try_from(i.end_coord).unwrap();
-            //let region = Region::new(&i.new_scaffold, start.unwrap()..=end.unwrap());
-            let parsed = new_seq.slice(start..=end);
-            let the_sequence = check_orientation(parsed, i.orientation.to_owned());
-            let data = NewFasta {
-                tpf: i.to_owned(),
-                sequence: the_sequence,
-            };
-            subset_tpf.push(data);
-        }
-        subset_tpf
-    }
-
-    fn get_uniques(tpf_list: &Vec<Tpf>) -> Vec<String> {
-        let mut uniques: Vec<String> = Vec::new();
-
-        for i in tpf_list {
-            if !uniques.contains(&i.new_scaffold) {
-                uniques.push(i.new_scaffold.to_owned())
-            }
-        }
-        uniques
-    }
-
-    fn save_to_fasta(
-        fasta_data: Vec<NewFasta>,
-        tpf_data: Vec<Tpf>,
-        output: &String,
-        n_length: usize,
-    ) {
-        //
-        // TPF is in the input TPF order, this will continue to be the case until
-        // the script is modified and the Tpf struct gets modified in place for some reason
-        //
-        let _data_file = File::create(output);
-        let mut file = OpenOptions::new()
-            .write(true)
-            .open(output)
-            .expect("creation failed");
-
-        let _debugger = File::create("debug.txt");
-        let mut file2 = OpenOptions::new()
-            .write(true)
-            .open("debug.txt")
-            .expect("creation failed");
-
-        let uniques = get_uniques(&tpf_data);
-
-        // This is inefficient as we are scanning through the fasta_data, uniques number of times
-        // If uniques is 10 long and fasta is 100, then this is 1000 scans through in total.
-        let mut no_more: Vec<String> = Vec::new();
-        for x in uniques {
-            println!("NOW WRITING DATA FOR: {:?}", &x);
-            // X = "SUPER_1"
-            let stringy = format!(">{x}\n");
-            file.write_all(stringy.as_bytes())
-                .expect("Unable to write to file");
-            file2
-                .write_all(stringy.as_bytes())
-                .expect("Unable to write to file");
-
-            let mut data: MyRecord = MyRecord {
-                name: "".to_string(),
-                sequence: Vec::new(),
-            };
-
-            no_more.push(x.to_owned());
-            x.clone_into(&mut data.name);
-            for tpf in &tpf_data {
-                if tpf.new_scaffold == x {
-                    for fasta in &fasta_data {
-                        if fasta.tpf == *tpf {
-                            let stringy = format!("\t{}\n", tpf);
-                            file2
-                                .write_all(stringy.as_bytes())
-                                .expect("Unable to write to file");
-                            data.sequence.push(fasta.sequence.to_owned());
-                        }
-                    }
-                }
-            }
-
-            let line_len: usize = 60;
-            let fixed = data.sequence;
-            let n_string = "N".repeat(n_length);
-            let fixed2 = fixed.join(&n_string); //.join required a borrowed str
-            let fixed3 = fixed2
-                .as_bytes()
-                .chunks(line_len)
-                .map(str::from_utf8)
-                .collect::<Result<Vec<&str>, _>>()
-                .unwrap();
-
-            for i in fixed3 {
-                let formatted = i.to_owned() + "\n";
-                file.write_all(formatted.as_bytes()).unwrap();
-            }
-            println!("NO LONG SCANNING FOR: {:?}", &no_more)
-        }
-    }
-
-    #[allow(clippy::needless_borrow)]
-    #[allow(clippy::let_and_return)]
-    pub fn curate_fasta(arguments: std::option::Option<&ArgMatches>) {
-        //
-        // Generate a curated fasta file based on the input TPF file
-        // which was generated by Pretext and the agp_to_tpf script.
-        // This new fasta file contains a new scaffold naming as well
-        // as pieced together sequences generated by the splitting of
-        // data in Pretext.
-        //
-        let fasta_file: &String = arguments.unwrap().get_one::<String>("fasta").unwrap();
-        let tpf_file: &String = arguments.unwrap().get_one::<String>("tpf").unwrap();
-        let n_length: &usize = arguments.unwrap().get_one::<usize>("n_length").unwrap();
-        let output: &String = arguments.unwrap().get_one::<String>("output").unwrap();
-        println!("LET'S GET CURATING THAT FASTA!");
-        stacker::maybe_grow(32 * 1024, 1024 * 5120, || {
-            match validate_fasta(fasta_file) {
-                Ok(fasta_d) => {
-                    let tpf_data = parse_tpf(&tpf_file);
-                    //let _validated = varify_validity(&tpf_data, &fasta_d);
-
-                    //
-                    // Start indexed reader of the input fasta
-                    // if valid then use the data
-                    //
-                    let reader =
-                        fasta::indexed_reader::Builder::default().build_from_path(fasta_file);
-                    let fasta_repo = match reader {
-                        Ok(data) => {
-                            let adapter = IndexedReader::new(data);
-                            let repository = fasta::Repository::new(adapter);
-                            repository
-                        }
-                        Err(_) => todo!(),
-                    };
-
-                    //
-                    // For unique scaffold in the fasta file iter through and
-                    // parse sequence for each line in the tpf
-                    // The tpf will contain multiple enteries for each scaffold, minimum of one entry.
-                    //
-                    let mut new_fasta_data: Vec<NewFasta> = Vec::new();
-                    for i in fasta_d {
-                        let subset_tpf = subset_vec_tpf(&tpf_data, (&i.0, &i.1));
-                        let sequence = fasta_repo.get(&i.0).transpose();
-
-                        match sequence {
-                            Ok(data) => {
-                                let subset_results = parse_seq(data, subset_tpf);
-                                new_fasta_data.extend(subset_results);
-                            }
-                            Err(e) => panic!("{:?}", e),
-                        };
-                    }
-                    save_to_fasta(new_fasta_data, tpf_data, output, n_length.to_owned())
-                }
-                Err(e) => panic!("Something is wrong with the file! | {}", e),
-            }
-        })
-    }
-}
diff --git a/src/yaml_validator.rs b/src/yaml_validator.rs
deleted file mode 100644
index e23d121..0000000
--- a/src/yaml_validator.rs
+++ /dev/null
@@ -1,272 +0,0 @@
-pub mod yaml_validator_mod {
-    use clap::ArgMatches;
-    use colored::Colorize;
-    use csv::Error;
-    use csv::ReaderBuilder;
-    use noodles::fasta;
-    use serde::{Deserialize, Serialize};
-    use std::fs::{self, File};
-    use std::io::ErrorKind;
-    use std::path::PathBuf;
-    // Would be nice if there was a simple format_check
-    // use noodles::cram as cram;
-
-    #[derive(Debug, Serialize, Deserialize)]
-    struct TreeValYaml {
-        assembly: Assembly,
-        reference_file: String,
-        assem_reads: AssemReads,
-        alignment: Alignment,
-        self_comp: SelfComp,
-        intron: Intron,
-        telomere: Telomere,
-        synteny: Synteny,
-        busco: Busco,
-    }
-
-    #[derive(Debug, Serialize, Deserialize)]
-    struct Assembly {
-        level: String,
-        sample_id: String,
-        latin_name: String,
-        classT: String,
-        asmVersion: u16,
-        gevalType: String,
-    }
-
-    #[derive(Debug, Serialize, Deserialize)]
-    struct AssemReads {
-        pacbio: String,
-        hic: String,
-        supplementary: String,
-    }
-
-    #[derive(Debug, Serialize, Deserialize)]
-    struct Alignment {
-        data_dir: String,
-        common_name: String,
-        geneset: String,
-    }
-
-    #[derive(Debug, Serialize, Deserialize)]
-    struct SelfComp {
-        motif_len: u16,
-        mummer_chunk: u16,
-    }
-
-    #[derive(Debug, Serialize, Deserialize)]
-    struct Intron {
-        size: String,
-    }
-
-    #[derive(Debug, Serialize, Deserialize)]
-    struct Telomere {
-        teloseq: String,
-    }
-
-    #[derive(Debug, Serialize, Deserialize)]
-    struct Synteny {
-        synteny_genome_path: String,
-    }
-
-    #[derive(Debug, Serialize, Deserialize)]
-    struct Busco {
-        lineages_path: String,
-        lineage: String,
-    }
-
-    //
-    // CSV STRUCT
-    //
-    //#[derive(Deserialize)]
-    //struct Record {
-    //    org: String,
-    //    type: String,
-    //    data_file: String
-    //}
-
-    pub fn validate_paths(path: &str, field_id: &str) {
-        match fs::metadata(path) {
-            Ok(_) => {
-                println!(
-                    "{}{}   \t{}\t{}",
-                    ">-".green(),
-                    &field_id.green(),
-                    "| PATH EXISTS: ".green(),
-                    path.green()
-                );
-                match field_id {
-                    "REFERENCE" => validate_fasta(path),
-                    "GENESET-CSV" => {
-                        _ = validate_csv(path);
-                    }
-                    "HIC" => {}
-                    _ => println!("Error"),
-                }
-            }
-            Err(_) => println!(
-                "{}{}   \t{}\t{}",
-                "<-".red().bold(),
-                &field_id.red().bold(),
-                "| CHECK YAML!:".red().bold(),
-                path
-            ),
-        }
-    }
-
-    pub fn validate_fasta(path: &str) {
-        let reader = fasta::reader::Builder.build_from_path(path);
-
-        let mut binding = reader.expect("NO VALID HEADER / SEQUENCE PAIRS");
-        let result = binding.records();
-        let counter = result.count();
-        println!(
-            "{} {} {}",
-            ">- REFERENCE H/S PAIRS:".green(),
-            counter,
-            "H/S PAIRS".green()
-        )
-    }
-
-    pub fn validate_csv(path: &str) -> Result<(), Error> {
-        let file = File::open(path)?;
-
-        let mut reader = ReaderBuilder::new()
-            .has_headers(true)
-            .delimiter(b',')
-            .from_reader(file);
-
-        let record = reader.records().count();
-        println!(
-            "{} {} {}",
-            ">-GENESET-RECORD-COUNT: >".green(),
-            record,
-            "<".green()
-        );
-
-        Ok(())
-    }
-
-    //
-    // FUNCTION: Check if pacbio has fasta.gz files, cram has cram and crai and synteny has fasta
-    //           could make this much easier and consise by passing in a list of file types to check
-    //           validatedata(path, [fa, fna, fasta])
-    //
-    pub fn validate_data(path: &str, dtype: &str) {
-        match fs::read_dir(path) {
-            Err(e) if e.kind() == ErrorKind::NotFound => {}
-            Err(e) => panic!("{} {e}", "<-DIRECTORY PATH DOESN'T EXIST: ".red().bold()),
-            Ok(data_files) => {
-                if dtype == "pacbio" {
-                    let files: Vec<PathBuf> = data_files
-                        .filter_map(|f| f.ok())
-                        .filter(|d| match d.path().extension() {
-                            None => false,
-                            Some(ex) => ex == "fasta.gz",
-                        })
-                        .map(|f| f.path())
-                        .collect();
-
-                    if files.is_empty() {
-                        println!("{}", "<-NO PACBIO DATA FILES".red())
-                    } else {
-                        println!("{} {:?}", ">-YOUR FILES ARE:".green(), &files);
-                    }
-                } else if dtype == "hic" {
-                    let files: Vec<PathBuf> = data_files
-                        .filter_map(|f| f.ok())
-                        .filter(|d| match d.path().extension() {
-                            None => false,
-                            Some(ex) => ex == "cram" || ex == "crai",
-                        })
-                        .map(|f| f.path())
-                        .collect();
-
-                    if files.is_empty() {
-                        println!("{}", "<-NO HIC DATA FILES".red())
-                    } else {
-                        println!("{} {:?}", ">-YOUR FILES ARE:".green(), &files);
-                    }
-                } else if dtype == "synteny" {
-                    let files: Vec<PathBuf> = data_files
-                        .filter_map(|f| f.ok())
-                        .filter(|d| match d.path().extension() {
-                            None => false,
-                            Some(ex) => ex == "fa" || ex == "fasta" || ex == "fna",
-                        })
-                        .map(|f| f.path())
-                        .collect();
-
-                    if files.is_empty() {
-                        println!("{}", "<-NO SYNTENIC GENOMES".red())
-                    } else {
-                        println!("{} {:?}", ">-YOUR GENOMES ARE:".green(), &files);
-                    }
-                }
-            }
-        };
-    }
-
-    pub fn validate_yaml(arguments: std::option::Option<&ArgMatches>) {
-        let file = arguments.unwrap().get_one::<String>("yaml").unwrap();
-        let _output: &String = arguments
-            .unwrap()
-            .get_one::<String>("output-directory")
-            .unwrap();
-        let _verbose_flag: &bool = arguments.unwrap().get_one::<bool>("verbose").unwrap();
-
-        println! {"Validating Yaml: {}", file.purple()};
-
-        let input = fs::File::open(file).expect("Unable to read from file");
-        let contents: TreeValYaml =
-            serde_yaml::from_reader(input).expect("Unable to read from file");
-
-        println!(
-            "RUNNING VALIDATE-YAML FOR SAMPLE: {}",
-            contents.assembly.sample_id.purple()
-        );
-
-        validate_paths(&contents.reference_file, "REFERENCE");
-        validate_paths(&contents.alignment.data_dir, "GENESET");
-        validate_paths(&contents.synteny.synteny_genome_path, "SYNTENY");
-        validate_paths(&contents.busco.lineages_path, "BUSCO");
-
-        validate_paths(&contents.assem_reads.pacbio, "PACBIO");
-        validate_data(&contents.assem_reads.pacbio, "pacbio");
-
-        validate_paths(&contents.assem_reads.hic, "HIC");
-        validate_data(&contents.assem_reads.hic, "hic");
-
-        println!("{}", "CHECKING GENESET DIRECTORY RESOLVES".blue());
-        let genesets = contents.alignment.geneset.split(',');
-        for set in genesets {
-            let gene_alignment_path = contents.alignment.data_dir.clone()
-                + &contents.assembly.classT
-                + "/csv_data/"
-                + set
-                + "-data.csv";
-            validate_paths(&gene_alignment_path, "GENESET-CSV");
-        }
-
-        println!("{}", "CHECKING SYNTENY DIRECTORY RESOLVES".blue());
-        let synteny_full =
-            contents.synteny.synteny_genome_path.clone() + &contents.assembly.classT + "/";
-        validate_paths(&synteny_full, "SYNTENY-FASTA");
-        validate_data(&synteny_full, "synteny");
-
-        println!("{}", "CHECKING BUSCO DIRECTORY RESOLVES".blue());
-        let busco_path =
-            contents.busco.lineages_path.clone() + "/lineages/" + &contents.busco.lineage;
-        validate_paths(&busco_path, "BUSCO-DB");
-        // NOW CHECK FOR FILES IN DIRECTORY?
-
-        println!(
-            "{}\n{}\n{}\n{}\n{}",
-            "VALIDATION COMPLETE".purple().bold(),
-            "GENERAL INFORMATION:".purple().bold(),
-            "Check the log to see what failed".bold(),
-            "FULL : ONLY synteny fails are permitted".purple(),
-            "RAPID: geneset, busco and synteny fails are permitted".purple()
-        );
-    }
-}