-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSplitHaplotypes.R
32 lines (26 loc) · 977 Bytes
/
SplitHaplotypes.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Install or load tidyr and dplyr
if (!require(tidyr, quietly=TRUE)) {
install.packages("tidyr")
library(tidyr)
}
if (!require(dplyr, quietly=TRUE)) {
install.packages("dplyr")
library(dplyr)
}
# Locate the example workbook; `path` is also the directory that
# splitAlleles() writes its output files to by default.
path <- file.path("ExampleData/")
infile <- file.path(path, "hla.37.phased.example-data.xlsx")
# Read the first sheet, keeping at most 451 data rows.
hpos <- readxl::read_excel(infile, sheet = 1, n_max = 451)
# Every column after the fourth is treated as one individual's haplotype
# column (the first four are presumably annotation such as POS/RSID/REF --
# confirm against the workbook). Dropping by negative index yields an empty
# vector when the sheet has four or fewer columns, whereas `5:ncol(hpos)`
# would count backwards and produce NA / wrong names.
nameVector <- colnames(hpos)[-seq_len(4)]
# Split one individual's haplotype column into "fwd" and "rev" allele columns
# and save the result to disk in both original and reversed row order.
#
# Arguments:
#   x       Name (character string) of the haplotype column to process.
#   infile  Data frame containing the columns POS, RSID, REF and the column
#           named by `x`. Despite its name this is the table, not a file path.
#   outDir  Directory the output files are written to. Defaults to the
#           script-level `path`, which is where the files were always written.
#
# Returns:
#   The data frame with POS, RSID, REF, the original `x` column (kept because
#   remove = FALSE) and the new `fwd` / `rev` columns.
#
# Side effects:
#   Writes "<x>.data.txt" and "<x>.data_rev.txt" into `outDir` as
#   comma-separated text without quoting or row names.
splitAlleles <- function(x, infile, outDir = path) {
  # `x` holds a column *name*. all_of() / `!!x` make that explicit, so a column
  # that happens to be literally called "x" can never be picked up by mistake.
  # No `sep` is supplied, so separate() splits on its default separator.
  y <- infile %>%
    select(POS, RSID, REF, all_of(x)) %>%
    separate(!!x, c("fwd", "rev"), remove = FALSE)

  outPath <- file.path(outDir, paste(x, "data.txt", sep = "."))
  revPath <- file.path(outDir, paste(x, "data_rev.txt", sep = "."))

  write.table(y, outPath, quote = FALSE, sep = ",", row.names = FALSE)
  # rev(seq_len(n)) is an empty index for a zero-row table; seq(n, 1) would
  # instead yield c(0, 1) and index a row that does not exist.
  write.table(
    y[rev(seq_len(nrow(y))), ],
    revPath,
    quote = FALSE, sep = ",", row.names = FALSE
  )

  y
}
# Run splitAlleles() once per haplotype column name, passing the full table
# as its `infile` argument; the per-column results are returned as a list.
lapply(X = nameVector, FUN = splitAlleles, infile = hpos)