-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_analysis.R
More file actions
85 lines (68 loc) · 3.88 KB
/
run_analysis.R
File metadata and controls
85 lines (68 loc) · 3.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# This file analyses the "Human Activity Recognition Using Smartphones" dataset available at "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip".
# This file should be run in a folder called "UCI_HAR_Dataset".
require(dplyr)
require(reshape2)
#' Build the tidy per-subject, per-activity averages of the UCI HAR dataset
#'
#' Reads the feature names, the activity labels and the train/test splits,
#' stacks the two splits, keeps only the features whose name contains "mean"
#' or "std" (this also keeps the "meanFreq" features), attaches the
#' descriptive activity name and averages every kept feature for each
#' subject/activity pair.
#'
#' @param data_dir Directory containing `features.txt`, `activity_labels.txt`
#'   and the `train/` and `test/` sub-folders. Defaults to the current working
#'   directory, so calling `run_analysis()` from inside the dataset folder
#'   keeps working.
#' @return A data frame with one row per subject/activity pair: the columns
#'   `subject` and `activityName`, followed by the mean of each kept feature
#'   (feature names have "-" replaced by "_"). Rows are ordered by subject,
#'   then by activity name.
run_analysis <- function(data_dir = ".") {
  # Step 1.1: feature names become the measurement column names. Runs of "-"
  # are turned into "_" so the names are easier to handle downstream.
  features <- read.table(file.path(data_dir, "features.txt"))
  feature_names <- gsub("(-)+", "_", features$V2)

  # Activity id -> descriptive activity name lookup table.
  activities <- read.table(file.path(data_dir, "activity_labels.txt"))
  colnames(activities) <- c("activity", "activityName")

  # Step 2: positions of the mean/std features. Selecting them *before* the
  # merge drops the duplicated feature names present in features.txt, so
  # merge() no longer warns and no blanket suppressWarnings() is needed.
  keep <- grep("mean|std", feature_names)

  # Reads one split ("train" or "test") and returns its mean/std measurements
  # with the activity id and the subject id bound on as extra columns.
  read_split <- function(split) {
    path_for <- function(prefix) {
      file.path(data_dir, split, paste0(prefix, "_", split, ".txt"))
    }
    measurements <- read.table(path_for("X"))
    if (ncol(measurements) != length(feature_names)) {
      stop(
        "features.txt lists ", length(feature_names), " features but ",
        path_for("X"), " has ", ncol(measurements), " columns",
        call. = FALSE
      )
    }
    colnames(measurements) <- feature_names
    cbind(
      measurements[, keep, drop = FALSE],
      activity = read.table(path_for("y"))$V1,
      subject = read.table(path_for("subject"))$V1
    )
  }

  # Step 1.2 - 1.3: training split.
  message("STEP 1: Reading Training data will take time .... Please be patient ...")
  flush.console()
  train <- read_split("train")

  # Step 1.4 - 1.5: test split.
  message("STEP 2: Reading Test data will take time .... Please be patient ...")
  flush.console()
  test <- read_split("test")

  # Step 1.6 - 1.7 (and 3/4): stack both splits and attach the activity names.
  # merge() is an inner join on the shared "activity" column.
  combined <- merge(rbind(train, test), activities, by = "activity")

  # Step 5: average every measurement for each subject/activity pair.
  message("FINAL STEP: Tidying up data .... Please be patient ...")
  flush.console()
  measure_cols <- setdiff(
    names(combined),
    c("activity", "subject", "activityName")
  )
  averages <- aggregate(
    combined[measure_cols],
    by = list(
      subject = combined$subject,
      activityName = combined$activityName
    ),
    FUN = mean
  )
  # aggregate() lets the first grouping variable vary fastest; reorder so the
  # rows come out by subject first, then activity name.
  averages <- averages[
    order(averages$subject, averages$activityName), ,
    drop = FALSE
  ]
  rownames(averages) <- NULL

  message("Your Tidy data is ready... Thank you for being patient ...")
  flush.console()
  averages
}