/getdata

3-GetData final project

Primary LanguageR

** UCI HAR Dataset **

  • Read in features table and select only the names column ( need to change names to be read more clearly) Table 1 features.name
library(dplyr)
features.name <- read.table("UCI HAR Dataset/features.txt",header=FALSE)
  • Start changing features.name to create readable titles ( substitute "()" to "." )
Output of features.name 

```r
V1	V2
1 tBodyAcc-mean()-X
2 tBodyAcc-mean()-Y
3 tBodyAcc-mean()-Z
4 tBodyAcc-std()-X
5 tBodyAcc-std()-Y

Now for substitution of "()"

# substitute "()" to "."
features.name$V3 <- gsub("\\(\\)",".",features.name$V2)  

Adding Columns until it is in readable format:

  V1                V2               V3
1  1 tBodyAcc-mean()-X tBodyAcc-mean.-X
2  2 tBodyAcc-mean()-Y tBodyAcc-mean.-Y
3  3 tBodyAcc-mean()-Z tBodyAcc-mean.-Z
4  4  tBodyAcc-std()-X  tBodyAcc-std.-X
5  5  tBodyAcc-std()-Y  tBodyAcc-std.-Y
# substitute ".-" and "-" to "."}
features.name$V4 <- gsub(".-",".",features.name$V3)       	# substitute ".-" and "-" to "."

Finally have V4 with readable names

  V1                V2               V3             V4
1  1 tBodyAcc-mean()-X tBodyAcc-mean.-X tBodyAc.mean.X
2  2 tBodyAcc-mean()-Y tBodyAcc-mean.-Y tBodyAc.mean.Y
3  3 tBodyAcc-mean()-Z tBodyAcc-mean.-Z tBodyAc.mean.Z
4  4  tBodyAcc-std()-X  tBodyAcc-std.-X  tBodyAc.std.X
5  5  tBodyAcc-std()-Y  tBodyAcc-std.-Y  tBodyAc.std.Y
  • bind X test and train table 2
x.test.table <- read.table("UCI HAR Dataset/test/X_test.txt",header=FALSE)
x.train.table <- read.table("UCI HAR Dataset/train/X_train.txt",header=FALSE)

x_table_rbind <- rbind(x.test.table,x.train.table)
dim(x_table_rbind)
nrow(x_table_rbind)
 
x_table_named <- x_table_rbind                  # copy
names(x_table_named) <- features.name$V4        # give the X table names
  • Now for reading y test and train tables or vectors Table 3
#  READ in y TEST and TRAIN TABLES and bind
y.test.table <- read.table("UCI HAR Dataset/test/y_test.txt",header=FALSE)
y.train.table <- read.table("UCI HAR Dataset/train/y_train.txt",header=FALSE)

y_table_rbind <- rbind( y.test.table,y.train.table)

y_table_named <- y_table_rbind
names(y_table_named) <-  c("y_test_train")
  • Reading in Subject table 4 '''r subject.test.table <- read.table("UCI HAR Dataset/test/subject_test.txt",header=FALSE) '''r

** Next set of Tables are the Accelerometer / Angular readings **

After reading in test and train readings we bind for total measurements.

#--------------- accelerometer readings
# x
body_acc_x_test <- read.table("UCI HAR Dataset/test/Inertial Signals/body_acc_x_test.txt",header=FALSE)
body_acc_x_train <- read.table("UCI HAR Dataset/train/Inertial Signals/body_acc_x_train.txt",header=FALSE)
body_acc_x_test_train <- rbind(body_acc_x_test,body_acc_x_train)


body_gyro_x_test <- read.table("UCI HAR Dataset/test/Inertial Signals/body_gyro_x_test.txt",header=FALSE)
body_gyro_x_train <- read.table("UCI HAR Dataset/train/Inertial Signals/body_gyro_x_train.txt",header=FALSE)
body_gyro_x_test_train <- rbind(body_gyro_x_test,body_gyro_x_train)


total_acc_x_test <- read.table("UCI HAR Dataset/test/Inertial Signals/total_acc_x_test.txt",header=FALSE)
total_acc_x_train <- read.table("UCI HAR Dataset/train/Inertial Signals/total_acc_x_train.txt",header=FALSE)
total_acc_x_test_train <- rbind(total_acc_x_test, total_acc_x_train)

# Y
body_acc_y_test <- read.table("UCI HAR Dataset/test/Inertial Signals/body_acc_y_test.txt",header=FALSE)
body_acc_y_train <- read.table("UCI HAR Dataset/train/Inertial Signals/body_acc_y_train.txt",header=FALSE)
body_acc_y_test_train <- rbind(body_acc_y_test, body_acc_y_train )

body_gyro_y_test <- read.table("UCI HAR Dataset/test/Inertial Signals/body_gyro_y_test.txt",header=FALSE)
body_gyro_y_train <- read.table("UCI HAR Dataset/train/Inertial Signals/body_gyro_y_train.txt",header=FALSE)
body_gyro_y_test_train <- rbind(body_gyro_y_test, body_gyro_y_train)

total_acc_y_test <- read.table("UCI HAR Dataset/test/Inertial Signals/total_acc_y_test.txt",header=FALSE)
total_acc_y_train <- read.table("UCI HAR Dataset/train/Inertial Signals/total_acc_y_train.txt",header=FALSE)
total_acc_y_test_train <- rbind(total_acc_y_test,total_acc_y_train )

# Z
body_acc_z_test <- read.table("UCI HAR Dataset/test/Inertial Signals/body_acc_z_test.txt",header=FALSE)
body_acc_z_train <- read.table("UCI HAR Dataset/train/Inertial Signals/body_acc_z_train.txt",header=FALSE)
body_acc_z_test_train <- rbind(body_acc_z_test,body_acc_z_train)

body_gyro_z_test <- read.table("UCI HAR Dataset/test/Inertial Signals/body_gyro_z_test.txt",header=FALSE)
body_gyro_z_train <- read.table("UCI HAR Dataset/train/Inertial Signals/body_gyro_z_train.txt",header=FALSE)
body_gyro_z_test_train <- rbind(body_gyro_z_test,body_gyro_z_train)

total_acc_z_test <- read.table("UCI HAR Dataset/test/Inertial Signals/total_acc_z_test.txt",header=FALSE)
total_acc_z_train <- read.table("UCI HAR Dataset/train/Inertial Signals/total_acc_z_train.txt",header=FALSE)
total_acc_z_test_train <- rbind(total_acc_z_test ,total_acc_z_train)

** The actual final tables are listed below **

  • body_acc_x_test_train
  • body_gyro_x_test_train
  • total_acc_x_test_train
  • body_acc_y_test_train
  • body_gyro_y_test_train
  • total_acc_y_test_train
  • body_acc_z_test_train
  • body_gyro_z_test_train
  • total_acc_z_test_train

** Now for the final output group summations ** using cbind

t_data <- cbind(as.data.table(subject_test), y_test, X_test)

X_train <- read.table("UCI HAR Dataset/train/X_train.txt")
y_train <- read.table("UCI HAR Dataset/train/y_train.txt")
subject_train <- read.table("UCI HAR Dataset/train/subject_train.txt")
names(X_train) = features
# now for only mean, std
X_train = X_train[,features.mean.std]
y_train[,2] = activity_labels[y_train[,1]]

names(y_train) = c("Activity_ID", "Activity_Label") names(subject_train) = "subject"

train_data <- cbind(as.data.table(subject_train), y_train, X_train)

```R

# rowbind test and train data
data = rbind(test_data, train_data)
id_labels = c("subject", "Activity_ID", "Activity_Label")
data_labels = setdiff(colnames(data), id_labels)
merged_data_final = melt(data, id = id_labels, measure.vars = data_labels)

** Final step **

#    5 From the data set in step 4, creates a second, independent tidy data set with the average of each variable for each activity and each subject.
tidydataset = dcast(merged_data_final, subject + Activity_Label ~ variable, mean)
write.table(tidy_dataset, file = "tidydataset.txt")