-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_analysis.R
More file actions
60 lines (50 loc) · 2.47 KB
/
run_analysis.R
File metadata and controls
60 lines (50 loc) · 2.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
library(data.table)
# Returns a dataset combining subjects, activity names and the mean/std
# measures for a given subset, i.e. train or test.
#
# Reads three files from the directory named by `subset`:
#   <subset>/subject_<subset>.txt ... read into a single column "subject"
#   <subset>/X_<subset>.txt       ... read with `measures` as column names
#   <subset>/y_<subset>.txt       ... read into a single column "id"
#
# inputs:
#   subset ... a character indicating the subset, i.e. "train" or "test";
#              used both as directory name and as file name suffix
#   measures ... a character vector with the "feature names",
#                i.e. column 2 from "features.txt"; must have one entry
#                per column of the X file
#   activities ... a dataset with columns id and name from
#                  "activity_labels.txt"
#
# returns:
#   a data.table with the column subject, the column activity (the entry of
#   activities$name matching each activity id) and one column per measure
#   whose name contains "mean()" or "std()"
#
# errors:
#   stops if an activity id in the y file has no entry in `activities`
load_data <- function(subset, measures, activities) {
  subjects <- fread(file = file.path(subset,
                                     paste0("subject_", subset, ".txt")),
                    col.names = "subject")
  # "Appropriately labels the data set with descriptive variable names"
  x <- fread(file = file.path(subset, paste0("X_", subset, ".txt")),
             col.names = measures)
  y <- fread(file = file.path(subset, paste0("y_", subset, ".txt")),
             col.names = "id")

  # "Uses descriptive activity names to name the activities in the data set"
  # match() keeps one label per row of y in the original row order, so the
  # labels stay aligned with subjects and x by construction.
  label_row <- match(y$id, activities$id)
  if (anyNA(label_row)) {
    stop("activity ids without a label in `activities`: ",
         paste(unique(y$id[is.na(label_row)]), collapse = ", "),
         call. = FALSE)
  }

  # "Extracts only the measurements on the mean and standard deviation for
  # each measurement"
  # Select by position rather than by name: the columns of x are in the same
  # order as `measures`, and positions stay unambiguous even if feature names
  # repeat. This also avoids calling subset() while the parameter `subset`
  # (a character) is in scope.
  keep <- grep("(mean|std)\\(\\)", measures)

  # Return the combined table visibly (no assignment to an unused local).
  data.table(subjects,
             activity = activities$name[label_row],
             x[, keep, with = FALSE])
}
# Feature names: features.txt is read into the columns id and name.
measures <- fread("features.txt", col.names = c("id", "name"))

# Activity labels: activity_labels.txt is read into the columns id and name,
# with character columns converted to factors.
activities <- fread("activity_labels.txt",
                    col.names = c("id", "name"),
                    stringsAsFactors = TRUE)

# "Merges the training and the test sets to create one data set"
# Each subset is loaded with the same feature names and activity labels,
# then the resulting tables are stacked, train first and test second.
har <- do.call(rbind,
               lapply(c("train", "test"),
                      load_data,
                      measures = measures$name,
                      activities = activities))

# "creates a second, independent tidy data set with the average of each
# variable for each activity and each subject"
# The data is melted to long form on the two leading columns (subject and
# activity) and cast back to wide form, taking the mean of every measure
# per subject/activity pair.
# Long-format alternative, kept for reference:
# avg <- melt(har, id.vars = 1:2)[, .(average=mean(value)), by=subject:variable]
id_cols <- c("subject", "activity")
avg <- dcast(melt(har, id.vars = id_cols),
             subject + activity ~ variable,
             fun.aggregate = mean)
setorderv(avg, c("activity", "subject"))

# Prefix every averaged measure column with "avg_"; the id columns keep
# their names.
measure_cols <- setdiff(names(avg), id_cols)
setnames(avg, measure_cols, paste0("avg_", measure_cols))

# Write both analysis data sets to the working directory.
write.table(har, file = "har.txt", row.names = FALSE)
write.table(avg, file = "avg.txt", row.names = FALSE)