Skip to content

Commit ea4fbcd

Browse files
Explore correlation between course satisfaction and average confidences
1 parent 56ed392 commit ea4fbcd

File tree

3 files changed

+191
-0
lines changed

3 files changed

+191
-0
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Confidence and satisfaction
2+
3+
- Course satisfaction and average confidence are correlated
4+
- 21.4% of the variance in average confidence is explained by course satisfaction (R squared of a linear fit)
5+
6+
![Confidence and satisfaction](correlation.png)
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
#!/bin/env Rscript
2+
3+
# Goal: determine the correlation between
4+
# course satisfaction and confidence outcomes
5+
#
6+
#
7+
# | confidence
8+
# |
9+
# |
10+
# |
11+
# +-----------------
12+
# satisfaction
13+
#
14+
15+
16+
#' Create simulated values to validate the correlation check
#'
#' @param n number of rows to simulate
#' @return a tibble with `n` rows and three columns:
#'   `average_confidence` (uniform between 0 and 5),
#'   `random_satisfactions` (uniform between 1 and 10, drawn independently)
#'   and `correlated_satisfactions` (exactly twice `average_confidence`)
create_test_values <- function(n = 50) {
  # The confidences are drawn before the satisfactions, so the random
  # number stream is consumed in the order of the columns
  confidences <- runif(n = n, min = 0.0, max = 5.0)
  unrelated_satisfactions <- runif(n = n, min = 1.0, max = 10.0)
  tibble::tibble(
    average_confidence = confidences,
    random_satisfactions = unrelated_satisfactions,
    correlated_satisfactions = confidences * 2
  )
}
25+
# Self-test: by default, more than one row of test values is created
testthat::expect_true(1 < nrow(create_test_values()))
26+
27+
#' Determine if the two columns of a table are significantly correlated
#'
#' @param t a table with exactly two columns
#' @param alpha_value significance level the p value is compared against
#' @return a logical: is the p value reported by `correlation::correlation`
#'   below `alpha_value`?
are_correlated <- function(t, alpha_value = 0.05) {
  testthat::expect_equal(ncol(t), 2)
  p_value <- correlation::correlation(t)$p
  p_value < alpha_value
}
33+
34+
# Self-test: independently drawn columns must not be flagged as correlated.
# Under the null hypothesis the p value is uniformly distributed, so with the
# default alpha of 0.05 this check would fail in roughly 1 in 20 runs.
# A much stricter alpha keeps the check meaningful while making spurious
# failures rare.
testthat::expect_false(
  are_correlated(
    t = create_test_values() |>
      dplyr::select(average_confidence, random_satisfactions),
    alpha_value = 0.001
  )
)
# Self-test: a column that is an exact multiple of the other must be
# flagged as correlated
testthat::expect_true(
  are_correlated(
    t = create_test_values() |>
      dplyr::select(average_confidence, correlated_satisfactions)
  )
)
44+
45+
# Find the tables
46+
47+
#' Get the filenames of all CSV files
#'
#' @param path folder that is searched recursively
#' @return the paths (each starting with `path`) of all files whose name
#'   ends in `.csv`
get_all_csv_filenames <- function(path = "..") {
  list.files(
    path = path,
    # Anchor on the extension: a bare "csv" would also match names that
    # merely contain those letters, such as 'csv_notes.txt' or 'data.csv.bak'
    pattern = "\\.csv$",
    recursive = TRUE,
    full.names = TRUE
  )
}
51+
# Self-test: every filename found must exist on disk
get_all_csv_filenames() |>
  file.exists() |>
  all() |>
  testthat::expect_true()
52+
53+
#' Get the filenames of the CSV files that are relevant for this analysis
#'
#' Starts from all CSV filenames and drops the helper CSVs as well as
#' the evaluations without confidences.
#'
#' @return the remaining filenames
get_csv_filenames <- function() {
  # Patterns of the helper CSVs
  helper_patterns <- c(
    "average_confidences",
    "survey_start",
    "initial_knowledge",
    "confidences_course"
  )
  # Evaluations without confidences:
  # the name is the path to verify, the value is the pattern to drop
  without_confidence <- c(
    "../20230523/evaluation_20230523.csv" = "20230523/evaluation_20230523.csv",
    "../20231201/evaluation_20231201.csv" = "20231201/evaluation_20231201.csv"
  )

  kept <- get_all_csv_filenames()
  for (helper_pattern in helper_patterns) {
    kept <- stringr::str_subset(kept, helper_pattern, negate = TRUE)
  }
  for (path in names(without_confidence)) {
    # 'has_confidence' may not be defined yet when this function is first run
    if (exists("has_confidence")) {
      testthat::expect_false(has_confidence(path))
    }
    kept <- stringr::str_subset(kept, without_confidence[[path]], negate = TRUE)
  }
  kept
}
74+
# Self-test: every relevant filename must exist on disk
get_csv_filenames() |>
  file.exists() |>
  all() |>
  testthat::expect_true()
75+
76+
#' Check that a CSV file has a column with the course satisfaction rating
#'
#' @param csv_filename path to a CSV file
#' @return `TRUE` if at least one column name contains the text
#'   "how would you rate this training event", else `FALSE`
has_satisfaction <- function(csv_filename) {
  column_names <- csv_filename |>
    readr::read_csv(show_col_types = FALSE) |>
    names()
  any(
    stringr::str_detect(column_names, "how would you rate this training event")
  )
}
82+
# Self-test: every relevant evaluation must have the satisfaction question
for (filename in get_csv_filenames()) {
  filename |>
    has_satisfaction() |>
    testthat::expect_true()
}
86+
87+
#' Check that a CSV file has at least one column with a confidence question
#'
#' @param csv_filename path to a CSV file
#' @return `TRUE` if at least one column name contains the text "I can",
#'   else `FALSE`
has_confidence <- function(csv_filename) {
  column_names <- csv_filename |>
    readr::read_csv(show_col_types = FALSE) |>
    names()
  any(stringr::str_detect(column_names, "I can"))
}
93+
# Self-test: these two evaluations must not have confidence questions ...
"../20230523/evaluation_20230523.csv" |>
  has_confidence() |>
  testthat::expect_false()
"../20231201/evaluation_20231201.csv" |>
  has_confidence() |>
  testthat::expect_false()
# ... while every relevant evaluation must have them
for (csv_filename in get_csv_filenames()) {
  csv_filename |>
    has_confidence() |>
    testthat::expect_true()
}
99+
100+
#' Get the course satisfaction ratings from an evaluation
#'
#' @param csv_filename path to an evaluation CSV file
#' @return the values of the satisfaction column, one per row of the file.
#'   An expectation fails unless exactly one column holds the satisfaction
#'   question and the file has at least one row
get_satisfactions <- function(csv_filename) {
  # Read the file once; the column lookup below doubles as the check that
  # the satisfaction question is present
  t <- readr::read_csv(csv_filename, show_col_types = FALSE)
  # Match on the full question text. The short pattern "rate" also matches
  # unrelated column names, for example "I can demonstrate ..."
  col_name <- stringr::str_subset(
    names(t),
    "how would you rate this training event"
  )
  testthat::expect_equal(1, length(col_name))
  satisfactions <- t[[col_name]]
  testthat::expect_true(length(satisfactions) > 0)
  satisfactions
}
109+
# Self-test: every satisfaction rating must lie between 1 and 10
for (csv_filename in get_csv_filenames()) {
  # Read the ratings once per file and reuse them for the message and checks
  satisfactions <- get_satisfactions(csv_filename)
  message(csv_filename, ": ", paste(satisfactions, collapse = " "))
  testthat::expect_true(all(satisfactions >= 1.0))
  testthat::expect_true(all(satisfactions <= 10.0))
}
114+
115+
#' Get the average confidence per row of an evaluation
#'
#' Each column whose name contains "I can" is treated as a confidence
#' question. The answers are converted to scores from 0 to 5 and averaged
#' per row, ignoring the sessions that were not attended.
#'
#' @param csv_filename path to an evaluation CSV file
#' @return a numeric vector with one average per row of the file.
#'   An expectation fails if the file has no confidence question or if
#'   any row has no scored answer at all
get_average_confidence <- function(csv_filename) {
  t <- readr::read_csv(csv_filename, show_col_types = FALSE)
  col_names <- stringr::str_subset(names(t), "I can")
  # The file must have at least one confidence question
  testthat::expect_true(length(col_names) > 0)

  # Score per answer; a session that was not attended does not count
  scores <- c(
    "I can absolutely do this!" = 5,
    "I have good confidence I can do this" = 4,
    "I have some confidence I can do this" = 3,
    "I have low confidence I can do this" = 2,
    "I have no confidence I can do this" = 1,
    "I don't know even what this is about ...?" = 0,
    "I did not attend that session" = NA
  )
  # Convert one column of answers to scores. Answers that are not in the
  # lookup are passed to as.numeric, so values that are already numbers
  # are kept and any other text becomes NA with a warning
  to_scores <- function(answers) {
    answers <- as.character(answers)
    is_known <- answers %in% names(scores)
    answers[is_known] <- as.character(scores[answers[is_known]])
    as.numeric(answers)
  }

  t_sub <- t |>
    dplyr::select(dplyr::all_of(col_names)) |>
    dplyr::mutate(dplyr::across(dplyr::everything(), to_scores))

  # A row without any scored answer gives NaN, which is caught below
  average_confidence <- unname(rowMeans(as.matrix(t_sub), na.rm = TRUE))

  testthat::expect_equal(0, sum(is.na(average_confidence)))
  average_confidence
}
144+
# Self-test: confidences are scored from 0 to 5, so every average must
# lie within that range
for (csv_filename in get_csv_filenames()) {
  # Compute the averages once per file and reuse them
  average_confidences <- get_average_confidence(csv_filename)
  message(csv_filename, ": ", paste(average_confidences, collapse = " "))
  testthat::expect_true(all(average_confidences >= 0.0))
  testthat::expect_true(all(average_confidences <= 5.0))
}
149+
150+
# Collect the satisfaction and average confidence of every evaluation,
# one table per CSV file, then stack these into one table
csv_filenames <- get_csv_filenames()
list_of_tables <- vector("list", length(csv_filenames))
for (i in seq_along(csv_filenames)) {
  csv_filename <- csv_filenames[i]
  message(csv_filename)
  satisfactions <- get_satisfactions(csv_filename)
  average_confidences <- get_average_confidence(csv_filename)
  # Both vectors must have one value per row of the same file
  testthat::expect_equal(length(satisfactions), length(average_confidences))
  t <- tibble::tibble(
    satisfaction = satisfactions,
    average_confidence = average_confidences
  )
  # Show if the correlation holds within this single evaluation
  message(are_correlated(t))
  list_of_tables[[i]] <- t
}
t <- dplyr::bind_rows(list_of_tables)
166+
167+
168+
# Correlation over the rows of all evaluations combined
results <- correlation::correlation(t)
p_value <- results$p

# Linear fit: R squared is the fraction of the variance in average
# confidence that is explained by the satisfaction
model <- lm(average_confidence ~ satisfaction, data = t)
r_squared <- summary(model)$r.squared

# Keep the plot in a variable: a bare ggplot expression at the top level of
# an Rscript is auto-printed (producing a stray 'Rplots.pdf') and ggsave
# would otherwise depend on the implicit 'last plot' state
correlation_plot <- ggplot2::ggplot(
  t,
  ggplot2::aes(x = satisfaction, y = average_confidence)
) +
  ggplot2::geom_jitter(width = 0.01, height = 0.01) +
  ggplot2::geom_smooth(method = "lm") +
  ggplot2::labs(
    title = "Correlation between course satisfaction and average confidence",
    caption = paste0(
      "n: ", nrow(t), ", ",
      "p value: ", round(p_value, digits = 5), ", ",
      "R squared: ", round(100.0 * r_squared, digits = 1), "%"
    )
  )
ggplot2::ggsave(
  "correlation.png",
  plot = correlation_plot,
  width = 7,
  height = 7
)
124 KB
Loading

0 commit comments

Comments
 (0)