Skip to content

Commit ac9ef1e

Browse files
authored
Merge pull request #334 from easystats/cor_sort_improvements
[Feature] cor_sort() can deal with non-square matrices
2 parents fced785 + b95e053 commit ac9ef1e

17 files changed

+267
-194
lines changed

DESCRIPTION

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: correlation
33
Title: Methods for Correlation Analysis
4-
Version: 0.8.6
4+
Version: 0.8.6.1
55
Authors@R:
66
c(person(given = "Dominique",
77
family = "Makowski",
@@ -57,8 +57,8 @@ Imports:
5757
bayestestR (>= 0.15.0),
5858
datasets,
5959
datawizard (>= 0.13.0),
60-
insight (>= 0.20.5),
61-
parameters (>= 0.22.2),
60+
insight (>= 1.0.0),
61+
parameters (>= 0.24.0),
6262
stats
6363
Suggests:
6464
BayesFactor,

R/cor_sort.R

+80-10
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ cor_sort <- function(x, distance = "correlation", hclust_method = "complete", ..
2626

2727
#' @export
2828
cor_sort.easycorrelation <- function(x, distance = "correlation", hclust_method = "complete", ...) {
29-
col_order <- .cor_sort_order(as.matrix(x), distance = distance, hclust_method = hclust_method, ...)
30-
x$Parameter1 <- factor(x$Parameter1, levels = col_order)
31-
x$Parameter2 <- factor(x$Parameter2, levels = col_order)
29+
m <- cor_sort(as.matrix(x), distance = distance, hclust_method = hclust_method, ...)
30+
x$Parameter1 <- factor(x$Parameter1, levels = rownames(m))
31+
x$Parameter2 <- factor(x$Parameter2, levels = colnames(m))
3232
reordered <- x[order(x$Parameter1, x$Parameter2), ]
3333

3434
# Restore class and attributes
@@ -38,6 +38,8 @@ cor_sort.easycorrelation <- function(x, distance = "correlation", hclust_method
3838
)
3939

4040
# Make sure Parameter columns are character
41+
# Was added to fix a test, but makes the function not work
42+
# (See https://github.com/easystats/correlation/issues/259)
4143
# reordered$Parameter1 <- as.character(reordered$Parameter1)
4244
# reordered$Parameter2 <- as.character(reordered$Parameter2)
4345

@@ -55,18 +57,32 @@ cor_sort.easycormatrix <- function(x, distance = "correlation", hclust_method =
5557
m <- x
5658
row.names(m) <- x$Parameter
5759
m <- as.matrix(m[names(m)[names(m) != "Parameter"]])
58-
col_order <- .cor_sort_order(m, distance = distance, hclust_method = hclust_method, ...)
60+
61+
# If non-redundant matrix, fail (## TODO: fix that)
62+
if (anyNA(m)) {
63+
insight::format_error("Non-redundant matrices are not supported yet. Try again by setting summary(..., redundant = TRUE)")
64+
}
65+
66+
# Get sorted matrix
67+
m <- cor_sort(m, distance = distance, hclust_method = hclust_method, ...)
5968

6069
# Reorder
61-
x$Parameter <- factor(x$Parameter, levels = col_order)
62-
reordered <- x[order(x$Parameter), c("Parameter", col_order)]
70+
x$Parameter <- factor(x$Parameter, levels = row.names(m))
71+
reordered <- x[order(x$Parameter), c("Parameter", colnames(m))]
6372

6473
# Restore class and attributes
6574
attributes(reordered) <- utils::modifyList(
6675
attributes(x)[!names(attributes(x)) %in% c("names", "row.names")],
6776
attributes(reordered)
6877
)
6978

79+
# Reorder attributes (p-values) etc.
80+
for (id in c("p", "CI", "CI_low", "CI_high", "BF", "Method", "n_Obs", "df_error", "t")) {
81+
if (id %in% names(attributes(reordered))) {
82+
attributes(reordered)[[id]] <- attributes(reordered)[[id]][order(x$Parameter), names(reordered)]
83+
}
84+
}
85+
7086
# make sure Parameter columns are character
7187
reordered$Parameter <- as.character(reordered$Parameter)
7288

@@ -76,8 +92,13 @@ cor_sort.easycormatrix <- function(x, distance = "correlation", hclust_method =
7692

7793
#' @export
7894
cor_sort.matrix <- function(x, distance = "correlation", hclust_method = "complete", ...) {
79-
col_order <- .cor_sort_order(x, distance = distance, hclust_method = hclust_method, ...)
80-
reordered <- x[col_order, col_order]
95+
if (isSquare(x) && all(colnames(x) %in% rownames(x))) {
96+
i <- .cor_sort_square(x, distance = distance, hclust_method = hclust_method, ...)
97+
} else {
98+
i <- .cor_sort_nonsquare(x, distance = "euclidean", ...)
99+
}
100+
101+
reordered <- x[i$row_order, i$col_order]
81102

82103
# Restore class and attributes
83104
attributes(reordered) <- utils::modifyList(
@@ -91,7 +112,7 @@ cor_sort.matrix <- function(x, distance = "correlation", hclust_method = "comple
91112
# Utils -------------------------------------------------------------------
92113

93114

94-
.cor_sort_order <- function(m, distance = "correlation", hclust_method = "complete", ...) {
115+
.cor_sort_square <- function(m, distance = "correlation", hclust_method = "complete", ...) {
95116
if (distance == "correlation") {
96117
d <- stats::as.dist((1 - m) / 2) # r = -1 -> d = 1; r = 1 -> d = 0
97118
} else if (distance == "raw") {
@@ -101,5 +122,54 @@ cor_sort.matrix <- function(x, distance = "correlation", hclust_method = "comple
101122
}
102123

103124
hc <- stats::hclust(d, method = hclust_method)
104-
row.names(m)[hc$order]
125+
row_order <- row.names(m)[hc$order]
126+
list(row_order = row_order, col_order = row_order)
127+
}
128+
129+
130+
.cor_sort_nonsquare <- function(m, distance = "euclidean", ...) {
131+
# Step 1: Perform clustering on rows and columns independently
132+
row_dist <- stats::dist(m, method = distance) # Distance between rows
133+
col_dist <- stats::dist(t(m), method = distance) # Distance between columns
134+
135+
row_hclust <- stats::hclust(row_dist, method = "average")
136+
col_hclust <- stats::hclust(col_dist, method = "average")
137+
138+
# Obtain clustering orders
139+
row_order <- row_hclust$order
140+
col_order <- col_hclust$order
141+
142+
# Reorder matrix based on clustering
143+
clustered_matrix <- m[row_order, col_order]
144+
145+
# Step 2: Refine alignment to emphasize strong correlations along the diagonal
146+
n_rows <- nrow(clustered_matrix)
147+
n_cols <- ncol(clustered_matrix)
148+
149+
used_rows <- logical(n_rows)
150+
refined_row_order <- integer(0)
151+
152+
for (col in seq_len(n_cols)) {
153+
max_value <- -Inf
154+
best_row <- NA
155+
156+
for (row in seq_len(n_rows)[!used_rows]) {
157+
if (abs(clustered_matrix[row, col]) > max_value) {
158+
max_value <- abs(clustered_matrix[row, col])
159+
best_row <- row
160+
}
161+
}
162+
163+
if (!is.na(best_row)) {
164+
refined_row_order <- c(refined_row_order, best_row)
165+
used_rows[best_row] <- TRUE
166+
}
167+
}
168+
169+
# Append any unused rows at the end
170+
refined_row_order <- c(refined_row_order, which(!used_rows))
171+
172+
# Apply
173+
m <- clustered_matrix[refined_row_order, ]
174+
list(row_order = rownames(m), col_order = colnames(m))
105175
}

R/correlation.R

+39-43
Original file line numberDiff line numberDiff line change
@@ -180,10 +180,9 @@
180180
#' `stats` package are supported.
181181
#' }
182182
#'
183-
#' @examplesIf requireNamespace("poorman", quietly = TRUE) && requireNamespace("psych", quietly = TRUE)
184-
#'
183+
#' @examplesIf all(insight::check_if_installed(c("psych", "datawizard"), quietly = TRUE)) && getRversion() >= "4.1.0"
185184
#' library(correlation)
186-
#' library(poorman)
185+
#' data(iris)
187186
#'
188187
#' results <- correlation(iris)
189188
#'
@@ -192,22 +191,20 @@
192191
#' summary(results, redundant = TRUE)
193192
#'
194193
#' # pipe-friendly usage with grouped dataframes from {dplyr} package
195-
#' iris %>%
194+
#' iris |>
196195
#' correlation(select = "Petal.Width", select2 = "Sepal.Length")
197196
#'
198197
#' # Grouped dataframe
199198
#' # grouped correlations
200-
#' iris %>%
201-
#' group_by(Species) %>%
199+
#' iris |>
200+
#' datawizard::data_group(Species) |>
202201
#' correlation()
203202
#'
204203
#' # selecting specific variables for correlation
205-
#' mtcars %>%
206-
#' group_by(am) %>%
207-
#' correlation(
208-
#' select = c("cyl", "wt"),
209-
#' select2 = c("hp")
210-
#' )
204+
#' data(mtcars)
205+
#' mtcars |>
206+
#' datawizard::data_group(am) |>
207+
#' correlation(select = c("cyl", "wt"), select2 = "hp")
211208
#'
212209
#' # supplying custom variable names
213210
#' correlation(anscombe, select = c("x1", "x2"), rename = c("var1", "var2"))
@@ -425,8 +422,36 @@ correlation <- function(data,
425422
ungrouped_x <- as.data.frame(data)
426423
xlist <- split(ungrouped_x, ungrouped_x[groups], sep = " - ")
427424

428-
# If data 2 is provided
429-
if (!is.null(data2)) {
425+
# If data 2 is not provided
426+
if (is.null(data2)) {
427+
modelframe <- data.frame()
428+
out <- data.frame()
429+
for (i in names(xlist)) {
430+
xlist[[i]][groups] <- NULL
431+
rez <- .correlation(
432+
xlist[[i]],
433+
data2,
434+
method = method,
435+
p_adjust = p_adjust,
436+
ci = ci,
437+
bayesian = bayesian,
438+
bayesian_prior = bayesian_prior,
439+
bayesian_ci_method = bayesian_ci_method,
440+
bayesian_test = bayesian_test,
441+
redundant = redundant,
442+
include_factors = include_factors,
443+
partial = partial,
444+
partial_bayesian = partial_bayesian,
445+
multilevel = multilevel,
446+
ranktransform = ranktransform,
447+
winsorize = winsorize
448+
)
449+
modelframe_current <- rez$data
450+
rez$params$Group <- modelframe_current$Group <- i
451+
out <- rbind(out, rez$params)
452+
modelframe <- rbind(modelframe, modelframe_current)
453+
}
454+
} else {
430455
if (inherits(data2, "grouped_df")) {
431456
groups2 <- setdiff(colnames(attributes(data2)$groups), ".rows")
432457
if (!all.equal(groups, groups2)) {
@@ -463,35 +488,6 @@ correlation <- function(data,
463488
modelframe <- rbind(modelframe, modelframe_current)
464489
}
465490
}
466-
# else
467-
} else {
468-
modelframe <- data.frame()
469-
out <- data.frame()
470-
for (i in names(xlist)) {
471-
xlist[[i]][groups] <- NULL
472-
rez <- .correlation(
473-
xlist[[i]],
474-
data2,
475-
method = method,
476-
p_adjust = p_adjust,
477-
ci = ci,
478-
bayesian = bayesian,
479-
bayesian_prior = bayesian_prior,
480-
bayesian_ci_method = bayesian_ci_method,
481-
bayesian_test = bayesian_test,
482-
redundant = redundant,
483-
include_factors = include_factors,
484-
partial = partial,
485-
partial_bayesian = partial_bayesian,
486-
multilevel = multilevel,
487-
ranktransform = ranktransform,
488-
winsorize = winsorize
489-
)
490-
modelframe_current <- rez$data
491-
rez$params$Group <- modelframe_current$Group <- i
492-
out <- rbind(out, rez$params)
493-
modelframe <- rbind(modelframe, modelframe_current)
494-
}
495491
}
496492

497493
# Group as first column

R/display.R

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
#' @name display.easycormatrix
33
#'
44
#' @description Export tables (i.e. data frame) into different output formats.
5-
#' `print_md()` is a alias for `display(format = "markdown")`.
5+
#' `print_md()` is a alias for `display(format = "markdown")`. Note that
6+
#' you can use `format()` to get the formatted table as a dataframe.
67
#'
78
#' @param object,x An object returned by
89
#' [`correlation()`][correlation] or its summary.

R/methods_format.R

+12-5
Original file line numberDiff line numberDiff line change
@@ -156,10 +156,17 @@ format.easycormatrix <- function(x,
156156
# final new line
157157
footer <- paste0(footer, "\n")
158158

159-
# for html/markdown, create list
159+
# for html/markdown, modify footer format
160160
if (!is.null(format) && format != "text") {
161+
# no line break if not text format
161162
footer <- unlist(strsplit(footer, "\n", fixed = TRUE))
162-
footer <- as.list(footer[nzchar(footer, keepNA = TRUE)])
163+
# remove empty elements
164+
footer <- footer[nzchar(footer, keepNA = TRUE)]
165+
# create list or separate by ";"
166+
footer <- switch(format,
167+
html = paste(footer, collapse = "; "),
168+
as.list(footer)
169+
)
163170
}
164171

165172
footer
@@ -168,7 +175,9 @@ format.easycormatrix <- function(x,
168175

169176
#' @keywords internal
170177
.format_easycorrelation_caption <- function(x, format = NULL) {
171-
if (!is.null(attributes(x)$method)) {
178+
if (is.null(attributes(x)$method)) {
179+
caption <- NULL
180+
} else {
172181
if (isTRUE(attributes(x)$smoothed)) {
173182
prefix <- "Smoothed Correlation Matrix ("
174183
} else {
@@ -179,8 +188,6 @@ format.easycormatrix <- function(x,
179188
} else {
180189
caption <- paste0(prefix, unique(attributes(x)$method), "-method)")
181190
}
182-
} else {
183-
caption <- NULL
184191
}
185192

186193
caption

R/methods_print.R

+5-5
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
#' @export
55
print.easycorrelation <- function(x, ...) {
6-
cat(insight::export_table(format(x, ...), format = "text"))
6+
cat(insight::export_table(format(x, ...), ...))
77
invisible(x)
88
}
99

@@ -13,9 +13,9 @@ print.easycormatrix <- function(x, ...) {
1313
# If real matrix, print as matrix
1414
if (colnames(formatted)[1] == "Variables") {
1515
formatted$Variables <- NULL
16-
print(as.matrix(formatted))
16+
print(as.matrix(formatted), ...)
1717
} else {
18-
cat(insight::export_table(format(x, ...), format = "text"))
18+
cat(insight::export_table(format(x, ...), ...))
1919
}
2020
invisible(x)
2121
}
@@ -31,7 +31,7 @@ print.easymatrixlist <- function(x, cols = "auto", ...) {
3131

3232
for (i in cols) {
3333
cat(" ", i, " ", "\n", rep("-", nchar(i) + 2), "\n", sep = "")
34-
print(x[[i]])
34+
print(x[[i]], ...)
3535
cat("\n")
3636
}
3737
}
@@ -40,7 +40,7 @@ print.easymatrixlist <- function(x, cols = "auto", ...) {
4040
print.grouped_easymatrixlist <- function(x, cols = "auto", ...) {
4141
for (i in names(x)) {
4242
cat(rep("=", nchar(i) + 2), "\n ", i, " ", "\n", rep("=", nchar(i) + 2), "\n\n", sep = "")
43-
print(x[[i]])
43+
print(x[[i]], ...)
4444
cat("\n")
4545
}
4646
}

correlation.Rproj

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
Version: 1.0
2+
ProjectId: a2737226-16da-4377-8659-b462bf604f1e
23

34
RestoreWorkspace: No
45
SaveWorkspace: No

man/correlation-package.Rd

+6-6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)