Skip to content

Commit 1851965

Browse files
Update B05Sa with new function to compute score of the cah
1 parent 073ecf9 commit 1851965

File tree

1 file changed

+68
-53
lines changed
  • devel/shiny/B05Sa_cluster

1 file changed

+68
-53
lines changed

devel/shiny/B05Sa_cluster/app.R

Lines changed: 68 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
learndown::learndownShinyVersion("0.0.9000")
1+
learndown::learndownShinyVersion("0.1.0")
22
conf <- BioDataScience::config()
33

44
library(shiny)
5+
library(shinyjs)
56
library(learndown)
67
library(BioDataScience2)
78
library(dplyr)
@@ -12,8 +13,8 @@ library(chart)
1213
# add news functions ----
1314
## This function move to a package
1415

15-
# CAH for SciViews, version 1.1.1
16-
# Copyright (c) 2021, Philippe Grosjean (phgrsojean@sciviews.org)
16+
# CAH for SciViews, version 1.2.0
17+
# Copyright (c) 2021, Philippe Grosjean (phgrosjean@sciviews.org)
1718

1819
# dist is really a dissimilarity matrix => we use dissimilarity() as in the
1920
# {cluster} package, i.e., class is c("dissimilarity", "dist")
@@ -117,7 +118,7 @@ as.dissimilarity.matrix <- function(x, ...) {
117118

118119
# We want to print only the first few rows and columns
119120
print.dissimilarity <- function(x, digits.d = 3L, rownames.lab = "labels",
120-
...) {
121+
...) {
121122
mat <- as.matrix(x)
122123
mat <- format(round(mat, digits.d))
123124
diag(mat) <- ""
@@ -157,14 +158,14 @@ nobs.dissimilarity <- function(object, ...)
157158
# TODO: `[` by first transforming into a matrix with as.matrix()
158159

159160
autoplot.dissimilarity <- function(object, order = TRUE, show.labels = TRUE,
160-
lab.size = NULL, gradient = list(low = "red", mid = "white", high = "blue"),
161-
...) {
161+
lab.size = NULL, gradient = list(low = "red", mid = "white", high = "blue"),
162+
...) {
162163
factoextra::fviz_dist(object, order = order, show_labels = show.labels,
163164
lab_size = lab.size, gradient = gradient)
164165
}
165166

166167
chart.dissimilarity <- function(data, ...,
167-
type = NULL, env = parent.frame())
168+
type = NULL, env = parent.frame())
168169
autoplot(data, type = type, ...)
169170

170171
# cluster object (inheriting from hclust)
@@ -257,23 +258,24 @@ augment.cluster <- function(x, data, k = NULL, h = NULL, ...) {
257258
# circular), see http://www.sthda.com/english/wiki
258259
# /beautiful-dendrogram-visualizations-in-r-5-must-known-methods
259260
# -unsupervised-machine-learning
260-
plot.cluster <- function(x, y, hang = -1, check = TRUE, type = "vertical",
261-
lab = "Height", ...) {
261+
plot.cluster <- function(x, y, labels = TRUE, hang = -1, check = TRUE,
262+
type = "vertical", lab = "Height", ...) {
262263
type <- match.arg(type[1], c("vertical", "horizontal", "circular"))
263264
# type == "circular" is special because we need to transform as ape::phylo
264265
if (type == "circular") {
265266
if (!missing(hang))
266267
warning("'hang' is not used with a circular dendrogram")
267268
phylo <- ape::as.phylo(x)
268-
plot(phylo, type = "fan", font = 1, ...)
269+
plot(phylo, type = "fan", font = 1, show.tip.label = labels, ...)
269270
} else {# Use plot.dendrogram() instead
270271
# We first convert into dendrogram objet, then we plot it
271272
# (better that plot.hclust())
273+
if (isTRUE(labels)) leaflab <- "perpendicular" else leaflab <- "none"
272274
dendro <- as.dendrogram(x, hang = hang, check = check)
273275
if (type == "horizontal") {
274-
plot(dendro, horiz = TRUE, xlab = lab, ...)
276+
plot(dendro, horiz = TRUE, leaflab = leaflab, xlab = lab, ...)
275277
} else {
276-
plot(dendro, horiz = FALSE, ylab = lab, ...) # note: label different axe
278+
plot(dendro, horiz = FALSE, leaflab = leaflab, ylab = lab, ...)
277279
}
278280
}
279281
}
@@ -285,8 +287,8 @@ circle <- function(x = 0, y = 0, d = 1, col = 0, lwd = 1, lty = 1, ...)
285287
inches = FALSE, add = TRUE, ...)
286288

287289
# TODO: make sure the dendrogram is correct with different ggplot themes
288-
autoplot.cluster <- function(object, type = "vertical", circ.text.size = 3,
289-
theme = theme_sciviews(), xlab = "", ylab = "Height", ...) {
290+
autoplot.cluster <- function(object, labels = TRUE, type = "vertical",
291+
circ.text.size = 3, theme = theme_sciviews(), xlab = "", ylab = "Height", ...) {
290292
if (is.null(type))
291293
type <- "vertical"
292294
type <- match.arg(type[1], c("vertical", "horizontal", "circular"))
@@ -298,24 +300,29 @@ autoplot.cluster <- function(object, type = "vertical", circ.text.size = 3,
298300
theme + xlab(xlab) + ylab(ylab)
299301

300302
if (type == "circular") {
301-
# Get labels (need one more to avoid last = first!)
302-
label_df <- tibble::tibble(labels = c(labels(object)[object$order], ""))
303-
xmax <- nobs(object) + 1
304-
label_df$id <- 1:xmax
305-
angle <- 360 * (label_df$id - 0.5) / xmax
306-
# Left or right?
307-
label_df$hjust <- ifelse(angle < 270 & angle > 90, 1, 0)
308-
# Angle for more readable text
309-
label_df$angle <- ifelse(angle < 270 & angle > 90, angle + 180, angle)
303+
if (isTRUE(labels)) {
304+
# Get labels (need one more to avoid last = first!)
305+
label_df <- tibble::tibble(labels = c(labels(object)[object$order], ""))
306+
xmax <- nobs(object) + 1
307+
label_df$id <- 1:xmax
308+
angle <- 360 * (label_df$id - 0.5) / xmax
309+
# Left or right?
310+
label_df$hjust <- ifelse(angle < 270 & angle > 90, 1, 0)
311+
# Angle for more readable text
312+
label_df$angle <- ifelse(angle < 270 & angle > 90, angle + 180, angle)
313+
}
310314

311315
# Make the dendrogram circular
312316
dendro <- dendro +
313317
scale_x_reverse() +
314318
scale_y_reverse() +
315-
coord_polar(start = pi/2) +
316-
geom_text(data = label_df,
317-
aes(x = id, y = -0.02, label = labels, hjust = hjust),
318-
size = circ.text.size, angle = label_df$angle, inherit.aes = FALSE) +
319+
coord_polar(start = pi/2)
320+
if (isTRUE(labels))
321+
dendro <- dendro +
322+
geom_text(data = label_df,
323+
aes(x = id, y = -0.02, label = labels, hjust = hjust),
324+
size = circ.text.size, angle = label_df$angle, inherit.aes = FALSE)
325+
dendro <- dendro +
319326
theme(panel.border = element_blank(),
320327
axis.text = element_blank(),
321328
axis.line = element_blank(),
@@ -332,6 +339,9 @@ autoplot.cluster <- function(object, type = "vertical", circ.text.size = 3,
332339
axis.line.x = element_blank(),
333340
axis.ticks.x = element_blank(),
334341
axis.text.y = element_text(angle = 90, hjust = 0.5))
342+
if (!isTRUE(labels))
343+
dendro <- dendro +
344+
theme(axis.text.x = element_blank())
335345

336346
} else {# Horizontal dendrogram
337347
dendro <- dendro +
@@ -342,6 +352,9 @@ autoplot.cluster <- function(object, type = "vertical", circ.text.size = 3,
342352
theme(panel.border = element_blank(),
343353
axis.line.y = element_blank(),
344354
axis.ticks.y = element_blank())
355+
if (!isTRUE(labels))
356+
dendro <- dendro +
357+
theme(axis.text.y = element_blank())
345358
}
346359
dendro
347360
}
@@ -370,43 +383,41 @@ peng %>.%
370383

371384
# specific Function ----
372385

373-
score_cah <- function(x, reference = peng$species, digits = 5) {
386+
score_cah <- function(x, reference = peng_m$species) {
374387
tab <- table(reference, x)
375-
max_gr <- apply(tab, 1, which.max)
376-
tab[ , ]
377-
378-
if (length(unique(max_gr)) < 3)
379-
res <- "Votre CAH ne permet pas de retrouver les 3 groupes. Un ou plusieurs groupes sont confondus."
388+
k <- length(unique(x))
389+
prop <- prop.table(tab,margin = 2)
390+
res <- sum(apply(prop, 2, max)) / k
380391

381-
if (length(unique(max_gr)) == 3) {
382-
tot <- apply(tab, 1, max) / rowSums(tab)
383-
res <- paste0("Votre CAH permet de discerner 3 groupes avec une précision de ", round((100*sum(tot)/nlevels(reference)),digits = digits ), "%.")
384-
}
385392
res
386393
}
387394

388395
# UI -----
389396

390397
ui <- fluidPage(
398+
useShinyjs(),
391399
learndownShiny("Classification hiérarchique ascendante sur des mesures de manchots d'antarctique."),
392400

393401
sidebarLayout(
394402
sidebarPanel(
395403
p("Vous avez à disposition des mesures sur 342 manchots de 3 espèces différentes. Trouvez les meilleurs paramètres pour votre CAH afin d'optimiser votre regroupement."),
396-
p("Les variables mesurées sont les suivantes : la longueur du bec (mm), la largeur du bec (mm), la longueur de la nageoire (mm) et la masse (g)."),
397404
selectInput("method_dist", "Métrique de distance", choices = c("euclidean", "bray", "canberra", "manhattan")),
398405
selectInput("scale", "Standardisation", choices = c(FALSE, TRUE)),
399406
selectInput("method_clust", "Méthode de CAH",
400407
choices = c("complete", "single", "average", "ward.D2")),
408+
numericInput("k", "Nombre de groupes", min = 3, max = 6, step = 1, value = 3),
401409
hr(),
402410
submitQuitButtons()
403411
),
404412

405413
mainPanel(
414+
p("Les variables mesurées sont les suivantes : la longueur du bec (mm), la largeur du bec (mm), la longueur de la nageoire (mm) et la masse (g)."),
406415
plotOutput("dendrogram"),
407-
tableOutput("tab_res"),
408416
hr(),
409-
textOutput("scores_res")
417+
418+
hidden(
419+
textInput("scores", "Score")
420+
)
410421
)
411422
)
412423
)
@@ -422,31 +433,35 @@ server <- function(input, output, session) {
422433

423434
output$dendrogram <- renderPlot({
424435
cah <- cah()
425-
chart(cah) +
426-
ylab("Hauteur")
436+
437+
plot(cah, lab = "Hauteur", labels = FALSE)
438+
rect.hclust(cah, k = input$k)
427439
})
428440

429-
output$tab_res <- renderTable({
441+
output$scores_res <- renderText({
430442
cah <- cah()
431-
432-
table(peng$species, predict(cah, k = 3)) %>.%
433-
as.data.frame(.) %>.%
434-
pivot_wider(. , names_from = Var2, values_from = Freq) %>.%
435-
rename(., "Especes" = Var1)
443+
score_cah(predict(cah, k = input$k), reference = peng$species)
436444
})
437445

438-
output$scores_res <- renderText({
446+
observe({
447+
input$method_dist
448+
input$method_clust
449+
input$scale
450+
439451
cah <- cah()
440-
score_cah(predict(cah, k = 3), reference = peng$species)
452+
res <- score_cah(predict(cah, k = input$k), reference = peng$species)
453+
454+
updateTextInput(session, "scores",
455+
value = as.character(res))
441456
})
442457

443458

444459
trackEvents(session, input, output,
445460
sign_in.fun = BioDataScience::sign_in, config = conf)
446-
trackSubmit(session, input, output, max_score = 3, solution =
447-
list(method_dist = "euclidean", scale = "TRUE", method_clust = "ward.D2"),
461+
trackSubmit(session, input, output, max_score = 4, solution =
462+
list(scores = c(min = 0.97, max = 1)),
448463
comment = "",
449-
message.success = "Correct, c'est la meilleur solution. La CAH obtient un score très bon de plus de 94 % de correspondance",
464+
message.success = "Correct, La CAH obtient un score très bon de plus de 97 % de correspondance.",
450465
message.error = "Incorrect, un meilleur choix des paramètres est possible.")
451466
trackQuit(session, input, output, delay = 20)
452467
}

0 commit comments

Comments
 (0)