From 3550f785082d1dab3694c66f891f6b1610b3e718 Mon Sep 17 00:00:00 2001 From: Venlanen Date: Wed, 18 Oct 2017 17:02:17 +0200 Subject: [PATCH 1/4] Create README.md --- SCE Utils/Shiny/README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 SCE Utils/Shiny/README.md diff --git a/SCE Utils/Shiny/README.md b/SCE Utils/Shiny/README.md new file mode 100644 index 0000000..cf5eb5b --- /dev/null +++ b/SCE Utils/Shiny/README.md @@ -0,0 +1,14 @@ +# Shiny App for Expert SCE Annotation +A web application for semi-manual SCE annotation. +## Installation +Download the `server.R`, `ui.R` and `global.R` files and place them in the same folder with each other. +## Customization +Edit `server.R` with the details of your data on the lines marked with `'#!#'`. Edit `ui.r` with the names +of your users on the lines marked with `'#!#'`. More information is provided in the aforementioned files. **NOTE** You need to +have a mosaiCatcher output file, and you need to edit the location of said file into the `server.R`, for this app to work. +## Running the App +**From a terminal:** execute the following, where `~/shinyapp` should be replaced with the path to your application: +``` +R -e "shiny::runApp('~/shinyapp', launch.browser=TRUE)" +``` +**From RStudio:** open `server.R` in your RStudio, and click on the _Run App_ text in the top right of the source code pane. From 868a11b7c562499e3cb95bfaad27c657a2e11fd8 Mon Sep 17 00:00:00 2001 From: Venlanen Date: Wed, 18 Oct 2017 17:06:10 +0200 Subject: [PATCH 2/4] Add files via upload --- SCE Utils/Shiny/global.R | 154 ++++++++++++++++++++ SCE Utils/Shiny/server.R | 300 +++++++++++++++++++++++++++++++++++++++ SCE Utils/Shiny/ui.R | 109 ++++++++++++++ 3 files changed, 563 insertions(+) create mode 100644 SCE Utils/Shiny/global.R create mode 100644 SCE Utils/Shiny/server.R create mode 100644 SCE Utils/Shiny/ui.R diff --git a/SCE Utils/Shiny/global.R b/SCE Utils/Shiny/global.R new file mode 100644 index 0000000..cabf591 --- /dev/null +++ b/SCE Utils/Shiny/global.R @@ -0,0 +1,154 @@ +# Shiny app for SCE classification +# global.R Helper functions + +library(shiny) +library(shinythemes) +library(htmlwidgets) +library(data.table) +library(ggplot2) +library(shinyjs) + +# Pretty printing, courtesy of Sascha Meiers +format_Mb <- function(x) { + paste(scales::comma(x/1e6), "Mb") +} + +# Data initialization, adapted from Sascha Meiers. Some redundance to handle slighly different mosaiCatcher runs. +initialize_data <- function(e) { + e <- e[class != "None",] + + # Order chromosomes. + e <- e[, chrom := sub('^chr', '', chrom)][] # get rid of 'chr' if is there + e <- e[grepl('^([1-9]|[12][0-9]|X|Y)$', chrom), ] # only looking at standard chromosomes + e$chrom <- paste0("chr", e$chrom) # add 'chr' for clarity on plots + # Turn the 'chrom' field into a factor, and order it. + e <- e[, chrom := factor(chrom, levels=as.character(c(paste0("chr", 1:22),'chrX','chrY')), ordered = T)] + + # A unique identifier for individual cells + if (is.null(e$cell_name)) e$cell_name = paste(e$sample, e$cell, sep="_") + + # Find consecutive intervals and number them + e <- e[order(cell_name, chrom, start, end),] + e$cnsc <- cumsum(e[, .(consecutive = c(1,abs(diff(as.numeric(factor(class)))))), by = .(sample, cell, chrom)]$consecutive) + + return(e) +} + +# Read result files for leaderboard. +get_results <- function(path, pattern) { + files = NULL + # List all result files for this dataset. + files = list.files(path, pattern = paste0(pattern, ".txt")) + results = data.table(cell_name = character(), cell = character(), sample = character(), chrom = character(), + cnsc = character(), end = integer(), user = character(), time = character(), counter = integer()) + if (!is.null(files) && length(files) > 0) { + for (i in files) { + a = fread(paste0(path, i), showProgress = F) + results = rbind(results, a) + } + } + return(results) +} + +# Main plot with the watson and crick bars. Adapted from Sascha Meiers. +main_plot <- function(e) { + if (the_end) { + message <- paste0("\n Congratulations!\n", + " You have clicked through all of the chromosomes.\n") + plt <- ggplot() + + annotate("text", x=10, y=10, label = message) + + theme_classic() + + theme(panel.spacing = unit(0, "lines"), + strip.background = element_rect(fill = NA, colour=NA), + rect = element_blank() + ) + + guides(fill = FALSE) + + return(plt) + } + + info_reads_per_bin = stats::median(e$w + e$c) + info_y_limit = 2*info_reads_per_bin+1 + + chr_end = max(e$end) + + plt <- ggplot(e) + aes(x = (start+end)/2) + + # prepare consecutive rectangles for a better plotting experience + consecutive = cumsum(abs(diff(as.numeric(as.factor(e$class))))) + consecutive = c(consecutive[1], consecutive) + e$consecutive = consecutive + f = e[, .(start = min(start), end = max(end), class = class[1]), by = .(consecutive, chrom, sample, cell)][] + + plt <- plt + + geom_rect(data = f, aes(xmin = start, xmax=end, ymin=-Inf, ymax=Inf, fill=class), + inherit.aes=F, alpha=0.25) + + scale_fill_manual(values = c(WW = "sandybrown", CC = "paleturquoise4", + WC = "yellow", None = NA)) + + # Watson/Crick bars + plt <- plt + + suppressWarnings(geom_bar(aes(y = -w, width=(end-start)), + stat='identity', position = 'identity', fill='sandybrown')) + + suppressWarnings(geom_bar(aes(y = c, width=(end-start)), + stat='identity', position = 'identity', fill='paleturquoise4')) + + + ylab("Watson | Crick") + xlab(NULL) + + scale_x_continuous(breaks = scales::pretty_breaks(12), labels = format_Mb) + + scale_y_continuous(breaks = scales::pretty_breaks(1)) + + coord_cartesian(xlim = c(0, chr_end), ylim = c(-info_y_limit, info_y_limit)) + + theme_classic() + + theme(panel.spacing = unit(0, "lines"), + strip.background = element_rect(fill = NA, colour=NA), + plot.title = element_text(hjust = 0.5)) + + guides(fill = FALSE) + + plt <- plt + facet_grid(sample+cell ~ .) + + ggtitle(e$chrom[1], subtitle = NULL) + + return(plt) +} + +# Overlay plot for just the breakpoints +bkp_plot <- function(e, bkps, highlighted) { + if (the_end) { + plt <- ggplot() + geom_blank() + + theme_classic() + + theme(panel.spacing = unit(0, "lines"), + strip.background = element_rect(fill = NA, colour=NA), + rect = element_blank() + ) + + guides(fill = FALSE) + return(plt) + } + + info_reads_per_bin <- round(median(e$w + e$c)) + info_y_limit <- 2*info_reads_per_bin+1 + + chr_end <- max(e$end) + + bkps <- bkps[cnsc %in% e$cnsc] + + plt <- ggplot(bkps) + + ylab("Watson | Crick") + xlab(NULL) + + scale_x_continuous(breaks = scales::pretty_breaks(12), labels = format_Mb) + + scale_y_continuous(breaks = scales::pretty_breaks(1)) + + coord_cartesian(xlim = c(0, chr_end), ylim = c(-info_y_limit, info_y_limit)) + + theme_classic() + + theme(panel.spacing = unit(0, "lines"), + strip.background = element_rect(fill = NA, colour=NA), + plot.title = element_text(hjust = 0.5), + rect = element_blank() + ) + + guides(fill = FALSE) + + facet_grid(sample+cell ~ .) + + geom_segment(aes(x=end, y=-info_y_limit, xend=end, yend=info_y_limit), + size=0.4, inherit.aes=F, linetype="dashed", col="dodgerblue") + + ggtitle(e$chrom[1], subtitle = NULL) + + if ((!is.null(highlighted)) && (nrow(highlighted)>0)) { + plt <- plt + geom_segment(data=highlighted, aes(x=end, y=-info_y_limit, xend=end, yend=info_y_limit), size=0.7, inherit.aes=F, linetype="solid", col="orangered") + } + + return(plt) +} diff --git a/SCE Utils/Shiny/server.R b/SCE Utils/Shiny/server.R new file mode 100644 index 0000000..4ab57ae --- /dev/null +++ b/SCE Utils/Shiny/server.R @@ -0,0 +1,300 @@ +# Shiny app for SCE classification +# server.R - Server side functionality +# +# Lines marked with '#!#' are where you should put in the details of your data. +# You need to input the path and name to your mosaiCatcher output in this file. + +# Libraries +library(shiny) +library(shinythemes) +library(data.table) +library(ggplot2) +library(htmlwidgets) +library(shinyjs) + +source("global.R") + +# Define server logic +shinyServer(function(input, output, session) { + + ## Data to classify. This should be a mosaiCatcher output file. CHANGE THIS! + f_in <- "zcat your_path/count.table.txt.gz" #!# + # Name or short description of the dataset to be shown under the plots. + # Example: "Skin fibroblasts, 500kb bins" + data_name <<- "" #!# + + ## Storing data. These don't need to be changed, unless you plan to use several different datasets. + # File suffix to use for the result files the result filename will be in the form of + #`[result_suffix].txt` This suffix is also used to filter out result files for the leaderboard. + result_suffix <<- "_results" #!# + # Where to store the result files. This doesn't need to be changed. + result_folder <<- "./Results/" #!# + + ## Variables + # Other + var_cellsToShow <<- 5 + var_plotWidth <<- 1200 + var_plotHeight <<- 575 + var_UIPadding <<- 10 + # How close the click has to be (in bp) + var_pad <<- 2e6 + + + # Initialize reactive variables + # Where we are + r_counter <<- 1 + makeReactiveBinding("r_counter") + # User + r_name <-NULL + makeReactiveBinding("r_name") + # File to write results into + r_resultFile <- "" + makeReactiveBinding("r_resultFile") + # How many cells were shown to the user in the last plot. This is only relevant when there are too few chromosomes to display. + r_cellsShown <<- var_cellsToShow + makeReactiveBinding("r_cellsShown") + # Next cells to show + r_nextData <- NULL + makeReactiveBinding("r_nextData") + # Flag for tester account + r_tester <- FALSE + makeReactiveBinding("r_tester") + # Leaderboard + r_results <- NULL + makeReactiveBinding("r_results") + #Flag for having gone through it all + the_end <<- F + makeReactiveBinding("the_end") + + + # Read all counts + #message("Reading in data...") + d <- fread(f_in, showProgress = F) + #message("Please wait, calculating everything needed...") + d <- initialize_data(d) + + # All breakpoints .. + potential_SCEs <- d[, .(end=max(end)), by=.(cell_name, cell, sample, chrom, cnsc)] + # ..where there are at least two breakpoints per cell.. + potential_SCEs <- merge(potential_SCEs[(duplicated(potential_SCEs, by=c("chrom", "cell_name"), fromLast=F))], + potential_SCEs[(duplicated(potential_SCEs, by=c("chrom", "cell_name"), fromLast=T))], all = T) + #.. and not looking at breakpoints that denote the end of the chromosome + potential_SCEs <- potential_SCEs[potential_SCEs[, .I[end != max(end)], by=chrom]$V1] + + # Choose all chromosomes on all the cells (ignore bins) + sample_cell_list <- unique(d[order(chrom, cell_name),.(chrom, cell_name, sample)]) + # Take only the ones that have at least one breakpoint + sample_cell_list <- sample_cell_list[paste(cell_name, chrom, sep="_") %in% paste(potential_SCEs$cell_name, potential_SCEs$chrom, sep="_")] + # 20 of each chromosome for all the donors #!! + sample_cell_list <- sample_cell_list[, .SD[1:20], by=list(chrom, sample)] #!! + # Remove cases where there weren't enough eligible chromosomes, and set counter (id) + sample_cell_list <- sample_cell_list[!is.na(sample_cell_list$cell_name)][, id := 1:.N] + + # Only looking at breakpoints from the cells selected above + potential_SCEs <- potential_SCEs[paste(cell_name, chrom, sep="_") %in% paste(sample_cell_list$cell_name, sample_cell_list$chrom, sep="_")] + + # This will store the breakpoints the user selects + # This is here instead of with the other variables, because it needs 'potential_SCEs' + r_selectedSCEs <- potential_SCEs[0,] + makeReactiveBinding("r_selectedSCEs") + + # Workaround to stop drawing the plot twice for each click + clickSaved <- reactiveValues(singleclick = NULL) + observeEvent(eventExpr = input$plotClick, handlerExpr = { clickSaved$singleclick <- input$plotClick }) + + ## Show login page, and after getting the name of the user, initialize counter, then show main page + # User initialization + observeEvent(input$submitButton, { + r_name <<- input$name + r_cellsShown <<- var_cellsToShow + + if (r_name != "Just testing") { + # A result file for each user + r_resultFile <<- paste0(result_folder, r_name, result_suffix, ".txt") + + # If there are already results on file, carry on from where we left off + if (file.exists(r_resultFile)) { + prev_results <<- fread(r_resultFile, header = T, showProgress = F) + if (r_name %in% prev_results$user) { + r_counter <<- counter <- max(prev_results[user == r_name]$counter) + } else { + r_counter <<- 1 + fwrite(list("cell_name", "cell", "sample", "chrom", "cnsc", "end", "user", "time", "counter"), r_resultFile, append = F, showProgress = F) + } + # New user + } else { + r_counter <<- 1 + fwrite(list("cell_name", "cell", "sample", "chrom", "cnsc", "end", "user", "time", "counter"), r_resultFile, append = F, showProgress = F) + } + } else { + # User is a tester, create random starting point. Don't write results. + r_tester <<- TRUE + r_counter <<- sample.int(nrow(sample_cell_list), 1) + } + + # Switch from the signup page to the main UI + hide("signup") + show("mainContent") + # Show greeting to user on first page. + #show("firstPage") + }) + + # Surplus bells and whistles + # Input greeting to user on first page. + #output$greeting <- renderText({ + # input$submitButton + # isolate({ + # greeting <- paste0("Hello ", input$name, ".") + # }) + #}) + + # On 'Yes'-button click, write selected SCEs into memory, then reset counters + observeEvent(input$yesButton, { + + # Reconcile actual cells shown + if (r_cellsShown == var_cellsToShow) { + r_counter <<- r_counter + var_cellsToShow + } else { + r_counter <<- r_counter + r_cellsShown + r_cellsShown <<- var_cellsToShow + } + + # Add user info + r_selectedSCEs$user <- rep(r_name) + r_selectedSCEs$date <- Sys.time() + r_selectedSCEs$counter <- r_counter + # Should add field for dataset! and a corresponding check at user initialization + + # Save results for everyone except testers + if (!r_tester) { + fwrite(r_selectedSCEs, r_resultFile, append = T, showProgress = F) + } + + # The greeting + #hide("firstPage") + + # Reset variables + r_selectedSCEs <<- potential_SCEs[0,] + clickSaved$singleclick <<- NULL + }) + + # Create plot of segment colours and binned W and C reads + output$plot <- renderPlot({ + + # Redraw on every click of the button, but not otherwise + input$yesButton + isolate({ + + # Number of cells to show: + if (sample_cell_list[chrom == sample_cell_list$chrom[r_counter], max(id)] == r_counter) { + last_cell <- r_counter + r_cellsShown <<- 1 + } else { + max_id <- sample_cell_list[chrom == sample_cell_list$chrom[r_counter], max(id)] + if (!is.null(max_id)) { + last_cell <- min((r_counter + var_cellsToShow - 1), max_id) + if((r_counter + var_cellsToShow - 1) > max_id) { + r_cellsShown <<- (last_cell - r_counter) + } + } else { + last_cell <- (r_counter + var_cellsToShow - 1) + r_cellsShown <- var_cellsToShow + } + } + #print(last_cell) + # Select cell "r_counter" and a few more + r_nextData <<- d[sample_cell_list[r_counter:last_cell, ], on = .(chrom, cell_name)] + + # Plotting function in global.R + if (last_cell == nrow(sample_cell_list)) { + the_end <<- T + } + main_plot(r_nextData) + }) + }, height = var_plotHeight, width = var_plotWidth) + + # Create plot of breakpoints to overlay on top of the base plot. This speeds things up, as only the top plot needs to be + # redrawn on breakpoint selection clicks + output$bkpPlot <- renderPlot({ + + # Draw if and only if plot area clicked or yes button pressed + clickSaved$singleclick + input$yesButton + isolate({ + g <- clickSaved$singleclick + if (!is.null(g)) { + + # Find out where the click was, and if hit any of the breakpoints + close_lines <- potential_SCEs[0,] + new_line <- potential_SCEs[0,] + close_lines <- potential_SCEs[chrom==r_nextData$chrom[1] & + end >= (g$x - var_pad) & + end <= (g$x + var_pad) & + sample == g$panelvar1 & + cell == g$panelvar2] + if (!is.null(close_lines) && nrow(close_lines)>0) { + if ( nrow(close_lines) == 1 ) { + new_line <- close_lines[1] + } else { + new_line <- close_lines[which.min(abs(end - g$x)),][1,] + } + r_selectedSCEs <<- rbind(r_selectedSCEs, new_line) + # remove _all_ duplicates if any (toggle mode) + r_selectedSCEs <<- r_selectedSCEs[!(duplicated(r_selectedSCEs) | duplicated(r_selectedSCEs, fromLast = TRUE)), ] + } + } + + # Plotting function in global.R + bkp_plot(r_nextData, potential_SCEs, r_selectedSCEs) + }) + }, height = var_plotHeight, width = var_plotWidth, bg="transparent") + + ## Surplus bells and whistles. + # Info about selected breakpoints in a text field under the plot. Cursor hover info would probably be more useful. + # output$bkps_info <- renderText({ + # clickSaved$singleclick + # input$yesButton + # isolate({ + # xy_str <- function(g) { + # if(nrow(g)==0) + # return(" [no breakpoints selected] \n") + # g <- g[order(sample, cell, end)] + # info_string <- "" + # for(i in 1:nrow(g)) { + # info_string <- paste0(info_string, "[", i , "][Sample: ", g$sample[i], ", cell: ", g$cell[i], ", at ", round(g$end[i]/1e6), "MB] \n", collapse = "") + # } + # return(info_string) + # } + # paste0("You have selected the following breakpoints: \n\n", xy_str(r_selectedSCEs)) + # }) + # }) + + # Listen for click on the leaderboard tab. Only refresh scores on click. + observeEvent(input$Tabs, { + if (input$Tabs == "score_tab") { + r_results <<- get_results(result_folder, result_suffix) + } + }) + + # Find out high scores! + output$scores <- renderText({ + if (!is.null(r_results)) { + r_results <- r_results[, count_cells:=max(counter), by=user] + r_results <- r_results[, count_bkps:=.N, by=user] + r_results <- r_results[order(count_cells, decreasing = T)] + cells = "\tUser\t\tChromosomes\tBreakpoints\n" + rank = 1 + for (i in unique(r_results$user)) { + cells = paste0(cells, rank, ".\t", i, "\t\t", (r_results[user==i]$count_cells[1]-1), "\t\t", r_results[user==i]$count_bkps[1], "\n") + rank = rank+1 + } + paste0(cells) + } + }) + + output$info <- renderText({ + paste0("Data: ", data_name) + }) + + session$onSessionEnded(stopApp) +}) diff --git a/SCE Utils/Shiny/ui.R b/SCE Utils/Shiny/ui.R new file mode 100644 index 0000000..aae0ed1 --- /dev/null +++ b/SCE Utils/Shiny/ui.R @@ -0,0 +1,109 @@ +# Shiny app for SCE classification +# ui.R User interface +# +# Please put in the names of your users on the lines marked with '#!#'. + +library(shiny) +library(shinythemes) +library(data.table) +library(ggplot2) +library(shinyjs) + +source("global.R") + +# Define UI +shinyUI( + fluidPage( + + # To enable plotting two plots on top of each other + tags$head(tags$style(".fixPosition{position: absolute;}")), + + # Cool buttons + theme = shinytheme("spacelab"), + + #Showing and hiding stuff + useShinyjs(), + + # First page + div( + id = "signup", + titlePanel("SCE Classification App"), + tags$p("Who are you?"), + tags$p( + selectInput("name", "Name:", + c("Just testing" = "Just testing", # You can leave this in. The results won't be stored. + "Ashley" = "Ashley", #!# Input the names of your users here. + "Karen" = "Karen", #!# + "Sascha" = "Sascha", #!# + "Venla" = "Venla")), #!# + actionButton("submitButton", "Submit") + ), + br(), + tags$p(tags$b("Note:"), " Please choose your own name, as it is used to determine which chromosomes to show you. You can test the app by choosing ", tags$i("Just testing"), "as your username. Testers will be started on a random chromosome, and the SCEs they pick are not permanently stored."), + tags$p("Calculations are performed first, so loading the first plot might take some time. Thank you for your patience.") + ), + + # Main page, hidden at first + hidden( + div( + useShinyjs(), + id = "mainContent", + + # Application title + titlePanel("SCE Classification"), + + mainPanel( + tabsetPanel( + type = "tabs", + id = "Tabs", + tabPanel( + "Plot", + # Superfluous bells and whistles + #hidden( + # div( + # id="firstPage", + # h3(textOutput("greeting")) + # ) + #), + #tags$p("Please click on all the breakpoints (blue dashed lines) that you think border + #a sister chromatid exchange. Once you've selected all the breakpoints, click on the button below the plot."), + #hr(), + fluidRow( + column( + width = 11, + offset = 0, + div( + plotOutput("plot", height = 575), + class = "fixPosition" + ), + div( + plotOutput("bkpPlot", height = 575, click = "plotClick"), + class = "fixPosition" + + ), + style = paste0("width: 100% ; height: ", (575 + 10),"px") + ), + fluidRow( + column(11, + verbatimTextOutput("info") + # tags$p(verbatimTextOutput("bkps_info")) + ), + column(1, + actionButton("yesButton", "Breakpoints chosen. Next!"), + style = "display: inline-block; vertical-align: top" + ) + ) + ) + ), + tabPanel( + title = "Leaderboard", + value = "score_tab", + verbatimTextOutput("scores") + ) + ) + ) + ) + ), + padding = 10 + ) +) From 47917c37595175c774f0c7c663712ecd8fa521fe Mon Sep 17 00:00:00 2001 From: Venlanen Date: Wed, 18 Oct 2017 17:10:30 +0200 Subject: [PATCH 3/4] Update README.md --- SCE Utils/README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/SCE Utils/README.md b/SCE Utils/README.md index f012e06..6367bd7 100644 --- a/SCE Utils/README.md +++ b/SCE Utils/README.md @@ -1,7 +1,13 @@ # SCE-Utils Utilities for finding and plotting SCEs in Strand-Seq data. -## Usage from command line: +## Shiny +A web app for semi-manual SCE annotation. + +## `SCE.R` and `plot_all_into_pdf.R` +A script for finding potential sister chromatid exchanges, and another to plot the results into a pdf. + +### Usage from command line: ``` Rscript SCE.R count.table.gz SCEs.txt Rscript plot_all_into_pdf.R count.table.gz SCEs.txt output.pdf From c8d8926b4054a4ac03ab1573ce6242ed951ba603 Mon Sep 17 00:00:00 2001 From: Venlanen Date: Wed, 18 Oct 2017 17:11:42 +0200 Subject: [PATCH 4/4] Update README.md --- SCE Utils/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SCE Utils/README.md b/SCE Utils/README.md index 6367bd7..b72dd03 100644 --- a/SCE Utils/README.md +++ b/SCE Utils/README.md @@ -4,7 +4,7 @@ Utilities for finding and plotting SCEs in Strand-Seq data. ## Shiny A web app for semi-manual SCE annotation. -## `SCE.R` and `plot_all_into_pdf.R` +## The Scripts `SCE.R` and `plot_all_into_pdf.R` A script for finding potential sister chromatid exchanges, and another to plot the results into a pdf. ### Usage from command line: