Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ export(plot_qc_metrics_heatmap)
export(plot_rle)
export(plot_volcano)
export(read_metadata)
export(select_robust_controls)
export(subsample_genes)
export(summarise_de)
export(validate_metadata)
Expand Down Expand Up @@ -66,6 +67,9 @@ importFrom(dplyr,reframe)
importFrom(dplyr,rename)
importFrom(dplyr,select)
importFrom(dplyr,where)
importFrom(edgeR,DGEList)
importFrom(edgeR,calcNormFactors)
importFrom(edgeR,cpm)
importFrom(ggiraph,geom_point_interactive)
importFrom(ggiraph,girafe)
importFrom(ggplot2,aes)
Expand Down Expand Up @@ -124,6 +128,7 @@ importFrom(stringr,str_trim)
importFrom(tibble,column_to_rownames)
importFrom(tibble,rownames_to_column)
importFrom(tidyr,drop_na)
importFrom(tidyr,pivot_longer)
importFrom(tidyr,pivot_wider)
importFrom(tidyr,unnest)
importFrom(utils,capture.output)
Expand Down
6 changes: 3 additions & 3 deletions R/check_zeroinflation.R
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,9 @@ check_zeroinflation <- function(data = NULL,
group = unique(df$group),
n_genes = nrow(df),
n_wells = sum(combined_id == unique(df$group)),
median_p0_obs = median(df$p0_obs),
median_p0_nb = median(df$p0_nb),
median_ZI = median(df$ZI),
mean_p0_obs = mean(df$p0_obs),
mean_p0_nb = mean(df$p0_nb),
mean_ZI = mean(df$ZI),
observed_zeros_num = sum(df$obs_zeros_num),
expected_zeros_num = sum(df$expected_zeros_num)
)
Expand Down
2 changes: 1 addition & 1 deletion R/compute_multi_de.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ compute_multi_de <- function(data = NULL,
method <- if (is.null(method)) "edgeR" else method
if (!method %in% c("Seurat_wilcox", "DESeq2", "edgeR",
"RUVg", "RUVs", "RUVr",
"limma_voom")) {
"limma_voom","limma_trend")) {
stop("Your normalization method is not available.")
}
if (is.null(control_samples)) {
Expand Down
77 changes: 50 additions & 27 deletions R/compute_normalised_counts.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
#' @param data A tidyseurat object merged with metadata. Must contain columns
#' "Well_ID", "Row", "Column"
#' @param method One of "raw", "logNorm", "cpm", "clr", "SCT", "DESeq2",
#' "edgeR", "RUVg", "RUVs", "RUVr", "limma_voom", "limma_trend", "zinb"
#' "edgeR", "RUVg", "RUVs", "RUVr", "limma_voom", "limma_trend",
#' "limma_voom_zinb", "edgeR_zinb"
#' @param batch Either empty, a single value, or a vector corresponding to the
#' number of samples
#' @param k Parameter k for RUVSeq and zinb methods
Expand Down Expand Up @@ -53,7 +54,8 @@ compute_normalised_counts <- function(data = NULL,
"cpm", "clr", "SCT",
"DESeq2", "edgeR",
"RUVg", "RUVs", "RUVr",
"limma_voom", "limma_trend", "zinb")) {
"limma_voom", "limma_trend", "limma_voom_zinb",
"edgeR_zinb")) {
stop("Your normalization method is not available.")
}
batch <- if (is.null(batch)) "1" else as.character(batch)
Expand Down Expand Up @@ -96,32 +98,32 @@ compute_normalised_counts <- function(data = NULL,

normalize_lognorm <- function(data) {
lognorm <- NormalizeData(data, normalization.method = "LogNormalize", scale.factor = 10000)
lognorm@assays$RNA$data
return(lognorm@assays$RNA$data)
}

normalize_cpm <- function(data) {
cpm <- NormalizeData(data, normalization.method = "RC", scale.factor = 1e6)
cpm@assays$RNA$data
return(cpm@assays$RNA$data)
}

normalize_clr <- function(data) {
clr <- NormalizeData(data, normalization.method = "CLR", margin = 2)
clr@assays$RNA$data
return(clr@assays$RNA$data)
}

normalize_sct <- function(data, batch) {
sct <- SCTransform(data, do.scale = TRUE,
return.only.var.genes = FALSE,
vars.to.regress = if (length(batch) == 1) NULL else batch, verbose = FALSE)
sct@assays$SCT$data
return(sct@assays$SCT$data)
}

normalize_deseq2 <- function(data, batch) {
coldata <- coldata
design <- model_matrix
dds <- DESeqDataSetFromMatrix(countData = data@assays$RNA$counts, colData = coldata, design = design)
dds <- estimateSizeFactors(dds)
counts(dds, normalized = TRUE)
return(counts(dds, normalized = TRUE))
}

normalize_edger <- function(data, batch) {
Expand All @@ -135,15 +137,15 @@ compute_normalised_counts <- function(data = NULL,
dge <- calcNormFactors(dge, methods = "TMM")
design <- model_matrix
dge <- estimateDisp(dge, design, BPPARAM = p)
cpm(dge, log = FALSE)
return(cpm(dge, log = FALSE))
}

normalize_limma_voom <- function(data, batch) {
dge <- DGEList(counts = data@assays$RNA$counts, samples = coldata$condition, group = coldata$condition)
dge <- calcNormFactors(dge, methods = "TMMwsp")
design <- model_matrix
dge <- voom(dge, design)
dge$E
return(dge$E)
}

normalize_limma_trend <- function(data, batch) {
Expand All @@ -154,7 +156,7 @@ compute_normalised_counts <- function(data = NULL,
fit <- lmFit(logCPM, design)
fit <- eBayes(fit, trend=TRUE)
dge <- fit
logCPM
return(logCPM)
}

normalize_ruvg <- function(data, batch, spikes, k) {
Expand All @@ -180,7 +182,7 @@ compute_normalised_counts <- function(data = NULL,
}
if (!all(spikes %in% row.names(data@assays$RNA$counts))) {
warning("Some or all of your control genes are not present in the dataset.")
spikes <- intersect(spikes, rownames(counts(set)))
spikes <- intersect(spikes, rownames(counts))
}
#k defines number of sources of variation, two have been chosen for row and column
set <- EDASeq::newSeqExpressionSet(counts = as.matrix(counts),
Expand Down Expand Up @@ -236,7 +238,7 @@ compute_normalised_counts <- function(data = NULL,
EDASeq::normCounts(set)
}

normalize_zinb <- function(data, batch) {
normalize_limmavoom_zinb <- function(data, batch) {

message("Please allow extra time for zinb mode.")
if (ncol(data) > 50) {
Expand All @@ -247,34 +249,54 @@ compute_normalised_counts <- function(data = NULL,
cl <- makeCluster(num_cores)
doParallel::registerDoParallel(cl)
p <- BiocParallel::DoparParam()
system.time(zinb <- zinbwave::zinbwave(filtered_sce, K = k,
suppressWarnings(zinb <- zinbwave::zinbwave(filtered_sce, K = k,
epsilon=12000,
BPPARAM = p,
observationalWeights = TRUE))
observationalWeights = TRUE, verbose = F))
counts <- zinb@assays@data$counts
weights <- zinb@assays@data$weights

# approximate denoised counts (downweighting dropouts)
#dge <- DGEList(counts = data@assays$RNA$counts, samples = coldata$condition, group = coldata$condition)
#dge <- calcNormFactors(dge, methods = "TMM")
#design <- model_matrix
#dge <- estimateDisp(dge, design, BPPARAM = p)
#dge$weights <- assay(zinb, "weights")
#fit <- glmQLFit(dge, design, BPPARAM = p)
#norm_counts <- fitted(fit)


dge <- DGEList(counts = counts(filtered_sce), samples = coldata$condition, group = coldata$condition)
dge <- calcNormFactors(dge, methods = "TMMwsp")
design <- model_matrix
dge <- voom(dge, design, weights = weights)
normalised_values <- dge$E

#normalised_values <- zinb@assays@data$normalizedValues
stopCluster(cl)
doParallel::registerDoParallel()
return(normalised_values)
}

normalize_edger_zinb <- function(data, batch) {

message("Please allow extra time for zinb mode.")
if (ncol(data) > 50) {
message("zinb with over 50 samples takes a long time. Consider reducing the number of samples or genes.")
}
data_sce <- as.SingleCellExperiment(data)
filtered_sce <- subset(data_sce, rowSums(as.data.frame(counts(data_sce))) > 0)
cl <- makeCluster(num_cores)
doParallel::registerDoParallel(cl)
p <- BiocParallel::DoparParam()
suppressMessages(zinb <- zinbwave::zinbwave(filtered_sce, K = k,
epsilon=12000,
BPPARAM = p,
observationalWeights = TRUE))
counts <- zinb@assays@data$counts
weights <- zinb@assays@data$weights

# approximate denoised counts (downweighting dropouts)
dge <- DGEList(counts = counts, samples = coldata$condition, group = coldata$condition)
dge <- calcNormFactors(dge, methods = "TMM")
design <- model_matrix
dge <- estimateDisp(dge, design, BPPARAM = p)
dge$weights <- weights
fit <- glmQLFit(dge, design, BPPARAM = p)
normalised_values <- fitted(fit)
stopCluster(cl)
doParallel::registerDoParallel()
return(normalised_values)
}

# Main function logic
validated <- validate_inputs(data, method, batch, k, max_counts, num_cores)
data <- validated$data
Expand All @@ -297,7 +319,8 @@ compute_normalised_counts <- function(data = NULL,
RUVg = normalize_ruvg(data, batch, spikes, k),
RUVs = normalize_ruvs(data, batch, k),
RUVr = normalize_ruvr(data, batch, k),
zinb = normalize_zinb(data, batch),
limma_voom_zinb = normalize_limmavoom_zinb(data, batch),
edgeR_zinb = normalize_edger_zinb(data, batch),
stop("Unsupported normalization method.")
)
return(norm_data)
Expand Down
29 changes: 19 additions & 10 deletions R/compute_single_de.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ utils::globalVariables(c("model_matrix"))
#' number of samples
#' @param k Parameter k for RUVSeq methods, check RUVSeq tutorial
#' @param spikes List of genes to use as spike controls
#' @param num_cores Number of cores for parallel processing
#' @importFrom limma makeContrasts eBayes contrasts.fit topTable
#' @importFrom tibble rownames_to_column
#' @import DESeq2
Expand All @@ -34,7 +35,8 @@ compute_single_de <- function(data = NULL,
method = NULL,
batch = 1,
k = 2,
spikes = NULL) {
spikes = NULL,
num_cores = 1) {
if (!requireNamespace("SummarizedExperiment", quietly = TRUE)) {
stop(
"compute_single_de(): the following package is required but not installed: SummarizedExperiment",
Expand All @@ -48,7 +50,7 @@ compute_single_de <- function(data = NULL,
method <- if (is.null(method)) "limma_voom" else method
if (!method %in% c("Seurat_wilcox", "DESeq2", "edgeR",
"RUVg", "RUVs", "RUVr",
"limma_voom", "limma_trend")) {
"limma_voom", "limma_trend","edgeR_zinb", "limma_zinb")) {
stop("Your normalization method is not available.")
}
if (is.null(treatment_samples) || is.null(control_samples)) {
Expand Down Expand Up @@ -157,7 +159,8 @@ compute_single_de <- function(data = NULL,
combined_id <- data$combined_id
dds <- DESeqDataSetFromMatrix(countData = data@assays$RNA$counts,
colData = pheno_data,
design = ~ condition)
# design = ~ condition)
design = if (length(batch) == 1) ~ condition else ~ condition + batch)
dds <- DESeq(dds)
res <- results(dds, contrast = c("condition", treatment_samples, control_samples))
top_table <- as.data.frame(res) %>%
Expand All @@ -178,7 +181,7 @@ compute_single_de <- function(data = NULL,
latent.vars = "batch",
test.use = "wilcox") %>%
select("avg_log2FC", "p_val", "p_val_adj") %>%
rename("log2FC" = "avg_log2FC", "p_val" = "p_val", "p_value_adj" = "p_val_adj") %>%
rename("log2FC" = "avg_log2FC", "p_value" = "p_val", "p_value_adj" = "p_val_adj") %>%
mutate(metric = 1) %>%
rownames_to_column("gene")

Expand All @@ -190,7 +193,7 @@ compute_single_de <- function(data = NULL,
model_matrix <- if (length(batch) == 1) model.matrix(~0 + combined_id) else
model.matrix(~0 + combined_id + batch)
if (length(spikes) == 0) {
warning("List of control genes not provided for RUVg, using default.")
warning("List of control genes not provided for RUVg, using default human housekeeping genes.")
spikes <- c(
"ACTB",
"GAPDH",
Expand Down Expand Up @@ -316,20 +319,25 @@ compute_single_de <- function(data = NULL,
return(as.data.frame(top_table))
}

de_zinb <- function(data, pheno_data, treatment_samples, control_samples, batch, k) {
de_edgeR_zinb <- function(data, pheno_data, treatment_samples, control_samples, batch, k) {

data_sce<-as.SingleCellExperiment(data)
filtered_sce <- data_sce[rowSums(counts(data_sce)) > 50, ]
num_cores <- 8 # Change this based on your system
filtered_sce <- data_sce[rowSums(counts(data_sce)) > 0, ]
cl <- makeCluster(num_cores)
doParallel::registerDoParallel(cl)
p <- BiocParallel::DoparParam()
system.time(zinb <- zinbwave::zinbwave(filtered_sce, K = 2,
epsilon=1000,
epsilon=12000,
BPPARAM = p,
observationalWeights = TRUE))

weights <- SummarizedExperiment::assay(zinb, "weights")

#perform_edgeR
combined_id <- data$combined_id
model_matrix <- if (length(batch) == 1) model.matrix(~0 + combined_id) else
model.matrix(~0 + combined_id + batch)

dge <- DGEList(SummarizedExperiment::assay(zinb))
dge <- calcNormFactors(dge, method = "TMMwsp")

Expand Down Expand Up @@ -375,7 +383,8 @@ compute_single_de <- function(data = NULL,
RUVg = de_ruvg(data, pheno_data, treatment_samples, control_samples, batch, spikes, k),
RUVs = de_ruvs(data, pheno_data, treatment_samples, control_samples, batch, k),
RUVr = de_ruvr(data, pheno_data, treatment_samples, control_samples, batch, k),
zinb = de_zinb(data, pheno_data, treatment_samples, control_samples, batch, k),
edgeR_zinb = de_edgeR_zinb(data, pheno_data, treatment_samples, control_samples, batch, k),
limma_zinb = limma_zinb(data, pheno_data, treatment_samples, control_samples, batch, k),
stop("Unsupported DE method.")
)
return(de_data)
Expand Down
6 changes: 4 additions & 2 deletions R/plot_rle.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
#' @param batch Either empty, a single value, or a vector corresponding to the
#' number of samples.
#' @param normalisation One of "raw", "logNorm", "cpm", "clr", "SCT", "DESeq2",
#' "edgeR", "RUVg", "RUVs", "RUVr", "limma_voom", "limma_trend", "zinb". If empty, defaults to raw.
#' "edgeR", "RUVg", "RUVs", "RUVr", "limma_voom", "limma_trend", "limma_voom_zinb",
#' "edgeR_zinb". If empty, defaults to raw.
#' @param spikes List of genes to use as spike controls in RUVg
#' @param num_cores Number of cores for edgeR and zinb calculations

Expand Down Expand Up @@ -205,7 +206,8 @@ plot_rle <- function(data, barcodes = NULL,
RUVg = compute_normalised_counts(data, method = "RUVg", batch = batch, spikes = spikes, num_cores = num_cores),
RUVs = compute_normalised_counts(data, method = "RUVs", batch = batch, num_cores = num_cores),
RUVr = compute_normalised_counts(data, method = "RUVr", batch = batch, num_cores = num_cores),
zinb = compute_normalised_counts(data, method = "zinb", batch = batch, num_cores = num_cores),
limma_voom_zinb = compute_normalised_counts(data, method = "limma_voom_zinb", batch = batch, num_cores = num_cores),
edgeR_zinb = compute_normalised_counts(data, method = "edgeR_zinb", batch = batch, num_cores = num_cores),
stop("Unsupported normalization method.")
)

Expand Down
Loading
Loading