Automatically: emptyDrops()
# Removing empty droplets
## NOTE: emptyDrops() estimates its p-values by Monte Carlo simulation, so the
## set of retained barcodes can vary between runs unless the RNG seed is fixed
## upstream with set.seed().
bc_rank <- DropletUtils::emptyDrops(m = scmat, retain = emptydrops.retain)
## Keep the barcodes called as cells at FDR < 0.001. which() discards the
## barcodes whose FDR is NA (emptyDrops() does not test every barcode).
## drop = FALSE keeps the result a matrix, with its column names, even when
## a single barcode passes the threshold.
scmat_filtered <- scmat[, which(bc_rank$FDR < 1E-03), drop = FALSE]
# Kneeplot
## One row per droplet (before filtering): total UMI count, barcode, rank by
## decreasing UMI count, and whether emptyDrops kept the droplet.
nb_umi_by_barcode <- data.frame(
  nb_umi = Matrix::colSums(scmat),
  barcodes = colnames(scmat)
)
## Base-R ordering, so this step does not require dplyr/magrittr to be attached
## (every other call in this script is namespace-qualified).
nb_umi_by_barcode <- nb_umi_by_barcode[
  order(nb_umi_by_barcode$nb_umi, decreasing = TRUE), ,
  drop = FALSE
]
nb_umi_by_barcode$num_barcode <- seq_len(nrow(nb_umi_by_barcode))
nb_umi_by_barcode$droplets_state <- ifelse(
  nb_umi_by_barcode$barcodes %in% colnames(scmat_filtered),
  "Full Droplets",
  "Empty Droplets"
)
## Draw kneeplot
## - No expand_limits(x = 0, y = 0): on log10 axes 0 becomes -Inf, which cannot
##   extend the axis range and only triggered "Transformation introduced
##   infinite values" warnings.
## - Colours are bound to the state labels by name, so they stay correct when
##   only one of the two states is present.
## - override.aes uses scalars, so it is valid for any number of legend keys.
## Barcodes with zero UMI still map to -Inf on the log y axis, so ggplot2 may
## keep warning about infinite values on that axis.
ggplot2::ggplot(
  nb_umi_by_barcode,
  ggplot2::aes(x = num_barcode, y = nb_umi, color = droplets_state)
) +
  ggplot2::geom_point() +
  ggplot2::ggtitle(paste0("Kneeplot of ", sample.name)) +
  ggplot2::theme(legend.title = ggplot2::element_blank()) +
  ggplot2::scale_y_log10(name = "Number of UMI by droplet (log scale)") +
  ggplot2::scale_x_log10(name = "Droplet rank (log scale)") +
  ggplot2::scale_colour_manual(
    values = c("Empty Droplets" = "cyan3", "Full Droplets" = "royalblue4"),
    guide = "legend"
  ) +
  ggplot2::guides(
    colour = ggplot2::guide_legend(
      override.aes = list(linetype = 0, shape = 16)
    )
  )
# Build the Seurat object from the droplets retained after filtering
sobj <- Seurat::CreateSeuratObject(
  project = sample.name,
  counts = scmat_filtered
)
# Free memory: remove the intermediates that are no longer needed, then
# trigger a garbage collection (its report is silenced)
rm(list = c("bc_rank", "nb_umi_by_barcode", "scmat", "scmat_filtered"))
invisible(gc())
Manually
# Quantification
## Flag the droplets that reach the minimum number of detected features
sobj$min_features <- sobj$nFeature_RNA >= min.features
print(paste("Number of droplets with >=", min.features, "features :"))
## [1] "Number of droplets with >= 3 features :"
print(table(sobj[["min_features", drop = TRUE]]))
##
## TRUE
## 3967
## Flag the droplets that reach the minimum total count
sobj$min_counts <- sobj$nCount_RNA >= min.counts
print(paste("Number of droplets with >=", min.counts, "of total counts:"))
## [1] "Number of droplets with >= 100 of total counts:"
print(table(sobj[["min_counts", drop = TRUE]]))
##
## TRUE
## 3967
# Plots
## Histograms
## Distribution of each QC metric over all droplets, with the threshold drawn
## as a dashed red line and the number of droplets passing it in the title.
## Built with ggplot() + geom_histogram() because qplot() is deprecated since
## ggplot2 3.4.0. `size` is kept for the line width (rather than `linewidth`)
## so the line still renders as intended on ggplot2 < 3.4.0.
ggplot2::ggplot(
  data.frame(value = sobj[["nFeature_RNA", drop = TRUE]]),
  ggplot2::aes(x = value)
) +
  ggplot2::geom_histogram(bins = 101, fill = "white", colour = "black") +
  ggplot2::ggtitle(paste0(
    "nFeature_RNA (>= ", min.features, " : ",
    sum(sobj$min_features, na.rm = TRUE), " droplets)"
  )) +
  ggplot2::xlab("nFeature_RNA") +
  ggplot2::geom_vline(
    xintercept = min.features,
    col = "red", linetype = "dashed", size = 1.5
  )
ggplot2::ggplot(
  data.frame(value = sobj[["nCount_RNA", drop = TRUE]]),
  ggplot2::aes(x = value)
) +
  ggplot2::geom_histogram(bins = 101, fill = "white", colour = "black") +
  ggplot2::ggtitle(paste0(
    "nCount_RNA (>= ", min.counts, " : ",
    sum(sobj$min_counts, na.rm = TRUE), " droplets)"
  )) +
  ggplot2::xlab("nCount_RNA") +
  ggplot2::geom_vline(
    xintercept = min.counts,
    col = "red", linetype = "dashed", size = 1.5
  )
## Violinplot
Seurat::VlnPlot(sobj, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)