user.id <- Sys.getenv('USER')
input.dir <- paste0("/shared/projects/sincellte_2022/", user.id, "/Primary_analyses/input/matrix/")
output.dir <- paste0("/shared/projects/sincellte_2022/", user.id, "/Primary_analyses/finalresult/")
sample.name <- "PBMC_CTRL_GE"
species <- "human"
aligment <- "10X" #kallisto or 10X
# Computational Parameters
the.seed <- 1337L
# Analysis Parameters
## Empty droplets
emptydrops.retain <- NULL
## Barcode-level QC cell
pcmito.min <- 0
pcmito.max <- 20
pcribo.min <- 0
pcribo.max <- 100
min.features <- 3
min.counts <- 100
## Feature-level QC
min.cells <- 5
set.seed(the.seed)
suppressPackageStartupMessages(library(dplyr))
# Loading raw count matrix
if(aligment == "kallisto"){
scmat <- BUSpaRse::read_count_output(dir = input.dir, name = sample.name) # from BUStools
}else if(aligment == "10X"){
scmat <- Seurat::Read10X(input.dir) # from 10X
} else stop("Alignment technology unknown!")
# Some metrics about raw count matrix:
print('Number of barodes (droplets) in raw count matrix:')
## [1] "Number of barodes (droplets) in raw count matrix:"
droplets.total.nb <- ncol(scmat)
droplets.total.nb
## [1] 343461
print('Number of features (genes) in raw count matrix:')
## [1] "Number of features (genes) in raw count matrix:"
genes.total.nb <- nrow(scmat)
genes.total.nb
## [1] 33077
print('Number of UMIs in raw count matrix:')
## [1] "Number of UMIs in raw count matrix:"
umi.total.nb <- sum(scmat)
umi.total.nb
## [1] 12925546
# Removing empty droplets
bc_rank <- DropletUtils::emptyDrops(m = scmat, retain = emptydrops.retain)
scmat_filtered <- scmat[, which(bc_rank$FDR < 1E-03)]
# Kneeplot
## Make the dataframe with all droplets (before filtering) and the number of UMI for each droplet, and if the droplets are filtered or not by emptydrops
nb_umi_by_barcode <- data.frame(nb_umi = Matrix::colSums(scmat), barcodes = colnames(scmat))
nb_umi_by_barcode <- nb_umi_by_barcode %>% arrange(desc(nb_umi)) %>% dplyr::mutate(num_barcode = seq.int(ncol(scmat)))
nb_umi_by_barcode$droplets_state <- "Empty Droplets"
nb_umi_by_barcode[nb_umi_by_barcode$barcodes %in% colnames(scmat_filtered), "droplets_state"] <- "Full Droplets"
## Draw kneeplot
ggplot2::ggplot(nb_umi_by_barcode, ggplot2::aes(y = nb_umi, x = num_barcode, color = droplets_state)) +
ggplot2::geom_point() + ggplot2::ggtitle(paste0("Kneeplot of ", sample.name)) +
ggplot2::theme(legend.title = ggplot2::element_blank()) +
ggplot2::scale_y_log10(name = "Number of UMI by droplet (log scale)") +
ggplot2::scale_x_log10(name = "Droplet rank (log scale)") +
ggplot2::expand_limits(x = 0, y = 0) +
ggplot2::scale_colour_manual(values = c("cyan3","royalblue4"), guide = 'legend') +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(linetype = c(0, 0), shape = c(16, 16))))
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
# Creation of the Seurat object
sobj <- Seurat::CreateSeuratObject(counts = scmat_filtered, project = sample.name)
# Cleaning
rm(bc_rank,nb_umi_by_barcode,scmat,scmat_filtered)
invisible(gc())
# Quantification
## Nb features by cell
sobj$min_features <- sobj$nFeature_RNA >= min.features
print(paste0('Number of droplets with >= ', min.features, ' features :'))
## [1] "Number of droplets with >= 3 features :"
print(table(sobj$min_features))
##
## TRUE
## 3967
## Nb counts by cell
sobj$min_counts <- sobj$nCount_RNA >= min.counts
print(paste0('Number of droplets with >= ', min.counts, ' of total counts:'))
## [1] "Number of droplets with >= 100 of total counts:"
print(table(sobj$min_counts))
##
## TRUE
## 3967
# Plots
## Histograms
ggplot2::qplot(sobj[["nFeature_RNA", drop = TRUE]], geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("nFeature_RNA (>= ", min.features, " : ", length(which(sobj$min_features)), " droplets)"), xlab = "nFeature_RNA") + ggplot2::geom_vline(xintercept = min.features, col = "red", linetype = "dashed", size = 1.5)
ggplot2::qplot(sobj[["nCount_RNA", drop = TRUE]], geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("nCount_RNA (>= ", min.counts, " : ", length(which(sobj$min_counts)), " droplets)"), xlab = "nCount_RNA") + ggplot2::geom_vline(xintercept = min.counts, col = "red", linetype = "dashed", size = 1.5)
## Violinplot
Seurat::VlnPlot(sobj, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)
## Scatterplot
Seurat::FeatureScatter(sobj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
# Filtering cells on min.features and min.counts
sobj <- sobj[, sobj$nFeature_RNA >= min.features & sobj$nCount_RNA >= min.counts]
# Some metrics about filtered count matrix:
print('Number of droplets in filtered data:')
## [1] "Number of droplets in filtered data:"
droplets.kept.nb <- ncol(sobj)
droplets.kept.nb
## [1] 3967
print('Number of genes in filtered data:')
## [1] "Number of genes in filtered data:"
genes.kept.nb <- nrow(sobj)
genes.kept.nb
## [1] 33077
print('Number of UMIs in filtered data:')
## [1] "Number of UMIs in filtered data:"
umi.kept.nb <- sum(sobj@assays$RNA@counts)
umi.kept.nb
## [1] 11255515
print('Fraction of UMIs in droplets :')
## [1] "Fraction of UMIs in droplets :"
fraction.umi <- umi.kept.nb / umi.total.nb
fraction.umi
## [1] 0.8707961
print('Mean of UMIs in droplets :')
## [1] "Mean of UMIs in droplets :"
mean.umi <- round((umi.kept.nb / droplets.kept.nb), 0)
mean.umi
## [1] 2837
# Pattern
if(species == "human"){
mito_patern <- "^MT-"
ribo_patern <- "^RP[LS][[:digit:]]"
}else if (species == "mouse"){
mito_patern <- "^mt-"
ribo_patern <- "^Rp[ls][[:digit:]]"
}
# Percentage MITO
## Percent
sobj$percent_mt <- Seurat::PercentageFeatureSet(sobj, pattern = mito_patern)
print('% Mitochondrial expression :')
## [1] "% Mitochondrial expression :"
print(summary(sobj$percent_mt))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.07489 6.78855 8.87850 9.93317 11.82237 60.03824
## Number
df_percent_mt <- data.frame(percent_mt = sobj$percent_mt, droplets_state = "in")
df_percent_mt[df_percent_mt$percent_mt <= pcmito.min, "droplets_state"] <- "out.left"
df_percent_mt[df_percent_mt$percent_mt >= pcmito.max, "droplets_state"] <- "out.right"
print(paste0(pcmito.min, ' <= % mito <= ', pcmito.max, ' :'))
## [1] "0 <= % mito <= 20 :"
print(table(df_percent_mt$droplets_state))
##
## in out.right
## 3828 139
# Percentage RIBO
## Percent
sobj$percent_rb <- Seurat::PercentageFeatureSet(sobj, pattern = ribo_patern)
print('% Ribosomal expression :')
## [1] "% Ribosomal expression :"
print(summary(sobj$percent_rb))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1676 18.5365 24.8413 24.5548 30.7507 56.7164
## Number
df_percent_rb <- data.frame(percent_rb = sobj$percent_rb, droplets_state = "in")
df_percent_rb[df_percent_rb$percent_rb <= pcribo.min, "droplets_state"] <- "out.left"
df_percent_rb[df_percent_rb$percent_rb >= pcribo.max, "droplets_state"] <- "out.right"
print(paste0(pcribo.min, ' <= % ribo <= ', pcribo.max, ' :'))
## [1] "0 <= % ribo <= 100 :"
print(table(df_percent_rb$droplets_state))
##
## in
## 3967
# Plots
## Histograms
ggplot2::qplot(sobj$percent_mt, geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("%mito (in [", pcmito.min, ';', pcmito.max, "] : ", length(which(df_percent_mt$droplets_state == "in")), " droplets)"), xlab = "%mito") + ggplot2::geom_vline(xintercept = c(5,10,15,20), col = "cyan4", linetype = "dashed", size = 1) + ggplot2::geom_vline(xintercept = c(pcmito.min,pcmito.max), col = "red", linetype = "dashed", size = 1.5)
ggplot2::qplot(sobj$percent_rb, geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("%ribo (in [", pcribo.min, ';', pcribo.max, "] : ", length(which(df_percent_rb$droplets_state == "in")), " droplets)"), xlab = "%ribo") + ggplot2::geom_vline(xintercept = c(pcribo.min,pcribo.max), col = "red", linetype = "dashed", size = 1.5)
## Violinplot
Seurat::VlnPlot(sobj, features = c("percent_mt", "percent_rb"), ncol = 2)
# Nb counts per gene
df_cells_by_gene <- data.frame(nb_cells = apply(sobj@assays$RNA@counts,1,function(x){length(which(x>0))}), gene_state = "in", stringsAsFactors = FALSE)
df_cells_by_gene[df_cells_by_gene$nb_cells <= min.cells, "gene_state"] <- "out"
df_cells_by_gene_zoom <- df_cells_by_gene[df_cells_by_gene$nb_cells <= min.cells*4, ]
print(paste0('Number of genes expressed in >= ', min.cells, ' cells:'))
## [1] "Number of genes expressed in >= 5 cells:"
print(table(df_cells_by_gene$gene_state))
##
## in out
## 23156 9921
# Plots
## Histograms
ggplot2::qplot(df_cells_by_gene$nb_cells, geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("nb genes (>= ", min.cells, " : ", length(which(df_cells_by_gene$gene_state == "in")), " genes)"), xlab = "nb cells") + ggplot2::geom_vline(xintercept = min.cells, col = "red", linetype = "dashed", size = 1.5)
ggplot2::qplot(df_cells_by_gene_zoom$nb_cells, geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("nb genes (>= ", min.cells, " : ", length(which(df_cells_by_gene$gene_state == "in")), " genes) (Zoom)"),xlab = "nb cells") + ggplot2::geom_vline(xintercept = min.cells, col = "red", linetype = "dashed", size = 1.5)
Note: Cyclone take a while (~10-15min).
# Seurat CC
## Load database
if(species == "human"){
cc_seurat <- Seurat::cc.genes.updated.2019
}else if (species == "mouse"){
### Seurat only provides cell cycle genes for homo sapiens (as this list was developed solely using data from this species). To use it on mus musculus, one can simply convert these human HUGO genes to to mus MGI orthologues (using biomaRt). See [this github issue](https://github.com/satijalab/seurat/issues/2493)
cc_seurat <- Seurat::cc.genes.updated.2019
human_mart = biomaRt::useEnsembl("ensembl", dataset = "hsapiens_gene_ensembl")
mouse_mart = biomaRt::useEnsembl("ensembl", dataset = "mmusculus_gene_ensembl")
cc_seurat$s.genes = biomaRt::getLDS(attributes = c("hgnc_symbol"), filters = "hgnc_symbol", values = cc_seurat$s.genes , mart = human_mart, attributesL = c("mgi_symbol"), martL = mouse_mart, uniqueRows=T)[, 2]
cc_seurat$g2m.genes = biomaRt::getLDS(attributes = c("hgnc_symbol"), filters = "hgnc_symbol", values = cc_seurat$g2m.genes , mart = human_mart, attributesL = c("mgi_symbol"), martL = mouse_mart, uniqueRows=T)[, 2]
}
## Prediction
sobj <- Seurat::CellCycleScoring(object = sobj, s.features = cc_seurat$s.genes, g2m.features = cc_seurat$g2m.genes, assay = "RNA", nbin = 10, seed = the.seed)
sobj$Seurat.SmG2M.Score <- sobj$S.Score - sobj$G2M.Score
## Rename results columns with 'Seurat.' prefix
sobj$Seurat.S.Score <- sobj$S.Score
sobj$Seurat.G2M.Score <- sobj$G2M.Score
sobj$Seurat.Phase <- sobj$Phase
sobj$S.Score <- sobj$G2M.Score <- sobj$Phase <- NULL
## Print results
print("Cell cycle phase according to Seurat: ")
## [1] "Cell cycle phase according to Seurat: "
print(table(sobj$Seurat.Phase))
##
## G1 G2M S
## 3286 402 279
# Scran: Cyclone
## Load database
if(species == "human"){
# Load
cc_cyclone <- readRDS(system.file("exdata", "human_cycle_markers.rds", package = "scran"))
# Translate database from ENSEMBL to gene SYMBOL
suppressPackageStartupMessages(library(org.Hs.eg.db))
for (phase in names(cc_cyclone)) {
for (fs in names(cc_cyclone[[phase]])) {
cc_cyclone[[phase]][[fs]] <- suppressMessages(AnnotationDbi::mapIds(org.Hs.eg.db, keys=cc_cyclone[[phase]][[fs]], column = "SYMBOL", keytype = "ENSEMBL", multiVals = "first"))
}
}
}else if (species == "mouse"){
# Load
cc_cyclone <- readRDS(system.file("exdata", "mouse_cycle_markers.rds", package = "scran"))
# Translate database from ENSEMBL to gene SYMBOL
suppressPackageStartupMessages(library(org.Mm.eg.db))
for (phase in names(cc_cyclone)) {
for (fs in names(cc_cyclone[[phase]])) {
cc_cyclone[[phase]][[fs]] <- suppressMessages(AnnotationDbi::mapIds(org.Mm.eg.db, keys=cc_cyclone[[phase]][[fs]], column = "SYMBOL", keytype = "ENSEMBL", multiVals = "first"))
}
}
}
## Prediction
cycres <- scran::cyclone(Seurat::as.SingleCellExperiment(sobj), pairs = cc_cyclone, verbose = FALSE)
sobj$Cyclone.SmG2M.Score <- cycres$normalized.scores$S - cycres$normalized.scores$G2M
## Save results in the seurat object
sobj$Cyclone.Phase <- as.factor(cycres$phases)
sobj$Cyclone.G1.Score <- cycres$normalized.scores$G1
sobj$Cyclone.S.Score <- cycres$normalized.scores$S
sobj$Cyclone.G2M.Score <- cycres$normalized.scores$G2M
rm(cycres)
## Print results
print("Cell cycle phase according to Cyclone: ")
## [1] "Cell cycle phase according to Cyclone: "
print(table(sobj$Cyclone.Phase))
##
## G1 G2M S
## 2024 843 1100
# Crossing results : Seurat -VS- Cyclone
df_cycle <- data.frame(Seurat.Phase = sobj$Seurat.Phase, Cyclone.Phase = sobj$Cyclone.Phase)
print(table(df_cycle))
## Cyclone.Phase
## Seurat.Phase G1 G2M S
## G1 1684 661 941
## G2M 192 121 89
## S 148 61 70
print(paste("Seurat and Cyclone agreed for ",round((sum(diag(table(df_cycle))) / sum(table(df_cycle)) *100),2),"% of droplets."))
## [1] "Seurat and Cyclone agreed for 47.26 % of droplets."
# scDblFinder
sobj$scDblFinder.class <- Seurat::as.Seurat(scDblFinder::scDblFinder(Seurat::as.SingleCellExperiment(sobj)))$scDblFinder.class
## Warning in .checkSCE(sce): Some cells in `sce` have an extremely low read
## counts; note that these could trigger errors and might best be filtered out
## Creating ~5000 artificial doublets...
## Dimensional reduction
## Evaluating kNN...
## Training model...
## iter=0, 171 cells excluded from training.
## iter=1, 274 cells excluded from training.
## iter=2, 292 cells excluded from training.
## Threshold found:0.782
## 253 (6.4%) doublets called
sobj$scDblFinder.class <- unname(sobj$scDblFinder.class == "doublet")
print('scDblFinder doublets :')
## [1] "scDblFinder doublets :"
print(table(sobj$scDblFinder.class))
##
## FALSE TRUE
## 3714 253
# scds
sobj$scds.score <- scds::cxds_bcds_hybrid(Seurat::as.SingleCellExperiment(sobj))$hybrid_score
## [13:07:42] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
sobj$scds.class <- unname(sobj$scds.score > 1)
print('scds-hybrid doublets :')
## [1] "scds-hybrid doublets :"
print(table(sobj$scds.class))
##
## FALSE TRUE
## 3552 415
# union of scDblFinder & scds
sobj$doublets_consensus.class <- sobj$scDblFinder.class | sobj$scds.class
print('Consensus doublets :')
## [1] "Consensus doublets :"
print(table(sobj$doublets_consensus.class))
##
## FALSE TRUE
## 3451 516
# Correlation between doublets and cell cycle
## Overlap cells
df_S.cycle_doublets <- data.frame(Seurat.Phase = sobj$Seurat.Phase, doublets = sobj$doublets_consensus.class)
print(table(df_S.cycle_doublets))
## doublets
## Seurat.Phase FALSE TRUE
## G1 2806 480
## G2M 386 16
## S 259 20
print(paste0(round((nrow(df_S.cycle_doublets[df_S.cycle_doublets$Seurat.Phase == "G2M" & df_S.cycle_doublets$doublets == TRUE,])/nrow(df_S.cycle_doublets[df_S.cycle_doublets$doublets == TRUE,])*100),2),"% of doublets are in G2M phase by Seurat."))
## [1] "3.1% of doublets are in G2M phase by Seurat."
df_C.cycle_doublets <- data.frame(Cyclone.Phase = sobj$Cyclone.Phase, doublets = sobj$doublets_consensus.class)
print(table(df_C.cycle_doublets))
## doublets
## Cyclone.Phase FALSE TRUE
## G1 1730 294
## G2M 780 63
## S 941 159
print(paste0(round((nrow(df_C.cycle_doublets[df_C.cycle_doublets$Cyclone.Phase == "G2M" & df_C.cycle_doublets$doublets == TRUE,])/nrow(df_C.cycle_doublets[df_S.cycle_doublets$doublets == TRUE,])*100),2),"% of doublets are in G2M phase by Cyclone."))
## [1] "12.21% of doublets are in G2M phase by Cyclone."
## Quick 'n dirty uMAP
## /!\ Warning: this umap will not represent the cell types present! Many parameters will have to be adjusted for this (tomorrow's course on Statistical models and analyses). Here, we just want to be able to represent all the cells on the same graph and plot the identification of doublets and the phases of the cell cycle /!\
tmp_sobj <- sobj
tmp_sobj <- suppressWarnings(Seurat::SCTransform(tmp_sobj, seed.use = the.seed, verbose = FALSE)) #normalization
tmp_sobj <- Seurat::RunPCA(tmp_sobj, verbose = FALSE,seed.use = the.seed) #dimension reduction
tmp_sobj <- Seurat::RunUMAP(tmp_sobj, dims = 1:25, verbose = FALSE, seed.use = the.seed)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
Seurat::DimPlot(tmp_sobj, group.by = "Cyclone.Phase") + ggplot2::ggtitle("Cell Phase (cyclone)") + Seurat::DarkTheme()
Seurat::DimPlot(tmp_sobj, group.by = "Seurat.Phase") + ggplot2::ggtitle("Cell Phase (Seurat)") + Seurat::DarkTheme()
Seurat::DimPlot(tmp_sobj, group.by = "scDblFinder.class") + ggplot2::ggtitle("Cell doublets (scDblFinder)") + Seurat::DarkTheme()
Seurat::DimPlot(tmp_sobj, group.by = "scds.class") + ggplot2::ggtitle("Cell doublets (scds)") + Seurat::DarkTheme()
Seurat::DimPlot(tmp_sobj, group.by = "doublets_consensus.class") + ggplot2::ggtitle("Cell doublets (union)") + Seurat::DarkTheme()
# Cleaning
rm(tmp_sobj)
invisible(gc())
# Filtering cell doublets
sobj <- sobj[, !sobj$doublets_consensus.class]
# Filtering non-qualified cells
cellskeep <- sobj$percent_mt >= pcmito.min & sobj$percent_mt <= pcmito.max & sobj$percent_rb >= pcribo.min & sobj$percent_rb <= pcribo.max
sobj <- sobj[, cellskeep]
# Filtering features with ultra-low expression
features_to_keep <- which(apply(sobj@assays$RNA@counts,1,function(x){ length(which(x>0)) }) >= min.cells)
sobj <- sobj[features_to_keep,]
# Re-computing basic metrics : percentage of mito + ribo + nb features + nb counts
## Nb features by cell
sobj$nFeature_RNA <- apply(sobj@assays$RNA@counts,2,function(x){ length(which(x>0)) })
sobj$min_features <- sobj$nFeature_RNA >= min.features
## Nb counts by cell
sobj$nCount_RNA <- apply(sobj@assays$RNA@counts,2,function(x){ sum(x) })
sobj$min_counts <- sobj$nCount_RNA >= min.counts
## Percent mito
sobj$percent_mt <- Seurat::PercentageFeatureSet(sobj, pattern = mito_patern)
## Percent ribo
sobj$percent_rb <- Seurat::PercentageFeatureSet(sobj, pattern = ribo_patern)
# Some metrics about total filtered count matrix:
print('Number of cells in filtered data:')
## [1] "Number of cells in filtered data:"
print(ncol(sobj))
## [1] 3316
print('Number of genes in filtered data:')
## [1] "Number of genes in filtered data:"
print(nrow(sobj))
## [1] 22435
print('Number of UMIs in filtered data:')
## [1] "Number of UMIs in filtered data:"
print(sum(sobj@assays$RNA@counts))
## [1] 8097845
print(paste0('Number of cells with >= ', min.features, ' features :'))
## [1] "Number of cells with >= 3 features :"
print(table(sobj$min_features))
##
## TRUE
## 3316
print('Mean features by cell :')
## [1] "Mean features by cell :"
print(summary(sobj$nFeature_RNA))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 84.0 320.0 537.0 934.4 1564.0 5078.0
print(paste0('Number of cells with >= ', min.counts, ' of total counts:'))
## [1] "Number of cells with >= 100 of total counts:"
print(table(sobj$min_counts))
##
## TRUE
## 3316
print('Mean Count by cell:')
## [1] "Mean Count by cell:"
print(summary(sobj$nCount_RNA))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 101.0 523.0 977.5 2442.1 4379.5 25707.0
print('% Mitochondrial expression :')
## [1] "% Mitochondrial expression :"
print(summary(sobj$percent_mt))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.07504 6.78807 8.89640 9.47368 11.70721 19.96466
print('% Ribosomal expression :')
## [1] "% Ribosomal expression :"
print(summary(sobj$percent_rb))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1679 19.2996 25.4411 25.1937 31.3269 56.7503
print("Cell cycle phases according to Seurat: ")
## [1] "Cell cycle phases according to Seurat: "
print(table(sobj$Seurat.Phase))
##
## G1 G2M S
## 2698 364 254
print("Cell cycle phases according to Cyclone: ")
## [1] "Cell cycle phases according to Cyclone: "
print(table(sobj$Cyclone.Phase))
##
## G1 G2M S
## 1681 733 902
print('Consensus doublets :')
## [1] "Consensus doublets :"
print(table(sobj$doublets_consensus.class))
##
## FALSE
## 3316
# Plots
## Histograms
ggplot2::qplot(sobj[["nFeature_RNA", drop = TRUE]], geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("nFeature_RNA (>= ", min.features, " : ", length(which(sobj$min_features)), " cells)"), xlab = "nFeature_RNA") + ggplot2::geom_vline(xintercept = min.features, col = "red", linetype = "dashed", size = 1.5)
ggplot2::qplot(sobj[["nCount_RNA", drop = TRUE]], geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("nCount_RNA (>= ", min.counts, " : ", length(which(sobj$min_counts)), " cells)"), xlab = "nCount_RNA") + ggplot2::geom_vline(xintercept = min.counts, col = "red", linetype = "dashed", size = 1.5)
ggplot2::qplot(sobj$percent_mt, geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("%mito (in [", pcmito.min, ';', pcmito.max, "] : ", length(which(df_percent_mt$droplets_state == "in")), " cells)"), xlab = "%mito") + ggplot2::geom_vline(xintercept = c(5,10,15,20), col = "cyan4", linetype = "dashed", size = 1) + ggplot2::geom_vline(xintercept = c(pcmito.min,pcmito.max), col = "red", linetype = "dashed", size = 1.5)
ggplot2::qplot(sobj$percent_rb, geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("%ribo (in [", pcribo.min, ';', pcribo.max, "] : ", length(which(df_percent_rb$droplets_state == "in")), " cells)"), xlab = "%ribo") + ggplot2::geom_vline(xintercept = c(pcribo.min,pcribo.max), col = "red", linetype = "dashed", size = 1.5)
## Violinplot
Seurat::VlnPlot(sobj, features = c("nFeature_RNA", "nCount_RNA","percent_mt","percent_rb"), ncol = 4)
## Scatterplot
Seurat::FeatureScatter(sobj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
# Save parameters
sobj@misc$params$sample.name <- sample.name
sobj@misc$params$species <- species
sobj@misc$params$QC$seed <- the.seed
sobj@misc$params$QC$emptydrops$emptydrops.retain <- emptydrops.retain
sobj@misc$params$QC$pcmito.range <- c(pcmito.min,pcmito.max)
sobj@misc$params$QC$pcribo.range <- c(pcribo.min,pcribo.max)
sobj@misc$params$QC$min.features <- min.features
sobj@misc$params$QC$min.counts <- min.counts
sobj@misc$params$QC$min.cells <- min.cells
# Save R session
sobj@misc$params$QC$Rsession <- utils::capture.output(devtools::session_info())
## Registered S3 method overwritten by 'cli':
## method from
## print.boxx spatstat.geom
# Save seurat object
dir.create(output.dir, recursive = TRUE, showWarnings = FALSE)
save(sobj, file = paste0(output.dir, sample.name, '_FILTERED_with.rda'), compress = "bzip2")
# Print R session info
utils::capture.output(devtools::session_info())
## [1] "─ Session info ──────────────────────────────────────────────────────────────"
## [2] " hash: cookie, man farmer: light skin tone, person golfing: medium-dark skin tone"
## [3] ""
## [4] " setting value"
## [5] " version R version 4.1.1 (2021-08-10)"
## [6] " os Ubuntu 18.04.6 LTS"
## [7] " system x86_64, linux-gnu"
## [8] " ui X11"
## [9] " language (EN)"
## [10] " collate fr_FR.UTF-8"
## [11] " ctype fr_FR.UTF-8"
## [12] " tz Europe/Paris"
## [13] " date 2022-01-08"
## [14] " pandoc 2.16.2 @ /shared/software/miniconda/envs/r-4.1.1/bin/ (via rmarkdown)"
## [15] ""
## [16] "─ Packages ───────────────────────────────────────────────────────────────────"
## [17] " package * version date (UTC) lib source"
## [18] " abind 1.4-5 2016-07-21 [2] CRAN (R 4.1.0)"
## [19] " AnnotationDbi * 1.56.2 2021-11-09 [2] Bioconductor"
## [20] " assertthat 0.2.1 2019-03-21 [2] CRAN (R 4.1.0)"
## [21] " beachmat 2.10.0 2021-10-26 [2] Bioconductor"
## [22] " beeswarm 0.4.0 2021-06-01 [2] CRAN (R 4.1.1)"
## [23] " Biobase * 2.54.0 2021-10-26 [2] Bioconductor"
## [24] " BiocGenerics * 0.40.0 2021-10-26 [2] Bioconductor"
## [25] " BiocNeighbors 1.12.0 2021-10-26 [2] Bioconductor"
## [26] " BiocParallel 1.28.3 2021-12-09 [2] Bioconductor"
## [27] " BiocSingular 1.10.0 2021-10-26 [2] Bioconductor"
## [28] " Biostrings 2.62.0 2021-10-26 [2] Bioconductor"
## [29] " bit 4.0.4 2020-08-04 [2] CRAN (R 4.1.0)"
## [30] " bit64 4.0.5 2020-08-30 [2] CRAN (R 4.1.0)"
## [31] " bitops 1.0-7 2021-04-24 [2] CRAN (R 4.1.0)"
## [32] " blob 1.2.2 2021-07-23 [2] CRAN (R 4.1.0)"
## [33] " bluster 1.4.0 2021-10-26 [2] Bioconductor"
## [34] " bslib 0.3.1 2021-10-06 [2] CRAN (R 4.1.1)"
## [35] " cachem 1.0.6 2021-08-19 [2] CRAN (R 4.1.1)"
## [36] " callr 3.7.0 2021-04-20 [2] CRAN (R 4.1.0)"
## [37] " cli 3.1.0 2021-10-27 [2] CRAN (R 4.1.1)"
## [38] " cluster 2.1.2 2021-04-17 [2] CRAN (R 4.1.0)"
## [39] " codetools 0.2-18 2020-11-04 [2] CRAN (R 4.1.0)"
## [40] " colorspace 2.0-2 2021-06-24 [2] CRAN (R 4.1.0)"
## [41] " cowplot 1.1.1 2020-12-30 [2] CRAN (R 4.1.0)"
## [42] " crayon 1.4.2 2021-10-29 [2] CRAN (R 4.1.1)"
## [43] " data.table 1.14.2 2021-09-27 [2] CRAN (R 4.1.1)"
## [44] " DBI 1.1.2 2021-12-20 [2] CRAN (R 4.1.1)"
## [45] " DelayedArray 0.20.0 2021-10-26 [2] Bioconductor"
## [46] " DelayedMatrixStats 1.16.0 2021-10-26 [2] Bioconductor"
## [47] " deldir 1.0-6 2021-10-23 [2] CRAN (R 4.1.1)"
## [48] " desc 1.4.0 2021-09-28 [2] CRAN (R 4.1.1)"
## [49] " devtools 2.4.3 2021-11-30 [2] CRAN (R 4.1.1)"
## [50] " digest 0.6.29 2021-12-01 [2] CRAN (R 4.1.1)"
## [51] " dplyr * 1.0.7 2021-06-18 [2] CRAN (R 4.1.0)"
## [52] " dqrng 0.3.0 2021-05-01 [2] CRAN (R 4.1.0)"
## [53] " DropletUtils 1.14.1 2021-11-08 [2] Bioconductor"
## [54] " edgeR 3.36.0 2021-10-26 [2] Bioconductor"
## [55] " ellipsis 0.3.2 2021-04-29 [2] CRAN (R 4.1.0)"
## [56] " evaluate 0.14 2019-05-28 [2] CRAN (R 4.1.0)"
## [57] " fansi 0.5.0 2021-05-25 [2] CRAN (R 4.1.1)"
## [58] " farver 2.1.0 2021-02-28 [2] CRAN (R 4.1.0)"
## [59] " fastmap 1.1.0 2021-01-25 [2] CRAN (R 4.1.0)"
## [60] " fitdistrplus 1.1-6 2021-09-28 [2] CRAN (R 4.1.1)"
## [61] " fs 1.5.2 2021-12-08 [2] CRAN (R 4.1.1)"
## [62] " future 1.23.0 2021-10-31 [2] CRAN (R 4.1.1)"
## [63] " future.apply 1.8.1 2021-08-10 [2] CRAN (R 4.1.1)"
## [64] " generics 0.1.1 2021-10-25 [2] CRAN (R 4.1.1)"
## [65] " GenomeInfoDb 1.30.0 2021-10-26 [2] Bioconductor"
## [66] " GenomeInfoDbData 1.2.7 2021-12-02 [2] Bioconductor"
## [67] " GenomicRanges 1.46.1 2021-11-18 [2] Bioconductor"
## [68] " ggbeeswarm 0.6.0 2017-08-07 [2] CRAN (R 4.1.1)"
## [69] " ggplot2 3.3.5 2021-06-25 [2] CRAN (R 4.1.0)"
## [70] " ggrepel 0.9.1 2021-01-15 [2] CRAN (R 4.1.0)"
## [71] " ggridges 0.5.3 2021-01-08 [2] CRAN (R 4.1.0)"
## [72] " globals 0.14.0 2020-11-22 [2] CRAN (R 4.1.0)"
## [73] " glue 1.6.0 2021-12-17 [2] CRAN (R 4.1.1)"
## [74] " goftest 1.2-3 2021-10-07 [2] CRAN (R 4.1.1)"
## [75] " gridExtra 2.3 2017-09-09 [2] CRAN (R 4.1.0)"
## [76] " gtable 0.3.0 2019-03-25 [2] CRAN (R 4.1.0)"
## [77] " HDF5Array 1.22.1 2021-11-14 [2] Bioconductor"
## [78] " highr 0.9 2021-04-16 [2] CRAN (R 4.1.0)"
## [79] " htmltools 0.5.2 2021-08-25 [2] CRAN (R 4.1.1)"
## [80] " htmlwidgets 1.5.4 2021-09-08 [2] CRAN (R 4.1.1)"
## [81] " httpuv 1.6.4 2021-12-14 [2] CRAN (R 4.1.1)"
## [82] " httr 1.4.2 2020-07-20 [2] CRAN (R 4.1.0)"
## [83] " ica 1.0-2 2018-05-24 [2] CRAN (R 4.1.0)"
## [84] " igraph 1.2.10 2021-12-15 [2] CRAN (R 4.1.1)"
## [85] " IRanges * 2.28.0 2021-10-26 [2] Bioconductor"
## [86] " irlba 2.3.5 2021-12-06 [2] CRAN (R 4.1.1)"
## [87] " jquerylib 0.1.4 2021-04-26 [2] CRAN (R 4.1.0)"
## [88] " jsonlite 1.7.2 2020-12-09 [2] CRAN (R 4.1.0)"
## [89] " KEGGREST 1.34.0 2021-10-26 [2] Bioconductor"
## [90] " KernSmooth 2.23-20 2021-05-03 [2] CRAN (R 4.1.0)"
## [91] " knitr 1.37 2021-12-16 [2] CRAN (R 4.1.1)"
## [92] " labeling 0.4.2 2020-10-20 [2] CRAN (R 4.1.0)"
## [93] " later 1.3.0 2021-08-18 [2] CRAN (R 4.1.1)"
## [94] " lattice 0.20-45 2021-09-22 [2] CRAN (R 4.1.1)"
## [95] " lazyeval 0.2.2 2019-03-15 [2] CRAN (R 4.1.0)"
## [96] " leiden 0.3.9 2021-07-27 [2] CRAN (R 4.1.0)"
## [97] " lifecycle 1.0.1 2021-09-24 [2] CRAN (R 4.1.1)"
## [98] " limma 3.50.0 2021-10-26 [2] Bioconductor"
## [99] " listenv 0.8.0 2019-12-05 [2] CRAN (R 4.1.0)"
## [100] " lmtest 0.9-39 2021-11-07 [2] CRAN (R 4.1.1)"
## [101] " locfit 1.5-9.4 2020-03-25 [2] CRAN (R 4.1.1)"
## [102] " magrittr 2.0.1 2020-11-17 [2] CRAN (R 4.1.0)"
## [103] " MASS 7.3-54 2021-05-03 [2] CRAN (R 4.1.0)"
## [104] " Matrix 1.3-4 2021-06-01 [2] CRAN (R 4.1.0)"
## [105] " MatrixGenerics 1.6.0 2021-10-26 [2] Bioconductor"
## [106] " matrixStats 0.61.0 2021-09-17 [2] CRAN (R 4.1.1)"
## [107] " memoise 2.0.1 2021-11-26 [2] CRAN (R 4.1.1)"
## [108] " metapod 1.2.0 2021-10-26 [2] Bioconductor"
## [109] " mgcv 1.8-38 2021-10-06 [2] CRAN (R 4.1.1)"
## [110] " mime 0.12 2021-09-28 [2] CRAN (R 4.1.1)"
## [111] " miniUI 0.1.1.1 2018-05-18 [2] CRAN (R 4.1.0)"
## [112] " munsell 0.5.0 2018-06-12 [2] CRAN (R 4.1.0)"
## [113] " nlme 3.1-153 2021-09-07 [2] CRAN (R 4.1.1)"
## [114] " org.Hs.eg.db * 3.14.0 2021-12-09 [2] Bioconductor"
## [115] " parallelly 1.30.0 2021-12-17 [2] CRAN (R 4.1.1)"
## [116] " patchwork 1.1.1 2020-12-17 [2] CRAN (R 4.1.0)"
## [117] " pbapply 1.5-0 2021-09-16 [2] CRAN (R 4.1.1)"
## [118] " pillar 1.6.4 2021-10-18 [2] CRAN (R 4.1.1)"
## [119] " pkgbuild 1.2.1 2021-11-30 [2] CRAN (R 4.1.1)"
## [120] " pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 4.1.0)"
## [121] " pkgload 1.2.4 2021-11-30 [2] CRAN (R 4.1.1)"
## [122] " plotly 4.10.0 2021-10-09 [2] CRAN (R 4.1.1)"
## [123] " plyr 1.8.6 2020-03-03 [2] CRAN (R 4.1.0)"
## [124] " png 0.1-7 2013-12-03 [2] CRAN (R 4.1.0)"
## [125] " polyclip 1.10-0 2019-03-14 [2] CRAN (R 4.1.0)"
## [126] " prettyunits 1.1.1 2020-01-24 [2] CRAN (R 4.1.0)"
## [127] " pROC 1.18.0 2021-09-03 [2] CRAN (R 4.1.1)"
## [128] " processx 3.5.2 2021-04-30 [2] CRAN (R 4.1.0)"
## [129] " promises 1.2.0.1 2021-02-11 [2] CRAN (R 4.1.0)"
## [130] " ps 1.6.0 2021-02-28 [2] CRAN (R 4.1.0)"
## [131] " purrr 0.3.4 2020-04-17 [2] CRAN (R 4.1.0)"
## [132] " R.methodsS3 1.8.1 2020-08-26 [2] CRAN (R 4.1.0)"
## [133] " R.oo 1.24.0 2020-08-26 [2] CRAN (R 4.1.0)"
## [134] " R.utils 2.11.0 2021-09-26 [2] CRAN (R 4.1.1)"
## [135] " R6 2.5.1 2021-08-19 [2] CRAN (R 4.1.1)"
## [136] " RANN 2.6.1 2019-01-08 [2] CRAN (R 4.1.0)"
## [137] " RColorBrewer 1.1-2 2014-12-07 [2] CRAN (R 4.1.0)"
## [138] " Rcpp 1.0.7 2021-07-07 [2] CRAN (R 4.1.0)"
## [139] " RcppAnnoy 0.0.19 2021-07-30 [2] CRAN (R 4.1.0)"
## [140] " RCurl 1.98-1.5 2021-09-17 [2] CRAN (R 4.1.1)"
## [141] " remotes 2.4.2 2021-11-30 [2] CRAN (R 4.1.1)"
## [142] " reshape2 1.4.4 2020-04-09 [2] CRAN (R 4.1.0)"
## [143] " reticulate 1.22 2021-09-17 [2] CRAN (R 4.1.1)"
## [144] " rhdf5 2.38.0 2021-10-26 [2] Bioconductor"
## [145] " rhdf5filters 1.6.0 2021-10-26 [2] Bioconductor"
## [146] " Rhdf5lib 1.16.0 2021-10-26 [2] Bioconductor"
## [147] " rlang 0.4.12 2021-10-18 [2] CRAN (R 4.1.1)"
## [148] " rmarkdown 2.11 2021-09-14 [2] CRAN (R 4.1.1)"
## [149] " ROCR 1.0-11 2020-05-02 [2] CRAN (R 4.1.0)"
## [150] " rpart 4.1-15 2019-04-12 [2] CRAN (R 4.1.0)"
## [151] " rprojroot 2.0.2 2020-11-15 [2] CRAN (R 4.1.0)"
## [152] " RSpectra 0.16-0 2019-12-01 [2] CRAN (R 4.1.1)"
## [153] " RSQLite 2.2.9 2021-12-06 [2] CRAN (R 4.1.1)"
## [154] " rstudioapi 0.13 2020-11-12 [2] CRAN (R 4.1.0)"
## [155] " rsvd 1.0.5 2021-04-16 [2] CRAN (R 4.1.1)"
## [156] " Rtsne 0.15 2018-11-10 [2] CRAN (R 4.1.0)"
## [157] " S4Vectors * 0.32.3 2021-11-21 [2] Bioconductor"
## [158] " sass 0.4.0 2021-05-12 [2] CRAN (R 4.1.0)"
## [159] " ScaledMatrix 1.2.0 2021-10-26 [2] Bioconductor"
## [160] " scales 1.1.1 2020-05-11 [2] CRAN (R 4.1.0)"
## [161] " scater 1.22.0 2021-10-26 [2] Bioconductor"
## [162] " scattermore 0.7 2020-11-24 [2] CRAN (R 4.1.0)"
## [163] " scDblFinder 1.8.0 2021-10-26 [2] Bioconductor"
## [164] " scds 1.10.0 2021-10-26 [2] Bioconductor"
## [165] " scran 1.22.1 2021-11-14 [2] Bioconductor"
## [166] " sctransform 0.3.2 2020-12-16 [2] CRAN (R 4.1.0)"
## [167] " scuttle 1.4.0 2021-10-26 [2] Bioconductor"
## [168] " sessioninfo 1.2.1 2021-11-02 [2] CRAN (R 4.1.1)"
## [169] " Seurat 4.0.6 2021-12-16 [2] CRAN (R 4.1.1)"
## [170] " SeuratObject 4.0.4 2021-11-23 [2] CRAN (R 4.1.1)"
## [171] " shiny 1.7.1 2021-10-02 [2] CRAN (R 4.1.1)"
## [172] " SingleCellExperiment 1.16.0 2021-10-26 [2] Bioconductor"
## [173] " sparseMatrixStats 1.6.0 2021-10-26 [2] Bioconductor"
## [174] " spatstat.core 2.3-2 2021-11-26 [2] CRAN (R 4.1.1)"
## [175] " spatstat.data 2.1-2 2021-12-17 [2] CRAN (R 4.1.1)"
## [176] " spatstat.geom 2.3-1 2021-12-10 [2] CRAN (R 4.1.1)"
## [177] " spatstat.sparse 2.1-0 2021-12-17 [2] CRAN (R 4.1.1)"
## [178] " spatstat.utils 2.3-0 2021-12-12 [2] CRAN (R 4.1.1)"
## [179] " statmod 1.4.36 2021-05-10 [2] CRAN (R 4.1.0)"
## [180] " stringi 1.7.6 2021-11-29 [2] CRAN (R 4.1.1)"
## [181] " stringr 1.4.0 2019-02-10 [2] CRAN (R 4.1.0)"
## [182] " SummarizedExperiment 1.24.0 2021-10-26 [2] Bioconductor"
## [183] " survival 3.2-13 2021-08-24 [2] CRAN (R 4.1.1)"
## [184] " tensor 1.5 2012-05-05 [2] CRAN (R 4.1.0)"
## [185] " testthat 3.1.0 2021-10-04 [2] CRAN (R 4.1.1)"
## [186] " tibble 3.1.6 2021-11-07 [2] CRAN (R 4.1.1)"
## [187] " tidyr 1.1.4 2021-09-27 [2] CRAN (R 4.1.1)"
## [188] " tidyselect 1.1.1 2021-04-30 [2] CRAN (R 4.1.0)"
## [189] " usethis 2.1.3 2021-10-27 [2] CRAN (R 4.1.1)"
## [190] " utf8 1.2.2 2021-07-24 [2] CRAN (R 4.1.0)"
## [191] " uwot 0.1.11 2021-12-02 [2] CRAN (R 4.1.1)"
## [192] " vctrs 0.3.8 2021-04-29 [2] CRAN (R 4.1.0)"
## [193] " vipor 0.4.5 2017-03-22 [2] CRAN (R 4.1.1)"
## [194] " viridis 0.6.2 2021-10-13 [2] CRAN (R 4.1.1)"
## [195] " viridisLite 0.4.0 2021-04-13 [2] CRAN (R 4.1.0)"
## [196] " withr 2.4.3 2021-11-30 [2] CRAN (R 4.1.1)"
## [197] " xfun 0.29 2021-12-14 [2] CRAN (R 4.1.1)"
## [198] " xgboost 1.5.0.2 2021-11-21 [2] CRAN (R 4.1.1)"
## [199] " xtable 1.8-4 2019-04-21 [2] CRAN (R 4.1.0)"
## [200] " XVector 0.34.0 2021-10-26 [2] Bioconductor"
## [201] " yaml 2.2.1 2020-02-01 [2] CRAN (R 4.1.0)"
## [202] " zlibbioc 1.40.0 2021-10-26 [2] Bioconductor"
## [203] " zoo 1.8-9 2021-03-09 [2] CRAN (R 4.1.0)"
## [204] ""
## [205] " [1] /shared/home/maglave/R/x86_64-conda-linux-gnu-library/4.1"
## [206] " [2] /shared/software/miniconda/envs/r-4.1.1/lib/R/library"
## [207] ""
## [208] "──────────────────────────────────────────────────────────────────────────────"