Initializing the practical session

user.id <- Sys.getenv('USER')

Setting Dataset

input.dir <- paste0("/shared/projects/sincellte_2022/", user.id, "/Primary_analyses/input/matrix/")
output.dir <- paste0("/shared/projects/sincellte_2022/", user.id, "/Primary_analyses/finalresult/")
sample.name <- "PBMC_CTRL_GE"
species <- "human"
aligment <- "10X" #kallisto or 10X

Setting Paramaters

# Computational Parameters
the.seed <- 1337L

# Analysis Parameters
## Empty droplets
emptydrops.retain <- NULL
## Barcode-level QC cell
pcmito.min <- 0
pcmito.max <- 20
pcribo.min <- 0
pcribo.max <- 100
min.features <- 3
min.counts <- 100
## Feature-level QC
min.cells <- 5

Setting seed and loading R packages

set.seed(the.seed)
suppressPackageStartupMessages(library(dplyr))

Loading raw count matrix

# Loading raw count matrix
if(aligment == "kallisto"){
  scmat <- BUSpaRse::read_count_output(dir = input.dir, name = sample.name) # from BUStools
}else if(aligment == "10X"){
  scmat <- Seurat::Read10X(input.dir) # from 10X
} else stop("Alignment technology unknown!")

# Some metrics about raw count matrix:
print('Number of barodes (droplets) in raw count matrix:')
## [1] "Number of barodes (droplets) in raw count matrix:"
droplets.total.nb <- ncol(scmat)
droplets.total.nb
## [1] 343461
print('Number of features (genes) in raw count matrix:')
## [1] "Number of features (genes) in raw count matrix:"
genes.total.nb <- nrow(scmat)
genes.total.nb
## [1] 33077
print('Number of UMIs in raw count matrix:')
## [1] "Number of UMIs in raw count matrix:"
umi.total.nb <- sum(scmat)
umi.total.nb
## [1] 12925546

Removing empty droplets

Automatically : emptyDrops()

# Removing empty droplets
bc_rank <- DropletUtils::emptyDrops(m = scmat, retain = emptydrops.retain)
scmat_filtered <- scmat[, which(bc_rank$FDR < 1E-03)]

# Kneeplot
## Make the dataframe with all droplets (before filtering) and the number of UMI for each droplet, and if the droplets are filtered or not by emptydrops
nb_umi_by_barcode <- data.frame(nb_umi = Matrix::colSums(scmat), barcodes = colnames(scmat))
nb_umi_by_barcode <- nb_umi_by_barcode %>% arrange(desc(nb_umi)) %>% dplyr::mutate(num_barcode = seq.int(ncol(scmat)))
nb_umi_by_barcode$droplets_state <- "Empty Droplets"
nb_umi_by_barcode[nb_umi_by_barcode$barcodes %in% colnames(scmat_filtered), "droplets_state"] <- "Full Droplets"
## Draw kneeplot
ggplot2::ggplot(nb_umi_by_barcode, ggplot2::aes(y = nb_umi, x = num_barcode, color = droplets_state)) +
  ggplot2::geom_point() + ggplot2::ggtitle(paste0("Kneeplot of ", sample.name)) + 
  ggplot2::theme(legend.title = ggplot2::element_blank()) +
  ggplot2::scale_y_log10(name = "Number of UMI by droplet (log scale)") + 
  ggplot2::scale_x_log10(name = "Droplet rank (log scale)") +
  ggplot2::expand_limits(x = 0, y = 0) +
  ggplot2::scale_colour_manual(values = c("cyan3","royalblue4"), guide = 'legend') +
  ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(linetype = c(0, 0), shape = c(16, 16))))
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis

# Creation of the Seurat object
sobj <- Seurat::CreateSeuratObject(counts = scmat_filtered, project = sample.name)

# Cleaning
rm(bc_rank,nb_umi_by_barcode,scmat,scmat_filtered)
invisible(gc())

Manually

# Quantification
## Nb features by cell
sobj$min_features <- sobj$nFeature_RNA >= min.features
print(paste0('Number of droplets with >= ', min.features, ' features :'))
## [1] "Number of droplets with >= 3 features :"
print(table(sobj$min_features))
## 
## TRUE 
## 3967
## Nb counts by cell
sobj$min_counts <- sobj$nCount_RNA >= min.counts
print(paste0('Number of droplets with >= ', min.counts, ' of total counts:'))
## [1] "Number of droplets with >= 100 of total counts:"
print(table(sobj$min_counts))
## 
## TRUE 
## 3967
# Plots
## Histograms
ggplot2::qplot(sobj[["nFeature_RNA", drop = TRUE]], geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("nFeature_RNA (>= ", min.features, " : ", length(which(sobj$min_features)), " droplets)"), xlab = "nFeature_RNA") + ggplot2::geom_vline(xintercept = min.features, col = "red", linetype = "dashed", size = 1.5)

ggplot2::qplot(sobj[["nCount_RNA", drop = TRUE]], geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("nCount_RNA (>= ", min.counts, " : ", length(which(sobj$min_counts)), " droplets)"), xlab = "nCount_RNA") + ggplot2::geom_vline(xintercept = min.counts, col = "red", linetype = "dashed", size = 1.5)

## Violinplot
Seurat::VlnPlot(sobj, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)

## Scatterplot
Seurat::FeatureScatter(sobj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")

# Filtering cells on min.features and min.counts
sobj <- sobj[, sobj$nFeature_RNA >= min.features & sobj$nCount_RNA >= min.counts]

# Some metrics about filtered count matrix:
print('Number of droplets  in filtered data:')
## [1] "Number of droplets  in filtered data:"
droplets.kept.nb <- ncol(sobj)
droplets.kept.nb
## [1] 3967
print('Number of genes in filtered data:')
## [1] "Number of genes in filtered data:"
genes.kept.nb <- nrow(sobj)
genes.kept.nb
## [1] 33077
print('Number of UMIs in filtered data:')
## [1] "Number of UMIs in filtered data:"
umi.kept.nb <- sum(sobj@assays$RNA@counts)
umi.kept.nb
## [1] 11255515
print('Fraction of UMIs in droplets :')
## [1] "Fraction of UMIs in droplets :"
fraction.umi <- umi.kept.nb / umi.total.nb
fraction.umi
## [1] 0.8707961
print('Mean of UMIs in droplets :')
## [1] "Mean of UMIs in droplets :"
mean.umi <- round((umi.kept.nb / droplets.kept.nb), 0)
mean.umi
## [1] 2837

Computing basic metrics : Percentages of mito + Percentages of ribo + Cell cycle prediction + Identification of doublets

Percentages of mito + ribo

# Pattern
if(species == "human"){
  mito_patern <- "^MT-"
  ribo_patern <- "^RP[LS][[:digit:]]"
}else if (species == "mouse"){
  mito_patern <- "^mt-"
  ribo_patern <- "^Rp[ls][[:digit:]]"
}

# Percentage MITO
## Percent
sobj$percent_mt <- Seurat::PercentageFeatureSet(sobj, pattern = mito_patern)
print('% Mitochondrial expression :')
## [1] "% Mitochondrial expression :"
print(summary(sobj$percent_mt))
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##  0.07489  6.78855  8.87850  9.93317 11.82237 60.03824
## Number
df_percent_mt <- data.frame(percent_mt = sobj$percent_mt, droplets_state = "in")
df_percent_mt[df_percent_mt$percent_mt <= pcmito.min, "droplets_state"] <- "out.left"
df_percent_mt[df_percent_mt$percent_mt >= pcmito.max, "droplets_state"] <- "out.right"
print(paste0(pcmito.min, ' <= % mito <= ', pcmito.max, ' :'))
## [1] "0 <= % mito <= 20 :"
print(table(df_percent_mt$droplets_state))
## 
##        in out.right 
##      3828       139
# Percentage RIBO
## Percent
sobj$percent_rb <- Seurat::PercentageFeatureSet(sobj, pattern = ribo_patern)
print('% Ribosomal expression :')
## [1] "% Ribosomal expression :"
print(summary(sobj$percent_rb))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1676 18.5365 24.8413 24.5548 30.7507 56.7164
## Number
df_percent_rb <- data.frame(percent_rb = sobj$percent_rb, droplets_state = "in")
df_percent_rb[df_percent_rb$percent_rb <= pcribo.min, "droplets_state"] <- "out.left"
df_percent_rb[df_percent_rb$percent_rb >= pcribo.max, "droplets_state"] <- "out.right"
print(paste0(pcribo.min, ' <= % ribo <= ', pcribo.max, ' :'))
## [1] "0 <= % ribo <= 100 :"
print(table(df_percent_rb$droplets_state))
## 
##   in 
## 3967
# Plots
## Histograms
ggplot2::qplot(sobj$percent_mt, geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("%mito (in [", pcmito.min, ';', pcmito.max, "] : ", length(which(df_percent_mt$droplets_state == "in")), " droplets)"), xlab = "%mito") + ggplot2::geom_vline(xintercept = c(5,10,15,20), col = "cyan4", linetype = "dashed", size = 1) + ggplot2::geom_vline(xintercept = c(pcmito.min,pcmito.max), col = "red", linetype = "dashed", size = 1.5)

ggplot2::qplot(sobj$percent_rb, geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("%ribo (in [", pcribo.min, ';', pcribo.max, "] : ", length(which(df_percent_rb$droplets_state == "in")), " droplets)"), xlab = "%ribo") + ggplot2::geom_vline(xintercept = c(pcribo.min,pcribo.max), col = "red", linetype = "dashed", size = 1.5)

## Violinplot
Seurat::VlnPlot(sobj, features = c("percent_mt", "percent_rb"), ncol = 2)

Identification of background genes

# Nb counts per gene
df_cells_by_gene <- data.frame(nb_cells = apply(sobj@assays$RNA@counts,1,function(x){length(which(x>0))}), gene_state = "in", stringsAsFactors = FALSE)
df_cells_by_gene[df_cells_by_gene$nb_cells <= min.cells, "gene_state"] <- "out"
df_cells_by_gene_zoom <- df_cells_by_gene[df_cells_by_gene$nb_cells <= min.cells*4, ]
print(paste0('Number of genes expressed in >= ', min.cells, ' cells:'))
## [1] "Number of genes expressed in >= 5 cells:"
print(table(df_cells_by_gene$gene_state))
## 
##    in   out 
## 23156  9921
# Plots
## Histograms
ggplot2::qplot(df_cells_by_gene$nb_cells, geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("nb genes (>= ", min.cells, " : ", length(which(df_cells_by_gene$gene_state == "in")), " genes)"), xlab = "nb cells") + ggplot2::geom_vline(xintercept = min.cells, col = "red", linetype = "dashed", size = 1.5)

ggplot2::qplot(df_cells_by_gene_zoom$nb_cells, geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("nb genes (>= ", min.cells, " : ", length(which(df_cells_by_gene$gene_state == "in")), " genes) (Zoom)"),xlab = "nb cells") + ggplot2::geom_vline(xintercept = min.cells, col = "red", linetype = "dashed", size = 1.5)

Cell cycle prediction

Note: Cyclone take a while (~10-15min).

# Seurat CC
## Load database
if(species == "human"){
  cc_seurat <- Seurat::cc.genes.updated.2019
}else if (species == "mouse"){
  ### Seurat only provides cell cycle genes for homo sapiens (as this list was developed solely using data from this species). To use it on mus musculus, one can simply convert these human HUGO genes to to mus MGI orthologues (using biomaRt). See [this github issue](https://github.com/satijalab/seurat/issues/2493) 
  cc_seurat <- Seurat::cc.genes.updated.2019
  human_mart = biomaRt::useEnsembl("ensembl", dataset = "hsapiens_gene_ensembl")
  mouse_mart = biomaRt::useEnsembl("ensembl", dataset = "mmusculus_gene_ensembl")
  cc_seurat$s.genes = biomaRt::getLDS(attributes = c("hgnc_symbol"), filters = "hgnc_symbol", values = cc_seurat$s.genes , mart = human_mart, attributesL = c("mgi_symbol"), martL = mouse_mart, uniqueRows=T)[, 2]
  cc_seurat$g2m.genes = biomaRt::getLDS(attributes = c("hgnc_symbol"), filters = "hgnc_symbol", values = cc_seurat$g2m.genes , mart = human_mart, attributesL = c("mgi_symbol"), martL = mouse_mart, uniqueRows=T)[, 2]
}
## Prediction
sobj <- Seurat::CellCycleScoring(object = sobj, s.features = cc_seurat$s.genes, g2m.features = cc_seurat$g2m.genes, assay = "RNA", nbin = 10, seed = the.seed)
sobj$Seurat.SmG2M.Score <- sobj$S.Score - sobj$G2M.Score
## Rename results columns with 'Seurat.' prefix
sobj$Seurat.S.Score <- sobj$S.Score
sobj$Seurat.G2M.Score <- sobj$G2M.Score
sobj$Seurat.Phase <- sobj$Phase
sobj$S.Score <- sobj$G2M.Score <- sobj$Phase <- NULL
## Print results
print("Cell cycle phase according to Seurat: ")
## [1] "Cell cycle phase according to Seurat: "
print(table(sobj$Seurat.Phase))
## 
##   G1  G2M    S 
## 3286  402  279
# Scran: Cyclone
## Load database
if(species == "human"){
  # Load
  cc_cyclone <- readRDS(system.file("exdata", "human_cycle_markers.rds", package = "scran"))
  # Translate database from ENSEMBL to gene SYMBOL
  suppressPackageStartupMessages(library(org.Hs.eg.db))
  for (phase in names(cc_cyclone)) {
    for (fs in names(cc_cyclone[[phase]])) {
      cc_cyclone[[phase]][[fs]] <- suppressMessages(AnnotationDbi::mapIds(org.Hs.eg.db, keys=cc_cyclone[[phase]][[fs]], column = "SYMBOL", keytype = "ENSEMBL", multiVals = "first"))
    }
  }
}else if (species == "mouse"){
  # Load
  cc_cyclone <- readRDS(system.file("exdata", "mouse_cycle_markers.rds", package = "scran"))
  # Translate database from ENSEMBL to gene SYMBOL
  suppressPackageStartupMessages(library(org.Mm.eg.db))
  for (phase in names(cc_cyclone)) {
    for (fs in names(cc_cyclone[[phase]])) {
      cc_cyclone[[phase]][[fs]] <- suppressMessages(AnnotationDbi::mapIds(org.Mm.eg.db, keys=cc_cyclone[[phase]][[fs]], column = "SYMBOL", keytype = "ENSEMBL", multiVals = "first"))
    }
  }
}
## Prediction
cycres <- scran::cyclone(Seurat::as.SingleCellExperiment(sobj), pairs = cc_cyclone, verbose = FALSE)
sobj$Cyclone.SmG2M.Score <- cycres$normalized.scores$S - cycres$normalized.scores$G2M
## Save results in the seurat object
sobj$Cyclone.Phase <- as.factor(cycres$phases)
sobj$Cyclone.G1.Score <- cycres$normalized.scores$G1
sobj$Cyclone.S.Score <- cycres$normalized.scores$S
sobj$Cyclone.G2M.Score <- cycres$normalized.scores$G2M
rm(cycres)
## Print results
print("Cell cycle phase according to Cyclone: ")
## [1] "Cell cycle phase according to Cyclone: "
print(table(sobj$Cyclone.Phase))
## 
##   G1  G2M    S 
## 2024  843 1100
# Crossing results : Seurat -VS- Cyclone
df_cycle <- data.frame(Seurat.Phase = sobj$Seurat.Phase, Cyclone.Phase = sobj$Cyclone.Phase)
print(table(df_cycle))
##             Cyclone.Phase
## Seurat.Phase   G1  G2M    S
##          G1  1684  661  941
##          G2M  192  121   89
##          S    148   61   70
print(paste("Seurat and Cyclone agreed for ",round((sum(diag(table(df_cycle))) / sum(table(df_cycle)) *100),2),"% of droplets."))
## [1] "Seurat and Cyclone agreed for  47.26 % of droplets."

Identification of cell doublets

# scDblFinder
sobj$scDblFinder.class <- Seurat::as.Seurat(scDblFinder::scDblFinder(Seurat::as.SingleCellExperiment(sobj)))$scDblFinder.class
## Warning in .checkSCE(sce): Some cells in `sce` have an extremely low read
## counts; note that these could trigger errors and might best be filtered out
## Creating ~5000 artificial doublets...
## Dimensional reduction
## Evaluating kNN...
## Training model...
## iter=0, 171 cells excluded from training.
## iter=1, 274 cells excluded from training.
## iter=2, 292 cells excluded from training.
## Threshold found:0.782
## 253 (6.4%) doublets called
sobj$scDblFinder.class <- unname(sobj$scDblFinder.class == "doublet")
print('scDblFinder doublets :')
## [1] "scDblFinder doublets :"
print(table(sobj$scDblFinder.class))
## 
## FALSE  TRUE 
##  3714   253
# scds
sobj$scds.score <- scds::cxds_bcds_hybrid(Seurat::as.SingleCellExperiment(sobj))$hybrid_score
## [13:07:42] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
sobj$scds.class <- unname(sobj$scds.score > 1)
print('scds-hybrid doublets :')
## [1] "scds-hybrid doublets :"
print(table(sobj$scds.class))
## 
## FALSE  TRUE 
##  3552   415
# union of scDblFinder & scds
sobj$doublets_consensus.class <- sobj$scDblFinder.class | sobj$scds.class
print('Consensus doublets :')
## [1] "Consensus doublets :"
print(table(sobj$doublets_consensus.class))
## 
## FALSE  TRUE 
##  3451   516
# Correlation between doublets and cell cycle
## Overlap cells
df_S.cycle_doublets <- data.frame(Seurat.Phase = sobj$Seurat.Phase, doublets = sobj$doublets_consensus.class)
print(table(df_S.cycle_doublets))
##             doublets
## Seurat.Phase FALSE TRUE
##          G1   2806  480
##          G2M   386   16
##          S     259   20
print(paste0(round((nrow(df_S.cycle_doublets[df_S.cycle_doublets$Seurat.Phase == "G2M" & df_S.cycle_doublets$doublets == TRUE,])/nrow(df_S.cycle_doublets[df_S.cycle_doublets$doublets == TRUE,])*100),2),"% of doublets are in G2M phase by Seurat."))
## [1] "3.1% of doublets are in G2M phase by Seurat."
df_C.cycle_doublets <- data.frame(Cyclone.Phase = sobj$Cyclone.Phase, doublets = sobj$doublets_consensus.class)
print(table(df_C.cycle_doublets))
##              doublets
## Cyclone.Phase FALSE TRUE
##           G1   1730  294
##           G2M   780   63
##           S     941  159
print(paste0(round((nrow(df_C.cycle_doublets[df_C.cycle_doublets$Cyclone.Phase == "G2M" & df_C.cycle_doublets$doublets == TRUE,])/nrow(df_C.cycle_doublets[df_S.cycle_doublets$doublets == TRUE,])*100),2),"% of doublets are in G2M phase by Cyclone."))
## [1] "12.21% of doublets are in G2M phase by Cyclone."
## Quick 'n dirty uMAP 
## /!\ Warning: this umap will not represent the cell types present! Many parameters will have to be adjusted for this (tomorrow's course on Statistical models and analyses). Here, we just want to be able to represent all the cells on the same graph and plot the identification of doublets and the phases of the cell cycle /!\
tmp_sobj <- sobj
tmp_sobj <- suppressWarnings(Seurat::SCTransform(tmp_sobj, seed.use = the.seed, verbose = FALSE)) #normalization
tmp_sobj <- Seurat::RunPCA(tmp_sobj, verbose = FALSE,seed.use = the.seed) #dimension reduction
tmp_sobj <- Seurat::RunUMAP(tmp_sobj, dims = 1:25, verbose = FALSE, seed.use = the.seed)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
Seurat::DimPlot(tmp_sobj, group.by = "Cyclone.Phase") + ggplot2::ggtitle("Cell Phase (cyclone)") + Seurat::DarkTheme()

Seurat::DimPlot(tmp_sobj, group.by = "Seurat.Phase") + ggplot2::ggtitle("Cell Phase (Seurat)") + Seurat::DarkTheme()

Seurat::DimPlot(tmp_sobj, group.by = "scDblFinder.class") + ggplot2::ggtitle("Cell doublets (scDblFinder)") + Seurat::DarkTheme()

Seurat::DimPlot(tmp_sobj, group.by = "scds.class") + ggplot2::ggtitle("Cell doublets (scds)") + Seurat::DarkTheme()

Seurat::DimPlot(tmp_sobj, group.by = "doublets_consensus.class") + ggplot2::ggtitle("Cell doublets (union)") + Seurat::DarkTheme()

# Cleaning
rm(tmp_sobj)
invisible(gc())

Filtering

# Filtering cell doublets
sobj <- sobj[, !sobj$doublets_consensus.class]

# Filtering non-qualified cells
cellskeep <- sobj$percent_mt >= pcmito.min & sobj$percent_mt <= pcmito.max & sobj$percent_rb >= pcribo.min & sobj$percent_rb <= pcribo.max
sobj <- sobj[, cellskeep]

# Filtering features with ultra-low expression
features_to_keep <- which(apply(sobj@assays$RNA@counts,1,function(x){ length(which(x>0)) }) >= min.cells)
sobj <- sobj[features_to_keep,]

Checking the effect of filtering

# Re-computing basic metrics : percentage of mito + ribo + nb features + nb counts
## Nb features by cell
sobj$nFeature_RNA <- apply(sobj@assays$RNA@counts,2,function(x){ length(which(x>0)) })
sobj$min_features <- sobj$nFeature_RNA >= min.features
## Nb counts by cell
sobj$nCount_RNA <- apply(sobj@assays$RNA@counts,2,function(x){ sum(x) })
sobj$min_counts <- sobj$nCount_RNA >= min.counts
## Percent mito
sobj$percent_mt <- Seurat::PercentageFeatureSet(sobj, pattern = mito_patern)
## Percent ribo
sobj$percent_rb <- Seurat::PercentageFeatureSet(sobj, pattern = ribo_patern)

# Some metrics about total filtered count matrix:
print('Number of cells in filtered data:')
## [1] "Number of cells in filtered data:"
print(ncol(sobj))
## [1] 3316
print('Number of genes in filtered data:')
## [1] "Number of genes in filtered data:"
print(nrow(sobj))
## [1] 22435
print('Number of UMIs in filtered data:')
## [1] "Number of UMIs in filtered data:"
print(sum(sobj@assays$RNA@counts))
## [1] 8097845
print(paste0('Number of cells with >= ', min.features, ' features :'))
## [1] "Number of cells with >= 3 features :"
print(table(sobj$min_features))
## 
## TRUE 
## 3316
print('Mean features by cell :')
## [1] "Mean features by cell :"
print(summary(sobj$nFeature_RNA))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    84.0   320.0   537.0   934.4  1564.0  5078.0
print(paste0('Number of cells with >= ', min.counts, ' of total counts:'))
## [1] "Number of cells with >= 100 of total counts:"
print(table(sobj$min_counts))
## 
## TRUE 
## 3316
print('Mean Count by cell:')
## [1] "Mean Count by cell:"
print(summary(sobj$nCount_RNA))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   101.0   523.0   977.5  2442.1  4379.5 25707.0
print('% Mitochondrial expression :')
## [1] "% Mitochondrial expression :"
print(summary(sobj$percent_mt))
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##  0.07504  6.78807  8.89640  9.47368 11.70721 19.96466
print('% Ribosomal expression :')
## [1] "% Ribosomal expression :"
print(summary(sobj$percent_rb))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1679 19.2996 25.4411 25.1937 31.3269 56.7503
print("Cell cycle phases according to Seurat: ")
## [1] "Cell cycle phases according to Seurat: "
print(table(sobj$Seurat.Phase))
## 
##   G1  G2M    S 
## 2698  364  254
print("Cell cycle phases according to Cyclone: ")
## [1] "Cell cycle phases according to Cyclone: "
print(table(sobj$Cyclone.Phase))
## 
##   G1  G2M    S 
## 1681  733  902
print('Consensus doublets :')
## [1] "Consensus doublets :"
print(table(sobj$doublets_consensus.class))
## 
## FALSE 
##  3316
# Plots
## Histograms
ggplot2::qplot(sobj[["nFeature_RNA", drop = TRUE]], geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("nFeature_RNA (>= ", min.features, " : ", length(which(sobj$min_features)), " cells)"), xlab = "nFeature_RNA") + ggplot2::geom_vline(xintercept = min.features, col = "red", linetype = "dashed", size = 1.5)

ggplot2::qplot(sobj[["nCount_RNA", drop = TRUE]], geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("nCount_RNA (>= ", min.counts, " : ", length(which(sobj$min_counts)), " cells)"), xlab = "nCount_RNA") + ggplot2::geom_vline(xintercept = min.counts, col = "red", linetype = "dashed", size = 1.5)

ggplot2::qplot(sobj$percent_mt, geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("%mito (in [", pcmito.min, ';', pcmito.max, "] : ", length(which(df_percent_mt$droplets_state == "in")), " cells)"), xlab = "%mito") + ggplot2::geom_vline(xintercept = c(5,10,15,20), col = "cyan4", linetype = "dashed", size = 1) + ggplot2::geom_vline(xintercept = c(pcmito.min,pcmito.max), col = "red", linetype = "dashed", size = 1.5)

ggplot2::qplot(sobj$percent_rb, geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("%ribo (in [", pcribo.min, ';', pcribo.max, "] : ", length(which(df_percent_rb$droplets_state == "in")), " cells)"), xlab = "%ribo") + ggplot2::geom_vline(xintercept = c(pcribo.min,pcribo.max), col = "red", linetype = "dashed", size = 1.5)

## Violinplot
Seurat::VlnPlot(sobj, features = c("nFeature_RNA", "nCount_RNA","percent_mt","percent_rb"), ncol = 4)

## Scatterplot
Seurat::FeatureScatter(sobj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")

Save

# Save parameters
sobj@misc$params$sample.name <- sample.name
sobj@misc$params$species <- species
sobj@misc$params$QC$seed <- the.seed
sobj@misc$params$QC$emptydrops$emptydrops.retain <- emptydrops.retain
sobj@misc$params$QC$pcmito.range <- c(pcmito.min,pcmito.max)
sobj@misc$params$QC$pcribo.range <- c(pcribo.min,pcribo.max)
sobj@misc$params$QC$min.features <- min.features
sobj@misc$params$QC$min.counts <- min.counts
sobj@misc$params$QC$min.cells <- min.cells

# Save R session
sobj@misc$params$QC$Rsession <- utils::capture.output(devtools::session_info())
## Registered S3 method overwritten by 'cli':
##   method     from         
##   print.boxx spatstat.geom
# Save seurat object
dir.create(output.dir, recursive = TRUE, showWarnings = FALSE)
save(sobj, file = paste0(output.dir, sample.name, '_FILTERED_with.rda'), compress = "bzip2")
# Print R session info
utils::capture.output(devtools::session_info())
##   [1] "─ Session info  ──────────────────────────────────────────────────────────────"   
##   [2] " hash: cookie, man farmer: light skin tone, person golfing: medium-dark skin tone"
##   [3] ""                                                                                 
##   [4] " setting  value"                                                                  
##   [5] " version  R version 4.1.1 (2021-08-10)"                                           
##   [6] " os       Ubuntu 18.04.6 LTS"                                                     
##   [7] " system   x86_64, linux-gnu"                                                      
##   [8] " ui       X11"                                                                    
##   [9] " language (EN)"                                                                   
##  [10] " collate  fr_FR.UTF-8"                                                            
##  [11] " ctype    fr_FR.UTF-8"                                                            
##  [12] " tz       Europe/Paris"                                                           
##  [13] " date     2022-01-08"                                                             
##  [14] " pandoc   2.16.2 @ /shared/software/miniconda/envs/r-4.1.1/bin/ (via rmarkdown)"  
##  [15] ""                                                                                 
##  [16] "─ Packages ───────────────────────────────────────────────────────────────────"   
##  [17] " package              * version  date (UTC) lib source"                           
##  [18] " abind                  1.4-5    2016-07-21 [2] CRAN (R 4.1.0)"                   
##  [19] " AnnotationDbi        * 1.56.2   2021-11-09 [2] Bioconductor"                     
##  [20] " assertthat             0.2.1    2019-03-21 [2] CRAN (R 4.1.0)"                   
##  [21] " beachmat               2.10.0   2021-10-26 [2] Bioconductor"                     
##  [22] " beeswarm               0.4.0    2021-06-01 [2] CRAN (R 4.1.1)"                   
##  [23] " Biobase              * 2.54.0   2021-10-26 [2] Bioconductor"                     
##  [24] " BiocGenerics         * 0.40.0   2021-10-26 [2] Bioconductor"                     
##  [25] " BiocNeighbors          1.12.0   2021-10-26 [2] Bioconductor"                     
##  [26] " BiocParallel           1.28.3   2021-12-09 [2] Bioconductor"                     
##  [27] " BiocSingular           1.10.0   2021-10-26 [2] Bioconductor"                     
##  [28] " Biostrings             2.62.0   2021-10-26 [2] Bioconductor"                     
##  [29] " bit                    4.0.4    2020-08-04 [2] CRAN (R 4.1.0)"                   
##  [30] " bit64                  4.0.5    2020-08-30 [2] CRAN (R 4.1.0)"                   
##  [31] " bitops                 1.0-7    2021-04-24 [2] CRAN (R 4.1.0)"                   
##  [32] " blob                   1.2.2    2021-07-23 [2] CRAN (R 4.1.0)"                   
##  [33] " bluster                1.4.0    2021-10-26 [2] Bioconductor"                     
##  [34] " bslib                  0.3.1    2021-10-06 [2] CRAN (R 4.1.1)"                   
##  [35] " cachem                 1.0.6    2021-08-19 [2] CRAN (R 4.1.1)"                   
##  [36] " callr                  3.7.0    2021-04-20 [2] CRAN (R 4.1.0)"                   
##  [37] " cli                    3.1.0    2021-10-27 [2] CRAN (R 4.1.1)"                   
##  [38] " cluster                2.1.2    2021-04-17 [2] CRAN (R 4.1.0)"                   
##  [39] " codetools              0.2-18   2020-11-04 [2] CRAN (R 4.1.0)"                   
##  [40] " colorspace             2.0-2    2021-06-24 [2] CRAN (R 4.1.0)"                   
##  [41] " cowplot                1.1.1    2020-12-30 [2] CRAN (R 4.1.0)"                   
##  [42] " crayon                 1.4.2    2021-10-29 [2] CRAN (R 4.1.1)"                   
##  [43] " data.table             1.14.2   2021-09-27 [2] CRAN (R 4.1.1)"                   
##  [44] " DBI                    1.1.2    2021-12-20 [2] CRAN (R 4.1.1)"                   
##  [45] " DelayedArray           0.20.0   2021-10-26 [2] Bioconductor"                     
##  [46] " DelayedMatrixStats     1.16.0   2021-10-26 [2] Bioconductor"                     
##  [47] " deldir                 1.0-6    2021-10-23 [2] CRAN (R 4.1.1)"                   
##  [48] " desc                   1.4.0    2021-09-28 [2] CRAN (R 4.1.1)"                   
##  [49] " devtools               2.4.3    2021-11-30 [2] CRAN (R 4.1.1)"                   
##  [50] " digest                 0.6.29   2021-12-01 [2] CRAN (R 4.1.1)"                   
##  [51] " dplyr                * 1.0.7    2021-06-18 [2] CRAN (R 4.1.0)"                   
##  [52] " dqrng                  0.3.0    2021-05-01 [2] CRAN (R 4.1.0)"                   
##  [53] " DropletUtils           1.14.1   2021-11-08 [2] Bioconductor"                     
##  [54] " edgeR                  3.36.0   2021-10-26 [2] Bioconductor"                     
##  [55] " ellipsis               0.3.2    2021-04-29 [2] CRAN (R 4.1.0)"                   
##  [56] " evaluate               0.14     2019-05-28 [2] CRAN (R 4.1.0)"                   
##  [57] " fansi                  0.5.0    2021-05-25 [2] CRAN (R 4.1.1)"                   
##  [58] " farver                 2.1.0    2021-02-28 [2] CRAN (R 4.1.0)"                   
##  [59] " fastmap                1.1.0    2021-01-25 [2] CRAN (R 4.1.0)"                   
##  [60] " fitdistrplus           1.1-6    2021-09-28 [2] CRAN (R 4.1.1)"                   
##  [61] " fs                     1.5.2    2021-12-08 [2] CRAN (R 4.1.1)"                   
##  [62] " future                 1.23.0   2021-10-31 [2] CRAN (R 4.1.1)"                   
##  [63] " future.apply           1.8.1    2021-08-10 [2] CRAN (R 4.1.1)"                   
##  [64] " generics               0.1.1    2021-10-25 [2] CRAN (R 4.1.1)"                   
##  [65] " GenomeInfoDb           1.30.0   2021-10-26 [2] Bioconductor"                     
##  [66] " GenomeInfoDbData       1.2.7    2021-12-02 [2] Bioconductor"                     
##  [67] " GenomicRanges          1.46.1   2021-11-18 [2] Bioconductor"                     
##  [68] " ggbeeswarm             0.6.0    2017-08-07 [2] CRAN (R 4.1.1)"                   
##  [69] " ggplot2                3.3.5    2021-06-25 [2] CRAN (R 4.1.0)"                   
##  [70] " ggrepel                0.9.1    2021-01-15 [2] CRAN (R 4.1.0)"                   
##  [71] " ggridges               0.5.3    2021-01-08 [2] CRAN (R 4.1.0)"                   
##  [72] " globals                0.14.0   2020-11-22 [2] CRAN (R 4.1.0)"                   
##  [73] " glue                   1.6.0    2021-12-17 [2] CRAN (R 4.1.1)"                   
##  [74] " goftest                1.2-3    2021-10-07 [2] CRAN (R 4.1.1)"                   
##  [75] " gridExtra              2.3      2017-09-09 [2] CRAN (R 4.1.0)"                   
##  [76] " gtable                 0.3.0    2019-03-25 [2] CRAN (R 4.1.0)"                   
##  [77] " HDF5Array              1.22.1   2021-11-14 [2] Bioconductor"                     
##  [78] " highr                  0.9      2021-04-16 [2] CRAN (R 4.1.0)"                   
##  [79] " htmltools              0.5.2    2021-08-25 [2] CRAN (R 4.1.1)"                   
##  [80] " htmlwidgets            1.5.4    2021-09-08 [2] CRAN (R 4.1.1)"                   
##  [81] " httpuv                 1.6.4    2021-12-14 [2] CRAN (R 4.1.1)"                   
##  [82] " httr                   1.4.2    2020-07-20 [2] CRAN (R 4.1.0)"                   
##  [83] " ica                    1.0-2    2018-05-24 [2] CRAN (R 4.1.0)"                   
##  [84] " igraph                 1.2.10   2021-12-15 [2] CRAN (R 4.1.1)"                   
##  [85] " IRanges              * 2.28.0   2021-10-26 [2] Bioconductor"                     
##  [86] " irlba                  2.3.5    2021-12-06 [2] CRAN (R 4.1.1)"                   
##  [87] " jquerylib              0.1.4    2021-04-26 [2] CRAN (R 4.1.0)"                   
##  [88] " jsonlite               1.7.2    2020-12-09 [2] CRAN (R 4.1.0)"                   
##  [89] " KEGGREST               1.34.0   2021-10-26 [2] Bioconductor"                     
##  [90] " KernSmooth             2.23-20  2021-05-03 [2] CRAN (R 4.1.0)"                   
##  [91] " knitr                  1.37     2021-12-16 [2] CRAN (R 4.1.1)"                   
##  [92] " labeling               0.4.2    2020-10-20 [2] CRAN (R 4.1.0)"                   
##  [93] " later                  1.3.0    2021-08-18 [2] CRAN (R 4.1.1)"                   
##  [94] " lattice                0.20-45  2021-09-22 [2] CRAN (R 4.1.1)"                   
##  [95] " lazyeval               0.2.2    2019-03-15 [2] CRAN (R 4.1.0)"                   
##  [96] " leiden                 0.3.9    2021-07-27 [2] CRAN (R 4.1.0)"                   
##  [97] " lifecycle              1.0.1    2021-09-24 [2] CRAN (R 4.1.1)"                   
##  [98] " limma                  3.50.0   2021-10-26 [2] Bioconductor"                     
##  [99] " listenv                0.8.0    2019-12-05 [2] CRAN (R 4.1.0)"                   
## [100] " lmtest                 0.9-39   2021-11-07 [2] CRAN (R 4.1.1)"                   
## [101] " locfit                 1.5-9.4  2020-03-25 [2] CRAN (R 4.1.1)"                   
## [102] " magrittr               2.0.1    2020-11-17 [2] CRAN (R 4.1.0)"                   
## [103] " MASS                   7.3-54   2021-05-03 [2] CRAN (R 4.1.0)"                   
## [104] " Matrix                 1.3-4    2021-06-01 [2] CRAN (R 4.1.0)"                   
## [105] " MatrixGenerics         1.6.0    2021-10-26 [2] Bioconductor"                     
## [106] " matrixStats            0.61.0   2021-09-17 [2] CRAN (R 4.1.1)"                   
## [107] " memoise                2.0.1    2021-11-26 [2] CRAN (R 4.1.1)"                   
## [108] " metapod                1.2.0    2021-10-26 [2] Bioconductor"                     
## [109] " mgcv                   1.8-38   2021-10-06 [2] CRAN (R 4.1.1)"                   
## [110] " mime                   0.12     2021-09-28 [2] CRAN (R 4.1.1)"                   
## [111] " miniUI                 0.1.1.1  2018-05-18 [2] CRAN (R 4.1.0)"                   
## [112] " munsell                0.5.0    2018-06-12 [2] CRAN (R 4.1.0)"                   
## [113] " nlme                   3.1-153  2021-09-07 [2] CRAN (R 4.1.1)"                   
## [114] " org.Hs.eg.db         * 3.14.0   2021-12-09 [2] Bioconductor"                     
## [115] " parallelly             1.30.0   2021-12-17 [2] CRAN (R 4.1.1)"                   
## [116] " patchwork              1.1.1    2020-12-17 [2] CRAN (R 4.1.0)"                   
## [117] " pbapply                1.5-0    2021-09-16 [2] CRAN (R 4.1.1)"                   
## [118] " pillar                 1.6.4    2021-10-18 [2] CRAN (R 4.1.1)"                   
## [119] " pkgbuild               1.2.1    2021-11-30 [2] CRAN (R 4.1.1)"                   
## [120] " pkgconfig              2.0.3    2019-09-22 [2] CRAN (R 4.1.0)"                   
## [121] " pkgload                1.2.4    2021-11-30 [2] CRAN (R 4.1.1)"                   
## [122] " plotly                 4.10.0   2021-10-09 [2] CRAN (R 4.1.1)"                   
## [123] " plyr                   1.8.6    2020-03-03 [2] CRAN (R 4.1.0)"                   
## [124] " png                    0.1-7    2013-12-03 [2] CRAN (R 4.1.0)"                   
## [125] " polyclip               1.10-0   2019-03-14 [2] CRAN (R 4.1.0)"                   
## [126] " prettyunits            1.1.1    2020-01-24 [2] CRAN (R 4.1.0)"                   
## [127] " pROC                   1.18.0   2021-09-03 [2] CRAN (R 4.1.1)"                   
## [128] " processx               3.5.2    2021-04-30 [2] CRAN (R 4.1.0)"                   
## [129] " promises               1.2.0.1  2021-02-11 [2] CRAN (R 4.1.0)"                   
## [130] " ps                     1.6.0    2021-02-28 [2] CRAN (R 4.1.0)"                   
## [131] " purrr                  0.3.4    2020-04-17 [2] CRAN (R 4.1.0)"                   
## [132] " R.methodsS3            1.8.1    2020-08-26 [2] CRAN (R 4.1.0)"                   
## [133] " R.oo                   1.24.0   2020-08-26 [2] CRAN (R 4.1.0)"                   
## [134] " R.utils                2.11.0   2021-09-26 [2] CRAN (R 4.1.1)"                   
## [135] " R6                     2.5.1    2021-08-19 [2] CRAN (R 4.1.1)"                   
## [136] " RANN                   2.6.1    2019-01-08 [2] CRAN (R 4.1.0)"                   
## [137] " RColorBrewer           1.1-2    2014-12-07 [2] CRAN (R 4.1.0)"                   
## [138] " Rcpp                   1.0.7    2021-07-07 [2] CRAN (R 4.1.0)"                   
## [139] " RcppAnnoy              0.0.19   2021-07-30 [2] CRAN (R 4.1.0)"                   
## [140] " RCurl                  1.98-1.5 2021-09-17 [2] CRAN (R 4.1.1)"                   
## [141] " remotes                2.4.2    2021-11-30 [2] CRAN (R 4.1.1)"                   
## [142] " reshape2               1.4.4    2020-04-09 [2] CRAN (R 4.1.0)"                   
## [143] " reticulate             1.22     2021-09-17 [2] CRAN (R 4.1.1)"                   
## [144] " rhdf5                  2.38.0   2021-10-26 [2] Bioconductor"                     
## [145] " rhdf5filters           1.6.0    2021-10-26 [2] Bioconductor"                     
## [146] " Rhdf5lib               1.16.0   2021-10-26 [2] Bioconductor"                     
## [147] " rlang                  0.4.12   2021-10-18 [2] CRAN (R 4.1.1)"                   
## [148] " rmarkdown              2.11     2021-09-14 [2] CRAN (R 4.1.1)"                   
## [149] " ROCR                   1.0-11   2020-05-02 [2] CRAN (R 4.1.0)"                   
## [150] " rpart                  4.1-15   2019-04-12 [2] CRAN (R 4.1.0)"                   
## [151] " rprojroot              2.0.2    2020-11-15 [2] CRAN (R 4.1.0)"                   
## [152] " RSpectra               0.16-0   2019-12-01 [2] CRAN (R 4.1.1)"                   
## [153] " RSQLite                2.2.9    2021-12-06 [2] CRAN (R 4.1.1)"                   
## [154] " rstudioapi             0.13     2020-11-12 [2] CRAN (R 4.1.0)"                   
## [155] " rsvd                   1.0.5    2021-04-16 [2] CRAN (R 4.1.1)"                   
## [156] " Rtsne                  0.15     2018-11-10 [2] CRAN (R 4.1.0)"                   
## [157] " S4Vectors            * 0.32.3   2021-11-21 [2] Bioconductor"                     
## [158] " sass                   0.4.0    2021-05-12 [2] CRAN (R 4.1.0)"                   
## [159] " ScaledMatrix           1.2.0    2021-10-26 [2] Bioconductor"                     
## [160] " scales                 1.1.1    2020-05-11 [2] CRAN (R 4.1.0)"                   
## [161] " scater                 1.22.0   2021-10-26 [2] Bioconductor"                     
## [162] " scattermore            0.7      2020-11-24 [2] CRAN (R 4.1.0)"                   
## [163] " scDblFinder            1.8.0    2021-10-26 [2] Bioconductor"                     
## [164] " scds                   1.10.0   2021-10-26 [2] Bioconductor"                     
## [165] " scran                  1.22.1   2021-11-14 [2] Bioconductor"                     
## [166] " sctransform            0.3.2    2020-12-16 [2] CRAN (R 4.1.0)"                   
## [167] " scuttle                1.4.0    2021-10-26 [2] Bioconductor"                     
## [168] " sessioninfo            1.2.1    2021-11-02 [2] CRAN (R 4.1.1)"                   
## [169] " Seurat                 4.0.6    2021-12-16 [2] CRAN (R 4.1.1)"                   
## [170] " SeuratObject           4.0.4    2021-11-23 [2] CRAN (R 4.1.1)"                   
## [171] " shiny                  1.7.1    2021-10-02 [2] CRAN (R 4.1.1)"                   
## [172] " SingleCellExperiment   1.16.0   2021-10-26 [2] Bioconductor"                     
## [173] " sparseMatrixStats      1.6.0    2021-10-26 [2] Bioconductor"                     
## [174] " spatstat.core          2.3-2    2021-11-26 [2] CRAN (R 4.1.1)"                   
## [175] " spatstat.data          2.1-2    2021-12-17 [2] CRAN (R 4.1.1)"                   
## [176] " spatstat.geom          2.3-1    2021-12-10 [2] CRAN (R 4.1.1)"                   
## [177] " spatstat.sparse        2.1-0    2021-12-17 [2] CRAN (R 4.1.1)"                   
## [178] " spatstat.utils         2.3-0    2021-12-12 [2] CRAN (R 4.1.1)"                   
## [179] " statmod                1.4.36   2021-05-10 [2] CRAN (R 4.1.0)"                   
## [180] " stringi                1.7.6    2021-11-29 [2] CRAN (R 4.1.1)"                   
## [181] " stringr                1.4.0    2019-02-10 [2] CRAN (R 4.1.0)"                   
## [182] " SummarizedExperiment   1.24.0   2021-10-26 [2] Bioconductor"                     
## [183] " survival               3.2-13   2021-08-24 [2] CRAN (R 4.1.1)"                   
## [184] " tensor                 1.5      2012-05-05 [2] CRAN (R 4.1.0)"                   
## [185] " testthat               3.1.0    2021-10-04 [2] CRAN (R 4.1.1)"                   
## [186] " tibble                 3.1.6    2021-11-07 [2] CRAN (R 4.1.1)"                   
## [187] " tidyr                  1.1.4    2021-09-27 [2] CRAN (R 4.1.1)"                   
## [188] " tidyselect             1.1.1    2021-04-30 [2] CRAN (R 4.1.0)"                   
## [189] " usethis                2.1.3    2021-10-27 [2] CRAN (R 4.1.1)"                   
## [190] " utf8                   1.2.2    2021-07-24 [2] CRAN (R 4.1.0)"                   
## [191] " uwot                   0.1.11   2021-12-02 [2] CRAN (R 4.1.1)"                   
## [192] " vctrs                  0.3.8    2021-04-29 [2] CRAN (R 4.1.0)"                   
## [193] " vipor                  0.4.5    2017-03-22 [2] CRAN (R 4.1.1)"                   
## [194] " viridis                0.6.2    2021-10-13 [2] CRAN (R 4.1.1)"                   
## [195] " viridisLite            0.4.0    2021-04-13 [2] CRAN (R 4.1.0)"                   
## [196] " withr                  2.4.3    2021-11-30 [2] CRAN (R 4.1.1)"                   
## [197] " xfun                   0.29     2021-12-14 [2] CRAN (R 4.1.1)"                   
## [198] " xgboost                1.5.0.2  2021-11-21 [2] CRAN (R 4.1.1)"                   
## [199] " xtable                 1.8-4    2019-04-21 [2] CRAN (R 4.1.0)"                   
## [200] " XVector                0.34.0   2021-10-26 [2] Bioconductor"                     
## [201] " yaml                   2.2.1    2020-02-01 [2] CRAN (R 4.1.0)"                   
## [202] " zlibbioc               1.40.0   2021-10-26 [2] Bioconductor"                     
## [203] " zoo                    1.8-9    2021-03-09 [2] CRAN (R 4.1.0)"                   
## [204] ""                                                                                 
## [205] " [1] /shared/home/maglave/R/x86_64-conda-linux-gnu-library/4.1"                   
## [206] " [2] /shared/software/miniconda/envs/r-4.1.1/lib/R/library"                       
## [207] ""                                                                                 
## [208] "──────────────────────────────────────────────────────────────────────────────"