SinCellTE 2022: Primary analyses

Initializing the practical session

user.id <- Sys.getenv('USER')

Setting Dataset

input.dir <- paste0("/shared/projects/sincellte_2022/", user.id, "/Primary_analyses/input/matrix/")
output.dir <- paste0("/shared/projects/sincellte_2022/", user.id, "/Primary_analyses/finalresult/")
sample.name <- "PBMC_CTRL_GE"
species <- "human"
aligment <- "10X" #kallisto or 10X

Setting Paramaters

# Computational Parameters
the.seed <- 1337L

# Analysis Parameters
## Empty droplets
emptydrops.retain <- NULL
## Barcode-level QC cell
pcmito.min <- 0
pcmito.max <- 20
pcribo.min <- 0
pcribo.max <- 100
min.features <- 200
min.counts <- 2500
## Feature-level QC
min.cells <- 5

Setting seed and loading R packages

set.seed(the.seed)
suppressPackageStartupMessages(library(dplyr))

Loading raw count matrix

# Loading raw count matrix
if(aligment == "kallisto"){
  scmat <- BUSpaRse::read_count_output(dir = input.dir, name = sample.name) # from BUStools
}else if(aligment == "10X"){
  scmat <- Seurat::Read10X(input.dir) # from 10X
} else stop("Alignment technology unknown!")

# Some metrics about raw count matrix:
print('Number of barodes (droplets) in raw count matrix:')

## [1] "Number of barodes (droplets) in raw count matrix:"

droplets.total.nb <- ncol(scmat)
droplets.total.nb

## [1] 343461

print('Number of features (genes) in raw count matrix:')

## [1] "Number of features (genes) in raw count matrix:"

genes.total.nb <- nrow(scmat)
genes.total.nb

## [1] 33077

print('Number of UMIs in raw count matrix:')

## [1] "Number of UMIs in raw count matrix:"

umi.total.nb <- sum(scmat)
umi.total.nb

## [1] 12925546

Removing empty droplets

Automatically : emptyDrops()

# Removing empty droplets
bc_rank <- DropletUtils::emptyDrops(m = scmat, retain = emptydrops.retain)
scmat_filtered <- scmat[, which(bc_rank$FDR < 1E-03)]

# Kneeplot
## Make the dataframe with all droplets (before filtering) and the number of UMI for each droplet, and if the droplets are filtered or not by emptydrops
nb_umi_by_barcode <- data.frame(nb_umi = Matrix::colSums(scmat), barcodes = colnames(scmat))
nb_umi_by_barcode <- nb_umi_by_barcode %>% arrange(desc(nb_umi)) %>% dplyr::mutate(num_barcode = seq.int(ncol(scmat)))
nb_umi_by_barcode$droplets_state <- "Empty Droplets"
nb_umi_by_barcode[nb_umi_by_barcode$barcodes %in% colnames(scmat_filtered), "droplets_state"] <- "Full Droplets"
## Draw kneeplot
ggplot2::ggplot(nb_umi_by_barcode, ggplot2::aes(y = nb_umi, x = num_barcode, color = droplets_state)) +
  ggplot2::geom_point() + ggplot2::ggtitle(paste0("Kneeplot of ", sample.name)) + 
  ggplot2::theme(legend.title = ggplot2::element_blank()) +
  ggplot2::scale_y_log10(name = "Number of UMI by droplet (log scale)") + 
  ggplot2::scale_x_log10(name = "Droplet rank (log scale)") +
  ggplot2::expand_limits(x = 0, y = 0) +
  ggplot2::scale_colour_manual(values = c("cyan3","royalblue4"), guide = 'legend') +
  ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(linetype = c(0, 0), shape = c(16, 16))))

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous x-axis

# Creation of the Seurat object
sobj <- Seurat::CreateSeuratObject(counts = scmat_filtered, project = sample.name)

# Cleaning
rm(bc_rank,nb_umi_by_barcode,scmat,scmat_filtered)
invisible(gc())

Manually

# Quantification
## Nb features by cell
sobj$min_features <- sobj$nFeature_RNA >= min.features
print(paste0('Number of droplets with >= ', min.features, ' features :'))

## [1] "Number of droplets with >= 200 features :"

print(table(sobj$min_features))

## 
## FALSE  TRUE 
##   268  3699

## Nb counts by cell
sobj$min_counts <- sobj$nCount_RNA >= min.counts
print(paste0('Number of droplets with >= ', min.counts, ' of total counts:'))

## [1] "Number of droplets with >= 2500 of total counts:"

print(table(sobj$min_counts))

## 
## FALSE  TRUE 
##  2381  1586

# Plots
## Histograms
ggplot2::qplot(sobj[["nFeature_RNA", drop = TRUE]], geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("nFeature_RNA (>= ", min.features, " : ", length(which(sobj$min_features)), " droplets)"), xlab = "nFeature_RNA") + ggplot2::geom_vline(xintercept = min.features, col = "red", linetype = "dashed", size = 1.5)

ggplot2::qplot(sobj[["nCount_RNA", drop = TRUE]], geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("nCount_RNA (>= ", min.counts, " : ", length(which(sobj$min_counts)), " droplets)"), xlab = "nCount_RNA") + ggplot2::geom_vline(xintercept = min.counts, col = "red", linetype = "dashed", size = 1.5)

## Violinplot
Seurat::VlnPlot(sobj, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)

## Scatterplot
Seurat::FeatureScatter(sobj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")

# Filtering cells on min.features and min.counts
sobj <- sobj[, sobj$nFeature_RNA >= min.features & sobj$nCount_RNA >= min.counts]

# Some metrics about filtered count matrix:
print('Number of droplets  in filtered data:')

## [1] "Number of droplets  in filtered data:"

droplets.kept.nb <- ncol(sobj)
droplets.kept.nb

## [1] 1584

print('Number of genes in filtered data:')

## [1] "Number of genes in filtered data:"

genes.kept.nb <- nrow(sobj)
genes.kept.nb

## [1] 33077

print('Number of UMIs in filtered data:')

## [1] "Number of UMIs in filtered data:"

umi.kept.nb <- sum(sobj@assays$RNA@counts)
umi.kept.nb

## [1] 9342026

print('Fraction of UMIs in droplets :')

## [1] "Fraction of UMIs in droplets :"

fraction.umi <- umi.kept.nb / umi.total.nb
fraction.umi

## [1] 0.7227568

print('Mean of UMIs in droplets :')

## [1] "Mean of UMIs in droplets :"

mean.umi <- round((umi.kept.nb / droplets.kept.nb), 0)
mean.umi

## [1] 5898

Computing basic metrics : Percentages of mito + Percentages of ribo + Cell cycle prediction + Identification of doublets

Percentages of mito + ribo

# Pattern
if(species == "human"){
  mito_patern <- "^MT-"
  ribo_patern <- "^RP[LS][[:digit:]]"
}else if (species == "mouse"){
  mito_patern <- "^mt-"
  ribo_patern <- "^Rp[ls][[:digit:]]"
}

# Percentage MITO
## Percent
sobj$percent_mt <- Seurat::PercentageFeatureSet(sobj, pattern = mito_patern)
print('% Mitochondrial expression :')

## [1] "% Mitochondrial expression :"

print(summary(sobj$percent_mt))

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##  0.07489  6.25794  7.74069  8.13897  9.56449 32.92551

## Number
df_percent_mt <- data.frame(percent_mt = sobj$percent_mt, droplets_state = "in")
df_percent_mt[df_percent_mt$percent_mt <= pcmito.min, "droplets_state"] <- "out.left"
df_percent_mt[df_percent_mt$percent_mt >= pcmito.max, "droplets_state"] <- "out.right"
print(paste0(pcmito.min, ' <= % mito <= ', pcmito.max, ' :'))

## [1] "0 <= % mito <= 20 :"

print(table(df_percent_mt$droplets_state))

## 
##        in out.right 
##      1575         9

# Percentage RIBO
## Percent
sobj$percent_rb <- Seurat::PercentageFeatureSet(sobj, pattern = ribo_patern)
print('% Ribosomal expression :')

## [1] "% Ribosomal expression :"

print(summary(sobj$percent_rb))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.659  20.168  28.276  27.401  34.301  54.456

## Number
df_percent_rb <- data.frame(percent_rb = sobj$percent_rb, droplets_state = "in")
df_percent_rb[df_percent_rb$percent_rb <= pcribo.min, "droplets_state"] <- "out.left"
df_percent_rb[df_percent_rb$percent_rb >= pcribo.max, "droplets_state"] <- "out.right"
print(paste0(pcribo.min, ' <= % ribo <= ', pcribo.max, ' :'))

## [1] "0 <= % ribo <= 100 :"

print(table(df_percent_rb$droplets_state))

## 
##   in 
## 1584

# Plots
## Histograms
ggplot2::qplot(sobj$percent_mt, geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("%mito (in [", pcmito.min, ';', pcmito.max, "] : ", length(which(df_percent_mt$droplets_state == "in")), " droplets)"), xlab = "%mito") + ggplot2::geom_vline(xintercept = c(5,10,15,20), col = "cyan4", linetype = "dashed", size = 1) + ggplot2::geom_vline(xintercept = c(pcmito.min,pcmito.max), col = "red", linetype = "dashed", size = 1.5)

ggplot2::qplot(sobj$percent_rb, geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("%ribo (in [", pcribo.min, ';', pcribo.max, "] : ", length(which(df_percent_rb$droplets_state == "in")), " droplets)"), xlab = "%ribo") + ggplot2::geom_vline(xintercept = c(pcribo.min,pcribo.max), col = "red", linetype = "dashed", size = 1.5)

## Violinplot
Seurat::VlnPlot(sobj, features = c("percent_mt", "percent_rb"), ncol = 2)

Identification of background genes

# Nb counts per gene
df_cells_by_gene <- data.frame(nb_cells = apply(sobj@assays$RNA@counts,1,function(x){length(which(x>0))}), gene_state = "in", stringsAsFactors = FALSE)
df_cells_by_gene[df_cells_by_gene$nb_cells <= min.cells, "gene_state"] <- "out"
df_cells_by_gene_zoom <- df_cells_by_gene[df_cells_by_gene$nb_cells <= min.cells*4, ]
print(paste0('Number of genes expressed in >= ', min.cells, ' cells:'))

## [1] "Number of genes expressed in >= 5 cells:"

print(table(df_cells_by_gene$gene_state))

## 
##    in   out 
## 22269 10808

# Plots
## Histograms
ggplot2::qplot(df_cells_by_gene$nb_cells, geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("nb genes (>= ", min.cells, " : ", length(which(df_cells_by_gene$gene_state == "in")), " genes)"), xlab = "nb cells") + ggplot2::geom_vline(xintercept = min.cells, col = "red", linetype = "dashed", size = 1.5)

ggplot2::qplot(df_cells_by_gene_zoom$nb_cells, geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("nb genes (>= ", min.cells, " : ", length(which(df_cells_by_gene$gene_state == "in")), " genes) (Zoom)"),xlab = "nb cells") + ggplot2::geom_vline(xintercept = min.cells, col = "red", linetype = "dashed", size = 1.5)

Cell cycle prediction

Note: Cyclone take a while (~10-15min).

# Seurat CC
## Load database
if(species == "human"){
  cc_seurat <- Seurat::cc.genes.updated.2019
}else if (species == "mouse"){
  ### Seurat only provides cell cycle genes for homo sapiens (as this list was developed solely using data from this species). To use it on mus musculus, one can simply convert these human HUGO genes to to mus MGI orthologues (using biomaRt). See [this github issue](https://github.com/satijalab/seurat/issues/2493) 
  cc_seurat <- Seurat::cc.genes.updated.2019
  human_mart = biomaRt::useEnsembl("ensembl", dataset = "hsapiens_gene_ensembl")
  mouse_mart = biomaRt::useEnsembl("ensembl", dataset = "mmusculus_gene_ensembl")
  cc_seurat$s.genes = biomaRt::getLDS(attributes = c("hgnc_symbol"), filters = "hgnc_symbol", values = cc_seurat$s.genes , mart = human_mart, attributesL = c("mgi_symbol"), martL = mouse_mart, uniqueRows=T)[, 2]
  cc_seurat$g2m.genes = biomaRt::getLDS(attributes = c("hgnc_symbol"), filters = "hgnc_symbol", values = cc_seurat$g2m.genes , mart = human_mart, attributesL = c("mgi_symbol"), martL = mouse_mart, uniqueRows=T)[, 2]
}
## Prediction
sobj <- Seurat::CellCycleScoring(object = sobj, s.features = cc_seurat$s.genes, g2m.features = cc_seurat$g2m.genes, assay = "RNA", nbin = 10, seed = the.seed)
sobj$Seurat.SmG2M.Score <- sobj$S.Score - sobj$G2M.Score
## Rename results columns with 'Seurat.' prefix
sobj$Seurat.S.Score <- sobj$S.Score
sobj$Seurat.G2M.Score <- sobj$G2M.Score
sobj$Seurat.Phase <- sobj$Phase
sobj$S.Score <- sobj$G2M.Score <- sobj$Phase <- NULL
## Print results
print("Cell cycle phase according to Seurat: ")

## [1] "Cell cycle phase according to Seurat: "

print(table(sobj$Seurat.Phase))

## 
##   G1  G2M    S 
## 1390   13  181

# Scran: Cyclone
## Load database
if(species == "human"){
  # Load
  cc_cyclone <- readRDS(system.file("exdata", "human_cycle_markers.rds", package = "scran"))
  # Translate database from ENSEMBL to gene SYMBOL
  suppressPackageStartupMessages(library(org.Hs.eg.db))
  for (phase in names(cc_cyclone)) {
    for (fs in names(cc_cyclone[[phase]])) {
      cc_cyclone[[phase]][[fs]] <- suppressMessages(AnnotationDbi::mapIds(org.Hs.eg.db, keys=cc_cyclone[[phase]][[fs]], column = "SYMBOL", keytype = "ENSEMBL", multiVals = "first"))
    }
  }
}else if (species == "mouse"){
  # Load
  cc_cyclone <- readRDS(system.file("exdata", "mouse_cycle_markers.rds", package = "scran"))
  # Translate database from ENSEMBL to gene SYMBOL
  suppressPackageStartupMessages(library(org.Mm.eg.db))
  for (phase in names(cc_cyclone)) {
    for (fs in names(cc_cyclone[[phase]])) {
      cc_cyclone[[phase]][[fs]] <- suppressMessages(AnnotationDbi::mapIds(org.Mm.eg.db, keys=cc_cyclone[[phase]][[fs]], column = "SYMBOL", keytype = "ENSEMBL", multiVals = "first"))
    }
  }
}
## Prediction
cycres <- scran::cyclone(Seurat::as.SingleCellExperiment(sobj), pairs = cc_cyclone, verbose = FALSE)
sobj$Cyclone.SmG2M.Score <- cycres$normalized.scores$S - cycres$normalized.scores$G2M
## Save results in the seurat object
sobj$Cyclone.Phase <- as.factor(cycres$phases)
sobj$Cyclone.G1.Score <- cycres$normalized.scores$G1
sobj$Cyclone.S.Score <- cycres$normalized.scores$S
sobj$Cyclone.G2M.Score <- cycres$normalized.scores$G2M
rm(cycres)
## Print results
print("Cell cycle phase according to Cyclone: ")

## [1] "Cell cycle phase according to Cyclone: "

print(table(sobj$Cyclone.Phase))

## 
##  G1 G2M   S 
## 750 215 619

# Crossing results : Seurat -VS- Cyclone
df_cycle <- data.frame(Seurat.Phase = sobj$Seurat.Phase, Cyclone.Phase = sobj$Cyclone.Phase)
print(table(df_cycle))

##             Cyclone.Phase
## Seurat.Phase  G1 G2M   S
##          G1  676 194 520
##          G2M   3   1   9
##          S    71  20  90

print(paste("Seurat and Cyclone agreed for ",round((sum(diag(table(df_cycle))) / sum(table(df_cycle)) *100),2),"% of droplets."))

## [1] "Seurat and Cyclone agreed for  48.42 % of droplets."

Identification of cell doublets

# scDblFinder
sobj$scDblFinder.class <- Seurat::as.Seurat(scDblFinder::scDblFinder(Seurat::as.SingleCellExperiment(sobj)))$scDblFinder.class

## Creating ~5000 artificial doublets...

## Dimensional reduction

## Evaluating kNN...

## Training model...

## iter=0, 19 cells excluded from training.

## iter=1, 39 cells excluded from training.

## iter=2, 41 cells excluded from training.

## Threshold found:0.916

## 52 (3.3%) doublets called

sobj$scDblFinder.class <- unname(sobj$scDblFinder.class == "doublet")
print('scDblFinder doublets :')

## [1] "scDblFinder doublets :"

print(table(sobj$scDblFinder.class))

## 
## FALSE  TRUE 
##  1532    52

# scds
sobj$scds.score <- scds::cxds_bcds_hybrid(Seurat::as.SingleCellExperiment(sobj))$hybrid_score

## [12:39:53] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.

sobj$scds.class <- unname(sobj$scds.score > 1)
print('scds-hybrid doublets :')

## [1] "scds-hybrid doublets :"

print(table(sobj$scds.class))

## 
## FALSE  TRUE 
##  1419   165

# union of scDblFinder & scds
sobj$doublets_consensus.class <- sobj$scDblFinder.class | sobj$scds.class
print('Consensus doublets :')

## [1] "Consensus doublets :"

print(table(sobj$doublets_consensus.class))

## 
## FALSE  TRUE 
##  1415   169

# Correlation between doublets and cell cycle
## Overlap cells
df_S.cycle_doublets <- data.frame(Seurat.Phase = sobj$Seurat.Phase, doublets = sobj$doublets_consensus.class)
print(table(df_S.cycle_doublets))

##             doublets
## Seurat.Phase FALSE TRUE
##          G1   1237  153
##          G2M    12    1
##          S     166   15

print(paste0(round((nrow(df_S.cycle_doublets[df_S.cycle_doublets$Seurat.Phase == "G2M" & df_S.cycle_doublets$doublets == TRUE,])/nrow(df_S.cycle_doublets[df_S.cycle_doublets$doublets == TRUE,])*100),2),"% of doublets are in G2M phase by Seurat."))

## [1] "0.59% of doublets are in G2M phase by Seurat."

df_C.cycle_doublets <- data.frame(Cyclone.Phase = sobj$Cyclone.Phase, doublets = sobj$doublets_consensus.class)
print(table(df_C.cycle_doublets))

##              doublets
## Cyclone.Phase FALSE TRUE
##           G1    673   77
##           G2M   204   11
##           S     538   81

print(paste0(round((nrow(df_C.cycle_doublets[df_C.cycle_doublets$Cyclone.Phase == "G2M" & df_C.cycle_doublets$doublets == TRUE,])/nrow(df_C.cycle_doublets[df_S.cycle_doublets$doublets == TRUE,])*100),2),"% of doublets are in G2M phase by Cyclone."))

## [1] "6.51% of doublets are in G2M phase by Cyclone."

## Quick 'n dirty uMAP 
## /!\ Warning: this umap will not represent the cell types present! Many parameters will have to be adjusted for this (tomorrow's course on Statistical models and analyses). Here, we just want to be able to represent all the cells on the same graph and plot the identification of doublets and the phases of the cell cycle /!\
tmp_sobj <- sobj
tmp_sobj <- suppressWarnings(Seurat::SCTransform(tmp_sobj, seed.use = the.seed, verbose = FALSE)) #normalization
tmp_sobj <- Seurat::RunPCA(tmp_sobj, verbose = FALSE,seed.use = the.seed) #dimension reduction
tmp_sobj <- Seurat::RunUMAP(tmp_sobj, dims = 1:25, verbose = FALSE, seed.use = the.seed)

## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session

Seurat::DimPlot(tmp_sobj, group.by = "Cyclone.Phase") + ggplot2::ggtitle("Cell Phase (cyclone)") + Seurat::DarkTheme()

Seurat::DimPlot(tmp_sobj, group.by = "Seurat.Phase") + ggplot2::ggtitle("Cell Phase (Seurat)") + Seurat::DarkTheme()

Seurat::DimPlot(tmp_sobj, group.by = "scDblFinder.class") + ggplot2::ggtitle("Cell doublets (scDblFinder)") + Seurat::DarkTheme()

Seurat::DimPlot(tmp_sobj, group.by = "scds.class") + ggplot2::ggtitle("Cell doublets (scds)") + Seurat::DarkTheme()

Seurat::DimPlot(tmp_sobj, group.by = "doublets_consensus.class") + ggplot2::ggtitle("Cell doublets (union)") + Seurat::DarkTheme()

# Cleaning
rm(tmp_sobj)
invisible(gc())

Filtering

# Filtering cell doublets
sobj <- sobj[, !sobj$doublets_consensus.class]

# Filtering non-qualified cells
cellskeep <- sobj$percent_mt >= pcmito.min & sobj$percent_mt <= pcmito.max & sobj$percent_rb >= pcribo.min & sobj$percent_rb <= pcribo.max
sobj <- sobj[, cellskeep]

# Filtering features with ultra-low expression
features_to_keep <- which(apply(sobj@assays$RNA@counts,1,function(x){ length(which(x>0)) }) >= min.cells)
sobj <- sobj[features_to_keep,]

Checking the effect of filtering

# Re-computing basic metrics : percentage of mito + ribo + nb features + nb counts
## Nb features by cell
sobj$nFeature_RNA <- apply(sobj@assays$RNA@counts,2,function(x){ length(which(x>0)) })
sobj$min_features <- sobj$nFeature_RNA >= min.features
## Nb counts by cell
sobj$nCount_RNA <- apply(sobj@assays$RNA@counts,2,function(x){ sum(x) })
sobj$min_counts <- sobj$nCount_RNA >= min.counts
## Percent mito
sobj$percent_mt <- Seurat::PercentageFeatureSet(sobj, pattern = mito_patern)
## Percent ribo
sobj$percent_rb <- Seurat::PercentageFeatureSet(sobj, pattern = ribo_patern)

# Some metrics about total filtered count matrix:
print('Number of cells in filtered data:')

## [1] "Number of cells in filtered data:"

print(ncol(sobj))

## [1] 1406

print('Number of genes in filtered data:')

## [1] "Number of genes in filtered data:"

print(nrow(sobj))

## [1] 21999

print('Number of UMIs in filtered data:')

## [1] "Number of UMIs in filtered data:"

print(sum(sobj@assays$RNA@counts))

## [1] 7487411

print(paste0('Number of cells with >= ', min.features, ' features :'))

## [1] "Number of cells with >= 200 features :"

print(table(sobj$min_features))

## 
## TRUE 
## 1406

print('Mean features by cell :')

## [1] "Mean features by cell :"

print(summary(sobj$nFeature_RNA))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     762    1477    1776    1848    2093    4994

print(paste0('Number of cells with >= ', min.counts, ' of total counts:'))

## [1] "Number of cells with >= 2500 of total counts:"

print(table(sobj$min_counts))

## 
## FALSE  TRUE 
##     2  1404

print('Mean Count by cell:')

## [1] "Mean Count by cell:"

print(summary(sobj$nCount_RNA))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2488    4090    5138    5325    6249   17297

print('% Mitochondrial expression :')

## [1] "% Mitochondrial expression :"

print(summary(sobj$percent_mt))

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##  0.07513  6.28521  7.80418  8.12489  9.65911 17.89279

print('% Ribosomal expression :')

## [1] "% Ribosomal expression :"

print(summary(sobj$percent_rb))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.6548 20.1281 28.6940 27.7038 34.8094 54.5083

print("Cell cycle phases according to Seurat: ")

## [1] "Cell cycle phases according to Seurat: "

print(table(sobj$Seurat.Phase))

## 
##   G1  G2M    S 
## 1229   12  165

print("Cell cycle phases according to Cyclone: ")

## [1] "Cell cycle phases according to Cyclone: "

print(table(sobj$Cyclone.Phase))

## 
##  G1 G2M   S 
## 671 200 535

print('Consensus doublets :')

## [1] "Consensus doublets :"

print(table(sobj$doublets_consensus.class))

## 
## FALSE 
##  1406

# Plots
## Histograms
ggplot2::qplot(sobj[["nFeature_RNA", drop = TRUE]], geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("nFeature_RNA (>= ", min.features, " : ", length(which(sobj$min_features)), " cells)"), xlab = "nFeature_RNA") + ggplot2::geom_vline(xintercept = min.features, col = "red", linetype = "dashed", size = 1.5)

ggplot2::qplot(sobj[["nCount_RNA", drop = TRUE]], geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("nCount_RNA (>= ", min.counts, " : ", length(which(sobj$min_counts)), " cells)"), xlab = "nCount_RNA") + ggplot2::geom_vline(xintercept = min.counts, col = "red", linetype = "dashed", size = 1.5)

ggplot2::qplot(sobj$percent_mt, geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("%mito (in [", pcmito.min, ';', pcmito.max, "] : ", length(which(df_percent_mt$droplets_state == "in")), " cells)"), xlab = "%mito") + ggplot2::geom_vline(xintercept = c(5,10,15,20), col = "cyan4", linetype = "dashed", size = 1) + ggplot2::geom_vline(xintercept = c(pcmito.min,pcmito.max), col = "red", linetype = "dashed", size = 1.5)

ggplot2::qplot(sobj$percent_rb, geom = "histogram", bins = 101, fill = I("white"), col = I("black"), main = paste0("%ribo (in [", pcribo.min, ';', pcribo.max, "] : ", length(which(df_percent_rb$droplets_state == "in")), " cells)"), xlab = "%ribo") + ggplot2::geom_vline(xintercept = c(pcribo.min,pcribo.max), col = "red", linetype = "dashed", size = 1.5)

## Violinplot
Seurat::VlnPlot(sobj, features = c("nFeature_RNA", "nCount_RNA","percent_mt","percent_rb"), ncol = 4)

## Scatterplot
Seurat::FeatureScatter(sobj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")

Save

# Save parameters
sobj@misc$params$sample.name <- sample.name
sobj@misc$params$species <- species
sobj@misc$params$QC$seed <- the.seed
sobj@misc$params$QC$emptydrops$emptydrops.retain <- emptydrops.retain
sobj@misc$params$QC$pcmito.range <- c(pcmito.min,pcmito.max)
sobj@misc$params$QC$pcribo.range <- c(pcribo.min,pcribo.max)
sobj@misc$params$QC$min.features <- min.features
sobj@misc$params$QC$min.counts <- min.counts
sobj@misc$params$QC$min.cells <- min.cells

# Save R session
sobj@misc$params$QC$Rsession <- utils::capture.output(devtools::session_info())

## Registered S3 method overwritten by 'cli':
##   method     from         
##   print.boxx spatstat.geom

# Save seurat object
dir.create(output.dir, recursive = TRUE, showWarnings = FALSE)
save(sobj, file = paste0(output.dir, sample.name, '_FILTERED_without.rda'), compress = "bzip2")

# Print R session info
utils::capture.output(devtools::session_info())

##   [1] "─ Session info  ──────────────────────────────────────────────────────────────"   
##   [2] " hash: cookie, man farmer: light skin tone, person golfing: medium-dark skin tone"
##   [3] ""                                                                                 
##   [4] " setting  value"                                                                  
##   [5] " version  R version 4.1.1 (2021-08-10)"                                           
##   [6] " os       Ubuntu 18.04.6 LTS"                                                     
##   [7] " system   x86_64, linux-gnu"                                                      
##   [8] " ui       X11"                                                                    
##   [9] " language (EN)"                                                                   
##  [10] " collate  fr_FR.UTF-8"                                                            
##  [11] " ctype    fr_FR.UTF-8"                                                            
##  [12] " tz       Europe/Paris"                                                           
##  [13] " date     2022-01-08"                                                             
##  [14] " pandoc   2.16.2 @ /shared/software/miniconda/envs/r-4.1.1/bin/ (via rmarkdown)"  
##  [15] ""                                                                                 
##  [16] "─ Packages ───────────────────────────────────────────────────────────────────"   
##  [17] " package              * version  date (UTC) lib source"                           
##  [18] " abind                  1.4-5    2016-07-21 [2] CRAN (R 4.1.0)"                   
##  [19] " AnnotationDbi        * 1.56.2   2021-11-09 [2] Bioconductor"                     
##  [20] " assertthat             0.2.1    2019-03-21 [2] CRAN (R 4.1.0)"                   
##  [21] " beachmat               2.10.0   2021-10-26 [2] Bioconductor"                     
##  [22] " beeswarm               0.4.0    2021-06-01 [2] CRAN (R 4.1.1)"                   
##  [23] " Biobase              * 2.54.0   2021-10-26 [2] Bioconductor"                     
##  [24] " BiocGenerics         * 0.40.0   2021-10-26 [2] Bioconductor"                     
##  [25] " BiocNeighbors          1.12.0   2021-10-26 [2] Bioconductor"                     
##  [26] " BiocParallel           1.28.3   2021-12-09 [2] Bioconductor"                     
##  [27] " BiocSingular           1.10.0   2021-10-26 [2] Bioconductor"                     
##  [28] " Biostrings             2.62.0   2021-10-26 [2] Bioconductor"                     
##  [29] " bit                    4.0.4    2020-08-04 [2] CRAN (R 4.1.0)"                   
##  [30] " bit64                  4.0.5    2020-08-30 [2] CRAN (R 4.1.0)"                   
##  [31] " bitops                 1.0-7    2021-04-24 [2] CRAN (R 4.1.0)"                   
##  [32] " blob                   1.2.2    2021-07-23 [2] CRAN (R 4.1.0)"                   
##  [33] " bluster                1.4.0    2021-10-26 [2] Bioconductor"                     
##  [34] " bslib                  0.3.1    2021-10-06 [2] CRAN (R 4.1.1)"                   
##  [35] " cachem                 1.0.6    2021-08-19 [2] CRAN (R 4.1.1)"                   
##  [36] " callr                  3.7.0    2021-04-20 [2] CRAN (R 4.1.0)"                   
##  [37] " cli                    3.1.0    2021-10-27 [2] CRAN (R 4.1.1)"                   
##  [38] " cluster                2.1.2    2021-04-17 [2] CRAN (R 4.1.0)"                   
##  [39] " codetools              0.2-18   2020-11-04 [2] CRAN (R 4.1.0)"                   
##  [40] " colorspace             2.0-2    2021-06-24 [2] CRAN (R 4.1.0)"                   
##  [41] " cowplot                1.1.1    2020-12-30 [2] CRAN (R 4.1.0)"                   
##  [42] " crayon                 1.4.2    2021-10-29 [2] CRAN (R 4.1.1)"                   
##  [43] " data.table             1.14.2   2021-09-27 [2] CRAN (R 4.1.1)"                   
##  [44] " DBI                    1.1.2    2021-12-20 [2] CRAN (R 4.1.1)"                   
##  [45] " DelayedArray           0.20.0   2021-10-26 [2] Bioconductor"                     
##  [46] " DelayedMatrixStats     1.16.0   2021-10-26 [2] Bioconductor"                     
##  [47] " deldir                 1.0-6    2021-10-23 [2] CRAN (R 4.1.1)"                   
##  [48] " desc                   1.4.0    2021-09-28 [2] CRAN (R 4.1.1)"                   
##  [49] " devtools               2.4.3    2021-11-30 [2] CRAN (R 4.1.1)"                   
##  [50] " digest                 0.6.29   2021-12-01 [2] CRAN (R 4.1.1)"                   
##  [51] " dplyr                * 1.0.7    2021-06-18 [2] CRAN (R 4.1.0)"                   
##  [52] " dqrng                  0.3.0    2021-05-01 [2] CRAN (R 4.1.0)"                   
##  [53] " DropletUtils           1.14.1   2021-11-08 [2] Bioconductor"                     
##  [54] " edgeR                  3.36.0   2021-10-26 [2] Bioconductor"                     
##  [55] " ellipsis               0.3.2    2021-04-29 [2] CRAN (R 4.1.0)"                   
##  [56] " evaluate               0.14     2019-05-28 [2] CRAN (R 4.1.0)"                   
##  [57] " fansi                  0.5.0    2021-05-25 [2] CRAN (R 4.1.1)"                   
##  [58] " farver                 2.1.0    2021-02-28 [2] CRAN (R 4.1.0)"                   
##  [59] " fastmap                1.1.0    2021-01-25 [2] CRAN (R 4.1.0)"                   
##  [60] " fitdistrplus           1.1-6    2021-09-28 [2] CRAN (R 4.1.1)"                   
##  [61] " fs                     1.5.2    2021-12-08 [2] CRAN (R 4.1.1)"                   
##  [62] " future                 1.23.0   2021-10-31 [2] CRAN (R 4.1.1)"                   
##  [63] " future.apply           1.8.1    2021-08-10 [2] CRAN (R 4.1.1)"                   
##  [64] " generics               0.1.1    2021-10-25 [2] CRAN (R 4.1.1)"                   
##  [65] " GenomeInfoDb           1.30.0   2021-10-26 [2] Bioconductor"                     
##  [66] " GenomeInfoDbData       1.2.7    2021-12-02 [2] Bioconductor"                     
##  [67] " GenomicRanges          1.46.1   2021-11-18 [2] Bioconductor"                     
##  [68] " ggbeeswarm             0.6.0    2017-08-07 [2] CRAN (R 4.1.1)"                   
##  [69] " ggplot2                3.3.5    2021-06-25 [2] CRAN (R 4.1.0)"                   
##  [70] " ggrepel                0.9.1    2021-01-15 [2] CRAN (R 4.1.0)"                   
##  [71] " ggridges               0.5.3    2021-01-08 [2] CRAN (R 4.1.0)"                   
##  [72] " globals                0.14.0   2020-11-22 [2] CRAN (R 4.1.0)"                   
##  [73] " glue                   1.6.0    2021-12-17 [2] CRAN (R 4.1.1)"                   
##  [74] " goftest                1.2-3    2021-10-07 [2] CRAN (R 4.1.1)"                   
##  [75] " gridExtra              2.3      2017-09-09 [2] CRAN (R 4.1.0)"                   
##  [76] " gtable                 0.3.0    2019-03-25 [2] CRAN (R 4.1.0)"                   
##  [77] " HDF5Array              1.22.1   2021-11-14 [2] Bioconductor"                     
##  [78] " highr                  0.9      2021-04-16 [2] CRAN (R 4.1.0)"                   
##  [79] " htmltools              0.5.2    2021-08-25 [2] CRAN (R 4.1.1)"                   
##  [80] " htmlwidgets            1.5.4    2021-09-08 [2] CRAN (R 4.1.1)"                   
##  [81] " httpuv                 1.6.4    2021-12-14 [2] CRAN (R 4.1.1)"                   
##  [82] " httr                   1.4.2    2020-07-20 [2] CRAN (R 4.1.0)"                   
##  [83] " ica                    1.0-2    2018-05-24 [2] CRAN (R 4.1.0)"                   
##  [84] " igraph                 1.2.10   2021-12-15 [2] CRAN (R 4.1.1)"                   
##  [85] " IRanges              * 2.28.0   2021-10-26 [2] Bioconductor"                     
##  [86] " irlba                  2.3.5    2021-12-06 [2] CRAN (R 4.1.1)"                   
##  [87] " jquerylib              0.1.4    2021-04-26 [2] CRAN (R 4.1.0)"                   
##  [88] " jsonlite               1.7.2    2020-12-09 [2] CRAN (R 4.1.0)"                   
##  [89] " KEGGREST               1.34.0   2021-10-26 [2] Bioconductor"                     
##  [90] " KernSmooth             2.23-20  2021-05-03 [2] CRAN (R 4.1.0)"                   
##  [91] " knitr                  1.37     2021-12-16 [2] CRAN (R 4.1.1)"                   
##  [92] " labeling               0.4.2    2020-10-20 [2] CRAN (R 4.1.0)"                   
##  [93] " later                  1.3.0    2021-08-18 [2] CRAN (R 4.1.1)"                   
##  [94] " lattice                0.20-45  2021-09-22 [2] CRAN (R 4.1.1)"                   
##  [95] " lazyeval               0.2.2    2019-03-15 [2] CRAN (R 4.1.0)"                   
##  [96] " leiden                 0.3.9    2021-07-27 [2] CRAN (R 4.1.0)"                   
##  [97] " lifecycle              1.0.1    2021-09-24 [2] CRAN (R 4.1.1)"                   
##  [98] " limma                  3.50.0   2021-10-26 [2] Bioconductor"                     
##  [99] " listenv                0.8.0    2019-12-05 [2] CRAN (R 4.1.0)"                   
## [100] " lmtest                 0.9-39   2021-11-07 [2] CRAN (R 4.1.1)"                   
## [101] " locfit                 1.5-9.4  2020-03-25 [2] CRAN (R 4.1.1)"                   
## [102] " magrittr               2.0.1    2020-11-17 [2] CRAN (R 4.1.0)"                   
## [103] " MASS                   7.3-54   2021-05-03 [2] CRAN (R 4.1.0)"                   
## [104] " Matrix                 1.3-4    2021-06-01 [2] CRAN (R 4.1.0)"                   
## [105] " MatrixGenerics         1.6.0    2021-10-26 [2] Bioconductor"                     
## [106] " matrixStats            0.61.0   2021-09-17 [2] CRAN (R 4.1.1)"                   
## [107] " memoise                2.0.1    2021-11-26 [2] CRAN (R 4.1.1)"                   
## [108] " metapod                1.2.0    2021-10-26 [2] Bioconductor"                     
## [109] " mgcv                   1.8-38   2021-10-06 [2] CRAN (R 4.1.1)"                   
## [110] " mime                   0.12     2021-09-28 [2] CRAN (R 4.1.1)"                   
## [111] " miniUI                 0.1.1.1  2018-05-18 [2] CRAN (R 4.1.0)"                   
## [112] " munsell                0.5.0    2018-06-12 [2] CRAN (R 4.1.0)"                   
## [113] " nlme                   3.1-153  2021-09-07 [2] CRAN (R 4.1.1)"                   
## [114] " org.Hs.eg.db         * 3.14.0   2021-12-09 [2] Bioconductor"                     
## [115] " parallelly             1.30.0   2021-12-17 [2] CRAN (R 4.1.1)"                   
## [116] " patchwork              1.1.1    2020-12-17 [2] CRAN (R 4.1.0)"                   
## [117] " pbapply                1.5-0    2021-09-16 [2] CRAN (R 4.1.1)"                   
## [118] " pillar                 1.6.4    2021-10-18 [2] CRAN (R 4.1.1)"                   
## [119] " pkgbuild               1.2.1    2021-11-30 [2] CRAN (R 4.1.1)"                   
## [120] " pkgconfig              2.0.3    2019-09-22 [2] CRAN (R 4.1.0)"                   
## [121] " pkgload                1.2.4    2021-11-30 [2] CRAN (R 4.1.1)"                   
## [122] " plotly                 4.10.0   2021-10-09 [2] CRAN (R 4.1.1)"                   
## [123] " plyr                   1.8.6    2020-03-03 [2] CRAN (R 4.1.0)"                   
## [124] " png                    0.1-7    2013-12-03 [2] CRAN (R 4.1.0)"                   
## [125] " polyclip               1.10-0   2019-03-14 [2] CRAN (R 4.1.0)"                   
## [126] " prettyunits            1.1.1    2020-01-24 [2] CRAN (R 4.1.0)"                   
## [127] " pROC                   1.18.0   2021-09-03 [2] CRAN (R 4.1.1)"                   
## [128] " processx               3.5.2    2021-04-30 [2] CRAN (R 4.1.0)"                   
## [129] " promises               1.2.0.1  2021-02-11 [2] CRAN (R 4.1.0)"                   
## [130] " ps                     1.6.0    2021-02-28 [2] CRAN (R 4.1.0)"                   
## [131] " purrr                  0.3.4    2020-04-17 [2] CRAN (R 4.1.0)"                   
## [132] " R.methodsS3            1.8.1    2020-08-26 [2] CRAN (R 4.1.0)"                   
## [133] " R.oo                   1.24.0   2020-08-26 [2] CRAN (R 4.1.0)"                   
## [134] " R.utils                2.11.0   2021-09-26 [2] CRAN (R 4.1.1)"                   
## [135] " R6                     2.5.1    2021-08-19 [2] CRAN (R 4.1.1)"                   
## [136] " RANN                   2.6.1    2019-01-08 [2] CRAN (R 4.1.0)"                   
## [137] " RColorBrewer           1.1-2    2014-12-07 [2] CRAN (R 4.1.0)"                   
## [138] " Rcpp                   1.0.7    2021-07-07 [2] CRAN (R 4.1.0)"                   
## [139] " RcppAnnoy              0.0.19   2021-07-30 [2] CRAN (R 4.1.0)"                   
## [140] " RCurl                  1.98-1.5 2021-09-17 [2] CRAN (R 4.1.1)"                   
## [141] " remotes                2.4.2    2021-11-30 [2] CRAN (R 4.1.1)"                   
## [142] " reshape2               1.4.4    2020-04-09 [2] CRAN (R 4.1.0)"                   
## [143] " reticulate             1.22     2021-09-17 [2] CRAN (R 4.1.1)"                   
## [144] " rhdf5                  2.38.0   2021-10-26 [2] Bioconductor"                     
## [145] " rhdf5filters           1.6.0    2021-10-26 [2] Bioconductor"                     
## [146] " Rhdf5lib               1.16.0   2021-10-26 [2] Bioconductor"                     
## [147] " rlang                  0.4.12   2021-10-18 [2] CRAN (R 4.1.1)"                   
## [148] " rmarkdown              2.11     2021-09-14 [2] CRAN (R 4.1.1)"                   
## [149] " ROCR                   1.0-11   2020-05-02 [2] CRAN (R 4.1.0)"                   
## [150] " rpart                  4.1-15   2019-04-12 [2] CRAN (R 4.1.0)"                   
## [151] " rprojroot              2.0.2    2020-11-15 [2] CRAN (R 4.1.0)"                   
## [152] " RSpectra               0.16-0   2019-12-01 [2] CRAN (R 4.1.1)"                   
## [153] " RSQLite                2.2.9    2021-12-06 [2] CRAN (R 4.1.1)"                   
## [154] " rstudioapi             0.13     2020-11-12 [2] CRAN (R 4.1.0)"                   
## [155] " rsvd                   1.0.5    2021-04-16 [2] CRAN (R 4.1.1)"                   
## [156] " Rtsne                  0.15     2018-11-10 [2] CRAN (R 4.1.0)"                   
## [157] " S4Vectors            * 0.32.3   2021-11-21 [2] Bioconductor"                     
## [158] " sass                   0.4.0    2021-05-12 [2] CRAN (R 4.1.0)"                   
## [159] " ScaledMatrix           1.2.0    2021-10-26 [2] Bioconductor"                     
## [160] " scales                 1.1.1    2020-05-11 [2] CRAN (R 4.1.0)"                   
## [161] " scater                 1.22.0   2021-10-26 [2] Bioconductor"                     
## [162] " scattermore            0.7      2020-11-24 [2] CRAN (R 4.1.0)"                   
## [163] " scDblFinder            1.8.0    2021-10-26 [2] Bioconductor"                     
## [164] " scds                   1.10.0   2021-10-26 [2] Bioconductor"                     
## [165] " scran                  1.22.1   2021-11-14 [2] Bioconductor"                     
## [166] " sctransform            0.3.2    2020-12-16 [2] CRAN (R 4.1.0)"                   
## [167] " scuttle                1.4.0    2021-10-26 [2] Bioconductor"                     
## [168] " sessioninfo            1.2.1    2021-11-02 [2] CRAN (R 4.1.1)"                   
## [169] " Seurat                 4.0.6    2021-12-16 [2] CRAN (R 4.1.1)"                   
## [170] " SeuratObject           4.0.4    2021-11-23 [2] CRAN (R 4.1.1)"                   
## [171] " shiny                  1.7.1    2021-10-02 [2] CRAN (R 4.1.1)"                   
## [172] " SingleCellExperiment   1.16.0   2021-10-26 [2] Bioconductor"                     
## [173] " sparseMatrixStats      1.6.0    2021-10-26 [2] Bioconductor"                     
## [174] " spatstat.core          2.3-2    2021-11-26 [2] CRAN (R 4.1.1)"                   
## [175] " spatstat.data          2.1-2    2021-12-17 [2] CRAN (R 4.1.1)"                   
## [176] " spatstat.geom          2.3-1    2021-12-10 [2] CRAN (R 4.1.1)"                   
## [177] " spatstat.sparse        2.1-0    2021-12-17 [2] CRAN (R 4.1.1)"                   
## [178] " spatstat.utils         2.3-0    2021-12-12 [2] CRAN (R 4.1.1)"                   
## [179] " statmod                1.4.36   2021-05-10 [2] CRAN (R 4.1.0)"                   
## [180] " stringi                1.7.6    2021-11-29 [2] CRAN (R 4.1.1)"                   
## [181] " stringr                1.4.0    2019-02-10 [2] CRAN (R 4.1.0)"                   
## [182] " SummarizedExperiment   1.24.0   2021-10-26 [2] Bioconductor"                     
## [183] " survival               3.2-13   2021-08-24 [2] CRAN (R 4.1.1)"                   
## [184] " tensor                 1.5      2012-05-05 [2] CRAN (R 4.1.0)"                   
## [185] " testthat               3.1.0    2021-10-04 [2] CRAN (R 4.1.1)"                   
## [186] " tibble                 3.1.6    2021-11-07 [2] CRAN (R 4.1.1)"                   
## [187] " tidyr                  1.1.4    2021-09-27 [2] CRAN (R 4.1.1)"                   
## [188] " tidyselect             1.1.1    2021-04-30 [2] CRAN (R 4.1.0)"                   
## [189] " usethis                2.1.3    2021-10-27 [2] CRAN (R 4.1.1)"                   
## [190] " utf8                   1.2.2    2021-07-24 [2] CRAN (R 4.1.0)"                   
## [191] " uwot                   0.1.11   2021-12-02 [2] CRAN (R 4.1.1)"                   
## [192] " vctrs                  0.3.8    2021-04-29 [2] CRAN (R 4.1.0)"                   
## [193] " vipor                  0.4.5    2017-03-22 [2] CRAN (R 4.1.1)"                   
## [194] " viridis                0.6.2    2021-10-13 [2] CRAN (R 4.1.1)"                   
## [195] " viridisLite            0.4.0    2021-04-13 [2] CRAN (R 4.1.0)"                   
## [196] " withr                  2.4.3    2021-11-30 [2] CRAN (R 4.1.1)"                   
## [197] " xfun                   0.29     2021-12-14 [2] CRAN (R 4.1.1)"                   
## [198] " xgboost                1.5.0.2  2021-11-21 [2] CRAN (R 4.1.1)"                   
## [199] " xtable                 1.8-4    2019-04-21 [2] CRAN (R 4.1.0)"                   
## [200] " XVector                0.34.0   2021-10-26 [2] Bioconductor"                     
## [201] " yaml                   2.2.1    2020-02-01 [2] CRAN (R 4.1.0)"                   
## [202] " zlibbioc               1.40.0   2021-10-26 [2] Bioconductor"                     
## [203] " zoo                    1.8-9    2021-03-09 [2] CRAN (R 4.1.0)"                   
## [204] ""                                                                                 
## [205] " [1] /shared/home/maglave/R/x86_64-conda-linux-gnu-library/4.1"                   
## [206] " [2] /shared/software/miniconda/envs/r-4.1.1/lib/R/library"                       
## [207] ""                                                                                 
## [208] "──────────────────────────────────────────────────────────────────────────────"