library(Seurat)
library(dplyr)
library(RColorBrewer)
library(ggplot2)
library(ggExtra)
library(cowplot)
library(reticulate)
library(wesanderson)
use_python("/usr/bin/python3")
#Set ggplot theme as classic
theme_set(theme_classic())
This dataset was generated from the sequencing of two 10X V3 libraries run in parallel from the same tissue dissociation prep
<- Read10X(data.dir = "../../RawData/Hem_1_filtered_feature_bc_matrix/")
Countdata
<- CreateSeuratObject(raw.data = Countdata,
Raw.data min.cells = 3,
min.genes = 800,
project = "Hem1") ; rm(Countdata)
@meta.data$Barcodes <- rownames(Raw.data@meta.data)
Raw.data
dim(Raw.data@data)
## [1] 18098 10180
<- grep(pattern = "^mt-", x = rownames(x = Raw.data@data), value = TRUE)
mito.genes <- Matrix::colSums(Raw.data@raw.data[mito.genes, ])/Matrix::colSums(Raw.data@raw.data)
percent.mito <- AddMetaData(object = Raw.data, metadata = percent.mito, col.name = "percent.mito")
Raw.data
<- grep(pattern = "(^Rpl|^Rps|^Mrp)", x = rownames(x = Raw.data@data), value = TRUE)
ribo.genes <- Matrix::colSums(Raw.data@raw.data[ribo.genes, ])/Matrix::colSums(Raw.data@raw.data)
percent.ribo <- AddMetaData(object = Raw.data, metadata = percent.ribo, col.name = "percent.ribo")
Raw.data
rm(mito.genes, percent.mito,ribo.genes,percent.ribo)
VlnPlot(object = Raw.data, features.plot = c("nGene","nUMI", "percent.mito", "percent.ribo"), nCol = 2 )
# Relation between nUMI and nGene detected
<- Raw.data@meta.data
Cell.QC.Stat $Barcodes <- rownames(Cell.QC.Stat)
Cell.QC.Stat
<- ggplot(Cell.QC.Stat, aes(x=nUMI, y=nGene)) + geom_point() + geom_smooth(method="lm")
p1 <- ggMarginal(p1, type = "histogram", fill="lightgrey")
p1
<- ggplot(Cell.QC.Stat, aes(x=log10(nUMI), y=log10(nGene))) + geom_point() + geom_smooth(method="lm")
p2 <- ggMarginal(p2, type = "histogram", fill="lightgrey")
p2
plot_grid(plotlist = list(p1,p2), ncol=2, align='h', rel_widths = c(1, 1)) ; rm(p1,p2)
Cells with deviating nGene/nUMI ratio display an Erythrocyte signature
<- list(c("Hbb-bt", "Hbq1a", "Isg20", "Fech", "Snca", "Rec114"))
genes.list <- "Erythrocyte.signature"
enrich.name <- AddModuleScore(Raw.data,
Raw.data genes.list = genes.list,
genes.pool = NULL,
n.bin = 5,
seed.use = 1,
ctrl.size = length(genes.list),
use.k = FALSE,
enrich.name = enrich.name,
random.seed = 1)
$Erythrocyte.signature <- Raw.data@meta.data$Erythrocyte.signature1 Cell.QC.Stat
<- colorRampPalette(brewer.pal(n =11, name = "Spectral"))(100)
gradient
<- ggplot(Cell.QC.Stat, aes(x= log10(nUMI), y= log10(nGene))) +
p1 geom_point(aes(color= Erythrocyte.signature)) +
scale_color_gradientn(colours=rev(gradient), name='Erythrocyte score') +
geom_smooth(method="lm")
<- ggMarginal(p1, type = "histogram", fill="lightgrey")
p1 p1
We applied a high and low median absolute deviation (mad) thresholds to exclude outlier cells
<- median(Cell.QC.Stat$percent.mito) + 3*mad(Cell.QC.Stat$percent.mito)
max.mito.thr <- median(Cell.QC.Stat$percent.mito) - 3*mad(Cell.QC.Stat$percent.mito) min.mito.thr
<- ggplot(Cell.QC.Stat, aes(x=nGene, y=percent.mito)) +
p1 geom_point() +
geom_hline(aes(yintercept = max.mito.thr), colour = "red", linetype = 2) +
geom_hline(aes(yintercept = min.mito.thr), colour = "red", linetype = 2) +
annotate(geom = "text", label = paste0(as.numeric(table(Cell.QC.Stat$percent.mito > max.mito.thr | Cell.QC.Stat$percent.mito < min.mito.thr)[2])," cells removed\n",
as.numeric(table(Cell.QC.Stat$percent.mito > max.mito.thr | Cell.QC.Stat$percent.mito < min.mito.thr)[1])," cells remain"),
x = 6000, y = 0.4)
ggMarginal(p1, type = "histogram", fill="lightgrey", bins=100)
# Filter cells based on these thresholds
<- Cell.QC.Stat %>% filter(percent.mito < max.mito.thr) %>% filter(percent.mito > min.mito.thr) Cell.QC.Stat
We filter cells which are likely to be doublet based on their higher content of transcript detected as well as cell with to few genes/UMI sequenced
# Set low and hight thresholds on the number of detected genes
<- median(log10(Cell.QC.Stat$nGene)) - 3*mad(log10(Cell.QC.Stat$nGene))
min.Genes.thr <- median(log10(Cell.QC.Stat$nGene)) + 3*mad(log10(Cell.QC.Stat$nGene))
max.Genes.thr
# Set hight threshold on the number of transcripts
<- median(log10(Cell.QC.Stat$nUMI)) + 3*mad(log10(Cell.QC.Stat$nUMI)) max.nUMI.thr
# Gene/UMI scatter plot before filtering
<- ggplot(Cell.QC.Stat, aes(x=log10(nUMI), y=log10(nGene))) +
p1 geom_point() +
geom_smooth(method="lm") +
geom_hline(aes(yintercept = min.Genes.thr), colour = "green", linetype = 2) +
geom_hline(aes(yintercept = max.Genes.thr), colour = "green", linetype = 2) +
geom_vline(aes(xintercept = max.nUMI.thr), colour = "red", linetype = 2)
ggMarginal(p1, type = "histogram", fill="lightgrey")
# Filter cells base on both metrics
<- Cell.QC.Stat %>% filter(log10(nGene) > min.Genes.thr) %>% filter(log10(nUMI) < max.nUMI.thr) Cell.QC.Stat
<- lm(data = Cell.QC.Stat, formula = log10(nGene) ~ log10(nUMI))
lm.model
<- ggplot(Cell.QC.Stat, aes(x=log10(nUMI), y=log10(nGene))) +
p2 geom_point() +
geom_smooth(method="lm") +
geom_hline(aes(yintercept = min.Genes.thr), colour = "green", linetype = 2) +
geom_hline(aes(yintercept = max.Genes.thr), colour = "green", linetype = 2) +
geom_vline(aes(xintercept = max.nUMI.thr), colour = "red", linetype = 2) +
geom_abline(intercept = lm.model$coefficients[1] - 0.09 , slope = lm.model$coefficients[2], color="orange") +
annotate(geom = "text", label = paste0(dim(Cell.QC.Stat)[1], " QC passed cells"), x = 4, y = 3.8)
ggMarginal(p2, type = "histogram", fill="lightgrey")
# Cells to exclude lie below an intercept offset of -0.09
$valideCells <- log10(Cell.QC.Stat$nGene) > (log10(Cell.QC.Stat$nUMI) * lm.model$coefficients[2] + (lm.model$coefficients[1] - 0.09)) Cell.QC.Stat
<- ggplot(Cell.QC.Stat, aes(x=log10(nUMI), y=log10(nGene))) +
p3 geom_point(aes(colour = valideCells)) +
geom_smooth(method="lm") +
geom_abline(intercept = lm.model$coefficients[1] - 0.09 , slope = lm.model$coefficients[2], color="orange") +
theme(legend.position="none") +
annotate(geom = "text", label = paste0(as.numeric(table(Cell.QC.Stat$valideCells)[2]), " QC passed cells\n",
as.numeric(table(Cell.QC.Stat$valideCells)[1]), " QC filtered"), x = 4, y = 3.8)
ggMarginal(p3, type = "histogram", fill="lightgrey")
# Remove invalid cells
<- Cell.QC.Stat %>% filter(valideCells) Cell.QC.Stat
<- SubsetData(Raw.data, cells.use = Cell.QC.Stat$Barcodes , subset.raw = T, do.clean = F) Raw.data
# Plot final QC metrics
VlnPlot(object = Raw.data, features.plot = c("nGene","nUMI", "percent.mito", "percent.ribo"), nCol = 2 )
<- ggplot(Raw.data@meta.data, aes(x=log10(nUMI), y=log10(nGene))) + geom_point() + geom_smooth(method="lm")
p1 ggMarginal(p1, type = "histogram", fill="lightgrey")
rm(list = ls()[!ls() %in% "Raw.data"])
Export raw count matrix as input to Scrublet
#Export filtered matrix
dir.create("../../Scrublet_inputs")
<- Matrix(as.matrix(Raw.data@raw.data), sparse = TRUE)
exprData writeMM(exprData, "../../Scrublet_inputs/matrix1.mtx")
## NULL
import scrublet as scr
import scipy.io
import numpy as np
import os
#Load raw counts matrix and gene list
= '../../Scrublet_inputs'
input_dir = scipy.io.mmread(input_dir + '/matrix1.mtx').T.tocsc()
counts_matrix
#Initialize Scrublet
= scr.Scrublet(counts_matrix,
scrub =0.1,
expected_doublet_rate=2,
sim_doublet_ratio= 8)
n_neighbors
#Run the default pipeline
= scrub.scrub_doublets(min_counts=1,
doublet_scores, predicted_doublets =3,
min_cells=85,
min_gene_variability_pctl=25) n_prin_comps
## Preprocessing...
## Simulating doublets...
## Embedding transcriptomes using PCA...
## Calculating doublet scores...
## Automatically set threshold at doublet score = 0.20
## Detected doublet rate = 7.4%
## Estimated detectable doublet fraction = 68.6%
## Overall doublet rate:
## Expected = 10.0%
## Estimated = 10.8%
## Elapsed time: 28.5 seconds
# Import scrublet's doublet score
@meta.data$Doubletscore <- py$doublet_scores
Raw.data
# Plot doublet score
ggplot(Raw.data@meta.data, aes(x = Doubletscore, stat(ndensity))) +
geom_histogram(bins = 200, colour ="lightgrey")+
geom_vline(xintercept = 0.20, colour = "red", linetype = 2)
# Manually set threshold at doublet score to 0.2
@meta.data$Predicted_doublets <- ifelse(py$doublet_scores > 0.2, "Doublet","Singlet" )
Raw.datatable(Raw.data@meta.data$Predicted_doublets)
##
## Doublet Singlet
## 658 8184
#Remove Scrublet inferred doublets
<- rownames(Raw.data@meta.data[Raw.data@meta.data$Predicted_doublets == "Singlet",])
Valid.Cells
.1 <- SubsetData(Raw.data, cells.use = Valid.Cells, subset.raw = T, do.clean = F) QC.data
rm(list = ls()[!ls() %in% "QC.data.1"])
<- Read10X(data.dir = "../../RawData/Hem_2_filtered_feature_bc_matrix/")
Countdata
<- CreateSeuratObject(raw.data = Countdata,
Raw.data min.cells = 3,
min.genes = 800,
project = "Hem2") ; rm(Countdata)
@meta.data$Barcodes <- rownames(Raw.data@meta.data)
Raw.data
dim(Raw.data@data)
## [1] 18048 8817
<- grep(pattern = "^mt-", x = rownames(x = Raw.data@data), value = TRUE)
mito.genes <- Matrix::colSums(Raw.data@raw.data[mito.genes, ])/Matrix::colSums(Raw.data@raw.data)
percent.mito <- AddMetaData(object = Raw.data, metadata = percent.mito, col.name = "percent.mito")
Raw.data
<- grep(pattern = "(^Rpl|^Rps|^Mrp)", x = rownames(x = Raw.data@data), value = TRUE)
ribo.genes <- Matrix::colSums(Raw.data@raw.data[ribo.genes, ])/Matrix::colSums(Raw.data@raw.data)
percent.ribo <- AddMetaData(object = Raw.data, metadata = percent.ribo, col.name = "percent.ribo")
Raw.data
rm(mito.genes, percent.mito,ribo.genes,percent.ribo)
VlnPlot(object = Raw.data, features.plot = c("nGene","nUMI", "percent.mito", "percent.ribo"), nCol = 2 )
# Relation between nUMI and nGene detected
<- Raw.data@meta.data
Cell.QC.Stat $Barcodes <- rownames(Cell.QC.Stat)
Cell.QC.Stat
<- ggplot(Cell.QC.Stat, aes(x=nUMI, y=nGene)) + geom_point() + geom_smooth(method="lm")
p1 <- ggMarginal(p1, type = "histogram", fill="lightgrey")
p1
<- ggplot(Cell.QC.Stat, aes(x=log10(nUMI), y=log10(nGene))) + geom_point() + geom_smooth(method="lm")
p2 <- ggMarginal(p2, type = "histogram", fill="lightgrey")
p2
plot_grid(plotlist = list(p1,p2), ncol=2, align='h', rel_widths = c(1, 1)) ; rm(p1,p2)
Cells with deviating nGene/nUMI ratio display an Erythrocyte signature
<- list(c("Hbb-bt", "Hbq1a", "Isg20", "Fech", "Snca", "Rec114"))
genes.list <- "Erythrocyte.signature"
enrich.name <- AddModuleScore(Raw.data,
Raw.data genes.list = genes.list,
genes.pool = NULL,
n.bin = 5,
seed.use = 1,
ctrl.size = length(genes.list),
use.k = FALSE,
enrich.name = enrich.name,
random.seed = 1)
$Erythrocyte.signature <- Raw.data@meta.data$Erythrocyte.signature1 Cell.QC.Stat
<- colorRampPalette(brewer.pal(n =11, name = "Spectral"))(100)
gradient
<- ggplot(Cell.QC.Stat, aes(x= log10(nUMI), y= log10(nGene))) +
p1 geom_point(aes(color= Erythrocyte.signature)) +
scale_color_gradientn(colours=rev(gradient), name='Erythrocyte score') +
geom_smooth(method="lm")
<- ggMarginal(p1, type = "histogram", fill="lightgrey")
p1 p1
We applied a high and low median absolute deviation (mad) thresholds to exclude outlier cells
<- median(Cell.QC.Stat$percent.mito) + 3*mad(Cell.QC.Stat$percent.mito)
max.mito.thr <- median(Cell.QC.Stat$percent.mito) - 3*mad(Cell.QC.Stat$percent.mito) min.mito.thr
<- ggplot(Cell.QC.Stat, aes(x=nGene, y=percent.mito)) +
p1 geom_point() +
geom_hline(aes(yintercept = max.mito.thr), colour = "red", linetype = 2) +
geom_hline(aes(yintercept = min.mito.thr), colour = "red", linetype = 2) +
annotate(geom = "text", label = paste0(as.numeric(table(Cell.QC.Stat$percent.mito > max.mito.thr | Cell.QC.Stat$percent.mito < min.mito.thr)[2])," cells removed\n",
as.numeric(table(Cell.QC.Stat$percent.mito > max.mito.thr | Cell.QC.Stat$percent.mito < min.mito.thr)[1])," cells remain"),
x = 6000, y = 0.4)
ggMarginal(p1, type = "histogram", fill="lightgrey", bins=100)
# Filter cells based on these thresholds
<- Cell.QC.Stat %>% filter(percent.mito < max.mito.thr) %>% filter(percent.mito > min.mito.thr) Cell.QC.Stat
We filter cells which are likely to be doublet based on their higher content of transcript detected as well as cell with to few genes/UMI sequenced
# Set low and hight thresholds on the number of detected genes
<- median(log10(Cell.QC.Stat$nGene)) - 3*mad(log10(Cell.QC.Stat$nGene))
min.Genes.thr <- median(log10(Cell.QC.Stat$nGene)) + 3*mad(log10(Cell.QC.Stat$nGene))
max.Genes.thr
# Set hight threshold on the number of transcripts
<- median(log10(Cell.QC.Stat$nUMI)) + 3*mad(log10(Cell.QC.Stat$nUMI)) max.nUMI.thr
# Gene/UMI scatter plot before filtering
<- ggplot(Cell.QC.Stat, aes(x=log10(nUMI), y=log10(nGene))) +
p1 geom_point() +
geom_smooth(method="lm") +
geom_hline(aes(yintercept = min.Genes.thr), colour = "green", linetype = 2) +
geom_hline(aes(yintercept = max.Genes.thr), colour = "green", linetype = 2) +
geom_vline(aes(xintercept = max.nUMI.thr), colour = "red", linetype = 2)
ggMarginal(p1, type = "histogram", fill="lightgrey")
# Filter cells base on both metrics
<- Cell.QC.Stat %>% filter(log10(nGene) > min.Genes.thr) %>% filter(log10(nUMI) < max.nUMI.thr) Cell.QC.Stat
<- lm(data = Cell.QC.Stat, formula = log10(nGene) ~ log10(nUMI))
lm.model
<- ggplot(Cell.QC.Stat, aes(x=log10(nUMI), y=log10(nGene))) +
p2 geom_point() +
geom_smooth(method="lm") +
geom_hline(aes(yintercept = min.Genes.thr), colour = "green", linetype = 2) +
geom_hline(aes(yintercept = max.Genes.thr), colour = "green", linetype = 2) +
geom_vline(aes(xintercept = max.nUMI.thr), colour = "red", linetype = 2) +
geom_abline(intercept = lm.model$coefficients[1] - 0.09 , slope = lm.model$coefficients[2], color="orange") +
annotate(geom = "text", label = paste0(dim(Cell.QC.Stat)[1], " QC passed cells"), x = 4, y = 3.8)
ggMarginal(p2, type = "histogram", fill="lightgrey")
# Cells to exclude lie below an intercept offset of -0.09
$valideCells <- log10(Cell.QC.Stat$nGene) > (log10(Cell.QC.Stat$nUMI) * lm.model$coefficients[2] + (lm.model$coefficients[1] - 0.09)) Cell.QC.Stat
<- ggplot(Cell.QC.Stat, aes(x=log10(nUMI), y=log10(nGene))) +
p3 geom_point(aes(colour = valideCells)) +
geom_smooth(method="lm") +
geom_abline(intercept = lm.model$coefficients[1] - 0.09 , slope = lm.model$coefficients[2], color="orange") +
theme(legend.position="none") +
annotate(geom = "text", label = paste0(as.numeric(table(Cell.QC.Stat$valideCells)[2]), " QC passed cells\n",
as.numeric(table(Cell.QC.Stat$valideCells)[1]), " QC filtered"), x = 4, y = 3.8)
ggMarginal(p3, type = "histogram", fill="lightgrey")
# Remove invalid cells
<- Cell.QC.Stat %>% filter(valideCells) Cell.QC.Stat
<- SubsetData(Raw.data, cells.use = Cell.QC.Stat$Barcodes , subset.raw = T, do.clean = F) Raw.data
# Plot final QC metrics
VlnPlot(object = Raw.data, features.plot = c("nGene","nUMI", "percent.mito", "percent.ribo"), nCol = 2 )
<- ggplot(Raw.data@meta.data, aes(x=log10(nUMI), y=log10(nGene))) + geom_point() + geom_smooth(method="lm")
p1 ggMarginal(p1, type = "histogram", fill="lightgrey")
rm(list = ls()[!ls() %in% c("Raw.data", "QC.data.1")])
Export raw count matrix as input to Scrublet
#Export filtered matrix
<- Matrix(as.matrix(Raw.data@raw.data), sparse = TRUE)
exprData writeMM(exprData, "../../Scrublet_inputs/matrix2.mtx")
## NULL
import scrublet as scr
import scipy.io
import numpy as np
import os
#Load raw counts matrix and gene list
= '../../Scrublet_inputs'
input_dir = scipy.io.mmread(input_dir + '/matrix2.mtx').T.tocsc()
counts_matrix
#Initialize Scrublet
= scr.Scrublet(counts_matrix,
scrub =0.1,
expected_doublet_rate=2,
sim_doublet_ratio= 8)
n_neighbors
#Run the default pipeline
= scrub.scrub_doublets(min_counts=1,
doublet_scores, predicted_doublets =3,
min_cells=85,
min_gene_variability_pctl=25) n_prin_comps
## Preprocessing...
## Simulating doublets...
## Embedding transcriptomes using PCA...
## Calculating doublet scores...
## Automatically set threshold at doublet score = 0.24
## Detected doublet rate = 5.1%
## Estimated detectable doublet fraction = 59.0%
## Overall doublet rate:
## Expected = 10.0%
## Estimated = 8.7%
## Elapsed time: 9.8 seconds
##
## /home/matthieu/.local/lib/python3.6/site-packages/scrublet/helper_functions.py:239: RuntimeWarning: invalid value encountered in log
## gLog = lambda input: np.log(input[1] * np.exp(-input[0]) + input[2])
# Import scrublet's doublet score
@meta.data$Doubletscore <- py$doublet_scores
Raw.data
# Plot doublet score
ggplot(Raw.data@meta.data, aes(x = Doubletscore, stat(ndensity))) +
geom_histogram(bins = 200, colour ="lightgrey")+
geom_vline(xintercept = 0.24, colour = "red", linetype = 2)
# Manually set threshold at doublet score to 0.2
@meta.data$Predicted_doublets <- ifelse(py$doublet_scores > 0.24, "Doublet","Singlet" )
Raw.datatable(Raw.data@meta.data$Predicted_doublets)
##
## Doublet Singlet
## 385 7149
#Remove Scrublet inferred doublets
<- rownames(Raw.data@meta.data[Raw.data@meta.data$Predicted_doublets == "Singlet",])
Valid.Cells
.2 <- SubsetData(Raw.data, cells.use = Valid.Cells, subset.raw = T, do.clean = F) QC.data
rm(list = ls()[!ls() %in% c("QC.data.1", "QC.data.2")])
<- MergeSeurat(QC.data.1, QC.data.2,
Hem.data do.normalize = F,
add.cell.id1 = "Hem1",
add.cell.id2 = "Hem2")
@meta.data$Barcodes <- rownames(Hem.data@meta.data) Hem.data
<- Hem.data@meta.data
Cell.QC.Stat $Barcodes <- rownames(Cell.QC.Stat)
Cell.QC.Stat
<- ggplot(Cell.QC.Stat, aes(x=nUMI, y=nGene)) + geom_point() + geom_smooth(method="lm")
p1 <- ggMarginal(p1, type = "histogram", fill="lightgrey")
p1
<- ggplot(Cell.QC.Stat, aes(x=log10(nUMI), y=log10(nGene))) + geom_point() + geom_smooth(method="lm")
p2 <- ggMarginal(p2, type = "histogram", fill="lightgrey")
p2
plot_grid(plotlist = list(p1,p2), ncol=2, align='h', rel_widths = c(1, 1)) ; rm(p1,p2)
rm(list = ls()[!ls() %in% "Hem.data"])
# Filter genes expressed by less than 3 cells
<- Matrix::rowSums(Hem.data@data > 0)
num.cells <- names(x = num.cells[which(x = num.cells >= 3)])
genes.use @raw.data <- Hem.data@raw.data[genes.use, ]
Hem.data@data <- Hem.data@data[genes.use, ] Hem.data
# log-normalize the gene expression matrix
<- NormalizeData(object = Hem.data,
Hem.datanormalization.method = "LogNormalize",
scale.factor = round(median(Hem.data@meta.data$nUMI)),
display.progress = F)
dir.create("../../SpringCoordinates")
# Export raw expression matrix and gene list to regenerate a spring plot
<- Matrix(as.matrix(Hem.data@raw.data), sparse = TRUE)
exprData writeMM(exprData, "../../SpringCoordinates/ExprData.mtx")
## NULL
<- row.names(Hem.data@raw.data)
Genelist write.table(Genelist, "../../SpringCoordinates/Genelist.csv", sep="\t", col.names = F, row.names = F)
Spring coordinates were generated using the online version of SPRING with the following parameters :
Number of cells: 15333
Number of genes that passed filter: 874
Min expressing cells (gene filtering): 3
Min number of UMIs (gene filtering): 3
Gene variability %ile (gene filtering): 95
Number of principal components: 20
Number of nearest neighbors: 8
Number of force layout iterations: 500
Import the new coordinates
# Import Spring coordinates
<-read.table("../SpringCoordinates/hem_spring.csv", sep=",", header = T)
Coordinates rownames(Coordinates) <- colnames(Hem.data@data)
<- SetDimReduction(Hem.data,
Hem.data reduction.type = "spring",
slot = "cell.embeddings",
new.data = as.matrix(Coordinates))
@dr$spring@key <- "spring"
Hem.datacolnames(Hem.data@dr$spring@cell.embeddings) <- paste0(GetDimReduction(object= Hem.data, reduction.type = "spring",slot = "key"), c(1,2))
<- c("Mcm5", "Pcna", "Tym5", "Fen1", "Mcm2", "Mcm4", "Rrm1", "Ung", "Gins2", "Mcm6", "Cdca7", "Dtl", "Prim1", "Uhrf1", "Mlf1ip", "Hells", "Rfc2", "Rap2", "Nasp", "Rad51ap1", "Gmnn", "Wdr76", "Slbp", "Ccne2", "Ubr7", "Pold3", "Msh2", "Atad2", "Rad51", "Rrm2", "Cdc45", "Cdc6", "Exo1", "Tipin", "Dscc1", "Blm", " Casp8ap2", "Usp1", "Clspn", "Pola1", "Chaf1b", "Brip1", "E2f8")
s.genes <- c("Hmgb2", "Ddk1","Nusap1", "Ube2c", "Birc5", "Tpx2", "Top2a", "Ndc80", "Cks2", "Nuf2", "Cks1b", "Mki67", "Tmpo", " Cenpk", "Tacc3", "Fam64a", "Smc4", "Ccnb2", "Ckap2l", "Ckap2", "Aurkb", "Bub1", "Kif11", "Anp32e", "Tubb4b", "Gtse1", "kif20b", "Hjurp", "Cdca3", "Hn1", "Cdc20", "Ttk", "Cdc25c", "kif2c", "Rangap1", "Ncapd2", "Dlgap5", "Cdca2", "Cdca8", "Ect2", "Kif23", "Hmmr", "Aurka", "Psrc1", "Anln", "Lbr", "Ckap5", "Cenpe", "Ctcf", "Nek2", "G2e3", "Gas2l3", "Cbx5", "Cenpa")
g2m.genes
<- CellCycleScoring(object = Hem.data,
Hem.data s.genes = s.genes,
g2m.genes = g2m.genes,
set.ident = TRUE)
@meta.data$CC.Difference <- Hem.data@meta.data$S.Score - Hem.data@meta.data$G2M.Score Hem.data
DimPlot(Hem.data,
reduction.use = "spring",
group.by = "Phase",
cols.use = wes_palette("GrandBudapest1", 3, type = "discrete")[3:1],
dim.1 = 1,
dim.2 = 2,
do.label=T,
label.size = 4,
no.legend = F )
We assigned broad transcriptional cell state score based on known and manually curated marker genes
<- c("Rgcc", "Sparc", "Hes5","Hes1", "Slc1a3",
APgenes "Ddah1", "Ldha", "Hmga2","Sfrp1", "Id4",
"Creb5", "Ptn", "Lpar1", "Rcn1","Zfp36l1",
"Sox9", "Sox2", "Nr2e1", "Ttyh1", "Trip6")
<- list(APgenes)
genes.list <- "AP_signature"
enrich.name <- AddModuleScore(Hem.data,
Hem.data genes.list = genes.list,
genes.pool = NULL,
n.bin = 5,
seed.use = 1,
ctrl.size = length(genes.list),
use.k = FALSE,
enrich.name = enrich.name,
random.seed = 1)
<- FeaturePlot(object = Hem.data,
plot features.plot = APgenes,
cols.use = c("grey90", brewer.pal(9,"YlGnBu")),
reduction.use = "spring",
no.legend = T,
overlay = F,
dark.theme = F,
do.return =T,
no.axes = T)
for (i in 1:length(plot)){
$data <- plot[[i]]$data[order(plot[[i]]$data$gene),]
plot[[i]] }
::plot_grid(plotlist = plot[1:20], ncol = 5) cowplot
Apical progenitors gene expression
<- c("Eomes", "Igsf8", "Insm1", "Elavl2", "Elavl4",
BPgenes "Hes6","Gadd45g", "Neurog2", "Btg2", "Neurog1")
<- list(BPgenes)
genes.list <- "BP_signature"
enrich.name <- AddModuleScore(Hem.data,
Hem.data genes.list = genes.list,
genes.pool = NULL,
n.bin = 5,
seed.use = 1,
ctrl.size = length(genes.list),
use.k = FALSE,
enrich.name = enrich.name,
random.seed = 1)
<- FeaturePlot(object = Hem.data,
plot features.plot = BPgenes,
cols.use = c("grey90", brewer.pal(9,"YlGnBu")),
reduction.use = "spring",
no.legend = T,
overlay = F,
dark.theme = F,
do.return =T,
no.axes = T)
for (i in 1:length(plot)){
$data <- plot[[i]]$data[order(plot[[i]]$data$gene),]
plot[[i]] }
::plot_grid(plotlist = plot[1:10], ncol = 5) cowplot
Basal progenitors gene expression
<- c("Mfap4", "Nhlh2", "Nhlh1", "Ppp1r14a", "Nav1",
ENgenes "Neurod1", "Sorl1", "Svip", "Cxcl12", "Tenm4",
"Dll3", "Rgmb", "Cntn2", "Vat1")
<- list(ENgenes)
genes.list <- "EN_signature"
enrich.name <- AddModuleScore(Hem.data,
Hem.data genes.list = genes.list,
genes.pool = NULL,
n.bin = 5,
seed.use = 1,
ctrl.size = length(genes.list),
use.k = FALSE,
enrich.name = enrich.name,
random.seed = 1)
<- FeaturePlot(object = Hem.data,
plot features.plot = ENgenes,
cols.use = c("grey90", brewer.pal(9,"YlGnBu")),
reduction.use = "spring",
no.legend = T,
overlay = F,
dark.theme = F,
do.return =T,
no.axes = T)
for (i in 1:length(plot)){
$data <- plot[[i]]$data[order(plot[[i]]$data$gene),]
plot[[i]] }
::plot_grid(plotlist = plot[1:14], ncol = 5) cowplot
Early pallial neurons gene expression
<- c("Snhg11", "Pcsk1n", "Mapt", "Ina", "Stmn4",
LNgenes "Gap43", "Tubb2a", "Ly6h","Ptprd", "Mef2c")
<- list(LNgenes)
genes.list <- "LN_signature"
enrich.name <- AddModuleScore(Hem.data,
Hem.data genes.list = genes.list,
genes.pool = NULL,
n.bin = 5,
seed.use = 1,
ctrl.size = length(genes.list),
use.k = FALSE,
enrich.name = enrich.name,
random.seed = 1)
<- FeaturePlot(object = Hem.data,
plot features.plot = LNgenes,
cols.use = c("grey90", brewer.pal(9,"YlGnBu")),
reduction.use = "spring",
no.legend = T,
overlay = F,
dark.theme = F,
do.return =T,
no.axes = T)
for (i in 1:length(plot)){
$data <- plot[[i]]$data[order(plot[[i]]$data$gene),]
plot[[i]] }
::plot_grid(plotlist = plot[1:10], ncol = 5) cowplot
Late pallial neurons gene expression
<- c("Lum", "Lgals1", "Foxc1")
Mgenes <- list(Mgenes)
genes.list <- "Meninges_signature"
enrich.name <- AddModuleScore(Hem.data,
Hem.data genes.list = genes.list,
genes.pool = NULL,
n.bin = 5,
seed.use = 1,
ctrl.size = length(genes.list),
use.k = FALSE,
enrich.name = enrich.name,
random.seed = 1)
<- FeaturePlot(object = Hem.data,
plot features.plot = Mgenes,
cols.use = c("grey90", brewer.pal(9,"YlGnBu")),
reduction.use = "spring",
no.legend = T,
overlay = F,
dark.theme = F,
do.return =T,
no.axes = T)
for (i in 1:length(plot)){
$data <- plot[[i]]$data[order(plot[[i]]$data$gene),]
plot[[i]] }
::plot_grid(plotlist = plot[1:3], ncol = 3) cowplot
Meninges gene expression
<- c("Fcer1g", "C1qb", "Tyrobp")
Immunegenes <- list(Immunegenes)
genes.list <- "Immune_signature"
enrich.name <- AddModuleScore(Hem.data,
Hem.data genes.list = genes.list,
genes.pool = NULL,
n.bin = 5,
seed.use = 1,
ctrl.size = length(genes.list),
use.k = FALSE,
enrich.name = enrich.name,
random.seed = 1)
<- FeaturePlot(object = Hem.data,
plot features.plot = Immunegenes,
cols.use = c("grey90", brewer.pal(9,"YlGnBu")),
reduction.use = "spring",
no.legend = T,
overlay = F,
dark.theme = F,
do.return =T,
no.axes = T)
for (i in 1:length(plot)){
$data <- plot[[i]]$data[order(plot[[i]]$data$gene),]
plot[[i]] }
::plot_grid(plotlist = plot[1:3], ncol = 3) cowplot
Immune gene expression
FeaturePlot(object = Hem.data,
features.plot = c("AP_signature1", "BP_signature1",
"EN_signature1", "LN_signature1",
"Meninges_signature1","Immune_signature1"),
cols.use = rev(brewer.pal(10,"Spectral")),
reduction.use = "spring",
no.legend = T,
overlay = F,
dark.theme = F,
no.axes = T)
saveRDS(Hem.data, "../QC.filtered.cells.RDS")
#date
format(Sys.time(), "%d %B, %Y, %H,%M")
## [1] "15 septembre, 2021, 09,47"
#Packages used
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
## [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
## [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] wesanderson_0.3.6 reticulate_1.18 ggExtra_0.9 RColorBrewer_1.1-2
## [5] dplyr_1.0.6 Seurat_2.3.4 Matrix_1.2-17 cowplot_1.1.1
## [9] ggplot2_3.3.3
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.15 colorspace_2.0-1 ellipsis_0.3.2
## [4] class_7.3-17 modeltools_0.2-22 ggridges_0.5.1
## [7] mclust_5.4.5 htmlTable_1.13.2 base64enc_0.1-3
## [10] rstudioapi_0.11 proxy_0.4-23 farver_2.1.0
## [13] npsurv_0.4-0 flexmix_2.3-15 bit64_4.0.2
## [16] fansi_0.5.0 codetools_0.2-16 splines_3.6.3
## [19] R.methodsS3_1.7.1 lsei_1.2-0 robustbase_0.93-5
## [22] knitr_1.26 jsonlite_1.7.2 Formula_1.2-3
## [25] ica_1.0-2 cluster_2.1.0 kernlab_0.9-29
## [28] png_0.1-7 R.oo_1.23.0 shiny_1.4.0
## [31] compiler_3.6.3 httr_1.4.2 backports_1.1.5
## [34] fastmap_1.0.1 later_1.2.0 lars_1.2
## [37] acepack_1.4.1 htmltools_0.5.1.1 tools_3.6.3
## [40] igraph_1.2.5 gtable_0.3.0 glue_1.4.2
## [43] reshape2_1.4.3 RANN_2.6.1 rappdirs_0.3.1
## [46] Rcpp_1.0.6 vctrs_0.3.8 gdata_2.18.0
## [49] ape_5.3 nlme_3.1-141 iterators_1.0.12
## [52] fpc_2.2-3 lmtest_0.9-37 gbRd_0.4-11
## [55] xfun_0.18 stringr_1.4.0 mime_0.10
## [58] miniUI_0.1.1.1 lifecycle_1.0.0 irlba_2.3.3
## [61] gtools_3.8.1 DEoptimR_1.0-8 zoo_1.8-6
## [64] MASS_7.3-53 scales_1.1.1 promises_1.2.0.1
## [67] doSNOW_1.0.18 parallel_3.6.3 yaml_2.2.1
## [70] pbapply_1.4-2 gridExtra_2.3 segmented_1.0-0
## [73] rpart_4.1-15 latticeExtra_0.6-28 stringi_1.4.6
## [76] highr_0.8 foreach_1.4.7 checkmate_1.9.4
## [79] caTools_1.17.1.2 bibtex_0.4.2 Rdpack_0.11-0
## [82] SDMTools_1.1-221.1 rlang_0.4.11 pkgconfig_2.0.3
## [85] dtw_1.21-3 prabclus_2.3-1 bitops_1.0-6
## [88] evaluate_0.14 lattice_0.20-41 ROCR_1.0-7
## [91] purrr_0.3.4 labeling_0.4.2 htmlwidgets_1.5.3
## [94] bit_4.0.4 tidyselect_1.1.1 plyr_1.8.4
## [97] magrittr_2.0.1 R6_2.5.0 snow_0.4-3
## [100] gplots_3.0.1.1 generics_0.1.0 Hmisc_4.3-0
## [103] DBI_1.0.0 mgcv_1.8-33 pillar_1.6.1
## [106] foreign_0.8-72 withr_2.4.2 mixtools_1.1.0
## [109] fitdistrplus_1.0-14 survival_2.44-1.1 nnet_7.3-14
## [112] tsne_0.1-3 tibble_3.1.2 crayon_1.4.1
## [115] hdf5r_1.3.2.9000 KernSmooth_2.23-15 utf8_1.2.1
## [118] rmarkdown_2.5 grid_3.6.3 data.table_1.14.0
## [121] metap_1.1 digest_0.6.27 diptest_0.75-7
## [124] xtable_1.8-4 httpuv_1.5.2 tidyr_1.1.3
## [127] R.utils_2.9.0 stats4_3.6.3 munsell_0.5.0
Institute of Psychiatry and Neuroscience of Paris, INSERM U1266, 75014, Paris, France, matthieu.moreau@inserm.fr↩︎