library(Seurat)
library(fastTopics)
library(RcppParallel)
library(Matrix)
library(dplyr)
library(RColorBrewer)
library(ggplot2)
library(ggExtra)
library(cowplot)
library(wesanderson)
#Set ggplot theme as classic
theme_set(theme_classic())
<- readRDS("../QC.filtered.cells.RDS") Hem.data
DimPlot(Hem.data,
reduction = "spring",
cols = c(wes_palette("FantasticFox1"),"grey60"),
pt.size = 0.5) & NoAxes()
# Extract apical progenitors
<- subset(Hem.data, idents = c(0,1,3))
Progenitors.data
DimPlot(Progenitors.data,
reduction = "spring",
pt.size = 0.5,
cols = c(wes_palette("FantasticFox1")[c(1,2,4)]),
split.by = 'ident') + NoLegend() & NoAxes()
rm(Hem.data) ; gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3307939 176.7 6059512 323.7 4834983 258.3
## Vcells 338673567 2583.9 1045076561 7973.4 1016168883 7752.8
For this analysis we will keep only genes detected in at least 20 over 12325 cells
<- GetAssayData(object = Progenitors.data[["RNA"]], slot = "counts")
progenitors.counts dim(progenitors.counts)
## [1] 18268 12325
<- Matrix::rowSums(progenitors.counts > 0)
num.cells <- names(x = num.cells[which(x = num.cells >= 20)])
genes.use <- progenitors.counts[genes.use, ]
progenitors.counts
dim(progenitors.counts)
## [1] 15314 12325
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3295244 176.0 6059512 323.7 4834983 258.3
## Vcells 414502290 3162.5 1045076561 7973.4 1016168883 7752.8
set.seed(1)
<- fit_topic_model(t(progenitors.counts),
fit k = 15,
numiter.main = 200,
numiter.refine = 200,
method.main = "em",
method.refine = "scd",
control.main = list(numiter = 4, nc= 6),
control.refine = list(numiter = 4, nc= 6, extrapolate = TRUE),
verbose = "progressbar")
## Initializing factors using Topic SCORE algorithm.
## Initializing loadings by running 10 SCD updates.
## Fitting rank-15 Poisson NMF to 12325 x 15314 sparse matrix.
## Running 200 EM updates, without extrapolation (fastTopics 0.5-59).
## Refining model fit.
## Fitting rank-15 Poisson NMF to 12325 x 15314 sparse matrix.
## Running 200 SCD updates, with extrapolation (fastTopics 0.5-59).
# Add cells' topics loading to the metadata
@meta.data <- cbind(Progenitors.data@meta.data, fit$L) Progenitors.data
FeaturePlot(object = Progenitors.data,
features = paste0("k", 1:15),
cols = rev(brewer.pal(10,"Spectral")),
reduction = "spring") & NoLegend() & NoAxes()
FeaturePlot(object = Progenitors.data,
features = paste0("k", c(15,12,9,8,14,6)),
cols = rev(brewer.pal(10,"Spectral")),
reduction = "spring",
order = T) & NoLegend() & NoAxes()
set.seed(1)
<- prcomp(fit$L[,c(15,12,9,8,14,6)])$x
pca <- cluster::pam(pca, k = 6)$clustering clusters
@meta.data$TopicsKmeans <- as.numeric(clusters)
Progenitors.data
FeaturePlot(object = Progenitors.data,
features = "TopicsKmeans",
cols = c(wes_palette("FantasticFox1"),"grey90", "grey40"),
reduction = "spring") & NoLegend() & NoAxes()
Idents(Progenitors.data) <- Progenitors.data$TopicsKmeans
DimPlot(Progenitors.data,
reduction = "spring",
pt.size = 0.5,
cols = c(wes_palette("FantasticFox1"),"grey90", "grey40"),
split.by = 'ident') + NoLegend() & NoAxes()
= c("Dorso-Medial_pallium", "ChP", "Medial_pallium", "Hem", "ChP_progenitors", "Thalamic_eminence")
ident
$progenitor_type <- sapply(Progenitors.data$TopicsKmeans,
Progenitors.dataFUN = function(x) {x= ident[x]})
Idents(Progenitors.data) <- Progenitors.data$progenitor_type
DimPlot(Progenitors.data,
reduction = "spring",
pt.size = 0.5,
cols = c(wes_palette("FantasticFox1"),"grey90"),
split.by = 'ident') + NoLegend() & NoAxes()
<- readRDS("../QC.filtered.cells.RDS") Hem.data
$Cell_ident <- sapply(Hem.data$Barcodes,
Hem.dataFUN = function(x) {
if (x %in% Progenitors.data$Barcodes) {
= Progenitors.data@meta.data[x, "progenitor_type"]
x else {
} = paste0("seurat_clusters_", Hem.data@meta.data[x, "seurat_clusters"])
x
} })
DimPlot(object = Hem.data,
group.by = "Cell_ident",
reduction = "spring",
cols = c("#83c3b8", #"ChP"
"#009fda", #"ChP_progenitors"
"#68b041", #"Dorso-Medial_pallium"
"#e46b6b", #"Hem"
"#e3c148", #"Medial_pallium"
"#b7d174", #2
"grey40", #4
"black", #5
"#3e69ac" #"Thalamic_eminence"
))
<- subset(Hem.data, idents = 2)
Neurons.data
DimPlot(Neurons.data ,
reduction = "spring",
pt.size = 1,
cols = c("#b7d174")) + NoAxes()
<- FeaturePlot(object = Neurons.data ,
p1 features = c("BP_signature1","LN_signature1"),
pt.size = 0.5,
cols = rev(brewer.pal(10,"Spectral")),
reduction = "spring",
order = T) & NoAxes()
<- FeaturePlot(object = Neurons.data ,
p2 features = c("Foxg1", "Trp73"),
pt.size = 0.5,
cols = c("grey90", brewer.pal(9,"YlGnBu")),
reduction = "spring",
order = T) & NoAxes()
/ p2 p1
Separation between the 2 lineage seems straightforward. We use louvain clustering to split the two.
<- RunPCA(Neurons.data, verbose = FALSE)
Neurons.data
<- FindNeighbors(Neurons.data,
Neurons.data dims = 1:10,
k.param = 8)
<- FindClusters(Neurons.data, resolution = 0.05) Neurons.data
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2835
## Number of edges: 56608
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9640
## Number of communities: 2
## Elapsed time: 0 seconds
DimPlot(Neurons.data,
reduction = "spring",
cols = c("#cc391b","#026c9a"),
pt.size = 0.5) & NoAxes()
$Lineage <- sapply(as.numeric(Neurons.data$SCT_snn_res.0.05),
Neurons.dataFUN = function(x) {x= c("Cajal-Retzius_neurons","Pallial_neurons")[x]})
DimPlot(object = Neurons.data,
group.by = "Lineage",
reduction = "spring",
cols = c("#cc391b","#026c9a"),
pt.size = 0.5) & NoAxes()
$Cell_ident <- sapply(Hem.data$Barcodes,
Hem.dataFUN = function(x) {
if (x %in% Neurons.data$Barcodes) {
= Neurons.data@meta.data[x, "Lineage"]
x else {
} = Hem.data@meta.data[x, "Cell_ident"]
x
} })
We excluded Meninges and Immune cell clusters
Idents(Hem.data) <- Hem.data$Cell_ident
<- subset(Hem.data, idents = unique(Hem.data$Cell_ident)[!unique(Hem.data$Cell_ident) %in% c("seurat_clusters_4", "seurat_clusters_5")]) Hem.data
<- BuildClusterTree(Hem.data,
Hem.data assay = "SCT",
slot = "data",
reorder = T,
verbose = TRUE)
<- Tool(object = Hem.data, slot = "BuildClusterTree")
data.tree <- ape::rotate(data.tree, node =c(12))
tree.rotated
Idents(Hem.data) <- factor(x = Idents(Hem.data),
levels = c("ChP","Cajal-Retzius_neurons","Pallial_neurons",
"Dorso-Medial_pallium","Medial_pallium","Hem",
"Thalamic_eminence","ChP_progenitors"),
ordered = TRUE)
DimPlot(object = Hem.data,
group.by = "Cell_ident",
reduction = "spring",
cols = c("#ebcb2e", # CR
"#9ec22f", #"ChP"
"#e7823a", #"ChP_progenitors"
"#cc3a1b", #"Dorso-Medial_pallium"
"#d14c8d", #"Hem"
"#4cabdc", #"Medial_pallium"
"#046c9a", # Pallial
"#4990c9" #"Thalamic_eminence"
)+ NoAxes() )
<- ggdendro::ggdendrogram(ggdendro::dendro_data(as.hclust(tree.rotated)), labels = F, rotate = T) + scale_y_reverse() p1
<- c("Htr2c", "Cfap126",
Marker.genes "Trp73", "Lhx1", "Foxg1","Cbln4", "Tbr1", "Neurod2",
"Lmo2", "Sox9", "Lhx2", "Meis2", "Shisa2",
"Wif1", "Wnt5a", "Id3",
"Rassf4", "Dkk3","Rspo3",
"Dlk1", "Meg3",
"Mlf1","Sulf1", "Ttr")
<- data.frame(t(as.matrix(Hem.data@assays$SCT[Marker.genes,])))
data.to.plot
$Cell <- rownames(data.to.plot)
data.to.plot$id <- Idents(Hem.data)
data.to.plot
#Reshape the dataframe
<- data.to.plot %>% tidyr::gather(key = Marker.genes, value = expression, -c(Cell, id))
data.to.plot
#For each genes in each cluster calculate mean expression and percent cell with norm expression > 0
<- data.to.plot %>%
data.to.plot group_by(id, Marker.genes) %>%
summarize(avg.exp = mean(expm1(x = expression)), pct.exp = length(x = expression[expression > 0.7]) / length(x = expression))
<- data.to.plot %>% ungroup() %>%
data.to.plot group_by(Marker.genes) %>%
mutate(avg.exp.scale = scale(x = avg.exp)) %>%
mutate(avg.exp.scale = MinMax(data = avg.exp.scale, max = 2, min = -2)) # add column with scaled expression values from -2 to 2
$genes.plot <- factor(x = data.to.plot$Marker.genes, levels = rev(x = Marker.genes)) #Put gene names as factor
data.to.plot
$pct.exp[data.to.plot$pct.exp < 0.05] <- NA #Set to Na if less than percent.min of cells express the gene
data.to.plot$pct.exp <- data.to.plot$pct.exp * 100
data.to.plot
<- ggplot(data = data.to.plot, mapping = aes(x = genes.plot, y = id)) +
p2 geom_point(mapping = aes(size = pct.exp, color = avg.exp.scale)) + # modify the colors by if want to color by domains or cluster ident
scale_size_area(max_size= 4) + # Scale the radius of the dot from 0 to 6
scale_x_discrete(position = "top") +
theme(axis.text.x = element_text(angle = 90, vjust = 1), axis.title.y = element_blank()) +
xlab("") + ylab("") +
scale_colour_gradientn(colours = brewer.pal(11,"RdPu"))
plot_grid(plotlist = list(p1,p2), ncol=2, align='h', rel_widths = c(0.2, 1.5))
FeaturePlot(object = Hem.data,
features = c("Tbr1", "Trp73", "Htr2c",
"Shisa2", "Wif1", "Rassf4","Dkk3",
"Dlk1", "Sulf1", "Ttr"),
pt.size = 0.2,
cols = c("grey90", brewer.pal(9,"YlGnBu")),
reduction = "spring",
order = T) & NoAxes() & NoLegend()
saveRDS(Hem.data, "../QC.filtered.clustered.cells.RDS")
#date
format(Sys.time(), "%d %B, %Y, %H,%M")
## [1] "23 février, 2022, 12,24"
#Packages used
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
## [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
## [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] wesanderson_0.3.6 cowplot_1.1.1 ggExtra_0.9 ggplot2_3.3.5
## [5] RColorBrewer_1.1-2 dplyr_1.0.7 Matrix_1.4-0 RcppParallel_5.1.4
## [9] fastTopics_0.5-59 SeuratObject_4.0.4 Seurat_4.0.5
##
## loaded via a namespace (and not attached):
## [1] plyr_1.8.6 igraph_1.2.11 lazyeval_0.2.2
## [4] splines_4.1.2 listenv_0.8.0 scattermore_0.7
## [7] digest_0.6.29 invgamma_1.1 foreach_1.5.1
## [10] htmltools_0.5.2 SQUAREM_2021.1 fansi_0.5.0
## [13] magrittr_2.0.2 tensor_1.5 cluster_2.1.2
## [16] ROCR_1.0-11 recipes_0.1.17 globals_0.14.0
## [19] gower_0.2.2 matrixStats_0.61.0 MCMCpack_1.6-0
## [22] spatstat.sparse_2.0-0 prettyunits_1.1.1 colorspace_2.0-2
## [25] ggrepel_0.9.1 xfun_0.28 crayon_1.4.2
## [28] jsonlite_1.7.2 spatstat.data_2.1-0 ape_5.5
## [31] survival_3.2-13 zoo_1.8-9 iterators_1.0.13
## [34] glue_1.5.1 polyclip_1.10-0 gtable_0.3.0
## [37] ipred_0.9-12 MatrixModels_0.5-0 leiden_0.3.9
## [40] future.apply_1.8.1 abind_1.4-5 SparseM_1.81
## [43] scales_1.1.1 DBI_1.1.1 miniUI_0.1.1.1
## [46] Rcpp_1.0.8 progress_1.2.2 viridisLite_0.4.0
## [49] xtable_1.8-4 reticulate_1.22 spatstat.core_2.3-1
## [52] stats4_4.1.2 lava_1.6.10 prodlim_2019.11.13
## [55] truncnorm_1.0-8 htmlwidgets_1.5.4 httr_1.4.2
## [58] ellipsis_0.3.2 ica_1.0-2 farver_2.1.0
## [61] pkgconfig_2.0.3 nnet_7.3-17 sass_0.4.0
## [64] uwot_0.1.10 deldir_1.0-6 utf8_1.2.2
## [67] caret_6.0-90 labeling_0.4.2 tidyselect_1.1.1
## [70] rlang_0.4.12 reshape2_1.4.4 later_1.3.0
## [73] munsell_0.5.0 tools_4.1.2 generics_0.1.1
## [76] ggridges_0.5.3 ggdendro_0.1.23 evaluate_0.14
## [79] stringr_1.4.0 fastmap_1.1.0 yaml_2.2.1
## [82] goftest_1.2-3 mcmc_0.9-7 ModelMetrics_1.2.2.2
## [85] knitr_1.36 fitdistrplus_1.1-6 purrr_0.3.4
## [88] RANN_2.6.1 pbapply_1.5-0 future_1.23.0
## [91] nlme_3.1-153 mime_0.12 quantreg_5.86
## [94] compiler_4.1.2 plotly_4.10.0 png_0.1-7
## [97] spatstat.utils_2.2-0 tibble_3.1.6 bslib_0.3.1
## [100] stringi_1.7.6 highr_0.9 lattice_0.20-45
## [103] vctrs_0.3.8 pillar_1.6.4 lifecycle_1.0.1
## [106] spatstat.geom_2.3-0 lmtest_0.9-39 jquerylib_0.1.4
## [109] RcppAnnoy_0.0.19 data.table_1.14.2 irlba_2.3.3
## [112] conquer_1.2.1 httpuv_1.6.3 patchwork_1.1.1
## [115] R6_2.5.1 promises_1.2.0.1 KernSmooth_2.23-20
## [118] gridExtra_2.3 parallelly_1.29.0 codetools_0.2-18
## [121] MASS_7.3-55 assertthat_0.2.1 withr_2.4.3
## [124] sctransform_0.3.2 hms_1.1.1 mgcv_1.8-38
## [127] parallel_4.1.2 quadprog_1.5-8 grid_4.1.2
## [130] rpart_4.1.16 timeDate_3043.102 tidyr_1.1.4
## [133] coda_0.19-4 class_7.3-20 rmarkdown_2.11
## [136] ashr_2.2-47 Rtsne_0.15 mixsqp_0.3-43
## [139] pROC_1.18.0 shiny_1.7.1 lubridate_1.8.0
Institute of Psychiatry and Neuroscience of Paris, INSERM U1266, 75014, Paris, France, matthieu.moreau@inserm.fr↩︎