library(Seurat)
library(scrattch.hicat)
library(FateID)
library(Matrix)
library(dplyr)
library(RColorBrewer)
library(ggplot2)
library(ggExtra)
library(cowplot)
library(reticulate)
library(wesanderson)
library(princurve)
use_python("/usr/bin/python3")
#Set ggplot theme as classic
theme_set(theme_classic())
# Load the raw counts matrix
<- "/mnt/B8926B8E926B5044/2017-2021_Pierani-lab/Projet_CR_transcriptome_scRNAseq/Datasets/E11-12_Hem-Wnt3aCre_11-19/hemCR/QC.filtered.clustered.cells.RDS"
path.ref
<- list(WT = readRDS(path.ref) %>%
WT.KO subset(subset = orig.ident == "Hem1" & Cell_ident %in% c("ChP_progenitors", "ChP",
"Dorso-Medial_pallium", "Medial_pallium",
"Hem", "Thalamic_eminence") ),
KO = readRDS("Pidd1KO.cells.RDS") %>% subset(idents = c(0,1,3,6)))
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 2781542 148.6 4507117 240.8 4507117 240.8
## Vcells 388970477 2967.7 807840593 6163.4 798973275 6095.7
<- DimPlot(object = WT.KO[["WT"]],
p1 group.by = "Cell.state",
reduction = "spring",
cols = c("#31b6bd", "#ebcb2e", "#9ec22f", "#cc3a1b", "#d14c8d", "#4cabdc", "#5ab793", "#e7823a", "#046c9a", "#4990c9"),
pt.size = 1.5
& NoAxes()
)
<- DimPlot(WT.KO[["KO"]],
p2 reduction = "spring",
group.by = "Broadclust.ident",
cols = c("#ebcb2e", "#9ec22f", "#a9961b", "#cc3a1b"),
pt.size = 1.5) & NoAxes()
+ p2 p1
"WT"]] <- NormalizeData(WT.KO[["WT"]], normalization.method = "LogNormalize", scale.factor = 10000, assay = "RNA")
WT.KO[["KO"]] <- NormalizeData(WT.KO[["KO"]], normalization.method = "LogNormalize", scale.factor = 10000, assay = "RNA") WT.KO[[
"WT"]] <- FindVariableFeatures(WT.KO[["WT"]], selection.method = "vst", nfeatures = 2000)
WT.KO[["KO"]] <- FindVariableFeatures(WT.KO[["KO"]], selection.method = "vst", nfeatures = 2000) WT.KO[[
<- SelectIntegrationFeatures(object.list = WT.KO, nfeatures = 1500) features
<- FindTransferAnchors(reference = WT.KO[["WT"]],
KO.anchors query = WT.KO[["KO"]],
features = features,
reduction = "rpca",
k.anchor = 5,
k.filter = 100,
k.score = 30,
npcs = 20,
dims = 1:20,
max.features = 200)
<- TransferData(anchorset = KO.anchors,
predictions refdata = WT.KO[["WT"]]$Cell.state,
dims = 1:20)
"KO"]] <- AddMetaData(WT.KO[["KO"]], metadata = predictions) WT.KO[[
<- brewer.pal(n =11, name = "Spectral")
cols
ggplot(WT.KO[["KO"]]@meta.data, aes(Spring_1, Spring_2)) +
geom_point(aes(color=prediction.score.max), size=1, shape=16) +
scale_color_gradientn(colours=rev(cols), name='prediction.score.max')
<- DimPlot(object = WT.KO[["WT"]],
p1 group.by = "Cell.state",
reduction = "spring",
cols = c("#7293c8", "#b79f0b", "#3ca73f","#31b6bd",
"#ebcb2e", "#9ec22f", "#a9961b", "#cc3a1b"),
pt.size = 1) & NoAxes()
<- DimPlot(WT.KO[["KO"]],
p2 group.by = "predicted.id",
reduction = "spring",
cols = c("#7293c8","#31b6bd",
"#ebcb2e", "#9ec22f", "#a9961b", "#cc3a1b"),
pt.size = 1) & NoAxes()
+ p2 p1
<- WT.KO[["KO"]]@meta.data
Projected.idents
rm(list = ls()[!ls() %in% c("Projected.idents")])
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 2940800 157.1 4507117 240.8 4507117 240.8
## Vcells 389436188 2971.2 1007133240 7683.9 908772642 6933.4
<- readRDS("Pidd1KO.cells.RDS")
Raw.data
$Cell.ident <- sapply(Raw.data$Barcodes,
Raw.dataFUN = function(x) {
if (x %in% Projected.idents $Barcodes) {
= Projected.idents[x, "predicted.id"]
x else {
} = Raw.data@meta.data[x, "Broadclust.ident"]
x
} })
DimPlot(object = Raw.data,
group.by = "Cell.ident",
reduction = "spring",
cols = c("#4cabdc", "#7293c8", "grey40", "#7293c8","#31b6bd",
"#ebcb2e", "#9ec22f", "#a9961b", "#cc3a1b"),
pt.size = 0.5) & NoAxes()
rm(list = ls()[!ls() %in% "Raw.data"])
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 2934993 156.8 4507117 240.8 4507117 240.8
## Vcells 288562628 2201.6 805706592 6147.1 908772642 6933.4
# AP
<- c("Rgcc", "Sparc", "Hes5","Hes1", "Slc1a3",
APgenes "Ddah1", "Ldha", "Hmga2","Sfrp1", "Id4",
"Creb5", "Ptn", "Lpar1", "Rcn1","Zfp36l1",
"Sox9", "Sox2", "Nr2e1", "Ttyh1", "Trip6")
<- AddModuleScore(Raw.data,
Raw.data features = list(APgenes),
name = "AP_signature")
# BP
<- c("Eomes", "Igsf8", "Insm1", "Elavl2", "Elavl4",
BPgenes "Hes6","Gadd45g", "Neurog2", "Btg2", "Neurog1")
<- AddModuleScore(Raw.data,
Raw.datafeatures = list(BPgenes),
name = "BP_signature")
# EN
<- c("Mfap4", "Nhlh2", "Nhlh1", "Ppp1r14a", "Nav1",
ENgenes "Neurod1", "Sorl1", "Svip", "Cxcl12", "Tenm4",
"Dll3", "Rgmb", "Cntn2", "Vat1")
<- AddModuleScore(Raw.data,
Raw.data features = list(ENgenes),
name = "EN_signature")
# LN
<- c("Snhg11", "Pcsk1n", "Mapt", "Ina", "Stmn4",
LNgenes "Gap43", "Tubb2a", "Ly6h","Ptprd", "Mef2c")
<- AddModuleScore(Raw.data,
Raw.data features = list(LNgenes),
name = "LN_signature")
FeaturePlot(object = Raw.data,
features = c("AP_signature1", "BP_signature1",
"EN_signature1", "LN_signature1"),
pt.size = 0.75,
cols = rev(brewer.pal(10,"Spectral")),
reduction = "spring",
order = T) & NoAxes() & NoLegend()
<- Raw.data %>% subset(subset = Cell.ident %in% c(3,6)) LN.data
# K-means clustering based on AP, SP and Pal signature scores
set.seed(100)
<- kmeans(cbind(LN.data$BP_signature1,
cl $EN_signature1,
LN.data$LN_signature1), 2)
LN.data
$kmeanClust <- paste0("Clust.",cl$cluster) LN.data
<- wes_palette("GrandBudapest1", 4, type = "discrete")
col.pal
<- ggplot(LN.data@meta.data, aes(x=LN_signature1, y=EN_signature1, colour = kmeanClust)) +
p1 scale_color_manual(values=col.pal) +
geom_point() +
theme(legend.position="none")
ggMarginal(p1, type = "histogram", fill="lightgrey")
DimPlot(object = LN.data,
group.by = "kmeanClust",
reduction = "spring",
cols = col.pal,
pt.size = 0.5) & NoAxes()
<- LN.data %>% subset(subset = kmeanClust == "Clust.2" & Spring_2 > 2500)
LN.data
DimPlot(object = LN.data,
group.by = "kmeanClust",
reduction = "spring",
cols = c("#4cabdc", "#7293c8", "grey40"),
pt.size = 0.5) & NoAxes()
# Exclude genes detected in less than 3 cells
<- Matrix::rowSums(LN.data@assays[["RNA"]]@counts > 0)
num.cells <- names(x = num.cells[which(x = num.cells >= 3)])
genes.use
<- c(grep(pattern = "(^Rpl|^Rps|^Mrp)", x = genes.use, value = TRUE),
GenesToRemove grep(pattern = "^mt-", x = genes.use, value = TRUE),
"Xist")
<- genes.use[!genes.use %in% GenesToRemove] genes.use
<- as.matrix(LN.data@assays[["RNA"]]@counts)[rownames(LN.data@assays[["RNA"]]@counts) %in% genes.use,]
dgeMatrix_count <- cpm(dgeMatrix_count)
dgeMatrix_cpm <- log2(dgeMatrix_cpm + 1)
norm.dat
<- Matrix(norm.dat, sparse = TRUE)
norm.dat <- list(raw.dat=dgeMatrix_count, norm.dat=norm.dat)
Data.matrix attach(Data.matrix)
<- log2(colSums(as.matrix(Data.matrix$norm.dat) > 0))
gene.counts <- log2(colSums(Data.matrix$raw.dat))
nUMI <- LN.data$percent.mito
perctMito <- LN.data$percent.ribo
perctRibo <- LN.data$LN_signature1
LNscore
<- as.matrix(cbind(gene.counts,
rm.eigen
nUMI,
perctMito,
perctRibo,
LNscore ))
row.names(rm.eigen) <- names(gene.counts)
colnames(rm.eigen) <- c("log2nGenes",
"log2nUMI",
"perctMito",
"perctRibo",
"LNscore")
rm(gene.counts, nUMI, perctMito, perctRibo, LNscore)
# Parameters for iterative clustering
<- de_param(padj.th = 0.01,
de.param lfc.th = 1,
low.th = 1,
q1.th = 0.25,
q2.th = NULL,
q.diff.th = 0.7,
de.score.th = 200,
min.cells = 10)
<- iter_clust(norm.dat,
iter.result counts = raw.dat,
dim.method = "pca",
max.dim = 10,
k.nn = 5,
de.param = de.param,
rm.eigen = rm.eigen,
rm.th = 0.6,
vg.padj.th = 0.5,
method = "louvain",
prefix = "iter_clust",
verbose = F)
## [1] "iter_clust"
## Finding nearest neighbors...DONE ~ 0.004 s
## Compute jaccard coefficient between nearest-neighbor sets...DONE ~ 0.008 s
## Build undirected graph from the weighted links...DONE ~ 0.023 s
## Run louvain clustering on the graph ...DONE ~ 0.011 s
## Return a community class
## -Modularity value: 0.9385356
## -Number of clusters: 36[1] "iter_clust.1"
## Finding nearest neighbors...DONE ~ 0 s
## Compute jaccard coefficient between nearest-neighbor sets...DONE ~ 0.001 s
## Build undirected graph from the weighted links...DONE ~ 0.001 s
## Run louvain clustering on the graph ...DONE ~ 0.001 s
## Return a community class
## -Modularity value: 0.6538551
## -Number of clusters: 4[1] "iter_clust.2"
## Finding nearest neighbors...DONE ~ 0.001 s
## Compute jaccard coefficient between nearest-neighbor sets...DONE ~ 0.003 s
## Build undirected graph from the weighted links...DONE ~ 0.006 s
## Run louvain clustering on the graph ...DONE ~ 0.003 s
## Return a community class
## -Modularity value: 0.8933776
## -Number of clusters: 18[1] "iter_clust.3"
## Finding nearest neighbors...DONE ~ 0.001 s
## Compute jaccard coefficient between nearest-neighbor sets...DONE ~ 0.004 s
## Build undirected graph from the weighted links...DONE ~ 0.008 s
## Run louvain clustering on the graph ...DONE ~ 0.004 s
## Return a community class
## -Modularity value: 0.9202576
## -Number of clusters: 26[1] "iter_clust.4"
## Finding nearest neighbors...DONE ~ 0.001 s
## Compute jaccard coefficient between nearest-neighbor sets...DONE ~ 0.002 s
## Build undirected graph from the weighted links...DONE ~ 0.005 s
## Run louvain clustering on the graph ...DONE ~ 0.002 s
## Return a community class
## -Modularity value: 0.8736993
## -Number of clusters: 14
# Merge clusters which are not seperable by DEGs
<- t(norm.dat[iter.result$markers,])
rd.dat <- merge_cl(norm.dat,
merge.result cl = iter.result$cl,
rd.dat = rd.dat,
de.param = de.param)
cat(length(unique(merge.result$cl))," Clusters")
## 4 Clusters
$iter.clust <- merge.result$cl
LN.data
Idents(LN.data) <- "iter.clust"
<- c("#ebcb2e", "#9ec22f", "#cc3a1b")
colors
DimPlot(LN.data,
reduction = "spring",
#cols = colors,
pt.size = 1.5) & NoAxes()
$Cell.ident <- sapply(Raw.data$Barcodes,
Raw.dataFUN = function(x) {
if (x %in% LN.data$Barcodes) {
= paste0("LN.", LN.data@meta.data[x, "iter.clust"])
x else {
} = Raw.data@meta.data[x, "Cell.ident"]
x
} })
$Cell.ident <- ifelse(Raw.data$Cell.ident %in% c(3,5), "Pal.neuron.traj", Raw.data$Cell.ident)
Raw.data$Cell.ident <- ifelse(Raw.data$Cell.ident == 6, "CR.traj", Raw.data$Cell.ident)
Raw.data$Cell.ident <- ifelse(Raw.data$Cell.ident == "Thalamic_eminence", "Septum_ANR", Raw.data$Cell.ident) Raw.data
$Cell.ident <- ifelse(Raw.data$Cell.ident == "Pal.neuron.traj",
Raw.data"Differentiating_neurons", Raw.data$Cell.ident)
$Cell.ident <- ifelse(Raw.data$Cell.ident == "Septum_ANR",
Raw.data"Progenitors_Septum_ANR", Raw.data$Cell.ident)
$Cell.ident <- ifelse(Raw.data$Cell.ident == "Medial_pallium",
Raw.data"Progenitors_Medial-pallium", Raw.data$Cell.ident)
$Cell.ident <- ifelse(Raw.data$Cell.ident == "Hem",
Raw.data"Progenitors_Hem", Raw.data$Cell.ident)
$Cell.ident <- ifelse(Raw.data$Cell.ident == "Dorso-Medial_pallium",
Raw.data"Progenitors_Dorso-Medial-pallium", Raw.data$Cell.ident)
$Cell.ident <- ifelse(Raw.data$Cell.ident == "ChP_progenitors",
Raw.data"Progenitors_ChP", Raw.data$Cell.ident)
$Cell.ident <- ifelse(Raw.data$Cell.ident == "ChP_Early",
Raw.data"Choroid_Plexus", Raw.data$Cell.ident)
$Cell.ident <- ifelse(Raw.data$Cell.ident == "CR.traj",
Raw.data"Differentiating_CRs", Raw.data$Cell.ident)
$Cell.ident <- ifelse(Raw.data$Cell.ident == "LN.5",
Raw.data"Neurons_Cajal-Retzius", Raw.data$Cell.ident)
$Cell.ident <- ifelse(Raw.data$Cell.ident %in% c("LN.6", "LN.NA", "LN.1"),
Raw.data"Neurons_Dorsal-Pallium", Raw.data$Cell.ident)
$Cell.ident <- ifelse(Raw.data$Cell.ident %in% c("LN.7"),
Raw.data"Neurons_Medial-Pallium", Raw.data$Cell.ident)
DimPlot(object = Raw.data,
group.by = "Cell.ident",
reduction = "spring",
cols = c("grey40","#4cabdc","#046c9a", "#7293c8", "#31b6bd", "#ebcb2e", "#9ec22f", "#a9961b", "#cc3a1b", wes_palette("FantasticFox1")),
pt.size = 0.5) & NoAxes()
rm(list = ls()[!ls() %in% c("Raw.data")])
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3088623 165.0 5731741 306.2 5731741 306.2
## Vcells 306611956 2339.3 805706592 6147.1 908772642 6933.4
dir.create("./SpringCoordinates/SpringCoordinates_cleaned")
# Export raw expression matrix and gene list to regenerate a spring plot
<- Matrix(as.matrix(Raw.data@assays[["RNA"]]@counts), sparse = TRUE)
exprData writeMM(exprData, "./SpringCoordinates/SpringCoordinates_cleaned/ExprData.mtx")
## NULL
rm(exprData)
<- row.names(Raw.data@assays[["RNA"]]@counts)
Genelist write.table(Genelist, "./SpringCoordinates/SpringCoordinates_cleaned/Genelist.csv", sep="\t", col.names = F, row.names = F, quote = F)
#Export metadata
<- c("Identity", Raw.data$Cell.ident)
Identity <- paste(Identity, sep=",", collapse=",")
Identity
<- c("Genotype", Raw.data$Genotype)
Genotype <- paste(Genotype, sep=",", collapse=",")
Genotype
<- rbind(Identity, Genotype)
Cellgrouping write.table(Cellgrouping, "./SpringCoordinates/SpringCoordinates_cleaned/Cellgrouping.csv", quote =F, row.names = F, col.names = F)
#Cells scores
<- c("AP.score", Raw.data$AP_signature1)
AP.score <- paste(AP.score, sep=",", collapse=",")
AP.score
<- c("BP.score", Raw.data$BP_signature1)
BP.score <- paste(BP.score, sep=",", collapse=",")
BP.score
<- c("EN.score", Raw.data$EN_signature1)
EN.score <- paste(EN.score, sep=",", collapse=",")
EN.score
<- c("LN.score", Raw.data$LN_signature1)
LN.score <- paste(LN.score, sep=",", collapse=",")
LN.score
<- rbind(AP.score, BP.score, EN.score, LN.score)
ColorTrack write.table(ColorTrack, "./SpringCoordinates/SpringCoordinates_cleaned/ColorTrack.csv", quote =F, row.names = F, col.names = F)
#Coordinates
<- cbind(c(1:nrow(Raw.data@meta.data))-1,
Coordinates $Spring_1,
Raw.data$Spring_2)
Raw.data
<- function(x){
Spring.Sym = abs(max(Coordinates[,3])-x)
x
}
3] <- sapply(Coordinates[,3], function(x) Spring.Sym(x))
Coordinates[,
write.table(Coordinates, "./SpringCoordinates/SpringCoordinates_cleaned/Custom_coor.csv", sep = ",", quote =F, row.names = F, col.names = F)
saveRDS(Raw.data, "./Pidd1KO.cells.RDS")
#date
format(Sys.time(), "%d %B, %Y, %H,%M")
## [1] "16 janvier, 2023, 11,58"
#Packages used
sessionInfo()
## R version 4.2.2 Patched (2022-11-10 r83330)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
## [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
## [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] princurve_2.1.6 wesanderson_0.3.6 reticulate_1.22
## [4] cowplot_1.1.1 ggExtra_0.9 ggplot2_3.4.0
## [7] RColorBrewer_1.1-2 dplyr_1.0.7 Matrix_1.5-1
## [10] FateID_0.2.1 scrattch.hicat_1.0.0 SeuratObject_4.0.4
## [13] Seurat_4.0.5
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.15 colorspace_2.0-2 deldir_1.0-6
## [4] ellipsis_0.3.2 ggridges_0.5.3 som_0.3-5.1
## [7] rstudioapi_0.13 spatstat.data_2.1-0 farver_2.1.0
## [10] leiden_0.3.9 listenv_0.8.0 ggrepel_0.9.1
## [13] lle_1.1 RSpectra_0.16-0 fansi_0.5.0
## [16] codetools_0.2-18 splines_4.2.2 knitr_1.36
## [19] polyclip_1.10-0 jsonlite_1.7.2 umap_0.2.8.0
## [22] ica_1.0-2 cluster_2.1.4 png_0.1-7
## [25] pheatmap_1.0.12 snowfall_1.84-6.1 uwot_0.1.10
## [28] shiny_1.7.1 sctransform_0.3.2 spatstat.sparse_2.0-0
## [31] compiler_4.2.2 httr_1.4.2 assertthat_0.2.1
## [34] fastmap_1.1.0 lazyeval_0.2.2 limma_3.50.0
## [37] cli_3.4.1 later_1.3.0 htmltools_0.5.2
## [40] tools_4.2.2 igraph_1.2.11 gtable_0.3.0
## [43] glue_1.5.1 RANN_2.6.1 reshape2_1.4.4
## [46] Rcpp_1.0.8 scattermore_0.7 jquerylib_0.1.4
## [49] vctrs_0.5.1 nlme_3.1-153 lmtest_0.9-39
## [52] xfun_0.28 stringr_1.4.0 globals_0.14.0
## [55] mime_0.12 miniUI_0.1.1.1 lifecycle_1.0.3
## [58] irlba_2.3.3 goftest_1.2-3 future_1.23.0
## [61] MASS_7.3-58 zoo_1.8-9 scales_1.2.1
## [64] spatstat.core_2.3-1 promises_1.2.0.1 spatstat.utils_2.2-0
## [67] parallel_4.2.2 yaml_2.2.1 pbapply_1.5-0
## [70] gridExtra_2.3 sass_0.4.0 rpart_4.1.19
## [73] stringi_1.7.6 highr_0.9 randomForest_4.7-1
## [76] rlang_1.0.6 pkgconfig_2.0.3 matrixStats_0.61.0
## [79] evaluate_0.14 lattice_0.20-45 ROCR_1.0-11
## [82] purrr_0.3.4 tensor_1.5 labeling_0.4.2
## [85] patchwork_1.1.1 htmlwidgets_1.5.4 tidyselect_1.1.1
## [88] parallelly_1.29.0 RcppAnnoy_0.0.19 plyr_1.8.6
## [91] magrittr_2.0.2 R6_2.5.1 generics_0.1.1
## [94] DBI_1.1.1 withr_2.5.0 mgcv_1.8-41
## [97] pillar_1.6.4 fitdistrplus_1.1-6 scatterplot3d_0.3-41
## [100] survival_3.4-0 abind_1.4-5 tibble_3.1.6
## [103] future.apply_1.8.1 crayon_1.4.2 KernSmooth_2.23-20
## [106] utf8_1.2.2 spatstat.geom_2.4-0 plotly_4.10.0
## [109] rmarkdown_2.11 locfit_1.5-9.4 grid_4.2.2
## [112] data.table_1.14.2 digest_0.6.29 xtable_1.8-4
## [115] tidyr_1.1.4 httpuv_1.6.3 openssl_1.4.5
## [118] munsell_0.5.0 viridisLite_0.4.0 bslib_0.3.1
## [121] askpass_1.1
Institute of Psychiatry and Neuroscience of Paris, INSERM U1266, 75014, Paris, France, matthieu.moreau@inserm.fr↩︎