In this part of the analysis we apply Revelio algorithm to explore cell cycle dynamic of pallial and hem domain radial glial cells
library(Seurat)
library(Revelio)
library(princurve)
library(monocle)
library(gprofiler2)
library(orthologsBioMART)
library(Matrix)
library(dplyr)
library(RColorBrewer)
library(ggplot2)
library(ggExtra)
library(cowplot)
library(wesanderson)
#Set ggplot theme as classic
theme_set(theme_classic())Hem.data <- readRDS("../QC.filtered.clustered.cells.RDS")DimPlot(object = Hem.data,
group.by = "Cell_ident",
reduction = "spring",
cols = c("#ebcb2e", #"ChP"
"#9ec22f", #"ChP_progenitors"
"#e7823a", # CR
"#cc3a1b", #"Dorso-Medial_pallium"
"#d14c8d", #"Hem"
"#4cabdc", #"Medial_pallium"
"#046c9a", # Pallial
"#4990c9" #"Thalamic_eminence"
)
)ChP.data <- subset(Hem.data, idents = c("ChP", "ChP_progenitors"))
DimPlot(ChP.data,
reduction = "spring",
pt.size = 1,
cols = c("#83c3b8", "#009fda")) + NoAxes()FeaturePlot(object = ChP.data ,
features = c("Fgf8", "Fgf17", "Adamts15", "Fgfbp1"),
pt.size = 0.5,
cols = c("grey90", brewer.pal(9,"YlGnBu")),
reduction = "spring",
order = T) & NoAxes() & NoLegend()ChP.data <- AddModuleScore(ChP.data,
features = list(c("Fgf8", "Fgf17", "Adamts15", "Fgfbp1")),
ctrl = 10,
name = "Septum")
FeaturePlot(object = ChP.data ,
features = c("Septum1"),
pt.size = 0.5,
cols = rev(brewer.pal(10,"Spectral")),
reduction = "spring",
order = T) & NoAxes()hist(ChP.data$Septum1, breaks = 20)ChP.data$Septal.prog <- ChP.data$Septum1 > 0p1 <- DimPlot(ChP.data,
reduction = "spring",
group.by = "Septal.prog",
pt.size = 1) + NoAxes()
p2 <- FeaturePlot(object = ChP.data ,
features = c("Fgf17"),
pt.size = 0.5,
cols = c("grey90", brewer.pal(9,"YlGnBu")),
reduction = "spring",
order = T) & NoAxes() & NoLegend()
p1 + p2ChP.data <- subset(ChP.data,
subset = Septal.prog == FALSE & ChP.data$Spring_1 > 1300)DimPlot(ChP.data,
reduction = "spring",
pt.size = 1,
cols = c("#83c3b8", "#009fda")) + NoAxes()Trajectories.ChP <- ChP.data@meta.data %>%
dplyr::select("Barcodes", "nUMI", "Spring_1", "Spring_2")fit <- principal_curve(as.matrix(Trajectories.ChP[,c("Spring_1", "Spring_2")]),
smoother='lowess',
trace=TRUE,
f = 0.8,
stretch=2)## Starting curve---distance^2: 117510509591
## Iteration 1---distance^2: 46558929
## Iteration 2---distance^2: 42504599
## Iteration 3---distance^2: 41393217
## Iteration 4---distance^2: 40991617
## Iteration 5---distance^2: 40890588
## Iteration 6---distance^2: 40826638
## Iteration 7---distance^2: 40830824
#The principal curve smoothed
ChP.pc.line <- as.data.frame(fit$s[order(fit$lambda),])
#Pseudotime score
Trajectories.ChP$Pseudotime <- fit$lambda/max(fit$lambda)
#Inverse the score if positive correlation with progenitor marker
if (cor(Trajectories.ChP$Pseudotime, ChP.data@assays$SCT@data['Hmga2', Trajectories.ChP$Barcodes]) > 0) {
Trajectories.ChP$Pseudotime <- -(Trajectories.ChP$Pseudotime - max(Trajectories.ChP$Pseudotime))
}
ChP.data$Pseudotime <- Trajectories.ChP$PseudotimeFeaturePlot(object = ChP.data,
features = "Pseudotime",
pt.size = 2,
cols = rev(colorRampPalette(brewer.pal(n =10, name = "Spectral"))(100)),
reduction = "spring",
order = T) & NoAxes()Prog.data <- subset(ChP.data, idents = c("ChP_progenitors"))
DimPlot(Prog.data,
reduction = "spring",
pt.size = 1,
cols = c("#009fda")) + NoAxes()Prog.data <- NormalizeData(Prog.data, normalization.method = "LogNormalize", scale.factor = 10000, assay = "RNA")rawCounts <- as.matrix(Prog.data[["RNA"]]@counts)# Filter genes expressed by less than 10 cells
num.cells <- Matrix::rowSums(rawCounts > 0)
genes.use <- names(x = num.cells[which(x = num.cells >= 10)])
rawCounts <- rawCounts[genes.use, ]rm(list = ls()[!ls() %in% c("rawCounts", "CCgenes", "ChP.data", "Prog.data")])
gc()## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 6031711 322.2 8756530 467.7 8756530 467.7
## Vcells 109360801 834.4 603966152 4607.9 620030699 4730.5
CCgenes <- read.table("CCgenes.csv", sep = ";", header = T)We can now follow the tutorial form the package github page
myData <- createRevelioObject(rawData = rawCounts,
cyclicGenes = CCgenes,
lowernGeneCutoff = 0,
uppernUMICutoff = Inf,
ccPhaseAssignBasedOnIndividualBatches = F)## 2022-02-24 16:17:15: reading data: 1.63secs
rm("rawCounts")
gc()## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 6050902 323.2 8756530 467.7 8756530 467.7
## Vcells 109450747 835.1 483172922 3686.4 620030699 4730.5
The getCellCyclePhaseAssignInformation filter “outlier” cells for cell cycle phase assignation. We modified the function to keep all cells as we observed this does not affect the global cell cycle fitting procedure
source("../Functions/functions_InitializationCCPhaseAssignFiltering.R")
myData <- getCellCyclePhaseAssign_allcells(myData)## 2022-02-24 16:17:20: assigning cell cycle phases: 8.68secs
myData <- getPCAData(dataList = myData)## 2022-02-24 16:17:35: calculating PCA: 8.72secs
myData <- getOptimalRotation(dataList = myData)## 2022-02-24 16:17:58: calculating optimal rotation: 1.14secs
gc()## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 6082646 324.9 11352458 606.3 11352458 606.3
## Vcells 146912240 1120.9 483172922 3686.4 620030699 4730.5
CellCycledata <- cbind(as.data.frame(t(myData@transformedData$dc$data[1:2,])),
nUMI= myData@cellInfo$nUMI,
Revelio.phase = factor(myData@cellInfo$ccPhase, levels = c("G1.S", "S", "G2", "G2.M", "M.G1")),
Revelio.cc= myData@cellInfo$ccPercentageUniformlySpaced,
Seurat.cc= Prog.data@meta.data[myData@cellInfo$cellID,"CC.Difference"])ggplot(CellCycledata, aes(DC1, DC2)) +
geom_point(aes(color = Revelio.phase)) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5]))p1 <- ggplot(CellCycledata, aes(DC1, DC2)) +
geom_point(aes(color = Revelio.phase)) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5]))
p2 <- ggplot(CellCycledata, aes(DC1, DC2)) +
geom_point(aes(color=Revelio.cc), size=2, shape=16) +
scale_color_gradientn(colours=rev(colorRampPalette(brewer.pal(n =11, name = "Spectral"))(100)),
name='Revelio_cc')
p1 + p2ggplot(CellCycledata, aes(x= Revelio.cc, y= nUMI/10000)) +
geom_point(aes(color= Revelio.phase), size=0.5) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5])) +
geom_smooth(method="loess", n= 50, fill="grey") +
ylim(0,NA)Prog.data$Revelio.DC1 <- CellCycledata$DC1
Prog.data$Revelio.DC2 <- CellCycledata$DC2
Prog.data$Revelio.phase <- CellCycledata$Revelio.phase
Prog.data$Revelio.cc <- CellCycledata$Revelio.ccp1 <- FeaturePlot(object = Prog.data,
features = "Revelio.cc",
pt.size = 1,
cols = rev(brewer.pal(10,"Spectral")),
reduction = "spring",
order = T) & NoAxes()
p2 <- DimPlot(object = Prog.data,
group.by = "Revelio.phase",
pt.size = 1,
reduction = "spring",
cols = c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5])) & NoAxes()
p3 <- FeaturePlot(object = Prog.data,
features = "Pseudotime",
pt.size = 2,
cols = rev(colorRampPalette(brewer.pal(n =10, name = "Spectral"))(100)),
reduction = "spring",
order = T) & NoAxes()
p1 + p2 + p3 Trajectories.progenitors <- Prog.data@meta.data %>%
dplyr::select(Barcodes, nUMI, Spring_1, Spring_2, Pseudotime) %>%
mutate(Cycling.axis= Prog.data$Revelio.cc,
Phase = Prog.data$Revelio.phase,
Gmnc= Prog.data@assays$RNA@data["Gmnc",],
Ttr= Prog.data@assays$RNA@data["Ttr",],
Htr2c= Prog.data@assays$RNA@data["Htr2c",],
Top2a= Prog.data@assays$RNA@data["Top2a",])p1 <- ggplot(Trajectories.progenitors, aes(x= Pseudotime, y= Cycling.axis)) +
geom_point(aes(color= Phase), size=1.5) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5]))
p2 <- Trajectories.progenitors %>% arrange(Gmnc) %>%
ggplot(aes(x= Pseudotime, y= Cycling.axis)) +
geom_point(aes(color=Gmnc), size=1.5) +
scale_color_gradientn(colours =c("grey90", brewer.pal(9,"YlGnBu")))
p3 <- Trajectories.progenitors %>% arrange(Ttr) %>%
ggplot(aes(x= Pseudotime, y= Cycling.axis)) +
geom_point(aes(color=Ttr), size=1.5) +
scale_color_gradientn(colours =c("grey90", brewer.pal(9,"YlGnBu")))
p4 <- Trajectories.progenitors %>% arrange(Htr2c) %>%
ggplot(aes(x= Pseudotime, y= Cycling.axis)) +
geom_point(aes(color=Htr2c), size=1.5) +
scale_color_gradientn(colours =c("grey90", brewer.pal(9,"YlGnBu")))
p1 + p2 + p3 + p4 + patchwork::plot_layout(ncol = 2)rm(list = ls()[!ls() %in% c("Trajectories.progenitors", "ChP.data")])
gc()## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 6109035 326.3 11352458 606.3 11352458 606.3
## Vcells 55782339 425.6 386538338 2949.1 620030699 4730.5
Import progenitors cycling coordinates in the full dataset
ChP.data$Cycling.axis <- sapply(ChP.data$Barcodes,
FUN = function(x) {
if (x %in% Trajectories.progenitors$Barcodes) {
x = Trajectories.progenitors[x, "Cycling.axis"]
} else {
x = NA
}
})FeaturePlot(object = ChP.data,
features = "Pseudotime",
pt.size = 2,
cols = rev(colorRampPalette(brewer.pal(n =10, name = "Spectral"))(100)),
reduction = "spring",
order = T) & NoAxes()# Transfer metadata
meta.data <- data.frame(Barcode= ChP.data$Barcodes,
Pseudotime= ChP.data$Pseudotime,
Cell.cycle= ChP.data$Phase)
Annot.data <- new('AnnotatedDataFrame', data = meta.data)
# Transfer counts data
ChP.data <- FindVariableFeatures(ChP.data, selection.method = "vst", nfeatures = 2000)
var.genes <- VariableFeatures(ChP.data)
count.data = data.frame(gene_short_name = rownames(ChP.data[["RNA"]]@data[var.genes,]),
row.names = rownames(ChP.data[["RNA"]]@data[var.genes,]))
feature.data <- new('AnnotatedDataFrame', data = count.data)
# Create the CellDataSet object including variable genes only
gbm_cds <- newCellDataSet(ChP.data[["RNA"]]@counts[var.genes,],
phenoData = Annot.data,
featureData = feature.data,
lowerDetectionLimit = 0,
expressionFamily = negbinomial())gbm_cds <- estimateSizeFactors(gbm_cds)
gbm_cds <- estimateDispersions(gbm_cds)
gbm_cds <- detectGenes(gbm_cds, min_expr = 0.1)rm(list = ls()[!ls() %in% c("ChP.data", "gbm_cds")])
gc()## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 6163153 329.2 11352458 606.3 11352458 606.3
## Vcells 57165822 436.2 309230671 2359.3 620030699 4730.5
pseudo.maturation.diff <- differentialGeneTest(gbm_cds[fData(gbm_cds)$num_cells_expressed >= 30,],
fullModelFormulaStr = "~sm.ns(Pseudotime, df = 3)",
reducedModelFormulaStr = "~1",
cores = parallel::detectCores() - 2)# Filter genes based on FDR
pseudo.maturation.diff.filtered <- pseudo.maturation.diff %>% filter(qval < 5e-5)# Create a new vector of 200 points
nPoints <- 200
new_data <- data.frame(Pseudotime = seq(min(pData(gbm_cds)$Pseudotime), max(pData(gbm_cds)$Pseudotime), length.out = nPoints))
# Smooth gene expression
Smooth.curve.matrix <- genSmoothCurves(gbm_cds[as.character(pseudo.maturation.diff.filtered$gene_short_name),],
trend_formula = "~sm.ns(Pseudotime, df = 3)",
relative_expr = TRUE,
new_data = new_data,
cores= parallel::detectCores() - 2)## Cluster gene by expression profiles
Pseudotime.genes.clusters <- cluster::pam(as.dist((1 - cor(Matrix::t(Smooth.curve.matrix),method = "pearson"))), k= 4)
ChP.Gene.dynamique <- data.frame(Gene= names(Pseudotime.genes.clusters$clustering),
Waves= Pseudotime.genes.clusters$clustering,
Gene.Clusters = Pseudotime.genes.clusters$clustering,
q.val = pseudo.maturation.diff.filtered$qval
) %>% arrange(Gene.Clusters)
row.names(ChP.Gene.dynamique) <- ChP.Gene.dynamique$Gene
ChP.Gene.dynamique$Gene.Clusters <- paste0("Clust.", ChP.Gene.dynamique$Gene.Clusters)# Order the rows using seriation
dst <- as.dist((1-cor(scale(t(Smooth.curve.matrix)), method = "spearman")))
row.ser <- seriation::seriate(dst, method ="R2E") #"R2E" #TSP #"GW" "GW_ward"
gene.order <- rownames(Smooth.curve.matrix[seriation::get_order(row.ser),])
pal <- wes_palette("Darjeeling1")
anno.colors <- list(Cell.state = c(Cycling_RG="#046c9a", Differentiating_cells="#ebcb2e"),
Gene.Clusters = c(Clust.1 =pal[1] , Clust.2=pal[2], Clust.3=pal[3], Clust.4=pal[4], Clust.5=pal[5]))
pheatmap::pheatmap(Smooth.curve.matrix[gene.order,],
scale = "row",
cluster_rows = F,
cluster_cols = F,
annotation_row = ChP.Gene.dynamique %>% dplyr::select(Gene.Clusters),
annotation_colors = anno.colors,
show_colnames = F,
show_rownames = F,
fontsize_row = 8,
color = viridis::viridis(9),
breaks = seq(-2.5,2.5, length.out = 9),
main = "")ChP.gostres <- gost(query = list("Clust.1" = ChP.Gene.dynamique %>% filter(Gene.Clusters == "Clust.1") %>% pull(Gene) %>% as.character(),
"Clust.2" = ChP.Gene.dynamique %>% filter(Gene.Clusters == "Clust.2") %>% pull(Gene) %>% as.character(),
"Clust.3" = ChP.Gene.dynamique %>% filter(Gene.Clusters == "Clust.3") %>% pull(Gene) %>% as.character(),
"Clust.4" = ChP.Gene.dynamique %>% filter(Gene.Clusters == "Clust.4") %>% pull(Gene) %>% as.character(),
"Clust.5" = ChP.Gene.dynamique %>% filter(Gene.Clusters == "Clust.5") %>% pull(Gene) %>% as.character()),
organism = "mmusculus", ordered_query = F,
multi_query = F, significant = T, exclude_iea = T,
measure_underrepresentation = F, evcodes = T,
user_threshold = 0.05, correction_method = "fdr",
domain_scope = "annotated", custom_bg = NULL,
numeric_ns = "", sources = c("GO:MF", "GO:BP"), as_short_link = F)ChP.gostres <- gost(query = as.character(ChP.Gene.dynamique$Gene),
organism = "mmusculus", ordered_query = F,
multi_query = F, significant = T, exclude_iea = T,
measure_underrepresentation = F, evcodes = T,
user_threshold = 0.05, correction_method = "fdr",
domain_scope = "annotated", custom_bg = NULL,
numeric_ns = "", sources = c("GO:MF", "GO:BP"), as_short_link = F)write.table(ChP.Gene.dynamique, "ChP.Gene.dynamique.csv", sep = ";")
write.table(apply(ChP.gostres$result,2,as.character),
"ChP.gostres.csv", sep = ";", quote = F, row.names = F)#date
format(Sys.time(), "%d %B, %Y, %H,%M")## [1] "24 février, 2022, 16,21"
#Packages used
sessionInfo()## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
## [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
## [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] splines stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] wesanderson_0.3.6 cowplot_1.1.1 ggExtra_0.9
## [4] RColorBrewer_1.1-2 dplyr_1.0.7 orthologsBioMART_0.1.0
## [7] data.table_1.14.2 biomaRt_2.50.1 gprofiler2_0.2.1
## [10] monocle_2.22.0 DDRTree_0.1.5 irlba_2.3.3
## [13] VGAM_1.1-5 ggplot2_3.3.5 Biobase_2.54.0
## [16] BiocGenerics_0.40.0 Matrix_1.4-0 princurve_2.1.6
## [19] Revelio_0.1.0 SeuratObject_4.0.4 Seurat_4.0.5
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.2 reticulate_1.22 tidyselect_1.1.1
## [4] RSQLite_2.2.8 AnnotationDbi_1.56.2 htmlwidgets_1.5.4
## [7] TSP_1.1-11 grid_4.1.2 combinat_0.0-8
## [10] docopt_0.7.1 Rtsne_0.15 munsell_0.5.0
## [13] codetools_0.2-18 ica_1.0-2 future_1.23.0
## [16] miniUI_0.1.1.1 withr_2.4.3 colorspace_2.0-2
## [19] fastICA_1.2-3 filelock_1.0.2 highr_0.9
## [22] knitr_1.36 ROCR_1.0-11 tensor_1.5
## [25] listenv_0.8.0 labeling_0.4.2 slam_0.1-49
## [28] GenomeInfoDbData_1.2.7 polyclip_1.10-0 bit64_4.0.5
## [31] farver_2.1.0 pheatmap_1.0.12 parallelly_1.29.0
## [34] vctrs_0.3.8 generics_0.1.1 xfun_0.28
## [37] BiocFileCache_2.2.0 R6_2.5.1 GenomeInfoDb_1.30.0
## [40] seriation_1.3.1 bitops_1.0-7 spatstat.utils_2.2-0
## [43] cachem_1.0.6 assertthat_0.2.1 promises_1.2.0.1
## [46] scales_1.1.1 gtable_0.3.0 globals_0.14.0
## [49] goftest_1.2-3 rlang_0.4.12 lazyeval_0.2.2
## [52] spatstat.geom_2.3-0 yaml_2.2.1 reshape2_1.4.4
## [55] abind_1.4-5 httpuv_1.6.3 tools_4.1.2
## [58] ellipsis_0.3.2 spatstat.core_2.3-1 jquerylib_0.1.4
## [61] ggridges_0.5.3 Rcpp_1.0.8 plyr_1.8.6
## [64] progress_1.2.2 zlibbioc_1.40.0 purrr_0.3.4
## [67] RCurl_1.98-1.5 densityClust_0.3 prettyunits_1.1.1
## [70] rpart_4.1.16 deldir_1.0-6 pbapply_1.5-0
## [73] viridis_0.6.2 S4Vectors_0.32.3 zoo_1.8-9
## [76] ggrepel_0.9.1 cluster_2.1.2 magrittr_2.0.2
## [79] scattermore_0.7 lmtest_0.9-39 RANN_2.6.1
## [82] fitdistrplus_1.1-6 matrixStats_0.61.0 hms_1.1.1
## [85] patchwork_1.1.1 mime_0.12 evaluate_0.14
## [88] xtable_1.8-4 XML_3.99-0.8 sparsesvd_0.2
## [91] IRanges_2.28.0 gridExtra_2.3 HSMMSingleCell_1.14.0
## [94] compiler_4.1.2 tibble_3.1.6 KernSmooth_2.23-20
## [97] crayon_1.4.2 htmltools_0.5.2 mgcv_1.8-38
## [100] later_1.3.0 tidyr_1.1.4 DBI_1.1.1
## [103] dbplyr_2.1.1 MASS_7.3-55 rappdirs_0.3.3
## [106] parallel_4.1.2 igraph_1.2.11 pkgconfig_2.0.3
## [109] registry_0.5-1 plotly_4.10.0 spatstat.sparse_2.0-0
## [112] foreach_1.5.1 xml2_1.3.3 bslib_0.3.1
## [115] XVector_0.34.0 stringr_1.4.0 digest_0.6.29
## [118] sctransform_0.3.2 RcppAnnoy_0.0.19 spatstat.data_2.1-0
## [121] Biostrings_2.62.0 rmarkdown_2.11 leiden_0.3.9
## [124] uwot_0.1.10 curl_4.3.2 shiny_1.7.1
## [127] lifecycle_1.0.1 nlme_3.1-153 jsonlite_1.7.2
## [130] viridisLite_0.4.0 limma_3.50.0 fansi_0.5.0
## [133] pillar_1.6.4 lattice_0.20-45 KEGGREST_1.34.0
## [136] fastmap_1.1.0 httr_1.4.2 survival_3.2-13
## [139] glue_1.5.1 qlcMatrix_0.9.7 FNN_1.1.3
## [142] iterators_1.0.13 png_0.1-7 bit_4.0.4
## [145] stringi_1.7.6 sass_0.4.0 blob_1.2.2
## [148] memoise_2.0.1 future.apply_1.8.1
Institute of Psychiatry and Neuroscience of Paris, INSERM U1266, 75014, Paris, France, matthieu.moreau@inserm.fr↩︎