In this part of the analysis we apply Revelio algorithm to explore cell cycle dynamic of pallial and hem domain radial glial cells
library(Seurat)
library(Revelio)
library(princurve)
library(monocle)
library(gprofiler2)
library(orthologsBioMART)
library(Matrix)
library(dplyr)
library(RColorBrewer)
library(ggplot2)
library(ggExtra)
library(cowplot)
library(wesanderson)
#Set ggplot theme as classic
theme_set(theme_classic())
<- readRDS("../QC.filtered.clustered.cells.RDS") Hem.data
DimPlot(object = Hem.data,
group.by = "Cell_ident",
reduction = "spring",
cols = c("#ebcb2e", #"ChP"
"#9ec22f", #"ChP_progenitors"
"#e7823a", # CR
"#cc3a1b", #"Dorso-Medial_pallium"
"#d14c8d", #"Hem"
"#4cabdc", #"Medial_pallium"
"#046c9a", # Pallial
"#4990c9" #"Thalamic_eminence"
) )
<- subset(Hem.data, idents = c("ChP", "ChP_progenitors"))
ChP.data
DimPlot(ChP.data,
reduction = "spring",
pt.size = 1,
cols = c("#83c3b8", "#009fda")) + NoAxes()
FeaturePlot(object = ChP.data ,
features = c("Fgf8", "Fgf17", "Adamts15", "Fgfbp1"),
pt.size = 0.5,
cols = c("grey90", brewer.pal(9,"YlGnBu")),
reduction = "spring",
order = T) & NoAxes() & NoLegend()
<- AddModuleScore(ChP.data,
ChP.data features = list(c("Fgf8", "Fgf17", "Adamts15", "Fgfbp1")),
ctrl = 10,
name = "Septum")
FeaturePlot(object = ChP.data ,
features = c("Septum1"),
pt.size = 0.5,
cols = rev(brewer.pal(10,"Spectral")),
reduction = "spring",
order = T) & NoAxes()
hist(ChP.data$Septum1, breaks = 20)
$Septal.prog <- ChP.data$Septum1 > 0 ChP.data
<- DimPlot(ChP.data,
p1 reduction = "spring",
group.by = "Septal.prog",
pt.size = 1) + NoAxes()
<- FeaturePlot(object = ChP.data ,
p2 features = c("Fgf17"),
pt.size = 0.5,
cols = c("grey90", brewer.pal(9,"YlGnBu")),
reduction = "spring",
order = T) & NoAxes() & NoLegend()
+ p2 p1
<- subset(ChP.data,
ChP.data subset = Septal.prog == FALSE & ChP.data$Spring_1 > 1300)
DimPlot(ChP.data,
reduction = "spring",
pt.size = 1,
cols = c("#83c3b8", "#009fda")) + NoAxes()
<- ChP.data@meta.data %>%
Trajectories.ChP ::select("Barcodes", "nUMI", "Spring_1", "Spring_2") dplyr
<- principal_curve(as.matrix(Trajectories.ChP[,c("Spring_1", "Spring_2")]),
fit smoother='lowess',
trace=TRUE,
f = 0.8,
stretch=2)
## Starting curve---distance^2: 117510509591
## Iteration 1---distance^2: 46558929
## Iteration 2---distance^2: 42504599
## Iteration 3---distance^2: 41393217
## Iteration 4---distance^2: 40991617
## Iteration 5---distance^2: 40890588
## Iteration 6---distance^2: 40826638
## Iteration 7---distance^2: 40830824
#The principal curve smoothed
<- as.data.frame(fit$s[order(fit$lambda),])
ChP.pc.line
#Pseudotime score
$Pseudotime <- fit$lambda/max(fit$lambda)
Trajectories.ChP
#Inverse the score if positive correlation with progenitor marker
if (cor(Trajectories.ChP$Pseudotime, ChP.data@assays$SCT@data['Hmga2', Trajectories.ChP$Barcodes]) > 0) {
$Pseudotime <- -(Trajectories.ChP$Pseudotime - max(Trajectories.ChP$Pseudotime))
Trajectories.ChP
}
$Pseudotime <- Trajectories.ChP$Pseudotime ChP.data
FeaturePlot(object = ChP.data,
features = "Pseudotime",
pt.size = 2,
cols = rev(colorRampPalette(brewer.pal(n =10, name = "Spectral"))(100)),
reduction = "spring",
order = T) & NoAxes()
<- subset(ChP.data, idents = c("ChP_progenitors"))
Prog.data
DimPlot(Prog.data,
reduction = "spring",
pt.size = 1,
cols = c("#009fda")) + NoAxes()
<- NormalizeData(Prog.data, normalization.method = "LogNormalize", scale.factor = 10000, assay = "RNA") Prog.data
<- as.matrix(Prog.data[["RNA"]]@counts) rawCounts
# Filter genes expressed by less than 10 cells
<- Matrix::rowSums(rawCounts > 0)
num.cells <- names(x = num.cells[which(x = num.cells >= 10)])
genes.use <- rawCounts[genes.use, ] rawCounts
rm(list = ls()[!ls() %in% c("rawCounts", "CCgenes", "ChP.data", "Prog.data")])
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 6031711 322.2 8756530 467.7 8756530 467.7
## Vcells 109360801 834.4 603966152 4607.9 620030699 4730.5
<- read.table("CCgenes.csv", sep = ";", header = T) CCgenes
We can now follow the tutorial form the package github page
<- createRevelioObject(rawData = rawCounts,
myData cyclicGenes = CCgenes,
lowernGeneCutoff = 0,
uppernUMICutoff = Inf,
ccPhaseAssignBasedOnIndividualBatches = F)
## 2022-02-24 16:17:15: reading data: 1.63secs
rm("rawCounts")
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 6050902 323.2 8756530 467.7 8756530 467.7
## Vcells 109450747 835.1 483172922 3686.4 620030699 4730.5
The getCellCyclePhaseAssignInformation
filter “outlier” cells for cell cycle phase assignation. We modified the function to keep all cells as we observed this does not affect the global cell cycle fitting procedure
source("../Functions/functions_InitializationCCPhaseAssignFiltering.R")
<- getCellCyclePhaseAssign_allcells(myData) myData
## 2022-02-24 16:17:20: assigning cell cycle phases: 8.68secs
<- getPCAData(dataList = myData) myData
## 2022-02-24 16:17:35: calculating PCA: 8.72secs
<- getOptimalRotation(dataList = myData) myData
## 2022-02-24 16:17:58: calculating optimal rotation: 1.14secs
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 6082646 324.9 11352458 606.3 11352458 606.3
## Vcells 146912240 1120.9 483172922 3686.4 620030699 4730.5
<- cbind(as.data.frame(t(myData@transformedData$dc$data[1:2,])),
CellCycledata nUMI= myData@cellInfo$nUMI,
Revelio.phase = factor(myData@cellInfo$ccPhase, levels = c("G1.S", "S", "G2", "G2.M", "M.G1")),
Revelio.cc= myData@cellInfo$ccPercentageUniformlySpaced,
Seurat.cc= Prog.data@meta.data[myData@cellInfo$cellID,"CC.Difference"])
ggplot(CellCycledata, aes(DC1, DC2)) +
geom_point(aes(color = Revelio.phase)) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5]))
<- ggplot(CellCycledata, aes(DC1, DC2)) +
p1 geom_point(aes(color = Revelio.phase)) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5]))
<- ggplot(CellCycledata, aes(DC1, DC2)) +
p2 geom_point(aes(color=Revelio.cc), size=2, shape=16) +
scale_color_gradientn(colours=rev(colorRampPalette(brewer.pal(n =11, name = "Spectral"))(100)),
name='Revelio_cc')
+ p2 p1
ggplot(CellCycledata, aes(x= Revelio.cc, y= nUMI/10000)) +
geom_point(aes(color= Revelio.phase), size=0.5) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5])) +
geom_smooth(method="loess", n= 50, fill="grey") +
ylim(0,NA)
$Revelio.DC1 <- CellCycledata$DC1
Prog.data$Revelio.DC2 <- CellCycledata$DC2
Prog.data
$Revelio.phase <- CellCycledata$Revelio.phase
Prog.data$Revelio.cc <- CellCycledata$Revelio.cc Prog.data
<- FeaturePlot(object = Prog.data,
p1 features = "Revelio.cc",
pt.size = 1,
cols = rev(brewer.pal(10,"Spectral")),
reduction = "spring",
order = T) & NoAxes()
<- DimPlot(object = Prog.data,
p2 group.by = "Revelio.phase",
pt.size = 1,
reduction = "spring",
cols = c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5])) & NoAxes()
<- FeaturePlot(object = Prog.data,
p3 features = "Pseudotime",
pt.size = 2,
cols = rev(colorRampPalette(brewer.pal(n =10, name = "Spectral"))(100)),
reduction = "spring",
order = T) & NoAxes()
+ p2 + p3 p1
<- Prog.data@meta.data %>%
Trajectories.progenitors ::select(Barcodes, nUMI, Spring_1, Spring_2, Pseudotime) %>%
dplyrmutate(Cycling.axis= Prog.data$Revelio.cc,
Phase = Prog.data$Revelio.phase,
Gmnc= Prog.data@assays$RNA@data["Gmnc",],
Ttr= Prog.data@assays$RNA@data["Ttr",],
Htr2c= Prog.data@assays$RNA@data["Htr2c",],
Top2a= Prog.data@assays$RNA@data["Top2a",])
<- ggplot(Trajectories.progenitors, aes(x= Pseudotime, y= Cycling.axis)) +
p1 geom_point(aes(color= Phase), size=1.5) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5]))
<- Trajectories.progenitors %>% arrange(Gmnc) %>%
p2 ggplot(aes(x= Pseudotime, y= Cycling.axis)) +
geom_point(aes(color=Gmnc), size=1.5) +
scale_color_gradientn(colours =c("grey90", brewer.pal(9,"YlGnBu")))
<- Trajectories.progenitors %>% arrange(Ttr) %>%
p3 ggplot(aes(x= Pseudotime, y= Cycling.axis)) +
geom_point(aes(color=Ttr), size=1.5) +
scale_color_gradientn(colours =c("grey90", brewer.pal(9,"YlGnBu")))
<- Trajectories.progenitors %>% arrange(Htr2c) %>%
p4 ggplot(aes(x= Pseudotime, y= Cycling.axis)) +
geom_point(aes(color=Htr2c), size=1.5) +
scale_color_gradientn(colours =c("grey90", brewer.pal(9,"YlGnBu")))
+ p2 + p3 + p4 + patchwork::plot_layout(ncol = 2) p1
rm(list = ls()[!ls() %in% c("Trajectories.progenitors", "ChP.data")])
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 6109035 326.3 11352458 606.3 11352458 606.3
## Vcells 55782339 425.6 386538338 2949.1 620030699 4730.5
Import progenitors cycling coordinates in the full dataset
$Cycling.axis <- sapply(ChP.data$Barcodes,
ChP.dataFUN = function(x) {
if (x %in% Trajectories.progenitors$Barcodes) {
= Trajectories.progenitors[x, "Cycling.axis"]
x else {
} = NA
x
} })
FeaturePlot(object = ChP.data,
features = "Pseudotime",
pt.size = 2,
cols = rev(colorRampPalette(brewer.pal(n =10, name = "Spectral"))(100)),
reduction = "spring",
order = T) & NoAxes()
# Transfer metadata
<- data.frame(Barcode= ChP.data$Barcodes,
meta.data Pseudotime= ChP.data$Pseudotime,
Cell.cycle= ChP.data$Phase)
<- new('AnnotatedDataFrame', data = meta.data)
Annot.data
# Transfer counts data
<- FindVariableFeatures(ChP.data, selection.method = "vst", nfeatures = 2000)
ChP.data <- VariableFeatures(ChP.data)
var.genes = data.frame(gene_short_name = rownames(ChP.data[["RNA"]]@data[var.genes,]),
count.data row.names = rownames(ChP.data[["RNA"]]@data[var.genes,]))
<- new('AnnotatedDataFrame', data = count.data)
feature.data
# Create the CellDataSet object including variable genes only
<- newCellDataSet(ChP.data[["RNA"]]@counts[var.genes,],
gbm_cds phenoData = Annot.data,
featureData = feature.data,
lowerDetectionLimit = 0,
expressionFamily = negbinomial())
<- estimateSizeFactors(gbm_cds)
gbm_cds <- estimateDispersions(gbm_cds)
gbm_cds <- detectGenes(gbm_cds, min_expr = 0.1) gbm_cds
rm(list = ls()[!ls() %in% c("ChP.data", "gbm_cds")])
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 6163153 329.2 11352458 606.3 11352458 606.3
## Vcells 57165822 436.2 309230671 2359.3 620030699 4730.5
<- differentialGeneTest(gbm_cds[fData(gbm_cds)$num_cells_expressed >= 30,],
pseudo.maturation.diff fullModelFormulaStr = "~sm.ns(Pseudotime, df = 3)",
reducedModelFormulaStr = "~1",
cores = parallel::detectCores() - 2)
# Filter genes based on FDR
<- pseudo.maturation.diff %>% filter(qval < 5e-5) pseudo.maturation.diff.filtered
# Create a new vector of 200 points
<- 200
nPoints <- data.frame(Pseudotime = seq(min(pData(gbm_cds)$Pseudotime), max(pData(gbm_cds)$Pseudotime), length.out = nPoints))
new_data
# Smooth gene expression
<- genSmoothCurves(gbm_cds[as.character(pseudo.maturation.diff.filtered$gene_short_name),],
Smooth.curve.matrix trend_formula = "~sm.ns(Pseudotime, df = 3)",
relative_expr = TRUE,
new_data = new_data,
cores= parallel::detectCores() - 2)
## Cluster gene by expression profiles
<- cluster::pam(as.dist((1 - cor(Matrix::t(Smooth.curve.matrix),method = "pearson"))), k= 4)
Pseudotime.genes.clusters
<- data.frame(Gene= names(Pseudotime.genes.clusters$clustering),
ChP.Gene.dynamique Waves= Pseudotime.genes.clusters$clustering,
Gene.Clusters = Pseudotime.genes.clusters$clustering,
q.val = pseudo.maturation.diff.filtered$qval
%>% arrange(Gene.Clusters)
)
row.names(ChP.Gene.dynamique) <- ChP.Gene.dynamique$Gene
$Gene.Clusters <- paste0("Clust.", ChP.Gene.dynamique$Gene.Clusters) ChP.Gene.dynamique
# Order the rows using seriation
<- as.dist((1-cor(scale(t(Smooth.curve.matrix)), method = "spearman")))
dst <- seriation::seriate(dst, method ="R2E") #"R2E" #TSP #"GW" "GW_ward"
row.ser <- rownames(Smooth.curve.matrix[seriation::get_order(row.ser),])
gene.order
<- wes_palette("Darjeeling1")
pal <- list(Cell.state = c(Cycling_RG="#046c9a", Differentiating_cells="#ebcb2e"),
anno.colors Gene.Clusters = c(Clust.1 =pal[1] , Clust.2=pal[2], Clust.3=pal[3], Clust.4=pal[4], Clust.5=pal[5]))
::pheatmap(Smooth.curve.matrix[gene.order,],
pheatmapscale = "row",
cluster_rows = F,
cluster_cols = F,
annotation_row = ChP.Gene.dynamique %>% dplyr::select(Gene.Clusters),
annotation_colors = anno.colors,
show_colnames = F,
show_rownames = F,
fontsize_row = 8,
color = viridis::viridis(9),
breaks = seq(-2.5,2.5, length.out = 9),
main = "")
<- gost(query = list("Clust.1" = ChP.Gene.dynamique %>% filter(Gene.Clusters == "Clust.1") %>% pull(Gene) %>% as.character(),
ChP.gostres "Clust.2" = ChP.Gene.dynamique %>% filter(Gene.Clusters == "Clust.2") %>% pull(Gene) %>% as.character(),
"Clust.3" = ChP.Gene.dynamique %>% filter(Gene.Clusters == "Clust.3") %>% pull(Gene) %>% as.character(),
"Clust.4" = ChP.Gene.dynamique %>% filter(Gene.Clusters == "Clust.4") %>% pull(Gene) %>% as.character(),
"Clust.5" = ChP.Gene.dynamique %>% filter(Gene.Clusters == "Clust.5") %>% pull(Gene) %>% as.character()),
organism = "mmusculus", ordered_query = F,
multi_query = F, significant = T, exclude_iea = T,
measure_underrepresentation = F, evcodes = T,
user_threshold = 0.05, correction_method = "fdr",
domain_scope = "annotated", custom_bg = NULL,
numeric_ns = "", sources = c("GO:MF", "GO:BP"), as_short_link = F)
<- gost(query = as.character(ChP.Gene.dynamique$Gene),
ChP.gostres organism = "mmusculus", ordered_query = F,
multi_query = F, significant = T, exclude_iea = T,
measure_underrepresentation = F, evcodes = T,
user_threshold = 0.05, correction_method = "fdr",
domain_scope = "annotated", custom_bg = NULL,
numeric_ns = "", sources = c("GO:MF", "GO:BP"), as_short_link = F)
write.table(ChP.Gene.dynamique, "ChP.Gene.dynamique.csv", sep = ";")
write.table(apply(ChP.gostres$result,2,as.character),
"ChP.gostres.csv", sep = ";", quote = F, row.names = F)
#date
format(Sys.time(), "%d %B, %Y, %H,%M")
## [1] "24 février, 2022, 16,21"
#Packages used
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
## [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
## [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] splines stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] wesanderson_0.3.6 cowplot_1.1.1 ggExtra_0.9
## [4] RColorBrewer_1.1-2 dplyr_1.0.7 orthologsBioMART_0.1.0
## [7] data.table_1.14.2 biomaRt_2.50.1 gprofiler2_0.2.1
## [10] monocle_2.22.0 DDRTree_0.1.5 irlba_2.3.3
## [13] VGAM_1.1-5 ggplot2_3.3.5 Biobase_2.54.0
## [16] BiocGenerics_0.40.0 Matrix_1.4-0 princurve_2.1.6
## [19] Revelio_0.1.0 SeuratObject_4.0.4 Seurat_4.0.5
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.2 reticulate_1.22 tidyselect_1.1.1
## [4] RSQLite_2.2.8 AnnotationDbi_1.56.2 htmlwidgets_1.5.4
## [7] TSP_1.1-11 grid_4.1.2 combinat_0.0-8
## [10] docopt_0.7.1 Rtsne_0.15 munsell_0.5.0
## [13] codetools_0.2-18 ica_1.0-2 future_1.23.0
## [16] miniUI_0.1.1.1 withr_2.4.3 colorspace_2.0-2
## [19] fastICA_1.2-3 filelock_1.0.2 highr_0.9
## [22] knitr_1.36 ROCR_1.0-11 tensor_1.5
## [25] listenv_0.8.0 labeling_0.4.2 slam_0.1-49
## [28] GenomeInfoDbData_1.2.7 polyclip_1.10-0 bit64_4.0.5
## [31] farver_2.1.0 pheatmap_1.0.12 parallelly_1.29.0
## [34] vctrs_0.3.8 generics_0.1.1 xfun_0.28
## [37] BiocFileCache_2.2.0 R6_2.5.1 GenomeInfoDb_1.30.0
## [40] seriation_1.3.1 bitops_1.0-7 spatstat.utils_2.2-0
## [43] cachem_1.0.6 assertthat_0.2.1 promises_1.2.0.1
## [46] scales_1.1.1 gtable_0.3.0 globals_0.14.0
## [49] goftest_1.2-3 rlang_0.4.12 lazyeval_0.2.2
## [52] spatstat.geom_2.3-0 yaml_2.2.1 reshape2_1.4.4
## [55] abind_1.4-5 httpuv_1.6.3 tools_4.1.2
## [58] ellipsis_0.3.2 spatstat.core_2.3-1 jquerylib_0.1.4
## [61] ggridges_0.5.3 Rcpp_1.0.8 plyr_1.8.6
## [64] progress_1.2.2 zlibbioc_1.40.0 purrr_0.3.4
## [67] RCurl_1.98-1.5 densityClust_0.3 prettyunits_1.1.1
## [70] rpart_4.1.16 deldir_1.0-6 pbapply_1.5-0
## [73] viridis_0.6.2 S4Vectors_0.32.3 zoo_1.8-9
## [76] ggrepel_0.9.1 cluster_2.1.2 magrittr_2.0.2
## [79] scattermore_0.7 lmtest_0.9-39 RANN_2.6.1
## [82] fitdistrplus_1.1-6 matrixStats_0.61.0 hms_1.1.1
## [85] patchwork_1.1.1 mime_0.12 evaluate_0.14
## [88] xtable_1.8-4 XML_3.99-0.8 sparsesvd_0.2
## [91] IRanges_2.28.0 gridExtra_2.3 HSMMSingleCell_1.14.0
## [94] compiler_4.1.2 tibble_3.1.6 KernSmooth_2.23-20
## [97] crayon_1.4.2 htmltools_0.5.2 mgcv_1.8-38
## [100] later_1.3.0 tidyr_1.1.4 DBI_1.1.1
## [103] dbplyr_2.1.1 MASS_7.3-55 rappdirs_0.3.3
## [106] parallel_4.1.2 igraph_1.2.11 pkgconfig_2.0.3
## [109] registry_0.5-1 plotly_4.10.0 spatstat.sparse_2.0-0
## [112] foreach_1.5.1 xml2_1.3.3 bslib_0.3.1
## [115] XVector_0.34.0 stringr_1.4.0 digest_0.6.29
## [118] sctransform_0.3.2 RcppAnnoy_0.0.19 spatstat.data_2.1-0
## [121] Biostrings_2.62.0 rmarkdown_2.11 leiden_0.3.9
## [124] uwot_0.1.10 curl_4.3.2 shiny_1.7.1
## [127] lifecycle_1.0.1 nlme_3.1-153 jsonlite_1.7.2
## [130] viridisLite_0.4.0 limma_3.50.0 fansi_0.5.0
## [133] pillar_1.6.4 lattice_0.20-45 KEGGREST_1.34.0
## [136] fastmap_1.1.0 httr_1.4.2 survival_3.2-13
## [139] glue_1.5.1 qlcMatrix_0.9.7 FNN_1.1.3
## [142] iterators_1.0.13 png_0.1-7 bit_4.0.4
## [145] stringi_1.7.6 sass_0.4.0 blob_1.2.2
## [148] memoise_2.0.1 future.apply_1.8.1
Institute of Psychiatry and Neuroscience of Paris, INSERM U1266, 75014, Paris, France, matthieu.moreau@inserm.fr↩︎