library(Seurat)
library(monocle)
library(Matrix)
library(dplyr)
library(RColorBrewer)
library(ggplot2)
library(ggExtra)
library(cowplot)
library(wesanderson)
#Set ggplot theme as classic
theme_set(theme_classic())
<- readRDS("Progenitors.RDS") Progenitors.data
<- FeaturePlot(object = Progenitors.data,
p1 features = "Revelio.cc",
pt.size = 1,
cols = rev(colorRampPalette(brewer.pal(n =10, name = "Spectral"))(100)),
reduction = "spring",
order = T) & NoAxes()
<- DimPlot(object = Progenitors.data,
p2 group.by = "Revelio.phase",
pt.size = 1,
reduction = "spring",
cols = c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5])) & NoAxes()
+ p2 p1
# Find all cell cycle viariable genes common to all domains
rm(list = ls()[!ls() %in% c("Progenitors.data")])
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3349592 178.9 6291914 336.1 4748340 253.6
## Vcells 226307592 1726.6 291573378 2224.6 231992099 1770.0
<- NormalizeData(Progenitors.data, normalization.method = "LogNormalize", scale.factor = 10000, assay = "RNA")
Progenitors.data
<- FindVariableFeatures(Progenitors.data, selection.method = "vst", nfeatures = 2000, assay = "RNA") Progenitors.data
# Transfer metadata
<- data.frame(barcode= Progenitors.data$Barcodes,
meta.data domain= Progenitors.data$Cell_ident,
Revelio.phase= Progenitors.data$Revelio.phase,
Revelio.cc= Progenitors.data$Revelio.cc)
<- new('AnnotatedDataFrame', data = meta.data)
Annot.data
# Transfer counts data
<- Progenitors.data[["RNA"]]@var.features
var.genes = data.frame(gene_short_name = rownames(Progenitors.data[["RNA"]]@data[var.genes,]),
count.data row.names = rownames(Progenitors.data[["RNA"]]@data[var.genes,]))
<- new('AnnotatedDataFrame', data = count.data)
feature.data
# Create the CellDataSet object including variable genes only
<- newCellDataSet(Progenitors.data[["RNA"]]@counts[var.genes,],
gbm_cds phenoData = Annot.data,
featureData = feature.data,
lowerDetectionLimit = 0,
expressionFamily = negbinomial())
<- estimateSizeFactors(gbm_cds)
gbm_cds <- estimateDispersions(gbm_cds)
gbm_cds <- detectGenes(gbm_cds, min_expr = 0.1) gbm_cds
<- differentialGeneTest(gbm_cds[fData(gbm_cds)$num_cells_expressed > 200,],
Cellcycle.diff fullModelFormulaStr = "~sm.ns(Revelio.cc, df = 3)",
reducedModelFormulaStr = "~1",
cores = parallel::detectCores() - 2)
# Filter genes based on FDR
<- Cellcycle.diff %>% filter(qval < 5e-20) Cellcycle.diff.filtered
# Create a new vector of 200 points
<- 200
nPoints <- data.frame(Revelio.cc = seq(min(pData(gbm_cds)$Revelio.cc), max(pData(gbm_cds)$Revelio.cc), length.out = nPoints))
new_data
# Smooth gene expression
<- genSmoothCurves(gbm_cds[as.character(Cellcycle.diff.filtered$gene_short_name),],
Smooth.curve.matrix trend_formula = "~sm.ns(Revelio.cc, df = 3)",
relative_expr = TRUE,
new_data = new_data,
cores= parallel::detectCores() - 2)
## Cluster gene by expression profiles
<- cluster::pam(as.dist((1 - cor(Matrix::t(Smooth.curve.matrix),method = "spearman"))), k= 5)
Pseudotime.genes.clusters
<- data.frame(Gene= names(Pseudotime.genes.clusters$clustering),
Ccycle.Gene.dynamique Waves= Pseudotime.genes.clusters$clustering,
Gene.Clusters = Pseudotime.genes.clusters$clustering,
q.val = Cellcycle.diff.filtered$qval
%>% arrange(Gene.Clusters)
)
row.names(Ccycle.Gene.dynamique) <- Ccycle.Gene.dynamique$Gene
$Gene.Clusters <- paste0("Clust.", Ccycle.Gene.dynamique$Gene.Clusters) Ccycle.Gene.dynamique
# Order the rows using seriation
<- as.dist((1-cor(scale(t(Smooth.curve.matrix)), method = "spearman")))
dst <- seriation::seriate(dst, method ="R2E") #"R2E" #TSP #"GW" "GW_ward"
row.ser <- rownames(Smooth.curve.matrix[seriation::get_order(row.ser),])
gene.order
<- wes_palette("Darjeeling1")
pal <- list(Cell.state = c(Cycling_RG="#046c9a", Differentiating_cells="#ebcb2e"),
anno.colors Gene.Clusters = c(Clust.1 =pal[1] , Clust.2=pal[2], Clust.3=pal[3], Clust.4=pal[4], Clust.5=pal[5]))
<- data.frame(Cell.state = rep(c("Cycling_RG","Differentiating_cells"), each=100))
col.anno
::pheatmap(Smooth.curve.matrix[gene.order,],
pheatmapscale = "row",
cluster_rows = F,
cluster_cols = F,
annotation_row = Ccycle.Gene.dynamique %>% dplyr::select(Gene.Clusters),
#annotation_col = col.anno,
annotation_colors = anno.colors,
show_colnames = F,
show_rownames = F,
fontsize_row = 8,
color = viridis::viridis(9),
breaks = seq(-2.5,2.5, length.out = 9),
main = "")
# Plot gene trends along cycling axis
<- function(Seurat.data,
Cell.cycle.trend
group.by,
gene){
<- Seurat.data@meta.data %>% select("Revelio.cc", "Revelio.phase", "Cell_ident")
data $Gene <- Progenitors.data@assays$RNA@data[gene,]
data
if (!group.by == "Cell_ident") {
<- ggplot(data=data, aes(x= Revelio.cc, y= Gene)) +
p geom_point(aes(color= Revelio.phase), size=0.5) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5])) +
geom_smooth(method="loess", n= 50, fill="grey") +
ylim(0,NA) +
ggtitle(gene)
else {
} <- ggplot(data=data, aes(x= Revelio.cc, y= Gene)) +
p geom_point(aes(color= Cell_ident), size=0.5) +
scale_color_manual(values= c("#68b041", "#e3c148", "#e46b6b")) +
geom_smooth(method="loess", n= 50, aes(color= Cell_ident)) +
ylim(0,NA) +
ggtitle(gene)
}
return(p)
}
<- function(Seurat.data,
Plot.Genes.trend
group.by,
genes){<- mapply(FUN = Cell.cycle.trend, gene = genes,
pList MoreArgs = list(Seurat.data = Seurat.data, group.by=group.by),
SIMPLIFY = FALSE)
print(x = cowplot::plot_grid(plotlist = pList, ncol = 2))
}
Plot.Genes.trend(Seurat.data= Progenitors.data,
group.by = "Revelio.phase",
genes= c("Gadd45g", "Hes6", "Sox4" #Module1
))
Plot.Genes.trend(Seurat.data= Progenitors.data,
group.by = "Revelio.phase",
genes= c("Casp8ap2", "Emx1","Mcm4" #Module5
))
Plot.Genes.trend(Seurat.data= Progenitors.data,
group.by = "Revelio.phase",
genes= c("Pold1","Ticrr", "Plk4"#Module4
))
Plot.Genes.trend(Seurat.data= Progenitors.data,
group.by = "Revelio.phase",
genes= c("Nes", "Sox2", "Bora"#Module3
))
Plot.Genes.trend(Seurat.data= Progenitors.data,
group.by = "Revelio.phase",
genes= c("Gas1", "Bcl11b", "Dynll1"#Module2
))
Plot.Genes.trend(Seurat.data= Progenitors.data,
group.by = "Cell_ident",
genes= c("Gadd45g", "Hes6", "Sox4" #Module1
))
Plot.Genes.trend(Seurat.data= Progenitors.data,
group.by = "Cell_ident",
genes= c("Casp8ap2", "Emx1","Mcm4" #Module5
))
Plot.Genes.trend(Seurat.data= Progenitors.data,
group.by = "Cell_ident",
genes= c("Pold1","Ticrr", "Plk4"#Module4
))
Plot.Genes.trend(Seurat.data= Progenitors.data,
group.by = "Cell_ident",
genes= c("Nes", "Sox2", "Bora"#Module3
))
Plot.Genes.trend(Seurat.data= Progenitors.data,
group.by = "Cell_ident",
genes= c("Gas1", "Bcl11b", "Dynll1"#Module2
))
#date
format(Sys.time(), "%d %B, %Y, %H,%M")
## [1] "24 février, 2022, 15,49"
#Packages used
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
## [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
## [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] splines stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] wesanderson_0.3.6 cowplot_1.1.1 ggExtra_0.9
## [4] RColorBrewer_1.1-2 dplyr_1.0.7 monocle_2.22.0
## [7] DDRTree_0.1.5 irlba_2.3.3 VGAM_1.1-5
## [10] ggplot2_3.3.5 Biobase_2.54.0 BiocGenerics_0.40.0
## [13] Matrix_1.4-0 SeuratObject_4.0.4 Seurat_4.0.5
##
## loaded via a namespace (and not attached):
## [1] plyr_1.8.6 igraph_1.2.11 lazyeval_0.2.2
## [4] densityClust_0.3 listenv_0.8.0 scattermore_0.7
## [7] fastICA_1.2-3 digest_0.6.29 foreach_1.5.1
## [10] htmltools_0.5.2 viridis_0.6.2 fansi_0.5.0
## [13] magrittr_2.0.2 tensor_1.5 cluster_2.1.2
## [16] ROCR_1.0-11 limma_3.50.0 globals_0.14.0
## [19] matrixStats_0.61.0 docopt_0.7.1 spatstat.sparse_2.0-0
## [22] colorspace_2.0-2 ggrepel_0.9.1 xfun_0.28
## [25] sparsesvd_0.2 crayon_1.4.2 jsonlite_1.7.2
## [28] spatstat.data_2.1-0 iterators_1.0.13 survival_3.2-13
## [31] zoo_1.8-9 glue_1.5.1 polyclip_1.10-0
## [34] registry_0.5-1 gtable_0.3.0 leiden_0.3.9
## [37] future.apply_1.8.1 abind_1.4-5 scales_1.1.1
## [40] pheatmap_1.0.12 DBI_1.1.1 miniUI_0.1.1.1
## [43] Rcpp_1.0.8 viridisLite_0.4.0 xtable_1.8-4
## [46] reticulate_1.22 spatstat.core_2.3-1 htmlwidgets_1.5.4
## [49] httr_1.4.2 FNN_1.1.3 ellipsis_0.3.2
## [52] ica_1.0-2 pkgconfig_2.0.3 farver_2.1.0
## [55] sass_0.4.0 uwot_0.1.10 deldir_1.0-6
## [58] utf8_1.2.2 tidyselect_1.1.1 labeling_0.4.2
## [61] rlang_0.4.12 reshape2_1.4.4 later_1.3.0
## [64] munsell_0.5.0 tools_4.1.2 generics_0.1.1
## [67] ggridges_0.5.3 evaluate_0.14 stringr_1.4.0
## [70] fastmap_1.1.0 yaml_2.2.1 goftest_1.2-3
## [73] knitr_1.36 fitdistrplus_1.1-6 purrr_0.3.4
## [76] RANN_2.6.1 pbapply_1.5-0 future_1.23.0
## [79] nlme_3.1-153 mime_0.12 slam_0.1-49
## [82] compiler_4.1.2 plotly_4.10.0 png_0.1-7
## [85] spatstat.utils_2.2-0 tibble_3.1.6 bslib_0.3.1
## [88] stringi_1.7.6 highr_0.9 lattice_0.20-45
## [91] HSMMSingleCell_1.14.0 vctrs_0.3.8 pillar_1.6.4
## [94] lifecycle_1.0.1 spatstat.geom_2.3-0 combinat_0.0-8
## [97] lmtest_0.9-39 jquerylib_0.1.4 RcppAnnoy_0.0.19
## [100] data.table_1.14.2 seriation_1.3.1 httpuv_1.6.3
## [103] patchwork_1.1.1 R6_2.5.1 promises_1.2.0.1
## [106] TSP_1.1-11 KernSmooth_2.23-20 gridExtra_2.3
## [109] parallelly_1.29.0 codetools_0.2-18 MASS_7.3-55
## [112] assertthat_0.2.1 withr_2.4.3 qlcMatrix_0.9.7
## [115] sctransform_0.3.2 mgcv_1.8-38 parallel_4.1.2
## [118] grid_4.1.2 rpart_4.1.16 tidyr_1.1.4
## [121] rmarkdown_2.11 Rtsne_0.15 shiny_1.7.1
Institute of Psychiatry and Neuroscience of Paris, INSERM U1266, 75014, Paris, France, matthieu.moreau@inserm.fr↩︎