library(Seurat)
library(princurve)
library(Revelio)
library(monocle)
library(gprofiler2)
library(seriation)
library(Matrix)
library(dplyr)
library(RColorBrewer)
library(ggplot2)
library(ggExtra)
library(cowplot)
library(wesanderson)
library(UpSetR)
#Set ggplot theme as classic
theme_set(theme_classic())WT.KO.integrated <- readRDS("WT_KO.integrated.RDS")DimPlot(WT.KO.integrated,
reduction = "integrated_spring",
group.by = "Lineage",
pt.size = 1,
cols = c("#cc391b","#e7823a","#969696","#026c9a")
) + NoAxes()Neurons.data <- subset(WT.KO.integrated,
subset = Lineage %in% c("Cajal-Retzius_neurons", "Pallial_neurons") & orig.ident == "Hem1" & Cell.ident.WT %in% c("Cajal-Retzius_neurons", "Pallial_neurons"))
DimPlot(Neurons.data,
group.by = "Lineage",
reduction = "integrated_spring",
pt.size = 1,
cols = c("#cc391b","#026c9a")
) + NoAxes()rm(WT.KO.integrated)
gc()## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3376158 180.4 6140847 328.0 5661690 302.4
## Vcells 85680579 653.7 997799792 7612.7 1064977766 8125.2
Trajectories.Hem <- Neurons.data@meta.data %>%
select("Barcodes", "nCount_RNA", "Spring_1", "Spring_2", "AP_signature","BP_signature", "EN_signature", "LN_signature", "Lineage") %>%
filter(Lineage == "Cajal-Retzius_neurons")fit <- principal_curve(as.matrix(Trajectories.Hem[,c("Spring_1", "Spring_2")]),
smoother='lowess',
trace=TRUE,
f = .7,
stretch=0)## Starting curve---distance^2: 11526697569
## Iteration 1---distance^2: 13397620
## Iteration 2---distance^2: 13420664
## Iteration 3---distance^2: 13422248
#The principal curve smoothed
Hem.pc.line <- as.data.frame(fit$s[order(fit$lambda),])
#Pseudotime score
Trajectories.Hem$PseudotimeScore <- fit$lambda/max(fit$lambda)if (cor(Trajectories.Hem$PseudotimeScore, Neurons.data@assays$RNA@data['Hmga2', Trajectories.Hem$Barcodes]) > 0) {
Trajectories.Hem$PseudotimeScore <- -(Trajectories.Hem$PseudotimeScore - max(Trajectories.Hem$PseudotimeScore))
}Trajectories.Pallial <- Neurons.data@meta.data %>%
select("Barcodes", "nCount_RNA", "Spring_1", "Spring_2", "AP_signature","BP_signature", "EN_signature", "LN_signature", "Lineage") %>%
filter(Lineage == "Pallial_neurons")fit <- principal_curve(as.matrix(Trajectories.Pallial[,c("Spring_1", "Spring_2")]),
smoother='lowess',
trace=TRUE,
f = .7,
stretch=0)## Starting curve---distance^2: 6447222830
## Iteration 1---distance^2: 10179991
## Iteration 2---distance^2: 10187204
#The principal curve smoothed
Pallial.pc.line <- as.data.frame(fit$s[order(fit$lambda),])
#Pseudotime score
Trajectories.Pallial$PseudotimeScore <- fit$lambda/max(fit$lambda)if (cor(Trajectories.Pallial$PseudotimeScore, Neurons.data@assays$RNA@data['Hmga2', Trajectories.Pallial$Barcodes]) > 0) {
Trajectories.Pallial$PseudotimeScore <- -(Trajectories.Pallial$PseudotimeScore - max(Trajectories.Pallial$PseudotimeScore))
}Trajectories.neurons <- rbind(Trajectories.Pallial, Trajectories.Hem)cols <- brewer.pal(n =11, name = "Spectral")
ggplot(Trajectories.neurons, aes(Spring_1, Spring_2)) +
geom_point(aes(color=PseudotimeScore), size=2, shape=16) +
scale_color_gradientn(colours=rev(cols), name='Speudotime score') +
geom_line(data=Pallial.pc.line, color="#026c9a", size=0.77) +
geom_line(data=Hem.pc.line, color="#cc391b", size=0.77)Since we observe the first 25% of both trajectories are occupied by few, likely progenitor cells, we shift this cell along the axis
Pseudotime.intervals <- Trajectories.neurons%>%
select(Lineage, PseudotimeScore) %>%
mutate(Pseudotime.bins = cut(Trajectories.neurons$PseudotimeScore, seq(0, max(Trajectories.neurons$PseudotimeScore) + 0.05, 0.05), dig.lab = 2, right = FALSE)) %>%
group_by(Lineage, Pseudotime.bins) %>%
summarise(n=n())
ggplot(Pseudotime.intervals, aes(x=Pseudotime.bins, y=n, fill=Lineage)) +
geom_bar(stat = "identity", width = 0.90) +
theme(axis.text.x = element_text(angle = 45, hjust=1))+
scale_fill_manual(values= c("#cc391b", "#026c9a"))score <- sapply(Trajectories.neurons$PseudotimeScore,
FUN = function(x) if (x <= 0.2) {x= 0.2} else { x=x })
Trajectories.neurons$PseudotimeScore.shifted <- (score - min(score)) / (max(score) - min(score))ggplot(Trajectories.neurons, aes(x= PseudotimeScore.shifted, y= nCount_RNA/10000)) +
geom_point(aes(color= Lineage), size=0.5) +
scale_color_manual(values= c("#cc391b", "#026c9a")) +
geom_smooth(method="loess", n= 50, aes(color= Lineage)) +
ylim(0,NA)rm(list = ls()[!ls() %in% c("Trajectories.neurons")])Progenitors.data <- readRDS("../ProgenitorsDiversity/Progenitors.RDS")table(Progenitors.data$Cell_ident)##
## Dorso-Medial_pallium Hem Medial_pallium
## 3451 1954 2719
To balance the number of progenitors in both domain we will only work with Hem and Medial_pallium annotated cells. Since we are using pallial cell to contrast CR specific trajectory we think this approximation will not significantly affect our analysis.
Progenitors.data <- subset(Progenitors.data, subset = Cell_ident %in% c("Hem", "Medial_pallium") & orig.ident == "Hem1")p1 <- DimPlot(Progenitors.data,
reduction = "spring",
pt.size = 0.5,
cols = c("#e3c148", "#e46b6b")) + NoAxes()
p2 <- FeaturePlot(object = Progenitors.data,
features = "Revelio.cc",
pt.size = 0.5,
cols = rev(brewer.pal(10,"Spectral")),
reduction = "spring",
order = T) & NoAxes()
p3 <- DimPlot(object = Progenitors.data,
group.by = "Revelio.phase",
pt.size = 0.5,
reduction = "spring",
cols = c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5])) & NoAxes()
p1 + p2 + p3 + patchwork::plot_layout(ncol = 2)# Start with neurons data
Trajectories.all <- Trajectories.neurons %>% select(Barcodes, nCount_RNA, Spring_1, Spring_2, Lineage)
Trajectories.all$Pseudotime <- Trajectories.neurons$PseudotimeScore.shifted + 0.5
Trajectories.all$Phase <- NA# Add progenitors data
Trajectories.progenitors <- Progenitors.data@meta.data %>%
select(Barcodes, nCount_RNA, Spring_1, Spring_2) %>%
mutate(Lineage= ifelse(Progenitors.data$Cell_ident == "Medial_pallium", "Pallial_neurons", "Cajal-Retzius_neurons") ,
Pseudotime= Progenitors.data$Revelio.cc/2,
Phase = Progenitors.data$Revelio.phase)Trajectories.WT<- rbind(Trajectories.all, Trajectories.progenitors)
Trajectories.WT$Phase <- factor(Trajectories.WT$Phase, levels = c("G1.S", "S", "G2", "G2.M", "M.G1"))p1 <- ggplot(Trajectories.WT, aes(Spring_1, Spring_2)) +
geom_point(aes(color=Pseudotime), size=0.5) +
scale_color_gradientn(colours=rev(brewer.pal(n =11, name = "Spectral")), name='Pseudotime score')
p2 <- ggplot(Trajectories.WT, aes(Spring_1, Spring_2)) +
geom_point(aes(color= Lineage), size=0.5) +
scale_color_manual(values= c("#cc391b", "#026c9a"))
p1 + p2p1 <- ggplot(Trajectories.WT, aes(x= Pseudotime, y= nCount_RNA/10000)) +
geom_point(aes(color= Phase), size=0.5) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5])) +
geom_smooth(method="loess", n= 50, fill="grey") +
ylim(0,NA)
p2 <- ggplot(Trajectories.WT, aes(x= Pseudotime, y= nCount_RNA/10000)) +
geom_point(aes(color= Lineage), size=0.5) +
scale_color_manual(values= c("#cc391b", "#026c9a")) +
geom_smooth(method="loess", n= 50, fill="grey") +
ylim(0,NA)
p1 / p2rm(list = ls()[!ls() %in% c("Trajectories.WT")])
gc()## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3462712 185 6140847 328.0 6140847 328.0
## Vcells 6021756 46 798239834 6090.1 1064977766 8125.2
WT.KO.integrated <- readRDS("WT_KO.integrated.RDS")Neurons.data <- subset(WT.KO.integrated,
subset = Lineage %in% c("Cajal-Retzius_neurons", "Pallial_neurons") & orig.ident == "Gmnc_KO" & Cell.ident.KO %in% c("Neuron_prob.2", "Neuron_prob.3"))
DimPlot(Neurons.data,
group.by = "Lineage",
reduction = "integrated_spring",
pt.size = 1,
cols = c("#cc391b","#026c9a")
) + NoAxes()fit <- principal_curve(as.matrix(Neurons.data@meta.data[,c("Spring_1", "Spring_2")]),
smoother='lowess',
trace=TRUE,
f = 1,
stretch=0)## Starting curve---distance^2: 76242448164
## Iteration 1---distance^2: 48308567
## Iteration 2---distance^2: 49237931
## Iteration 3---distance^2: 50119026
## Iteration 4---distance^2: 51105768
## Iteration 5---distance^2: 51819999
## Iteration 6---distance^2: 52369574
## Iteration 7---distance^2: 52732675
## Iteration 8---distance^2: 52936995
## Iteration 9---distance^2: 53060989
## Iteration 10---distance^2: 53117718
#Pseudotime score
PseudotimeScore <- fit$lambda/max(fit$lambda)
if (cor(PseudotimeScore, Neurons.data@assays$RNA@data['Hmga2', ]) > 0) {
Neurons.data$PseudotimeScore <- -(PseudotimeScore - max(PseudotimeScore))
}
cols <- brewer.pal(n =11, name = "Spectral")
ggplot(Neurons.data@meta.data, aes(Spring_1, Spring_2)) +
geom_point(aes(color=PseudotimeScore), size=2, shape=16) +
scale_color_gradientn(colours=rev(cols), name='Pseudotime score')score <- sapply(Neurons.data$PseudotimeScore,
FUN = function(x) if (x <= 0.25) {x= 0.25} else { x=x })
Neurons.data$PseudotimeScore.shifted <- (score - min(score)) / (max(score) - min(score))ggplot(Neurons.data@meta.data, aes(x= PseudotimeScore.shifted, y= nCount_RNA/10000)) +
geom_point(aes(color= Lineage), size=0.5) +
scale_color_manual(values= c("#cc391b", "#026c9a")) +
geom_smooth(method="loess", n= 50, aes(color= Lineage)) +
ylim(0,NA)To balance the number of progenitors in both domain we will only work with Hem and Medial_pallium annotated cells. Since we are using pallial cell to contrast CR specific trajectory we think this approximation will not significantly affect our analysis.
Progenitors.data <- subset(WT.KO.integrated,
subset = Lineage %in% c("Cajal-Retzius_neurons", "Pallial_neurons") & orig.ident == "Gmnc_KO" & Cell.ident.KO %in% c("Hem", "Medial_pallium"))
DimPlot(Progenitors.data,
reduction = "integrated_spring",
group.by = "Cell.ident.KO",
pt.size = 1,
cols = c("#cc391b","#026c9a")
) + NoAxes()rm(WT.KO.integrated)
gc()## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3510726 187.5 6140847 328.0 6140847 328
## Vcells 298010590 2273.7 1144880851 8734.8 1331680305 10160
rawCounts <- as.matrix(Progenitors.data[["RNA"]]@counts)
# Filter genes expressed by less than 10 cells
num.cells <- Matrix::rowSums(rawCounts > 0)
genes.use <- names(x = num.cells[which(x = num.cells >= 10)])
rawCounts <- rawCounts[genes.use, ]CCgenes <- read.table("../ChoroidPlexus_trajectory/CCgenes.csv", sep = ";", header = T)We can now follow the tutorial form the package github page
myData <- createRevelioObject(rawData = rawCounts,
cyclicGenes = CCgenes,
lowernGeneCutoff = 0,
uppernUMICutoff = Inf,
ccPhaseAssignBasedOnIndividualBatches = F)## 2022-06-13 15:19:40: reading data: 5.59secs
rm("rawCounts")
gc()## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3503173 187.1 6126737 327.3 6126737 327.3
## Vcells 357827865 2730.1 1144872590 8734.7 1331671700 10159.9
The getCellCyclePhaseAssignInformation filter “outlier” cells for cell cycle phase assignation. We modified the function to keep all cells as we observed this does not affect the global cell cycle fitting procedure
source("../Functions/functions_InitializationCCPhaseAssignFiltering.R")
myData <- getCellCyclePhaseAssign_allcells(myData)## 2022-06-13 15:19:55: assigning cell cycle phases: 32.85secs
myData <- getPCAData(dataList = myData)## 2022-06-13 15:20:45: calculating PCA: 25.35secs
myData <- getOptimalRotation(dataList = myData)## 2022-06-13 15:21:10: calculating optimal rotation: 14.21secs
CellCycledata <- cbind(as.data.frame(t(myData@transformedData$dc$data[1:2,])),
nUMI= myData@cellInfo$nUMI,
Revelio.phase = factor(myData@cellInfo$ccPhase, levels = c("G1.S", "S", "G2", "G2.M", "M.G1")),
Revelio.cc= myData@cellInfo$ccPercentageUniformlySpaced,
Domain= Progenitors.data$Cell.ident.KO)p1 <- ggplot(CellCycledata, aes(DC1, DC2)) +
geom_point(aes(color = Revelio.phase), size= 0.5) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5]))
p2 <- ggplot(CellCycledata, aes(DC1, DC2)) +
geom_point(aes(color = Domain), size = 0.5) +
scale_color_manual(values= c("#cc391b","#026c9a"))
p3 <- ggplot(CellCycledata, aes(DC1, DC2)) +
geom_point(aes(color=Revelio.cc), size=0.5, shape=16) +
scale_color_gradientn(colours=rev(colorRampPalette(brewer.pal(n =11, name = "Spectral"))(100)),
name='Revelio_cc')
p4 <- ggplot(CellCycledata, aes(x= Revelio.cc, y= nUMI/10000)) +
geom_point(aes(color= Revelio.phase), size=0.5) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5])) +
geom_smooth(method="loess", n= 50, fill="grey") +
ylim(0,NA)
(p1 + p2) /(p3 + p4)Progenitors.data$Revelio.phase <- CellCycledata$Revelio.phase
Progenitors.data$Revelio.cc <- CellCycledata$Revelio.cc
p1 <- FeaturePlot(object = Progenitors.data,
features = "Revelio.cc",
pt.size = 1,
cols = rev(brewer.pal(10,"Spectral")),
reduction = "integrated_spring",
order = T) & NoAxes()
p2 <- DimPlot(object = Progenitors.data,
group.by = "Revelio.phase",
pt.size = 1,
reduction = "integrated_spring",
cols = c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5])) & NoAxes()
p1 / p2Progenitors <- Progenitors.data@meta.data %>% select(Barcodes, nCount_RNA, Spring_1, Spring_2, Lineage)
Progenitors$PseudotimeScore <- CellCycledata$Revelio.cc
Progenitors$nUMI <- Progenitors.data$nCount_RNA# Start with neurons data
Trajectories.all <- Neurons.data@meta.data %>% select(Barcodes, nCount_RNA, Spring_1, Spring_2, Lineage)
Trajectories.all$Pseudotime <- Neurons.data$PseudotimeScore.shifted + 0.5
Trajectories.all$Phase <- NA# Add progenitors data
Trajectories.progenitors <- Progenitors %>%
select(Barcodes, nCount_RNA, Spring_1, Spring_2, Lineage) %>%
mutate(Pseudotime= Progenitors.data$Revelio.cc/2,
Phase = Progenitors.data$Revelio.phase)Trajectories.KO <- rbind(Trajectories.all, Trajectories.progenitors)
Trajectories.KO$Phase <- factor(Trajectories.KO$Phase, levels = c("G1.S", "S", "G2", "G2.M", "M.G1"))p1 <- ggplot(Trajectories.KO, aes(Spring_1, Spring_2)) +
geom_point(aes(color=Pseudotime), size=0.5) +
scale_color_gradientn(colours=rev(brewer.pal(n =11, name = "Spectral")), name='Pseudotime score')
p2 <- ggplot(Trajectories.KO, aes(Spring_1, Spring_2)) +
geom_point(aes(color= Lineage), size=0.5) +
scale_color_manual(values= c("#cc391b", "#026c9a"))
p1 + p2p1 <- ggplot(Trajectories.KO, aes(x= Pseudotime, y= nCount_RNA/10000)) +
geom_point(aes(color= Phase), size=0.5) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5])) +
geom_smooth(method="loess", n= 50, fill="grey") +
ylim(0,NA)
p2 <- ggplot(Trajectories.KO, aes(x= Pseudotime, y= nCount_RNA/10000)) +
geom_point(aes(color= Lineage), size=0.5) +
scale_color_manual(values= c("#cc391b", "#026c9a")) +
geom_smooth(aes(color= Lineage), method="loess", n= 50, fill="grey") +
ylim(0,NA)
p1 / p2Trajectories <- rbind(Trajectories.KO, Trajectories.WT)
rm(list = ls()[!ls() %in% c("Trajectories")])WT.KO.integrated <- readRDS("WT_KO.integrated.RDS")
meta.data <- WT.KO.integrated@meta.data[Trajectories$Barcodes,]
meta.data$Pseudotime <- Trajectories$Pseudotime
meta.data$Phase <- Trajectories$PhaseWT.KO.integrated <- CreateSeuratObject(counts = WT.KO.integrated@assays$RNA@counts[, Trajectories$Barcodes],
meta.data = meta.data)
spring <- as.matrix(WT.KO.integrated@meta.data %>% select("Integrated_Spring_1", "Integrated_Spring_2"))
WT.KO.integrated[["integrated_spring"]] <- CreateDimReducObject(embeddings = spring, key = "Spring_", assay = DefaultAssay(WT.KO.integrated))p1 <- FeaturePlot(object = WT.KO.integrated,
features = "Pseudotime",
pt.size = 0.5,
cols = rev(colorRampPalette(brewer.pal(n =11, name = "Spectral"))(100)),
reduction = "integrated_spring",
order = T) & NoAxes()
p2 <- DimPlot(object = WT.KO.integrated,
group.by = "Lineage",
pt.size = 0.5,
reduction = "integrated_spring",
cols = c("#cc391b", "#026c9a")) & NoAxes()
p3 <- DimPlot(object = WT.KO.integrated,
group.by = "Phase",
pt.size = 0.5,
reduction = "integrated_spring",
cols = c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5])) & NoAxes()
p1 + p2 + p3ggsave("~/Bureau/Fig4_MM/Spring_WT_KO_Pseudotime.pdf",
device = "pdf",
dpi = "retina")WT.KO.integrated <- NormalizeData(WT.KO.integrated, normalization.method = "LogNormalize", scale.factor = 10000, assay = "RNA")WT.KO.integrated <- FindVariableFeatures(WT.KO.integrated, selection.method = "disp", nfeatures = 6500, assay = "RNA")source("../Functions/functions_GeneTrends.R")
Plot.Genes.trend(Seurat.data= WT.KO.integrated,
group.by = "Genotype",
genes= c("Gas1","Sox2",
"Neurog2", "Btg2",
"Tbr1", "Mapt",
"Trp73", "Foxg1"))ggsave("~/Bureau/Fig4_MM/Neuro-Genes_trend_WT-KO.pdf",
width = 15, height = 9,
device = "pdf",
dpi = "retina")Plot.Genes.trend(Seurat.data= WT.KO.integrated,
group.by = "Genotype",
genes= c("Gmnc", "Mcidas",
"Ccno", "Ccdc67",
"Foxj1", "Trp73",
"Lhx1", "Cdkn1a"))ggsave("~/Bureau/Fig4_MM/Gmnc-Genes_trend_WT-KO.pdf",
width = 15, height = 9,
device = "pdf",
dpi = "retina")CR <- WT.KO.integrated@meta.data %>% filter(Lineage == "Cajal-Retzius_neurons") %>% select(Barcodes,Pseudotime,orig.ident)
CR.genes <- as.data.frame(t(WT.KO.integrated@assays$RNA@data[c("Cacna2d2","Reln", "Nhlh2", "Ebf3"),CR$Barcodes]))
CR.genes$Pseudotime <- CR$Pseudotime
CR.genes$Genotype <- factor(CR$orig.ident, levels = c("Hem1", "Gmnc_KO") )
CR.genes <- reshape2::melt(CR.genes, id = c("Pseudotime", "Genotype"))
ggplot(CR.genes, aes(x= Pseudotime, y= value)) +
geom_smooth(method="loess", n= 50, aes(color= variable, linetype= Genotype)) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],wes_palette("FantasticFox1")[5])) +
ylim(0,NA)ggsave("~/Bureau/Fig4_MM/CR-Genes_trend_WT-KO.pdf",
width = 5, height = 2,
device = "pdf",
dpi = "retina")rm(list = ls()[!ls() %in% c("WT.KO.integrated")])
gc()## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3533451 188.8 6140847 328.0 6140847 328
## Vcells 106898580 815.6 879319694 6708.7 1331680305 10160
# Transfer metadata
Annot.data <- new('AnnotatedDataFrame',
data = data.frame(Barcode= WT.KO.integrated$Barcodes,
Lineage= WT.KO.integrated$Lineage,
Pseudotime= WT.KO.integrated$Pseudotime,
Phase= WT.KO.integrated$Phase,
Genotype= WT.KO.integrated$orig.ident))
# Transfer counts data
var.genes <- WT.KO.integrated[["RNA"]]@var.features
feature.data <- new('AnnotatedDataFrame',
data = data.frame(gene_short_name = rownames(WT.KO.integrated[["RNA"]]@counts[var.genes,]),
row.names = rownames(WT.KO.integrated[["RNA"]]@counts[var.genes,])))
# Create the CellDataSet object including variable genes only
gbm_cds <- newCellDataSet(WT.KO.integrated[["RNA"]]@counts[var.genes,],
phenoData = Annot.data,
featureData = feature.data,
lowerDetectionLimit = 0,
expressionFamily = negbinomial())gbm_cds <- estimateSizeFactors(gbm_cds)
gbm_cds <- estimateDispersions(gbm_cds)
gbm_cds <- detectGenes(gbm_cds, min_expr = 0.1)rm(list = ls()[!ls() %in% c("WT.KO.integrated", "gbm_cds")])
gc()## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3607336 192.7 6140847 328 6140847 328
## Vcells 129862416 990.8 703455756 5367 1331680305 10160
KO.pData <- pData(gbm_cds) %>% subset(Genotype == "Gmnc_KO")
pseudo.maturation.diff <- differentialGeneTest(gbm_cds[fData(gbm_cds)$num_cells_expressed > 80, KO.pData$Barcode],
fullModelFormulaStr = "~sm.ns(Pseudotime, df = 3)*Lineage",
reducedModelFormulaStr = "~sm.ns(Pseudotime, df = 3)",
cores = parallel::detectCores() - 2)
# Filter genes based on FDR
pseudo.maturation.diff.filtered <- pseudo.maturation.diff %>% filter(qval < 1e-40)# Create a new pseudo-DV vector of 300 points
nPoints <- 300
new_data = list()
for (Lineage in unique(KO.pData$Lineage)){
new_data[[length(new_data) + 1]] = data.frame(Pseudotime = seq(min(KO.pData$Pseudotime), max(KO.pData$Pseudotime), length.out = nPoints), Lineage=Lineage)
}
new_data = do.call(rbind, new_data)
# Smooth gene expression
Diff.curve_matrix <- genSmoothCurves(gbm_cds[as.character(pseudo.maturation.diff.filtered$gene_short_name), KO.pData$Barcode],
trend_formula = "~sm.ns(Pseudotime, df = 3)*Lineage",
relative_expr = TRUE,
new_data = new_data,
cores= parallel::detectCores() - 2)# Extract matrix containing smoothed curves for each lineages
Pal_curve_matrix <- Diff.curve_matrix[, 1:nPoints] #Pallial points
CR_curve_matrix <- Diff.curve_matrix[, (nPoints + 1):(2 * nPoints)] #CR points
# Direction of the comparison : postive ABCs <=> Upregulated in CR lineage
ABCs_res <- CR_curve_matrix - Pal_curve_matrix
# Average logFC between the 2 curves
ILR_res <- log2(CR_curve_matrix/ (Pal_curve_matrix + 0.1))
ABCs_res <- apply(ABCs_res, 1, function(x, nPoints) {
avg_delta_x <- (x[1:(nPoints - 1)] + x[2:(nPoints)])/2
step <- (100/(nPoints - 1))
res <- round(sum(avg_delta_x * step), 3)
return(res)},
nPoints = nPoints) # Compute the area below the curve
ABCs_res <- cbind(ABCs_res, ILR_res[,ncol(ILR_res)])
colnames(ABCs_res)<- c("ABCs", "Endpoint_ILR")
# Import ABC values into the DE test results table
pseudo.maturation.diff.filtered <- cbind(pseudo.maturation.diff.filtered[,1:4],
ABCs_res,
pseudo.maturation.diff.filtered[,5:6])# Extract Cajal-Retzius expressed genes
CR.res <- as.data.frame(pseudo.maturation.diff.filtered[pseudo.maturation.diff.filtered$ABCs > 0,])
CR.genes <- row.names(CR.res)
CR_curve_matrix <- CR_curve_matrix[CR.genes, ]Pseudotime.genes.clusters <- cluster::pam(as.dist((1 - cor(Matrix::t(CR_curve_matrix),method = "pearson"))), k= 5)
CR.Gene.dynamique <- data.frame(Gene= names(Pseudotime.genes.clusters$clustering),
Waves= Pseudotime.genes.clusters$clustering,
Gene.Clusters = Pseudotime.genes.clusters$clustering,
q.val = CR.res$qval,
ABCs= CR.res$ABCs
) %>% arrange(Gene.Clusters)
row.names(CR.Gene.dynamique) <- CR.Gene.dynamique$Gene
CR.Gene.dynamique$Gene.Clusters <- paste0("Clust.", CR.Gene.dynamique$Gene.Clusters)
write.table(CR.Gene.dynamique, "KO_CR_dynamic_genes.csv", sep = ";", quote = F, row.names = F)# Order the rows using seriation
dst <- as.dist((1-cor(scale(t(CR_curve_matrix)), method = "pearson")))
row.ser <- seriation::seriate(dst, method ="R2E") #"R2E" #TSP #"GW" "GW_ward"
gene.order <- rownames(CR_curve_matrix[get_order(row.ser),])
# Set annotation colors
pal <- wes_palette("Darjeeling1")
anno.colors <- list(lineage = c(KO_Pallial_neurons="#026c9a", KO_Cajal_Retzius="#cc391b"),
Gene.Clusters = c(Clust.1 =pal[1] , Clust.2=pal[2], Clust.3=pal[3], Clust.4=pal[4], Clust.5=pal[5]))pheatmap::pheatmap(Diff.curve_matrix[gene.order[c(235:1,292:236)],
c(300:1,#Pal
301:600)], #CR
scale = "row",
cluster_rows = F,
cluster_cols = F,
annotation_row = CR.Gene.dynamique %>% dplyr::select(Gene.Clusters),
annotation_col = data.frame(lineage = rep(c("KO_Pallial_neurons","KO_Cajal_Retzius"), each=300)),
annotation_colors = anno.colors,
gaps_col = c(200,400),
show_colnames = F,
show_rownames = F,
fontsize_row = 8,
color = viridis::viridis(9),
breaks = seq(-2.5,2.5, length.out = 9),
main = "")WT.CR <- read.table("../CajalRetzius_trajectory/CR_dynamic_genes.csv", sep = ";", header = T)
WT.CPx <- read.table("../ChoroidPlexus_trajectory/ChP.Gene.dynamique.csv", sep = ";", header = T)
KO.CR <- read.table("./KO_CR_dynamic_genes.csv", sep = ";", header = T)
KO.CPx <- read.table("./KO_CPx_dynamic_genes.csv", sep = ";", header = T)
listInput <- list(WT.CR= WT.CR$Gene, WT.CPx= WT.CPx$Gene, KO.CR= KO.CR$Gene, KO.CPx= KO.CPx$Gene)upset(fromList(listInput),
order.by = "freq",
queries = list(list(query = intersects, params = list("WT.CR", "WT.CPx"), color = "orange", active = T),
list(query = intersects, params = list("WT.CPx", "KO.CR", "WT.CR"), color = "red", active = T),
list(query = intersects, params = list("KO.CR", "WT.CR"), color = "blue", active = T)))upset(fromList(listInput),
order.by = "freq",
intersections = list(list("WT.CR", "WT.CPx"),
list("KO.CR", "WT.CR"),
list("WT.CPx", "KO.CR", "WT.CR")),
queries = list(list(query = intersects, params = list("WT.CR", "WT.CPx"), color = "orange", active = T),
list(query = intersects, params = list("WT.CPx", "KO.CR", "WT.CR"), color = "red", active = T),
list(query = intersects, params = list("KO.CR", "WT.CR"), color = "blue", active = T))
)# Shared genes before Gmnc induction
WT.CR.anteGmnc <- WT.CR %>% filter(Waves %in% c(1,3)) %>% pull(Gene)
KO.CR.anteGmnc <- KO.CR %>% filter(Waves %in% c(2,3,5)) %>% pull(Gene)
gene.list <- list(WT.CR = WT.CR.anteGmnc, KO.CR = KO.CR.anteGmnc)
ggvenn::ggvenn(gene.list)# Shared genes after Gmnc induction
WT.CR.postGmnc <- WT.CR %>% filter(Waves %in% c(2,5,4)) %>% pull(Gene)
KO.CR.postGmnc <- KO.CR %>% filter(Waves %in% c(1,4)) %>% pull(Gene)
gene.list <- list(WT.CR = WT.CR.postGmnc, KO.CR = KO.CR.postGmnc)
ggvenn::ggvenn(gene.list)# Load WT CR variable genes along pseudotime
WT.CR.genes <- read.table("../CajalRetzius_trajectory/CR_dynamic_genes.csv", sep = ";", header = T)
# Extract CR lineage in both genotypes
CR.pData <- pData(gbm_cds) %>% subset(Lineage == "Cajal-Retzius_neurons")
# Create a new pseudotime vector of 300 points
nPoints <- 300
new_data = list()
for (Genotype in unique(CR.pData$Genotype)){
new_data[[length(new_data) + 1]] = data.frame(Pseudotime = seq(min(CR.pData$Pseudotime), max(CR.pData$Pseudotime), length.out = nPoints), Genotype= Genotype)
}
new_data = do.call(rbind, new_data)
# Smooth gene expression
Diff.curve_matrix <- genSmoothCurves(gbm_cds[WT.CR.genes$Gene, CR.pData$Barcode],
trend_formula = "~sm.ns(Pseudotime, df = 3)*Genotype",
relative_expr = TRUE,
new_data = new_data,
cores= parallel::detectCores() - 2)Pseudotime.genes.clusters <- cluster::pam(as.dist((1 - cor(Matrix::t(Diff.curve_matrix[,301:600]),method = "pearson"))), k= 5)
CR.Gene.dynamique <- data.frame(Gene= names(Pseudotime.genes.clusters$clustering),
Waves= Pseudotime.genes.clusters$clustering,
Gene.Clusters = Pseudotime.genes.clusters$clustering,
WT.CR_and_KO.CR = ifelse(listInput$WT.CR %in% listInput$KO.CR,
"Shared", "Not.Shared"),
WT.CR_and_WT.CPx_only = ifelse(listInput$WT.CR %in% listInput$WT.CPx & !listInput$WT.CR %in% listInput$KO.CR,
"Shared", "Not.Shared")
) %>% arrange(Gene.Clusters)
row.names(CR.Gene.dynamique) <- CR.Gene.dynamique$Gene
CR.Gene.dynamique$Gene.Clusters <- paste0("Clust.", CR.Gene.dynamique$Gene.Clusters)# Order the rows using seriation
dst <- as.dist((1-cor(scale(t(Diff.curve_matrix[,301:600])), method = "pearson")))
row.ser <- seriation::seriate(dst, method ="R2E") #"R2E" #TSP #"GW" "GW_ward"
gene.order <- rownames(Diff.curve_matrix[,301:600][get_order(row.ser),])
# Set annotation colors
pal <- wes_palette("Darjeeling1")
anno.colors <- list(lineage = c(Gmnc_KO="#026c9a", Gmnc_WT="#cc391b"),
Gene.Clusters = c(Clust.1 =pal[1] , Clust.2=pal[2], Clust.3=pal[3], Clust.4=pal[4], Clust.5=pal[5]),
WT.CR_and_WT.CPx = c(Shared = pal[2], Not.Shared=pal[3]),
WT.CR_and_WT.CPx_only= c(Shared = pal[2], Not.Shared=pal[3]))
gene.order <- gene.order[c(382:622,1:381)]heatmap.gene <- pheatmap::pheatmap(Diff.curve_matrix[gene.order,
c(300:1,#Pal
301:600)], #CR
scale = "row",
cluster_rows = F,
cluster_cols = F,
annotation_row = CR.Gene.dynamique %>% dplyr::select(Gene.Clusters, WT.CR_and_KO.CR, WT.CR_and_WT.CPx_only),
annotation_col = data.frame(lineage = rep(c("Gmnc_KO","Gmnc_WT"), each=300)),
annotation_colors = anno.colors,
show_colnames = F,
gaps_col = c(200,400),
gaps_row = c(233),
show_rownames = T,
fontsize_row = 8,
border_color = NA,
color = viridis::viridis(9),
breaks = seq(-2.5,2.5, length.out = 9),
main = "WT CR enriched genes along GmncKO and GmncWT CR trajectories")pdf("~/Bureau/Fig4_MM/heatmap_Fig4Genes_WT-KO.pdf", width=10, height=5)
grid::grid.newpage()
grid::grid.draw(heatmap.gene$gtable)
dev.off()## png
## 2
anno.colors <- list(Cell.state = c(Cycling_RG="#046c9a", Differentiating_cells="#ebcb2e"),
Gene.Clusters = c(Clust.1 =pal[1] , Clust.2=pal[2], Clust.3=pal[3], Clust.4=pal[4], Clust.5=pal[5]))
col.anno <- data.frame(Cell.state = rep(c("Cycling_RG","Differentiating_cells"), c(100,200)))
rownames(col.anno) <- 301:600
pheatmap::pheatmap(Diff.curve_matrix[gene.order,
c(301:600)], #CR
scale = "row",
cluster_rows = F,
cluster_cols = F,
annotation_row = CR.Gene.dynamique %>% dplyr::select(Gene.Clusters),
annotation_col = col.anno,
annotation_colors = anno.colors,
gaps_col = 100,
show_colnames = F,
show_rownames = F,
fontsize_row = 8,
border_color = NA,
color = viridis::viridis(9),
breaks = seq(-2.5,2.5, length.out = 9),
main = "WT CR enriched genes along GmncKO and GmncWT CR trajectories")anno.colors <- list(Cell.state = c(Cycling_RG="#046c9a", Differentiating_cells="#ebcb2e"),
Gene.Clusters = c(Clust.1 =pal[1] , Clust.2=pal[2], Clust.3=pal[3], Clust.4=pal[4], Clust.5=pal[5]))
pheatmap::pheatmap(Diff.curve_matrix[gene.order,
c(1:300)], #CR
scale = "row",
cluster_rows = F,
cluster_cols = F,
annotation_row = CR.Gene.dynamique %>% dplyr::select(Gene.Clusters),
annotation_col = data.frame(Cell.state = rep(c("Cycling_RG","Differentiating_cells"), c(100,200))),
annotation_colors = anno.colors,
show_colnames = F,
show_rownames = F,
gaps_col = 100,
fontsize_row = 8,
border_color = NA,
color = viridis::viridis(9),
breaks = seq(-2.5,2.5, length.out = 9),
main = "WT CR enriched genes along GmncKO CR trajectories")CR <- WT.KO.integrated@meta.data %>% filter(Lineage == "Cajal-Retzius_neurons") %>% select(Barcodes,Pseudotime,orig.ident)
CR.genes <- as.data.frame(t(WT.KO.integrated@assays$RNA@data[c("Gmnc","Trp73", "Lhx1", "Barhl2"),CR$Barcodes]))
CR.genes$Pseudotime <- CR$Pseudotime
CR.genes$Genotype <- factor(CR$orig.ident, levels = c("Hem1", "Gmnc_KO") )
CR.genes <- reshape2::melt(CR.genes, id = c("Pseudotime", "Genotype"))
ggplot(CR.genes, aes(x= Pseudotime, y= value)) +
geom_smooth(method="loess", n= 50, aes(color= variable, linetype= Genotype)) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],wes_palette("FantasticFox1")[5])) +
ylim(0,NA)ggsave("~/Bureau/Fig4_MM/Medial-CR_Genes-trend_WT-KO.pdf",
width = 5, height = 2,
device = "pdf",
dpi = "retina")# Set annotation colors
pal <- wes_palette("Darjeeling1")
anno.colors <- list(lineage = c(Gmnc_KO="#026c9a", Gmnc_WT="#cc391b"),
Gene.Clusters = c(Clust.1 =pal[1] , Clust.2=pal[2], Clust.3=pal[3], Clust.4=pal[4], Clust.5=pal[5]))
TFs <- read.table("TF.csv", sep = ";")[,1]
gene.order <- gene.order[gene.order %in% TFs]
heatmap.gene <- pheatmap::pheatmap(Diff.curve_matrix[gene.order,
c(300:1,#KO
301:600)], #WT
scale = "row",
cluster_rows = F,
cluster_cols = F,
annotation_row = CR.Gene.dynamique %>% dplyr::select(Gene.Clusters),
annotation_col = data.frame(lineage = rep(c("Gmnc_KO","Gmnc_WT"), each=300)),
annotation_colors = anno.colors,
show_colnames = F,
show_rownames = T,
fontsize_row = 8,
border_color = NA,
color = viridis::viridis(9),
breaks = seq(-2.5,2.5, length.out = 9),
main = "WT CR dynamic TFs along GmncWT and KO trajectories") # Which WT CR enriched genes are still expressed in the KO CR
KO.pData <- pData(gbm_cds) %>% subset(Genotype == "Gmnc_KO" & Pseudotime > 0.5)
WT.CR.postGmnc <- read.table("../CajalRetzius_trajectory/CR_dynamic_genes.csv", sep = ";", header = T)
WT.CR.postGmnc <- WT.CR.postGmnc %>% filter(Waves %in% c(2,3,4,5)) %>% pull(Gene)
pseudo.maturation.diff <- differentialGeneTest(gbm_cds[WT.CR.postGmnc, KO.pData$Barcode],
fullModelFormulaStr = "~sm.ns(Pseudotime, df = 3)*Lineage",
reducedModelFormulaStr = "~sm.ns(Pseudotime, df = 3)",
cores = parallel::detectCores() - 2)
# Filter genes based on FDR
pseudo.maturation.diff.filtered <- pseudo.maturation.diff %>% filter(qval < 1e-11)# Create a new pseudo-DV vector of 300 points
nPoints <- 100
new_data = list()
for (Lineage in unique(KO.pData$Lineage)){
new_data[[length(new_data) + 1]] = data.frame(Pseudotime = seq(min(KO.pData$Pseudotime), max(KO.pData$Pseudotime), length.out = nPoints), Lineage=Lineage)
}
new_data = do.call(rbind, new_data)
# Smooth gene expression
Diff.curve_matrix <- genSmoothCurves(gbm_cds[as.character(pseudo.maturation.diff.filtered$gene_short_name), KO.pData$Barcode],
trend_formula = "~sm.ns(Pseudotime, df = 3)*Lineage",
relative_expr = TRUE,
new_data = new_data,
cores= parallel::detectCores() - 2)# Extract matrix containing smoothed curves for each lineages
Pal_curve_matrix <- Diff.curve_matrix[, 1:nPoints] #Pallial points
CR_curve_matrix <- Diff.curve_matrix[, (nPoints + 1):(2 * nPoints)] #CR points
# Direction of the comparison : postive ABCs <=> Upregulated in CR lineage
ABCs_res <- CR_curve_matrix - Pal_curve_matrix
# Average logFC between the 2 curves
ILR_res <- log2(CR_curve_matrix/ (Pal_curve_matrix + 0.1))
ABCs_res <- apply(ABCs_res, 1, function(x, nPoints) {
avg_delta_x <- (x[1:(nPoints - 1)] + x[2:(nPoints)])/2
step <- (100/(nPoints - 1))
res <- round(sum(avg_delta_x * step), 3)
return(res)},
nPoints = nPoints) # Compute the area below the curve
ABCs_res <- cbind(ABCs_res, ILR_res[,ncol(ILR_res)])
colnames(ABCs_res)<- c("ABCs", "Endpoint_ILR")
# Import ABC values into the DE test results table
pseudo.maturation.diff.filtered <- cbind(pseudo.maturation.diff.filtered[,1:4],
ABCs_res,
pseudo.maturation.diff.filtered[,5:6])WT.CPx <- read.table("../ChoroidPlexus_trajectory/ChP.Gene.dynamique.csv", sep = ";", header = T)
WT.CPx.postGmnc <- WT.CPx %>% filter(Waves %in% c(1,2)) %>% pull(Gene)
KO.CR.postGmnc <- pseudo.maturation.diff.filtered %>% filter(ABCs >20) %>% pull(gene_short_name)
gene.list <- list(WT.CR.postGmnc = WT.CR.postGmnc, KO.CR.postGmnc = KO.CR.postGmnc, WT.CPx.postGmnc = WT.CPx.postGmnc)
ggvenn::ggvenn(gene.list)#date
format(Sys.time(), "%d %B, %Y, %H,%M")## [1] "20 juin, 2022, 18,05"
#Packages used
sessionInfo()## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
## [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
## [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] splines stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] UpSetR_1.4.0 wesanderson_0.3.6 cowplot_1.1.1
## [4] ggExtra_0.9 RColorBrewer_1.1-2 dplyr_1.0.7
## [7] seriation_1.3.1 gprofiler2_0.2.1 monocle_2.22.0
## [10] DDRTree_0.1.5 irlba_2.3.3 VGAM_1.1-5
## [13] ggplot2_3.3.5 Biobase_2.54.0 BiocGenerics_0.40.0
## [16] Matrix_1.4-1 Revelio_0.1.0 princurve_2.1.6
## [19] SeuratObject_4.0.4 Seurat_4.0.5
##
## loaded via a namespace (and not attached):
## [1] plyr_1.8.6 igraph_1.2.11 lazyeval_0.2.2
## [4] densityClust_0.3 listenv_0.8.0 scattermore_0.7
## [7] fastICA_1.2-3 digest_0.6.29 foreach_1.5.1
## [10] htmltools_0.5.2 viridis_0.6.2 fansi_0.5.0
## [13] magrittr_2.0.2 tensor_1.5 cluster_2.1.3
## [16] ROCR_1.0-11 limma_3.50.0 globals_0.14.0
## [19] matrixStats_0.61.0 docopt_0.7.1 spatstat.sparse_2.0-0
## [22] colorspace_2.0-2 ggrepel_0.9.1 xfun_0.28
## [25] sparsesvd_0.2 crayon_1.4.2 jsonlite_1.7.2
## [28] spatstat.data_2.1-0 survival_3.2-13 zoo_1.8-9
## [31] iterators_1.0.13 glue_1.5.1 polyclip_1.10-0
## [34] registry_0.5-1 gtable_0.3.0 leiden_0.3.9
## [37] future.apply_1.8.1 abind_1.4-5 scales_1.1.1
## [40] pheatmap_1.0.12 DBI_1.1.1 miniUI_0.1.1.1
## [43] Rcpp_1.0.8 viridisLite_0.4.0 xtable_1.8-4
## [46] reticulate_1.22 spatstat.core_2.3-1 htmlwidgets_1.5.4
## [49] httr_1.4.2 FNN_1.1.3 ellipsis_0.3.2
## [52] ica_1.0-2 farver_2.1.0 pkgconfig_2.0.3
## [55] sass_0.4.0 uwot_0.1.10 deldir_1.0-6
## [58] utf8_1.2.2 labeling_0.4.2 tidyselect_1.1.1
## [61] rlang_0.4.12 reshape2_1.4.4 later_1.3.0
## [64] munsell_0.5.0 tools_4.2.0 generics_0.1.1
## [67] ggridges_0.5.3 evaluate_0.14 stringr_1.4.0
## [70] fastmap_1.1.0 yaml_2.2.1 goftest_1.2-3
## [73] knitr_1.36 fitdistrplus_1.1-6 purrr_0.3.4
## [76] RANN_2.6.1 pbapply_1.5-0 future_1.23.0
## [79] nlme_3.1-153 mime_0.12 slam_0.1-49
## [82] ggvenn_0.1.9 compiler_4.2.0 plotly_4.10.0
## [85] png_0.1-7 spatstat.utils_2.2-0 tibble_3.1.6
## [88] bslib_0.3.1 stringi_1.7.6 highr_0.9
## [91] lattice_0.20-45 HSMMSingleCell_1.14.0 vctrs_0.3.8
## [94] pillar_1.6.4 lifecycle_1.0.1 spatstat.geom_2.3-0
## [97] combinat_0.0-8 lmtest_0.9-39 jquerylib_0.1.4
## [100] RcppAnnoy_0.0.19 data.table_1.14.2 httpuv_1.6.3
## [103] patchwork_1.1.1 R6_2.5.1 promises_1.2.0.1
## [106] TSP_1.1-11 KernSmooth_2.23-20 gridExtra_2.3
## [109] parallelly_1.29.0 codetools_0.2-18 MASS_7.3-57
## [112] assertthat_0.2.1 withr_2.4.3 qlcMatrix_0.9.7
## [115] sctransform_0.3.2 mgcv_1.8-40 parallel_4.2.0
## [118] grid_4.2.0 rpart_4.1.16 tidyr_1.1.4
## [121] rmarkdown_2.11 Rtsne_0.15 shiny_1.7.1
Institute of Psychiatry and Neuroscience of Paris, INSERM U1266, 75014, Paris, France, matthieu.moreau@inserm.fr↩︎