library(Seurat)
library(scrattch.hicat)
library(FateID)
library(Matrix)
library(dplyr)
library(RColorBrewer)
library(ggplot2)
library(ggExtra)
library(cowplot)
library(reticulate)
library(wesanderson)
library(princurve)
use_python("/usr/bin/python3")
#Set ggplot theme as classic
theme_set(theme_classic())
<- Read10X("../../RawData/Gmnc_KO/outs/filtered_feature_bc_matrix/")
Countdata
<- CreateSeuratObject(counts = Countdata,
Raw.data project = "Gmnc_KO",
min.cells = 3,
min.features = 800)
$Barcodes <- rownames(Raw.data@meta.data)
Raw.data
rm(Countdata)
dim(Raw.data)
## [1] 21077 16272
$percent.mito <- PercentageFeatureSet(Raw.data, pattern = "^mt-")
Raw.data$percent.ribo <- PercentageFeatureSet(Raw.data, pattern = "(^Rpl|^Rps|^Mrp)") Raw.data
VlnPlot(object = Raw.data, features = c("nFeature_RNA","nCount_RNA", "percent.mito", "percent.ribo"), ncol= 2) & NoAxes()
# Inspect cell based on relation between nUMI and nGene detected
# Relation between nUMI and nGene detected
<- Raw.data@meta.data
Cell.QC.Stat
<- ggplot(Cell.QC.Stat, aes(x=nCount_RNA, y=nFeature_RNA)) + geom_point() + geom_smooth(method="lm")
p1 <- ggMarginal(p1, type = "histogram", fill="lightgrey")
p1
<- ggplot(Cell.QC.Stat, aes(x=log10(nCount_RNA), y=log10(nFeature_RNA))) + geom_point() + geom_smooth(method="lm")
p2 <- ggMarginal(p2, type = "histogram", fill="lightgrey")
p2
plot_grid(plotlist = list(p1,p2), ncol=2, align='h', rel_widths = c(1, 1)) ; rm(p1,p2)
Cells with deviating nGene/nUMI ratio display an Erythrocyte signature
<- AddModuleScore(Raw.data,
Raw.data features = list(c("Hbb-bt", "Hbq1a", "Isg20", "Fech", "Snca", "Rec114")),
ctrl = 10,
name = "Erythrocyte.signature")
$Erythrocyte.signature <- Raw.data$Erythrocyte.signature1 Cell.QC.Stat
<- colorRampPalette(brewer.pal(n =11, name = "Spectral"))(100)
gradient
<- ggplot(Cell.QC.Stat, aes(log10(nCount_RNA), y=log10(nFeature_RNA))) +
p1 geom_point(aes(color= Erythrocyte.signature)) +
scale_color_gradientn(colours=rev(gradient), name='Erythrocyte score') + theme(legend.position="none")
<- ggplot(Cell.QC.Stat, aes(log10(nCount_RNA), y=log10(nFeature_RNA))) +
p2 geom_point(aes(color= percent.mito)) +
scale_color_gradientn(colours=rev(gradient), name='Percent mito') + theme(legend.position="none")
<- ggplot(Cell.QC.Stat, aes(log10(nCount_RNA), y=log10(nFeature_RNA))) +
p3 geom_point(aes(color= percent.ribo)) +
scale_color_gradientn(colours=rev(gradient), name='Percent ribo') + theme(legend.position="none")
+ p2 + p3 p1
## Exclude Erythrocytes
$Erythrocyte <- ifelse(Cell.QC.Stat$Erythrocyte.signature > 0.1, "Erythrocyte", "Not_Erythrocyte") Cell.QC.Stat
<- ggplot(Cell.QC.Stat, aes(x=log10(nCount_RNA), y=log10(nFeature_RNA))) +
p2 geom_point(aes(colour = Erythrocyte)) +
theme(legend.position="none")
ggMarginal(p2, type = "histogram", fill="lightgrey")
# Filter cells based on these thresholds
<- Cell.QC.Stat %>% filter(Cell.QC.Stat$Erythrocyte.signature < 0.1) Cell.QC.Stat
We applied a high and low median absolute deviation (mad) thresholds to exclude outlier cells
<- median(Cell.QC.Stat$percent.mito) + 3*mad(Cell.QC.Stat$percent.mito)
max.mito.thr <- median(Cell.QC.Stat$percent.mito) - 3*mad(Cell.QC.Stat$percent.mito) min.mito.thr
<- ggplot(Cell.QC.Stat, aes(x=nFeature_RNA, y=percent.mito)) +
p1 geom_point() +
geom_hline(aes(yintercept = max.mito.thr), colour = "red", linetype = 2) +
geom_hline(aes(yintercept = min.mito.thr), colour = "red", linetype = 2) +
annotate(geom = "text", label = paste0(as.numeric(table(Cell.QC.Stat$percent.mito > max.mito.thr | Cell.QC.Stat$percent.mito < min.mito.thr)[2])," cells removed\n",
as.numeric(table(Cell.QC.Stat$percent.mito > max.mito.thr | Cell.QC.Stat$percent.mito < min.mito.thr)[1])," cells remain"),
x = 6000, y = 20)
ggMarginal(p1, type = "histogram", fill="lightgrey", bins=100)
# Filter cells based on these thresholds
<- Cell.QC.Stat %>% filter(percent.mito < max.mito.thr) %>% filter(percent.mito > min.mito.thr) Cell.QC.Stat
We filter cells which are likely to be doublet based on their higher content of transcript detected as well as cell with to few genes/UMI sequenced
# Set low and hight thresholds on the number of detected genes based on the one obtain with the WT dataset
<- log10(1635)
min.Genes.thr <- log10(8069)
max.Genes.thr
# Set hight threshold on the number of transcripts
<- log10(58958) max.nUMI.thr
# Gene/UMI scatter plot before filtering
<- ggplot(Cell.QC.Stat, aes(x=log10(nCount_RNA), y=log10(nFeature_RNA))) +
p1 geom_point() +
geom_smooth(method="lm") +
geom_hline(aes(yintercept = min.Genes.thr), colour = "green", linetype = 2) +
geom_hline(aes(yintercept = max.Genes.thr), colour = "green", linetype = 2) +
geom_vline(aes(xintercept = max.nUMI.thr), colour = "red", linetype = 2)
ggMarginal(p1, type = "histogram", fill="lightgrey")
# Filter cells base on both metrics
<- Cell.QC.Stat %>% filter(log10(nFeature_RNA) > min.Genes.thr) %>% filter(log10(nCount_RNA) < max.nUMI.thr) Cell.QC.Stat
<- lm(data = Cell.QC.Stat, formula = log10(nFeature_RNA) ~ log10(nCount_RNA))
lm.model
<- ggplot(Cell.QC.Stat, aes(x=log10(nCount_RNA), y=log10(nFeature_RNA))) +
p2 geom_point() +
geom_smooth(method="lm") +
geom_hline(aes(yintercept = min.Genes.thr), colour = "green", linetype = 2) +
geom_hline(aes(yintercept = max.Genes.thr), colour = "green", linetype = 2) +
geom_vline(aes(xintercept = max.nUMI.thr), colour = "red", linetype = 2) +
annotate(geom = "text", label = paste0(dim(Cell.QC.Stat)[1], " QC passed cells"), x = 4, y = 3.8)
ggMarginal(p2, type = "histogram", fill="lightgrey")
<- subset(x = Raw.data, subset = Barcodes %in% Cell.QC.Stat$Barcodes) Raw.data
# Plot final QC metrics
VlnPlot(object = Raw.data, features = c("nFeature_RNA","nCount_RNA", "percent.mito", "percent.ribo"), ncol= 2) & NoAxes()
<- ggplot(Raw.data@meta.data, aes(x=log10(nCount_RNA), y=log10(nFeature_RNA))) + geom_point() + geom_smooth(method="lm")
p1 ggMarginal(p1, type = "histogram", fill="lightgrey")
rm(list = ls()[!ls() %in% "Raw.data"])
Export raw count matrix as input to Scrublet
#Export filtered matrix
dir.create("../../RawData/Gmnc_KO/Scrublet_inputs")
<- Matrix(as.matrix(Raw.data@assays[["RNA"]]@counts), sparse = TRUE)
exprData writeMM(exprData, "../../RawData/Gmnc_KO/Scrublet_inputs/matrix1.mtx")
## NULL
import scrublet as scr
import scipy.io
import numpy as np
import os
#Load raw counts matrix and gene list
= '../../RawData/Gmnc_KO/Scrublet_inputs'
input_dir = scipy.io.mmread(input_dir + '/matrix1.mtx').T.tocsc()
counts_matrix
#Initialize Scrublet
= scr.Scrublet(counts_matrix,
scrub =0.1,
expected_doublet_rate=2,
sim_doublet_ratio= 8)
n_neighbors
#Run the default pipeline
= scrub.scrub_doublets(min_counts=1,
doublet_scores, predicted_doublets =3,
min_cells=85,
min_gene_variability_pctl=25) n_prin_comps
## Preprocessing...
## Simulating doublets...
## Embedding transcriptomes using PCA...
## Calculating doublet scores...
## Automatically set threshold at doublet score = 0.35
## Detected doublet rate = 3.7%
## Estimated detectable doublet fraction = 26.0%
## Overall doublet rate:
## Expected = 10.0%
## Estimated = 14.2%
## Elapsed time: 19.9 seconds
# Import scrublet's doublet score
$Doubletscore <- py$doublet_scores
Raw.data
# Plot doublet score
ggplot(Raw.data@meta.data, aes(x = Doubletscore, stat(ndensity))) +
geom_histogram(bins = 200, colour ="lightgrey")+
geom_vline(xintercept = 0.15, colour = "red", linetype = 2)
# Manually set threshold at doublet score to 0.2
$Predicted_doublets <- ifelse(py$doublet_scores > 0.15, "Doublet","Singlet")
Raw.datatable(Raw.data$Predicted_doublets)
##
## Doublet Singlet
## 2273 10215
<- subset(x = Raw.data, subset = Predicted_doublets == "Singlet") Raw.data
dir.create("./SpringCoordinates")
# Export raw expression matrix and gene list to regenerate a spring plot
<- Matrix(as.matrix(Raw.data@assays[["RNA"]]@counts), sparse = TRUE)
exprData writeMM(exprData, "./SpringCoordinates/ExprData.mtx")
## NULL
<- row.names(Raw.data@assays[["RNA"]]@counts)
Genelist write.table(Genelist, "./SpringCoordinates/Genelist.csv", sep="\t", col.names = F, row.names = F, quote = F)
#Export metadata
<- c("Scrublet", Raw.data$Predicted_doublets)
Scrublet <- paste(Scrublet, sep=",", collapse=",")
Scrublet
<- Scrublet
Cellgrouping write.table(Cellgrouping, "./SpringCoordinates/Cellgrouping.csv", quote =F, row.names = F, col.names = F)
<- read.table("SpringCoordinates/coordinates.txt", sep = ",", header = F, row.names = 1)
spring.coor colnames(spring.coor) <- c("Spring_1", "Spring_2")
<- function(x){
Spring.Sym = abs(max(spring.coor$Spring_2)-x)
x
}
$Spring_2 <- sapply(spring.coor$Spring_2, function(x) Spring.Sym(x)) spring.coor
$Spring_1 <- spring.coor$Spring_1
Raw.data$Spring_2 <- spring.coor$Spring_2 Raw.data
<- as.matrix(Raw.data@meta.data %>% select("Spring_1", "Spring_2"))
spring
"spring"]] <- CreateDimReducObject(embeddings = spring, key = "Spring_", assay = DefaultAssay(Raw.data)) Raw.data[[
DimPlot(Raw.data,
reduction = "spring",
pt.size = 0.5) & NoAxes()
# Broad clustering
<- SCTransform(Raw.data,
Raw.data method = "glmGamPoi",
vars.to.regress = c("percent.mito"),
verbose = T)
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|==== | 5%
|
|===== | 8%
|
|======= | 10%
|
|========= | 13%
|
|=========== | 15%
|
|============= | 18%
|
|============== | 21%
|
|================ | 23%
|
|================== | 26%
|
|==================== | 28%
|
|====================== | 31%
|
|======================= | 33%
|
|========================= | 36%
|
|=========================== | 38%
|
|============================= | 41%
|
|=============================== | 44%
|
|================================ | 46%
|
|================================== | 49%
|
|==================================== | 51%
|
|====================================== | 54%
|
|======================================= | 56%
|
|========================================= | 59%
|
|=========================================== | 62%
|
|============================================= | 64%
|
|=============================================== | 67%
|
|================================================ | 69%
|
|================================================== | 72%
|
|==================================================== | 74%
|
|====================================================== | 77%
|
|======================================================== | 79%
|
|========================================================= | 82%
|
|=========================================================== | 85%
|
|============================================================= | 87%
|
|=============================================================== | 90%
|
|================================================================= | 92%
|
|================================================================== | 95%
|
|==================================================================== | 97%
|
|======================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|==== | 5%
|
|===== | 8%
|
|======= | 10%
|
|========= | 13%
|
|=========== | 15%
|
|============= | 18%
|
|============== | 21%
|
|================ | 23%
|
|================== | 26%
|
|==================== | 28%
|
|====================== | 31%
|
|======================= | 33%
|
|========================= | 36%
|
|=========================== | 38%
|
|============================= | 41%
|
|=============================== | 44%
|
|================================ | 46%
|
|================================== | 49%
|
|==================================== | 51%
|
|====================================== | 54%
|
|======================================= | 56%
|
|========================================= | 59%
|
|=========================================== | 62%
|
|============================================= | 64%
|
|=============================================== | 67%
|
|================================================ | 69%
|
|================================================== | 72%
|
|==================================================== | 74%
|
|====================================================== | 77%
|
|======================================================== | 79%
|
|========================================================= | 82%
|
|=========================================================== | 85%
|
|============================================================= | 87%
|
|=============================================================== | 90%
|
|================================================================= | 92%
|
|================================================================== | 95%
|
|==================================================================== | 97%
|
|======================================================================| 100%
<- RunPCA(Raw.data, verbose = FALSE)
Raw.data
<- FindNeighbors(Raw.data,
Raw.data dims = 1:20,
k.param = 8)
<- FindClusters(Raw.data, resolution = 0.2) Raw.data
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10215
## Number of edges: 218490
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9335
## Number of communities: 9
## Elapsed time: 1 seconds
DimPlot(Raw.data,
reduction = "spring",
cols = c("#ebcb2e", "#9ec22f", "#a9961b", "#cc3a1b", "#d14c8d", "#4cabdc", "#5ab793", "#e7823a", "#046c9a", "#4990c9"),
pt.size = 0.5) & NoAxes()
$Broadclust.ident <- Raw.data$seurat_clusters Raw.data
<- subset(Raw.data, idents = 3)
Neurons.data
DimPlot(Neurons.data,
reduction = "spring",
pt.size = 1,
cols = c("#cc3a1b")) + NoAxes()
## Fit pseudotime
<- principal_curve(as.matrix(Neurons.data@meta.data[,c("Spring_1", "Spring_2")]),
fit smoother='lowess',
trace=TRUE,
f = 1,
stretch=0)
## Starting curve---distance^2: 84204082853
## Iteration 1---distance^2: 46039577
## Iteration 2---distance^2: 42955956
## Iteration 3---distance^2: 41286356
## Iteration 4---distance^2: 40749859
## Iteration 5---distance^2: 40679459
## Iteration 6---distance^2: 40714117
#Pseudotime score
<- fit$lambda/max(fit$lambda)
PseudotimeScore
if (cor(PseudotimeScore, Neurons.data@assays$SCT@data['Hmga2', ]) > 0) {
$PseudotimeScore <- -(PseudotimeScore - max(PseudotimeScore))
Neurons.data
}
<- brewer.pal(n =11, name = "Spectral")
cols
ggplot(Neurons.data@meta.data, aes(Spring_1, Spring_2)) +
geom_point(aes(color=PseudotimeScore), size=2, shape=16) +
scale_color_gradientn(colours=rev(cols), name='Pseudotime score')
# Late Neurons diversity
$Cell.state <- cut(Neurons.data$PseudotimeScore,
Neurons.datac(0,0.4,0.8,1),
include.lowest = T,
labels=c("BP","EN","LN"))
DimPlot(Neurons.data,
group.by = "Cell.state",
reduction = "spring",
cols = c("#ebcb2e", "#9ec22f", "#a9961b"),
pt.size = 1.5) & NoAxes()
<- subset(Neurons.data, subset = Cell.state == "LN") LN.data
DimPlot(LN.data,
reduction = "spring",
cols = c("#ebcb2e", "#9ec22f", "#a9961b"),
pt.size = 1.5) & NoAxes()
## Prepare the dataset for clustering with scrattch.hicat
# Exclude genes detected in less than 3 cells
<- Matrix::rowSums(LN.data@assays[["RNA"]]@counts > 0)
num.cells <- names(x = num.cells[which(x = num.cells >= 3)])
genes.use
<- c(grep(pattern = "(^Rpl|^Rps|^Mrp)", x = genes.use, value = TRUE),
GenesToRemove grep(pattern = "^mt-", x = genes.use, value = TRUE),
"Xist")
<- genes.use[!genes.use %in% GenesToRemove] genes.use
<- as.matrix(LN.data@assays[["RNA"]]@counts)[rownames(LN.data@assays[["RNA"]]@counts) %in% genes.use,]
dgeMatrix_count <- cpm(dgeMatrix_count)
dgeMatrix_cpm <- log2(dgeMatrix_cpm + 1)
norm.dat
<- Matrix(norm.dat, sparse = TRUE)
norm.dat <- list(raw.dat=dgeMatrix_count, norm.dat=norm.dat)
Data.matrix attach(Data.matrix)
<- log2(colSums(as.matrix(Data.matrix$norm.dat) > 0))
gene.counts <- log2(colSums(Data.matrix$raw.dat))
nUMI <- LN.data$percent.mito
perctMito <- LN.data$percent.ribo
perctRibo <- LN.data$PseudotimeScore
Pseudotime
<- as.matrix(cbind(gene.counts,
rm.eigen
nUMI,
perctMito,
perctRibo,
Pseudotime))
row.names(rm.eigen) <- names(gene.counts)
colnames(rm.eigen) <- c("log2nGenes",
"log2nUMI",
"perctMito",
"perctRibo",
"Pseudotime ")
rm(gene.counts, nUMI, perctMito, perctRibo, Pseudotime)
# Parameters for iterative clustering
<- de_param(padj.th = 0.01,
de.param lfc.th = 0.9,
low.th = 1,
q1.th = 0.25,
q2.th = NULL,
q.diff.th = 0.7,
de.score.th = 80,
min.cells = 10)
<- iter_clust(norm.dat,
iter.result counts = raw.dat,
dim.method = "pca",
max.dim = 15,
k.nn = 8,
de.param = de.param,
rm.eigen = rm.eigen,
rm.th = 0.7,
vg.padj.th = 0.5,
method = "louvain",
prefix = "test-iter_clust",
verbose = F)
## [1] "test-iter_clust"
## Finding nearest neighbors...DONE ~ 0.01 s
## Compute jaccard coefficient between nearest-neighbor sets...DONE ~ 0.016 s
## Build undirected graph from the weighted links...DONE ~ 0.03 s
## Run louvain clustering on the graph ...DONE ~ 0.015 s
## Return a community class
## -Modularity value: 0.8158831
## -Number of clusters: 19[1] "test-iter_clust.1"
## [1] "test-iter_clust.2"
## Finding nearest neighbors...DONE ~ 0.002 s
## Compute jaccard coefficient between nearest-neighbor sets...DONE ~ 0.008 s
## Build undirected graph from the weighted links...DONE ~ 0.013 s
## Run louvain clustering on the graph ...DONE ~ 0.006 s
## Return a community class
## -Modularity value: 0.8361195
## -Number of clusters: 16[1] "test-iter_clust.3"
## Finding nearest neighbors...DONE ~ 0.001 s
## Compute jaccard coefficient between nearest-neighbor sets...DONE ~ 0.008 s
## Build undirected graph from the weighted links...DONE ~ 0.012 s
## Run louvain clustering on the graph ...DONE ~ 0.005 s
## Return a community class
## -Modularity value: 0.8542279
## -Number of clusters: 15
# Merge clusters which are not seperable by DEGs
<- t(norm.dat[iter.result$markers,])
rd.dat <- merge_cl(norm.dat,
merge.result cl = iter.result$cl,
rd.dat = rd.dat,
de.param = de.param)
cat(length(unique(merge.result$cl))," Clusters")
## 3 Clusters
$iter.clust <- merge.result$cl
LN.data
Idents(LN.data) <- "iter.clust"
<- c("#ebcb2e", "#9ec22f", "#cc3a1b")
colors
DimPlot(LN.data,
reduction = "spring",
#cols = colors,
pt.size = 1.5) & NoAxes()
<- FindAllMarkers(LN.data,
Neurons.markers test.use = "roc",
only.pos = TRUE,
min.pct = 0.25,
logfc.threshold = 0.25)
<- Neurons.markers %>%
top10 group_by(cluster) %>%
filter(power > 0.45)
DoHeatmap(LN.data,
group.colors = c("#ebcb2e", "#9ec22f", "#cc3a1b"),
features = top10$gene) + NoLegend()
FeaturePlot(object = LN.data,
features = c("Foxg1", "Zfpm2",
"Lhx1", "Zic5", "Zfp503"),
pt.size = 1,
cols = c("grey90", brewer.pal(9,"YlGnBu")),
reduction = "spring",
order = T) & NoAxes() & NoLegend()
## Use fate ID to infer lineages along differentiating cells
$Broadclust.ident <- sapply(Neurons.data$Barcodes,
Neurons.dataFUN = function(x) {
if (x %in% LN.data$Barcodes) {
= paste0("Neuron_", LN.data@meta.data[x, "iter.clust"])
x else {
} = Neurons.data@meta.data[x, "Broadclust.ident"]
x
}
})
Idents(Neurons.data) <- "Broadclust.ident"
DimPlot(Neurons.data,
reduction = "spring",
cols = c("#ebcb2e", "#9ec22f", "#a9961b", "#cc3a1b", "#d14c8d", "#4cabdc", "#5ab793", "grey90", "#e7823a", "#046c9a", "#4990c9", "grey60"),
pt.size = 0.5) & NoAxes()
<- SCTransform(Neurons.data,
Neurons.data method = "glmGamPoi",
vars.to.regress = c("percent.mito", "percent.ribo"),
verbose = T)
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|===== | 7%
|
|======= | 10%
|
|========= | 13%
|
|============ | 17%
|
|============== | 20%
|
|================ | 23%
|
|=================== | 27%
|
|===================== | 30%
|
|======================= | 33%
|
|========================== | 37%
|
|============================ | 40%
|
|============================== | 43%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================== | 57%
|
|========================================== | 60%
|
|============================================ | 63%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|=================================================== | 73%
|
|====================================================== | 77%
|
|======================================================== | 80%
|
|========================================================== | 83%
|
|============================================================= | 87%
|
|=============================================================== | 90%
|
|================================================================= | 93%
|
|==================================================================== | 97%
|
|======================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|===== | 7%
|
|======= | 10%
|
|========= | 13%
|
|============ | 17%
|
|============== | 20%
|
|================ | 23%
|
|=================== | 27%
|
|===================== | 30%
|
|======================= | 33%
|
|========================== | 37%
|
|============================ | 40%
|
|============================== | 43%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================== | 57%
|
|========================================== | 60%
|
|============================================ | 63%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|=================================================== | 73%
|
|====================================================== | 77%
|
|======================================================== | 80%
|
|========================================================== | 83%
|
|============================================================= | 87%
|
|=============================================================== | 90%
|
|================================================================= | 93%
|
|==================================================================== | 97%
|
|======================================================================| 100%
<- FindVariableFeatures(Neurons.data, selection.method = "vst", nfeatures = 1500) Neurons.data
<- as.data.frame(as.matrix(Neurons.data@assays$SCT@data[Neurons.data@assays$SCT@var.features,]))
Norm.Mat
#Rename idents
<- 4:1
id names(id) <- levels(Neurons.data)
<- RenameIdents(Neurons.data, id)
Neurons.data
# Set a cluster assignment factor for each cells
<- Idents(Neurons.data)
ClusterIdent names(ClusterIdent) <- names(Idents(Neurons.data))
<- 1:3
Attractors
# Distance in spring space
<- as.matrix(dist(cbind(Neurons.data$Spring_1, Neurons.data$Spring_2))) z
<- fateBias(Norm.Mat, ClusterIdent, Attractors,
Infered.Fate.bias z = z,
minnr=20,
minnrh=30,
adapt=TRUE,
confidence=0.75,
nbfactor=5,
use.dist=FALSE,
seed=1234,
nbtree=NULL)
$FateID.iteration <- "Attractors"
Neurons.dataIdents(Neurons.data) <- "FateID.iteration"
for (i in seq(0, length(Infered.Fate.bias$rfl), by = 5)[-1]) {
<- seq(i-4,i)
iter <- c()
Barcodes for (j in iter) {
<- c(Barcodes, names(Infered.Fate.bias$rfl[[j]]$test$predicted))
Barcodes
}<- SetIdent(Neurons.data, cells = Barcodes, value = paste0("iter ",iter[1]," to ", iter[4]))
Neurons.data
}
DimPlot(Neurons.data,
reduction = "spring",
pt.size = 1) & NoAxes()
<- Infered.Fate.bias$probs[,seq(length(Attractors))]
probs
$prob.1 <- probs$t1
Neurons.data$prob.2 <- probs$t2
Neurons.data$prob.3 <- probs$t3
Neurons.data
FeaturePlot(object = Neurons.data,
features = c("prob.1", "prob.2", "prob.3"),
pt.size = 0.5,
cols = rev(RColorBrewer::brewer.pal(n = 11, name = "Spectral")),
reduction = "spring",
order = T) & NoAxes() & NoLegend()
<- data.frame(barcode=Neurons.data$Barcodes,
New.data cluster= Neurons.data$Broadclust.ident,
spring1= Neurons.data$Spring_1,
spring2= Neurons.data$Spring_2,
prob.1= Neurons.data$prob.1,
prob.2= Neurons.data$prob.2,
prob.3 = Neurons.data$prob.3)
$lineage.bias <- colnames(New.data[,5:7])[apply(New.data[,5:7],1,which.max)]
New.data
ggplot(New.data, aes(spring1, spring2, colour = lineage.bias)) +
scale_color_manual(values=c("#e7823a","#cc391b","#026c9a","#d14c8d")) +
geom_point()
$Lineage.bias <- New.data$lineage.bias
Neurons.data
$Broadclust.ident <- sapply(Raw.data$Barcodes,
Raw.dataFUN = function(x) {
if (x %in% Neurons.data$Barcodes) {
= paste0("Neuron_",Neurons.data@meta.data[x, "Lineage.bias"])
x else {
} = Raw.data@meta.data[x, "Broadclust.ident"]
x
}
})
Idents(Raw.data) <- "Broadclust.ident"
DimPlot(Raw.data,
reduction = "spring",
cols = c("#ebcb2e", "#9ec22f", "#a9961b", "#cc3a1b", "#d14c8d", "#4cabdc", "#5ab793", "#e7823a", "#046c9a", "grey90", "#4990c9"),
pt.size = 0.5) & NoAxes()
rm(list = ls()[!ls() %in% "Raw.data"])
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3051089 163.0 5399218 288.4 5399218 288.4
## Vcells 252292949 1924.9 712258272 5434.1 699288732 5335.2
<- list(WT = readRDS("../QC.filtered.clustered.cells.RDS") %>%
WT.KO subset(subset = orig.ident == "Hem1" & Cell_ident %in% c("ChP_progenitors", "ChP",
"Dorso-Medial_pallium", "Medial_pallium",
"Hem", "Thalamic_eminence") ),
KO = Raw.data %>% subset(idents = c(1,2,3,5)))
<- DimPlot(object = WT.KO[["WT"]],
p1 group.by = "Cell.state",
reduction = "spring",
cols = c("#31b6bd", "#ebcb2e", "#9ec22f", "#cc3a1b", "#d14c8d", "#4cabdc", "#5ab793", "#e7823a", "#046c9a", "#4990c9"),
pt.size = 1.5
& NoAxes()
)
<- DimPlot(WT.KO[["KO"]],
p2 reduction = "spring",
group.by = "Broadclust.ident",
cols = c("#ebcb2e", "#9ec22f", "#a9961b", "#cc3a1b"),
pt.size = 1.5) & NoAxes()
+ p2 p1
"WT"]] <- NormalizeData(WT.KO[["WT"]], normalization.method = "LogNormalize", scale.factor = 10000, assay = "RNA")
WT.KO[["KO"]] <- NormalizeData(WT.KO[["KO"]], normalization.method = "LogNormalize", scale.factor = 10000, assay = "RNA") WT.KO[[
"WT"]] <- FindVariableFeatures(WT.KO[["WT"]], selection.method = "vst", nfeatures = 2000)
WT.KO[["KO"]] <- FindVariableFeatures(WT.KO[["KO"]], selection.method = "vst", nfeatures = 2000) WT.KO[[
<- SelectIntegrationFeatures(object.list = WT.KO, nfeatures = 1500)
features
<- read.table("TF.csv", sep = ";")[,1]
TFs <- features[features %in% TFs] TFs
<- FindTransferAnchors(reference = WT.KO[["WT"]],
KO.anchors query = WT.KO[["KO"]],
features = TFs,
reduction = "rpca",
k.anchor = 5,
k.filter = 100,
k.score = 30,
npcs = 25,
dims = 1:25,
max.features = 200)
<- TransferData(anchorset = KO.anchors,
predictions refdata = WT.KO[["WT"]]$Cell.state,
dims = 1:25)
"KO"]] <- AddMetaData(WT.KO[["KO"]], metadata = predictions) WT.KO[[
<- brewer.pal(n =11, name = "Spectral")
cols
ggplot(WT.KO[["KO"]]@meta.data, aes(Spring_1, Spring_2)) +
geom_point(aes(color=prediction.score.max), size=1, shape=16) +
scale_color_gradientn(colours=rev(cols), name='prediction.score.max')
<- DimPlot(object = WT.KO[["WT"]],
p1 group.by = "Cell.state",
reduction = "spring",
cols = c("#7293c8", "#b79f0b", "#3ca73f","#31b6bd",
"#ebcb2e", "#9ec22f", "#a9961b", "#cc3a1b",
"#d14c8d", "#4cabdc", "#5ab793", "#e7823a",
"#046c9a", "#4990c9"),
pt.size = 1) & NoAxes()
<- DimPlot(WT.KO[["KO"]],
p2 group.by = "predicted.id",
reduction = "spring",
cols = c("#31b6bd", "#ebcb2e", "#9ec22f", "#a9961b", "#cc3a1b"),
pt.size = 1) & NoAxes()
+ p2 p1
$Cell.ident <- sapply(Raw.data$Barcodes,
Raw.dataFUN = function(x) {
if (x %in% WT.KO[["KO"]]$Barcodes) {
= WT.KO[["KO"]]@meta.data[x, "predicted.id"]
x else {
} = Raw.data@meta.data[x, "Broadclust.ident"]
x
} })
DimPlot(object = Raw.data,
group.by = "Cell.ident",
reduction = "spring",
cols = c( "#4cabdc", "#7293c8", "grey40" ,"#3ca73f","grey80",
"#31b6bd", "#ebcb2e", "#9ec22f", "#a9961b",
"#046c9a", "#cc3a1b","#4990c9","#e7823a"),
pt.size = 0.5) & NoAxes()
Reads were realigned using the same transcriptome annotation as the WT E11.5-E12.5 dataset. We re import the count matrix from the realignment.
rm(list = ls()[!ls() %in% "Raw.data"])
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3059672 163.5 5399218 288.4 5399218 288.4
## Vcells 252333733 1925.2 1033469064 7884.8 1105207168 8432.1
@assays[["RNA"]]@counts <- Read10X("../../RawData/Gmnc_KO/mm10_ref/outs/filtered_feature_bc_matrix/")[,Raw.data$Barcodes]
Raw.data@assays[["RNA"]]@data <- Raw.data@assays[["RNA"]]@counts
Raw.data
<- SCTransform(Raw.data,
Raw.data method = "glmGamPoi",
vars.to.regress = c("percent.mito"),
verbose = T)
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|==== | 6%
|
|====== | 8%
|
|======== | 11%
|
|========== | 14%
|
|============ | 17%
|
|============== | 19%
|
|================ | 22%
|
|================== | 25%
|
|=================== | 28%
|
|===================== | 31%
|
|======================= | 33%
|
|========================= | 36%
|
|=========================== | 39%
|
|============================= | 42%
|
|=============================== | 44%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================= | 56%
|
|========================================= | 58%
|
|=========================================== | 61%
|
|============================================= | 64%
|
|=============================================== | 67%
|
|================================================= | 69%
|
|=================================================== | 72%
|
|==================================================== | 75%
|
|====================================================== | 78%
|
|======================================================== | 81%
|
|========================================================== | 83%
|
|============================================================ | 86%
|
|============================================================== | 89%
|
|================================================================ | 92%
|
|================================================================== | 94%
|
|==================================================================== | 97%
|
|======================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|==== | 6%
|
|====== | 8%
|
|======== | 11%
|
|========== | 14%
|
|============ | 17%
|
|============== | 19%
|
|================ | 22%
|
|================== | 25%
|
|=================== | 28%
|
|===================== | 31%
|
|======================= | 33%
|
|========================= | 36%
|
|=========================== | 39%
|
|============================= | 42%
|
|=============================== | 44%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================= | 56%
|
|========================================= | 58%
|
|=========================================== | 61%
|
|============================================= | 64%
|
|=============================================== | 67%
|
|================================================= | 69%
|
|=================================================== | 72%
|
|==================================================== | 75%
|
|====================================================== | 78%
|
|======================================================== | 81%
|
|========================================================== | 83%
|
|============================================================ | 86%
|
|============================================================== | 89%
|
|================================================================ | 92%
|
|================================================================== | 94%
|
|==================================================================== | 97%
|
|======================================================================| 100%
saveRDS(Raw.data, "./GmncKO.cells.RDS")
#date
format(Sys.time(), "%d %B, %Y, %H,%M")
## [1] "02 juin, 2022, 10,21"
#Packages used
sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /home/matthieu/anaconda3/lib/libmkl_rt.so.1
##
## locale:
## [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
## [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
## [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] Rphenograph_0.99.1 igraph_1.2.11 matrixStats_0.61.0
## [4] princurve_2.1.6 wesanderson_0.3.6 reticulate_1.22
## [7] cowplot_1.1.1 ggExtra_0.9 ggplot2_3.3.5
## [10] RColorBrewer_1.1-2 dplyr_1.0.7 Matrix_1.4-1
## [13] FateID_0.2.1 scrattch.hicat_1.0.0 SeuratObject_4.0.4
## [16] Seurat_4.0.5
##
## loaded via a namespace (and not attached):
## [1] plyr_1.8.6 lazyeval_0.2.2
## [3] splines_4.2.0 listenv_0.8.0
## [5] scattermore_0.7 lle_1.1
## [7] GenomeInfoDb_1.30.0 digest_0.6.29
## [9] htmltools_0.5.2 fansi_0.5.0
## [11] magrittr_2.0.2 tensor_1.5
## [13] cluster_2.1.3 ROCR_1.0-11
## [15] limma_3.50.0 globals_0.14.0
## [17] askpass_1.1 spatstat.sparse_2.0-0
## [19] colorspace_2.0-2 ggrepel_0.9.1
## [21] xfun_0.28 RCurl_1.98-1.5
## [23] crayon_1.4.2 jsonlite_1.7.2
## [25] spatstat.data_2.1-0 survival_3.2-13
## [27] zoo_1.8-9 glue_1.5.1
## [29] polyclip_1.10-0 gtable_0.3.0
## [31] zlibbioc_1.40.0 XVector_0.34.0
## [33] leiden_0.3.9 DelayedArray_0.20.0
## [35] future.apply_1.8.1 BiocGenerics_0.40.0
## [37] abind_1.4-5 scales_1.1.1
## [39] pheatmap_1.0.12 DBI_1.1.1
## [41] som_0.3-5.1 miniUI_0.1.1.1
## [43] Rcpp_1.0.8 viridisLite_0.4.0
## [45] xtable_1.8-4 spatstat.core_2.3-1
## [47] stats4_4.2.0 umap_0.2.8.0
## [49] htmlwidgets_1.5.4 httr_1.4.2
## [51] ellipsis_0.3.2 ica_1.0-2
## [53] pkgconfig_2.0.3 farver_2.1.0
## [55] sass_0.4.0 uwot_0.1.10
## [57] deldir_1.0-6 locfit_1.5-9.4
## [59] utf8_1.2.2 tidyselect_1.1.1
## [61] labeling_0.4.2 rlang_0.4.12
## [63] reshape2_1.4.4 later_1.3.0
## [65] munsell_0.5.0 tools_4.2.0
## [67] generics_0.1.1 ggridges_0.5.3
## [69] evaluate_0.14 stringr_1.4.0
## [71] fastmap_1.1.0 yaml_2.2.1
## [73] goftest_1.2-3 knitr_1.36
## [75] fitdistrplus_1.1-6 purrr_0.3.4
## [77] randomForest_4.7-1 RANN_2.6.1
## [79] sparseMatrixStats_1.6.0 pbapply_1.5-0
## [81] future_1.23.0 nlme_3.1-153
## [83] mime_0.12 compiler_4.2.0
## [85] plotly_4.10.0 png_0.1-7
## [87] spatstat.utils_2.2-0 tibble_3.1.6
## [89] bslib_0.3.1 glmGamPoi_1.6.0
## [91] stringi_1.7.6 highr_0.9
## [93] RSpectra_0.16-0 lattice_0.20-45
## [95] vctrs_0.3.8 pillar_1.6.4
## [97] lifecycle_1.0.1 spatstat.geom_2.3-0
## [99] lmtest_0.9-39 jquerylib_0.1.4
## [101] RcppAnnoy_0.0.19 bitops_1.0-7
## [103] data.table_1.14.2 irlba_2.3.3
## [105] GenomicRanges_1.46.1 httpuv_1.6.3
## [107] patchwork_1.1.1 R6_2.5.1
## [109] promises_1.2.0.1 KernSmooth_2.23-20
## [111] gridExtra_2.3 IRanges_2.28.0
## [113] parallelly_1.29.0 codetools_0.2-18
## [115] MASS_7.3-57 assertthat_0.2.1
## [117] SummarizedExperiment_1.24.0 openssl_1.4.5
## [119] withr_2.4.3 sctransform_0.3.2
## [121] GenomeInfoDbData_1.2.7 S4Vectors_0.32.3
## [123] mgcv_1.8-40 parallel_4.2.0
## [125] grid_4.2.0 rpart_4.1.16
## [127] tidyr_1.1.4 DelayedMatrixStats_1.16.0
## [129] rmarkdown_2.11 MatrixGenerics_1.6.0
## [131] Rtsne_0.15 Biobase_2.54.0
## [133] scatterplot3d_0.3-41 shiny_1.7.1
## [135] snowfall_1.84-6.1
Institute of Psychiatry and Neuroscience of Paris, INSERM U1266, 75014, Paris, France, matthieu.moreau@inserm.fr↩︎