library(Seurat)
library(sctransform)
library(dplyr)
library(wesanderson)
# Read the semicolon-separated count table. The first column supplies the
# row names (gene symbols) and the header row supplies the cell barcodes.
counts <- as.matrix(
  read.table("Counts.csv", sep = ";", header = TRUE, row.names = 1)
)
# Preview the top-left corner of the matrix.
counts[1:5, 1:5]
## Hem1_AAACCCAAGCGTACAG Hem1_AAACCCAAGCTCGTGC Hem1_AAACCCAAGGCACTCC
## Xkr4 0 0 0
## Rp1 0 0 0
## Mrpl15 5 0 6
## Lypla1 0 0 0
## Gm37988 0 0 0
## Hem1_AAACCCAAGGCATCAG Hem1_AAACCCAAGGCCTGCT
## Xkr4 0 0
## Rp1 0 0
## Mrpl15 2 1
## Lypla1 3 0
## Gm37988 0 0
# Read the semicolon-separated per-cell annotation table.
meta.data <- read.table("Hem_metadata.csv", sep = ";", header = TRUE)
# Preview the first five rows and columns.
meta.data[1:5, 1:5]
## Registered S3 method overwritten by 'cli':
## method from
## print.boxx spatstat.geom
## # A tibble: 5 × 5
## Barcodes orig.ident nGene nUMI percent.mito
## <chr> <chr> <int> <int> <dbl>
## 1 Hem1_AAACCCAAGCGTACAG Hem1 3344 12705 0.0393
## 2 Hem1_AAACCCAAGCTCGTGC Hem1 3495 10377 0.0516
## 3 Hem1_AAACCCAAGGCACTCC Hem1 5261 25386 0.0280
## 4 Hem1_AAACCCAAGGCATCAG Hem1 4473 15642 0.0476
## 5 Hem1_AAACCCAAGGCCTGCT Hem1 3054 7937 0.0619
# Build the Seurat object from the count matrix and attach the per-cell
# annotations as metadata.
Hem.data <- CreateSeuratObject(counts = counts,
                               meta.data = meta.data)
## Warning: The following arguments are not used: row.names
# Preview the counts stored in the RNA assay of the new object.
Hem.data@assays$RNA@counts[1:5, 1:5]
## 5 x 5 sparse Matrix of class "dgCMatrix"
## Hem1_AAACCCAAGCGTACAG Hem1_AAACCCAAGCTCGTGC Hem1_AAACCCAAGGCACTCC
## Xkr4 . . .
## Rp1 . . .
## Mrpl15 5 . 6
## Lypla1 . . .
## Gm37988 . . .
## Hem1_AAACCCAAGGCATCAG Hem1_AAACCCAAGGCCTGCT
## Xkr4 . .
## Rp1 . .
## Mrpl15 2 1
## Lypla1 3 .
## Gm37988 . .
# Show the first rows of the per-cell metadata held by the Seurat object.
head(x = Hem.data@meta.data)
## # A tibble: 6 × 22
## orig.ident nCount_RNA nFeature_RNA Barcodes nGene nUMI percent.mito
## <chr> <dbl> <int> <chr> <int> <int> <dbl>
## 1 Hem1 12705 3344 Hem1_AAACCCAAGCGT… 3344 12705 0.0393
## 2 Hem1 10376 3494 Hem1_AAACCCAAGCTC… 3495 10377 0.0516
## 3 Hem1 25385 5260 Hem1_AAACCCAAGGCA… 5261 25386 0.0280
## 4 Hem1 15642 4473 Hem1_AAACCCAAGGCA… 4473 15642 0.0476
## 5 Hem1 7937 3054 Hem1_AAACCCAAGGCC… 3054 7937 0.0619
## 6 Hem1 25683 5167 Hem1_AAACCCAAGTTC… 5168 25684 0.0427
## # … with 15 more variables: percent.ribo <dbl>, Doubletscore <dbl>,
## # Predicted_doublets <chr>, Spring_1 <dbl>, Spring_2 <dbl>,
## # Erythrocyte.signature1 <dbl>, S.Score <dbl>, G2M.Score <dbl>, Phase <chr>,
## # AP_signature1 <dbl>, BP_signature1 <dbl>, EN_signature1 <dbl>,
## # LN_signature1 <dbl>, Immune_signature1 <dbl>, Meninges_signature1 <dbl>
# Drop every object in the workspace except the Seurat object.
rm(list = setdiff(ls(), "Hem.data"))

# Take the two SPRING coordinate columns from the metadata as a matrix.
spring <- as.matrix(Hem.data@meta.data %>% select("Spring_1", "Spring_2"))

# Register the coordinates as a dimensional reduction named "spring",
# attached to the object's current default assay.
Hem.data[["spring"]] <- CreateDimReducObject(
  embeddings = spring,
  key = "Spring_",
  assay = DefaultAssay(Hem.data)
)

# Plot the cells on the SPRING layout.
DimPlot(Hem.data, reduction = "spring", pt.size = 0.5)
While scaling, we regressed out sources of variation associated with cell cycle and cell quality metrics.
# Cell cycle difference scoring: S-phase score minus G2/M-phase score,
# stored as a new metadata column.
Hem.data$CC.Difference <- Hem.data$S.Score - Hem.data$G2M.Score

# Normalise with SCTransform (glmGamPoi fitting), regressing out the
# mitochondrial and ribosomal fractions and the cell cycle difference.
Hem.data <- SCTransform(
  Hem.data,
  method = "glmGamPoi",
  vars.to.regress = c("percent.mito", "percent.ribo", "CC.Difference"),
  verbose = TRUE
)
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 17667 by 15333
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 5000 cells
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
## Found 107 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 17667 genes
##
|
| | 0%
|
|== | 3%
|
|==== | 6%
|
|====== | 8%
|
|======== | 11%
|
|========== | 14%
|
|============ | 17%
|
|============== | 19%
|
|================ | 22%
|
|================== | 25%
|
|=================== | 28%
|
|===================== | 31%
|
|======================= | 33%
|
|========================= | 36%
|
|=========================== | 39%
|
|============================= | 42%
|
|=============================== | 44%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================= | 56%
|
|========================================= | 58%
|
|=========================================== | 61%
|
|============================================= | 64%
|
|=============================================== | 67%
|
|================================================= | 69%
|
|=================================================== | 72%
|
|==================================================== | 75%
|
|====================================================== | 78%
|
|======================================================== | 81%
|
|========================================================== | 83%
|
|============================================================ | 86%
|
|============================================================== | 89%
|
|================================================================ | 92%
|
|================================================================== | 94%
|
|==================================================================== | 97%
|
|======================================================================| 100%
## Computing corrected count matrix for 17667 genes
##
|
| | 0%
|
|== | 3%
|
|==== | 6%
|
|====== | 8%
|
|======== | 11%
|
|========== | 14%
|
|============ | 17%
|
|============== | 19%
|
|================ | 22%
|
|================== | 25%
|
|=================== | 28%
|
|===================== | 31%
|
|======================= | 33%
|
|========================= | 36%
|
|=========================== | 39%
|
|============================= | 42%
|
|=============================== | 44%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================= | 56%
|
|========================================= | 58%
|
|=========================================== | 61%
|
|============================================= | 64%
|
|=============================================== | 67%
|
|================================================= | 69%
|
|=================================================== | 72%
|
|==================================================== | 75%
|
|====================================================== | 78%
|
|======================================================== | 81%
|
|========================================================== | 83%
|
|============================================================ | 86%
|
|============================================================== | 89%
|
|================================================================ | 92%
|
|================================================================== | 94%
|
|==================================================================== | 97%
|
|======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 56.33922 secs
## Determine variable features
## Place corrected count matrix in counts slot
## Regressing out percent.mito, percent.ribo, CC.Difference
## Centering data matrix
## Set default assay to SCT
# Principal component analysis on the object's default assay.
Hem.data <- RunPCA(Hem.data, verbose = FALSE)

# Nearest-neighbour and shared-nearest-neighbour graphs from the first
# 20 principal components, using 8 neighbours per cell.
Hem.data <- FindNeighbors(Hem.data,
                          dims = 1:20,
                          k.param = 8)
## Computing nearest neighbor graph
## Computing SNN
# Cluster the cells on the neighbour graph at resolution 0.1.
Hem.data <- FindClusters(Hem.data, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 15333
## Number of edges: 359502
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9588
## Number of communities: 6
## Elapsed time: 2 seconds
# Clusters on the SPRING layout, coloured with the FantasticFox1 palette
# extended by "grey60"; axes hidden.
DimPlot(
  object = Hem.data,
  reduction = "spring",
  pt.size = 0.5,
  cols = c(wes_palette(name = "FantasticFox1"), "grey60")
) & NoAxes()

# Same layout and colours, split into one panel per identity class,
# with the legend and axes hidden.
DimPlot(
  object = Hem.data,
  reduction = "spring",
  split.by = "ident",
  pt.size = 0.5,
  cols = c(wes_palette(name = "FantasticFox1"), "grey60")
) + NoLegend() & NoAxes()
# Write the processed Seurat object to the parent directory.
saveRDS(object = Hem.data, file = "../QC.filtered.cells.RDS")

# Date and time at which the analysis was run.
format(x = Sys.time(), format = "%d %B, %Y, %H,%M")
## [1] "18 novembre, 2021, 09,44"
# Packages used: R version, platform and attached/loaded package versions.
utils::sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
## [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
## [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] wesanderson_0.3.6 dplyr_1.0.7 sctransform_0.3.2 SeuratObject_4.0.3
## [5] Seurat_4.0.5
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.15 colorspace_2.0-2
## [3] deldir_1.0-6 ellipsis_0.3.2
## [5] ggridges_0.5.3 XVector_0.34.0
## [7] GenomicRanges_1.46.0 rstudioapi_0.13
## [9] spatstat.data_2.1-0 leiden_0.3.9
## [11] listenv_0.8.0 farver_2.1.0
## [13] ggrepel_0.9.1 fansi_0.5.0
## [15] sparseMatrixStats_1.6.0 codetools_0.2-18
## [17] splines_4.1.2 knitr_1.36
## [19] polyclip_1.10-0 jsonlite_1.7.2
## [21] ica_1.0-2 cluster_2.1.2
## [23] png_0.1-7 uwot_0.1.10
## [25] shiny_1.7.1 spatstat.sparse_2.0-0
## [27] compiler_4.1.2 httr_1.4.2
## [29] Matrix_1.3-4 fastmap_1.1.0
## [31] lazyeval_0.2.2 cli_3.1.0
## [33] later_1.3.0 htmltools_0.5.2
## [35] tools_4.1.2 igraph_1.2.8
## [37] GenomeInfoDbData_1.2.7 gtable_0.3.0
## [39] glue_1.5.0 RANN_2.6.1
## [41] reshape2_1.4.4 Rcpp_1.0.7
## [43] Biobase_2.54.0 scattermore_0.7
## [45] jquerylib_0.1.4 vctrs_0.3.8
## [47] nlme_3.1-153 DelayedMatrixStats_1.16.0
## [49] lmtest_0.9-39 xfun_0.28
## [51] stringr_1.4.0 globals_0.14.0
## [53] mime_0.12 miniUI_0.1.1.1
## [55] lifecycle_1.0.1 irlba_2.3.3
## [57] goftest_1.2-3 future_1.23.0
## [59] zlibbioc_1.40.0 MASS_7.3-54
## [61] zoo_1.8-9 scales_1.1.1
## [63] spatstat.core_2.3-1 MatrixGenerics_1.6.0
## [65] promises_1.2.0.1 spatstat.utils_2.2-0
## [67] SummarizedExperiment_1.24.0 parallel_4.1.2
## [69] RColorBrewer_1.1-2 yaml_2.2.1
## [71] reticulate_1.22 pbapply_1.5-0
## [73] gridExtra_2.3 ggplot2_3.3.5
## [75] sass_0.4.0 rpart_4.1-15
## [77] stringi_1.7.5 highr_0.9
## [79] S4Vectors_0.32.2 BiocGenerics_0.40.0
## [81] GenomeInfoDb_1.30.0 bitops_1.0-7
## [83] rlang_0.4.12 pkgconfig_2.0.3
## [85] matrixStats_0.61.0 glmGamPoi_1.6.0
## [87] evaluate_0.14 lattice_0.20-45
## [89] ROCR_1.0-11 purrr_0.3.4
## [91] tensor_1.5 patchwork_1.1.1
## [93] htmlwidgets_1.5.4 labeling_0.4.2
## [95] cowplot_1.1.1 tidyselect_1.1.1
## [97] parallelly_1.28.1 RcppAnnoy_0.0.19
## [99] plyr_1.8.6 magrittr_2.0.1
## [101] R6_2.5.1 IRanges_2.28.0
## [103] generics_0.1.1 DelayedArray_0.20.0
## [105] pillar_1.6.4 mgcv_1.8-38
## [107] fitdistrplus_1.1-6 RCurl_1.98-1.5
## [109] survival_3.2-13 abind_1.4-5
## [111] tibble_3.1.6 future.apply_1.8.1
## [113] crayon_1.4.2 KernSmooth_2.23-20
## [115] utf8_1.2.2 spatstat.geom_2.3-0
## [117] plotly_4.10.0 rmarkdown_2.11
## [119] grid_4.1.2 data.table_1.14.2
## [121] digest_0.6.28 xtable_1.8-4
## [123] tidyr_1.1.4 httpuv_1.6.3
## [125] stats4_4.1.2 munsell_0.5.0
## [127] viridisLite_0.4.0 bslib_0.3.1
Institute of Psychiatry and Neuroscience of Paris, INSERM U1266, 75014, Paris, France, matthieu.moreau@inserm.fr