
Summary information
Xiaotao Shen
Created on 2021-12-04 and updated on 2026-03-02
Source:vignettes/summary_info.Rmd
summary_info.Rmd

For a mass_dataset class object, we can get its summary information.
Data preparation
library(massdataset)
library(tidyverse)
data("expression_data")
data("sample_info")
data("variable_info")
object =
create_mass_dataset(
expression_data = expression_data,
sample_info = sample_info,
variable_info = variable_info
)

Summary information
Just type this object in the R session.
object
#> --------------------
#> massdataset version: 0.99.1
#> --------------------
#> 1.expression_data:[ 1000 x 8 data.frame]
#> 2.sample_info:[ 8 x 4 data.frame]
#> 8 samples:Blank_3 Blank_4 QC_1 ... PS4P3 PS4P4
#> 3.variable_info:[ 1000 x 3 data.frame]
#> 1000 variables:M136T55_2_POS M79T35_POS M307T548_POS ... M232T937_POS M301T277_POS
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 0 variables x 0 MS2 spectra]
#> --------------------
#> Processing information
#> 1 processings in total
#> create_mass_dataset ----------
#> Package Function.used Time
#> 1 massdataset create_mass_dataset() 2026-03-02 09:28:32

Here we can see just the basic information of the object.
Use functions to get the summary information
##dim of object
dim(object)
#> variables samples
#> 1000 8
##row number
nrow(object)
#> variables
#> 1000
##column number
ncol(object)
#> samples
#> 8
##sample number
get_sample_number(object)
#> [1] 8
#variable number
get_variable_number(object)
#> [1] 1000
##sample id
colnames(object)
#> [1] "Blank_3" "Blank_4" "QC_1" "QC_2" "PS4P1" "PS4P2" "PS4P3"
#> [8] "PS4P4"
##variable id
rownames(object) %>%
head()
#> [1] "M136T55_2_POS" "M79T35_POS" "M307T548_POS" "M183T224_POS"
#> [5] "M349T47_POS" "M182T828_POS"
##sample id
get_sample_id(object)
#> [1] "Blank_3" "Blank_4" "QC_1" "QC_2" "PS4P1" "PS4P2" "PS4P3"
#> [8] "PS4P4"
##variable id
get_variable_id(object) %>%
head()
#> [1] "M136T55_2_POS" "M79T35_POS" "M307T548_POS" "M183T224_POS"
#> [5] "M349T47_POS" "M182T828_POS"

Explore
```{r,eval=TRUE,warning=FALSE, R.options="", message=FALSE, cache=FALSE, fig.alt=c("Scatter plot of retention time versus m/z for the original dataset.", "Scatter plot of retention time versus m/z after log transformation.", "Hexbin plot of retention time versus m/z for the original dataset.")}
###show mz rt plot
object %>%
  show_mz_rt_plot()

###should log
object %>%
  `+`(1) %>%
  log(10) %>%
  show_mz_rt_plot()

###use hex
object %>%
  show_mz_rt_plot(hex = TRUE)
```
Missing values
```{r,eval=TRUE,warning=FALSE, R.options="", message=FALSE, cache=FALSE, fig.alt=c("Heatmap of missing values across the full dataset.", "Heatmap of missing values for the first ten variables with white cells.", "Heatmap of missing values for subject samples only.", "Heatmap of missing values for QC samples only.", "Heatmap of missing values for variables with m/z below 100.", "Sample-level missing value summary plot.", "Sample-level missing value summary plot colored by sample class.", "Sample-level missing value summary plot ordered by missing value count.", "Sample-level missing value summary plot ordered by missing value count in descending order.", "Variable-level missing value summary plot.", "Variable-level missing value summary plot colored by m/z.", "Variable-level missing value summary plot colored by retention time.", "Variable-level missing value summary plot colored by m/z and ordered by missing value count.", "Variable-level missing value percentage plot colored by m/z and ordered by missing value count in descending order.")}
##show missing values plot
show_missing_values(object)

show_missing_values(object[1:10,], cell_color = "white")

###only show subject samples
object %>%
  activate_mass_dataset(what = "sample_info") %>%
  filter(class == "Subject") %>%
  show_missing_values()

###only show QC samples
object %>%
  activate_mass_dataset(what = "expression_data") %>%
  dplyr::select(contains("QC")) %>%
  show_missing_values()

###only show features with mz < 100
object %>%
  activate_mass_dataset(what = "variable_info") %>%
  dplyr::filter(mz < 100) %>%
  show_missing_values(cell_color = "white",
                      show_row_names = TRUE,
                      row_names_side = "left")

##show missing values plot
show_sample_missing_values(object)
show_sample_missing_values(object, color_by = "class")
show_sample_missing_values(object, color_by = "class", order_by = "na")
show_sample_missing_values(object, color_by = "class", order_by = "na", desc = TRUE)

##show missing values plot
show_variable_missing_values(object)
show_variable_missing_values(object, color_by = "mz")

show_variable_missing_values(object, color_by = "rt") +
  scale_color_gradient(low = "skyblue", high = "red")

show_variable_missing_values(object, color_by = "mz", order_by = "na")
show_variable_missing_values(object, color_by = "mz", order_by = "na", desc = TRUE, percentage = TRUE)
```
# **Session information**
``` r
sessionInfo()
#> R version 4.5.2 (2025-10-31)
#> Platform: aarch64-apple-darwin20
#> Running under: macOS Tahoe 26.3
#>
#> Matrix products: default
#> BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
#>
#> locale:
#> [1] C.UTF-8/C.UTF-8/C.UTF-8/C/C.UTF-8/C.UTF-8
#>
#> time zone: Asia/Singapore
#> tzcode source: internal
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] lubridate_1.9.4 forcats_1.0.0 stringr_1.5.1 purrr_1.1.0
#> [5] readr_2.1.5 tidyr_1.3.1 tibble_3.3.0 tidyverse_2.0.0
#> [9] magrittr_2.0.3 dplyr_1.1.4 ggplot2_4.0.2 massdataset_0.99.1
#>
#> loaded via a namespace (and not attached):
#> [1] tidyselect_1.2.1 farver_2.1.2
#> [3] S7_0.2.0 fastmap_1.2.0
#> [5] digest_0.6.37 timechange_0.3.0
#> [7] lifecycle_1.0.4 cluster_2.1.8.1
#> [9] compiler_4.5.2 rlang_1.1.6
#> [11] sass_0.4.10 tools_4.5.2
#> [13] yaml_2.3.10 knitr_1.50
#> [15] S4Arrays_1.8.1 htmlwidgets_1.6.4
#> [17] DelayedArray_0.34.1 RColorBrewer_1.1-3
#> [19] abind_1.4-8 withr_3.0.2
#> [21] BiocGenerics_0.54.0 desc_1.4.3
#> [23] grid_4.5.2 stats4_4.5.2
#> [25] colorspace_2.1-1 scales_1.4.0
#> [27] iterators_1.0.14 dichromat_2.0-0.1
#> [29] SummarizedExperiment_1.38.1 cli_3.6.5
#> [31] rmarkdown_2.29 crayon_1.5.3
#> [33] ragg_1.4.0 generics_0.1.4
#> [35] rstudioapi_0.17.1 httr_1.4.7
#> [37] tzdb_0.5.0 rjson_0.2.23
#> [39] cachem_1.1.0 parallel_4.5.2
#> [41] XVector_0.48.0 matrixStats_1.5.0
#> [43] vctrs_0.6.5 Matrix_1.7-4
#> [45] jsonlite_2.0.0 IRanges_2.42.0
#> [47] hms_1.1.3 GetoptLong_1.0.5
#> [49] S4Vectors_0.48.0 clue_0.3-66
#> [51] systemfonts_1.2.3 foreach_1.5.2
#> [53] jquerylib_0.1.4 glue_1.8.0
#> [55] pkgdown_2.1.3 codetools_0.2-20
#> [57] stringi_1.8.7 shape_1.4.6.1
#> [59] gtable_0.3.6 GenomeInfoDb_1.44.2
#> [61] GenomicRanges_1.60.0 UCSC.utils_1.4.0
#> [63] ComplexHeatmap_2.24.1 pillar_1.11.0
#> [65] htmltools_0.5.8.1 GenomeInfoDbData_1.2.14
#> [67] circlize_0.4.16 R6_2.6.1
#> [69] textshaping_1.0.1 doParallel_1.0.17
#> [71] evaluate_1.0.4 Biobase_2.68.0
#> [73] lattice_0.22-7 png_0.1-8
#> [75] openxlsx_4.2.8 bslib_0.9.0
#> [77] Rcpp_1.1.0 zip_2.3.3
#> [79] SparseArray_1.8.1 xfun_0.53
#> [81] fs_1.6.6 MatrixGenerics_1.20.0
#> [83] pkgconfig_2.0.3 GlobalOptions_0.1.2