Data preparation
Now the positive mode and negative mode have been annotated respectively. We need to merge positive and negative mode data.
library(tidymass)
library(tidyverse)
Load data 🔗︎
load("metabolite_annotation/object_pos2")
load("metabolite_annotation/object_neg2")
Remove the features without annotations 🔗︎
Positive mode 🔗︎
object_pos2 <-
object_pos2 %>%
activate_mass_dataset(what = "annotation_table") %>%
filter(!is.na(Level)) %>%
filter(Level == 1 | Level == 2)
object_pos2
#> --------------------
#> massdataset version: 0.99.1
#> --------------------
#> 1.expression_data:[ 206 x 259 data.frame]
#> 2.sample_info:[ 259 x 6 data.frame]
#> 3.variable_info:[ 206 x 6 data.frame]
#> 4.sample_info_note:[ 6 x 2 data.frame]
#> 5.variable_info_note:[ 6 x 2 data.frame]
#> 6.ms2_data:[ 1042 variables x 951 MS2 spectra]
#> --------------------
#> Processing information (extract_process_info())
#> create_mass_dataset ----------
#> Package Function.used Time
#> 1 massdataset create_mass_dataset() 2022-01-16 16:19:04
#> process_data ----------
#> Package Function.used Time
#> 1 massprocesser process_data 2022-01-16 16:18:43
#> mutate ----------
#> Package Function.used Time
#> 1 massdataset mutate() 2022-01-16 23:48:08
#> mutate_variable_na_freq ----------
#> Package Function.used Time
#> 1 massdataset mutate_variable_na_freq() 2022-01-18 09:11:43
#> 2 massdataset mutate_variable_na_freq() 2022-01-18 09:11:43
#> 3 massdataset mutate_variable_na_freq() 2022-01-18 09:11:43
#> filter ----------
#> Package Function.used Time
#> 1 massdataset filter() 2022-01-18 09:11:44
#> 2 massdataset filter() 2022-03-10 23:57:33
#> 3 massdataset filter() 2022-03-10 23:57:33
#> impute_mv ----------
#> Package Function.used Time
#> 1 masscleaner impute_mv() 2022-01-18 09:38:02
#> normalize_data ----------
#> Package Function.used Time
#> 1 masscleaner normalize_data() 2022-01-18 09:38:07
#> integrate_data ----------
#> Package Function.used Time
#> 1 masscleaner integrate_data() 2022-01-18 09:38:08
#> update_mass_dataset ----------
#> Package Function.used Time
#> 1 massdataset update_mass_dataset() 2022-01-19 21:53:01
#> mutate_ms2 ----------
#> Package Function.used Time
#> 1 massdataset mutate_ms2() 2022-01-19 21:53:36
#> annotate_metabolites_mass_dataset ----------
#> Package Function.used Time
#> 1 metid annotate_metabolites_mass_dataset() 2022-02-22 21:16:23
#> 2 metid annotate_metabolites_mass_dataset() 2022-02-22 21:30:10
#> 3 metid annotate_metabolites_mass_dataset() 2022-02-22 21:47:59
Negative mode 🔗︎
object_neg2 <-
object_neg2 %>%
activate_mass_dataset(what = "annotation_table") %>%
filter(!is.na(Level)) %>%
filter(Level == 1 | Level == 2)
object_neg2
#> --------------------
#> massdataset version: 0.99.1
#> --------------------
#> 1.expression_data:[ 165 x 259 data.frame]
#> 2.sample_info:[ 259 x 6 data.frame]
#> 3.variable_info:[ 165 x 6 data.frame]
#> 4.sample_info_note:[ 6 x 2 data.frame]
#> 5.variable_info_note:[ 6 x 2 data.frame]
#> 6.ms2_data:[ 1092 variables x 988 MS2 spectra]
#> --------------------
#> Processing information (extract_process_info())
#> create_mass_dataset ----------
#> Package Function.used Time
#> 1 massdataset create_mass_dataset() 2022-01-16 16:20:02
#> process_data ----------
#> Package Function.used Time
#> 1 massprocesser process_data 2022-01-16 16:19:48
#> mutate ----------
#> Package Function.used Time
#> 1 massdataset mutate() 2022-01-16 23:48:08
#> mutate_variable_na_freq ----------
#> Package Function.used Time
#> 1 massdataset mutate_variable_na_freq() 2022-01-18 09:11:47
#> 2 massdataset mutate_variable_na_freq() 2022-01-18 09:11:47
#> 3 massdataset mutate_variable_na_freq() 2022-01-18 09:11:47
#> filter ----------
#> Package Function.used Time
#> 1 massdataset filter() 2022-01-18 09:11:47
#> 2 massdataset filter() 2022-03-10 23:57:33
#> 3 massdataset filter() 2022-03-10 23:57:33
#> impute_mv ----------
#> Package Function.used Time
#> 1 masscleaner impute_mv() 2022-01-18 09:38:06
#> normalize_data ----------
#> Package Function.used Time
#> 1 masscleaner normalize_data() 2022-01-18 09:50:47
#> integrate_data ----------
#> Package Function.used Time
#> 1 masscleaner integrate_data() 2022-01-18 09:50:47
#> update_mass_dataset ----------
#> Package Function.used Time
#> 1 massdataset update_mass_dataset() 2022-01-19 21:53:37
#> mutate_ms2 ----------
#> Package Function.used Time
#> 1 massdataset mutate_ms2() 2022-01-19 21:54:06
#> annotate_metabolites_mass_dataset ----------
#> Package Function.used Time
#> 1 metid annotate_metabolites_mass_dataset() 2022-02-22 21:50:29
#> 2 metid annotate_metabolites_mass_dataset() 2022-02-22 22:08:10
#> 3 metid annotate_metabolites_mass_dataset() 2022-02-22 22:26:33
Merge data 🔗︎
We need to merge positive and negative mode data as one mass_dataset
class.
head(colnames(object_pos2))
#> [1] "sample_06" "sample_103" "sample_11" "sample_112" "sample_117"
#> [6] "sample_12"
head(colnames(object_neg2))
#> [1] "sample_06" "sample_103" "sample_11" "sample_112" "sample_117"
#> [6] "sample_12"
object <-
merge_mass_dataset(x = object_pos2,
y = object_neg2,
sample_direction = "inner",
variable_direction = "full",
sample_by = "sample_id",
variable_by = c("variable_id", "mz", "rt"))
object
#> --------------------
#> massdataset version: 0.99.20
#> --------------------
#> 1.expression_data:[ 371 x 259 data.frame]
#> 2.sample_info:[ 259 x 11 data.frame]
#> 3.variable_info:[ 371 x 9 data.frame]
#> 4.sample_info_note:[ 11 x 2 data.frame]
#> 5.variable_info_note:[ 9 x 2 data.frame]
#> 6.ms2_data:[ 2084 variables x 1902 MS2 spectra]
#> --------------------
#> Processing information (extract_process_info())
#> create_mass_dataset ----------
#> Package Function.used Time
#> 1 massdataset create_mass_dataset() 2022-01-16 16:19:04
#> process_data ----------
#> Package Function.used Time
#> 1 massprocesser process_data 2022-01-16 16:18:43
#> mutate ----------
#> Package Function.used Time
#> 1 massdataset mutate() 2022-01-16 23:48:08
#> mutate_variable_na_freq ----------
#> Package Function.used Time
#> 1 massdataset mutate_variable_na_freq() 2022-01-18 09:11:43
#> 2 massdataset mutate_variable_na_freq() 2022-01-18 09:11:43
#> 3 massdataset mutate_variable_na_freq() 2022-01-18 09:11:43
#> filter ----------
#> Package Function.used Time
#> 1 massdataset filter() 2022-01-18 09:11:44
#> 2 massdataset filter() 2022-03-10 23:57:33
#> 3 massdataset filter() 2022-03-10 23:57:33
#> impute_mv ----------
#> Package Function.used Time
#> 1 masscleaner impute_mv() 2022-01-18 09:38:02
#> normalize_data ----------
#> Package Function.used Time
#> 1 masscleaner normalize_data() 2022-01-18 09:38:07
#> integrate_data ----------
#> Package Function.used Time
#> 1 masscleaner integrate_data() 2022-01-18 09:38:08
#> update_mass_dataset ----------
#> Package Function.used Time
#> 1 massdataset update_mass_dataset() 2022-01-19 21:53:01
#> mutate_ms2 ----------
#> Package Function.used Time
#> 1 massdataset mutate_ms2() 2022-01-19 21:53:36
#> annotate_metabolites_mass_dataset ----------
#> Package Function.used Time
#> 1 metid annotate_metabolites_mass_dataset() 2022-02-22 21:16:23
#> 2 metid annotate_metabolites_mass_dataset() 2022-02-22 21:30:10
#> 3 metid annotate_metabolites_mass_dataset() 2022-02-22 21:47:59
#> create_mass_dataset ----------
#> Package Function.used Time
#> 1 massdataset create_mass_dataset() 2022-01-16 16:20:02
#> process_data ----------
#> Package Function.used Time
#> 1 massprocesser process_data 2022-01-16 16:19:48
#> mutate ----------
#> Package Function.used Time
#> 1 massdataset mutate() 2022-01-16 23:48:08
#> mutate_variable_na_freq ----------
#> Package Function.used Time
#> 1 massdataset mutate_variable_na_freq() 2022-01-18 09:11:47
#> 2 massdataset mutate_variable_na_freq() 2022-01-18 09:11:47
#> 3 massdataset mutate_variable_na_freq() 2022-01-18 09:11:47
#> filter ----------
#> Package Function.used Time
#> 1 massdataset filter() 2022-01-18 09:11:47
#> 2 massdataset filter() 2022-03-10 23:57:33
#> 3 massdataset filter() 2022-03-10 23:57:33
#> impute_mv ----------
#> Package Function.used Time
#> 1 masscleaner impute_mv() 2022-01-18 09:38:06
#> normalize_data ----------
#> Package Function.used Time
#> 1 masscleaner normalize_data() 2022-01-18 09:50:47
#> integrate_data ----------
#> Package Function.used Time
#> 1 masscleaner integrate_data() 2022-01-18 09:50:47
#> update_mass_dataset ----------
#> Package Function.used Time
#> 1 massdataset update_mass_dataset() 2022-01-19 21:53:37
#> mutate_ms2 ----------
#> Package Function.used Time
#> 1 massdataset mutate_ms2() 2022-01-19 21:54:06
#> annotate_metabolites_mass_dataset ----------
#> Package Function.used Time
#> 1 metid annotate_metabolites_mass_dataset() 2022-02-22 21:50:29
#> 2 metid annotate_metabolites_mass_dataset() 2022-02-22 22:08:10
#> 3 metid annotate_metabolites_mass_dataset() 2022-02-22 22:26:33
#> merge_mass_dataset ----------
#> Package Function.used Time
#> 1 massdataset merge_mass_dataset 2022-03-10 23:57:34
Trace processing information of object
Then we can use the report_parameters()
function to trace processing information of object.
All the analysis results will be placed in a folder named as statistical_analysis
.
dir.create(path = "statistical_analysis", showWarnings = FALSE)
report_parameters(object = object, path = "statistical_analysis/")
A html format file named as parameter_report.html
will be generated.
Remove redundant metabolites
Remove the redundant annotated metabolites bases on Level
and score.
object <-
object %>%
activate_mass_dataset(what = "annotation_table") %>%
group_by(Compound.name) %>%
filter(Level == min(Level)) %>%
filter(SS == max(SS)) %>%
slice_head(n = 1)
object <-
object %>%
activate_mass_dataset(what = "annotation_table") %>%
group_by(variable_id) %>%
filter(Level == min(Level)) %>%
filter(SS == max(SS)) %>%
slice_head(n = 1)
Differential expression metabolites
Calculate the fold changes.
control_sample_id =
object %>%
activate_mass_dataset(what = "sample_info") %>%
filter(group == "Control") %>%
pull(sample_id)
case_sample_id =
object %>%
activate_mass_dataset(what = "sample_info") %>%
filter(group == "Case") %>%
pull(sample_id)
object <-
mutate_fc(object = object,
control_sample_id = control_sample_id,
case_sample_id = case_sample_id,
mean_median = "mean")
#> 110 control samples.
#> 110 case samples.
Calculate p values.
object <-
mutate_p_value(
object = object,
control_sample_id = control_sample_id,
case_sample_id = case_sample_id,
method = "t.test",
p_adjust_methods = "BH"
)
#> 110 control samples.
#> 110 case samples.
Volcano plot.
volcano_plot(object = object,
add_text = TRUE,
text_from = "Compound.name",
point_size_scale = "p_value") +
scale_size_continuous(range = c(0.5, 5))
Output result
Output the differential expression metabolites.
differential_metabolites <-
extract_variable_info(object = object) %>%
filter(fc > 2 | fc < 0.5) %>%
filter(p_value_adjust < 0.05)
readr::write_csv(differential_metabolites,
file = "statistical_analysis/differential_metabolites.csv")
Save result for subsequent analysis.
object_final <- object
save(object_final, file = "statistical_analysis/object_final")
Session information
sessionInfo()
#> R version 4.1.2 (2021-11-01)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Big Sur 10.16
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
#>
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#>
#> attached base packages:
#> [1] stats4 stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] forcats_0.5.1.9000 stringr_1.4.0 purrr_0.3.4
#> [4] readr_2.1.2 tidyr_1.2.0 tibble_3.1.6
#> [7] tidyverse_1.3.1 dplyr_1.0.8 metid_1.2.4
#> [10] metpath_0.99.4 massstat_0.99.13 ggfortify_0.4.14
#> [13] massqc_0.99.7 masscleaner_0.99.7 xcms_3.16.1
#> [16] MSnbase_2.20.4 ProtGenerics_1.26.0 S4Vectors_0.32.3
#> [19] mzR_2.28.0 Rcpp_1.0.8 Biobase_2.54.0
#> [22] BiocGenerics_0.40.0 BiocParallel_1.28.3 massprocesser_0.99.3
#> [25] ggplot2_3.3.5 masstools_0.99.5 massdataset_0.99.20
#> [28] tidymass_0.99.6 magrittr_2.0.2
#>
#> loaded via a namespace (and not attached):
#> [1] blogdown_1.7 bit64_4.0.5
#> [3] missForest_1.4 knitr_1.37
#> [5] DelayedArray_0.20.0 data.table_1.14.2
#> [7] rpart_4.1.16 KEGGREST_1.34.0
#> [9] RCurl_1.98-1.5 doParallel_1.0.17
#> [11] generics_0.1.2 snow_0.4-4
#> [13] leaflet_2.1.0 preprocessCore_1.56.0
#> [15] mixOmics_6.18.1 RANN_2.6.1
#> [17] proxy_0.4-26 future_1.23.0
#> [19] bit_4.0.4 tzdb_0.2.0
#> [21] xml2_1.3.3 lubridate_1.8.0
#> [23] ggsci_2.9 SummarizedExperiment_1.24.0
#> [25] assertthat_0.2.1 viridis_0.6.2
#> [27] xfun_0.29 hms_1.1.1
#> [29] jquerylib_0.1.4 evaluate_0.15
#> [31] DEoptimR_1.0-10 fansi_1.0.2
#> [33] dbplyr_2.1.1 readxl_1.3.1
#> [35] igraph_1.2.11 DBI_1.1.2
#> [37] htmlwidgets_1.5.4 MsFeatures_1.3.0
#> [39] rARPACK_0.11-0 ellipsis_0.3.2
#> [41] RSpectra_0.16-0 crosstalk_1.2.0
#> [43] backports_1.4.1 bookdown_0.24
#> [45] ggcorrplot_0.1.3 MatrixGenerics_1.6.0
#> [47] vctrs_0.3.8 remotes_2.4.2
#> [49] here_1.0.1 withr_2.4.3
#> [51] ggforce_0.3.3 itertools_0.1-3
#> [53] robustbase_0.93-9 vroom_1.5.7
#> [55] checkmate_2.0.0 cluster_2.1.2
#> [57] lazyeval_0.2.2 crayon_1.5.0
#> [59] ellipse_0.4.2 labeling_0.4.2
#> [61] pkgconfig_2.0.3 tweenr_1.0.2
#> [63] GenomeInfoDb_1.30.0 nnet_7.3-17
#> [65] rlang_1.0.1 globals_0.14.0
#> [67] lifecycle_1.0.1 affyio_1.64.0
#> [69] extrafontdb_1.0 fastDummies_1.6.3
#> [71] MassSpecWavelet_1.60.0 modelr_0.1.8
#> [73] cellranger_1.1.0 randomForest_4.7-1
#> [75] rprojroot_2.0.2 polyclip_1.10-0
#> [77] matrixStats_0.61.0 Matrix_1.4-0
#> [79] reprex_2.0.1 base64enc_0.1-3
#> [81] GlobalOptions_0.1.2 png_0.1-7
#> [83] viridisLite_0.4.0 rjson_0.2.21
#> [85] clisymbols_1.2.0 bitops_1.0-7
#> [87] pander_0.6.4 Biostrings_2.62.0
#> [89] shape_1.4.6 parallelly_1.30.0
#> [91] robust_0.7-0 jpeg_0.1-9
#> [93] gridGraphics_0.5-1 scales_1.1.1
#> [95] plyr_1.8.6 zlibbioc_1.40.0
#> [97] compiler_4.1.2 RColorBrewer_1.1-2
#> [99] pcaMethods_1.86.0 clue_0.3-60
#> [101] rrcov_1.6-2 cli_3.2.0
#> [103] affy_1.72.0 XVector_0.34.0
#> [105] listenv_0.8.0 patchwork_1.1.1
#> [107] pbapply_1.5-0 htmlTable_2.4.0
#> [109] Formula_1.2-4 MASS_7.3-55
#> [111] tidyselect_1.1.1 vsn_3.62.0
#> [113] stringi_1.7.6 highr_0.9
#> [115] yaml_2.3.4 latticeExtra_0.6-29
#> [117] MALDIquant_1.21 ggrepel_0.9.1
#> [119] grid_4.1.2 sass_0.4.0
#> [121] tools_4.1.2 parallel_4.1.2
#> [123] circlize_0.4.14 rstudioapi_0.13
#> [125] MsCoreUtils_1.6.0 foreach_1.5.2
#> [127] foreign_0.8-82 gridExtra_2.3
#> [129] farver_2.1.0 mzID_1.32.0
#> [131] ggraph_2.0.5 rvcheck_0.2.1
#> [133] digest_0.6.29 BiocManager_1.30.16
#> [135] GenomicRanges_1.46.1 broom_0.7.12
#> [137] ncdf4_1.19 httr_1.4.2
#> [139] ComplexHeatmap_2.10.0 colorspace_2.0-2
#> [141] rvest_1.0.2 XML_3.99-0.8
#> [143] fs_1.5.2 IRanges_2.28.0
#> [145] splines_4.1.2 yulab.utils_0.0.4
#> [147] graphlayouts_0.8.0 ggplotify_0.1.0
#> [149] plotly_4.10.0 fit.models_0.64
#> [151] jsonlite_1.7.3 tidygraph_1.2.0
#> [153] corpcor_1.6.10 R6_2.5.1
#> [155] Hmisc_4.6-0 pillar_1.7.0
#> [157] htmltools_0.5.2 glue_1.6.1
#> [159] fastmap_1.1.0 class_7.3-20
#> [161] codetools_0.2-18 pcaPP_1.9-74
#> [163] mvtnorm_1.1-3 furrr_0.2.3
#> [165] utf8_1.2.2 lattice_0.20-45
#> [167] bslib_0.3.1 zip_2.2.0
#> [169] openxlsx_4.2.5 Rttf2pt1_1.3.9
#> [171] survival_3.2-13 limma_3.50.0
#> [173] rmarkdown_2.11 munsell_0.5.0
#> [175] e1071_1.7-9 GetoptLong_1.0.5
#> [177] GenomeInfoDbData_1.2.7 iterators_1.0.14
#> [179] impute_1.68.0 haven_2.4.3
#> [181] reshape2_1.4.4 gtable_0.3.0
#> [183] extrafont_0.17