Statistical analysis

Data preparation

Now the positive mode and negative mode have been annotated respectively. We need to merge positive and negative mode data.

library(tidymass)
#> Registered S3 method overwritten by 'Hmisc':
#>   method       from      
#>   vcov.default fit.models
#> ── Attaching packages ──────────────────────────────────────── tidymass 1.0.8 ──
#> ✔ massdataset   1.0.25     ✔ metid         1.2.28
#> ✔ massprocesser 1.0.10     ✔ masstools     1.0.10
#> ✔ masscleaner   1.0.11     ✔ dplyr         1.1.2 
#> ✔ massqc        1.0.6      ✔ ggplot2       3.4.2 
#> ✔ massstat      1.0.5      ✔ magrittr      2.0.3 
#> ✔ metpath       1.0.8
library(tidyverse)
#> ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
#> ✔ forcats   1.0.0     ✔ readr     2.1.4
#> ✔ lubridate 1.9.2     ✔ stringr   1.5.0
#> ✔ purrr     1.0.1     ✔ tibble    3.2.1
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ xcms::collect()          masks dplyr::collect()
#> ✖ MSnbase::combine()       masks Biobase::combine(), BiocGenerics::combine(), dplyr::combine()
#> ✖ tidyr::expand()          masks S4Vectors::expand()
#> ✖ tidyr::extract()         masks magrittr::extract()
#> ✖ metid::filter()          masks metpath::filter(), dplyr::filter(), massdataset::filter(), stats::filter()
#> ✖ S4Vectors::first()       masks dplyr::first()
#> ✖ xcms::groups()           masks dplyr::groups()
#> ✖ dplyr::lag()             masks stats::lag()
#> ✖ purrr::map()             masks mixOmics::map()
#> ✖ BiocGenerics::Position() masks ggplot2::Position(), base::Position()
#> ✖ purrr::reduce()          masks MSnbase::reduce()
#> ✖ S4Vectors::rename()      masks dplyr::rename(), massdataset::rename()
#> ✖ lubridate::second()      masks S4Vectors::second()
#> ✖ lubridate::second<-()    masks S4Vectors::second<-()
#> ✖ MASS::select()           masks dplyr::select(), massdataset::select()
#> ✖ purrr::set_names()       masks magrittr::set_names()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Load data

load("metabolite_annotation/object_pos2")
load("metabolite_annotation/object_neg2")

Remove the features without annotations

Positive mode

object_pos2 <- 
  object_pos2 %>% 
  activate_mass_dataset(what = "annotation_table") %>% 
  filter(!is.na(Level)) %>% 
  filter(Level == 1 | Level == 2)
object_pos2
#> -------------------- 
#> massdataset version: 0.99.1 
#> -------------------- 
#> 1.expression_data:[ 206 x 259 data.frame]
#> 2.sample_info:[ 259 x 6 data.frame]
#> 259 samples:sample_06 sample_103 sample_11 ... sample_QC_38 sample_QC_39
#> 3.variable_info:[ 206 x 6 data.frame]
#> 206 variables:M86T95_POS M95T100_1_POS M100T160_POS ... M568T622_POS M609T427_POS
#> 4.sample_info_note:[ 6 x 2 data.frame]
#> 5.variable_info_note:[ 6 x 2 data.frame]
#> 6.ms2_data:[ 1042 variables x 951 MS2 spectra]
#> -------------------- 
#> Processing information
#> 11 processings in total
#> Latest 3 processings show
#> update_mass_dataset ---------- 
#>       Package         Function.used                Time
#> 1 massdataset update_mass_dataset() 2022-01-19 21:53:01
#> mutate_ms2 ---------- 
#>       Package Function.used                Time
#> 1 massdataset  mutate_ms2() 2022-01-19 21:53:36
#> annotate_metabolites_mass_dataset ---------- 
#>   Package                       Function.used                       Time
#> 1   metid annotate_metabolites_mass_dataset()  2022-02-22 21:16:23.17454
#> 2   metid annotate_metabolites_mass_dataset() 2022-02-22 21:30:10.706486
#> 3   metid annotate_metabolites_mass_dataset() 2022-02-22 21:47:59.474422

Negative mode

object_neg2 <- 
  object_neg2 %>% 
  activate_mass_dataset(what = "annotation_table") %>% 
  filter(!is.na(Level)) %>% 
  filter(Level == 1 | Level == 2)
object_neg2
#> -------------------- 
#> massdataset version: 0.99.1 
#> -------------------- 
#> 1.expression_data:[ 165 x 259 data.frame]
#> 2.sample_info:[ 259 x 6 data.frame]
#> 259 samples:sample_06 sample_103 sample_11 ... sample_QC_38 sample_QC_39
#> 3.variable_info:[ 165 x 6 data.frame]
#> 165 variables:M73T74_NEG M75T52_NEG M85T99_NEG ... M480T641_NEG M514T611_NEG
#> 4.sample_info_note:[ 6 x 2 data.frame]
#> 5.variable_info_note:[ 6 x 2 data.frame]
#> 6.ms2_data:[ 1092 variables x 988 MS2 spectra]
#> -------------------- 
#> Processing information
#> 11 processings in total
#> Latest 3 processings show
#> update_mass_dataset ---------- 
#>       Package         Function.used                Time
#> 1 massdataset update_mass_dataset() 2022-01-19 21:53:37
#> mutate_ms2 ---------- 
#>       Package Function.used                Time
#> 1 massdataset  mutate_ms2() 2022-01-19 21:54:06
#> annotate_metabolites_mass_dataset ---------- 
#>   Package                       Function.used                       Time
#> 1   metid annotate_metabolites_mass_dataset() 2022-02-22 21:50:29.759271
#> 2   metid annotate_metabolites_mass_dataset() 2022-02-22 22:08:10.771619
#> 3   metid annotate_metabolites_mass_dataset() 2022-02-22 22:26:33.968578

Merge data

We need to merge positive and negative mode data as one mass_dataset class.

head(colnames(object_pos2))
#> [1] "sample_06"  "sample_103" "sample_11"  "sample_112" "sample_117"
#> [6] "sample_12"
head(colnames(object_neg2))
#> [1] "sample_06"  "sample_103" "sample_11"  "sample_112" "sample_117"
#> [6] "sample_12"
object <- 
merge_mass_dataset(x = object_pos2, 
                   y = object_neg2, 
                   sample_direction = "inner",
                   variable_direction = "full", 
                   sample_by = "sample_id", 
                   variable_by = c("variable_id", "mz", "rt"))
object
#> -------------------- 
#> massdataset version: 1.0.25 
#> -------------------- 
#> 1.expression_data:[ 371 x 259 data.frame]
#> 2.sample_info:[ 259 x 11 data.frame]
#> 259 samples:sample_06 sample_103 sample_11 ... sample_QC_38 sample_QC_39
#> 3.variable_info:[ 371 x 9 data.frame]
#> 371 variables:M86T95_POS M95T100_1_POS M100T160_POS ... M480T641_NEG M514T611_NEG
#> 4.sample_info_note:[ 11 x 2 data.frame]
#> 5.variable_info_note:[ 9 x 2 data.frame]
#> 6.ms2_data:[ 2084 variables x 1902 MS2 spectra]
#> -------------------- 
#> Processing information
#> 23 processings in total
#> Latest 3 processings show
#> mutate_ms2 ---------- 
#>       Package Function.used                Time
#> 1 massdataset  mutate_ms2() 2022-01-19 21:54:06
#> annotate_metabolites_mass_dataset ---------- 
#>   Package                       Function.used                       Time
#> 1   metid annotate_metabolites_mass_dataset() 2022-02-22 21:50:29.759271
#> 2   metid annotate_metabolites_mass_dataset() 2022-02-22 22:08:10.771619
#> 3   metid annotate_metabolites_mass_dataset() 2022-02-22 22:26:33.968578
#> merge_mass_dataset ---------- 
#>       Package      Function.used                Time
#> 1 massdataset merge_mass_dataset 2023-08-31 00:00:35

Trace processing information of object

Then we can use the report_parameters() function to trace processing information of object.

All the analysis results will be placed in a folder named as statistical_analysis.

dir.create(path = "statistical_analysis", showWarnings = FALSE)
report_parameters(object = object, path = "statistical_analysis/")

A html format file named as parameter_report.html will be generated.

Remove redundant metabolites

Remove the redundant annotated metabolites bases on Level and score.

object <-
  object %>% 
  activate_mass_dataset(what = "annotation_table") %>% 
  group_by(Compound.name) %>% 
  filter(Level == min(Level)) %>% 
  filter(SS == max(SS)) %>% 
  slice_head(n = 1)
object <-
  object %>% 
  activate_mass_dataset(what = "annotation_table") %>% 
  group_by(variable_id) %>% 
  filter(Level == min(Level)) %>% 
  filter(SS == max(SS)) %>% 
  slice_head(n = 1)

Differential expression metabolites

Calculate the fold changes.

control_sample_id =
  object %>%
  activate_mass_dataset(what = "sample_info") %>%
  filter(group == "Control") %>%
  pull(sample_id)

case_sample_id =
  object %>%
  activate_mass_dataset(what = "sample_info") %>%
  filter(group == "Case") %>%
  pull(sample_id)
object <-
  mutate_fc(object = object, 
            control_sample_id = control_sample_id, 
            case_sample_id = case_sample_id, 
            mean_median = "mean")
#> 110 control samples.
#> 110 case samples.

Calculate p values.

object <-
  mutate_p_value(
    object = object,
    control_sample_id = control_sample_id,
    case_sample_id = case_sample_id,
    method = "t.test",
    p_adjust_methods = "BH"
  )
#> 110 control samples.
#> 110 case samples.

Volcano plot.

volcano_plot(object = object,
             add_text = TRUE,
             text_from = "Compound.name", 
             point_size_scale = "p_value") +
  scale_size_continuous(range = c(0.5, 5))

Output result

Output the differential expression metabolites.

differential_metabolites <- 
  extract_variable_info(object = object) %>% 
  filter(fc > 2 | fc < 0.5) %>% 
  filter(p_value_adjust < 0.05)

readr::write_csv(differential_metabolites, 
                 file = "statistical_analysis/differential_metabolites.csv")

Save result for subsequent analysis.

object_final <- object
save(object_final, file = "statistical_analysis/object_final")

Session information

sessionInfo()
#> R version 4.3.0 (2023-04-21)
#> Platform: x86_64-apple-darwin20 (64-bit)
#> Running under: macOS Ventura 13.5.1
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib 
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
#> 
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> time zone: America/Los_Angeles
#> tzcode source: internal
#> 
#> attached base packages:
#> [1] grid      stats4    stats     graphics  grDevices utils     datasets 
#> [8] methods   base     
#> 
#> other attached packages:
#>  [1] lubridate_1.9.2       forcats_1.0.0         stringr_1.5.0        
#>  [4] purrr_1.0.1           readr_2.1.4           tibble_3.2.1         
#>  [7] tidyverse_2.0.0       metid_1.2.28          metpath_1.0.8        
#> [10] ComplexHeatmap_2.16.0 mixOmics_6.24.0       lattice_0.21-8       
#> [13] MASS_7.3-58.4         massstat_1.0.5        tidyr_1.3.0          
#> [16] ggfortify_0.4.16      massqc_1.0.6          masscleaner_1.0.11   
#> [19] xcms_3.22.0           MSnbase_2.26.0        ProtGenerics_1.32.0  
#> [22] S4Vectors_0.38.1      mzR_2.34.0            Rcpp_1.0.10          
#> [25] Biobase_2.60.0        BiocGenerics_0.46.0   BiocParallel_1.34.2  
#> [28] massprocesser_1.0.10  ggplot2_3.4.2         dplyr_1.1.2          
#> [31] magrittr_2.0.3        masstools_1.0.10      massdataset_1.0.25   
#> [34] tidymass_1.0.8       
#> 
#> loaded via a namespace (and not attached):
#>   [1] splines_4.3.0               bitops_1.0-7               
#>   [3] cellranger_1.1.0            polyclip_1.10-4            
#>   [5] preprocessCore_1.62.1       XML_3.99-0.14              
#>   [7] rpart_4.1.19                fastDummies_1.6.3          
#>   [9] lifecycle_1.0.3             doParallel_1.0.17          
#>  [11] rprojroot_2.0.3             vroom_1.6.3                
#>  [13] globals_0.16.2              backports_1.4.1            
#>  [15] plotly_4.10.2               openxlsx_4.2.5.2           
#>  [17] limma_3.56.2                Hmisc_5.1-0                
#>  [19] sass_0.4.6                  rmarkdown_2.22             
#>  [21] jquerylib_0.1.4             yaml_2.3.7                 
#>  [23] remotes_2.4.2               doRNG_1.8.6                
#>  [25] zip_2.3.0                   MsCoreUtils_1.12.0         
#>  [27] pbapply_1.7-0               RColorBrewer_1.1-3         
#>  [29] zlibbioc_1.46.0             GenomicRanges_1.52.0       
#>  [31] ggraph_2.1.0                itertools_0.1-3            
#>  [33] RCurl_1.98-1.12             nnet_7.3-18                
#>  [35] tweenr_2.0.2                circlize_0.4.15            
#>  [37] GenomeInfoDbData_1.2.10     IRanges_2.34.0             
#>  [39] ggrepel_0.9.3               listenv_0.9.0              
#>  [41] ellipse_0.4.5               RSpectra_0.16-1            
#>  [43] missForest_1.5              parallelly_1.36.0          
#>  [45] ncdf4_1.21                  codetools_0.2-19           
#>  [47] DelayedArray_0.26.3         ggforce_0.4.1              
#>  [49] tidyselect_1.2.0            shape_1.4.6                
#>  [51] farver_2.1.1                viridis_0.6.3              
#>  [53] matrixStats_1.0.0           base64enc_0.1-3            
#>  [55] jsonlite_1.8.5              GetoptLong_1.0.5           
#>  [57] multtest_2.56.0             e1071_1.7-13               
#>  [59] tidygraph_1.2.3             Formula_1.2-5              
#>  [61] survival_3.5-5              iterators_1.0.14           
#>  [63] foreach_1.5.2               progress_1.2.2             
#>  [65] tools_4.3.0                 glue_1.6.2                 
#>  [67] rARPACK_0.11-0              gridExtra_2.3              
#>  [69] xfun_0.39                   here_1.0.1                 
#>  [71] MatrixGenerics_1.12.2       GenomeInfoDb_1.36.0        
#>  [73] withr_2.5.0                 BiocManager_1.30.21        
#>  [75] fastmap_1.1.1               fansi_1.0.4                
#>  [77] blogdown_1.18.1             digest_0.6.31              
#>  [79] timechange_0.2.0            R6_2.5.1                   
#>  [81] colorspace_2.1-0            utf8_1.2.3                 
#>  [83] generics_0.1.3              data.table_1.14.8          
#>  [85] corpcor_1.6.10              robustbase_0.95-1          
#>  [87] class_7.3-21                graphlayouts_1.0.0         
#>  [89] prettyunits_1.1.1           httr_1.4.6                 
#>  [91] htmlwidgets_1.6.2           S4Arrays_1.0.4             
#>  [93] pkgconfig_2.0.3             gtable_0.3.3               
#>  [95] robust_0.7-1                impute_1.74.1              
#>  [97] MassSpecWavelet_1.66.0      XVector_0.40.0             
#>  [99] furrr_0.3.1                 pcaPP_2.0-3                
#> [101] htmltools_0.5.5             bookdown_0.34              
#> [103] MALDIquant_1.22.1           clue_0.3-64                
#> [105] scales_1.2.1                png_0.1-8                  
#> [107] knitr_1.43                  rstudioapi_0.14            
#> [109] reshape2_1.4.4              tzdb_0.4.0                 
#> [111] rjson_0.2.21                checkmate_2.2.0            
#> [113] ggcorrplot_0.1.4            proxy_0.4-27               
#> [115] cachem_1.0.8                GlobalOptions_0.1.2        
#> [117] parallel_4.3.0              foreign_0.8-84             
#> [119] mzID_1.38.0                 vsn_3.68.0                 
#> [121] pillar_1.9.0                vctrs_0.6.2                
#> [123] MsFeatures_1.8.0            RANN_2.6.1                 
#> [125] pcaMethods_1.92.0           randomForest_4.7-1.1       
#> [127] cluster_2.1.4               htmlTable_2.4.1            
#> [129] evaluate_0.21               mvtnorm_1.2-2              
#> [131] cli_3.6.1                   compiler_4.3.0             
#> [133] rlang_1.1.1                 crayon_1.5.2               
#> [135] rngtools_1.5.2              Rdisop_1.60.0              
#> [137] rrcov_1.7-3                 labeling_0.4.2             
#> [139] affy_1.78.0                 plyr_1.8.8                 
#> [141] stringi_1.7.12              viridisLite_0.4.2          
#> [143] Biostrings_2.68.1           munsell_0.5.0              
#> [145] lazyeval_0.2.2              fit.models_0.64            
#> [147] Matrix_1.5-4                hms_1.1.3                  
#> [149] patchwork_1.1.2             bit64_4.0.5                
#> [151] future_1.32.0               KEGGREST_1.40.0            
#> [153] highr_0.10                  SummarizedExperiment_1.30.2
#> [155] igraph_1.4.3                affyio_1.70.0              
#> [157] bslib_0.5.0                 bit_4.0.5                  
#> [159] DEoptimR_1.0-14             readxl_1.4.2