Pathway enrichment

Data preparation

Load the differential expressional metabolites.

library(tidymass)
#> Registered S3 method overwritten by 'Hmisc':
#>   method       from      
#>   vcov.default fit.models
#> ── Attaching packages ──────────────────────────────────────── tidymass 1.0.8 ──
#> ✔ massdataset   1.0.25     ✔ metid         1.2.28
#> ✔ massprocesser 1.0.10     ✔ masstools     1.0.10
#> ✔ masscleaner   1.0.11     ✔ dplyr         1.1.2 
#> ✔ massqc        1.0.6      ✔ ggplot2       3.4.2 
#> ✔ massstat      1.0.5      ✔ magrittr      2.0.3 
#> ✔ metpath       1.0.8
library(tidyverse)
#> ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
#> ✔ forcats   1.0.0     ✔ readr     2.1.4
#> ✔ lubridate 1.9.2     ✔ stringr   1.5.0
#> ✔ purrr     1.0.1     ✔ tibble    3.2.1
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ xcms::collect()          masks dplyr::collect()
#> ✖ MSnbase::combine()       masks Biobase::combine(), BiocGenerics::combine(), dplyr::combine()
#> ✖ tidyr::expand()          masks S4Vectors::expand()
#> ✖ tidyr::extract()         masks magrittr::extract()
#> ✖ metid::filter()          masks metpath::filter(), dplyr::filter(), massdataset::filter(), stats::filter()
#> ✖ S4Vectors::first()       masks dplyr::first()
#> ✖ xcms::groups()           masks dplyr::groups()
#> ✖ dplyr::lag()             masks stats::lag()
#> ✖ purrr::map()             masks mixOmics::map()
#> ✖ BiocGenerics::Position() masks ggplot2::Position(), base::Position()
#> ✖ purrr::reduce()          masks MSnbase::reduce()
#> ✖ S4Vectors::rename()      masks dplyr::rename(), massdataset::rename()
#> ✖ lubridate::second()      masks S4Vectors::second()
#> ✖ lubridate::second<-()    masks S4Vectors::second<-()
#> ✖ MASS::select()           masks dplyr::select(), massdataset::select()
#> ✖ purrr::set_names()       masks magrittr::set_names()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
load("statistical_analysis/object_final")
object_final
#> -------------------- 
#> massdataset version: 1.0.25 
#> -------------------- 
#> 1.expression_data:[ 291 x 259 data.frame]
#> 2.sample_info:[ 259 x 11 data.frame]
#> 259 samples:sample_06 sample_103 sample_11 ... sample_QC_38 sample_QC_39
#> 3.variable_info:[ 291 x 12 data.frame]
#> 291 variables:M86T95_POS M95T100_1_POS M100T160_POS ... M480T641_NEG M514T611_NEG
#> 4.sample_info_note:[ 11 x 2 data.frame]
#> 5.variable_info_note:[ 12 x 2 data.frame]
#> 6.ms2_data:[ 2084 variables x 1902 MS2 spectra]
#> -------------------- 
#> Processing information
#> 26 processings in total
#> Latest 3 processings show
#> slice_head ---------- 
#>       Package Function.used                       Time
#> 1 massdataset  slice_head() 2023-08-31 00:00:36.358056
#> 2 massdataset  slice_head()  2023-08-31 00:00:36.85759
#> mutate_fc ---------- 
#>       Package Function.used                Time
#> 1 massdataset   mutate_fc() 2023-08-31 00:00:37
#> mutate_p_value ---------- 
#>       Package    Function.used                Time
#> 1 massdataset mutate_p_value() 2023-08-31 00:00:37

Pathway enrichment

All the results will be placed in a folder named as pathway_enrichment.

dir.create(path = "pathway_enrichment", showWarnings = FALSE)
diff_metabolites <-
  object_final %>% 
  activate_mass_dataset(what = "variable_info") %>% 
  filter(p_value_adjust < 0.05) %>% 
  extract_variable_info()
head(diff_metabolites)
#>     variable_id        mz        rt    na_freq  na_freq.1  na_freq.2 na_freq_2
#> 1    M86T95_POS  86.09716  94.57264 0.02564103 0.55454545 0.29090909        NA
#> 2 M95T100_1_POS  95.04975  99.77637 0.00000000 0.00000000 0.00000000        NA
#> 3  M103T100_POS 103.05477  99.90601 0.00000000 0.00000000 0.00000000        NA
#> 4   M104T51_POS 104.10746  51.27993 0.00000000 0.07272727 0.00000000        NA
#> 5   M113T81_POS 113.03501  80.73506 0.00000000 0.00000000 0.00000000        NA
#> 6  M113T187_POS 113.06018 186.56470 0.02564103 0.01818182 0.02727273        NA
#>   na_freq.1_2 na_freq.2_2       fc      p_value p_value_adjust
#> 1          NA          NA 1.714629 3.034938e-20   4.702229e-19
#> 2          NA          NA 1.439791 6.761432e-31   3.935153e-29
#> 3          NA          NA 1.345809 5.215317e-19   6.898442e-18
#> 4          NA          NA 1.751085 2.695171e-08   1.188325e-07
#> 5          NA          NA 1.500756 5.966911e-22   1.335670e-20
#> 6          NA          NA 1.951417 1.848299e-17   1.854673e-16
#>          Compound.name    CAS.ID   HMDB.ID KEGG.ID     Lab.ID     Adduct
#> 1           Piperidine  110-89-4      <NA>  C01746  MONA_2852     (M+H)+
#> 2               Phenol  108-95-2      <NA>  D01960 MONA_18506     (M+H)+
#> 3   Phenylacetaldehyde  122-78-1 HMDB06236  C00601    NO07389 (M+H-H2O)+
#> 4   5-Amino-1-pentanol 2508-29-4      <NA>    <NA>    NO07238     (M+H)+
#> 5               URACIL      <NA>      <NA>    <NA> MONA_18148     (M+H)+
#> 6 1,4-Cyclohexanedione      <NA>      <NA>    <NA> MONA_14519     (M+H)+
#>   mz.error mz.match.score RT.error RT.match.score                  CE        SS
#> 1 1.746869      0.9932417       NA             NA                  30 0.6143541
#> 2 1.416428      0.9955515       NA             NA                  10 0.6102452
#> 3 1.537004      0.9947640       NA             NA                  10 0.5748835
#> 4 1.169128      0.9969671       NA             NA                   5 0.5971697
#> 5 1.275544      0.9963909       NA             NA                  10 0.6889885
#> 6 1.051626      0.9975454       NA             NA HCD (NCE 20-30-40%) 0.5401414
#>   Total.score   Database Level
#> 1   0.7564369 MoNA_0.0.1     2
#> 2   0.7547351 MoNA_0.0.1     2
#> 3   0.7323387 NIST_0.0.1     2
#> 4   0.7470937 NIST_0.0.1     2
#> 5   0.8042644 MoNA_0.0.1     2
#> 6   0.7116679 MoNA_0.0.1     2

Load KEGG human pathway database

data("kegg_hsa_pathway", package = "metpath")
kegg_hsa_pathway
#> ---------Pathway source&version---------
#> KEGG  &  2021-12-13
#> -----------Pathway information------------
#> 345  pathways
#> 334  pathways have genes
#> 0  pathways have proteins
#> 281  pathways have compounds
#> Pathway class (top 10): Metabolism; Carbohydrate metabolism;Metabolism; Lipid metabolism
get_pathway_class(kegg_hsa_pathway)
#> # A tibble: 43 × 2
#>    class                                                                       n
#>    <chr>                                                                   <int>
#>  1 Cellular Processes; Cell growth and death                                   8
#>  2 Cellular Processes; Cell motility                                           1
#>  3 Cellular Processes; Cellular community - eukaryotes                         5
#>  4 Cellular Processes; Transport and catabolism                                7
#>  5 Environmental Information Processing; Membrane transport                    1
#>  6 Environmental Information Processing; Signal transduction                  26
#>  7 Environmental Information Processing; Signaling molecules and interact…     5
#>  8 Genetic Information Processing; Folding, sorting and degradation            7
#>  9 Genetic Information Processing; Replication and repair                      7
#> 10 Genetic Information Processing; Transcription                               3
#> # ℹ 33 more rows

Remove the disease pathways:

#get the class of pathways
pathway_class = 
  metpath::pathway_class(kegg_hsa_pathway)
head(pathway_class)
#> $hsa00010
#> [1] "Metabolism; Carbohydrate metabolism"
#> 
#> $hsa00020
#> [1] "Metabolism; Carbohydrate metabolism"
#> 
#> $hsa00030
#> [1] "Metabolism; Carbohydrate metabolism"
#> 
#> $hsa00040
#> [1] "Metabolism; Carbohydrate metabolism"
#> 
#> $hsa00051
#> [1] "Metabolism; Carbohydrate metabolism"
#> 
#> $hsa00052
#> [1] "Metabolism; Carbohydrate metabolism"
remain_idx =
  pathway_class %>%
  unlist() %>%
  stringr::str_detect("Disease") %>%
  `!`() %>%
  which()

remain_idx
#>   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
#>  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
#>  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
#>  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
#>  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  90  91  92  93  94
#>  [91]  95  96  97  98  99 100 101 102 103 104 105 106 107 108 109 110 111 112
#> [109] 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
#> [127] 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
#> [145] 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
#> [163] 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
#> [181] 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202
#> [199] 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
#> [217] 221 222 223 224 225 226 227 228 229 230 236 240 241 242 243 244 245 246
#> [235] 247 248 249 250 251 252 253 254
pathway_database =
  kegg_hsa_pathway[remain_idx]
pathway_database
#> ---------Pathway source&version---------
#> KEGG  &  2021-12-13
#> -----------Pathway information------------
#> 242  pathways
#> 235  pathways have genes
#> 0  pathways have proteins
#> 191  pathways have compounds
#> Pathway class (top 10): Metabolism; Carbohydrate metabolism;Metabolism; Lipid metabolism
kegg_id <-
  diff_metabolites$KEGG.ID 
kegg_id <-
  kegg_id[!is.na(kegg_id)]
kegg_id
#>  [1] "C01746" "D01960" "C00601" "C00153" "C01108" "C00906" "C10438" "C00300"
#>  [9] "C00407" "C14790" "C08493" "C02237" "C01575" "C00073" "C05842" "C00637"
#> [17] "C00079" "D00022" "C07481" "C12305" "C17846" "D00029" "C00399" "C08362"
#> [25] "C14214" "C01595" "C00319" "C00410" "C10523" "C01780" "C00762" "C00735"
#> [33] "C17337" "C01921" "C04230" "C06539" "C00186" "C01546" "C00490" "C02226"
#> [41] "C00064" "C06104" "C02612" "C07599" "C05593" "0"      "C01601" "C02656"
#> [49] "C00366" "C07130" "C05635" "C16038" "C08322" "C16308" "C10911" "C05498"
#> [57] "C05472" "C04555"
result <- 
enrich_kegg(query_id = kegg_id, 
            query_type = "compound", 
            id_type = "KEGG",
            pathway_database = pathway_database, 
            p_cutoff = 0.05, 
            p_adjust_method = "BH", 
            threads = 3)
#> 191 pathways.

Check the result:

result
#> ---------Pathway database&version---------
#> KEGG  &  1.0.8
#> -----------Enrichment result------------
#> 191  pathways are enriched
#> 11  pathways p-values < 0.05
#> Steroid hormone biosynthesis
#> Aldosterone-regulated sodium reabsorption
#> Valine, leucine and isoleucine biosynthesis
#> Aminoacyl-tRNA biosynthesis
#> Phenylalanine metabolism ... (only top 5 shows)
#> -----------Parameters------------
#>   Package Function.used                Time
#> 1 metpath enrich_kegg() 2023-08-31 00:08:43

Plot to show pathway enrichment result

enrich_bar_plot(object = result,
                x_axis = "p_value",
                cutoff = 0.05)
enrich_scatter_plot(object = result, y_axis = "p_value", y_axis_cutoff = 0.05)
enrich_network(
  object = result,
  point_size = "p_value",
  p_cutoff = 0.05,
  only_significant_pathway = TRUE
)

Session information

sessionInfo()
#> R version 4.3.0 (2023-04-21)
#> Platform: x86_64-apple-darwin20 (64-bit)
#> Running under: macOS Ventura 13.5.1
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib 
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
#> 
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> time zone: America/Los_Angeles
#> tzcode source: internal
#> 
#> attached base packages:
#> [1] grid      stats4    stats     graphics  grDevices utils     datasets 
#> [8] methods   base     
#> 
#> other attached packages:
#>  [1] lubridate_1.9.2       forcats_1.0.0         stringr_1.5.0        
#>  [4] purrr_1.0.1           readr_2.1.4           tibble_3.2.1         
#>  [7] tidyverse_2.0.0       metid_1.2.28          metpath_1.0.8        
#> [10] ComplexHeatmap_2.16.0 mixOmics_6.24.0       lattice_0.21-8       
#> [13] MASS_7.3-58.4         massstat_1.0.5        tidyr_1.3.0          
#> [16] ggfortify_0.4.16      massqc_1.0.6          masscleaner_1.0.11   
#> [19] xcms_3.22.0           MSnbase_2.26.0        ProtGenerics_1.32.0  
#> [22] S4Vectors_0.38.1      mzR_2.34.0            Rcpp_1.0.10          
#> [25] Biobase_2.60.0        BiocGenerics_0.46.0   BiocParallel_1.34.2  
#> [28] massprocesser_1.0.10  ggplot2_3.4.2         dplyr_1.1.2          
#> [31] magrittr_2.0.3        masstools_1.0.10      massdataset_1.0.25   
#> [34] tidymass_1.0.8       
#> 
#> loaded via a namespace (and not attached):
#>   [1] splines_4.3.0               bitops_1.0-7               
#>   [3] cellranger_1.1.0            polyclip_1.10-4            
#>   [5] preprocessCore_1.62.1       XML_3.99-0.14              
#>   [7] rpart_4.1.19                fastDummies_1.6.3          
#>   [9] lifecycle_1.0.3             doParallel_1.0.17          
#>  [11] rprojroot_2.0.3             globals_0.16.2             
#>  [13] backports_1.4.1             plotly_4.10.2              
#>  [15] openxlsx_4.2.5.2            limma_3.56.2               
#>  [17] Hmisc_5.1-0                 sass_0.4.6                 
#>  [19] rmarkdown_2.22              jquerylib_0.1.4            
#>  [21] yaml_2.3.7                  remotes_2.4.2              
#>  [23] doRNG_1.8.6                 zip_2.3.0                  
#>  [25] MsCoreUtils_1.12.0          pbapply_1.7-0              
#>  [27] RColorBrewer_1.1-3          zlibbioc_1.46.0            
#>  [29] GenomicRanges_1.52.0        ggraph_2.1.0               
#>  [31] itertools_0.1-3             RCurl_1.98-1.12            
#>  [33] nnet_7.3-18                 tweenr_2.0.2               
#>  [35] circlize_0.4.15             GenomeInfoDbData_1.2.10    
#>  [37] IRanges_2.34.0              ggrepel_0.9.3              
#>  [39] listenv_0.9.0               ellipse_0.4.5              
#>  [41] RSpectra_0.16-1             missForest_1.5             
#>  [43] parallelly_1.36.0           ncdf4_1.21                 
#>  [45] codetools_0.2-19            DelayedArray_0.26.3        
#>  [47] ggforce_0.4.1               tidyselect_1.2.0           
#>  [49] shape_1.4.6                 farver_2.1.1               
#>  [51] viridis_0.6.3               matrixStats_1.0.0          
#>  [53] base64enc_0.1-3             jsonlite_1.8.5             
#>  [55] GetoptLong_1.0.5            multtest_2.56.0            
#>  [57] e1071_1.7-13                tidygraph_1.2.3            
#>  [59] Formula_1.2-5               survival_3.5-5             
#>  [61] iterators_1.0.14            foreach_1.5.2              
#>  [63] progress_1.2.2              tools_4.3.0                
#>  [65] glue_1.6.2                  rARPACK_0.11-0             
#>  [67] gridExtra_2.3               xfun_0.39                  
#>  [69] here_1.0.1                  MatrixGenerics_1.12.2      
#>  [71] GenomeInfoDb_1.36.0         withr_2.5.0                
#>  [73] BiocManager_1.30.21         fastmap_1.1.1              
#>  [75] fansi_1.0.4                 blogdown_1.18.1            
#>  [77] digest_0.6.31               timechange_0.2.0           
#>  [79] R6_2.5.1                    colorspace_2.1-0           
#>  [81] utf8_1.2.3                  generics_0.1.3             
#>  [83] data.table_1.14.8           corpcor_1.6.10             
#>  [85] robustbase_0.95-1           class_7.3-21               
#>  [87] graphlayouts_1.0.0          prettyunits_1.1.1          
#>  [89] httr_1.4.6                  htmlwidgets_1.6.2          
#>  [91] S4Arrays_1.0.4              pkgconfig_2.0.3            
#>  [93] gtable_0.3.3                robust_0.7-1               
#>  [95] impute_1.74.1               MassSpecWavelet_1.66.0     
#>  [97] XVector_0.40.0              furrr_0.3.1                
#>  [99] pcaPP_2.0-3                 htmltools_0.5.5            
#> [101] bookdown_0.34               MALDIquant_1.22.1          
#> [103] clue_0.3-64                 scales_1.2.1               
#> [105] png_0.1-8                   knitr_1.43                 
#> [107] rstudioapi_0.14             reshape2_1.4.4             
#> [109] tzdb_0.4.0                  rjson_0.2.21               
#> [111] checkmate_2.2.0             ggcorrplot_0.1.4           
#> [113] proxy_0.4-27                cachem_1.0.8               
#> [115] GlobalOptions_0.1.2         parallel_4.3.0             
#> [117] foreign_0.8-84              mzID_1.38.0                
#> [119] vsn_3.68.0                  pillar_1.9.0               
#> [121] vctrs_0.6.2                 MsFeatures_1.8.0           
#> [123] RANN_2.6.1                  pcaMethods_1.92.0          
#> [125] randomForest_4.7-1.1        cluster_2.1.4              
#> [127] htmlTable_2.4.1             evaluate_0.21              
#> [129] mvtnorm_1.2-2               cli_3.6.1                  
#> [131] compiler_4.3.0              rlang_1.1.1                
#> [133] crayon_1.5.2                rngtools_1.5.2             
#> [135] Rdisop_1.60.0               rrcov_1.7-3                
#> [137] labeling_0.4.2              affy_1.78.0                
#> [139] plyr_1.8.8                  stringi_1.7.12             
#> [141] viridisLite_0.4.2           Biostrings_2.68.1          
#> [143] munsell_0.5.0               lazyeval_0.2.2             
#> [145] fit.models_0.64             Matrix_1.5-4               
#> [147] hms_1.1.3                   patchwork_1.1.2            
#> [149] future_1.32.0               KEGGREST_1.40.0            
#> [151] highr_0.10                  SummarizedExperiment_1.30.2
#> [153] igraph_1.4.3                affyio_1.70.0              
#> [155] bslib_0.5.0                 DEoptimR_1.0-14            
#> [157] readxl_1.4.2