Identify single peak with metid

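Sometimes we only want to annotate a single peak rather than the whole peak table. metid provides the identify_single_peak() function for this; for a mass_dataset object, as in the examples below, we use annotate_single_peak_mass_dataset().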

Data preparation

library(massdataset)
library(tidyverse)
library(metid)

# Read the example MS1 peak table that ships with metid.
ms1_data <- readr::read_csv(
  file.path(
    system.file("ms1_peak", package = "metid"),
    "ms1.peak.table.csv"
  )
)

# Append two constant-valued sample columns so there is something to use as
# expression data.
ms1_data <- data.frame(ms1_data, sample1 = 1, sample2 = 2)

# Columns name..rt describe the variables; name becomes variable_id.
variable_info <- dplyr::rename(
  dplyr::select(ms1_data, name:rt),
  variable_id = name
)

# Everything except name..rt is expression data, keyed by variable_id.
expression_data <- dplyr::select(ms1_data, -c(name:rt))
rownames(expression_data) <- variable_info$variable_id

# One row of sample metadata per expression column.
sample_info <- data.frame(
  sample_id = colnames(expression_data),
  injection.order = c(1, 2),
  class = rep("Subject", 2),
  group = rep("Subject", 2)
)

# Bundle the three tables into a single mass_dataset object.
object <- create_mass_dataset(
  expression_data = expression_data,
  sample_info = sample_info,
  variable_info = variable_info
)

object
#> -------------------- 
#> massdataset version: 1.0.25 
#> -------------------- 
#> 1.expression_data:[ 100 x 2 data.frame]
#> 2.sample_info:[ 2 x 4 data.frame]
#> 2 samples:sample1 sample2
#> 3.variable_info:[ 100 x 3 data.frame]
#> 100 variables:pRPLC_376 pRPLC_391 pRPLC_603 ... pRPLC_21734 pRPLC_22098
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 0 variables x 0 MS2 spectra]
#> -------------------- 
#> Processing information
#> 1 processings in total
#> create_mass_dataset ---------- 
#>       Package         Function.used                Time
#> 1 massdataset create_mass_dataset() 2023-09-02 10:42:06

Add MS2 to mass_dataset object

# Directory that will hold the MS2 file; it is passed to mutate_ms2() as
# `path` in the next step.
path <- "./metabolite_annotation"

# showWarnings = FALSE: on a re-run the directory already exists, and a plain
# dir.create(path) would emit an "already exists" warning every time.
dir.create(path, showWarnings = FALSE)

# Copy the example MGF file from the installed metid package into `path`.
# file.copy() returns FALSE when the copy did not happen, so check the
# printed value before relying on the file being up to date.
ms2_data <- system.file("ms2_data", package = "metid")
file.copy(
  from = file.path(ms2_data, "QC1_MSMS_NCE25.mgf"),
  to = path,
  overwrite = TRUE,
  recursive = TRUE
)
#> [1] FALSE

# Attach the MS2 spectra found under `path` to the object, using the
# reversed-phase ("rp") column, positive polarity, and MS1/MS2 matching
# tolerances of 10 (m/z) and 30 (RT).
object <- massdataset::mutate_ms2(
  object = object,
  path = path,
  column = "rp",
  polarity = "positive",
  ms1.ms2.match.mz.tol = 10,
  ms1.ms2.match.rt.tol = 30
)

object
#> -------------------- 
#> massdataset version: 1.0.25 
#> -------------------- 
#> 1.expression_data:[ 100 x 2 data.frame]
#> 2.sample_info:[ 2 x 4 data.frame]
#> 2 samples:sample1 sample2
#> 3.variable_info:[ 100 x 3 data.frame]
#> 100 variables:pRPLC_376 pRPLC_391 pRPLC_603 ... pRPLC_21734 pRPLC_22098
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 16 variables x 16 MS2 spectra]
#> -------------------- 
#> Processing information
#> 2 processings in total
#> create_mass_dataset ---------- 
#>       Package         Function.used                Time
#> 1 massdataset create_mass_dataset() 2023-09-02 10:42:06
#> mutate_ms2 ---------- 
#>       Package Function.used                Time
#> 1 massdataset  mutate_ms2() 2023-09-02 10:52:12

# Inspect the ms2_data slot to see which MS2 spectra were attached to the object.
object@ms2_data
#> $QC1_MSMS_NCE25.mgf
#> -------------------- 
#> column: rp 
#> polarity: positive 
#> mz_tol: 10 
#> rt_tol (second): 30 
#> -------------------- 
#> 16 variables:
#> pRPLC_603 pRPLC_1112 pRPLC_1307 pRPLC_1860 pRPLC_3927...
#> 16 MS2 spectra.
#> mz162.112344859731rt35.281947 mz209.092013478675rt62.786886 mz314.232098215226rt415.887162 mz249.184679448673rt585.115404 mz232.154251098633rt84.328626...

Annotate single peaks

# Load the snyder_database_rplc0.0.3 database bundled with metid.
data("snyder_database_rplc0.0.3", package = "metid")

# Annotate the third variable with both RT and MS2 matching switched off.
# add_to_annotation_table = FALSE: the matches are returned and printed
# instead of being stored in the object.
annotate_single_peak_mass_dataset(
  object = object,
  variable_index = 3,
  based_on_rt = FALSE,
  based_on_ms2 = FALSE,
  database = snyder_database_rplc0.0.3,
  add_to_annotation_table = FALSE
)
#>   variable_id ms2_files_id ms2_spectrum_id  Compound.name   CAS.ID   HMDB.ID
#> 1   pRPLC_603           NA              NA L(-)-Carnitine     <NA>      <NA>
#> 2   pRPLC_603           NA              NA    L-Carnitine 541-15-1 HMDB00062
#>   KEGG.ID   Lab.ID Adduct mz.error mz.match.score RT.error RT.match.score CE SS
#> 1    <NA>  RPLC_54 (M+H)+  0.05375      0.9999977       NA             NA NA NA
#> 2  C00318 RPLC_406 (M+H)+  1.86625      0.9972176       NA             NA NA NA
#>   Total.score Database Level
#> 1   0.9999977 MS_0.0.2     3
#> 2   0.9972176 MS_0.0.2     3
# Same variable, now with RT matching switched on (MS2 matching still off);
# the RT.error and RT.match.score columns are filled in the printed result.
annotate_single_peak_mass_dataset(
  object = object,
  variable_index = 3,
  based_on_rt = TRUE,
  based_on_ms2 = FALSE,
  database = snyder_database_rplc0.0.3,
  add_to_annotation_table = FALSE
)
#>   variable_id ms2_files_id ms2_spectrum_id  Compound.name   CAS.ID   HMDB.ID
#> 1   pRPLC_603           NA              NA L(-)-Carnitine     <NA>      <NA>
#> 2   pRPLC_603           NA              NA    L-Carnitine 541-15-1 HMDB00062
#>   KEGG.ID   Lab.ID Adduct mz.error mz.match.score RT.error RT.match.score CE SS
#> 1    <NA>  RPLC_54 (M+H)+  0.05375      0.9999977    0.746      0.9996909 NA NA
#> 2  C00318 RPLC_406 (M+H)+  1.86625      0.9972176    2.254      0.9971815 NA NA
#>   Total.score Database Level
#> 1   0.9998443 MS_0.0.2     2
#> 2   0.9971995 MS_0.0.2     2
# Same variable with both RT and MS2 matching switched on; the printed result
# additionally reports the matched MS2 file, spectrum id, CE and SS columns.
annotate_single_peak_mass_dataset(
  object = object,
  variable_index = 3,
  based_on_rt = TRUE,
  based_on_ms2 = TRUE,
  database = snyder_database_rplc0.0.3,
  add_to_annotation_table = FALSE
)
#> 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |======================================================================| 100%
#>   variable_id       ms2_files_id               ms2_spectrum_id Compound.name
#> 1   pRPLC_603 QC1_MSMS_NCE25.mgf mz162.112344859731rt35.281947   L-Carnitine
#>     CAS.ID   HMDB.ID KEGG.ID   Lab.ID Adduct mz.error mz.match.score RT.error
#> 1 541-15-1 HMDB00062  C00318 RPLC_406 (M+H)+ 1.424649      0.9983776 0.718053
#>   RT.match.score    CE        SS Total.score Database Level
#> 1      0.9997136 NCE25 0.5995589   0.7993023 MS_0.0.2     1

Add annotation result to object


# Nothing has been written to the annotation table yet.
object@annotation_table
#> data frame with 0 columns and 0 rows

# add_to_annotation_table = TRUE: the call now yields an updated object whose
# annotation_table slot holds the matches (RT and MS2 matching switched off).
object1 <- annotate_single_peak_mass_dataset(
  object = object,
  database = snyder_database_rplc0.0.3,
  variable_index = 3,
  based_on_rt = FALSE,
  based_on_ms2 = FALSE,
  add_to_annotation_table = TRUE
)

object1@annotation_table
#>   variable_id ms2_files_id ms2_spectrum_id  Compound.name   CAS.ID   HMDB.ID
#> 1   pRPLC_603           NA              NA L(-)-Carnitine     <NA>      <NA>
#> 2   pRPLC_603           NA              NA    L-Carnitine 541-15-1 HMDB00062
#>   KEGG.ID   Lab.ID Adduct mz.error mz.match.score RT.error RT.match.score CE SS
#> 1    <NA>  RPLC_54 (M+H)+  0.05375      0.9999977       NA             NA NA NA
#> 2  C00318 RPLC_406 (M+H)+  1.86625      0.9972176       NA             NA NA NA
#>   Total.score Database Level
#> 1   0.9999977 MS_0.0.2     3
#> 2   0.9972176 MS_0.0.2     3
# Annotate the same variable again, starting from object1 and this time with
# RT matching switched on; the result is added to the annotation table.
object2 <- annotate_single_peak_mass_dataset(
  object = object1,
  database = snyder_database_rplc0.0.3,
  variable_index = 3,
  based_on_rt = TRUE,
  based_on_ms2 = FALSE,
  add_to_annotation_table = TRUE
)

object2@annotation_table
#>   variable_id ms2_files_id ms2_spectrum_id  Compound.name   CAS.ID   HMDB.ID
#> 1   pRPLC_603           NA              NA L(-)-Carnitine     <NA>      <NA>
#> 2   pRPLC_603           NA              NA    L-Carnitine 541-15-1 HMDB00062
#> 3   pRPLC_603           NA              NA L(-)-Carnitine     <NA>      <NA>
#>   KEGG.ID   Lab.ID Adduct mz.error mz.match.score RT.error RT.match.score CE SS
#> 1    <NA>  RPLC_54 (M+H)+  0.05375      0.9999977    0.746      0.9996909 NA NA
#> 2  C00318 RPLC_406 (M+H)+  1.86625      0.9972176    2.254      0.9971815 NA NA
#> 3    <NA>  RPLC_54 (M+H)+  0.05375      0.9999977       NA             NA NA NA
#>   Total.score Database Level
#> 1   0.9998443 MS_0.0.2     2
#> 2   0.9971995 MS_0.0.2     2
#> 3   0.9999977 MS_0.0.2     3
# Repeat the RT-based annotation once more, now starting from object2, and
# inspect the resulting annotation table.
object3 <- annotate_single_peak_mass_dataset(
  object = object2,
  database = snyder_database_rplc0.0.3,
  variable_index = 3,
  based_on_rt = TRUE,
  based_on_ms2 = FALSE,
  add_to_annotation_table = TRUE
)

object3@annotation_table
#>   variable_id ms2_files_id ms2_spectrum_id  Compound.name   CAS.ID   HMDB.ID
#> 1   pRPLC_603           NA              NA L(-)-Carnitine     <NA>      <NA>
#> 2   pRPLC_603           NA              NA    L-Carnitine 541-15-1 HMDB00062
#>   KEGG.ID   Lab.ID Adduct mz.error mz.match.score RT.error RT.match.score CE SS
#> 1    <NA>  RPLC_54 (M+H)+  0.05375      0.9999977    0.746      0.9996909 NA NA
#> 2  C00318 RPLC_406 (M+H)+  1.86625      0.9972176    2.254      0.9971815 NA NA
#>   Total.score Database Level
#> 1   0.9998443 MS_0.0.2     2
#> 2   0.9971995 MS_0.0.2     2

Session information

# Record the R version, platform and package versions used to produce the
# output shown in this document.
sessionInfo()
#> R version 4.3.0 (2023-04-21)
#> Platform: x86_64-apple-darwin20 (64-bit)
#> Running under: macOS Ventura 13.5.1
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib 
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
#> 
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> time zone: America/Los_Angeles
#> tzcode source: internal
#> 
#> attached base packages:
#> [1] grid      stats4    stats     graphics  grDevices utils     datasets 
#> [8] methods   base     
#> 
#> other attached packages:
#>  [1] lubridate_1.9.2       forcats_1.0.0         stringr_1.5.0        
#>  [4] purrr_1.0.1           readr_2.1.4           tibble_3.2.1         
#>  [7] tidyverse_2.0.0       metid_1.2.29          metpath_1.0.8        
#> [10] ComplexHeatmap_2.16.0 mixOmics_6.24.0       lattice_0.21-8       
#> [13] MASS_7.3-58.4         massstat_1.0.5        tidyr_1.3.0          
#> [16] ggfortify_0.4.16      massqc_1.0.6          masscleaner_1.0.11   
#> [19] xcms_3.22.0           MSnbase_2.26.0        ProtGenerics_1.32.0  
#> [22] S4Vectors_0.38.1      mzR_2.34.0            Rcpp_1.0.10          
#> [25] Biobase_2.60.0        BiocGenerics_0.46.0   BiocParallel_1.34.2  
#> [28] massprocesser_1.0.10  ggplot2_3.4.2         dplyr_1.1.2          
#> [31] magrittr_2.0.3        masstools_1.0.10      massdataset_1.0.25   
#> [34] tidymass_1.0.8       
#> 
#> loaded via a namespace (and not attached):
#>   [1] splines_4.3.0               bitops_1.0-7               
#>   [3] cellranger_1.1.0            polyclip_1.10-4            
#>   [5] preprocessCore_1.62.1       XML_3.99-0.14              
#>   [7] rpart_4.1.19                fastDummies_1.6.3          
#>   [9] lifecycle_1.0.3             doParallel_1.0.17          
#>  [11] rprojroot_2.0.3             globals_0.16.2             
#>  [13] backports_1.4.1             plotly_4.10.2              
#>  [15] openxlsx_4.2.5.2            limma_3.56.2               
#>  [17] Hmisc_5.1-0                 sass_0.4.6                 
#>  [19] rmarkdown_2.22              jquerylib_0.1.4            
#>  [21] yaml_2.3.7                  remotes_2.4.2              
#>  [23] doRNG_1.8.6                 zip_2.3.0                  
#>  [25] MsCoreUtils_1.12.0          pbapply_1.7-0              
#>  [27] RColorBrewer_1.1-3          zlibbioc_1.46.0            
#>  [29] GenomicRanges_1.52.0        ggraph_2.1.0               
#>  [31] itertools_0.1-3             RCurl_1.98-1.12            
#>  [33] nnet_7.3-18                 tweenr_2.0.2               
#>  [35] circlize_0.4.15             GenomeInfoDbData_1.2.10    
#>  [37] IRanges_2.34.0              ggrepel_0.9.3              
#>  [39] listenv_0.9.0               ellipse_0.4.5              
#>  [41] RSpectra_0.16-1             missForest_1.5             
#>  [43] parallelly_1.36.0           ncdf4_1.21                 
#>  [45] codetools_0.2-19            DelayedArray_0.26.3        
#>  [47] ggforce_0.4.1               tidyselect_1.2.0           
#>  [49] shape_1.4.6                 farver_2.1.1               
#>  [51] viridis_0.6.3               matrixStats_1.0.0          
#>  [53] base64enc_0.1-3             jsonlite_1.8.5             
#>  [55] GetoptLong_1.0.5            multtest_2.56.0            
#>  [57] e1071_1.7-13                tidygraph_1.2.3            
#>  [59] Formula_1.2-5               survival_3.5-5             
#>  [61] iterators_1.0.14            foreach_1.5.2              
#>  [63] progress_1.2.2              tools_4.3.0                
#>  [65] glue_1.6.2                  rARPACK_0.11-0             
#>  [67] gridExtra_2.3               xfun_0.39                  
#>  [69] here_1.0.1                  MatrixGenerics_1.12.2      
#>  [71] GenomeInfoDb_1.36.0         withr_2.5.0                
#>  [73] BiocManager_1.30.21         fastmap_1.1.1              
#>  [75] fansi_1.0.4                 blogdown_1.18.1            
#>  [77] digest_0.6.31               timechange_0.2.0           
#>  [79] R6_2.5.1                    colorspace_2.1-0           
#>  [81] utf8_1.2.3                  generics_0.1.3             
#>  [83] data.table_1.14.8           corpcor_1.6.10             
#>  [85] robustbase_0.95-1           class_7.3-21               
#>  [87] graphlayouts_1.0.0          prettyunits_1.1.1          
#>  [89] httr_1.4.6                  htmlwidgets_1.6.2          
#>  [91] S4Arrays_1.0.4              pkgconfig_2.0.3            
#>  [93] gtable_0.3.3                robust_0.7-1               
#>  [95] impute_1.74.1               MassSpecWavelet_1.66.0     
#>  [97] XVector_0.40.0              furrr_0.3.1                
#>  [99] pcaPP_2.0-3                 htmltools_0.5.5            
#> [101] bookdown_0.34               MALDIquant_1.22.1          
#> [103] clue_0.3-64                 scales_1.2.1               
#> [105] png_0.1-8                   knitr_1.43                 
#> [107] rstudioapi_0.14             reshape2_1.4.4             
#> [109] tzdb_0.4.0                  rjson_0.2.21               
#> [111] checkmate_2.2.0             ggcorrplot_0.1.4           
#> [113] proxy_0.4-27                cachem_1.0.8               
#> [115] GlobalOptions_0.1.2         parallel_4.3.0             
#> [117] foreign_0.8-84              mzID_1.38.0                
#> [119] vsn_3.68.0                  pillar_1.9.0               
#> [121] vctrs_0.6.2                 MsFeatures_1.8.0           
#> [123] RANN_2.6.1                  pcaMethods_1.92.0          
#> [125] randomForest_4.7-1.1        cluster_2.1.4              
#> [127] htmlTable_2.4.1             evaluate_0.21              
#> [129] mvtnorm_1.2-2               cli_3.6.1                  
#> [131] compiler_4.3.0              rlang_1.1.1                
#> [133] crayon_1.5.2                rngtools_1.5.2             
#> [135] Rdisop_1.60.0               rrcov_1.7-3                
#> [137] affy_1.78.0                 plyr_1.8.8                 
#> [139] stringi_1.7.12              viridisLite_0.4.2          
#> [141] Biostrings_2.68.1           munsell_0.5.0              
#> [143] lazyeval_0.2.2              fit.models_0.64            
#> [145] Matrix_1.5-4                hms_1.1.3                  
#> [147] patchwork_1.1.2             future_1.32.0              
#> [149] KEGGREST_1.40.0             SummarizedExperiment_1.30.2
#> [151] igraph_1.4.3                affyio_1.70.0              
#> [153] bslib_0.5.0                 DEoptimR_1.0-14            
#> [155] readxl_1.4.2
Previous