libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
psmfeaturesscan.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/cbor/psm/evalscan/psmfeaturesscan.cpp
3 * \date 15/07/2025
4 * \author Olivier Langella
5 * \brief compute features on scan's PSM
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2025 Olivier Langella <Olivier.Langella@universite-paris-saclay.fr>.
10 *
11 * This file is part of PAPPSOms-tools.
12 *
13 * PAPPSOms-tools is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms-tools is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms-tools. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
28#include "psmfeaturesscan.h"
29#include <QCborArray>
30#include <QCborMap>
31#include "../../../../peptide/peptideproformaparser.h"
32#include "../../../../psm/xtandem/xtandemhyperscore.h"
33
34
35namespace pappso
36{
37namespace cbor
38{
39namespace psm
40{
41
43 pappso::XtandemSpectrumProcess &tandem_spectrum_process,
44 std::list<Enums::PeptideIon> &ion_list,
45 pappso::PsmFeatures &psm_features,
46 pappso::PrecisionPtr fragment_tolerance)
47 : CborScanMapBase(psm_file_scan_process),
48 m_tandemSpectrumProcess(tandem_spectrum_process),
49 m_ionList(ion_list),
50 m_psmFeatures(psm_features)
51{
52 m_fragmentTolerance = fragment_tolerance;
53}
54
58
59double
60PsmFeaturesScan::checkInf(double input) const
61{
62 if(input < 0)
63 return 0;
64 return input;
65}
66
67void
69{
70
71 if(keys().contains("psm_list"))
72 {
74
75 pappso::MassSpectrum spectrum =
76 m_tandemSpectrumProcess.process(*qualified_mass_spectrum.get()->getMassSpectrumSPtr().get(),
77 qualified_mass_spectrum.get()->getPrecursorMz(),
78 qualified_mass_spectrum.get()->getPrecursorCharge());
79
80 QCborArray new_psm_arr;
81 for(QCborValue cbor_psm : value("psm_list").toArray())
82 {
83 QCborMap cbor_psm_map = cbor_psm.toMap();
84 QCborMap cbor_psm_features;
85 pappso::PeptideSp peptide_sp =
86 pappso::PeptideProFormaParser::parseString(cbor_psm_map.value("proforma").toString());
87
88
89 std::size_t peptide_size = peptide_sp.get()->size();
90 cbor_psm_features.insert(QString("peptide_size"), (unsigned int)peptide_size);
91
92 pappso::XtandemHyperscore hyperscore(spectrum,
93 peptide_sp,
94 qualified_mass_spectrum.get()->getPrecursorCharge(),
97 true);
98 cbor_psm_features.insert(QString("hyperscore"), QCborValue(hyperscore.getHyperscore()));
99
100
102 peptide_sp,
103 qualified_mass_spectrum.get()->getMassSpectrumSPtr().get(),
104 qualified_mass_spectrum.get()->getPrecursorCharge(),
105 2);
106 // TIC
107 cbor_psm_features.insert(QString("total_intensity"),
108 std::log(m_psmFeatures.getTotalIntensity()));
109 // MaxIntALL
110 cbor_psm_features.insert(QString("max_intensity"),
111 checkInf(std::log(qualified_mass_spectrum.get()
112 ->getMassSpectrumSPtr()
113 .get()
114 ->maxIntensityDataPoint()
115 .y)));
116
117 // MaxYionInt
118 cbor_psm_features.insert(
119 QString("MaxYionInt"),
121
122 // MaxBionInt
123 cbor_psm_features.insert(
124 QString("MaxBionInt"),
126
127 // SumYmatchInt
128 cbor_psm_features.insert(
129 QString("SumYmatchInt"),
131
132 // SumBmatchInt
133 cbor_psm_features.insert(
134 QString("SumBmatchInt"),
136
137 // FracYmatchInt
138 cbor_psm_features.insert(
139 QString("FracYmatchInt"),
142 // FracBmatchInt
143 cbor_psm_features.insert(
144 QString("FracBmatchInt"),
147
148 // SeqCoverYion
149 cbor_psm_features.insert(
150 QString("SeqCoverYion"),
152 (double)peptide_size);
153 // SeqCoverBion
154 cbor_psm_features.insert(
155 QString("SeqCoverBion"),
157 (double)peptide_size);
158
159
160 // ConsecutiveYion
161 cbor_psm_features.insert(
162 QString("ConsecutiveYion"),
164 // ConsecutiveBion
165 cbor_psm_features.insert(
166 QString("ConsecutiveBion"),
168
169 // MassErrMean
170 cbor_psm_features.insert(QString("MassErrMean"), m_psmFeatures.getMatchedMzDiffMean());
171
172 // MassErrSD
173 cbor_psm_features.insert(QString("MassErrSD"), m_psmFeatures.getMatchedMzDiffSd());
174
175 // NumofAnnoPeaks
176 cbor_psm_features.insert(QString("NumofAnnoPeaks"),
177 (unsigned int)m_psmFeatures.getNumberOfMatchedIons());
178
179 // NumofComplementPeaks
180 std::size_t num_of_pairs = m_psmFeatures.countMatchedIonComplementPairs();
181 cbor_psm_features.insert(QString("NumofComplementPeaks"), (unsigned int)num_of_pairs);
182 if(num_of_pairs > 0)
183 {
184 // SumComplementPeaksInt
185 cbor_psm_features.insert(
186 QString("SumComplementPeaksInt"),
188
189 // FracComplementPeaksInt
190 cbor_psm_features.insert(
191 QString("FracComplementPeaksInt"),
194 // SeqCoverComplementPeaks
195 cbor_psm_features.insert(
196 QString("SeqCoverComplementPeaks"),
198 (double)peptide_size);
199 }
201 cbor_psm_features.insert(QString("lrSize"), (unsigned int)lr.getSize());
202
203
204 double coeff_of_determination = lr.getCoefficientOfDetermination();
205 if(std::isnan(coeff_of_determination))
206 {
207 }
208 else
209 {
210 cbor_psm_features.insert(QString("lrCoeffDet"), coeff_of_determination);
211 }
212
213
214 QCborMap psm_eval = cbor_psm_map.value("eval").toMap();
215 psm_eval.remove(QString("features"));
216 psm_eval.insert(QString("features"), cbor_psm_features);
217 cbor_psm_map.remove(QString("eval"));
218 cbor_psm_map.insert(QString("eval"), psm_eval);
219
220 new_psm_arr.push_back(cbor_psm_map);
221 }
222
223 insert(QString("psm_list"), new_psm_arr);
224 }
225}
226
227} // namespace psm
228} // namespace cbor
229} // namespace pappso
std::size_t getSize() const
get data size
double getCoefficientOfDetermination() const
get Coefficient of determination (R2)
Class to represent a mass spectrum.
static PeptideSp parseString(const QString &pepstr)
double getMaxIntensityPeakIonMatch(Enums::PeptideIon ion_type) const
double getIntensityOfMatchedIon(Enums::PeptideIon ion_type)
get the sum of intensity of a specific ion
std::size_t getNumberOfMatchedIons() const
number of matched ions (peaks)
std::size_t getAaSequenceCoverage(Enums::PeptideIon ion_type)
number of amino acids covered by matched ions
double getTotalIntensity() const
sum of all peak intensities (matched or not)
double getMatchedMzDiffMean() const
get mean deviation of matched peak mass delta
double getTotalIntensityOfMatchedIonComplementPairs() const
intensity of matched ion complement
std::size_t countMatchedIonComplementPairs() const
count the number of matched ion complement pairs
std::size_t getComplementPairsAaSequenceCoverage()
number of amino acids covered by matched complement pairs of ions
std::size_t getMaxConsecutiveIon(Enums::PeptideIon ion_type)
get the maximum consecutive fragments of one ion type
LinearRegression getIonIsotopeLinearRegression() const
void setPeptideSpectrumCharge(const pappso::PeptideSp peptideSp, const MassSpectrum *p_spectrum, unsigned int parent_charge, unsigned int max_isotope_number)
double getMatchedMzDiffSd() const
get standard deviation of matched peak mass delta
std::map< pappso_double, pappso_double > toMap() const
Definition trace.cpp:691
pappso_double getHyperscore() const
MassSpectrum process(const MassSpectrum &spectrum, pappso_double parent_ion_mass, unsigned int parent_charge) const
process raw spectrum to prepare hyperscore computation
pappso::QualifiedMassSpectrumSPtr getCurrentQualifiedMassSpectrumSPtr() const
PsmFeaturesScan(const PsmFileScanProcess &psm_file_scan_process, pappso::XtandemSpectrumProcess &tandem_spectrum_process, std::list< pappso::Enums::PeptideIon > &ion_list, pappso::PsmFeatures &psm_features, pappso::PrecisionPtr fragment_tolerance)
pappso::XtandemSpectrumProcess & m_tandemSpectrumProcess
std::list< pappso::Enums::PeptideIon > & m_ionList
double checkInf(double input) const
pappso::PrecisionPtr m_fragmentTolerance
Basic PSM file reader to process scan (parallelized scan processing)
@ y
Cter amino ions.
@ b
Nter acylium ions.
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< QualifiedMassSpectrum > QualifiedMassSpectrumSPtr
std::shared_ptr< const Peptide > PeptideSp