libpappsomspp
Library for mass spectrometry
pwizmsrunreader.cpp
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/msrun/private/pwizmsrunreader.cpp
3  * \date 29/05/2018
4  * \author Olivier Langella
5  * \brief MSrun file reader based on the ProteoWizard library
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2018 Olivier Langella <Olivier.Langella@u-psud.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  * Contributors:
27  * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
28  *implementation
29  ******************************************************************************/
30 
31 
32 #include <QDebug>
33 
34 #include "pwizmsrunreader.h"
35 
36 #include <pwiz/data/msdata/DefaultReaderList.hpp>
37 
38 
39 #include "../../utils.h"
40 #include "../../pappsoexception.h"
41 #include "../../exception/exceptionnotfound.h"
42 #include "../../exception/exceptionnotpossible.h"
43 
44 
45 // int pwizMsRunReaderMetaTypeId =
46 // qRegisterMetaType<pappso::PwizMsRunReader>("pappso::PwizMsRunReader");
47 
48 
49 namespace pappso
50 {
51 
52 
54  : MsRunReader(msrun_id_csp)
55 {
56  // The initialization needs to be done immediately so that we get the pwiz
57  // MsDataPtr corresponding to the right ms_run_id in the parameter. That
58  // pointer will be set to msp_msData.
59 
60  initialize();
61 }
62 
63 
64 void
66 {
67  std::string file_name_std =
69 
70  // Make a backup of the current locale
71  std::string env_backup = setlocale(LC_ALL, "");
72  // struct lconv *lc = localeconv();
73 
74  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
75  //<< "env_backup=" << env_backup.c_str() << "lc->decimal_point"
76  //<< lc->decimal_point;
77 
78  // Now actually search the useful MSDataPtr to the member variable.
79 
80  pwiz::msdata::DefaultReaderList defaultReaderList;
81 
82  std::vector<pwiz::msdata::MSDataPtr> msDataPtrVector;
83 
84  try
85  {
86  defaultReaderList.read(file_name_std, msDataPtrVector);
87  }
88  catch(std::exception &error)
89  {
90  qDebug() << QString("Failed to read the data from file %1")
91  .arg(QString::fromStdString(file_name_std));
92  }
93 
94  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
95  //<< "The number of runs is:" << msDataPtrVector.size()
96  //<< "The number of spectra in first run is:"
97  //<< msDataPtrVector.at(0)->run.spectrumListPtr->size();
98 
99  // Single-run file handling here.
100 
101  // Specific case of the MGF data format: we do not have a run id for that kind
102  // of data. In this case there must be a single run!
103 
104  if(mcsp_msRunId->getRunId().isEmpty())
105  {
106  if(msDataPtrVector.size() != 1)
107  throw(
108  ExceptionNotPossible("For the kind of file at hand there can only be "
109  "one run in the file."));
110 
111  // At this point we know the single msDataPtr is the one we are looking
112  // for.
113 
114  msp_msData = msDataPtrVector.front();
115  }
116 
117  else
118  {
119  // Multi-run file handling here.
120  for(auto &msDataPtr : msDataPtrVector)
121  {
122  if(msDataPtr->run.id == mcsp_msRunId->getRunId().toStdString())
123  {
124  msp_msData = msDataPtr;
125 
126  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
127  //<< "Found the right MSDataPtr for run id.";
128 
129  break;
130  }
131  }
132  }
133 
134  if(msp_msData == nullptr)
135  {
136  throw(ExceptionNotPossible(
137  "Could not find a MSDataPtr matching the requested run id."));
138  }
139 
140 
141  // check if this MS run can be used with scan numbers
142  // MS:1000490 Agilent instrument model
143  pwiz::cv::CVID native_id_format =
144  pwiz::msdata::id::getDefaultNativeIDFormat(*msp_msData.get());
145 
146  // msp_msData.get()->getDefaultNativeIDFormat();
147 
148  if(native_id_format == pwiz::cv::CVID::MS_Thermo_nativeID_format)
149  {
150  m_hasScanNumbers = true;
151  }
152  else
153  {
154  m_hasScanNumbers = false;
155  }
156 
157  if(mcsp_msRunId.get()->getMzFormat() == MzFormat::mzXML)
158  {
159  m_hasScanNumbers = true;
160  }
161 }
162 
163 
165 {
166 }
167 
168 
169 pwiz::msdata::SpectrumPtr
170 PwizMsRunReader::getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list,
171  std::size_t spectrum_index,
172  bool want_binary_data) const
173 {
174  pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp;
175 
176  try
177  {
178  native_pwiz_spectrum_sp =
179  p_spectrum_list->spectrum(spectrum_index, want_binary_data);
180  }
181  catch(std::runtime_error &error)
182  {
183  qDebug() << "getPwizSpectrumPtr error " << error.what() << " "
184  << typeid(error).name();
185 
186  throw ExceptionNotFound(QObject::tr("Pwiz spectrum index %1 not found in "
187  "MS file std::runtime_error :\n%2")
188  .arg(spectrum_index)
189  .arg(error.what()));
190  }
191  catch(std::exception &error)
192  {
193  qDebug() << "getPwizSpectrumPtr error " << error.what()
194  << typeid(error).name();
195 
196  throw ExceptionNotFound(
197  QObject::tr("Pwiz spectrum index %1 not found in MS file :\n%2")
198  .arg(spectrum_index)
199  .arg(error.what()));
200  }
201 
202  if(native_pwiz_spectrum_sp.get() == nullptr)
203  {
204  throw ExceptionNotFound(
205  QObject::tr(
206  "Pwiz spectrum index %1 not found in MS file : null pointer")
207  .arg(spectrum_index));
208  }
209 
210  return native_pwiz_spectrum_sp;
211 }
212 
213 
214 bool
216  pwiz::msdata::Spectrum *spectrum_p,
217  QualifiedMassSpectrum &qualified_mass_spectrum) const
218 {
219 
220  // We now have to set the retention time at which this mass spectrum
221  // was acquired. This is the scan start time.
222 
223  if(!spectrum_p->scanList.scans[0].hasCVParam(
224  pwiz::msdata::MS_scan_start_time))
225  {
226  if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
227  { // MGF could not have scan start time
228  qualified_mass_spectrum.setRtInSeconds(-1);
229  }
230  else
231  {
232  throw(ExceptionNotPossible(
233  "The spectrum has no scan start time value set."));
234  }
235  }
236  else
237  {
238  pwiz::data::CVParam retention_time_cv_param =
239  spectrum_p->scanList.scans[0].cvParam(pwiz::msdata::MS_scan_start_time);
240 
241  // Try to get the units of the retention time value.
242 
243  std::string unit_name = retention_time_cv_param.unitsName();
244  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
245  //<< "Unit name for the retention time:"
246  //<< QString::fromStdString(unit_name);
247 
248  if(unit_name == "second")
249  {
250  qualified_mass_spectrum.setRtInSeconds(
251  retention_time_cv_param.valueAs<double>());
252  }
253  else if(unit_name == "minute")
254  {
255  qualified_mass_spectrum.setRtInSeconds(
256  retention_time_cv_param.valueAs<double>() * 60);
257  }
258  else
259  throw(
260  ExceptionNotPossible("Could not determine the unit for the "
261  "scan start time value."));
262  }
263 
264  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
265  //<< "Retention time for spectrum is:"
266  //<< qualified_mass_spectrum.getRtInSeconds();
267 
268  // Old version not checking unit (by default unit is minutes for RT,
269  // not seconds)
270  //
271  // pappso_double retentionTime =
272  // QString(spectrum_p->scanList.scans[0]
273  //.cvParam(pwiz::msdata::MS_scan_start_time)
274  //.value.c_str())
275  //.toDouble();
276  // qualified_mass_spectrum.setRtInSeconds(retentionTime);
277 
278  return true;
279 }
280 
281 
282 bool
284  pwiz::msdata::Spectrum *spectrum_p,
285  QualifiedMassSpectrum &qualified_mass_spectrum) const
286 {
287  // Not all the acquisitions have ion mobility data. We need to test
288  // that:
289 
290  if(spectrum_p->scanList.scans[0].hasCVParam(
291  pwiz::msdata::MS_ion_mobility_drift_time))
292  {
293 
294  // qDebug() << "as strings:"
295  //<< QString::fromStdString(
296  // spectrum_p->scanList.scans[0]
297  //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
298  //.valueAs<std::string>());
299 
300  pappso_double driftTime =
301  spectrum_p->scanList.scans[0]
302  .cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
303  .valueAs<double>();
304 
305  // qDebug() << "driftTime:" << driftTime;
306 
307  // Old version requiring use of QString.
308  // pappso_double driftTime =
309  // QString(spectrum_p->scanList.scans[0]
310  //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
311  //.value.c_str())
312  //.toDouble();
313 
314  // Now make positively sure that the obtained value is correct.
315  // Note that I suffered a lot with Waters Synapt data that
316  // contained apparently correct drift time XML element that in
317  // fact contained either NaN or inf. When such mass spectra were
318  // encountered, the mz,i data were bogus and crashed the data
319  // loading functions. We just want to skip this kind of bogus mass
320  // spectrum by letting the caller know that the drift time was
321  // bogus ("I" is Filippo Rusconi).
322 
323  if(std::isnan(driftTime) || std::isinf(driftTime))
324  {
325  // qDebug() << "detected as nan or inf.";
326 
327  return false;
328  }
329  else
330  {
331  // The mzML standard stipulates that drift times are in
332  // milliseconds.
333  qualified_mass_spectrum.setDtInMilliSeconds(driftTime);
334  }
335  }
336  // End of
337  // if(spectrum_p->scanList.scans[0].hasCVParam(
338  // pwiz::msdata::MS_ion_mobility_drift_time))
339  else
340  {
341  // Not a bogus mass spectrum but also not a drift spectrum, set -1
342  // as the drift time value.
343  qualified_mass_spectrum.setDtInMilliSeconds(-1);
344  }
345 
346  return true;
347 }
348 
349 
352  const MassSpectrumId &massSpectrumId,
353  pwiz::msdata::Spectrum *spectrum_p,
354  bool want_binary_data,
355  bool &ok) const
356 {
357  // qDebug();
358 
359  std::string env;
360  env = setlocale(LC_ALL, "");
361  setlocale(LC_ALL, "C");
362 
363  QualifiedMassSpectrum qualified_mass_spectrum(massSpectrumId);
364 
365  try
366  {
367 
368  // We want to store the ms level for this spectrum
369 
370  int msLevel =
371  (spectrum_p->cvParam(pwiz::msdata::MS_ms_level).valueAs<int>());
372 
373  qualified_mass_spectrum.setMsLevel(msLevel);
374 
375  // We want to know if this spectrum is a fragmentation spectrum obtained
376  // from a selected precursor ion.
377 
378  std::size_t precursor_list_size = spectrum_p->precursors.size();
379 
380  //qDebug() << "For spectrum at index:" << massSpectrumId.getSpectrumIndex()
381  //<< "msLevel:" << msLevel
382  //<< "with number of precursors:" << precursor_list_size;
383 
384  if(precursor_list_size > 0)
385  {
386 
387  // Sanity check
388  if(msLevel < 2)
389  {
390  qDebug() << "Going to throw: msLevel cannot be less than two for "
391  "a spectrum that has items in its Precursor list.";
392 
393  throw(ExceptionNotPossible(
394  "msLevel cannot be less than two for "
395  "a spectrum that has items in its Precursor list."));
396  }
397 
398  // See what is the first precursor in the list.
399 
400  for(auto &precursor : spectrum_p->precursors)
401  {
402 
403  // Set this variable ready as we need that default value in
404  // certain circumstances.
405 
406  std::size_t precursor_spectrum_index =
407  std::numeric_limits<std::size_t>::max();
408 
409  // The spectrum ID of the precursor might be empty.
410 
411  if(precursor.spectrumID.empty())
412  {
413  // qDebug() << "The precursor's spectrum ID is empty.";
414 
415  if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
416  {
417  // qDebug()
418  //<< "Format is MGF, precursor's spectrum ID can be
419  // empty.";
420  }
421  else
422  {
423  // When performing Lumos Fusion fragmentation experiments
424  // in Tune mode and with recording, the first spectrum of
425  // the list is a fragmentation spectrum (ms level 2) that
426  // has no identity for the precursor spectrum because
427  // there is no full scan accquisition.
428  }
429  }
430  // End of
431  // if(precursor.spectrumID.empty())
432  else
433  {
434  // We could get a native precursor spectrum id, so convert
435  // that native id to a spectrum index.
436 
437  qualified_mass_spectrum.setPrecursorNativeId(
438  QString::fromStdString(precursor.spectrumID));
439 
440  if(qualified_mass_spectrum.getPrecursorNativeId().isEmpty())
441  {
442  // qDebug() << "The native id of the precursor spectrum is
443  // empty.";
444  }
445 
446  // Get the spectrum index of the spectrum that contained the
447  // precursor ion.
448 
449  precursor_spectrum_index =
450  msp_msData->run.spectrumListPtr->find(precursor.spectrumID);
451 
452  // Note that the Mascot MGF format has a peculiar handling of
453  // the precursor ion stuff so we cannot throw.
454  if(precursor_spectrum_index ==
455  msp_msData->run.spectrumListPtr->size())
456  {
457  if(mcsp_msRunId.get()->getMzFormat() != MzFormat::MGF)
458  {
459  throw(ExceptionNotPossible(
460  "Failed to find the index of the "
461  "precursor ion's spectrum."));
462  }
463  }
464 
465  qualified_mass_spectrum.setPrecursorSpectrumIndex(
466  precursor_spectrum_index);
467 
468  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
469  // "()"
470  //<< "Set the precursor spectrum index to:"
471  //<< qualified_mass_spectrum.getPrecursorSpectrumIndex()
472  //<< "for qualified mass spectrum:"
473  //<< &qualified_mass_spectrum;
474  }
475 
476  if(!precursor.selectedIons.size())
477  {
478  qDebug()
479  << "Going to throw The spectrum has msLevel > 1 but the "
480  "precursor ions's selected ions list is empty..";
481 
482  throw(
483  ExceptionNotPossible("The spectrum has msLevel > 1 but the "
484  "precursor ions's selected ions "
485  "list is empty."));
486  }
487 
488  pwiz::msdata::SelectedIon &ion =
489  *(precursor.selectedIons.begin());
490 
491  // selected ion m/z
492 
493  pappso_double selected_ion_mz =
494  QString(
495  ion.cvParam(pwiz::cv::MS_selected_ion_m_z).value.c_str())
496  .toDouble();
497 
498  // selected ion peak intensity
499 
500  pappso_double selected_ion_peak_intensity =
501  QString(ion.cvParam(pwiz::cv::MS_peak_intensity).value.c_str())
502  .toDouble();
503 
504  // charge state
505 
506  unsigned int selected_ion_charge_state =
507  QString(ion.cvParam(pwiz::cv::MS_charge_state).value.c_str())
508  .toUInt();
509 
510  // At this point we can craft a new PrecursorIonData instance and
511  // push it back to the vector.
512 
513  PrecursorIonData precursor_ion_data(selected_ion_mz,
514  selected_ion_charge_state,
515  selected_ion_peak_intensity);
516 
517  qualified_mass_spectrum.appendPrecursorIonData(
518  precursor_ion_data);
519 
520  // General sum-up
521 
522  //qDebug()
523  //<< "Appended new PrecursorIonData:"
524  //<< "mz:"
525  //<< qualified_mass_spectrum.getPrecursorIonData().back().mz
526  //<< "charge:"
527  //<< qualified_mass_spectrum.getPrecursorIonData().back().charge
528  //<< "intensity:"
529  //<< qualified_mass_spectrum.getPrecursorIonData()
530  //.back()
531  //.intensity;
532  }
533  // End of
534  // for(auto &precursor : spectrum_p->precursors)
535  }
536  // End of
537  // if(precursor_list_size > 0)
538  else
539  {
540  // Sanity check
541 
542  // Unfortunately, logic here is defeated by some vendors that have
543  // files with MS2 spectra without <precursorList>. Thus we have
544  // spectrum_p->precursors.size() == 0 and msLevel > 1.
545 
546  // if(msLevel != 1)
547  //{
548  // throw(
549  // ExceptionNotPossible("msLevel cannot be different than 1 if "
550  //"there is not a single precursor ion."));
551  //}
552  }
553 
554  qDebug();
555 
556  // Sanity check.
557 
558  if(precursor_list_size !=
559  qualified_mass_spectrum.getPrecursorIonData().size())
560  {
561  qDebug() << "Going to throw The number of precursors in the file is "
562  "different from the number of precursors in memory.";
563 
565  QObject::tr("The number of precursors in the file is different "
566  "from the number of precursors in memory."));
567  }
568 
569  //if(precursor_list_size == 1)
570  //{
571  //qDebug() << "Trying to get the mz value of the unique precursor ion:"
572  //<< qualified_mass_spectrum.getPrecursorMz();
573  //}
574 
575  processRetentionTime(spectrum_p, qualified_mass_spectrum);
576 
577  processDriftTime(spectrum_p, qualified_mass_spectrum);
578 
579  // for(pwiz::data::CVParam cv_param : ion.cvParams)
580  //{
581  // pwiz::msdata::CVID param_id = cv_param.cvid;
582  // qDebug() << param_id;
583  // qDebug() << cv_param.cvid.c_str();
584  // qDebug() << cv_param.name().c_str();
585  // qDebug() << cv_param.value.c_str();
586  //}
587 
588  if(want_binary_data)
589  {
590 
591  // Fill-in MZIntensityPair vector for convenient access to binary
592  // data
593 
594  std::vector<pwiz::msdata::MZIntensityPair> pairs;
595  spectrum_p->getMZIntensityPairs(pairs);
596 
597  MassSpectrum spectrum;
598  double tic = 0;
599  // std::size_t iterCount = 0;
600 
601  // Iterate through the m/z-intensity pairs
602  for(std::vector<pwiz::msdata::MZIntensityPair>::const_iterator
603  it = pairs.begin(),
604  end = pairs.end();
605  it != end;
606  ++it)
607  {
608  //++iterCount;
609 
610  // qDebug() << "it->mz " << it->mz << " it->intensity" <<
611  // it->intensity;
612  if(it->intensity)
613  {
614  spectrum.push_back(DataPoint(it->mz, it->intensity));
615  tic += it->intensity;
616  }
617  }
618 
619  if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
620  {
621  // Sort peaks by mz
622  spectrum.sortMz();
623  }
624 
625  // lc = localeconv ();
626  // qDebug() << " env=" << localeconv () << " lc->decimal_point "
627  // << lc->decimal_point;
628  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()
629  // "<< spectrum.size();
630  MassSpectrumSPtr spectrum_sp = spectrum.makeMassSpectrumSPtr();
631  qualified_mass_spectrum.setMassSpectrumSPtr(spectrum_sp);
632 
633  // double sumY =
634  // qualified_mass_spectrum.getMassSpectrumSPtr()->sumY(); qDebug()
635  // <<
636  // __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
637  //<< "iterCount:" << iterCount << "Spectrum size "
638  //<< spectrum.size() << "with tic:" << tic
639  //<< "and sumY:" << sumY;
640  }
641  else
642  qualified_mass_spectrum.setMassSpectrumSPtr(nullptr);
643  }
644  catch(PappsoException &errorp)
645  {
646  qDebug() << "Going to throw";
647 
649  QObject::tr("Error reading data using the proteowizard library: %1")
650  .arg(errorp.qwhat()));
651  }
652  catch(std::exception &error)
653  {
654  qDebug() << "Going to throw";
655 
657  QObject::tr("Error reading data using the proteowizard library: %1")
658  .arg(error.what()));
659  }
660 
661  // setlocale(LC_ALL, env.c_str());
662 
663  ok = true;
664 
665  // qDebug() << "QualifiedMassSpectrum: " <<
666  // qualified_mass_spectrum.toString();
667  return qualified_mass_spectrum;
668 }
669 
670 
673  bool want_binary_data,
674  bool &ok) const
675 {
676 
677  std::string env;
678  env = setlocale(LC_ALL, "");
679  // struct lconv *lc = localeconv();
680 
681  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__
682  //<< "env=" << env.c_str()
683  //<< "lc->decimal_point:" << lc->decimal_point;
684 
685  setlocale(LC_ALL, "C");
686 
687  MassSpectrumId massSpectrumId(mcsp_msRunId);
688 
689  if(msp_msData == nullptr)
690  {
691  setlocale(LC_ALL, env.c_str());
692  return (QualifiedMassSpectrum(massSpectrumId));
693  }
694 
695  // const bool want_binary_data = true;
696 
697  pwiz::msdata::SpectrumListPtr spectrum_list_p =
698  msp_msData->run.spectrumListPtr;
699 
700  if(spectrum_index == spectrum_list_p.get()->size())
701  {
702  setlocale(LC_ALL, env.c_str());
703  throw ExceptionNotFound(
704  QObject::tr("The spectrum index cannot be equal to the size of the "
705  "spectrum list."));
706  }
707 
708  // At this point we know the spectrum index might be sane, so store it in
709  // the mass spec id object.
710  massSpectrumId.setSpectrumIndex(spectrum_index);
711 
712  pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
713  getPwizSpectrumPtr(spectrum_list_p.get(), spectrum_index, want_binary_data);
714 
715  setlocale(LC_ALL, env.c_str());
716 
717  massSpectrumId.setNativeId(
718  QString::fromStdString(native_pwiz_spectrum_sp->id));
719 
721  massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
722 }
723 
724 
725 bool
726 PwizMsRunReader::accept(const QString &file_name) const
727 {
728  // We want to know if we can handle the file_name.
729  pwiz::msdata::ReaderList reader_list;
730 
731  std::string reader_type = reader_list.identify(file_name.toStdString());
732 
733  if(!reader_type.empty())
734  return true;
735 
736  return false;
737 }
738 
739 
741 PwizMsRunReader::massSpectrumSPtr(std::size_t spectrum_index)
742 {
743  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
744  return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumSPtr();
745 }
746 
748 PwizMsRunReader::massSpectrumCstSPtr(std::size_t spectrum_index)
749 {
750  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
751  return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumCstSPtr();
752 }
753 
755 PwizMsRunReader::qualifiedMassSpectrum(std::size_t spectrum_index,
756  bool want_binary_data) const
757 {
758 
759  QualifiedMassSpectrum spectrum;
760  bool ok = false;
761 
762  spectrum =
763  qualifiedMassSpectrumFromPwizMSData(spectrum_index, want_binary_data, ok);
764 
765  if(mcsp_msRunId->getMzFormat() == pappso::MzFormat::MGF)
766  {
767  if(spectrum.getRtInSeconds() == 0)
768  {
769  // spectrum = qualifiedMassSpectrumFromPwizMSData(scan_num - 1);
770  }
771  }
772 
773  // if(!ok)
774  // qDebug() << "Encountered a mass spectrum for which the status is bad.";
775 
776  return spectrum;
777 }
778 
779 
780 void
783 {
784 
785  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()";
786 
787  // We want to iterate in the pwiz-spectrum-list and for each pwiz-spectrum
788  // create a pappso-spectrum (QualifiedMassSpectrum). Once the pappso mass
789  // spectrum has been fully qualified (that is, the member data have been
790  // set), it is transferred to the handler passed as parameter to this
791  // function for the consumer to do what it wants with it.
792 
793  // Does the handler consuming the mass spectra read from file want these
794  // mass spectra to hold the binary data arrays (mz/i vectors)?
795 
796  const bool want_binary_data = handler.needPeakList();
797 
798 
799  std::string env;
800  env = setlocale(LC_ALL, "");
801  setlocale(LC_ALL, "C");
802 
803 
804  // We access the pwiz-mass-spectra via the spectrumListPtr that sits in the
805  // run member of msp_msData.
806 
807  pwiz::msdata::SpectrumListPtr spectrum_list_p =
808  msp_msData->run.spectrumListPtr;
809 
810  // We'll need it to perform the looping in the spectrum list.
811  std::size_t spectrum_list_size = spectrum_list_p.get()->size();
812 
813  // qDebug() << "The spectrum list has size:" << spectrum_list_size;
814 
815  // Inform the handler of the spectrum list so that it can handle feedback to
816  // the user.
817  handler.spectrumListHasSize(spectrum_list_size);
818 
819  // Iterate in the full list of spectra.
820 
821  for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
822  {
823 
824  // If the user of this reader instance wants to stop reading the
825  // spectra, then break this loop.
826  if(handler.shouldStop())
827  {
828  qDebug() << "The operation was cancelled. Breaking the loop.";
829  break;
830  }
831 
832  // Get the native pwiz-spectrum from the spectrum list.
833  // Note that this pointer is a shared pointer from pwiz.
834 
835  pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
836  getPwizSpectrumPtr(spectrum_list_p.get(), iter, want_binary_data);
837 
838  /*
839  * we want to load metadata of the spectrum even if it does not contain
840  peaks
841 
842  * if(!native_pwiz_spectrum_sp->hasBinaryData())
843  {
844  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
845  "
846  ()"
847  //<< "native pwiz spectrum is empty, continuing.";
848  continue;
849  }
850  */
851 
852  // Instantiate the mass spectrum id that will hold critical information
853  // like the the native id string and the spectrum index.
854 
855  MassSpectrumId massSpectrumId(mcsp_msRunId, iter /* spectrum index*/);
856 
857  // Get the spectrum native id as a QString to store it in the mass
858  // spectrum id class. This is will allow later to refer to the same
859  // spectrum starting back from the file.
860 
861  QString native_id = QString::fromStdString(native_pwiz_spectrum_sp->id);
862  massSpectrumId.setNativeId(native_id);
863 
864  // Finally, instantiate the qualified mass spectrum with its id. This
865  // function will continue performing pappso-spectrum detailed
866  // qualification.
867 
868  bool ok = false;
869 
870  QualifiedMassSpectrum qualified_mass_spectrum =
872  massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
873 
874  if(!ok)
875  {
876  // qDebug() << "Encountered a mass spectrum for which the returned "
877  //"status is bad.";
878  continue;
879  }
880 
881  // Before handing the mass spectrum out to the handler, see if the
882  // native mass spectrum was empty or not.
883 
884  // if(!native_pwiz_spectrum_sp->defaultArrayLength)
885  // qDebug() << "The mass spectrum has not defaultArrayLength";
886 
887  qualified_mass_spectrum.setEmptyMassSpectrum(
888  !native_pwiz_spectrum_sp->defaultArrayLength);
889 
890  // The handler will receive the index of the mass spectrum in the
891  // current run via the mass spectrum id member datum.
892  handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
893  }
894 
895  setlocale(LC_ALL, env.c_str());
896  // End of
897  // for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
898 
899  // Now let the loading handler know that the loading of the data has ended.
900  // The handler might need this "signal" to perform additional tasks or to
901  // cleanup cruft.
902 
903  // qDebug() << "Loading ended";
904  handler.loadingEnded();
905 }
906 
907 
908 std::size_t
910 {
911  return msp_msData->run.spectrumListPtr.get()->size();
912 }
913 
914 bool
916 {
917  return m_hasScanNumbers;
918 }
919 
920 
921 } // namespace pappso
pappso::PwizMsRunReader::initialize
virtual void initialize() override
Definition: pwizmsrunreader.cpp:65
pappso::MassSpectrum::makeMassSpectrumSPtr
MassSpectrumSPtr makeMassSpectrumSPtr() const
Definition: massspectrum.cpp:126
pappso::pappso_double
double pappso_double
A type definition for doubles.
Definition: types.h:48
pappso::MassSpectrumCstSPtr
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
Definition: massspectrum.h:55
pappso::QualifiedMassSpectrum::getMassSpectrumSPtr
MassSpectrumSPtr getMassSpectrumSPtr() const
Get the MassSpectrumSPtr.
Definition: qualifiedmassspectrum.cpp:133
pappso::MsRunReader
base class to read MSrun the only way to build a MsRunReader object is to use the MsRunReaderFactory
Definition: msrunreader.h:158
pappso::PwizMsRunReader::qualifiedMassSpectrumFromPwizMSData
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizMSData(std::size_t spectrum_index, bool want_binary_data, bool &ok) const
Definition: pwizmsrunreader.cpp:672
pappso::SpectrumCollectionHandlerInterface::spectrumListHasSize
virtual void spectrumListHasSize(std::size_t size)
Definition: msrunreader.cpp:55
pappso::PwizMsRunReader::m_hasScanNumbers
bool m_hasScanNumbers
Definition: pwizmsrunreader.h:95
pappso::PwizMsRunReader::hasScanNumbers
virtual bool hasScanNumbers() const override
tells if spectra can be accessed using scan numbers by default, it returns false. Only overrided func...
Definition: pwizmsrunreader.cpp:915
pappso::PwizMsRunReader::massSpectrumSPtr
virtual MassSpectrumSPtr massSpectrumSPtr(std::size_t spectrum_index) override
get a MassSpectrumSPtr class given its spectrum index
Definition: pwizmsrunreader.cpp:741
pappso
tries to keep as much as possible monoisotopes, removing any possible C13 peaks
Definition: aa.cpp:39
pappso::MassSpectrum
Class to represent a mass spectrum.
Definition: massspectrum.h:71
pappso::SpectrumCollectionHandlerInterface::setQualifiedMassSpectrum
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum)=0
pappso::PwizMsRunReader::qualifiedMassSpectrumFromPwizSpectrumPtr
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizSpectrumPtr(const MassSpectrumId &massSpectrumId, pwiz::msdata::Spectrum *spectrum_p, bool want_binary_data, bool &ok) const
Definition: pwizmsrunreader.cpp:351
pappso::MsRunIdCstSPtr
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition: msrunid.h:44
pappso::MassSpectrum::sortMz
void sortMz()
Sort the DataPoint instances of this spectrum.
Definition: massspectrum.cpp:202
pappso::DataPoint
Definition: datapoint.h:21
pappso::PwizMsRunReader::~PwizMsRunReader
virtual ~PwizMsRunReader()
Definition: pwizmsrunreader.cpp:164
pappso::SpectrumCollectionHandlerInterface::shouldStop
virtual bool shouldStop()
Definition: msrunreader.cpp:46
pappso::ExceptionNotPossible
Definition: exceptionnotpossible.h:32
pappso::MsRunReader::mcsp_msRunId
MsRunIdCstSPtr mcsp_msRunId
Definition: msrunreader.h:207
pappso::QualifiedMassSpectrum::getPrecursorIonData
const std::vector< PrecursorIonData > & getPrecursorIonData() const
Definition: qualifiedmassspectrum.cpp:438
pappso::QualifiedMassSpectrum::setMassSpectrumSPtr
void setMassSpectrumSPtr(MassSpectrumSPtr massSpectrum)
Set the MassSpectrumSPtr.
Definition: qualifiedmassspectrum.cpp:125
pappso::SpectrumCollectionHandlerInterface::loadingEnded
virtual void loadingEnded()
Definition: msrunreader.cpp:51
pappso::QualifiedMassSpectrum::setMsLevel
void setMsLevel(uint ms_level)
Set the mass spectrum level.
Definition: qualifiedmassspectrum.cpp:179
pappso::QualifiedMassSpectrum::setRtInSeconds
void setRtInSeconds(pappso_double rt)
Set the retention time in seconds.
Definition: qualifiedmassspectrum.cpp:195
pappso::PwizMsRunReader::processRetentionTime
bool processRetentionTime(pwiz::msdata::Spectrum *spectrum_p, QualifiedMassSpectrum &qualified_mass_spectrum) const
Definition: pwizmsrunreader.cpp:215
pappso::PwizMsRunReader::processDriftTime
bool processDriftTime(pwiz::msdata::Spectrum *spectrum_p, QualifiedMassSpectrum &qualified_mass_spectrum) const
Definition: pwizmsrunreader.cpp:283
pappso::QualifiedMassSpectrum
Class representing a fully specified mass spectrum.
Definition: qualifiedmassspectrum.h:85
pappso::Utils::toUtf8StandardString
static std::string toUtf8StandardString(const QString &text)
Definition: utils.cpp:115
pappso::QualifiedMassSpectrum::setPrecursorNativeId
void setPrecursorNativeId(const QString &native_id)
Set the scan native id of the precursor ion.
Definition: qualifiedmassspectrum.cpp:417
pappso::SpectrumCollectionHandlerInterface::needPeakList
virtual bool needPeakList() const =0
tells if we need the peak list (if we want the binary data) for each spectrum
pappso::QualifiedMassSpectrum::appendPrecursorIonData
void appendPrecursorIonData(const PrecursorIonData &precursor_ion_data)
Definition: qualifiedmassspectrum.cpp:430
pappso::QualifiedMassSpectrum::getPrecursorNativeId
const QString & getPrecursorNativeId() const
Definition: qualifiedmassspectrum.cpp:423
pappso::PwizMsRunReader::massSpectrumCstSPtr
virtual MassSpectrumCstSPtr massSpectrumCstSPtr(std::size_t spectrum_index) override
Definition: pwizmsrunreader.cpp:748
pappso::ExceptionNotFound
Definition: exceptionnotfound.h:32
pappso::QualifiedMassSpectrum::getMassSpectrumCstSPtr
MassSpectrumCstSPtr getMassSpectrumCstSPtr() const
Get the MassSpectrumCstSPtr.
Definition: qualifiedmassspectrum.cpp:141
pappso::MassSpectrumId
Definition: massspectrumid.h:38
pappso::QualifiedMassSpectrum::setEmptyMassSpectrum
void setEmptyMassSpectrum(bool is_empty_mass_spectrum)
Definition: qualifiedmassspectrum.cpp:164
pappso::QualifiedMassSpectrum::setPrecursorSpectrumIndex
void setPrecursorSpectrumIndex(std::size_t precursor_scan_num)
Set the scan number of the precursor ion.
Definition: qualifiedmassspectrum.cpp:400
pappso::PwizMsRunReader::accept
virtual bool accept(const QString &file_name) const override
tells if the reader is able to handle this file must be implemented by private MS run reader,...
Definition: pwizmsrunreader.cpp:726
pappso::PappsoException::qwhat
virtual const QString & qwhat() const
Definition: pappsoexception.h:66
pappso::PwizMsRunReader::qualifiedMassSpectrum
virtual QualifiedMassSpectrum qualifiedMassSpectrum(std::size_t spectrum_index, bool want_binary_data=true) const override
get a QualifiedMassSpectrum class given its scan number
Definition: pwizmsrunreader.cpp:755
pappso::PwizMsRunReader::PwizMsRunReader
PwizMsRunReader(MsRunIdCstSPtr &msrun_id_csp)
Definition: pwizmsrunreader.cpp:53
pappso::MassSpectrumId::setSpectrumIndex
void setSpectrumIndex(std::size_t index)
Definition: massspectrumid.cpp:103
pappso::QualifiedMassSpectrum::setDtInMilliSeconds
void setDtInMilliSeconds(pappso_double rt)
Set the drift time in milliseconds.
Definition: qualifiedmassspectrum.cpp:219
pappso::MassSpectrumId::setNativeId
void setNativeId(const QString &native_id)
Definition: massspectrumid.cpp:89
pappso::PwizMsRunReader::msp_msData
pwiz::msdata::MSDataPtr msp_msData
Definition: pwizmsrunreader.h:70
pappso::PwizMsRunReader::getPwizSpectrumPtr
pwiz::msdata::SpectrumPtr getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list, std::size_t spectrum_index, bool want_binary_data) const
Definition: pwizmsrunreader.cpp:170
pwizmsrunreader.h
MSrun file reader based on the ProteoWizard library.
pappso::PwizMsRunReader::spectrumListSize
virtual std::size_t spectrumListSize() const override
get the total number of spectra contained in the MSrun data file
Definition: pwizmsrunreader.cpp:909
pappso::PrecursorIonData
Definition: qualifiedmassspectrum.h:62
pappso::SpectrumCollectionHandlerInterface
interface to collect spectrums from the MsRunReader class
Definition: msrunreader.h:59
pappso::MassSpectrumSPtr
std::shared_ptr< MassSpectrum > MassSpectrumSPtr
Definition: massspectrum.h:54
pappso::PappsoException
Definition: pappsoexception.h:42
pappso::QualifiedMassSpectrum::getRtInSeconds
pappso_double getRtInSeconds() const
Get the retention time in seconds.
Definition: qualifiedmassspectrum.cpp:203
pappso::PwizMsRunReader::readSpectrumCollection
virtual void readSpectrumCollection(SpectrumCollectionHandlerInterface &handler) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler
Definition: pwizmsrunreader.cpp:781