70 const QString &protein_seq_in,
71 const QString &protein_id)
74 m_best_alignment.reset();
75 m_best_corrected_alignment.reset();
76 m_best_post_processed_alignment.reset();
78 std::size_t sequence_length = protein_seq_in.size();
79 QString protein_seq(protein_seq_in);
80 std::reverse(protein_seq.begin(), protein_seq.end());
82 std::vector<AaPosition> aa_positions;
83 m_interest_cells.at(0).n_row = 0;
84 m_interest_cells.at(0).score = 0;
85 m_interest_cells.at(0).beginning = 0;
86 m_interest_cells.at(0).tree_id = 0;
88 for(std::size_t i = 1; i < m_interest_cells.size(); i++)
90 m_interest_cells.at(i).n_row = 0;
92 m_interest_cells.at(i).beginning = 0;
93 m_interest_cells.at(i).tree_id = 0;
96 for(std::size_t iter = m_interest_cells.size(); iter < spectrum.size(); iter++)
98 m_interest_cells.push_back({0, m_scorevalues.get(
ScoreType::init), 0, 0});
100 m_location_saver.resetLocationSaver();
101 for(std::size_t row_number = 1; row_number <= sequence_length; row_number++)
103 if(protein_seq[row_number - 1] ==
'U' or protein_seq[row_number - 1] ==
'X')
116 updateAlignmentMatrix(protein_seq, row_number, aa_positions, spectrum,
true, protein_id);
122 const QString &protein_seq,
123 const QString &protein_id,
124 const std::size_t beginning,
125 const std::size_t length)
129 if((qsizetype)(beginning + length) <= protein_seq.size())
135 length2 = protein_seq.size() - beginning;
139 QString sequence = protein_seq.sliced(protein_seq.size() - beginning - length2, length2);
140 std::reverse(sequence.begin(), sequence.end());
141 std::vector<AaPosition> aa_positions;
145 m_scenario.reserve(length2 + 1, spectrum.size());
146 m_interest_cells.reserve(spectrum.size());
147 m_interest_cells.at(0).n_row = 0;
148 m_interest_cells.at(0).score = 0;
149 m_interest_cells.at(0).beginning = 0;
150 m_interest_cells.at(0).tree_id = 0;
151 for(std::size_t i = 1; i < m_interest_cells.size(); i++)
153 m_interest_cells.at(i).n_row = 0;
155 m_interest_cells.at(i).beginning = 0;
156 m_interest_cells.at(i).tree_id = 0;
159 for(std::size_t iter = m_interest_cells.size(); iter < spectrum.size(); iter++)
161 m_interest_cells.push_back({0, m_scorevalues.get(
ScoreType::init), 0, 0});
164 m_scenario.resetScenario();
166 for(std::size_t row_number = 1; row_number <= length2; row_number++)
168 qDebug() <<
"row_number - 1=" << row_number - 1 <<
" sequence.size()=" << sequence.size();
169 if(sequence[row_number - 1] ==
'U' or sequence[row_number - 1] ==
'X')
174 qDebug() <<
"row_number - 1=" << row_number - 1 <<
" sequence.size()=" << sequence.size();
178 updateAlignmentMatrix(sequence, row_number, aa_positions, spectrum,
false, protein_id);
181 saveBestAlignment(sequence, spectrum, protein_seq.size() - beginning);
186 if(m_scenario.getBestScore() >
190 m_best_corrected_alignment.score = 0;
191 for(std::size_t iter : m_best_alignment.peaks)
197 else if(std::find(m_best_alignment.peaks.begin(),
198 m_best_alignment.peaks.end(),
205 std::vector<std::vector<std::size_t>> corrections = correction_tree.
getPeaks();
206 if(corrections.size() > 0)
208 m_best_alignment.score = 0;
211 for(
auto peaks_to_remove : corrections)
215 1, sequence, protein_id, spectrum, peaks_to_remove, protein_seq.size() - beginning);
219 m_best_alignment = m_best_corrected_alignment;
227 const QString &sequence,
228 const QString &protein_id,
230 std::vector<std::size_t> peaks_to_remove,
233 qDebug() << recursive_call_count;
234 std::vector<AaPosition> aa_positions;
237 m_interest_cells.at(0).n_row = 0;
238 m_interest_cells.at(0).score = 0;
239 m_interest_cells.at(0).beginning = 0;
240 m_interest_cells.at(0).tree_id = 0;
241 for(std::size_t i = 1; i < m_interest_cells.size(); i++)
243 m_interest_cells.at(i).n_row = 0;
245 m_interest_cells.at(i).beginning = 0;
246 m_interest_cells.at(i).tree_id = 0;
249 m_scenario.resetScenario();
251 for(qsizetype row_number = 1; row_number <= sequence.size(); row_number++)
253 qDebug() << row_number - 1 <<
" " << sequence.size();
254 if(sequence[row_number - 1] ==
'U' or sequence[row_number - 1] ==
'X')
263 updateAlignmentMatrix(sequence, row_number, aa_positions, spectrum,
false, protein_id);
270 qDebug() << m_scenario.getBestScore();
271 if(m_scenario.getBestScore() >
275 qDebug() << sequence <<
" " << recursive_call_count;
278 saveBestAlignment(sequence, spectrum, offset);
280 for(std::size_t iter : m_best_alignment.peaks)
286 else if(std::find(m_best_alignment.peaks.begin(),
287 m_best_alignment.peaks.end(),
293 std::vector<std::vector<std::size_t>> corrections = correction_tree.
getPeaks();
294 if(corrections.size() > 0)
296 for(
auto peaks_to_remove : corrections)
299 recursive_call_count + 1, sequence, protein_id, spectrum, peaks_to_remove, offset);
302 else if(m_scenario.getBestScore() > m_best_corrected_alignment.score)
304 m_best_corrected_alignment = m_best_alignment;
339 const QString &sequence,
340 const std::size_t row_number,
341 const std::vector<AaPosition> aa_positions,
343 const bool fast_align,
344 const QString &protein)
346 int score_found, score_shift, best_score, alt_score, tree_id;
348 std::size_t best_column,
shift, beginning, missing_aas, length, perfect_shift_origin;
349 KeyCell *current_cell_ptr, *tested_cell_ptr;
350 AlignType alignment_type, temp_align_type;
352 m_updated_cells.reserve(aa_positions.size());
360 condition += 2 << m_aaCode.getAaCode(sequence[row_number - 2].unicode());
361 qDebug() <<
"condition" << condition << sequence[row_number - 2]
362 << m_aaCode.getAaCode(sequence[row_number - 2].unicode());
366 for(std::vector<AaPosition>::const_iterator aa_position = aa_positions.begin();
367 aa_position != aa_positions.end();
370 if(((condition & aa_position->condition) != 0) ||
373 current_cell_ptr = &m_interest_cells.at(aa_position->r_peak);
374 if(spectrum.
peakType(aa_position->r_peak) ==
387 best_column = aa_position->r_peak;
388 best_score = current_cell_ptr->
score + (row_number - current_cell_ptr->
n_row) *
391 tree_id = current_cell_ptr->
tree_id;
395 if(aa_position->l_support)
397 tested_cell_ptr = &m_interest_cells.at(aa_position->l_peak);
398 if(aa_position->l_peak == 0)
400 alt_score = tested_cell_ptr->
score + score_found;
404 if(tested_cell_ptr->
n_row == row_number - 1)
406 alt_score = tested_cell_ptr->
score +
407 (row_number - tested_cell_ptr->
n_row - 1) *
413 alt_score = tested_cell_ptr->
score +
414 (row_number - tested_cell_ptr->
n_row - 1) *
419 if(alt_score >= best_score)
422 best_score = alt_score;
423 best_column = aa_position->l_peak;
437 tree_id = m_location_saver.getNextTree();
443 tree_id = tested_cell_ptr->
tree_id;
450 while(shift < aa_position->next_l_peak)
452 tested_cell_ptr = &m_interest_cells.at(aa_position->next_l_peak -
shift);
454 if(perfectShiftPossible(sequence,
456 tested_cell_ptr->
n_row,
458 aa_position->next_l_peak -
shift,
459 aa_position->r_peak))
461 alt_score = tested_cell_ptr->
score +
462 (row_number - tested_cell_ptr->
n_row - 1) *
469 alt_score = tested_cell_ptr->
score +
470 (row_number - tested_cell_ptr->
n_row - 1) *
475 if(alt_score > best_score)
477 alignment_type = temp_align_type;
478 best_score = alt_score;
479 best_column = aa_position->next_l_peak -
shift;
481 tree_id = tested_cell_ptr->
tree_id;
487 tested_cell_ptr = &m_interest_cells.at(0);
489 perfect_shift_origin =
490 perfectShiftPossibleFrom0(sequence, spectrum, row_number, aa_position->r_peak);
491 if(perfect_shift_origin != row_number)
493 alt_score = tested_cell_ptr->
score + score_found;
498 alt_score = tested_cell_ptr->
score + score_shift;
501 if(alt_score > best_score)
503 alignment_type = temp_align_type;
504 best_score = alt_score;
507 std::floor(spectrum.
getMZShift(0, aa_position->l_peak) / m_aaCode.getMass(
'G'));
514 beginning = std::max((std::size_t)(row_number - missing_aas -
ALIGNMENT_SURPLUS),
519 tree_id = m_location_saver.getNextTree();
522 if(best_column != aa_position->r_peak)
524 m_updated_cells.push_back(
525 {aa_position->r_peak, {row_number, best_score, beginning, tree_id}});
527 if(best_score > m_location_saver.getMinScore(tree_id) && fast_align)
530 row_number - beginning + 1 +
531 std::ceil(spectrum.
getMissingMass(aa_position->r_peak) / m_aaCode.getMass(
'G')) +
533 m_location_saver.addLocation(beginning, length, tree_id, best_score, protein);
539 m_scenario.saveOrigin(row_number,
541 perfect_shift_origin,
548 m_scenario.saveOrigin(row_number,
550 m_interest_cells.at(best_column).n_row,
560 m_updated_cells.push_back({0, {row_number, 0, 0, 0}});
563 while(m_updated_cells.size() > 0)
565 m_interest_cells.at(m_updated_cells.back().first) = m_updated_cells.back().second;
566 m_updated_cells.pop_back();
667 m_best_alignment.peaks.clear();
668 m_best_alignment.shifts.clear();
669 std::vector<QString> temp_interpretation;
670 std::size_t previous_row;
671 std::size_t previous_column = 0;
672 std::size_t perfect_shift_end;
673 std::pair<std::vector<ScenarioCell>,
int> best_alignment = m_scenario.getBestAlignment();
674 m_best_alignment.score = best_alignment.second;
678 if(best_alignment.first.front().previous_row > offset)
681 QString(
"best_alignment.first.front().previous_row > offset %1 %2")
683 .arg(best_alignment.first.front().previous_row));
685 if(best_alignment.first.back().previous_row > offset)
688 QString(
"best_alignment.first.back().previous_row > offset %1 %2")
690 .arg(best_alignment.first.back().previous_row));
692 m_best_alignment.beginning = offset - best_alignment.first.front().previous_row;
693 m_best_alignment.end = offset - best_alignment.first.back().previous_row;
697 for(
auto cell : best_alignment.first)
699 switch(cell.alignment_type)
702 temp_interpretation.push_back(sequence.at(previous_row - 1));
703 if(previous_row > cell.previous_row + 1)
705 skipped_mass = m_aaCode.getMass(
706 sequence.at(previous_row - 1)
709 sequence.sliced(cell.previous_row, previous_row - cell.previous_row - 1);
710 for(
auto aa : skipped_aa)
712 temp_interpretation.push_back(
'[' + aa +
']');
713 skipped_mass += m_aaCode.getMass(aa.toLatin1());
715 temp_interpretation.push_back(
717 QString::number(spectrum.
getMZShift(cell.previous_column, previous_column) -
721 m_best_alignment.peaks.push_back(cell.previous_column);
724 temp_interpretation.push_back(
'[' + sequence.at(previous_row - 1) +
']');
727 m_best_alignment.peaks.push_back(cell.previous_column);
728 temp_interpretation.push_back(sequence.at(previous_row - 1));
729 temp_interpretation.push_back(
731 QString::number(spectrum.
getMZShift(cell.previous_column, previous_column) -
732 m_aaCode.getMass(sequence.at(previous_row - 1).toLatin1())) +
734 m_best_alignment.shifts.push_back(
735 spectrum.
getMZShift(cell.previous_column, previous_column) -
736 m_aaCode.getMass(sequence.at(previous_row - 1).toLatin1()));
739 m_best_alignment.peaks.push_back(cell.previous_column);
740 skipped_aa = sequence.sliced(cell.previous_row, previous_row - cell.previous_row);
741 std::reverse(skipped_aa.begin(), skipped_aa.end());
742 temp_interpretation.push_back(skipped_aa);
745 previous_row = cell.previous_row;
746 previous_column = cell.previous_column;
747 m_best_alignment.peaks.push_back(cell.previous_column);
750 previous_row = cell.previous_row;
751 previous_column = cell.previous_column;
753 std::reverse(temp_interpretation.begin(), temp_interpretation.end());
754 std::reverse(m_best_alignment.peaks.begin(), m_best_alignment.peaks.end());
758 MzRange zero(0, m_precision_ptr);
759 m_best_alignment.begin_shift = spectrum.
getMZShift(0, m_best_alignment.peaks.front());
760 m_best_alignment.end_shift = spectrum.
getMissingMass(m_best_alignment.peaks.back());
761 if(zero.
contains(m_best_alignment.end_shift))
763 m_best_alignment.end_shift = 0;
768 m_best_alignment.SPC = 0;
769 for(
auto peak : m_best_alignment.peaks)
771 switch(spectrum.at(peak).type)
774 qDebug() << peak <<
"native";
775 m_best_alignment.SPC += 1;
778 qDebug() << peak <<
"both";
779 m_best_alignment.SPC += 2;
782 qDebug() << peak <<
"synthetic";
785 qDebug() << peak <<
"symmetric";
786 m_best_alignment.SPC += 1;
793 if(m_best_alignment.end_shift > 0)
795 perfect_shift_end = perfectShiftPossibleEnd(sequence,
797 best_alignment.first.front().previous_row,
798 m_best_alignment.peaks.back());
799 if(perfect_shift_end != best_alignment.first.front().previous_row)
802 sequence.sliced(best_alignment.first.front().previous_row,
803 perfect_shift_end - best_alignment.first.front().previous_row);
804 for(
auto aa = skipped_aa.begin(); aa != skipped_aa.end(); aa++)
806 temp_interpretation.push_back(
'[' + *aa +
']');
808 m_best_alignment.beginning = offset - perfect_shift_end;
809 m_best_alignment.end_shift = 0;
819 m_best_alignment.interpretation =
"";
820 if(m_best_alignment.end_shift > 0)
822 m_best_alignment.interpretation +=
'[' + QString::number(m_best_alignment.end_shift) +
']';
824 for(
auto str = temp_interpretation.rbegin(); str != temp_interpretation.rend(); str++)
826 m_best_alignment.interpretation += *(str);
828 if(m_best_alignment.begin_shift > 0)
830 m_best_alignment.interpretation +=
'[' + QString::number(m_best_alignment.begin_shift) +
']';