IsoSpec 2.2.1
isoSpec++.h
1
17#pragma once
18
19#include <unordered_map>
20#include <queue>
21#include <limits>
22#include <string>
23#include <vector>
24#include "platform.h"
25#include "dirtyAllocator.h"
26#include "summator.h"
27#include "operators.h"
28#include "marginalTrek++.h"
29
30
31
32namespace IsoSpec
33{
34
35// This function is NOT guaranteed to be secure against malicious input. It should be used only for debugging.
36unsigned int parse_formula(const char* formula,
37 std::vector<double>& isotope_masses,
38 std::vector<double>& isotope_probabilities,
39 int** isotopeNumbers,
40 int** atomCounts,
41 unsigned int* confSize,
42 bool use_nominal_masses = false);
43
44
46
49class ISOSPEC_EXPORT_SYMBOL Iso {
50 private:
52
58 void setupMarginals(const double* _isotopeMasses,
59 const double* _isotopeProbabilities);
60 bool disowned;
62 protected:
66 unsigned int confSize;
67 int allDim;
70 bool doMarginalsNeedSorting() const;
71
72 public:
73 Iso();
74
76
83 Iso(
84 int _dimNumber,
85 const int* _isotopeNumbers,
86 const int* _atomCounts,
87 const double* _isotopeMasses,
88 const double* _isotopeProbabilities
89 );
90 Iso(
91 int _dimNumber,
92 const int* _isotopeNumbers,
93 const int* _atomCounts,
94 const double* const * _isotopeMasses,
95 const double* const * _isotopeProbabilities
96 );
97
99 Iso(const char* formula, bool use_nominal_masses = false); // NOLINT(runtime/explicit) - constructor deliberately left to be used as a conversion
100
102 inline Iso(const std::string& formula, bool use_nominal_masses = false) : Iso(formula.c_str(), use_nominal_masses) {} // NOLINT(runtime/explicit) - constructor deliberately left to be used as a conversion
103
105
113 static Iso FromFASTA(const char* fasta, bool use_nominal_masses = false, bool add_water = true);
114
116 static inline Iso FromFASTA(const std::string& fasta, bool use_nominal_masses = false, bool add_water = true) { return FromFASTA(fasta.c_str(), use_nominal_masses, add_water); }
117
119 Iso(Iso&& other);
120
121 /* We're not exactly following standard copy and assign semantics with Iso objects, so delete the default assign constructor just in case, so noone tries to use it. Copy ctor declared below. */
122 Iso& operator=(const Iso& other) = delete;
123
125
129 Iso(const Iso& other, bool fullcopy);
130
132 virtual ~Iso();
133
135 double getLightestPeakMass() const;
136
138 double getHeaviestPeakMass() const;
139
145 double getMonoisotopicPeakMass() const;
146
148 double getModeLProb() const;
149
151 double getUnlikeliestPeakLProb() const;
152
154 double getModeMass() const;
155
157 double getTheoreticalAverageMass() const;
158
160 double variance() const;
161
163 double stddev() const { return sqrt(variance()); }
164
166 inline int getDimNumber() const { return dimNumber; }
167
169 inline int getAllDim() const { return allDim; }
170
172 void addElement(int atomCount, int noIsotopes, const double* isotopeMasses, const double* isotopeProbabilities);
173
175 void saveMarginalLogSizeEstimates(double* priorities, double target_total_prob) const;
176};
177
178
180
183class ISOSPEC_EXPORT_SYMBOL IsoGenerator : public Iso
184{
185 public:
186 const double mode_lprob;
187
188 protected:
191 double* partialProbs;
193 public:
195
198 virtual bool advanceToNextConfiguration() = 0;
199
201
204 virtual double lprob() const { return partialLProbs[0]; }
205
207
210 virtual double mass() const { return partialMasses[0]; }
211
213
216 virtual double prob() const { return partialProbs[0]; }
217
219 virtual void get_conf_signature(int* space) const = 0;
220
222 IsoGenerator(Iso&& iso, bool alloc_partials = true); // NOLINT(runtime/explicit) - constructor deliberately left to be used as a conversion
223
225 virtual ~IsoGenerator();
226};
227
228
229
231
236class ISOSPEC_EXPORT_SYMBOL IsoOrderedGenerator: public IsoGenerator
237{
238 private:
239 MarginalTrek** marginalResults;
240 std::priority_queue<void*, pod_vector<void*>, ConfOrder> pq;
241 void* topConf;
242 DirtyAllocator allocator;
243 const pod_vector<double>** logProbs;
244 const pod_vector<double>** masses;
245 const pod_vector<Conf>** marginalConfs;
246 double currentLProb;
247 double currentMass;
248 double currentProb;
249 int ccount;
250
251 public:
252 IsoOrderedGenerator(const IsoOrderedGenerator& other) = delete;
253 IsoOrderedGenerator& operator=(const IsoOrderedGenerator& other) = delete;
254
255 bool advanceToNextConfiguration() override final;
256
258
262 inline void get_conf_signature(int* space) const override final
263 {
264 int* c = getConf(topConf);
265
266 if (ccount >= 0)
267 c[ccount]--;
268
269 for(int ii = 0; ii < dimNumber; ii++)
270 {
271 memcpy(space, marginalResults[ii]->confs()[c[ii]], isotopeNumbers[ii]*sizeof(int));
272 space += isotopeNumbers[ii];
273 }
274
275 if (ccount >= 0)
276 c[ccount]++;
277 };
278
280 IsoOrderedGenerator(Iso&& iso, int _tabSize = 1000, int _hashSize = 1000); // NOLINT(runtime/explicit) - constructor deliberately left to be used as a conversion
281
283 virtual ~IsoOrderedGenerator();
284};
285
286
287
288
290
295class ISOSPEC_EXPORT_SYMBOL IsoThresholdGenerator: public IsoGenerator
296{
297 private:
298 int* counter;
299 double* maxConfsLPSum;
300 const double Lcutoff;
301 PrecalculatedMarginal** marginalResults;
302 PrecalculatedMarginal** marginalResultsUnsorted;
303 int* marginalOrder;
304
305 const double* lProbs_ptr;
306 const double* lProbs_ptr_start;
307 double* partialLProbs_second;
308 double partialLProbs_second_val, lcfmsv;
309 bool empty;
310
311 public:
312 IsoThresholdGenerator(const IsoThresholdGenerator& other) = delete;
313 IsoThresholdGenerator& operator=(const IsoThresholdGenerator& other) = delete;
314
315 inline void get_conf_signature(int* space) const override final
316 {
317 counter[0] = lProbs_ptr - lProbs_ptr_start;
318 if(marginalOrder != nullptr)
319 {
320 for(int ii = 0; ii < dimNumber; ii++)
321 {
322 int jj = marginalOrder[ii];
323 memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[jj]), isotopeNumbers[ii]*sizeof(int));
324 space += isotopeNumbers[ii];
325 }
326 }
327 else
328 {
329 for(int ii = 0; ii < dimNumber; ii++)
330 {
331 memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[ii]), isotopeNumbers[ii]*sizeof(int));
332 space += isotopeNumbers[ii];
333 }
334 }
335 };
336
338
346 IsoThresholdGenerator(Iso&& iso, double _threshold, bool _absolute = true, int _tabSize = 1000, int _hashSize = 1000, bool reorder_marginals = true);
347
349
350 // Perform highly aggressive inling as this function is often called as while(advanceToNextConfiguration()) {}
351 // which leads to an extremely tight loop and some compilers miss this (potentially due to the length of the function).
352 ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
353 {
354 lProbs_ptr++;
355
356 if(ISOSPEC_LIKELY(*lProbs_ptr >= lcfmsv))
357 {
358 return true;
359 }
360
361 // If we reached this point, a carry is needed
362
363 int idx = 0;
364 lProbs_ptr = lProbs_ptr_start;
365
366 int * cntr_ptr = counter;
367
368 while(idx < dimNumber-1)
369 {
370 // counter[idx] = 0;
371 *cntr_ptr = 0;
372 idx++;
373 cntr_ptr++;
374 // counter[idx]++;
375 (*cntr_ptr)++;
376 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
377 if(partialLProbs[idx] + maxConfsLPSum[idx-1] >= Lcutoff)
378 {
379 partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->get_mass(counter[idx]);
380 partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->get_prob(counter[idx]);
381 recalc(idx-1);
382 return true;
383 }
384 }
385
386 terminate_search();
387 return false;
388 }
389
390
391 ISOSPEC_FORCE_INLINE double lprob() const override final { return partialLProbs_second_val + (*(lProbs_ptr)); }
392 ISOSPEC_FORCE_INLINE double mass() const override final { return partialMasses[1] + marginalResults[0]->get_mass(lProbs_ptr - lProbs_ptr_start); }
393 ISOSPEC_FORCE_INLINE double prob() const override final { return partialProbs[1] * marginalResults[0]->get_prob(lProbs_ptr - lProbs_ptr_start); }
394
396 void terminate_search();
397
402 void reset();
403
408 size_t count_confs();
409
410 private:
412 ISOSPEC_FORCE_INLINE void recalc(int idx)
413 {
414 for(; idx > 0; idx--)
415 {
416 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
417 partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->get_mass(counter[idx]);
418 partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->get_prob(counter[idx]);
419 }
420 partialLProbs_second_val = *partialLProbs_second;
421 partialLProbs[0] = *partialLProbs_second + marginalResults[0]->get_lProb(counter[0]);
422 lcfmsv = Lcutoff - partialLProbs_second_val;
423 }
424
425 ISOSPEC_FORCE_INLINE void short_recalc(int idx)
426 {
427 for(; idx > 0; idx--)
428 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
429 partialLProbs_second_val = *partialLProbs_second;
430 partialLProbs[0] = *partialLProbs_second + marginalResults[0]->get_lProb(counter[0]);
431 lcfmsv = Lcutoff - partialLProbs_second_val;
432 }
433};
434
435
436
437
438
439class ISOSPEC_EXPORT_SYMBOL IsoLayeredGenerator : public IsoGenerator
440{
441 private:
442 int* counter;
443 double* maxConfsLPSum;
444 double currentLThreshold, lastLThreshold;
445 LayeredMarginal** marginalResults;
446 LayeredMarginal** marginalResultsUnsorted;
447 int* marginalOrder;
448
449 const double* lProbs_ptr;
450 const double* lProbs_ptr_start;
451 const double** resetPositions;
452 double* partialLProbs_second;
453 double partialLProbs_second_val, lcfmsv, last_lcfmsv;
454 bool marginalsNeedSorting;
455
456
457 public:
458 IsoLayeredGenerator(const IsoLayeredGenerator& other) = delete;
459 IsoLayeredGenerator& operator=(const IsoLayeredGenerator& other) = delete;
460
461 inline void get_conf_signature(int* space) const override final
462 {
463 counter[0] = lProbs_ptr - lProbs_ptr_start;
464 if(marginalOrder != nullptr)
465 {
466 for(int ii = 0; ii < dimNumber; ii++)
467 {
468 int jj = marginalOrder[ii];
469 memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[jj]), isotopeNumbers[ii]*sizeof(int));
470 space += isotopeNumbers[ii];
471 }
472 }
473 else
474 {
475 for(int ii = 0; ii < dimNumber; ii++)
476 {
477 memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[ii]), isotopeNumbers[ii]*sizeof(int));
478 space += isotopeNumbers[ii];
479 }
480 }
481 };
482
483 inline double get_currentLThreshold() const { return currentLThreshold; }
484
485 IsoLayeredGenerator(Iso&& iso, int _tabSize = 1000, int _hashSize = 1000, bool reorder_marginals = true, double t_prob_hint = 0.99); // NOLINT(runtime/explicit) - constructor deliberately left to be used as a conversion
486
487 ~IsoLayeredGenerator();
488
489 ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
490 {
491 do
492 {
493 if(advanceToNextConfigurationWithinLayer())
494 return true;
495 } while(IsoLayeredGenerator::nextLayer(-2.0));
496 return false;
497 }
498
499 ISOSPEC_FORCE_INLINE bool advanceToNextConfigurationWithinLayer()
500 {
501 do{
502 lProbs_ptr++;
503
504 if(ISOSPEC_LIKELY(*lProbs_ptr >= lcfmsv))
505 return true;
506 }
507 while(carry()); // NOLINT(whitespace/empty_loop_body) - cpplint bug, that's not an empty loop body, that's a do{...}while(...) construct
508 return false;
509 }
510
511 ISOSPEC_FORCE_INLINE double lprob() const override final { return partialLProbs_second_val + (*(lProbs_ptr)); };
512 ISOSPEC_FORCE_INLINE double mass() const override final { return partialMasses[1] + marginalResults[0]->get_mass(lProbs_ptr - lProbs_ptr_start); };
513 ISOSPEC_FORCE_INLINE double prob() const override final { return partialProbs[1] * marginalResults[0]->get_prob(lProbs_ptr - lProbs_ptr_start); };
514
516 void terminate_search();
517
518
520 ISOSPEC_FORCE_INLINE void recalc(int idx)
521 {
522 for(; idx > 0; idx--)
523 {
524 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
525 partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->get_mass(counter[idx]);
526 partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->get_prob(counter[idx]);
527 }
528 partialLProbs_second_val = *partialLProbs_second;
529 partialLProbs[0] = partialLProbs_second_val + marginalResults[0]->get_lProb(counter[0]);
530 lcfmsv = currentLThreshold - partialLProbs_second_val;
531 last_lcfmsv = lastLThreshold - partialLProbs_second_val;
532 }
533
534 bool nextLayer(double offset);
535
536 private:
537 bool carry();
538};
539
540
541
543{
545 size_t to_sample_left;
546 const double precision;
547 const double beta_bias;
548 double confs_prob;
549 double chasing_prob;
550 size_t current_count;
551
552 public:
553 IsoStochasticGenerator(Iso&& iso, size_t no_molecules, double precision = 0.9999, double beta_bias = 5.0);
554
555 ISOSPEC_FORCE_INLINE size_t count() const { return current_count; }
556
557 ISOSPEC_FORCE_INLINE double mass() const override final { return ILG.mass(); }
558
559 ISOSPEC_FORCE_INLINE double prob() const override final { return static_cast<double>(count()); }
560
561 ISOSPEC_FORCE_INLINE double lprob() const override final { return log(prob()); }
562
563 ISOSPEC_FORCE_INLINE void get_conf_signature(int* space) const override final { ILG.get_conf_signature(space); }
564
565 ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
566 {
567 /* This function will be used mainly in very small, tight loops, therefore it makes sense to
568 * aggressively inline it, despite its seemingly large body.
569 */
570 while(true)
571 {
572 double curr_conf_prob_left, current_prob;
573
574 if(to_sample_left <= 0)
575 return false;
576
577 if(confs_prob < chasing_prob)
578 {
579 // Beta was last
580 current_count = 1;
581 to_sample_left--;
583 return false;
584 current_prob = ILG.prob();
585 confs_prob += current_prob;
586 while(confs_prob <= chasing_prob)
587 {
589 return false;
590 current_prob = ILG.prob();
591 confs_prob += current_prob;
592 }
593 if(to_sample_left <= 0)
594 return true;
595 curr_conf_prob_left = confs_prob - chasing_prob;
596 }
597 else
598 {
599 // Binomial was last
600 current_count = 0;
602 return false;
603 current_prob = ILG.prob();
604 confs_prob += current_prob;
605 curr_conf_prob_left = current_prob;
606 }
607
608 double prob_left_to_1 = precision - chasing_prob;
609 double expected_confs = curr_conf_prob_left * to_sample_left / prob_left_to_1;
610
611 if(expected_confs <= beta_bias)
612 {
613 // Beta mode: we keep making beta jumps until we leave the current configuration
614 chasing_prob += rdvariate_beta_1_b(to_sample_left) * prob_left_to_1;
615 while(chasing_prob <= confs_prob)
616 {
617 current_count++;
618 to_sample_left--;
619 if(to_sample_left == 0)
620 return true;
621 prob_left_to_1 = precision - chasing_prob;
622 chasing_prob += rdvariate_beta_1_b(to_sample_left) * prob_left_to_1;
623 }
624 if(current_count > 0)
625 return true;
626 }
627 else
628 {
629 // Binomial mode: a single binomial step
630 size_t rbin = rdvariate_binom(to_sample_left, curr_conf_prob_left/prob_left_to_1);
631 current_count += rbin;
632 to_sample_left -= rbin;
633 chasing_prob = confs_prob;
634 if(current_count > 0)
635 return true;
636 }
637 };
638 }
639};
640
641
642} // namespace IsoSpec
The generator of isotopologues.
Definition: isoSpec++.h:184
virtual void get_conf_signature(int *space) const =0
Write the signature of configuration into target memory location. It must be large enough to accomoda...
virtual bool advanceToNextConfiguration()=0
Advance to the next, not yet visited, most probable isotopologue.
virtual double mass() const
Get the mass of the current isotopologue.
Definition: isoSpec++.h:210
double * partialLProbs
Definition: isoSpec++.h:189
virtual double lprob() const
Get the log-probability of the current isotopologue.
Definition: isoSpec++.h:204
virtual double prob() const
Get the probability of the current isotopologue.
Definition: isoSpec++.h:216
double * partialMasses
Definition: isoSpec++.h:190
The Iso class for the calculation of the isotopic distribution.
Definition: isoSpec++.h:49
double stddev() const
Get the standard deviation of the theoretical distribution.
Definition: isoSpec++.h:163
static Iso FromFASTA(const std::string &fasta, bool use_nominal_masses=false, bool add_water=true)
Constructor (named) from aminoacid FASTA sequence as C++ std::string. See above for details.
Definition: isoSpec++.h:116
int getDimNumber() const
Get the number of elements in the chemical formula of the molecule.
Definition: isoSpec++.h:166
int getAllDim() const
Get the total number of isotopes of elements present in a chemical formula.
Definition: isoSpec++.h:169
int * isotopeNumbers
Definition: isoSpec++.h:64
unsigned int confSize
Definition: isoSpec++.h:66
int dimNumber
Definition: isoSpec++.h:63
int * atomCounts
Definition: isoSpec++.h:65
Iso(const std::string &formula, bool use_nominal_masses=false)
Constructor from C++ std::string chemical formula.
Definition: isoSpec++.h:102
Marginal ** marginals
Definition: isoSpec++.h:68
ISOSPEC_FORCE_INLINE double lprob() const override final
Get the log-probability of the current isotopologue.
Definition: isoSpec++.h:511
ISOSPEC_FORCE_INLINE double mass() const override final
Get the mass of the current isotopologue.
Definition: isoSpec++.h:512
ISOSPEC_FORCE_INLINE void recalc(int idx)
Recalculate the current partial log-probabilities, masses, and probabilities.
Definition: isoSpec++.h:520
ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
Definition: isoSpec++.h:489
void get_conf_signature(int *space) const override final
Write the signature of configuration into target memory location. It must be large enough to accomoda...
Definition: isoSpec++.h:461
ISOSPEC_FORCE_INLINE double prob() const override final
Get the probability of the current isotopologue.
Definition: isoSpec++.h:513
The generator of isotopologues sorted by their probability of occurrence.
Definition: isoSpec++.h:237
void get_conf_signature(int *space) const override final
Save the counts of isotopes in the space.
Definition: isoSpec++.h:262
ISOSPEC_FORCE_INLINE double mass() const override final
Get the mass of the current isotopologue.
Definition: isoSpec++.h:557
ISOSPEC_FORCE_INLINE void get_conf_signature(int *space) const override final
Write the signature of configuration into target memory location. It must be large enough to accomoda...
Definition: isoSpec++.h:563
ISOSPEC_FORCE_INLINE double lprob() const override final
Get the log-probability of the current isotopologue.
Definition: isoSpec++.h:561
ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
Definition: isoSpec++.h:565
ISOSPEC_FORCE_INLINE double prob() const override final
Get the probability of the current isotopologue.
Definition: isoSpec++.h:559
The generator of isotopologues above a given threshold value.
Definition: isoSpec++.h:296
ISOSPEC_FORCE_INLINE double lprob() const override final
Get the log-probability of the current isotopologue.
Definition: isoSpec++.h:391
void get_conf_signature(int *space) const override final
Write the signature of configuration into target memory location. It must be large enough to accomoda...
Definition: isoSpec++.h:315
ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
Definition: isoSpec++.h:352
ISOSPEC_FORCE_INLINE double prob() const override final
Get the probability of the current isotopologue.
Definition: isoSpec++.h:393
ISOSPEC_FORCE_INLINE double mass() const override final
Get the mass of the current isotopologue.
Definition: isoSpec++.h:392
LayeredMarginal class.
double get_mass(int idx) const
get the mass of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_mass.
double get_prob(int idx) const
get the probability of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_eProb.
double get_lProb(int idx) const
get the log-probability of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_lPro...
The marginal distribution class (a subisotopologue).
The marginal distribution class (a subisotopologue).
Precalculated Marginal class.
const double & get_prob(int idx) const
Get the probability of the idx-th subisotopologue.
const double & get_lProb(int idx) const
Get the log-probability of the idx-th subisotopologue.
const double & get_mass(int idx) const
Get the mass of the idx-th subisotopologue.