AIToolbox
A library that offers tools for AI problem solving.
Probability.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_UTILS_PROBABILITY_HEADER_FILE
2 #define AI_TOOLBOX_UTILS_PROBABILITY_HEADER_FILE
3 
4 #include <random>
5 #include <algorithm>
6 
7 #include <AIToolbox/Types.hpp>
9 
10 namespace AIToolbox {
// Uniform real distribution constructed over [0.0, 1.0), shared by the
// sampling helpers in this header (sampleProbability, makeRandomProbability).
// It is declared `static` at namespace scope, so each translation unit that
// includes this header gets its own instance.
11  static std::uniform_real_distribution<double> probabilityDistribution(0.0, 1.0);
12 
37  template <typename T>
38  bool isProbability(const size_t size, const T & in) {
39  double p = 0.0;
40  for (size_t i = 0; i < size; ++i) {
41  const double value = static_cast<double>(in[i]);
42  if (value < 0.0) return false;
43  p += value;
44  }
45  if (checkDifferentSmall(p, 1.0))
46  return false;
47 
48  return true;
49  }
50 
/**
 * @brief Checks whether every row of a 2D container is a valid discrete
 *        probability distribution.
 *
 * Each of the first `rows` rows is validated through the 1D overload using
 * `cols` entries; checking stops at the first row that fails.
 *
 * @tparam T Any type where in[row] yields something the 1D overload accepts.
 * @param rows The number of rows to inspect.
 * @param cols The number of entries inspected in each row.
 * @param in The container to check.
 *
 * @return True if all inspected rows are probability distributions, false otherwise.
 */
template <typename T>
bool isProbability(const size_t rows, const size_t cols, const T & in) {
    size_t row = 0;
    // Advance while rows keep passing; stop at the first failure.
    while (row < rows && isProbability(cols, in[row]))
        ++row;
    return row == rows;
}
82 
/**
 * @brief Checks whether every row of every 2D slice of a 3D container is a
 *        valid discrete probability distribution.
 *
 * Each of the first `depth` slices is validated through the 2D overload;
 * checking stops at the first slice that fails.
 *
 * @tparam T Any type where in[d] yields something the 2D overload accepts.
 * @param depth The number of 2D slices to inspect.
 * @param rows The number of rows inspected in each slice.
 * @param cols The number of entries inspected in each row.
 * @param in The container to check.
 *
 * @return True if all inspected slices pass the check, false otherwise.
 */
template <typename T>
bool isProbability(const size_t depth, const size_t rows, const size_t cols, const T & in) {
    for (size_t slice = 0; slice != depth; ++slice) {
        const bool sliceIsValid = isProbability(rows, cols, in[slice]);
        if (!sliceIsValid)
            return false;
    }
    return true;
}
115 
// Non-template overloads of isProbability for the library's matrix types.
// Only the declarations are visible here; the definitions live elsewhere.
// NOTE(review): presumably each one validates every row of its input as a
// discrete distribution, like the templated overloads in this header -
// confirm against the definitions.

// Overload for a dense 2D matrix.
126  bool isProbability(const Matrix2D & in);
127 
// Overload for a Matrix3D.
138  bool isProbability(const Matrix3D & in);
139 
// Overload for a sparse 2D matrix.
150  bool isProbability(const SparseMatrix2D & in);
151 
// Overload for a SparseMatrix3D.
162  bool isProbability(const SparseMatrix3D & in);
163 
/**
 * @brief Samples an index from a probability vector.
 *
 * A number is drawn from probabilityDistribution, then the entries of `in`
 * are walked in order: the first index whose entry exceeds what is left of
 * the drawn number is returned, otherwise the entry is subtracted and the
 * walk continues.
 *
 * @tparam T Any type indexable with operator[].
 * @tparam G The random generator type.
 * @param d The number of entries of `in` to consider.
 * @param in The container holding the probabilities.
 * @param generator The random generator used for the draw.
 *
 * @return The sampled index; d-1 if the walk runs out of entries without
 *         selecting one.
 */
template <typename T, typename G>
size_t sampleProbability(const size_t d, const T& in, G& generator) {
    double remaining = probabilityDistribution(generator);

    size_t idx = 0;
    while (idx < d) {
        if (in[idx] > remaining)
            return idx;
        remaining -= in[idx];
        ++idx;
    }
    // No entry was selected: fall back to the last index.
    return d - 1;
}
197 
220  template <typename G>
221  size_t sampleProbability(const size_t d, const SparseMatrix2D::ConstRowXpr& in, G& generator) {
222  double p = probabilityDistribution(generator);
223 
224  for ( SparseMatrix2D::ConstRowXpr::InnerIterator i(in, 0); ; ++i ) {
225  if ( i.value() > p ) return i.col();
226  p -= i.value();
227  }
228  return d-1;
229  }
230 
/**
 * @brief Samples from a Beta distribution.
 *
 * The sample is built from two Gamma variates with unit scale,
 * X ~ Gamma(a, 1) and Y ~ Gamma(b, 1), and returned as X / (X + Y).
 *
 * @tparam G The random generator type.
 * @param a The first shape parameter of the Beta distribution.
 * @param b The second shape parameter of the Beta distribution.
 * @param generator The random generator used for the draws.
 *
 * @return The sampled value.
 */
template <typename G>
double sampleBetaDistribution(double a, double b, G & generator) {
    std::gamma_distribution<double> gammaA(a, 1.0);
    std::gamma_distribution<double> gammaB(b, 1.0);

    // The `a` variate is drawn before the `b` variate; the order determines
    // how the generator's output is consumed.
    const double fromA = gammaA(generator);
    const double fromB = gammaB(generator);

    return fromA / (fromA + fromB);
}
257 
269  template <typename TIn, typename G>
270  ProbabilityVector sampleDirichletDistribution(const TIn & params, G & generator) {
271  ProbabilityVector retval(params.size());
272 
273  sampleDirichletDistribution(params, generator, retval);
274 
275  return retval;
276  }
277 
/**
 * @brief Samples from the input Dirichlet distribution into a caller-provided
 *        output.
 *
 * For each parameter a Gamma(param, 1) variate is drawn and stored in the
 * matching entry of `out`; `out` is then divided by the sum of the stored
 * values.
 *
 * @tparam TIn The type of the parameter container; must provide size() and operator[].
 * @tparam TOut The type of the output; must provide size(), operator[] and
 *              operator/= with a double.
 * @tparam G The random generator type.
 * @param params The parameters of the Dirichlet distribution.
 * @param generator The random generator used for the draws.
 * @param out The output to fill; asserted to have the same size as `params`.
 */
template <typename TIn, typename TOut, typename G>
void sampleDirichletDistribution(const TIn & params, G & generator, TOut && out) {
    assert(params.size() == out.size());

    const auto count = static_cast<size_t>(params.size());
    double total = 0.0;
    for (size_t k = 0; k < count; ++k) {
        std::gamma_distribution<double> gamma(params[k], 1.0);
        out[k] = gamma(generator);
        // Accumulate what was actually stored in the output entry.
        total += out[k];
    }
    // Normalize all the drawn values by their sum.
    out /= total;
}
301 
315  template <typename G>
316  ProbabilityVector makeRandomProbability(const size_t S, G & generator) {
317  ProbabilityVector b(S);
318  double * bData = b.data();
319  // The way this works is that we're going to generate S-1 numbers in
320  // [0,1], and sort them with together with an implied 0.0 and 1.0, for
321  // a total of S+1 numbers.
322  //
323  // The output will be represented by the differences between each pair
324  // of numbers, after sorting the original vector.
325  //
326  // The idea is basically to take a unit vector and cut it up into
327  // random parts. The size of each part is the value of an entry of the
328  // output.
329 
330  // We must set the first element to zero even if we're later
331  // overwriting it. This is to avoid bugs in case the input S is one -
332  // in which case we should return a vector with a single element
333  // containing 1.0.
334  bData[0] = 0.0;
335  for ( size_t s = 0; s < S-1; ++s )
336  bData[s] = probabilityDistribution(generator);
337 
338  // Sort all but the implied last 1.0 which we'll add later.
339  std::sort(bData, bData + S - 1);
340 
341  // For each number, keep track of what was in the vector there, and
342  // transform it into the difference with its predecessor.
343  double helper1 = bData[0], helper2;
344  for ( size_t s = 1; s < S - 1; ++s ) {
345  helper2 = bData[s];
346  bData[s] -= helper1;
347  helper1 = helper2;
348  }
349  // The final one is computed with respect to the overall sum of 1.0.
350  bData[S-1] = 1.0 - helper1;
351 
352  return b;
353  }
354 
369  inline bool checkEqualProbability(const ProbabilityVector & lhs, const ProbabilityVector & rhs) {
370  const auto size = lhs.size();
371  for (auto i = 0; i < size; ++i)
372  if (!checkEqualSmall(lhs[i], rhs[i]))
373  return false;
374  return true;
375  }
376 
384  inline double getEntropy(const ProbabilityVector & v) {
385  return (v.array() * v.array().log()).sum();
386  }
387 
395  inline double getEntropyBase2(const ProbabilityVector & v) {
396  double entropy = 0.0;
397  for (auto i = 0; i < v.size(); ++i)
398  entropy += v[i] * std::log2(v[i]);
399  return entropy;
400  }
401 
417 
437  public:
444 
452  template <typename G>
453  size_t sampleProbability(G & generator) const {
454  const auto x = sampleDistribution_(generator);
455  const int i = x;
456  const auto y = x - i;
457 
458  if (y < prob_[i]) return i;
459  return alias_[i];
460  }
461 
462  private:
463  Vector prob_;
464  std::vector<size_t> alias_;
465  mutable std::uniform_real_distribution<double> sampleDistribution_;
466  };
467 }
468 
469 #endif
AIToolbox::checkDifferentSmall
bool checkDifferentSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably different.
Definition: Core.hpp:60
Core.hpp
AIToolbox::SparseMatrix3D
std::vector< SparseMatrix2D > SparseMatrix3D
Definition: Types.hpp:22
AIToolbox::Matrix2D
Eigen::Matrix< double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor|Eigen::AutoAlign > Matrix2D
Definition: Types.hpp:18
AIToolbox::VoseAliasSampler::VoseAliasSampler
VoseAliasSampler(const ProbabilityVector &p)
Basic constructor.
AIToolbox::checkEqualProbability
bool checkEqualProbability(const ProbabilityVector &lhs, const ProbabilityVector &rhs)
This function checks whether two input ProbabilityVectors are equal.
Definition: Probability.hpp:369
AIToolbox::Vector
Eigen::Matrix< double, Eigen::Dynamic, 1 > Vector
Definition: Types.hpp:16
AIToolbox::isProbability
bool isProbability(const size_t size, const T &in)
This function checks whether the supplied 1D container is a valid discrete distribution.
Definition: Probability.hpp:38
AIToolbox::makeRandomProbability
ProbabilityVector makeRandomProbability(const size_t S, G &generator)
This function generates a random probability vector.
Definition: Probability.hpp:316
AIToolbox
Definition: Experience.hpp:6
AIToolbox::getEntropy
double getEntropy(const ProbabilityVector &v)
This function returns the entropy of the input ProbabilityVector.
Definition: Probability.hpp:384
AIToolbox::projectToProbability
ProbabilityVector projectToProbability(const Vector &v)
This function projects the input vector to a valid probability space.
AIToolbox::sampleBetaDistribution
double sampleBetaDistribution(double a, double b, G &generator)
This function samples from a Beta distribution.
Definition: Probability.hpp:250
AIToolbox::sampleDirichletDistribution
ProbabilityVector sampleDirichletDistribution(const TIn &params, G &generator)
This function samples from the input Dirichlet distribution.
Definition: Probability.hpp:270
AIToolbox::checkEqualSmall
bool checkEqualSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably equal.
Definition: Core.hpp:45
Types.hpp
AIToolbox::sampleProbability
size_t sampleProbability(const size_t d, const T &in, G &generator)
This function samples an index from a probability vector.
Definition: Probability.hpp:188
AIToolbox::Matrix3D
std::vector< Matrix2D > Matrix3D
Definition: Types.hpp:21
AIToolbox::ProbabilityVector
Vector ProbabilityVector
Definition: Types.hpp:34
AIToolbox::getEntropyBase2
double getEntropyBase2(const ProbabilityVector &v)
This function returns the entropy of the input ProbabilityVector computed using log2.
Definition: Probability.hpp:395
AIToolbox::probabilityDistribution
static std::uniform_real_distribution< double > probabilityDistribution(0.0, 1.0)
AIToolbox::SparseMatrix2D
Eigen::SparseMatrix< double, Eigen::RowMajor > SparseMatrix2D
Definition: Types.hpp:19
AIToolbox::VoseAliasSampler::sampleProbability
size_t sampleProbability(G &generator) const
This function samples a number that follows the distribution of the class.
Definition: Probability.hpp:453
AIToolbox::VoseAliasSampler
This class represents the Alias sampling method.
Definition: Probability.hpp:436