AIToolbox
A library that offers tools for AI problem solving.
SparseMaximumLikelihoodModel.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_MDP_SPARSE_MAXIMUM_LIKELIHOOD_MODEL_HEADER_FILE
2 #define AI_TOOLBOX_MDP_SPARSE_MAXIMUM_LIKELIHOOD_MODEL_HEADER_FILE
3 
4 #include <tuple>
5 #include <random>
6 
7 #include <AIToolbox/Seeder.hpp>
8 #include <AIToolbox/Types.hpp>
10 #include <AIToolbox/MDP/Types.hpp>
12 
13 namespace AIToolbox::MDP {
66  template <IsExperience E>
68  public:
98  SparseMaximumLikelihoodModel(const E & exp, double discount = 1.0, bool sync = false);
99 
105  void setDiscount(double d);
106 
119  void sync();
120 
143  void sync(size_t s, size_t a);
144 
158  void sync(size_t s, size_t a, size_t s1);
159 
176  std::tuple<size_t, double> sampleSR(size_t s, size_t a) const;
177 
183  size_t getS() const;
184 
190  size_t getA() const;
191 
197  double getDiscount() const;
198 
204  const E & getExperience() const;
205 
215  double getTransitionProbability(size_t s, size_t a, size_t s1) const;
216 
226  double getExpectedReward(size_t s, size_t a, size_t s1) const;
227 
233  const TransitionMatrix & getTransitionFunction() const;
234 
242  const SparseMatrix2D & getTransitionFunction(size_t a) const;
243 
249  const RewardMatrix & getRewardFunction() const;
250 
258  bool isTerminal(size_t s) const;
259 
260  private:
261  size_t S, A;
262  double discount_;
263 
264  const E & experience_;
265 
266  TransitionMatrix transitions_;
267  RewardMatrix rewards_;
268 
269  mutable RandomEngine rand_;
270  };
271 
272  template <IsExperience E>
273  SparseMaximumLikelihoodModel<E>::SparseMaximumLikelihoodModel(const E & exp, const double discount, const bool toSync) :
274  S(exp.getS()), A(exp.getA()), experience_(exp), transitions_(A, SparseMatrix2D(S, S)),
275  rewards_(S, A), rand_(Seeder::getSeed())
276  {
277  setDiscount(discount);
278 
279  if ( toSync ) {
280  sync();
281  // Sync does not touch state-action pairs which have never been
282  // seen. To keep the model consistent we set all of them as
283  // self-absorbing.
284  for ( size_t a = 0; a < A; ++a ) {
285  for ( size_t s = 0; s < S; ++s )
286  if ( experience_.getVisitsSum(s, a) == 0ul )
287  transitions_[a].insert(s, s) = 1.0;
288  // We don't bother making it compressed since it is bound
289  // to change eventually anyway
290  }
291  }
292  else {
293  // Make transition matrix true probability
294  for ( size_t a = 0; a < A; ++a )
295  transitions_[a].setIdentity();
296  }
297  }
298 
299  template <IsExperience E>
301  if ( d <= 0.0 || d > 1.0 ) throw std::invalid_argument("Discount parameter must be in (0,1]");
302  discount_ = d;
303  }
304 
305  template <IsExperience E>
307  for ( size_t a = 0; a < A; ++a )
308  for ( size_t s = 0; s < S; ++s )
309  sync(s,a);
310  }
311 
312  template <IsExperience E>
313  void SparseMaximumLikelihoodModel<E>::sync(const size_t s, const size_t a) {
314  // Nothing to do
315  const auto visitSum = experience_.getVisitsSum(s, a);
316  if ( visitSum == 0ul ) return;
317 
318  // Update reward by just copying the average from experience Note that
319  // we check different from rewards_, rather than zero, because it's
320  // possible that by averaging some rewards go BACK to zero, rather than
321  // away from it. In those case we still have to set the new rewards to zero.
322  if (checkDifferentSmall(rewards_.coeffRef(s, a), experience_.getReward(s, a)))
323  rewards_.coeffRef(s, a) = experience_.getReward(s, a);
324 
325  // Clear beginning's identity matrix
326  if ( visitSum == 1ul )
327  transitions_[a].coeffRef(s, s) = 0.0;
328 
329  // Create reciprocal for fast division
330  const double visitSumReciprocal = 1.0 / visitSum;
331 
332  if constexpr (IsExperienceEigen<E>) {
333  transitions_[a].row(s) = experience_.getVisitsTable(a).row(s).template cast<double>() * visitSumReciprocal;
334  } else {
335  // Normalize
336  for ( size_t s1 = 0; s1 < S; ++s1 ) {
337  const auto visits = experience_.getVisits(s, a, s1);
338  if (visits > 0)
339  transitions_[a].coeffRef(s, s1) = static_cast<double>(visits) * visitSumReciprocal;
340  }
341  }
342  }
343 
344  template <IsExperience E>
345  void SparseMaximumLikelihoodModel<E>::sync(const size_t s, const size_t a, const size_t s1) {
346  const auto visitSum = experience_.getVisitsSum(s, a);
347  // The second condition is related to numerical errors. Once in a
348  // while we reset those by forcing a true update using real data.
349  if ( !(visitSum % 10000ul) ) return sync(s, a);
350 
351  // Update reward by just copying the average from experience Note that
352  // we check different from rewards_, rather than zero, because it's
353  // possible that by averaging some rewards go BACK to zero, rather than
354  // away from it. In those case we still have to set the new rewards to zero.
355  if (checkDifferentSmall(rewards_.coeffRef(s, a), experience_.getReward(s, a)))
356  rewards_.coeffRef(s, a) = experience_.getReward(s, a);
357 
358  if ( visitSum == 1ul ) {
359  transitions_[a].coeffRef(s, s) = 0.0;
360  transitions_[a].coeffRef(s, s1) = 1.0;
361  } else {
362  const double newVisits = static_cast<double>(experience_.getVisits(s, a, s1));
363 
364  const double newTransitionValue = newVisits / static_cast<double>(visitSum - 1);
365  const double newVectorSum = 1.0 + (newTransitionValue - transitions_[a].coeff(s, s1));
366  // This works because as long as all the values in the transition have the same denominator
367  // (in this case visitSum-1), then the numerators do not matter, as we can simply normalize.
368  // In the end of the process the new values will be the same as if we updated directly using
369  // an increased denominator, and thus we will be able to call this function again correctly.
370  transitions_[a].coeffRef(s, s1) = newTransitionValue;
371  transitions_[a].row(s) /= newVectorSum;
372  }
373  }
374 
375  template <IsExperience E>
376  std::tuple<size_t, double> SparseMaximumLikelihoodModel<E>::sampleSR(const size_t s, const size_t a) const {
377  const size_t s1 = sampleProbability(S, transitions_[a].row(s), rand_);
378 
379  return std::make_tuple(s1, rewards_.coeff(s, a));
380  }
381 
382  template <IsExperience E>
383  double SparseMaximumLikelihoodModel<E>::getTransitionProbability(const size_t s, const size_t a, const size_t s1) const {
384  return transitions_[a].coeff(s, s1);
385  }
386 
387  template <IsExperience E>
388  double SparseMaximumLikelihoodModel<E>::getExpectedReward(const size_t s, const size_t a, const size_t) const {
389  return rewards_.coeff(s, a);
390  }
391 
392  template <IsExperience E>
393  bool SparseMaximumLikelihoodModel<E>::isTerminal(const size_t s) const {
394  for ( size_t a = 0; a < A; ++a )
395  if ( !checkEqualSmall(1.0, transitions_[a].coeff(s, s)) )
396  return false;
397  return true;
398  }
399 
400  template <IsExperience E>
401  size_t SparseMaximumLikelihoodModel<E>::getS() const { return S; }
402  template <IsExperience E>
403  size_t SparseMaximumLikelihoodModel<E>::getA() const { return A; }
404  template <IsExperience E>
405  double SparseMaximumLikelihoodModel<E>::getDiscount() const { return discount_; }
406  template <IsExperience E>
407  const E & SparseMaximumLikelihoodModel<E>::getExperience() const { return experience_; }
408 
409  template <IsExperience E>
411  template <IsExperience E>
413 
414  template <IsExperience E>
415  const SparseMatrix2D & SparseMaximumLikelihoodModel<E>::getTransitionFunction(const size_t a) const { return transitions_[a]; }
416 }
417 
418 #endif
AIToolbox::MDP::SparseMaximumLikelihoodModel::TransitionMatrix
SparseMatrix3D TransitionMatrix
Definition: SparseMaximumLikelihoodModel.hpp:69
AIToolbox::checkDifferentSmall
bool checkDifferentSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably different.
Definition: Core.hpp:60
AIToolbox::MDP::SparseMaximumLikelihoodModel::sampleSR
std::tuple< size_t, double > sampleSR(size_t s, size_t a) const
This function samples the MDP for the specified state action pair.
Definition: SparseMaximumLikelihoodModel.hpp:376
AIToolbox::MDP::SparseMaximumLikelihoodModel::getRewardFunction
const RewardMatrix & getRewardFunction() const
This function returns the rewards matrix for inspection.
Definition: SparseMaximumLikelihoodModel.hpp:412
AIToolbox::MDP::SparseMaximumLikelihoodModel::getTransitionFunction
const TransitionMatrix & getTransitionFunction() const
This function returns the transition matrix for inspection.
Definition: SparseMaximumLikelihoodModel.hpp:410
AIToolbox::MDP::SparseMaximumLikelihoodModel::getTransitionProbability
double getTransitionProbability(size_t s, size_t a, size_t s1) const
This function returns the stored transition probability for the specified transition.
Definition: SparseMaximumLikelihoodModel.hpp:383
AIToolbox::MDP::SparseMaximumLikelihoodModel::getDiscount
double getDiscount() const
This function returns the currently set discount factor.
Definition: SparseMaximumLikelihoodModel.hpp:405
AIToolbox::Seeder
This class is an internal class used to seed all random engines in the library.
Definition: Seeder.hpp:15
AIToolbox::SparseMatrix3D
std::vector< SparseMatrix2D > SparseMatrix3D
Definition: Types.hpp:22
AIToolbox::MDP::SparseMaximumLikelihoodModel::sync
void sync()
This function syncs the whole SparseMaximumLikelihoodModel to the underlying Experience.
Definition: SparseMaximumLikelihoodModel.hpp:306
AIToolbox::MDP::SparseMaximumLikelihoodModel::RewardMatrix
SparseMatrix2D RewardMatrix
Definition: SparseMaximumLikelihoodModel.hpp:70
AIToolbox::MDP
Definition: DoubleQLearning.hpp:10
AIToolbox::MDP::SparseMaximumLikelihoodModel::SparseMaximumLikelihoodModel
SparseMaximumLikelihoodModel(const E &exp, double discount=1.0, bool sync=false)
Constructor using previous Experience.
Definition: SparseMaximumLikelihoodModel.hpp:273
AIToolbox::MDP::SparseMaximumLikelihoodModel::isTerminal
bool isTerminal(size_t s) const
This function returns whether a given state is a terminal.
Definition: SparseMaximumLikelihoodModel.hpp:393
AIToolbox::MDP::SparseMaximumLikelihoodModel::getExperience
const E & getExperience() const
This function enables inspection of the underlying Experience of the SparseMaximumLikelihoodModel.
Definition: SparseMaximumLikelihoodModel.hpp:407
AIToolbox::MDP::SparseMaximumLikelihoodModel::getA
size_t getA() const
This function returns the number of available actions to the agent.
Definition: SparseMaximumLikelihoodModel.hpp:403
Seeder.hpp
AIToolbox::RandomEngine
std::mt19937 RandomEngine
Definition: Types.hpp:14
AIToolbox::checkEqualSmall
bool checkEqualSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably equal.
Definition: Core.hpp:45
Types.hpp
AIToolbox::sampleProbability
size_t sampleProbability(const size_t d, const T &in, G &generator)
This function samples an index from a probability vector.
Definition: Probability.hpp:188
AIToolbox::MDP::SparseMaximumLikelihoodModel::setDiscount
void setDiscount(double d)
This function sets a new discount factor for the Model.
Definition: SparseMaximumLikelihoodModel.hpp:300
AIToolbox::MDP::SparseMaximumLikelihoodModel
This class models Experience as a Markov Decision Process using Maximum Likelihood.
Definition: SparseMaximumLikelihoodModel.hpp:67
AIToolbox::MDP::SparseMaximumLikelihoodModel::getS
size_t getS() const
This function returns the number of states of the world.
Definition: SparseMaximumLikelihoodModel.hpp:401
Types.hpp
TypeTraits.hpp
AIToolbox::MDP::SparseMaximumLikelihoodModel::getExpectedReward
double getExpectedReward(size_t s, size_t a, size_t s1) const
This function returns the stored expected reward for the specified transition.
Definition: SparseMaximumLikelihoodModel.hpp:388
AIToolbox::SparseMatrix2D
Eigen::SparseMatrix< double, Eigen::RowMajor > SparseMatrix2D
Definition: Types.hpp:19
Probability.hpp