AIToolbox
A library that offers tools for AI problem solving.
MaximumLikelihoodModel.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_MDP_MAXIMUM_LIKELIHOOD_MODEL_HEADER_FILE
2 #define AI_TOOLBOX_MDP_MAXIMUM_LIKELIHOOD_MODEL_HEADER_FILE
3 
4 #include <tuple>
5 #include <random>
6 
7 #include <AIToolbox/Types.hpp>
10 #include <AIToolbox/Seeder.hpp>
12 
13 namespace AIToolbox::MDP {
56  template <IsExperience E>
58  public:
61 
89  MaximumLikelihoodModel(const E & exp, double discount = 1.0, bool sync = false);
90 
96  void setDiscount(double d);
97 
110  void sync();
111 
133  void sync(size_t s, size_t a);
134 
148  void sync(size_t s, size_t a, size_t s1);
149 
166  std::tuple<size_t, double> sampleSR(size_t s, size_t a) const;
167 
173  size_t getS() const;
174 
180  size_t getA() const;
181 
187  double getDiscount() const;
188 
194  const E & getExperience() const;
195 
205  double getTransitionProbability(size_t s, size_t a, size_t s1) const;
206 
216  double getExpectedReward(size_t s, size_t a, size_t s1) const;
217 
223  const TransitionMatrix & getTransitionFunction() const;
224 
232  const Matrix2D & getTransitionFunction(size_t a) const;
233 
239  const RewardMatrix & getRewardFunction() const;
240 
248  bool isTerminal(size_t s) const;
249 
250  private:
251  size_t S, A;
252  double discount_;
253 
254  const E & experience_;
255 
256  TransitionMatrix transitions_;
257  RewardMatrix rewards_;
258 
259  mutable RandomEngine rand_;
260  };
261 
262  template <IsExperience E>
263  MaximumLikelihoodModel<E>::MaximumLikelihoodModel(const E& exp, const double discount, const bool toSync) :
264  S(exp.getS()), A(exp.getA()), experience_(exp), transitions_(A, Matrix2D(S, S)),
265  rewards_(S, A), rand_(Seeder::getSeed())
266  {
267  setDiscount(discount);
268  rewards_.setZero();
269 
270  if ( toSync ) {
271  sync();
272  // Sync does not touch state-action pairs which have never been
273  // seen. To keep the model consistent we set all of them as
274  // self-absorbing.
275  for ( size_t a = 0; a < A; ++a )
276  for ( size_t s = 0; s < S; ++s )
277  if ( experience_.getVisitsSum(s, a) == 0ul )
278  transitions_[a](s, s) = 1.0;
279  }
280  else {
281  // Make transition matrix true probability
282  for ( size_t a = 0; a < A; ++a )
283  transitions_[a].setIdentity();
284  }
285  }
286 
287  template <IsExperience E>
289  if ( d <= 0.0 || d > 1.0 ) throw std::invalid_argument("Discount parameter must be in (0,1]");
290  discount_ = d;
291  }
292 
293  template <IsExperience E>
295  for ( size_t a = 0; a < A; ++a )
296  for ( size_t s = 0; s < S; ++s )
297  sync(s,a);
298  }
299 
300  template <IsExperience E>
301  void MaximumLikelihoodModel<E>::sync(const size_t s, const size_t a) {
302  // Nothing to do
303  const auto visitSum = experience_.getVisitsSum(s, a);
304  if ( visitSum == 0ul ) return;
305 
306  // Update reward by just copying the average from experience
307  rewards_(s, a) = experience_.getReward(s, a);
308 
309  // Create reciprocal for fast division
310  const double visitSumReciprocal = 1.0 / visitSum;
311 
312  if constexpr (IsExperienceEigen<E>) {
313  transitions_[a].row(s) = experience_.getVisitsTable(a).row(s).template cast<double>() * visitSumReciprocal;
314  } else {
315  // Normalize
316  for ( size_t s1 = 0; s1 < S; ++s1 ) {
317  const auto visits = experience_.getVisits(s, a, s1);
318  transitions_[a](s, s1) = static_cast<double>(visits) * visitSumReciprocal;
319  }
320  }
321  }
322 
323  template <IsExperience E>
324  void MaximumLikelihoodModel<E>::sync(const size_t s, const size_t a, const size_t s1) {
325  const auto visitSum = experience_.getVisitsSum(s, a);
326  // The second condition is related to numerical errors. Once in a
327  // while we reset those by forcing a true update using real data.
328  if ( !(visitSum % 10000ul) ) return sync(s, a);
329 
330  // Update reward by just copying the average from experience
331  rewards_(s, a) = experience_.getReward(s, a);
332 
333  if ( visitSum == 1ul ) {
334  transitions_[a](s, s) = 0.0;
335  transitions_[a](s, s1) = 1.0;
336  } else {
337  const double newVisits = static_cast<double>(experience_.getVisits(s, a, s1));
338 
339  const double newTransitionValue = newVisits / static_cast<double>(visitSum - 1);
340  const double newVectorSum = 1.0 + (newTransitionValue - transitions_[a](s, s1));
341  // This works because as long as all the values in the transition have the same denominator
342  // (in this case visitSum-1), then the numerators do not matter, as we can simply normalize.
343  // In the end of the process the new values will be the same as if we updated directly using
344  // an increased denominator, and thus we will be able to call this function again correctly.
345  transitions_[a](s, s1) = newTransitionValue;
346  transitions_[a].row(s) /= newVectorSum;
347  }
348  }
349 
350  template <IsExperience E>
351  std::tuple<size_t, double> MaximumLikelihoodModel<E>::sampleSR(const size_t s, const size_t a) const {
352  const size_t s1 = sampleProbability(S, transitions_[a].row(s), rand_);
353 
354  return std::make_tuple(s1, rewards_(s, a));
355  }
356 
357  template <IsExperience E>
358  double MaximumLikelihoodModel<E>::getTransitionProbability(const size_t s, const size_t a, const size_t s1) const {
359  return transitions_[a](s, s1);
360  }
361 
362  template <IsExperience E>
363  double MaximumLikelihoodModel<E>::getExpectedReward(const size_t s, const size_t a, const size_t) const {
364  return rewards_(s, a);
365  }
366 
367  template <IsExperience E>
368  bool MaximumLikelihoodModel<E>::isTerminal(const size_t s) const {
369  for ( size_t a = 0; a < A; ++a )
370  if ( !checkEqualSmall(1.0, transitions_[a](s, s)) )
371  return false;
372  return true;
373  }
374 
375  template <IsExperience E>
376  size_t MaximumLikelihoodModel<E>::getS() const { return S; }
377  template <IsExperience E>
378  size_t MaximumLikelihoodModel<E>::getA() const { return A; }
379  template <IsExperience E>
380  double MaximumLikelihoodModel<E>::getDiscount() const { return discount_; }
381  template <IsExperience E>
382  const E & MaximumLikelihoodModel<E>::getExperience() const { return experience_; }
383 
384  template <IsExperience E>
386  template <IsExperience E>
388 
389  template <IsExperience E>
390  const Matrix2D & MaximumLikelihoodModel<E>::getTransitionFunction(const size_t a) const { return transitions_[a]; }
391 }
392 
393 #endif
AIToolbox::MDP::MaximumLikelihoodModel::isTerminal
bool isTerminal(size_t s) const
This function returns whether a given state is a terminal.
Definition: MaximumLikelihoodModel.hpp:368
AIToolbox::MDP::MaximumLikelihoodModel
This class models Experience as a Markov Decision Process using Maximum Likelihood.
Definition: MaximumLikelihoodModel.hpp:57
AIToolbox::Seeder
This class is an internal class used to seed all random engines in the library.
Definition: Seeder.hpp:15
AIToolbox::MDP::MaximumLikelihoodModel::getTransitionProbability
double getTransitionProbability(size_t s, size_t a, size_t s1) const
This function returns the stored transition probability for the specified transition.
Definition: MaximumLikelihoodModel.hpp:358
AIToolbox::MDP::MaximumLikelihoodModel::getTransitionFunction
const TransitionMatrix & getTransitionFunction() const
This function returns the transition matrix for inspection.
Definition: MaximumLikelihoodModel.hpp:385
AIToolbox::MDP::MaximumLikelihoodModel::getA
size_t getA() const
This function returns the number of available actions to the agent.
Definition: MaximumLikelihoodModel.hpp:378
AIToolbox::Matrix2D
Eigen::Matrix< double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor|Eigen::AutoAlign > Matrix2D
Definition: Types.hpp:18
AIToolbox::MDP::MaximumLikelihoodModel::getRewardFunction
const RewardMatrix & getRewardFunction() const
This function returns the rewards matrix for inspection.
Definition: MaximumLikelihoodModel.hpp:387
AIToolbox::MDP::MaximumLikelihoodModel::TransitionMatrix
Matrix3D TransitionMatrix
Definition: MaximumLikelihoodModel.hpp:59
AIToolbox::MDP::MaximumLikelihoodModel::getExpectedReward
double getExpectedReward(size_t s, size_t a, size_t s1) const
This function returns the stored expected reward for the specified transition.
Definition: MaximumLikelihoodModel.hpp:363
AIToolbox::MDP
Definition: DoubleQLearning.hpp:10
AIToolbox::MDP::MaximumLikelihoodModel::getExperience
const E & getExperience() const
This function enables inspection of the underlying Experience of the MaximumLikelihoodModel.
Definition: MaximumLikelihoodModel.hpp:382
Seeder.hpp
AIToolbox::MDP::MaximumLikelihoodModel::RewardMatrix
Matrix2D RewardMatrix
Definition: MaximumLikelihoodModel.hpp:60
AIToolbox::RandomEngine
std::mt19937 RandomEngine
Definition: Types.hpp:14
AIToolbox::MDP::MaximumLikelihoodModel::sync
void sync()
This function syncs the whole MaximumLikelihoodModel to the underlying Experience.
Definition: MaximumLikelihoodModel.hpp:294
AIToolbox::checkEqualSmall
bool checkEqualSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably equal.
Definition: Core.hpp:45
Types.hpp
AIToolbox::sampleProbability
size_t sampleProbability(const size_t d, const T &in, G &generator)
This function samples an index from a probability vector.
Definition: Probability.hpp:188
AIToolbox::Matrix3D
std::vector< Matrix2D > Matrix3D
Definition: Types.hpp:21
AIToolbox::MDP::MaximumLikelihoodModel::getDiscount
double getDiscount() const
This function returns the currently set discount factor.
Definition: MaximumLikelihoodModel.hpp:380
AIToolbox::MDP::MaximumLikelihoodModel::getS
size_t getS() const
This function returns the number of states of the world.
Definition: MaximumLikelihoodModel.hpp:376
AIToolbox::MDP::MaximumLikelihoodModel::MaximumLikelihoodModel
MaximumLikelihoodModel(const E &exp, double discount=1.0, bool sync=false)
Constructor using previous Experience.
Definition: MaximumLikelihoodModel.hpp:263
AIToolbox::MDP::MaximumLikelihoodModel::sampleSR
std::tuple< size_t, double > sampleSR(size_t s, size_t a) const
This function samples the MDP for the specified state action pair.
Definition: MaximumLikelihoodModel.hpp:351
Types.hpp
AIToolbox::MDP::MaximumLikelihoodModel::setDiscount
void setDiscount(double d)
This function sets a new discount factor for the Model.
Definition: MaximumLikelihoodModel.hpp:288
TypeTraits.hpp
Probability.hpp