AIToolbox
A library that offers tools for AI problem solving.
SparseModel.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_MDP_SPARSE_MODEL_HEADER_FILE
2 #define AI_TOOLBOX_MDP_SPARSE_MODEL_HEADER_FILE
3 
4 #include <AIToolbox/Seeder.hpp>
5 
8 
10 
11 namespace AIToolbox::MDP {
77  class SparseModel {
78  public:
81 
96  SparseModel(size_t s, size_t a, double discount = 1.0);
97 
139  template <IsNaive3DMatrix T, IsNaive3DMatrix R>
140  SparseModel(size_t s, size_t a, const T & t, const R & r, double d = 1.0);
141 
154  template <IsModel M>
155  SparseModel(const M& model);
156 
174  SparseModel(NoCheck, size_t s, size_t a, TransitionMatrix && t, RewardMatrix && r, double d);
175 
204  template <IsNaive3DMatrix T>
205  void setTransitionFunction(const T & t);
206 
223  void setTransitionFunction(const TransitionMatrix & t);
224 
250  template <IsNaive3DMatrix R>
251  void setRewardFunction(const R & r);
252 
265  void setRewardFunction(const RewardMatrix & r);
266 
272  void setDiscount(double d);
273 
292  std::tuple<size_t, double> sampleSR(size_t s, size_t a) const;
293 
299  size_t getS() const;
300 
306  size_t getA() const;
307 
313  double getDiscount() const;
314 
324  double getTransitionProbability(size_t s, size_t a, size_t s1) const;
325 
335  double getExpectedReward(size_t s, size_t a, size_t s1) const;
336 
342  const TransitionMatrix & getTransitionFunction() const;
343 
351  const SparseMatrix2D & getTransitionFunction(size_t a) const;
352 
358  const RewardMatrix & getRewardFunction() const;
359 
367  bool isTerminal(size_t s) const;
368 
369  private:
370  size_t S, A;
371  double discount_;
372 
373  TransitionMatrix transitions_;
374  RewardMatrix rewards_;
375 
376  mutable RandomEngine rand_;
377  };
378 
379  template <IsNaive3DMatrix T, IsNaive3DMatrix R>
380  SparseModel::SparseModel(const size_t s, const size_t a, const T & t, const R & r, const double d) :
381  S(s), A(a), transitions_(A, SparseMatrix2D(S, S)),
382  rewards_(S, A), rand_(Seeder::getSeed())
383  {
384  setDiscount(d);
387  }
388 
389  template <IsModel M>
390  SparseModel::SparseModel(const M& model) :
391  S(model.getS()), A(model.getA()), transitions_(A, SparseMatrix2D(S, S)),
392  rewards_(S, A), rand_(Seeder::getSeed())
393  {
394  setDiscount(model.getDiscount());
395  for ( size_t s = 0; s < S; ++s )
396  for ( size_t a = 0; a < A; ++a ) {
397  for ( size_t s1 = 0; s1 < S; ++s1 ) {
398  const double p = model.getTransitionProbability(s, a, s1);
399  if ( p < 0.0 || p > 1.0 )
400  throw std::invalid_argument("Input transition matrix contains an invalid value.");
401 
402  if ( checkDifferentSmall(0.0, p) ) transitions_[a].insert(s, s1) = p;
403  const double r = model.getExpectedReward(s, a, s1);
404  if ( checkDifferentSmall(0.0, r) ) rewards_.coeffRef(s, a) += r * p;
405  }
406  if ( checkDifferentSmall(1.0, transitions_[a].row(s).sum()) )
407  throw std::invalid_argument("Input transition matrix contains an invalid row.");
408  }
409 
410  for ( size_t a = 0; a < A; ++a )
411  transitions_[a].makeCompressed();
412  rewards_.makeCompressed();
413  }
414 
415  template <IsNaive3DMatrix T>
417  if (!isProbability(S, A, S, t))
418  throw std::invalid_argument("Input transition matrix does not contain valid probabilities.");
419 
420  // Then we copy.
421  for ( size_t a = 0; a < A; ++a ) {
422  transitions_[a].setZero();
423 
424  for ( size_t s = 0; s < S; ++s )
425  for ( size_t s1 = 0; s1 < S; ++s1 ) {
426  const double p = t[s][a][s1];
427  if ( checkDifferentSmall(0.0, p) ) transitions_[a].insert(s, s1) = p;
428  }
429  transitions_[a].makeCompressed();
430  }
431  }
432 
433  template <IsNaive3DMatrix R>
434  void SparseModel::setRewardFunction(const R & r) {
435  rewards_.setZero();
436  for ( size_t a = 0; a < A; ++a ) {
437  for ( size_t s = 0; s < S; ++s ) {
438  double newRew = 0.0;
439  for ( size_t s1 = 0; s1 < S; ++s1 )
440  newRew += r[s][a][s1] * transitions_[a].coeff(s, s1);
441 
442  if (checkDifferentSmall(newRew, 0.0))
443  rewards_.coeffRef(s, a) = newRew;
444  }
445  }
446  rewards_.makeCompressed();
447  }
448 }
449 
450 #endif
451 
AIToolbox::checkDifferentSmall
bool checkDifferentSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably different.
Definition: Core.hpp:60
AIToolbox::MDP::SparseModel::TransitionMatrix
SparseMatrix3D TransitionMatrix
Definition: SparseModel.hpp:79
AIToolbox::MDP::SparseModel::sampleSR
std::tuple< size_t, double > sampleSR(size_t s, size_t a) const
This function samples the MDP for the specified state action pair.
AIToolbox::MDP::SparseModel::getRewardFunction
const RewardMatrix & getRewardFunction() const
This function returns the rewards matrix for inspection.
AIToolbox::MDP::SparseModel::getA
size_t getA() const
This function returns the number of available actions to the agent.
AIToolbox::MDP::SparseModel::getTransitionFunction
const TransitionMatrix & getTransitionFunction() const
This function returns the transition matrix for inspection.
AIToolbox::MDP::SparseModel::setTransitionFunction
void setTransitionFunction(const T &t)
This function replaces the transition function with the one provided.
Definition: SparseModel.hpp:416
AIToolbox::Seeder
This class is an internal class used to seed all random engines in the library.
Definition: Seeder.hpp:15
AIToolbox::SparseMatrix3D
std::vector< SparseMatrix2D > SparseMatrix3D
Definition: Types.hpp:22
AIToolbox::MDP::SparseModel::RewardMatrix
SparseMatrix2D RewardMatrix
Definition: SparseModel.hpp:80
AIToolbox::MDP::SparseModel::setRewardFunction
void setRewardFunction(const R &r)
This function replaces the reward function with the one provided.
Definition: SparseModel.hpp:434
AIToolbox::MDP::SparseModel::getDiscount
double getDiscount() const
This function returns the currently set discount factor.
AIToolbox::isProbability
bool isProbability(const size_t size, const T &in)
This function checks whether the supplied 1D container is a valid discrete distribution.
Definition: Probability.hpp:38
AIToolbox::MDP::SparseModel::isTerminal
bool isTerminal(size_t s) const
This function returns whether a given state is terminal.
AIToolbox::MDP
Definition: DoubleQLearning.hpp:10
AIToolbox::MDP::SparseModel::setDiscount
void setDiscount(double d)
This function sets a new discount factor for the SparseModel.
Seeder.hpp
AIToolbox::RandomEngine
std::mt19937 RandomEngine
Definition: Types.hpp:14
AIToolbox::MDP::SparseModel::getExpectedReward
double getExpectedReward(size_t s, size_t a, size_t s1) const
This function returns the stored expected reward for the specified transition.
AIToolbox::NoCheck
This is used to tag functions that avoid runtime checks.
Definition: Types.hpp:44
Types.hpp
AIToolbox::MDP::SparseModel
This class represents a Markov Decision Process.
Definition: SparseModel.hpp:77
TypeTraits.hpp
AIToolbox::MDP::SparseModel::SparseModel
SparseModel(size_t s, size_t a, double discount=1.0)
Basic constructor.
AIToolbox::MDP::SparseModel::getS
size_t getS() const
This function returns the number of states of the world.
AIToolbox::SparseMatrix2D
Eigen::SparseMatrix< double, Eigen::RowMajor > SparseMatrix2D
Definition: Types.hpp:19
AIToolbox::MDP::SparseModel::getTransitionProbability
double getTransitionProbability(size_t s, size_t a, size_t s1) const
This function returns the stored transition probability for the specified transition.
Probability.hpp