AIToolbox
A library that offers tools for AI problem solving.
Model.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_MDP_MODEL_HEADER_FILE
2 #define AI_TOOLBOX_MDP_MODEL_HEADER_FILE
3 
4 #include <utility>
5 #include <random>
6 
7 #include <AIToolbox/Seeder.hpp>
8 #include <AIToolbox/Types.hpp>
11 #include <AIToolbox/Utils/Core.hpp>
13 
14 namespace AIToolbox::MDP {
70  class Model {
71  public:
74 
89  Model(size_t s, size_t a, double discount = 1.0);
90 
124  template <IsNaive3DMatrix T, IsNaive3DMatrix R>
125  Model(size_t s, size_t a, const T & t, const R & r, double d = 1.0);
126 
139  template <IsModel M>
140  Model(const M& model);
141 
159  Model(NoCheck, size_t s, size_t a, TransitionMatrix && t, RewardMatrix && r, double d);
160 
181  template <IsNaive3DMatrix T>
182  void setTransitionFunction(const T & t);
183 
200  void setTransitionFunction(const TransitionMatrix & t);
201 
219  template <IsNaive3DMatrix R>
220  void setRewardFunction(const R & r);
221 
234  void setRewardFunction(const RewardMatrix & r);
235 
241  void setDiscount(double d);
242 
261  std::tuple<size_t, double> sampleSR(size_t s, size_t a) const;
262 
268  size_t getS() const;
269 
275  size_t getA() const;
276 
282  double getDiscount() const;
283 
293  double getTransitionProbability(size_t s, size_t a, size_t s1) const;
294 
304  double getExpectedReward(size_t s, size_t a, size_t s1) const;
305 
311  const TransitionMatrix & getTransitionFunction() const;
312 
320  const Matrix2D & getTransitionFunction(size_t a) const;
321 
327  const RewardMatrix & getRewardFunction() const;
328 
336  bool isTerminal(size_t s) const;
337 
338  private:
339  size_t S, A;
340  double discount_;
341 
342  TransitionMatrix transitions_;
343  RewardMatrix rewards_;
344 
345  mutable RandomEngine rand_;
346  };
347 
348  template <IsNaive3DMatrix T, IsNaive3DMatrix R>
349  Model::Model(const size_t s, const size_t a, const T & t, const R & r, const double d) :
350  S(s), A(a), transitions_(A, Matrix2D(S, S)),
351  rewards_(S, A), rand_(Seeder::getSeed())
352  {
353  setDiscount(d);
356  }
357 
358  template <IsModel M>
359  Model::Model(const M& model) :
360  S(model.getS()), A(model.getA()), transitions_(A, Matrix2D(S, S)),
361  rewards_(S, A), rand_(Seeder::getSeed())
362  {
363  setDiscount(model.getDiscount());
364  rewards_.setZero();
365  for ( size_t a = 0; a < A; ++a )
366  for ( size_t s = 0; s < S; ++s ) {
367  for ( size_t s1 = 0; s1 < S; ++s1 ) {
368  transitions_[a](s, s1) = model.getTransitionProbability(s, a, s1);
369  rewards_ (s, a) += model.getExpectedReward (s, a, s1) * transitions_[a](s, s1);
370  }
371  if ( !isProbability(S, transitions_[a].row(s)) )
372  throw std::invalid_argument("Input transition matrix does not contain valid probabilities.");
373  }
374  }
375 
376  template <IsNaive3DMatrix T>
377  void Model::setTransitionFunction(const T & t) {
378  if (!isProbability(S, A, S, t))
379  throw std::invalid_argument("Input transition matrix does not contain valid probabilities.");
380 
381  for ( size_t s = 0; s < S; ++s )
382  for ( size_t a = 0; a < A; ++a )
383  for ( size_t s1 = 0; s1 < S; ++s1 )
384  transitions_[a](s, s1) = t[s][a][s1];
385  }
386 
387  template <IsNaive3DMatrix R>
388  void Model::setRewardFunction(const R & r) {
389  rewards_.setZero();
390  for ( size_t s = 0; s < S; ++s )
391  for ( size_t a = 0; a < A; ++a )
392  for ( size_t s1 = 0; s1 < S; ++s1 )
393  rewards_(s, a) += r[s][a][s1] * transitions_[a](s, s1);
394  }
395 }
396 
397 #endif
Core.hpp
AIToolbox::MDP::Model::sampleSR
std::tuple< size_t, double > sampleSR(size_t s, size_t a) const
This function samples the MDP with the specified state action pair.
AIToolbox::MDP::Model::setDiscount
void setDiscount(double d)
This function sets a new discount factor for the Model.
AIToolbox::Seeder
This class is an internal class used to seed all random engines in the library.
Definition: Seeder.hpp:15
AIToolbox::Matrix2D
Eigen::Matrix< double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor|Eigen::AutoAlign > Matrix2D
Definition: Types.hpp:18
AIToolbox::MDP::Model::setRewardFunction
void setRewardFunction(const R &r)
This function replaces the Model reward function with the one provided.
Definition: Model.hpp:388
AIToolbox::isProbability
bool isProbability(const size_t size, const T &in)
This function checks whether the supplied 1D container is a valid discrete distribution.
Definition: Probability.hpp:38
AIToolbox::MDP
Definition: DoubleQLearning.hpp:10
AIToolbox::MDP::Model::TransitionMatrix
Matrix3D TransitionMatrix
Definition: Model.hpp:72
AIToolbox::MDP::Model::getRewardFunction
const RewardMatrix & getRewardFunction() const
This function returns the rewards matrix for inspection.
Seeder.hpp
AIToolbox::RandomEngine
std::mt19937 RandomEngine
Definition: Types.hpp:14
AIToolbox::MDP::Model::getS
size_t getS() const
This function returns the number of states of the world.
AIToolbox::MDP::Model::getExpectedReward
double getExpectedReward(size_t s, size_t a, size_t s1) const
This function returns the stored expected reward for the specified transition.
AIToolbox::MDP::Model::setTransitionFunction
void setTransitionFunction(const T &t)
This function replaces the Model transition function with the one provided.
Definition: Model.hpp:377
Types.hpp
AIToolbox::MDP::Model::getTransitionFunction
const TransitionMatrix & getTransitionFunction() const
This function returns the transition matrix for inspection.
AIToolbox::Matrix3D
std::vector< Matrix2D > Matrix3D
Definition: Types.hpp:21
AIToolbox::MDP::Model::getA
size_t getA() const
This function returns the number of available actions to the agent.
AIToolbox::NoCheck
This is used to tag functions that avoid runtime checks.
Definition: Types.hpp:44
AIToolbox::MDP::Model::getTransitionProbability
double getTransitionProbability(size_t s, size_t a, size_t s1) const
This function returns the stored transition probability for the specified transition.
AIToolbox::MDP::Model::getDiscount
double getDiscount() const
This function returns the currently set discount factor.
Types.hpp
TypeTraits.hpp
AIToolbox::MDP::Model
This class represents a Markov Decision Process.
Definition: Model.hpp:70
AIToolbox::MDP::Model::isTerminal
bool isTerminal(size_t s) const
This function returns whether a given state is a terminal.
AIToolbox::MDP::Model::RewardMatrix
Matrix2D RewardMatrix
Definition: Model.hpp:73
AIToolbox::MDP::Model::Model
Model(size_t s, size_t a, double discount=1.0)
Basic constructor.
Probability.hpp