AIToolbox
A library that offers tools for AI problem solving.
OldMDPModel.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_OLD_MDP_MODEL_HEADER_FILE
2 #define AI_TOOLBOX_OLD_MDP_MODEL_HEADER_FILE
3 
4 #include <utility>
5 #include <random>
6 
7 #include <AIToolbox/Types.hpp>
11 #include <AIToolbox/Utils/Core.hpp>
13 #include <AIToolbox/Seeder.hpp>
14 
75 class OldMDPModel {
76  public:
79 
94  OldMDPModel(size_t s, size_t a, double discount = 1.0);
95 
131  template <AIToolbox::IsNaive3DMatrix T, AIToolbox::IsNaive3DMatrix R>
132  OldMDPModel(size_t s, size_t a, const T & t, const R & r, double d = 1.0);
133 
146  template <AIToolbox::MDP::IsModel M>
147  OldMDPModel(const M& model);
148 
170  template <AIToolbox::IsNaive3DMatrix T>
171  void setTransitionFunction(const T & t);
172 
190  template <AIToolbox::IsNaive3DMatrix R>
191  void setRewardFunction(const R & r);
192 
198  void setDiscount(double d);
199 
218  std::tuple<size_t, double> sampleSR(size_t s, size_t a) const;
219 
225  size_t getS() const;
226 
232  size_t getA() const;
233 
239  double getDiscount() const;
240 
250  double getTransitionProbability(size_t s, size_t a, size_t s1) const;
251 
261  double getExpectedReward(size_t s, size_t a, size_t s1) const;
262 
268  const TransitionMatrix & getTransitionFunction() const;
269 
275  const RewardMatrix & getRewardFunction() const;
276 
284  bool isTerminal(size_t s) const;
285 
286  private:
287  size_t S, A;
288  double discount_;
289 
290  TransitionMatrix transitions_;
291  RewardMatrix rewards_;
292 
293  mutable AIToolbox::RandomEngine rand_;
294 
295  friend std::istream& operator>>(std::istream &is, OldMDPModel &);
296 };
297 
298 template <AIToolbox::IsNaive3DMatrix T, AIToolbox::IsNaive3DMatrix R>
299 OldMDPModel::OldMDPModel(size_t s, size_t a, const T & t, const R & r, double d) : S(s), A(a), transitions_(boost::extents[S][A][S]), rewards_(boost::extents[S][A][S]),
300  rand_(AIToolbox::Seeder::getSeed())
301 {
302  setDiscount(d);
305 }
306 
307 template <AIToolbox::MDP::IsModel M>
308 OldMDPModel::OldMDPModel(const M& model) : S(model.getS()), A(model.getA()), discount_(model.getDiscount()), transitions_(boost::extents[S][A][S]), rewards_(boost::extents[S][A][S]),
309  rand_(AIToolbox::Seeder::getSeed())
310 {
311  for ( size_t s = 0; s < S; ++s )
312  for ( size_t a = 0; a < A; ++a ) {
313  for ( size_t s1 = 0; s1 < S; ++s1 ) {
314  transitions_[s][a][s1] = model.getTransitionProbability(s, a, s1);
315  rewards_ [s][a][s1] = model.getExpectedReward (s, a, s1);
316  }
317  if ( ! AIToolbox::isProbability(S, transitions_[s][a]) )
318  throw std::invalid_argument("Input transition matrix does not contain valid probabilities.");
319  }
320 }
321 
322 template <AIToolbox::IsNaive3DMatrix T>
324  for ( size_t s = 0; s < S; ++s )
325  for ( size_t a = 0; a < A; ++a )
326  if ( ! AIToolbox::isProbability(S, t[s][a]) )
327  throw std::invalid_argument("Input transition matrix does not contain valid probabilities.");
328 
329  copyDumb3D(t, transitions_, S, A, S);
330 }
331 
332 template <AIToolbox::IsNaive3DMatrix R>
333 void OldMDPModel::setRewardFunction( const R & r ) {
334  copyDumb3D(r, rewards_, S, A, S);
335 }
336 
337 OldMDPModel::OldMDPModel(size_t s, size_t a, double discount) : S(s), A(a), discount_(discount), transitions_(boost::extents[S][A][S]), rewards_(boost::extents[S][A][S]),
338  rand_(AIToolbox::Seeder::getSeed())
339 {
340  // Make transition matrix true probability
341  for ( size_t s = 0; s < S; ++s )
342  for ( size_t a = 0; a < A; ++a )
343  transitions_[s][a][s] = 1.0;
344 }
345 
346 inline std::tuple<size_t, double> OldMDPModel::sampleSR(size_t s, size_t a) const {
347  size_t s1 = AIToolbox::sampleProbability(S, transitions_[s][a], rand_);
348 
349  return std::make_tuple(s1, rewards_[s][a][s1]);
350 }
351 
352 inline double OldMDPModel::getTransitionProbability(size_t s, size_t a, size_t s1) const {
353  return transitions_[s][a][s1];
354 }
355 
356 inline double OldMDPModel::getExpectedReward(size_t s, size_t a, size_t s1) const {
357  return rewards_[s][a][s1];
358 }
359 
360 inline void OldMDPModel::setDiscount(double d) {
361  if ( d <= 0.0 || d > 1.0 ) throw std::invalid_argument("Discount parameter must be in (0,1]");
362  discount_ = d;
363 }
364 
365 inline bool OldMDPModel::isTerminal(size_t s) const {
366  bool answer = true;
367  for ( size_t a = 0; a < A; ++a ) {
368  if ( !AIToolbox::checkEqualSmall(1.0, transitions_[s][a][s]) ) {
369  answer = false;
370  break;
371  }
372  }
373  return answer;
374 }
375 
376 inline size_t OldMDPModel::getS() const { return S; }
377 inline size_t OldMDPModel::getA() const { return A; }
378 inline double OldMDPModel::getDiscount() const { return discount_; }
379 
380 inline const OldMDPModel::TransitionMatrix & OldMDPModel::getTransitionFunction() const { return transitions_; }
381 inline const OldMDPModel::RewardMatrix & OldMDPModel::getRewardFunction() const { return rewards_; }
382 
383 #endif
OldMDPModel
This class represents a Markov Decision Process.
Definition: OldMDPModel.hpp:75
OldMDPModel::getA
size_t getA() const
This function returns the number of available actions to the agent.
Definition: OldMDPModel.hpp:377
Core.hpp
OldMDPModel::getExpectedReward
double getExpectedReward(size_t s, size_t a, size_t s1) const
This function returns the stored expected reward for the specified transition.
Definition: OldMDPModel.hpp:356
OldMDPModel::getS
size_t getS() const
This function returns the number of states of the world.
Definition: OldMDPModel.hpp:376
AIToolbox::DumbMatrix3D
boost::multi_array< double, 3 > DumbMatrix3D
Definition: Types.hpp:37
OldMDPModel::getTransitionProbability
double getTransitionProbability(size_t s, size_t a, size_t s1) const
This function returns the stored transition probability for the specified transition.
Definition: OldMDPModel.hpp:352
OldMDPModel::getRewardFunction
const RewardMatrix & getRewardFunction() const
This function returns the rewards matrix for inspection.
Definition: OldMDPModel.hpp:381
AIToolbox::copyDumb3D
void copyDumb3D(const T &in, U &out, const size_t d1, const size_t d2, const size_t d3)
Copies a 3d container into another 3d container.
Definition: Core.hpp:350
OldMDPModel::RewardMatrix
AIToolbox::DumbMatrix3D RewardMatrix
Definition: OldMDPModel.hpp:78
OldMDPModel::isTerminal
bool isTerminal(size_t s) const
This function returns whether a given state is a terminal.
Definition: OldMDPModel.hpp:365
OldMDPModel::setDiscount
void setDiscount(double d)
This function sets a new discount factor for the OldMDPModel.
Definition: OldMDPModel.hpp:360
AIToolbox::isProbability
bool isProbability(const size_t size, const T &in)
This function checks whether the supplied 1D container is a valid discrete distribution.
Definition: Probability.hpp:38
OldMDPModel::setRewardFunction
void setRewardFunction(const R &r)
This function replaces the OldMDPModel reward function with the one provided.
Definition: OldMDPModel.hpp:333
TypeTraits.hpp
AIToolbox
Definition: Experience.hpp:6
OldMDPModel::operator>>
friend std::istream & operator>>(std::istream &is, OldMDPModel &)
Seeder.hpp
AIToolbox::RandomEngine
std::mt19937 RandomEngine
Definition: Types.hpp:14
OldMDPModel::getDiscount
double getDiscount() const
This function returns the currently set discount factor.
Definition: OldMDPModel.hpp:378
AIToolbox::checkEqualSmall
bool checkEqualSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably equal.
Definition: Core.hpp:45
Types.hpp
OldMDPModel::setTransitionFunction
void setTransitionFunction(const T &t)
This function replaces the OldMDPModel transition function with the one provided.
Definition: OldMDPModel.hpp:323
AIToolbox::sampleProbability
size_t sampleProbability(const size_t d, const T &in, G &generator)
This function samples an index from a probability vector.
Definition: Probability.hpp:188
OldMDPModel::TransitionMatrix
AIToolbox::DumbMatrix3D TransitionMatrix
Definition: OldMDPModel.hpp:77
Types.hpp
TypeTraits.hpp
OldMDPModel::getTransitionFunction
const TransitionMatrix & getTransitionFunction() const
This function returns the transition matrix for inspection.
Definition: OldMDPModel.hpp:380
OldMDPModel::sampleSR
std::tuple< size_t, double > sampleSR(size_t s, size_t a) const
This function samples the MDP for the specified state action pair.
Definition: OldMDPModel.hpp:346
OldMDPModel::OldMDPModel
OldMDPModel(size_t s, size_t a, double discount=1.0)
Basic constructor.
Definition: OldMDPModel.hpp:337
Probability.hpp