AIToolbox
A library that offers tools for AI problem solving.
PolicyIteration.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_MDP_POLICY_ITERATION_HEADER_FILE
2 #define AI_TOOLBOX_MDP_POLICY_ITERATION_HEADER_FILE
3 
9 
10 namespace AIToolbox::MDP {
26  public:
33  PolicyIteration(unsigned horizon, double tolerance = 0.001);
34 
43  template <IsModel M>
44  QFunction operator()(const M & m);
45 
51  void setTolerance(double t);
52 
56  void setHorizon(unsigned h);
57 
61  double getTolerance() const;
62 
66  unsigned getHorizon() const;
67 
68  private:
69  unsigned horizon_;
70  double tolerance_;
71  };
72 
73  template <IsModel M>
75  const auto S = m.getS();
76  const auto A = m.getA();
77 
78  PolicyEvaluation<M> eval(m, horizon_, tolerance_);
79 
80  auto qfun = makeQFunction(m.getS(), m.getA());
81  QGreedyPolicy p(qfun);
82  auto matrix = p.getPolicy();
83 
84  {
85 nextLoop:
86  auto [bound, v, q] = eval(p);
87  (void)bound;
88 
89  eval.setValues(std::move(v));
90  qfun = std::move(q);
91 
92  auto newMatrix = p.getPolicy();
93  for (size_t s = 0; s < S; ++s) {
94  for (size_t a = 0; a < A; ++a) {
95  if (checkDifferentSmall(matrix(s,a), newMatrix(s,a))) {
96  matrix = std::move(newMatrix);
97  goto nextLoop;
98  }
99  }
100  }
101  }
102  return qfun;
103  }
104 }
105 
106 #endif
AIToolbox::checkDifferentSmall
bool checkDifferentSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably different.
Definition: Core.hpp:60
AIToolbox::MDP::makeQFunction
QFunction makeQFunction(size_t S, size_t A)
This function creates and zeroes a QFunction.
AIToolbox::MDP::PolicyEvaluation::setValues
void setValues(Values v)
This function sets the starting value function.
Definition: PolicyEvaluation.hpp:215
AIToolbox::MDP::PolicyIteration::getTolerance
double getTolerance() const
This function returns the currently set tolerance parameter.
AIToolbox::MDP::QFunction
Matrix2D QFunction
Definition: Types.hpp:52
AIToolbox::MDP::PolicyIteration::getHorizon
unsigned getHorizon() const
This function returns the currently set horizon parameter.
AIToolbox::MDP::QGreedyPolicy::getPolicy
virtual Matrix2D getPolicy() const override
This function returns a matrix containing all probabilities of the policy.
AIToolbox::MDP::PolicyIteration::setHorizon
void setHorizon(unsigned h)
This function sets the horizon parameter.
AIToolbox::MDP::PolicyEvaluation
This class applies the policy evaluation algorithm on a policy.
Definition: PolicyEvaluation.hpp:28
AIToolbox::MDP::PolicyIteration::setTolerance
void setTolerance(double t)
This function sets the tolerance parameter.
PolicyEvaluation.hpp
AIToolbox::MDP::PolicyIteration::PolicyIteration
PolicyIteration(unsigned horizon, double tolerance=0.001)
Basic constructor.
AIToolbox::MDP::PolicyIteration
This class represents the Policy Iteration algorithm.
Definition: PolicyIteration.hpp:25
AIToolbox::MDP
Definition: DoubleQLearning.hpp:10
AIToolbox::MDP::PolicyIteration::operator()
QFunction operator()(const M &m)
This function applies policy iteration on an MDP to solve it.
Definition: PolicyIteration.hpp:74
Utils.hpp
AIToolbox::MDP::QGreedyPolicy
This class implements a greedy policy through a QFunction.
Definition: QGreedyPolicy.hpp:13
Types.hpp
QGreedyPolicy.hpp
TypeTraits.hpp