AIToolbox
A library that offers tools for AI problem solving.
PolicyEvaluation.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_MDP_POLICY_EVALUATION_HEADER_FILE
2 #define AI_TOOLBOX_MDP_POLICY_EVALUATION_HEADER_FILE
3 
4 #include <tuple>
5 #include <iterator>
6 
7 #include <AIToolbox/Logging.hpp>
10 #include <AIToolbox/MDP/Utils.hpp>
13 
14 namespace AIToolbox::MDP {
27  template <IsModel M>
29  public:
51  PolicyEvaluation(const M & m, unsigned horizon, double tolerance = 0.001, Values v = Values());
52 
63  std::tuple<double, Values, QFunction> operator()(const PolicyInterface & p);
64 
78  void setTolerance(double e);
79 
85  void setHorizon(unsigned h);
86 
97  void setValues(Values v);
98 
104  double getTolerance() const;
105 
111  unsigned getHorizon() const;
112 
118  const Values & getValues() const;
119 
120  private:
121  // Parameters
122  double tolerance_;
123  unsigned horizon_;
124  Values vParameter_;
125  const M & model_;
126 
127  // Internals
128  QFunction immediateRewards_;
129  Values v1_;
130  size_t S, A;
131  };
132 
133  template <IsModel M>
134  PolicyEvaluation<M>::PolicyEvaluation(const M & m, const unsigned horizon, const double tolerance, Values v) :
135  horizon_(horizon), vParameter_(std::move(v)), model_(m), S(0), A(0)
136  {
137  setTolerance(tolerance);
138 
139  // Extract necessary knowledge from model so we don't have to pass it around
140  S = model_.getS();
141  A = model_.getA();
142 
143  // Only compute the immediate rewards if we need them.
144  if constexpr (!IsModelEigen<M>)
145  immediateRewards_ = computeImmediateRewards(m);
146  }
147 
148  template <IsModel M>
149  std::tuple<double, Values, QFunction> PolicyEvaluation<M>::operator()(const PolicyInterface & policy) {
150  {
151  // Verify that parameter value function is compatible.
152  const size_t size = vParameter_.size();
153  if ( size != S ) {
154  if ( size != 0 ) {
155  AI_LOGGER(AI_SEVERITY_WARNING, "Size of starting value function is incorrect, ignoring...");
156  }
157  // Defaulting
158  v1_ = Values(S);
159  v1_.setZero();
160  }
161  else
162  v1_ = vParameter_;
163  }
164 
165  unsigned timestep = 0;
166  double variation = tolerance_ * 2; // Make it bigger
167 
168  Values val0;
169  QFunction q = makeQFunction(S, A);
170  const auto p = policy.getPolicy();
171 
172  const bool useTolerance = checkDifferentSmall(tolerance_, 0.0);
173  while ( timestep < horizon_ && (!useTolerance || variation > tolerance_) ) {
174  ++timestep;
175  AI_LOGGER(AI_SEVERITY_DEBUG, "Processing timestep " << timestep);
176 
177  val0 = v1_;
178 
179  // We apply the discount directly on the values vector.
180  v1_ *= model_.getDiscount();
181  // We use the implicit reward function if it is available,
182  // otherwise we use the one we computed beforehand.
183  if constexpr(IsModelEigen<M>)
184  q = computeQFunction(model_, v1_, model_.getRewardFunction());
185  else
186  q = computeQFunction(model_, v1_, immediateRewards_);
187 
188  // Compute the values for this policy
189  for ( size_t s = 0; s < S; ++s )
190  v1_(s) = q.row(s) * p.row(s).transpose();
191 
192  // We do this only if the tolerance specified is positive,
193  // otherwise we continue for all the timesteps.
194  if ( useTolerance )
195  variation = (v1_ - val0).cwiseAbs().maxCoeff();
196  }
197 
198  // We do not guarantee that the Value/QFunctions are the perfect
199  // ones, as we stop within the input tolerance.
200  return std::make_tuple(useTolerance ? variation : 0.0, std::move(v1_), std::move(q));
201  }
202 
203  template <IsModel M>
204  void PolicyEvaluation<M>::setTolerance(const double t) {
205  if ( t < 0.0 ) throw std::invalid_argument("Tolerance must be >= 0");
206  tolerance_ = t;
207  }
208 
209  template <IsModel M>
210  void PolicyEvaluation<M>::setHorizon(const unsigned h) {
211  horizon_ = h;
212  }
213 
214  template <IsModel M>
216  vParameter_ = std::move(v);
217  }
218 
219  template <IsModel M>
220  double PolicyEvaluation<M>::getTolerance() const { return tolerance_; }
221 
222  template <IsModel M>
223  unsigned PolicyEvaluation<M>::getHorizon() const { return horizon_; }
224 
225  template <IsModel M>
226  const Values & PolicyEvaluation<M>::getValues() const { return vParameter_; }
227 }
228 
229 #endif
AIToolbox::MDP::computeQFunction
QFunction computeQFunction(const M &model, const Values &v, QFunction ir)
This function computes the Model's QFunction from the values of a ValueFunction.
Definition: Utils.hpp:106
AIToolbox::checkDifferentSmall
bool checkDifferentSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably different.
Definition: Core.hpp:60
AIToolbox::MDP::makeQFunction
QFunction makeQFunction(size_t S, size_t A)
This function creates and zeroes a QFunction.
AIToolbox::MDP::PolicyEvaluation::setValues
void setValues(Values v)
This function sets the starting value function.
Definition: PolicyEvaluation.hpp:215
AIToolbox::MDP::computeImmediateRewards
Matrix2D computeImmediateRewards(const M &model)
This function computes all immediate rewards (state and action) of the MDP once for improved speed.
Definition: Utils.hpp:77
AI_SEVERITY_WARNING
#define AI_SEVERITY_WARNING
Definition: Logging.hpp:70
PolicyInterface.hpp
AIToolbox::MDP::QFunction
Matrix2D QFunction
Definition: Types.hpp:52
AIToolbox::MDP::PolicyEvaluation
This class applies the policy evaluation algorithm on a policy.
Definition: PolicyEvaluation.hpp:28
AIToolbox::MDP::PolicyEvaluation::getValues
const Values & getValues() const
This function will return the currently set default values.
Definition: PolicyEvaluation.hpp:226
AIToolbox::MDP
Definition: DoubleQLearning.hpp:10
AIToolbox::MDP::Values
Vector Values
Definition: Types.hpp:44
AIToolbox::MDP::PolicyEvaluation::setTolerance
void setTolerance(double e)
This function sets the tolerance parameter.
Definition: PolicyEvaluation.hpp:204
Utils.hpp
AIToolbox::MDP::PolicyInterface::getPolicy
virtual Matrix2D getPolicy() const =0
This function returns a matrix containing all probabilities of the policy.
AIToolbox::MDP::PolicyEvaluation::operator()
std::tuple< double, Values, QFunction > operator()(const PolicyInterface &p)
This function applies policy evaluation on a policy.
Definition: PolicyEvaluation.hpp:149
AIToolbox::MDP::PolicyEvaluation::getHorizon
unsigned getHorizon() const
This function will return the current horizon parameter.
Definition: PolicyEvaluation.hpp:223
AIToolbox::MDP::PolicyEvaluation::PolicyEvaluation
PolicyEvaluation(const M &m, unsigned horizon, double tolerance=0.001, Values v=Values())
Basic constructor.
Definition: PolicyEvaluation.hpp:134
AIToolbox::MDP::PolicyEvaluation::setHorizon
void setHorizon(unsigned h)
This function sets the horizon parameter.
Definition: PolicyEvaluation.hpp:210
Types.hpp
TypeTraits.hpp
Logging.hpp
AIToolbox::MDP::PolicyInterface
Simple typedef for most of MDP's policy needs.
Definition: PolicyInterface.hpp:11
AI_LOGGER
#define AI_LOGGER(SEV, ARGS)
Definition: Logging.hpp:114
AIToolbox::MDP::PolicyEvaluation::getTolerance
double getTolerance() const
This function will return the currently set tolerance parameter.
Definition: PolicyEvaluation.hpp:220
AI_SEVERITY_DEBUG
#define AI_SEVERITY_DEBUG
Definition: Logging.hpp:68
Probability.hpp