AIToolbox
A library that offers tools for AI problem solving.
ValueIteration.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_MDP_VALUE_ITERATION_HEADER_FILE
2 #define AI_TOOLBOX_MDP_VALUE_ITERATION_HEADER_FILE
3 
4 #include <AIToolbox/Logging.hpp>
9 
10 namespace AIToolbox::MDP {
28  public:
    /// Basic constructor.
    ///
    /// @param horizon Upper bound on the number of update sweeps operator() performs.
    /// @param tolerance Variation threshold used by operator() as its stopping
    ///                  criterion; when it is not meaningfully different from zero,
    ///                  operator() iterates for the whole horizon instead.
    /// @param v Starting value function (presumably stored in vParameter_ -- the
    ///          constructor body is not visible here, confirm).
49  ValueIteration(unsigned horizon, double tolerance = 0.001, ValueFunction v = {Values(), Actions(0)});
50 
    /// This function applies value iteration on an MDP to solve it.
    ///
    /// @param m The model to solve.
    ///
    /// @return A tuple with the last measured variation (0.0 when the tolerance
    ///         is not in use), the computed ValueFunction and the computed QFunction.
63  template <IsModel M>
64  std::tuple<double, ValueFunction, QFunction> operator()(const M & m);
65 
    /// This function sets the tolerance parameter.
79  void setTolerance(double e);
80 
    /// This function sets the horizon parameter.
86  void setHorizon(unsigned h);
87 
99 
    /// This function returns the currently set tolerance parameter.
105  double getTolerance() const;
106 
    /// This function returns the current horizon parameter.
112  unsigned getHorizon() const;
113 
    /// This function returns the currently set default (starting) value function.
119  const ValueFunction & getValueFunction() const;
120 
121  private:
122  // Parameters
    // tolerance_ and horizon_ drive the stopping condition of operator();
    // vParameter_ is the starting value function, used by operator() only
    // when its size matches the model's number of states.
123  double tolerance_;
124  unsigned horizon_;
125  ValueFunction vParameter_;
126 
127  // Internals
    // Working value function: reset at the start of every operator() call and
    // moved out into the returned tuple at its end.
128  ValueFunction v1_;
129  };
130 
131  template <IsModel M>
132  std::tuple<double, ValueFunction, QFunction> ValueIteration::operator()(const M & model) {
133  // Extract necessary knowledge from model so we don't have to pass it around
134  const size_t S = model.getS();
135  const size_t A = model.getA();
136 
137  {
138  // Verify that parameter value function is compatible.
139  const size_t size = vParameter_.values.size();
140  if ( size != S ) {
141  if ( size != 0 ) {
142  AI_LOGGER(AI_SEVERITY_WARNING, "Size of starting value function is incorrect, ignoring...");
143  }
144  // Defaulting
145  v1_ = makeValueFunction(S);
146  }
147  else
148  v1_ = vParameter_;
149  }
150 
151  const auto & ir = [&]{
152  if constexpr (IsModelEigen<M>) return model.getRewardFunction();
153  else return computeImmediateRewards(model);
154  }();
155 
156  unsigned timestep = 0;
157  double variation = tolerance_ * 2; // Make it bigger
158 
159  Values val0;
160  auto & val1 = v1_.values;
161  QFunction q = makeQFunction(S, A);
162 
163  const bool useTolerance = checkDifferentSmall(tolerance_, 0.0);
164  while ( timestep < horizon_ && (!useTolerance || variation > tolerance_) ) {
165  ++timestep;
166  AI_LOGGER(AI_SEVERITY_DEBUG, "Processing timestep " << timestep);
167 
168  val0 = val1;
169 
170  // We apply the discount directly on the values vector.
171  val1 *= model.getDiscount();
172  q = computeQFunction(model, val1, ir);
173 
174  // Compute the new value function (note that also val1 is overwritten)
175  bellmanOperatorInplace(q, &v1_);
176 
177  // We do this only if the tolerance specified is positive, otherwise we
178  // continue for all the timesteps.
179  if ( useTolerance )
180  variation = (val1 - val0).cwiseAbs().maxCoeff();
181  }
182 
183  // We do not guarantee that the Value/QFunctions are the perfect ones,
184  // as we stop as within the given tolerance.
185  return std::make_tuple(useTolerance ? variation : 0.0, std::move(v1_), std::move(q));
186  }
187 }
188 
189 #endif
AIToolbox::MDP::computeQFunction
QFunction computeQFunction(const M &model, const Values &v, QFunction ir)
This function computes the Model's QFunction from the values of a ValueFunction.
Definition: Utils.hpp:106
AIToolbox::checkDifferentSmall
bool checkDifferentSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably different.
Definition: Core.hpp:60
AIToolbox::MDP::bellmanOperatorInplace
void bellmanOperatorInplace(const QFunction &q, ValueFunction *v)
This function converts a QFunction into the equivalent optimal ValueFunction.
AIToolbox::MDP::makeQFunction
QFunction makeQFunction(size_t S, size_t A)
This function creates and zeroes a QFunction.
AIToolbox::MDP::ValueIteration::setValueFunction
void setValueFunction(ValueFunction v)
This function sets the starting value function.
AIToolbox::MDP::computeImmediateRewards
Matrix2D computeImmediateRewards(const M &model)
This function computes all immediate rewards (state and action) of the MDP once for improved speed.
Definition: Utils.hpp:77
AI_SEVERITY_WARNING
#define AI_SEVERITY_WARNING
Definition: Logging.hpp:70
AIToolbox::MDP::QFunction
Matrix2D QFunction
Definition: Types.hpp:52
AIToolbox::MDP::ValueIteration::setHorizon
void setHorizon(unsigned h)
This function sets the horizon parameter.
AIToolbox::MDP::ValueIteration::getHorizon
unsigned getHorizon() const
This function will return the current horizon parameter.
AIToolbox::MDP::ValueFunction
Definition: Types.hpp:47
AIToolbox::MDP::ValueIteration::getValueFunction
const ValueFunction & getValueFunction() const
This function will return the currently set default value function.
AIToolbox::MDP::ValueIteration::ValueIteration
ValueIteration(unsigned horizon, double tolerance=0.001, ValueFunction v={Values(), Actions(0)})
Basic constructor.
AIToolbox::MDP::ValueFunction::values
Values values
Definition: Types.hpp:48
AIToolbox::MDP::Actions
std::vector< size_t > Actions
Definition: Types.hpp:45
AIToolbox::MDP
Definition: DoubleQLearning.hpp:10
AIToolbox::MDP::Values
Vector Values
Definition: Types.hpp:44
AIToolbox::MDP::ValueIteration::getTolerance
double getTolerance() const
This function will return the currently set tolerance parameter.
Utils.hpp
AIToolbox::MDP::ValueIteration
This class applies the value iteration algorithm on a Model.
Definition: ValueIteration.hpp:27
AIToolbox::MDP::ValueIteration::setTolerance
void setTolerance(double e)
This function sets the tolerance parameter.
AIToolbox::MDP::ValueIteration::operator()
std::tuple< double, ValueFunction, QFunction > operator()(const M &m)
This function applies value iteration on an MDP to solve it.
Definition: ValueIteration.hpp:132
Types.hpp
TypeTraits.hpp
AIToolbox::MDP::makeValueFunction
ValueFunction makeValueFunction(size_t S)
This function creates and zeroes a ValueFunction.
Logging.hpp
AI_LOGGER
#define AI_LOGGER(SEV, ARGS)
Definition: Logging.hpp:114
AI_SEVERITY_DEBUG
#define AI_SEVERITY_DEBUG
Definition: Logging.hpp:68
Probability.hpp