AIToolbox
A library that offers tools for AI problem solving.
PBVI.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_POMDP_PBVI_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_PBVI_HEADER_FILE
3 
10 
11 namespace AIToolbox::POMDP {
43  class PBVI {
44  public:
56  PBVI(size_t nBeliefs, unsigned h, double tolerance);
57 
71  void setTolerance(double tolerance);
72 
78  void setHorizon(unsigned h);
79 
85  void setBeliefSize(size_t nBeliefs);
86 
92  double getTolerance() const;
93 
99  unsigned getHorizon() const;
100 
106  size_t getBeliefSize() const;
107 
136  template <IsModel M>
137  std::tuple<double, ValueFunction> operator()(const M & model, ValueFunction v = {});
138 
159  template <IsModel M>
160  std::tuple<double, ValueFunction> operator()(const M & model, const std::vector<Belief> & beliefs, ValueFunction v = {});
161 
162  private:
182  template <typename ProjectionsRow>
183  VList crossSum(const ProjectionsRow & projs, size_t a, const std::vector<Belief> & bl);
184 
185  size_t S, A, O, beliefSize_;
186  unsigned horizon_;
187  double tolerance_;
188 
189  mutable RandomEngine rand_;
190  };
191 
192  template <IsModel M>
193  std::tuple<double, ValueFunction> PBVI::operator()(const M & model, ValueFunction v) {
194  // In this implementation we compute all beliefs in advance. This
195  // is mostly due to the fact that I prefer counter parameters (how
196  // many beliefs do you want?) versus timers (loop until time is
197  // up). However, this is easily changeable, since the belief generator
198  // can be called multiple times to increase the size of the belief
199  // vector.
200  BeliefGenerator bGen(model);
201  return operator()(model, bGen(beliefSize_), v);
202  }
203 
204  template <IsModel M>
205  std::tuple<double, ValueFunction> PBVI::operator()(const M & model, const std::vector<Belief> & beliefs, ValueFunction v) {
206  // Initialize "global" variables
207  S = model.getS();
208  A = model.getA();
209  O = model.getO();
210 
211  if (v.size() == 0)
212  v = makeValueFunction(S);
213 
214  unsigned timestep = 0;
215 
216  Projecter projecter(model);
217 
218  // And off we go
219  const bool useTolerance = checkDifferentSmall(tolerance_, 0.0);
220  double variation = tolerance_ * 2; // Make it bigger
221  while ( timestep < horizon_ && ( !useTolerance || variation > tolerance_ ) ) {
222  ++timestep;
223 
224  // Compute all possible outcomes, from our previous results.
225  // This means that for each action-observation pair, we are going
226  // to obtain the same number of possible outcomes as the number
227  // of entries in our initial vector w.
228  auto projs = projecter(v.back());
229 
230  size_t finalWSize = 0;
231  // In this method we split the work by action, which will then
232  // be joined again at the end of the loop. This is not required,
233  // but there does not seem to be a speed boost by not doing
234  // so (not that I found one, if there is one I'd like to know!)
235  for ( size_t a = 0; a < A; ++a ) {
236  projs[a][0] = crossSum( projs[a], a, beliefs );
237  finalWSize += projs[a][0].size();
238  }
239  VList w;
240  w.reserve(finalWSize);
241 
242  for ( size_t a = 0; a < A; ++a )
243  w.insert(std::end(w), std::make_move_iterator(std::begin(projs[a][0])), std::make_move_iterator(std::end(projs[a][0])));
244 
245  auto begin = std::begin(w);
246  auto end = std::end(w);
247  auto bound = begin;
248  for ( const auto & belief : beliefs )
249  bound = extractBestAtPoint(belief, begin, bound, end, unwrap);
250 
251  w.erase(bound, std::end(w));
252 
253  // If you want to save as much memory as possible, do this.
254  // It make take some time more though since it needs to reallocate
255  // and copy stuff around.
256  // w.shrink_to_fit();
257 
258  v.emplace_back(std::move(w));
259 
260  // Check convergence
261  if ( useTolerance )
262  variation = weakBoundDistance(v[v.size()-2], v.back());
263  }
264 
265  return std::make_tuple(useTolerance ? variation : 0.0, v);
266  }
267 
268  template <typename ProjectionsRow>
269  VList PBVI::crossSum(const ProjectionsRow & projs, const size_t a, const std::vector<Belief> & bl) {
270  VList result;
271  result.reserve(bl.size());
272 
273  for ( const auto & b : bl )
274  result.emplace_back(crossSumBestAtBelief(b, projs, a));
275 
276  const auto rbegin = std::begin(result);
277  const auto rend = std::end (result);
278 
279  result.erase(extractDominated(rbegin, rend, unwrap), rend);
280 
281  return result;
282  }
283 }
284 
285 #endif
AIToolbox::POMDP::PBVI
This class implements the Point Based Value Iteration algorithm.
Definition: PBVI.hpp:43
AIToolbox::checkDifferentSmall
bool checkDifferentSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably different.
Definition: Core.hpp:60
AIToolbox::POMDP
Definition: AMDP.hpp:14
AIToolbox::POMDP::PBVI::getTolerance
double getTolerance() const
This function returns the currently set tolerance parameter.
AIToolbox::POMDP::PBVI::getHorizon
unsigned getHorizon() const
This function returns the currently set horizon parameter.
TypeTraits.hpp
AIToolbox::extractDominated
Iterator extractDominated(Iterator begin, Iterator end, P p=P{})
This function finds and moves all Hyperplanes in the range that are dominated by others.
Definition: Prune.hpp:30
AIToolbox::POMDP::PBVI::setHorizon
void setHorizon(unsigned h)
This function sets a new horizon parameter.
AIToolbox::POMDP::crossSumBestAtBelief
void crossSumBestAtBelief(const Belief &b, const ActionRow &row, VEntry *outp, double *value=nullptr)
This function computes the best VEntry for the input belief from the input VLists.
Definition: Utils.hpp:504
AIToolbox::POMDP::VList
std::vector< VEntry > VList
Definition: Types.hpp:77
AIToolbox::extractBestAtPoint
Iterator extractBestAtPoint(const Point &point, Iterator begin, Iterator bound, Iterator end, P p=P{})
This function finds and moves the Hyperplane with the highest value for the given point at the beginning of the range.
Definition: Polytope.hpp:177
AIToolbox::POMDP::BeliefGenerator
This class generates reachable beliefs from a given Model.
Definition: BeliefGenerator.hpp:18
AIToolbox::POMDP::PBVI::operator()
std::tuple< double, ValueFunction > operator()(const M &model, ValueFunction v={})
This function solves a POMDP::Model approximately.
Definition: PBVI.hpp:193
AIToolbox::POMDP::PBVI::setTolerance
void setTolerance(double tolerance)
This function sets the tolerance parameter.
AIToolbox::POMDP::makeValueFunction
ValueFunction makeValueFunction(size_t S)
This function creates a default ValueFunction.
AIToolbox::POMDP::weakBoundDistance
double weakBoundDistance(const VList &oldV, const VList &newV)
This function returns a weak measure of distance between two VLists.
AIToolbox::RandomEngine
std::mt19937 RandomEngine
Definition: Types.hpp:14
Prune.hpp
AIToolbox::POMDP::ValueFunction
std::vector< VList > ValueFunction
Definition: Types.hpp:78
Types.hpp
AIToolbox::POMDP::PBVI::PBVI
PBVI(size_t nBeliefs, unsigned h, double tolerance)
Basic constructor.
Projecter.hpp
AIToolbox::POMDP::unwrap
const MDP::Values & unwrap(const VEntry &ve)
This function is used as iterator projection to obtain the Values of a VEntry.
Definition: Utils.hpp:68
AIToolbox::POMDP::PBVI::setBeliefSize
void setBeliefSize(size_t nBeliefs)
This function sets a new number of support beliefs.
AIToolbox::POMDP::Projecter
This class offers projecting facilities for Models.
Definition: Projecter.hpp:13
BeliefGenerator.hpp
AIToolbox::POMDP::PBVI::getBeliefSize
size_t getBeliefSize() const
This function returns the currently set number of support beliefs to use during a solve pass.
Utils.hpp