AIToolbox
A library that offers tools for AI problem solving.
GapMin.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_POMDP_GAPMIN_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_GAPMIN_HEADER_FILE
3 
4 #include <algorithm>
5 
6 #include <boost/heap/fibonacci_heap.hpp>
7 
8 #include <AIToolbox/Logging.hpp>
9 
11 
14 #include <AIToolbox/MDP/Model.hpp>
16 
20 
21 namespace AIToolbox::POMDP {
52  class GapMin {
53  public:
69  GapMin(double initialTolerance, unsigned precisionDigits);
70 
85  void setInitialTolerance(double initialTolerance);
86 
92  double getInitialTolerance() const;
93 
115  void setPrecisionDigits(unsigned digits);
116 
124  unsigned getPrecisionDigits() const;
125 
134  template <IsModel M>
135  std::tuple<double, double, VList, MDP::QFunction> operator()(const M & model, const Belief & initialBelief);
136 
137  private:
139 
140  // Queue sorted by gap. belief, gap, prob, lb, ub, depth, path
141  using QueueElement = std::tuple<Belief, double, double, double, double, unsigned, std::vector<Belief>>;
142 
143  struct QueueElementLess {
144  bool operator() (const QueueElement& arg1, const QueueElement& arg2) const;
145  };
146 
147  using QueueType = boost::heap::fibonacci_heap<QueueElement, boost::heap::compare<QueueElementLess>>;
148 
171  template <IsModel M>
172  std::tuple<std::vector<Belief>, std::vector<Belief>, std::vector<double>> selectReachableBeliefs(
173  const M & model,
174  const Belief & belief,
175  const VList & lbV,
176  const std::vector<Belief> & lbBeliefs,
177  const MDP::QFunction & ubQ, const UpperBoundValueFunction & ubV
178  );
179 
199  template <IsModel M>
200  std::tuple<IntermediatePOMDP, SparseMatrix4D> makeNewPomdp(const M& model, const MDP::QFunction & ubQ, const UpperBoundValueFunction & ubV);
201 
218  void cleanUp(const MDP::QFunction & ubQ, UpperBoundValueFunction * ubV, Matrix2D * fibQ);
219 
220  Matrix2D immediateRewards_;
221  double tolerance_;
222  double initialTolerance_;
223  unsigned precisionDigits_;
224  };
225 
226  template <IsModel M>
227  std::tuple<double, double, VList, MDP::QFunction> GapMin::operator()(const M & pomdp, const Belief & initialBelief) {
228  constexpr unsigned infiniteHorizon = 1000000;
229 
230  // Cache immediate rewards if we can't read the reward function directly.
231  if constexpr (!MDP::IsModelEigen<M>)
232  immediateRewards_ = computeImmediateRewards(pomdp);
233 
234  // Reset tolerance to set parameter;
235  tolerance_ = initialTolerance_;
236 
237  // Helper methods
238  BlindStrategies bs(infiniteHorizon, tolerance_);
239  FastInformedBound fib(infiniteHorizon, tolerance_);
240  PBVI pbvi(0, infiniteHorizon, tolerance_);
241 
242  // Here we use the BlindStrategies in order to obtain a very simple
243  // initial lower bound.
244  VList lbVList = std::get<1>(bs(pomdp, true));
245  lbVList.erase(extractDominated(std::begin(lbVList), std::end(lbVList), unwrap), std::end(lbVList));
246 
247  auto lbBeliefs = std::vector<Belief>{initialBelief};
248 
249  // The same we do here with FIB for the input POMDP.
250  MDP::QFunction ubQ = std::get<1>(fib(pomdp));
251  AI_LOGGER(AI_SEVERITY_DEBUG, "Initial QFunction:\n" << ubQ);
252 
253  // At the same time, we start initializing fibQ, which will be our
254  // pseudo-alphaVector storage for our belief-POMDPs which we'll create
255  // later.
256  //
257  // The basic idea is to create a new POMDP where each state is a belief
258  // of the input POMDP. This allows us to obtain better upper bounds,
259  // and project them to our input POMDP.
260  auto fibQ = Matrix2D(pomdp.getS()+1, pomdp.getA());
261  fibQ.topLeftCorner(pomdp.getS(), pomdp.getA()).noalias() = ubQ;
262  fibQ.row(pomdp.getS()).noalias() = initialBelief.transpose() * ubQ;
263 
264  // While we store the lower bound as alphaVectors, the upper bound is
265  // composed by both alphaVectors (albeit only S of them - out of the
266  // FastInformedBound), and a series of belief-value pairs, which we'll
267  // use with the later-constructed new POMDP in order to improve our
268  // bounds.
270  {initialBelief}, {fibQ.row(pomdp.getS()).maxCoeff()}
271  };
272 
273  // We also store two numbers for the overall lowerBound/upperBound
274  // differences. They are the values of the lowerBound and the
275  // upperBound at the initial belief.
276  double lb;
277  findBestAtPoint(initialBelief, std::begin(lbVList), std::end(lbVList), &lb, unwrap);
278 
279  double ub = ubV.second[0];
280 
281  AI_LOGGER(AI_SEVERITY_INFO, "Initial bounds: " << lb << ", " << ub);
282 
283  while (true) {
284  double threshold = std::pow(10, std::ceil(std::log10(std::max(std::fabs(ub), std::fabs(lb))))-precisionDigits_);
285  auto var = ub - lb;
286 
287  if (checkEqualSmall(var, 0.0) || var < threshold)
288  break;
289 
290  tolerance_ = threshold * (1.0 - pomdp.getDiscount()) / 2.0;
291  // Now we find beliefs for both lower and upper bound where we
292  // think we can improve. For the ub beliefs we also return their
293  // values, since we need them to improve the ub.
294  auto [newLbBeliefs, newUbBeliefs, newUbVals] = selectReachableBeliefs(pomdp, initialBelief, lbVList, lbBeliefs, ubQ, ubV);
295  const auto newLbBeliefsSize = newLbBeliefs.size();
296  const auto newUbBeliefsSize = newUbBeliefs.size();
297 
298  if (newLbBeliefsSize > 0) {
299  AI_LOGGER(AI_SEVERITY_DEBUG, "LB: Adding " << newLbBeliefsSize << " new beliefs...");
300  for (const auto & b : newLbBeliefs)
301  AI_LOGGER(AI_SEVERITY_DEBUG, "LB: - Belief: " << b.transpose());
302  // If we found something interesting for the lower bound, we
303  // add it to the beliefs we already had, and we rerun PBVI.
304  lbBeliefs.insert(std::end(lbBeliefs), std::make_move_iterator(std::begin(newLbBeliefs)), std::make_move_iterator(std::end(newLbBeliefs)));
305 
306  {
307  // Then we remove all beliefs which don't actively support any
308  // alphaVectors.
309  auto sol = pbvi(pomdp, lbBeliefs, ValueFunction{std::move(lbVList)});
310 
311  lbVList = std::move(std::get<1>(sol).back());
312 
313  const auto rbegin = std::begin(lbVList);
314  const auto rend = std::end (lbVList);
315 
316  lbBeliefs.erase(
318  std::begin(lbBeliefs), std::end(lbBeliefs),
319  rbegin, rend, unwrap
320  ),
321  std::end(lbBeliefs)
322  );
323 
324  // And we recompute the lower bound.
325  findBestAtPoint(initialBelief, rbegin, rend, &lb, unwrap);
326  }
327  }
328 
329  if (newUbBeliefsSize > 0) {
330  // Here we do the same for the upper bound.
331  const auto prevRows = pomdp.getS() + ubV.first.size();
332  fibQ.conservativeResize(prevRows + newUbBeliefsSize, Eigen::NoChange);
333 
334  AI_LOGGER(AI_SEVERITY_DEBUG, "UB: Adding " << newUbBeliefsSize << " new beliefs...");
335  for (size_t i = 0; i < newUbBeliefsSize; ++i)
336  AI_LOGGER(AI_SEVERITY_DEBUG, "UB: - Belief: " << newUbBeliefs[i].transpose() << " -- value: " << newUbVals[i]);
337 
338  // For each newly found belief which can improve the upper
339  // bound, we add it to to the list containing the beliefs for
340  // the upper bound. At the same time we add horizontal planes
341  // in the fibQ which will come useful on the next round of
342  // FastInformedBound.
343  for (size_t i = 0; i < newUbBeliefs.size(); ++i) {
344  ubV.first.emplace_back(std::move(newUbBeliefs[i]));
345  ubV.second.emplace_back(newUbVals[i]);
346  fibQ.row(prevRows + i).fill(newUbVals[i]);
347  }
348 
349  // We create a new POMDP where each state is a belief.
350  auto [newPOMDP, newPOMDPSOSA] = makeNewPomdp(pomdp, ubQ, ubV);
351  // And we approximate its upper bound.
352  fibQ = std::get<1>(fib(newPOMDP, newPOMDPSOSA, std::move(fibQ)));
353  // We extract from the found upper bound the part for the
354  // states of the input POMDP, and we copy them to our
355  // upperBound alphavectors. We additionally update the values
356  // for all ub beliefs.
357  ubQ.noalias() = fibQ.topRows(pomdp.getS());
358  for (size_t i = 0; i < ubV.second.size(); ++i)
359  ubV.second[i] = fibQ.row(pomdp.getS() + i).maxCoeff();
360 
361  // Finally, we remove some unused stuff, and we recompute the upperbound.
362  cleanUp(ubQ, &ubV, &fibQ);
363 
364  ub = std::get<0>(LPInterpolation(initialBelief, ubQ, ubV));
365  }
366 
367  // Update the difference between upper and lower bound so we can
368  // return it/use it to stop the loop.
369  auto oldVar = var;
370  var = ub - lb;
371  AI_LOGGER(AI_SEVERITY_INFO, "Updated bounds to " << lb << ", " << ub << " -- size LB: " << lbVList.size() << ", size UB " << ubV.first.size());
372 
373  // Stop if we didn't find anything new, or if we have converged the bounds.
374  if (newLbBeliefsSize + newUbBeliefsSize == 0 || std::fabs(var - oldVar) < tolerance_ * 5)
375  break;
376  }
377  return std::make_tuple(lb, ub, lbVList, ubQ);
378  }
379 
380  template <IsModel M>
381  std::tuple<GapMin::IntermediatePOMDP, SparseMatrix4D> GapMin::makeNewPomdp(const M& model, const MDP::QFunction & ubQ, const UpperBoundValueFunction & ubV) {
382  size_t S = model.getS() + ubV.first.size();
383 
384  // First we build the new reward function. For normal states, this is
385  // the same as the old one. For all additional states (beliefs), we
386  // simply take their expected reward with respect to the original
387  // reward function.
388  Matrix2D R(S, model.getA());
389  const auto & ir = [&]{
390  if constexpr (MDP::IsModelEigen<M>) return model.getRewardFunction();
391  else return immediateRewards_;
392  }();
393 
394  R.topRows(model.getS()) = ir;
395  for (size_t b = 0; b < ubV.first.size(); ++b)
396  R.row(model.getS()+b) = ubV.first[b].transpose() * ir;
397 
398  // Now we create the SOSA matrix for this new POMDP. For each pair of
399  // action/observation, and for each belief we have (thus state), we
400  // compute the probability of going to any other belief.
401  //
402  // This is done through the UB function, although I must admit I don't
403  // fully understand the math behind of why it works.
404  Belief helper(model.getS()), corner(model.getS());
405  corner.setZero();
406 
407  SparseMatrix4D sosa( boost::extents[model.getA()][model.getO()] );
408  const auto updateMatrix = [&](SparseMatrix2D & m, const Belief & b, size_t a, size_t o, size_t index) {
409  updateBeliefUnnormalized(model, b, a, o, &helper);
410  auto sum = helper.sum();
411  if (checkDifferentSmall(sum, 0.0)) {
412  // Note that we do not normalize helper since we'd also have to
413  // multiply `dist` by the same probability. Instead we don't
414  // normalize, and we don't multiply, so we save some work.
415  Vector dist = std::get<1>(LPInterpolation(helper, ubQ, ubV));
416  for (size_t i = 0; i < S; ++i)
417  if (checkDifferentSmall(dist[i], 0.0))
418  m.insert(index, i) = dist[i];
419  }
420  };
421 
422  for (size_t a = 0; a < model.getA(); ++a) {
423  for (size_t o = 0; o < model.getO(); ++o) {
424  SparseMatrix2D m(S, S);
425  for (size_t s = 0; s < model.getS(); ++s) {
426  corner[s] = 1.0;
427  updateMatrix(m, corner, a, o, s);
428  corner[s] = 0.0;
429  }
430 
431  for (size_t b = 0; b < ubV.first.size(); ++b)
432  updateMatrix(m, ubV.first[b], a, o, model.getS() + b);
433 
434  // After updating all rows of the matrix, we put it inside the
435  // SOSA matrix.
436  sosa[a][o] = std::move(m);
437  sosa[a][o].makeCompressed();
438  }
439  }
440 
441  // Finally we return a POMDP with no transition nor observation
442  // function, since those are contained in the SOSA matrix.
443  //
444  // We do however include the new reward function that contains rewards
445  // for each new "state"/belief.
446  return std::make_tuple(
447  IntermediatePOMDP(
448  NO_CHECK, model.getO(), Matrix3D(),
449  NO_CHECK, S, model.getA(), Matrix3D(), std::move(R), model.getDiscount()
450  ),
451  std::move(sosa)
452  );
453  }
454 
455  template <IsModel M>
456  std::tuple<std::vector<Belief>, std::vector<Belief>, std::vector<double>> GapMin::selectReachableBeliefs(
457  const M & pomdp, const Belief & initialBelief, const VList & lbVList,
458  const std::vector<Belief> & lbBeliefs, const MDP::QFunction & ubQ, const UpperBoundValueFunction & ubV
459  )
460  {
461  std::vector<Belief> newLbBeliefs, newUbBeliefs, visitedBeliefs;
462  std::vector<double> newUbValues;
463 
464  constexpr size_t maxVisitedBeliefs = 1000;
465  size_t overwriteCounter = 0;
466  visitedBeliefs.reserve(maxVisitedBeliefs);
467 
468  QueueType queue;
469  unsigned newBeliefs = 0;
470 
471  // From the original code, a limitation on how many new beliefs we find.
472  const auto maxNewBeliefs = std::max(20lu, (ubV.first.size() + lbVList.size()) / 5lu);
473 
474  // We initialize the queue with the initial belief.
475  {
476  double currentLowerBound;
477  const auto rbegin = std::begin(lbVList);
478  const auto rend = std::end (lbVList);
479  findBestAtPoint(initialBelief, rbegin, rend, &currentLowerBound, unwrap);
480  const double currentUpperBound = std::get<0>(LPInterpolation(initialBelief, ubQ, ubV));
481  queue.emplace(QueueElement(initialBelief, 0.0, 1.0, currentLowerBound, currentUpperBound, 1, {}));
482  }
483 
484  while (!queue.empty() && newBeliefs < maxNewBeliefs) {
485  const auto [belief, gap, beliefProbability, currentLowerBound, currentUpperBound, depth, path] = queue.top();
486  (void)gap; // ignore gap variable
487  queue.pop();
488 
489  // We add the new belief in the history, to avoid adding to the
490  // queue the same belief multiple times. We also limit the size of
491  // the history to avoid the check taking too much time, we tend to
492  // go deeper in the belief tree so it shouldn't be too dangerous.
493  if (visitedBeliefs.size() == maxVisitedBeliefs) {
494  visitedBeliefs[overwriteCounter] = belief;
495  overwriteCounter = (overwriteCounter + 1) % maxVisitedBeliefs;
496  } else {
497  visitedBeliefs.push_back(belief);
498  }
499 
500  // We find the best action for this belief with respect to both the
501  // upperBound and the lowerBound.
502  //
503  // If the found actions improve on the bounds, then we'll add this
504  // belief to the list.
505  const auto & ir = [&]{
506  if constexpr (MDP::IsModelEigen<M>) return pomdp.getRewardFunction();
507  else return immediateRewards_;
508  }();
509  const auto [ubAction, ubActionValue] = bestPromisingAction(pomdp, ir, belief, ubQ, ubV);
510  const auto [lbAction, lbActionValue] = bestConservativeAction(pomdp, ir, belief, lbVList);
511 
512  (void)lbAction; // ignore lbAction
513 
514  /***********************
515  ** UPPER GAP **
516  ***********************/
517 
518  const auto validForUb = [&newUbBeliefs, &ubV](const Belief & b) {
519  // We don't consider corners
520  for (auto i = 0; i < b.size(); ++i)
521  if (checkEqualSmall(b[i], 0.0) || checkEqualSmall(b[i], 1.0))
522  return false;
523 
524  // We also want to check whether we have already added this belief somewhere else.
525  const auto check = [&b](const Belief & bb){ return checkEqualProbability(b, bb); };
526  if (std::any_of(std::begin(newUbBeliefs), std::end(newUbBeliefs), check))
527  return false;
528  if (std::any_of(std::begin(ubV.first), std::end(ubV.first), check))
529  return false;
530  return true;
531  };
532 
533  if (validForUb(belief) && ubActionValue < currentUpperBound - tolerance_) {
534  newUbBeliefs.push_back(belief);
535  newUbValues.push_back(ubActionValue);
536 
537  // Find all beliefs that brought us here we didn't already have.
538  // Again, we don't consider corners.
539  for (const auto & p : path) {
540  if (validForUb(p)) {
541  newUbBeliefs.push_back(p);
542  newUbValues.push_back(std::get<0>(LPInterpolation(p, ubQ, ubV)));
543  }
544  }
545  // Note we only count a single belief even if we added more via
546  // the path (as per original code).
547  ++newBeliefs;
548  }
549 
550  /***********************
551  ** LOWER GAP **
552  ***********************/
553 
554  // For the lower gap we don't care about corners (as per original
555  // code). We still check on the lower bound lists though.
556  const auto validForLb = [&newLbBeliefs, &lbBeliefs](const Belief & b) {
557  const auto check = [&b](const Belief & bb){ return checkEqualProbability(b, bb); };
558  if (std::any_of(std::begin(newLbBeliefs), std::end(newLbBeliefs), check))
559  return false;
560  if (std::any_of(std::begin(lbBeliefs), std::end(lbBeliefs), check))
561  return false;
562  return true;
563  };
564 
565  if (validForLb(belief) && lbActionValue > currentLowerBound + tolerance_) {
566  // We add the new belief, and the same is done for all
567  // beliefs that led us to this one (if they're valid -
568  // i.e., we didn't have them already).
569  newLbBeliefs.push_back(belief);
570 
571  for (const auto & p : path) {
572  if (validForLb(p)) {
573  newLbBeliefs.push_back(p);
574  }
575  }
576  // Note we only count a single belief even if we added more via
577  // the path (as per original code).
578  ++newBeliefs;
579  }
580 
581  /***********************
582  ** QUEUE EXPANSION **
583  ***********************/
584 
585  // Avoid it if we're already done anyway.
586  if (newBeliefs >= maxNewBeliefs)
587  break;
588 
589  auto newPath = path;
590  newPath.push_back(belief);
591 
592  // For each new possible belief, we look if we've already visited
593  // it. If not, we compute the gap at that point, and we add it to
594  // the queue.
595  const Belief intermediateBelief = updateBeliefPartial(pomdp, belief, ubAction);
596  for (size_t o = 0; o < pomdp.getO(); ++o) {
597  Belief nextBelief = updateBeliefPartialUnnormalized(pomdp, intermediateBelief, ubAction, o);
598 
599  const auto nextBeliefProbability = nextBelief.sum();
600  if (checkEqualSmall(nextBeliefProbability, 0.0)) continue;
601  nextBelief /= nextBeliefProbability;
602 
603  const auto check = [&nextBelief](const Belief & bb){ return checkEqualProbability(nextBelief, bb); };
604  if (std::any_of(std::begin(visitedBeliefs), std::end(visitedBeliefs), check)) continue;
605 
606  const double ubValue = std::get<0>(LPInterpolation(nextBelief, ubQ, ubV));
607  double lbValue;
608  findBestAtPoint(nextBelief, std::begin(lbVList), std::end(lbVList), &lbValue, unwrap);
609 
610  if ((ubValue - lbValue) * std::pow(pomdp.getDiscount(), depth) > tolerance_ * 20) {
611  const auto nextBeliefOverallProbability = nextBeliefProbability * beliefProbability * pomdp.getDiscount();
612  const auto nextBeliefGap = nextBeliefOverallProbability * (ubValue - lbValue);
613 
614  const auto qcheck = [&nextBelief](const QueueElement & qe){ return checkEqualProbability(nextBelief, std::get<0>(qe)); };
615  const auto it = std::find_if(std::begin(queue), std::end(queue), qcheck);
616  if (it == std::end(queue)) {
617  queue.emplace(
618  std::move(nextBelief),
619  nextBeliefGap,
620  nextBeliefOverallProbability,
621  lbValue,
622  ubValue,
623  depth+1,
624  newPath
625  );
626  } else {
627  auto handle = QueueType::s_handle_from_iterator(it);
628  std::get<1>(*handle) += nextBeliefGap;
629  std::get<2>(*handle) += nextBeliefOverallProbability;
630  std::get<5>(*handle) = std::min(std::get<5>(*handle), depth+1);
631  queue.increase(handle);
632  }
633  }
634  }
635  }
636  return std::make_tuple(std::move(newLbBeliefs), std::move(newUbBeliefs), std::move(newUbValues));
637  }
638 }
639 
640 #endif
AIToolbox::POMDP::PBVI
This class implements the Point Based Value Iteration algorithm.
Definition: PBVI.hpp:43
AIToolbox::checkDifferentSmall
bool checkDifferentSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably different.
Definition: Core.hpp:60
AIToolbox::POMDP
Definition: AMDP.hpp:14
Model.hpp
AIToolbox::POMDP::FastInformedBound
This class implements the Fast Informed Bound algorithm.
Definition: FastInformedBound.hpp:81
AIToolbox::POMDP::GapMin::getPrecisionDigits
unsigned getPrecisionDigits() const
This function returns the currently set digits of precision.
AIToolbox::NO_CHECK
struct AIToolbox::NoCheck NO_CHECK
AIToolbox::POMDP::GapMin::setPrecisionDigits
void setPrecisionDigits(unsigned digits)
This function sets the digits in precision for the returned solution.
AIToolbox::MDP::computeImmediateRewards
Matrix2D computeImmediateRewards(const M &model)
This function computes all immediate rewards (state and action) of the MDP once for improved speed.
Definition: Utils.hpp:77
Model.hpp
AIToolbox::MDP::QFunction
Matrix2D QFunction
Definition: Types.hpp:52
TypeTraits.hpp
AIToolbox::extractDominated
Iterator extractDominated(Iterator begin, Iterator end, P p=P{})
This function finds and moves all Hyperplanes in the range that are dominated by others.
Definition: Prune.hpp:30
AIToolbox::POMDP::bestConservativeAction
std::tuple< size_t, double > bestConservativeAction(const M &pomdp, MDP::QFunction immediateRewards, const Belief &initialBelief, const VList &lbVList, MDP::Values *alpha=nullptr)
This function obtains the best action with respect to the input VList.
Definition: Utils.hpp:609
AIToolbox::POMDP::GapMin::operator()
std::tuple< double, double, VList, MDP::QFunction > operator()(const M &model, const Belief &initialBelief)
This function efficiently computes bounds for the optimal value of the input belief for the input POM...
Definition: GapMin.hpp:227
AIToolbox::POMDP::updateBeliefPartialUnnormalized
void updateBeliefPartialUnnormalized(const M &model, const Belief &b, const size_t a, const size_t o, Belief *bRet)
This function terminates the unnormalized update of a partially updated belief.
Definition: Utils.hpp:350
AIToolbox::POMDP::VList
std::vector< VEntry > VList
Definition: Types.hpp:77
AIToolbox::Matrix2D
Eigen::Matrix< double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor|Eigen::AutoAlign > Matrix2D
Definition: Types.hpp:18
BlindStrategies.hpp
AIToolbox::POMDP::GapMin::setInitialTolerance
void setInitialTolerance(double initialTolerance)
This function sets the initial tolerance used to compute the initial bounds.
AIToolbox::POMDP::Model
This class represents a Partially Observable Markov Decision Process.
Definition: Model.hpp:15
AIToolbox::checkEqualProbability
bool checkEqualProbability(const ProbabilityVector &lhs, const ProbabilityVector &rhs)
This function checks whether two input ProbabilityVector are equal.
Definition: Probability.hpp:369
AIToolbox::Vector
Eigen::Matrix< double, Eigen::Dynamic, 1 > Vector
Definition: Types.hpp:16
AIToolbox::POMDP::bestPromisingAction
std::tuple< size_t, double > bestPromisingAction(const M &pomdp, const MDP::QFunction &immediateRewards, const Belief &belief, const MDP::QFunction &ubQ, const UpperBoundValueFunction &ubV, Vector *vals=nullptr)
This function obtains the best action with respect to the input QFunction and UbV.
Definition: Utils.hpp:664
AIToolbox::SparseMatrix4D
boost::multi_array< SparseMatrix2D, 2 > SparseMatrix4D
Definition: Types.hpp:25
AIToolbox::POMDP::BlindStrategies
This class implements the blind strategies lower bound.
Definition: BlindStrategies.hpp:28
AIToolbox::POMDP::GapMin::GapMin
GapMin(double initialTolerance, unsigned precisionDigits)
Basic constructor.
AIToolbox::POMDP::GapMin::getInitialTolerance
double getInitialTolerance() const
This function returns the initial tolerance used to compute the initial bounds.
AIToolbox::POMDP::ValueFunction
std::vector< VList > ValueFunction
Definition: Types.hpp:78
AIToolbox::checkEqualSmall
bool checkEqualSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably equal.
Definition: Core.hpp:45
PBVI.hpp
Types.hpp
AIToolbox::POMDP::UpperBoundValueFunction
std::pair< std::vector< Belief >, std::vector< double > > UpperBoundValueFunction
Definition: Types.hpp:80
AIToolbox::Matrix3D
std::vector< Matrix2D > Matrix3D
Definition: Types.hpp:21
AIToolbox::extractBestUsefulPoints
PIterator extractBestUsefulPoints(PIterator pbegin, PIterator pend, VIterator begin, VIterator end, P p=P{})
This function finds and moves all non-useful points at the end of the input range.
Definition: Polytope.hpp:248
AIToolbox::findBestAtPoint
Iterator findBestAtPoint(const Point &point, Iterator begin, Iterator end, double *value=nullptr, P p=P{})
This function returns an iterator pointing to the best Hyperplane for the specified point.
Definition: Polytope.hpp:65
AIToolbox::POMDP::unwrap
const MDP::Values & unwrap(const VEntry &ve)
This function is used as iterator projection to obtain the Values of a VEntry.
Definition: Utils.hpp:68
AIToolbox::ceil
unsigned ceil(unsigned x, unsigned y)
This function returns a fast ceiling between two unsigned ints.
Definition: Core.hpp:30
AIToolbox::POMDP::updateBeliefPartial
void updateBeliefPartial(const M &model, const Belief &b, const size_t a, Belief *bRet)
This function partially updates a belief.
Definition: Utils.hpp:289
AI_SEVERITY_INFO
#define AI_SEVERITY_INFO
Definition: Logging.hpp:69
AIToolbox::POMDP::updateBeliefUnnormalized
void updateBeliefUnnormalized(const M &model, const Belief &b, const size_t a, const size_t o, Belief *bRet)
Creates a new belief reflecting changes after an action and observation for a particular Model.
Definition: Utils.hpp:171
AIToolbox::LPInterpolation
std::tuple< double, Vector > LPInterpolation(const Point &p, const CompactHyperplanes &ubQ, const PointSurface &ubV)
This function computes the exact value of the input Point w.r.t. the given surfaces.
AIToolbox::POMDP::Belief
ProbabilityVector Belief
This represents a belief, which is a probability distribution over states.
Definition: Types.hpp:12
Logging.hpp
FastInformedBound.hpp
AI_LOGGER
#define AI_LOGGER(SEV, ARGS)
Definition: Logging.hpp:114
AIToolbox::POMDP::GapMin
This class implements the GapMin algorithm.
Definition: GapMin.hpp:52
Polytope.hpp
AIToolbox::SparseMatrix2D
Eigen::SparseMatrix< double, Eigen::RowMajor > SparseMatrix2D
Definition: Types.hpp:19
AI_SEVERITY_DEBUG
#define AI_SEVERITY_DEBUG
Definition: Logging.hpp:68