AIToolbox
A library that offers tools for AI problem solving.
SARSOP.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_POMDP_SARSOP_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_SARSOP_HEADER_FILE
3 
4 #include <AIToolbox/Logging.hpp>
5 
8 
11 
12 namespace AIToolbox::POMDP {
33  class SARSOP {
34  public:
41  SARSOP(double tolerance, double delta = 0.1);
42 
48  void setTolerance(double tolerance);
49 
55  double getTolerance() const;
56 
67  void setDelta(double delta);
68 
76  double getDelta() const;
77 
86  template <IsModel M>
87  std::tuple<double, double, VList, MDP::QFunction> operator()(const M & model, const Belief & initialBelief);
88 
89  private:
106  struct TreeNode {
107  Belief belief;
108 
109  // Number of non-suboptimal branches that reach this Belief.
110  unsigned count;
111 
112  // Bounds info
113  double UB, LB;
114  size_t actionUb;
115  // Per action info (per row: immediate reward, UB, suboptimal)
116  // Only initialized during expand
117  Eigen::Matrix<double, 3, Eigen::Dynamic, Eigen::RowMajor | Eigen::AutoAlign> actionData;
118 
119  // Per action-observation info
120  // Only initialized during expand
121  struct Children {
122  size_t id;
124  };
125  boost::multi_array<Children, 2> children;
126  };
127 
144  template <IsModel M>
145  void samplePoints(
146  const M & pomdp,
147  const VList & lbV,
148  const MDP::QFunction & ubQ, const UpperBoundValueFunction & ubV
149  );
150 
167  template <IsModel M>
168  void expandLeaf(
169  size_t id, const M & model,
170  const VList & lbV,
171  const MDP::QFunction & ubQ, const UpperBoundValueFunction & ubV
172  );
173 
199  template <IsModel M>
200  void updateNode(
201  TreeNode & node, const M & model,
202  const VList & lbV,
203  const MDP::QFunction & ubQ, const UpperBoundValueFunction & ubV,
204  bool expand
205  );
206 
223  template <IsModel M>
224  void backupNode(
225  size_t id, const M & model,
226  VList & lbV,
228  );
229 
230  double predictValue(size_t id, const TreeNode & node);
231  void deltaPrune(VList & lbV);
232  void deltaUpdate(const VList & lbV);
233  void updateSubOptimalPaths(TreeNode & root);
234  void treePrune(TreeNode & root);
235  void treeRevive(TreeNode & root);
236 
240  class LBPredictor {
241  public:
249  LBPredictor(size_t entropyBins, size_t UBBins, const MDP::QFunction & ubQ);
250 
269  std::pair<double, double> predict(size_t id, const TreeNode & node);
270 
271  private:
275  struct Bin {
276  double avg;
277  double error;
278  unsigned count;
279  };
280 
289  const Bin & update(size_t id, const TreeNode & node);
290 
291  const MDP::QFunction & ubQ_;
292  size_t entropyBins_, UBBins_;
293  double entropyStep_, UBMin_, UBStep_;
294 
295  // id is_initialized, ei, ubi, lb, err
296  std::unordered_map<size_t, std::tuple<bool, size_t, size_t, double, double>> nodes_;
297  // entropy x ub
298  boost::multi_array<Bin, 2> bins_;
299  };
300 
301  double tolerance_, initialDelta_;
302 
303  // Data reset at each main call
304  double delta_;
305  Matrix2D immediateRewards_;
306  std::vector<TreeNode> treeStorage_;
307  // We use this to check whether we have already encountered a
308  // Belief or not. Note that this is very sensitive to floating
309  // point errors, so it's probably not the best way to go about it
310  // (maybe an std::map using lexicographical order might be better).
311  // At the same time, SARSOP original code converted Beliefs to
312  // strings and applied md5 hashing to them, so it probably can't be
313  // worse than that either.
314  std::unordered_map<Belief, size_t, boost::hash<Belief>> beliefToNode_;
315  std::vector<LBPredictor> predictors_;
316 
317  // Storage to avoid reallocations
318  std::vector<size_t> sampledNodes_;
319  std::vector<char> backuppedActions_;
320  Belief intermediateBeliefTmp_, nextBeliefTmp_;
321  };
322 
323  template <IsModel M>
324  std::tuple<double, double, VList, MDP::QFunction> SARSOP::operator()(const M & pomdp, const Belief & initialBelief) {
325  constexpr unsigned infiniteHorizon = 1000000;
326 
327  AI_LOGGER(AI_SEVERITY_DEBUG, "Running SARSOP; POMDP S: " << pomdp.getS() << "; A: " << pomdp.getA() << "; O: " << pomdp.getO());
328  AI_LOGGER(AI_SEVERITY_DEBUG, "Initial Belief: " << initialBelief.transpose());
329 
330  // ##############################
331  // ### Resetting general data ###
332  // ##############################
333 
334  // Reset delta to the initial parameter set.
335  delta_ = initialDelta_;
336 
337  // Cache immediate rewards if we can't read the reward function directly.
338  if constexpr (!MDP::IsModelEigen<M>)
339  immediateRewards_ = computeImmediateRewards(pomdp);
340 
341  // First allocation for root node & children
342  treeStorage_.clear();
343  treeStorage_.reserve(pomdp.getA() * pomdp.getO() + 1);
344 
345  beliefToNode_.clear();
346 
347  // Bins initialization. Note that the "multiple bin predictors"
348  // mechanism has been taken from the original author's code, as the
349  // paper itself does not mention it. Just modify the constants below if
350  // you want the bins to behave differently.
351  predictors_.clear();
352 
353  // ########################################
354  // ### Pre-allocating temporary storage ###
355  // ########################################
356 
357  backuppedActions_.resize(pomdp.getA());
358  intermediateBeliefTmp_.resize(pomdp.getS());
359  nextBeliefTmp_.resize(pomdp.getS());
360 
361  // ################################
362  // ### Computing initial bounds ###
363  // ################################
364 
365  // Helper methods to compute initial LB/UB. Since SARSOP is not really
366  // the best method, it's unlikely that we will ask very stringent
367  // tolerances (unless we want to wait a while). However, since these
368  // two methods are pretty fast, there's not harm in asking them a
369  // tighter tolerance if they can reach it.. if they can't they'll
370  // simply stop on their own.
371  BlindStrategies bs(infiniteHorizon, std::min(0.00001, tolerance_));
372  FastInformedBound fib(infiniteHorizon, std::min(0.00001, tolerance_));
373 
374  // Here we use the BlindStrategies in order to obtain a very simple
375  // initial lower bound.
376  VList lbVList = std::get<1>(bs(pomdp, true));
377  lbVList.erase(extractDominated(std::begin(lbVList), std::end(lbVList), unwrap), std::end(lbVList));
378 
379  // ### Delta Pruning Setup ###
380  //
381  // In order to efficiently and aggressively prune alphavectors, SARSOP
382  // prunes one when it is dominated across the whole belief space we
383  // have explored, i.e. all the beliefs in treeStorage_.
384  //
385  // However, checking every vector every time against all beliefs would
386  // be a bit expensive. So what we do (as in the original code), is
387  // instead we associate each alphavector with a set of witness points.
388  // If it is dominated over that, it's dead. This idea comes from the
389  // HVSI paper.
390  //
391  // Additionally, we keep all vectors which are the max at any given belief.
392  //
393  // Since we need to keep track of all these points per each
394  // alphavector, we store them (temporarily) in the observations field
395  // of each VEntry. This is because we are not going to use that field
396  // for anything else here, and so might as well re-use it.
397  //
398  // In particular, each vector will be in this form:
399  //
400  // [ number_of_max_points + 1, max_point_id, ..., witness_point_id, ... ]
401  //
402  // The first element is simply to keep track of the "end" range of the
403  // max_point_ids, so we can remember which id means what. The id ranges
404  // are kept independently sorted, and refer to the id of the respective
405  // TreeNode in treeStorage_.
406  //
407  // Additionally, each id is actually the id plus |S|. This is because
408  // we also want to store the corners, so what we do is that ids in [0,
409  // S) mean corners, while ids higher refer to the (id-S) element in
410  // treeStorage_. You can see all this in action in the deltaPrune
411  // function.
412  //
413  // In any case, here we have to setup the vectors for the corners and
414  // the initial belief, so they are ready to go.
415 
416  // First element means that the end of the range of the max-ids is 1,
417  // i.e. we have no maxes (nor witnesses atm).
418  for (auto & ve : lbVList)
419  ve.observations.push_back(1);
420 
421  // For each corner, find the best alphavector for it, and add the
422  // corner in its max list.
423  for (size_t s = 0; s < pomdp.getS(); ++s) {
424  auto it = findBestAtSimplexCorner(s, std::begin(lbVList), std::end(lbVList), nullptr, unwrap);
425  // Mark that we are adding a max to the list
426  ++it->observations[0];
427  // Add the corner to the list (we can do push_back since we are
428  // sure we have no witness points yet, so we don't mix them).
429  it->observations.push_back(s);
430  }
431  // Finally, find the max for initialBelief and assign that.
432  auto it = findBestAtPoint(initialBelief, std::begin(lbVList), std::end(lbVList), nullptr, unwrap);
433  ++it->observations[0];
434  // id of initialBelief is 0, since not a corner => 0 + S = S
435  it->observations.push_back(pomdp.getS());
436 
437  // The same we do here with FIB for the input POMDP.
438  MDP::QFunction ubQ = std::get<1>(fib(pomdp));
439  AI_LOGGER(AI_SEVERITY_DEBUG, "Initial QFunction:\n" << ubQ);
440 
441  // While we store the lower bound as alphaVectors, the upper bound is
442  // composed by both alphaVectors (albeit only S of them - out of the
443  // FastInformedBound), and a series of belief-value pairs, which we'll
444  // use with the later-constructed new POMDP in order to improve our
445  // bounds.
447  {initialBelief}, {(initialBelief.transpose() * ubQ).maxCoeff()}
448  };
449 
450  // ###########################
451  // ### Setup UB predictors ###
452  // ###########################
453 
454  // This we use to estimate the UB buckets for each belief. We use more
455  // than one since they do the same in the original code.
456  const auto initialUbQ = ubQ;
457 
458  constexpr unsigned numBins = 2;
459  constexpr unsigned entropyBins = 5;
460  constexpr unsigned ubBins = 5;
461  constexpr unsigned binScaling = 2;
462 
463  for (unsigned i = 0; i < numBins; ++i) {
464  const unsigned scaling = std::pow(binScaling, i);
465  // Each predictor has differently sized buckets.
466  predictors_.emplace_back(entropyBins * scaling, ubBins * scaling, initialUbQ);
467  }
468 
469  // #######################
470  // ### Setup tree root ###
471  // #######################
472 
473  treeStorage_.emplace_back();
474 
475  // Note that we can't make a reference alias to the root since
476  // treeStorage_ is going to reallocate multiple times during solving.
477  treeStorage_[0].belief = initialBelief;
478  treeStorage_[0].count = 1;
479  updateNode(treeStorage_[0], pomdp, lbVList, ubQ, ubV, false);
480 
481  AI_LOGGER(AI_SEVERITY_INFO, "Initial bounds: " << treeStorage_[0].LB << ", " << treeStorage_[0].UB);
482 
483  // ##################
484  // ### Begin work ###
485  // ##################
486 
487  while (true) {
488  // Deep sample a branch of the action/observation trees. The
489  // sampled nodes (except the last one where we stop) are added to
490  // sampledNodes_.
491  AI_LOGGER(AI_SEVERITY_DEBUG, "Sampling points...");
492  samplePoints(pomdp, lbVList, ubQ, ubV);
493 
494  // If we have no nodes it means we stopped at the root, so we have
495  // already shrinked the gap enough; we are done.
496  if (sampledNodes_.size() == 0) {
497  AI_LOGGER(AI_SEVERITY_INFO, "No more points to sample found.");
498  break;
499  }
500 
501  // Backup the nodes we sampled, from (node-before) leaf to root.
502  // This updates the lower and upper bounds by adding
503  // alphavectors/points to them.
504  AI_LOGGER(AI_SEVERITY_DEBUG, "Backing up points...");
505  for (auto rIt = std::rbegin(sampledNodes_); rIt != std::rend(sampledNodes_); ++rIt)
506  backupNode(*rIt, pomdp, lbVList, ubQ, ubV);
507 
508  // # Lower Bound Pruning #
509 
510  // We aggressively prune the lbVList based on the beliefs we have
511  // explored. This prunes both using direct dominance as well as
512  // delta dominance, i.e. vectors count as dominated if they are
513  // dominated within a given neighborhood of all their witness
514  // beliefs.
515  AI_LOGGER(AI_SEVERITY_DEBUG, "Delta pruning...");
516  deltaPrune(lbVList);
517 
518  // # Upper Bound Pruning #
519 
520  // Prune unused beliefs that do not contribute to the upper bound.
521  // This means that their value is *higher* than what we can
522  // approximate using the other beliefs.
523  AI_LOGGER(AI_SEVERITY_DEBUG, "UB pruning...");
524  size_t i = ubV.first.size();
525  do {
526  --i;
527 
528  // We swap the current belief to check at the end, and we
529  // temporarily remove it so we can test the interpolation
530  // without it.
531  std::swap(ubV.first[i], ubV.first.back());
532  std::swap(ubV.second[i], ubV.second.back());
533 
534  auto belief = std::move(ubV.first.back());
535  auto value = ubV.second.back();
536 
537  ubV.first.pop_back();
538  ubV.second.pop_back();
539 
540  // If its original value is lower than the interpolation, we
541  // still need it to improve our upper bound.
542  if (value < std::get<0>(sawtoothInterpolation(belief, ubQ, ubV))) {
543  // Thus, we put it back inside.
544  ubV.first.emplace_back(std::move(belief));
545  ubV.second.emplace_back(value);
546  }
547  } while (i != 0 && ubV.first.size() > 1);
548 
550  "Root lower bound: " << treeStorage_[0].LB <<
551  "; upper bound: " << treeStorage_[0].UB <<
552  "; alpha vectors: " << lbVList.size() <<
553  "; belief points: " << ubV.first.size());
554 
555  if (treeStorage_[0].UB - treeStorage_[0].LB <= tolerance_)
556  break;
557  }
558 
559  // Remove witness data from lbVList since we don't need to pass it
560  // outside.
561  for (auto & ventry : lbVList)
562  ventry.observations.clear();
563 
564  return std::make_tuple(treeStorage_[0].LB, treeStorage_[0].UB, lbVList, ubQ);
565  }
566 
567  template <IsModel M>
568  void SARSOP::samplePoints(const M & pomdp, const VList & lbVList, const MDP::QFunction & ubQ, const UpperBoundValueFunction & ubV) {
569  sampledNodes_.clear();
570  // Always begin sampling from the root. We are going to go down a path
571  // until we hit our stopping conditions. If we end up outside the tree,
572  // we are going to add the new nodes to it as we go along.
573  size_t currentNodeId = 0;
574  const double rootGap = (treeStorage_[0].UB - treeStorage_[0].LB) * 0.95;
575 
576  int depth = 0;
577  double L = treeStorage_[0].LB;
578  double U = L + rootGap;
579 
580  while (true) {
581  // Compute target gap for this depth.
582  const double targetGap = rootGap * std::pow(pomdp.getDiscount(), -depth);
583  {
584  // Here we check whether we should stop. Note that the
585  // reference to node is intentionally kept scoped, as we may
586  // need to expand this node later, and doing so will invalidate
587  // its address.
588  const TreeNode & node = treeStorage_[currentNodeId];
589 
590  const double finalExcess = node.UB - node.LB - 0.5 * targetGap;
591  if (finalExcess <= 0.0)
592  break;
593 
594  // Stopping condition; we stop sampling if either our approximation
595  // falls below the lower bound, or if our upper bound is too low.
596  const auto vHat = predictValue(currentNodeId, node);
597  if (vHat <= L && node.UB <= std::max(U, node.LB + targetGap))
598  break;
599  }
600 
601  // We are indeed going down this node, so we add it to the nodes
602  // sampled.
603  sampledNodes_.push_back(currentNodeId);
604 
605  // Precompute this node's children if it was a leaf.
606  if (treeStorage_[currentNodeId].children.size() == 0)
607  expandLeaf(currentNodeId, pomdp, lbVList, ubQ, ubV);
608 
609  // Now we can take a reference as we won't need to allocate again.
610  const TreeNode & node = treeStorage_[currentNodeId];
611 
612  // Otherwise we keep sampling.
613  const auto L1 = std::max(L, node.LB);
614  const auto U1 = std::max(U, node.LB + targetGap);
615 
616  // TODO: possible do randomization for equally valued actions.
617  const auto a1 = node.actionUb;
618  // TODO: possible do randomization for equally valued obs.
619  size_t o1 = 0;
620  {
621  const double nextDepthGap = targetGap / pomdp.getDiscount();
622  double maxVal = std::numeric_limits<double>::lowest();
623  for (size_t o = 0; o < pomdp.getO(); ++o) {
624  if (node.children[a1][o].observationProbability == 0.0) continue;
625 
626  const auto & childNode = treeStorage_[node.children[a1][o].id];
627  const auto val = (childNode.UB - childNode.LB - nextDepthGap) * node.children[a1][o].observationProbability;
628  if (val > maxVal) {
629  maxVal = val;
630  o1 = o;
631  }
632  }
633  }
634 
635  double Lnorm = 0.0, Unorm = 0.0;
636  for (size_t o = 0; o < pomdp.getO(); ++o) {
637  if (o == o1) continue;
638 
639  const auto & childNode = treeStorage_[node.children[a1][o].id];
640 
641  Lnorm += childNode.LB * node.children[a1][o].observationProbability;
642  Unorm += childNode.UB * node.children[a1][o].observationProbability;
643  }
644 
645  // Lt, Ut
646  L = ((L1 - node.actionData(0, a1)) / pomdp.getDiscount() - Lnorm) / node.children[a1][o1].observationProbability;
647  U = ((U1 - node.actionData(0, a1)) / pomdp.getDiscount() - Unorm) / node.children[a1][o1].observationProbability;
648 
649  // Set the new node to go down to.
650  currentNodeId = node.children[a1][o1].id;
651 
652  ++depth;
653  }
654  }
655 
656  template <IsModel M>
657  void SARSOP::expandLeaf(
658  const size_t id, const M & pomdp,
659  const VList & lbVList,
660  const MDP::QFunction & ubQ, const UpperBoundValueFunction & ubV
661  )
662  {
663  // Note that we create a pointer as this function will add nodes to
664  // treeStorage_, which will possibly re-allocate its storage. This
665  // means that references will lose validity. Here we use a pointer to
666  // re-assign it when needed.
667  TreeNode * nodep = &treeStorage_[id];
668 
669  assert(nodep->children.size() == 0);
670  // This assert is to say that we shouldn't really be going down a
671  // provenly suboptimal path, so this should not really happen. If it
672  // happens, it might be something is broken or I misunderstood
673  // something.
674  assert(nodep->count > 0);
675 
676  // Allocate precompute bound values for future backups
677  updateNode(*nodep, pomdp, lbVList, ubQ, ubV, true);
678 
679  // Allocate children memory
680  nodep->children.resize(boost::extents[pomdp.getA()][pomdp.getO()]);
681 
682  for (size_t a = 0; a < pomdp.getA(); ++a) {
683  updateBeliefPartial(pomdp, nodep->belief, a, &intermediateBeliefTmp_);
684 
685  for (size_t o = 0; o < pomdp.getO(); ++o) {
686  auto & child = nodep->children[a][o];
687 
688  updateBeliefPartialUnnormalized(pomdp, intermediateBeliefTmp_, a, o, &nextBeliefTmp_);
689 
690  const auto prob = nextBeliefTmp_.sum();
691 
692  if (checkEqualSmall(prob, 0.0)) {
693  // observationProbability for this child is 0.0 by default,
694  // we'll use that for future checks.
695  continue;
696  }
697  nextBeliefTmp_ /= prob;
698 
699  child.observationProbability = prob;
700 
701  const auto it = beliefToNode_.find(nextBeliefTmp_);
702  if (it != beliefToNode_.end()) {
703  // If the node already existed, we simply point to it, and
704  // increase its reference count.
705  child.id = it->second;
706  if (++treeStorage_[child.id].count == 1) {
707  // If it's count was 0 before, then it represented a
708  // previously pruned branch. Since it's now back in the
709  // tree, we need to "revive" all its children warning
710  // them that a new path to them is open.
711  //
712  // Note that this does not bring "directly" alive any
713  // alphavectors associated with those beliefs (as
714  // alphavectors of dead branches are pruned away), but
715  // we'll have to wait until direct exploration makes us
716  // do backup of those beliefs again.
717  treeRevive(treeStorage_[child.id]);
718  }
719  continue;
720  }
721 
722  // Finish storing info about child as its reference is about to
723  // go stale.
724  child.id = treeStorage_.size();
725  beliefToNode_[nextBeliefTmp_] = child.id;
726 
727  // Adding a node to treeStorage_ invalidates every single
728  // reference we are holding to anything in it, since it may
729  // reallocate. Keep it in mind.
730  treeStorage_.emplace_back();
731  // Re-assign to nodep to get the possibly new pointer.
732  nodep = &treeStorage_[id];
733 
734  auto & childNode = treeStorage_.back();
735 
736  childNode.belief = nextBeliefTmp_;
737  childNode.count = 1;
738  // Compute UB and LB for this child
739  updateNode(childNode, pomdp, lbVList, ubQ, ubV, false);
740  }
741  }
742  }
743 
744  template <IsModel M>
745  void SARSOP::updateNode(
746  TreeNode & node, const M & pomdp,
747  const VList & lbVList,
748  const MDP::QFunction & ubQ, const UpperBoundValueFunction & ubV,
749  bool expand
750  )
751  {
752  const auto & ir = [&]{
753  if constexpr (MDP::IsModelEigen<M>) return pomdp.getRewardFunction();
754  else return immediateRewards_;
755  }();
756  // We update the UB using the sawtooth approximation since it's work we
757  // have to do whether we are expanding a node or updating a leaf.
758  Vector ubs; // Here we store per-action upper-bounds in case we need them.
759  const auto ub = bestPromisingAction<false>(pomdp, ir, node.belief, ubQ, ubV, &ubs);
760  node.UB = std::get<1>(ub);
761  node.actionUb = std::get<0>(ub);
762 
763  if (expand) {
764  // If we are expanding the node, we are only really interested in the
765  // actionData, as it contains pre-computed data which allows us to
766  // possibly skip some work when doing upper-bound backups.
767  node.actionData.resize(Eigen::NoChange, pomdp.getA());
768  node.actionData.row(0) = node.belief.transpose() * ir;
769  node.actionData.row(1) = ubs;
770  node.actionData.row(2).fill(0);
771  } else {
772  // Otherwise, we are just computing the upper and lower bounds of a
773  // leaf node. The UB we already did, so here we do the LB.
774  const auto lb = bestConservativeAction(pomdp, ir, node.belief, lbVList);
775  node.LB = std::get<1>(lb);
776  }
777  }
778 
779  template <IsModel M>
780  void SARSOP::backupNode(size_t id, const M & pomdp, VList & lbVList, MDP::QFunction & ubQ, UpperBoundValueFunction & ubV) {
781  const auto & ir = [&]{
782  if constexpr (MDP::IsModelEigen<M>) return pomdp.getRewardFunction();
783  else return immediateRewards_;
784  }();
785 
786  TreeNode & node = treeStorage_[id];
787  {
788  // Update lower bound and extract a new alphavector.
789  Vector alpha;
790  const auto result = bestConservativeAction(pomdp, ir, node.belief, lbVList, &alpha);
791  node.LB = std::get<1>(result);
792  // Add new alphavector with its witness point inserted
793  lbVList.emplace_back(std::move(alpha), std::get<0>(result), VObs{1, id + pomdp.getS()});
794  }
795 
796  // For the upper bound we use the precomputed values to try to skip
797  // some work. Since updating a upper-bound can only lower it, we update
798  // only the highest value. If then it's still the highest, we are done.
799  // Otherwise, we select the new highest and continue, until we end up
800  // with a new max.
801  std::fill(std::begin(backuppedActions_), std::end(backuppedActions_), false);
802  auto maxAction = node.actionUb;
803 
804  while (!backuppedActions_[maxAction]) {
805  double sum = 0.0;
806  for (size_t o = 0; o < pomdp.getO(); ++o) {
807  const double obsP = node.children[maxAction][o].observationProbability;
808 
809  if (obsP == 0.0) continue;
810 
811  const auto & childNode = treeStorage_[node.children[maxAction][o].id];
812 
813  sum += obsP * std::get<0>(sawtoothInterpolation(childNode.belief, ubQ, ubV));
814  }
815  sum = node.actionData(0, maxAction) + pomdp.getDiscount() * sum;
816 
817  node.actionData(1, maxAction) = sum;
818  backuppedActions_[maxAction] = true;
819 
820  node.UB = node.actionData.row(1).maxCoeff(&maxAction);
821  }
822  node.actionUb = maxAction;
823 
824  // Finally, we can add update this belief's value in the upper bound.
825  // If it's a corner point, we modify ubQ directly; otherwise we just
826  // add it to ubV.
827  for (size_t s = 0; s < pomdp.getS(); ++s) {
828  if (checkEqualSmall(node.belief[s], 1.0)) {
829  ubQ(s, maxAction) = node.UB;
830  return;
831  }
832  }
833  ubV.first.push_back(node.belief);
834  ubV.second.push_back(node.UB);
835  }
836 }
837 
838 #endif
AIToolbox::POMDP
Definition: AMDP.hpp:14
AIToolbox::POMDP::FastInformedBound
This class implements the Fast Informed Bound algorithm.
Definition: FastInformedBound.hpp:81
AIToolbox::POMDP::SARSOP::setDelta
void setDelta(double delta)
This function sets the delta for pruning to use at the start of a solving process.
AIToolbox::MDP::computeImmediateRewards
Matrix2D computeImmediateRewards(const M &model)
This function computes all immediate rewards (state and action) of the MDP once for improved speed.
Definition: Utils.hpp:77
AIToolbox::POMDP::SARSOP::getTolerance
double getTolerance() const
This function returns the currently set tolerance to reach when solving a POMDP.
AIToolbox::MDP::QFunction
Matrix2D QFunction
Definition: Types.hpp:52
TypeTraits.hpp
AIToolbox::extractDominated
Iterator extractDominated(Iterator begin, Iterator end, P p=P{})
This function finds and moves all Hyperplanes in the range that are dominated by others.
Definition: Prune.hpp:30
AIToolbox::POMDP::bestConservativeAction
std::tuple< size_t, double > bestConservativeAction(const M &pomdp, MDP::QFunction immediateRewards, const Belief &initialBelief, const VList &lbVList, MDP::Values *alpha=nullptr)
This function obtains the best action with respect to the input VList.
Definition: Utils.hpp:609
AIToolbox::POMDP::updateBeliefPartialUnnormalized
void updateBeliefPartialUnnormalized(const M &model, const Belief &b, const size_t a, const size_t o, Belief *bRet)
This function terminates the unnormalized update of a partially updated belief.
Definition: Utils.hpp:350
AIToolbox::POMDP::VList
std::vector< VEntry > VList
Definition: Types.hpp:77
AIToolbox::POMDP::SARSOP::TreeNode::Children::id
size_t id
Definition: SARSOP.hpp:122
AIToolbox::findBestAtSimplexCorner
Iterator findBestAtSimplexCorner(const size_t corner, Iterator begin, Iterator end, double *value=nullptr, P p=P{})
This function returns an iterator pointing to the best Hyperplane for the specified corner of the sim...
Definition: Polytope.hpp:95
AIToolbox::POMDP::SARSOP::setTolerance
void setTolerance(double tolerance)
This function sets the tolerance to reach when solving a POMDP.
AIToolbox::Matrix2D
Eigen::Matrix< double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor|Eigen::AutoAlign > Matrix2D
Definition: Types.hpp:18
BlindStrategies.hpp
AIToolbox::POMDP::SARSOP::TreeNode::Children
Definition: SARSOP.hpp:121
AIToolbox::POMDP::SARSOP::SARSOP
SARSOP(double tolerance, double delta=0.1)
Basic constructor.
AIToolbox::Vector
Eigen::Matrix< double, Eigen::Dynamic, 1 > Vector
Definition: Types.hpp:16
AIToolbox::POMDP::SARSOP::getDelta
double getDelta() const
This function returns the delta for pruning to use at the start of a solving process.
AIToolbox::sawtoothInterpolation
std::tuple< double, Vector > sawtoothInterpolation(const Point &p, const CompactHyperplanes &ubQ, const PointSurface &ubV)
This function computes an approximate, but quick, upper bound on the surface value at the input point...
AIToolbox::POMDP::BlindStrategies
This class implements the blind strategies lower bound.
Definition: BlindStrategies.hpp:28
AIToolbox::checkEqualSmall
bool checkEqualSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably equal.
Definition: Core.hpp:45
Types.hpp
AIToolbox::POMDP::UpperBoundValueFunction
std::pair< std::vector< Belief >, std::vector< double > > UpperBoundValueFunction
Definition: Types.hpp:80
AIToolbox::POMDP::SARSOP
This class implements the SARSOP algorithm.
Definition: SARSOP.hpp:33
AIToolbox::findBestAtPoint
Iterator findBestAtPoint(const Point &point, Iterator begin, Iterator end, double *value=nullptr, P p=P{})
This function returns an iterator pointing to the best Hyperplane for the specified point.
Definition: Polytope.hpp:65
AIToolbox::POMDP::unwrap
const MDP::Values & unwrap(const VEntry &ve)
This function is used as iterator projection to obtain the Values of a VEntry.
Definition: Utils.hpp:68
AIToolbox::POMDP::SARSOP::TreeNode::Children::observationProbability
double observationProbability
Definition: SARSOP.hpp:123
AIToolbox::POMDP::SARSOP::operator()
std::tuple< double, double, VList, MDP::QFunction > operator()(const M &model, const Belief &initialBelief)
This function efficiently computes bounds for the optimal value of the input belief for the input POM...
Definition: SARSOP.hpp:324
AIToolbox::POMDP::updateBeliefPartial
void updateBeliefPartial(const M &model, const Belief &b, const size_t a, Belief *bRet)
This function partially updates a belief.
Definition: Utils.hpp:289
AI_SEVERITY_INFO
#define AI_SEVERITY_INFO
Definition: Logging.hpp:69
AIToolbox::POMDP::Belief
ProbabilityVector Belief
This represents a belief, which is a probability distribution over states.
Definition: Types.hpp:12
Logging.hpp
FastInformedBound.hpp
AI_LOGGER
#define AI_LOGGER(SEV, ARGS)
Definition: Logging.hpp:114
AIToolbox::POMDP::VObs
std::vector< size_t > VObs
Definition: Types.hpp:71
AI_SEVERITY_DEBUG
#define AI_SEVERITY_DEBUG
Definition: Logging.hpp:68