AIToolbox
A library that offers tools for AI problem solving.
Utils.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_POMDP_UTILS_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_UTILS_HEADER_FILE
3 
4 #include <cstddef>
5 #include <iterator>
6 #include <numeric>
7 
13 
14 #include <boost/functional/hash.hpp>
15 
16 namespace AIToolbox::POMDP {
28  std::strong_ordering operator<=>(const VEntry & lhs, const VEntry & rhs);
29  bool operator==(const VEntry & lhs, const VEntry & rhs);
30 
43  inline VEntry makeVEntry(size_t S, size_t a, size_t O) {
44  VEntry entry;
45 
46  entry.values.resize(S);
47  entry.values.setZero();
48  entry.action = a;
49  entry.observations.resize(O);
50 
51  return entry;
52  }
53 
57  inline size_t hash_value(const VEntry & v) {
58  size_t seed = 0;
59  boost::hash_combine(seed, v.action);
60  boost::hash_combine(seed, v.observations);
61  boost::hash_combine(seed, v.values);
62  return seed;
63  }
64 
68  inline const MDP::Values & unwrap(const VEntry & ve) {
69  return ve.values;
70  }
71 
94 
111  double weakBoundDistance(const VList & oldV, const VList & newV);
112 
131  template <IsModel M>
132  auto makeSOSA(const M & m) {
133  if constexpr(IsModelEigen<M>) {
134  boost::multi_array<std::remove_cvref_t<decltype(m.getTransitionFunction(0))>, 2> retval( boost::extents[m.getA()][m.getO()] );
135  for (size_t a = 0; a < m.getA(); ++a)
136  for (size_t o = 0; o < m.getO(); ++o)
137  retval[a][o] = m.getTransitionFunction(a) * Vector(m.getObservationFunction(a).col(o)).asDiagonal();
138  return retval;
139  } else {
140  Matrix4D retval( boost::extents[m.getA()][m.getO()] );
141  for (size_t a = 0; a < m.getA(); ++a) {
142  for (size_t o = 0; o < m.getO(); ++o) {
143  retval[a][o].resize(m.getS(), m.getS());
144  for (size_t s = 0; s < m.getS(); ++s)
145  for (size_t s1 = 0; s1 < m.getS(); ++s1)
146  retval[a][o](s, s1) = m.getTransitionProbability(s, a, s1) * m.getObservationProbability(s1, a, o);
147  }
148  }
149  return retval;
150  }
151  }
152 
170  template <IsModel M>
171  void updateBeliefUnnormalized(const M & model, const Belief & b, const size_t a, const size_t o, Belief * bRet) {
172  if (!bRet) return;
173 
174  auto & br = *bRet;
175 
176  if constexpr(IsModelEigen<M>) {
177  br = model.getObservationFunction(a).col(o).cwiseProduct((b.transpose() * model.getTransitionFunction(a)).transpose());
178  } else {
179  const size_t S = model.getS();
180  for ( size_t s1 = 0; s1 < S; ++s1 ) {
181  double sum = 0.0;
182  for ( size_t s = 0; s < S; ++s )
183  sum += model.getTransitionProbability(s,a,s1) * b[s];
184 
185  br[s1] = model.getObservationProbability(s1,a,o) * sum;
186  }
187  }
188  }
189 
206  template <IsModel M>
207  Belief updateBeliefUnnormalized(const M & model, const Belief & b, const size_t a, const size_t o) {
208  Belief br(model.getS());
209  updateBeliefUnnormalized(model, b, a, o, &br);
210  return br;
211  }
212 
234  template <IsModel M>
235  void updateBelief(const M & model, const Belief & b, const size_t a, const size_t o, Belief * bRet) {
236  if (!bRet) return;
237 
238  updateBeliefUnnormalized(model, b, a, o, bRet);
239 
240  auto & br = *bRet;
241  br /= br.sum();
242  }
243 
264  template <IsModel M>
265  Belief updateBelief(const M & model, const Belief & b, const size_t a, const size_t o) {
266  Belief br(model.getS());
267  updateBelief(model, b, a, o, &br);
268  return br;
269  }
270 
288  template <IsModel M>
289  void updateBeliefPartial(const M & model, const Belief & b, const size_t a, Belief * bRet) {
290  if (!bRet) return;
291 
292  auto & br = *bRet;
293 
294  if constexpr(IsModelEigen<M>) {
295  br = (b.transpose() * model.getTransitionFunction(a)).transpose();
296  } else {
297  const size_t S = model.getS();
298  for ( size_t s1 = 0; s1 < S; ++s1 ) {
299  br[s1] = 0.0;
300  for ( size_t s = 0; s < S; ++s )
301  br[s1] += model.getTransitionProbability(s,a,s1) * b[s];
302  }
303  }
304  }
305 
322  template <IsModel M>
323  Belief updateBeliefPartial(const M & model, const Belief & b, const size_t a) {
324  Belief bRet(model.getS());
325  updateBeliefPartial(model, b, a, &bRet);
326  return bRet;
327  }
328 
349  template <IsModel M>
350  void updateBeliefPartialUnnormalized(const M & model, const Belief & b, const size_t a, const size_t o, Belief * bRet) {
351  if (!bRet) return;
352 
353  auto & br = *bRet;
354 
355  if constexpr(IsModelEigen<M>) {
356  br = model.getObservationFunction(a).col(o).cwiseProduct(b);
357  } else {
358  const size_t S = model.getS();
359  for ( size_t s = 0; s < S; ++s )
360  br[s] = model.getObservationProbability(s, a, o) * b[s];
361  }
362  }
363 
383  template <IsModel M>
384  Belief updateBeliefPartialUnnormalized(const M & model, const Belief & b, const size_t a, const size_t o) {
385  Belief bRet(model.getS());
386  updateBeliefPartialUnnormalized(model, b, a, o, &bRet);
387  return bRet;
388  }
389 
417  template <IsModel M>
418  void updateBeliefPartialNormalized(const M & model, const Belief & b, const size_t a, const size_t o, Belief * bRet) {
419  if (!bRet) return;
420 
421  auto & br = *bRet;
422 
423  updateBeliefPartialUnnormalized(model, b, a, o, bRet);
424 
425  br /= br.sum();
426  }
427 
454  template <IsModel M>
455  Belief updateBeliefPartialNormalized(const M & model, const Belief & b, const size_t a, const size_t o) {
456  auto newB = updateBeliefPartialUnnormalized(model, b, a, o);
457  newB /= newB.sum();
458  return newB;
459  }
460 
470  template <IsModel M>
471  double beliefExpectedReward(const M& model, const Belief & b, const size_t a) {
472  if constexpr (IsModelEigen<M>) {
473  return model.getRewardFunction().col(a).dot(b);
474  } else {
475  double rew = 0.0; const size_t S = model.getS();
476  for ( size_t s = 0; s < S; ++s )
477  for ( size_t s1 = 0; s1 < S; ++s1 )
478  rew += model.getTransitionProbability(s, a, s1) * model.getExpectedReward(s, a, s1) * b[s];
479 
480  return rew;
481  }
482  }
483 
503  template <typename ActionRow>
504  void crossSumBestAtBelief(const Belief & b, const ActionRow & row, VEntry * outp, double * value = nullptr) {
505  if (!outp) return;
506 
507  const size_t O = row.size();
508  double v = 0.0, tmp;
509 
510  auto & out = *outp;
511  out.values.setZero();
512 
513  // We compute the crossSum between each best vector for the belief.
514  for ( size_t o = 0; o < O; ++o ) {
515  const auto & r = row[o];
516  auto begin = std::begin(r);
517  auto end = std::end(r);
518 
519  auto bestMatch = findBestAtPoint(b, begin, end, &tmp, unwrap).base();
520 
521  out.values += bestMatch->values;
522  v += tmp;
523 
524  out.observations[o] = bestMatch->observations[0];
525  }
526  if (value) *value = v;
527  }
528 
545  template <typename ActionRow>
546  VEntry crossSumBestAtBelief(const Belief & b, const ActionRow & row, const size_t a, double * value = nullptr) {
547  auto entry = makeVEntry(b.size(), a, row.size());
548 
549  crossSumBestAtBelief(b, row, &entry, value);
550 
551  return entry;
552  }
553 
568  template <typename Projections>
569  VEntry crossSumBestAtBelief(const Belief & b, const Projections & projs, double * value = nullptr) {
570  const size_t A = projs.size();
571 
572  double bestValue, tmp;
573  VEntry entry = crossSumBestAtBelief(b, projs[0], (size_t)0, &bestValue);
574  VEntry helper = entry;
575 
576  for ( size_t a = 1; a < A; ++a ) {
577  helper.action = a;
578  crossSumBestAtBelief(b, projs[a], &helper, &tmp);
579 
580  if (tmp > bestValue) {
581  bestValue = tmp;
582  std::swap(entry, helper);
583  }
584  }
585  if (value) *value = bestValue;
586  return entry;
587  }
588 
608  template <IsModel M>
609  std::tuple<size_t, double> bestConservativeAction(const M & pomdp, MDP::QFunction immediateRewards, const Belief & initialBelief, const VList & lbVList, MDP::Values * alpha = nullptr) {
610  // Note that we update inline the alphavectors in immediateRewards
611  Vector bpAlpha(pomdp.getS());
612  // Storage to avoid reallocations
613  Belief intermediateBelief(pomdp.getS());
614  Belief nextBelief(pomdp.getS());
615 
616  for (size_t a = 0; a < pomdp.getA(); ++a) {
617  updateBeliefPartial(pomdp, initialBelief, a, &intermediateBelief);
618 
619  bpAlpha.setZero();
620 
621  for (size_t o = 0; o < pomdp.getO(); ++o) {
622  updateBeliefPartialUnnormalized(pomdp, intermediateBelief, a, o, &nextBelief);
623 
624  const auto nextBeliefProbability = nextBelief.sum();
625  if (checkEqualSmall(nextBeliefProbability, 0.0)) continue;
626  // Now normalized
627  nextBelief /= nextBeliefProbability;
628 
629  const auto it = findBestAtPoint(nextBelief, std::begin(lbVList), std::end(lbVList), nullptr, unwrap);
630 
631  bpAlpha += pomdp.getObservationFunction(a).col(o).cwiseProduct(it->values);
632  }
633  immediateRewards.col(a) += pomdp.getDiscount() * pomdp.getTransitionFunction(a) * bpAlpha;
634  }
635 
636  size_t id;
637  double v = (initialBelief.transpose() * immediateRewards).maxCoeff(&id);
638 
639  // Copy alphavector for selected action if needed
640  if (alpha) *alpha = immediateRewards.col(id);
641 
642  return std::make_tuple(id, v);
643  }
644 
663  template <bool useLP = true, IsModel M>
664  std::tuple<size_t, double> bestPromisingAction(const M & pomdp, const MDP::QFunction & immediateRewards, const Belief & belief, const MDP::QFunction & ubQ, const UpperBoundValueFunction & ubV, Vector * vals = nullptr) {
665  Vector storage;
666  Vector & qvals = vals ? *vals : storage;
667 
668  qvals = belief.transpose() * immediateRewards;
669 
670  // Storage to avoid reallocations
671  Belief intermediateBelief(pomdp.getS());
672  Belief nextBelief(pomdp.getS());
673 
674  for (size_t a = 0; a < pomdp.getA(); ++a) {
675  updateBeliefPartial(pomdp, belief, a, &intermediateBelief);
676  double sum = 0.0;
677  for (size_t o = 0; o < pomdp.getO(); ++o) {
678  updateBeliefPartialUnnormalized(pomdp, intermediateBelief, a, o, &nextBelief);
679 
680  const auto prob = nextBelief.sum();
681  if (checkEqualSmall(prob, 0.0)) continue;
682  // Note that we do not normalize nextBelief since we'd also
683  // have to multiply the result by the same probability. Instead
684  // we don't normalize, and we don't multiply, so we save some
685  // work.
686  if constexpr (useLP)
687  sum += std::get<0>(LPInterpolation(nextBelief, ubQ, ubV));
688  else
689  sum += std::get<0>(sawtoothInterpolation(nextBelief, ubQ, ubV));
690  }
691  qvals[a] += pomdp.getDiscount() * sum;
692  }
693  size_t bestAction;
694  double bestValue = qvals.maxCoeff(&bestAction);
695 
696  return std::make_tuple(bestAction, bestValue);
697  }
698 }
699 
700 #endif
AIToolbox::POMDP
Definition: AMDP.hpp:14
AIToolbox::POMDP::VEntry
Definition: Types.hpp:72
Core.hpp
AIToolbox::MDP::QFunction
Matrix2D QFunction
Definition: Types.hpp:52
TypeTraits.hpp
AIToolbox::POMDP::bestConservativeAction
std::tuple< size_t, double > bestConservativeAction(const M &pomdp, MDP::QFunction immediateRewards, const Belief &initialBelief, const VList &lbVList, MDP::Values *alpha=nullptr)
This function obtains the best action with respect to the input VList.
Definition: Utils.hpp:609
AIToolbox::POMDP::crossSumBestAtBelief
void crossSumBestAtBelief(const Belief &b, const ActionRow &row, VEntry *outp, double *value=nullptr)
This function computes the best VEntry for the input belief from the input VLists.
Definition: Utils.hpp:504
AIToolbox::POMDP::hash_value
size_t hash_value(const VEntry &v)
This function enables hashing of VEntries with boost::hash.
Definition: Utils.hpp:57
AIToolbox::POMDP::updateBeliefPartialUnnormalized
void updateBeliefPartialUnnormalized(const M &model, const Belief &b, const size_t a, const size_t o, Belief *bRet)
This function terminates the unnormalized update of a partially updated belief.
Definition: Utils.hpp:350
AIToolbox::POMDP::VList
std::vector< VEntry > VList
Definition: Types.hpp:77
AIToolbox::POMDP::updateBelief
void updateBelief(const M &model, const Belief &b, const size_t a, const size_t o, Belief *bRet)
Creates a new belief reflecting changes after an action and observation for a particular Model.
Definition: Utils.hpp:235
AIToolbox::POMDP::beliefExpectedReward
double beliefExpectedReward(const M &model, const Belief &b, const size_t a)
This function computes an immediate reward based on a belief rather than a state.
Definition: Utils.hpp:471
AIToolbox::Vector
Eigen::Matrix< double, Eigen::Dynamic, 1 > Vector
Definition: Types.hpp:16
AIToolbox::POMDP::bestPromisingAction
std::tuple< size_t, double > bestPromisingAction(const M &pomdp, const MDP::QFunction &immediateRewards, const Belief &belief, const MDP::QFunction &ubQ, const UpperBoundValueFunction &ubV, Vector *vals=nullptr)
This function obtains the best action with respect to the input QFunction and UbV.
Definition: Utils.hpp:664
AIToolbox::Matrix4D
boost::multi_array< Matrix2D, 2 > Matrix4D
Definition: Types.hpp:24
AIToolbox::POMDP::VEntry::values
MDP::Values values
Definition: Types.hpp:73
AIToolbox::MDP::Values
Vector Values
Definition: Types.hpp:44
AIToolbox::sawtoothInterpolation
std::tuple< double, Vector > sawtoothInterpolation(const Point &p, const CompactHyperplanes &ubQ, const PointSurface &ubV)
This function computes an approximate, but quick, upper bound on the surface value at the input point...
AIToolbox::POMDP::makeValueFunction
ValueFunction makeValueFunction(size_t S)
This function creates a default ValueFunction.
AIToolbox::POMDP::VEntry::observations
VObs observations
Definition: Types.hpp:75
AIToolbox::POMDP::weakBoundDistance
double weakBoundDistance(const VList &oldV, const VList &newV)
This function returns a weak measure of distance between two VLists.
AIToolbox::POMDP::makeVEntry
VEntry makeVEntry(size_t S, size_t a, size_t O)
This function creates and zeroes a VEntry.
Definition: Utils.hpp:43
AIToolbox::POMDP::operator==
bool operator==(const VEntry &lhs, const VEntry &rhs)
AIToolbox::POMDP::updateBeliefPartialNormalized
void updateBeliefPartialNormalized(const M &model, const Belief &b, const size_t a, const size_t o, Belief *bRet)
This function terminates the normalized update of a partially updated belief.
Definition: Utils.hpp:418
AIToolbox::POMDP::ValueFunction
std::vector< VList > ValueFunction
Definition: Types.hpp:78
AIToolbox::checkEqualSmall
bool checkEqualSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably equal.
Definition: Core.hpp:45
Types.hpp
AIToolbox::POMDP::UpperBoundValueFunction
std::pair< std::vector< Belief >, std::vector< double > > UpperBoundValueFunction
Definition: Types.hpp:80
AIToolbox::findBestAtPoint
Iterator findBestAtPoint(const Point &point, Iterator begin, Iterator end, double *value=nullptr, P p=P{})
This function returns an iterator pointing to the best Hyperplane for the specified point.
Definition: Polytope.hpp:65
AIToolbox::POMDP::unwrap
const MDP::Values & unwrap(const VEntry &ve)
This function is used as iterator projection to obtain the Values of a VEntry.
Definition: Utils.hpp:68
AIToolbox::POMDP::makeSOSA
auto makeSOSA(const M &m)
This function creates the SOSA matrix for the input POMDP.
Definition: Utils.hpp:132
AIToolbox::POMDP::operator<=>
std::strong_ordering operator<=>(const VEntry &lhs, const VEntry &rhs)
This function lexicographically sorts VEntries.
AIToolbox::POMDP::updateBeliefPartial
void updateBeliefPartial(const M &model, const Belief &b, const size_t a, Belief *bRet)
This function partially updates a belief.
Definition: Utils.hpp:289
AIToolbox::POMDP::VEntry::action
size_t action
Definition: Types.hpp:74
AIToolbox::POMDP::updateBeliefUnnormalized
void updateBeliefUnnormalized(const M &model, const Belief &b, const size_t a, const size_t o, Belief *bRet)
Creates a new belief reflecting changes after an action and observation for a particular Model.
Definition: Utils.hpp:171
AIToolbox::LPInterpolation
std::tuple< double, Vector > LPInterpolation(const Point &p, const CompactHyperplanes &ubQ, const PointSurface &ubV)
This function computes the exact value of the input Point w.r.t. the given surfaces.
AIToolbox::POMDP::Belief
ProbabilityVector Belief
This represents a belief, which is a probability distribution over states.
Definition: Types.hpp:12
Polytope.hpp
Probability.hpp