AIToolbox
A library that offers tools for AI problem solving.
RTBSS.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_POMDP_RTBSS_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_RTBSS_HEADER_FILE
3 
4 #include <limits>
5 
10 
11 namespace AIToolbox::POMDP {
36  template <IsModel M>
37  class RTBSS {
38  public:
39 
46  RTBSS(const M& m, double maxR);
47 
56  std::tuple<size_t, double> sampleAction(const Belief& b, unsigned horizon);
57 
63  const M& getModel() const;
64 
65  private:
66  const M& model_;
67  size_t S, A, O;
68  size_t maxA_, maxDepth_;
69  double maxR_;
70 
84  double simulate(const Belief & b, unsigned horizon);
85 
104  double upperBound(const Belief & b, size_t a, unsigned horizon) const;
105  };
106 
107  template <IsModel M>
108  RTBSS<M>::RTBSS(const M& m, const double maxR) :
109  model_(m), S(model_.getS()), A(model_.getA()),
110  O(model_.getO()), maxR_(maxR) {}
111 
112  template <IsModel M>
113  std::tuple<size_t, double> RTBSS<M>::sampleAction(const Belief& b, const unsigned horizon) {
114  maxA_ = 0; maxDepth_ = horizon;
115 
116  const double value = simulate(b, horizon);
117 
118  return std::make_tuple(maxA_, value);
119  }
120 
121  template <IsModel M>
122  double RTBSS<M>::simulate(const Belief & b, const unsigned horizon) {
123  if ( horizon == 0 ) return 0;
124 
125  std::vector<size_t> actionList(A);
126 
127  // Here we use no heuristic to sort the actions. If you want one
128  // add it here!
129  std::iota(std::begin(actionList), std::end(actionList), 0);
130 
131  double max = -std::numeric_limits<double>::infinity();
132 
133  for ( auto a : actionList ) {
134  double rew = beliefExpectedReward(model_, b, a);
135 
136  const double uBound = rew + upperBound(b, a, horizon - 1);
137  if ( uBound > max ) {
138  for ( size_t o = 0; o < O; ++o ) {
139  const auto nextBelief = updateBeliefUnnormalized(model_, b, a, o);
140  const double sum = nextBelief.sum();
141  // Only work if it makes sense
142  if ( checkDifferentSmall(sum, 0.0) )
143  rew += model_.getDiscount() * sum * simulate(nextBelief / sum, horizon - 1);
144  }
145  }
146  if ( rew > max ) {
147  max = rew;
148  if ( horizon == maxDepth_ ) maxA_ = a;
149  }
150  }
151  return max;
152  }
153 
154  template <IsModel M>
155  double RTBSS<M>::upperBound(const Belief &, const size_t, const unsigned horizon) const {
156  return model_.getDiscount() * maxR_ * horizon;
157  }
158 
159  template <IsModel M>
160  const M& RTBSS<M>::getModel() const {
161  return model_;
162  }
163 }
164 
165 #endif
AIToolbox::checkDifferentSmall
bool checkDifferentSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably different.
Definition: Core.hpp:60
AIToolbox::POMDP
Definition: AMDP.hpp:14
TypeTraits.hpp
AIToolbox::POMDP::RTBSS::getModel
const M & getModel() const
This function returns the POMDP model being used.
Definition: RTBSS.hpp:160
AIToolbox::POMDP::RTBSS
This class represents the RTBSS online planner.
Definition: RTBSS.hpp:37
AIToolbox::POMDP::beliefExpectedReward
double beliefExpectedReward(const M &model, const Belief &b, const size_t a)
This function computes an immediate reward based on a belief rather than a state.
Definition: Utils.hpp:471
AIToolbox::POMDP::RTBSS::RTBSS
RTBSS(const M &m, double maxR)
Basic constructor.
Definition: RTBSS.hpp:108
Types.hpp
AIToolbox::POMDP::RTBSS::sampleAction
std::tuple< size_t, double > sampleAction(const Belief &b, unsigned horizon)
This function computes the best value for a given belief and its value.
Definition: RTBSS.hpp:113
AIToolbox::POMDP::updateBeliefUnnormalized
void updateBeliefUnnormalized(const M &model, const Belief &b, const size_t a, const size_t o, Belief *bRet)
Creates a new belief reflecting changes after an action and observation for a particular Model.
Definition: Utils.hpp:171
AIToolbox::POMDP::Belief
ProbabilityVector Belief
This represents a belief, which is a probability distribution over states.
Definition: Types.hpp:12
Utils.hpp
Probability.hpp