1 #ifndef AI_TOOLBOX_POMDP_RTBSS_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_RTBSS_HEADER_FILE
46 RTBSS(
const M& m,
double maxR);
68 size_t maxA_, maxDepth_;
84 double simulate(
const Belief & b,
unsigned horizon);
104 double upperBound(
const Belief & b,
size_t a,
unsigned horizon)
const;
109 model_(m), S(model_.getS()), A(model_.getA()),
110 O(model_.getO()), maxR_(maxR) {}
114 maxA_ = 0; maxDepth_ = horizon;
116 const double value = simulate(b, horizon);
118 return std::make_tuple(maxA_, value);
123 if ( horizon == 0 )
return 0;
125 std::vector<size_t> actionList(A);
129 std::iota(std::begin(actionList), std::end(actionList), 0);
131 double max = -std::numeric_limits<double>::infinity();
133 for (
auto a : actionList ) {
136 const double uBound = rew + upperBound(b, a, horizon - 1);
137 if ( uBound > max ) {
138 for (
size_t o = 0; o < O; ++o ) {
140 const double sum = nextBelief.sum();
143 rew += model_.getDiscount() * sum * simulate(nextBelief / sum, horizon - 1);
148 if ( horizon == maxDepth_ ) maxA_ = a;
155 double RTBSS<M>::upperBound(
const Belief &,
const size_t,
const unsigned horizon)
const {
156 return model_.getDiscount() * maxR_ * horizon;