// AI-Toolbox/RTBSS_8hpp_source.html

#ifndef AI_TOOLBOX_POMDP_RTBSS_HEADER_FILE

#define AI_TOOLBOX_POMDP_RTBSS_HEADER_FILE


#include <cstddef>
#include <limits>
#include <numeric>
#include <tuple>
#include <vector>

#include <AIToolbox/POMDP/Types.hpp>
#include <AIToolbox/POMDP/TypeTraits.hpp>
#include <AIToolbox/POMDP/Utils.hpp>
#include <AIToolbox/Utils/Probability.hpp>


namespace AIToolbox::POMDP {

    /**
     * @brief Online planner that picks an action through a depth-limited
     *        lookahead over beliefs, pruning branches with an optimistic bound.
     *
     * The planner keeps a reference to the model it is given; it does not
     * copy it, so the model must outlive the planner.
     *
     * @tparam M The model type; it must satisfy the IsModel concept.
     */
    template <IsModel M>
    class RTBSS {
        public:
            /**
             * @brief Basic constructor.
             *
             * @param m The model to plan on (stored by reference).
             * @param maxR The reward value used to build the pruning bound.
             */
            RTBSS(const M& m, double maxR);

            /**
             * @brief Searches from the given belief and reports the best action found.
             *
             * @param b The belief the search starts from.
             * @param horizon The depth of the lookahead.
             *
             * @return The selected action, paired with the value computed for it.
             */
            std::tuple<size_t, double> sampleAction(const Belief& b, unsigned horizon);

            /**
             * @brief Returns the model this planner was built on.
             */
            const M& getModel() const;

        private:
            // Model the search runs on (not owned).
            const M& model_;

            // Sizes read from the model at construction time
            // (getS(), getA() and getO() respectively).
            size_t S;
            size_t A;
            size_t O;

            // Scratch state of the search currently in progress: the best
            // root action found so far and the depth the search started at.
            // Both are overwritten at the start of every sampleAction() call.
            size_t maxA_;
            size_t maxDepth_;

            // Reward value the pruning bound is built from.
            double maxR_;

            /**
             * @brief Recursive lookahead returning the best value found for a belief.
             *
             * @param b The belief to evaluate.
             * @param horizon The number of lookahead steps left.
             */
            double simulate(const Belief & b, unsigned horizon);

            /**
             * @brief Optimistic estimate of the future reward, used for pruning.
             *
             * @param b The belief considered.
             * @param a The action considered.
             * @param horizon The number of steps the estimate covers.
             */
            double upperBound(const Belief & b, size_t a, unsigned horizon) const;
    };


    /**
     * @brief Builds a planner bound to the given model.
     *
     * The model is stored by reference, so it must stay alive for as long as
     * this object is used. Its state, action and observation counts are read
     * once here and cached.
     *
     * @param m The model to plan on.
     * @param maxR The reward value used to compute the pruning bound.
     */
    template <IsModel M>
    RTBSS<M>::RTBSS(const M& m, const double maxR) :
            model_(m), S(model_.getS()), A(model_.getA()),
            O(model_.getO()),
            // The search scratch state is overwritten by every sampleAction()
            // call; it is zeroed here so that the object never carries
            // indeterminate values (e.g. when it is copied before first use).
            maxA_(0), maxDepth_(0),
            maxR_(maxR) {}


    /**
     * @brief Runs a lookahead of the requested depth and reports its outcome.
     *
     * @param b The belief to start the search from.
     * @param horizon The depth of the lookahead.
     *
     * @return A tuple holding the action recorded by the search at the root
     *         (0 if the search never records one, e.g. when horizon is 0)
     *         and the value returned by the search.
     */
    template <IsModel M>
    std::tuple<size_t, double> RTBSS<M>::sampleAction(const Belief& b, const unsigned horizon) {
        // Reset the per-search state: simulate() recognizes the root of the
        // search by comparing its remaining horizon against maxDepth_, and
        // writes the best root action into maxA_.
        maxDepth_ = horizon;
        maxA_ = 0;

        const double bestValue = simulate(b, horizon);

        return std::tuple<size_t, double>(maxA_, bestValue);
    }


    /**
     * @brief Depth-limited branch-and-bound evaluation of a belief.
     *
     * For each action, an optimistic value is computed as the belief's
     * immediate reward (from beliefExpectedReward()) plus upperBound() for
     * the remaining steps. Actions whose optimistic value does not exceed the
     * best value found so far are skipped entirely. The others are expanded
     * over all observations, recursing on the normalized updated belief with
     * one less step.
     *
     * When called at the root of the search (horizon == maxDepth_), the best
     * action found is stored in maxA_.
     *
     * @param b The belief to evaluate.
     * @param horizon The number of lookahead steps left.
     *
     * @return The best value found among the expanded actions; 0 if horizon
     *         is 0, and minus infinity if there are no actions.
     */
    template <IsModel M>
    double RTBSS<M>::simulate(const Belief & b, const unsigned horizon) {
        if ( horizon == 0 ) return 0;

        std::vector<size_t> actionList(A);

        // Here we use no heuristic to sort the actions. If you want one
        // add it here!
        std::iota(std::begin(actionList), std::end(actionList), 0);

        double max = -std::numeric_limits<double>::infinity();

        for ( const auto a : actionList ) {
            double rew = beliefExpectedReward(model_, b, a);

            const double uBound = rew + upperBound(b, a, horizon - 1);

            // Pruning: an action that cannot beat the current best, even
            // optimistically, is dropped. It must NOT compete below with its
            // immediate reward alone: when the bound on the future is
            // negative, that unexpanded reward could exceed max and be
            // wrongly reported as the value of the action.
            // The negated form keeps NaN bounds on the "skip" side.
            if ( !(uBound > max) ) continue;

            for ( size_t o = 0; o < O; ++o ) {
                const auto nextBelief = updateBeliefUnnormalized(model_, b, a, o);
                // Mass of the unnormalized update: it weights this branch and
                // normalizes the belief passed to the recursion (presumably
                // it is the probability of observing o).
                const double sum = nextBelief.sum();

                // Only work if it makes sense
                if ( checkDifferentSmall(sum, 0.0) )
                    rew += model_.getDiscount() * sum * simulate(nextBelief / sum, horizon - 1);
            }

            if ( rew > max ) {
                max = rew;
                // Only the root call decides the action reported to the user.
                if ( horizon == maxDepth_ ) maxA_ = a;
            }
        }
        return max;
    }


    /**
     * @brief Upper bound on the discounted reward obtainable over the
     *        `horizon` steps that follow the current one.
     *
     * With d = model_.getDiscount(), the returned value is
     *
     *     maxR_ * (d + d^2 + ... + d^horizon)
     *
     * which bounds the future return from above provided that maxR_ is an
     * upper bound on the reward of a single step and d is in [0, 1].
     *
     * The simpler expression d * maxR_ * horizon is not used because, for
     * d < 1, it is lower than the sum above when maxR_ is negative (so it
     * would not be an upper bound and could prune the best action), and it
     * is looser than needed when maxR_ is positive. For d == 1 both
     * expressions coincide.
     *
     * The belief and action parameters are currently unused.
     *
     * @param horizon The number of future steps covered by the bound.
     *
     * @return The bound; 0 when horizon is 0.
     */
    template <IsModel M>
    double RTBSS<M>::upperBound(const Belief &, const size_t, const unsigned horizon) const {
        const double discount = model_.getDiscount();

        // Horner-style accumulation of d + d^2 + ... + d^horizon; this avoids
        // the cancellation of the closed form d * (1 - d^h) / (1 - d) when d
        // is close to 1, and needs no special case for d == 1.
        double weight = 0.0;
        for ( unsigned step = 0; step < horizon; ++step )
            weight = discount * (1.0 + weight);

        return maxR_ * weight;
    }


    /**
     * @brief Gives read-only access to the model this planner works on.
     *
     * @return A const reference to the model passed at construction.
     */
    template <IsModel M>
    const M& RTBSS<M>::getModel() const {
        return this->model_;
    }

}


#endif