// AI-Toolbox/rPOMCPGraph_8hpp_source.html

#ifndef AI_TOOLBOX_POMDP_rPOMCP_GRAPH_HEADER_FILE

#define AI_TOOLBOX_POMDP_rPOMCP_GRAPH_HEADER_FILE


#include <vector>

#include <unordered_map>


#include <AIToolbox/Utils/Probability.hpp>

#include <AIToolbox/POMDP/Types.hpp>


namespace AIToolbox::Impl::POMDP {
    // Stateless placeholder base, selected when a particle or node needs no
    // extra storage for the chosen knowledge measure.
    struct EmptyStruct {};

    // Extra per-particle storage used when the entropy knowledge measure is
    // enabled.
    struct BeliefParticleEntropyAddon {
        // Cached p * log(p) term contributed by this particle.
        double negativeEntropy{0.0};
    };

    // Extra per-node storage used when the max-belief knowledge measure is
    // enabled.
    struct BeliefNodeNoEntropyAddon {
        // State currently holding the highest particle count in the node.
        size_t maxS_{0};
    };
}


namespace AIToolbox::POMDP {

    // Forward declaration: ActionNodes and BeliefNode refer to ActionNode
    // before its full definition appears.
    template <bool UseEntropy>
    struct ActionNode;


    // Contiguous container of action nodes; used as the children list of a
    // belief node.
    template <bool UseEntropy>
    using ActionNodes = std::vector<ActionNode<UseEntropy>>;


    // Per-state record inside a tracked belief. When UseEntropy is true it
    // also inherits the entropy addon (a cached entropy term); otherwise it
    // inherits an empty base and only stores the count.
    template <bool UseEntropy>
    struct BeliefParticle
        : public std::conditional_t<UseEntropy,
                                    Impl::POMDP::BeliefParticleEntropyAddon,
                                    Impl::POMDP::EmptyStruct>
    {
        // How many times this state has been recorded.
        unsigned N{0};
    };


    // Belief representation for nodes further down in the tree: a hash map
    // from state to its particle. Nothing is sampled from it; it only needs
    // fast per-state access so that the entropy values can be recomputed.
    // (The default hasher and equality of unordered_map are used.)
    template <bool UseEntropy>
    using TrackBelief = std::unordered_map<size_t, BeliefParticle<UseEntropy>>;


    // A belief node of the search graph. It tracks how many times each state
    // has been recorded in it (trackBelief_) together with a scalar
    // "knowledge measure" derived from those counts. Depending on UseEntropy
    // it inherits either EmptyStruct (entropy mode) or
    // BeliefNodeNoEntropyAddon (max-belief mode, which has to remember the
    // most frequent state).
    template <bool UseEntropy>
    class BeliefNode : public std::conditional_t<UseEntropy, Impl::POMDP::EmptyStruct, Impl::POMDP::BeliefNodeNoEntropyAddon> {
        public:
            // Creates an empty node with every numeric member set to zero.
            BeliefNode();

            // Records one more particle for state s and refreshes the
            // knowledge measure. Proportions are computed over N+1; this
            // function does not modify N itself.
            void updateBeliefAndKnowledge(size_t s);

            // Returns the current value of the knowledge measure.
            double getKnowledgeMeasure() const;

            // Counter for this node. updateBeliefAndKnowledge() divides by
            // N+1, so this is presumably the number of particles recorded
            // before the current one -- confirm against the caller.
            unsigned N;
            // Child action nodes. HeadBeliefNode resizes this to its
            // constructor argument A.
            ActionNodes<UseEntropy> children;

            // NOTE(review): V, actionsV and bestAction are only
            // zero-initialized in this file; their meaning is defined by the
            // code using the graph (presumably value estimates and the index
            // of the best action -- confirm).
            double V;
            double actionsV;
            size_t bestAction;

        protected:
            // Per-state particle counts (plus cached entropy terms when
            // UseEntropy is true).
            TrackBelief<UseEntropy> trackBelief_;
            // Cached knowledge measure: running sum of p*log(p) terms in
            // entropy mode, fraction held by the most common state in
            // max-belief mode.
            double knowledgeMeasure_;
    };


    // Hash map from a size_t key to a belief node; used as the children of an
    // action node.
    // NOTE(review): the key is presumably the observation index -- confirm.
    template <bool UseEntropy>
    using BeliefNodes = std::unordered_map<size_t, BeliefNode<UseEntropy>>;


    // Node associated with an action. It owns the belief nodes that hang
    // below it and starts with zeroed statistics.
    template <bool UseEntropy>
    struct ActionNode {
        // Child belief nodes, keyed by size_t (presumably the observation
        // index -- confirm).
        BeliefNodes<UseEntropy> children;
        // Value stored for this action (presumably a running estimate --
        // confirm); starts at zero.
        double V{0.0};
        // Counter for this action (presumably a visit count -- confirm);
        // starts at zero.
        unsigned N{0};
    };


    // This is used to sample at the top of the tree. It is a vector containing a
    // state-count pair for each particle: `first` is the state, `second` is how
    // many particles of that state the belief holds.
    using SampleBelief = std::vector<std::pair<size_t, unsigned>>;


    // The belief node at the top of the graph. On top of BeliefNode it keeps
    // a vector-based belief (sampleBelief_) from which states can be sampled.
    template <bool UseEntropy>
    class HeadBeliefNode : public BeliefNode<UseEntropy> {
        public:
            // Creates a head node with A children and no particles.
            HeadBeliefNode(size_t A, RandomEngine & rand);
            // Creates a head node with A children whose particles are
            // beliefSize samples drawn from b.
            HeadBeliefNode(size_t A, size_t beliefSize, const AIToolbox::POMDP::Belief & b, RandomEngine & rand);
            // Promotes an existing node to head: takes over the contents of
            // bn, resizes the children to A and converts the tracked
            // per-state counts of bn into the sampling belief.
            HeadBeliefNode(size_t A, BeliefNode<UseEntropy> && bn, RandomEngine & rand);

            // Returns whether the sampling belief holds no entries.
            bool isSampleBeliefEmpty() const;
            // Draws a state from the sampling belief, each state weighted by
            // its particle count. The sampling belief must not be empty.
            size_t sampleBelief() const;
            // Returns the state with the highest particle count in the
            // sampling belief.
            size_t getMostCommonParticle() const;

        private:
            // Address of the engine passed at construction. It is not owned
            // by this class and is dereferenced when sampling.
            RandomEngine * rand_;
            // State-count pairs that sampleBelief() draws from.
            SampleBelief sampleBelief_;
            // Total number of particles (sum of the counts) as set by the
            // constructors that receive a belief.
            size_t beliefSize_;
    };


    template <bool UseEntropy>

    BeliefNode<UseEntropy>::BeliefNode() :

            N(0), V(0.0),

            actionsV(0.0), bestAction(0),

            knowledgeMeasure_(0.0) {}


    // Note for ENTROPY implementation:
    // In theory this is wrong as we should update all the entropy terms, one
    // for each different type of particle. In practice we hope this will work
    // anyway, and that there are not going to be huge problems, as each particle
    // should be seen enough times to still keep a decent approximation of its
    // entropy term. Minor errors are ok since this is still an estimation.
    //
    // Records one more particle for state s and updates the entropy-based
    // knowledge measure: the old p*log(p) term of s is removed from the
    // running sum, the count of s is incremented, and the new term (with
    // p = count / (N+1)) is cached on the particle and added back.
    //
    // s - state of the particle being recorded. A state seen for the first
    //     time is default-inserted with count 0 and entropy term 0.
    //
    // This is a full specialization defined in a header, so it must be
    // `inline` to avoid multiple-definition errors when the header is
    // included from more than one translation unit.
    template <>
    inline void BeliefNode<true>::updateBeliefAndKnowledge(const size_t s) {
        // Single hash lookup; operator[] creates the entry on first sight.
        auto & particle = trackBelief_[s];

        // Remove entropy term for this state from summatory
        knowledgeMeasure_ -= particle.negativeEntropy;
        // Updating belief
        particle.N += 1;

        // Computing new entropy term for this state
        const double p = static_cast<double>(particle.N) / static_cast<double>(N+1);
        const double newEntropy = p * std::log(p);

        // Update values
        particle.negativeEntropy = newEntropy;
        knowledgeMeasure_ += newEntropy;
    }


    // This is the Max-Belief implementation.
    //
    // Records one more particle for state s, updates which state currently
    // has the highest count (maxS_), and sets the knowledge measure to that
    // highest count divided by N+1.
    //
    // s - state of the particle being recorded. A state seen for the first
    //     time is default-inserted with count 0 before being incremented.
    //
    // This is a full specialization defined in a header, so it must be
    // `inline` to avoid multiple-definition errors when the header is
    // included from more than one translation unit.
    template <>
    inline void BeliefNode<false>::updateBeliefAndKnowledge(const size_t s) {
        auto & particle = trackBelief_[s];
        particle.N += 1;

        // operator[] is used on purpose (as before): if maxS_ has never been
        // recorded its entry is default-inserted with count 0. References
        // into an unordered_map stay valid across insertions, so `particle`
        // is still usable afterwards.
        unsigned maxCount = trackBelief_[maxS_].N;
        if ( particle.N > maxCount ) {
            maxS_ = s;
            maxCount = particle.N;
        }

        knowledgeMeasure_ = static_cast<double>(maxCount) / static_cast<double>(N+1);
    }


    // Accessor for the cached knowledge measure, as last computed by
    // updateBeliefAndKnowledge() (or zero for a fresh node).
    template <bool UseEntropy>
    double BeliefNode<UseEntropy>::getKnowledgeMeasure() const {
        return this->knowledgeMeasure_;
    }


    // Builds a head node with A default-constructed children and an empty
    // sampling belief.
    //
    // A    - number of children to create.
    // rand - random engine; its address is stored for later sampling.
    //
    // beliefSize_ is explicitly zeroed so that it matches the empty
    // sampleBelief_ instead of holding an indeterminate value.
    template <bool UseEntropy>
    HeadBeliefNode<UseEntropy>::HeadBeliefNode(const size_t A, RandomEngine & rand) :
            BeliefNode<UseEntropy>(), rand_(&rand), beliefSize_(0)
    {
        this->children.resize(A);
    }


    // Builds a head node whose sampling belief is made of particles drawn
    // from an explicit belief.
    //
    // A          - number of children to create.
    // beliefSize - number of particles to draw from b.
    // b          - belief the particles are sampled from.
    // rand       - random engine; its address is stored, used here for the
    //              draws and kept for later sampling.
    template <bool UseEntropy>
    HeadBeliefNode<UseEntropy>::HeadBeliefNode(const size_t A, const size_t beliefSize, const AIToolbox::POMDP::Belief & b, RandomEngine & rand) :
            BeliefNode<UseEntropy>(), rand_(&rand), beliefSize_(beliefSize)
    {
        this->children.resize(A);

        // Aggregate the draws per state, so that every sampled state ends up
        // as a single entry carrying its count.
        std::unordered_map<size_t, unsigned> generatedSamples;
        const size_t S = b.size();
        for ( size_t i = 0; i < beliefSize_; ++i )
            generatedSamples[AIToolbox::sampleProbability(S, b, *rand_)] += 1;

        // One entry is stored per distinct state, so reserve exactly that
        // many slots rather than one per drawn particle.
        sampleBelief_.reserve(generatedSamples.size());
        for ( const auto & [state, count] : generatedSamples ) {
            sampleBelief_.emplace_back(state, count);
            // Compute entropy here since we don't have a parent in this case (is it really needed?)
            // double p = static_cast<double>(count) / static_cast<double>(beliefSize_);
            // negativeEntropy += p * std::log(p);
        }
    }


    // Promotes an existing belief node to head of the graph.
    //
    // The base part is move-constructed from bn, the children are resized to
    // A, and every tracked (state, particle) entry is turned into a
    // (state, count) pair of the sampling belief while the counts are summed
    // into beliefSize_. The tracked belief is then released.
    template <bool UseEntropy>
    HeadBeliefNode<UseEntropy>::HeadBeliefNode(const size_t A, BeliefNode<UseEntropy> && bn, RandomEngine & rand)
        : BeliefNode<UseEntropy>(std::move(bn))
        , rand_(&rand)
        , beliefSize_(0)
    {
        this->children.resize(A);

        sampleBelief_.reserve(this->trackBelief_.size());
        for ( auto & [state, particle] : this->trackBelief_ ) {
            sampleBelief_.emplace_back(state, particle.N);
            beliefSize_ += particle.N;
        }

        // Clear belief memory: swapping with an empty map hands the old
        // storage to a temporary that is destroyed right away.
        {
            TrackBelief<UseEntropy> discarded;
            discarded.swap(this->trackBelief_);
        }
    }


    // Tells whether the sampling belief contains no (state, count) entries.
    template <bool UseEntropy>
    bool HeadBeliefNode<UseEntropy>::isSampleBeliefEmpty() const {
        return sampleBelief_.size() == 0;
    }


    // Draws a state from the sampling belief, each state being chosen with
    // probability proportional to its particle count.
    //
    // A number in [1, beliefSize_] is drawn uniformly and the entries are
    // walked in order, consuming their counts until the drawn number falls
    // inside one of them.
    //
    // Precondition: the sampling belief is not empty (check with
    // isSampleBeliefEmpty()) and beliefSize_ equals the sum of its counts.
    //
    // Returns the sampled state.
    template <bool UseEntropy>
    size_t HeadBeliefNode<UseEntropy>::sampleBelief() const {
        std::uniform_int_distribution<unsigned> generator(1, beliefSize_);
        // Kept unsigned throughout: no narrowing to int and no mixed
        // signed/unsigned arithmetic.
        unsigned pick = generator(*rand_);

        for ( const auto & particle : sampleBelief_ ) {
            if ( pick <= particle.second ) return particle.first;
            pick -= particle.second;
        }
        // Only reachable if the precondition on beliefSize_ is violated;
        // stay inside the container instead of indexing past its end.
        return sampleBelief_.back().first;
    }


    // Returns the state with the highest particle count in the head belief.
    // On ties the entry encountered first wins, since only a strictly larger
    // count replaces the current best.
    //
    // If the sampling belief is empty (or every count is zero) there is no
    // meaningful answer; 0 is returned instead of an uninitialized value.
    template <bool UseEntropy>
    size_t HeadBeliefNode<UseEntropy>::getMostCommonParticle() const {
        size_t bestGuess = 0;
        unsigned bestGuessCount = 0;
        for ( const auto & pair : sampleBelief_ ) {
            if ( pair.second > bestGuessCount ) {
                bestGuessCount = pair.second;
                bestGuess = pair.first;
            }
        }
        return bestGuess;
    }

}


#endif