// AI-Toolbox/MCTS_8hpp_source.html

#ifndef AI_TOOLBOX_MDP_MCTS_HEADER_FILE

#define AI_TOOLBOX_MDP_MCTS_HEADER_FILE


#include <AIToolbox/MDP/Types.hpp>

#include <AIToolbox/MDP/TypeTraits.hpp>

#include <AIToolbox/Utils/Probability.hpp>

#include <AIToolbox/Seeder.hpp>

#include <AIToolbox/MDP/Algorithms/Utils/Rollout.hpp>


#include <unordered_map>


namespace AIToolbox::MDP {

    /**
     * @brief Monte Carlo Tree Search planner working on a generative model.
     *
     * The search tree alternates two kinds of nodes:
     *
     * - a StateNode owns one ActionNode per action available in that state;
     * - an ActionNode owns the StateNodes that were reached by sampling that
     *   action, together with a running value estimate and a visit count.
     *
     * Child StateNodes are stored in a map keyed by size_t. When the model's
     * state type already is size_t it is used directly as the key; any other
     * state type is converted to a key through StateHash<State>.
     *
     * The planner only stores a reference to the model, so the model must
     * outlive the MCTS instance.
     *
     * @tparam M The type of the generative model to plan on.
     * @tparam StateHash The hasher template used for non-size_t states.
     */
    template <typename M, template <typename> class StateHash = std::hash>
    requires AIToolbox::IsGenerativeModel<M> && HasIntegralActionSpace<M>
    class MCTS {
        // State type of the model, as returned by its getS() method.
        using State = std::remove_cvref_t<decltype(std::declval<M>().getS())>;
        // Whether states must be hashed before being used as map keys.
        static constexpr bool hashState = !std::is_same_v<size_t, State>;

        public:
            struct StateNode;
            /// Children of an ActionNode, indexed by state key.
            using StateNodes = std::unordered_map<size_t, StateNode>;

            /// Statistics and successors for one action taken in one state.
            struct ActionNode {
                /// States reached after taking this action.
                StateNodes children;
                /// Running average of the returns observed through this action.
                double V = 0.0;
                /// Number of times this action has been tried.
                unsigned N = 0;
            };
            /// Children of a StateNode, indexed by action.
            using ActionNodes = std::vector<ActionNode>;

            /// A visited state, with one child per available action.
            struct StateNode {
                /// Per-action statistics; may be empty until first descended into.
                ActionNodes children;
                /// Number of times this state has been visited.
                unsigned N = 0;
            };

            /**
             * @brief Basic constructor.
             *
             * @param m The model to plan on; only a reference is kept.
             * @param iterations The number of simulations run per sampleAction call.
             * @param exp The exploration constant weighting the visit-count bonus.
             */
            MCTS(const M& m, unsigned iterations, double exp);

            /**
             * @brief Discards the current tree and plans from scratch.
             *
             * @param s The state to plan from.
             * @param horizon The maximum depth of each simulation.
             *
             * @return The index of the action with the highest estimated
             *         value, or 0 if horizon is 0.
             */
            size_t sampleAction(const State & s, unsigned horizon);

            /**
             * @brief Plans from a successor state, reusing the stored tree if possible.
             *
             * The subtree stored under action a and state s1 becomes the new
             * root. If s1 was never reached from a in the stored tree, this
             * behaves like sampleAction(s1, horizon).
             *
             * @param a The action performed from the previous root.
             * @param s1 The state reached after performing a.
             * @param horizon The maximum depth of each simulation.
             *
             * @return The index of the action with the highest estimated
             *         value, or 0 if horizon is 0.
             */
            size_t sampleAction(size_t a, const State & s1, unsigned horizon);

            /// Sets the number of simulations run per sampleAction call.
            void setIterations(unsigned iter);

            /// Sets the exploration constant.
            void setExploration(double exp);

            /// Returns the model this planner works on.
            const M& getModel() const;

            /// Returns the root of the current search tree.
            const StateNode& getGraph() const;

            /// Returns the number of simulations run per sampleAction call.
            unsigned getIterations() const;

            /// Returns the exploration constant.
            double getExploration() const;

        private:
            const M& model_;
            unsigned iterations_;
            // Overwritten with the requested horizon before every batch of
            // simulations; default-initialized so the object never holds an
            // indeterminate value (e.g. when copied before any planning).
            unsigned maxDepth_ = 0;
            double exploration_;

            // Root of the search tree.
            StateNode graph_;

            mutable RandomEngine rand_;

            // Private Methods

            // Runs iterations_ simulations from the root and picks the best action.
            size_t runSimulation(const State & s, unsigned horizon);
            // Performs one simulation step from sn and returns the sampled return.
            double simulate(StateNode & sn, const State & s, unsigned horizon);
            // Sizes an to the number of actions available (in s, if state dependent).
            void allocateActionNodes(ActionNodes & an, const State & s);

            // Returns the iterator to the ActionNode with the highest V.
            template <typename Iterator>
            Iterator findBestA(Iterator begin, Iterator end);

            // Returns the iterator to the ActionNode with the highest V plus
            // exploration bonus; count is the visit count of the parent node.
            template <typename Iterator>
            Iterator findBestBonusA(Iterator begin, Iterator end, unsigned count);
    };


    // Stores a reference to the model and the search parameters, starts with
    // an empty tree, and seeds the internal random engine from the Seeder.
    template <typename M, template <typename> class StateHash>
    requires AIToolbox::IsGenerativeModel<M> && HasIntegralActionSpace<M>
    MCTS<M, StateHash>::MCTS(const M& m, const unsigned iter, const double exp) :
            model_(m),
            iterations_(iter),
            exploration_(exp),
            graph_(),
            rand_(Seeder::getSeed())
    {
    }


    // Plans from scratch: whatever tree was stored is thrown away, a fresh
    // root is set up for state s, and the simulations are run from it.
    template <typename M, template <typename> class StateHash>
    requires AIToolbox::IsGenerativeModel<M> && HasIntegralActionSpace<M>
    size_t MCTS<M, StateHash>::sampleAction(const State & s, const unsigned horizon) {
        // Drop the previous tree entirely.
        graph_ = StateNode{};

        // The root needs its action slots before the first simulation.
        allocateActionNodes(graph_.children, s);

        const size_t bestAction = runSimulation(s, horizon);
        return bestAction;
    }


    // Plans from state s1, reached by performing action a from the current
    // root. If the stored tree contains the subtree for (a, s1) it is promoted
    // to be the new root so its statistics are reused; otherwise the search
    // restarts from scratch. Returns the index of the best action found.
    template <typename M, template <typename> class StateHash>
    requires AIToolbox::IsGenerativeModel<M> && HasIntegralActionSpace<M>
    size_t MCTS<M, StateHash>::sampleAction(const size_t a, const State & s1, const unsigned horizon) {
        // If the root has no slot for this action (for example nothing has
        // been sampled yet), indexing it would be out of bounds. There is
        // nothing to reuse in that case, so just start over.
        if ( a >= graph_.children.size() )
            return sampleAction(s1, horizon);

        auto & states = graph_.children[a].children;

        // Same key derivation used when the tree was built.
        size_t s1Key;
        if constexpr (hashState) s1Key = StateHash<State>()(s1);
        else                     s1Key = s1;

        auto it = states.find(s1Key);
        if ( it == states.end() )
            return sampleAction(s1, horizon);

        // Here we need an additional step, because *it is contained by graph_.
        // If we just move assign, graph_ is first going to delete everything it
        // contains (included *it), and then we are going to move unallocated memory
        // into graph_! So we move *it outside of the graph_ hierarchy, so that
        // we can then assign safely.
        { auto tmp = std::move(it->second); graph_ = std::move(tmp); }

        // We resize here in case we didn't have time to sample the new
        // head node. In this case, the new head may not have children.
        // This would break the UCT call.
        allocateActionNodes(graph_.children, s1);

        return runSimulation(s1, horizon);
    }


    // Runs iterations_ simulations from the root for state s, each limited to
    // the given horizon, and returns the index of the root action with the
    // highest value estimate. A horizon of 0 performs no work and returns 0.
    template <typename M, template <typename> class StateHash>
    requires AIToolbox::IsGenerativeModel<M> && HasIntegralActionSpace<M>
    size_t MCTS<M, StateHash>::runSimulation(const State & s, const unsigned horizon) {
        if ( horizon == 0 )
            return 0;

        // simulate() reads the depth limit from here.
        maxDepth_ = horizon;

        for ( unsigned run = 0; run < iterations_; ++run ) {
            simulate(graph_, s, 0);
        }

        // Final choice is purely greedy: no exploration bonus.
        const auto first = std::begin(graph_.children);
        const auto best  = findBestA(first, std::end(graph_.children));
        return std::distance(first, best);
    }


    // Performs one simulation pass through the tree starting at node sn,
    // which represents state s at the given depth (the root is depth 0).
    //
    // An action is picked using the exploration bonus, a transition is sampled
    // from the model, and then either:
    // - the reached state is new for that action: a node is created for it
    //   and the rest of the horizon is estimated with a rollout;
    // - the reached state is already in the tree: the simulation recurses
    //   into it.
    // The discounted return is used to update the action's running average
    // and is returned to the caller.
    template <typename M, template <typename> class StateHash>
    requires AIToolbox::IsGenerativeModel<M> && HasIntegralActionSpace<M>
    double MCTS<M, StateHash>::simulate(StateNode & sn, const State & s, const unsigned depth) {
        // Head update
        sn.N++;

        auto begin = std::begin(sn.children);
        const size_t a = std::distance(begin, findBestBonusA(begin, std::end(sn.children), sn.N));

        auto [s1, rew] = model_.sampleSR(s, a);

        auto & aNode = sn.children[a];

        // We only go deeper if needed (maxDepth_ is always at least 1).
        if ( depth + 1 < maxDepth_ && !model_.isTerminal(s1) ) {
            // If our state is not a size_t, hash it so we can work with the
            // StateNode map. The reason to hash it ourselves is that the map
            // *will* store the keys, and so if the state is an expensive
            // object (like a vector), we will have tons of allocations which
            // we can avoid, since we don't need to remember the exact state here.
            //
            // This *could* go wrong if two reachable states hash to the same
            // thing, since in this way we won't be able to distinguish them
            // (while a full-fledged map can), but this should be extremely
            // improbable and worth the performance gain.
            size_t s1Key;
            if constexpr (hashState) s1Key = StateHash<State>()(s1);
            else                     s1Key = s1;

            // Single lookup: creates an empty node if the state is new,
            // otherwise returns the existing one.
            auto [it, inserted] = aNode.children.try_emplace(s1Key);

            double futureRew;
            if ( inserted ) {
                // One step (depth -> depth + 1) has already been taken, so
                // the rollout may only use what is left of the horizon. The
                // enclosing condition guarantees this is at least 1.
                futureRew = rollout(model_, s1, maxDepth_ - (depth + 1), rand_);
            }
            else {
                // Since most memory is allocated on the leaves,
                // we do not allocate on node creation but only when
                // we are actually descending into a node. If the node
                // already has memory this should not do anything in
                // any case.
                allocateActionNodes(it->second.children, s1);
                futureRew = simulate( it->second, s1, depth + 1 );
            }

            rew += model_.getDiscount() * futureRew;
        }

        // Action update: incremental running average of the observed returns.
        aNode.N++;
        aNode.V += ( rew - aNode.V ) / static_cast<double>(aNode.N);

        return rew;
    }


    // Returns the iterator to the ActionNode in [begin, end) with the highest
    // value estimate V, ignoring visit counts. The first such node wins ties.
    template <typename M, template <typename> class StateHash>
    requires AIToolbox::IsGenerativeModel<M> && HasIntegralActionSpace<M>
    template <typename Iterator>
    Iterator MCTS<M, StateHash>::findBestA(Iterator begin, Iterator end) {
        const auto lowerValue = [](const ActionNode & lhs, const ActionNode & rhs) {
            return lhs.V < rhs.V;
        };
        return std::max_element(begin, end, lowerValue);
    }


    // Returns the iterator to the ActionNode in [begin, end) with the highest
    // score, where the score is the value estimate V plus an exploration
    // bonus that shrinks as the node's own visit count N grows. `count` is the
    // visit count of the parent node. The range must not be empty; the first
    // node with the highest score wins ties.
    template <typename M, template <typename> class StateHash>
    requires AIToolbox::IsGenerativeModel<M> && HasIntegralActionSpace<M>
    template <typename Iterator>
    Iterator MCTS<M, StateHash>::findBestBonusA(Iterator begin, Iterator end, const unsigned count) {
        // The parent count may be as small as 1, and log(1) is 0; adding 1.0
        // keeps the numerator positive so a 0/0 can never come up below.
        const double logVisits = std::log(count + 1.0);

        // Scoring rule for a single action. Swapping this out for a different
        // formula is the way to obtain other variants of the algorithm.
        const auto score = [this, logVisits](const ActionNode & node){
            return node.V + exploration_ * std::sqrt( logVisits / node.N );
        };

        // Start from the first action and keep whichever strictly beats it.
        Iterator winner = begin;
        double winnerScore = score(*winner);

        for ( Iterator candidate = begin + 1; candidate < end; ++candidate ) {
            const double candidateScore = score(*candidate);
            if ( !(candidateScore > winnerScore) )
                continue;

            winner = candidate;
            winnerScore = candidateScore;
        }

        return winner;
    }


    // Resizes `an` so that it holds one ActionNode per available action. For
    // models with a fixed action space the count comes from getA(); otherwise
    // it is queried for the specific state s through getA(s).
    template <typename M, template <typename> class StateHash>
    requires AIToolbox::IsGenerativeModel<M> && HasIntegralActionSpace<M>
    void MCTS<M, StateHash>::allocateActionNodes(ActionNodes & an, const State & s) {
        if constexpr (HasFixedActionSpace<M>) {
            an.resize(model_.getA());
        } else {
            an.resize(model_.getA(s));
        }
    }


    // Changes how many simulations are run on each sampleAction call.
    template <typename M, template <typename> class StateHash>
    requires AIToolbox::IsGenerativeModel<M> && HasIntegralActionSpace<M>
    void MCTS<M, StateHash>::setIterations(const unsigned iter) { iterations_ = iter; }


    // Changes the constant that weights the exploration bonus during search.
    template <typename M, template <typename> class StateHash>
    requires AIToolbox::IsGenerativeModel<M> && HasIntegralActionSpace<M>
    void MCTS<M, StateHash>::setExploration(const double exp) { exploration_ = exp; }


    // Gives read-only access to the model this planner was constructed with.
    template <typename M, template <typename> class StateHash>
    requires AIToolbox::IsGenerativeModel<M> && HasIntegralActionSpace<M>
    const M& MCTS<M, StateHash>::getModel() const { return model_; }


    // Gives read-only access to the root node of the current search tree.
    template <typename M, template <typename> class StateHash>
    requires AIToolbox::IsGenerativeModel<M> && HasIntegralActionSpace<M>
    auto MCTS<M, StateHash>::getGraph() const -> const StateNode& { return graph_; }


    // Reports how many simulations are run on each sampleAction call.
    template <typename M, template <typename> class StateHash>
    requires AIToolbox::IsGenerativeModel<M> && HasIntegralActionSpace<M>
    unsigned MCTS<M, StateHash>::getIterations() const { return iterations_; }


    // Reports the constant that weights the exploration bonus during search.
    template <typename M, template <typename> class StateHash>
    requires AIToolbox::IsGenerativeModel<M> && HasIntegralActionSpace<M>
    double MCTS<M, StateHash>::getExploration() const { return exploration_; }

}


#endif