// AI-Toolbox/IncrementalPruning_8hpp_source.html

#ifndef AI_TOOLBOX_POMDP_INCREMENTAL_PRUNING_HEADER_FILE

#define AI_TOOLBOX_POMDP_INCREMENTAL_PRUNING_HEADER_FILE


#include <limits>


#include <AIToolbox/Utils/Probability.hpp>

#include <AIToolbox/Utils/Prune.hpp>

#include <AIToolbox/POMDP/Types.hpp>

#include <AIToolbox/POMDP/TypeTraits.hpp>

#include <AIToolbox/POMDP/Utils.hpp>

#include <AIToolbox/POMDP/Algorithms/Utils/Projecter.hpp>


namespace AIToolbox::POMDP {

    /**
     * @brief Solver object whose call operator computes a POMDP value function.
     *
     * The call operator (defined further down in this header) repeatedly
     * projects the previous timestep's value vectors, prunes them, merges them
     * per action through crossSum(), and prunes the result again. It stops
     * after at most horizon_ timesteps, or earlier when the tolerance check
     * is active and the measured variation is no longer above tolerance_.
     *
     * The constructor, the accessors and crossSum() are only declared here;
     * their definitions are not part of this header.
     */
    class IncrementalPruning {

        public:

            /**
             * @brief Builds the solver.
             *
             * NOTE(review): presumably h and tolerance initialise horizon_
             * and tolerance_ -- confirm against the out-of-line definition.
             *
             * @param h The horizon (see horizon_).
             * @param tolerance The tolerance (see tolerance_).
             */
            IncrementalPruning(unsigned h, double tolerance);


            /**
             * @brief Sets the tolerance parameter.
             *
             * NOTE(review): defined elsewhere; any validation of t is not
             * visible from this header.
             *
             * @param t The new tolerance.
             */
            void setTolerance(double t);


            /**
             * @brief Sets the horizon parameter.
             *
             * NOTE(review): defined elsewhere.
             *
             * @param h The new horizon.
             */
            void setHorizon(unsigned h);


            /**
             * @brief Returns the tolerance parameter (definition not shown here).
             */
            double getTolerance() const;


            /**
             * @brief Returns the horizon parameter (definition not shown here).
             */
            unsigned getHorizon() const;


            /**
             * @brief Solves the given model.
             *
             * @tparam M A type satisfying the IsModel concept; the
             *           definition calls getS(), getA() and getO() on it and
             *           hands it to a Projecter.
             * @param model The model to solve.
             *
             * @return A tuple holding the last computed variation (0.0 when
             *         the tolerance check is disabled) and the computed
             *         ValueFunction.
             */
            template <IsModel M>

            std::tuple<double, ValueFunction> operator()(const M & model);


        private:

            /**
             * @brief Combines two lists of value vectors for a given action.
             *
             * Called by operator() as crossSum(l1, l2, a, stepsize > 0), so
             * `order` is true during forward merge passes and false during
             * backward ones.
             *
             * NOTE(review): the definition is not visible here; how `order`
             * and `a` are used inside should be confirmed there.
             */
            VList crossSum(const VList & l1, const VList & l2, size_t a, bool order);


            // Model sizes, overwritten at the start of every operator() call
            // with model.getS(), model.getA() and model.getO() respectively.
            size_t S, A, O;

            // Upper bound on the number of timesteps operator() computes.
            unsigned horizon_;

            // Threshold the per-timestep variation is compared against in
            // operator(); the check is skipped when checkDifferentSmall()
            // reports it as not different from 0.0.
            double tolerance_;

    };


    /**
     * @brief Computes the value function of the input model.
     *
     * Starting from the value function returned by makeValueFunction(S), each
     * iteration:
     *
     * 1. projects the previous timestep's vectors for every action and
     *    observation;
     * 2. prunes every projection list on its own;
     * 3. for each action, cross-sums the per-observation lists pairwise,
     *    pruning after every merge;
     * 4. concatenates the per-action results and prunes one last time.
     *
     * The loop runs for at most horizon_ timesteps. If tolerance_ is
     * considered different from zero by checkDifferentSmall(), the loop also
     * stops as soon as the variation between the last two timesteps (as
     * measured by weakBoundDistance()) is not greater than tolerance_.
     *
     * Note that this call overwrites the S, A and O members.
     *
     * @tparam M The type of the model, which must satisfy IsModel.
     * @param model The model to solve.
     *
     * @return A tuple with the last computed variation and the computed
     *         value function. The variation is 0.0 when the tolerance check
     *         is disabled, and 2 * tolerance_ if the check is enabled but no
     *         timestep was computed.
     */
    template <IsModel M>
    std::tuple<double, ValueFunction> IncrementalPruning::operator()(const M & model) {
        // Initialize "global" variables (they are members of the class).
        S = model.getS();
        A = model.getA();
        O = model.getO();

        auto v = makeValueFunction(S); // TODO: May take user input

        unsigned timestep = 0;

        Pruner prune(S);
        Projecter projecter(model);

        // Prunes a list in place: prune() returns the new logical end of the
        // range, and everything from there on is erased.
        const auto pruneInPlace = [&prune](auto & list) {
            const auto begin = std::begin(list);
            const auto end   = std::end  (list);
            list.erase(prune(begin, end, unwrap), end);
        };

        const bool useTolerance = checkDifferentSmall(tolerance_, 0.0);
        double variation = tolerance_ * 2; // Make it bigger so the first check passes
        while ( timestep < horizon_ && ( !useTolerance || variation > tolerance_ ) ) {
            ++timestep;

            // Compute all possible outcomes, from our previous results.
            // This means that for each action-observation pair, we are going
            // to obtain the same number of possible outcomes as the number
            // of entries in our initial vector w.
            auto projs = projecter(v[timestep-1]);

            size_t finalWSize = 0;
            // In this method we split the work by action, which will then
            // be joined again at the end of the loop.
            for ( size_t a = 0; a < A; ++a ) {
                // We prune each outcome separately to be sure
                // we do not replicate work later.
                for ( size_t o = 0; o < O; ++o )
                    pruneInPlace(projs[a][o]);

                // Here we reduce at the minimum the cross-summing, by alternating
                // merges. We pick matches like a reverse binary tree, so that
                // we always pick lists that have been merged the least.
                //
                // Example for O==7:
                //
                //  0 <- 1    2 <- 3    4 <- 5    6
                //  0 ------> 2         4 ------> 6
                //            2 <---------------- 6
                //
                // In particular, the variables are:
                //
                // - oddOld:   Whether our starting step has an odd number of elements.
                //             If so, we skip the last one.
                // - front:    The id of the element at the "front" of our current pass.
                //             note that since passes can be backwards this can be high.
                // - back:     Opposite of front, which excludes the last element if we
                //             have odd elements.
                // - stepsize: The space between each "first" of each new merge.
                // - diff:     The space between each "first" and its match to merge.
                // - elements: The number of elements we have left to merge.

                // The bookkeeping is signed since backward passes use negative
                // steps and `back` can temporarily fall outside [0, O).
                const int observations = static_cast<int>(O);

                bool oddOld  = observations % 2 != 0;
                int front    = 0;
                int back     = observations - ( oddOld ? 1 : 0 );
                int stepsize = 2;
                int diff     = 1;
                int elements = observations;
                while ( elements > 1 ) {
                    for ( int i = front; i != back; i += stepsize ) {
                        // The last argument tells crossSum whether this pass moves forward.
                        projs[a][i] = crossSum(projs[a][i], projs[a][i + diff], a, stepsize > 0);
                        pruneInPlace(projs[a][i]);
                        --elements;
                    }

                    const bool oddNew = elements % 2;

                    // Swap direction: the next pass starts from where this one
                    // ended and doubles its stride.
                    const int tmp   = back;
                    back      = front - ( oddNew ? 0 : stepsize );
                    front     = tmp   - ( oddOld ? 0 : stepsize );
                    stepsize *= -2;
                    diff     *= -2;

                    oddOld = oddNew;
                }
                // Put the result where we can find it
                if (front != 0)
                    projs[a][0] = std::move(projs[a][front]);
                finalWSize += projs[a][0].size();
            }
            VList w;
            w.reserve(finalWSize);

            // Here we don't have to do fancy merging since no cross-summing is involved
            for ( size_t a = 0; a < A; ++a )
                w.insert(std::end(w), std::make_move_iterator(std::begin(projs[a][0])), std::make_move_iterator(std::end(projs[a][0])));

            // We have them all, and we prune one final time to be sure we have
            // computed the parsimonious set of value functions.
            pruneInPlace(w);

            v.emplace_back(std::move(w));

            // Check convergence
            if ( useTolerance )
                variation = weakBoundDistance(v[timestep-1], v[timestep]);
        }

        // v is not used afterwards: move it into the tuple rather than
        // deep-copying every timestep's list of vectors.
        return std::make_tuple(useTolerance ? variation : 0.0, std::move(v));
    }

}


#endif