// AI-Toolbox/Witness_8hpp_source.html

#ifndef AI_TOOLBOX_POMDP_WITNESS_HEADER_FILE

#define AI_TOOLBOX_POMDP_WITNESS_HEADER_FILE


#include <unordered_set>


#include <boost/functional/hash.hpp>


#include <AIToolbox/POMDP/Types.hpp>

#include <AIToolbox/POMDP/Types.hpp>

#include <AIToolbox/POMDP/TypeTraits.hpp>

#include <AIToolbox/POMDP/Utils.hpp>

#include <AIToolbox/POMDP/Algorithms/Utils/Projecter.hpp>

#include <AIToolbox/Utils/Prune.hpp>


namespace AIToolbox::POMDP {

    /**
     * @brief Solver object that builds a POMDP value function via witness points.
     *
     * Calling the object on a model (see operator()) computes the value
     * function one timestep at a time. For every action it keeps an agenda of
     * candidate vectors and asks a WitnessLP whether a candidate has a witness
     * point; when one exists, the best vector at that point is kept and its
     * variations are queued in turn.
     *
     * The object holds per-call scratch state (S, A, O, agenda_,
     * triedVectors_), all of which is overwritten by operator().
     */
    class Witness {

        public:

            /**
             * @brief Builds a solver from a horizon and a tolerance.
             *
             * The definition is not in this header; presumably the arguments
             * end up in horizon_ and tolerance_ (to be confirmed against the
             * out-of-line definition).
             *
             * @param horizon Upper bound on the number of timesteps operator() computes.
             * @param tolerance Convergence threshold used by operator().
             */
            Witness(unsigned horizon, double tolerance);


            /**
             * @brief Sets the convergence tolerance (defined out of line).
             *
             * @param t The new tolerance.
             */
            void setTolerance(double t);


            /**
             * @brief Sets the maximum horizon (defined out of line).
             *
             * @param h The new horizon.
             */
            void setHorizon(unsigned h);


            /**
             * @brief Returns the current tolerance (defined out of line).
             */
            double getTolerance() const;


            /**
             * @brief Returns the current horizon (defined out of line).
             */
            unsigned getHorizon() const;


            /**
             * @brief Solves the given model; see the definition below the class.
             *
             * @param model The model to solve; must satisfy IsModel.
             *
             * @return A tuple holding the last measured variation (0.0 when the
             *         tolerance check is disabled) and the computed ValueFunction.
             */
            template <IsModel M>

            std::tuple<double, ValueFunction> operator()(const M & model);


        private:

            /**
             * @brief Seeds agenda_ with the sum of the first projection of each observation.
             *
             * @param projs The projections of a single action, indexed by observation.
             */
            template <typename ProjectionsRow>

            void addDefaultEntry(const ProjectionsRow & projs);


            /**
             * @brief Queues on agenda_ all untried single-observation variations of an entry.
             *
             * @param projs The projections of a single action, indexed by observation.
             * @param variated The entry whose per-observation choices are varied.
             */
            template <typename ProjectionsRow>

            void addVariations(const ProjectionsRow & projs, const VEntry & variated);


            // Model sizes, refreshed from getS()/getA()/getO() at every operator() call.
            size_t S, A, O;

            // Maximum number of timesteps operator() will compute.
            unsigned horizon_;

            // Threshold the per-timestep variation is compared against in operator().
            double tolerance_;


            // Candidate vectors still to be checked for a witness point (used as a stack).
            std::vector<MDP::Values> agenda_;

            // Per-observation index combinations already queued, so none is queued twice.
            std::unordered_set<VObs, boost::hash<VObs>> triedVectors_;

    };


    /**
     * @brief Computes a value function for the input model using witness points.
     *
     * Starting from the value function returned by makeValueFunction(S), one
     * new timestep is appended per iteration. For each action the method:
     *
     * - seeds the agenda with a default candidate (addDefaultEntry);
     * - repeatedly asks the WitnessLP whether the candidate on top of the
     *   agenda has a witness point;
     * - if so, stores the best cross-sum at that point, adds it as a row of
     *   the LP and queues its variations (addVariations);
     * - otherwise discards the candidate.
     *
     * The vectors found for all actions are then merged and pruned.
     *
     * Iteration stops once horizon_ timesteps have been computed or, when the
     * tolerance check is enabled, once the variation reported by
     * weakBoundDistance between the last two timesteps is no longer greater
     * than tolerance_. The check is enabled when checkDifferentSmall reports
     * tolerance_ as different from 0.0.
     *
     * This method overwrites S, A, O, agenda_ and triedVectors_.
     *
     * @param model The model to solve; must satisfy IsModel.
     *
     * @return A tuple with the last computed variation (0.0 if the tolerance
     *         check is disabled) and the resulting ValueFunction.
     */
    template <IsModel M>
    std::tuple<double, ValueFunction> Witness::operator()(const M& model) {
        S = model.getS();
        A = model.getA();
        O = model.getO();

        // Per-action lists of the vectors found during the current timestep.
        std::vector<VList> U(A);

        auto v = makeValueFunction(S); // TODO: May take user input

        unsigned timestep = 0;

        // This variable we use to manually control the allocations
        // for the LP solver. This is because this algorithm cannot
        // know in advance just how many constraints the LP is going
        // to get. Thus we implement a x2 doubling allocation scheme
        // to avoid too many reallocations.
        size_t reserveSize = 1;

        Projecter project(model);
        Pruner prune(S);
        WitnessLP lp(S);

        const bool useTolerance = checkDifferentSmall(tolerance_, 0.0);
        // Start above the threshold so that the first iteration always runs.
        double variation = tolerance_ * 2;
        while ( timestep < horizon_ && ( !useTolerance || variation > tolerance_ ) ) {
            ++timestep;

            // As default, we allocate double the numbers of VEntries for last step.
            reserveSize = std::max(reserveSize, 2 * v[timestep-1].size());

            // Project the previous timestep's vectors; the result is indexed
            // first by action and then by observation.
            auto projections = project(v[timestep-1]);

            size_t finalWSize = 0;
            for ( size_t a = 0; a < A; ++a ) {
                // Reset all per-action state.
                U[a].clear();
                lp.reset();
                agenda_.clear();
                triedVectors_.clear();
                size_t counter = 0;

                lp.allocate(reserveSize);

                // We add the VEntry to startoff the whole process. This
                // VEntry does not even need to be optimal, as we are going
                // to compute the optimal one for the witness point anyway.
                addDefaultEntry(projections[a]);

                // We check whether any element in the agenda improves what we have.
                // Note that a candidate is only removed once the LP finds no
                // witness for it; after a success the (possibly new) top of
                // the agenda is examined again.
                while ( !agenda_.empty() ) {
                    const auto witness = lp.findWitness(agenda_.back());
                    if ( witness ) {
                        // If so, we generate the best vector for that particular belief point.
                        U[a].push_back(crossSumBestAtBelief(*witness, projections[a], a));
                        lp.addOptimalRow(U[a].back().values);
                        // We add to the agenda all possible "variations" of the VEntry found.
                        addVariations(projections[a], U[a].back());
                        // We manually check memory for the lp, since this method
                        // cannot know in advance how many rows it'll need to do.
                        if ( ++counter == reserveSize ) {
                            reserveSize *= 2;
                            lp.allocate(reserveSize);
                        }
                    }
                    else {
                        agenda_.pop_back();
                    }
                }
                finalWSize += U[a].size();
            }

            VList w;
            w.reserve(finalWSize);

            // We put together all VEntries we found.
            for ( size_t a = 0; a < A; ++a )
                w.insert(std::end(w), std::make_move_iterator(std::begin(U[a])), std::make_move_iterator(std::end(U[a])));

            // We have them all, and we prune one final time to be sure we have
            // computed the parsimonious set of value functions.
            const auto begin = std::begin(w);
            const auto end   = std::end  (w);
            w.erase(prune(begin, end, unwrap), end);

            v.emplace_back(std::move(w));

            // Check convergence
            if ( useTolerance ) {
                variation = weakBoundDistance(v[timestep-1], v[timestep]);
            }
        }

        // Move the value function into the tuple: passing the lvalue would
        // deep-copy every VList computed above.
        return std::make_tuple(useTolerance ? variation : 0.0, std::move(v));
    }


    template <typename ProjectionsRow>

    void Witness::addDefaultEntry(const ProjectionsRow & projs) {

        MDP::Values v(S); v.setZero();


        // We compute the crossSum between each best vector for the belief.

        for ( size_t o = 0; o < O; ++o )

            v.noalias() += projs[o][0].values;


        triedVectors_.emplace(O, 0);

        agenda_.emplace_back(std::move(v));

    }


    template <typename ProjectionsRow>

    void Witness::addVariations(const ProjectionsRow & projs, const VEntry & variated) {

        // We need to copy this one unfortunately

        auto vObs = variated.observations;

        const auto & vValues = variated.values;


        for ( size_t o = 0; o < O; ++o ) {

            const size_t skip = vObs[o];


            for ( size_t i = 0; i < projs[o].size(); ++i ) {

                if ( i == skip ) continue;


                vObs[o] = i;

                if ( triedVectors_.find(vObs) != std::end(triedVectors_) ) continue;


                triedVectors_.insert(vObs);


                auto v = vValues - projs[o][skip].values + projs[o][i].values;

                agenda_.emplace_back(std::move(v));

            }

            vObs[o] = skip;

        }

    }

}


#endif