// AI-Toolbox/PolicyIteration_8hpp_source.html

#ifndef AI_TOOLBOX_MDP_POLICY_ITERATION_HEADER_FILE

#define AI_TOOLBOX_MDP_POLICY_ITERATION_HEADER_FILE


#include <AIToolbox/MDP/Types.hpp>

#include <AIToolbox/MDP/TypeTraits.hpp>

#include <AIToolbox/MDP/Utils.hpp>

#include <AIToolbox/MDP/Policies/QGreedyPolicy.hpp>

#include <AIToolbox/MDP/Algorithms/Utils/PolicyEvaluation.hpp>


namespace AIToolbox::MDP {

    /**
     * @brief Functor that solves an MDP model through policy iteration.
     *
     * An instance carries only two settings, a horizon and a tolerance.
     * Both are handed to the PolicyEvaluation object that operator()
     * builds every time it is invoked.
     */
    class PolicyIteration {
        public:
            /**
             * @brief Basic constructor.
             *
             * NOTE(review): the definition lives outside this header, so any
             * validation performed on the arguments is not visible here.
             *
             * @param horizon Horizon setting, later forwarded to PolicyEvaluation.
             * @param tolerance Tolerance setting, later forwarded to PolicyEvaluation (0.001 when omitted).
             */
            PolicyIteration(unsigned horizon, double tolerance = 0.001);

            /**
             * @brief Runs policy iteration on the input model.
             *
             * @tparam M Any type satisfying the IsModel concept.
             * @param m The model to solve.
             *
             * @return The QFunction produced by the last policy evaluation.
             */
            template <IsModel M> QFunction operator()(const M & m);

            /**
             * @brief Replaces the tolerance setting.
             *
             * @param t The new tolerance.
             */
            void setTolerance(double t);

            /**
             * @brief Reports the tolerance setting.
             *
             * @return The current tolerance.
             */
            double getTolerance() const;

            /**
             * @brief Replaces the horizon setting.
             *
             * @param h The new horizon.
             */
            void setHorizon(unsigned h);

            /**
             * @brief Reports the horizon setting.
             *
             * @return The current horizon.
             */
            unsigned getHorizon() const;

        private:
            // Settings forwarded to PolicyEvaluation inside operator().
            unsigned horizon_;
            double tolerance_;
    };


    /**
     * @brief Runs policy iteration on the input model.
     *
     * Starting from the QFunction produced by makeQFunction, the routine
     * alternates two steps: it evaluates the current QGreedyPolicy with
     * PolicyEvaluation, and then re-reads the policy matrix. It stops as
     * soon as no entry of the matrix differs (per checkDifferentSmall)
     * from the matrix seen in the previous round.
     *
     * @tparam M Any type satisfying the IsModel concept.
     * @param m The model to solve.
     *
     * @return The QFunction returned by the last policy evaluation.
     */
    template <IsModel M>
    QFunction PolicyIteration::operator()(const M & m) {
        const auto S = m.getS();
        const auto A = m.getA();

        PolicyEvaluation<M> evaluator(m, horizon_, tolerance_);

        auto qfun = makeQFunction(m.getS(), m.getA());
        // NOTE(review): the policy is built once and only qfun is reassigned
        // below; presumably QGreedyPolicy observes qfun by reference, so that
        // getPolicy() reflects every update -- confirm in QGreedyPolicy.
        QGreedyPolicy greedy(qfun);
        auto lastPolicy = greedy.getPolicy();

        bool policyChanged;
        do {
            // Evaluate the current policy; the first returned element is not needed.
            auto [bound, values, q] = evaluator(greedy);
            (void)bound;

            // Hand the fresh values back to the evaluator for the next round,
            // and overwrite the QFunction the policy was built from.
            evaluator.setValues(std::move(values));
            qfun = std::move(q);

            auto freshPolicy = greedy.getPolicy();

            // Look for the first entry that differs from the previous policy;
            // both loops stop as soon as one is found.
            policyChanged = false;
            for (size_t s = 0; s < S && !policyChanged; ++s)
                for (size_t a = 0; a < A && !policyChanged; ++a)
                    policyChanged = checkDifferentSmall(lastPolicy(s,a), freshPolicy(s,a));

            // A changed policy becomes the reference for the next comparison.
            if (policyChanged)
                lastPolicy = std::move(freshPolicy);
        } while (policyChanged);

        return qfun;
    }

}


#endif