// AI-Toolbox/MDP_2SparseModel_8hpp_source.html (Doxygen source listing of MDP/SparseModel.hpp)

#ifndef AI_TOOLBOX_MDP_SPARSE_MODEL_HEADER_FILE

#define AI_TOOLBOX_MDP_SPARSE_MODEL_HEADER_FILE


#include <AIToolbox/Seeder.hpp>


#include <AIToolbox/MDP/Types.hpp>

#include <AIToolbox/MDP/TypeTraits.hpp>


#include <AIToolbox/Utils/Probability.hpp>


namespace AIToolbox::MDP {

    /**
     * @brief MDP model that keeps its transition and reward functions in sparse matrices.
     *
     * As used by the definitions in this header, transitions are stored as
     * one S x S matrix per action (indexed `transitions_[a]` with entries
     * `(s, s1)`), and rewards are stored as a single S x A matrix holding,
     * for every `(s, a)`, the sum over `s1` of `reward(s, a, s1) * P(s1 | s, a)`.
     *
     * Several members are only declared here; their definitions live
     * outside this header, so their documentation below is hedged.
     */
    class SparseModel {

        public:

            /// Type used to store the transition function.
            using TransitionMatrix   = SparseMatrix3D;

            /// Type used to store the (expected) reward function.
            using RewardMatrix       = SparseMatrix2D;


            /**
             * @brief Basic constructor (defined outside this header).
             *
             * @param s Number of states.
             * @param a Number of actions.
             * @param discount Discount factor, defaults to 1.0.
             *
             * NOTE(review): how transitions and rewards are initialized
             * here cannot be seen from this header - confirm in the
             * implementation file.
             */
            SparseModel(size_t s, size_t a, double discount = 1.0);


            /**
             * @brief Builds a model from dense 3D containers.
             *
             * Both containers are read as `x[s][a][s1]`. The input goes
             * through setDiscount(), setTransitionFunction() and
             * setRewardFunction(), in that order.
             *
             * @param s Number of states.
             * @param a Number of actions.
             * @param t Transition probabilities.
             * @param r Rewards.
             * @param d Discount factor, defaults to 1.0.
             *
             * @throws std::invalid_argument if `t` is rejected by
             *         setTransitionFunction().
             */
            template <IsNaive3DMatrix T, IsNaive3DMatrix R>

            SparseModel(size_t s, size_t a, const T & t, const R & r, double d = 1.0);


            /**
             * @brief Builds a model by copying another model through its accessors.
             *
             * Uses `getS()`, `getA()`, `getDiscount()`,
             * `getTransitionProbability()` and `getExpectedReward()` of
             * the input. Note that this constructor is not `explicit`.
             *
             * @param model The model to copy.
             *
             * @throws std::invalid_argument if a probability is outside
             *         [0, 1] or a transition row does not sum to 1.
             */
            template <IsModel M>

            SparseModel(const M& model);


            /**
             * @brief Constructor taking ready-made matrices (defined outside this header).
             *
             * NOTE(review): the `NoCheck` tag suggests the inputs are
             * adopted without validation - confirm in the implementation.
             *
             * @param s Number of states.
             * @param a Number of actions.
             * @param t Transition matrix, taken by rvalue reference.
             * @param r Reward matrix, taken by rvalue reference.
             * @param d Discount factor.
             */
            SparseModel(NoCheck, size_t s, size_t a, TransitionMatrix && t, RewardMatrix && r, double d);


            /**
             * @brief Replaces the transition function from a dense container `t[s][a][s1]`.
             *
             * The stored rewards are not touched by this call.
             *
             * @param t Transition probabilities.
             *
             * @throws std::invalid_argument if `isProbability(S, A, S, t)` fails.
             */
            template <IsNaive3DMatrix T>

            void setTransitionFunction(const T & t);


            /**
             * @brief Replaces the transition function from a sparse matrix (defined outside this header).
             *
             * @param t The new transition function.
             */
            void setTransitionFunction(const TransitionMatrix & t);


            /**
             * @brief Replaces the reward function from a dense container `r[s][a][s1]`.
             *
             * Rewards are folded into expected values using the
             * transition function stored at the time of the call, so the
             * transitions should be set first.
             *
             * @param r Rewards.
             */
            template <IsNaive3DMatrix R>

            void setRewardFunction(const R & r);


            /**
             * @brief Replaces the reward function from a sparse matrix (defined outside this header).
             *
             * @param r The new reward function.
             */
            void setRewardFunction(const RewardMatrix & r);


            /**
             * @brief Sets the discount factor (defined outside this header).
             *
             * NOTE(review): accepted range and failure mode are not
             * visible here - confirm in the implementation.
             *
             * @param d The new discount factor.
             */
            void setDiscount(double d);


            /**
             * @brief Samples an outcome for a state-action pair (defined outside this header).
             *
             * NOTE(review): presumably returns (next state, reward) - confirm.
             *
             * @param s The state.
             * @param a The action.
             */
            std::tuple<size_t, double> sampleSR(size_t s, size_t a) const;


            /// Returns the number of states (defined outside this header).
            size_t getS() const;


            /// Returns the number of actions (defined outside this header).
            size_t getA() const;


            /// Returns the discount factor (defined outside this header).
            double getDiscount() const;


            /**
             * @brief Returns the transition probability for (s, a, s1) (defined outside this header).
             *
             * @param s The starting state.
             * @param a The action.
             * @param s1 The destination state.
             */
            double getTransitionProbability(size_t s, size_t a, size_t s1) const;


            /**
             * @brief Returns the expected reward for (s, a, s1) (defined outside this header).
             *
             * NOTE(review): rewards are stored per (s, a) only, so `s1`
             * presumably does not affect the result - confirm.
             *
             * @param s The starting state.
             * @param a The action.
             * @param s1 The destination state.
             */
            double getExpectedReward(size_t s, size_t a, size_t s1) const;


            /// Returns the whole transition function (defined outside this header).
            const TransitionMatrix & getTransitionFunction() const;


            /// Returns the transition matrix of a single action (defined outside this header).
            const SparseMatrix2D & getTransitionFunction(size_t a) const;


            /// Returns the whole reward function (defined outside this header).
            const RewardMatrix & getRewardFunction() const;


            /**
             * @brief Reports whether a state is terminal (defined outside this header).
             *
             * NOTE(review): the exact criterion is not visible here - confirm.
             *
             * @param s The state to check.
             */
            bool isTerminal(size_t s) const;


        private:

            // Number of states and actions. Declared first on purpose: the
            // constructors use them to size transitions_ and rewards_.
            size_t S, A;

            // Discount factor; the constructors in this header set it via setDiscount().
            double discount_;


            // One S x S matrix per action, entries indexed (s, s1).
            TransitionMatrix transitions_;

            // S x A matrix of expected rewards, entries indexed (s, a).
            RewardMatrix rewards_;


            // Random engine seeded from Seeder::getSeed(); mutable so that
            // const members can advance it.
            mutable RandomEngine rand_;

    };


    /**
     * @brief Builds a model with `s` states and `a` actions from dense 3D containers.
     *
     * The matrices are first sized empty (one S x S transition matrix per
     * action, one S x A reward matrix) and the random engine is seeded from
     * Seeder::getSeed(). The actual content is then loaded through the
     * public setters.
     *
     * @param s Number of states.
     * @param a Number of actions.
     * @param t Transition probabilities, read as `t[s][a][s1]`.
     * @param r Rewards, read as `r[s][a][s1]`.
     * @param d Discount factor.
     *
     * @throws std::invalid_argument if setTransitionFunction() rejects `t`.
     */
    template <IsNaive3DMatrix T, IsNaive3DMatrix R>

    SparseModel::SparseModel(const size_t s, const size_t a, const T & t, const R & r, const double d) :

            S(s), A(a), transitions_(A, SparseMatrix2D(S, S)),

            rewards_(S, A), rand_(Seeder::getSeed())

    {

        // discount_ is not in the initializer list; it is set here instead.
        setDiscount(d);

        // Order matters: setRewardFunction() multiplies the rewards by the
        // transition probabilities already stored in transitions_.
        setTransitionFunction(t);

        setRewardFunction(r);

    }


    /**
     * @brief Builds a SparseModel by copying another model through its accessors.
     *
     * For every (s, a) pair the transition probabilities and rewards of the
     * input are read via `getTransitionProbability()` and
     * `getExpectedReward()`. Probabilities that checkDifferentSmall() does
     * not distinguish from 0 are not stored. The reward stored for (s, a) is
     * the sum over `s1` of `reward * probability`, and it is only stored
     * when it is itself distinguishable from 0, matching what
     * setRewardFunction() does for dense input.
     *
     * @param model The model to copy.
     *
     * @throws std::invalid_argument if a probability is not within [0, 1]
     *         (NaN included), or if the stored probabilities of a row do
     *         not sum to 1.
     */
    template <IsModel M>
    SparseModel::SparseModel(const M& model) :
            S(model.getS()), A(model.getA()), transitions_(A, SparseMatrix2D(S, S)),
            rewards_(S, A), rand_(Seeder::getSeed())
    {
        setDiscount(model.getDiscount());

        for ( size_t s = 0; s < S; ++s )
        for ( size_t a = 0; a < A; ++a ) {
            // Accumulate locally: writing through coeffRef() on every step
            // would create a stored entry even when the total is zero
            // (e.g. a non-zero reward attached to an impossible transition).
            double expectedReward = 0.0;

            for ( size_t s1 = 0; s1 < S; ++s1 ) {
                const double p = model.getTransitionProbability(s, a, s1);
                // Written as a negated range test so that NaN is rejected
                // too; every comparison with NaN is false.
                if ( !(p >= 0.0 && p <= 1.0) )
                    throw std::invalid_argument("Input transition matrix contains an invalid value.");

                if ( checkDifferentSmall(0.0, p) ) transitions_[a].insert(s, s1) = p;

                const double r = model.getExpectedReward(s, a, s1);
                if ( checkDifferentSmall(0.0, r) ) expectedReward += r * p;
            }

            if ( checkDifferentSmall(1.0, transitions_[a].row(s).sum()) )
                throw std::invalid_argument("Input transition matrix contains an invalid row.");

            // Keep rewards_ sparse: only non-zero expected rewards get an entry.
            if ( checkDifferentSmall(expectedReward, 0.0) )
                rewards_.coeffRef(s, a) = expectedReward;
        }

        for ( size_t a = 0; a < A; ++a )
            transitions_[a].makeCompressed();
        rewards_.makeCompressed();
    }


    template <IsNaive3DMatrix T>

    void SparseModel::setTransitionFunction(const T & t) {

        if (!isProbability(S, A, S, t))

            throw std::invalid_argument("Input transition matrix does not contain valid probabilities.");


        // Then we copy.

        for ( size_t a = 0; a < A; ++a ) {

            transitions_[a].setZero();


            for ( size_t s = 0; s < S; ++s )

            for ( size_t s1 = 0; s1 < S; ++s1 ) {

                const double p = t[s][a][s1];

                if ( checkDifferentSmall(0.0, p) ) transitions_[a].insert(s, s1) = p;

            }

            transitions_[a].makeCompressed();

        }

    }


    template <IsNaive3DMatrix R>

    void SparseModel::setRewardFunction(const R & r) {

        rewards_.setZero();

        for ( size_t a = 0; a < A; ++a ) {

            for ( size_t s = 0; s < S; ++s ) {

                double newRew = 0.0;

                for ( size_t s1 = 0; s1 < S; ++s1 )

                    newRew += r[s][a][s1] * transitions_[a].coeff(s, s1);


                if (checkDifferentSmall(newRew, 0.0))

                    rewards_.coeffRef(s, a) = newRew;

            }

        }

        rewards_.makeCompressed();

    }

}


#endif