// AI-Toolbox/SparseMaximumLikelihoodModel_8hpp_source.html (page title left over from the HTML source listing)

#ifndef AI_TOOLBOX_MDP_SPARSE_MAXIMUM_LIKELIHOOD_MODEL_HEADER_FILE

#define AI_TOOLBOX_MDP_SPARSE_MAXIMUM_LIKELIHOOD_MODEL_HEADER_FILE


#include <random>
#include <stdexcept>
#include <tuple>

#include <AIToolbox/Seeder.hpp>
#include <AIToolbox/Types.hpp>
#include <AIToolbox/Utils/Probability.hpp>
#include <AIToolbox/MDP/Types.hpp>
#include <AIToolbox/MDP/TypeTraits.hpp>


namespace AIToolbox::MDP {

    /**
     * @brief Model whose transition probabilities and rewards are derived from an experience object.
     *
     * The model keeps a reference to an experience object of type E and
     * stores, in sparse matrices, the transition probabilities and rewards
     * computed from it by the sync functions. The model does not observe the
     * experience on its own: the stored values change only when one of the
     * sync functions is called.
     *
     * @tparam E The experience type, constrained by the IsExperience concept.
     */
    template <IsExperience E>

    class SparseMaximumLikelihoodModel {

        public:

            /// Transition storage, indexed first by action and then by (state, next state).
            using TransitionMatrix   = SparseMatrix3D;

            /// Reward storage, indexed by (state, action).
            using RewardMatrix       = SparseMatrix2D;

            /**
             * @brief Builds a model on top of an existing experience object.
             *
             * The experience is stored by reference, so it must outlive the
             * model.
             *
             * @param exp The experience the model reads its data from.
             * @param discount The discount to use; must be in (0,1], otherwise std::invalid_argument is thrown.
             * @param sync If true, the model is synced with the experience right away and
             *             never-visited state-action pairs are made self-absorbing; if
             *             false, every action starts with an identity transition matrix.
             */
            SparseMaximumLikelihoodModel(const E & exp, double discount = 1.0, bool sync = false);


            /**
             * @brief Sets a new discount.
             *
             * @param d The new discount; must be in (0,1].
             *
             * @throws std::invalid_argument If d is outside (0,1].
             */
            void setDiscount(double d);


            /**
             * @brief Syncs every state-action pair by calling sync(s, a) on each of them.
             */
            void sync();


            /**
             * @brief Syncs a single state-action pair with the experience.
             *
             * Pairs with no recorded visits are left untouched.
             *
             * @param s The state to sync.
             * @param a The action to sync.
             */
            void sync(size_t s, size_t a);


            /**
             * @brief Incrementally updates the model for a single transition.
             *
             * Rather than recomputing the whole row for (s, a), this adjusts
             * the entry for s1 and renormalizes the row.
             *
             * @param s The state of the transition.
             * @param a The action of the transition.
             * @param s1 The state the transition led to.
             */
            void sync(size_t s, size_t a, size_t s1);


            /**
             * @brief Samples a next state for the given state-action pair.
             *
             * @param s The current state.
             * @param a The action taken.
             *
             * @return The sampled next state together with the stored reward for (s, a).
             */
            std::tuple<size_t, double> sampleSR(size_t s, size_t a) const;


            /// Returns the number of states, as read from the experience at construction.
            size_t getS() const;


            /// Returns the number of actions, as read from the experience at construction.
            size_t getA() const;


            /// Returns the currently set discount.
            double getDiscount() const;


            /// Returns the experience object this model was constructed with.
            const E & getExperience() const;


            /// Returns the stored probability of moving from s to s1 when taking action a.
            double getTransitionProbability(size_t s, size_t a, size_t s1) const;


            /// Returns the stored reward for (s, a); the s1 argument is ignored.
            double getExpectedReward(size_t s, size_t a, size_t s1) const;


            /// Returns the transition matrices of all actions.
            const TransitionMatrix & getTransitionFunction() const;


            /// Returns the transition matrix of action a.
            const SparseMatrix2D & getTransitionFunction(size_t a) const;


            /// Returns the reward matrix.
            const RewardMatrix & getRewardFunction() const;


            /// Returns true if, under every action, the stored self-transition probability of s equals 1 (as judged by checkEqualSmall).
            bool isTerminal(size_t s) const;


        private:

            // Declared first: the constructor's initializer list uses them to size the matrices below.
            size_t S, A;

            double discount_;


            // Non-owning reference: the experience must stay alive as long as the model.
            const E & experience_;


            TransitionMatrix transitions_;

            RewardMatrix rewards_;


            // Mutable so that the const sampleSR() can pass the engine to sampleProbability().
            mutable RandomEngine rand_;

    };


    /**
     * @brief Builds the model on top of an existing experience object.
     *
     * The state and action counts are read from the experience, which is
     * kept by reference. The discount is validated through setDiscount(),
     * so an invalid value makes construction throw.
     *
     * @param exp The experience to read data from.
     * @param discount The discount to use, in (0,1].
     * @param toSync Whether to import the experience's data immediately.
     */
    template <IsExperience E>
    SparseMaximumLikelihoodModel<E>::SparseMaximumLikelihoodModel(const E & exp, const double discount, const bool toSync) :
            S(exp.getS()),
            A(exp.getA()),
            experience_(exp),
            transitions_(A, SparseMatrix2D(S, S)),
            rewards_(S, A),
            rand_(Seeder::getSeed())
    {
        setDiscount(discount);

        if ( !toSync ) {
            // Nothing is imported: identity matrices make every row a
            // valid probability distribution.
            for ( size_t action = 0; action < A; ++action )
                transitions_[action].setIdentity();
            return;
        }

        sync();

        // sync() leaves never-visited state-action pairs untouched. To
        // keep the model consistent, each of them becomes self-absorbing.
        // The matrices are not compressed afterwards since they are bound
        // to change eventually anyway.
        for ( size_t action = 0; action < A; ++action ) {
            auto & actionMatrix = transitions_[action];
            for ( size_t state = 0; state < S; ++state ) {
                if ( experience_.getVisitsSum(state, action) != 0ul ) continue;
                actionMatrix.insert(state, state) = 1.0;
            }
        }
    }


    /**
     * @brief Sets a new discount for the model.
     *
     * @param d The new discount; must be in (0,1].
     *
     * @throws std::invalid_argument If d is not in (0,1], NaN included.
     */
    template <IsExperience E>
    void SparseMaximumLikelihoodModel<E>::setDiscount(const double d) {
        // Written as a negated range check on purpose: every comparison
        // with NaN is false, so "d <= 0.0 || d > 1.0" would let NaN through.
        if ( !(d > 0.0 && d <= 1.0) ) throw std::invalid_argument("Discount parameter must be in (0,1]");
        discount_ = d;
    }


    /**
     * @brief Syncs the whole model by syncing each state-action pair in turn.
     *
     * Actions are walked in the outer loop and states in the inner one.
     */
    template <IsExperience E>
    void SparseMaximumLikelihoodModel<E>::sync() {
        for ( size_t action = 0; action < A; ++action ) {
            for ( size_t state = 0; state < S; ++state ) {
                sync(state, action);
            }
        }
    }


    /**
     * @brief Syncs a single state-action pair with the experience.
     *
     * If the pair has never been visited nothing is changed. Otherwise the
     * stored reward is replaced with the experience's one (when they
     * differ), and the transition row for (s, a) is recomputed as the visit
     * count of each next state divided by the total visit count of (s, a).
     *
     * @param s The state to sync.
     * @param a The action to sync.
     */
    template <IsExperience E>
    void SparseMaximumLikelihoodModel<E>::sync(const size_t s, const size_t a) {
        // Nothing to do
        const auto visitSum = experience_.getVisitsSum(s, a);
        if ( visitSum == 0ul ) return;

        // Update reward by just copying the average from experience. Note
        // that we check for a difference from rewards_, rather than from
        // zero, because averaging may bring a reward BACK to zero rather
        // than away from it. In that case the stored value must still be
        // overwritten.
        if (checkDifferentSmall(rewards_.coeffRef(s, a), experience_.getReward(s, a)))
            rewards_.coeffRef(s, a) = experience_.getReward(s, a);

        // Clear beginning's identity matrix
        if ( visitSum == 1ul )
            transitions_[a].coeffRef(s, s) = 0.0;

        // Create reciprocal for fast division
        const double visitSumReciprocal = 1.0 / visitSum;

        if constexpr (IsExperienceEigen<E>) {
            // The whole row is assigned at once from the visit table.
            transitions_[a].row(s) = experience_.getVisitsTable(a).row(s).template cast<double>() * visitSumReciprocal;
        } else {
            // Normalize
            for ( size_t s1 = 0; s1 < S; ++s1 ) {
                const auto visits = experience_.getVisits(s, a, s1);
                if (visits > 0)
                    transitions_[a].coeffRef(s, s1) = static_cast<double>(visits) * visitSumReciprocal;
                else if (s1 == s && transitions_[a].coeff(s, s) != 0.0)
                    // The initial identity entry is otherwise cleared only
                    // when visitSum == 1. If the first sync happens after
                    // several visits, none of them self-transitions, the
                    // stale 1.0 would remain and the row would sum to
                    // more than one.
                    transitions_[a].coeffRef(s, s) = 0.0;
            }
        }
    }


    /**
     * @brief Incrementally updates the model for a single (s, a, s1) transition.
     *
     * Instead of recomputing the whole row for (s, a), the entry for s1 is
     * rewritten and the row is renormalized. The arithmetic below treats
     * visitSum - 1 as the denominator the row currently has.
     *
     * NOTE(review): this presumably requires the function to be called once
     * after every newly recorded transition - confirm against callers.
     *
     * @param s The state of the transition.
     * @param a The action of the transition.
     * @param s1 The state the transition led to.
     */
    template <IsExperience E>

    void SparseMaximumLikelihoodModel<E>::sync(const size_t s, const size_t a, const size_t s1) {

        const auto visitSum = experience_.getVisitsSum(s, a);

        // Whenever visitSum is a multiple of 10000 (which includes 0) we
        // fall back to the full sync(s, a). This is related to numerical
        // errors: once in a while we reset those by forcing a true update
        // using real data.

        if ( !(visitSum % 10000ul) ) return sync(s, a);


        // Update reward by just copying the average from experience. Note

        // that we check for a difference from rewards_, rather than from

        // zero, because averaging may bring a reward BACK to zero rather

        // than away from it. In that case the stored value must still be overwritten.

        if (checkDifferentSmall(rewards_.coeffRef(s, a), experience_.getReward(s, a)))

            rewards_.coeffRef(s, a) = experience_.getReward(s, a);


        if ( visitSum == 1ul ) {

            // First visit: drop the self-loop entry first, then give s1 all
            // the probability. The order matters when s == s1.
            transitions_[a].coeffRef(s, s) = 0.0;

            transitions_[a].coeffRef(s, s1) = 1.0;

        } else {

            const double newVisits = static_cast<double>(experience_.getVisits(s, a, s1));


            // The new entry is expressed over the OLD denominator
            // (visitSum - 1); newVectorSum is what the row sums to once
            // the entry has been replaced.
            const double newTransitionValue = newVisits / static_cast<double>(visitSum - 1);

            const double newVectorSum = 1.0 + (newTransitionValue - transitions_[a].coeff(s, s1));

            // This works because as long as all the values in the transition have the same denominator

            // (in this case visitSum-1), then the numerators do not matter, as we can simply normalize.

            // In the end of the process the new values will be the same as if we updated directly using

            // an increased denominator, and thus we will be able to call this function again correctly.

            transitions_[a].coeffRef(s, s1) = newTransitionValue;

            transitions_[a].row(s) /= newVectorSum;

        }

    }


    /**
     * @brief Samples a next state for the given state-action pair.
     *
     * The next state is drawn by passing the stored transition row for
     * (s, a) and the internal random engine to sampleProbability(). The
     * returned reward is the stored one for (s, a) and does not depend on
     * the sampled state.
     *
     * @param s The current state.
     * @param a The action taken.
     *
     * @return A tuple with the sampled next state and the reward.
     */
    template <IsExperience E>
    std::tuple<size_t, double> SparseMaximumLikelihoodModel<E>::sampleSR(const size_t s, const size_t a) const {
        const size_t nextState = sampleProbability(S, transitions_[a].row(s), rand_);
        const double reward = rewards_.coeff(s, a);

        return std::tuple<size_t, double>(nextState, reward);
    }


    /**
     * @brief Returns the stored probability of a transition.
     *
     * @param s The starting state.
     * @param a The action taken.
     * @param s1 The arrival state.
     *
     * @return The entry (s, s1) of the transition matrix of action a.
     */
    template <IsExperience E>
    double SparseMaximumLikelihoodModel<E>::getTransitionProbability(const size_t s, const size_t a, const size_t s1) const {
        const auto & actionMatrix = transitions_[a];
        return actionMatrix.coeff(s, s1);
    }


    /**
     * @brief Returns the stored reward for a state-action pair.
     *
     * The third parameter (the arrival state) is accepted but unused:
     * rewards are stored per (s, a) only.
     *
     * @param s The starting state.
     * @param a The action taken.
     *
     * @return The entry (s, a) of the reward matrix.
     */
    template <IsExperience E>
    double SparseMaximumLikelihoodModel<E>::getExpectedReward(const size_t s, const size_t a, const size_t /* s1 */) const {
        const double storedReward = rewards_.coeff(s, a);
        return storedReward;
    }


    /**
     * @brief Checks whether a state is terminal.
     *
     * A state counts as terminal when, for every action, its stored
     * self-transition probability compares equal to 1.0 under
     * checkEqualSmall().
     *
     * @param s The state to check.
     *
     * @return True if no action can leave s, false otherwise.
     */
    template <IsExperience E>
    bool SparseMaximumLikelihoodModel<E>::isTerminal(const size_t s) const {
        // Advance past every action that keeps s in place; stopping before
        // the end means some action can move away from s.
        size_t action = 0;
        while ( action < A && checkEqualSmall(1.0, transitions_[action].coeff(s, s)) )
            ++action;

        return action == A;
    }


    /// Returns the number of states of the model.
    template <IsExperience E>
    size_t SparseMaximumLikelihoodModel<E>::getS() const {
        return S;
    }

    /// Returns the number of actions of the model.
    template <IsExperience E>
    size_t SparseMaximumLikelihoodModel<E>::getA() const {
        return A;
    }

    /// Returns the discount currently in use.
    template <IsExperience E>
    double SparseMaximumLikelihoodModel<E>::getDiscount() const {
        return discount_;
    }

    /// Returns the experience object the model was built on.
    template <IsExperience E>
    const E & SparseMaximumLikelihoodModel<E>::getExperience() const {
        return experience_;
    }


    /// Returns the transition matrices of all actions.
    template <IsExperience E>
    auto SparseMaximumLikelihoodModel<E>::getTransitionFunction() const -> const TransitionMatrix & {
        return transitions_;
    }

    /// Returns the reward matrix, indexed by (state, action).
    template <IsExperience E>
    auto SparseMaximumLikelihoodModel<E>::getRewardFunction() const -> const RewardMatrix & {
        return rewards_;
    }


    /// Returns the transition matrix of a single action.
    template <IsExperience E>
    const SparseMatrix2D & SparseMaximumLikelihoodModel<E>::getTransitionFunction(const size_t a) const {
        const auto & actionMatrix = transitions_[a];
        return actionMatrix;
    }

}


#endif