| 
    AIToolbox
    
   A library that offers tools for AI problem solving. 
   | 
 
 
 
 
Go to the documentation of this file.    1 #ifndef AI_TOOLBOX_MDP_MAXIMUM_LIKELIHOOD_MODEL_HEADER_FILE 
    2 #define AI_TOOLBOX_MDP_MAXIMUM_LIKELIHOOD_MODEL_HEADER_FILE 
   56     template <IsExperience E>
 
  133             void sync(
size_t s, 
size_t a);
 
  148             void sync(
size_t s, 
size_t a, 
size_t s1);
 
  166             std::tuple<size_t, double> 
sampleSR(
size_t s, 
size_t a) 
const;
 
  254             const E & experience_;
 
  262     template <IsExperience E>
 
  264             S(exp.getS()), A(exp.getA()), experience_(exp), transitions_(A, 
Matrix2D(S, S)),
 
  265             rewards_(S, A), rand_(
Seeder::getSeed())
 
  275             for ( 
size_t a = 0; a < A; ++a )
 
  276                 for ( 
size_t s = 0; s < S; ++s )
 
  277                     if ( experience_.getVisitsSum(s, a) == 0ul )
 
  278                         transitions_[a](s, s) = 1.0;
 
  282             for ( 
size_t a = 0; a < A; ++a )
 
  283                 transitions_[a].setIdentity();
 
  287     template <IsExperience E>
 
  289         if ( d <= 0.0 || d > 1.0 ) 
throw std::invalid_argument(
"Discount parameter must be in (0,1]");
 
  293     template <IsExperience E>
 
  295         for ( 
size_t a = 0; a < A; ++a )
 
  296         for ( 
size_t s = 0; s < S; ++s )
 
  300     template <IsExperience E>
 
  303         const auto visitSum = experience_.getVisitsSum(s, a);
 
  304         if ( visitSum == 0ul ) 
return;
 
  307         rewards_(s, a) = experience_.getReward(s, a);
 
  310         const double visitSumReciprocal = 1.0 / visitSum;
 
  312         if constexpr (IsExperienceEigen<E>) {
 
  313             transitions_[a].row(s) = experience_.getVisitsTable(a).row(s).template cast<double>() * visitSumReciprocal;
 
  316             for ( 
size_t s1 = 0; s1 < S; ++s1 ) {
 
  317                 const auto visits = experience_.getVisits(s, a, s1);
 
  318                 transitions_[a](s, s1) = 
static_cast<double>(visits) * visitSumReciprocal;
 
  323     template <IsExperience E>
 
  325         const auto visitSum = experience_.getVisitsSum(s, a);
 
  328         if ( !(visitSum % 10000ul) ) 
return sync(s, a);
 
  331         rewards_(s, a) = experience_.getReward(s, a);
 
  333         if ( visitSum == 1ul ) {
 
  334             transitions_[a](s, s) = 0.0;
 
  335             transitions_[a](s, s1) = 1.0;
 
  337             const double newVisits = 
static_cast<double>(experience_.getVisits(s, a, s1));
 
  339             const double newTransitionValue = newVisits / 
static_cast<double>(visitSum - 1);
 
  340             const double newVectorSum = 1.0 + (newTransitionValue - transitions_[a](s, s1));
 
  345             transitions_[a](s, s1) = newTransitionValue;
 
  346             transitions_[a].row(s) /= newVectorSum;
 
  350     template <IsExperience E>
 
  354         return std::make_tuple(s1, rewards_(s, a));
 
  357     template <IsExperience E>
 
  359         return transitions_[a](s, s1);
 
  362     template <IsExperience E>
 
  364         return rewards_(s, a);
 
  367     template <IsExperience E>
 
  369         for ( 
size_t a = 0; a < A; ++a )
 
  375     template <IsExperience E>
 
  377     template <IsExperience E>
 
  379     template <IsExperience E>
 
  381     template <IsExperience E>
 
  384     template <IsExperience E>
 
  386     template <IsExperience E>
 
  389     template <IsExperience E>