AIToolbox
A library that offers tools for AI problem solving.
|
Go to the documentation of this file. 1 #ifndef AI_TOOLBOX_MDP_MAXIMUM_LIKELIHOOD_MODEL_HEADER_FILE
2 #define AI_TOOLBOX_MDP_MAXIMUM_LIKELIHOOD_MODEL_HEADER_FILE
56 template <IsExperience E>
// Declaration of the two-argument sync overload, taking the indices `s` and `a`.
// NOTE(review): this file is an extracted listing; the leading number is the
// original file's line number and the enclosing class is not visible here.
// Presumably `s` is a state index and `a` an action index -- confirm against
// the class documentation.
133 void sync(
size_t s,
size_t a);
// Declaration of the three-argument sync overload, taking `s`, `a` and `s1`.
// NOTE(review): presumably `s1` is the state reached from `s` via `a`; the
// enclosing class is not visible in this listing, so confirm before relying
// on that reading.
148 void sync(
size_t s,
size_t a,
size_t s1);
// Declaration of sampleSR: a const member taking `s` and `a` and returning a
// (size_t, double) tuple.
// NOTE(review): the name suggests the tuple is (sampled next state, reward);
// the sampling code itself is not visible in this listing.
166 std::tuple<size_t, double>
sampleSR(
size_t s,
size_t a)
const;
254 const E & experience_;
// Constructor fragment.
// NOTE(review): the signature line, the braces, and whatever condition
// selects between the two loops below are missing from this listing.
262 template <IsExperience E>
// Member-initializer list: S and A are read from `exp`; `experience_` is bound
// to `exp` (it is declared as a const reference, so `exp` must outlive this
// object); `transitions_` holds A matrices of size S x S; `rewards_` is S x A;
// `rand_` is seeded from Seeder::getSeed().
264 S(exp.getS()), A(exp.getA()), experience_(exp), transitions_(A,
Matrix2D(S, S)),
265 rewards_(S, A), rand_(
Seeder::getSeed())
// First loop: for every (s, a) pair whose recorded visit total is zero, set
// the diagonal entry transitions_[a](s, s) to 1.0.
275 for (
size_t a = 0; a < A; ++a )
276 for (
size_t s = 0; s < S; ++s )
277 if ( experience_.getVisitsSum(s, a) == 0ul )
278 transitions_[a](s, s) = 1.0;
// Second loop: set every per-action transition matrix to the identity.
// NOTE(review): presumably this is the alternative branch to the loop above;
// the `else` is not visible here.
282 for (
size_t a = 0; a < A; ++a )
283 transitions_[a].setIdentity();
// Discount validation fragment (signature line missing from this listing).
// Throws std::invalid_argument when `d` lies outside the half-open interval
// (0, 1].
// NOTE(review): if `d` is a floating-point value, a NaN makes both comparisons
// false and would pass this check -- confirm whether that matters to callers.
287 template <IsExperience E>
289 if ( d <= 0.0 || d > 1.0 )
throw std::invalid_argument(
"Discount parameter must be in (0,1]");
// Fragment that walks every action `a` in [0, A) and, inside it, every `s` in
// [0, S).
// NOTE(review): the signature line and the innermost loop body are missing
// from this listing; presumably the body performs the per-(s, a) sync.
293 template <IsExperience E>
295 for (
size_t a = 0; a < A; ++a )
296 for (
size_t s = 0; s < S; ++s )
// Body fragment that recomputes one (s, a) entry from the experience data.
// NOTE(review): the signature line, the braces and the `else` separating the
// two transition-update paths are missing from this listing; presumably this
// is the definition of the two-argument sync overload.
300 template <IsExperience E>
// Total number of recorded visits for (s, a).
303 const auto visitSum = experience_.getVisitsSum(s, a);
// With no visits there is nothing to update; returning here also keeps the
// reciprocal below from dividing by zero.
304 if ( visitSum == 0ul )
return;
// The reward entry is copied directly from the experience object.
307 rewards_(s, a) = experience_.getReward(s, a);
// Computed once so each entry is scaled with a multiplication.
310 const double visitSumReciprocal = 1.0 / visitSum;
// When E satisfies IsExperienceEigen, the whole row of the visits table for
// action `a` is cast to double and scaled in a single expression.
312 if constexpr (IsExperienceEigen<E>) {
313 transitions_[a].row(s) = experience_.getVisitsTable(a).row(s).template cast<double>() * visitSumReciprocal;
// Otherwise each transitions_[a](s, s1) is set individually to
// visits(s, a, s1) * visitSumReciprocal.
316 for (
size_t s1 = 0; s1 < S; ++s1 ) {
317 const auto visits = experience_.getVisits(s, a, s1);
318 transitions_[a](s, s1) =
static_cast<double>(visits) * visitSumReciprocal;
// Body fragment that updates one (s, a) row after a change to the (s, a, s1)
// entry, without recomputing the whole row.
// NOTE(review): the signature line, the braces and the `else` are missing
// from this listing; presumably this is the three-argument sync overload.
323 template <IsExperience E>
325 const auto visitSum = experience_.getVisitsSum(s, a);
// Whenever the visit total is a multiple of 10000 (which includes zero), fall
// back to the full per-(s, a) sync instead of the incremental update.
// NOTE(review): presumably this periodically clears accumulated rounding
// error -- confirm the intent of the constant.
328 if ( !(visitSum % 10000ul) )
return sync(s, a);
// The reward entry is copied directly from the experience object.
331 rewards_(s, a) = experience_.getReward(s, a);
// First recorded visit: clear the diagonal entry, then put all probability on
// s1. The order matters when s == s1, where the entry ends up as 1.0.
333 if ( visitSum == 1ul ) {
334 transitions_[a](s, s) = 0.0;
335 transitions_[a](s, s1) = 1.0;
// Otherwise (presumably the else branch): express the new count for s1 over
// the previous denominator (visitSum - 1), then renormalize the row.
337 const double newVisits =
static_cast<double>(experience_.getVisits(s, a, s1));
339 const double newTransitionValue = newVisits /
static_cast<double>(visitSum - 1);
// Assuming the row summed to 1 before this update, replacing entry (s, s1)
// changes the row sum by exactly the difference computed here.
340 const double newVectorSum = 1.0 + (newTransitionValue - transitions_[a](s, s1));
// Write the new entry, then divide the whole row by the new sum.
345 transitions_[a](s, s1) = newTransitionValue;
346 transitions_[a].row(s) /= newVectorSum;
// Fragment returning the pair (s1, rewards_(s, a)) as a tuple.
// NOTE(review): the signature line and the statement that produces `s1` are
// missing from this listing; presumably `s1` is sampled from row `s` of
// transitions_[a].
350 template <IsExperience E>
354 return std::make_tuple(s1, rewards_(s, a));
// Fragment returning the stored entry (s, s1) of the transition matrix for
// action `a`. The signature line is missing from this listing.
357 template <IsExperience E>
359 return transitions_[a](s, s1);
// Fragment returning the stored reward entry for (s, a). The signature line
// is missing from this listing.
362 template <IsExperience E>
364 return rewards_(s, a);
367 template <IsExperience E>
369 for (
size_t a = 0; a < A; ++a )
375 template <IsExperience E>
377 template <IsExperience E>
379 template <IsExperience E>
381 template <IsExperience E>
384 template <IsExperience E>
386 template <IsExperience E>
389 template <IsExperience E>