AIToolbox
A library that offers tools for AI problem solving.
|
Go to the documentation of this file. 1 #ifndef AI_TOOLBOX_MDP_SPARSE_MAXIMUM_LIKELIHOOD_MODEL_HEADER_FILE
2 #define AI_TOOLBOX_MDP_SPARSE_MAXIMUM_LIKELIHOOD_MODEL_HEADER_FILE
66 template <IsExperience E>
// Two-argument overload of sync; takes the indices `s` and `a` and returns nothing.
// Elsewhere in this listing `a` indexes transitions_ (loops run a < A) and `s`
// indexes its rows (loops run s < S) — presumably action and state ids; confirm.
// A later fragment of this listing (one that also uses `s1`) calls sync(s, a)
// whenever the visit sum for (s, a) is a multiple of 10000.
143 void sync(
size_t s,
size_t a);
// Three-argument overload of sync; takes the same `s` and `a` indices as the
// two-argument form plus a third index `s1`, and returns nothing.
// NOTE(review): the definition's signature line is not visible in this listing.
// The fragment that writes transitions_[a].coeffRef(s, s1) and then divides
// row `s` by a recomputed sum looks like its body — confirm against the full file.
158 void sync(
size_t s,
size_t a,
size_t s1);
// Const member that returns a (size_t, double) tuple for the pair (s, a).
// NOTE(review): a later fragment in this listing returns
// std::make_tuple(s1, rewards_.coeff(s, a)), i.e. an index `s1` together with the
// stored reward for (s, a). That is presumably this function's return statement;
// how `s1` is chosen is not visible here — confirm against the full file.
176 std::tuple<size_t, double>
sampleSR(
size_t s,
size_t a)
const;
// Const reference to the experience object of type E that the model reads from;
// the visible code queries it via getVisitsSum, getVisits, getReward and, when
// IsExperienceEigen<E> holds, getVisitsTable. It is bound from the constructor's
// `exp` argument. As a reference member it is not owned here, so the referenced
// object must outlive this model.
264 const E & experience_;
272 template <IsExperience E>
274 S(exp.getS()), A(exp.getA()), experience_(exp), transitions_(A,
SparseMatrix2D(S, S)),
275 rewards_(S, A), rand_(
Seeder::getSeed())
284 for (
size_t a = 0; a < A; ++a ) {
285 for (
size_t s = 0; s < S; ++s )
286 if ( experience_.getVisitsSum(s, a) == 0ul )
287 transitions_[a].insert(s, s) = 1.0;
294 for (
size_t a = 0; a < A; ++a )
295 transitions_[a].setIdentity();
299 template <IsExperience E>
301 if ( d <= 0.0 || d > 1.0 )
throw std::invalid_argument(
"Discount parameter must be in (0,1]");
305 template <IsExperience E>
307 for (
size_t a = 0; a < A; ++a )
308 for (
size_t s = 0; s < S; ++s )
312 template <IsExperience E>
315 const auto visitSum = experience_.getVisitsSum(s, a);
316 if ( visitSum == 0ul )
return;
323 rewards_.coeffRef(s, a) = experience_.getReward(s, a);
326 if ( visitSum == 1ul )
327 transitions_[a].coeffRef(s, s) = 0.0;
330 const double visitSumReciprocal = 1.0 / visitSum;
332 if constexpr (IsExperienceEigen<E>) {
333 transitions_[a].row(s) = experience_.getVisitsTable(a).row(s).template cast<double>() * visitSumReciprocal;
336 for (
size_t s1 = 0; s1 < S; ++s1 ) {
337 const auto visits = experience_.getVisits(s, a, s1);
339 transitions_[a].coeffRef(s, s1) =
static_cast<double>(visits) * visitSumReciprocal;
344 template <IsExperience E>
346 const auto visitSum = experience_.getVisitsSum(s, a);
349 if ( !(visitSum % 10000ul) )
return sync(s, a);
356 rewards_.coeffRef(s, a) = experience_.getReward(s, a);
358 if ( visitSum == 1ul ) {
359 transitions_[a].coeffRef(s, s) = 0.0;
360 transitions_[a].coeffRef(s, s1) = 1.0;
362 const double newVisits =
static_cast<double>(experience_.getVisits(s, a, s1));
364 const double newTransitionValue = newVisits /
static_cast<double>(visitSum - 1);
365 const double newVectorSum = 1.0 + (newTransitionValue - transitions_[a].coeff(s, s1));
370 transitions_[a].coeffRef(s, s1) = newTransitionValue;
371 transitions_[a].row(s) /= newVectorSum;
375 template <IsExperience E>
379 return std::make_tuple(s1, rewards_.coeff(s, a));
382 template <IsExperience E>
384 return transitions_[a].coeff(s, s1);
387 template <IsExperience E>
389 return rewards_.coeff(s, a);
392 template <IsExperience E>
394 for (
size_t a = 0; a < A; ++a )
400 template <IsExperience E>
402 template <IsExperience E>
404 template <IsExperience E>
406 template <IsExperience E>
409 template <IsExperience E>
411 template <IsExperience E>
414 template <IsExperience E>