AIToolbox
A library that offers tools for AI problem solving.
#ifndef AI_TOOLBOX_MDP_THOMPSON_MODEL_HEADER_FILE
#define AI_TOOLBOX_MDP_THOMPSON_MODEL_HEADER_FILE

// (The #include directives for <random>, <tuple>, <stdexcept>, <cmath> and the
//  AIToolbox Experience/Seeder/probability headers are omitted from this extract.)
namespace AIToolbox::MDP {
    template <IsExperience E>
    class ThompsonModel {
        public:
            // ... (remaining public interface elided in this extract; the
            //      corresponding definitions appear below)

            // Resamples the transition and reward functions of a single
            // state-action pair from the posteriors implied by the Experience.
            void sync(size_t s, size_t a);

            // Samples a new state and a reward for the given state-action pair.
            std::tuple<size_t, double> sampleSR(size_t s, size_t a) const;

        private:
            size_t S, A;
            double discount_;
            const E & experience_;
            std::vector<Matrix2D> transitions_;   // one sampled transition matrix per action
            Matrix2D rewards_;                    // sampled expected rewards, S x A
            mutable RandomEngine rand_;
    };
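The IsExperience and IsExperienceEigen constraints are defined elsewhere in the library. Judging only from the calls made in this file, the template parameter E has to expose roughly the interface sketched below; the concept name is hypothetical and the real IsExperience requirement may be stricter.

    #include <concepts>
    #include <cstddef>

    // Hypothetical sketch only; the real constraint is AIToolbox's IsExperience concept.
    template <typename E>
    concept ExperienceLikeForThompson = requires(const E e, std::size_t s, std::size_t a, std::size_t s1) {
        { e.getS() }              -> std::convertible_to<std::size_t>;  // number of states
        { e.getA() }              -> std::convertible_to<std::size_t>;  // number of actions
        { e.getVisits(s, a, s1) } -> std::convertible_to<double>;       // visit count of (s, a, s')
        { e.getVisitsSum(s, a) }  -> std::convertible_to<double>;       // total visits of (s, a)
        { e.getReward(s, a) }     -> std::convertible_to<double>;       // running mean reward
        { e.getM2(s, a) }         -> std::convertible_to<double>;       // sum of squared reward deviations
    };
    // Eigen-backed Experiences additionally expose getVisitsTable(a), used in the
    // faster branch of sync() below.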
    template <IsExperience E>
    ThompsonModel<E>::ThompsonModel(const E & exp, const double discount) :
            S(exp.getS()), A(exp.getA()), experience_(exp),
            transitions_(A, Matrix2D(S, S)), rewards_(S, A),
            rand_(Seeder::getSeed())
    {
        setDiscount(discount);
        sync();
    }

    template <IsExperience E>
    void ThompsonModel<E>::setDiscount(const double d) {
        if ( d <= 0.0 || d > 1.0 )
            throw std::invalid_argument("Discount parameter must be in (0,1]");
        discount_ = d;
    }
    // Resamples every state-action pair.
    template <IsExperience E>
    void ThompsonModel<E>::sync() {
        for ( size_t a = 0; a < A; ++a )
            for ( size_t s = 0; s < S; ++s )
                sync(s, a);
    }
    template <IsExperience E>
    void ThompsonModel<E>::sync(const size_t s, const size_t a) {
        // Transitions: draw the row T(s, a, .) from a Dirichlet posterior whose
        // concentration parameters are the recorded visit counts plus 0.5.
        if constexpr (IsExperienceEigen<E>) {
            sampleDirichletDistribution(
                experience_.getVisitsTable(a).row(s).array().template cast<double>() + 0.5,
                rand_, transitions_[a].row(s)
            );
        } else {
            // Equivalent fallback: independent Gamma(visits + 0.5, 1) draws, then normalize.
            double sum = 0.0;
            for ( size_t s1 = 0; s1 < S; ++s1 ) {
                std::gamma_distribution<double> dist(experience_.getVisits(s, a, s1) + 0.5, 1.0);
                transitions_[a](s, s1) = dist(rand_);
                sum += transitions_[a](s, s1);
            }
            transitions_[a].row(s) /= sum;
        }

        // Rewards: sample the expected reward from the posterior of its mean, using
        // the running mean and sum of squared deviations (M2) tracked by the Experience.
        const auto visits = experience_.getVisitsSum(s, a);
        const auto MLEReward = experience_.getReward(s, a);
        const auto M2 = experience_.getM2(s, a);

        if ( visits < 2 ) {
            rewards_(s, a) = MLEReward;
        } else {
            std::student_t_distribution<double> dist(visits - 1);
            rewards_(s, a) = MLEReward + dist(rand_) * std::sqrt(M2 / (visits * (visits - 1)));
        }
    }
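As a self-contained illustration of the two sampling tricks used in sync() above (independent of AIToolbox, with made-up counts and reward statistics), a Dirichlet draw can be obtained by normalizing independent Gamma(count + 0.5, 1) variates, and a plausible mean reward by scaling a Student-t draw with sqrt(M2 / (n * (n - 1))):

    #include <cmath>
    #include <iostream>
    #include <random>
    #include <vector>

    int main() {
        std::mt19937 rng(42);

        // Transition row for one (s, a): Dirichlet(visits + 0.5) via normalized Gamma draws.
        const std::vector<double> visits = {12, 3, 0, 7};          // made-up visit counts
        std::vector<double> row(visits.size());
        double sum = 0.0;
        for (std::size_t s1 = 0; s1 < visits.size(); ++s1) {
            std::gamma_distribution<double> gamma(visits[s1] + 0.5, 1.0);
            row[s1] = gamma(rng);
            sum += row[s1];
        }
        for (double & p : row) p /= sum;                           // now a probability distribution

        // Reward for the same (s, a): posterior of the mean via a Student-t draw.
        const double n = 22.0, mean = 1.3, M2 = 4.8;               // made-up statistics
        std::student_t_distribution<double> t(n - 1);
        const double sampledReward = mean + t(rng) * std::sqrt(M2 / (n * (n - 1)));

        for (double p : row) std::cout << p << ' ';
        std::cout << "| sampled reward: " << sampledReward << '\n';
    }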
    template <IsExperience E>
    std::tuple<size_t, double> ThompsonModel<E>::sampleSR(const size_t s, const size_t a) const {
        const size_t s1 = sampleProbability(S, transitions_[a].row(s), rand_);
        return std::make_tuple(s1, rewards_(s, a));
    }

    template <IsExperience E>
    double ThompsonModel<E>::getTransitionProbability(const size_t s, const size_t a, const size_t s1) const {
        return transitions_[a](s, s1);
    }

    template <IsExperience E>
    double ThompsonModel<E>::getExpectedReward(const size_t s, const size_t a) const {
        return rewards_(s, a);
    }
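The next-state line in sampleSR() above is reconstructed here with what is presumably the library's sampleProbability helper; whichever helper is used, the draw amounts to sampling from the categorical distribution defined by the sampled transition row, which in plain standard C++ looks like:

    #include <cstddef>
    #include <random>
    #include <vector>

    // A categorical draw over an already-normalized transition row.
    std::size_t sampleNextState(const std::vector<double> & row, std::mt19937 & rng) {
        std::discrete_distribution<std::size_t> dist(row.begin(), row.end());
        return dist(rng);
    }

    int main() {
        std::mt19937 rng(std::random_device{}());
        const std::vector<double> row = {0.7, 0.2, 0.1};    // example sampled row for some (s, a)
        return static_cast<int>(sampleNextState(row, rng)); // exit code = sampled next state
    }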
    template <IsExperience E>
    bool ThompsonModel<E>::isTerminal(const size_t s) const {
        // A state is terminal when every action keeps the agent in it with probability 1.
        for ( size_t a = 0; a < A; ++a )
            if ( !checkEqualSmall(1.0, transitions_[a](s, s)) )
                return false;
        return true;
    }

    // ... (the remaining member definitions are omitted from this extract)
}

#endif
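Finally, a minimal usage sketch, assuming the usual AIToolbox::MDP::Experience interface (an Experience(S, A) constructor plus record(s, a, s1, reward)); header paths and exact signatures should be checked against the installed version of the library:

#include <cstddef>

#include <AIToolbox/MDP/Experience.hpp>
#include <AIToolbox/MDP/ThompsonModel.hpp>

int main() {
    constexpr std::size_t S = 4, A = 2;

    // Record a few made-up transitions and rewards.
    AIToolbox::MDP::Experience exp(S, A);
    exp.record(0, 0, 1, 1.0);
    exp.record(0, 0, 1, 0.5);
    exp.record(1, 1, 2, 2.0);

    // Draw one plausible MDP from the posterior and query it.
    AIToolbox::MDP::ThompsonModel model(exp, 0.9);
    const auto [s1, r] = model.sampleSR(0, 0);
    (void)s1; (void)r;

    model.sync(0, 0);   // re-sample the (0, 0) entry after new data arrives
}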