AIToolbox
A library that offers tools for AI problem solving.
ThompsonModel.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_MDP_THOMPSON_MODEL_HEADER_FILE
2 #define AI_TOOLBOX_MDP_THOMPSON_MODEL_HEADER_FILE
3 
4 #include <tuple>
5 #include <random>
6 
7 #include <AIToolbox/Types.hpp>
10 #include <AIToolbox/Seeder.hpp>
12 
13 namespace AIToolbox::MDP {
57  template <IsExperience E>
58  class ThompsonModel {
59  public:
62 
82  ThompsonModel(const E & exp, double discount = 1.0);
83 
89  void setDiscount(double d);
90 
100  void sync();
101 
121  void sync(size_t s, size_t a);
122 
139  std::tuple<size_t, double> sampleSR(size_t s, size_t a) const;
140 
146  size_t getS() const;
147 
153  size_t getA() const;
154 
160  double getDiscount() const;
161 
167  const E & getExperience() const;
168 
178  double getTransitionProbability(size_t s, size_t a, size_t s1) const;
179 
189  double getExpectedReward(size_t s, size_t a, size_t s1) const;
190 
196  const TransitionMatrix & getTransitionFunction() const;
197 
205  const Matrix2D & getTransitionFunction(size_t a) const;
206 
212  const RewardMatrix & getRewardFunction() const;
213 
221  bool isTerminal(size_t s) const;
222 
223  private:
224  size_t S, A;
225  double discount_;
226 
227  const E & experience_;
228 
229  TransitionMatrix transitions_;
230  RewardMatrix rewards_;
231 
232  mutable RandomEngine rand_;
233  };
234 
235  template <IsExperience E>
236  ThompsonModel<E>::ThompsonModel(const E& exp, const double discount) :
237  S(exp.getS()), A(exp.getA()), experience_(exp), transitions_(A, Matrix2D(S, S)),
238  rewards_(S, A), rand_(Seeder::getSeed())
239  {
240  setDiscount(discount);
241 
242  sync();
243  }
244 
245  template <IsExperience E>
246  void ThompsonModel<E>::setDiscount(const double d) {
247  if ( d <= 0.0 || d > 1.0 ) throw std::invalid_argument("Discount parameter must be in (0,1]");
248  discount_ = d;
249  }
250 
251  template <IsExperience E>
253  for ( size_t a = 0; a < A; ++a )
254  for ( size_t s = 0; s < S; ++s )
255  sync(s,a);
256  }
257 
258  template <IsExperience E>
259  void ThompsonModel<E>::sync(const size_t s, const size_t a) {
260  if constexpr (IsExperienceEigen<E>) {
262  // Here we add the Jeffreys prior
263  //
264  // Ideally this shouldn't allocate, as the casting and sum
265  // should simply create a wrapper Eigen object which is passed
266  // by reference, so should be still as efficient as doing it by
267  // hand.
268  experience_.getVisitsTable(a).row(s).array().template cast<double>() + 0.5,
269  rand_, transitions_[a].row(s)
270  );
271  } else {
272  // Sample manually
273  double sum = 0.0;
274  for (size_t s1 = 0; s1 < S; ++s1) {
275  // Here we add the Jeffreys prior
276  std::gamma_distribution<double> dist(experience_.getVisits(s, a, s1) + 0.5, 1.0);
277  transitions_[a](s, s1) = dist(rand_);
278  sum += transitions_[a](s, s1);
279  }
280  transitions_[a].row(s) /= sum;
281  }
282 
283  const auto visits = experience_.getVisitsSum(s, a);
284  const auto MLEReward = experience_.getReward(s, a);
285  const auto M2 = experience_.getM2(s, a);
286  if (visits < 2) {
287  // If we don't have enough info for the STD, we revert to MLE.
288  rewards_(s, a) = MLEReward;
289  } else {
290  std::student_t_distribution<double> dist(visits - 1);
291  rewards_(s, a) = MLEReward + dist(rand_) * std::sqrt(M2 / (visits * (visits - 1)));
292  }
293  }
294 
295  template <IsExperience E>
296  std::tuple<size_t, double> ThompsonModel<E>::sampleSR(const size_t s, const size_t a) const {
297  const size_t s1 = sampleProbability(S, transitions_[a].row(s), rand_);
298 
299  return std::make_tuple(s1, rewards_(s, a));
300  }
301 
302  template <IsExperience E>
303  double ThompsonModel<E>::getTransitionProbability(const size_t s, const size_t a, const size_t s1) const {
304  return transitions_[a](s, s1);
305  }
306 
307  template <IsExperience E>
308  double ThompsonModel<E>::getExpectedReward(const size_t s, const size_t a, const size_t) const {
309  return rewards_(s, a);
310  }
311 
312  template <IsExperience E>
313  bool ThompsonModel<E>::isTerminal(const size_t s) const {
314  for ( size_t a = 0; a < A; ++a )
315  if ( !checkEqualSmall(1.0, transitions_[a](s, s)) )
316  return false;
317  return true;
318  }
319 
320  template <IsExperience E>
321  size_t ThompsonModel<E>::getS() const { return S; }
322  template <IsExperience E>
323  size_t ThompsonModel<E>::getA() const { return A; }
324  template <IsExperience E>
325  double ThompsonModel<E>::getDiscount() const { return discount_; }
326  template <IsExperience E>
327  const E & ThompsonModel<E>::getExperience() const { return experience_; }
328 
329  template <IsExperience E>
330  const typename ThompsonModel<E>::TransitionMatrix & ThompsonModel<E>::getTransitionFunction() const { return transitions_; }
331  template <IsExperience E>
332  const typename ThompsonModel<E>::RewardMatrix & ThompsonModel<E>::getRewardFunction() const { return rewards_; }
333 
334  template <IsExperience E>
335  const Matrix2D & ThompsonModel<E>::getTransitionFunction(const size_t a) const { return transitions_[a]; }
336 }
337 
338 #endif
AIToolbox::MDP::ThompsonModel::sync
void sync()
This function syncs the whole ThompsonModel to the underlying Experience.
Definition: ThompsonModel.hpp:252
AIToolbox::MDP::ThompsonModel::ThompsonModel
ThompsonModel(const E &exp, double discount=1.0)
Constructor using previous Experience.
Definition: ThompsonModel.hpp:236
AIToolbox::MDP::ThompsonModel::TransitionMatrix
Matrix3D TransitionMatrix
Definition: ThompsonModel.hpp:60
AIToolbox::MDP::ThompsonModel::getA
size_t getA() const
This function returns the number of available actions to the agent.
Definition: ThompsonModel.hpp:323
AIToolbox::MDP::ThompsonModel::getTransitionFunction
const TransitionMatrix & getTransitionFunction() const
This function returns the transition matrix for inspection.
Definition: ThompsonModel.hpp:330
AIToolbox::MDP::ThompsonModel::getDiscount
double getDiscount() const
This function returns the currently set discount factor.
Definition: ThompsonModel.hpp:325
AIToolbox::Seeder
This class is an internal class used to seed all random engines in the library.
Definition: Seeder.hpp:15
AIToolbox::MDP::ThompsonModel
This class models Experience as a Markov Decision Process using Thompson Sampling.
Definition: ThompsonModel.hpp:58
AIToolbox::MDP::ThompsonModel::RewardMatrix
Matrix2D RewardMatrix
Definition: ThompsonModel.hpp:61
AIToolbox::Matrix2D
Eigen::Matrix< double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor|Eigen::AutoAlign > Matrix2D
Definition: Types.hpp:18
AIToolbox::MDP::ThompsonModel::getS
size_t getS() const
This function returns the number of states of the world.
Definition: ThompsonModel.hpp:321
AIToolbox::MDP::ThompsonModel::getExperience
const E & getExperience() const
This function enables inspection of the underlying Experience of the ThompsonModel.
Definition: ThompsonModel.hpp:327
AIToolbox::MDP
Definition: DoubleQLearning.hpp:10
Seeder.hpp
AIToolbox::RandomEngine
std::mt19937 RandomEngine
Definition: Types.hpp:14
AIToolbox::sampleDirichletDistribution
ProbabilityVector sampleDirichletDistribution(const TIn &params, G &generator)
This function samples from the input Dirichlet distribution.
Definition: Probability.hpp:270
AIToolbox::checkEqualSmall
bool checkEqualSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably equal.
Definition: Core.hpp:45
Types.hpp
AIToolbox::MDP::ThompsonModel::sampleSR
std::tuple< size_t, double > sampleSR(size_t s, size_t a) const
This function samples the MDP for the specified state action pair.
Definition: ThompsonModel.hpp:296
AIToolbox::MDP::ThompsonModel::setDiscount
void setDiscount(double d)
This function sets a new discount factor for the Model.
Definition: ThompsonModel.hpp:246
AIToolbox::sampleProbability
size_t sampleProbability(const size_t d, const T &in, G &generator)
This function samples an index from a probability vector.
Definition: Probability.hpp:188
AIToolbox::Matrix3D
std::vector< Matrix2D > Matrix3D
Definition: Types.hpp:21
AIToolbox::MDP::ThompsonModel::getTransitionProbability
double getTransitionProbability(size_t s, size_t a, size_t s1) const
This function returns the stored transition probability for the specified transition.
Definition: ThompsonModel.hpp:303
Types.hpp
TypeTraits.hpp
AIToolbox::MDP::ThompsonModel::getExpectedReward
double getExpectedReward(size_t s, size_t a, size_t s1) const
This function returns the stored expected reward for the specified transition.
Definition: ThompsonModel.hpp:308
AIToolbox::MDP::ThompsonModel::getRewardFunction
const RewardMatrix & getRewardFunction() const
This function returns the rewards matrix for inspection.
Definition: ThompsonModel.hpp:332
AIToolbox::MDP::ThompsonModel::isTerminal
bool isTerminal(size_t s) const
This function returns whether a given state is a terminal.
Definition: ThompsonModel.hpp:313
Probability.hpp