AIToolbox
A library that offers tools for AI problem solving.
CooperativeThompsonModel.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_FACTORED_MDP_THOMPSON_MODEL_HEADER_FILE
2 #define AI_TOOLBOX_FACTORED_MDP_THOMPSON_MODEL_HEADER_FILE
3 
6 
7 namespace AIToolbox::Factored::MDP {
52  public:
54  using RewardMatrix = std::vector<Vector>;
55 
83  CooperativeThompsonModel(const CooperativeExperience & exp, double discount = 1.0);
84 
98  void sync();
99 
106  void sync(const State & s, const Action & a);
107 
123  void sync(const CooperativeExperience::Indeces & indeces);
124 
141  std::tuple<State, double> sampleSR(const State & s, const Action & a) const;
142 
159  std::tuple<State, Rewards> sampleSRs(const State & s, const Action & a) const;
160 
178  double sampleSR(const State & s, const Action & a, State * s1) const;
179 
196  void sampleSRs(const State & s, const Action & a, State * s1, Rewards * rews) const;
197 
207  double getTransitionProbability(const State & s, const Action & a, const State & s1) const;
208 
218  double getExpectedReward(const State & s, const Action & a, const State & s1) const;
219 
234  Rewards getExpectedRewards(const State & s, const Action & a, const State & s1) const;
235 
253  void getExpectedRewards(const State & s, const Action & a, const State & s1, Rewards * rews) const;
254 
260  const State & getS() const;
261 
267  const Action & getA() const;
268 
274  void setDiscount(double d);
275 
281  double getDiscount() const;
282 
288  const CooperativeExperience & getExperience() const;
289 
295  const TransitionMatrix & getTransitionFunction() const;
296 
302  const RewardMatrix & getRewardFunction() const;
303 
309  const DDNGraph & getGraph() const;
310 
311  private:
320  void syncRow(size_t i, size_t j);
321 
322  const CooperativeExperience & experience_;
323  double discount_;
324 
325  TransitionMatrix transitions_;
326  RewardMatrix rewards_;
327 
328  mutable RandomEngine rand_;
329  };
330 }
331 
332 #endif
333 
AIToolbox::Factored::MDP::CooperativeThompsonModel::getA
const Action & getA() const
This function returns the number of available actions to the agent.
AIToolbox::Factored::MDP::CooperativeThompsonModel::getExpectedReward
double getExpectedReward(const State &s, const Action &a, const State &s1) const
This function returns the stored expected reward for the specified transition.
AIToolbox::Factored::MDP::CooperativeExperience::Indeces
std::vector< size_t > Indeces
Definition: CooperativeExperience.hpp:34
AIToolbox::Factored::MDP::CooperativeThompsonModel::setDiscount
void setDiscount(double d)
This function sets a new discount factor for the Model.
AIToolbox::Factored::MDP::CooperativeThompsonModel::getExperience
const CooperativeExperience & getExperience() const
This function enables inspection of the underlying Experience of the RLModel.
AIToolbox::Factored::Rewards
Vector Rewards
Definition: Types.hpp:71
AIToolbox::Factored::MDP::CooperativeThompsonModel::sync
void sync()
This function syncs the whole CooperativeThompsonModel to the underlying CooperativeExperience.
AIToolbox::Factored::MDP::CooperativeThompsonModel::getTransitionFunction
const TransitionMatrix & getTransitionFunction() const
This function returns the transition matrix for inspection.
AIToolbox::Factored::MDP::CooperativeThompsonModel::getRewardFunction
const RewardMatrix & getRewardFunction() const
This function returns the rewards matrix for inspection.
AIToolbox::Factored::MDP
Definition: CooperativePrioritizedSweeping.hpp:13
AIToolbox::Factored::State
Factors State
Definition: Types.hpp:67
AIToolbox::Factored::MDP::CooperativeThompsonModel::getDiscount
double getDiscount() const
This function returns the currently set discount factor.
AIToolbox::Factored::MDP::CooperativeThompsonModel
This class models CooperativeExperience as a CooperativeModel using Thompson Sampling.
Definition: CooperativeThompsonModel.hpp:51
AIToolbox::Factored::MDP::CooperativeThompsonModel::RewardMatrix
std::vector< Vector > RewardMatrix
Definition: CooperativeThompsonModel.hpp:54
BayesianNetwork.hpp
AIToolbox::Factored::MDP::CooperativeThompsonModel::getS
const State & getS() const
This function returns the number of states of the world.
AIToolbox::Factored::MDP::CooperativeThompsonModel::sampleSR
std::tuple< State, double > sampleSR(const State &s, const Action &a) const
This function samples the MDP with the specified state-action pair.
CooperativeExperience.hpp
AIToolbox::Factored::DynamicDecisionNetworkGraph
This class represents the structure of a dynamic decision network.
Definition: BayesianNetwork.hpp:52
AIToolbox::RandomEngine
std::mt19937 RandomEngine
Definition: Types.hpp:14
AIToolbox::Factored::MDP::CooperativeThompsonModel::getGraph
const DDNGraph & getGraph() const
This function returns the underlying DDNGraph of the CooperativeExperience.
AIToolbox::Factored::MDP::CooperativeThompsonModel::sampleSRs
std::tuple< State, Rewards > sampleSRs(const State &s, const Action &a) const
This function samples the MDP with the specified state-action pair.
AIToolbox::Factored::DynamicDecisionNetwork
This class represents a Dynamic Decision Network with factored actions.
Definition: BayesianNetwork.hpp:275
AIToolbox::Factored::MDP::CooperativeExperience
This class keeps track of registered events and rewards.
Definition: CooperativeExperience.hpp:28
AIToolbox::Factored::Action
Factors Action
Definition: Types.hpp:69
AIToolbox::Factored::MDP::CooperativeThompsonModel::CooperativeThompsonModel
CooperativeThompsonModel(const CooperativeExperience &exp, double discount=1.0)
Constructor using previous Experience.
AIToolbox::Factored::DDN
DynamicDecisionNetwork DDN
Definition: BayesianNetwork.hpp:308
AIToolbox::Factored::MDP::CooperativeThompsonModel::getTransitionProbability
double getTransitionProbability(const State &s, const Action &a, const State &s1) const
This function returns the stored transition probability for the specified transition.
AIToolbox::Factored::MDP::CooperativeThompsonModel::getExpectedRewards
Rewards getExpectedRewards(const State &s, const Action &a, const State &s1) const
This function returns the stored expected rewards for the specified transition.