AIToolbox
A library that offers tools for AI problem solving.
BanditPolicyAdaptor.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_FACTORED_MDP_BANDIT_POLICY_ADAPTOR_HEADER_FILE
2 #define AI_TOOLBOX_FACTORED_MDP_BANDIT_POLICY_ADAPTOR_HEADER_FILE
3 
6 
7 namespace AIToolbox::Factored::MDP {
/**
 * @brief This class extends a Bandit policy so that it can be called from MDP code.
 *
 * The adaptor derives from PolicyInterface<State, State, Action> and stores
 * the wrapped BanditPolicy by value in policy_. The State arguments required
 * by the PolicyInterface methods are accepted but are not passed on to the
 * wrapped policy.
 *
 * @tparam BanditPolicy The type of the wrapped bandit policy.
 */
16  template <typename BanditPolicy>
17  class BanditPolicyAdaptor : public PolicyInterface<State, State, Action> {
18  public:
/**
 * @brief Basic constructor.
 *
 * @param s Moved into the PolicyInterface base constructor
 *          (presumably the state space; confirm against PolicyInterface).
 * @param params Arguments forwarded to the constructor of the wrapped BanditPolicy.
 */
26  template <typename... Args>
27  BanditPolicyAdaptor(State s, Args&&... params);
28 
/**
 * @brief This function chooses a random action using the underlying bandit policy.
 *
 * @param s Unused; present only to satisfy the PolicyInterface signature.
 *
 * @return The action sampled by the wrapped bandit policy.
 */
36  virtual Action sampleAction(const State & s) const override;
37 
/**
 * @brief This function returns the probability of taking the specified action.
 *
 * @param s Unused; present only to satisfy the PolicyInterface signature.
 * @param a The action whose probability is requested.
 *
 * @return The probability reported by the wrapped bandit policy for a.
 */
46  virtual double getActionProbability(const State & s, const Action & a) const override;
47 
/**
 * @brief This function returns a reference to the underlying BanditPolicy.
 */
51  BanditPolicy & getBanditPolicy();
52 
/**
 * @brief This function returns a const reference to the underlying BanditPolicy.
 */
56  const BanditPolicy & getBanditPolicy() const;
57 
58  private:
// The wrapped bandit policy that answers all queries.
59  BanditPolicy policy_;
60  };
61 
/**
 * @brief Basic constructor.
 *
 * Listing line 64 (the constructor's declarator) was missing from this
 * listing and has been restored, so that the mem-initializer list below has
 * a definition to belong to and `args` is a declared parameter pack.
 *
 * @param s Moved into the Base constructor.
 * @param args Arguments perfectly forwarded to the constructor of the
 *             wrapped bandit policy.
 */
62  template <typename BP>
63  template <typename... Args>
64  BanditPolicyAdaptor<BP>::BanditPolicyAdaptor(State s, Args&&... args) :
65  Base(std::move(s), {}), policy_(std::forward<Args>(args)...)
66  {
67  // We need to fix this later since we can't initialize the policy
68  // before Base: Base is built with an empty placeholder, and A is
// copied from the wrapped policy once policy_ has been constructed.
69  A = policy_.getA();
70  }
71 
/**
 * @brief This function chooses a random action using the underlying bandit policy.
 *
 * Listing line 73 (the function signature) was missing from this listing and
 * has been restored to match the in-class declaration. The State parameter
 * is left unnamed because it is not used: the call is delegated to the
 * wrapped policy, which takes no state.
 *
 * @return The action returned by policy_.sampleAction().
 */
72  template <typename BP>
73  Action BanditPolicyAdaptor<BP>::sampleAction(const State &) const {
74  return policy_.sampleAction();
75  }
76 
/**
 * @brief This function returns the probability of taking the specified action.
 *
 * The State argument is unnamed and ignored; the query is answered by the
 * wrapped bandit policy alone.
 *
 * @param a The action whose probability is requested.
 *
 * @return The value of policy_.getActionProbability(a).
 */
77  template <typename BP>
78  double BanditPolicyAdaptor<BP>::getActionProbability(const State &, const Action & a) const {
79  return policy_.getActionProbability(a);
80  }
81 
/**
 * @brief This function returns a reference to the underlying BanditPolicy.
 *
 * @return A mutable reference to the stored policy_ member.
 */
82  template <typename BP>
83  BP & BanditPolicyAdaptor<BP>::getBanditPolicy() { return policy_; }
84 
/**
 * @brief This function returns a const reference to the underlying BanditPolicy.
 *
 * @return A read-only reference to the stored policy_ member.
 */
85  template <typename BP>
86  const BP & BanditPolicyAdaptor<BP>::getBanditPolicy() const { return policy_; }
87 }
88 
89 #endif
AIToolbox::PolicyInterface::getA
const Action & getA() const
This function returns the number of available actions to the agent.
Definition: PolicyInterface.hpp:98
AIToolbox::Factored::MDP::BanditPolicyAdaptor::getBanditPolicy
BanditPolicy & getBanditPolicy()
This function returns a reference to the underlying BanditPolicy.
Definition: BanditPolicyAdaptor.hpp:83
AIToolbox::Factored::MDP::BanditPolicyAdaptor::BanditPolicyAdaptor
BanditPolicyAdaptor(State s, Args &&... params)
Basic constructor.
Definition: BanditPolicyAdaptor.hpp:64
AIToolbox::Factored::MDP
Definition: CooperativePrioritizedSweeping.hpp:13
AIToolbox::Factored::State
Factors State
Definition: Types.hpp:67
AIToolbox::PolicyInterface
This class represents the base interface for policies.
Definition: PolicyInterface.hpp:31
PolicyInterface.hpp
PolicyInterface.hpp
AIToolbox::Factored::Action
Factors Action
Definition: Types.hpp:69
AIToolbox::Factored::MDP::BanditPolicyAdaptor::getActionProbability
virtual double getActionProbability(const State &s, const Action &a) const override
This function returns the probability of taking the specified action.
Definition: BanditPolicyAdaptor.hpp:78
AIToolbox::Factored::MDP::BanditPolicyAdaptor::sampleAction
virtual Action sampleAction(const State &s) const override
This function chooses a random action using the underlying bandit policy.
Definition: BanditPolicyAdaptor.hpp:73
AIToolbox::Factored::MDP::BanditPolicyAdaptor
This class extends a Bandit policy so that it can be called from MDP code.
Definition: BanditPolicyAdaptor.hpp:17