AIToolbox
A library that offers tools for AI problem solving.
BanditPolicyAdaptor.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_MDP_BANDIT_POLICY_ADAPTOR_HEADER_FILE
2 #define AI_TOOLBOX_MDP_BANDIT_POLICY_ADAPTOR_HEADER_FILE
3 
6 
7 namespace AIToolbox::MDP {
16  template <typename BanditPolicy>
18  public:
25  template <typename... Args>
26  BanditPolicyAdaptor(size_t s, Args&&... params);
27 
35  virtual size_t sampleAction(const size_t & s) const override;
36 
45  virtual double getActionProbability(const size_t & s, const size_t & a) const override;
46 
53  virtual Matrix2D getPolicy() const override;
54 
58  BanditPolicy & getBanditPolicy();
59 
63  const BanditPolicy & getBanditPolicy() const;
64 
65  private:
66  BanditPolicy policy_;
67  };
68 
69  template <typename BP>
70  template <typename... Args>
71  BanditPolicyAdaptor<BP>::BanditPolicyAdaptor(const size_t s, Args&&... args) :
72  Base(s, 1), policy_(std::forward<Args>(args)...)
73  {
74  // We need to fix this later since we can't initialize the policy
75  // before Base.
76  A = policy_.getA();
77  }
78 
79  template <typename BP>
80  size_t BanditPolicyAdaptor<BP>::sampleAction(const size_t &) const {
81  return policy_.sampleAction();
82  }
83 
84  template <typename BP>
85  double BanditPolicyAdaptor<BP>::getActionProbability(const size_t &, const size_t & a) const {
86  return policy_.getActionProbability(a);
87  }
88 
89  template <typename BP>
91  return policy_.getPolicy().transpose().replicate(getS(), 1);
92  }
93 
94  template <typename BP>
95  BP & BanditPolicyAdaptor<BP>::getBanditPolicy() { return policy_; }
96 
97  template <typename BP>
98  const BP & BanditPolicyAdaptor<BP>::getBanditPolicy() const { return policy_; }
99 }
100 
101 #endif
PolicyInterface.hpp
AIToolbox::MDP::BanditPolicyAdaptor::BanditPolicyAdaptor
BanditPolicyAdaptor(size_t s, Args &&... params)
Basic constructor.
Definition: BanditPolicyAdaptor.hpp:71
AIToolbox::MDP::BanditPolicyAdaptor
This class extends a Bandit policy so that it can be called from MDP code.
Definition: BanditPolicyAdaptor.hpp:17
AIToolbox::Matrix2D
Eigen::Matrix< double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor|Eigen::AutoAlign > Matrix2D
Definition: Types.hpp:18
AIToolbox::PolicyInterface< size_t, size_t, size_t >
AIToolbox::MDP
Definition: DoubleQLearning.hpp:10
AIToolbox::MDP::BanditPolicyAdaptor::getBanditPolicy
BanditPolicy & getBanditPolicy()
This function returns a reference to the underlying BanditPolicy.
Definition: BanditPolicyAdaptor.hpp:95
PolicyInterface.hpp
AIToolbox::MDP::BanditPolicyAdaptor::getActionProbability
virtual double getActionProbability(const size_t &s, const size_t &a) const override
This function returns the probability of taking the specified action.
Definition: BanditPolicyAdaptor.hpp:85
AIToolbox::MDP::BanditPolicyAdaptor::getPolicy
virtual Matrix2D getPolicy() const override
This function returns a matrix containing all probabilities of the policy.
Definition: BanditPolicyAdaptor.hpp:90
AIToolbox::PolicyInterface< size_t, size_t, size_t >::A
size_t A
Definition: PolicyInterface.hpp:81
AIToolbox::MDP::PolicyInterface
Simple typedef for most of MDP's policy needs.
Definition: PolicyInterface.hpp:11
AIToolbox::MDP::BanditPolicyAdaptor::sampleAction
virtual size_t sampleAction(const size_t &s) const override
This function chooses a random action using the underlying bandit policy.
Definition: BanditPolicyAdaptor.hpp:80