AIToolbox
A library that offers tools for AI problem solving.
LRPPolicy.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_BANDIT_LRP_POLICY_HEADER_FILE
2 #define AI_TOOLBOX_BANDIT_LRP_POLICY_HEADER_FILE
3 
4 #include <AIToolbox/Types.hpp>
6 
7 namespace AIToolbox::Bandit {
/**
 * @brief This class implements the Linear Reward Penalty algorithm.
 *
 * The policy is a PolicyInterface for the Bandit setting. It exposes two
 * tunable parameters, a and b, and is updated one step at a time through
 * stepUpdateP().
 *
 * NOTE(review): the exact roles of a and b in the update rule are not
 * visible from this declaration; confirm against the implementation file.
 */
32  class LRPPolicy : public PolicyInterface {
33  public:
    /**
     * @brief Basic constructor.
     *
     * @param A Presumably the size of the action space -- TODO confirm.
     * @param a The a parameter (see setAParam()).
     * @param b The b parameter (see setBParam()); defaults to 0.0.
     */
52  LRPPolicy(size_t A, double a, double b = 0.0);
53 
    /**
     * @brief This function updates the LRP policy based on the result of the action.
     *
     * @param a The action the result refers to.
     * @param result The outcome of the action.
     *               NOTE(review): presumably true means success/reward -- confirm.
     */
68  void stepUpdateP(size_t a, bool result);
69 
    /**
     * @brief This function chooses an action, following the policy distribution.
     *
     * @return The chosen action.
     */
75  virtual size_t sampleAction() const override;
76 
    /**
     * @brief This function returns the probability of taking the specified action.
     *
     * @param a The selected action.
     *
     * @return The probability of taking the selected action.
     */
84  virtual double getActionProbability(const size_t & a) const override;
85 
    /**
     * @brief This function sets the a parameter.
     *
     * @param a The new value for the a parameter.
     *          NOTE(review): any valid range is not visible here -- confirm.
     */
93  void setAParam(double a);
94 
    /**
     * @brief This function will return the currently set a parameter.
     *
     * @return The currently set a parameter.
     */
100  double getAParam() const;
101 
    /**
     * @brief This function sets the b parameter.
     *
     * @param b The new value for the b parameter.
     *          NOTE(review): any valid range is not visible here -- confirm.
     */
109  void setBParam(double b);
110 
    /**
     * @brief This function will return the currently set b parameter.
     *
     * @return The currently set b parameter.
     */
116  double getBParam() const;
117 
    /**
     * @brief This function returns a vector containing all probabilities of the policy.
     *
     * @return A Vector with the probabilities of the policy.
     */
125  virtual Vector getPolicy() const override;
126 
127  private:
    // NOTE(review): a_ presumably holds the a parameter; invB_ and divB_ look
    // like cached values derived from the b parameter (b itself is not stored
    // under its own name) -- confirm against the implementation file.
128  double a_, invB_, divB_;
    // NOTE(review): presumably the per-action probabilities returned by
    // getPolicy() -- confirm.
129  Vector policy_;
130  };
131 }
132 
133 #endif
AIToolbox::Bandit::LRPPolicy::setAParam
void setAParam(double a)
This function sets the a parameter.
AIToolbox::Bandit::LRPPolicy::setBParam
void setBParam(double b)
This function sets the b parameter.
AIToolbox::Bandit::LRPPolicy::LRPPolicy
LRPPolicy(size_t A, double a, double b=0.0)
Basic constructor.
AIToolbox::Bandit::LRPPolicy
This class implements the Linear Reward Penalty algorithm.
Definition: LRPPolicy.hpp:32
AIToolbox::Bandit::PolicyInterface
Simple typedef for most of a normal Bandit's policy needs.
Definition: PolicyInterface.hpp:11
AIToolbox::Bandit::LRPPolicy::stepUpdateP
void stepUpdateP(size_t a, bool result)
This function updates the LRP policy based on the result of the action.
AIToolbox::Vector
Eigen::Matrix< double, Eigen::Dynamic, 1 > Vector
Definition: Types.hpp:16
AIToolbox::Bandit
Definition: Experience.hpp:6
AIToolbox::Bandit::LRPPolicy::getAParam
double getAParam() const
This function will return the currently set a parameter.
PolicyInterface.hpp
AIToolbox::Bandit::LRPPolicy::getPolicy
virtual Vector getPolicy() const override
This function returns a vector containing all probabilities of the policy.
Types.hpp
AIToolbox::Bandit::LRPPolicy::getBParam
double getBParam() const
This function will return the currently set b parameter.
AIToolbox::Bandit::LRPPolicy::sampleAction
virtual size_t sampleAction() const override
This function chooses an action, following the policy distribution.
AIToolbox::PolicyInterface< void, void, size_t >::A
size_t A
Definition: PolicyInterface.hpp:81
AIToolbox::Bandit::LRPPolicy::getActionProbability
virtual double getActionProbability(const size_t &a) const override
This function returns the probability of taking the specified action.