AIToolbox
A library that offers tools for AI problem solving.
ESRLPolicy.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_BANDIT_ESRL_POLICY_HEADER_FILE
2 #define AI_TOOLBOX_BANDIT_ESRL_POLICY_HEADER_FILE
3 
5 
6 namespace AIToolbox::Bandit {
// This class implements the Exploring Selfish Reinforcement Learning (ESRL)
// algorithm as a Bandit policy (it derives from PolicyInterface).
//
// NOTE(review): only declarations are visible here; the behavioral notes
// below restate the documented briefs of each member, and anything beyond
// that is marked as an assumption to confirm against the implementation.
36  class ESRLPolicy : public PolicyInterface {
37  public:
    // Basic constructor.
    //
    // A                 - presumably the size of the action space (confirm).
    // a                 - the `a` parameter (see setAParam/getAParam).
    // timesteps         - presumably the number of timesteps per exploration
    //                     phase (see setTimesteps).
    // explorationPhases - presumably the number of exploration phases before
    //                     exploitation (see setExplorationPhases).
    // window            - presumably the size of the timestep window used to
    //                     compute the value of an action (see setWindowSize).
47  ESRLPolicy(size_t A, double a, unsigned timesteps, unsigned explorationPhases, unsigned window);
48 
    // Updates the ESRL policy based on the result of the action.
    //
    // a      - the action that was performed.
    // result - the boolean outcome of that action (presumably success or
    //          failure; confirm exact semantics in the implementation).
66  void stepUpdateP(size_t a, bool result);
67 
    // Returns whether ESRL is now in the exploiting phase.
81  bool isExploiting() const;
82 
    // Chooses an action, following the policy distribution.
88  virtual size_t sampleAction() const override;
89 
    // Returns the probability of taking the specified action `a`.
97  virtual double getActionProbability(const size_t & a) const override;
98 
    // Sets the `a` parameter.
106  void setAParam(double a);
107 
    // Returns the currently set `a` parameter.
113  double getAParam() const;
114 
    // Sets the required number of timesteps per exploration phase.
120  void setTimesteps(unsigned t);
121 
    // Returns the currently set number of timesteps per exploration phase.
127  unsigned getTimesteps() const;
128 
    // Sets the required number of exploration phases before exploitation.
134  void setExplorationPhases(unsigned p);
135 
    // Returns the currently set number of exploration phases before
    // exploitation.
141  unsigned getExplorationPhases() const;
142 
    // Sets the size of the timestep window used to compute the value of an
    // action.
148  void setWindowSize(unsigned window);
149 
    // Returns the currently set size of the timestep window used to compute
    // the value of an action.
155  unsigned getWindowSize() const;
156 
    // Returns a vector containing all probabilities of the policy.
164  virtual Vector getPolicy() const override;
165 
166  private:
167  // Flag recording whether enough has been learned to start exploiting.
168  bool exploit_;
    // NOTE(review): presumably the action used once exploiting; confirm.
169  size_t bestAction_;
170  // Exploration bookkeeping counters. Presumably, in declaration order:
    // the current timestep within the exploration phase, the number of
    // timesteps per phase, the number of exploration phases so far, and the
    // overall number of exploration phases required. TODO confirm against
    // the implementation.
171  size_t timestep_, N_, explorations_, explorationPhases_;
172  // Average value obtained over the last window of the last exploration phase.
173  double average_;
    // Window size; presumably what setWindowSize/getWindowSize access.
174  size_t window_;
175 
176  // Values obtained for all actions.
177  Vector values_;
178  // Actions that are allowed in the current exploration phase.
179  std::vector<size_t> allowedActions_;
180  // Underlying exploration learning policy, used to learn Nash equilibria.
    // NOTE(review): the member is named `lri_` but its type is LRPPolicy
    // (Linear Reward Penalty); confirm how it is configured.
181  LRPPolicy lri_;
182  };
183 }
184 
185 #endif
AIToolbox::Bandit::ESRLPolicy::getActionProbability
virtual double getActionProbability(const size_t &a) const override
This function returns the probability of taking the specified action.
LRPPolicy.hpp
AIToolbox::Bandit::ESRLPolicy::sampleAction
virtual size_t sampleAction() const override
This function chooses an action, following the policy distribution.
AIToolbox::Bandit::ESRLPolicy::getAParam
double getAParam() const
This function will return the currently set a parameter.
AIToolbox::Bandit::ESRLPolicy::getPolicy
virtual Vector getPolicy() const override
This function returns a vector containing all probabilities of the policy.
AIToolbox::Bandit::LRPPolicy
This class implements the Linear Reward Penalty algorithm.
Definition: LRPPolicy.hpp:32
AIToolbox::Bandit::ESRLPolicy::isExploiting
bool isExploiting() const
This function returns whether ESRL is now in the exploiting phase.
AIToolbox::Bandit::PolicyInterface
Simple typedef for most of a normal Bandit's policy needs.
Definition: PolicyInterface.hpp:11
AIToolbox::Bandit::ESRLPolicy::setWindowSize
void setWindowSize(unsigned window)
This function sets the size of the timestep window to compute the value of the action that ESRL is co...
AIToolbox::Bandit::ESRLPolicy::setAParam
void setAParam(double a)
This function sets the a parameter.
AIToolbox::Vector
Eigen::Matrix< double, Eigen::Dynamic, 1 > Vector
Definition: Types.hpp:16
AIToolbox::Bandit::ESRLPolicy::setExplorationPhases
void setExplorationPhases(unsigned p)
This function sets the required number of exploration phases before exploitation.
AIToolbox::Bandit::ESRLPolicy::stepUpdateP
void stepUpdateP(size_t a, bool result)
This function updates the ESRL policy based on the result of the action.
AIToolbox::Bandit::ESRLPolicy::setTimesteps
void setTimesteps(unsigned t)
This function sets the required number of timesteps per exploration phase.
AIToolbox::Bandit
Definition: Experience.hpp:6
AIToolbox::Bandit::ESRLPolicy::getWindowSize
unsigned getWindowSize() const
This function returns the currently set size of the timestep window to compute the value of an action...
AIToolbox::Bandit::ESRLPolicy::getExplorationPhases
unsigned getExplorationPhases() const
This function returns the currently set number of exploration phases before exploitation.
AIToolbox::Bandit::ESRLPolicy::ESRLPolicy
ESRLPolicy(size_t A, double a, unsigned timesteps, unsigned explorationPhases, unsigned window)
Basic constructor.
AIToolbox::Bandit::ESRLPolicy
This class implements the Exploring Selfish Reinforcement Learning algorithm.
Definition: ESRLPolicy.hpp:36
AIToolbox::Bandit::ESRLPolicy::getTimesteps
unsigned getTimesteps() const
This function returns the currently set number of timesteps per exploration phase.
AIToolbox::PolicyInterface< void, void, size_t >::A
size_t A
Definition: PolicyInterface.hpp:81