AIToolbox
A library that offers tools for AI problem solving.
LRPPolicy.hpp
Go to the documentation of this file.
1
#ifndef AI_TOOLBOX_BANDIT_LRP_POLICY_HEADER_FILE
2
#define AI_TOOLBOX_BANDIT_LRP_POLICY_HEADER_FILE
3
4
#include <
AIToolbox/Types.hpp
>
5
#include <
AIToolbox/Bandit/Policies/PolicyInterface.hpp
>
6
7
namespace
AIToolbox::Bandit
{
32
class
LRPPolicy
:
public
PolicyInterface
{
33
public
:
52
LRPPolicy
(
size_t
A
,
double
a,
double
b = 0.0);
53
68
void
stepUpdateP
(
size_t
a,
bool
result);
69
75
virtual
size_t
sampleAction
()
const override
;
76
84
virtual
double
getActionProbability
(
const
size_t
& a)
const override
;
85
93
void
setAParam
(
double
a);
94
100
double
getAParam
()
const
;
101
109
void
setBParam
(
double
b);
110
116
double
getBParam
()
const
;
117
125
virtual
Vector
getPolicy
()
const override
;
126
127
private
:
128
double
a_, invB_, divB_;
129
Vector
policy_;
130
};
131
}
132
133
#endif
AIToolbox::Bandit::LRPPolicy::setAParam
void setAParam(double a)
This function sets the `a` parameter.
AIToolbox::Bandit::LRPPolicy::setBParam
void setBParam(double b)
This function sets the `b` parameter.
AIToolbox::Bandit::LRPPolicy::LRPPolicy
LRPPolicy(size_t A, double a, double b=0.0)
Basic constructor.
AIToolbox::Bandit::LRPPolicy
This class implements the Linear Reward Penalty algorithm.
Definition:
LRPPolicy.hpp:32
AIToolbox::Bandit::PolicyInterface
Simple typedef for most of a normal Bandit's policy needs.
Definition:
PolicyInterface.hpp:11
AIToolbox::Bandit::LRPPolicy::stepUpdateP
void stepUpdateP(size_t a, bool result)
This function updates the LRP policy based on the result of the action.
AIToolbox::Vector
Eigen::Matrix< double, Eigen::Dynamic, 1 > Vector
Definition:
Types.hpp:16
AIToolbox::Bandit
Definition:
Experience.hpp:6
AIToolbox::Bandit::LRPPolicy::getAParam
double getAParam() const
This function will return the currently set `a` parameter.
PolicyInterface.hpp
AIToolbox::Bandit::LRPPolicy::getPolicy
virtual Vector getPolicy() const override
This function returns a vector containing all probabilities of the policy.
Types.hpp
AIToolbox::Bandit::LRPPolicy::getBParam
double getBParam() const
This function will return the currently set `b` parameter.
AIToolbox::Bandit::LRPPolicy::sampleAction
virtual size_t sampleAction() const override
This function chooses an action, following the policy distribution.
AIToolbox::PolicyInterface< void, void, size_t >::A
size_t A
Definition:
PolicyInterface.hpp:81
AIToolbox::Bandit::LRPPolicy::getActionProbability
virtual double getActionProbability(const size_t &a) const override
This function returns the probability of taking the specified action.