AIToolbox
A library that offers tools for AI problem solving.
T3CPolicy.hpp
Go to the documentation of this file.
1
#ifndef AI_TOOLBOX_BANDIT_T3C_POLICY_HEADER_FILE
2
#define AI_TOOLBOX_BANDIT_T3C_POLICY_HEADER_FILE
3
4
#include <
AIToolbox/Bandit/Types.hpp
>
5
#include <
AIToolbox/Bandit/Experience.hpp
>
6
#include <
AIToolbox/Bandit/Policies/PolicyInterface.hpp
>
7
#include <
AIToolbox/Bandit/Policies/ThompsonSamplingPolicy.hpp
>
8
9
namespace
AIToolbox::Bandit
{
27
class
T3CPolicy
:
public
PolicyInterface
{
28
public
:
36
T3CPolicy
(
const
Experience
& exp,
double
beta,
double
var);
37
43
virtual
size_t
sampleAction
()
const override
;
44
50
size_t
recommendAction
()
const
;
51
65
virtual
double
getActionProbability
(
const
size_t
& a)
const override
;
66
79
virtual
Vector
getPolicy
()
const override
;
80
86
const
Experience
&
getExperience
()
const
;
87
88
private
:
89
ThompsonSamplingPolicy
policy_;
90
double
beta_;
91
double
var_;
92
};
93
}
94
95
#endif
96
AIToolbox::Bandit::ThompsonSamplingPolicy
This class implements a Thompson sampling policy.
Definition:
ThompsonSamplingPolicy.hpp:19
AIToolbox::Bandit::T3CPolicy::getExperience
const Experience & getExperience() const
This function returns a reference to the underlying Experience we use.
Experience.hpp
AIToolbox::Bandit::PolicyInterface
Simple typedef for most of a normal Bandit's policy needs.
Definition:
PolicyInterface.hpp:11
AIToolbox::Vector
Eigen::Matrix< double, Eigen::Dynamic, 1 > Vector
Definition:
Types.hpp:16
AIToolbox::Bandit::T3CPolicy::getActionProbability
virtual double getActionProbability(const size_t &a) const override
This function returns the probability of taking the specified action.
AIToolbox::Bandit::T3CPolicy::sampleAction
virtual size_t sampleAction() const override
This function chooses an action using T3CPolicy.
AIToolbox::Bandit::T3CPolicy::T3CPolicy
T3CPolicy(const Experience &exp, double beta, double var)
Basic constructor.
AIToolbox::Bandit
Definition:
Experience.hpp:6
PolicyInterface.hpp
AIToolbox::Bandit::T3CPolicy
This class implements the T3C sampling policy.
Definition:
T3CPolicy.hpp:27
Types.hpp
AIToolbox::Bandit::Experience
This class computes averages and counts for a Bandit problem.
Definition:
Experience.hpp:13
AIToolbox::Bandit::T3CPolicy::recommendAction
size_t recommendAction() const
This function returns the action that is most likely the best one up to this point.
ThompsonSamplingPolicy.hpp
AIToolbox::Bandit::T3CPolicy::getPolicy
virtual Vector getPolicy() const override
This function returns a vector containing all probabilities of the policy.