AIToolbox
A library that offers tools for AI problem solving.
ThompsonSamplingPolicy.hpp
Go to the documentation of this file.
1
#ifndef AI_TOOLBOX_FACTORED_BANDIT_THOMPSON_SAMPLING_POLICY_HEADER_FILE
2
#define AI_TOOLBOX_FACTORED_BANDIT_THOMPSON_SAMPLING_POLICY_HEADER_FILE
3
4
#include <random>
5
6
#include <
AIToolbox/Factored/Bandit/Types.hpp
>
7
#include <
AIToolbox/Factored/Bandit/Experience.hpp
>
8
#include <
AIToolbox/Factored/Bandit/Policies/PolicyInterface.hpp
>
9
#include <
AIToolbox/Factored/Bandit/Algorithms/Utils/VariableElimination.hpp
>
10
11
namespace
AIToolbox::Factored::Bandit
{
27
class
ThompsonSamplingPolicy
:
public
PolicyInterface
{
28
public
:
34
ThompsonSamplingPolicy
(
const
Experience
& exp);
35
48
virtual
Action
sampleAction
()
const override
;
49
69
virtual
double
getActionProbability
(
const
Action
& a)
const override
;
70
87
static
void
setupGraph
(
const
Experience
& exp,
VariableElimination::GVE::Graph
& graph,
RandomEngine
& rnd);
88
94
const
Experience
&
getExperience
()
const
;
95
96
private
:
97
const
Experience
& exp_;
98
};
99
}
100
101
#endif
AIToolbox::Factored::Bandit::ThompsonSamplingPolicy::sampleAction
virtual Action sampleAction() const override
This function chooses an action using Thompson sampling.
AIToolbox::Factored::Bandit::ThompsonSamplingPolicy
This class implements a Thompson sampling policy.
Definition:
ThompsonSamplingPolicy.hpp:27
VariableElimination.hpp
Types.hpp
AIToolbox::Factored::Bandit::ThompsonSamplingPolicy::getActionProbability
virtual double getActionProbability(const Action &a) const override
This function returns the probability of taking the specified action.
AIToolbox::Factored::Bandit::Experience
This class computes averages and counts for a multi-agent cooperative Bandit problem.
Definition:
Experience.hpp:14
AIToolbox::Factored::FactorGraph
This class offers a minimal interface to manage a factor graph.
Definition:
FactorGraph.hpp:31
PolicyInterface.hpp
AIToolbox::Factored::Bandit::ThompsonSamplingPolicy::getExperience
const Experience & getExperience() const
This function returns a reference to the underlying Experience we use.
AIToolbox::RandomEngine
std::mt19937 RandomEngine
Definition:
Types.hpp:14
Experience.hpp
AIToolbox::Factored::Action
Factors Action
Definition:
Types.hpp:69
AIToolbox::Factored::Bandit::PolicyInterface
Simple typedef for most of a normal Bandit's policy needs.
Definition:
PolicyInterface.hpp:11
AIToolbox::Factored::Bandit::ThompsonSamplingPolicy::setupGraph
static void setupGraph(const Experience &exp, VariableElimination::GVE::Graph &graph, RandomEngine &rnd)
This function constructs a graph by sampling the provided experience.
AIToolbox::Factored::Bandit::ThompsonSamplingPolicy::ThompsonSamplingPolicy
ThompsonSamplingPolicy(const Experience &exp)
Basic constructor.
AIToolbox::Factored::Bandit
Definition:
GraphUtils.hpp:12