AIToolbox
A library that offers tools for AI problem solving.
ImportanceSampling.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_MDP_IMPORTANCE_SAMPLING_HEADER_FILE
2 #define AI_TOOLBOX_MDP_IMPORTANCE_SAMPLING_HEADER_FILE
3 
5 
6 namespace AIToolbox::MDP {
// Off-policy control via importance sampling, written as a CRTP subclass of
// OffPolicyControl<ImportanceSampling>.
// NOTE(review): every line of this listing is prefixed with its line number in
// the original header, and some original lines are not shown. `Parent` is used
// below but its alias declaration is not among the visible lines; confirm
// against the real header.
12  class ImportanceSampling : public OffPolicyControl<ImportanceSampling> {
13  public:
15 
// Constructor.
//   behaviour  Policy that generated the data. Its getS()/getA() are passed to
//              Parent as the state/action space sizes, and it is kept by
//              reference in behaviour_, so it must outlive this object.
//   discount, alpha, tolerance, epsilon
//              Forwarded unchanged to Parent (defaults 1.0, 0.1, 0.001, 0.1).
25  ImportanceSampling(const PolicyInterface & behaviour, const double discount = 1.0,
26  const double alpha = 0.1, const double tolerance = 0.001, const double epsilon = 0.1) :
27  Parent(behaviour.getS(), behaviour.getA(), discount, alpha, tolerance, epsilon),
28  behaviour_(behaviour) {}
29 
30  private:
// Gives Parent access to the private getTraceDiscount() below (presumably so
// the base class can call it through CRTP; confirm in OffPolicyTemplate.hpp).
31  friend Parent;
// Returns the importance-sampling ratio for taking action `a` in state `s`:
// the probability of `a` under an epsilon-greedy choice around `maxA`, divided
// by the behaviour policy's probability of `a` in `s`.
// The third (size_t) and fourth (double) parameters are unnamed and ignored.
37  double getTraceDiscount(const size_t s, const size_t a, const size_t, const double, const size_t maxA) const {
// Probability of `a` when, with probability epsilon_, an action is drawn
// uniformly from the A actions and otherwise maxA is chosen.
38  const auto prob = epsilon_ / A + (a == maxA) * (1.0 - epsilon_);
// No guard against a zero behaviour probability is applied here.
39  return prob / behaviour_.getActionProbability(s, a);
40  }
41 
// Non-owning reference to the behaviour policy supplied at construction.
42  const PolicyInterface & behaviour_;
43  };
44 
// Off-policy evaluation via importance sampling, written as a CRTP subclass of
// OffPolicyEvaluation<ImportanceSamplingEvaluation>.
// NOTE(review): every line of this listing is prefixed with its line number in
// the original header, and some original lines are not shown. `Parent` is used
// below but its alias declaration is not among the visible lines; confirm
// against the real header.
67  class ImportanceSamplingEvaluation : public OffPolicyEvaluation<ImportanceSamplingEvaluation> {
68  public:
70 
// Constructor (tail only).
// NOTE(review): the opening line of the constructor declaration (original line
// 80, which must introduce `target` and `behaviour`) is missing from this
// listing; restore it from the real header before compiling.
// The visible part forwards target, discount, alpha and tolerance to Parent and
// binds behaviour_ to `behaviour`, which must therefore outlive this object.
81  const double discount, const double alpha, const double tolerance) :
82  Parent(target, discount, alpha, tolerance),
83  behaviour_(behaviour) {}
84 
85  private:
// Gives Parent access to the private getTraceDiscount() below (presumably so
// the base class can call it through CRTP; confirm in OffPolicyTemplate.hpp).
86  friend Parent;
// Returns the importance-sampling ratio for taking action `a` in state `s`:
// the target policy's probability of `a` divided by the behaviour policy's.
// The third (size_t) and fourth (double) parameters are unnamed and ignored.
92  double getTraceDiscount(const size_t s, const size_t a, const size_t, const double) const {
// target_ is not declared in this class; it is a member of the
// OffPolicyEvaluation parent. No guard against a zero behaviour probability
// is applied here.
93  return target_.getActionProbability(s, a) / behaviour_.getActionProbability(s, a);
94  }
95 
// Non-owning reference to the behaviour policy supplied at construction.
96  const PolicyInterface & behaviour_;
97  };
98 }
99 
100 #endif
AIToolbox::MDP::OffPolicyControl
This class is a general version of off-policy control.
Definition: OffPolicyTemplate.hpp:294
AIToolbox::MDP::ImportanceSamplingEvaluation::Parent
OffPolicyEvaluation< ImportanceSamplingEvaluation > Parent
Definition: ImportanceSampling.hpp:69
AIToolbox::MDP::ImportanceSampling
This class implements off-policy control via importance sampling.
Definition: ImportanceSampling.hpp:12
OffPolicyTemplate.hpp
AIToolbox::PolicyInterface
This class represents the base interface for policies.
Definition: PolicyInterface.hpp:31
AIToolbox::MDP::OffPolicyBase::getA
size_t getA() const
This function returns the number of actions on which the off-policy method is working.
AIToolbox::MDP::OffPolicyBase::A
size_t A
Definition: OffPolicyTemplate.hpp:154
AIToolbox::MDP
Definition: DoubleQLearning.hpp:10
AIToolbox::MDP::ImportanceSampling::Parent
OffPolicyControl< ImportanceSampling > Parent
Definition: ImportanceSampling.hpp:14
AIToolbox::MDP::ImportanceSamplingEvaluation
This class implements off-policy evaluation via importance sampling.
Definition: ImportanceSampling.hpp:67
AIToolbox::MDP::OffPolicyBase::getS
size_t getS() const
This function returns the number of states on which the off-policy method is working.
AIToolbox::MDP::OffPolicyEvaluation< ImportanceSamplingEvaluation >::target_
const PolicyInterface & target_
Definition: OffPolicyTemplate.hpp:245
AIToolbox::MDP::OffPolicyEvaluation
This class is a general version of off-policy evaluation.
Definition: OffPolicyTemplate.hpp:215
AIToolbox::PolicyInterface::getActionProbability
virtual double getActionProbability(const Sampling &s, const Action &a) const =0
This function returns the probability of taking the specified action in the specified state.
AIToolbox::MDP::ImportanceSamplingEvaluation::ImportanceSamplingEvaluation
ImportanceSamplingEvaluation(const PolicyInterface &target, const PolicyInterface &behaviour, const double discount, const double alpha, const double tolerance)
Basic constructor.
Definition: ImportanceSampling.hpp:80
AIToolbox::MDP::PolicyInterface
Simple typedef for most of MDP's policy needs.
Definition: PolicyInterface.hpp:11
AIToolbox::MDP::ImportanceSampling::ImportanceSampling
ImportanceSampling(const PolicyInterface &behaviour, const double discount=1.0, const double alpha=0.1, const double tolerance=0.001, const double epsilon=0.1)
Basic constructor.
Definition: ImportanceSampling.hpp:25
AIToolbox::MDP::OffPolicyControl< ImportanceSampling >::epsilon_
double epsilon_
Definition: OffPolicyTemplate.hpp:349