AIToolbox
A library that offers tools for AI problem solving.
RetraceL.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_MDP_RETRACE_L_HEADER_FILE
2 #define AI_TOOLBOX_MDP_RETRACE_L_HEADER_FILE
3 
5 
6 namespace AIToolbox::MDP {
12  class RetraceL : public OffPolicyControl<RetraceL> {
13  public:
15 
26  RetraceL(const PolicyInterface & behaviour, const double discount = 1.0, const double alpha = 0.1,
27  const double lambda = 0.9,const double tolerance = 0.001, const double epsilon = 0.1) :
28  Parent(behaviour.getS(), behaviour.getA(), discount, alpha, tolerance, epsilon),
29  behaviour_(behaviour)
30  {
31  setLambda(lambda);
32  }
33 
42  void setLambda(double l) {
43  if ( l < 0.0 || l > 1.0 ) throw std::invalid_argument("Lambda parameter must be in [0,1]");
44  lambda_ = l;
45  }
46 
50  double getLambda() const { return lambda_; }
51 
52  private:
53  friend Parent;
57  double getTraceDiscount(const size_t s, const size_t a, const size_t, const double, const size_t maxA) const {
58  const auto prob = epsilon_ / A + (a == maxA) * (1.0 - epsilon_);
59  return lambda_ * std::min(1.0, prob / behaviour_.getActionProbability(s, a));
60  }
61 
62  double lambda_;
63  const PolicyInterface & behaviour_;
64  };
65 
79  class RetraceLEvaluation : public OffPolicyEvaluation<RetraceLEvaluation> {
80  public:
82 
93  RetraceLEvaluation(const PolicyInterface & target, const PolicyInterface & behaviour,
94  const double discount, const double alpha, const double lambda, const double tolerance) :
95  Parent(target, discount, alpha, tolerance),
96  behaviour_(behaviour)
97  {
98  setLambda(lambda);
99  }
100 
109  void setLambda(double l) {
110  if ( l < 0.0 || l > 1.0 ) throw std::invalid_argument("Lambda parameter must be in [0,1]");
111  lambda_ = l;
112  }
113 
117  double getLambda() const { return lambda_; }
118 
119  private:
120  friend Parent;
121 
125  double getTraceDiscount(const size_t s, const size_t a, const size_t, const double) const {
126  return lambda_ * std::min(1.0, target_.getActionProbability(s, a) / behaviour_.getActionProbability(s, a));
127  }
128 
129  double lambda_;
130  const PolicyInterface & behaviour_;
131  };
132 }
133 
134 #endif
135 
AIToolbox::MDP::RetraceL::RetraceL
RetraceL(const PolicyInterface &behaviour, const double discount=1.0, const double alpha=0.1, const double lambda=0.9, const double tolerance=0.001, const double epsilon=0.1)
Basic constructor.
Definition: RetraceL.hpp:26
AIToolbox::MDP::RetraceLEvaluation::Parent
OffPolicyEvaluation< RetraceLEvaluation > Parent
Definition: RetraceL.hpp:81
AIToolbox::MDP::OffPolicyControl
This class is a general version of off-policy control.
Definition: OffPolicyTemplate.hpp:294
AIToolbox::MDP::RetraceLEvaluation::RetraceLEvaluation
RetraceLEvaluation(const PolicyInterface &target, const PolicyInterface &behaviour, const double discount, const double alpha, const double lambda, const double tolerance)
Basic constructor.
Definition: RetraceL.hpp:93
AIToolbox::MDP::RetraceL::Parent
OffPolicyControl< RetraceL > Parent
Definition: RetraceL.hpp:14
OffPolicyTemplate.hpp
AIToolbox::PolicyInterface
This class represents the base interface for policies.
Definition: PolicyInterface.hpp:31
AIToolbox::MDP::RetraceL::setLambda
void setLambda(double l)
This function sets the new lambda parameter.
Definition: RetraceL.hpp:42
AIToolbox::MDP::OffPolicyBase::getA
size_t getA() const
This function returns the number of actions on which the algorithm is working.
AIToolbox::MDP::OffPolicyBase::A
size_t A
Definition: OffPolicyTemplate.hpp:154
AIToolbox::MDP::RetraceLEvaluation::setLambda
void setLambda(double l)
This function sets the new lambda parameter.
Definition: RetraceL.hpp:109
AIToolbox::MDP
Definition: DoubleQLearning.hpp:10
AIToolbox::MDP::OffPolicyBase::getS
size_t getS() const
This function returns the number of states on which the algorithm is working.
AIToolbox::MDP::OffPolicyEvaluation< RetraceLEvaluation >::target_
const PolicyInterface & target_
Definition: OffPolicyTemplate.hpp:245
AIToolbox::MDP::OffPolicyEvaluation
This class is a general version of off-policy evaluation.
Definition: OffPolicyTemplate.hpp:215
AIToolbox::PolicyInterface::getActionProbability
virtual double getActionProbability(const Sampling &s, const Action &a) const =0
This function returns the probability of taking the specified action in the specified state.
AIToolbox::MDP::RetraceL::getLambda
double getLambda() const
This function returns the currently set lambda parameter.
Definition: RetraceL.hpp:50
AIToolbox::MDP::RetraceLEvaluation
This class implements off-policy evaluation via Retrace(lambda).
Definition: RetraceL.hpp:79
AIToolbox::MDP::RetraceL
This class implements off-policy control via Retrace(lambda).
Definition: RetraceL.hpp:12
AIToolbox::MDP::RetraceLEvaluation::getLambda
double getLambda() const
This function returns the currently set lambda parameter.
Definition: RetraceL.hpp:117
AIToolbox::MDP::PolicyInterface
Simple typedef for most of MDP's policy needs.
Definition: PolicyInterface.hpp:11
AIToolbox::MDP::OffPolicyControl< RetraceL >::epsilon_
double epsilon_
Definition: OffPolicyTemplate.hpp:349