AIToolbox
A library that offers tools for AI problem solving.
TreeBackupL.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_MDP_TREE_BACKUP_L_HEADER_FILE
2 #define AI_TOOLBOX_MDP_TREE_BACKUP_L_HEADER_FILE
3 
5 
6 namespace AIToolbox::MDP {
12  class TreeBackupL : public OffPolicyControl<TreeBackupL> {
13  public:
15 
27  TreeBackupL(const size_t s, const size_t a, const double discount = 1.0, const double alpha = 0.1,
28  const double lambda = 0.9, const double tolerance = 0.001, const double epsilon = 0.1) :
29  Parent(s, a, discount, alpha, tolerance, epsilon)
30  {
31  setLambda(lambda);
32  }
33 
42  void setLambda(double l) {
43  if ( l < 0.0 || l > 1.0 ) throw std::invalid_argument("Lambda parameter must be in [0,1]");
44  lambda_ = l;
45  }
46 
50  double getLambda() const { return lambda_; }
51 
52  private:
53  friend Parent;
57  double getTraceDiscount(const size_t, const size_t a, const size_t, const double, const size_t maxA) const {
58  const auto prob = epsilon_ / A + (a == maxA) * (1.0 - epsilon_);
59  return lambda_ * prob;
60  }
61 
62  double lambda_;
63  };
64 
79  class TreeBackupLEvaluation : public OffPolicyEvaluation<TreeBackupLEvaluation> {
80  public:
82 
92  TreeBackupLEvaluation(const PolicyInterface & target, const double discount,
93  const double alpha, const double lambda, const double tolerance) :
94  Parent(target, discount, alpha, tolerance)
95  {
96  setLambda(lambda);
97  }
98 
107  void setLambda(double l) {
108  if ( l < 0.0 || l > 1.0 ) throw std::invalid_argument("Lambda parameter must be in [0,1]");
109  lambda_ = l;
110  }
111 
115  double getLambda() const { return lambda_; }
116 
117  private:
118  friend Parent;
122  double getTraceDiscount(const size_t s, const size_t a, const size_t, const double) const {
123  return lambda_ * target_.getActionProbability(s, a);
124  }
125 
126  double lambda_;
127  };
128 }
129 
130 #endif
AIToolbox::MDP::OffPolicyControl
This class is a general version of off-policy control.
Definition: OffPolicyTemplate.hpp:294
AIToolbox::MDP::TreeBackupL
This class implements off-policy control via Tree Backup(lambda).
Definition: TreeBackupL.hpp:12
OffPolicyTemplate.hpp
AIToolbox::MDP::TreeBackupL::setLambda
void setLambda(double l)
This function sets the new lambda parameter.
Definition: TreeBackupL.hpp:42
AIToolbox::MDP::OffPolicyBase::A
size_t A
Definition: OffPolicyTemplate.hpp:154
AIToolbox::MDP::TreeBackupL::Parent
OffPolicyControl< TreeBackupL > Parent
Definition: TreeBackupL.hpp:14
AIToolbox::MDP
Definition: DoubleQLearning.hpp:10
AIToolbox::MDP::TreeBackupLEvaluation::getLambda
double getLambda() const
This function returns the currently set lambda parameter.
Definition: TreeBackupL.hpp:115
AIToolbox::MDP::TreeBackupLEvaluation::TreeBackupLEvaluation
TreeBackupLEvaluation(const PolicyInterface &target, const double discount, const double alpha, const double lambda, const double tolerance)
Basic constructor.
Definition: TreeBackupL.hpp:92
AIToolbox::MDP::TreeBackupLEvaluation
This class implements off-policy evaluation via Tree Backup(lambda).
Definition: TreeBackupL.hpp:79
AIToolbox::MDP::TreeBackupLEvaluation::Parent
OffPolicyEvaluation< TreeBackupLEvaluation > Parent
Definition: TreeBackupL.hpp:81
AIToolbox::MDP::TreeBackupLEvaluation::setLambda
void setLambda(double l)
This function sets the new lambda parameter.
Definition: TreeBackupL.hpp:107
AIToolbox::MDP::OffPolicyEvaluation< TreeBackupLEvaluation >::target_
const PolicyInterface & target_
Definition: OffPolicyTemplate.hpp:245
AIToolbox::MDP::OffPolicyEvaluation
This class is a general version of off-policy evaluation.
Definition: OffPolicyTemplate.hpp:215
AIToolbox::PolicyInterface::getActionProbability
virtual double getActionProbability(const Sampling &s, const Action &a) const =0
This function returns the probability of taking the specified action in the specified state.
AIToolbox::MDP::TreeBackupL::TreeBackupL
TreeBackupL(const size_t s, const size_t a, const double discount=1.0, const double alpha=0.1, const double lambda=0.9, const double tolerance=0.001, const double epsilon=0.1)
Basic constructor.
Definition: TreeBackupL.hpp:27
AIToolbox::MDP::TreeBackupL::getLambda
double getLambda() const
This function returns the currently set lambda parameter.
Definition: TreeBackupL.hpp:50
AIToolbox::MDP::PolicyInterface
Simple typedef for most of MDP's policy needs.
Definition: PolicyInterface.hpp:11
AIToolbox::MDP::OffPolicyControl< TreeBackupL >::epsilon_
double epsilon_
Definition: OffPolicyTemplate.hpp:349