AIToolbox
A library that offers tools for AI problem solving.
QGreedyPolicyWrapper.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_BANDIT_Q_GREEDY_POLICY_WRAPPER_HEADER_FILE
2 #define AI_TOOLBOX_BANDIT_Q_GREEDY_POLICY_WRAPPER_HEADER_FILE
3 
6 
7 namespace AIToolbox::Bandit {
/**
 * @brief This class implements some basic greedy policy primitives.
 *
 * The wrapper operates on a set of action values `q` (indexed by action),
 * an externally owned scratch buffer of action indices and an externally
 * owned random generator. Both the buffer and the generator are held by
 * reference, so they must outlive the wrapper.
 *
 * @tparam V   Type used to store the action values. It must provide
 *             `size()` and `operator[]` yielding something convertible
 *             to double. It may itself be a reference type (see the
 *             deduction guides), in which case nothing is copied.
 * @tparam Gen Type of the random generator used to break ties.
 */
template <typename V, typename Gen>
class QGreedyPolicyWrapper {
    public:
        /**
         * @brief Basic constructor.
         *
         * In debug builds this asserts that the buffer has exactly as
         * many elements as there are action values.
         *
         * @param q      The action values the policy is greedy on.
         * @param buffer Scratch storage for the indices of tied best actions.
         * @param gen    Random generator used to pick among tied actions.
         */
        QGreedyPolicyWrapper(V q, std::vector<size_t> & buffer, Gen & gen);

        /**
         * @brief This function chooses the greediest action.
         *
         * When several actions share the highest value, one of them is
         * picked uniformly at random. The scratch buffer is overwritten.
         *
         * @return The chosen action.
         */
        size_t sampleAction();

        /**
         * @brief This function returns the probability of taking the specified action.
         *
         * @param a The selected action.
         *
         * @return 0.0 if some action has a higher value than `a`; otherwise
         *         one over the number of actions whose value equals that of `a`.
         */
        double getActionProbability(size_t a) const;

        /**
         * @brief This function writes in a vector all probabilities of the policy.
         *
         * @param p The output container; it is written through `p[i]` for
         *          every action `i`, so it must already be large enough.
         */
        template <typename P>
        void getPolicy(P && p) const;

    private:
        V q_;                           // Action values (owned copy or reference, depending on V).
        std::vector<size_t> & buffer_;  // External scratch space; its size defines the number of actions.
        Gen & rand_;                    // External random generator.
};
70 
71  // If we get a temporary, we copy it.
72  template <typename V, typename Gen>
73  QGreedyPolicyWrapper(const V &&, std::vector<size_t>&, Gen &) -> QGreedyPolicyWrapper<V, Gen>;
74 
75  // If we get a reference, we store a reference.
76  template <typename V, typename Gen>
77  QGreedyPolicyWrapper(const V &, std::vector<size_t>&, Gen &) -> QGreedyPolicyWrapper<const V &, Gen>;
78 
79  template <typename V, typename Gen>
80  QGreedyPolicyWrapper<V, Gen>::QGreedyPolicyWrapper(V q, std::vector<size_t> & buffer, Gen & gen)
81  : q_(std::move(q)), buffer_(buffer), rand_(gen)
82  {
83  assert(static_cast<size_t>(q_.size()) == buffer_.size());
84  }
85 
86  template <typename V, typename Gen>
88  // Automatically sets initial best action as bestAction[0] = 0
89  buffer_[0] = 0;
90 
91  // This work is due to multiple max-valued actions
92  double bestValue = q_[0]; unsigned bestActionCount = 1;
93  for ( size_t a = 1; a < buffer_.size(); ++a ) {
94  const double val = q_[a];
95  // The checkEqualGeneral is before the greater since we want to
96  // trap here things that may be equal (even if one is a tiny bit
97  // higher than the other).
98  if ( checkEqualGeneral(val, bestValue) ) {
99  buffer_[bestActionCount] = a;
100  ++bestActionCount;
101  }
102  else if ( val > bestValue ) {
103  buffer_[0] = a;
104  bestActionCount = 1;
105  bestValue = val;
106  }
107  }
108  auto pickDistribution = std::uniform_int_distribution<unsigned>(0, bestActionCount-1);
109  const unsigned selection = pickDistribution(rand_);
110 
111  return buffer_[selection];
112  }
113 
114  template <typename V, typename Gen>
116  const double max = q_[a]; unsigned count = 0;
117  for ( size_t aa = 0; aa < buffer_.size(); ++aa ) {
118  const double val = q_[aa];
119  // The checkEqualGeneral is before the greater since we want to
120  // trap here things that may be equal (even if one is a tiny bit
121  // higher than the other).
122  if ( checkEqualGeneral(val, max) ) ++count;
123  else if ( val > max ) {
124  return 0.0;
125  }
126  }
127  return 1.0 / count;
128  }
129 
130  template <typename V, typename Gen>
131  template <typename P>
133  double max = q_[0]; unsigned count = 1;
134  for ( size_t aa = 1; aa < buffer_.size(); ++aa ) {
135  const double val = q_[aa];
136  // The checkEqualGeneral is before the greater since we want to
137  // trap here things that may be equal (even if one is a tiny bit
138  // higher than the other).
139  if ( checkEqualGeneral(val, max) ) ++count;
140  else if ( val > max ) {
141  max = val;
142  count = 1;
143  }
144  }
145  for ( size_t aa = 0; aa < buffer_.size(); ++aa ) {
146  if ( checkEqualGeneral(q_[aa], max) )
147  p[aa] = 1.0 / count;
148  else
149  p[aa] = 0.0;
150  }
151  }
152 };
153 
154 #endif
Core.hpp
AIToolbox::Bandit::QGreedyPolicyWrapper::QGreedyPolicyWrapper
QGreedyPolicyWrapper(V q, std::vector< size_t > &buffer, Gen &gen)
Basic constructor.
Definition: QGreedyPolicyWrapper.hpp:80
AIToolbox::Bandit::QGreedyPolicyWrapper
QGreedyPolicyWrapper(const V &&, std::vector< size_t > &, Gen &) -> QGreedyPolicyWrapper< V, Gen >
AIToolbox::Bandit
Definition: Experience.hpp:6
AIToolbox::Bandit::QGreedyPolicyWrapper::getPolicy
void getPolicy(P &&p) const
This function writes in a vector all probabilities of the policy.
Definition: QGreedyPolicyWrapper.hpp:132
AIToolbox::Bandit::QGreedyPolicyWrapper::sampleAction
size_t sampleAction()
This function chooses the greediest action.
Definition: QGreedyPolicyWrapper.hpp:87
Types.hpp
AIToolbox::checkEqualGeneral
bool checkEqualGeneral(const double a, const double b)
This function checks if two doubles are reasonably equal.
Definition: Core.hpp:74
AIToolbox::Bandit::QGreedyPolicyWrapper
This class implements some basic greedy policy primitives.
Definition: QGreedyPolicyWrapper.hpp:21
AIToolbox::Bandit::QGreedyPolicyWrapper::getActionProbability
double getActionProbability(size_t a) const
This function returns the probability of taking the specified action.
Definition: QGreedyPolicyWrapper.hpp:115