AIToolbox
A library that offers tools for AI problem solving.
PrioritizedSweeping.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_MDP_PRIORITIZED_SWEEPING_HEADER_FILE
2 #define AI_TOOLBOX_MDP_PRIORITIZED_SWEEPING_HEADER_FILE
3 
4 #include <tuple>
5 #include <unordered_map>
6 
7 #include <boost/heap/fibonacci_heap.hpp>
8 
11 #include <AIToolbox/MDP/Utils.hpp>
12 
14 
15 namespace AIToolbox::MDP {
43  template <IsModel M>
45  public:
53  PrioritizedSweeping(const M & m, double theta = 0.5, unsigned n = 50);
54 
65  void stepUpdateQ(size_t s, size_t a);
66 
76  void batchUpdateQ();
77 
86  void setQueueThreshold(double t);
87 
93  double getQueueThreshold() const;
94 
100  void setN(unsigned n);
101 
107  unsigned getN() const;
108 
114  size_t getQueueLength() const;
115 
121  const M & getModel() const;
122 
128  const QFunction & getQFunction() const;
129 
139  void setQFunction(const QFunction & q);
140 
146  const ValueFunction & getValueFunction() const;
147 
148  private:
149  size_t S, A;
150  unsigned N;
151  double theta_;
152 
153  const M & model_;
154  QFunction qfun_;
155  ValueFunction vfun_;
156 
157  struct PriorityQueueElement {
158  double priority;
159  std::pair<size_t, size_t> stateAction;
160  bool operator<(const PriorityQueueElement& arg2) const {
161  return priority < arg2.priority;
162  }
163  };
164 
165  using QueueType = boost::heap::fibonacci_heap<PriorityQueueElement>;
166 
167  QueueType queue_;
168 
169  std::unordered_map<std::pair<size_t, size_t>, typename QueueType::handle_type, boost::hash<std::pair<size_t, size_t>>> queueHandles_;
170  };
171 
172  template <IsModel M>
173  PrioritizedSweeping<M>::PrioritizedSweeping(const M & m, const double theta, const unsigned n) :
174  S(m.getS()), A(m.getA()), N(n), theta_(theta), model_(m),
175  qfun_(makeQFunction(S,A)), vfun_(makeValueFunction(S)) {}
176 
177  template <IsModel M>
178  void PrioritizedSweeping<M>::stepUpdateQ(const size_t s, const size_t a) {
179  auto & values = vfun_.values;
180 
181  // Update q[s][a]
182  if constexpr(IsModelEigen<M>) {
183  qfun_(s,a) = model_.getRewardFunction().coeff(s, a) + model_.getTransitionFunction(a).row(s).dot(values * model_.getDiscount());
184  } else {
185  double newQValue = 0;
186  for ( size_t s1 = 0; s1 < S; ++s1 ) {
187  const double probability = model_.getTransitionProbability(s,a,s1);
188  if ( checkDifferentSmall( probability, 0.0 ) )
189  newQValue += probability * ( model_.getExpectedReward(s,a,s1) + model_.getDiscount() * values[s1] );
190  }
191  qfun_(s, a) = newQValue;
192  }
193 
194  double p = values[s];
195  {
196  // Update value and action
197  values[s] = qfun_.row(s).maxCoeff(&(vfun_.actions[s]));
198  }
199 
200  p = std::fabs(values[s] - p);
201 
202  for ( size_t ss = 0; ss < S; ++ss ) {
203  for ( size_t a = 0; a < A; ++a ) {
204  const double delta = p * model_.getTransitionProbability(ss,a,s);
205  // If it changed enough, we're going to update its parents.
206  if ( delta > theta_ ) {
207  const auto pair = std::make_pair(ss, a);
208  auto it = queueHandles_.find(pair);
209 
210  if (it != std::end(queueHandles_)) {
211  if ((*it->second).priority < delta) {
212  (*it->second).priority = delta;
213  queue_.increase(it->second);
214  }
215  } else {
216  queueHandles_[pair] = queue_.emplace(PriorityQueueElement{delta, pair});
217  }
218  }
219  }
220  }
221  }
222 
223  template <IsModel M>
225  for ( unsigned i = 0; i < N; ++i ) {
226  if ( queue_.empty() ) return;
227 
228  // The state we extract has been processed already
229  // So it is the future we have to backtrack from.
230  auto [p, pair] = queue_.top();
231  (void)p;
232 
233  queue_.pop();
234  queueHandles_.erase(pair);
235 
236  stepUpdateQ(pair.first, pair.second);
237  }
238  }
239 
240  template <IsModel M>
241  void PrioritizedSweeping<M>::setN(const unsigned n) {
242  N = n;
243  }
244 
245  template <IsModel M>
246  unsigned PrioritizedSweeping<M>::getN() const {
247  return N;
248  }
249 
250  template <IsModel M>
252  if ( t < 0.0 ) throw std::invalid_argument("Theta parameter must be >= 0");
253  theta_ = t;
254  }
255 
256  template <IsModel M>
258  return theta_;
259  }
260 
261  template <IsModel M>
263  return queue_.size();
264  }
265 
266  template <IsModel M>
268  return model_;
269  }
270 
271  template <IsModel M>
273  return qfun_;
274  }
275 
276  template <IsModel M>
278  qfun_ = qfun;
279  }
280 
281  template <IsModel M>
283  return vfun_;
284  }
285 }
286 
287 #endif
AIToolbox::checkDifferentSmall
bool checkDifferentSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably different.
Definition: Core.hpp:60
AIToolbox::MDP::makeQFunction
QFunction makeQFunction(size_t S, size_t A)
This function creates and zeroes a QFunction.
AIToolbox::MDP::PrioritizedSweeping::setQueueThreshold
void setQueueThreshold(double t)
This function sets the theta parameter.
Definition: PrioritizedSweeping.hpp:251
AIToolbox::MDP::QFunction
Matrix2D QFunction
Definition: Types.hpp:52
AIToolbox::MDP::PrioritizedSweeping
This class represents the PrioritizedSweeping algorithm.
Definition: PrioritizedSweeping.hpp:44
AIToolbox::MDP::ValueFunction
Definition: Types.hpp:47
AIToolbox::MDP::PrioritizedSweeping::setN
void setN(unsigned n)
This function sets the number of sampling passes during batchUpdateQ().
Definition: PrioritizedSweeping.hpp:241
AIToolbox::MDP::PrioritizedSweeping::stepUpdateQ
void stepUpdateQ(size_t s, size_t a)
This function updates the PrioritizedSweeping internal update queue.
Definition: PrioritizedSweeping.hpp:178
AIToolbox::MDP::PrioritizedSweeping::getQFunction
const QFunction & getQFunction() const
This function returns a reference to the internal QFunction.
Definition: PrioritizedSweeping.hpp:272
AIToolbox::MDP
Definition: DoubleQLearning.hpp:10
Utils.hpp
AIToolbox::MDP::PrioritizedSweeping::getModel
const M & getModel() const
This function returns a reference to the referenced Model.
Definition: PrioritizedSweeping.hpp:267
AIToolbox::MDP::PrioritizedSweeping::getQueueLength
size_t getQueueLength() const
This function returns the current number of elements unprocessed in the queue.
Definition: PrioritizedSweeping.hpp:262
AIToolbox::MDP::PrioritizedSweeping::getN
unsigned getN() const
This function returns the currently set number of sampling passes during batchUpdateQ().
Definition: PrioritizedSweeping.hpp:246
AIToolbox::MDP::PrioritizedSweeping::batchUpdateQ
void batchUpdateQ()
This function updates a QFunction based on simulated experience.
Definition: PrioritizedSweeping.hpp:224
AIToolbox::MDP::PrioritizedSweeping::PrioritizedSweeping
PrioritizedSweeping(const M &m, double theta=0.5, unsigned n=50)
Basic constructor.
Definition: PrioritizedSweeping.hpp:173
AIToolbox::MDP::PrioritizedSweeping::getValueFunction
const ValueFunction & getValueFunction() const
This function returns a reference to the internal ValueFunction.
Definition: PrioritizedSweeping.hpp:282
AIToolbox::MDP::PrioritizedSweeping::setQFunction
void setQFunction(const QFunction &q)
This function allows you to set the value of the internal QFunction.
Definition: PrioritizedSweeping.hpp:277
Types.hpp
AIToolbox::MDP::PrioritizedSweeping::getQueueThreshold
double getQueueThreshold() const
This function will return the currently set theta parameter.
Definition: PrioritizedSweeping.hpp:257
TypeTraits.hpp
AIToolbox::MDP::makeValueFunction
ValueFunction makeValueFunction(size_t S)
This function creates and zeroes a ValueFunction.
Probability.hpp