AIToolbox
A library that offers tools for AI problem solving.
rPOMCP.hpp
#ifndef AI_TOOLBOX_POMDP_rPOMCP_HEADER_FILE
#define AI_TOOLBOX_POMDP_rPOMCP_HEADER_FILE

#include <unordered_map>

#include <AIToolbox/Logging.hpp>
#include <AIToolbox/Seeder.hpp>
#include <AIToolbox/Utils/Probability.hpp>
#include <AIToolbox/POMDP/Types.hpp>
#include <AIToolbox/POMDP/TypeTraits.hpp>

#include <AIToolbox/POMDP/Algorithms/Utils/rPOMCPGraph.hpp>

namespace AIToolbox::POMDP {
    /**
     * @brief This class represents the rPOMCP online planner.
     */
    template <IsGenerativeModel M, bool UseEntropy>
    class rPOMCP {
        public:
            // Shorthands to avoid specifying UseEntropy everywhere.
            using BNode = BeliefNode<UseEntropy>;
            using ANode = ActionNode<UseEntropy>;
            using HNode = HeadBeliefNode<UseEntropy>;

            /// Basic constructor.
            rPOMCP(const M& m, size_t beliefSize, unsigned iterations, double exp, unsigned k = 500);

            /// This function resets the internal graph and samples for the provided belief and horizon.
            size_t sampleAction(const Belief& b, unsigned horizon);

            /// This function continues sampling from the subtree reached via the previous action and observation.
            size_t sampleAction(size_t a, size_t o, unsigned horizon);

            /// This function sets the new size for initial beliefs created from sampleAction().
            void setBeliefSize(size_t beliefSize);

            /// This function sets the number of performed rollouts in rPOMCP.
            void setIterations(unsigned iter);

            /// This function sets the new exploration constant for rPOMCP.
            void setExploration(double exp);

            /// This function returns the POMDP generative model being used.
            const M& getModel() const;

            /// This function returns a reference to the internal graph structure holding the results of rollouts.
            const HNode& getGraph() const;

            /// This function returns the initial particle size for converted Beliefs.
            size_t getBeliefSize() const;

            /// This function returns the number of iterations performed to plan for an action.
            unsigned getIterations() const;

            /// This function returns the currently set exploration constant.
            double getExploration() const;

        private:
            const M& model_;
            size_t S, A, beliefSize_;
            unsigned iterations_, maxDepth_;
            double exploration_;
            unsigned k_;

            mutable RandomEngine rand_;

            HNode graph_;

            // Private Methods
            size_t runSimulation(unsigned horizon);
            double simulate(BNode & b, size_t s, unsigned depth);

            void maxBeliefNodeUpdate(BNode * bn, const ANode & aNode, size_t a);

            template <typename Iterator>
            Iterator findBestA(Iterator begin, Iterator end);

            template <typename Iterator>
            Iterator findBestBonusA(Iterator begin, Iterator end, unsigned count);
    };

    template <IsGenerativeModel M, bool UseEntropy>
    rPOMCP<M, UseEntropy>::rPOMCP(const M& m, const size_t beliefSize, const unsigned iter, const double exp, const unsigned k) :
            model_(m), S(model_.getS()), A(model_.getA()),
            beliefSize_(beliefSize), iterations_(iter),
            exploration_(exp), k_(k),
            rand_(AIToolbox::Seeder::getSeed()), graph_(A, rand_) {}

    template <IsGenerativeModel M, bool UseEntropy>
    size_t rPOMCP<M, UseEntropy>::sampleAction(const Belief& b, const unsigned horizon) {
        // Reset the graph.
        graph_ = HNode(A, beliefSize_, b, rand_);

        return runSimulation(horizon);
    }

    template <IsGenerativeModel M, bool UseEntropy>
    size_t rPOMCP<M, UseEntropy>::sampleAction(const size_t a, const size_t o, const unsigned horizon) {
        auto & obs = graph_.children[a].children;

        auto it = obs.find(o);
        if ( it == obs.end() ) {
            AI_LOGGER(AI_SEVERITY_WARNING, "Observation " << o << " never experienced in simulation, restarting with uniform belief...");
            return sampleAction(Belief(S, 1.0 / S), horizon);
        }

        // Here we need an additional step, because *it is contained by graph_.
        // If we move-assigned directly, graph_ would first delete everything it
        // contains (including *it), and we would then move already-deleted memory
        // into graph_! So we first move *it outside of the graph_ hierarchy, and
        // only then assign safely.
        { BNode tmp = std::move(it->second); graph_ = HNode(A, std::move(tmp), rand_); }

        if ( graph_.isSampleBeliefEmpty() ) {
            AI_LOGGER(AI_SEVERITY_WARNING, "rPOMCP lost track of the belief, restarting with uniform...");
            return sampleAction(Belief(S, 1.0 / S), horizon);
        }

        return runSimulation(horizon);
    }

    template <IsGenerativeModel M, bool UseEntropy>
    size_t rPOMCP<M, UseEntropy>::runSimulation(const unsigned horizon) {
        if ( !horizon ) return 0;

        maxDepth_ = horizon;

        for ( unsigned i = 0; i < iterations_; ++i )
            simulate(graph_, graph_.sampleBelief(), 0);

        auto begin = std::begin(graph_.children);
        size_t bestA = std::distance(begin, findBestA(begin, std::end(graph_.children)));

        // Since we do not update the root value in simulate(), we do it here.
        graph_.V = graph_.children[bestA].V;
        return bestA;
    }

    template <IsGenerativeModel M, bool UseEntropy>
    double rPOMCP<M, UseEntropy>::simulate(BNode & b, size_t s, unsigned depth) {
        b.N++;

        // Select the next action node.
        auto begin = std::begin(b.children);
        size_t a = std::distance(begin, findBestBonusA(begin, std::end(b.children), b.N));
        auto & aNode = b.children[a];

        // Generate the next step.
        size_t s1, o;
        std::tie(s1, o, std::ignore) = model_.sampleSOR(s, a);

        double immAndFutureRew = 0.0;
        {
            typename decltype(aNode.children)::iterator ot;
            bool newNode = false;

            // This either adds a node, or sets ot to the existing node.
            ot = aNode.children.find(o);
            if ( ot == aNode.children.end() ) {
                newNode = true;
                std::tie(ot, std::ignore) = aNode.children.insert(std::make_pair(o, BNode()));
            }

            // Compute the knowledge measure (entropy/max belief) for the new observation
            // node. This needs to be done here since we are going to update a future belief.
            ot->second.updateBeliefAndKnowledge(s1);

            // We only go deeper if needed (maxDepth_ is always at least 1).
            if ( depth + 1 < maxDepth_ && !model_.isTerminal(s1) && !newNode ) {
                ot->second.children.resize(A);
                immAndFutureRew = simulate( ot->second, s1, depth + 1 );
            }
            // Otherwise we increase N for the bottom leaves, since they cannot get it
            // in any other way, and it is needed for the entropy computation.
            else {
                ot->second.N += 1;
                // For leaves we still extract the knowledge measure.
                if ( depth + 1 >= maxDepth_ )
                    immAndFutureRew = ot->second.getKnowledgeMeasure();
            }
        }

        // Action update
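        // Incremental mean: after this update, aNode.V is the average of all the
        // datapoints (immAndFutureRew values) this action node has received so far.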
        aNode.N += 1;
        aNode.V += ( immAndFutureRew - aNode.V ) / static_cast<double>(aNode.N);

        // At this point the current belief node has a correct estimate of its own
        // entropy, and it needs to select its best action. This is not needed for
        // the top node, whose value is set in runSimulation().
        if ( depth == 0 ) return 0.0;

        // Here we decide what to transmit to the upper level. If this node has not
        // been explored enough, we simply pass on the new datapoint. Otherwise we
        // compute the max over the actions, and we transmit a fake datapoint that
        // will modify the value of the action above as if we had chosen the best
        // action all the time in the past.
        if ( b.N >= k_ ) {
            // Force recomputing the best action.
            if ( b.N == k_ ) {
                b.actionsV = HUGE_VAL;
                b.bestAction = a;
            }
            maxBeliefNodeUpdate(&b, aNode, a);
        }
        else {
            b.actionsV += ( immAndFutureRew - b.actionsV ) / static_cast<double>(b.N);
        }

        double oldV = b.V;
        // Note that both actionsV and the knowledge measure have changed since last
        // time! We discount the action part since it represents future reward, while
        // the immediate reward is the current knowledge measure, which is not discounted.
        b.V = model_.getDiscount() * b.actionsV + b.getKnowledgeMeasure();
        // This replaces our old value with the new value in the action update above.
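        // Before this datapoint, the node had contributed (N-1) values averaging oldV
        // to its parent's running sum; returning (b.N - 1) * (b.V - oldV) + b.V brings
        // that total to N * b.V, as if all N datapoints had been worth the new value.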
        return (b.N - 1) * (b.V - oldV) + b.V;
    }

    template <IsGenerativeModel M, bool UseEntropy>
    void rPOMCP<M, UseEntropy>::maxBeliefNodeUpdate(BNode * bp, const ANode & aNode, const size_t a) {
        auto & b = *bp;

        if ( aNode.V >= b.actionsV ) {
            b.actionsV = aNode.V;
            b.bestAction = a;
        }
        // Note: this is needed because the value may go down!
        else if ( a == b.bestAction ) {
            auto begin = std::begin(b.children);
            auto it = findBestA(begin, std::end(b.children));
            b.actionsV = it->V;
            b.bestAction = std::distance(begin, it);
        }
    }

    template <IsGenerativeModel M, bool UseEntropy>
    template <typename Iterator>
    Iterator rPOMCP<M, UseEntropy>::findBestA(const Iterator begin, const Iterator end) {
        return std::max_element(begin, end, [](const ANode & lhs, const ANode & rhs){ return lhs.V < rhs.V; });
    }

    template <IsGenerativeModel M, bool UseEntropy>
    template <typename Iterator>
    Iterator rPOMCP<M, UseEntropy>::findBestBonusA(Iterator begin, const Iterator end, const unsigned count) {
        // Count here can be as low as 1.
        // Since log(1) = 0, and 0/0 = error, we add 1.0.
        double logCount = std::log(count + 1.0);
        // We use this function to produce a score for each action. It can easily be
        // substituted with something else to produce different rPOMCP variants.
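        // The score below is the standard UCB1 upper-confidence bound:
        // score(a) = V(a) + exploration * sqrt(log(N) / N(a)), where N is the node's
        // visit count and N(a) the action's own count (untried actions score +inf).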
        auto evaluationFunction = [this, logCount](const ANode & an){
            return an.V + exploration_ * std::sqrt( logCount / an.N );
        };

        auto bestIterator = begin++;
        double bestValue = evaluationFunction(*bestIterator);

        for ( ; begin < end; ++begin ) {
            double actionValue = evaluationFunction(*begin);
            if ( actionValue > bestValue ) {
                bestValue = actionValue;
                bestIterator = begin;
            }
        }

        return bestIterator;
    }

    template <IsGenerativeModel M, bool UseEntropy>
    void rPOMCP<M, UseEntropy>::setBeliefSize(const size_t beliefSize) {
        beliefSize_ = beliefSize;
    }

    template <IsGenerativeModel M, bool UseEntropy>
    void rPOMCP<M, UseEntropy>::setIterations(const unsigned iter) {
        iterations_ = iter;
    }

    template <IsGenerativeModel M, bool UseEntropy>
    void rPOMCP<M, UseEntropy>::setExploration(const double exp) {
        exploration_ = exp;
    }

    template <IsGenerativeModel M, bool UseEntropy>
    const M& rPOMCP<M, UseEntropy>::getModel() const {
        return model_;
    }

    template <IsGenerativeModel M, bool UseEntropy>
    const typename rPOMCP<M, UseEntropy>::HNode& rPOMCP<M, UseEntropy>::getGraph() const {
        return graph_;
    }

    template <IsGenerativeModel M, bool UseEntropy>
    size_t rPOMCP<M, UseEntropy>::getBeliefSize() const {
        return beliefSize_;
    }

    template <IsGenerativeModel M, bool UseEntropy>
    unsigned rPOMCP<M, UseEntropy>::getIterations() const {
        return iterations_;
    }

    template <IsGenerativeModel M, bool UseEntropy>
    double rPOMCP<M, UseEntropy>::getExploration() const {
        return exploration_;
    }
}

#endif
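
A minimal usage sketch, not part of the header above. MyModel and makeMyModel() are hypothetical placeholders for any generative model satisfying the IsGenerativeModel concept, the parameter values are arbitrary, and the include path assumes the header's usual location within AIToolbox. The second template argument selects the knowledge measure (entropy when true, max-of-belief when false).

    #include <AIToolbox/POMDP/Algorithms/rPOMCP.hpp>

    int main() {
        MyModel model = makeMyModel(); // hypothetical generative POMDP model

        // 1000 particles per initial belief, 10000 rollouts per action,
        // exploration constant 10.0, and the default k = 500.
        AIToolbox::POMDP::rPOMCP<MyModel, true> solver(model, 1000, 10000, 10.0);

        // First step: plan from a uniform belief with horizon 20 (this mirrors
        // the Belief construction used inside the header itself).
        const size_t S = model.getS();
        size_t a = solver.sampleAction(AIToolbox::POMDP::Belief(S, 1.0 / S), 20);

        // Later steps: after executing a and receiving observation o from the
        // environment, reuse the previously sampled tree instead of restarting:
        // a = solver.sampleAction(a, o, 19);
    }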