AIToolbox
A library that offers tools for AI problem solving.
POMCP.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_POMDP_POMCP_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_POMCP_HEADER_FILE
3 
4 #include <unordered_map>
5 
6 #include <AIToolbox/Logging.hpp>
7 #include <AIToolbox/Seeder.hpp>
12 
13 namespace AIToolbox::POMDP {
61  template <IsGenerativeModel M>
62  class POMCP {
63  public:
64  using SampleBelief = std::vector<size_t>;
65 
66  struct BeliefNode;
67  using BeliefNodes = std::unordered_map<size_t, BeliefNode>;
68 
69  struct ActionNode {
71  double V = 0.0;
72  unsigned N = 0;
73  };
74  using ActionNodes = std::vector<ActionNode>;
75 
76  struct BeliefNode {
77  BeliefNode() : N(0) {}
78  BeliefNode(size_t s) : belief(1, s), N(0) {}
81  unsigned N;
82  };
83 
92  POMCP(const M& m, size_t beliefSize, unsigned iterations, double exp);
93 
106  size_t sampleAction(const Belief& b, unsigned horizon);
107 
131  size_t sampleAction(size_t a, size_t o, unsigned horizon);
132 
142  void setBeliefSize(size_t beliefSize);
143 
149  void setIterations(unsigned iter);
150 
162  void setExploration(double exp);
163 
169  const M& getModel() const;
170 
176  const BeliefNode& getGraph() const;
177 
183  size_t getBeliefSize() const;
184 
190  unsigned getIterations() const;
191 
197  double getExploration() const;
198 
199  private:
200  const M& model_;
201  size_t S, A, beliefSize_;
202  unsigned iterations_, maxDepth_;
203  double exploration_;
204 
205  SampleBelief sampleBelief_;
206  BeliefNode graph_;
207 
208  mutable RandomEngine rand_;
209 
223  size_t runSimulation(unsigned horizon);
224 
247  double simulate(BeliefNode & b, size_t s, unsigned horizon);
248 
258  template <typename Iterator>
259  Iterator findBestA(Iterator begin, Iterator end);
260 
275  template <typename Iterator>
276  Iterator findBestBonusA(Iterator begin, Iterator end, unsigned count);
277 
285  SampleBelief makeSampledBelief(const Belief & b);
286  };
287 
288  template <IsGenerativeModel M>
289  POMCP<M>::POMCP(const M& m, const size_t beliefSize, const unsigned iter, const double exp) :
290  model_(m), S(model_.getS()), A(model_.getA()), beliefSize_(beliefSize),
291  iterations_(iter), exploration_(exp), graph_(), rand_(Seeder::getSeed()) {}
292 
293  template <IsGenerativeModel M>
294  size_t POMCP<M>::sampleAction(const Belief& b, const unsigned horizon) {
295  // Reset graph
296  graph_ = BeliefNode(A);
297  graph_.children.resize(A);
298  graph_.belief = makeSampledBelief(b);
299 
300  return runSimulation(horizon);
301  }
302 
303  template <IsGenerativeModel M>
304  size_t POMCP<M>::sampleAction(const size_t a, const size_t o, const unsigned horizon) {
305  const auto & obs = graph_.children[a].children;
306 
307  auto it = obs.find(o);
308  if ( it == obs.end() ) {
309  AI_LOGGER(AI_SEVERITY_WARNING, "Observation " << o << " never experienced in simulation, restarting with uniform belief..");
310  auto b = Belief(S); b.fill(1.0/S);
311  return sampleAction(b, horizon);
312  }
313 
314  // Here we need an additional step, because *it is contained by graph_.
315  // If we just move assign, graph_ is first going to delete everything it
316  // contains (included *it), and then we are going to move unallocated memory
317  // into graph_! So we move *it outside of the graph_ hierarchy, so that
318  // we can then assign safely.
319  { auto tmp = std::move(it->second); graph_ = std::move(tmp); }
320 
321  if ( ! graph_.belief.size() ) {
322  AI_LOGGER(AI_SEVERITY_WARNING, "POMCP lost track of the belief, restarting with uniform..");
323  auto b = Belief(S); b.fill(1.0/S);
324  return sampleAction(b, horizon);
325  }
326 
327  // We resize here in case we didn't have time to sample the new
328  // head node. In this case, the new head may not have children.
329  // This would break the UCT call.
330  graph_.children.resize(A);
331 
332  return runSimulation(horizon);
333  }
334 
335  template <IsGenerativeModel M>
336  size_t POMCP<M>::runSimulation(const unsigned horizon) {
337  if ( !horizon ) return 0;
338 
339  maxDepth_ = horizon;
340  std::uniform_int_distribution<size_t> generator(0, graph_.belief.size()-1);
341 
342  for (unsigned i = 0; i < iterations_; ++i )
343  simulate(graph_, graph_.belief.at(generator(rand_)), 0);
344 
345  auto begin = std::begin(graph_.children);
346  return std::distance(begin, findBestA(begin, std::end(graph_.children)));
347  }
348 
349  template <IsGenerativeModel M>
350  double POMCP<M>::simulate(BeliefNode & b, const size_t s, const unsigned depth) {
351  b.N++;
352 
353  auto begin = std::begin(b.children);
354  const size_t a = std::distance(begin, findBestBonusA(begin, std::end(b.children), b.N));
355 
356  auto [s1, o, rew] = model_.sampleSOR(s, a);
357 
358  auto & aNode = b.children[a];
359 
360  {
361  double futureRew = 0.0;
362  // We need to append the node anyway to perform the belief
363  // update for the next timestep.
364  auto ot = aNode.children.find(o);
365  if ( ot == std::end(aNode.children) ) {
366  aNode.children.emplace(std::piecewise_construct,
367  std::forward_as_tuple(o),
368  std::forward_as_tuple(s1));
369  // This stops automatically if we go out of depth
370  futureRew = rollout(model_, s1, maxDepth_ - depth + 1, rand_);
371  }
372  else {
373  ot->second.belief.push_back(s1);
374  // We only go deeper if needed (maxDepth_ is always at least 1).
375  if ( depth + 1 < maxDepth_ && !model_.isTerminal(s1) ) {
376  // Since most memory is allocated on the leaves,
377  // we do not allocate on node creation but only when
378  // we are actually descending into a node. If the node
379  // already has memory this should not do anything in
380  // any case.
381  ot->second.children.resize(A);
382  futureRew = simulate( ot->second, s1, depth + 1 );
383  }
384  }
385 
386  rew += model_.getDiscount() * futureRew;
387  }
388 
389  // Action update
390  aNode.N++;
391  aNode.V += ( rew - aNode.V ) / static_cast<double>(aNode.N);
392 
393  return rew;
394  }
395 
396  template <IsGenerativeModel M>
397  template <typename Iterator>
398  Iterator POMCP<M>::findBestA(Iterator begin, Iterator end) {
399  return std::max_element(begin, end, [](const ActionNode & lhs, const ActionNode & rhs){ return lhs.V < rhs.V; });
400  }
401 
402  template <IsGenerativeModel M>
403  template <typename Iterator>
404  Iterator POMCP<M>::findBestBonusA(Iterator begin, Iterator end, const unsigned count) {
405  // Count here can be as low as 1.
406  // Since log(1) = 0, and 0/0 = error, we add 1.0.
407  const double logCount = std::log(count + 1.0);
408  // We use this function to produce a score for each action. This can be easily
409  // substituted with something else to produce different POMCP variants.
410  auto evaluationFunction = [this, logCount](const ActionNode & an){
411  return an.V + exploration_ * std::sqrt( logCount / an.N );
412  };
413 
414  auto bestIterator = begin++;
415  double bestValue = evaluationFunction(*bestIterator);
416 
417  for ( ; begin < end; ++begin ) {
418  const double actionValue = evaluationFunction(*begin);
419  if ( actionValue > bestValue ) {
420  bestValue = actionValue;
421  bestIterator = begin;
422  }
423  }
424 
425  return bestIterator;
426  }
427 
428  template <IsGenerativeModel M>
429  typename POMCP<M>::SampleBelief POMCP<M>::makeSampledBelief(const Belief & b) {
430  SampleBelief belief;
431  belief.reserve(beliefSize_);
432 
433  for ( size_t i = 0; i < beliefSize_; ++i )
434  belief.push_back(sampleProbability(S, b, rand_));
435 
436  return belief;
437  }
438 
439  template <IsGenerativeModel M>
440  void POMCP<M>::setBeliefSize(const size_t beliefSize) {
441  beliefSize_ = beliefSize;
442  }
443 
444  template <IsGenerativeModel M>
445  void POMCP<M>::setIterations(const unsigned iter) {
446  iterations_ = iter;
447  }
448 
449  template <IsGenerativeModel M>
450  void POMCP<M>::setExploration(const double exp) {
451  exploration_ = exp;
452  }
453 
454  template <IsGenerativeModel M>
455  const M& POMCP<M>::getModel() const {
456  return model_;
457  }
458 
459  template <IsGenerativeModel M>
460  const typename POMCP<M>::BeliefNode& POMCP<M>::getGraph() const {
461  return graph_;
462  }
463 
464  template <IsGenerativeModel M>
465  size_t POMCP<M>::getBeliefSize() const {
466  return beliefSize_;
467  }
468 
469  template <IsGenerativeModel M>
470  unsigned POMCP<M>::getIterations() const {
471  return iterations_;
472  }
473 
474  template <IsGenerativeModel M>
475  double POMCP<M>::getExploration() const {
476  return exploration_;
477  }
478 }
479 
480 #endif
AIToolbox::POMDP
Definition: AMDP.hpp:14
AIToolbox::POMDP::POMCP::BeliefNode::BeliefNode
BeliefNode(size_t s)
Definition: POMCP.hpp:78
AIToolbox::POMDP::POMCP::BeliefNode::N
unsigned N
Definition: POMCP.hpp:81
AIToolbox::POMDP::POMCP::getExploration
double getExploration() const
This function returns the currently set exploration constant.
Definition: POMCP.hpp:475
AI_SEVERITY_WARNING
#define AI_SEVERITY_WARNING
Definition: Logging.hpp:70
AIToolbox::POMDP::POMCP::SampleBelief
std::vector< size_t > SampleBelief
Definition: POMCP.hpp:64
AIToolbox::POMDP::POMCP::ActionNode::N
unsigned N
Definition: POMCP.hpp:72
AIToolbox::POMDP::POMCP::POMCP
POMCP(const M &m, size_t beliefSize, unsigned iterations, double exp)
Basic constructor.
Definition: POMCP.hpp:289
TypeTraits.hpp
AIToolbox::Seeder
This class is an internal class used to seed all random engines in the library.
Definition: Seeder.hpp:15
AIToolbox::POMDP::POMCP::getBeliefSize
size_t getBeliefSize() const
This function returns the initial particle size for converted Beliefs.
Definition: POMCP.hpp:465
AIToolbox::POMDP::POMCP::setBeliefSize
void setBeliefSize(size_t beliefSize)
This function sets the new size for initial beliefs created from sampleAction().
Definition: POMCP.hpp:440
Rollout.hpp
AIToolbox::POMDP::POMCP::getModel
const M & getModel() const
This function returns the POMDP generative model being used.
Definition: POMCP.hpp:455
AIToolbox::POMDP::POMCP::getGraph
const BeliefNode & getGraph() const
This function returns a reference to the internal graph structure holding the results of rollouts.
Definition: POMCP.hpp:460
AIToolbox::POMDP::POMCP::BeliefNode::belief
SampleBelief belief
Definition: POMCP.hpp:80
AIToolbox::MDP::rollout
requires AIToolbox::IsGenerativeModel< M > &&HasIntegralActionSpace< M > double rollout(const M &m, std::remove_cvref_t< decltype(std::declval< M >().getS())> s, const unsigned maxDepth, Gen &rnd)
This function performs a rollout from the input state.
Definition: Rollout.hpp:32
AIToolbox::POMDP::POMCP
This class represents the POMCP online planner using UCB1.
Definition: POMCP.hpp:62
AIToolbox::POMDP::POMCP::setIterations
void setIterations(unsigned iter)
This function sets the number of performed rollouts in POMCP.
Definition: POMCP.hpp:445
AIToolbox::POMDP::POMCP::BeliefNode
Definition: POMCP.hpp:76
Seeder.hpp
AIToolbox::RandomEngine
std::mt19937 RandomEngine
Definition: Types.hpp:14
AIToolbox::POMDP::POMCP::BeliefNodes
std::unordered_map< size_t, BeliefNode > BeliefNodes
Definition: POMCP.hpp:67
AIToolbox::POMDP::POMCP::ActionNodes
std::vector< ActionNode > ActionNodes
Definition: POMCP.hpp:74
AIToolbox::POMDP::POMCP::ActionNode
Definition: POMCP.hpp:69
Types.hpp
AIToolbox::POMDP::POMCP::ActionNode::V
double V
Definition: POMCP.hpp:71
AIToolbox::sampleProbability
size_t sampleProbability(const size_t d, const T &in, G &generator)
This function samples an index from a probability vector.
Definition: Probability.hpp:188
AIToolbox::POMDP::POMCP::BeliefNode::BeliefNode
BeliefNode()
Definition: POMCP.hpp:77
AIToolbox::POMDP::SampleBelief
std::vector< std::pair< size_t, unsigned > > SampleBelief
Definition: rPOMCPGraph.hpp:85
AIToolbox::POMDP::POMCP::getIterations
unsigned getIterations() const
This function returns the number of iterations performed to plan for an action.
Definition: POMCP.hpp:470
AIToolbox::POMDP::POMCP::sampleAction
size_t sampleAction(const Belief &b, unsigned horizon)
This function resets the internal graph and samples for the provided belief and horizon.
Definition: POMCP.hpp:294
AIToolbox::POMDP::POMCP::setExploration
void setExploration(double exp)
This function sets the new exploration constant for POMCP.
Definition: POMCP.hpp:450
AIToolbox::POMDP::Belief
ProbabilityVector Belief
This represents a belief, which is a probability distribution over states.
Definition: Types.hpp:12
AIToolbox::POMDP::POMCP::BeliefNode::children
ActionNodes children
Definition: POMCP.hpp:79
Logging.hpp
AI_LOGGER
#define AI_LOGGER(SEV, ARGS)
Definition: Logging.hpp:114
AIToolbox::POMDP::POMCP::ActionNode::children
BeliefNodes children
Definition: POMCP.hpp:70
Probability.hpp