AIToolbox
A library that offers tools for AI problem solving.
rPOMCPGraph.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_POMDP_rPOMCP_GRAPH_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_rPOMCP_GRAPH_HEADER_FILE
3 
4 #include <vector>
5 #include <unordered_map>
6 
9 
// Empty placeholder base. It is selected through std::conditional_t by
// BeliefParticle and BeliefNode when the chosen mode needs no extra members.
11  struct EmptyStruct {};
12 
// Estimated entropy term deriving from this particle type. It is stored as
// p * log(p) by the entropy update code, hence "negative" entropy.
14  double negativeEntropy = 0;
15  };
16 
// Keeps track of the belief peak state (the state with the highest particle
// count), used by the max-of-belief knowledge measure.
18  size_t maxS_ = 0;
19  };
20 }
21 
22 namespace AIToolbox::POMDP {
// Forward declaration; the full definition of ActionNode appears further down
// in this file. It is needed here so that ActionNodes can be declared first.
23  template <bool UseEntropy>
24  struct ActionNode;
25 
// A contiguous list of action nodes, all using the same knowledge-measure mode.
26  template <bool UseEntropy>
27  using ActionNodes = std::vector<ActionNode<UseEntropy>>;
28 
// Per-state particle record. When UseEntropy is true it additionally inherits
// the entropy addon; otherwise its base is the empty placeholder struct.
29  template <bool UseEntropy>
30  struct BeliefParticle : public std::conditional_t<UseEntropy, Impl::POMDP::BeliefParticleEntropyAddon, Impl::POMDP::EmptyStruct> {
// Number of particles for this particular type (state).
31  unsigned N = 0;
32  };
33 
34  // This is used to keep track of beliefs down in the tree. We use a map since
35  // we do not need to sample from here, just to access fast and recompute the
36  // entropy values.
// The map is keyed by a size_t state index; the update code indexes it with
// the state `s` and reads the per-state particle count `N` from the value.
37  template <bool UseEntropy>
38  using TrackBelief = std::unordered_map<
39  size_t,
41  std::hash<size_t>,
42  std::equal_to<size_t>
43  >;
44 
// A belief node of the rPOMCP tree.
//
// The base class depends on the mode: with UseEntropy the base is empty,
// otherwise the node inherits BeliefNodeNoEntropyAddon, which the max-belief
// update uses to remember the peak state.
48  template <bool UseEntropy>
49  class BeliefNode : public std::conditional_t<UseEntropy, Impl::POMDP::EmptyStruct, Impl::POMDP::BeliefNodeNoEntropyAddon> {
50  public:
// Default constructor.
51  BeliefNode();
52 
// Updates the knowledge measure after adding a new belief particle for state s.
54  void updateBeliefAndKnowledge(size_t s);
55 
// Returns the current knowledge measure (entropy or max-belief estimate).
57  double getKnowledgeMeasure() const;
58 
// Counter for the number of times we went through this belief node.
59  unsigned N;
61 
// Estimated value for this belief, taking into account future rewards/actions.
62  double V;
// Estimated value for the actions (could be mean, max, or other).
63  double actionsV;
// Tracker of the best available action in MAX-mode, to select the node value.
64  size_t bestAction;
65 
66  protected:
71  };
72 
// Hash map from a size_t key to a belief node.
// NOTE(review): the key is presumably the observation index - confirm against callers.
73  template <bool UseEntropy>
74  using BeliefNodes = std::unordered_map<size_t, BeliefNode<UseEntropy>>;
75 
// An action node of the rPOMCP tree.
76  template <bool UseEntropy>
77  struct ActionNode {
// Tracks the value of the action, as a weighted average of the values of the
// next step belief nodes.
79  double V = 0.0;
// Times this action has been performed.
80  unsigned N = 0;
81  };
82 
83  // This is used to sample at the top of the tree. It is a vector containing a
84  // state-count pair for each particle type: `first` is the state and `second`
// is the number of particles recorded for that state.
85  using SampleBelief = std::vector<std::pair<size_t, unsigned>>;
86 
// The root node of the rPOMCP graph.
//
// On top of a regular BeliefNode it stores a belief in a form suitable for
// sampling (a vector of state-count pairs) together with the random engine
// used to sample from it.
97  template <bool UseEntropy>
98  class HeadBeliefNode : public BeliefNode<UseEntropy> {
99  public:
// Basic constructor: allocates A action children and stores the engine's
// address. No particles are added to the sampling belief.
103  HeadBeliefNode(size_t A, RandomEngine & rand);
// Builds the root by drawing beliefSize particles from the belief b, after
// allocating A action children.
115  HeadBeliefNode(size_t A, size_t beliefSize, const AIToolbox::POMDP::Belief & b, RandomEngine & rand);
// Promotes an existing belief node to root: takes over bn by move, resizes
// its children to A and converts its tracked belief into the sampling belief.
123  HeadBeliefNode(size_t A, BeliefNode<UseEntropy> && bn, RandomEngine & rand);
124 
// Whether we have no particles in the sampling belief.
125  bool isSampleBeliefEmpty() const;
// Samples a state from the internal sampling belief.
126  size_t sampleBelief() const;
// Returns the state with the highest particle count; useful if the agent
// wants a guess of what the current state is.
127  size_t getMostCommonParticle() const;
128 
129  private:
// Address of the engine passed at construction; dereferenced when sampling.
130  RandomEngine * rand_;
// State-count pairs to sample from.
131  SampleBelief sampleBelief_;
// Total number of particles represented by sampleBelief_.
132  size_t beliefSize_;
133  };
134 
// Default constructor: starts with zeroed visit counter, value estimates,
// best-action index and knowledge measure.
135  template <bool UseEntropy>
137  N(0), V(0.0),
138  actionsV(0.0), bestAction(0),
139  knowledgeMeasure_(0.0) {}
140 
// Entropy flavour of the belief/knowledge update for a new particle of state s.
// The knowledge measure is kept as a running sum of per-state p * log(p) terms;
// only the term belonging to state s is refreshed on each call.
141  // Note for ENTROPY implementation:
142  // In theory this is wrong as we should update all the entropy terms, one
143  // for each different type of particle. In practice we hope this will work
144  // anyway, and that there are not going to be huge problems, as each particle
145  // should be seen enough times to still keep a decent approximation of its
146  // entropy term. Minor errors are ok since this is still an estimation.
147  template <>
149  // Remove the old entropy term for this state from the running sum
150  knowledgeMeasure_ -= trackBelief_[s].negativeEntropy;
151  // Updating belief
152  trackBelief_[s].N += 1;
153  // Computing new entropy term for this state; the denominator is N+1
// NOTE(review): presumably N is incremented by the caller afterwards - confirm.
154  double p = static_cast<double>(trackBelief_[s].N) / static_cast<double>(N+1);
155  double newEntropy = p * std::log(p);
156  // Update values
157  trackBelief_[s].negativeEntropy = newEntropy;
158  knowledgeMeasure_ += newEntropy;
159  }
160 
161  // This is the Max-Belief implementation: the knowledge measure is the
// particle count of the most common state divided by N+1.
162  template <>
// Record the new particle for state s.
164  trackBelief_[s].N += 1;
165 
// Keep maxS_ pointing at the state with the highest particle count.
// NOTE(review): operator[] on the map creates an entry for maxS_ if that
// state has not been seen yet - confirm this is intended.
166  if ( trackBelief_[s].N > trackBelief_[maxS_].N )
167  maxS_ = s;
168 
169  knowledgeMeasure_ = static_cast<double>(trackBelief_[maxS_].N) / static_cast<double>(N+1);
170  }
171 
// Returns the stored knowledge measure (entropy or max-belief estimate,
// depending on the mode) without recomputing anything.
172  template <bool UseEntropy>
174  return knowledgeMeasure_;
175  }
176 
177  template <bool UseEntropy>
178  HeadBeliefNode<UseEntropy>::HeadBeliefNode(const size_t A, RandomEngine & rand) : BeliefNode<UseEntropy>(), rand_(&rand) {
179  this->children.resize(A);
180  }
181 
182  template <bool UseEntropy>
183  HeadBeliefNode<UseEntropy>::HeadBeliefNode(const size_t A, const size_t beliefSize, const AIToolbox::POMDP::Belief & b, RandomEngine & rand) :
184  BeliefNode<UseEntropy>(), rand_(&rand), beliefSize_(beliefSize)
185  {
186  this->children.resize(A);
187  std::unordered_map<size_t, unsigned> generatedSamples;
188 
189  size_t S = b.size();
190  for ( size_t i = 0; i < beliefSize_; ++i )
191  generatedSamples[AIToolbox::sampleProbability(S, b, *rand_)] += 1;
192 
193  sampleBelief_.reserve(beliefSize_);
194  for ( auto & pair : generatedSamples ) {
195  sampleBelief_.emplace_back(pair);
196  // Compute entropy here since we don't have a parent in this case (is it really needed?)
197  // double p = static_cast<double>(pair.second) / static_cast<double>(beliefSize_);
198  // negativeEntropy += p * std::log(p);
199  }
200  }
201 
// Promotes an existing belief node to root. The base part is move-constructed
// from bn, and the particle total starts at zero and is accumulated below.
202  template <bool UseEntropy>
204  BeliefNode<UseEntropy>(std::move(bn)), rand_(&rand), beliefSize_(0)
205  {
206  this->children.resize(A);
// Convert the tracked belief (a map) into state-count pairs for sampling,
// summing the counts into beliefSize_ along the way.
207  sampleBelief_.reserve(this->trackBelief_.size());
208  for ( auto & pair : this->trackBelief_ ) {
209  sampleBelief_.emplace_back(pair.first, pair.second.N);
210  beliefSize_ += pair.second.N;
211  }
// Swapping with an empty temporary empties the map rather than just clearing it.
212  TrackBelief<UseEntropy>().swap(this->trackBelief_); // Clear belief memory
213  }
214 
// Whether we have no particles in the sampling belief.
215  template <bool UseEntropy>
217  return sampleBelief_.empty();
218  }
219 
220  template <bool UseEntropy>
222  std::uniform_int_distribution<unsigned> generator(1, beliefSize_);
223  int pick = generator(*rand_);
224 
225  size_t index = 0;
226  while (true) {
227  pick -= sampleBelief_[index].second;
228  if ( pick < 1 ) return sampleBelief_[index].first;
229  ++index;
230  }
231  }
232 
233  template <bool UseEntropy>
235  // We return the most common particle in the head belief
236  size_t bestGuess; unsigned bestGuessCount = 0;
237  for ( auto & pair : sampleBelief_ ) {
238  if ( pair.second > bestGuessCount ) {
239  bestGuessCount = pair.second;
240  bestGuess = pair.first;
241  }
242  }
243  return bestGuess;
244  }
245 }
246 
247 #endif
AIToolbox::POMDP
Definition: AMDP.hpp:14
AIToolbox::POMDP::HeadBeliefNode::sampleBelief
size_t sampleBelief() const
Samples the internal sampling belief.
Definition: rPOMCPGraph.hpp:221
AIToolbox::POMDP::BeliefParticle::N
unsigned N
Number of particles for this particular type (state)
Definition: rPOMCPGraph.hpp:31
AIToolbox::Impl::POMDP::BeliefNodeNoEntropyAddon::maxS_
size_t maxS_
This keeps track of the belief peak state for max of belief.
Definition: rPOMCPGraph.hpp:18
AIToolbox::POMDP::ActionNode::V
double V
Tracks the value of the action, as a weighted average of the values of the next step beliefNodes.
Definition: rPOMCPGraph.hpp:79
AIToolbox::POMDP::BeliefNodes
std::unordered_map< size_t, BeliefNode< UseEntropy > > BeliefNodes
Definition: rPOMCPGraph.hpp:74
AIToolbox::POMDP::BeliefNode::trackBelief_
TrackBelief< UseEntropy > trackBelief_
This is a particle belief which is easy to update.
Definition: rPOMCPGraph.hpp:68
AIToolbox::POMDP::HeadBeliefNode::getMostCommonParticle
size_t getMostCommonParticle() const
Useful if the agent wants a guess of what the current state is.
Definition: rPOMCPGraph.hpp:234
AIToolbox::Impl::POMDP
Definition: rPOMCPGraph.hpp:10
AIToolbox::POMDP::ActionNode::children
BeliefNodes< UseEntropy > children
Definition: rPOMCPGraph.hpp:78
AIToolbox::POMDP::BeliefNode::actionsV
double actionsV
Estimated value for the actions (could be mean, max, or other)
Definition: rPOMCPGraph.hpp:63
AIToolbox::POMDP::ActionNode
Definition: rPOMCPGraph.hpp:24
AIToolbox::POMDP::BeliefNode::getKnowledgeMeasure
double getKnowledgeMeasure() const
This function returns the current estimate for reward for this node.
Definition: rPOMCPGraph.hpp:173
AIToolbox::POMDP::BeliefNode::BeliefNode
BeliefNode()
Definition: rPOMCPGraph.hpp:136
AIToolbox::POMDP::BeliefNode::knowledgeMeasure_
double knowledgeMeasure_
Estimated entropy/max-belief for this node.
Definition: rPOMCPGraph.hpp:70
AIToolbox::POMDP::BeliefNode::V
double V
Estimated value for this belief, taking into account future rewards/actions.
Definition: rPOMCPGraph.hpp:62
AIToolbox::POMDP::HeadBeliefNode::isSampleBeliefEmpty
bool isSampleBeliefEmpty() const
Whether we have no particles in the sampling belief.
Definition: rPOMCPGraph.hpp:216
AIToolbox::POMDP::BeliefNode::updateBeliefAndKnowledge
void updateBeliefAndKnowledge(size_t s)
This function updates the knowledge measure after adding a new belief particle.
AIToolbox::Impl::POMDP::BeliefParticleEntropyAddon
Definition: rPOMCPGraph.hpp:13
AIToolbox::Impl::POMDP::BeliefParticleEntropyAddon::negativeEntropy
double negativeEntropy
Estimated entropy deriving from this particle type.
Definition: rPOMCPGraph.hpp:14
AIToolbox::RandomEngine
std::mt19937 RandomEngine
Definition: Types.hpp:14
AIToolbox::Impl::POMDP::BeliefNodeNoEntropyAddon
Definition: rPOMCPGraph.hpp:17
Types.hpp
AIToolbox::sampleProbability
size_t sampleProbability(const size_t d, const T &in, G &generator)
This function samples an index from a probability vector.
Definition: Probability.hpp:188
AIToolbox::POMDP::ActionNodes
std::vector< ActionNode< UseEntropy > > ActionNodes
Definition: rPOMCPGraph.hpp:27
AIToolbox::POMDP::SampleBelief
std::vector< std::pair< size_t, unsigned > > SampleBelief
Definition: rPOMCPGraph.hpp:85
AIToolbox::POMDP::ActionNode::N
unsigned N
Times this action has been performed.
Definition: rPOMCPGraph.hpp:80
AIToolbox::POMDP::BeliefNode::N
unsigned N
Counter for number of times we went through this belief node.
Definition: rPOMCPGraph.hpp:59
AIToolbox::POMDP::HeadBeliefNode
This class is the root node of the rPOMCP graph.
Definition: rPOMCPGraph.hpp:98
AIToolbox::POMDP::Belief
ProbabilityVector Belief
This represents a belief, which is a probability distribution over states.
Definition: Types.hpp:12
AIToolbox::POMDP::BeliefNode
This is a belief node of the rPOMCP tree.
Definition: rPOMCPGraph.hpp:49
AIToolbox::POMDP::BeliefParticle
Definition: rPOMCPGraph.hpp:30
AIToolbox::POMDP::TrackBelief
std::unordered_map< size_t, BeliefParticle< UseEntropy >, std::hash< size_t >, std::equal_to< size_t > > TrackBelief
Definition: rPOMCPGraph.hpp:43
AIToolbox::POMDP::HeadBeliefNode::HeadBeliefNode
HeadBeliefNode(size_t A, RandomEngine &rand)
Basic constructor.
Definition: rPOMCPGraph.hpp:178
AIToolbox::Impl::POMDP::EmptyStruct
Definition: rPOMCPGraph.hpp:11
AIToolbox::POMDP::BeliefNode::children
ActionNodes< UseEntropy > children
Definition: rPOMCPGraph.hpp:60
AIToolbox::POMDP::BeliefNode::bestAction
size_t bestAction
Tracker of best available action in MAX-mode, to select node value.
Definition: rPOMCPGraph.hpp:64
Probability.hpp