AIToolbox
A library that offers tools for AI problem solving.
BeliefGenerator.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_POMDP_BELIEF_GENERATOR_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_BELIEF_GENERATOR_HEADER_FILE
3 
4 #include <array>
5 
6 #include <boost/container/flat_set.hpp>
7 
8 #include <AIToolbox/Seeder.hpp>
12 
13 namespace AIToolbox::POMDP {
17  template <IsGenerativeModel M>
19  public:
20  using BeliefList = std::vector<Belief>;
21 
27  BeliefGenerator(const M& model);
28 
41  BeliefList operator()(size_t beliefNumber) const;
42 
58  void operator()(size_t beliefNumber, BeliefList * bl) const;
59 
60  private:
61  using SeenObservations = std::vector<boost::container::flat_set<std::pair<size_t, size_t>>>;
62 
72  void expandBeliefList(size_t max, size_t randomBeliefsToAdd, size_t firstProductiveBelief) const;
73 
74  const M& model_;
75  size_t S, A;
76 
77  mutable RandomEngine rand_;
78 
79  // Helpers and internal pointers.
80  static constexpr unsigned triesPerRun_ = 20;
81  static constexpr unsigned retryLimit_ = 5;
82  static constexpr unsigned minProductiveBeliefs_ = 10;
83  mutable BeliefList * blp_;
84  mutable SeenObservations * sop_;
85  mutable std::vector<unsigned> * up_; // unproductives
86  mutable std::vector<double> * dp_; // distances
87  mutable size_t goodBeliefsSize_, allBeliefsSize_, productiveBeliefs_;
88 
89  mutable Belief helper_;
90  };
91 
92  template <IsGenerativeModel M>
94  model_(model), S(model_.getS()), A(model_.getA()),
95  rand_(Seeder::getSeed()),
96  blp_(nullptr), sop_(nullptr), up_(nullptr), dp_(nullptr), helper_(S) {}
97 
98  template <IsGenerativeModel M>
99  typename BeliefGenerator<M>::BeliefList BeliefGenerator<M>::operator()(const size_t beliefNumber) const {
100  // We add all simplex corners and the middle belief.
101  BeliefList beliefs; beliefs.reserve(std::max(beliefNumber, S));
102 
103  beliefs.emplace_back(S);
104  beliefs.back().fill(1.0/S);
105 
106  for ( size_t s = 0; s < S && s < beliefNumber; ++s ) {
107  beliefs.emplace_back(S);
108  beliefs.back().setZero(); beliefs.back()(s) = 1.0;
109  }
110 
111  this->operator()(beliefNumber, &beliefs);
112 
113  return beliefs;
114  }
115 
116  template <IsGenerativeModel M>
117  void BeliefGenerator<M>::operator()(const size_t maxBeliefs, BeliefList * bl) const {
118  if ( !bl ) return;
119 
120  // Initialize all helper storage.
121  //
122  // We have:
123  //
124  // - bl: The belief list, which will contain beliefs we ever find,
125  // divided into two groups: the good ones (which we will return), and
126  // the bad ones. The good ones are further subdivided into the
127  // unproductive ones (which we do not want to sample from anymore as
128  // they are unlikely to produce anything new), and the productive
129  // ones.
130  // - seenObservations: This list contains, for each good belief, a list
131  // of action/observation pairs seen from it. This is used to avoid
132  // actually creating an updated belief if we have observed the a/o
133  // pair before.
134  // - unproductiveBeliefs: This list tracks the number of times we have
135  // tried to expand a particular belief. After a certain number amount
136  // of times we give up and signal that it is unproductive.
137  // - distances: This list contains, for each bad belief, its distance
138  // from the current good space. This is used to only pick the
139  // farthest beliefs when adding to the good set.
140  blp_ = bl;
141  auto & beliefs = *blp_;
142 
143  SeenObservations seenObservations;
144  seenObservations.resize(beliefs.size());
145 
146  std::vector<unsigned> unproductiveBeliefs;
147  unproductiveBeliefs.resize(beliefs.size());
148 
149  sop_ = &seenObservations;
150  up_ = &unproductiveBeliefs;
151 
152  beliefs.reserve(maxBeliefs);
153  seenObservations.reserve(maxBeliefs);
154  unproductiveBeliefs.reserve(maxBeliefs);
155 
156  std::vector<double> distances;
157  dp_ = &distances;
158 
159  // Since the original method of obtaining beliefs is stochastic,
160  // we keep trying for a while in case we don't find any new beliefs.
161  // However, for some problems (for example the Tiger problem) still
162  // this does not perform too well since the probability of finding
163  // a new belief (via action LISTEN) is pretty low.
164  size_t firstProductiveBelief = 0;
165  productiveBeliefs_ = goodBeliefsSize_ = allBeliefsSize_ = beliefs.size();
166 
167  unsigned randomBeliefsToAdd = 0;
168 
169  while ( goodBeliefsSize_ < maxBeliefs ) {
170  expandBeliefList(maxBeliefs, randomBeliefsToAdd, firstProductiveBelief);
171  if (goodBeliefsSize_ >= maxBeliefs) break;
172 
173  // Shift firstProductiveBelief to avoid checking the initial
174  // non-productive beliefs every single time.
175  for (size_t i = firstProductiveBelief; i < goodBeliefsSize_; ++i) {
176  if (unproductiveBeliefs[i] < retryLimit_) break;
177  else ++firstProductiveBelief;
178  }
179 
180  // Fill the missing if needed with random beliefs so we always have new stuff.
181  randomBeliefsToAdd = productiveBeliefs_ >= minProductiveBeliefs_ ? 0 : minProductiveBeliefs_ - productiveBeliefs_;
182  }
183  // Remove unused bad beliefs.
184  beliefs.resize(maxBeliefs);
185  }
186 
187  template <IsGenerativeModel M>
188  void BeliefGenerator<M>::expandBeliefList(const size_t max, const size_t randomBeliefsToAdd, const size_t firstProductiveBelief) const {
189  auto & bl = *blp_;
190  auto & seenObservations = *sop_;
191  auto & unproductiveBeliefs = *up_;
192  auto & distances = *dp_;
193 
194  // This is our optimistic estimate of how many beliefs we want to add
195  // this run; should be one per productive belief, or at least one per
196  // new random belief we are going to add.
197  //
198  // We refine this estimate later.
199  auto beliefsToAdd = std::max(randomBeliefsToAdd, productiveBeliefs_);
200 
201  // L1 distance
202  auto computeDistance = [](const Belief & lhs, const Belief & rhs) {
203  return (lhs - rhs).cwiseAbs().sum();
204  };
205 
206  // If we have some tentative test in the list, pop it off since we need
207  // to emplace the random beliefs.
208  if (allBeliefsSize_ < bl.size())
209  bl.pop_back();
210 
211  // Add the required random beliefs, computing distances for each.
212  for ( size_t i = 0; i < randomBeliefsToAdd; ++i) {
213  bl.emplace_back(makeRandomProbability(S, rand_));
214  ++allBeliefsSize_;
215 
216  // Compute distance for this belief.
217  distances.push_back(std::numeric_limits<double>::max());
218  for (size_t k = 0; k < goodBeliefsSize_; ++k) {
219  distances.back() = std::min(distances.back(), computeDistance(bl.back(), bl[k]));
220  }
221  }
222 
223  // We apply the discovery process to all beliefs we have approved as
224  // good. We start from the first productive one, since the others have
225  // already produced as much as they can.
226  for (size_t i = firstProductiveBelief; i < goodBeliefsSize_; ++i) {
227  // Skip this belief if it is unproductive.
228  auto & notFoundCounter = unproductiveBeliefs[i];
229  if (notFoundCounter >= retryLimit_) continue;
230 
231  auto & beliefObservations = seenObservations[i];
232  bool foundAnything = false;
233 
234  // Compute all new beliefs
235  for ( size_t a = 0; a < A; ++a ) {
236  updateBeliefPartial(model_, bl[i], a, &helper_);
237 
238  for (unsigned j = 0; j < triesPerRun_; ++j) {
239  // Sample state from belief, and generate an observation
240  // for it (given the current action)
241  const size_t s = sampleProbability(S, bl[i], rand_);
242 
243  size_t o;
244  std::tie(std::ignore, o, std::ignore) = model_.sampleSOR(s, a);
245 
246  // Check the new observation against the ones we have
247  // already produced for this belief. If we did, try again.
248  // Otherwise, mark it as seen.
249  if (beliefObservations.find({a,o}) != beliefObservations.end())
250  continue;
251 
252  beliefObservations.insert({a,o});
253  foundAnything = true;
254 
255  // Now we can update the belief. We use the last element of
256  // bl as temporary storage to avoid re-allocating when we
257  // don't need to.
258  if (allBeliefsSize_ == bl.size())
259  bl.emplace_back(S);
260 
261  updateBeliefPartialNormalized(model_, helper_, a, o, &bl.back());
262 
263  // Now check that the belief did not already exist in our
264  // list. If it did, we don't have to do anything else;
265  bool found = false;
266  for (size_t k = 0; k < allBeliefsSize_; ++k) {
267  if (checkEqualProbability(bl[k], bl.back())) {
268  found = true;
269  break;
270  }
271  }
272  if (found) continue;
273 
274  // Otherwise, the new belief is truly new. We keep it in
275  // the list and compute its distance. Note that we give an
276  // observation list only to the beliefs in the good set
277  // though (since we only sample those), so not yet to this
278  // one.
279  ++allBeliefsSize_;
280 
281  distances.push_back(std::numeric_limits<double>::max());
282  for (size_t k = 0; k < goodBeliefsSize_; ++k) {
283  distances.back() = std::min(distances.back(), computeDistance(bl.back(), bl[k]));
284  }
285  }
286  }
287  // We update the production counter for this belief, so we can skip
288  // the ones which are not needed.
289  if (!foundAnything) {
290  ++notFoundCounter;
291  // Mark it as unproductive if that's the case.
292  if (notFoundCounter == retryLimit_)
293  --productiveBeliefs_;
294  } else {
295  notFoundCounter = 0;
296  }
297  }
298  // Our optimistic estimane gets now scaled back by how many bad beliefs
299  // we actually have to make into good.
300  beliefsToAdd = std::min(beliefsToAdd, allBeliefsSize_ - goodBeliefsSize_);
301 
302  for (size_t i = 0; i < beliefsToAdd; ++i) {
303  assert((allBeliefsSize_ - goodBeliefsSize_) == distances.size());
304 
305  // Find furthest away. It's guaranteed to be new, so we add it to
306  // the good guys.
307  //
308  // We do a double swap here to avoid having to remove elements in
309  // the middle of distances; just saving a slight amount of work.
310  auto dBegin = std::begin(distances), dEnd = std::end(distances);
311  size_t id = std::distance( dBegin, std::max_element(dBegin, dEnd) );
312 
313  // Move the good one at the end
314  std::swap(distances[id], distances.back());
315  std::swap(bl[goodBeliefsSize_ + id], bl[allBeliefsSize_ - 1]);
316 
317  // We finally move the new selected belief in the right spot.
318  // Let's wait to do all the rest of the work until we know we have
319  // to.
320  std::swap(bl[goodBeliefsSize_], bl[allBeliefsSize_ - 1]);
321 
322  // Check if we are done.
323  ++goodBeliefsSize_;
324  if (goodBeliefsSize_ >= max) break;
325 
326  // If we are not done we:
327  //
328  // 1 - Remove the last entry from distances, which belonged to the
329  // now good belief.
330  // 2 - Add a seenObservations entry for the belief, since we can
331  // now sample from it.
332  // 3 - Recompute all remaining distances against the new good
333  // belief, as the space has changed.
334  distances.pop_back();
335  seenObservations.emplace_back();
336  unproductiveBeliefs.emplace_back();
337  ++productiveBeliefs_;
338 
339  for (size_t k = 0; k < distances.size(); ++k) {
340  distances[k] = std::min(distances[k], computeDistance(bl[goodBeliefsSize_ - 1], bl[goodBeliefsSize_ + k]));
341  }
342  }
343  }
344 }
345 
346 #endif
AIToolbox::POMDP
Definition: AMDP.hpp:14
AIToolbox::POMDP::BeliefGenerator::operator()
BeliefList operator()(size_t beliefNumber) const
This function tries to generate at least the specified input number of Beliefs.
Definition: BeliefGenerator.hpp:99
AIToolbox::POMDP::BeliefGenerator::BeliefGenerator
BeliefGenerator(const M &model)
Basic constructor.
Definition: BeliefGenerator.hpp:93
TypeTraits.hpp
AIToolbox::Seeder
This class is an internal class used to seed all random engines in the library.
Definition: Seeder.hpp:15
AIToolbox::POMDP::BeliefGenerator
This class generates reachable beliefs from a given Model.
Definition: BeliefGenerator.hpp:18
AIToolbox::checkEqualProbability
bool checkEqualProbability(const ProbabilityVector &lhs, const ProbabilityVector &rhs)
This function checks whether two input ProbabilityVector are equal.
Definition: Probability.hpp:369
AIToolbox::makeRandomProbability
ProbabilityVector makeRandomProbability(const size_t S, G &generator)
This function generates a random probability vector.
Definition: Probability.hpp:316
AIToolbox::POMDP::BeliefGenerator::BeliefList
std::vector< Belief > BeliefList
Definition: BeliefGenerator.hpp:20
Seeder.hpp
AIToolbox::RandomEngine
std::mt19937 RandomEngine
Definition: Types.hpp:14
AIToolbox::POMDP::updateBeliefPartialNormalized
void updateBeliefPartialNormalized(const M &model, const Belief &b, const size_t a, const size_t o, Belief *bRet)
This function terminates the normalized update of a partially updated belief.
Definition: Utils.hpp:418
Types.hpp
AIToolbox::sampleProbability
size_t sampleProbability(const size_t d, const T &in, G &generator)
This function samples an index from a probability vector.
Definition: Probability.hpp:188
AIToolbox::POMDP::updateBeliefPartial
void updateBeliefPartial(const M &model, const Belief &b, const size_t a, Belief *bRet)
This function partially updates a belief.
Definition: Utils.hpp:289
AIToolbox::POMDP::Belief
ProbabilityVector Belief
This represents a belief, which is a probability distribution over states.
Definition: Types.hpp:12
Probability.hpp