AIToolbox
A library that offers tools for AI problem solving.
IncrementalPruning.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_POMDP_INCREMENTAL_PRUNING_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_INCREMENTAL_PRUNING_HEADER_FILE
3 
4 #include <limits>
5 
12 
13 namespace AIToolbox::POMDP {
45  public:
62  IncrementalPruning(unsigned h, double tolerance);
63 
77  void setTolerance(double t);
78 
84  void setHorizon(unsigned h);
85 
91  double getTolerance() const;
92 
98  unsigned getHorizon() const;
99 
116  template <IsModel M>
117  std::tuple<double, ValueFunction> operator()(const M & model);
118 
119  private:
139  VList crossSum(const VList & l1, const VList & l2, size_t a, bool order);
140 
141  size_t S, A, O;
142  unsigned horizon_;
143  double tolerance_;
144  };
145 
146  template <IsModel M>
147  std::tuple<double, ValueFunction> IncrementalPruning::operator()(const M & model) {
148  // Initialize "global" variables
149  S = model.getS();
150  A = model.getA();
151  O = model.getO();
152 
153  auto v = makeValueFunction(S); // TODO: May take user input
154 
155  unsigned timestep = 0;
156 
157  Pruner prune(S);
158  Projecter projecter(model);
159 
160  const bool useTolerance = checkDifferentSmall(tolerance_, 0.0);
161  double variation = tolerance_ * 2; // Make it bigger
162  while ( timestep < horizon_ && ( !useTolerance || variation > tolerance_ ) ) {
163  ++timestep;
164 
165  // Compute all possible outcomes, from our previous results.
166  // This means that for each action-observation pair, we are going
167  // to obtain the same number of possible outcomes as the number
168  // of entries in our initial vector w.
169  auto projs = projecter(v[timestep-1]);
170 
171  size_t finalWSize = 0;
172  // In this method we split the work by action, which will then
173  // be joined again at the end of the loop.
174  for ( size_t a = 0; a < A; ++a ) {
175  // We prune each outcome separately to be sure
176  // we do not replicate work later.
177  for ( size_t o = 0; o < O; ++o ) {
178  const auto begin = std::begin(projs[a][o]);
179  const auto end = std::end (projs[a][o]);
180  projs[a][o].erase(prune(begin, end, unwrap), end);
181  }
182 
183  // Here we reduce at the minimum the cross-summing, by alternating
184  // merges. We pick matches like a reverse binary tree, so that
185  // we always pick lists that have been merged the least.
186  //
187  // Example for O==7:
188  //
189  // 0 <- 1 2 <- 3 4 <- 5 6
190  // 0 ------> 2 4 ------> 6
191  // 2 <---------------- 6
192  //
193  // In particular, the variables are:
194  //
195  // - oddOld: Whether our starting step has an odd number of elements.
196  // If so, we skip the last one.
197  // - front: The id of the element at the "front" of our current pass.
198  // note that since passes can be backwards this can be high.
199  // - back: Opposite of front, which excludes the last element if we
200  // have odd elements.
201  // - stepsize: The space between each "first" of each new merge.
202  // - diff: The space between each "first" and its match to merge.
203  // - elements: The number of elements we have left to merge.
204 
205  bool oddOld = O % 2;
206  int i, front = 0, back = O - oddOld, stepsize = 2, diff = 1, elements = O;
207  while ( elements > 1 ) {
208  for ( i = front; i != back; i += stepsize ) {
209  projs[a][i] = crossSum(projs[a][i], projs[a][i + diff], a, stepsize > 0);
210  const auto begin = std::begin(projs[a][i]);
211  const auto end = std::end (projs[a][i]);
212  projs[a][i].erase(prune(begin, end, unwrap), end);
213  --elements;
214  }
215 
216  const bool oddNew = elements % 2;
217 
218  const int tmp = back;
219  back = front - ( oddNew ? 0 : stepsize );
220  front = tmp - ( oddOld ? 0 : stepsize );
221  stepsize *= -2;
222  diff *= -2;
223 
224  oddOld = oddNew;
225  }
226  // Put the result where we can find it
227  if (front != 0)
228  projs[a][0] = std::move(projs[a][front]);
229  finalWSize += projs[a][0].size();
230  }
231  VList w;
232  w.reserve(finalWSize);
233 
234  // Here we don't have to do fancy merging since no cross-summing is involved
235  for ( size_t a = 0; a < A; ++a )
236  w.insert(std::end(w), std::make_move_iterator(std::begin(projs[a][0])), std::make_move_iterator(std::end(projs[a][0])));
237 
238  // We have them all, and we prune one final time to be sure we have
239  // computed the parsimonious set of value functions.
240  const auto begin = std::begin(w);
241  const auto end = std::end (w);
242  w.erase(prune(begin, end, unwrap), end);
243 
244  v.emplace_back(std::move(w));
245 
246  // Check convergence
247  if ( useTolerance )
248  variation = weakBoundDistance(v[timestep-1], v[timestep]);
249  }
250 
251  return std::make_tuple(useTolerance ? variation : 0.0, v);
252  }
253 }
254 
255 #endif
AIToolbox::POMDP::IncrementalPruning::getHorizon
unsigned getHorizon() const
This function returns the currently set horizon parameter.
AIToolbox::checkDifferentSmall
bool checkDifferentSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably different.
Definition: Core.hpp:60
AIToolbox::POMDP
Definition: AMDP.hpp:14
AIToolbox::POMDP::IncrementalPruning::setTolerance
void setTolerance(double t)
This function sets the tolerance parameter.
TypeTraits.hpp
AIToolbox::POMDP::VList
std::vector< VEntry > VList
Definition: Types.hpp:77
AIToolbox::POMDP::IncrementalPruning::operator()
std::tuple< double, ValueFunction > operator()(const M &model)
This function solves a POMDP::Model completely.
Definition: IncrementalPruning.hpp:147
AIToolbox::POMDP::IncrementalPruning::IncrementalPruning
IncrementalPruning(unsigned h, double tolerance)
Basic constructor.
AIToolbox::Pruner
This class offers pruning facilities for non-parsimonious ValueFunction sets.
Definition: Prune.hpp:180
AIToolbox::POMDP::IncrementalPruning::setHorizon
void setHorizon(unsigned h)
This function allows setting the horizon parameter.
AIToolbox::POMDP::IncrementalPruning::getTolerance
double getTolerance() const
This function will return the currently set tolerance parameter.
AIToolbox::POMDP::makeValueFunction
ValueFunction makeValueFunction(size_t S)
This function creates a default ValueFunction.
AIToolbox::POMDP::weakBoundDistance
double weakBoundDistance(const VList &oldV, const VList &newV)
This function returns a weak measure of distance between two VLists.
Prune.hpp
Types.hpp
Projecter.hpp
AIToolbox::POMDP::unwrap
const MDP::Values & unwrap(const VEntry &ve)
This function is used as iterator projection to obtain the Values of a VEntry.
Definition: Utils.hpp:68
AIToolbox::POMDP::IncrementalPruning
This class implements the Incremental Pruning algorithm.
Definition: IncrementalPruning.hpp:44
AIToolbox::POMDP::Projecter
This class offers projecting facilities for Models.
Definition: Projecter.hpp:13
Utils.hpp
Probability.hpp