AIToolbox
A library that offers tools for AI problem solving.
PERSEUS.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_POMDP_PERSEUS_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_PERSEUS_HEADER_FILE
3 
10 
11 namespace AIToolbox::POMDP {
34  class PERSEUS {
35  public:
47  PERSEUS(size_t nBeliefs, unsigned h, double tolerance);
48 
62  void setTolerance(double tolerance);
63 
69  void setHorizon(unsigned h);
70 
76  void setBeliefSize(size_t nBeliefs);
77 
83  double getTolerance() const;
84 
90  unsigned getHorizon() const;
91 
97  size_t getBeliefSize() const;
98 
123  template <IsModel M>
124  std::tuple<double, ValueFunction> operator()(const M & model, double minReward);
125 
126  private:
127 
148  template <typename ProjectionsTable>
149  VList crossSum(const ProjectionsTable & projs, const std::vector<Belief> & bl, const VList & oldV);
150 
151  size_t S, A, O, beliefSize_;
152  unsigned horizon_;
153  double tolerance_;
154 
155  mutable RandomEngine rand_;
156  };
157 
158  template <IsModel M>
159  std::tuple<double, ValueFunction> PERSEUS::operator()(const M & model, const double minReward) {
160  if ( model.getDiscount() == 1 ) throw std::invalid_argument("The model cannot have a discount of 1 in PERSEUS!");
161  // Initialize "global" variables
162  S = model.getS();
163  A = model.getA();
164  O = model.getO();
165 
166  // In this implementation we compute all beliefs in advance. This
167  // is mostly due to the fact that I prefer counter parameters (how
168  // many beliefs do you want?) versus timers (loop until time is
169  // up). However, this is easily changeable, since the belief generator
170  // can be called multiple times to increase the size of the belief
171  // vector.
172  BeliefGenerator bGen(model);
173  const auto beliefs = bGen(beliefSize_);
174 
175  // We initialize the ValueFunction to the "worst" case scenario.
177 
178  v[0][0].values.fill(minReward / (1.0 - model.getDiscount()));
179 
180  unsigned timestep = 0;
181 
182  Projecter projecter(model);
183 
184  // And off we go
185  const bool useTolerance = checkDifferentSmall(tolerance_, 0.0);
186  double variation = tolerance_ * 2; // Make it bigger
187  while ( timestep < horizon_ && ( !useTolerance || variation > tolerance_ ) ) {
188  ++timestep;
189  // Compute all possible outcomes, from our previous results.
190  // This means that for each action-observation pair, we are going
191  // to obtain the same number of possible outcomes as the number
192  // of entries in our initial vector w.
193  const auto projs = projecter(v[timestep-1]);
194  // Here we find the minimum number of VEntries that we need to improve
195  // v on all beliefs from v[timestep-1].
196  v.emplace_back( crossSum( projs, beliefs, v[timestep-1] ) );
197 
198  // Check convergence
199  if ( useTolerance )
200  variation = weakBoundDistance(v[timestep-1], v[timestep]);
201  }
202 
203  return std::make_tuple(useTolerance ? variation : 0.0, v);
204  }
205 
206  template <typename ProjectionsTable>
207  VList PERSEUS::crossSum(const ProjectionsTable & projs, const std::vector<Belief> & bl, const VList & oldV) {
208  VList result, helper;
209  result.reserve(bl.size());
210  helper.reserve(A);
211  bool start = true;
212  double currentValue, oldValue;
213 
214  auto rbegin = std::begin(result);
215  auto rend = std::end (result);
216  const auto obegin = std::begin(oldV);
217  const auto oend = std::end (oldV);
218 
219  for ( const auto & b : bl ) {
220  if ( !start ) {
221  // If we have already improved this belief, skip it
222  findBestAtPoint( b, rbegin, rend, &currentValue, unwrap);
223  findBestAtPoint( b, obegin, oend, &oldValue, unwrap);
224  if ( currentValue >= oldValue ) continue;
225  }
226 
227  result.emplace_back(crossSumBestAtBelief(b, projs));
228 
229  rbegin = std::begin(result);
230  rend = std::end (result);
231 
232  start = false;
233  }
234 
235  result.erase(extractDominated(rbegin, rend, unwrap), std::end(result));
236 
237  return result;
238  }
239 }
240 
241 #endif
AIToolbox::checkDifferentSmall
bool checkDifferentSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably different.
Definition: Core.hpp:60
AIToolbox::POMDP
Definition: AMDP.hpp:14
AIToolbox::POMDP::PERSEUS::getHorizon
unsigned getHorizon() const
This function returns the currently set horizon parameter.
AIToolbox::POMDP::PERSEUS
This class implements the PERSEUS algorithm.
Definition: PERSEUS.hpp:34
TypeTraits.hpp
AIToolbox::extractDominated
Iterator extractDominated(Iterator begin, Iterator end, P p=P{})
This function finds and moves all Hyperplanes in the range that are dominated by others.
Definition: Prune.hpp:30
AIToolbox::POMDP::crossSumBestAtBelief
void crossSumBestAtBelief(const Belief &b, const ActionRow &row, VEntry *outp, double *value=nullptr)
This function computes the best VEntry for the input belief from the input VLists.
Definition: Utils.hpp:504
AIToolbox::POMDP::VList
std::vector< VEntry > VList
Definition: Types.hpp:77
AIToolbox::POMDP::PERSEUS::setBeliefSize
void setBeliefSize(size_t nBeliefs)
This function sets a new number of support beliefs.
AIToolbox::POMDP::BeliefGenerator
This class generates reachable beliefs from a given Model.
Definition: BeliefGenerator.hpp:18
AIToolbox::POMDP::PERSEUS::operator()
std::tuple< double, ValueFunction > operator()(const M &model, double minReward)
This function solves a POMDP::Model approximately.
Definition: PERSEUS.hpp:159
AIToolbox::POMDP::makeValueFunction
ValueFunction makeValueFunction(size_t S)
This function creates a default ValueFunction.
AIToolbox::POMDP::weakBoundDistance
double weakBoundDistance(const VList &oldV, const VList &newV)
This function returns a weak measure of distance between two VLists.
AIToolbox::RandomEngine
std::mt19937 RandomEngine
Definition: Types.hpp:14
Prune.hpp
AIToolbox::POMDP::PERSEUS::setHorizon
void setHorizon(unsigned h)
This function sets a new horizon parameter.
AIToolbox::POMDP::ValueFunction
std::vector< VList > ValueFunction
Definition: Types.hpp:78
Types.hpp
Projecter.hpp
AIToolbox::POMDP::PERSEUS::setTolerance
void setTolerance(double tolerance)
This function sets the tolerance parameter.
AIToolbox::POMDP::PERSEUS::getBeliefSize
size_t getBeliefSize() const
This function returns the currently set number of support beliefs to use during a solve pass.
AIToolbox::findBestAtPoint
Iterator findBestAtPoint(const Point &point, Iterator begin, Iterator end, double *value=nullptr, P p=P{})
This function returns an iterator pointing to the best Hyperplane for the specified point.
Definition: Polytope.hpp:65
AIToolbox::POMDP::unwrap
const MDP::Values & unwrap(const VEntry &ve)
This function is used as iterator projection to obtain the Values of a VEntry.
Definition: Utils.hpp:68
AIToolbox::POMDP::Projecter
This class offers projecting facilities for Models.
Definition: Projecter.hpp:13
BeliefGenerator.hpp
AIToolbox::POMDP::PERSEUS::PERSEUS
PERSEUS(size_t nBeliefs, unsigned h, double tolerance)
Basic constructor.
AIToolbox::POMDP::PERSEUS::getTolerance
double getTolerance() const
This function returns the currently set tolerance parameter.
Utils.hpp