AIToolbox
A library that offers tools for AI problem solving.
LinearSupport.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_POMDP_LINEAR_SUPPORT_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_LINEAR_SUPPORT_HEADER_FILE
3 
4 #include <boost/heap/fibonacci_heap.hpp>
5 
10 
11 #include <unordered_set>
12 #include <boost/functional/hash.hpp>
13 
15 
16 #include <AIToolbox/Logging.hpp>
17 
18 namespace AIToolbox::POMDP {
51  class LinearSupport {
52  public:
69  LinearSupport(unsigned horizon, double tolerance);
70 
84  void setTolerance(double tolerance);
85 
91  void setHorizon(unsigned h);
92 
98  double getTolerance() const;
99 
105  unsigned getHorizon() const;
106 
122  template <IsModel M>
123  std::tuple<double, ValueFunction> operator()(const M & model);
124 
125  private:
126  unsigned horizon_;
127  double tolerance_;
128 
129  using SupportSet = std::unordered_set<VEntry, boost::hash<VEntry>>;
130  using BeliefSet = std::unordered_set<Belief, boost::hash<Belief>>;
131 
132  struct Vertex;
133 
134  struct VertexComparator {
135  bool operator() (const Vertex & lhs, const Vertex & rhs) const;
136  };
137 
138  struct Vertex {
139  Belief belief;
140  SupportSet::iterator support;
141  double currentValue;
142  double error;
143  };
144 
145  // This is our priority queue with all the vertices.
146  using Agenda = boost::heap::fibonacci_heap<Vertex, boost::heap::compare<VertexComparator>>;
147 
148  Agenda agenda_;
149  };
150 
151  template <IsModel M>
152  std::tuple<double, ValueFunction> LinearSupport::operator()(const M& model) {
153  const auto S = model.getS();
154 
155  Projecter project(model);
156  auto v = makeValueFunction(S); // TODO: May take user input
157 
158  unsigned timestep = 0;
159  const bool useTolerance = checkDifferentSmall(tolerance_, 0.0);
160  double variation = tolerance_ * 2; // Make it bigger
161  while ( timestep < horizon_ && ( !useTolerance || variation > tolerance_ ) ) {
162  ++timestep;
163 
164  auto projections = project(v[timestep-1]);
165 
166  // These are the good vectors, the ones that we are going to return for
167  // sure.
168  VList goodSupports;
169 
170  // We use this as a sorted linked list to handle all vectors, so it's
171  // easy to check whether we already have one, and also each vertex can
172  // keep references to them and we don't duplicate vectors all over the
173  // place.
174  SupportSet allSupports;
175 
176  // Similarly, we keep a set of all the vertices we have already
177  // seen to avoid duplicates.
178  BeliefSet triedVertices;
179 
180  // For each corner belief, find its value and alphavector. Add the
181  // alphavectors in a separate list, remove duplicates. Note: In theory
182  // we must be able to find all alphas for each corner, not just a
183  // single best but all best. Cassandra does not do that though.. maybe
184  // we can avoid it? He uses the more powerful corner detection tho.
185  Belief corner(S); corner.setZero();
186  for (size_t s = 0; s < S; ++s) {
187  corner[s] = 1.0;
188 
189  const auto [it, inserted] = allSupports.emplace(crossSumBestAtBelief(corner, projections));
190  if (inserted) goodSupports.push_back(*it);
191 
192  corner[s] = 0.0;
193  }
194 
195  // Now we find the vertices of the polytope created by the
196  // alphavectors we have found. These vertices will bootstrap the
197  // algorithm. This is simply a list of <belief, value> pairs.
198  PointSurface vertices = findVerticesNaive(goodSupports, unwrap);
199 
200  do {
201  // For each vertex, we find its true alphas and its best possible value.
202  // Then we compute the error between a vertex's known true value and
203  // what we can do with the optimal alphas we already have.
204  // If the error is low enough, we don't need them. Otherwise we add
205  // them to the priority queue.
206  for (size_t i = 0; i < vertices.first.size(); ++i) {
207  const auto & vertex = vertices.first[i];
208  if (triedVertices.find(vertex) != std::end(triedVertices))
209  continue;
210 
211  double trueValue;
212  auto support = crossSumBestAtBelief(vertex, projections, &trueValue);
213 
214  double currentValue = vertices.second[i];
215  // FIXME: As long as we use the naive way to find vertices,
216  // we can't really trust the values that come out as they
217  // may be lower than what we actually have. So we are
218  // forced to recompute their value.
219  const auto gsBegin = std::cbegin(goodSupports);
220  const auto gsEnd = std::cend(goodSupports);
221  findBestAtPoint(vertex, gsBegin, gsEnd, &currentValue, unwrap);
222 
223  auto diff = trueValue - currentValue;
224  if (diff > tolerance_ && checkDifferentGeneral(diff, tolerance_)) {
225  auto it = allSupports.insert(std::move(support));
226  Vertex newVertex;
227  newVertex.belief = vertex;
228  newVertex.currentValue = currentValue;
229  newVertex.support = it.first;
230  newVertex.error = diff;
231  agenda_.push(std::move(newVertex));
232  }
233 
234  triedVertices.insert(std::move(vertex));
235  }
236 
237  if (agenda_.size() == 0)
238  break;
239 
240  Vertex best = agenda_.top();
241  agenda_.pop();
242 
243  AI_LOGGER(AI_SEVERITY_INFO, "Selected Vertex " << best.belief.transpose() << " as best, with support: " << best.support->values.transpose() << ", action: " << best.support->action);
244 
245  std::vector<Agenda::handle_type> verticesToRemove;
246 
247  // For each element in the agenda, we need to check whether any would
248  // be made useless by the new supports that best is bringing in. If so,
249  // we can remove them from the queue.
250  for (auto it = agenda_.begin(); it != agenda_.end(); ++it) {
251  // If so, *their* supports, plus the supports of best form the surface
252  // in which we need to find vertices.
253  if (it->belief.dot(best.support->values) > it->currentValue)
254  verticesToRemove.push_back(Agenda::s_handle_from_iterator(it));
255  }
256 
257  AI_LOGGER(AI_SEVERITY_DEBUG, "Removing " << verticesToRemove.size() << " vertices, as they are now obsolete.");
258  for (auto & h : verticesToRemove) {
259  agenda_.erase(h);
260  }
261 
262  // Find vertices between the best support of this belief and the list
263  // we already have.
264  const auto bestAddr = &(*best.support);
265  const auto supBegin = bestAddr;
266  const auto supEnd = bestAddr + 1;
267  const auto chkBegin = std::cbegin(goodSupports);
268  const auto chkEnd = std::cend(goodSupports);
269  vertices = findVerticesNaive(supBegin, supEnd, chkBegin, chkEnd, unwrap, unwrap);
270 
271  // We now can add the support for this vertex to the main list. We
272  // don't need checks here because we are guaranteed that we are
273  // improving the VList.
274  goodSupports.push_back(*best.support);
275  }
276  while (true);
277 
278  v.emplace_back(std::move(goodSupports));
279 
280  // Check convergence
281  if ( useTolerance ) {
282  variation = weakBoundDistance(v[timestep-1], v[timestep]);
283  }
284  }
285  return std::make_tuple(useTolerance ? variation : 0.0, v);
286  }
287 }
288 
289 #endif
AIToolbox::checkDifferentSmall
bool checkDifferentSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably different.
Definition: Core.hpp:60
AIToolbox::POMDP
Definition: AMDP.hpp:14
AIToolbox::checkDifferentGeneral
bool checkDifferentGeneral(const double a, const double b)
This function checks if two doubles are reasonably different.
Definition: Core.hpp:89
AIToolbox::POMDP::LinearSupport::operator()
std::tuple< double, ValueFunction > operator()(const M &model)
This function solves a POMDP::Model completely.
Definition: LinearSupport.hpp:152
AIToolbox::findVerticesNaive
PointSurface findVerticesNaive(NewIt beginNew, NewIt endNew, OldIt alphasBegin, OldIt alphasEnd, P1 p1=P1{}, P2 p2=P2{})
This function implements a naive vertex enumeration algorithm.
Definition: Polytope.hpp:340
TypeTraits.hpp
AIToolbox::POMDP::LinearSupport::getHorizon
unsigned getHorizon() const
This function returns the currently set horizon parameter.
AIToolbox::POMDP::crossSumBestAtBelief
void crossSumBestAtBelief(const Belief &b, const ActionRow &row, VEntry *outp, double *value=nullptr)
This function computes the best VEntry for the input belief from the input VLists.
Definition: Utils.hpp:504
AIToolbox::POMDP::VList
std::vector< VEntry > VList
Definition: Types.hpp:77
AIToolbox::POMDP::LinearSupport::setTolerance
void setTolerance(double tolerance)
This function sets the tolerance parameter.
AIToolbox::POMDP::makeValueFunction
ValueFunction makeValueFunction(size_t S)
This function creates a default ValueFunction.
AIToolbox::POMDP::weakBoundDistance
double weakBoundDistance(const VList &oldV, const VList &newV)
This function returns a weak measure of distance between two VLists.
AIToolbox::PointSurface
std::pair< std::vector< Point >, std::vector< double > > PointSurface
A surface within a simplex defined by points and their height. Should not contain the corners.
Definition: Polytope.hpp:30
Types.hpp
AIToolbox::POMDP::LinearSupport::LinearSupport
LinearSupport(unsigned horizon, double tolerance)
Basic constructor.
Projecter.hpp
AIToolbox::POMDP::LinearSupport::setHorizon
void setHorizon(unsigned h)
This function allows setting the horizon parameter.
AIToolbox::findBestAtPoint
Iterator findBestAtPoint(const Point &point, Iterator begin, Iterator end, double *value=nullptr, P p=P{})
This function returns an iterator pointing to the best Hyperplane for the specified point.
Definition: Polytope.hpp:65
AIToolbox::POMDP::LinearSupport::getTolerance
double getTolerance() const
This function will return the currently set tolerance parameter.
AIToolbox::POMDP::unwrap
const MDP::Values & unwrap(const VEntry &ve)
This function is used as iterator projection to obtain the Values of a VEntry.
Definition: Utils.hpp:68
AI_SEVERITY_INFO
#define AI_SEVERITY_INFO
Definition: Logging.hpp:69
AIToolbox::POMDP::Projecter
This class offers projecting facilities for Models.
Definition: Projecter.hpp:13
AIToolbox::POMDP::Belief
ProbabilityVector Belief
This represents a belief, which is a probability distribution over states.
Definition: Types.hpp:12
Logging.hpp
AI_LOGGER
#define AI_LOGGER(SEV, ARGS)
Definition: Logging.hpp:114
AIToolbox::POMDP::LinearSupport
This class represents the LinearSupport algorithm.
Definition: LinearSupport.hpp:51
Polytope.hpp
AI_SEVERITY_DEBUG
#define AI_SEVERITY_DEBUG
Definition: Logging.hpp:68
Utils.hpp