AIToolbox
A library that offers tools for AI problem solving.
Witness.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_POMDP_WITNESS_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_WITNESS_HEADER_FILE
3 
4 #include <unordered_set>
5 
6 #include <boost/functional/hash.hpp>
7 
14 
15 namespace AIToolbox::POMDP {
44  class Witness {
45  public:
62  Witness(unsigned horizon, double tolerance);
63 
77  void setTolerance(double t);
78 
84  void setHorizon(unsigned h);
85 
91  double getTolerance() const;
92 
98  unsigned getHorizon() const;
99 
114  template <IsModel M>
115  std::tuple<double, ValueFunction> operator()(const M & model);
116 
117  private:
123  template <typename ProjectionsRow>
124  void addDefaultEntry(const ProjectionsRow & projs);
125 
132  template <typename ProjectionsRow>
133  void addVariations(const ProjectionsRow & projs, const VEntry & variated);
134 
135  size_t S, A, O;
136  unsigned horizon_;
137  double tolerance_;
138 
139  std::vector<MDP::Values> agenda_;
140  std::unordered_set<VObs, boost::hash<VObs>> triedVectors_;
141  };
142 
143  template <IsModel M>
144  std::tuple<double, ValueFunction> Witness::operator()(const M& model) {
145  S = model.getS();
146  A = model.getA();
147  O = model.getO();
148 
149  std::vector<VList> U(A);
150 
151  auto v = makeValueFunction(S); // TODO: May take user input
152 
153  unsigned timestep = 0;
154 
155  // This variable we use to manually control the allocations
156  // for the LP solver. This is because this algorithm cannot
157  // know in advance just how many constraints the LP is going
158  // to get. Thus we implement a x2 doubling allocation scheme
159  // to avoid too many reallocations.
160  size_t reserveSize = 1;
161 
162  Projecter project(model);
163  Pruner prune(S);
164  WitnessLP lp(S);
165 
166  const bool useTolerance = checkDifferentSmall(tolerance_, 0.0);
167  double variation = tolerance_ * 2; // Make it bigger
168  while ( timestep < horizon_ && ( !useTolerance || variation > tolerance_ ) ) {
169  ++timestep;
170 
171  // As default, we allocate double the numbers of VEntries for last step.
172  reserveSize = std::max(reserveSize, 2 * v[timestep-1].size());
173  // Compute all possible outcomes, from our previous results.
174  // This means that for each action-observation pair, we are going
175  // to obtain the same number of possible outcomes as the number
176  // of entries in our initial vector w.
177  auto projections = project(v[timestep-1]);
178 
179  size_t finalWSize = 0;
180  for ( size_t a = 0; a < A; ++a ) {
181  U[a].clear();
182  lp.reset();
183  agenda_.clear();
184  triedVectors_.clear();
185  size_t counter = 0;
186 
187  lp.allocate(reserveSize);
188 
189  // We add the VEntry to startoff the whole process. This
190  // VEntry does not even need to be optimal, as we are going
191  // to compute the optimal one for the witness point anyway.
192  addDefaultEntry(projections[a]);
193 
194  // We check whether any element in the agenda improves what we have
195  while ( !agenda_.empty() ) {
196  const auto witness = lp.findWitness(agenda_.back());
197  if ( witness ) {
198  // If so, we generate the best vector for that particular belief point.
199  U[a].push_back(crossSumBestAtBelief(*witness, projections[a], a));
200  lp.addOptimalRow(U[a].back().values);
201  // We add to the agenda all possible "variations" of the VEntry found.
202  addVariations(projections[a], U[a].back());
203  // We manually check memory for the lp, since this method
204  // cannot know in advance how many rows it'll need to do.
205  if ( ++counter == reserveSize ) {
206  reserveSize *= 2;
207  lp.allocate(reserveSize);
208  }
209  }
210  else
211  agenda_.pop_back();
212  }
213  finalWSize += U[a].size();
214  }
215  VList w;
216  w.reserve(finalWSize);
217 
218  // We put together all VEntries we found.
219  for ( size_t a = 0; a < A; ++a )
220  w.insert(std::end(w), std::make_move_iterator(std::begin(U[a])), std::make_move_iterator(std::end(U[a])));
221 
222  // We have them all, and we prune one final time to be sure we have
223  // computed the parsimonious set of value functions.
224  const auto begin = std::begin(w);
225  const auto end = std::end (w);
226  w.erase(prune(begin, end, unwrap), end);
227 
228  v.emplace_back(std::move(w));
229 
230  // Check convergence
231  if ( useTolerance ) {
232  variation = weakBoundDistance(v[timestep-1], v[timestep]);
233  }
234  }
235 
236  return std::make_tuple(useTolerance ? variation : 0.0, v);
237  }
238 
239  template <typename ProjectionsRow>
240  void Witness::addDefaultEntry(const ProjectionsRow & projs) {
241  MDP::Values v(S); v.setZero();
242 
243  // We compute the crossSum between each best vector for the belief.
244  for ( size_t o = 0; o < O; ++o )
245  v.noalias() += projs[o][0].values;
246 
247  triedVectors_.emplace(O, 0);
248  agenda_.emplace_back(std::move(v));
249  }
250 
251  template <typename ProjectionsRow>
252  void Witness::addVariations(const ProjectionsRow & projs, const VEntry & variated) {
253  // We need to copy this one unfortunately
254  auto vObs = variated.observations;
255  const auto & vValues = variated.values;
256 
257  for ( size_t o = 0; o < O; ++o ) {
258  const size_t skip = vObs[o];
259 
260  for ( size_t i = 0; i < projs[o].size(); ++i ) {
261  if ( i == skip ) continue;
262 
263  vObs[o] = i;
264  if ( triedVectors_.find(vObs) != std::end(triedVectors_) ) continue;
265 
266  triedVectors_.insert(vObs);
267 
268  auto v = vValues - projs[o][skip].values + projs[o][i].values;
269  agenda_.emplace_back(std::move(v));
270  }
271  vObs[o] = skip;
272  }
273  }
274 }
275 
276 #endif
AIToolbox::checkDifferentSmall
bool checkDifferentSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably different.
Definition: Core.hpp:60
AIToolbox::POMDP
Definition: AMDP.hpp:14
AIToolbox::POMDP::VEntry
Definition: Types.hpp:72
AIToolbox::POMDP::Witness::Witness
Witness(unsigned horizon, double tolerance)
Basic constructor.
AIToolbox::POMDP::Witness::getTolerance
double getTolerance() const
This function will return the currently set tolerance parameter.
TypeTraits.hpp
AIToolbox::POMDP::crossSumBestAtBelief
void crossSumBestAtBelief(const Belief &b, const ActionRow &row, VEntry *outp, double *value=nullptr)
This function computes the best VEntry for the input belief from the input VLists.
Definition: Utils.hpp:504
AIToolbox::POMDP::Witness
This class implements the Witness algorithm.
Definition: Witness.hpp:44
AIToolbox::POMDP::VList
std::vector< VEntry > VList
Definition: Types.hpp:77
AIToolbox::WitnessLP::addOptimalRow
void addOptimalRow(const Hyperplane &v)
This function adds a new optimal constraint to the LP, which will not be removed unless the LP is res...
AIToolbox::Pruner
This class offers pruning facilities for non-parsimonious ValueFunction sets.
Definition: Prune.hpp:180
AIToolbox::POMDP::Witness::operator()
std::tuple< double, ValueFunction > operator()(const M &model)
This function solves a POMDP::Model completely.
Definition: Witness.hpp:144
AIToolbox::WitnessLP::findWitness
std::optional< Point > findWitness(const Hyperplane &v)
This function solves the currently set LP.
AIToolbox::POMDP::Witness::setHorizon
void setHorizon(unsigned h)
This function allows setting the horizon parameter.
AIToolbox::MDP::Values
Vector Values
Definition: Types.hpp:44
AIToolbox::POMDP::makeValueFunction
ValueFunction makeValueFunction(size_t S)
This function creates a default ValueFunction.
AIToolbox::POMDP::weakBoundDistance
double weakBoundDistance(const VList &oldV, const VList &newV)
This function returns a weak measure of distance between two VLists.
AIToolbox::POMDP::Witness::setTolerance
void setTolerance(double t)
This function sets the tolerance parameter.
AIToolbox::POMDP::Witness::getHorizon
unsigned getHorizon() const
This function returns the currently set horizon parameter.
Prune.hpp
Types.hpp
AIToolbox::WitnessLP::reset
void reset()
This function resets the internal LP to only the simplex constraint.
Projecter.hpp
AIToolbox::WitnessLP
This class implements an easy interface to do Witness discovery through linear programming.
Definition: Polytope.hpp:551
AIToolbox::WitnessLP::allocate
void allocate(size_t rows)
This function reserves space for a certain amount of rows (not counting the simplex) to avoid realloc...
AIToolbox::POMDP::unwrap
const MDP::Values & unwrap(const VEntry &ve)
This function is used as iterator projection to obtain the Values of a VEntry.
Definition: Utils.hpp:68
AIToolbox::POMDP::Projecter
This class offers projecting facilities for Models.
Definition: Projecter.hpp:13
Utils.hpp