AIToolbox
A library that offers tools for AI problem solving.
FastInformedBound.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_POMDP_FAST_INFORMED_BOUND_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_FAST_INFORMED_BOUND_HEADER_FILE
3 
5 
10 
11 namespace AIToolbox::POMDP {
82  public:
        /**
         * @brief Basic constructor.
         *
         * @param horizon Upper limit on the number of update sweeps performed
         *                by operator() (the main loop stops once this many
         *                iterations have run). Presumably stored in horizon_;
         *                the constructor body is not visible here.
         * @param tolerance Convergence threshold on the largest absolute
         *                  change between two successive QFunctions. A value
         *                  that checkDifferentSmall() considers equal to 0
         *                  disables the convergence check, so that only the
         *                  horizon stops the loop.
         */
89  FastInformedBound(unsigned horizon, double tolerance = 0.001);
90 
        /**
         * @brief This function computes the Fast Informed Bound for the input POMDP.
         *
         * Builds the SOSA matrix with makeSOSA(m) and forwards to the
         * overload that accepts a precomputed SOSA.
         *
         * @param m The POMDP model to solve.
         * @param oldQ Optional QFunction to start iterating from; when empty
         *             a default starting QFunction is built internally.
         *
         * @return A tuple with the last measured variation (0.0 when the
         *         tolerance check is disabled) and the computed QFunction.
         */
108  template <IsModel M>
109  std::tuple<double, MDP::QFunction> operator()(const M & m, const MDP::QFunction & oldQ = {});
110 
        /**
         * @brief This function computes the Fast Informed Bound using a caller-provided SOSA.
         *
         * @param m The POMDP model to solve.
         * @param sosa Precomputed SOSA container, accessed as sosa[a][o] and
         *             multiplied with the QFunction; it should match what
         *             makeSOSA(m) produces.
         * @param oldQ Optional QFunction to start iterating from (taken by
         *             value and reused as working storage); when empty a
         *             default starting QFunction is built internally.
         *
         * @return A tuple with the last measured variation (0.0 when the
         *         tolerance check is disabled) and the computed QFunction.
         */
134  template <IsModel M, typename SOSA>
135  std::tuple<double, MDP::QFunction> operator()(const M & m, const SOSA & sosa, MDP::QFunction oldQ = {});
136 
        /**
         * @brief This function sets the tolerance parameter.
         *
         * @param tolerance The new tolerance parameter.
         */
150  void setTolerance(double tolerance);
151 
        /**
         * @brief This function sets the horizon parameter.
         *
         * @param h The new horizon parameter.
         */
157  void setHorizon(unsigned h);
158 
        /**
         * @brief This function returns the currently set tolerance parameter.
         *
         * @return The currently set tolerance parameter.
         */
164  double getTolerance() const;
165 
        /**
         * @brief This function returns the current horizon parameter.
         *
         * @return The current horizon parameter.
         */
171  unsigned getHorizon() const;
172 
173  private:
        // Maximum number of update sweeps run by operator().
        // NOTE(review): stored as size_t while the public API uses unsigned.
174  size_t horizon_;
        // Convergence threshold compared against the max absolute Q change.
175  double tolerance_;
176  };
177 
178  template <IsModel M>
179  std::tuple<double, MDP::QFunction> FastInformedBound::operator()(const M & m, const MDP::QFunction & oldQ) {
180  return operator()(m, makeSOSA(m), oldQ);
181  }
182 
183  template <IsModel M, typename SOSA>
184  std::tuple<double, MDP::QFunction> FastInformedBound::operator()(const M & m, const SOSA & sosa, MDP::QFunction oldQ) {
185  const auto & ir = [&]{
186  if constexpr (IsModelEigen<M>) return m.getRewardFunction();
187  else return computeImmediateRewards(m);
188  }();
189  auto newQ = MDP::QFunction(m.getS(), m.getA());
190 
191  if (oldQ.size() == 0) {
192  oldQ.resize(m.getS(), m.getA());
193 
194  double max;
195  using Tmp = std::remove_cvref_t<decltype(ir)>;
196  if constexpr(std::is_base_of_v<Eigen::SparseMatrixBase<Tmp>, Tmp>)
197  max = Eigen::Map<const Vector>(ir.valuePtr(), ir.size()).maxCoeff();
198  else
199  max = ir.maxCoeff();
200 
201  // Note that here we take the max over all IR: since we're
202  // computing an upper bound, we want to assume that we're going to
203  // do the best possible thing after each action forever.
204  oldQ.fill(max / std::max(0.0001, 1.0 - m.getDiscount()));
205  }
206 
207  unsigned timestep = 0;
208  const bool useTolerance = checkDifferentSmall(tolerance_, 0.0);
209  double variation = tolerance_ * 2; // Make it bigger
210  while ( timestep < horizon_ && ( !useTolerance || variation > tolerance_ ) ) {
211  ++timestep;
212  newQ.setZero();
213  // Q(s,a) = R(s,a) + gamma * Sum_o max_a' Sum_s' P(s',o|s,a) * Q(s',a')
214  for (size_t a = 0; a < m.getA(); ++a)
215  for (size_t o = 0; o < m.getO(); ++o)
216  newQ.col(a) += (sosa[a][o] * oldQ).rowwise().maxCoeff();
217  newQ *= m.getDiscount();
218  newQ += ir;
219 
220  if (useTolerance)
221  variation = (oldQ - newQ).cwiseAbs().maxCoeff();
222 
223  std::swap(oldQ, newQ);
224  }
225  return std::make_tuple(useTolerance ? variation : 0.0, std::move(oldQ));
226  }
227 }
228 
229 #endif
230 
AIToolbox::checkDifferentSmall
bool checkDifferentSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably different.
Definition: Core.hpp:60
AIToolbox::POMDP
Definition: AMDP.hpp:14
AIToolbox::POMDP::FastInformedBound
This class implements the Fast Informed Bound algorithm.
Definition: FastInformedBound.hpp:81
Core.hpp
AIToolbox::POMDP::FastInformedBound::getHorizon
unsigned getHorizon() const
This function returns the current horizon parameter.
AIToolbox::POMDP::FastInformedBound::setHorizon
void setHorizon(unsigned h)
This function sets the horizon parameter.
AIToolbox::MDP::computeImmediateRewards
Matrix2D computeImmediateRewards(const M &model)
This function computes all immediate rewards (state and action) of the MDP once for improved speed.
Definition: Utils.hpp:77
AIToolbox::MDP::QFunction
Matrix2D QFunction
Definition: Types.hpp:52
TypeTraits.hpp
Utils.hpp
Types.hpp
AIToolbox::POMDP::FastInformedBound::operator()
std::tuple< double, MDP::QFunction > operator()(const M &m, const MDP::QFunction &oldQ={})
This function computes the Fast Informed Bound for the input POMDP.
Definition: FastInformedBound.hpp:179
AIToolbox::POMDP::FastInformedBound::getTolerance
double getTolerance() const
This function returns the currently set tolerance parameter.
AIToolbox::POMDP::makeSOSA
auto makeSOSA(const M &m)
This function creates the SOSA matrix for the input POMDP.
Definition: Utils.hpp:132
AIToolbox::POMDP::FastInformedBound::setTolerance
void setTolerance(double tolerance)
This function sets the tolerance parameter.
Utils.hpp
AIToolbox::POMDP::FastInformedBound::FastInformedBound
FastInformedBound(unsigned horizon, double tolerance=0.001)
Basic constructor.