1 #ifndef AI_TOOLBOX_POMDP_FAST_INFORMED_BOUND_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_FAST_INFORMED_BOUND_HEADER_FILE
134 template <IsModel M,
typename SOSA>
// Iteratively refines the S x A matrix `oldQ` for model `m`, using the
// per-action/per-observation matrices in `sosa`. Yields a tuple of the last
// measured variation (or 0.0 when tolerance checking is off) and the final
// matrix.
183 template <IsModel M,
typename SOSA>
// `ir` is the model's own reward function when M satisfies IsModelEigen.
185 const auto & ir = [&]{
186 if constexpr (IsModelEigen<M>)
return m.getRewardFunction();
// An empty oldQ (size() == 0) is treated as "no starting values supplied":
// allocate it as S x A and fill it with a single constant computed below.
191 if (oldQ.size() == 0) {
192 oldQ.resize(m.getS(), m.getA());
// Strip cv/ref qualifiers so the sparse-vs-other check sees the plain type.
195 using Tmp = std::remove_cvref_t<decltype(ir)>;
// Sparse case: take the maximum by viewing the coefficient array as a dense vector.
// NOTE(review): for Eigen sparse matrices size() is rows()*cols(), while
// valuePtr() exposes only the stored coefficients — confirm that nonZeros()
// is not what is wanted as the Map length here.
196 if constexpr(std::is_base_of_v<Eigen::SparseMatrixBase<Tmp>, Tmp>)
197 max = Eigen::Map<const Vector>(ir.valuePtr(), ir.size()).maxCoeff();
// Fill with max / (1 - discount): the sum of the geometric series of `max`
// discounted at every step. The 0.0001 floor keeps the denominator positive
// when the discount is 1 (or very close to it).
204 oldQ.fill(max / std::max(0.0001, 1.0 - m.getDiscount()));
207 unsigned timestep = 0;
// Starts at twice tolerance_ so that, for a positive tolerance_, the first
// loop test cannot fail on the variation check.
209 double variation = tolerance_ * 2;
// Keep iterating while fewer than horizon_ steps have run and, only when
// useTolerance is set, while the variation is still above tolerance_.
210 while ( timestep < horizon_ && ( !useTolerance || variation > tolerance_ ) ) {
// For every action a and observation o, add to column a of newQ the
// row-wise maximum of the product sosa[a][o] * oldQ.
214 for (
size_t a = 0; a < m.getA(); ++a)
215 for (
size_t o = 0; o < m.getO(); ++o)
216 newQ.col(a) += (sosa[a][o] * oldQ).rowwise().maxCoeff();
// Scale the accumulated values by the discount.
217 newQ *= m.getDiscount();
// Variation is the largest absolute element-wise difference between the
// previous and the new matrix.
221 variation = (oldQ - newQ).cwiseAbs().maxCoeff();
// After the swap, oldQ holds the newest values (used by the next pass and by
// the return below).
223 std::swap(oldQ, newQ);
// Report the measured variation only when tolerance checking was enabled;
// otherwise report 0.0. oldQ is moved into the result to avoid a copy.
225 return std::make_tuple(useTolerance ? variation : 0.0, std::move(oldQ));