1 #ifndef AI_TOOLBOX_POMDP_GAPMIN_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_GAPMIN_HEADER_FILE
6 #include <boost/heap/fibonacci_heap.hpp>
69 GapMin(
double initialTolerance,
unsigned precisionDigits);
135 std::tuple<double, double, VList, MDP::QFunction>
operator()(
const M & model,
const Belief & initialBelief);
141 using QueueElement = std::tuple<Belief, double, double, double, double, unsigned, std::vector<Belief>>;
143 struct QueueElementLess {
144 bool operator() (
const QueueElement& arg1,
const QueueElement& arg2)
const;
147 using QueueType = boost::heap::fibonacci_heap<QueueElement, boost::heap::compare<QueueElementLess>>;
172 std::tuple<std::vector<Belief>, std::vector<Belief>, std::vector<double>> selectReachableBeliefs(
176 const std::vector<Belief> & lbBeliefs,
222 double initialTolerance_;
223 unsigned precisionDigits_;
228 constexpr
unsigned infiniteHorizon = 1000000;
231 if constexpr (!MDP::IsModelEigen<M>)
235 tolerance_ = initialTolerance_;
240 PBVI pbvi(0, infiniteHorizon, tolerance_);
244 VList lbVList = std::get<1>(bs(pomdp,
true));
247 auto lbBeliefs = std::vector<Belief>{initialBelief};
260 auto fibQ =
Matrix2D(pomdp.getS()+1, pomdp.getA());
261 fibQ.topLeftCorner(pomdp.getS(), pomdp.getA()).noalias() = ubQ;
262 fibQ.row(pomdp.getS()).noalias() = initialBelief.transpose() * ubQ;
270 {initialBelief}, {fibQ.row(pomdp.getS()).maxCoeff()}
279 double ub = ubV.second[0];
284 double threshold = std::pow(10,
std::ceil(std::log10(std::max(std::fabs(ub), std::fabs(lb))))-precisionDigits_);
290 tolerance_ = threshold * (1.0 - pomdp.getDiscount()) / 2.0;
294 auto [newLbBeliefs, newUbBeliefs, newUbVals] = selectReachableBeliefs(pomdp, initialBelief, lbVList, lbBeliefs, ubQ, ubV);
295 const auto newLbBeliefsSize = newLbBeliefs.size();
296 const auto newUbBeliefsSize = newUbBeliefs.size();
298 if (newLbBeliefsSize > 0) {
300 for (
const auto & b : newLbBeliefs)
304 lbBeliefs.insert(std::end(lbBeliefs), std::make_move_iterator(std::begin(newLbBeliefs)), std::make_move_iterator(std::end(newLbBeliefs)));
309 auto sol = pbvi(pomdp, lbBeliefs,
ValueFunction{std::move(lbVList)});
311 lbVList = std::move(std::get<1>(sol).back());
313 const auto rbegin = std::begin(lbVList);
314 const auto rend = std::end (lbVList);
318 std::begin(lbBeliefs), std::end(lbBeliefs),
329 if (newUbBeliefsSize > 0) {
331 const auto prevRows = pomdp.getS() + ubV.first.size();
332 fibQ.conservativeResize(prevRows + newUbBeliefsSize, Eigen::NoChange);
335 for (
size_t i = 0; i < newUbBeliefsSize; ++i)
343 for (
size_t i = 0; i < newUbBeliefs.size(); ++i) {
344 ubV.first.emplace_back(std::move(newUbBeliefs[i]));
345 ubV.second.emplace_back(newUbVals[i]);
346 fibQ.row(prevRows + i).fill(newUbVals[i]);
350 auto [newPOMDP, newPOMDPSOSA] = makeNewPomdp(pomdp, ubQ, ubV);
352 fibQ = std::get<1>(fib(newPOMDP, newPOMDPSOSA, std::move(fibQ)));
357 ubQ.noalias() = fibQ.topRows(pomdp.getS());
358 for (
size_t i = 0; i < ubV.second.size(); ++i)
359 ubV.second[i] = fibQ.row(pomdp.getS() + i).maxCoeff();
362 cleanUp(ubQ, &ubV, &fibQ);
371 AI_LOGGER(
AI_SEVERITY_INFO,
"Updated bounds to " << lb <<
", " << ub <<
" -- size LB: " << lbVList.size() <<
", size UB " << ubV.first.size());
374 if (newLbBeliefsSize + newUbBeliefsSize == 0 || std::fabs(var - oldVar) < tolerance_ * 5)
377 return std::make_tuple(lb, ub, lbVList, ubQ);
382 size_t S = model.getS() + ubV.first.size();
389 const auto & ir = [&]{
390 if constexpr (MDP::IsModelEigen<M>)
return model.getRewardFunction();
391 else return immediateRewards_;
394 R.topRows(model.getS()) = ir;
395 for (
size_t b = 0; b < ubV.first.size(); ++b)
396 R.row(model.getS()+b) = ubV.first[b].transpose() * ir;
404 Belief helper(model.getS()), corner(model.getS());
407 SparseMatrix4D sosa( boost::extents[model.getA()][model.getO()] );
408 const auto updateMatrix = [&](
SparseMatrix2D & m,
const Belief & b,
size_t a,
size_t o,
size_t index) {
410 auto sum = helper.sum();
416 for (
size_t i = 0; i < S; ++i)
418 m.insert(index, i) = dist[i];
422 for (
size_t a = 0; a < model.getA(); ++a) {
423 for (
size_t o = 0; o < model.getO(); ++o) {
425 for (
size_t s = 0; s < model.getS(); ++s) {
427 updateMatrix(m, corner, a, o, s);
431 for (
size_t b = 0; b < ubV.first.size(); ++b)
432 updateMatrix(m, ubV.first[b], a, o, model.getS() + b);
436 sosa[a][o] = std::move(m);
437 sosa[a][o].makeCompressed();
446 return std::make_tuple(
456 std::tuple<std::vector<Belief>, std::vector<Belief>, std::vector<double>> GapMin::selectReachableBeliefs(
457 const M & pomdp,
const Belief & initialBelief,
const VList & lbVList,
461 std::vector<Belief> newLbBeliefs, newUbBeliefs, visitedBeliefs;
462 std::vector<double> newUbValues;
464 constexpr
size_t maxVisitedBeliefs = 1000;
465 size_t overwriteCounter = 0;
466 visitedBeliefs.reserve(maxVisitedBeliefs);
469 unsigned newBeliefs = 0;
472 const auto maxNewBeliefs = std::max(20lu, (ubV.first.size() + lbVList.size()) / 5lu);
476 double currentLowerBound;
477 const auto rbegin = std::begin(lbVList);
478 const auto rend = std::end (lbVList);
480 const double currentUpperBound = std::get<0>(
LPInterpolation(initialBelief, ubQ, ubV));
481 queue.emplace(QueueElement(initialBelief, 0.0, 1.0, currentLowerBound, currentUpperBound, 1, {}));
484 while (!queue.empty() && newBeliefs < maxNewBeliefs) {
485 const auto [belief, gap, beliefProbability, currentLowerBound, currentUpperBound, depth, path] = queue.top();
493 if (visitedBeliefs.size() == maxVisitedBeliefs) {
494 visitedBeliefs[overwriteCounter] = belief;
495 overwriteCounter = (overwriteCounter + 1) % maxVisitedBeliefs;
497 visitedBeliefs.push_back(belief);
505 const auto & ir = [&]{
506 if constexpr (MDP::IsModelEigen<M>)
return pomdp.getRewardFunction();
507 else return immediateRewards_;
518 const auto validForUb = [&newUbBeliefs, &ubV](
const Belief & b) {
520 for (
auto i = 0; i < b.size(); ++i)
526 if (std::any_of(std::begin(newUbBeliefs), std::end(newUbBeliefs), check))
528 if (std::any_of(std::begin(ubV.first), std::end(ubV.first), check))
533 if (validForUb(belief) && ubActionValue < currentUpperBound - tolerance_) {
534 newUbBeliefs.push_back(belief);
535 newUbValues.push_back(ubActionValue);
539 for (
const auto & p : path) {
541 newUbBeliefs.push_back(p);
556 const auto validForLb = [&newLbBeliefs, &lbBeliefs](
const Belief & b) {
558 if (std::any_of(std::begin(newLbBeliefs), std::end(newLbBeliefs), check))
560 if (std::any_of(std::begin(lbBeliefs), std::end(lbBeliefs), check))
565 if (validForLb(belief) && lbActionValue > currentLowerBound + tolerance_) {
569 newLbBeliefs.push_back(belief);
571 for (
const auto & p : path) {
573 newLbBeliefs.push_back(p);
586 if (newBeliefs >= maxNewBeliefs)
590 newPath.push_back(belief);
596 for (
size_t o = 0; o < pomdp.getO(); ++o) {
599 const auto nextBeliefProbability = nextBelief.sum();
601 nextBelief /= nextBeliefProbability;
604 if (std::any_of(std::begin(visitedBeliefs), std::end(visitedBeliefs), check))
continue;
606 const double ubValue = std::get<0>(
LPInterpolation(nextBelief, ubQ, ubV));
610 if ((ubValue - lbValue) * std::pow(pomdp.getDiscount(), depth) > tolerance_ * 20) {
611 const auto nextBeliefOverallProbability = nextBeliefProbability * beliefProbability * pomdp.getDiscount();
612 const auto nextBeliefGap = nextBeliefOverallProbability * (ubValue - lbValue);
614 const auto qcheck = [&nextBelief](
const QueueElement & qe){
return checkEqualProbability(nextBelief, std::get<0>(qe)); };
615 const auto it = std::find_if(std::begin(queue), std::end(queue), qcheck);
616 if (it == std::end(queue)) {
618 std::move(nextBelief),
620 nextBeliefOverallProbability,
627 auto handle = QueueType::s_handle_from_iterator(it);
628 std::get<1>(*handle) += nextBeliefGap;
629 std::get<2>(*handle) += nextBeliefOverallProbability;
630 std::get<5>(*handle) = std::min(std::get<5>(*handle), depth+1);
631 queue.increase(handle);
636 return std::make_tuple(std::move(newLbBeliefs), std::move(newUbBeliefs), std::move(newUbValues));