AIToolbox
A library that offers tools for AI problem solving.
|
Go to the documentation of this file. 1 #ifndef AI_TOOLBOX_POMDP_UTILS_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_UTILS_HEADER_FILE
14 #include <boost/functional/hash.hpp>
28 std::strong_ordering
operator<=>(
const VEntry & lhs,
const VEntry & rhs);
29 bool operator==(
const VEntry & lhs,
const VEntry & rhs);
59 boost::hash_combine(seed, v.
action);
61 boost::hash_combine(seed, v.
values);
133 if constexpr(IsModelEigen<M>) {
134 boost::multi_array<std::remove_cvref_t<decltype(m.getTransitionFunction(0))>, 2> retval( boost::extents[m.getA()][m.getO()] );
135 for (
size_t a = 0; a < m.getA(); ++a)
136 for (
size_t o = 0; o < m.getO(); ++o)
137 retval[a][o] = m.getTransitionFunction(a) *
Vector(m.getObservationFunction(a).col(o)).asDiagonal();
140 Matrix4D retval( boost::extents[m.getA()][m.getO()] );
141 for (
size_t a = 0; a < m.getA(); ++a) {
142 for (
size_t o = 0; o < m.getO(); ++o) {
143 retval[a][o].resize(m.getS(), m.getS());
144 for (
size_t s = 0; s < m.getS(); ++s)
145 for (
size_t s1 = 0; s1 < m.getS(); ++s1)
146 retval[a][o](s, s1) = m.getTransitionProbability(s, a, s1) * m.getObservationProbability(s1, a, o);
176 if constexpr(IsModelEigen<M>) {
177 br = model.getObservationFunction(a).col(o).cwiseProduct((b.transpose() * model.getTransitionFunction(a)).transpose());
179 const size_t S = model.getS();
180 for (
size_t s1 = 0; s1 < S; ++s1 ) {
182 for (
size_t s = 0; s < S; ++s )
183 sum += model.getTransitionProbability(s,a,s1) * b[s];
185 br[s1] = model.getObservationProbability(s1,a,o) * sum;
294 if constexpr(IsModelEigen<M>) {
295 br = (b.transpose() * model.getTransitionFunction(a)).transpose();
297 const size_t S = model.getS();
298 for (
size_t s1 = 0; s1 < S; ++s1 ) {
300 for (
size_t s = 0; s < S; ++s )
301 br[s1] += model.getTransitionProbability(s,a,s1) * b[s];
324 Belief bRet(model.getS());
355 if constexpr(IsModelEigen<M>) {
356 br = model.getObservationFunction(a).col(o).cwiseProduct(b);
358 const size_t S = model.getS();
359 for (
size_t s = 0; s < S; ++s )
360 br[s] = model.getObservationProbability(s, a, o) * b[s];
385 Belief bRet(model.getS());
472 if constexpr (IsModelEigen<M>) {
473 return model.getRewardFunction().col(a).dot(b);
475 double rew = 0.0;
const size_t S = model.getS();
476 for (
size_t s = 0; s < S; ++s )
477 for (
size_t s1 = 0; s1 < S; ++s1 )
478 rew += model.getTransitionProbability(s, a, s1) * model.getExpectedReward(s, a, s1) * b[s];
503 template <
typename ActionRow>
507 const size_t O = row.size();
514 for (
size_t o = 0; o < O; ++o ) {
515 const auto & r = row[o];
516 auto begin = std::begin(r);
517 auto end = std::end(r);
521 out.values += bestMatch->values;
524 out.observations[o] = bestMatch->observations[0];
526 if (value) *value = v;
545 template <
typename ActionRow>
547 auto entry =
makeVEntry(b.size(), a, row.size());
568 template <
typename Projections>
570 const size_t A = projs.size();
572 double bestValue, tmp;
576 for (
size_t a = 1; a < A; ++a ) {
580 if (tmp > bestValue) {
582 std::swap(entry, helper);
585 if (value) *value = bestValue;
611 Vector bpAlpha(pomdp.getS());
613 Belief intermediateBelief(pomdp.getS());
614 Belief nextBelief(pomdp.getS());
616 for (
size_t a = 0; a < pomdp.getA(); ++a) {
621 for (
size_t o = 0; o < pomdp.getO(); ++o) {
624 const auto nextBeliefProbability = nextBelief.sum();
627 nextBelief /= nextBeliefProbability;
629 const auto it =
findBestAtPoint(nextBelief, std::begin(lbVList), std::end(lbVList),
nullptr,
unwrap);
631 bpAlpha += pomdp.getObservationFunction(a).col(o).cwiseProduct(it->values);
633 immediateRewards.col(a) += pomdp.getDiscount() * pomdp.getTransitionFunction(a) * bpAlpha;
637 double v = (initialBelief.transpose() * immediateRewards).maxCoeff(&
id);
640 if (alpha) *alpha = immediateRewards.col(
id);
642 return std::make_tuple(
id, v);
663 template <
bool useLP = true, IsModel M>
666 Vector & qvals = vals ? *vals : storage;
668 qvals = belief.transpose() * immediateRewards;
671 Belief intermediateBelief(pomdp.getS());
672 Belief nextBelief(pomdp.getS());
674 for (
size_t a = 0; a < pomdp.getA(); ++a) {
677 for (
size_t o = 0; o < pomdp.getO(); ++o) {
680 const auto prob = nextBelief.sum();
691 qvals[a] += pomdp.getDiscount() * sum;
694 double bestValue = qvals.maxCoeff(&bestAction);
696 return std::make_tuple(bestAction, bestValue);
MDP::Values values
Definition: Types.hpp:73
VObs observations
Definition: Types.hpp:75
size_t action
Definition: Types.hpp:74