AIToolbox
A library that offers tools for AI problem solving.
Projecter.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_POMDP_PROJECTER_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_PROJECTER_HEADER_FILE
3 
7 
8 namespace AIToolbox::POMDP {
12  template <IsModel M>
13  class Projecter {
14  public:
15  using ProjectionsTable = boost::multi_array<VList, 2>;
16  using ProjectionsRow = boost::multi_array<VList, 1>;
17 
27  Projecter(const M & model);
28 
37 
46  ProjectionsRow operator()(const VList & w, size_t a);
47 
48  private:
49  using PossibleObservationsTable = boost::multi_array<bool, 2>;
50 
54  void computePossibleObservations();
55 
59  void computeImmediateRewards();
60 
61  const M & model_;
62  size_t S, A, O;
63  double discount_;
64 
65  Matrix2D immediateRewards_;
66  PossibleObservationsTable possibleObservations_;
67  };
68 
69  template <IsModel M>
70  Projecter<M>::Projecter(const M& model) :
71  model_(model), S(model_.getS()), A(model_.getA()), O(model_.getO()),
72  discount_(model_.getDiscount()), possibleObservations_(boost::extents[A][O])
73  {
74  computePossibleObservations();
75  computeImmediateRewards();
76  }
77 
78  template <IsModel M>
80  ProjectionsTable projections( boost::extents[A][O] );
81 
82  for ( size_t a = 0; a < A; ++a )
83  projections[a] = operator()(w, a);
84 
85  return projections;
86  }
87 
88  template <IsModel M>
89  typename Projecter<M>::ProjectionsRow Projecter<M>::operator()(const VList & w, const size_t a) {
90  ProjectionsRow projections( boost::extents[O] );
91 
92  for ( size_t o = 0; o < O; ++o ) {
93  // Here we put in just the immediate rewards so that the cross-summing step in the main
94  // function works correctly. However we communicate via the boolean that pruning should
95  // not be done at this step (since adding constants shouldn't do anything anyway).
96  if ( !possibleObservations_[a][o] ) {
97  // We add a parent id anyway in order to keep the code that cross-sums simple. However
98  // note that this fake ID of 0 should never be used, so it should be safe to avoid
99  // setting it to a special value like -1. If one really wants to check, he/she can
100  // just look at the observation table and the belief and see if it makes sense.
101  projections[o].emplace_back(immediateRewards_.row(a), a, VObs(1,0));
102  continue;
103  }
104 
105  // Otherwise we compute a projection for each ValueFunction supplied to us.
106  MDP::Values vproj(S);
107  for ( size_t i = 0; i < w.size(); ++i ) {
108  const auto & v = w[i].values;
109  // For each value function in the previous timestep, we compute the new value
110  // if we performed action a and obtained observation o.
111  // vproj_{a,o}[s] = R(s,a) / |O| + discount * sum_{s'} ( T(s,a,s') * O(s',a,o) * v_{t-1}(s') )
112  if constexpr(IsModelEigen<M>) {
113  vproj = model_.getTransitionFunction(a) * (v.cwiseProduct(model_.getObservationFunction(a).col(o)));
114  } else {
115  vproj.setZero();
116  for ( size_t s = 0; s < S; ++s )
117  for ( size_t s1 = 0; s1 < S; ++s1 )
118  vproj[s] += model_.getTransitionProbability(s,a,s1) * model_.getObservationProbability(s1,a,o) * v[s1];
119  }
120  // Set new projection with found value and previous V id.
121  // projections[o].emplace_back(vproj, a, VObs(1,i));
122  projections[o].emplace_back(vproj * discount_ + immediateRewards_.row(a).transpose(), a, VObs(1,i));
123  }
124  }
125  return projections;
126  }
127 
128  template <IsModel M>
130  immediateRewards_ = [&]{
131  if constexpr(MDP::IsModelEigen<M>)
132  return model_.getRewardFunction().transpose();
133  else
134  return MDP::computeImmediateRewards(model_).transpose();
135  }();
136  // You can find out why this is divided in the incremental pruning paper =)
137  // The idea is that at the end of all the cross sums it's going to add up to the correct value.
138  immediateRewards_ /= static_cast<double>(O);
139  }
140 
141  template <IsModel M>
142  void Projecter<M>::computePossibleObservations() {
143  for ( size_t a = 0; a < A; ++a )
144  for ( size_t o = 0; o < O; ++o )
145  for ( size_t s = 0; s < S; ++s ) // This NEEDS to be last!
146  if ( checkDifferentSmall(model_.getObservationProbability(s,a,o), 0.0) ) { possibleObservations_[a][o] = true; break; } // We only break the S loop!
147  }
148 }
149 
150 #endif
AIToolbox::checkDifferentSmall
bool checkDifferentSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably different.
Definition: Core.hpp:60
AIToolbox::POMDP
Definition: AMDP.hpp:14
AIToolbox::POMDP::Projecter::ProjectionsTable
boost::multi_array< VList, 2 > ProjectionsTable
Definition: Projecter.hpp:15
AIToolbox::MDP::computeImmediateRewards
Matrix2D computeImmediateRewards(const M &model)
This function computes all immediate rewards (state and action) of the MDP once for improved speed.
Definition: Utils.hpp:77
TypeTraits.hpp
AIToolbox::POMDP::Projecter::Projecter
Projecter(const M &model)
Basic constructor.
Definition: Projecter.hpp:70
AIToolbox::POMDP::Projecter::ProjectionsRow
boost::multi_array< VList, 1 > ProjectionsRow
Definition: Projecter.hpp:16
AIToolbox::POMDP::VList
std::vector< VEntry > VList
Definition: Types.hpp:77
AIToolbox::POMDP::Projecter::operator()
ProjectionsTable operator()(const VList &w)
This function returns all possible projections for the provided VList.
Definition: Projecter.hpp:79
AIToolbox::Matrix2D
Eigen::Matrix< double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor|Eigen::AutoAlign > Matrix2D
Definition: Types.hpp:18
AIToolbox::MDP::Values
Vector Values
Definition: Types.hpp:44
Utils.hpp
Types.hpp
AIToolbox::POMDP::Projecter
This class offers projecting facilities for Models.
Definition: Projecter.hpp:13
AIToolbox::POMDP::VObs
std::vector< size_t > VObs
Definition: Types.hpp:71