AIToolbox
A library that offers tools for AI problem solving.
AMDP.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_POMDP_AMDP_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_AMDP_HEADER_FILE
3 
4 #include <cmath>
5 
6 #include <AIToolbox/Types.hpp>
10 #include <AIToolbox/MDP/Model.hpp>
13 
14 namespace AIToolbox::POMDP {
41  class AMDP {
42  public:
43  using Discretizer = std::function<size_t(const Belief&)>;
44 
51  AMDP(size_t nBeliefs, size_t entropyBuckets);
52 
53 
59  void setBeliefSize(size_t nBeliefs);
60 
66  void setEntropyBuckets(size_t buckets);
67 
73  size_t getBeliefSize() const;
74 
80  size_t getEntropyBuckets() const;
81 
90  template <IsModel M>
91  std::tuple<MDP::Model, Discretizer> discretizeDense(const M& model);
92 
101  template <IsModel M>
102  std::tuple<MDP::SparseModel, Discretizer> discretizeSparse(const M& model);
103 
104  private:
105  Discretizer makeDiscretizer(size_t S);
106 
107  size_t beliefSize_, buckets_;
108  };
109 
110  template <IsModel M>
111  std::tuple<MDP::Model, AMDP::Discretizer> AMDP::discretizeDense(const M& model) {
112  const size_t S = model.getS(), A = model.getA(), O = model.getO();
113  const size_t S1 = S * buckets_;
114 
115  BeliefGenerator bGen(model);
116  const auto beliefs = bGen(beliefSize_);
117 
118  auto T = MDP::Model::TransitionMatrix(A, Matrix2D::Zero(S1, S1));
119  auto R = MDP::Model::RewardMatrix (S1, A);
120  R.setZero();
121 
122  const auto discretizer = makeDiscretizer(S);
123 
124  Belief b1(S);
125  for ( const auto & b : beliefs ) {
126  const size_t s = discretizer(b);
127 
128  for ( size_t a = 0; a < A; ++a ) {
129  const double r = beliefExpectedReward(model, b, a);
130 
131  for ( size_t o = 0; o < O; ++o ) {
132  updateBeliefUnnormalized(model, b, a, o, &b1);
133  const auto p = b1.sum();
134  if (checkDifferentSmall(0.0, p)) {
135  b1 /= p;
136  const size_t s1 = discretizer(b1);
137 
138  T[a](s, s1) += p;
139  R(s, a) += p * r;
140  }
141  }
142  }
143  }
144 
145  for ( size_t a = 0; a < A; ++a )
146  for ( size_t s = 0; s < S1; ++s ) {
147  R(s, a) /= T[a].row(s).sum();
148 
149  const double sum = T[a].row(s).sum();
150  if ( checkEqualSmall(sum, 0.0) ) T[a](s, s) = 1.0;
151  else T[a].row(s) /= sum;
152  }
153 
154  return std::make_tuple(MDP::Model(NO_CHECK, S1, A, std::move(T), std::move(R), model.getDiscount()), std::move(discretizer));
155  }
156 
157  template <IsModel M>
158  std::tuple<MDP::SparseModel, AMDP::Discretizer> AMDP::discretizeSparse(const M& model) {
159  const size_t S = model.getS(), A = model.getA(), O = model.getO();
160  const size_t S1 = S * buckets_;
161 
162  BeliefGenerator<M> bGen(model);
163  const auto beliefs = bGen(beliefSize_);
164 
166  auto R = MDP::SparseModel::RewardMatrix (S1, A);
167 
168  auto discretizer = makeDiscretizer(S);
169 
170  Belief b1(S);
171  for ( const auto & b : beliefs ) {
172  const size_t s = discretizer(b);
173 
174  for ( size_t a = 0; a < A; ++a ) {
175  const double r = beliefExpectedReward(model, b, a);
176 
177  for ( size_t o = 0; o < O; ++o ) {
178  updateBeliefUnnormalized(model, b, a, o, &b1);
179  const auto p = b1.sum();
180  if (checkDifferentSmall(0.0, p)) {
181  b1 /= p;
182  const size_t s1 = discretizer(b1);
183 
184  T[a].coeffRef(s, s1) += p;
185  if (checkDifferentSmall(0.0, r))
186  R.coeffRef(s, a) += p * r;
187  }
188  }
189  }
190  }
191 
192  for ( size_t a = 0; a < A; ++a ) {
193  for ( size_t s = 0; s < S1; ++s ) {
194  if (checkDifferentSmall(0.0, R.coeff(s, a)))
195  R.coeffRef(s, a) /= T[a].row(s).sum();
196 
197  const double sum = T[a].row(s).sum();
198  if ( checkEqualSmall(sum, 0.0) ) T[a].coeffRef(s, s) = 1.0;
199  else T[a].row(s) /= sum;
200  }
201  T[a].makeCompressed();
202  }
203  R.makeCompressed();
204 
205  return std::make_tuple(MDP::SparseModel(NO_CHECK, S1, A, std::move(T), std::move(R), model.getDiscount()), std::move(discretizer));
206  }
207 }
208 
209 #endif
AIToolbox::checkDifferentSmall
bool checkDifferentSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably different.
Definition: Core.hpp:60
AIToolbox::POMDP
Definition: AMDP.hpp:14
AIToolbox::MDP::SparseModel::TransitionMatrix
SparseMatrix3D TransitionMatrix
Definition: SparseModel.hpp:79
SparseModel.hpp
AIToolbox::NO_CHECK
struct AIToolbox::NoCheck NO_CHECK
Model.hpp
TypeTraits.hpp
AIToolbox::POMDP::AMDP
This class implements the Augmented MDP algorithm.
Definition: AMDP.hpp:41
AIToolbox::POMDP::AMDP::getEntropyBuckets
size_t getEntropyBuckets() const
This function returns the currently set number of entropy buckets.
AIToolbox::MDP::SparseModel::RewardMatrix
SparseMatrix2D RewardMatrix
Definition: SparseModel.hpp:80
AIToolbox::POMDP::BeliefGenerator
This class generates reachable beliefs from a given Model.
Definition: BeliefGenerator.hpp:18
AIToolbox::POMDP::AMDP::AMDP
AMDP(size_t nBeliefs, size_t entropyBuckets)
Basic constructor.
AIToolbox::POMDP::beliefExpectedReward
double beliefExpectedReward(const M &model, const Belief &b, const size_t a)
This function computes an immediate reward based on a belief rather than a state.
Definition: Utils.hpp:471
AIToolbox::POMDP::AMDP::Discretizer
std::function< size_t(const Belief &)> Discretizer
Definition: AMDP.hpp:43
AIToolbox::MDP::Model::TransitionMatrix
Matrix3D TransitionMatrix
Definition: Model.hpp:72
AIToolbox::POMDP::AMDP::discretizeSparse
std::tuple< MDP::SparseModel, Discretizer > discretizeSparse(const M &model)
This function constructs an approximate sparse MDP of the provided POMDP model.
Definition: AMDP.hpp:158
AIToolbox::checkEqualSmall
bool checkEqualSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably equal.
Definition: Core.hpp:45
Types.hpp
Types.hpp
AIToolbox::POMDP::AMDP::setEntropyBuckets
void setEntropyBuckets(size_t buckets)
This function sets the new number of buckets in which to discretize the entropy.
AIToolbox::POMDP::AMDP::getBeliefSize
size_t getBeliefSize() const
This function returns the currently set number of sampled beliefs.
AIToolbox::POMDP::AMDP::discretizeDense
std::tuple< MDP::Model, Discretizer > discretizeDense(const M &model)
This function constructs an approximate dense MDP of the provided POMDP model.
Definition: AMDP.hpp:111
AIToolbox::MDP::SparseModel
This class represents a Markov Decision Process.
Definition: SparseModel.hpp:77
AIToolbox::POMDP::updateBeliefUnnormalized
void updateBeliefUnnormalized(const M &model, const Belief &b, const size_t a, const size_t o, Belief *bRet)
Creates a new belief reflecting changes after an action and observation for a particular Model.
Definition: Utils.hpp:171
AIToolbox::MDP::Model
This class represents a Markov Decision Process.
Definition: Model.hpp:70
AIToolbox::POMDP::AMDP::setBeliefSize
void setBeliefSize(size_t nBeliefs)
This function sets a new number of sampled beliefs.
AIToolbox::POMDP::Belief
ProbabilityVector Belief
This represents a belief, which is a probability distribution over states.
Definition: Types.hpp:12
BeliefGenerator.hpp
AIToolbox::MDP::Model::RewardMatrix
Matrix2D RewardMatrix
Definition: Model.hpp:73
AIToolbox::SparseMatrix2D
Eigen::SparseMatrix< double, Eigen::RowMajor > SparseMatrix2D
Definition: Types.hpp:19
Utils.hpp