AIToolbox
A library that offers tools for AI problem solving.
Utils.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_MDP_UTILS_HEADER_FILE
2 #define AI_TOOLBOX_MDP_UTILS_HEADER_FILE
3 
4 #include <stddef.h>
7 
8 namespace AIToolbox::MDP {
/// Creates and zeroes a QFunction (declaration only; the definition is not in this header).
/// NOTE(review): S and A are presumably the number of states and actions, matching the
/// QFunction(S, A) construction in computeImmediateRewards - confirm against the definition.
20  QFunction makeQFunction(size_t S, size_t A);
21 
33 
45 
/// Converts a QFunction into the equivalent optimal ValueFunction (declaration only;
/// the definition is not in this header).
/// NOTE(review): the result is presumably written into *v, which would then need to be
/// non-null - confirm against the definition.
60  void bellmanOperatorInplace(const QFunction & q, ValueFunction * v);
61 
76  template <IsModel M>
77  Matrix2D computeImmediateRewards(const M & model) {
78  if constexpr(IsModelEigen<M>) {
79  return model.getRewardFunction();
80  } else {
81  const auto S = model.getS();
82  const auto A = model.getA();
83 
84  auto ir = QFunction(S, A);
85  ir.setZero();
86  for ( size_t s = 0; s < S; ++s )
87  for ( size_t a = 0; a < A; ++a )
88  for ( size_t s1 = 0; s1 < S; ++s1 )
89  ir(s, a) += model.getTransitionProbability(s,a,s1) * model.getExpectedReward(s,a,s1);
90  return ir;
91  }
92  }
93 
105  template <IsModel M>
106  QFunction computeQFunction(const M & model, const Values & v, QFunction ir) {
107  const auto A = model.getA();
108 
109  if constexpr(IsModelEigen<M>) {
110  for ( size_t a = 0; a < A; ++a )
111  ir.col(a).noalias() += model.getTransitionFunction(a) * v;
112  } else {
113  const auto S = model.getS();
114  for ( size_t s = 0; s < S; ++s )
115  for ( size_t a = 0; a < A; ++a )
116  for ( size_t s1 = 0; s1 < S; ++s1 )
117  ir(s, a) += model.getTransitionProbability(s,a,s1) * v[s1];
118  }
119  return ir;
120  }
121 }
122 
123 #endif
AIToolbox::MDP::computeQFunction
QFunction computeQFunction(const M &model, const Values &v, QFunction ir)
This function computes the Model's QFunction from the values of a ValueFunction.
Definition: Utils.hpp:106
AIToolbox::MDP::bellmanOperatorInplace
void bellmanOperatorInplace(const QFunction &q, ValueFunction *v)
This function converts a QFunction into the equivalent optimal ValueFunction.
AIToolbox::MDP::makeQFunction
QFunction makeQFunction(size_t S, size_t A)
This function creates and zeroes a QFunction.
AIToolbox::MDP::computeImmediateRewards
Matrix2D computeImmediateRewards(const M &model)
This function computes all immediate rewards (state and action) of the MDP once for improved speed.
Definition: Utils.hpp:77
AIToolbox::MDP::QFunction
Matrix2D QFunction
Definition: Types.hpp:52
AIToolbox::MDP::bellmanOperator
ValueFunction bellmanOperator(const QFunction &q)
This function converts a QFunction into the equivalent optimal ValueFunction.
AIToolbox::Matrix2D
Eigen::Matrix< double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor|Eigen::AutoAlign > Matrix2D
Definition: Types.hpp:18
AIToolbox::MDP
Definition: DoubleQLearning.hpp:10
AIToolbox::MDP::Values
Vector Values
Definition: Types.hpp:44
AIToolbox::POMDP::ValueFunction
std::vector< VList > ValueFunction
Definition: Types.hpp:78
Types.hpp
TypeTraits.hpp
AIToolbox::MDP::makeValueFunction
ValueFunction makeValueFunction(size_t S)
This function creates and zeroes a ValueFunction.