AIToolbox
A library that offers tools for AI problem solving.
BlindStrategies.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_POMDP_BLIND_STRATEGIES_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_BLIND_STRATEGIES_HEADER_FILE
3 
6 
10 
11 namespace AIToolbox::POMDP {
// NOTE(review): the `class BlindStrategies {` opening line (and its class-level
// doc comment) is collapsed out of this doxygen view; the members below belong
// to that class, which computes per-action "blind policy" lower bounds for a POMDP.
29  public:
// Basic constructor.
//
// @param horizon   Maximum number of value-iteration steps per action.
// @param tolerance Convergence threshold on the update step; a value of 0
//                  disables the convergence check (see operator() below,
//                  which treats a tolerance "different from 0" as active).
36  BlindStrategies(unsigned horizon, double tolerance = 0.001);
37 
// Computes the blind-strategy alpha-vectors for the input POMDP model.
//
// @param m                 The model to compute the bound for.
// @param fasterConvergence Whether to seed iteration with a uniform
//                          worst-case value instead of one-step rewards.
// @return A (residual, alpha-vector list) tuple; the residual is 0.0 when
//         no tolerance check is active.
67  template <IsModel M>
68  std::tuple<double, VList> operator()(const M & m, bool fasterConvergence);
69 
// Sets the tolerance parameter used as the convergence threshold.
83  void setTolerance(double tolerance);
84 
// Sets the horizon parameter bounding the number of iterations.
90  void setHorizon(unsigned h);
91 
// Returns the currently set tolerance parameter.
97  double getTolerance() const;
98 
// Returns the currently set horizon parameter.
104  unsigned getHorizon() const;
105 
106  private:
// NOTE(review): stored as size_t although the constructor/setter take
// unsigned — a widening (safe) conversion on assignment.
107  size_t horizon_;
108  double tolerance_;
109  };
110 
111 
112  template <IsModel M>
// For each action, computes the value of the "blind" policy that repeats that
// action forever. The resulting set of alpha-vectors is a simple lower bound
// for the POMDP value function. Returns {max residual across actions, VList};
// the residual is 0.0 when the tolerance check is disabled.
113  std::tuple<double, VList> BlindStrategies::operator()(const M & m, const bool fasterConvergence) {
// Immediate rewards, transposed so that ir.row(a) is the reward vector over
// states for action a (used that way throughout the loop below). The Eigen
// fast path reads the model's reward matrix directly; otherwise rewards are
// computed once up front for speed.
114  const MDP::QFunction ir = [&]{
115  if constexpr(MDP::IsModelEigen<M>) return m.getRewardFunction().transpose();
116  else return MDP::computeImmediateRewards(m).transpose();
117  }();
118  // This function produces a very simple lower bound for the POMDP. The
119  // bound for each action is computed assuming to take the same action forever
120  // (so the bound for action 0 assumes to forever take action 0, the bound for
121  // action 1 assumes to take action 1, etc.).
122  VList retval;
123 
// A tolerance of exactly 0 disables the convergence check: iteration then
// runs for the full horizon_ regardless of the size of the update step.
124  const bool useTolerance = checkDifferentSmall(tolerance_, 0.0);
125 
126  double maxVariation = 0.0;
127  for (size_t a = 0; a < m.getA(); ++a) {
128  auto newAlpha = Vector(m.getS());
129  auto oldAlpha = Vector(m.getS());
130  // Note that here we can take the minimum for each action
131  // separately, since the implied policy will take that action
132  // forever anyway so there cannot be "cross-pollination" between
133  // different actions.
// Seed choice: either the uniform worst-case discounted value
// (min reward forever; the max(0.0001, ...) guards against division
// by ~0 when the discount is 1), or plain one-step rewards.
134  if (fasterConvergence)
135  oldAlpha.fill(ir.row(a).minCoeff() / std::max(0.0001, 1.0 - m.getDiscount()));
136  else
137  oldAlpha = ir.row(a);
138 
139  unsigned timestep = 0;
// Start variation above tolerance_ so the loop condition cannot
// spuriously terminate before the first backup.
140  double variation = tolerance_ * 2; // Make it bigger
141  while ( timestep < horizon_ && ( !useTolerance || variation > tolerance_ ) ) {
142  ++timestep;
// One Bellman backup for the fixed action a:
// newAlpha = R(a) + discount * T(a) * oldAlpha.
143  if constexpr(MDP::IsModelEigen<M>) {
144  newAlpha = ir.row(a) + (m.getDiscount() * m.getTransitionFunction(a) * oldAlpha).transpose();
145  } else {
// Scalar fallback for non-Eigen models: same backup, explicit sums.
146  newAlpha = ir.row(a);
147  for (size_t s = 0; s < m.getS(); ++s) {
148  double sum = 0.0;
149  for (size_t s1 = 0; s1 < m.getS(); ++s1)
150  sum += m.getTransitionProbability(s, a, s1) * oldAlpha[s1];
151  newAlpha[s] += m.getDiscount() * sum;
152  }
153  }
154 
// Infinity-norm of the update step, used as the convergence residual.
155  if (useTolerance)
156  variation = (oldAlpha - newAlpha).cwiseAbs().maxCoeff();
157 
// newAlpha is moved-from here but fully reassigned on the next
// iteration before being read again.
158  oldAlpha = std::move(newAlpha);
159  }
160  maxVariation = std::max(maxVariation, variation);
// Store the converged vector as a VEntry for action a with an empty
// observation list (blind strategies ignore observations).
161  retval.emplace_back(std::move(oldAlpha), a, VObs(0));
162  }
163  return std::make_tuple(useTolerance ? maxVariation : 0.0, std::move(retval));
164  }
165 }
166 
167 #endif
AIToolbox::checkDifferentSmall
bool checkDifferentSmall(const double a, const double b)
This function checks if two doubles near [0,1] are reasonably different.
Definition: Core.hpp:60
AIToolbox::POMDP
Definition: AMDP.hpp:14
Core.hpp
AIToolbox::POMDP::BlindStrategies::setTolerance
void setTolerance(double tolerance)
This function sets the tolerance parameter.
AIToolbox::MDP::computeImmediateRewards
Matrix2D computeImmediateRewards(const M &model)
This function computes all immediate rewards (state and action) of the MDP once for improved speed.
Definition: Utils.hpp:77
AIToolbox::MDP::QFunction
Matrix2D QFunction
Definition: Types.hpp:52
TypeTraits.hpp
AIToolbox::POMDP::BlindStrategies::BlindStrategies
BlindStrategies(unsigned horizon, double tolerance=0.001)
Basic constructor.
AIToolbox::POMDP::VList
std::vector< VEntry > VList
Definition: Types.hpp:77
AIToolbox::POMDP::BlindStrategies::setHorizon
void setHorizon(unsigned h)
This function sets the horizon parameter.
AIToolbox::Vector
Eigen::Matrix< double, Eigen::Dynamic, 1 > Vector
Definition: Types.hpp:16
AIToolbox::POMDP::BlindStrategies::getHorizon
unsigned getHorizon() const
This function returns the current horizon parameter.
AIToolbox::POMDP::BlindStrategies::operator()
std::tuple< double, VList > operator()(const M &m, bool fasterConvergence)
This function computes the blind strategies for the input POMDP.
Definition: BlindStrategies.hpp:113
Utils.hpp
AIToolbox::POMDP::BlindStrategies
This class implements the blind strategies lower bound.
Definition: BlindStrategies.hpp:28
Prune.hpp
Types.hpp
AIToolbox::POMDP::BlindStrategies::getTolerance
double getTolerance() const
This function returns the currently set tolerance parameter.
AIToolbox::POMDP::VObs
std::vector< size_t > VObs
Definition: Types.hpp:71