1 #ifndef AI_TOOLBOX_POMDP_BLIND_STRATEGIES_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_BLIND_STRATEGIES_HEADER_FILE
/**
 * @brief Computes one alpha vector per action of the input model.
 *
 * For each action the visible implementation repeatedly applies a
 * discounted backup that always uses that same action, stopping when the
 * horizon is reached or (if a tolerance is in use) when the largest
 * per-state change drops to the tolerance or below.
 *
 * @param m The model to solve; it is queried for its state/action counts,
 *          discount, rewards and transitions.
 * @param fasterConvergence When true, each action's starting vector is
 *          filled with that action's minimum reward divided by
 *          max(0.0001, 1 - discount) instead of being set to the
 *          action's reward row.
 *
 * @return A tuple whose first element is the largest final variation
 *         across actions (0.0 when no tolerance is used), and whose
 *         second element is the list of computed entries, one per action.
 */
68 std::tuple<double, VList>
operator()(
const M & m,
bool fasterConvergence);
// NOTE(review): this excerpt is a fragment of a function body; the header,
// some `else` lines, closing braces and a few declarations (e.g. `retval`,
// `useTolerance`, `ir`, `sum`) are not visible here.
// For Eigen-backed models the reward function is returned transposed;
// presumably this feeds `ir`, which is indexed by action row below -- confirm.
115 if constexpr(MDP::IsModelEigen<M>)
return m.getRewardFunction().transpose();
// Largest final variation observed over all actions.
126 double maxVariation = 0.0;
// One independent computation per action.
127 for (
size_t a = 0; a < m.getA(); ++a) {
// Two work vectors with one entry per state: the previous and the next iterate.
128 auto newAlpha =
Vector(m.getS());
129 auto oldAlpha =
Vector(m.getS());
// Optional starting point: the action's minimum reward divided by
// (1 - discount). The 0.0001 floor avoids dividing by zero when the
// discount equals 1.
134 if (fasterConvergence)
135 oldAlpha.fill(ir.row(a).minCoeff() / std::max(0.0001, 1.0 - m.getDiscount()));
// Presumably the `else` branch (keyword not visible in this excerpt):
// start from the action's reward row.
137 oldAlpha = ir.row(a);
139 unsigned timestep = 0;
// Starts at twice the tolerance so that, for a positive tolerance, the
// first loop check does not stop on the tolerance test.
140 double variation = tolerance_ * 2;
// Iterate until the horizon is hit, or, when a tolerance is used, until the
// variation is no longer above it.
// NOTE(review): the timestep increment is not visible in this excerpt.
141 while ( timestep < horizon_ && ( !useTolerance || variation > tolerance_ ) ) {
// Eigen models: a single matrix-vector product performs the backup,
// reward row + discount * T(a) * oldAlpha.
143 if constexpr(MDP::IsModelEigen<M>) {
144 newAlpha = ir.row(a) + (m.getDiscount() * m.getTransitionFunction(a) * oldAlpha).transpose();
// Presumably the non-Eigen `else` branch (keyword not visible): the same
// backup computed by hand, one state at a time.
146 newAlpha = ir.row(a);
147 for (
size_t s = 0; s < m.getS(); ++s) {
// Accumulate the transition-weighted sum of the previous vector over all
// next states s1. NOTE(review): the declaration of `sum` is not visible here.
149 for (
size_t s1 = 0; s1 < m.getS(); ++s1)
150 sum += m.getTransitionProbability(s, a, s1) * oldAlpha[s1];
151 newAlpha[s] += m.getDiscount() * sum;
// Variation is the largest absolute element-wise change between iterates.
// NOTE(review): a guard on this line may exist but is not visible here.
156 variation = (oldAlpha - newAlpha).cwiseAbs().maxCoeff();
// The new iterate becomes the old one for the next pass.
158 oldAlpha = std::move(newAlpha);
// Record the worst variation and store this action's result: the vector,
// the action index, and a VObs constructed with 0.
160 maxVariation = std::max(maxVariation, variation);
161 retval.emplace_back(std::move(oldAlpha), a,
VObs(0));
// Report the variation only when a tolerance was in use; otherwise 0.0.
163 return std::make_tuple(useTolerance ? maxVariation : 0.0, std::move(retval));