AIToolbox
A library that offers tools for AI problem solving.
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_MDP_DYNA2_HEADER_FILE
2 #define AI_TOOLBOX_MDP_DYNA2_HEADER_FILE
29 template <IsGenerativeModel M>
/// Constructs a Dyna2 instance on top of a model of type M
/// (M is constrained by the IsGenerativeModel concept).
///
/// @param m          The model instance, taken by const reference.
/// @param alpha      Forwarded unchanged to both internal SARSAL learners
///                   (permanent and transient); defaults to 0.1.
/// @param lambda     Forwarded unchanged to both internal SARSAL learners;
///                   defaults to 0.9.
/// @param tolerance  Forwarded unchanged to both internal SARSAL learners;
///                   defaults to 0.001.
/// @param n          Defaults to 50. NOTE(review): presumably the number of
///                   sampled steps performed per batch update -- confirm
///                   against the full header.
41 explicit Dyna2(
const M & m,
double alpha = 0.1,
double lambda = 0.9,
double tolerance = 0.001,
unsigned n = 50);
/// Performs a single update step from one observed transition.
///
/// NOTE(review): the visible implementation fragment appears to copy the
/// permanent learner's traces into the transient learner and then call
/// stepUpdateQ with these same arguments on both learners -- confirm
/// against the full definition.
///
/// @param s    presumably the state the transition started from -- confirm.
/// @param a    presumably the action taken in s -- confirm.
/// @param s1   presumably the state reached after the action -- confirm.
/// @param a1   presumably the action selected in s1 -- confirm.
/// @param rew  presumably the reward obtained for the transition -- confirm.
63 void stepUpdateQ(
size_t s,
size_t a,
size_t s1,
size_t a1,
double rew);
/// Setter paired with getN().
///
/// NOTE(review): the body is not visible here; presumably stores the bound
/// on the number of sampled steps used by the batch update -- confirm.
///
/// @param n  The new value.
148 void setN(
unsigned n);
/// Const getter paired with setN().
///
/// NOTE(review): the body is not visible here; presumably returns the value
/// supplied at construction or through setN() -- confirm.
///
/// @return An unsigned value.
155 unsigned getN()
const;
203 SARSAL permanentLearning_;
204 SARSAL transientLearning_;
205 std::unique_ptr<PolicyInterface> internalPolicy_;
208 template <IsGenerativeModel M>
209 Dyna2<M>::Dyna2(
const M & m,
const double alpha,
const double lambda,
const double tolerance,
const unsigned n) :
211 permanentLearning_(model_, alpha, lambda, tolerance),
212 transientLearning_(model_, alpha, lambda, tolerance),
217 template <IsGenerativeModel M>
235 transientLearning_.setTraces(permanentLearning_.getTraces());
236 permanentLearning_.stepUpdateQ(s, a, s1, a1, rew);
237 transientLearning_.stepUpdateQ(s, a, s1, a1, rew);
240 template <IsGenerativeModel M>
246 transientLearning_.clearTraces();
249 size_t a = internalPolicy_->sampleAction(s);
250 for (
unsigned i = 0; i < N; ++i ) {
251 const auto [s1, rew] = model_.sampleSR(s, a);
252 const size_t a1 = internalPolicy_->sampleAction(s1);
254 transientLearning_.stepUpdateQ(s, a, s1, a1, rew);
256 if (model_.isTerminal(s1)) {
258 a = internalPolicy_->sampleAction(s);
266 template <IsGenerativeModel M>
268 transientLearning_.setQFunction(permanentLearning_.getQFunction());
270 template <IsGenerativeModel M>
272 internalPolicy_.reset(p);
275 template <IsGenerativeModel M>
280 template <IsGenerativeModel M>
282 transientLearning_.setTolerance(t);
283 permanentLearning_.setTolerance(t);
286 template <IsGenerativeModel M>
288 return permanentLearning_.getTolerance();
291 template <IsGenerativeModel M>
293 return permanentLearning_.getQFunction();
296 template <IsGenerativeModel M>
298 return transientLearning_.getQFunction();
301 template <IsGenerativeModel M>
306 template <IsGenerativeModel M>
308 template <IsGenerativeModel M>
310 template <IsGenerativeModel M>
312 template <IsGenerativeModel M>