AIToolbox
A library that offers tools for AI problem solving.
DynaQ.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_MDP_DYNAQ_HEADER_FILE
2 #define AI_TOOLBOX_MDP_DYNAQ_HEADER_FILE
3 
7 #include <AIToolbox/Seeder.hpp>
8 
9 #include <boost/functional/hash.hpp>
10 #include <utility>
11 #include <unordered_set>
12 #include <vector>
13 
14 namespace AIToolbox::MDP {
33  template <IsGenerativeModel M>
34  class DynaQ {
35  public:
43  explicit DynaQ(const M & m, double alpha = 0.5, unsigned n = 50);
44 
64  void stepUpdateQ(size_t s, size_t a, size_t s1, double rew);
65 
78  void batchUpdateQ();
79 
88  void setLearningRate(double a);
89 
95  double getLearningRate() const;
96 
102  void setN(unsigned n);
103 
109  unsigned getN() const;
110 
116  const QFunction & getQFunction() const;
117 
123  const M & getModel() const;
124 
125  private:
126  unsigned N;
127  const M & model_;
128  QLearning qLearning_;
129 
130  // We use two structures because generally S*A is not THAT big, and we can definitely use
131  // the O(1) insertion and O(1) sampling time.
132  std::unordered_set<std::pair<size_t,size_t>, boost::hash<std::pair<size_t, size_t>>> visitedStatesActionsInserter_;
133  std::vector<std::pair<size_t,size_t>> visitedStatesActionsSampler_;
134 
135  // Stuff for batch update
136  mutable RandomEngine rand_;
137  };
138 
139  template <IsGenerativeModel M>
140  DynaQ<M>::DynaQ(const M & m, const double alpha, const unsigned n) :
141  N(n), model_(m), qLearning_(model_, alpha), rand_(Seeder::getSeed())
142  {
143  visitedStatesActionsInserter_.reserve(model_.getS()*model_.getA());
144  visitedStatesActionsSampler_.reserve(model_.getS()*model_.getA());
145  }
146 
147  template <IsGenerativeModel M>
148  void DynaQ<M>::stepUpdateQ(const size_t s, const size_t a, const size_t s1, const double rew) {
149  qLearning_.stepUpdateQ(s, a, s1, rew);
150  // O(1) insertion...
151  const auto result = visitedStatesActionsInserter_.insert(std::make_pair(s,a));
152  if ( std::get<1>(result) )
153  visitedStatesActionsSampler_.push_back(*std::get<0>(result));
154  }
155 
156  template <IsGenerativeModel M>
158  if ( ! visitedStatesActionsSampler_.size() ) return;
159  std::uniform_int_distribution<size_t> sampleDistribution_(0, visitedStatesActionsSampler_.size()-1);
160 
161  for ( unsigned i = 0; i < N; ++i ) {
162  // O(1) sampling...
163  const auto [s,a] = visitedStatesActionsSampler_[sampleDistribution_(rand_)];
164  const auto [s1, rew] = model_.sample(s, a);
165 
166  qLearning_.stepUpdateQ(s, a, s1, rew);
167  }
168  }
169 
170  template <IsGenerativeModel M>
171  unsigned DynaQ<M>::getN() const {
172  return N;
173  }
174 
175  template <IsGenerativeModel M>
177  return qLearning_.getQFunction();
178  }
179  template <IsGenerativeModel M>
180  const M & DynaQ<M>::getModel() const {
181  return model_;
182  }
183 
184  template <IsGenerativeModel M>
185  void DynaQ<M>::setLearningRate(const double a) {
186  qLearning_.setLearningRate(a);
187  }
188 
189  template <IsGenerativeModel M>
190  double DynaQ<M>::getLearningRate() const {
191  return qLearning_.getLearningRate();
192  }
193 }
194 
195 #endif
AIToolbox::MDP::DynaQ::getLearningRate
double getLearningRate() const
This function returns the currently set learning rate parameter.
Definition: DynaQ.hpp:190
AIToolbox::MDP::DynaQ::getN
unsigned getN() const
This function returns the currently set number of sampling passes during batchUpdateQ().
Definition: DynaQ.hpp:171
AIToolbox::MDP::QFunction
Matrix2D QFunction
Definition: Types.hpp:52
AIToolbox::MDP::DynaQ::getModel
const M & getModel() const
This function returns a reference to the Model passed at construction.
Definition: DynaQ.hpp:180
AIToolbox::MDP::DynaQ::setLearningRate
void setLearningRate(double a)
This function sets the learning rate parameter.
Definition: DynaQ.hpp:185
AIToolbox::Seeder
This class is an internal class used to seed all random engines in the library.
Definition: Seeder.hpp:15
AIToolbox::MDP::DynaQ::stepUpdateQ
void stepUpdateQ(size_t s, size_t a, size_t s1, double rew)
This function updates the internal QFunction.
Definition: DynaQ.hpp:148
AIToolbox::MDP::DynaQ::batchUpdateQ
void batchUpdateQ()
This function updates a QFunction based on simulated experience.
Definition: DynaQ.hpp:157
AIToolbox::MDP::QLearning
This class represents the QLearning algorithm.
Definition: QLearning.hpp:44
AIToolbox::MDP::DynaQ::DynaQ
DynaQ(const M &m, double alpha=0.5, unsigned n=50)
Basic constructor.
Definition: DynaQ.hpp:140
AIToolbox::MDP::DynaQ::setN
void setN(unsigned n)
This function sets the current sample number parameter.
AIToolbox::MDP::DynaQ
This class represents the DynaQ algorithm.
Definition: DynaQ.hpp:34
AIToolbox::MDP
Definition: DoubleQLearning.hpp:10
Seeder.hpp
AIToolbox::MDP::DynaQ::getQFunction
const QFunction & getQFunction() const
This function returns a reference to the internal QFunction.
Definition: DynaQ.hpp:176
AIToolbox::RandomEngine
std::mt19937 RandomEngine
Definition: Types.hpp:14
QLearning.hpp
Types.hpp
TypeTraits.hpp