1 #ifndef AI_TOOLBOX_BANDIT_Q_GREEDY_POLICY_WRAPPER_HEADER_FILE
2 #define AI_TOOLBOX_BANDIT_Q_GREEDY_POLICY_WRAPPER_HEADER_FILE
// Template header with two type parameters, V and Gen. Presumably this opens
// the class template declaration; the class-key line and most members are not
// visible in this excerpt.
20 template <
typename V,
typename Gen>
// Reference to a vector of indices that is not owned by this object: the
// constructor binds it to the vector passed in by the caller and asserts that
// it has as many elements as q_. The visible code writes candidate indices
// into it and reads the selected index back out, i.e. it serves as scratch
// storage. Being a reference, the referenced vector must outlive this object.
67 std::vector<size_t> & buffer_;
72 template <
typename V,
typename Gen>
76 template <
typename V,
typename Gen>
// Constructor fragment (the signature line and braces are not visible in this
// excerpt). The initializer list moves the `q` argument into q_, binds the
// reference member buffer_ to the caller-supplied `buffer`, and initializes
// rand_ from `gen`.
79 template <
typename V,
typename Gen>
81 : q_(std::move(q)), buffer_(buffer), rand_(gen)
// Precondition check (active only when assert is enabled): buffer_ must hold
// exactly one slot per element of q_. The loops elsewhere in this file use
// buffer_.size() as the bound while indexing q_, so they rely on this.
// The cast presumably exists because q_.size() may return a signed type for
// some V -- TODO confirm against the types V is instantiated with.
83 assert(
static_cast<size_t>(q_.size()) == buffer_.size());
// Fragment of a member function that returns one index taken from buffer_,
// chosen uniformly at random among the first bestActionCount entries.
// It appears to implement greedy selection with random tie-breaking over the
// values in q_; the signature, the tie condition, the counter updates and the
// body of the "strictly better" branch are not visible in this excerpt.
// NOTE(review): q_[0] is read unconditionally, so q_ must be non-empty.
86 template <
typename V,
typename Gen>
// Index 0 is the initial best candidate, which is why the scan starts at 1.
92 double bestValue = q_[0];
unsigned bestActionCount = 1;
// The bound is buffer_.size(); the constructor asserts it equals q_.size().
93 for (
size_t a = 1; a < buffer_.size(); ++a ) {
94 const double val = q_[a];
// Store index `a` in the next free candidate slot. Presumably this runs when
// val ties with bestValue and is followed by an increment of
// bestActionCount -- the guarding condition is not visible here.
99 buffer_[bestActionCount] = a;
// Strictly larger value found. Presumably the candidate list is reset to
// contain only `a` (branch body not visible).
102 else if ( val > bestValue ) {
// uniform_int_distribution takes an inclusive [min, max] range, hence the -1.
108 auto pickDistribution = std::uniform_int_distribution<unsigned>(0, bestActionCount-1);
109 const unsigned selection = pickDistribution(rand_);
// Map the random slot number back to the index stored in that slot.
111 return buffer_[selection];
// Fragment of a member function that works on a single index `a` (signature,
// branch bodies and return statement are not visible in this excerpt).
// It takes q_[a] as the reference value and compares it with every entry of
// q_. Presumably `count` ends up as the number of entries equal to q_[a], and
// finding a strictly larger entry means `a` is not a maximizer -- TODO confirm
// against the full source.
114 template <
typename V,
typename Gen>
// Reference value is the entry for `a` itself; the counter starts at 0
// because the scan below starts at index 0 and therefore visits `a` as well.
116 const double max = q_[a];
unsigned count = 0;
// The bound is buffer_.size(); the constructor asserts it equals q_.size().
117 for (
size_t aa = 0; aa < buffer_.size(); ++aa ) {
118 const double val = q_[aa];
// Some entry is strictly larger than q_[a] (handling not visible here).
123 else if ( val > max ) {
// Fragment of a member function template: the outer header repeats the class
// parameters V and Gen, the inner one adds a type parameter P. The signature,
// the loop bodies and whatever is produced for P are not visible in this
// excerpt.
// The visible code makes two passes over the indices of q_. The first tracks
// a running maximum and, presumably, how many entries attain it. The second
// visits every index again, presumably to fill in a per-index result from
// `max` and `count` -- TODO confirm against the full source.
// NOTE(review): q_[0] is read unconditionally, so q_ must be non-empty.
130 template <
typename V,
typename Gen>
131 template <
typename P>
// Index 0 seeds the running maximum, so the first pass starts at index 1.
133 double max = q_[0];
unsigned count = 1;
134 for (
size_t aa = 1; aa < buffer_.size(); ++aa ) {
135 const double val = q_[aa];
// New strict maximum found (the update of max/count is not visible here).
140 else if ( val > max ) {
// Second pass covers all indices, including 0.
145 for (
size_t aa = 0; aa < buffer_.size(); ++aa ) {