AIToolbox
A library that offers tools for AI problem solving.
QGreedyPolicy.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_FACTORED_BANDIT_Q_GREEDY_POLICY_HEADER_FILE
2 #define AI_TOOLBOX_FACTORED_BANDIT_Q_GREEDY_POLICY_HEADER_FILE
3 
7 
10 
/**
 * @brief This class implements a greedy policy through a QFunction.
 *
 * The policy stores a non-owning pointer to the Q-function it was built
 * from (either a FilterMap<QFunctionRule> or a QFunction); that object must
 * therefore outlive the policy.
 *
 * @tparam Maximizer Type used to pick the best action. It must expose a
 *                   nested Graph type and be callable as max_(A, graph_).
 */
21  template <typename Maximizer = VariableElimination>
22  class QGreedyPolicy : public PolicyInterface {
23  public:
/**
 * @brief Basic constructor with QFunctionRules.
 *
 * @param a Forwarded (moved) to the base class constructor;
 *          presumably the action space -- confirm against PolicyInterface.
 * @param q Rules to act greedily on; only its address is stored.
 * @param args Forwarded to the Maximizer constructor.
 */
31  template <typename... Args>
32  QGreedyPolicy(Action a, const FilterMap<QFunctionRule> & q, Args && ...args);
33 
/**
 * @brief Constructor taking a QFunction instead of a set of rules.
 *
 * @param a Forwarded (moved) to the base class constructor;
 *          presumably the action space -- confirm against PolicyInterface.
 * @param q QFunction to act greedily on; only its address is stored.
 * @param args Forwarded to the Maximizer constructor.
 */
41  template <typename... Args>
42  QGreedyPolicy(Action a, const QFunction & q, Args && ...args);
43 
/**
 * @brief This function chooses the greediest action.
 *
 * @return The action selected by the maximizer.
 */
49  virtual Action sampleAction() const override;
50 
/**
 * @brief This function returns the probability of taking the specified action.
 *
 * @param a The action to evaluate.
 *
 * @return 1.0 if a equals the action returned by sampleAction(), 0.0 otherwise.
 */
58  virtual double getActionProbability(const Action & a) const override;
59 
/**
 * @brief This function returns a reference to the internal maximizer.
 */
65  Maximizer & getMaximizer();
66 
/**
 * @brief This function returns a const reference to the internal maximizer.
 */
70  const Maximizer & getMaximizer() const;
71 
/**
 * @brief This function returns the currently set graph.
 */
75  const typename Maximizer::Graph & getGraph() const;
76 
77  private:
// Exactly one of these two pointers is non-null, depending on which
// constructor was used. Neither is owned by the policy.
78  const FilterMap<QFunctionRule> * qc_;
79  const QFunction * qm_;
80 
// Both are mutable because the const sampleAction() refreshes graph_ and
// invokes max_ (presumably max_'s call operator is non-const -- confirm).
81  mutable Maximizer max_;
82  mutable typename Maximizer::Graph graph_;
83  };
84 
/**
 * @brief Basic constructor with QFunctionRules.
 *
 * Stores the address of q (q must outlive the policy), leaves the QFunction
 * pointer null, builds the maximizer from the forwarded arguments and
 * creates the initial graph from q.
 *
 * @param a Moved into the base class constructor.
 * @param q Rules to act greedily on; not copied.
 * @param args Forwarded to the Maximizer constructor.
 */
85  template <typename Maximizer>
86  template <typename... Args>
87  QGreedyPolicy<Maximizer>::QGreedyPolicy(Action a, const FilterMap<QFunctionRule> & q, Args && ...args) :
88  Base(std::move(a)), qc_(&q), qm_(nullptr),
89  max_(std::forward<Args>(args)...),
// A is not declared in this file; presumably the action space held by the
// base class -- confirm against PolicyInterface.
90  graph_(MakeGraph<Maximizer>()(q, A))
91  {}
92 
/**
 * @brief Constructor taking a QFunction instead of a set of rules.
 *
 * Stores the address of q (q must outlive the policy), leaves the rules
 * pointer null, builds the maximizer from the forwarded arguments and
 * creates the initial graph from q.
 *
 * @param a Moved into the base class constructor.
 * @param q QFunction to act greedily on; not copied.
 * @param args Forwarded to the Maximizer constructor.
 */
93  template <typename Maximizer>
94  template <typename... Args>
95  QGreedyPolicy<Maximizer>::QGreedyPolicy(Action a, const QFunction & q, Args && ...args) :
96  Base(std::move(a)), qc_(nullptr), qm_(&q),
97  max_(std::forward<Args>(args)...),
// A is not declared in this file; presumably the action space held by the
// base class -- confirm against PolicyInterface.
98  graph_(MakeGraph<Maximizer>()(q, A))
99  {}
100 
/**
 * @brief This function chooses the greediest action.
 *
 * The graph is first refreshed from whichever Q-function representation the
 * policy was constructed with (the constructors set exactly one of qc_ / qm_
 * to a non-null pointer), then the maximizer is run on it.
 *
 * @return The first element of the tuple returned by the maximizer.
 */
101  template <typename Maximizer>
102  Action QGreedyPolicy<Maximizer>::sampleAction() const {
103  if (qc_) {
104  UpdateGraph<Maximizer>()(graph_, *qc_, A);
105  } else {
106  UpdateGraph<Maximizer>()(graph_, *qm_, A);
107  }
108  return std::get<0>(max_(A, graph_));
109  }
110 
/**
 * @brief This function returns the probability of taking the specified action.
 *
 * The policy is deterministic: the result is 1.0 when a compares equal to
 * the action returned by sampleAction(), and 0.0 otherwise. Note that every
 * call invokes sampleAction(), which refreshes the graph and runs the
 * maximizer.
 *
 * @param a The action to evaluate.
 *
 * @return 1.0 or 0.0, as described above.
 */
111  template <typename Maximizer>
112  double QGreedyPolicy<Maximizer>::getActionProbability(const Action & a) const {
113  if (veccmp(a, sampleAction()) == 0) return 1.0;
114  return 0.0;
115  }
116 
/**
 * @brief This function returns a reference to the internal maximizer.
 *
 * @return A mutable reference to max_, allowing the caller to modify it.
 */
117  template <typename Maximizer>
118  Maximizer & QGreedyPolicy<Maximizer>::getMaximizer() {
119  return max_;
120  }
121 
/**
 * @brief This function returns a const reference to the internal maximizer.
 *
 * @return A read-only reference to max_.
 */
122  template <typename Maximizer>
123  const Maximizer & QGreedyPolicy<Maximizer>::getMaximizer() const {
124  return max_;
125  }
126 
/**
 * @brief This function returns the currently set graph.
 *
 * @return A read-only reference to graph_, as built at construction and
 *         last refreshed by sampleAction().
 */
127  template <typename Maximizer>
128  const typename Maximizer::Graph & QGreedyPolicy<Maximizer>::getGraph() const {
129  return graph_;
130  }
131 }
132 
133 #endif
AIToolbox::PolicyInterface< void, void, Action >
This class represents the base interface for policies in games and bandits.
Definition: PolicyInterface.hpp:110
AIToolbox::Factored::Bandit::QGreedyPolicy::sampleAction
virtual Action sampleAction() const override
This function chooses the greediest action.
Definition: QGreedyPolicy.hpp:102
AIToolbox::Factored::Bandit::QGreedyPolicy::QGreedyPolicy
QGreedyPolicy(Action a, const FilterMap< QFunctionRule > &q, Args &&...args)
Basic constructor with QFunctionRules.
Definition: QGreedyPolicy.hpp:87
AIToolbox::Factored::FactoredVector
This class represents a factored vector.
Definition: FactoredMatrix.hpp:60
AIToolbox::Factored::Bandit::QGreedyPolicy
This class implements a greedy policy through a QFunction.
Definition: QGreedyPolicy.hpp:22
VariableElimination.hpp
Types.hpp
AIToolbox::Factored::Bandit::MakeGraph
This class is the public interface for initializing the graph in generic code that uses the maximizer...
Definition: GraphUtils.hpp:64
AIToolbox::veccmp
std::strong_ordering veccmp(const V &lhs, const V &rhs)
This function compares two general vectors of equal size lexicographically.
Definition: Core.hpp:158
FilterMap.hpp
PolicyInterface.hpp
AIToolbox::Factored::Bandit::QGreedyPolicy::getActionProbability
virtual double getActionProbability(const Action &a) const override
This function returns the probability of taking the specified action.
Definition: QGreedyPolicy.hpp:112
GraphUtils.hpp
AIToolbox::Factored::Bandit::QGreedyPolicy::getGraph
const Maximizer::Graph & getGraph() const
This function returns the currently set graph.
Definition: QGreedyPolicy.hpp:128
AIToolbox::Factored::Action
Factors Action
Definition: Types.hpp:69
AIToolbox::Factored::Bandit::PolicyInterface
Simple typedef for most of a normal Bandit's policy needs.
Definition: PolicyInterface.hpp:11
AIToolbox::Factored::FilterMap
This class is a container which uses PartialFactors as keys.
Definition: FilterMap.hpp:22
AIToolbox::Factored::Bandit::QGreedyPolicy::getMaximizer
Maximizer & getMaximizer()
This function returns a reference to the internal maximizer.
Definition: QGreedyPolicy.hpp:118
AIToolbox::Factored::Bandit::UpdateGraph
This class is the public interface for updating the input graph with the input data in generic code t...
Definition: GraphUtils.hpp:85
AIToolbox::Factored::Bandit
Definition: GraphUtils.hpp:12