|
template<typename... Args> |
| QGreedyPolicy (State s, Action a, const FilterMap< QFunctionRule > &q, Args &&...args) |
| Basic constructor with QFunctionRules. More...
|
|
template<typename... Args> |
| QGreedyPolicy (State s, Action a, const QFunction &q, Args &&...args) |
| Basic constructor with QFunction. More...
|
|
virtual Action | sampleAction (const State &s) const override |
| This function chooses the greediest action for state s. More...
|
|
virtual double | getActionProbability (const State &s, const Action &a) const override |
| This function returns the probability of taking the specified action in the specified state. More...
|
|
Maximizer & | getMaximizer () |
| This function returns a reference to the internal maximizer. More...
|
|
const Maximizer & | getMaximizer () const |
| This function returns a const reference to the internal maximizer. More...
|
|
const Maximizer::Graph & | getGraph () const |
| This function returns the currently set graph. More...
|
|
| PolicyInterface (State s, Action a) |
| Basic constructor. More...
|
|
virtual | ~PolicyInterface () |
| Basic virtual destructor. More...
|
|
virtual Action | sampleAction (const State &s) const=0 |
| This function chooses a random action for state s, following the policy distribution. More...
|
|
virtual double | getActionProbability (const State &s, const Action &a) const=0 |
| This function returns the probability of taking the specified action in the specified state. More...
|
|
const State & | getS () const |
| This function returns the number of states of the world. More...
|
|
const Action & | getA () const |
| This function returns the number of available actions to the agent. More...
|
|
template<typename Maximizer = Bandit::VariableElimination>
class AIToolbox::Factored::MDP::QGreedyPolicy< Maximizer >
This class implements a greedy policy through a QFunction.
This class allows you to effortlessly select the best greedy action from a given list of QFunctionRules, or from a QFunction.
In order to compute the best action, or the probability of a given action, the QGreedyPolicy must run its Maximizer (Bandit::VariableElimination by default) on the stored rules, so the process can get a bit expensive.