|
template<typename... Args> |
| BanditPolicyAdaptor (State s, Args &&... params) |
| Basic constructor. More...
|
|
virtual Action | sampleAction (const State &s) const override |
| This function chooses a random action using the underlying bandit policy. More...
|
|
virtual double | getActionProbability (const State &s, const Action &a) const override |
| This function returns the probability of taking the specified action. More...
|
|
BanditPolicy & | getBanditPolicy () |
| This function returns a reference to the underlying BanditPolicy. More...
|
|
const BanditPolicy & | getBanditPolicy () const |
| This function returns a const reference to the underlying BanditPolicy. More...
|
|
| PolicyInterface (State s, Action a) |
| Basic constructor. More...
|
|
virtual | ~PolicyInterface () |
| Basic virtual destructor. More...
|
|
virtual Action | sampleAction (const State &s) const=0 |
| This function chooses a random action for state s, following the policy distribution. More...
|
|
virtual double | getActionProbability (const State &s, const Action &a) const=0 |
| This function returns the probability of taking the specified action in the specified state. More...
|
|
const State & | getS () const |
| This function returns the number of states of the world. More...
|
|
const Action & | getA () const |
| This function returns the number of available actions to the agent. More...
|
|
template<typename BanditPolicy>
class AIToolbox::Factored::MDP::BanditPolicyAdaptor< BanditPolicy >
This class extends a Bandit policy so that it can be called from MDP code.
This class simply ignores all states that are passed to it, and uses only the actions when sampling from, or otherwise calling, the underlying Bandit code.
- Template Parameters
  - BanditPolicy: The Bandit policy to wrap.