AIToolbox
A library that offers tools for AI problem solving.
QGreedyPolicy.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_FACTORED_MDP_Q_GREEDY_POLICY_HEADER_FILE
2 #define AI_TOOLBOX_FACTORED_MDP_Q_GREEDY_POLICY_HEADER_FILE
3 
7 
10 
11 namespace AIToolbox::Factored::MDP {
22  template <typename Maximizer = Bandit::VariableElimination>
23  class QGreedyPolicy : public PolicyInterface<State, State, Action> {
24  public:
26 
35  template <typename... Args>
36  QGreedyPolicy(State s, Action a, const FilterMap<QFunctionRule> & q, Args && ...args);
37 
46  template <typename... Args>
47  QGreedyPolicy(State s, Action a, const QFunction & q, Args && ...args);
48 
56  virtual Action sampleAction(const State & s) const override;
57 
66  virtual double getActionProbability(const State & s, const Action & a) const override;
67 
73  Maximizer & getMaximizer();
74 
78  const Maximizer & getMaximizer() const;
79 
83  const typename Maximizer::Graph & getGraph() const;
84 
85  private:
86  const FilterMap<QFunctionRule> * qc_;
87  const QFunction * qm_;
88 
89  mutable Maximizer max_;
90  mutable typename Maximizer::Graph graph_;
91  };
92 
93  template <typename Maximizer>
94  template <typename... Args>
96  Base(std::move(s), std::move(a)), qc_(&q), qm_(nullptr),
97  max_(std::forward<Args>(args)...),
98  graph_(MakeGraph<Maximizer>()(q, A))
99  {}
100 
101  template <typename Maximizer>
102  template <typename... Args>
103  QGreedyPolicy<Maximizer>::QGreedyPolicy(State s, Action a, const QFunction & q, Args && ...args) :
104  Base(std::move(s), std::move(a)), qc_(nullptr), qm_(&q),
105  max_(std::forward<Args>(args)...),
106  graph_(MakeGraph<Maximizer>()(q, A))
107  {}
108 
109  template <typename Maximizer>
111  if (qc_) {
112  UpdateGraph<Maximizer>()(graph_, qc_->filter(s), S, A, s);
113  } else {
114  UpdateGraph<Maximizer>()(graph_, *qm_, S, A, s);
115  }
116  return std::get<0>(max_(A, graph_));
117  }
118 
119  template <typename Maximizer>
120  double QGreedyPolicy<Maximizer>::getActionProbability(const State & s, const Action & a) const {
121  if (veccmp(a, sampleAction(s)) == 0) return 1.0;
122  return 0.0;
123  }
124 
125  template <typename Maximizer>
127  return max_;
128  }
129 
130  template <typename Maximizer>
131  const Maximizer & QGreedyPolicy<Maximizer>::getMaximizer() const {
132  return max_;
133  }
134 
135  template <typename Maximizer>
136  const typename Maximizer::Graph & QGreedyPolicy<Maximizer>::getGraph() const {
137  return graph_;
138  }
139 }
140 
141 #endif
AIToolbox::Factored::MDP::QGreedyPolicy::QGreedyPolicy
QGreedyPolicy(State s, Action a, const FilterMap< QFunctionRule > &q, Args &&...args)
Basic constructor with QFunctionRules.
Definition: QGreedyPolicy.hpp:95
AIToolbox::Factored::MDP::QGreedyPolicy::getGraph
const Maximizer::Graph & getGraph() const
This function returns the currently set graph.
Definition: QGreedyPolicy.hpp:136
AIToolbox::Factored::MDP::QGreedyPolicy
This class implements a greedy policy through a QFunction.
Definition: QGreedyPolicy.hpp:23
VariableElimination.hpp
AIToolbox::veccmp
std::strong_ordering veccmp(const V &lhs, const V &rhs)
This function compares two general vectors of equal size lexicographically.
Definition: Core.hpp:158
FilterMap.hpp
AIToolbox::Factored::MDP::MakeGraph
This class is the public interface for initializing the graph in generic code that uses the maximizer...
Definition: GraphUtils.hpp:64
AIToolbox::Factored::MDP
Definition: CooperativePrioritizedSweeping.hpp:13
AIToolbox::Factored::State
Factors State
Definition: Types.hpp:67
GraphUtils.hpp
AIToolbox::Factored::FactoredMatrix2D
This class represents a factored 2D matrix.
Definition: FactoredMatrix.hpp:140
AIToolbox::PolicyInterface
This class represents the base interface for policies.
Definition: PolicyInterface.hpp:31
PolicyInterface.hpp
AIToolbox::Factored::MDP::QGreedyPolicy::getActionProbability
virtual double getActionProbability(const State &s, const Action &a) const override
This function returns the probability of taking the specified action in the specified state.
Definition: QGreedyPolicy.hpp:120
AIToolbox::Factored::Action
Factors Action
Definition: Types.hpp:69
AIToolbox::Factored::MDP::QGreedyPolicy::getMaximizer
Maximizer & getMaximizer()
This function returns a reference to the internal maximizer.
Definition: QGreedyPolicy.hpp:126
AIToolbox::Factored::FilterMap
This class is a container which uses PartialFactors as keys.
Definition: FilterMap.hpp:22
AIToolbox::Factored::MDP::QGreedyPolicy::sampleAction
virtual Action sampleAction(const State &s) const override
This function chooses the greediest action for state s.
Definition: QGreedyPolicy.hpp:110
AIToolbox::Factored::MDP::UpdateGraph
This class is the public interface for updating the input graph with the input data in generic code t...
Definition: GraphUtils.hpp:85
Types.hpp