AIToolbox
A library that offers tools for AI problem solving.
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_OLD_MDP_MODEL_HEADER_FILE
2 #define AI_TOOLBOX_OLD_MDP_MODEL_HEADER_FILE
94 OldMDPModel(
size_t s,
size_t a,
double discount = 1.0);
131 template <AIToolbox::IsNaive3DMatrix T, AIToolbox::IsNaive3DMatrix R>
132 OldMDPModel(
size_t s,
size_t a,
const T & t,
const R & r,
double d = 1.0);
146 template <AIToolbox::MDP::IsModel M>
170 template <AIToolbox::IsNaive3DMatrix T>
190 template <AIToolbox::IsNaive3DMatrix R>
218 std::tuple<size_t, double>
sampleSR(
size_t s,
size_t a)
const;
298 template <AIToolbox::IsNaive3DMatrix T, AIToolbox::IsNaive3DMatrix R>
299 OldMDPModel::OldMDPModel(
size_t s,
size_t a,
const T & t,
const R & r,
double d) : S(s), A(a), transitions_(boost::extents[S][A][S]), rewards_(boost::extents[S][A][S]),
307 template <AIToolbox::MDP::IsModel M>
308 OldMDPModel::OldMDPModel(
const M& model) : S(model.getS()), A(model.getA()), discount_(model.getDiscount()), transitions_(boost::extents[S][A][S]), rewards_(boost::extents[S][A][S]),
311 for (
size_t s = 0; s < S; ++s )
312 for (
size_t a = 0; a < A; ++a ) {
313 for (
size_t s1 = 0; s1 < S; ++s1 ) {
314 transitions_[s][a][s1] = model.getTransitionProbability(s, a, s1);
315 rewards_ [s][a][s1] = model.getExpectedReward (s, a, s1);
318 throw std::invalid_argument(
"Input transition matrix does not contain valid probabilities.");
322 template <AIToolbox::IsNaive3DMatrix T>
324 for (
size_t s = 0; s < S; ++s )
325 for (
size_t a = 0; a < A; ++a )
327 throw std::invalid_argument(
"Input transition matrix does not contain valid probabilities.");
332 template <AIToolbox::IsNaive3DMatrix R>
337 OldMDPModel::OldMDPModel(
size_t s,
size_t a,
double discount) : S(s), A(a), discount_(discount), transitions_(boost::extents[S][A][S]), rewards_(boost::extents[S][A][S]),
341 for (
size_t s = 0; s < S; ++s )
342 for (
size_t a = 0; a < A; ++a )
343 transitions_[s][a][s] = 1.0;
349 return std::make_tuple(s1, rewards_[s][a][s1]);
353 return transitions_[s][a][s1];
357 return rewards_[s][a][s1];
361 if ( d <= 0.0 || d > 1.0 )
throw std::invalid_argument(
"Discount parameter must be in (0,1]");
367 for (
size_t a = 0; a < A; ++a ) {
This class represents a Markov Decision Process.
Definition: OldMDPModel.hpp:75
size_t getA() const
This function returns the number of actions available to the agent.
Definition: OldMDPModel.hpp:377
double getExpectedReward(size_t s, size_t a, size_t s1) const
This function returns the stored expected reward for the specified transition.
Definition: OldMDPModel.hpp:356
size_t getS() const
This function returns the number of states of the world.
Definition: OldMDPModel.hpp:376
double getTransitionProbability(size_t s, size_t a, size_t s1) const
This function returns the stored transition probability for the specified transition.
Definition: OldMDPModel.hpp:352
const RewardMatrix & getRewardFunction() const
This function returns the rewards matrix for inspection.
Definition: OldMDPModel.hpp:381
AIToolbox::DumbMatrix3D RewardMatrix
Definition: OldMDPModel.hpp:78
bool isTerminal(size_t s) const
This function returns whether a given state is a terminal state.
Definition: OldMDPModel.hpp:365
void setDiscount(double d)
This function sets a new discount factor for the OldMDPModel.
Definition: OldMDPModel.hpp:360
void setRewardFunction(const R &r)
This function replaces the OldMDPModel reward function with the one provided.
Definition: OldMDPModel.hpp:333
friend std::istream & operator>>(std::istream &is, OldMDPModel &)
double getDiscount() const
This function returns the currently set discount factor.
Definition: OldMDPModel.hpp:378
void setTransitionFunction(const T &t)
This function replaces the OldMDPModel transition function with the one provided.
Definition: OldMDPModel.hpp:323
AIToolbox::DumbMatrix3D TransitionMatrix
Definition: OldMDPModel.hpp:77
const TransitionMatrix & getTransitionFunction() const
This function returns the transition matrix for inspection.
Definition: OldMDPModel.hpp:380
std::tuple< size_t, double > sampleSR(size_t s, size_t a) const
This function samples the MDP for the specified state-action pair.
Definition: OldMDPModel.hpp:346
OldMDPModel(size_t s, size_t a, double discount=1.0)
Basic constructor.
Definition: OldMDPModel.hpp:337