AIToolbox
A library that offers tools for AI problem solving.
CliffProblem.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_MDP_CLIFF_PROBLEM_HEADER_FILE
2 #define AI_TOOLBOX_MDP_CLIFF_PROBLEM_HEADER_FILE
3 
6 
7 namespace AIToolbox::MDP {
8 
 // Body of makeCliffProblem(const GridWorld &grid) — builds the classic
 // episodic "cliff walking" MDP as a SparseModel.
 //
 // State layout: the W*H grid cells are states [0, S-3]; two extra
 // states are appended: a dedicated start state (S-2) and an absorbing
 // goal state (S-1). The cliff itself has no states of its own —
 // "falling in" teleports the agent back to start with a large penalty.
59  using namespace GridWorldUtils;
60 
 // S = every grid cell plus the two off-grid start/goal states.
61  size_t S = grid.getWidth() * grid.getHeight() + 2;
 // A = the four GridWorldUtils directions (UP, DOWN, LEFT, RIGHT).
62  size_t A = 4;
63 
 // Dense S x A x S' tables. Entries presumably start at 0.0 (the goal
 // self-loops below rely on the default-zero rewards) — TODO confirm
 // boost::multi_array element initialization.
64  AIToolbox::DumbMatrix3D transitions(boost::extents[S][A][S]);
65  AIToolbox::DumbMatrix3D rewards(boost::extents[S][A][S]);
66 
 // Reward scheme: -1 per ordinary step, -100 for falling off the
 // cliff, 0 for entering the goal.
67  double failReward = -100.0, stepReward = -1.0, winReward = 0.0;
68 
69  // Default all transitions within the grid to be deterministic,
70  // and give negative reward. Remember that the actual cliff is
71  // under the grid.
 // NOTE(review): the fix-ups below zero out bottom-row DOWN self-loops,
 // which implies getAdjacent() returns the same cell when moving
 // against a border — confirm against GridWorld.
72  for ( size_t s = 0; s < S-2; ++s ) {
73  for ( size_t a = 0; a < A; ++a ) {
74  auto cell = grid.getAdjacent((Direction)a, grid(s));
75  transitions[s][a][cell] = 1.0;
76  rewards[s][a][cell] = stepReward;
77  }
78  }
79  // Attach start and goal states
 // upStart = first cell of the grid's last row, the cell reached by
 // moving UP from start (the (H-1)*W arithmetic assumes row-major
 // indexing with row 0 at the top — TODO confirm).
 // upGoal = last grid cell (S-3), the cell directly above the goal.
80  size_t start = S - 2, goal = S - 1;
81  size_t upStart = (grid.getHeight() - 1) * grid.getWidth();
82  size_t upGoal = S - 3;
83 
84  // Fix start
 // Only UP makes progress from start; LEFT/DOWN bump back into start
 // at the normal step cost, while RIGHT steps straight into the cliff:
 // same self-transition, but with the full failure penalty.
85  transitions[start][UP ][upStart] = 1.0;
86  rewards [start][UP ][upStart] = stepReward;
87  transitions[start][LEFT ][start ] = 1.0;
88  rewards [start][LEFT ][start ] = stepReward;
89  transitions[start][DOWN ][start ] = 1.0;
90  rewards [start][DOWN ][start ] = stepReward;
91  transitions[start][RIGHT][start ] = 1.0;
92  rewards [start][RIGHT][start ] = failReward; // This goes into the cliff
93 
94  // Fix down for upStart
 // The default pass left DOWN at upStart as a self-loop; clear it and
 // redirect it onto the start state sitting just below the grid.
95  transitions[upStart][DOWN][upStart] = 0.0;
96  rewards [upStart][DOWN][upStart] = 0.0;
97  transitions[upStart][DOWN][start ] = 1.0;
98  rewards [upStart][DOWN][start ] = stepReward;
99 
100  // Fix goal (self absorbing)
 // Every action self-loops; rewards stay at their default 0.0, so the
 // episode effectively ends here under the 1.0 discount below.
101  transitions[goal][UP ][goal] = 1.0;
102  transitions[goal][LEFT ][goal] = 1.0;
103  transitions[goal][DOWN ][goal] = 1.0;
104  transitions[goal][RIGHT][goal] = 1.0;
105 
106  // Fix upGoal
 // Stepping DOWN from the cell above the goal wins the episode: clear
 // the default self-loop, then route DOWN into the goal with winReward.
107  transitions[upGoal][DOWN][upGoal] = 0.0;
108  rewards [upGoal][DOWN][upGoal] = 0.0;
109  transitions[upGoal][DOWN][goal ] = 1.0;
110  rewards [upGoal][DOWN][goal ] = winReward; // Won!
111 
112  // Fix cliff edge
 // Every bottom-row cell strictly between upStart and upGoal hangs
 // over the cliff: DOWN drops the agent back to start at failReward.
113  for ( size_t s = upStart + 1; s < upGoal; ++s ) {
114  transitions[s][DOWN][s ] = 0.0;
115  rewards [s][DOWN][s ] = 0.0;
116  transitions[s][DOWN][start] = 1.0;
117  rewards [s][DOWN][start] = failReward; // This goes into the cliff
118  }
119 
 // Discount factor 1.0: undiscounted episodic task, as in the
 // canonical cliff-walking formulation.
120  SparseModel model(S, A, transitions, rewards, 1.0);
121 
122  return model;
123  }
124 }
125 
126 #endif
AIToolbox::MDP::GridWorld
This class represents a simple rectangular gridworld.
Definition: GridWorld.hpp:23
SparseModel.hpp
AIToolbox::MDP::GridWorldUtils::RIGHT
@ RIGHT
Definition: GridWorld.hpp:15
AIToolbox::DumbMatrix3D
boost::multi_array< double, 3 > DumbMatrix3D
Definition: Types.hpp:37
AIToolbox::MDP::GridWorld::getAdjacent
State getAdjacent(Direction d, State s) const
This function returns the state next to the input in the chosen Direction.
AIToolbox::MDP
Definition: DoubleQLearning.hpp:10
GridWorld.hpp
AIToolbox::MDP::GridWorldUtils::UP
@ UP
Definition: GridWorld.hpp:15
AIToolbox::MDP::GridWorldUtils::DOWN
@ DOWN
Definition: GridWorld.hpp:15
AIToolbox::MDP::GridWorld::getWidth
unsigned getWidth() const
This function returns the width of the GridWorld.
AIToolbox::MDP::GridWorldUtils::LEFT
@ LEFT
Definition: GridWorld.hpp:15
AIToolbox::MDP::SparseModel
This class represents a Markov Decision Process.
Definition: SparseModel.hpp:77
AIToolbox::MDP::makeCliffProblem
AIToolbox::MDP::SparseModel makeCliffProblem(const GridWorld &grid)
This function sets up the cliff problem in a SparseModel.
Definition: CliffProblem.hpp:58
AIToolbox::MDP::GridWorld::getHeight
unsigned getHeight() const
This function returns the height of the GridWorld.
AIToolbox::MDP::GridWorldUtils::Direction
Direction
The possible actions in a GridWorld-like environment.
Definition: GridWorld.hpp:15