AIToolbox
A library that offers tools for AI problem solving.
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_MDP_CLIFF_PROBLEM_HEADER_FILE
2 #define AI_TOOLBOX_MDP_CLIFF_PROBLEM_HEADER_FILE
59 using namespace GridWorldUtils;
67 double failReward = -100.0, stepReward = -1.0, winReward = 0.0;
72 for (
size_t s = 0; s < S-2; ++s ) {
73 for (
size_t a = 0; a < A; ++a ) {
75 transitions[s][a][cell] = 1.0;
76 rewards[s][a][cell] = stepReward;
80 size_t start = S - 2, goal = S - 1;
82 size_t upGoal = S - 3;
85 transitions[start][
UP ][upStart] = 1.0;
86 rewards [start][
UP ][upStart] = stepReward;
87 transitions[start][
LEFT ][start ] = 1.0;
88 rewards [start][
LEFT ][start ] = stepReward;
89 transitions[start][
DOWN ][start ] = 1.0;
90 rewards [start][
DOWN ][start ] = stepReward;
91 transitions[start][
RIGHT][start ] = 1.0;
92 rewards [start][
RIGHT][start ] = failReward;
95 transitions[upStart][
DOWN][upStart] = 0.0;
96 rewards [upStart][
DOWN][upStart] = 0.0;
97 transitions[upStart][
DOWN][start ] = 1.0;
98 rewards [upStart][
DOWN][start ] = stepReward;
101 transitions[goal][
UP ][goal] = 1.0;
102 transitions[goal][
LEFT ][goal] = 1.0;
103 transitions[goal][
DOWN ][goal] = 1.0;
104 transitions[goal][
RIGHT][goal] = 1.0;
107 transitions[upGoal][
DOWN][upGoal] = 0.0;
108 rewards [upGoal][
DOWN][upGoal] = 0.0;
109 transitions[upGoal][
DOWN][goal ] = 1.0;
110 rewards [upGoal][
DOWN][goal ] = winReward;
113 for (
size_t s = upStart + 1; s < upGoal; ++s ) {
114 transitions[s][
DOWN][s ] = 0.0;
115 rewards [s][
DOWN][s ] = 0.0;
116 transitions[s][
DOWN][start] = 1.0;
117 rewards [s][
DOWN][start] = failReward;
120 SparseModel model(S, A, transitions, rewards, 1.0);