AIToolbox
A library that offers tools for AI problem solving.
TigerProblem.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_POMDP_TIGER_PROBLEM_HEADER_FILE
2 #define AI_TOOLBOX_POMDP_TIGER_PROBLEM_HEADER_FILE
3 
6 
7 namespace AIToolbox::POMDP {
/// Named indices and constants shared by the tiger problem set-up code.
namespace TigerProblemUtils {
    /// Actions available to the agent. The values are used directly as
    /// indices into the action dimension of the model tables.
    enum Action {
        A_LISTEN = 0, ///< Listen for the tiger.
        A_LEFT   = 1, ///< Open the left door.
        A_RIGHT  = 2, ///< Open the right door.
    };

    /// Possible positions of the tiger. The values are used directly as
    /// indices into the state dimension of the model tables.
    enum State {
        TIG_LEFT  = 0, ///< The tiger is behind the left door.
        TIG_RIGHT = 1, ///< The tiger is behind the right door.
    };

    /// Probability that listening reports the wrong door.
    /// `inline` gives this header-defined constant a single definition
    /// shared by every translation unit that includes the header.
    inline constexpr double listenError = 0.15;
}
22 
// Body of makeTigerProblem(): fills the transition, reward and observation
// tables of the tiger problem and installs them into `model`.
// NOTE(review): the function signature (listing line 58) and the declaration
// of `model` (the listing skips line 64) are not visible in this listing.
59  using namespace TigerProblemUtils;
60 
61  // Problem sizes. Actions are: 0-listen, 1-open-left, 2-open-right (the Action enum).
62  constexpr size_t S = 2, A = 3, O = 2;
63 
65 
// Tables indexed [state][action][next state] for transitions and rewards,
// and [state][action][observation] for observations.
// NOTE(review): entries not assigned below are presumably zero-initialised
// by boost::multi_array - confirm.
66  AIToolbox::DumbMatrix3D transitions(boost::extents[S][A][S]);
67  AIToolbox::DumbMatrix3D rewards(boost::extents[S][A][S]);
68  AIToolbox::DumbMatrix3D observations(boost::extents[S][A][O]);
69 
70  // Transitions
71  // If we listen, nothing changes: each state maps to itself with probability 1.
72  for ( size_t s = 0; s < S; ++s )
73  transitions[s][A_LISTEN][s] = 1.0;
74 
75  // If we pick a door, tiger and treasure shuffle: every next state gets probability 1/S.
76  for ( size_t s = 0; s < S; ++s ) {
77  for ( size_t s1 = 0; s1 < S; ++s1 ) {
78  transitions[s][A_LEFT ][s1] = 1.0 / S;
79  transitions[s][A_RIGHT][s1] = 1.0 / S;
80  }
81  }
82 
83  // Observations
84  // If we listen, we guess right with probability 1 - listenError (85% of the time).
// The State enumerators are reused here as observation indices.
85  observations[TIG_LEFT ][A_LISTEN][TIG_LEFT ] = 1.0 - listenError;
86  observations[TIG_LEFT ][A_LISTEN][TIG_RIGHT] = listenError;
87 
88  observations[TIG_RIGHT][A_LISTEN][TIG_RIGHT] = 1.0 - listenError;
89  observations[TIG_RIGHT][A_LISTEN][TIG_LEFT ] = listenError;
90 
91  // Otherwise we get no information on the environment: each observation has probability 1/O.
92  for ( size_t s = 0; s < S; ++s ) {
93  for ( size_t o = 0; o < O; ++o ) {
94  observations[s][A_LEFT ][o] = 1.0 / O;
95  observations[s][A_RIGHT][o] = 1.0 / O;
96  }
97  }
98 
99  // Rewards
100  // Listening has a small penalty (-1), whatever the current and next state.
101  for ( size_t s = 0; s < S; ++s )
102  for ( size_t s1 = 0; s1 < S; ++s1 )
103  rewards[s][A_LISTEN][s1] = -1.0;
104 
105  // Treasure has a decent reward (+10), and tiger a bad penalty (-100);
// the reward depends only on the current state and the door opened, not on s1.
106  for ( size_t s1 = 0; s1 < S; ++s1 ) {
107  rewards[TIG_RIGHT][A_LEFT][s1] = 10.0;
108  rewards[TIG_LEFT ][A_LEFT][s1] = -100.0;
109 
110  rewards[TIG_LEFT ][A_RIGHT][s1] = 10.0;
111  rewards[TIG_RIGHT][A_RIGHT][s1] = -100.0;
112  }
113 
// Install the three tables into the model and return it.
114  model.setTransitionFunction(transitions);
115  model.setRewardFunction(rewards);
116  model.setObservationFunction(observations);
117 
118  return model;
119  }
120 }
121 
122 #endif
AIToolbox::POMDP::TigerProblemUtils::A_LEFT
@ A_LEFT
Definition: TigerProblem.hpp:11
AIToolbox::POMDP::TigerProblemUtils::TIG_LEFT
@ TIG_LEFT
Definition: TigerProblem.hpp:16
AIToolbox::POMDP
Definition: AMDP.hpp:14
AIToolbox::POMDP::makeTigerProblem
AIToolbox::POMDP::Model< AIToolbox::MDP::Model > makeTigerProblem()
This function sets up the tiger problem in a Model.
Definition: TigerProblem.hpp:58
Model.hpp
AIToolbox::DumbMatrix3D
boost::multi_array< double, 3 > DumbMatrix3D
Definition: Types.hpp:37
Model.hpp
AIToolbox::POMDP::TigerProblemUtils::listenError
constexpr double listenError
Definition: TigerProblem.hpp:20
AIToolbox::POMDP::TigerProblemUtils::State
State
Definition: TigerProblem.hpp:15
AIToolbox::POMDP::TigerProblemUtils::A_RIGHT
@ A_RIGHT
Definition: TigerProblem.hpp:12
AIToolbox::POMDP::Model
This class represents a Partially Observable Markov Decision Process.
Definition: Model.hpp:15
AIToolbox::POMDP::TigerProblemUtils::Action
Action
Definition: TigerProblem.hpp:9
AIToolbox::POMDP::TigerProblemUtils::TIG_RIGHT
@ TIG_RIGHT
Definition: TigerProblem.hpp:17
AIToolbox::POMDP::Model::setObservationFunction
void setObservationFunction(const ObFun &of)
This function replaces the Model observation function with the one provided.
Definition: Model.hpp:319
AIToolbox::POMDP::TigerProblemUtils::A_LISTEN
@ A_LISTEN
Definition: TigerProblem.hpp:10