AIToolbox
A library that offers tools for AI problem solving.
LinearProgramming.hpp
Go to the documentation of this file.
1 #ifndef AI_TOOLBOX_MDP_LINEAR_PROGRAMMING_HEADER_FILE
2 #define AI_TOOLBOX_MDP_LINEAR_PROGRAMMING_HEADER_FILE
3 
4 #include <AIToolbox/Utils/LP.hpp>
5 
9 
10 namespace AIToolbox::MDP {
24  public:
35  template <IsModel M>
36  std::tuple<double, ValueFunction, QFunction> operator()(const M & m);
37  };
38 
39  template <IsModel M>
40  std::tuple<double, ValueFunction, QFunction> LinearProgramming::operator()(const M & model) {
41  // Extract necessary knowledge from model so we don't have to pass it around
42  const size_t S = model.getS();
43  const size_t A = model.getA();
44 
45  // Here we solve an LP to determine the optimal value function for the
46  // infinite horizon. In particular, for every state, we represent its
47  // value with a variable (we assume an uniform distribution over the
48  // states here).
49  //
50  // Then we minimize the sum of the variables, subject to:
51  //
52  // sum_s' T(s,a,s') * [ R(s,a,s') + gamma * V*(s') ] <= V(s)
53  //
54  // for every combination of s and a (so |S|*|A| constraints in the
55  // end).
56  //
57  // Here we transform the constraints in the form:
58  //
59  // V(s) - sum_s' gamma * T(s,a,s') * V*(s') >= sum_s' T(s,a,s') * R(s,a,s')
60  //
61  // and we merge the V(s) with its appropriate V*(s') element.
62  LP lp(S);
63  lp.resize(S * A);
64 
65  // Assume uniform distribution, and minimize the objective.
66  lp.row.fill(1.0 / S);
67  lp.setObjective(false);
68 
69  for (size_t s = 0; s < S; ++s) {
70  // For every variable, we set it as unbounded (as its value can be
71  // anything).
72  lp.setUnbounded(s);
73  for (size_t a = 0; a < A; ++a) {
74  double rhs;
75  if constexpr(IsModelEigen<M>) {
76  lp.row = -model.getDiscount() * model.getTransitionFunction(a).row(s);
77  rhs = model.getRewardFunction()(s, a);
78  } else {
79  // For each constraint, we compute the RHS, while at the same
80  // time setting the coefficients for the various variables.
81  rhs = 0.0;
82  for (size_t s1 = 0; s1 < S; ++s1) {
83  lp.row[s1] = -model.getDiscount() * model.getTransitionProbability(s, a, s1);
84  rhs += model.getTransitionProbability(s, a, s1) * model.getExpectedReward(s, a, s1);
85  }
86  }
87  // Finally we add the V(s) at its place.
88  lp.row[s] += 1.0;
90  }
91  }
92 
93  // We solve the LP, and get V*
94  auto values = lp.solve(S);
95 
96  if (!values)
97  throw std::runtime_error("Could not solve the LP for this MDP");
98 
99  // We have the values, but we also want the optimal actions. So while
100  // we're at it, we also build Q.
101  const auto & ir = [&]{
102  if constexpr (IsModelEigen<M>) return model.getRewardFunction();
103  else return computeImmediateRewards(model);
104  }();
105 
106  auto q = computeQFunction(model, model.getDiscount() * (*values), ir);
107 
108  ValueFunction v;
109  v.values = std::move(*values);
110  v.actions.resize(S);
111  for (size_t s = 0; s < S; ++s)
112  q.row(s).maxCoeff(&v.actions[s]);
113 
114  return std::make_tuple(lp.getPrecision(), std::move(v), std::move(q));
115  }
116 }
117 
118 #endif
AIToolbox::MDP::computeQFunction
QFunction computeQFunction(const M &model, const Values &v, QFunction ir)
This function computes the Model's QFunction from the values of a ValueFunction.
Definition: Utils.hpp:106
AIToolbox::LP::resize
void resize(size_t rows)
This function resizes the underlying LP.
AIToolbox::LP::solve
std::optional< Vector > solve(size_t variables, double *objective=nullptr)
This function solves the LP associated with all constraints in the stack.
AIToolbox::MDP::computeImmediateRewards
Matrix2D computeImmediateRewards(const M &model)
This function computes all immediate rewards (state and action) of the MDP once for improved speed.
Definition: Utils.hpp:77
AIToolbox::MDP::LinearProgramming::operator()
std::tuple< double, ValueFunction, QFunction > operator()(const M &m)
This function solves the input MDP using linear programming.
Definition: LinearProgramming.hpp:40
AIToolbox::MDP::ValueFunction
Definition: Types.hpp:47
AIToolbox::LP::getPrecision
static double getPrecision()
This function returns the maximum precision obtainable from the solution.
AIToolbox::LP
This class presents a common interface for solving Linear Programming problems.
Definition: LP.hpp:23
AIToolbox::MDP::ValueFunction::values
Values values
Definition: Types.hpp:48
AIToolbox::MDP
Definition: DoubleQLearning.hpp:10
AIToolbox::LP::pushRow
void pushRow(Constraint c, double value)
This function adds a constraint to the LP.
Utils.hpp
LP.hpp
AIToolbox::LP::Constraint::GreaterEqual
@ GreaterEqual
Types.hpp
TypeTraits.hpp
AIToolbox::LP::row
Eigen::Map< Vector > row
Editable vector containing column coefficients.
Definition: LP.hpp:62
AIToolbox::LP::setUnbounded
void setUnbounded(size_t n)
This function sets the specified variable as unbounded.
AIToolbox::LP::setObjective
void setObjective(size_t n, bool maximize)
This function selects the variable to use as objective.
AIToolbox::MDP::ValueFunction::actions
Actions actions
Definition: Types.hpp:49
AIToolbox::MDP::LinearProgramming
This class solves an MDP using Linear Programming.
Definition: LinearProgramming.hpp:23