Code for the algorithms in the book "Algorithms for Decision Making"?
azev77 opened this issue · 3 comments
azev77 commented
Hey and thank you for making your stuff public!
There are various algorithms in the book that are hard to copy/paste from PDF.
Is there a chance they can be made available in this repo?
E.g. my attempt to copy/paste some of chap 7:
"""
    MDP(γ, 𝒮, 𝒜, T, R, TR)

Problem description shared by the solvers below. Fields are untyped, so any
iterable can serve as a space and any callable as `T` or `R`; the code in this
file calls them as `T(s, a, s′)` and `R(s, a)`.
"""
struct MDP
    γ   # discount factor
    𝒮   # state space
    𝒜   # action space
    T   # transition function
    R   # reward function
    TR  # sample transition and reward
end
#
"""
    lookahead(𝒫::MDP, U, s, a)

Return the one-step lookahead value of taking action `a` in state `s`:
`R(s, a) + γ * sum(T(s, a, s′) * U(s′) for s′ in 𝒮)`.

`U` is called as a function of the successor state. The sum runs over every
element of `𝒫.𝒮`, so an empty state space makes `sum` throw.
"""
function lookahead(𝒫::MDP, U, s, a)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    return R(s, a) + γ * sum(T(s, a, s′) * U(s′) for s′ in 𝒮)
end
#
"""
    lookahead(𝒫::MDP, U::Vector, s, a)

Return the one-step lookahead value of taking action `a` in state `s` when the
utilities are stored in a vector:
`R(s, a) + γ * sum(T(s, a, s′) * U[i] for (i, s′) in enumerate(𝒮))`.

`U[i]` is paired with the `i`-th element produced by iterating `𝒫.𝒮`, so `U`
needs one entry per state, in iteration order.
"""
function lookahead(𝒫::MDP, U::Vector, s, a)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    return R(s, a) + γ * sum(T(s, a, s′) * U[i] for (i, s′) in enumerate(𝒮))
end
# 7.3: iterative policy evaluation (k_max sweeps, no max over actions)
"""
    iterative_policy_evaluation(𝒫::MDP, π, k_max)

Approximate the utility of following policy `π` by starting from an all-zero
utility vector and applying `lookahead` under `π` to every state, `k_max`
times. Returns a vector with one entry per element of `𝒫.𝒮`, in iteration
order.

`π` is called as `π(s)` and must return the action to take in state `s`.
"""
function iterative_policy_evaluation(𝒫::MDP, π, k_max)
    𝒮 = 𝒫.𝒮  # only the state space is needed here; lookahead reads the rest
    U = [0.0 for _ in 𝒮]
    for _ in 1:k_max
        # Each sweep builds a fresh vector, so every state is backed up from
        # the previous sweep's utilities rather than partially updated ones.
        U = [lookahead(𝒫, U, s, π(s)) for s in 𝒮]
    end
    return U
end
# 7.4
"""
    policy_evaluation(𝒫::MDP, π)

Compute the utility of following policy `π` exactly by solving the linear
system `(I - γ * T′) * U = R′`, where `R′[i]` is the reward for the policy's
action in the `i`-th state and `T′[i, j]` is the transition probability from
the `i`-th to the `j`-th state under that action.

`π` is called exactly once per state. Returns the solution vector, ordered
like the iteration order of `𝒫.𝒮`.

NOTE(review): `I` is assumed to be the identity from `LinearAlgebra`; the
enclosing file must have `using LinearAlgebra` in effect. TODO confirm.
"""
function policy_evaluation(𝒫::MDP, π)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    states = collect(𝒮)
    # Evaluate the policy once per state instead of once per (s, s′) pair;
    # this matters when π is itself expensive (e.g. a greedy lookahead policy).
    actions = map(π, states)
    R′ = [R(s, a) for (s, a) in zip(states, actions)]
    T′ = [T(states[i], actions[i], s′) for i in eachindex(states), s′ in states]
    # Left-division solves the system without forming an explicit inverse.
    return (I - γ * T′) \ R′
end
# 7.5
"""
    ValueFunctionPolicy(𝒫, U)

Pairs a problem with a utility function. Both fields are untyped.
"""
struct ValueFunctionPolicy
    𝒫  # problem
    U  # utility function
end
"""
    greedy(𝒫::MDP, U, s)

Return a named tuple `(a=..., u=...)` holding the action from `𝒫.𝒜` that
maximizes `lookahead(𝒫, U, s, a)` and the lookahead value it attains. Ties go
to the first maximizing action in iteration order.
"""
function greedy(𝒫::MDP, U, s)
    actions = collect(𝒫.𝒜)
    # `findmax(f, domain)` in Base (Julia ≥ 1.7) yields an *index* into the
    # domain rather than the maximizing element, so the action is recovered
    # explicitly from the index of the best utility.
    u, i = findmax([lookahead(𝒫, U, s, a) for a in actions])
    return (a=actions[i], u=u)
end
# Calling a ValueFunctionPolicy on a state returns the `a` field of
# `greedy` applied to the policy's stored problem and utility function.
function (π::ValueFunctionPolicy)(s)
    choice = greedy(π.𝒫, π.U, s)
    return choice.a
end
# 7.6
"""
    PolicyIteration(π, k_max)

Settings for policy iteration: the starting policy and an iteration cap.
"""
struct PolicyIteration
    π      # initial policy
    k_max  # maximum number of iterations
end
"""
    solve(M::PolicyIteration, 𝒫::MDP)

Run policy iteration on `𝒫`, starting from `M.π`, for at most `M.k_max`
rounds. Each round evaluates the current policy with `policy_evaluation` and
builds the `ValueFunctionPolicy` for the resulting utilities. Iteration stops
early once that policy picks the same action as the current one in every
state of `𝒫.𝒮`.

Returns the current policy at the time the loop ends.
"""
function solve(M::PolicyIteration, 𝒫::MDP)
    π, 𝒮 = M.π, 𝒫.𝒮
    for _ in 1:M.k_max
        U = policy_evaluation(𝒫, π)
        π′ = ValueFunctionPolicy(𝒫, U)
        # Converged: the improved policy agrees with the current one everywhere.
        if all(π(s) == π′(s) for s in 𝒮)
            break
        end
        π = π′
    end
    return π
end
# Algorithm 7.7. The backup procedure applied to an MDP
# Algorithm 7.8. Value iteration
# Algorithm 7.9. Asynchronous value iteration
# Algorithm 7.10. Solving a discrete MDP using a linear program formulation
# Algorithm 7.11. LinearQuadraticProblem
# Example 7.4. An example solving a finite horizon MDP
mykelk commented
The algorithms will eventually end up in a different repository. This repository is for the problem definitions.
azev77 commented
Which repository did the algorithms end up in?
tawheeler commented
Hello Алик, your prompting got us to create a new repo with all of the exported code. We hope you find it useful.