You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
"""
    cholesky_columns(cholesky_vec, j, batch_size, da)

Return a `(da, 1, batch_size)` slice containing the `j`-th column of the
lower-triangular Cholesky factor of the covariance, assembled from the
packed vector representation `cholesky_vec`.
"""
function cholesky_columns(cholesky_vec, j, batch_size, da)
    # Indices, in the packed vector, of the strictly-below-diagonal elements
    # of column j (everything between the j-th and (j+1)-th diagonal entries).
    other_idxs = cholesky_matrix_to_vector_index(j, j, da)+1:cholesky_matrix_to_vector_index(j + 1, j + 1, da)-1
    tc_other = cholesky_vec[other_idxs, :, :]
    # Zero padding for the above-diagonal part of the column; built outside
    # the AD tape via `ignore_derivatives` since it is constant w.r.t. the
    # network output.
    zs = ignore_derivatives() do
        zs = similar(cholesky_vec, da - size(tc_other, 1) - 1, 1, batch_size)
        zs .= zero(eltype(cholesky_vec))
        return zs
    end
    # NOTE(review): `tc_diag` (the transformed diagonal entry of column j) is
    # referenced here but its defining line was lost in extraction — restore
    # it from the original source before using this function.
    [zs; tc_diag; tc_other]
end
252
282
"""
253
283
Transform a vector containing the non-zero elements of a lower triangular da x da matrix into that matrix.
254
284
"""
255
285
functionvec_to_tril(cholesky_vec, da)
256
-
batch_size =size(cholesky_vec, 3)
257
-
c2idx(i, j) = ((2da - j) * (j -1)) ÷2+ i #return the position in cholesky_vec of the element of the triangular matrix at coordinates (i,j)
258
-
functionf(j) #return a slice (da x 1 x batchsize) containing the jth columns of the lower triangular cholesky decomposition of the covariance
## State / observation implementations ========================================================================
50
50
51
-
# Multi-player state: return a Dict mapping each requested player to its own
# observation (per-player partial observability).
RLBase.state(env::PettingZooEnv, ::Observation{Any}, players::Tuple) = Dict(p => state(env, p) for p in players)
52
52
53
53
54
54
# Partial observability is the default for PettingZoo: each player sees only
# its own observation, fetched from the wrapped Python environment.
function RLBase.state(env::PettingZooEnv, ::Observation{Any}, player)
    env.pyenv.observe(player)
end
58
58
59
59
60
60
## state space =========================================================================================================================================
61
61
62
-
# State space over several players: a Space of per-player state spaces,
# mirroring the Dict layout returned by `state` for a Tuple of players.
RLBase.state_space(env::PettingZooEnv, ::Observation{Any}, players) = Space(Dict(player => state_space(env, player) for player in players))
# Forward `action` to the wrapped PettingZoo environment's `step` for the
# currently selected agent. `action` is untyped so both discrete and
# continuous action values can be passed through to Python.
function RLBase.act!(env::PettingZooEnv, action)
    pycall(env.pyenv.step, PyObject, action)
end
122
120
123
121
# reward of player ======================================================================================================================
124
-
# Reward of `player` from the wrapped environment. Players are identified by
# Symbol on the Julia side; the PettingZoo `rewards` dict is keyed by String,
# hence the conversion.
function RLBase.reward(env::PettingZooEnv, player::Symbol)
    env.pyenv.rewards[String(player)]
end
127
125
128
126
129
127
# Multi agent part =========================================================================================================================================
0 commit comments