Use unsafe_gettpl! to speed up access to results of env.step()

JobJob · JobJob · commit d16ea6822aa7 · 2018-09-15T02:09:36.000+10:00
Requires the PyCall PR that provides `unsafe_gettpl!`.
diff --git a/src/OpenAIGym.jl b/src/OpenAIGym.jl
@@ -30,6 +30,7 @@ mutable struct GymEnv{T} <: AbstractGymEnv
     pyreset::PyObject # the python env.reset function
     pystate::PyObject # the state array object referenced by the PyArray state.o
     pystepres::PyObject # used to make stepping the env slightly more efficient
+    pytplres::PyObject  # reused PyObject passed to unsafe_gettpl! when reading reward/done
     info::PyObject    # store it as a PyObject for speed, since often unused
     state::T
     reward::Float64
@@ -40,7 +41,7 @@ mutable struct GymEnv{T} <: AbstractGymEnv
         pystate = pycall(pyenv["reset"], PyObject)
         state = convert(stateT, pystate)
         env = new{typeof(state)}(name, pyenv, pyenv["step"], pyenv["reset"],
-                                 pystate, PyNULL(), PyNULL(), state)
+                                 pystate, PyNULL(), PyNULL(), PyNULL(), state)
         reset!(env)
         env
     end
@@ -137,13 +138,10 @@ function Reinforce.step!(env::GymEnv{T}, a) where T <: PyArray
     pyact = pyaction(a)
     pycall!(env.pystepres, env.pystep, PyObject, pyact)
 
-    env.pystate, r, env.done, env.info =
-        convert(Tuple{PyObject, Float64, Bool, PyObject}, env.pystepres)
-
+    unsafe_gettpl!(env.pystate, env.pystepres, PyObject, 0)
     setdata!(env.state, env.pystate)
 
-    env.total_reward += r
-    return (r, env.state)
+    return gymstep!(env)
 end
 
 """
@@ -153,11 +151,16 @@ function Reinforce.step!(env::GymEnv{T}, a) where T
     pyact = pyaction(a)
     pycall!(env.pystepres, env.pystep, PyObject, pyact)
 
-    env.pystate, r, env.done, env.info =
-        convert(Tuple{PyObject, Float64, Bool, PyObject}, env.pystepres)
-
+    unsafe_gettpl!(env.pystate, env.pystepres, PyObject, 0)
     env.state = convert(T, env.pystate)
 
+    return gymstep!(env)
+end
+
+@inline function gymstep!(env)  # shared by both step! methods; reads reward, done, info
+    r = unsafe_gettpl!(env.pytplres, env.pystepres, Float64, 1)  # element 1: reward
+    env.done = unsafe_gettpl!(env.pytplres, env.pystepres, Bool, 2)  # element 2: done flag
+    unsafe_gettpl!(env.info, env.pystepres, PyObject, 3)  # element 3: info, kept as PyObject
     env.total_reward += r
     return (r, env.state)
 end