Base
Environment (ABC, Generic)
#
Environment written in JAX that differs from the gym API so that the step and reset functions are jittable. The state contains all the dynamics and data needed to step the environment; no computation is stored in attributes of self. The API is inspired by brax.
observation_spec: specs.Spec[Observation]
cached
property
writable
#
Returns the observation spec.
Returns:
Type | Description |
---|---|
observation_spec |
a potentially nested `specs.Spec` structure representing the observation. |
action_spec: ActionSpec
cached
property
writable
#
Returns the action spec.
Returns:
Type | Description |
---|---|
action_spec |
a potentially nested spec structure (of type `ActionSpec`) representing the action. |
reward_spec: specs.Array
cached
property
writable
#
Returns the reward spec. By default, this is assumed to be a single float.
Returns:
Type | Description |
---|---|
reward_spec |
a `specs.Array` spec. |
discount_spec: specs.BoundedArray
cached
property
writable
#
Returns the discount spec. By default, this is assumed to be a single float between 0 and 1.
Returns:
Type | Description |
---|---|
discount_spec |
a `specs.BoundedArray` spec. |
unwrapped: Environment[State, ActionSpec, Observation]
property
readonly
#
reset(self, key: chex.PRNGKey) -> Tuple[State, TimeStep[Observation]]
#
Resets the environment to an initial state.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
key |
chex.PRNGKey |
random key used to reset the environment. |
required |
Returns:
Type | Description |
---|---|
state |
State object corresponding to the new state of the environment. timestep: TimeStep object corresponding to the first timestep returned by the environment. |
step(self, state: State, action: chex.Array) -> Tuple[State, TimeStep[Observation]]
#
Run one timestep of the environment's dynamics.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
state |
State |
State object containing the dynamics of the environment. |
required |
action |
chex.Array |
Array containing the action to take. |
required |
Returns:
Type | Description |
---|---|
state |
State object corresponding to the next state of the environment. timestep: TimeStep object corresponding to the timestep returned by the environment. |
render(self, state: State) -> Any
#
Render frames of the environment for a given state.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
state |
State |
State object containing the current dynamics of the environment. |
required |
close(self) -> None
#
Perform any necessary cleanup.
__enter__(self) -> Environment
special
#
__exit__(self, *args: Any) -> None
special
#
Calls `close()`.