# Source code for rlstructures.env

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import torch
import time
import torch
from rlstructures import DictTensor

class VecEnv:
    """A batch of 'gym' environments that are running simultaneously.

    A VecEnv corresponds to multiple environments (i.e. a batch of B
    environments). At each timestep, only a subset B' of the B environments
    is running (since some envs may have stopped), so each observation
    returned by the VecEnv is a DictTensor of size B'.

    To mark which environments are still running, the observation is
    returned together with a mapping vector of size B'. E.g. ``[0, 2, 5]``
    means that observation 0 corresponds to env 0, observation 1 corresponds
    to env 2, and observation 2 corresponds to env 5.

    Finally, when running the ``step`` method at time t (over the B' running
    envs), the agent has to provide an action (DictTensor) of size B'. The
    VecEnv returns the next observation (time t+1), of size B'. But some of
    the B' envs may have stopped at t+1, such that only B'' envs are actually
    still running. The ``step`` method thus also returns a B'' observation
    (and the corresponding mapping).

    The return of the ``step`` function is thus::

        ((DictTensor of size B', tensor of size B'),
         (DictTensor of size B'', mapping vector of size B''))

    This base class only defines the interface: ``step`` and ``close`` raise
    ``NotImplementedError`` and ``reset`` does nothing.
    """

    def __init__(self):
        pass

    def reset(self, env_info: DictTensor = None):
        """Reset the environment instances.

        The base implementation does nothing and returns ``None``. Note that
        ``n_envs`` indexes the value returned by this method, so an override
        must return an indexable object whose first element exposes
        ``n_elems()`` for ``n_envs`` to work.

        :param env_info: a DictTensor of size n_envs, such that each value
            will be transmitted to each environment instance
        :type env_info: DictTensor, optional
        """
        pass

    def step(
        self, policy_output: DictTensor
    ) -> [[DictTensor, torch.Tensor], [DictTensor, torch.Tensor]]:
        """Execute one step over all the running environment instances.

        :param policy_output: the output given by the policy
        :type policy_output: DictTensor
        :return: see the general description in the class docstring
        :rtype: [[DictTensor,torch.Tensor],[DictTensor,torch.Tensor]]
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def close(self):
        """Terminate the environment.

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def n_envs(self) -> int:
        """Return the number of environment instances contained in this env.

        The value is obtained by calling ``reset()`` and reading
        ``n_elems()`` on the first element of its result, so calling this
        method resets the environments as a side effect. With the base-class
        ``reset`` (which returns ``None``) this raises ``TypeError``.

        :rtype: int
        """
        # NOTE(review): this triggers a full reset() just to read a size;
        # subclasses that know their size may want to override this method.
        return self.reset()[0].n_elems()