Source code for rlstructures.rl_batchers.batcher

#
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#


from rlstructures import TemporalDictTensor, DictTensor, Trajectories
from .tools import S_Buffer
from .tools import S_ProcessWorker
import torch
import numpy as np
import time

class RL_Batcher:
    """Multi-process batcher that acquires trajectories through worker processes.

    The batcher owns one shared ``S_Buffer`` and ``n_processes``
    ``S_ProcessWorker`` instances. Each worker handles ``n_envs`` environments,
    so the batcher handles ``n_processes * n_envs`` episodes in total
    (see ``n_elems``).
    """

    def __init__(
        self,
        n_timesteps,
        create_agent,
        agent_args,
        create_env,
        env_args,
        n_processes,
        seeds,
        agent_info,
        env_info,
        agent_seeds=None,
        device=torch.device("cpu"),
    ):
        """Create a multi-process batcher.

        Args:
            n_timesteps: number of timesteps to acquire at each call (size of
                each buffer slot).
            create_agent: function that creates an agent.
            agent_args: dict of keyword arguments passed to ``create_agent``.
            create_env: function that creates an environment.
            env_args: dict of keyword arguments passed to ``create_env``; a
                ``"seed"`` entry is added/overridden for each process.
            n_processes: number of worker processes.
            seeds: list of ``n_processes`` environment seeds; each seed is
                passed to ``create_env`` as the ``seed`` argument.
            agent_info: DictTensor (on CPU) in the same format as the
                agent_info used when calling the batcher. Only its first
                element is used here.
            env_info: DictTensor (on CPU) in the same format as the env_info
                used when calling the batcher. Only its first element is used
                here.
            agent_seeds: optional list of ``n_processes`` agent seeds, one per
                worker, or None if no seeds are given.
            device: the device of the batcher (default is "cpu").

        Raises:
            AssertionError: if the seeds are inconsistent with
                ``n_processes``, if the infos are not on CPU, or if the
                environment/agent outputs are not on ``device``.
        """
        # Validate every argument up front so that an invalid call fails
        # before the probe agent/environment and the buffer are created.
        assert agent_seeds is None or len(agent_seeds)==n_processes,"agent_seeds must be None or a list of n_processes seeds"
        assert isinstance(seeds, list), "You have to choose one seed per process"
        assert len(seeds) == n_processes, "You have to choose one seed per process"
        assert agent_info.empty() or agent_info.device()==torch.device("cpu"),"agent_info must be on CPU"
        assert env_info.empty() or env_info.device()==torch.device("cpu"),"env_info must be on CPU"

        # Buffer creation: a temporary agent and environment are run for one
        # step only to discover the specs of everything the buffer stores.
        agent = create_agent(**agent_args)
        env = create_env(**{**env_args, "seed": 0})

        # Replicate the first info element once per environment of the probe.
        if not agent_info.empty():
            agent_info = agent_info.slice(0, 1)
            agent_info = DictTensor.cat([agent_info for _ in range(env.n_envs())])
        if not env_info.empty():
            env_info = env_info.slice(0, 1)
            env_info = DictTensor.cat([env_info for _ in range(env.n_envs())])

        obs, who = env.reset(env_info)
        assert obs.device()==device,"environment observation is not on the same device than the batcher"
        B = obs.n_elems()
        with torch.no_grad():
            istate = agent.initial_state(agent_info, B)
            assert istate.empty() or istate.device()==device,"agent initial state is not on the same device than the batcher"
            b, a = agent(istate, obs, agent_info)

        self.n_envs = env.n_envs()
        self._n_episodes = n_processes * self.n_envs

        specs_agent_state = a.specs()
        specs_agent_output = b.specs()
        specs_environment = obs.specs()
        specs_agent_info = agent_info.specs()
        specs_env_info = env_info.specs()

        # The probe objects are only needed for their specs.
        del a
        del b
        del obs
        del who
        del env
        del agent

        self.buffer = S_Buffer(
            n_slots=self._n_episodes,
            s_slots=n_timesteps,
            specs_agent_state=specs_agent_state,
            specs_agent_output=specs_agent_output,
            specs_environment=specs_environment,
            specs_agent_info=specs_agent_info,
            specs_env_info=specs_env_info,
            device=device,
        )

        self.workers = []
        self.n_per_worker = []
        print("[Batcher] Creating %d processes " % (n_processes))
        for k in range(n_processes):
            agent_seed = None if agent_seeds is None else agent_seeds[k]
            e_args = {**env_args, "seed": seeds[k]}
            worker = S_ProcessWorker(
                k,  # worker id == index of the worker in self.workers
                create_agent,
                agent_args,
                agent_seed,
                create_env,
                e_args,
                self.buffer,
            )
            self.workers.append(worker)

    def reset(self, agent_info=None, env_info=None):
        """Reset every worker with its slice of the given infos.

        Args:
            agent_info: DictTensor on CPU (or empty); worker ``k`` receives
                the elements ``[k * n_envs, (k + 1) * n_envs)``. ``None``
                (default) is treated as an empty DictTensor.
            env_info: DictTensor on CPU (or empty), sliced the same way.
                ``None`` (default) is treated as an empty DictTensor.

        Raises:
            AssertionError: if a non-empty info is not on CPU or does not hold
                exactly ``n_workers * n_envs`` elements.
        """
        # Defaults are built per call rather than at definition time so that
        # no DictTensor instance is shared between calls.
        if agent_info is None:
            agent_info = DictTensor({})
        if env_info is None:
            env_info = DictTensor({})
        assert agent_info.empty() or agent_info.device()==torch.device("cpu"),"agent_info must be on CPU"
        assert env_info.empty() or env_info.device()==torch.device("cpu"),"env_info must be on CPU"

        pos = 0
        for worker in self.workers:
            n = self.n_envs
            wi = agent_info.slice(pos, pos + n)
            ei = env_info.slice(pos, pos + n)
            worker.reset(agent_info=wi, env_info=ei)
            pos += n

        assert agent_info.empty() or agent_info.n_elems() == pos
        assert env_info.empty() or env_info.n_elems() == pos

    def execute(self, agent_info=None):
        """Ask every worker to acquire a slot.

        Args:
            agent_info: optional DictTensor on CPU; when given, worker ``k``
                receives the elements ``[k * n_envs, (k + 1) * n_envs)``,
                otherwise every worker receives ``None``.

        Raises:
            AssertionError: if a non-empty ``agent_info`` is not on CPU.
        """
        assert agent_info is None or agent_info.empty() or agent_info.device()==torch.device("cpu"),"agent_info must be on CPU"

        pos = 0
        for worker in self.workers:
            n = self.n_envs
            wi = None
            if agent_info is not None:
                wi = agent_info.slice(pos, pos + n)
            worker.acquire_slot(wi)
            pos += n

    def get(self, blocking=True):
        """Collect the slots produced by the workers.

        Args:
            blocking: if False and at least one worker is not finished,
                return ``(None, None)`` immediately.

        Returns:
            A pair ``(trajectories, n_still_running)``: a ``Trajectories``
            built from the collected buffer slots, and the sum of the counts
            reported by the workers. ``(None, None)`` in the non-blocking case
            described above.

        Raises:
            AssertionError: if no worker returned any slot, or if a collected
                slot has length 0.
        """
        if not blocking and not all(w.finished() for w in self.workers):
            return None, None

        buffer_slot_ids = []
        n_still_running = 0
        for worker in self.workers:
            slot_ids, n_running = worker.get()
            buffer_slot_ids += slot_ids
            n_still_running += n_running

        if not buffer_slot_ids:
            # Raised explicitly (instead of `assert False`) so the guard is
            # not stripped when Python runs with -O.
            raise AssertionError(
                "Don't call batcher.get when all environnments are finished"
            )

        slots, info = self.buffer.get_single_slots(buffer_slot_ids, erase=True)
        assert not slots.lengths.eq(0).any()
        return Trajectories(info, slots), n_still_running

    def update(self, info):
        """Forward ``info`` to every worker through ``update_worker``."""
        for worker in self.workers:
            worker.update_worker(info)

    def close(self):
        """Close every worker, drop the references to them, then close the buffer."""
        for worker in self.workers:
            worker.close()
        # `del w` inside a loop only unbinds the loop variable; emptying the
        # list is what actually releases the worker objects.
        self.workers = []
        self.buffer.close()

    def n_elems(self):
        """Return the number of episodes handled at once (``n_processes * n_envs``)."""
        return self._n_episodes