
PyTorch has so many functions, how do you learn them all? How long does it take to learn PyTorch well enough to write your own code?


PyTorch and TF have one obvious difference when it comes to TPU training: PyTorch is missing the steps_per_execution parameter. Simply put, TF can feed the TPU a whole batch of steps in one go, while PyTorch XLA cannot. A couple of days ago I filed this as a bug. I got a reply explaining how to fix it, and then someone on the official team put up a fix about 18 hours later.
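For comparison, here is a minimal sketch of what the TF side looks like (my own illustration, not from the original post; the tiny model is a placeholder): in Keras this is just a compile() argument, named steps_per_execution since TF 2.4 (it was experimental_steps_per_execution in TF 2.3).

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(10)])
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    # Run 32 training steps inside one device execution, so the host only
    # dispatches work to the TPU once every 32 batches.
    steps_per_execution=32)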

Anyway, this code is the core:

from __future__ import division
from __future__ import print_function

from six import iteritems, itervalues
import threading
import torch
import torch_xla
import torch_xla.utils.keyd_queue as kq
import torch_xla.utils.utils as xu
import torch_xla.core.xla_model as xm


class PerDeviceQueue(object):

  def __init__(self, device, loader_prefetch_size, device_prefetch_size):
    self.device = device
    self.loader_queue = kq.Queue(maxsize=loader_prefetch_size)
    self.queue = kq.Queue(maxsize=device_prefetch_size)


class PerDeviceLoader(object):

  def __init__(self, loader, device, experimental_steps_per_execution=1):
    self._loader = loader
    self._device = device
    self._steps_count = 0
    self._experimental_steps_per_execution = experimental_steps_per_execution

  def __iter__(self):
    return self

  def __next__(self):
    return self.next()

  def __len__(self):
    return self._loader.per_device_samples()

  def next(self):
    # Cut and launch the accumulated XLA graph only once every
    # `experimental_steps_per_execution` steps, then keep counting so the
    # next mark_step() happens after the same number of steps again.
    if self._steps_count % self._experimental_steps_per_execution == 0:
      xm.mark_step()
      self._steps_count = 0
    self._steps_count += 1
    item = self._loader.next_item(self._device)
    if item is None:
      raise StopIteration
    return item


class ParallelLoader(object):
  """Wraps an existing PyTorch DataLoader with background data upload.

  Args:
    loader (:class:`torch.utils.data.DataLoader`): The PyTorch DataLoader to
      be wrapped.
    devices (`torch.device`...): The list of devices where the data has to be
      sent. The i-th sample returned by the `loader` will be sent to
      `devices[i % len(devices)]`.
    batchdim (int, optional): The dimension which is holding the batch size.
      Default: 0
    fixed_batch_size (bool, optional): Ensures that all the batch sizes sent
      to the devices are of the same size. The original `loader` iteration
      stops as soon as a not matching batch size is found.
      Default: False
    loader_prefetch_size (int, optional): The max capacity of the queue used
      by the thread which is reading samples from the `loader`, to be
      processed by the worker threads which upload data to the devices.
      Default: 8
    device_prefetch_size (int, optional): The max size of the per-device
      queues, where the worker threads deposit tensors which have already been
      sent to devices.
      Default: 4
    experimental_steps_per_execution (int, optional): How many steps to
      execute before calling xm.mark_step(). This is similar to the
      corresponding functionality in TensorFlow.
      Default: 1
  """

  def __init__(self,
               loader,
               devices,
               batchdim=0,
               fixed_batch_size=False,
               loader_prefetch_size=8,
               device_prefetch_size=4,
               experimental_steps_per_execution=1):
    self._loader = loader
    self._devices = [torch.device(x) for x in devices]
    self._batchdim = batchdim
    self._fixed_batch_size = fixed_batch_size
    self._experimental_steps_per_execution = experimental_steps_per_execution
    self._per_device_samples = len(loader) // len(devices)
    self._done = False
    self._queues = dict()
    for device in self._devices:
      self._queues[device] = PerDeviceQueue(device, loader_prefetch_size,
                                            device_prefetch_size)
    thread = threading.Thread(target=self._loader_worker)
    thread.daemon = True
    thread.start()
    for dqueue in itervalues(self._queues):
      thread = threading.Thread(target=self._worker, args=(dqueue,))
      thread.daemon = True
      thread.start()

  def per_device_loader(self, device):
    """Retrieves the loader iterator object for the given device.

    Args:
      device (`torch.device`): The device whose loader is being requested.

    Returns:
      The loader iterator object for the `device`. This is not a
      `torch.utils.data.DataLoader` interface, but a Python iterator which
      returns the same tensor data structure as returned by the wrapped
      `torch.utils.data.DataLoader`, but residing on XLA devices.
    """
    return PerDeviceLoader(self, torch.device(device),
                           self._experimental_steps_per_execution)

  def per_device_samples(self):
    return self._per_device_samples

  def next_item(self, device):
    dqueue = self._queues[device]
    return dqueue.queue.get()

  def close(self):
    self._done = True
    for dqueue in itervalues(self._queues):
      dqueue.queue.close()
      dqueue.loader_queue.close()

  def _get_batch_size(self, data, dim):
    size = []

    def fn(v):
      csize = v.size()[dim]
      if not size:
        size.append(csize)
      else:
        assert csize == size[0]

    xu.for_each_instance(data, lambda x: type(x) == torch.Tensor, fn)
    return size[0] if size else None

  def _loader_worker(self):
    queues = list(self._queues.values())
    data_iter = enumerate(self._loader)
    batch_size = None
    batch = []
    while not self._done:
      try:
        _, data = next(data_iter)
      except StopIteration:
        break
      if self._fixed_batch_size:
        if batch_size is None:
          batch_size = self._get_batch_size(data, self._batchdim)
        elif batch_size != self._get_batch_size(data, self._batchdim):
          break
      batch.append(data)
      if len(batch) == len(self._devices):
        for queue_no, device_batch in enumerate(batch):
          queues[queue_no].loader_queue.put(device_batch)
        batch = []
    for dqueue in queues:
      dqueue.loader_queue.close_write()

  def _get_batch(self, dqueue):
    batch = []
    while dqueue.queue.max_size() > len(batch):
      item = dqueue.loader_queue.get()
      if item is None:
        break
      batch.append(item)
    return batch

  def _worker(self, dqueue):
    device = torch.device(dqueue.device)
    while True:
      batch = self._get_batch(dqueue)
      if not batch:
        break
      batch = xm.send_cpu_data_to_device(batch, device)
      for data in batch:
        dqueue.queue.put(data)
    dqueue.queue.close_write()


class MpDeviceLoader(object):
  """Wraps an existing PyTorch DataLoader with background data upload.

  This class should only be used with multi-processing data parallelism.

  Args:
    loader (:class:`torch.utils.data.DataLoader`): The PyTorch DataLoader to
      be wrapped.
    device (`torch.device`...): The device where the data has to be sent.
    kwargs: Named arguments for the `ParallelLoader` constructor.
  """

  def __init__(self, loader, device, **kwargs):
    self._loader = loader
    self._device = device
    self._parallel_loader_kwargs = kwargs

  def __iter__(self):
    parallel_loader = ParallelLoader(self._loader, [self._device],
                                     **self._parallel_loader_kwargs)
    return parallel_loader.per_device_loader(self._device)

  def __len__(self):
    return len(self._loader)

xm.mark_step() is what decides when the device gets fed again. If you want multiple steps per execution, only a small code change is needed...
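As a usage sketch (my own, assuming the ParallelLoader defined above is in scope; model, optimizer, loss_fn, and train_loader are hypothetical placeholders), the modified loader is driven like any other ParallelLoader, with one extra constructor argument:

import torch_xla.core.xla_model as xm

device = xm.xla_device()
# Cut and launch the XLA graph only once every 8 batches instead of every
# batch; PerDeviceLoader.next() calls xm.mark_step() accordingly.
loader = ParallelLoader(
    train_loader, [device], experimental_steps_per_execution=8)

for data, target in loader.per_device_loader(device):
  optimizer.zero_grad()
  loss = loss_fn(model(data), target)
  loss.backward()
  xm.optimizer_step(optimizer)  # reduces gradients; no mark_step of its own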
