Skip to main content

Igoro's group workspace

Timestamps visible
2022-02-14 20:20:36
  File "/usr/local/lib/python3.8/dist-packages/deepspeed/runtime/pipe/engine.py", line 60, in __init__
2022-02-14 20:20:36
    super().__init__(*super_args, **super_kwargs)
2022-02-14 20:20:36
  File "/usr/local/lib/python3.8/dist-packages/deepspeed/runtime/engine.py", line 192, in __init__
2022-02-14 20:20:36
    self._configure_optimizer(optimizer, model_parameters)
2022-02-14 20:20:36
  File "/usr/local/lib/python3.8/dist-packages/deepspeed/runtime/engine.py", line 681, in _configure_optimizer
2022-02-14 20:20:36
    self.optimizer = self._configure_zero_optimizer(basic_optimizer)
2022-02-14 20:20:36
  File "/usr/local/lib/python3.8/dist-packages/deepspeed/runtime/engine.py", line 810, in _configure_zero_optimizer
2022-02-14 20:20:36
    optimizer = FP16_DeepSpeedZeroOptimizer_Stage1(
2022-02-14 20:20:36
  File "/usr/local/lib/python3.8/dist-packages/deepspeed/runtime/zero/stage1.py", line 311, in __init__
2022-02-14 20:20:36
    self._initialize_optimizer_states()
2022-02-14 20:20:36
  File "/usr/local/lib/python3.8/dist-packages/deepspeed/runtime/zero/stage1.py", line 321, in _initialize_optimizer_states
2022-02-14 20:20:36
    self.optimizer.step()
2022-02-14 20:20:36
  File "/usr/local/lib/python3.8/dist-packages/torch/optim/optimizer.py", line 89, in wrapper
2022-02-14 20:20:36
    return func(*args, **kwargs)
2022-02-14 20:20:36
  File "/usr/local/lib/python3.8/dist-packages/apex/optimizers/fused_adam.py", line 129, in step
2022-02-14 20:20:36
    state['exp_avg'] = torch.zeros_like(p.data)
2022-02-14 20:20:36
RuntimeError: CUDA out of memory. Tried to allocate 4.70 GiB (GPU 0; 47.55 GiB total capacity; 44.60 GiB already allocated; 1.43 GiB free; 44.64 GiB reserved in total by PyTorch)