Skip to main content

Kastan's group workspace

Timestamps visible
2022-08-05 16:01:56
Traceback (most recent call last):
2022-08-05 16:01:56
  File "/u/kastanday/LLM-Distributed-Quantization/benchmarks/gpt/v2_train.py", line 170, in <module>
2022-08-05 16:01:56
    main()
2022-08-05 16:01:56
  File "/u/kastanday/LLM-Distributed-Quantization/benchmarks/gpt/v2_train.py", line 161, in main
2022-08-05 16:01:56
    trainer.fit(train_dataloader=train_dataloader,
2022-08-05 16:01:56
  File "/u/kastanday/.conda/envs/nice_base/envs/col_ai_quant/lib/python3.9/site-packages/colossalai/trainer/_trainer.py", line 371, in fit
2022-08-05 16:01:56
    self._train_epoch(
2022-08-05 16:01:56
  File "/u/kastanday/.conda/envs/nice_base/envs/col_ai_quant/lib/python3.9/site-packages/colossalai/trainer/_trainer.py", line 181, in _train_epoch
2022-08-05 16:01:56
    logits, label, loss = self.engine.execute_schedule(
2022-08-05 16:01:56
  File "/u/kastanday/.conda/envs/nice_base/envs/col_ai_quant/lib/python3.9/site-packages/colossalai/engine/_base_engine.py", line 201, in execute_schedule
2022-08-05 16:01:56
    output, label, loss = self._schedule.forward_backward_step(self, data_iter, **kwargs)
2022-08-05 16:01:56
  File "/u/kastanday/.conda/envs/nice_base/envs/col_ai_quant/lib/python3.9/site-packages/colossalai/engine/schedule/_pipeline_schedule.py", line 382, in forward_backward_step
2022-08-05 16:01:56
    ft_shapes = comm.recv_obj_meta(ft_shapes)
2022-08-05 16:01:56
  File "/u/kastanday/.conda/envs/nice_base/envs/col_ai_quant/lib/python3.9/site-packages/colossalai/communication/utils.py", line 78, in recv_obj_meta
2022-08-05 16:01:56
    dist.recv(recv_obj_nums, prev_rank)