Skip to main content

Upup-ashton-wang's group workspace

Timestamps visible
2025-04-28 02:05:11
{'loss': 0.0006, 'grad_norm': 0.0895218700170517, 'learning_rate': 5.019208589408119e-07, 'rewards/format_reward': 0.625, 'rewards/tag_count_reward': 0.8125, 'rewards/len_reward': -0.18009646609425545, 'rewards/reasoning_steps_reward': 0.7361111491918564, 'rewards/repetition_penalty_reward': -0.18341998383402824, 'reward': 1.8100946843624115, 'reward_std': 0.5166726261377335, 'completion_length': 2428.416748046875, 'kl': 0.016082763671875, 'epoch': 1.0}
2025-04-28 02:08:07
{'loss': 0.0007, 'grad_norm': 0.05836423113942146, 'learning_rate': 5.008797867207304e-07, 'rewards/format_reward': 0.18750000558793545, 'rewards/tag_count_reward': 0.5937500149011612, 'rewards/len_reward': -0.1520249880850315, 'rewards/reasoning_steps_reward': 0.694444477558136, 'rewards/repetition_penalty_reward': -0.21366273239254951, 'reward': 1.1100067794322968, 'reward_std': 0.5761597864329815, 'completion_length': 3287.0208740234375, 'kl': 0.01788330078125, 'epoch': 1.0}
2025-04-28 02:11:01
{'loss': 0.0005, 'grad_norm': 0.05319062992930412, 'learning_rate': 4.998389805071536e-07, 'rewards/format_reward': 0.4375000111758709, 'rewards/tag_count_reward': 0.7187500298023224, 'rewards/len_reward': -0.051472827792167664, 'rewards/reasoning_steps_reward': 0.9583333730697632, 'rewards/repetition_penalty_reward': -0.20800023525953293, 'reward': 1.8551103174686432, 'reward_std': 0.5105666145682335, 'completion_length': 3080.1458740234375, 'kl': 0.01180267333984375, 'epoch': 1.0}
2025-04-28 02:14:00
{'loss': 0.0004, 'grad_norm': 0.039327628910541534, 'learning_rate': 4.987984459364822e-07, 'rewards/format_reward': 0.35416668467223644, 'rewards/tag_count_reward': 0.677083358168602, 'rewards/len_reward': -0.1002722904086113, 'rewards/reasoning_steps_reward': 0.8819444626569748, 'rewards/repetition_penalty_reward': -0.22063272073864937, 'reward': 1.5922894179821014, 'reward_std': 0.6086674332618713, 'completion_length': 3303.0625, 'kl': 0.0090789794921875, 'epoch': 1.0}
2025-04-28 02:16:47
{'loss': 0.0005, 'grad_norm': 0.05911056697368622, 'learning_rate': 4.977581886436462e-07, 'rewards/format_reward': 0.4791666865348816, 'rewards/tag_count_reward': 0.7395833432674408, 'rewards/len_reward': -0.09516762325074524, 'rewards/reasoning_steps_reward': 0.8055556118488312, 'rewards/repetition_penalty_reward': -0.15204493515193462, 'reward': 1.7770930379629135, 'reward_std': 0.41904831677675247, 'completion_length': 2529.666748046875, 'kl': 0.013397216796875, 'epoch': 1.0}