Reports
Created by
Created On
Last edited
Qwen2.5 7B GRPO Fast Zero ORZ
[['?we=ai2-llm&wpn=open_instruct_public&ceik=chat_template_name&cen=chat_template_name&metrics=objective/verifiable_correct_rate&metrics=val/sequence_lengths&metrics=objective/kl_avg', 'r1_simple_chat_postpend_think?tag=no-tag-1-g383da57&tag=qwen2.5_7b_grpo_fast_zero_orz&cl=Qwen2.5 7B GRPO Fast Zero ORZ']]
0
2025-04-12
Qwen2.5 7B GRPO Zero
[['?we=ai2-llm&wpn=open_instruct_public&ceik=chat_template_name&cen=chat_template_name&metrics=objective/verifiable_correct_rate&metrics=objective/kl&metrics=val/sequence_lengths', 'r1_simple_chat_postpend_think?tag=no-tag-691-gf3a4ad9&tag=qwen2.5_7b_grpo_zero&cl=Qwen2.5 7B GRPO Zero']]
0
2025-03-31
Qwen2.5 7B GRPO Fast Zero
[['?we=ai2-llm&wpn=open_instruct_public&ceik=chat_template_name&cen=chat_template_name&metrics=objective/verifiable_correct_rate&metrics=val/sequence_lengths', 'r1_simple_chat_postpend_think?tag=no-tag-1-g383da57&tag=qwen2.5_7b_grpo_fast_zero&cl=Qwen2.5 7B GRPO Fast Zero', 'r1_simple_chat_postpend_think?tag=no-tag-691-gf3a4ad9&tag=qwen2.5_7b_grpo_zero&cl=Qwen2.5 7B GRPO Zero']]
0
2025-03-31
Qwen2.5 7B GRPO Zero
[['?we=ai2-llm&wpn=open_instruct_public&ceik=chat_template_name&cen=chat_template_name&metrics=objective/verifiable_correct_rate&metrics=objective/kl&metrics=val/sequence_lengths', 'r1_simple_chat_postpend_think?tag=no-tag-691-gf3a4ad9&tag=qwen2.5_7b_grpo_zero&cl=Qwen2.5 7B GRPO Zero']]
0
2025-03-30
OLMo 2 7B GRPO Fast Zero
[['?we=ai2-llm&wpn=open_instruct_public&ceik=chat_template_name&cen=chat_template_name&metrics=objective/verifiable_correct_rate&metrics=val/sequence_lengths&metrics=objective/kl_avg', 'r1_simple_chat_postpend_think?tag=no-tag-682-g782d335&tag=olmo2_7b_grpo_fast_zero&cl=OLMo 2 7B GRPO Fast Zero']]
0
2025-03-30
OLMo 2 13B GRPO Fast Zero
[['?we=ai2-llm&wpn=open_instruct_public&ceik=chat_template_name&cen=chat_template_name&metrics=objective/verifiable_correct_rate&metrics=val/sequence_lengths&metrics=objective/kl_avg', 'r1_simple_chat_postpend_think?tag=no-tag-682-g782d335&tag=olmo2_13b_grpo_fast_zero&cl=OLMo 2 13B GRPO Fast Zero']]
0
2025-03-30
OLMo 2 32B SFT
[['?we=ai2-llm&wpn=open_instruct_public&xaxis=_step&ceik=chat_template_name&cen=chat_template_name&metrics=train_loss&metrics=learning_rate&metric_names=Training Loss&metric_names=Learning Rate', 'tulu?tag=no-tag-743-g52acdce&tag=olmo2_32b_sft&cl=OLMo 2 32B SFT']]
0
2025-03-30
OLMo 2 13B SFT
[['?we=ai2-llm&wpn=open_instruct_public&xaxis=_step&ceik=chat_template_name&cen=chat_template_name&metrics=train_loss&metrics=learning_rate&metric_names=Training Loss&metric_names=Learning Rate', 'tulu?tag=no-tag-680-gbba6fab8&tag=olmo2_13b_sft&cl=OLMo 2 13B SFT']]
0
2025-03-30
Tulu3 8B SFT
[['?we=ai2-llm&wpn=open_instruct_public&xaxis=_step&ceik=chat_template_name&cen=chat_template_name&metrics=train_loss&metrics=learning_rate&metric_names=Training Loss&metric_names=Learning Rate', 'tulu?tag=no-tag-743-g52acdce&tag=tulu3_8b_sft&cl=Tulu3 8B SFT']]
0
2025-03-24
Tulu3.1 8B GRPO Fast
[['?we=ai2-llm&wpn=open_instruct_public&ceik=chat_template_name&cen=chat_template_name&metrics=objective/verifiable_correct_rate&metrics=val/sequence_lengths', 'tulu?tag=no-tag-743-g52acdce&tag=tulu3.1_8b_grpo_fast&cl=Tulu3.1 8B GRPO Fast', 'tulu?tag=no-tag-679-g2d47f44&tag=tulu3.1_8b_grpo&cl=Tulu3.1 8B GRPO']]
0
2025-03-24
Tulu3.1 8B GRPO
[['?we=ai2-llm&wpn=open_instruct_public&ceik=chat_template_name&cen=chat_template_name&metrics=objective/scores&metrics=objective/kl&metrics=val/sequence_lengths', 'tulu?tag=no-tag-679-g2d47f44&tag=tulu3.1_8b_grpo&cl=Tulu3.1 8B GRPO']]
0
2025-03-23
OLMo 2 7B GRPO
[['?we=ai2-llm&wpn=open_instruct_public&ceik=chat_template_name&cen=chat_template_name&metrics=objective/scores&metrics=objective/kl&metrics=val/sequence_lengths', 'tulu?tag=no-tag-699-g2f63029&tag=olmo2_7b_grpo&cl=OLMo 2 7B GRPO']]
0
2025-03-23
OLMo 2 7B SFT
[['?we=ai2-llm&wpn=open_instruct_public&xaxis=_step&ceik=chat_template_name&cen=chat_template_name&metrics=train_loss&metrics=learning_rate&metric_names=Training Loss&metric_names=Learning Rate', 'tulu?tag=no-tag-695-gdb4af25&tag=olmo2_7b_sft&cl=OLMo 2 7B SFT']]
0
2025-03-23
Tulu3 8B RM
[['?we=ai2-llm&wpn=open_instruct_public&ceik=chat_template_name&cen=chat_template_name&metrics=train/rm/accuracy&metrics=train/rm/loss&metrics=train/rm/chosen_rewards&metrics=train/rm/rejected_rewards&metrics=train/rm/reward_margin&metrics=train/rm/lr&metric_names=Accuracy&metric_names=Loss&metric_names=Chosen Rewards&metric_names=Rejected Rewards&metric_names=Reward Margin&metric_names=Learning Rate', 'tulu?tag=no-tag-734-g3e689d0&tag=pr-616&tag=tulu3_8b_rm&cl=Tulu3 8B RM']]
0
2025-03-21
OLMo 2 13B DPO
[['?we=ai2-llm&wpn=open_instruct_public&xaxis=_step&ceik=chat_template_name&cen=chat_template_name&metrics=train_loss&metrics=learning_rate&metric_names=Training Loss&metric_names=Learning Rate', 'tulu?tag=no-tag-695-gdb4af25&tag=olmo2_13b_dpo&cl=OLMo 2 13B DPO']]
0
2025-03-19
OLMo 2 7B DPO
[['?we=ai2-llm&wpn=open_instruct_public&xaxis=_step&ceik=chat_template_name&cen=chat_template_name&metrics=train_loss&metrics=learning_rate&metric_names=Training Loss&metric_names=Learning Rate', 'tulu?tag=no-tag-695-gdb4af25&tag=olmo2_7b_dpo&cl=OLMo 2 7B DPO']]
0
2025-03-19
Tulu3 8B DPO
[['?we=ai2-llm&wpn=open_instruct_public&xaxis=_step&ceik=chat_template_name&cen=chat_template_name&metrics=train_loss&metrics=learning_rate&metric_names=Training Loss&metric_names=Learning Rate', 'tulu?tag=no-tag-683-g6b93f7f&tag=tulu3_8b_dpo&cl=Tulu3 8B DPO']]
0
2025-03-19
Tulu 3 8B SFT
[['?we=ai2-llm&wpn=open_instruct_public&xaxis=_step&ceik=chat_template_name&cen=chat_template_name&metrics=train_loss&metrics=learning_rate&metric_names=Training Loss&metric_names=Learning Rate', 'tulu?tag=no-tag-679-g2d47f44&tag=tulu3_8b_sft&cl=Tulu3 8B SFT']]
0
2025-03-18