Ucalyptus's workspace
Runs
21
Name
21 visualized
Name: llama-3.2-3b/test_gsm8k_platinum_formatted.jsonl/
Name: llama-3.2-3b/test_gsm8k_platinum_formatted.jsonl/
5
Name: qwen-2.5-3b/test_gsm8k_platinum_formatted.jsonl/
Name: qwen-2.5-3b/test_gsm8k_platinum_formatted.jsonl/
5
Name: phi-4-14b/test_gsm8k_platinum_formatted.jsonl/
Name: phi-4-14b/test_gsm8k_platinum_formatted.jsonl/
1
Name: qwen-2.5-7b/test_gsm8k_platinum_formatted.jsonl/
Name: qwen-2.5-7b/test_gsm8k_platinum_formatted.jsonl/
3
Name: qwen-2.5-1.5b/test_gsm8k_platinum_formatted.jsonl/
Name: qwen-2.5-1.5b/test_gsm8k_platinum_formatted.jsonl/
3
Name: qwen-2.5-0.5b/test_gsm8k_platinum_formatted.jsonl/
Name: qwen-2.5-0.5b/test_gsm8k_platinum_formatted.jsonl/
3
Name: llama-3.1-8b/test_gsm8k_platinum_formatted.jsonl/
Name: llama-3.1-8b/test_gsm8k_platinum_formatted.jsonl/
1
1-7
of 7profiling
6
train
14
train/rewards/xml_format_reward
train/rewards/xml_format_reward
displayName: llama-3.2-3b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-3b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-1.5b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-7b/test_gsm8k_platinum_formatted.jsonl/
displayName: phi-4-14b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-0.5b/test_gsm8k_platinum_formatted.jsonl/
displayName: llama-3.1-8b/test_gsm8k_platinum_formatted.jsonl/
train/rewards/soft_reward
train/rewards/soft_reward
displayName: llama-3.2-3b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-3b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-1.5b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-7b/test_gsm8k_platinum_formatted.jsonl/
displayName: phi-4-14b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-0.5b/test_gsm8k_platinum_formatted.jsonl/
displayName: llama-3.1-8b/test_gsm8k_platinum_formatted.jsonl/
train/rewards/numeric_match_reward
train/rewards/numeric_match_reward
displayName: llama-3.2-3b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-3b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-1.5b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-7b/test_gsm8k_platinum_formatted.jsonl/
displayName: phi-4-14b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-0.5b/test_gsm8k_platinum_formatted.jsonl/
displayName: llama-3.1-8b/test_gsm8k_platinum_formatted.jsonl/
train/rewards/gsm_reward
train/rewards/gsm_reward
displayName: llama-3.2-3b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-3b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-1.5b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-7b/test_gsm8k_platinum_formatted.jsonl/
displayName: phi-4-14b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-0.5b/test_gsm8k_platinum_formatted.jsonl/
displayName: llama-3.1-8b/test_gsm8k_platinum_formatted.jsonl/
train/reward_std
train/reward_std
displayName: llama-3.2-3b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-3b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-1.5b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-7b/test_gsm8k_platinum_formatted.jsonl/
displayName: phi-4-14b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-0.5b/test_gsm8k_platinum_formatted.jsonl/
displayName: llama-3.1-8b/test_gsm8k_platinum_formatted.jsonl/
train/reward
train/reward
displayName: llama-3.2-3b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-3b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-1.5b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-7b/test_gsm8k_platinum_formatted.jsonl/
displayName: phi-4-14b/test_gsm8k_platinum_formatted.jsonl/
displayName: qwen-2.5-0.5b/test_gsm8k_platinum_formatted.jsonl/
displayName: llama-3.1-8b/test_gsm8k_platinum_formatted.jsonl/
System
21