As-wandb's workspace
Runs
775
Name
59 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
api
arc_agi.arc_agi_1_artifacts_path
arc_agi.arc_agi_2_artifacts_path
arc_agi.max_output_tokens
arc_agi.num_attempts
arc_agi.reasoning.effort
arc_agi.reasoning.summary
arc_agi_2.artifacts_path
arc_agi_2.max_output_tokens
arc_agi_2.num_attempts
azure_openai.http_timeout.connect
azure_openai.http_timeout.pool
azure_openai.http_timeout.read
azure_openai.http_timeout.write
base_url
batch_size
benchmark_interval_seconds
bfcl.artifacts_path
bfcl.categories.irrelevance
bfcl.categories.java
bfcl.categories.javascript
bfcl.categories.live_irrelevance
bfcl.categories.live_multiple
bfcl.categories.live_parallel
bfcl.categories.live_parallel_multiple
bfcl.categories.live_relevance
bfcl.categories.live_simple
bfcl.categories.multi_turn_base
bfcl.categories.multi_turn_long_context
bfcl.categories.multi_turn_miss_func
bfcl.categories.multi_turn_miss_param
bfcl.categories.multiple
bfcl.categories.parallel
bfcl.categories.parallel_multiple
bfcl.categories.simple
bfcl.generator_config.max_tokens
bfcl.generator_config.temperature
bfcl.generator_config.top_p
bfcl.handler_config.unified_oss_jsonschema.execution_result_include_call_id
bfcl.handler_config.unified_oss_jsonschema.execution_result_include_call_str
bfcl.handler_config.unified_oss_jsonschema.execution_result_join_parallel_calls
bfcl.handler_config.unified_oss_jsonschema.execution_result_role
bfcl.max_steps_per_turn
bfcl.max_tokens
Finished
-
yuya-yamamoto
leaderboard
leaderboard4
6h 37m 40s
-
openai_responses
llm-leaderboard/nejumi-leaderboard4/arc-agi-1_public-eval_50:production
llm-leaderboard/nejumi-leaderboard4/arc-agi-2_public-eval_50:production
90000
2
-
-
-
-
-
10
30
300
300
-
32
-
llm-leaderboard/nejumi-leaderboard4/bfcl:production
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
90000
0.01
1
false
true
true
tool
-
-
Finished
-
yuya-yamamoto
leaderboard
leaderboard4
35m 3s
-
openai_responses
llm-leaderboard/nejumi-leaderboard4/arc-agi-1_public-eval_50:production
llm-leaderboard/nejumi-leaderboard4/arc-agi-2_public-eval_50:production
128000
2
-
-
-
-
-
10
30
300
300
-
32
-
llm-leaderboard/nejumi-leaderboard4/bfcl:production
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
128000
0.01
1
false
true
true
tool
-
-
Finished
-
yuya-yamamoto
leaderboard
leaderboard4
5h 1m 30s
-
google
llm-leaderboard/nejumi-leaderboard4/arc-agi-1_public-eval_50:production
llm-leaderboard/nejumi-leaderboard4/arc-agi-2_public-eval_50:production
65000
2
-
-
-
-
-
10
30
300
300
-
4
-
llm-leaderboard/nejumi-leaderboard4/bfcl:production
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
65000
0.01
1
false
true
true
tool
-
-
Finished
-
yuya-yamamoto
leaderboard
leaderboard4
3h 12m 44s
-
openai_responses
llm-leaderboard/nejumi-leaderboard4/arc-agi-1_public-eval_50:production
llm-leaderboard/nejumi-leaderboard4/arc-agi-2_public-eval_50:production
100000
2
-
-
-
-
-
10
30
300
300
-
32
-
llm-leaderboard/nejumi-leaderboard4/bfcl:production
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
100000
0.01
1
false
true
true
tool
-
-
Finished
-
yuya-yamamoto
leaderboard
leaderboard4
1h 8m 11s
-
openai-compatible
llm-leaderboard/nejumi-leaderboard4/arc-agi-1_public-eval_50:production
llm-leaderboard/nejumi-leaderboard4/arc-agi-2_public-eval_50:production
4096
2
-
-
-
-
-
10
30
300
300
32
-
llm-leaderboard/nejumi-leaderboard4/bfcl:production
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
8096
0.01
1
false
true
true
tool
-
-
Finished
-
gzion930127
leaderboard
leaderboard4
2h 8m 14s
-
openai_responses
llm-leaderboard/nejumi-leaderboard4/arc-agi-1_public-eval_50:production
llm-leaderboard/nejumi-leaderboard4/arc-agi-2_public-eval_50:production
128000
2
-
-
-
-
-
10
30
300
300
-
32
-
llm-leaderboard/nejumi-leaderboard4/bfcl:production
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
128000
0.01
1
false
true
true
tool
-
-
Finished
-
yuya-yamamoto
leaderboard
leaderboard4
2h 6m
-
openai-compatible
llm-leaderboard/nejumi-leaderboard4/arc-agi-1_public-eval_50:production
llm-leaderboard/nejumi-leaderboard4/arc-agi-2_public-eval_50:production
180000
2
-
-
-
-
-
10
30
300
300
32
-
llm-leaderboard/nejumi-leaderboard4/bfcl:production
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
180000
0.01
1
false
true
true
tool
-
-
Finished
-
jmurakami
leaderboard
leaderboard4
1h 33s
-
openai_responses
llm-leaderboard/nejumi-leaderboard4/arc-agi-1_public-eval_50:production
llm-leaderboard/nejumi-leaderboard4/arc-agi-2_public-eval_50:production
200000
2
-
-
-
-
-
10
30
300
300
-
32
-
llm-leaderboard/nejumi-leaderboard4/bfcl:production
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
200000
0.01
1
false
true
true
tool
-
-
Finished
-
keisuke-kamata
leaderboard
leaderboard4
40m 10s
-
google
llm-leaderboard/nejumi-leaderboard4/arc-agi-1_public-eval_50:production
llm-leaderboard/nejumi-leaderboard4/arc-agi-2_public-eval_50:production
4096
2
-
-
-
-
-
10
30
300
300
-
2
-
llm-leaderboard/nejumi-leaderboard4/bfcl:production
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
12000
0.01
1
false
true
true
tool
-
-
Finished
-
nv-kyamamoto
leaderboard
leaderboard4
8h 7m 57s
-
vllm-docker
llm-leaderboard/nejumi-leaderboard4/arc-agi-1_public-eval_50:production
llm-leaderboard/nejumi-leaderboard4/arc-agi-2_public-eval_50:production
32768
2
-
-
-
-
-
10
30
300
300
32
-
llm-leaderboard/nejumi-leaderboard4/bfcl:production
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
32768
0.6
0.95
false
true
true
tool
-
-
Finished
-
yuya-yamamoto
leaderboard
leaderboard4
1h 35m 15s
-
openai-compatible
llm-leaderboard/nejumi-leaderboard4/arc-agi-1_public-eval_50:production
llm-leaderboard/nejumi-leaderboard4/arc-agi-2_public-eval_50:production
180000
2
-
-
-
-
-
10
30
300
300
32
-
llm-leaderboard/nejumi-leaderboard4/bfcl:production
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
180000
0.01
1
false
true
true
tool
-
-
Finished
-
yuya-yamamoto
leaderboard
leaderboard4
5h 21m 19s
-
openai-compatible
llm-leaderboard/nejumi-leaderboard4/arc-agi-1_public-eval_50:production
llm-leaderboard/nejumi-leaderboard4/arc-agi-2_public-eval_50:production
180000
2
-
-
-
-
-
10
30
300
300
6
-
llm-leaderboard/nejumi-leaderboard4/bfcl:production
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
180000
0.01
1
false
true
true
tool
-
-
Finished
-
nv-kyamamoto
leaderboard
leaderboard4
2h 11m 4s
-
vllm-docker
llm-leaderboard/nejumi-leaderboard4/arc-agi-1_public-eval_50:production
llm-leaderboard/nejumi-leaderboard4/arc-agi-2_public-eval_50:production
32768
2
-
-
-
-
-
10
30
300
300
256
-
llm-leaderboard/nejumi-leaderboard4/bfcl:production
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
32768
0.6
0.95
false
true
true
tool
-
-
Finished
-
nv-kyamamoto
leaderboard
leaderboard4
19m 3s
-
vllm-docker
llm-leaderboard/nejumi-leaderboard4/arc-agi-1_public-eval_50:production
llm-leaderboard/nejumi-leaderboard4/arc-agi-2_public-eval_50:production
4096
2
-
-
-
-
-
10
30
300
300
256
-
llm-leaderboard/nejumi-leaderboard4/bfcl:production
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
8096
0.01
1
false
true
true
tool
-
-
Finished
-
nv-kyamamoto
leaderboard
leaderboard4
21m 6s
-
vllm-docker
llm-leaderboard/nejumi-leaderboard4/arc-agi-1_public-eval_50:production
llm-leaderboard/nejumi-leaderboard4/arc-agi-2_public-eval_50:production
4096
2
-
-
-
-
-
10
30
300
300
256
-
llm-leaderboard/nejumi-leaderboard4/bfcl:production
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
8096
0.01
1
false
true
true
tool
-
-
Finished
-
yuya-yamamoto
leaderboard
leaderboard4
1h 27m 53s
-
openai-compatible
llm-leaderboard/nejumi-leaderboard4/arc-agi-1_public-eval_50:production
llm-leaderboard/nejumi-leaderboard4/arc-agi-2_public-eval_50:production
100000
2
-
-
-
-
-
10
30
300
300
8
-
llm-leaderboard/nejumi-leaderboard4/bfcl:production
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
100000
0.01
1
false
true
true
tool
-
-
Finished
-
nv-kyamamoto
leaderboard
leaderboard4
3m 51s
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
-
yuya-yamamoto
leaderboard
leaderboard4
52m 7s
-
openai-compatible
llm-leaderboard/nejumi-leaderboard4/arc-agi-1_public-eval_50:production
llm-leaderboard/nejumi-leaderboard4/arc-agi-2_public-eval_50:production
4096
2
-
-
-
-
-
10
30
300
300
32
-
llm-leaderboard/nejumi-leaderboard4/bfcl:production
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
8096
0.01
1
false
true
true
tool
-
-
Finished
-
nv-kyamamoto
leaderboard
leaderboard4
12m 21s
-
vllm-docker
llm-leaderboard/nejumi-leaderboard4/arc-agi-1_public-eval_50:production
llm-leaderboard/nejumi-leaderboard4/arc-agi-2_public-eval_50:production
4096
2
-
-
-
-
-
10
30
300
300
256
-
llm-leaderboard/nejumi-leaderboard4/bfcl:production
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
8096
0.01
1
false
true
true
tool
-
-
Finished
-
jmurakami
leaderboard
leaderboard4
24m 45s
-
openai-compatible
llm-leaderboard/nejumi-leaderboard4/arc-agi-1_public-eval_50:production
llm-leaderboard/nejumi-leaderboard4/arc-agi-2_public-eval_50:production
4096
2
-
-
-
-
-
10
30
300
300
32
-
llm-leaderboard/nejumi-leaderboard4/bfcl:production
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
8096
0.01
1
false
true
true
tool
-
-
1-20
of 59