Manekineko's workspace · Runs: 318 (318 visualized)
Name, Tags, and Created carry no values in this export. For every one of the 20 runs shown, Notes and Sweep are empty ("-") and User is randomfoo. A "-" in any other cell means the field is unset for that run.

Config values shared by all 20 runs shown:

| Column | Value |
| --- | --- |
| _attn_implementation_autoset | true |
| accelerator_config.even_batches | true |
| accelerator_config.non_blocking | false |
| accelerator_config.split_batches | false |
| accelerator_config.use_seedable_sampler | true |
| adafactor | false |
| adam_beta1 | 0.9 |
| adam_beta2 | 0.999 |
| adam_epsilon | 1.0000e-8 |
| add_cross_attention | false |
| attn_layer_offset | - |
| attn_layer_period | - |
| auto_find_batch_size | false |
| auto_map.AutoConfig | - |
| auto_map.AutoModel | - |
| auto_map.AutoModelForCausalLM | - |
| auto_map.AutoModelForSequenceClassification | - |
| average_tokens_across_devices | false |
| batch_eval_metrics | false |
| bench_dataset | pharaouk/dharma-1/dharma_1_mini.json |
| bench_source_max_len | 2048 |
| bench_split | eval |
| bf16 | true |
| bf16_full_eval | false |
| calc_logits_for_entire_prompt | - |
| chunk_size_feed_forward | 0 |
| cpo_alpha | - |
| dataloader_drop_last | false |
| dataloader_num_workers | 0 |
| dataloader_persistent_workers | false |
| dataloader_pin_memory | true |
| ddp_timeout | 1800 |
| debug | [] |
| deepspeed | - |

Columns that differ between runs:

| # | State | Runtime | _name_or_path | architectures | attention_bias | attention_dropout | beta | boi_token_index | bos_token_id | chat_template | dataset_num_proc | ddp_find_unused_parameters |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 1 | Finished | 2h 14m 17s | /fsx2/outputs/ablation-194-finalsft2-shisa-v2-qwen2.5-32b | ["Qwen2ForCausalLM"] | - | 0 | 0.1 | - | - | - | 32 | - |
| 2 | Finished | 2h 14m 27s | /fsx2/outputs/ablation-194-finalsft2-shisa-v2-qwen2.5-32b | ["Qwen2ForCausalLM"] | - | 0 | 0.1 | - | - | - | 32 | - |
| 3 | Crashed | 4m 31s | /fsx2/outputs/ablation-191-finalsft2-shisa-v2-qwen2.5-7b | ["Qwen2ForCausalLM"] | - | 0 | 0.1 | - | - | - | 32 | - |
| 4 | Finished | 4h 14m 52s | google/gemma-3-27b-it | ["Gemma3ForConditionalGeneration"] | - | - | - | 255999 | - | - | - | false |
| 5 | Crashed | 1m 46s | google/gemma-3-27b-it | ["Gemma3ForConditionalGeneration"] | - | - | - | 255999 | - | - | - | false |
| 6 | Finished | 4h 47m 15s | mistralai/Mistral-Small-24B-Instruct-2501 | ["MistralForCausalLM"] | - | 0 | - | - | 1 | see below | - | false |
| 7 | Finished | 5h 49m 38s | Qwen/Qwen2.5-32B-Instruct | ["Qwen2ForCausalLM"] | - | 0 | - | - | - | - | - | false |
| 8 | Crashed | 6m 16s | Qwen/Qwen2.5-32B-Instruct | ["Qwen2ForCausalLM"] | - | 0 | - | - | - | - | - | false |
| 9 | Finished | 1h 46s | /fsx2/outputs/ablation-191-finalsft2-shisa-v2-qwen2.5-7b | ["Qwen2ForCausalLM"] | - | 0 | 0.1 | - | - | - | 32 | - |
| 10 | Finished | 1h 1m 27s | /fsx2/outputs/ablation-175-finalsft2-shisa-v2-llama-3.1-8b | ["LlamaForCausalLM"] | false | 0 | 0.1 | - | 128000 | - | 32 | - |
| 11 | Finished | 1h 26m 14s | Qwen/Qwen2.5-7B-Instruct | ["Qwen2ForCausalLM"] | - | 0 | - | - | - | - | - | false |
| 12 | Failed | 37s | Qwen/Qwen2.5-7B-Instruct | ["Qwen2ForCausalLM"] | - | 0 | - | - | - | - | - | false |
| 13 | Finished | 1h 1m 11s | /fsx2/outputs/ablation-175-finalsft2-shisa-v2-llama-3.1-8b | ["LlamaForCausalLM"] | false | 0 | 0.1 | - | 128000 | - | 32 | - |
| 14 | Finished | 1h 36m 2s | /fsx2/outputs/ablation-178-finalsft2-shisa-v2-mistral-nemo-japanese-12b | ["MistralForCausalLM"] | - | 0 | 0.1 | - | 1 | - | 32 | - |
| 15 | Finished | 1h 36m 10s | /fsx2/outputs/ablation-178-finalsft2-shisa-v2-mistral-nemo-japanese-12b | ["MistralForCausalLM"] | - | 0 | 0.1 | - | 1 | - | 32 | - |
| 16 | Finished | 1h 31m 36s | /fsx2/outputs/ablation-177-finalsft2-shisa-v2-mistral-nemo-12b | ["MistralForCausalLM"] | - | 0 | 0.1 | - | 1 | - | 32 | - |
| 17 | Finished | 1h 31m 28s | /fsx2/outputs/ablation-177-finalsft2-shisa-v2-mistral-nemo-12b | ["MistralForCausalLM"] | - | 0 | 0.1 | - | 1 | - | 32 | - |
| 18 | Finished | 1h 1m 53s | /fsx2/outputs/ablation-175-finalsft2-shisa-v2-llama-3.1-8b | ["LlamaForCausalLM"] | false | 0 | 0.1 | - | 128000 | - | 32 | - |
| 19 | Finished | 3h 41m 19s | unsloth/phi-4 | ["LlamaForCausalLM"] | false | 0 | - | - | 100257 | see below | - | false |
| 20 | Finished | 1h 52s | /fsx2/outputs/ablation-174-shisav2.if50.tltweak-shisa-v2-llama-3.1-8b | ["LlamaForCausalLM"] | false | 0 | 0.1 | - | 128000 | - | 32 | - |

chat_template for run 6 (mistralai/Mistral-Small-24B-Instruct-2501):

{%- set today = strftime_now("%Y-%m-%d") %}
{%- set default_system_message = "You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\nYour knowledge base was last updated on 2023-10-01. The current date is " + today + ".\n\nWhen you're not sure about some information, you say that you don't have the information and don't make up anything.\nIf the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. \"What are some good restaurants around me?\" => \"Where are you?\" or \"When is the next flight to Tokyo\" => \"Where do you travel from?\")" %}
{{- bos_token }}
{%- if messages[0]['role'] == 'system' %}
{%- set system_message = messages[0]['content'] %}
{%- set loop_messages = messages[1:] %}
{%- else %}
{%- set system_message = default_system_message %}
{%- set loop_messages = messages %}
{%- endif %}
{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}
{%- for message in loop_messages %}
{%- if message['role'] == 'user' %}
{%- if message['content'] is string %}
{{- '[INST]' + message['content'] + '[/INST]' }}
{%- else %}
{{- '[INST]' }}
{%- for block in message['content'] %}
{%- if block['type'] == 'text' %}
{{- block['text'] }}
{%- elif block['type'] == 'image' or block['type'] == 'image_url' %}
{{- '[IMG]' }}
{%- else %}
{{- raise_exception('Only text and image blocks are supported in message content!') }}
{%- endif %}
{%- endfor %}
{{- '[/INST]' }}
{%- endif %}
{%- elif message['role'] == 'system' %}
{{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
{%- elif message['role'] == 'assistant' %}
{%- if message['content'] is string %}
{{- message['content'] }}
{%- elif message['content'] is iterable %}
{%- for block in message['content'] %}
{%- if block['type'] == 'text' %}
{{- block['text'] }}
{%- else %}
{{- raise_exception('Only text blocks are supported in assistant message content!') }} {%- endif %}
{%- endfor %} {{- eos_token }} {%- else %}
{{- raise_exception('Unsupported assistant message content format!') }}
{%- endif %}
{%- else %}
{{- raise_exception('Only user, system and assistant roles are supported!') }}
{%- endif %}
{%- endfor %}

chat_template for run 19 (unsloth/phi-4):

{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'assistant') %}{{'<|im_start|>assistant<|im_sep|>' + message['content'] + '<|im_end|>'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant<|im_sep|>' }}{% endif %}
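A stored chat_template string like the two above is what a transformers tokenizer uses to turn a message list into a prompt. The following is a minimal sketch, not part of the original runs: it assumes the unsloth/phi-4 checkpoint from the table is downloadable in the current environment, and the example messages are invented for illustration.

```python
# Minimal sketch: render a message list with the tokenizer's stored chat_template.
# Model ID taken from the runs table above; the messages here are made up.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("unsloth/phi-4")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Explain what bf16 training means in one sentence."},
]

# apply_chat_template fills the Jinja chat_template (the same kind of string
# logged in the chat_template column) with the messages;
# add_generation_prompt appends the assistant header defined by the template.
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
print(prompt)
# Expected shape, per the phi-4 template above:
# <|im_start|>system<|im_sep|>...<|im_end|><|im_start|>user<|im_sep|>...<|im_end|><|im_start|>assistant<|im_sep|>
```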
Showing runs 1-20 of 318.
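If the full 318-run table needs to be pulled programmatically rather than read from the UI, the W&B public API exposes each run's state and logged config. The sketch below is an assumption-laden example, not the workspace's own tooling: it presumes an installed, authenticated wandb client, and the entity/project path is a placeholder.

```python
# Minimal sketch: export run state and a few config columns via the W&B public API.
# "my-entity/my-project" is a placeholder, not the actual path of this workspace.
import wandb

api = wandb.Api()
runs = api.runs("my-entity/my-project")  # substitute the real entity/project path

rows = []
for run in runs:
    cfg = run.config  # logged config, e.g. _name_or_path, adam_beta1, bf16, ...
    rows.append({
        "name": run.name,
        "state": run.state,  # Finished / Crashed / Failed
        "_name_or_path": cfg.get("_name_or_path"),
        "architectures": cfg.get("architectures"),
        "bf16": cfg.get("bf16"),
        "adam_beta1": cfg.get("adam_beta1"),
    })

print(f"fetched {len(rows)} runs")
for row in rows[:5]:
    print(row)
```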