Akhauriyash's workspace
Runs
12
State
Notes
User
Tags
Created
Runtime
Sweep
architecture
attn_reduce_factor
calibrate_thresholds
dDash
do_downstream_eval
do_longbench_eval
do_wikitext_eval
eval_llm_mode
eval_subset
eval_wk2_seqlen
evalgap
finetune_dataset
flash_attn
head_attn_reduce_factor
intdim
late_context_upweight
longbench_datasets
lookahead
max_norm
min_sparse_index
model_mode
model_parallelism
model_path
model_resume_path
no_wandb
no_wikitext_eval
num_tok_per_page
pred_lr
proj_name
randomize_init
result_file
save_interval
seed
stream_llm_start_size
task_list
test_with_thresholds
token_sparse_method
train_batch_size
train_headpredictor
train_seqlen
train_subset_fac
wname
Head Hit Acc
Head Hit Corr
Crashed
-
akhauriyash
19m 1s
-
llama
8
false
16
true
false
true
ExpPred
1000
1024
2000
redpajama
false
2
1024
false
["triviaqa","qasper","trec","samsum","lcc","repobench-p","qmsum","multi_news"]
0
20
4
finetune
false
meta-llama/Llama-3.2-3B
-
false
false
16
0.001
TrainTokenButler
false
L3_3B_2k_CausalLoss.csv
500
42
4
["winogrande","hellaswag","piqa","arc_easy"]
false
fixed_40pc
1
false
1024
1
L3_3B_2k_CausalLoss
0
0
Killed
-
akhauriyash
20h 7m 3s
-
llama
8
false
32
true
false
true
ExpPred
1000
1024
2000
redpajama
false
2
1024
-
["triviaqa","qasper","trec","samsum","lcc","repobench-p","qmsum","multi_news"]
0
20
4
finetune
false
meta-llama/Llama-3.1-8B
/home/ya255/projects/TokenButler/checkpoints/TrainTokenButler_42_finetune_None_None_500_llama_meta-llama_Llama-3.1-8B_L3_8B_1k.csv_L3_8B_1k_Cont_False_False_2000_False_redpajama_1024_1_1_20_0.001_1024/4_False_False_True_32_0.3875000000000002.pt
false
false
16
0.001
TrainTokenButler
false
L3_8B_1k.csv
500
42
4
["winogrande","hellaswag","piqa","arc_easy"]
false
fixed_40pc
1
false
1024
1
L3_8B_1k_Cont2
0
0
Finished
-
akhauriyash
18h 45m 6s
-
llama
8
false
32
true
false
true
ExpPred
1000
1024
2000
redpajama
false
2
1024
-
["triviaqa","qasper","trec","samsum","lcc","repobench-p","qmsum","multi_news"]
0
20
4
finetune
false
meta-llama/Llama-2-7b-hf
/home/ya255/projects/TokenButler/checkpoints/TrainTokenButler_42_finetune_None_None_500_llama_meta-llama_Llama-2-7b-hf_L2_7B_2k.csv_L2_7B_2k_False_False_2000_False_redpajama_1024_1_1_20_0.001_1024/4_False_False_True_32_0.3875000000000002.pt
false
false
16
0.001
TrainTokenButler
false
L2_7B_2k.csv
500
42
4
["winogrande","hellaswag","piqa","arc_easy"]
false
fixed_40pc
1
false
1024
1
L2_7B_2k_Cont
0
0
Finished
-
akhauriyash
34m 7s
-
llama
8
false
32
true
false
true
ExpPred
1000
1024
2000
redpajama
false
2
1024
-
["triviaqa","qasper","trec","samsum","lcc","repobench-p","qmsum","multi_news"]
0
20
4
finetune
false
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
/home/ya255/projects/TokenButler/checkpoints/TrainTokenButler_42_finetune_None_None_500_llama_deepseek-ai_DeepSeek-R1-Distill-Llama-8B_L3_8B_R1_1K.csv_L3_8B_R1_1K_False_False_2000_False_redpajama_1024_1_1_20_0.001_1024/4_False_False_True_32_0.3875000000000002.pt
false
false
16
0.001
TrainTokenButler
false
L3_8B_R1_1K.csv
500
42
4
["winogrande","hellaswag","piqa","arc_easy"]
false
fixed_40pc
1
false
1024
1
L3_8B_R1_1K_Cont
0
0
Finished
-
akhauriyash
1h 59m 54s
-
mistral
8
false
32
true
false
true
ExpPred
1000
1024
2000
redpajama
false
2
1024
-
["triviaqa","qasper","trec","samsum","lcc","repobench-p","qmsum","multi_news"]
0
20
4
finetune
false
mistralai/Mistral-7B-v0.1
/home/ya255/projects/TokenButler/checkpoints/TrainTokenButler_42_finetune_None_None_500_mistral_mistralai_Mistral-7B-v0.1_M7B_1k.csv_M7B_1k_False_False_2000_False_redpajama_1024_1_1_20_0.001_1024/4_False_False_True_32_0.3875000000000002.pt
false
false
16
0.001
TrainTokenButler
false
M7B_1k.csv
500
42
4
["winogrande","hellaswag","piqa","arc_easy"]
false
fixed_40pc
1
false
1024
1
M7B_1k_Cont
0
0
Killed
-
akhauriyash
6h 3m 36s
-
llama
8
false
32
true
false
true
ExpPred
1000
1024
2000
redpajama
false
2
1024
-
["triviaqa","qasper","trec","samsum","lcc","repobench-p","qmsum","multi_news"]
0
20
4
finetune
false
meta-llama/Llama-3.1-8B
/home/ya255/projects/TokenButler/checkpoints/TrainTokenButler_42_finetune_None_None_500_llama_meta-llama_Llama-3.1-8B_L3_8B_1k.csv_L3_8B_1k_False_False_2000_False_redpajama_1024_1_1_20_0.001_1024/4_False_False_True_32_0.3875000000000002.pt
false
false
16
0.001
TrainTokenButler
false
L3_8B_1k.csv
500
42
4
["winogrande","hellaswag","piqa","arc_easy"]
false
fixed_40pc
1
false
1024
1
L3_8B_1k_Cont
0
0
Finished
-
akhauriyash
17h 3m 56s
-
llama
8
false
16
true
false
true
ExpPred
1000
1024
2000
redpajama
false
2
512
-
["triviaqa","qasper","trec","samsum","lcc","repobench-p","qmsum","multi_news"]
0
20
4
finetune
false
meta-llama/Llama-3.2-1B
-
false
false
16
0.001
TrainTokenButler
false
L3_1B_2k.csv
500
42
4
["winogrande","triviaqa"]
false
fixed_40pc
1
false
1024
1
L3_1B_2k
0
0
Killed
-
akhauriyash
1d 22h 22m 49s
-
mistral
8
false
32
true
false
true
ExpPred
1000
1024
2000
redpajama
false
2
1024
-
["triviaqa","qasper","trec","samsum","lcc","repobench-p","qmsum","multi_news"]
0
20
4
finetune
false
mistralai/Mistral-7B-v0.1
-
false
false
16
0.001
TrainTokenButler
false
M7B_1k.csv
500
42
4
["winogrande","hellaswag","piqa","arc_easy"]
false
fixed_40pc
1
false
1024
1
M7B_1k
0
0
Finished
-
akhauriyash
1d 15h 11m 27s
-
llama
8
false
32
true
false
true
ExpPred
1000
1024
2000
redpajama
false
2
1024
-
["triviaqa","qasper","trec","samsum","lcc","repobench-p","qmsum","multi_news"]
0
20
4
finetune
false
meta-llama/Llama-3.1-8B
-
false
false
16
0.001
TrainTokenButler
false
L3_8B_1k.csv
500
42
4
["winogrande","hellaswag","piqa","arc_easy"]
false
fixed_40pc
1
false
1024
1
L3_8B_1k
0
0
Killed
-
akhauriyash
1d 22h 23m 11s
-
llama
8
false
32
true
false
true
ExpPred
1000
1024
2000
redpajama
false
2
1024
-
["triviaqa","qasper","trec","samsum","lcc","repobench-p","qmsum","multi_news"]
0
20
4
finetune
false
meta-llama/Llama-2-7b-hf
-
false
false
16
0.001
TrainTokenButler
false
L2_7B_2k.csv
500
42
4
["winogrande","hellaswag","piqa","arc_easy"]
false
fixed_40pc
1
false
1024
1
L2_7B_2k
0
0
Finished
-
akhauriyash
1d 1h 14m 31s
-
llama
8
false
16
true
false
true
ExpPred
1000
1024
2000
redpajama
false
2
1024
-
["triviaqa","qasper","trec","samsum","lcc","repobench-p","qmsum","multi_news"]
0
20
4
finetune
false
meta-llama/Llama-3.2-3B
-
false
false
16
0.001
TrainTokenButler
false
L3_3B_2k.csv
500
42
4
["winogrande","hellaswag","piqa","arc_easy"]
false
fixed_40pc
1
false
1024
1
L3_3B_2k
0
0
Killed
-
akhauriyash
1d 22h 23m 28s
-
llama
8
false
32
true
false
true
ExpPred
1000
1024
2000
redpajama
false
2
1024
-
["triviaqa","qasper","trec","samsum","lcc","repobench-p","qmsum","multi_news"]
0
20
4
finetune
false
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
-
false
false
16
0.001
TrainTokenButler
false
L3_8B_R1_1K.csv
500
42
4
["winogrande","hellaswag","piqa","arc_easy"]
false
fixed_40pc
1
false
1024
1
L3_8B_R1_1K
0
0
1-12
of 12