Skip to main content
Jobs
job-https___github.com_allenai_LLM.git_scripts_combine_wandb_runs.py
Job
105 versions
1 year ago
Job version
v104
Version metadata
Version path
ai2-llm/OLMo-7B/job-https___github.com_allenai_LLM.git_scripts_combine_wandb_runs.py:v104
Description

What makes this job version special?

Aliases
latest
v104
Creator
Creation date
Feb 6th 2024 at 3:25am
Python version
3.10.9
Entry point
python scripts/combine_wandb_runs.py
Arguments
N/A
Source type
Artifact link
Input types
Key
Value
activation_checkpointing
string
canceled_check_interval
number
compile
none
console_log_interval
number
data
datasets
none
drop_last
boolean
num_workers
number
pad_direction
string
paths
list
persistent_workers
boolean
pin_memory
boolean
prefetch_factor
number
timeout
number
device_eval_batch_size
number
device_train_batch_size
number
device_train_grad_accum
number
device_train_microbatch_size
number
dry_run
boolean
early_stopping_factor
none
epoch
number
eval_interval
number
eval_on_load
boolean
eval_subset_num_batches
number
evaluators
list
fast_forward_batches
none
force_save_unsharded
boolean
fsdp
precision
string
sharding_strategy
string
use_orig_params
boolean
wrapping_strategy
string
global_train_batch_size
number
load_path
string
load_path_sharded_checkpointer
none
max_duration
string
max_grad_norm
number
max_grad_norm_ratio
none
model
activation_type
string
alibi
boolean
alibi_bias_max
number
attention_dropout
number
attention_layer_norm
boolean
attention_layer_norm_with_affine
boolean
bias_for_layer_norm
boolean
block_group_size
number
block_type
string
d_model
number
embedding_dropout
number
embedding_size
number
eos_token_id
number
flash_attention
boolean
include_bias
boolean
init_cutoff_factor
none
init_device
string
init_fn
string
init_std
number
layer_norm_type
string
layer_norm_with_affine
boolean
max_sequence_length
number
mlp_hidden_size
number
mlp_ratio
number
multi_query_attention
boolean
n_heads
number
n_layers
number
pad_token_id
number
precision
string
residual_dropout
number
rope
boolean
rope_full_precision
boolean
scale_logits
boolean
vocab_size
number
weight_tying
boolean
new_style_checkpoints
none
no_pre_train_checkpoint
boolean
optimizer
betas
list
decay_embeddings
boolean
decay_norm_and_bias
boolean
learning_rate
number
metrics_log_interval
number
name
string
no_decay_norm_and_bias
none
weight_decay
number
precision
string
python_profiling
boolean
remote_save_folder
string
reset_optimizer_state
boolean
restore_dataloader
boolean
run_name
string
save_data_indices
boolean
save_folder
string
save_interval
number
save_interval_ephemeral
number
save_interval_unsharded
none
save_num_checkpoints_to_keep
number
save_num_unsharded_checkpoints_to_keep
number
save_overwrite
boolean
scheduler
alpha_f
number
grad_clip_warmup_factor
number
grad_clip_warmup_steps
number
name
string
t_max
none
t_warmup
number
seed
number
sharded_checkpointer
string
softmax_auxiliary_loss
boolean
speed_monitor
gpu_flops_available
none
window_size
number
stop_at
none
time_limit
number
tokenizer
identifier
string
truncate_direction
string
torch_profiling
boolean
Output types
Key
Value
System
Peak GPU Memory (MB)
none
_runtime
number
_step
number
_timestamp
number
eval
4chan-validation/CrossEntropyLoss
none
4chan-validation/Perplexity
none
c4_100_domains-validation/CrossEntropyLoss
none
c4_100_domains-validation/Perplexity
none
c4_en-validation/CrossEntropyLoss
none
c4_en-validation/Perplexity
none
downstream/arc_easy_acc
none
downstream/commitment_bank_acc
none
downstream/copa_acc
none
downstream/hellaswag_len_norm
none
downstream/mrpc_f1
none
downstream/openbook_qa_len_norm
none
downstream/piqa_len_norm
none
downstream/rte_len_norm
none
downstream/sciq_acc
none
downstream/sst2_acc
none
downstream/winogrande_acc
none
gab-validation/CrossEntropyLoss
none
gab-validation/Perplexity
none
ice-validation/CrossEntropyLoss
none
ice-validation/Perplexity
none
m2d2_s2orc-validation/CrossEntropyLoss
none
m2d2_s2orc-validation/Perplexity
none
m2d2_wiki-validation/CrossEntropyLoss
none
m2d2_wiki-validation/Perplexity
none
manosphere-validation/CrossEntropyLoss
none
manosphere-validation/Perplexity
none
mc4_en-validation/CrossEntropyLoss
none
mc4_en-validation/Perplexity
none
pile-validation/CrossEntropyLoss
none
pile-validation/Perplexity
none
ptb-validation/CrossEntropyLoss
none
ptb-validation/Perplexity
none
twitterAEE-validation/CrossEntropyLoss
none
twitterAEE-validation/Perplexity
none
wikitext_103-validation/CrossEntropyLoss
none
wikitext_103-validation/Perplexity
none
optim
clipping_rate
none
exp_avg_sq/transformer.blocks.0.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.0.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.0.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.0.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.0.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.0.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.0.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.0.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.0.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.0.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.0.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.0.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.0.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.0.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.0.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.0.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.1.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.1.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.1.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.1.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.1.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.1.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.1.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.1.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.1.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.1.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.1.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.1.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.1.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.1.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.1.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.1.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.2.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.2.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.2.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.2.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.2.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.2.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.2.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.2.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.2.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.2.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.2.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.2.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.2.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.2.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.2.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.2.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.3.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.3.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.3.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.3.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.3.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.3.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.3.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.3.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.3.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.3.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.3.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.3.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.3.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.3.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.3.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.3.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.4.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.4.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.4.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.4.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.4.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.4.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.4.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.4.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.4.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.4.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.4.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.4.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.4.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.4.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.4.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.4.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.5.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.5.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.5.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.5.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.5.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.5.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.5.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.5.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.5.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.5.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.5.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.5.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.5.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.5.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.5.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.5.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.6.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.6.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.6.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.6.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.6.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.6.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.6.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.6.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.6.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.6.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.6.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.6.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.6.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.6.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.6.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.6.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.7.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.7.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.7.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.7.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.7.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.7.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.7.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.7.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.7.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.7.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.7.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.7.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.7.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.7.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.7.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.7.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.8.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.8.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.8.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.8.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.8.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.8.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.8.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.8.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.8.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.8.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.8.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.8.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.8.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.8.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.8.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.8.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.9.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.9.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.9.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.9.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.9.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.9.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.9.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.9.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.9.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.9.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.9.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.9.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.9.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.9.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.9.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.9.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.10.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.10.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.10.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.10.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.10.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.10.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.10.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.10.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.10.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.10.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.10.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.10.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.10.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.10.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.10.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.10.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.11.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.11.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.11.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.11.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.11.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.11.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.11.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.11.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.11.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.11.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.11.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.11.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.11.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.11.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.11.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.11.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.12.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.12.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.12.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.12.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.12.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.12.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.12.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.12.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.12.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.12.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.12.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.12.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.12.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.12.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.12.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.12.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.13.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.13.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.13.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.13.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.13.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.13.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.13.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.13.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.13.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.13.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.13.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.13.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.13.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.13.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.13.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.13.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.14.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.14.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.14.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.14.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.14.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.14.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.14.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.14.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.14.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.14.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.14.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.14.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.14.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.14.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.14.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.14.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.15.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.15.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.15.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.15.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.15.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.15.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.15.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.15.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.15.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.15.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.15.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.15.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.15.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.15.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.15.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.15.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.16.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.16.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.16.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.16.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.16.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.16.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.16.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.16.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.16.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.16.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.16.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.16.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.16.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.16.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.16.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.16.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.17.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.17.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.17.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.17.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.17.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.17.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.17.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.17.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.17.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.17.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.17.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.17.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.17.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.17.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.17.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.17.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.18.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.18.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.18.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.18.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.18.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.18.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.18.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.18.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.18.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.18.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.18.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.18.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.18.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.18.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.18.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.18.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.19.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.19.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.19.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.19.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.19.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.19.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.19.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.19.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.19.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.19.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.19.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.19.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.19.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.19.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.19.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.19.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.20.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.20.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.20.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.20.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.20.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.20.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.20.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.20.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.20.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.20.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.20.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.20.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.20.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.20.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.20.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.20.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.21.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.21.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.21.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.21.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.21.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.21.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.21.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.21.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.21.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.21.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.21.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.21.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.21.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.21.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.21.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.21.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.22.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.22.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.22.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.22.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.22.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.22.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.22.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.22.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.22.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.22.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.22.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.22.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.22.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.22.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.22.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.22.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.23.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.23.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.23.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.23.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.23.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.23.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.23.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.23.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.23.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.23.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.23.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.23.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.23.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.23.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.23.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.23.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.24.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.24.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.24.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.24.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.24.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.24.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.24.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.24.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.24.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.24.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.24.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.24.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.24.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.24.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.24.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.24.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.25.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.25.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.25.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.25.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.25.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.25.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.25.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.25.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.25.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.25.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.25.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.25.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.25.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.25.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.25.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.25.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.26.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.26.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.26.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.26.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.26.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.26.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.26.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.26.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.26.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.26.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.26.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.26.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.26.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.26.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.26.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.26.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.27.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.27.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.27.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.27.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.27.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.27.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.27.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.27.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.27.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.27.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.27.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.27.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.27.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.27.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.27.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.27.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.28.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.28.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.28.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.28.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.28.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.28.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.28.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.28.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.28.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.28.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.28.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.28.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.28.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.28.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.28.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.28.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.29.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.29.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.29.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.29.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.29.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.29.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.29.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.29.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.29.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.29.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.29.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.29.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.29.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.29.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.29.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.29.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.30.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.30.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.30.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.30.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.30.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.30.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.30.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.30.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.30.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.30.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.30.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.30.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.30.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.30.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.30.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.30.ff_proj.weight.norm
none
exp_avg_sq/transformer.blocks.31.att_proj.weight.avg
none
exp_avg_sq/transformer.blocks.31.att_proj.weight.max
none
exp_avg_sq/transformer.blocks.31.att_proj.weight.min
none
exp_avg_sq/transformer.blocks.31.att_proj.weight.norm
none
exp_avg_sq/transformer.blocks.31.attn_out.weight.avg
none
exp_avg_sq/transformer.blocks.31.attn_out.weight.max
none
exp_avg_sq/transformer.blocks.31.attn_out.weight.min
none
exp_avg_sq/transformer.blocks.31.attn_out.weight.norm
none
exp_avg_sq/transformer.blocks.31.ff_out.weight.avg
none
exp_avg_sq/transformer.blocks.31.ff_out.weight.max
none
exp_avg_sq/transformer.blocks.31.ff_out.weight.min
none
exp_avg_sq/transformer.blocks.31.ff_out.weight.norm
none
exp_avg_sq/transformer.blocks.31.ff_proj.weight.avg
none
exp_avg_sq/transformer.blocks.31.ff_proj.weight.max
none
exp_avg_sq/transformer.blocks.31.ff_proj.weight.min
none
exp_avg_sq/transformer.blocks.31.ff_proj.weight.norm
none
exp_avg_sq/transformer.ff_out.weight.avg
none
exp_avg_sq/transformer.ff_out.weight.max
none
exp_avg_sq/transformer.ff_out.weight.min
none
exp_avg_sq/transformer.ff_out.weight.norm
none
exp_avg_sq/transformer.wte.weight.avg
none
exp_avg_sq/transformer.wte.weight.max
none
exp_avg_sq/transformer.wte.weight.min
none
exp_avg_sq/transformer.wte.weight.norm
none
exp_avg/transformer.blocks.0.att_proj.weight.avg
none
exp_avg/transformer.blocks.0.att_proj.weight.max
none
exp_avg/transformer.blocks.0.att_proj.weight.min
none
exp_avg/transformer.blocks.0.att_proj.weight.norm
none
exp_avg/transformer.blocks.0.attn_out.weight.avg
none
exp_avg/transformer.blocks.0.attn_out.weight.max
none
exp_avg/transformer.blocks.0.attn_out.weight.min
none
exp_avg/transformer.blocks.0.attn_out.weight.norm
none
exp_avg/transformer.blocks.0.ff_out.weight.avg
none
exp_avg/transformer.blocks.0.ff_out.weight.max
none
exp_avg/transformer.blocks.0.ff_out.weight.min
none
exp_avg/transformer.blocks.0.ff_out.weight.norm
none
exp_avg/transformer.blocks.0.ff_proj.weight.avg
none
exp_avg/transformer.blocks.0.ff_proj.weight.max
none
exp_avg/transformer.blocks.0.ff_proj.weight.min
none
exp_avg/transformer.blocks.0.ff_proj.weight.norm
none
exp_avg/transformer.blocks.1.att_proj.weight.avg
none
exp_avg/transformer.blocks.1.att_proj.weight.max
none
exp_avg/transformer.blocks.1.att_proj.weight.min
none
exp_avg/transformer.blocks.1.att_proj.weight.norm
none
exp_avg/transformer.blocks.1.attn_out.weight.avg
none
exp_avg/transformer.blocks.1.attn_out.weight.max
none
exp_avg/transformer.blocks.1.attn_out.weight.min
none
exp_avg/transformer.blocks.1.attn_out.weight.norm
none
exp_avg/transformer.blocks.1.ff_out.weight.avg
none
exp_avg/transformer.blocks.1.ff_out.weight.max
none
exp_avg/transformer.blocks.1.ff_out.weight.min
none
exp_avg/transformer.blocks.1.ff_out.weight.norm
none
exp_avg/transformer.blocks.1.ff_proj.weight.avg
none
exp_avg/transformer.blocks.1.ff_proj.weight.max
none
exp_avg/transformer.blocks.1.ff_proj.weight.min
none
exp_avg/transformer.blocks.1.ff_proj.weight.norm
none
exp_avg/transformer.blocks.2.att_proj.weight.avg
none
exp_avg/transformer.blocks.2.att_proj.weight.max
none
exp_avg/transformer.blocks.2.att_proj.weight.min
none
exp_avg/transformer.blocks.2.att_proj.weight.norm
none
exp_avg/transformer.blocks.2.attn_out.weight.avg
none
exp_avg/transformer.blocks.2.attn_out.weight.max
none
exp_avg/transformer.blocks.2.attn_out.weight.min
none
exp_avg/transformer.blocks.2.attn_out.weight.norm
none
exp_avg/transformer.blocks.2.ff_out.weight.avg
none
exp_avg/transformer.blocks.2.ff_out.weight.max
none
exp_avg/transformer.blocks.2.ff_out.weight.min
none
exp_avg/transformer.blocks.2.ff_out.weight.norm
none
exp_avg/transformer.blocks.2.ff_proj.weight.avg
none
exp_avg/transformer.blocks.2.ff_proj.weight.max
none
exp_avg/transformer.blocks.2.ff_proj.weight.min
none
exp_avg/transformer.blocks.2.ff_proj.weight.norm
none
exp_avg/transformer.blocks.3.att_proj.weight.avg
none
exp_avg/transformer.blocks.3.att_proj.weight.max
none
exp_avg/transformer.blocks.3.att_proj.weight.min
none
exp_avg/transformer.blocks.3.att_proj.weight.norm
none
exp_avg/transformer.blocks.3.attn_out.weight.avg
none
exp_avg/transformer.blocks.3.attn_out.weight.max
none
exp_avg/transformer.blocks.3.attn_out.weight.min
none
exp_avg/transformer.blocks.3.attn_out.weight.norm
none
exp_avg/transformer.blocks.3.ff_out.weight.avg
none
exp_avg/transformer.blocks.3.ff_out.weight.max
none
exp_avg/transformer.blocks.3.ff_out.weight.min
none
exp_avg/transformer.blocks.3.ff_out.weight.norm
none
exp_avg/transformer.blocks.3.ff_proj.weight.avg
none
exp_avg/transformer.blocks.3.ff_proj.weight.max
none
exp_avg/transformer.blocks.3.ff_proj.weight.min
none
exp_avg/transformer.blocks.3.ff_proj.weight.norm
none
exp_avg/transformer.blocks.4.att_proj.weight.avg
none
exp_avg/transformer.blocks.4.att_proj.weight.max
none
exp_avg/transformer.blocks.4.att_proj.weight.min
none
exp_avg/transformer.blocks.4.att_proj.weight.norm
none
exp_avg/transformer.blocks.4.attn_out.weight.avg
none
exp_avg/transformer.blocks.4.attn_out.weight.max
none
exp_avg/transformer.blocks.4.attn_out.weight.min
none
exp_avg/transformer.blocks.4.attn_out.weight.norm
none
exp_avg/transformer.blocks.4.ff_out.weight.avg
none
exp_avg/transformer.blocks.4.ff_out.weight.max
none
exp_avg/transformer.blocks.4.ff_out.weight.min
none
exp_avg/transformer.blocks.4.ff_out.weight.norm
none
exp_avg/transformer.blocks.4.ff_proj.weight.avg
none
exp_avg/transformer.blocks.4.ff_proj.weight.max
none
exp_avg/transformer.blocks.4.ff_proj.weight.min
none
exp_avg/transformer.blocks.4.ff_proj.weight.norm
none
exp_avg/transformer.blocks.5.att_proj.weight.avg
none
exp_avg/transformer.blocks.5.att_proj.weight.max
none
exp_avg/transformer.blocks.5.att_proj.weight.min
none
exp_avg/transformer.blocks.5.att_proj.weight.norm
none
exp_avg/transformer.blocks.5.attn_out.weight.avg
none
exp_avg/transformer.blocks.5.attn_out.weight.max
none
exp_avg/transformer.blocks.5.attn_out.weight.min
none
exp_avg/transformer.blocks.5.attn_out.weight.norm
none
exp_avg/transformer.blocks.5.ff_out.weight.avg
none
exp_avg/transformer.blocks.5.ff_out.weight.max
none
exp_avg/transformer.blocks.5.ff_out.weight.min
none
exp_avg/transformer.blocks.5.ff_out.weight.norm
none
exp_avg/transformer.blocks.5.ff_proj.weight.avg
none
exp_avg/transformer.blocks.5.ff_proj.weight.max
none
exp_avg/transformer.blocks.5.ff_proj.weight.min
none
exp_avg/transformer.blocks.5.ff_proj.weight.norm
none
exp_avg/transformer.blocks.6.att_proj.weight.avg
none
exp_avg/transformer.blocks.6.att_proj.weight.max
none
exp_avg/transformer.blocks.6.att_proj.weight.min
none
exp_avg/transformer.blocks.6.att_proj.weight.norm
none
exp_avg/transformer.blocks.6.attn_out.weight.avg
none
exp_avg/transformer.blocks.6.attn_out.weight.max
none
exp_avg/transformer.blocks.6.attn_out.weight.min
none
exp_avg/transformer.blocks.6.attn_out.weight.norm
none
exp_avg/transformer.blocks.6.ff_out.weight.avg
none
exp_avg/transformer.blocks.6.ff_out.weight.max
none
exp_avg/transformer.blocks.6.ff_out.weight.min
none
exp_avg/transformer.blocks.6.ff_out.weight.norm
none
exp_avg/transformer.blocks.6.ff_proj.weight.avg
none
exp_avg/transformer.blocks.6.ff_proj.weight.max
none
exp_avg/transformer.blocks.6.ff_proj.weight.min
none
exp_avg/transformer.blocks.6.ff_proj.weight.norm
none
exp_avg/transformer.blocks.7.att_proj.weight.avg
none
exp_avg/transformer.blocks.7.att_proj.weight.max
none
exp_avg/transformer.blocks.7.att_proj.weight.min
none
exp_avg/transformer.blocks.7.att_proj.weight.norm
none
exp_avg/transformer.blocks.7.attn_out.weight.avg
none
exp_avg/transformer.blocks.7.attn_out.weight.max
none
exp_avg/transformer.blocks.7.attn_out.weight.min
none
exp_avg/transformer.blocks.7.attn_out.weight.norm
none
exp_avg/transformer.blocks.7.ff_out.weight.avg
none
exp_avg/transformer.blocks.7.ff_out.weight.max
none
exp_avg/transformer.blocks.7.ff_out.weight.min
none
exp_avg/transformer.blocks.7.ff_out.weight.norm
none
exp_avg/transformer.blocks.7.ff_proj.weight.avg
none
exp_avg/transformer.blocks.7.ff_proj.weight.max
none
exp_avg/transformer.blocks.7.ff_proj.weight.min
none
exp_avg/transformer.blocks.7.ff_proj.weight.norm
none
exp_avg/transformer.blocks.8.att_proj.weight.avg
none
exp_avg/transformer.blocks.8.att_proj.weight.max
none
exp_avg/transformer.blocks.8.att_proj.weight.min
none
exp_avg/transformer.blocks.8.att_proj.weight.norm
none
exp_avg/transformer.blocks.8.attn_out.weight.avg
none
exp_avg/transformer.blocks.8.attn_out.weight.max
none
exp_avg/transformer.blocks.8.attn_out.weight.min
none
exp_avg/transformer.blocks.8.attn_out.weight.norm
none
exp_avg/transformer.blocks.8.ff_out.weight.avg
none
exp_avg/transformer.blocks.8.ff_out.weight.max
none
exp_avg/transformer.blocks.8.ff_out.weight.min
none
exp_avg/transformer.blocks.8.ff_out.weight.norm
none
exp_avg/transformer.blocks.8.ff_proj.weight.avg
none
exp_avg/transformer.blocks.8.ff_proj.weight.max
none
exp_avg/transformer.blocks.8.ff_proj.weight.min
none
exp_avg/transformer.blocks.8.ff_proj.weight.norm
none
exp_avg/transformer.blocks.9.att_proj.weight.avg
none
exp_avg/transformer.blocks.9.att_proj.weight.max
none
exp_avg/transformer.blocks.9.att_proj.weight.min
none
exp_avg/transformer.blocks.9.att_proj.weight.norm
none
exp_avg/transformer.blocks.9.attn_out.weight.avg
none
exp_avg/transformer.blocks.9.attn_out.weight.max
none
exp_avg/transformer.blocks.9.attn_out.weight.min
none
exp_avg/transformer.blocks.9.attn_out.weight.norm
none
exp_avg/transformer.blocks.9.ff_out.weight.avg
none
exp_avg/transformer.blocks.9.ff_out.weight.max
none
exp_avg/transformer.blocks.9.ff_out.weight.min
none
exp_avg/transformer.blocks.9.ff_out.weight.norm
none
exp_avg/transformer.blocks.9.ff_proj.weight.avg
none
exp_avg/transformer.blocks.9.ff_proj.weight.max
none
exp_avg/transformer.blocks.9.ff_proj.weight.min
none
exp_avg/transformer.blocks.9.ff_proj.weight.norm
none
exp_avg/transformer.blocks.10.att_proj.weight.avg
none
exp_avg/transformer.blocks.10.att_proj.weight.max
none
exp_avg/transformer.blocks.10.att_proj.weight.min
none
exp_avg/transformer.blocks.10.att_proj.weight.norm
none
exp_avg/transformer.blocks.10.attn_out.weight.avg
none
exp_avg/transformer.blocks.10.attn_out.weight.max
none
exp_avg/transformer.blocks.10.attn_out.weight.min
none
exp_avg/transformer.blocks.10.attn_out.weight.norm
none
exp_avg/transformer.blocks.10.ff_out.weight.avg
none
exp_avg/transformer.blocks.10.ff_out.weight.max
none
exp_avg/transformer.blocks.10.ff_out.weight.min
none
exp_avg/transformer.blocks.10.ff_out.weight.norm
none
exp_avg/transformer.blocks.10.ff_proj.weight.avg
none
exp_avg/transformer.blocks.10.ff_proj.weight.max
none
exp_avg/transformer.blocks.10.ff_proj.weight.min
none
exp_avg/transformer.blocks.10.ff_proj.weight.norm
none
exp_avg/transformer.blocks.11.att_proj.weight.avg
none
exp_avg/transformer.blocks.11.att_proj.weight.max
none
exp_avg/transformer.blocks.11.att_proj.weight.min
none
exp_avg/transformer.blocks.11.att_proj.weight.norm
none
exp_avg/transformer.blocks.11.attn_out.weight.avg
none
exp_avg/transformer.blocks.11.attn_out.weight.max
none
exp_avg/transformer.blocks.11.attn_out.weight.min
none
exp_avg/transformer.blocks.11.attn_out.weight.norm
none
exp_avg/transformer.blocks.11.ff_out.weight.avg
none
exp_avg/transformer.blocks.11.ff_out.weight.max
none
exp_avg/transformer.blocks.11.ff_out.weight.min
none
exp_avg/transformer.blocks.11.ff_out.weight.norm
none
exp_avg/transformer.blocks.11.ff_proj.weight.avg
none
exp_avg/transformer.blocks.11.ff_proj.weight.max
none
exp_avg/transformer.blocks.11.ff_proj.weight.min
none
exp_avg/transformer.blocks.11.ff_proj.weight.norm
none
exp_avg/transformer.blocks.12.att_proj.weight.avg
none
exp_avg/transformer.blocks.12.att_proj.weight.max
none
exp_avg/transformer.blocks.12.att_proj.weight.min
none
exp_avg/transformer.blocks.12.att_proj.weight.norm
none
exp_avg/transformer.blocks.12.attn_out.weight.avg
none
exp_avg/transformer.blocks.12.attn_out.weight.max
none
exp_avg/transformer.blocks.12.attn_out.weight.min
none
exp_avg/transformer.blocks.12.attn_out.weight.norm
none
exp_avg/transformer.blocks.12.ff_out.weight.avg
none
exp_avg/transformer.blocks.12.ff_out.weight.max
none
exp_avg/transformer.blocks.12.ff_out.weight.min
none
exp_avg/transformer.blocks.12.ff_out.weight.norm
none
exp_avg/transformer.blocks.12.ff_proj.weight.avg
none
exp_avg/transformer.blocks.12.ff_proj.weight.max
none
exp_avg/transformer.blocks.12.ff_proj.weight.min
none
exp_avg/transformer.blocks.12.ff_proj.weight.norm
none
exp_avg/transformer.blocks.13.att_proj.weight.avg
none
exp_avg/transformer.blocks.13.att_proj.weight.max
none
exp_avg/transformer.blocks.13.att_proj.weight.min
none
exp_avg/transformer.blocks.13.att_proj.weight.norm
none
exp_avg/transformer.blocks.13.attn_out.weight.avg
none
exp_avg/transformer.blocks.13.attn_out.weight.max
none
exp_avg/transformer.blocks.13.attn_out.weight.min
none
exp_avg/transformer.blocks.13.attn_out.weight.norm
none
exp_avg/transformer.blocks.13.ff_out.weight.avg
none
exp_avg/transformer.blocks.13.ff_out.weight.max
none
exp_avg/transformer.blocks.13.ff_out.weight.min
none
exp_avg/transformer.blocks.13.ff_out.weight.norm
none
exp_avg/transformer.blocks.13.ff_proj.weight.avg
none
exp_avg/transformer.blocks.13.ff_proj.weight.max
none
exp_avg/transformer.blocks.13.ff_proj.weight.min
none
exp_avg/transformer.blocks.13.ff_proj.weight.norm
none
exp_avg/transformer.blocks.14.att_proj.weight.avg
none
exp_avg/transformer.blocks.14.att_proj.weight.max
none
exp_avg/transformer.blocks.14.att_proj.weight.min
none
exp_avg/transformer.blocks.14.att_proj.weight.norm
none
exp_avg/transformer.blocks.14.attn_out.weight.avg
none
exp_avg/transformer.blocks.14.attn_out.weight.max
none
exp_avg/transformer.blocks.14.attn_out.weight.min
none
exp_avg/transformer.blocks.14.attn_out.weight.norm
none
exp_avg/transformer.blocks.14.ff_out.weight.avg
none
exp_avg/transformer.blocks.14.ff_out.weight.max
none
exp_avg/transformer.blocks.14.ff_out.weight.min
none
exp_avg/transformer.blocks.14.ff_out.weight.norm
none
exp_avg/transformer.blocks.14.ff_proj.weight.avg
none
exp_avg/transformer.blocks.14.ff_proj.weight.max
none
exp_avg/transformer.blocks.14.ff_proj.weight.min
none
exp_avg/transformer.blocks.14.ff_proj.weight.norm
none
exp_avg/transformer.blocks.15.att_proj.weight.avg
none
exp_avg/transformer.blocks.15.att_proj.weight.max
none
exp_avg/transformer.blocks.15.att_proj.weight.min
none
exp_avg/transformer.blocks.15.att_proj.weight.norm
none
exp_avg/transformer.blocks.15.attn_out.weight.avg
none
exp_avg/transformer.blocks.15.attn_out.weight.max
none
exp_avg/transformer.blocks.15.attn_out.weight.min
none
exp_avg/transformer.blocks.15.attn_out.weight.norm
none
exp_avg/transformer.blocks.15.ff_out.weight.avg
none
exp_avg/transformer.blocks.15.ff_out.weight.max
none
exp_avg/transformer.blocks.15.ff_out.weight.min
none
exp_avg/transformer.blocks.15.ff_out.weight.norm
none
exp_avg/transformer.blocks.15.ff_proj.weight.avg
none
exp_avg/transformer.blocks.15.ff_proj.weight.max
none
exp_avg/transformer.blocks.15.ff_proj.weight.min
none
exp_avg/transformer.blocks.15.ff_proj.weight.norm
none
exp_avg/transformer.blocks.16.att_proj.weight.avg
none
exp_avg/transformer.blocks.16.att_proj.weight.max
none
exp_avg/transformer.blocks.16.att_proj.weight.min
none
exp_avg/transformer.blocks.16.att_proj.weight.norm
none
exp_avg/transformer.blocks.16.attn_out.weight.avg
none
exp_avg/transformer.blocks.16.attn_out.weight.max
none
exp_avg/transformer.blocks.16.attn_out.weight.min
none
exp_avg/transformer.blocks.16.attn_out.weight.norm
none
exp_avg/transformer.blocks.16.ff_out.weight.avg
none
exp_avg/transformer.blocks.16.ff_out.weight.max
none
exp_avg/transformer.blocks.16.ff_out.weight.min
none
exp_avg/transformer.blocks.16.ff_out.weight.norm
none
exp_avg/transformer.blocks.16.ff_proj.weight.avg
none
exp_avg/transformer.blocks.16.ff_proj.weight.max
none
exp_avg/transformer.blocks.16.ff_proj.weight.min
none
exp_avg/transformer.blocks.16.ff_proj.weight.norm
none
exp_avg/transformer.blocks.17.att_proj.weight.avg
none
exp_avg/transformer.blocks.17.att_proj.weight.max
none
exp_avg/transformer.blocks.17.att_proj.weight.min
none
exp_avg/transformer.blocks.17.att_proj.weight.norm
none
exp_avg/transformer.blocks.17.attn_out.weight.avg
none
exp_avg/transformer.blocks.17.attn_out.weight.max
none
exp_avg/transformer.blocks.17.attn_out.weight.min
none
exp_avg/transformer.blocks.17.attn_out.weight.norm
none
exp_avg/transformer.blocks.17.ff_out.weight.avg
none
exp_avg/transformer.blocks.17.ff_out.weight.max
none
exp_avg/transformer.blocks.17.ff_out.weight.min
none
exp_avg/transformer.blocks.17.ff_out.weight.norm
none
exp_avg/transformer.blocks.17.ff_proj.weight.avg
none
exp_avg/transformer.blocks.17.ff_proj.weight.max
none
exp_avg/transformer.blocks.17.ff_proj.weight.min
none
exp_avg/transformer.blocks.17.ff_proj.weight.norm
none
exp_avg/transformer.blocks.18.att_proj.weight.avg
none
exp_avg/transformer.blocks.18.att_proj.weight.max
none
exp_avg/transformer.blocks.18.att_proj.weight.min
none
exp_avg/transformer.blocks.18.att_proj.weight.norm
none
exp_avg/transformer.blocks.18.attn_out.weight.avg
none
exp_avg/transformer.blocks.18.attn_out.weight.max
none
exp_avg/transformer.blocks.18.attn_out.weight.min
none
exp_avg/transformer.blocks.18.attn_out.weight.norm
none
exp_avg/transformer.blocks.18.ff_out.weight.avg
none
exp_avg/transformer.blocks.18.ff_out.weight.max
none
exp_avg/transformer.blocks.18.ff_out.weight.min
none
exp_avg/transformer.blocks.18.ff_out.weight.norm
none
exp_avg/transformer.blocks.18.ff_proj.weight.avg
none
exp_avg/transformer.blocks.18.ff_proj.weight.max
none
exp_avg/transformer.blocks.18.ff_proj.weight.min
none
exp_avg/transformer.blocks.18.ff_proj.weight.norm
none
exp_avg/transformer.blocks.19.att_proj.weight.avg
none
exp_avg/transformer.blocks.19.att_proj.weight.max
none
exp_avg/transformer.blocks.19.att_proj.weight.min
none
exp_avg/transformer.blocks.19.att_proj.weight.norm
none
exp_avg/transformer.blocks.19.attn_out.weight.avg
none
exp_avg/transformer.blocks.19.attn_out.weight.max
none
exp_avg/transformer.blocks.19.attn_out.weight.min
none
exp_avg/transformer.blocks.19.attn_out.weight.norm
none
exp_avg/transformer.blocks.19.ff_out.weight.avg
none
exp_avg/transformer.blocks.19.ff_out.weight.max
none
exp_avg/transformer.blocks.19.ff_out.weight.min
none
exp_avg/transformer.blocks.19.ff_out.weight.norm
none
exp_avg/transformer.blocks.19.ff_proj.weight.avg
none
exp_avg/transformer.blocks.19.ff_proj.weight.max
none
exp_avg/transformer.blocks.19.ff_proj.weight.min
none
exp_avg/transformer.blocks.19.ff_proj.weight.norm
none
exp_avg/transformer.blocks.20.att_proj.weight.avg
none
exp_avg/transformer.blocks.20.att_proj.weight.max
none
exp_avg/transformer.blocks.20.att_proj.weight.min
none
exp_avg/transformer.blocks.20.att_proj.weight.norm
none
exp_avg/transformer.blocks.20.attn_out.weight.avg
none
exp_avg/transformer.blocks.20.attn_out.weight.max
none
exp_avg/transformer.blocks.20.attn_out.weight.min
none
exp_avg/transformer.blocks.20.attn_out.weight.norm
none
exp_avg/transformer.blocks.20.ff_out.weight.avg
none
exp_avg/transformer.blocks.20.ff_out.weight.max
none
exp_avg/transformer.blocks.20.ff_out.weight.min
none
exp_avg/transformer.blocks.20.ff_out.weight.norm
none
exp_avg/transformer.blocks.20.ff_proj.weight.avg
none
exp_avg/transformer.blocks.20.ff_proj.weight.max
none
exp_avg/transformer.blocks.20.ff_proj.weight.min
none
exp_avg/transformer.blocks.20.ff_proj.weight.norm
none
exp_avg/transformer.blocks.21.att_proj.weight.avg
none
exp_avg/transformer.blocks.21.att_proj.weight.max
none
exp_avg/transformer.blocks.21.att_proj.weight.min
none
exp_avg/transformer.blocks.21.att_proj.weight.norm
none
exp_avg/transformer.blocks.21.attn_out.weight.avg
none
exp_avg/transformer.blocks.21.attn_out.weight.max
none
exp_avg/transformer.blocks.21.attn_out.weight.min
none
exp_avg/transformer.blocks.21.attn_out.weight.norm
none
exp_avg/transformer.blocks.21.ff_out.weight.avg
none
exp_avg/transformer.blocks.21.ff_out.weight.max
none
exp_avg/transformer.blocks.21.ff_out.weight.min
none
exp_avg/transformer.blocks.21.ff_out.weight.norm
none
exp_avg/transformer.blocks.21.ff_proj.weight.avg
none
exp_avg/transformer.blocks.21.ff_proj.weight.max
none
exp_avg/transformer.blocks.21.ff_proj.weight.min
none
exp_avg/transformer.blocks.21.ff_proj.weight.norm
none
exp_avg/transformer.blocks.22.att_proj.weight.avg
none
exp_avg/transformer.blocks.22.att_proj.weight.max
none
exp_avg/transformer.blocks.22.att_proj.weight.min
none
exp_avg/transformer.blocks.22.att_proj.weight.norm
none
exp_avg/transformer.blocks.22.attn_out.weight.avg
none
exp_avg/transformer.blocks.22.attn_out.weight.max
none
exp_avg/transformer.blocks.22.attn_out.weight.min
none
exp_avg/transformer.blocks.22.attn_out.weight.norm
none
exp_avg/transformer.blocks.22.ff_out.weight.avg
none
exp_avg/transformer.blocks.22.ff_out.weight.max
none
exp_avg/transformer.blocks.22.ff_out.weight.min
none
exp_avg/transformer.blocks.22.ff_out.weight.norm
none
exp_avg/transformer.blocks.22.ff_proj.weight.avg
none
exp_avg/transformer.blocks.22.ff_proj.weight.max
none
exp_avg/transformer.blocks.22.ff_proj.weight.min
none
exp_avg/transformer.blocks.22.ff_proj.weight.norm
none
exp_avg/transformer.blocks.23.att_proj.weight.avg
none
exp_avg/transformer.blocks.23.att_proj.weight.max
none
exp_avg/transformer.blocks.23.att_proj.weight.min
none
exp_avg/transformer.blocks.23.att_proj.weight.norm
none
exp_avg/transformer.blocks.23.attn_out.weight.avg
none
exp_avg/transformer.blocks.23.attn_out.weight.max
none
exp_avg/transformer.blocks.23.attn_out.weight.min
none
exp_avg/transformer.blocks.23.attn_out.weight.norm
none
exp_avg/transformer.blocks.23.ff_out.weight.avg
none
exp_avg/transformer.blocks.23.ff_out.weight.max
none
exp_avg/transformer.blocks.23.ff_out.weight.min
none
exp_avg/transformer.blocks.23.ff_out.weight.norm
none
exp_avg/transformer.blocks.23.ff_proj.weight.avg
none
exp_avg/transformer.blocks.23.ff_proj.weight.max
none
exp_avg/transformer.blocks.23.ff_proj.weight.min
none
exp_avg/transformer.blocks.23.ff_proj.weight.norm
none
exp_avg/transformer.blocks.24.att_proj.weight.avg
none
exp_avg/transformer.blocks.24.att_proj.weight.max
none
exp_avg/transformer.blocks.24.att_proj.weight.min
none
exp_avg/transformer.blocks.24.att_proj.weight.norm
none
exp_avg/transformer.blocks.24.attn_out.weight.avg
none
exp_avg/transformer.blocks.24.attn_out.weight.max
none
exp_avg/transformer.blocks.24.attn_out.weight.min
none
exp_avg/transformer.blocks.24.attn_out.weight.norm
none
exp_avg/transformer.blocks.24.ff_out.weight.avg
none
exp_avg/transformer.blocks.24.ff_out.weight.max
none
exp_avg/transformer.blocks.24.ff_out.weight.min
none
exp_avg/transformer.blocks.24.ff_out.weight.norm
none
exp_avg/transformer.blocks.24.ff_proj.weight.avg
none
exp_avg/transformer.blocks.24.ff_proj.weight.max
none
exp_avg/transformer.blocks.24.ff_proj.weight.min
none
exp_avg/transformer.blocks.24.ff_proj.weight.norm
none
exp_avg/transformer.blocks.25.att_proj.weight.avg
none
exp_avg/transformer.blocks.25.att_proj.weight.max
none
exp_avg/transformer.blocks.25.att_proj.weight.min
none
exp_avg/transformer.blocks.25.att_proj.weight.norm
none
exp_avg/transformer.blocks.25.attn_out.weight.avg
none
exp_avg/transformer.blocks.25.attn_out.weight.max
none
exp_avg/transformer.blocks.25.attn_out.weight.min
none
exp_avg/transformer.blocks.25.attn_out.weight.norm
none
exp_avg/transformer.blocks.25.ff_out.weight.avg
none
exp_avg/transformer.blocks.25.ff_out.weight.max
none
exp_avg/transformer.blocks.25.ff_out.weight.min
none
exp_avg/transformer.blocks.25.ff_out.weight.norm
none
exp_avg/transformer.blocks.25.ff_proj.weight.avg
none
exp_avg/transformer.blocks.25.ff_proj.weight.max
none
exp_avg/transformer.blocks.25.ff_proj.weight.min
none
exp_avg/transformer.blocks.25.ff_proj.weight.norm
none
exp_avg/transformer.blocks.26.att_proj.weight.avg
none
exp_avg/transformer.blocks.26.att_proj.weight.max
none
exp_avg/transformer.blocks.26.att_proj.weight.min
none
exp_avg/transformer.blocks.26.att_proj.weight.norm
none
exp_avg/transformer.blocks.26.attn_out.weight.avg
none
exp_avg/transformer.blocks.26.attn_out.weight.max
none
exp_avg/transformer.blocks.26.attn_out.weight.min
none
exp_avg/transformer.blocks.26.attn_out.weight.norm
none
exp_avg/transformer.blocks.26.ff_out.weight.avg
none
exp_avg/transformer.blocks.26.ff_out.weight.max
none
exp_avg/transformer.blocks.26.ff_out.weight.min
none
exp_avg/transformer.blocks.26.ff_out.weight.norm
none
exp_avg/transformer.blocks.26.ff_proj.weight.avg
none
exp_avg/transformer.blocks.26.ff_proj.weight.max
none
exp_avg/transformer.blocks.26.ff_proj.weight.min
none
exp_avg/transformer.blocks.26.ff_proj.weight.norm
none
exp_avg/transformer.blocks.27.att_proj.weight.avg
none
exp_avg/transformer.blocks.27.att_proj.weight.max
none
exp_avg/transformer.blocks.27.att_proj.weight.min
none
exp_avg/transformer.blocks.27.att_proj.weight.norm
none
exp_avg/transformer.blocks.27.attn_out.weight.avg
none
exp_avg/transformer.blocks.27.attn_out.weight.max
none
exp_avg/transformer.blocks.27.attn_out.weight.min
none
exp_avg/transformer.blocks.27.attn_out.weight.norm
none
exp_avg/transformer.blocks.27.ff_out.weight.avg
none
exp_avg/transformer.blocks.27.ff_out.weight.max
none
exp_avg/transformer.blocks.27.ff_out.weight.min
none
exp_avg/transformer.blocks.27.ff_out.weight.norm
none
exp_avg/transformer.blocks.27.ff_proj.weight.avg
none
exp_avg/transformer.blocks.27.ff_proj.weight.max
none
exp_avg/transformer.blocks.27.ff_proj.weight.min
none
exp_avg/transformer.blocks.27.ff_proj.weight.norm
none
exp_avg/transformer.blocks.28.att_proj.weight.avg
none
exp_avg/transformer.blocks.28.att_proj.weight.max
none
exp_avg/transformer.blocks.28.att_proj.weight.min
none
exp_avg/transformer.blocks.28.att_proj.weight.norm
none
exp_avg/transformer.blocks.28.attn_out.weight.avg
none
exp_avg/transformer.blocks.28.attn_out.weight.max
none
exp_avg/transformer.blocks.28.attn_out.weight.min
none
exp_avg/transformer.blocks.28.attn_out.weight.norm
none
exp_avg/transformer.blocks.28.ff_out.weight.avg
none
exp_avg/transformer.blocks.28.ff_out.weight.max
none
exp_avg/transformer.blocks.28.ff_out.weight.min
none
exp_avg/transformer.blocks.28.ff_out.weight.norm
none
exp_avg/transformer.blocks.28.ff_proj.weight.avg
none
exp_avg/transformer.blocks.28.ff_proj.weight.max
none
exp_avg/transformer.blocks.28.ff_proj.weight.min
none
exp_avg/transformer.blocks.28.ff_proj.weight.norm
none
exp_avg/transformer.blocks.29.att_proj.weight.avg
none
exp_avg/transformer.blocks.29.att_proj.weight.max
none
exp_avg/transformer.blocks.29.att_proj.weight.min
none
exp_avg/transformer.blocks.29.att_proj.weight.norm
none
exp_avg/transformer.blocks.29.attn_out.weight.avg
none
exp_avg/transformer.blocks.29.attn_out.weight.max
none
exp_avg/transformer.blocks.29.attn_out.weight.min
none
exp_avg/transformer.blocks.29.attn_out.weight.norm
none
exp_avg/transformer.blocks.29.ff_out.weight.avg
none
exp_avg/transformer.blocks.29.ff_out.weight.max
none
exp_avg/transformer.blocks.29.ff_out.weight.min
none
exp_avg/transformer.blocks.29.ff_out.weight.norm
none
exp_avg/transformer.blocks.29.ff_proj.weight.avg
none
exp_avg/transformer.blocks.29.ff_proj.weight.max
none
exp_avg/transformer.blocks.29.ff_proj.weight.min
none
exp_avg/transformer.blocks.29.ff_proj.weight.norm
none
exp_avg/transformer.blocks.30.att_proj.weight.avg
none
exp_avg/transformer.blocks.30.att_proj.weight.max
none
exp_avg/transformer.blocks.30.att_proj.weight.min
none
exp_avg/transformer.blocks.30.att_proj.weight.norm
none
exp_avg/transformer.blocks.30.attn_out.weight.avg
none
exp_avg/transformer.blocks.30.attn_out.weight.max
none
exp_avg/transformer.blocks.30.attn_out.weight.min
none
exp_avg/transformer.blocks.30.attn_out.weight.norm
none
exp_avg/transformer.blocks.30.ff_out.weight.avg
none
exp_avg/transformer.blocks.30.ff_out.weight.max
none
exp_avg/transformer.blocks.30.ff_out.weight.min
none
exp_avg/transformer.blocks.30.ff_out.weight.norm
none
exp_avg/transformer.blocks.30.ff_proj.weight.avg
none
exp_avg/transformer.blocks.30.ff_proj.weight.max
none
exp_avg/transformer.blocks.30.ff_proj.weight.min
none
exp_avg/transformer.blocks.30.ff_proj.weight.norm
none
exp_avg/transformer.blocks.31.att_proj.weight.avg
none
exp_avg/transformer.blocks.31.att_proj.weight.max
none
exp_avg/transformer.blocks.31.att_proj.weight.min
none
exp_avg/transformer.blocks.31.att_proj.weight.norm
none
exp_avg/transformer.blocks.31.attn_out.weight.avg
none
exp_avg/transformer.blocks.31.attn_out.weight.max
none
exp_avg/transformer.blocks.31.attn_out.weight.min
none
exp_avg/transformer.blocks.31.attn_out.weight.norm
none
exp_avg/transformer.blocks.31.ff_out.weight.avg
none
exp_avg/transformer.blocks.31.ff_out.weight.max
none
exp_avg/transformer.blocks.31.ff_out.weight.min
none
exp_avg/transformer.blocks.31.ff_out.weight.norm
none
exp_avg/transformer.blocks.31.ff_proj.weight.avg
none
exp_avg/transformer.blocks.31.ff_proj.weight.max
none
exp_avg/transformer.blocks.31.ff_proj.weight.min
none
exp_avg/transformer.blocks.31.ff_proj.weight.norm
none
exp_avg/transformer.ff_out.weight.avg
none
exp_avg/transformer.ff_out.weight.max
none
exp_avg/transformer.ff_out.weight.min
none
exp_avg/transformer.ff_out.weight.norm
none
exp_avg/transformer.wte.weight.avg
none
exp_avg/transformer.wte.weight.max
none
exp_avg/transformer.wte.weight.min
none
exp_avg/transformer.wte.weight.norm
none
grad/transformer.blocks.0.att_proj.weight.avg
none
grad/transformer.blocks.0.att_proj.weight.max
none
grad/transformer.blocks.0.att_proj.weight.min
none
grad/transformer.blocks.0.att_proj.weight.norm
none
grad/transformer.blocks.0.attn_out.weight.avg
none
grad/transformer.blocks.0.attn_out.weight.max
none
grad/transformer.blocks.0.attn_out.weight.min
none
grad/transformer.blocks.0.attn_out.weight.norm
none
grad/transformer.blocks.0.ff_out.weight.avg
none
grad/transformer.blocks.0.ff_out.weight.max
none
grad/transformer.blocks.0.ff_out.weight.min
none
grad/transformer.blocks.0.ff_out.weight.norm
none
grad/transformer.blocks.0.ff_proj.weight.avg
none
grad/transformer.blocks.0.ff_proj.weight.max
none
grad/transformer.blocks.0.ff_proj.weight.min
none
grad/transformer.blocks.0.ff_proj.weight.norm
none
grad/transformer.blocks.1.att_proj.weight.avg
none
grad/transformer.blocks.1.att_proj.weight.max
none
grad/transformer.blocks.1.att_proj.weight.min
none
grad/transformer.blocks.1.att_proj.weight.norm
none
grad/transformer.blocks.1.attn_out.weight.avg
none
grad/transformer.blocks.1.attn_out.weight.max
none
grad/transformer.blocks.1.attn_out.weight.min
none
grad/transformer.blocks.1.attn_out.weight.norm
none
grad/transformer.blocks.1.ff_out.weight.avg
none
grad/transformer.blocks.1.ff_out.weight.max
none
grad/transformer.blocks.1.ff_out.weight.min
none
grad/transformer.blocks.1.ff_out.weight.norm
none
grad/transformer.blocks.1.ff_proj.weight.avg
none
grad/transformer.blocks.1.ff_proj.weight.max
none
grad/transformer.blocks.1.ff_proj.weight.min
none
grad/transformer.blocks.1.ff_proj.weight.norm
none
grad/transformer.blocks.2.att_proj.weight.avg
none
grad/transformer.blocks.2.att_proj.weight.max
none
grad/transformer.blocks.2.att_proj.weight.min
none
grad/transformer.blocks.2.att_proj.weight.norm
none
grad/transformer.blocks.2.attn_out.weight.avg
none
grad/transformer.blocks.2.attn_out.weight.max
none
grad/transformer.blocks.2.attn_out.weight.min
none
grad/transformer.blocks.2.attn_out.weight.norm
none
grad/transformer.blocks.2.ff_out.weight.avg
none
grad/transformer.blocks.2.ff_out.weight.max
none
grad/transformer.blocks.2.ff_out.weight.min
none
grad/transformer.blocks.2.ff_out.weight.norm
none
grad/transformer.blocks.2.ff_proj.weight.avg
none
grad/transformer.blocks.2.ff_proj.weight.max
none
grad/transformer.blocks.2.ff_proj.weight.min
none
grad/transformer.blocks.2.ff_proj.weight.norm
none
grad/transformer.blocks.3.att_proj.weight.avg
none
grad/transformer.blocks.3.att_proj.weight.max
none
grad/transformer.blocks.3.att_proj.weight.min
none
grad/transformer.blocks.3.att_proj.weight.norm
none
grad/transformer.blocks.3.attn_out.weight.avg
none
grad/transformer.blocks.3.attn_out.weight.max
none
grad/transformer.blocks.3.attn_out.weight.min
none
grad/transformer.blocks.3.attn_out.weight.norm
none
grad/transformer.blocks.3.ff_out.weight.avg
none
grad/transformer.blocks.3.ff_out.weight.max
none
grad/transformer.blocks.3.ff_out.weight.min
none
grad/transformer.blocks.3.ff_out.weight.norm
none
grad/transformer.blocks.3.ff_proj.weight.avg
none
grad/transformer.blocks.3.ff_proj.weight.max
none
grad/transformer.blocks.3.ff_proj.weight.min
none
grad/transformer.blocks.3.ff_proj.weight.norm
none
grad/transformer.blocks.4.att_proj.weight.avg
none
grad/transformer.blocks.4.att_proj.weight.max
none
grad/transformer.blocks.4.att_proj.weight.min
none
grad/transformer.blocks.4.att_proj.weight.norm
none
grad/transformer.blocks.4.attn_out.weight.avg
none
grad/transformer.blocks.4.attn_out.weight.max
none
grad/transformer.blocks.4.attn_out.weight.min
none
grad/transformer.blocks.4.attn_out.weight.norm
none
grad/transformer.blocks.4.ff_out.weight.avg
none
grad/transformer.blocks.4.ff_out.weight.max
none
grad/transformer.blocks.4.ff_out.weight.min
none
grad/transformer.blocks.4.ff_out.weight.norm
none
grad/transformer.blocks.4.ff_proj.weight.avg
none
grad/transformer.blocks.4.ff_proj.weight.max
none
grad/transformer.blocks.4.ff_proj.weight.min
none
grad/transformer.blocks.4.ff_proj.weight.norm
none
grad/transformer.blocks.5.att_proj.weight.avg
none
grad/transformer.blocks.5.att_proj.weight.max
none
grad/transformer.blocks.5.att_proj.weight.min
none
grad/transformer.blocks.5.att_proj.weight.norm
none
grad/transformer.blocks.5.attn_out.weight.avg
none
grad/transformer.blocks.5.attn_out.weight.max
none
grad/transformer.blocks.5.attn_out.weight.min
none
grad/transformer.blocks.5.attn_out.weight.norm
none
grad/transformer.blocks.5.ff_out.weight.avg
none
grad/transformer.blocks.5.ff_out.weight.max
none
grad/transformer.blocks.5.ff_out.weight.min
none
grad/transformer.blocks.5.ff_out.weight.norm
none
grad/transformer.blocks.5.ff_proj.weight.avg
none
grad/transformer.blocks.5.ff_proj.weight.max
none
grad/transformer.blocks.5.ff_proj.weight.min
none
grad/transformer.blocks.5.ff_proj.weight.norm
none
grad/transformer.blocks.6.att_proj.weight.avg
none
grad/transformer.blocks.6.att_proj.weight.max
none
grad/transformer.blocks.6.att_proj.weight.min
none
grad/transformer.blocks.6.att_proj.weight.norm
none
grad/transformer.blocks.6.attn_out.weight.avg
none
grad/transformer.blocks.6.attn_out.weight.max
none
grad/transformer.blocks.6.attn_out.weight.min
none
grad/transformer.blocks.6.attn_out.weight.norm
none
grad/transformer.blocks.6.ff_out.weight.avg
none
grad/transformer.blocks.6.ff_out.weight.max
none
grad/transformer.blocks.6.ff_out.weight.min
none
grad/transformer.blocks.6.ff_out.weight.norm
none
grad/transformer.blocks.6.ff_proj.weight.avg
none
grad/transformer.blocks.6.ff_proj.weight.max
none
grad/transformer.blocks.6.ff_proj.weight.min
none
grad/transformer.blocks.6.ff_proj.weight.norm
none
grad/transformer.blocks.7.att_proj.weight.avg
none
grad/transformer.blocks.7.att_proj.weight.max
none
grad/transformer.blocks.7.att_proj.weight.min
none
grad/transformer.blocks.7.att_proj.weight.norm
none
grad/transformer.blocks.7.attn_out.weight.avg
none
grad/transformer.blocks.7.attn_out.weight.max
none
grad/transformer.blocks.7.attn_out.weight.min
none
grad/transformer.blocks.7.attn_out.weight.norm
none
grad/transformer.blocks.7.ff_out.weight.avg
none
grad/transformer.blocks.7.ff_out.weight.max
none
grad/transformer.blocks.7.ff_out.weight.min
none
grad/transformer.blocks.7.ff_out.weight.norm
none
grad/transformer.blocks.7.ff_proj.weight.avg
none
grad/transformer.blocks.7.ff_proj.weight.max
none
grad/transformer.blocks.7.ff_proj.weight.min
none
grad/transformer.blocks.7.ff_proj.weight.norm
none
grad/transformer.blocks.8.att_proj.weight.avg
none
grad/transformer.blocks.8.att_proj.weight.max
none
grad/transformer.blocks.8.att_proj.weight.min
none
grad/transformer.blocks.8.att_proj.weight.norm
none
grad/transformer.blocks.8.attn_out.weight.avg
none
grad/transformer.blocks.8.attn_out.weight.max
none
grad/transformer.blocks.8.attn_out.weight.min
none
grad/transformer.blocks.8.attn_out.weight.norm
none
grad/transformer.blocks.8.ff_out.weight.avg
none
grad/transformer.blocks.8.ff_out.weight.max
none
grad/transformer.blocks.8.ff_out.weight.min
none
grad/transformer.blocks.8.ff_out.weight.norm
none
grad/transformer.blocks.8.ff_proj.weight.avg
none
grad/transformer.blocks.8.ff_proj.weight.max
none
grad/transformer.blocks.8.ff_proj.weight.min
none
grad/transformer.blocks.8.ff_proj.weight.norm
none
grad/transformer.blocks.9.att_proj.weight.avg
none
grad/transformer.blocks.9.att_proj.weight.max
none
grad/transformer.blocks.9.att_proj.weight.min
none
grad/transformer.blocks.9.att_proj.weight.norm
none
grad/transformer.blocks.9.attn_out.weight.avg
none
grad/transformer.blocks.9.attn_out.weight.max
none
grad/transformer.blocks.9.attn_out.weight.min
none
grad/transformer.blocks.9.attn_out.weight.norm
none
grad/transformer.blocks.9.ff_out.weight.avg
none
grad/transformer.blocks.9.ff_out.weight.max
none
grad/transformer.blocks.9.ff_out.weight.min
none
grad/transformer.blocks.9.ff_out.weight.norm
none
grad/transformer.blocks.9.ff_proj.weight.avg
none
grad/transformer.blocks.9.ff_proj.weight.max
none
grad/transformer.blocks.9.ff_proj.weight.min
none
grad/transformer.blocks.9.ff_proj.weight.norm
none
grad/transformer.blocks.10.att_proj.weight.avg
none
grad/transformer.blocks.10.att_proj.weight.max
none
grad/transformer.blocks.10.att_proj.weight.min
none
grad/transformer.blocks.10.att_proj.weight.norm
none
grad/transformer.blocks.10.attn_out.weight.avg
none
grad/transformer.blocks.10.attn_out.weight.max
none
grad/transformer.blocks.10.attn_out.weight.min
none
grad/transformer.blocks.10.attn_out.weight.norm
none
grad/transformer.blocks.10.ff_out.weight.avg
none
grad/transformer.blocks.10.ff_out.weight.max
none
grad/transformer.blocks.10.ff_out.weight.min
none
grad/transformer.blocks.10.ff_out.weight.norm
none
grad/transformer.blocks.10.ff_proj.weight.avg
none
grad/transformer.blocks.10.ff_proj.weight.max
none
grad/transformer.blocks.10.ff_proj.weight.min
none
grad/transformer.blocks.10.ff_proj.weight.norm
none
grad/transformer.blocks.11.att_proj.weight.avg
none
grad/transformer.blocks.11.att_proj.weight.max
none
grad/transformer.blocks.11.att_proj.weight.min
none
grad/transformer.blocks.11.att_proj.weight.norm
none
grad/transformer.blocks.11.attn_out.weight.avg
none
grad/transformer.blocks.11.attn_out.weight.max
none
grad/transformer.blocks.11.attn_out.weight.min
none
grad/transformer.blocks.11.attn_out.weight.norm
none
grad/transformer.blocks.11.ff_out.weight.avg
none
grad/transformer.blocks.11.ff_out.weight.max
none
grad/transformer.blocks.11.ff_out.weight.min
none
grad/transformer.blocks.11.ff_out.weight.norm
none
grad/transformer.blocks.11.ff_proj.weight.avg
none
grad/transformer.blocks.11.ff_proj.weight.max
none
grad/transformer.blocks.11.ff_proj.weight.min
none
grad/transformer.blocks.11.ff_proj.weight.norm
none
grad/transformer.blocks.12.att_proj.weight.avg
none
grad/transformer.blocks.12.att_proj.weight.max
none
grad/transformer.blocks.12.att_proj.weight.min
none
grad/transformer.blocks.12.att_proj.weight.norm
none
grad/transformer.blocks.12.attn_out.weight.avg
none
grad/transformer.blocks.12.attn_out.weight.max
none
grad/transformer.blocks.12.attn_out.weight.min
none
grad/transformer.blocks.12.attn_out.weight.norm
none
grad/transformer.blocks.12.ff_out.weight.avg
none
grad/transformer.blocks.12.ff_out.weight.max
none
grad/transformer.blocks.12.ff_out.weight.min
none
grad/transformer.blocks.12.ff_out.weight.norm
none
grad/transformer.blocks.12.ff_proj.weight.avg
none
grad/transformer.blocks.12.ff_proj.weight.max
none
grad/transformer.blocks.12.ff_proj.weight.min
none
grad/transformer.blocks.12.ff_proj.weight.norm
none
grad/transformer.blocks.13.att_proj.weight.avg
none
grad/transformer.blocks.13.att_proj.weight.max
none
grad/transformer.blocks.13.att_proj.weight.min
none
grad/transformer.blocks.13.att_proj.weight.norm
none
grad/transformer.blocks.13.attn_out.weight.avg
none
grad/transformer.blocks.13.attn_out.weight.max
none
grad/transformer.blocks.13.attn_out.weight.min
none
grad/transformer.blocks.13.attn_out.weight.norm
none
grad/transformer.blocks.13.ff_out.weight.avg
none
grad/transformer.blocks.13.ff_out.weight.max
none
grad/transformer.blocks.13.ff_out.weight.min
none
grad/transformer.blocks.13.ff_out.weight.norm
none
grad/transformer.blocks.13.ff_proj.weight.avg
none
grad/transformer.blocks.13.ff_proj.weight.max
none
grad/transformer.blocks.13.ff_proj.weight.min
none
grad/transformer.blocks.13.ff_proj.weight.norm
none
grad/transformer.blocks.14.att_proj.weight.avg
none
grad/transformer.blocks.14.att_proj.weight.max
none
grad/transformer.blocks.14.att_proj.weight.min
none
grad/transformer.blocks.14.att_proj.weight.norm
none
grad/transformer.blocks.14.attn_out.weight.avg
none
grad/transformer.blocks.14.attn_out.weight.max
none
grad/transformer.blocks.14.attn_out.weight.min
none
grad/transformer.blocks.14.attn_out.weight.norm
none
grad/transformer.blocks.14.ff_out.weight.avg
none
grad/transformer.blocks.14.ff_out.weight.max
none
grad/transformer.blocks.14.ff_out.weight.min
none
grad/transformer.blocks.14.ff_out.weight.norm
none
grad/transformer.blocks.14.ff_proj.weight.avg
none
grad/transformer.blocks.14.ff_proj.weight.max
none
grad/transformer.blocks.14.ff_proj.weight.min
none
grad/transformer.blocks.14.ff_proj.weight.norm
none
grad/transformer.blocks.15.att_proj.weight.avg
none
grad/transformer.blocks.15.att_proj.weight.max
none
grad/transformer.blocks.15.att_proj.weight.min
none
grad/transformer.blocks.15.att_proj.weight.norm
none
grad/transformer.blocks.15.attn_out.weight.avg
none
grad/transformer.blocks.15.attn_out.weight.max
none
grad/transformer.blocks.15.attn_out.weight.min
none
grad/transformer.blocks.15.attn_out.weight.norm
none
grad/transformer.blocks.15.ff_out.weight.avg
none
grad/transformer.blocks.15.ff_out.weight.max
none
grad/transformer.blocks.15.ff_out.weight.min
none
grad/transformer.blocks.15.ff_out.weight.norm
none
grad/transformer.blocks.15.ff_proj.weight.avg
none
grad/transformer.blocks.15.ff_proj.weight.max
none
grad/transformer.blocks.15.ff_proj.weight.min
none
grad/transformer.blocks.15.ff_proj.weight.norm
none
grad/transformer.blocks.16.att_proj.weight.avg
none
grad/transformer.blocks.16.att_proj.weight.max
none
grad/transformer.blocks.16.att_proj.weight.min
none
grad/transformer.blocks.16.att_proj.weight.norm
none
grad/transformer.blocks.16.attn_out.weight.avg
none
grad/transformer.blocks.16.attn_out.weight.max
none
grad/transformer.blocks.16.attn_out.weight.min
none
grad/transformer.blocks.16.attn_out.weight.norm
none
grad/transformer.blocks.16.ff_out.weight.avg
none
grad/transformer.blocks.16.ff_out.weight.max
none
grad/transformer.blocks.16.ff_out.weight.min
none
grad/transformer.blocks.16.ff_out.weight.norm
none
grad/transformer.blocks.16.ff_proj.weight.avg
none
grad/transformer.blocks.16.ff_proj.weight.max
none
grad/transformer.blocks.16.ff_proj.weight.min
none
grad/transformer.blocks.16.ff_proj.weight.norm
none
grad/transformer.blocks.17.att_proj.weight.avg
none
grad/transformer.blocks.17.att_proj.weight.max
none
grad/transformer.blocks.17.att_proj.weight.min
none
grad/transformer.blocks.17.att_proj.weight.norm
none
grad/transformer.blocks.17.attn_out.weight.avg
none
grad/transformer.blocks.17.attn_out.weight.max
none
grad/transformer.blocks.17.attn_out.weight.min
none
grad/transformer.blocks.17.attn_out.weight.norm
none
grad/transformer.blocks.17.ff_out.weight.avg
none
grad/transformer.blocks.17.ff_out.weight.max
none
grad/transformer.blocks.17.ff_out.weight.min
none
grad/transformer.blocks.17.ff_out.weight.norm
none
grad/transformer.blocks.17.ff_proj.weight.avg
none
grad/transformer.blocks.17.ff_proj.weight.max
none
grad/transformer.blocks.17.ff_proj.weight.min
none
grad/transformer.blocks.17.ff_proj.weight.norm
none
grad/transformer.blocks.18.att_proj.weight.avg
none
grad/transformer.blocks.18.att_proj.weight.max
none
grad/transformer.blocks.18.att_proj.weight.min
none
grad/transformer.blocks.18.att_proj.weight.norm
none
grad/transformer.blocks.18.attn_out.weight.avg
none
grad/transformer.blocks.18.attn_out.weight.max
none
grad/transformer.blocks.18.attn_out.weight.min
none
grad/transformer.blocks.18.attn_out.weight.norm
none
grad/transformer.blocks.18.ff_out.weight.avg
none
grad/transformer.blocks.18.ff_out.weight.max
none
grad/transformer.blocks.18.ff_out.weight.min
none
grad/transformer.blocks.18.ff_out.weight.norm
none
grad/transformer.blocks.18.ff_proj.weight.avg
none
grad/transformer.blocks.18.ff_proj.weight.max
none
grad/transformer.blocks.18.ff_proj.weight.min
none
grad/transformer.blocks.18.ff_proj.weight.norm
none
grad/transformer.blocks.19.att_proj.weight.avg
none
grad/transformer.blocks.19.att_proj.weight.max
none
grad/transformer.blocks.19.att_proj.weight.min
none
grad/transformer.blocks.19.att_proj.weight.norm
none
grad/transformer.blocks.19.attn_out.weight.avg
none
grad/transformer.blocks.19.attn_out.weight.max
none
grad/transformer.blocks.19.attn_out.weight.min
none
grad/transformer.blocks.19.attn_out.weight.norm
none
grad/transformer.blocks.19.ff_out.weight.avg
none
grad/transformer.blocks.19.ff_out.weight.max
none
grad/transformer.blocks.19.ff_out.weight.min
none
grad/transformer.blocks.19.ff_out.weight.norm
none
grad/transformer.blocks.19.ff_proj.weight.avg
none
grad/transformer.blocks.19.ff_proj.weight.max
none
grad/transformer.blocks.19.ff_proj.weight.min
none
grad/transformer.blocks.19.ff_proj.weight.norm
none
grad/transformer.blocks.20.att_proj.weight.avg
none
grad/transformer.blocks.20.att_proj.weight.max
none
grad/transformer.blocks.20.att_proj.weight.min
none
grad/transformer.blocks.20.att_proj.weight.norm
none
grad/transformer.blocks.20.attn_out.weight.avg
none
grad/transformer.blocks.20.attn_out.weight.max
none
grad/transformer.blocks.20.attn_out.weight.min
none
grad/transformer.blocks.20.attn_out.weight.norm
none
grad/transformer.blocks.20.ff_out.weight.avg
none
grad/transformer.blocks.20.ff_out.weight.max
none
grad/transformer.blocks.20.ff_out.weight.min
none
grad/transformer.blocks.20.ff_out.weight.norm
none
grad/transformer.blocks.20.ff_proj.weight.avg
none
grad/transformer.blocks.20.ff_proj.weight.max
none
grad/transformer.blocks.20.ff_proj.weight.min
none
grad/transformer.blocks.20.ff_proj.weight.norm
none
grad/transformer.blocks.21.att_proj.weight.avg
none
grad/transformer.blocks.21.att_proj.weight.max
none
grad/transformer.blocks.21.att_proj.weight.min
none
grad/transformer.blocks.21.att_proj.weight.norm
none
grad/transformer.blocks.21.attn_out.weight.avg
none
grad/transformer.blocks.21.attn_out.weight.max
none
grad/transformer.blocks.21.attn_out.weight.min
none
grad/transformer.blocks.21.attn_out.weight.norm
none
grad/transformer.blocks.21.ff_out.weight.avg
none
grad/transformer.blocks.21.ff_out.weight.max
none
grad/transformer.blocks.21.ff_out.weight.min
none
grad/transformer.blocks.21.ff_out.weight.norm
none
grad/transformer.blocks.21.ff_proj.weight.avg
none
grad/transformer.blocks.21.ff_proj.weight.max
none
grad/transformer.blocks.21.ff_proj.weight.min
none
grad/transformer.blocks.21.ff_proj.weight.norm
none
grad/transformer.blocks.22.att_proj.weight.avg
none
grad/transformer.blocks.22.att_proj.weight.max
none
grad/transformer.blocks.22.att_proj.weight.min
none
grad/transformer.blocks.22.att_proj.weight.norm
none
grad/transformer.blocks.22.attn_out.weight.avg
none
grad/transformer.blocks.22.attn_out.weight.max
none
grad/transformer.blocks.22.attn_out.weight.min
none
grad/transformer.blocks.22.attn_out.weight.norm
none
grad/transformer.blocks.22.ff_out.weight.avg
none
grad/transformer.blocks.22.ff_out.weight.max
none
grad/transformer.blocks.22.ff_out.weight.min
none
grad/transformer.blocks.22.ff_out.weight.norm
none
grad/transformer.blocks.22.ff_proj.weight.avg
none
grad/transformer.blocks.22.ff_proj.weight.max
none
grad/transformer.blocks.22.ff_proj.weight.min
none
grad/transformer.blocks.22.ff_proj.weight.norm
none
grad/transformer.blocks.23.att_proj.weight.avg
none
grad/transformer.blocks.23.att_proj.weight.max
none
grad/transformer.blocks.23.att_proj.weight.min
none
grad/transformer.blocks.23.att_proj.weight.norm
none
grad/transformer.blocks.23.attn_out.weight.avg
none
grad/transformer.blocks.23.attn_out.weight.max
none
grad/transformer.blocks.23.attn_out.weight.min
none
grad/transformer.blocks.23.attn_out.weight.norm
none
grad/transformer.blocks.23.ff_out.weight.avg
none
grad/transformer.blocks.23.ff_out.weight.max
none
grad/transformer.blocks.23.ff_out.weight.min
none
grad/transformer.blocks.23.ff_out.weight.norm
none
grad/transformer.blocks.23.ff_proj.weight.avg
none
grad/transformer.blocks.23.ff_proj.weight.max
none
grad/transformer.blocks.23.ff_proj.weight.min
none
grad/transformer.blocks.23.ff_proj.weight.norm
none
grad/transformer.blocks.24.att_proj.weight.avg
none
grad/transformer.blocks.24.att_proj.weight.max
none
grad/transformer.blocks.24.att_proj.weight.min
none
grad/transformer.blocks.24.att_proj.weight.norm
none
grad/transformer.blocks.24.attn_out.weight.avg
none
grad/transformer.blocks.24.attn_out.weight.max
none
grad/transformer.blocks.24.attn_out.weight.min
none
grad/transformer.blocks.24.attn_out.weight.norm
none
grad/transformer.blocks.24.ff_out.weight.avg
none
grad/transformer.blocks.24.ff_out.weight.max
none
grad/transformer.blocks.24.ff_out.weight.min
none
grad/transformer.blocks.24.ff_out.weight.norm
none
grad/transformer.blocks.24.ff_proj.weight.avg
none
grad/transformer.blocks.24.ff_proj.weight.max
none
grad/transformer.blocks.24.ff_proj.weight.min
none
grad/transformer.blocks.24.ff_proj.weight.norm
none
grad/transformer.blocks.25.att_proj.weight.avg
none
grad/transformer.blocks.25.att_proj.weight.max
none
grad/transformer.blocks.25.att_proj.weight.min
none
grad/transformer.blocks.25.att_proj.weight.norm
none
grad/transformer.blocks.25.attn_out.weight.avg
none
grad/transformer.blocks.25.attn_out.weight.max
none
grad/transformer.blocks.25.attn_out.weight.min
none
grad/transformer.blocks.25.attn_out.weight.norm
none
grad/transformer.blocks.25.ff_out.weight.avg
none
grad/transformer.blocks.25.ff_out.weight.max
none
grad/transformer.blocks.25.ff_out.weight.min
none
grad/transformer.blocks.25.ff_out.weight.norm
none
grad/transformer.blocks.25.ff_proj.weight.avg
none
grad/transformer.blocks.25.ff_proj.weight.max
none
grad/transformer.blocks.25.ff_proj.weight.min
none
grad/transformer.blocks.25.ff_proj.weight.norm
none
grad/transformer.blocks.26.att_proj.weight.avg
none
grad/transformer.blocks.26.att_proj.weight.max
none
grad/transformer.blocks.26.att_proj.weight.min
none
grad/transformer.blocks.26.att_proj.weight.norm
none
grad/transformer.blocks.26.attn_out.weight.avg
none
grad/transformer.blocks.26.attn_out.weight.max
none
grad/transformer.blocks.26.attn_out.weight.min
none
grad/transformer.blocks.26.attn_out.weight.norm
none
grad/transformer.blocks.26.ff_out.weight.avg
none
grad/transformer.blocks.26.ff_out.weight.max
none
grad/transformer.blocks.26.ff_out.weight.min
none
grad/transformer.blocks.26.ff_out.weight.norm
none
grad/transformer.blocks.26.ff_proj.weight.avg
none
grad/transformer.blocks.26.ff_proj.weight.max
none
grad/transformer.blocks.26.ff_proj.weight.min
none
grad/transformer.blocks.26.ff_proj.weight.norm
none
grad/transformer.blocks.27.att_proj.weight.avg
none
grad/transformer.blocks.27.att_proj.weight.max
none
grad/transformer.blocks.27.att_proj.weight.min
none
grad/transformer.blocks.27.att_proj.weight.norm
none
grad/transformer.blocks.27.attn_out.weight.avg
none
grad/transformer.blocks.27.attn_out.weight.max
none
grad/transformer.blocks.27.attn_out.weight.min
none
grad/transformer.blocks.27.attn_out.weight.norm
none
grad/transformer.blocks.27.ff_out.weight.avg
none
grad/transformer.blocks.27.ff_out.weight.max
none
grad/transformer.blocks.27.ff_out.weight.min
none
grad/transformer.blocks.27.ff_out.weight.norm
none
grad/transformer.blocks.27.ff_proj.weight.avg
none
grad/transformer.blocks.27.ff_proj.weight.max
none
grad/transformer.blocks.27.ff_proj.weight.min
none
grad/transformer.blocks.27.ff_proj.weight.norm
none
grad/transformer.blocks.28.att_proj.weight.avg
none
grad/transformer.blocks.28.att_proj.weight.max
none
grad/transformer.blocks.28.att_proj.weight.min
none
grad/transformer.blocks.28.att_proj.weight.norm
none
grad/transformer.blocks.28.attn_out.weight.avg
none
grad/transformer.blocks.28.attn_out.weight.max
none
grad/transformer.blocks.28.attn_out.weight.min
none
grad/transformer.blocks.28.attn_out.weight.norm
none
grad/transformer.blocks.28.ff_out.weight.avg
none
grad/transformer.blocks.28.ff_out.weight.max
none
grad/transformer.blocks.28.ff_out.weight.min
none
grad/transformer.blocks.28.ff_out.weight.norm
none
grad/transformer.blocks.28.ff_proj.weight.avg
none
grad/transformer.blocks.28.ff_proj.weight.max
none
grad/transformer.blocks.28.ff_proj.weight.min
none
grad/transformer.blocks.28.ff_proj.weight.norm
none
grad/transformer.blocks.29.att_proj.weight.avg
none
grad/transformer.blocks.29.att_proj.weight.max
none
grad/transformer.blocks.29.att_proj.weight.min
none
grad/transformer.blocks.29.att_proj.weight.norm
none
grad/transformer.blocks.29.attn_out.weight.avg
none
grad/transformer.blocks.29.attn_out.weight.max
none
grad/transformer.blocks.29.attn_out.weight.min
none
grad/transformer.blocks.29.attn_out.weight.norm
none
grad/transformer.blocks.29.ff_out.weight.avg
none
grad/transformer.blocks.29.ff_out.weight.max
none
grad/transformer.blocks.29.ff_out.weight.min
none
grad/transformer.blocks.29.ff_out.weight.norm
none
grad/transformer.blocks.29.ff_proj.weight.avg
none
grad/transformer.blocks.29.ff_proj.weight.max
none
grad/transformer.blocks.29.ff_proj.weight.min
none
grad/transformer.blocks.29.ff_proj.weight.norm
none
grad/transformer.blocks.30.att_proj.weight.avg
none
grad/transformer.blocks.30.att_proj.weight.max
none
grad/transformer.blocks.30.att_proj.weight.min
none
grad/transformer.blocks.30.att_proj.weight.norm
none
grad/transformer.blocks.30.attn_out.weight.avg
none
grad/transformer.blocks.30.attn_out.weight.max
none
grad/transformer.blocks.30.attn_out.weight.min
none
grad/transformer.blocks.30.attn_out.weight.norm
none
grad/transformer.blocks.30.ff_out.weight.avg
none
grad/transformer.blocks.30.ff_out.weight.max
none
grad/transformer.blocks.30.ff_out.weight.min
none
grad/transformer.blocks.30.ff_out.weight.norm
none
grad/transformer.blocks.30.ff_proj.weight.avg
none
grad/transformer.blocks.30.ff_proj.weight.max
none
grad/transformer.blocks.30.ff_proj.weight.min
none
grad/transformer.blocks.30.ff_proj.weight.norm
none
grad/transformer.blocks.31.att_proj.weight.avg
none
grad/transformer.blocks.31.att_proj.weight.max
none
grad/transformer.blocks.31.att_proj.weight.min
none
grad/transformer.blocks.31.att_proj.weight.norm
none
grad/transformer.blocks.31.attn_out.weight.avg
none
grad/transformer.blocks.31.attn_out.weight.max
none
grad/transformer.blocks.31.attn_out.weight.min
none
grad/transformer.blocks.31.attn_out.weight.norm
none
grad/transformer.blocks.31.ff_out.weight.avg
none
grad/transformer.blocks.31.ff_out.weight.max
none
grad/transformer.blocks.31.ff_out.weight.min
none
grad/transformer.blocks.31.ff_out.weight.norm
none
grad/transformer.blocks.31.ff_proj.weight.avg
none
grad/transformer.blocks.31.ff_proj.weight.max
none
grad/transformer.blocks.31.ff_proj.weight.min
none
grad/transformer.blocks.31.ff_proj.weight.norm
none
grad/transformer.ff_out.weight.avg
none
grad/transformer.ff_out.weight.max
none
grad/transformer.ff_out.weight.min
none
grad/transformer.ff_out.weight.norm
none
grad/transformer.wte.weight.avg
none
grad/transformer.wte.weight.max
none
grad/transformer.wte.weight.min
none
grad/transformer.wte.weight.norm
none
learning_rate_group0
number
learning_rate_group1
number
param/transformer.blocks.0.att_proj.weight.avg
none
param/transformer.blocks.0.att_proj.weight.max
none
param/transformer.blocks.0.att_proj.weight.min
none
param/transformer.blocks.0.att_proj.weight.norm
none
param/transformer.blocks.0.attn_out.weight.avg
none
param/transformer.blocks.0.attn_out.weight.max
none
param/transformer.blocks.0.attn_out.weight.min
none
param/transformer.blocks.0.attn_out.weight.norm
none
param/transformer.blocks.0.ff_out.weight.avg
none
param/transformer.blocks.0.ff_out.weight.max
none
param/transformer.blocks.0.ff_out.weight.min
none
param/transformer.blocks.0.ff_out.weight.norm
none
param/transformer.blocks.0.ff_proj.weight.avg
none
param/transformer.blocks.0.ff_proj.weight.max
none
param/transformer.blocks.0.ff_proj.weight.min
none
param/transformer.blocks.0.ff_proj.weight.norm
none
param/transformer.blocks.1.att_proj.weight.avg
none
param/transformer.blocks.1.att_proj.weight.max
none
param/transformer.blocks.1.att_proj.weight.min
none
param/transformer.blocks.1.att_proj.weight.norm
none
param/transformer.blocks.1.attn_out.weight.avg
none
param/transformer.blocks.1.attn_out.weight.max
none
param/transformer.blocks.1.attn_out.weight.min
none
param/transformer.blocks.1.attn_out.weight.norm
none
param/transformer.blocks.1.ff_out.weight.avg
none
param/transformer.blocks.1.ff_out.weight.max
none
param/transformer.blocks.1.ff_out.weight.min
none
param/transformer.blocks.1.ff_out.weight.norm
none
param/transformer.blocks.1.ff_proj.weight.avg
none
param/transformer.blocks.1.ff_proj.weight.max
none
param/transformer.blocks.1.ff_proj.weight.min
none
param/transformer.blocks.1.ff_proj.weight.norm
none
param/transformer.blocks.2.att_proj.weight.avg
none
param/transformer.blocks.2.att_proj.weight.max
none
param/transformer.blocks.2.att_proj.weight.min
none
param/transformer.blocks.2.att_proj.weight.norm
none
param/transformer.blocks.2.attn_out.weight.avg
none
param/transformer.blocks.2.attn_out.weight.max
none
param/transformer.blocks.2.attn_out.weight.min
none
param/transformer.blocks.2.attn_out.weight.norm
none
param/transformer.blocks.2.ff_out.weight.avg
none
param/transformer.blocks.2.ff_out.weight.max
none
param/transformer.blocks.2.ff_out.weight.min
none
param/transformer.blocks.2.ff_out.weight.norm
none
param/transformer.blocks.2.ff_proj.weight.avg
none
param/transformer.blocks.2.ff_proj.weight.max
none
param/transformer.blocks.2.ff_proj.weight.min
none
param/transformer.blocks.2.ff_proj.weight.norm
none
param/transformer.blocks.3.att_proj.weight.avg
none
param/transformer.blocks.3.att_proj.weight.max
none
param/transformer.blocks.3.att_proj.weight.min
none
param/transformer.blocks.3.att_proj.weight.norm
none
param/transformer.blocks.3.attn_out.weight.avg
none
param/transformer.blocks.3.attn_out.weight.max
none
param/transformer.blocks.3.attn_out.weight.min
none
param/transformer.blocks.3.attn_out.weight.norm
none
param/transformer.blocks.3.ff_out.weight.avg
none
param/transformer.blocks.3.ff_out.weight.max
none
param/transformer.blocks.3.ff_out.weight.min
none
param/transformer.blocks.3.ff_out.weight.norm
none
param/transformer.blocks.3.ff_proj.weight.avg
none
param/transformer.blocks.3.ff_proj.weight.max
none
param/transformer.blocks.3.ff_proj.weight.min
none
param/transformer.blocks.3.ff_proj.weight.norm
none
param/transformer.blocks.4.att_proj.weight.avg
none
param/transformer.blocks.4.att_proj.weight.max
none
param/transformer.blocks.4.att_proj.weight.min
none
param/transformer.blocks.4.att_proj.weight.norm
none
param/transformer.blocks.4.attn_out.weight.avg
none
param/transformer.blocks.4.attn_out.weight.max
none
param/transformer.blocks.4.attn_out.weight.min
none
param/transformer.blocks.4.attn_out.weight.norm
none
param/transformer.blocks.4.ff_out.weight.avg
none
param/transformer.blocks.4.ff_out.weight.max
none
param/transformer.blocks.4.ff_out.weight.min
none
param/transformer.blocks.4.ff_out.weight.norm
none
param/transformer.blocks.4.ff_proj.weight.avg
none
param/transformer.blocks.4.ff_proj.weight.max
none
param/transformer.blocks.4.ff_proj.weight.min
none
param/transformer.blocks.4.ff_proj.weight.norm
none
param/transformer.blocks.5.att_proj.weight.avg
none
param/transformer.blocks.5.att_proj.weight.max
none
param/transformer.blocks.5.att_proj.weight.min
none
param/transformer.blocks.5.att_proj.weight.norm
none
param/transformer.blocks.5.attn_out.weight.avg
none
param/transformer.blocks.5.attn_out.weight.max
none
param/transformer.blocks.5.attn_out.weight.min
none
param/transformer.blocks.5.attn_out.weight.norm
none
param/transformer.blocks.5.ff_out.weight.avg
none
param/transformer.blocks.5.ff_out.weight.max
none
param/transformer.blocks.5.ff_out.weight.min
none
param/transformer.blocks.5.ff_out.weight.norm
none
param/transformer.blocks.5.ff_proj.weight.avg
none
param/transformer.blocks.5.ff_proj.weight.max
none
param/transformer.blocks.5.ff_proj.weight.min
none
param/transformer.blocks.5.ff_proj.weight.norm
none
param/transformer.blocks.6.att_proj.weight.avg
none
param/transformer.blocks.6.att_proj.weight.max
none
param/transformer.blocks.6.att_proj.weight.min
none
param/transformer.blocks.6.att_proj.weight.norm
none
param/transformer.blocks.6.attn_out.weight.avg
none
param/transformer.blocks.6.attn_out.weight.max
none
param/transformer.blocks.6.attn_out.weight.min
none
param/transformer.blocks.6.attn_out.weight.norm
none
param/transformer.blocks.6.ff_out.weight.avg
none
param/transformer.blocks.6.ff_out.weight.max
none
param/transformer.blocks.6.ff_out.weight.min
none
param/transformer.blocks.6.ff_out.weight.norm
none
param/transformer.blocks.6.ff_proj.weight.avg
none
param/transformer.blocks.6.ff_proj.weight.max
none
param/transformer.blocks.6.ff_proj.weight.min
none
param/transformer.blocks.6.ff_proj.weight.norm
none
param/transformer.blocks.7.att_proj.weight.avg
none
param/transformer.blocks.7.att_proj.weight.max
none
param/transformer.blocks.7.att_proj.weight.min
none
param/transformer.blocks.7.att_proj.weight.norm
none
param/transformer.blocks.7.attn_out.weight.avg
none
param/transformer.blocks.7.attn_out.weight.max
none
param/transformer.blocks.7.attn_out.weight.min
none
param/transformer.blocks.7.attn_out.weight.norm
none
param/transformer.blocks.7.ff_out.weight.avg
none
param/transformer.blocks.7.ff_out.weight.max
none
param/transformer.blocks.7.ff_out.weight.min
none
param/transformer.blocks.7.ff_out.weight.norm
none
param/transformer.blocks.7.ff_proj.weight.avg
none
param/transformer.blocks.7.ff_proj.weight.max
none
param/transformer.blocks.7.ff_proj.weight.min
none
param/transformer.blocks.7.ff_proj.weight.norm
none
param/transformer.blocks.8.att_proj.weight.avg
none
param/transformer.blocks.8.att_proj.weight.max
none
param/transformer.blocks.8.att_proj.weight.min
none
param/transformer.blocks.8.att_proj.weight.norm
none
param/transformer.blocks.8.attn_out.weight.avg
none
param/transformer.blocks.8.attn_out.weight.max
none
param/transformer.blocks.8.attn_out.weight.min
none
param/transformer.blocks.8.attn_out.weight.norm
none
param/transformer.blocks.8.ff_out.weight.avg
none
param/transformer.blocks.8.ff_out.weight.max
none
param/transformer.blocks.8.ff_out.weight.min
none
param/transformer.blocks.8.ff_out.weight.norm
none
param/transformer.blocks.8.ff_proj.weight.avg
none
param/transformer.blocks.8.ff_proj.weight.max
none
param/transformer.blocks.8.ff_proj.weight.min
none
param/transformer.blocks.8.ff_proj.weight.norm
none
param/transformer.blocks.9.att_proj.weight.avg
none
param/transformer.blocks.9.att_proj.weight.max
none
param/transformer.blocks.9.att_proj.weight.min
none
param/transformer.blocks.9.att_proj.weight.norm
none
param/transformer.blocks.9.attn_out.weight.avg
none
param/transformer.blocks.9.attn_out.weight.max
none
param/transformer.blocks.9.attn_out.weight.min
none
param/transformer.blocks.9.attn_out.weight.norm
none
param/transformer.blocks.9.ff_out.weight.avg
none
param/transformer.blocks.9.ff_out.weight.max
none
param/transformer.blocks.9.ff_out.weight.min
none
param/transformer.blocks.9.ff_out.weight.norm
none
param/transformer.blocks.9.ff_proj.weight.avg
none
param/transformer.blocks.9.ff_proj.weight.max
none
param/transformer.blocks.9.ff_proj.weight.min
none
param/transformer.blocks.9.ff_proj.weight.norm
none
param/transformer.blocks.10.att_proj.weight.avg
none
param/transformer.blocks.10.att_proj.weight.max
none
param/transformer.blocks.10.att_proj.weight.min
none
param/transformer.blocks.10.att_proj.weight.norm
none
param/transformer.blocks.10.attn_out.weight.avg
none
param/transformer.blocks.10.attn_out.weight.max
none
param/transformer.blocks.10.attn_out.weight.min
none
param/transformer.blocks.10.attn_out.weight.norm
none
param/transformer.blocks.10.ff_out.weight.avg
none
param/transformer.blocks.10.ff_out.weight.max
none
param/transformer.blocks.10.ff_out.weight.min
none
param/transformer.blocks.10.ff_out.weight.norm
none
param/transformer.blocks.10.ff_proj.weight.avg
none
param/transformer.blocks.10.ff_proj.weight.max
none
param/transformer.blocks.10.ff_proj.weight.min
none
param/transformer.blocks.10.ff_proj.weight.norm
none
param/transformer.blocks.11.att_proj.weight.avg
none
param/transformer.blocks.11.att_proj.weight.max
none
param/transformer.blocks.11.att_proj.weight.min
none
param/transformer.blocks.11.att_proj.weight.norm
none
param/transformer.blocks.11.attn_out.weight.avg
none
param/transformer.blocks.11.attn_out.weight.max
none
param/transformer.blocks.11.attn_out.weight.min
none
param/transformer.blocks.11.attn_out.weight.norm
none
param/transformer.blocks.11.ff_out.weight.avg
none
param/transformer.blocks.11.ff_out.weight.max
none
param/transformer.blocks.11.ff_out.weight.min
none
param/transformer.blocks.11.ff_out.weight.norm
none
param/transformer.blocks.11.ff_proj.weight.avg
none
param/transformer.blocks.11.ff_proj.weight.max
none
param/transformer.blocks.11.ff_proj.weight.min
none
param/transformer.blocks.11.ff_proj.weight.norm
none
param/transformer.blocks.12.att_proj.weight.avg
none
param/transformer.blocks.12.att_proj.weight.max
none
param/transformer.blocks.12.att_proj.weight.min
none
param/transformer.blocks.12.att_proj.weight.norm
none
param/transformer.blocks.12.attn_out.weight.avg
none
param/transformer.blocks.12.attn_out.weight.max
none
param/transformer.blocks.12.attn_out.weight.min
none
param/transformer.blocks.12.attn_out.weight.norm
none
param/transformer.blocks.12.ff_out.weight.avg
none
param/transformer.blocks.12.ff_out.weight.max
none
param/transformer.blocks.12.ff_out.weight.min
none
param/transformer.blocks.12.ff_out.weight.norm
none
param/transformer.blocks.12.ff_proj.weight.avg
none
param/transformer.blocks.12.ff_proj.weight.max
none
param/transformer.blocks.12.ff_proj.weight.min
none
param/transformer.blocks.12.ff_proj.weight.norm
none
param/transformer.blocks.13.att_proj.weight.avg
none
param/transformer.blocks.13.att_proj.weight.max
none
param/transformer.blocks.13.att_proj.weight.min
none
param/transformer.blocks.13.att_proj.weight.norm
none
param/transformer.blocks.13.attn_out.weight.avg
none
param/transformer.blocks.13.attn_out.weight.max
none
param/transformer.blocks.13.attn_out.weight.min
none
param/transformer.blocks.13.attn_out.weight.norm
none
param/transformer.blocks.13.ff_out.weight.avg
none
param/transformer.blocks.13.ff_out.weight.max
none
param/transformer.blocks.13.ff_out.weight.min
none
param/transformer.blocks.13.ff_out.weight.norm
none
param/transformer.blocks.13.ff_proj.weight.avg
none
param/transformer.blocks.13.ff_proj.weight.max
none
param/transformer.blocks.13.ff_proj.weight.min
none
param/transformer.blocks.13.ff_proj.weight.norm
none
param/transformer.blocks.14.att_proj.weight.avg
none
param/transformer.blocks.14.att_proj.weight.max
none
param/transformer.blocks.14.att_proj.weight.min
none
param/transformer.blocks.14.att_proj.weight.norm
none
param/transformer.blocks.14.attn_out.weight.avg
none
param/transformer.blocks.14.attn_out.weight.max
none
param/transformer.blocks.14.attn_out.weight.min
none
param/transformer.blocks.14.attn_out.weight.norm
none
param/transformer.blocks.14.ff_out.weight.avg
none
param/transformer.blocks.14.ff_out.weight.max
none
param/transformer.blocks.14.ff_out.weight.min
none
param/transformer.blocks.14.ff_out.weight.norm
none
param/transformer.blocks.14.ff_proj.weight.avg
none
param/transformer.blocks.14.ff_proj.weight.max
none
param/transformer.blocks.14.ff_proj.weight.min
none
param/transformer.blocks.14.ff_proj.weight.norm
none
param/transformer.blocks.15.att_proj.weight.avg
none
param/transformer.blocks.15.att_proj.weight.max
none
param/transformer.blocks.15.att_proj.weight.min
none
param/transformer.blocks.15.att_proj.weight.norm
none
param/transformer.blocks.15.attn_out.weight.avg
none
param/transformer.blocks.15.attn_out.weight.max
none
param/transformer.blocks.15.attn_out.weight.min
none
param/transformer.blocks.15.attn_out.weight.norm
none
param/transformer.blocks.15.ff_out.weight.avg
none
param/transformer.blocks.15.ff_out.weight.max
none
param/transformer.blocks.15.ff_out.weight.min
none
param/transformer.blocks.15.ff_out.weight.norm
none
param/transformer.blocks.15.ff_proj.weight.avg
none
param/transformer.blocks.15.ff_proj.weight.max
none
param/transformer.blocks.15.ff_proj.weight.min
none
param/transformer.blocks.15.ff_proj.weight.norm
none
param/transformer.blocks.16.att_proj.weight.avg
none
param/transformer.blocks.16.att_proj.weight.max
none
param/transformer.blocks.16.att_proj.weight.min
none
param/transformer.blocks.16.att_proj.weight.norm
none
param/transformer.blocks.16.attn_out.weight.avg
none
param/transformer.blocks.16.attn_out.weight.max
none
param/transformer.blocks.16.attn_out.weight.min
none
param/transformer.blocks.16.attn_out.weight.norm
none
param/transformer.blocks.16.ff_out.weight.avg
none
param/transformer.blocks.16.ff_out.weight.max
none
param/transformer.blocks.16.ff_out.weight.min
none
param/transformer.blocks.16.ff_out.weight.norm
none
param/transformer.blocks.16.ff_proj.weight.avg
none
param/transformer.blocks.16.ff_proj.weight.max
none
param/transformer.blocks.16.ff_proj.weight.min
none
param/transformer.blocks.16.ff_proj.weight.norm
none
param/transformer.blocks.17.att_proj.weight.avg
none
param/transformer.blocks.17.att_proj.weight.max
none
param/transformer.blocks.17.att_proj.weight.min
none
param/transformer.blocks.17.att_proj.weight.norm
none
param/transformer.blocks.17.attn_out.weight.avg
none
param/transformer.blocks.17.attn_out.weight.max
none
param/transformer.blocks.17.attn_out.weight.min
none
param/transformer.blocks.17.attn_out.weight.norm
none
param/transformer.blocks.17.ff_out.weight.avg
none
param/transformer.blocks.17.ff_out.weight.max
none
param/transformer.blocks.17.ff_out.weight.min
none
param/transformer.blocks.17.ff_out.weight.norm
none
param/transformer.blocks.17.ff_proj.weight.avg
none
param/transformer.blocks.17.ff_proj.weight.max
none
param/transformer.blocks.17.ff_proj.weight.min
none
param/transformer.blocks.17.ff_proj.weight.norm
none
param/transformer.blocks.18.att_proj.weight.avg
none
param/transformer.blocks.18.att_proj.weight.max
none
param/transformer.blocks.18.att_proj.weight.min
none
param/transformer.blocks.18.att_proj.weight.norm
none
param/transformer.blocks.18.attn_out.weight.avg
none
param/transformer.blocks.18.attn_out.weight.max
none
param/transformer.blocks.18.attn_out.weight.min
none
param/transformer.blocks.18.attn_out.weight.norm
none
param/transformer.blocks.18.ff_out.weight.avg
none
param/transformer.blocks.18.ff_out.weight.max
none
param/transformer.blocks.18.ff_out.weight.min
none
param/transformer.blocks.18.ff_out.weight.norm
none
param/transformer.blocks.18.ff_proj.weight.avg
none
param/transformer.blocks.18.ff_proj.weight.max
none
param/transformer.blocks.18.ff_proj.weight.min
none
param/transformer.blocks.18.ff_proj.weight.norm
none
param/transformer.blocks.19.att_proj.weight.avg
none
param/transformer.blocks.19.att_proj.weight.max
none
param/transformer.blocks.19.att_proj.weight.min
none
param/transformer.blocks.19.att_proj.weight.norm
none
param/transformer.blocks.19.attn_out.weight.avg
none
param/transformer.blocks.19.attn_out.weight.max
none
param/transformer.blocks.19.attn_out.weight.min
none
param/transformer.blocks.19.attn_out.weight.norm
none
param/transformer.blocks.19.ff_out.weight.avg
none
param/transformer.blocks.19.ff_out.weight.max
none
param/transformer.blocks.19.ff_out.weight.min
none
param/transformer.blocks.19.ff_out.weight.norm
none
param/transformer.blocks.19.ff_proj.weight.avg
none
param/transformer.blocks.19.ff_proj.weight.max
none
param/transformer.blocks.19.ff_proj.weight.min
none
param/transformer.blocks.19.ff_proj.weight.norm
none
param/transformer.blocks.20.att_proj.weight.avg
none
param/transformer.blocks.20.att_proj.weight.max
none
param/transformer.blocks.20.att_proj.weight.min
none
param/transformer.blocks.20.att_proj.weight.norm
none
param/transformer.blocks.20.attn_out.weight.avg
none
param/transformer.blocks.20.attn_out.weight.max
none
param/transformer.blocks.20.attn_out.weight.min
none
param/transformer.blocks.20.attn_out.weight.norm
none
param/transformer.blocks.20.ff_out.weight.avg
none
param/transformer.blocks.20.ff_out.weight.max
none
param/transformer.blocks.20.ff_out.weight.min
none
param/transformer.blocks.20.ff_out.weight.norm
none
param/transformer.blocks.20.ff_proj.weight.avg
none
param/transformer.blocks.20.ff_proj.weight.max
none
param/transformer.blocks.20.ff_proj.weight.min
none
param/transformer.blocks.20.ff_proj.weight.norm
none
param/transformer.blocks.21.att_proj.weight.avg
none
param/transformer.blocks.21.att_proj.weight.max
none
param/transformer.blocks.21.att_proj.weight.min
none
param/transformer.blocks.21.att_proj.weight.norm
none
param/transformer.blocks.21.attn_out.weight.avg
none
param/transformer.blocks.21.attn_out.weight.max
none
param/transformer.blocks.21.attn_out.weight.min
none
param/transformer.blocks.21.attn_out.weight.norm
none
param/transformer.blocks.21.ff_out.weight.avg
none
param/transformer.blocks.21.ff_out.weight.max
none
param/transformer.blocks.21.ff_out.weight.min
none
param/transformer.blocks.21.ff_out.weight.norm
none
param/transformer.blocks.21.ff_proj.weight.avg
none
param/transformer.blocks.21.ff_proj.weight.max
none
param/transformer.blocks.21.ff_proj.weight.min
none
param/transformer.blocks.21.ff_proj.weight.norm
none
param/transformer.blocks.22.att_proj.weight.avg
none
param/transformer.blocks.22.att_proj.weight.max
none
param/transformer.blocks.22.att_proj.weight.min
none
param/transformer.blocks.22.att_proj.weight.norm
none
param/transformer.blocks.22.attn_out.weight.avg
none
param/transformer.blocks.22.attn_out.weight.max
none
param/transformer.blocks.22.attn_out.weight.min
none
param/transformer.blocks.22.attn_out.weight.norm
none
param/transformer.blocks.22.ff_out.weight.avg
none
param/transformer.blocks.22.ff_out.weight.max
none
param/transformer.blocks.22.ff_out.weight.min
none
param/transformer.blocks.22.ff_out.weight.norm
none
param/transformer.blocks.22.ff_proj.weight.avg
none
param/transformer.blocks.22.ff_proj.weight.max
none
param/transformer.blocks.22.ff_proj.weight.min
none
param/transformer.blocks.22.ff_proj.weight.norm
none
param/transformer.blocks.23.att_proj.weight.avg
none
param/transformer.blocks.23.att_proj.weight.max
none
param/transformer.blocks.23.att_proj.weight.min
none
param/transformer.blocks.23.att_proj.weight.norm
none
param/transformer.blocks.23.attn_out.weight.avg
none
param/transformer.blocks.23.attn_out.weight.max
none
param/transformer.blocks.23.attn_out.weight.min
none
param/transformer.blocks.23.attn_out.weight.norm
none
param/transformer.blocks.23.ff_out.weight.avg
none
param/transformer.blocks.23.ff_out.weight.max
none
param/transformer.blocks.23.ff_out.weight.min
none
param/transformer.blocks.23.ff_out.weight.norm
none
param/transformer.blocks.23.ff_proj.weight.avg
none
param/transformer.blocks.23.ff_proj.weight.max
none
param/transformer.blocks.23.ff_proj.weight.min
none
param/transformer.blocks.23.ff_proj.weight.norm
none
param/transformer.blocks.24.att_proj.weight.avg
none
param/transformer.blocks.24.att_proj.weight.max
none
param/transformer.blocks.24.att_proj.weight.min
none
param/transformer.blocks.24.att_proj.weight.norm
none
param/transformer.blocks.24.attn_out.weight.avg
none
param/transformer.blocks.24.attn_out.weight.max
none
param/transformer.blocks.24.attn_out.weight.min
none
param/transformer.blocks.24.attn_out.weight.norm
none
param/transformer.blocks.24.ff_out.weight.avg
none
param/transformer.blocks.24.ff_out.weight.max
none
param/transformer.blocks.24.ff_out.weight.min
none
param/transformer.blocks.24.ff_out.weight.norm
none
param/transformer.blocks.24.ff_proj.weight.avg
none
param/transformer.blocks.24.ff_proj.weight.max
none
param/transformer.blocks.24.ff_proj.weight.min
none
param/transformer.blocks.24.ff_proj.weight.norm
none
param/transformer.blocks.25.att_proj.weight.avg
none
param/transformer.blocks.25.att_proj.weight.max
none
param/transformer.blocks.25.att_proj.weight.min
none
param/transformer.blocks.25.att_proj.weight.norm
none
param/transformer.blocks.25.attn_out.weight.avg
none
param/transformer.blocks.25.attn_out.weight.max
none
param/transformer.blocks.25.attn_out.weight.min
none
param/transformer.blocks.25.attn_out.weight.norm
none
param/transformer.blocks.25.ff_out.weight.avg
none
param/transformer.blocks.25.ff_out.weight.max
none
param/transformer.blocks.25.ff_out.weight.min
none
param/transformer.blocks.25.ff_out.weight.norm
none
param/transformer.blocks.25.ff_proj.weight.avg
none
param/transformer.blocks.25.ff_proj.weight.max
none
param/transformer.blocks.25.ff_proj.weight.min
none
param/transformer.blocks.25.ff_proj.weight.norm
none
param/transformer.blocks.26.att_proj.weight.avg
none
param/transformer.blocks.26.att_proj.weight.max
none
param/transformer.blocks.26.att_proj.weight.min
none
param/transformer.blocks.26.att_proj.weight.norm
none
param/transformer.blocks.26.attn_out.weight.avg
none
param/transformer.blocks.26.attn_out.weight.max
none
param/transformer.blocks.26.attn_out.weight.min
none
param/transformer.blocks.26.attn_out.weight.norm
none
param/transformer.blocks.26.ff_out.weight.avg
none
param/transformer.blocks.26.ff_out.weight.max
none
param/transformer.blocks.26.ff_out.weight.min
none
param/transformer.blocks.26.ff_out.weight.norm
none
param/transformer.blocks.26.ff_proj.weight.avg
none
param/transformer.blocks.26.ff_proj.weight.max
none
param/transformer.blocks.26.ff_proj.weight.min
none
param/transformer.blocks.26.ff_proj.weight.norm
none
param/transformer.blocks.27.att_proj.weight.avg
none
param/transformer.blocks.27.att_proj.weight.max
none
param/transformer.blocks.27.att_proj.weight.min
none
param/transformer.blocks.27.att_proj.weight.norm
none
param/transformer.blocks.27.attn_out.weight.avg
none
param/transformer.blocks.27.attn_out.weight.max
none
param/transformer.blocks.27.attn_out.weight.min
none
param/transformer.blocks.27.attn_out.weight.norm
none
param/transformer.blocks.27.ff_out.weight.avg
none
param/transformer.blocks.27.ff_out.weight.max
none
param/transformer.blocks.27.ff_out.weight.min
none
param/transformer.blocks.27.ff_out.weight.norm
none
param/transformer.blocks.27.ff_proj.weight.avg
none
param/transformer.blocks.27.ff_proj.weight.max
none
param/transformer.blocks.27.ff_proj.weight.min
none
param/transformer.blocks.27.ff_proj.weight.norm
none
param/transformer.blocks.28.att_proj.weight.avg
none
param/transformer.blocks.28.att_proj.weight.max
none
param/transformer.blocks.28.att_proj.weight.min
none
param/transformer.blocks.28.att_proj.weight.norm
none
param/transformer.blocks.28.attn_out.weight.avg
none
param/transformer.blocks.28.attn_out.weight.max
none
param/transformer.blocks.28.attn_out.weight.min
none
param/transformer.blocks.28.attn_out.weight.norm
none
param/transformer.blocks.28.ff_out.weight.avg
none
param/transformer.blocks.28.ff_out.weight.max
none
param/transformer.blocks.28.ff_out.weight.min
none
param/transformer.blocks.28.ff_out.weight.norm
none
param/transformer.blocks.28.ff_proj.weight.avg
none
param/transformer.blocks.28.ff_proj.weight.max
none
param/transformer.blocks.28.ff_proj.weight.min
none
param/transformer.blocks.28.ff_proj.weight.norm
none
param/transformer.blocks.29.att_proj.weight.avg
none
param/transformer.blocks.29.att_proj.weight.max
none
param/transformer.blocks.29.att_proj.weight.min
none
param/transformer.blocks.29.att_proj.weight.norm
none
param/transformer.blocks.29.attn_out.weight.avg
none
param/transformer.blocks.29.attn_out.weight.max
none
param/transformer.blocks.29.attn_out.weight.min
none
param/transformer.blocks.29.attn_out.weight.norm
none
param/transformer.blocks.29.ff_out.weight.avg
none
param/transformer.blocks.29.ff_out.weight.max
none
param/transformer.blocks.29.ff_out.weight.min
none
param/transformer.blocks.29.ff_out.weight.norm
none
param/transformer.blocks.29.ff_proj.weight.avg
none
param/transformer.blocks.29.ff_proj.weight.max
none
param/transformer.blocks.29.ff_proj.weight.min
none
param/transformer.blocks.29.ff_proj.weight.norm
none
param/transformer.blocks.30.att_proj.weight.avg
none
param/transformer.blocks.30.att_proj.weight.max
none
param/transformer.blocks.30.att_proj.weight.min
none
param/transformer.blocks.30.att_proj.weight.norm
none
param/transformer.blocks.30.attn_out.weight.avg
none
param/transformer.blocks.30.attn_out.weight.max
none
param/transformer.blocks.30.attn_out.weight.min
none
param/transformer.blocks.30.attn_out.weight.norm
none
param/transformer.blocks.30.ff_out.weight.avg
none
param/transformer.blocks.30.ff_out.weight.max
none
param/transformer.blocks.30.ff_out.weight.min
none
param/transformer.blocks.30.ff_out.weight.norm
none
param/transformer.blocks.30.ff_proj.weight.avg
none
param/transformer.blocks.30.ff_proj.weight.max
none
param/transformer.blocks.30.ff_proj.weight.min
none
param/transformer.blocks.30.ff_proj.weight.norm
none
param/transformer.blocks.31.att_proj.weight.avg
none
param/transformer.blocks.31.att_proj.weight.max
none
param/transformer.blocks.31.att_proj.weight.min
none
param/transformer.blocks.31.att_proj.weight.norm
none
param/transformer.blocks.31.attn_out.weight.avg
none
param/transformer.blocks.31.attn_out.weight.max
none
param/transformer.blocks.31.attn_out.weight.min
none
param/transformer.blocks.31.attn_out.weight.norm
none
param/transformer.blocks.31.ff_out.weight.avg
none
param/transformer.blocks.31.ff_out.weight.max
none
param/transformer.blocks.31.ff_out.weight.min
none
param/transformer.blocks.31.ff_out.weight.norm
none
param/transformer.blocks.31.ff_proj.weight.avg
none
param/transformer.blocks.31.ff_proj.weight.max
none
param/transformer.blocks.31.ff_proj.weight.min
none
param/transformer.blocks.31.ff_proj.weight.norm
none
param/transformer.ff_out.weight.avg
none
param/transformer.ff_out.weight.max
none
param/transformer.ff_out.weight.min
none
param/transformer.ff_out.weight.norm
none
param/transformer.wte.weight.avg
none
param/transformer.wte.weight.max
none
param/transformer.wte.weight.min
none
param/transformer.wte.weight.norm
none
total_grad_norm
none
throughput
device/batches_per_second
number
device/tokens_per_second
number
total_tokens
number
train
CrossEntropyLoss
number
Perplexity
number
Files
  • Root
Root
Name
Size
Type
requirements.frozen.txt
2.9KB
Plain Text
wandb-job.json
297.0KB
JSON Document
Loading...