Bminixhofer_ai2's workspace
Runs
1
1 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
algo.T
algo.backbone
algo.causal_attention
algo.curriculum_end
algo.curriculum_start
algo.gamma_max
algo.gamma_min
algo.gumbel_tau_log10_end
algo.gumbel_tau_log10_start
algo.ignore_bos
algo.integral_cache_path
algo.loss_type
algo.name
algo.parameterization
algo.subs_masking
algo.time_conditioning
callbacks.checkpoint_every_n_steps._target_
callbacks.checkpoint_every_n_steps.auto_insert_metric_name
callbacks.checkpoint_every_n_steps.dirpath
callbacks.checkpoint_every_n_steps.every_n_train_steps
callbacks.checkpoint_every_n_steps.save_last
callbacks.checkpoint_every_n_steps.save_top_k
callbacks.checkpoint_every_n_steps.verbose
callbacks.checkpoint_monitor._target_
callbacks.checkpoint_monitor.auto_insert_metric_name
callbacks.checkpoint_monitor.dirpath
callbacks.checkpoint_monitor.filename
callbacks.checkpoint_monitor.mode
callbacks.checkpoint_monitor.monitor
callbacks.checkpoint_monitor.save_last
callbacks.checkpoint_monitor.save_top_k
callbacks.checkpoint_monitor.verbose
callbacks.learning_rate_monitor._target_
callbacks.learning_rate_monitor.logging_interval
checkpointing.resume_ckpt_path
checkpointing.resume_from_ckpt
checkpointing.save_dir
config
data.cache_dir
data.insert_train_eos
data.insert_valid_eos
data.streaming
data.tokenizer_name_or_path
data.train
Finished
tengx
duo
lm1b
log-linear
1d 7h 33m 9s
-
0
dit
false
500000
0
-1.75
-3.5
-3
-3
false
/weka/oe-adapt-default/tengx/paper/duo-simple-main/duo/integral/bert-base-uncased.pkl
elbo
duo
mean
false
true
lightning.pytorch.callbacks.ModelCheckpoint
false
/weka/oe-adapt-default/tengx/paper/duo-simple-main/duo/ckpt/checkpoints
500
true
-1
true
lightning.pytorch.callbacks.ModelCheckpoint
false
/weka/oe-adapt-default/tengx/paper/duo-simple-main/duo/ckpt/checkpoints
best
min
val/nll
false
1
true
lightning.pytorch.callbacks.LearningRateMonitor
step
./outputs/lm1b/checkpoints/last.ckpt
true
/weka/oe-adapt-default/tengx/paper/duo-simple-main/duo/ckpt
{'mode': 'train', 'seed': 1, 'loader': {'global_batch_size': 512, 'eval_global_batch_size': '${.global_batch_size}', 'batch_size': 64, 'eval_batch_size': 64, 'num_workers': '${eval:"len(__import__(\'os\').sched_getaffinity(0))"}', 'pin_memory': True}, 'sampling': {'predictor': 'ancestral', 'steps': 1000, 'noise_removal': 'ancestral', 'use_float64': True, 'p_nucleus': 1.0, 'num_sample_batches': 2, 'num_sample_log': 2, 'semi_ar': False, 'stride_length': 1, 'num_strides': 1}, 'training': {'ema': 0.9999, 'antithetic_sampling': True, 'importance_sampling': False, 'sampling_eps': 0.001, 'change_of_variables': False, 'loss_precision': 'bf16', 'finetune_path': ''}, 'eval': {'checkpoint_path': '', 'disable_ema': False, 'compute_generative_perplexity': True, 'perplexity_batch_size': 8, 'compute_perplexity_on_sanity': False, 'gen_ppl_eval_model_name_or_path': 'gpt2-large', 'generate_samples': True, 'generated_samples_path': '${cwd:}/samples.json'}, 'optim': {'weight_decay': 0, 'lr': 0.0003, 'beta1': 0.9, 'beta2': 0.999, 'eps': 1e-08}, 'trainer': {'_target_': 'lightning.Trainer', 'accelerator': 'cuda', 'num_nodes': 1, 'devices': '${device_count:}', 'accumulate_grad_batches': '${div_up:${loader.global_batch_size}, ${eval:${trainer.devices} * ${loader.batch_size} * ${trainer.num_nodes}}}', 'gradient_clip_val': 1.0, 'precision': 'bf16', 'num_sanity_val_steps': 2, 'max_steps': 1000000, 'log_every_n_steps': 100, 'limit_train_batches': 1.0, 'limit_val_batches': 1.0, 'val_check_interval': 5000}, 'wandb': {'project': 'flow-ode-duo', 'notes': 'Flow ODEs for UDLM', 'group': None, 'job_type': None, 'name': 'duo-lm1b-negative', 'tags': ['${noise.type}', '${data.train}', '${data.valid}', '${algo.name}']}, 'checkpointing': {'save_dir': '${cwd:}', 'resume_from_ckpt': True, 'resume_ckpt_path': './outputs/${data.train}/checkpoints/last.ckpt'}, 'callbacks': {'checkpoint_every_n_steps': {'_target_': 'lightning.pytorch.callbacks.ModelCheckpoint', 'save_top_k': -1, 'save_last': True, 'dirpath': '${checkpointing.save_dir}/checkpoints', 'verbose': True, 'auto_insert_metric_name': False, 'every_n_train_steps': 500}, 'checkpoint_monitor': {'_target_': 'lightning.pytorch.callbacks.ModelCheckpoint', 'monitor': 'val/nll', 'mode': 'min', 'save_top_k': 1, 'save_last': False, 'dirpath': '${checkpointing.save_dir}/checkpoints', 'filename': 'best', 'auto_insert_metric_name': False, 'verbose': True}, 'learning_rate_monitor': {'_target_': 'lightning.pytorch.callbacks.LearningRateMonitor', 'logging_interval': 'step'}}, 'data': {'train': 'lm1b', 'valid': 'lm1b', 'tokenizer_name_or_path': 'bert-base-uncased', 'cache_dir': '/weka/oe-adapt-default/tengx/paper/duo-simple-main/duo/data', 'wrap': False, 'streaming': False, 'insert_train_eos': True, 'insert_valid_eos': True}, 'model': {'name': 'small', 'type': 'ddit', 'hidden_size': 768, 'cond_dim': 128, 'length': 128, 'n_blocks': 12, 'n_heads': 12, 'scale_by_sigma': True, 'dropout': 0.1, 'tie_word_embeddings': False, 'vocab_lookup': True}, 'strategy': {'_target_': 'lightning.pytorch.strategies.DDPStrategy', 'find_unused_parameters': False}, 'noise': {'type': 'log-linear', 'parameterization': 'log-linear', 'eps': 0, 'denoiser_latent_conditioning': -1, 'freeze_encoder': False, 'freeze_decoder': False}, 'lr_scheduler': {'_target_': 'transformers.get_constant_schedule_with_warmup', 'num_warmup_steps': 2500}, 'prior': {'type': 'none', 'latent_width': 0, 'latent_height': 0}, 'algo': {'name': 'duo', 'backbone': 'dit', 'parameterization': 'mean', 'time_conditioning': True, 'T': 0, 'subs_masking': False, 'causal_attention': False, 'gumbel_tau_log10_start': -3.0, 'gumbel_tau_log10_end': -3.0, 'curriculum_start': 0, 'curriculum_end': 500000, 'integral_cache_path': '${hydra:runtime.cwd}/integral/${data.tokenizer_name_or_path}.pkl', 'loss_type': 'elbo', 'ignore_bos': False, 'gamma_min': -3.5, 'gamma_max': -1.75}}
/weka/oe-adapt-default/tengx/paper/duo-simple-main/duo/data
true
true
false
bert-base-uncased
lm1b
1-1
of 1