Huseinzol05's workspace
Runs
12
State
Notes
User
Tags
Created
Runtime
Sweep
cfg
dataset.dataset._target_
dataset.frame_length
dataset.min_audio_length
dataset.padding_idx
dataset.test.batch_size
dataset.test.filelist
dataset.test.shuffle
dataset.train.batch_size
dataset.train.filelist
dataset.train.shuffle
dataset.val.batch_size
dataset.val.filelist
dataset.val.shuffle
debug
log_dir
model.codec_decoder.codebook_dim
model.codec_decoder.codebook_size
model.codec_decoder.depth
model.codec_decoder.dilations
model.codec_decoder.heads
model.codec_decoder.hidden_dim
model.codec_decoder.ngf
model.codec_decoder.pos_meb_dim
model.codec_decoder.up_ratios
model.codec_decoder.vq_commit_weight
model.codec_decoder.vq_dim
model.codec_decoder.vq_full_commit_loss
model.codec_decoder.vq_num_quantizers
model.codec_decoder.vq_weight_init
model.codec_encoder.depth
model.codec_encoder.dilations
model.codec_encoder.heads
model.codec_encoder.hidden_dim
model.codec_encoder.ngf
model.codec_encoder.pos_meb_dim
model.codec_encoder.up_ratios
model.mpd.channel_increasing_factor
model.mpd.channels
model.mpd.max_downsample_channels
model.mpd.periods
model.mstft.channels
model.mstft.downsample_scales
model.mstft.in_channels
Running
-
huseinzol05
8d 15h 59m 25s
-
{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 20, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}
data_module.FSDataset_add_STFT
200
96000
0
1
/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt
false
20
/home/husein/ssd3/gemma3/audio-files.txt
true
8
/home/husein/ssd3/gemma3/audio-files-test.txt
false
false
24k
16
16384
12
[1,3,9]
16
1024
48
64
[5,4,4,2,2]
0.25
2048
false
1
false
1
[1,3,9]
16
1024
48
64
[2,2,4,4,5]
4
16
512
[2,3,5,7,11]
32
[2,2,2]
1
Crashed
-
huseinzol05
1d 47m 14s
-
{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}
data_module.FSDataset_add_STFT
200
96000
0
1
/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt
false
18
/home/husein/ssd3/gemma3/audio-files.txt
true
8
/home/husein/ssd3/gemma3/audio-files-test.txt
false
false
24k
16
16384
12
[1,3,9]
16
1024
48
64
[5,4,4,2,2]
0.25
2048
false
1
false
1
[1,3,9]
16
1024
48
64
[2,2,4,4,5]
4
16
512
[2,3,5,7,11]
32
[2,2,2]
1
Crashed
-
huseinzol05
2h 38m 16s
-
{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}
data_module.FSDataset_add_STFT
200
96000
0
1
/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt
false
18
/home/husein/ssd3/gemma3/audio-files.txt
true
8
/home/husein/ssd3/gemma3/audio-files-test.txt
false
false
24k
16
16384
12
[1,3,9]
16
1024
48
64
[5,4,4,2,2]
0.25
2048
false
1
false
1
[1,3,9]
16
1024
48
64
[2,2,4,4,5]
4
16
512
[2,3,5,7,11]
32
[2,2,2]
1
Crashed
-
huseinzol05
30m 13s
-
{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}
data_module.FSDataset_add_STFT
200
96000
0
1
/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt
false
18
/home/husein/ssd3/gemma3/audio-files.txt
true
8
/home/husein/ssd3/gemma3/audio-files-test.txt
false
false
24k
16
16384
12
[1,3,9]
16
1024
48
64
[5,4,4,2,2]
0.25
2048
false
1
false
1
[1,3,9]
16
1024
48
64
[2,2,4,4,5]
4
16
512
[2,3,5,7,11]
32
[2,2,2]
1
Crashed
-
huseinzol05
3h 13m 7s
-
{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}
data_module.FSDataset_add_STFT
200
96000
0
1
/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt
false
18
/home/husein/ssd3/gemma3/audio-files.txt
true
8
/home/husein/ssd3/gemma3/audio-files-test.txt
false
false
24k
16
16384
12
[1,3,9]
16
1024
48
64
[5,4,4,2,2]
0.25
2048
false
1
false
1
[1,3,9]
16
1024
48
64
[2,2,4,4,5]
4
16
512
[2,3,5,7,11]
32
[2,2,2]
1
Crashed
-
huseinzol05
36m 14s
-
{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}
data_module.FSDataset_add_STFT
200
96000
0
1
/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt
false
18
/home/husein/ssd3/gemma3/audio-files.txt
true
8
/home/husein/ssd3/gemma3/audio-files-test.txt
false
false
24k
16
16384
12
[1,3,9]
16
1024
48
64
[5,4,4,2,2]
0.25
2048
false
1
false
1
[1,3,9]
16
1024
48
64
[2,2,4,4,5]
4
16
512
[2,3,5,7,11]
32
[2,2,2]
1
Crashed
-
huseinzol05
8h 10m 29s
-
{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}
data_module.FSDataset_add_STFT
200
96000
0
1
/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt
false
18
/home/husein/ssd3/gemma3/audio-files.txt
true
8
/home/husein/ssd3/gemma3/audio-files-test.txt
false
false
24k
16
16384
12
[1,3,9]
16
1024
48
64
[5,4,4,2,2]
0.25
2048
false
1
false
1
[1,3,9]
16
1024
48
64
[2,2,4,4,5]
4
16
512
[2,3,5,7,11]
32
[2,2,2]
1
Crashed
-
huseinzol05
5h 10m 41s
-
{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}
data_module.FSDataset_add_STFT
200
96000
0
1
/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt
false
18
/home/husein/ssd3/gemma3/audio-files.txt
true
8
/home/husein/ssd3/gemma3/audio-files-test.txt
false
false
24k
16
16384
12
[1,3,9]
16
1024
48
64
[5,4,4,2,2]
0.25
2048
false
1
false
1
[1,3,9]
16
1024
48
64
[2,2,4,4,5]
4
16
512
[2,3,5,7,11]
32
[2,2,2]
1
Crashed
-
huseinzol05
16h 20m 53s
-
{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}
data_module.FSDataset_add_STFT
200
96000
0
1
/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt
false
18
/home/husein/ssd3/gemma3/audio-files.txt
true
8
/home/husein/ssd3/gemma3/audio-files-test.txt
false
false
24k
16
16384
12
[1,3,9]
16
1024
48
64
[5,4,4,2,2]
0.25
2048
false
1
false
1
[1,3,9]
16
1024
48
64
[2,2,4,4,5]
4
16
512
[2,3,5,7,11]
32
[2,2,2]
1
Failed
-
huseinzol05
1h 26m 49s
-
{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}
data_module.FSDataset_add_STFT
200
96000
0
1
/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt
false
18
/home/husein/ssd3/gemma3/audio-files.txt
true
8
/home/husein/ssd3/gemma3/audio-files-test.txt
false
false
24k
16
16384
12
[1,3,9]
16
1024
48
64
[5,4,4,2,2]
0.25
2048
false
1
false
1
[1,3,9]
16
1024
48
64
[2,2,4,4,5]
4
16
512
[2,3,5,7,11]
32
[2,2,2]
1
Crashed
-
huseinzol05
11h 20m 50s
-
{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}
data_module.FSDataset_add_STFT
200
96000
0
1
/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt
false
18
/home/husein/ssd3/gemma3/audio-files.txt
true
8
/home/husein/ssd3/gemma3/audio-files-test.txt
false
false
24k
16
16384
12
[1,3,9]
16
1024
48
64
[5,4,4,2,2]
0.25
2048
false
1
false
1
[1,3,9]
16
1024
48
64
[2,2,4,4,5]
4
16
512
[2,3,5,7,11]
32
[2,2,2]
1
Crashed
-
huseinzol05
3d 7h 57m 2s
-
{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}
data_module.FSDataset_add_STFT
200
96000
0
1
/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt
false
18
/home/husein/ssd3/gemma3/audio-files.txt
true
8
/home/husein/ssd3/gemma3/audio-files-test.txt
false
false
24k
16
16384
12
[1,3,9]
16
1024
48
64
[5,4,4,2,2]
0.25
2048
false
1
false
1
[1,3,9]
16
1024
48
64
[2,2,4,4,5]
4
16
512
[2,3,5,7,11]
32
[2,2,2]
1
1-12
of 12