huseinzol05

Huseinzol05's workspace

Runs

Running

huseinzol05

1w ago

8d 15h 59m 25s

{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 20, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}

data_module.FSDataset_add_STFT

200

96000

/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt

false

/home/husein/ssd3/gemma3/audio-files.txt

true

/home/husein/ssd3/gemma3/audio-files-test.txt

false

24k

16384

[1,3,9]

1024

[5,4,4,2,2]

0.25

2048

false

[1,3,9]

1024

[2,2,4,4,5]

512

[2,3,5,7,11]

[2,2,2]

Crashed

huseinzol05

1w ago

1d 47m 14s

{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}

data_module.FSDataset_add_STFT

200

96000

/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt

false

/home/husein/ssd3/gemma3/audio-files.txt

true

/home/husein/ssd3/gemma3/audio-files-test.txt

false

24k

16384

[1,3,9]

1024

[5,4,4,2,2]

0.25

2048

false

[1,3,9]

1024

[2,2,4,4,5]

512

[2,3,5,7,11]

[2,2,2]

Crashed

huseinzol05

1w ago

2h 38m 16s

{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}

data_module.FSDataset_add_STFT

200

96000

/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt

false

/home/husein/ssd3/gemma3/audio-files.txt

true

/home/husein/ssd3/gemma3/audio-files-test.txt

false

24k

16384

[1,3,9]

1024

[5,4,4,2,2]

0.25

2048

false

[1,3,9]

1024

[2,2,4,4,5]

512

[2,3,5,7,11]

[2,2,2]

Crashed

huseinzol05

1w ago

30m 13s

{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}

data_module.FSDataset_add_STFT

200

96000

/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt

false

/home/husein/ssd3/gemma3/audio-files.txt

true

/home/husein/ssd3/gemma3/audio-files-test.txt

false

24k

16384

[1,3,9]

1024

[5,4,4,2,2]

0.25

2048

false

[1,3,9]

1024

[2,2,4,4,5]

512

[2,3,5,7,11]

[2,2,2]

Crashed

huseinzol05

1w ago

3h 13m 7s

{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}

data_module.FSDataset_add_STFT

200

96000

/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt

false

/home/husein/ssd3/gemma3/audio-files.txt

true

/home/husein/ssd3/gemma3/audio-files-test.txt

false

24k

16384

[1,3,9]

1024

[5,4,4,2,2]

0.25

2048

false

[1,3,9]

1024

[2,2,4,4,5]

512

[2,3,5,7,11]

[2,2,2]

Crashed

huseinzol05

1w ago

36m 14s

{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}

data_module.FSDataset_add_STFT

200

96000

/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt

false

/home/husein/ssd3/gemma3/audio-files.txt

true

/home/husein/ssd3/gemma3/audio-files-test.txt

false

24k

16384

[1,3,9]

1024

[5,4,4,2,2]

0.25

2048

false

[1,3,9]

1024

[2,2,4,4,5]

512

[2,3,5,7,11]

[2,2,2]

Crashed

huseinzol05

1w ago

8h 10m 29s

{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}

data_module.FSDataset_add_STFT

200

96000

/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt

false

/home/husein/ssd3/gemma3/audio-files.txt

true

/home/husein/ssd3/gemma3/audio-files-test.txt

false

24k

16384

[1,3,9]

1024

[5,4,4,2,2]

0.25

2048

false

[1,3,9]

1024

[2,2,4,4,5]

512

[2,3,5,7,11]

[2,2,2]

Crashed

huseinzol05

2w ago

5h 10m 41s

{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}

data_module.FSDataset_add_STFT

200

96000

/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt

false

/home/husein/ssd3/gemma3/audio-files.txt

true

/home/husein/ssd3/gemma3/audio-files-test.txt

false

24k

16384

[1,3,9]

1024

[5,4,4,2,2]

0.25

2048

false

[1,3,9]

1024

[2,2,4,4,5]

512

[2,3,5,7,11]

[2,2,2]

Crashed

huseinzol05

2w ago

16h 20m 53s

{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}

data_module.FSDataset_add_STFT

200

96000

/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt

false

/home/husein/ssd3/gemma3/audio-files.txt

true

/home/husein/ssd3/gemma3/audio-files-test.txt

false

24k

16384

[1,3,9]

1024

[5,4,4,2,2]

0.25

2048

false

[1,3,9]

1024

[2,2,4,4,5]

512

[2,3,5,7,11]

[2,2,2]

Failed

huseinzol05

2w ago

1h 26m 49s

{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}

data_module.FSDataset_add_STFT

200

96000

/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt

false

/home/husein/ssd3/gemma3/audio-files.txt

true

/home/husein/ssd3/gemma3/audio-files-test.txt

false

24k

16384

[1,3,9]

1024

[5,4,4,2,2]

0.25

2048

false

[1,3,9]

1024

[2,2,4,4,5]

512

[2,3,5,7,11]

[2,2,2]

Crashed

huseinzol05

2w ago

11h 20m 50s

{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}

data_module.FSDataset_add_STFT

200

96000

/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt

false

/home/husein/ssd3/gemma3/audio-files.txt

true

/home/husein/ssd3/gemma3/audio-files-test.txt

false

24k

16384

[1,3,9]

1024

[5,4,4,2,2]

0.25

2048

false

[1,3,9]

1024

[2,2,4,4,5]

512

[2,3,5,7,11]

[2,2,2]

Crashed

huseinzol05

2w ago

3d 7h 57m 2s

{'preprocess': {'datasets': {'LibriSpeech': {'root': '/aifs4su/data/zheny/data/data_8_21_2/', 'trainsets': ['dev', 'test', 'train'], 'testsets': ['test-clean']}}, 'view': {'train_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_higher_quality.txt', 'test_filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt'}, 'audio': {'sr': 16000, 'normalization': False, 'max_wav_value': 32768.0}, 'stft': {'n_fft': 1280, 'window_length': 1280, 'hop_length': 320}}, 'dataset': {'dataset': {'_target_': 'data_module.FSDataset_add_STFT'}, 'train': {'filelist': '/home/husein/ssd3/gemma3/audio-files.txt', 'batch_size': 18, 'shuffle': True}, 'val': {'filelist': '/home/husein/ssd3/gemma3/audio-files-test.txt', 'batch_size': 8, 'shuffle': False}, 'test': {'filelist': '/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt', 'batch_size': 1, 'shuffle': False}, 'padding_idx': 0, 'frame_length': 200, 'min_audio_length': 96000}, 'model': {'codec_encoder': {'ngf': 48, 'hidden_dim': 1024, 'depth': 1, 'heads': 16, 'pos_meb_dim': 64, 'up_ratios': [2, 2, 4, 4, 5], 'dilations': [1, 3, 9]}, 'codec_decoder': {'ngf': 48, 'up_ratios': [5, 4, 4, 2, 2], 'dilations': [1, 3, 9], 'hidden_dim': 1024, 'depth': 12, 'heads': 16, 'pos_meb_dim': 64, 'vq_num_quantizers': 1, 'vq_dim': 2048, 'vq_commit_weight': 0.25, 'vq_weight_init': False, 'vq_full_commit_loss': False, 'codebook_size': 16384, 'codebook_dim': 16}, 'mpd': {'periods': [2, 3, 5, 7, 11], 'max_downsample_channels': 512, 'channels': 16, 'channel_increasing_factor': 4}, 'mstft': {'stft_params': {'fft_sizes': [78, 126, 206, 334, 542, 876, 1418, 2296], 'hop_sizes': [39, 63, 103, 167, 271, 438, 709, 1148], 'win_lengths': [78, 126, 206, 334, 542, 876, 1418, 2296], 'window': 'hann_window'}, 'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'max_downsample_channels': 512, 'downsample_scales': [2, 2, 2], 'use_weight_norm': True}}, 'train': {'trainer': {'accelerator': 'gpu', 'num_nodes': 1, 'devices': 2, 'min_steps': 3000000, 'max_steps': 3000000, 'precision': '16-mixed', 'limit_val_batches': 2560, 'val_check_interval': 4000, 'num_sanity_val_steps': 0, 'accumulate_grad_batches': 1}, 'lambdas': {'lambda_disc': 1.0, 'lambda_feat_match_loss': 1.0, 'lambda_mel_loss': 15.0, 'lambda_adv': 1.0, 'lambda_stft_loss': 1.0, 'lambda_semantic_loss': 5, 'lambda_perceptual_loss': 0}, 'use_mel_loss': True, 'use_feat_match_loss': True, 'use_stft_loss': False, 'stft_loss_params': {'fft_sizes': [512, 2048], 'hop_sizes': [128, 512], 'win_lengths': [512, 2048], 'window': 'hann_window'}, 'gen_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'disc_optim_params': {'lr': 1.0, 'betas': [0.8, 0.9]}, 'gen_grad_clip': 1.0, 'disc_grad_clip': 1.0, 'gen_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}, 'disc_schedule_params': {'warmup_step': 3000, 'down_step': 400000, 'min_lr': 2e-05, 'max_lr': 0.0001}}, 'log_dir': '24k', 'debug': False, 'ckpt': None, 'input_dir': None, 'output_dir': None}

data_module.FSDataset_add_STFT

200

96000

/aifs4su/data/zheny/data/data_8_21_2/mls_all_audio_path_test.txt

false

/home/husein/ssd3/gemma3/audio-files.txt

true

/home/husein/ssd3/gemma3/audio-files-test.txt

false

24k

16384

[1,3,9]

1024

[5,4,4,2,2]

0.25

2048

false

[1,3,9]

1024

[2,2,4,4,5]

512

[2,3,5,7,11]

[2,2,2]

1-12

of 12