---
# Training configuration: generator, discriminators, optimizers, LR schedulers,
# loss criteria, and run-level settings.
#
# NOTE(review): the nesting below was reconstructed from key semantics because
# the previous form of this file had no indentation (every line was wrapped in
# table pipes). All keys and values are unchanged. Please confirm the structure
# against the code that loads this file, in particular the placement of
# `commit_loss_weight`.

# Generator model. Each component in this file is described by a `name` plus a
# `config` mapping (presumably the class name and its constructor arguments;
# verify against the loader).
generator:
  name: ScalarModel
  config:
    num_bands: 1
    sample_rate: 24000
    causal: true
    num_samples: 2
    # Product of the factors is 2 * 3 * 4 * 4 * 5 = 480.
    downsample_factors:
    - 2
    - 3
    - 4
    - 4
    - 5
    # Each kernel size is twice the factor at the same position.
    downsample_kernel_sizes:
    - 4
    - 6
    - 8
    - 8
    - 10
    # Same values as downsample_factors, in reverse order.
    upsample_factors:
    - 5
    - 4
    - 4
    - 3
    - 2
    upsample_kernel_sizes:
    - 10
    - 8
    - 8
    - 6
    - 4
    latent_hidden_dim: 136
    default_kernel_size: 7
    delay_kernel_size: 5
    init_channel: 48
    res_kernel_size: 7

# Names of discriminator sections. Only `mfd` is listed; `mpd` and `msd` are
# configured below but not listed (presumably unused in this run; confirm).
d_list:
- mfd

mfd:
  name: MultiFrequencyDiscriminator
  config:
    # hop_lengths and hidden_channels both have six entries.
    hop_lengths:
    - 32
    - 64
    - 128
    - 256
    - 512
    - 1024
    hidden_channels:
    - 64
    - 128
    - 256
    - 512
    - 512
    - 512
    domain: double
    mel_scale: true
    sample_rate: 24000

mpd:
  name: MultiPeriodDiscriminator
  config:
    period_sizes:
    - 2
    - 3
    - 5
    - 7
    - 11
    period_kernel_size: 5

msd:
  name: MultiScaleDiscriminator
  config:
    num_scales: 3
    pool_kernel_size: 4
    pool_stride: 2

# Optimizer settings; `g` and `d` currently hold identical values
# (presumably generator and discriminator; confirm against the trainer).
optimizer:
  g:
    name: AdamW
    config:
      lr: 0.0002
      betas:
      - 0.8
      - 0.99
      eps: 1.0e-06
  d:
    name: AdamW
    config:
      lr: 0.0002
      betas:
      - 0.8
      - 0.99
      eps: 1.0e-06

# Learning-rate schedulers, keyed the same way as `optimizer`.
lr_scheduler:
  g:
    name: ExponentialLR
    config:
      gamma: 0.999
  d:
    name: ExponentialLR
    config:
      gamma: 0.999

# Loss definitions.
criterion:
  g_criterion:
    name: losses.generator_loss.GeneratorSTFTLoss
    config:
      # use_mel_loss is false while mel_loss_weight and mel_scale_loss are
      # still set (presumably ignored when the flag is off; confirm).
      use_mel_loss: false
      adv_criterion: MSEGLoss
      mel_loss_weight: 45
      use_feature_match: true
      feat_match_loss_weight: 20
      use_full_stft_loss: true
      use_sub_stft_loss: true
      full_stft_loss_weight: 1
      sub_stft_loss_weight: 1
      mel_scale_loss:
        sampling_rate: 24000
        n_fft: 1024
        num_mels: 80
        hop_size: 160
        win_size: 800
        fmin: 0
      # The three lists are parallel: one entry per STFT resolution.
      full_multi_scale_stft_loss:
        fft_sizes:
        - 512
        - 1024
        - 2048
        win_sizes:
        - 480
        - 960
        - 1200
        hop_sizes:
        - 120
        - 240
        - 300
      sub_multi_scale_stft_loss:
        num_bands: 6
        fft_sizes:
        - 128
        - 256
        - 256
        win_sizes:
        - 80
        - 120
        - 200
        hop_sizes:
        - 20
        - 40
        - 50
  d_criterion:
    name: losses.discriminator_loss.MSEDiscriminatorLoss
    config: null

# NOTE(review): placed at top level because it follows `config: null` and so
# cannot belong to that config; confirm it is not expected under `criterion`.
commit_loss_weight: 1.0

# Run-level settings.
# NOTE(review): the paths below are absolute and machine-specific. The data
# directory name contains "16k" while sample_rate is 24000; confirm the data
# matches the configured rate.
training_file: /home/ydc/code2/ScalartTokenizer16k_m36/data/train_v2.scp
validation_file: /home/ydc/code2/ScalartTokenizer16k_m36/data/val.scp
seed: 2333
cudnn_deterministic: false
tensorboard: true
checkpoint_interval: 5000
summary_interval: 100
validation_interval: 5000
# Key spelling ("epoches") is kept as-is; renaming it would break the reader.
num_epoches: 50
print_freq: 10
discriminator_iter_start: 0
num_ckpt_keep: 10
segment_size: 48000
audio_norm_scale: 0.95
batch_size: 16
num_workers: 4
num_plots: 8
local_rank: -1
# NOTE(review): file name says "64dim" while latent_hidden_dim above is 136
# and log_dir says "136dim"; confirm this is the intended base config.
basic_model_config: config/scalar24k_64dim.yaml
exp_model_config: null
log_dir: /data9/ydc/exp/s_codec_24k_136dim_scale9_25hz
# NOTE(review): differs from the product of downsample_factors (480);
# confirm which component consumes this value.
hop_length: 2000
ngpus_per_node: 4
sample_rate: 24000
model_ckpt_dir: /data9/ydc/exp/s_codec_24k_136dim_scale9_25hz/model_ckpts