{
  "architectures": [
    "EncodecModel"
  ],
  "audio_channels": 1,
  "chunk_length_s": null,
  "codebook_dim": 128,
  "codebook_size": 1024,
  "compress": 2,
  "dilation_growth_rate": 2,
  "hidden_size": 128,
  "kernel_size": 7,
  "last_kernel_size": 7,
  "model_type": "encodec",
  "norm_type": "weight_norm",
  "normalize": false,
  "num_filters": 32,
  "num_lstm_layers": 2,
  "num_residual_layers": 1,
  "overlap": null,
  "pad_mode": "reflect",
  "residual_kernel_size": 3,
  "sampling_rate": 24000,
  "target_bandwidths": [
    1.5,
    3.0,
    6.0,
    12.0,
    24.0
  ],
  "torch_dtype": "float32",
  "transformers_version": "4.31.0.dev0",
  "trim_right_ratio": 1.0,
  "upsampling_ratios": [
    8,
    5,
    4,
    2
  ],
  "use_causal_conv": true
}