import argparse
import os
import pathlib
import time

import librosa
import numpy as np
import soundfile
import torch

from bytesep.inference import Separator
from bytesep.models.lightning_modules import get_model_class
from bytesep.utils import read_yaml


def inference(args) -> None:
    r"""Separate all audios in a directory.

    Args:
        config_yaml: str, the config file of a model being trained
        checkpoint_path: str, the path of checkpoint to be loaded
        audios_dir: str, the directory of audios to be separated
        output_dir: str, the directory to write out separated audios
        scale_volume: bool, if True, scale each separated audio to a peak amplitude of 1

    Returns:
        None
    """
    # Arguments & parameters
    config_yaml = args.config_yaml
    checkpoint_path = args.checkpoint_path
    audios_dir = args.audios_dir
    output_dir = args.output_dir
    scale_volume = args.scale_volume
    device = (
        torch.device('cuda')
        if args.cuda and torch.cuda.is_available()
        else torch.device('cpu')
    )

    configs = read_yaml(config_yaml)
    sample_rate = configs['train']['sample_rate']
    input_channels = configs['train']['channels']
    target_source_types = configs['train']['target_source_types']
    target_sources_num = len(target_source_types)
    model_type = configs['train']['model_type']
    mono = input_channels == 1

    segment_samples = int(30 * sample_rate)  # Separate in 30-second segments.
    batch_size = 1

    models_contains_inplaceabn = True

    # Need to use torch.distributed if models contain inplace_abn.abn.InPlaceABNSync.
    if models_contains_inplaceabn:
        import torch.distributed as dist

        dist.init_process_group(
            'gloo', init_method='file:///tmp/somefile', rank=0, world_size=1
        )

    print("Using {} for separating ...".format(device))

    # Paths
    os.makedirs(output_dir, exist_ok=True)

    # Get model class.
    Model = get_model_class(model_type)

    # Create model.
    model = Model(input_channels=input_channels, target_sources_num=target_sources_num)

    # Load checkpoint.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint["model"])

    # Move model to device.
    model.to(device)

    # Create separator.
    separator = Separator(
        model=model,
        segment_samples=segment_samples,
        batch_size=batch_size,
        device=device,
    )

    audio_names = sorted(os.listdir(audios_dir))

    for audio_name in audio_names:
        audio_path = os.path.join(audios_dir, audio_name)

        # Load audio.
        audio, _ = librosa.load(audio_path, sr=sample_rate, mono=mono)

        if audio.ndim == 1:
            audio = audio[None, :]  # (1, audio_samples)

        input_dict = {'waveform': audio}

        # Separate.
        separate_time = time.time()
        sep_wav = separator.separate(input_dict)
        # (channels_num, audio_samples)

        print('Separate time: {:.3f} s'.format(time.time() - separate_time))

        # Write out separated audio.
        if scale_volume:
            sep_wav /= np.max(np.abs(sep_wav))

        # Write a temporary wav, then convert to mp3 with ffmpeg (must be on PATH).
        soundfile.write(file='_zz.wav', data=sep_wav.T, samplerate=sample_rate)

        output_path = os.path.join(
            output_dir, '{}.mp3'.format(pathlib.Path(audio_name).stem)
        )
        os.system('ffmpeg -y -loglevel panic -i _zz.wav "{}"'.format(output_path))
        print('Write out to {}'.format(output_path))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Separate all audios in a directory.")
    parser.add_argument(
        "--config_yaml",
        type=str,
        required=True,
        help="The config file of a model being trained.",
    )
    parser.add_argument(
        "--checkpoint_path",
        type=str,
        required=True,
        help="The path of checkpoint to be loaded.",
    )
    parser.add_argument(
        "--audios_dir",
        type=str,
        required=True,
        help="The directory of audios to be separated.",
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        required=True,
        help="The directory to write out separated audios.",
    )
    parser.add_argument(
        '--scale_volume',
        action='store_true',
        default=False,
        help="If set, scale each separated audio to a peak amplitude of 1.",
    )
| parser.add_argument("--cuda", action='store_true', default=True) | |
| args = parser.parse_args() | |
| inference(args) | |
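

# Usage (a minimal sketch; the script filename and every path below are
# hypothetical placeholders, and --scale_volume / --cuda are optional flags):
#
#   python3 separate.py \
#       --config_yaml="configs/vocals.yaml" \
#       --checkpoint_path="checkpoints/vocals.pth" \
#       --audios_dir="input_audios" \
#       --output_dir="separated" \
#       --scale_volume \
#       --cuda
#
# The config YAML is expected to provide train.sample_rate, train.channels,
# train.target_source_types, and train.model_type, which inference() reads above.
# Writing mp3 output additionally requires ffmpeg to be available on PATH.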