andhikagg committed on
Commit
16b0eb1
·
verified ·
1 Parent(s): 64c58cf

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -24
app.py CHANGED
@@ -131,7 +131,7 @@ if os.path.isfile("rmvpe.pt"):
131
  # yield info, None
132
  # return vc_fn
133
 
134
- def create_vc_fn(model_name, tgt_sr, net_g, vc, if_f0, version, file_index):
135
  def vc_fn(
136
  vc_audio_mode,
137
  vc_input,
@@ -147,38 +147,28 @@ def create_vc_fn(model_name, tgt_sr, net_g, vc, if_f0, version, file_index):
147
  protect,
148
  ):
149
  try:
150
- logs = []
151
- print(f"Converting using {model_name}...")
152
- logs.append(f"Converting using {model_name}...")
153
- yield "\n".join(logs), None
154
-
155
- # === PERBAIKAN logika or ===
156
- if (vc_audio_mode == "Input path" or vc_audio_mode == "Youtube") and vc_input != "":
157
  audio, sr = librosa.load(vc_input, sr=16000, mono=True)
158
-
159
  elif vc_audio_mode == "Upload audio":
160
  if vc_upload is None:
161
  return "You need to upload an audio", None
162
  sampling_rate, audio = vc_upload
163
  duration = audio.shape[0] / sampling_rate
164
- if duration > 20 and spaces:
165
  return "Please upload an audio file that is less than 20 seconds. If you need to generate a longer audio file, please use Colab.", None
166
  audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
167
  if len(audio.shape) > 1:
168
  audio = librosa.to_mono(audio.transpose(1, 0))
169
  if sampling_rate != 16000:
170
  audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
171
-
172
  elif vc_audio_mode == "TTS Audio":
173
- if len(tts_text) > 100 and spaces:
174
  return "Text is too long", None
175
  if tts_text is None or tts_voice is None:
176
  return "You need to enter text and select a voice", None
177
  asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save("tts.mp3"))
178
  audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
179
  vc_input = "tts.mp3"
180
-
181
- # Lanjut pipeline
182
  times = [0, 0, 0]
183
  f0_up_key = int(f0_up_key)
184
  audio_opt = vc.pipeline(
@@ -191,6 +181,7 @@ def create_vc_fn(model_name, tgt_sr, net_g, vc, if_f0, version, file_index):
191
  f0_up_key,
192
  f0_method,
193
  file_index,
 
194
  index_rate,
195
  if_f0,
196
  filter_radius,
@@ -202,18 +193,13 @@ def create_vc_fn(model_name, tgt_sr, net_g, vc, if_f0, version, file_index):
202
  f0_file=None,
203
  )
204
  info = f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
205
- print(f"{model_name} | {info}")
206
- logs.append(f"Successfully Convert {model_name}\n{info}")
207
- yield "\n".join(logs), (tgt_sr, audio_opt)
208
-
209
- except GeneratorExit:
210
- # Ini sinyal normal ketika user stop proses di tengah jalan
211
- raise
212
-
213
- except Exception:
214
  info = traceback.format_exc()
215
  print(info)
216
- yield info, None
 
217
 
218
  def load_model():
219
  categories = []
 
131
  # yield info, None
132
  # return vc_fn
133
 
134
+ def create_vc_fn(model_title, tgt_sr, net_g, vc, if_f0, version, file_index):
135
  def vc_fn(
136
  vc_audio_mode,
137
  vc_input,
 
147
  protect,
148
  ):
149
  try:
150
+ if vc_audio_mode == "Input path" or "Youtube" and vc_input != "":
 
 
 
 
 
 
151
  audio, sr = librosa.load(vc_input, sr=16000, mono=True)
 
152
  elif vc_audio_mode == "Upload audio":
153
  if vc_upload is None:
154
  return "You need to upload an audio", None
155
  sampling_rate, audio = vc_upload
156
  duration = audio.shape[0] / sampling_rate
157
+ if duration > 20 and limitation:
158
  return "Please upload an audio file that is less than 20 seconds. If you need to generate a longer audio file, please use Colab.", None
159
  audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
160
  if len(audio.shape) > 1:
161
  audio = librosa.to_mono(audio.transpose(1, 0))
162
  if sampling_rate != 16000:
163
  audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
 
164
  elif vc_audio_mode == "TTS Audio":
165
+ if len(tts_text) > 100 and limitation:
166
  return "Text is too long", None
167
  if tts_text is None or tts_voice is None:
168
  return "You need to enter text and select a voice", None
169
  asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save("tts.mp3"))
170
  audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
171
  vc_input = "tts.mp3"
 
 
172
  times = [0, 0, 0]
173
  f0_up_key = int(f0_up_key)
174
  audio_opt = vc.pipeline(
 
181
  f0_up_key,
182
  f0_method,
183
  file_index,
184
+ # file_big_npy,
185
  index_rate,
186
  if_f0,
187
  filter_radius,
 
193
  f0_file=None,
194
  )
195
  info = f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
196
+ print(f"{model_title} | {info}")
197
+ return info, (tgt_sr, audio_opt)
198
+ except:
 
 
 
 
 
 
199
  info = traceback.format_exc()
200
  print(info)
201
+ return info, None
202
+ return vc_fn
203
 
204
  def load_model():
205
  categories = []