rvc-blue-archive

Running

App Files Files

andhikagg committed on Jun 25

Commit

16b0eb1

verified ·

1 Parent(s): 64c58cf

Upload app.py

Browse files

Files changed (1) hide show

app.py +10 -24

app.py CHANGED Viewed

@@ -131,7 +131,7 @@ if os.path.isfile("rmvpe.pt"):
             # yield info, None
     # return vc_fn
-def create_vc_fn(model_name, tgt_sr, net_g, vc, if_f0, version, file_index):
     def vc_fn(
         vc_audio_mode,
         vc_input,
@@ -147,38 +147,28 @@ def create_vc_fn(model_name, tgt_sr, net_g, vc, if_f0, version, file_index):
         protect,
     ):
         try:
-            logs = []
-            print(f"Converting using {model_name}...")
-            logs.append(f"Converting using {model_name}...")
-            yield "\n".join(logs), None
-            # === PERBAIKAN logika or ===
-            if (vc_audio_mode == "Input path" or vc_audio_mode == "Youtube") and vc_input != "":
                 audio, sr = librosa.load(vc_input, sr=16000, mono=True)
             elif vc_audio_mode == "Upload audio":
                 if vc_upload is None:
                     return "You need to upload an audio", None
                 sampling_rate, audio = vc_upload
                 duration = audio.shape[0] / sampling_rate
-                if duration > 20 and spaces:
                     return "Please upload an audio file that is less than 20 seconds. If you need to generate a longer audio file, please use Colab.", None
                 audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
                 if len(audio.shape) > 1:
                     audio = librosa.to_mono(audio.transpose(1, 0))
                 if sampling_rate != 16000:
                     audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
             elif vc_audio_mode == "TTS Audio":
-                if len(tts_text) > 100 and spaces:
                     return "Text is too long", None
                 if tts_text is None or tts_voice is None:
                     return "You need to enter text and select a voice", None
                 asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save("tts.mp3"))
                 audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
                 vc_input = "tts.mp3"
-            # Lanjut pipeline
             times = [0, 0, 0]
             f0_up_key = int(f0_up_key)
             audio_opt = vc.pipeline(
@@ -191,6 +181,7 @@ def create_vc_fn(model_name, tgt_sr, net_g, vc, if_f0, version, file_index):
                 f0_up_key,
                 f0_method,
                 file_index,
                 index_rate,
                 if_f0,
                 filter_radius,
@@ -202,18 +193,13 @@ def create_vc_fn(model_name, tgt_sr, net_g, vc, if_f0, version, file_index):
                 f0_file=None,
             )
             info = f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
-            print(f"{model_name} | {info}")
-            logs.append(f"Successfully Convert {model_name}\n{info}")
-            yield "\n".join(logs), (tgt_sr, audio_opt)
-        except GeneratorExit:
-            # Ini sinyal normal ketika user stop proses di tengah jalan
-            raise
-        except Exception:
             info = traceback.format_exc()
             print(info)
-            yield info, None
 def load_model():
     categories = []

             # yield info, None
     # return vc_fn
+def create_vc_fn(model_title, tgt_sr, net_g, vc, if_f0, version, file_index):
     def vc_fn(
         vc_audio_mode,
         vc_input,
         protect,
     ):
         try:
+            if vc_audio_mode == "Input path" or "Youtube" and vc_input != "":
                 audio, sr = librosa.load(vc_input, sr=16000, mono=True)
             elif vc_audio_mode == "Upload audio":
                 if vc_upload is None:
                     return "You need to upload an audio", None
                 sampling_rate, audio = vc_upload
                 duration = audio.shape[0] / sampling_rate
+                if duration > 20 and limitation:
                     return "Please upload an audio file that is less than 20 seconds. If you need to generate a longer audio file, please use Colab.", None
                 audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
                 if len(audio.shape) > 1:
                     audio = librosa.to_mono(audio.transpose(1, 0))
                 if sampling_rate != 16000:
                     audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
             elif vc_audio_mode == "TTS Audio":
+                if len(tts_text) > 100 and limitation:
                     return "Text is too long", None
                 if tts_text is None or tts_voice is None:
                     return "You need to enter text and select a voice", None
                 asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save("tts.mp3"))
                 audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
                 vc_input = "tts.mp3"
             times = [0, 0, 0]
             f0_up_key = int(f0_up_key)
             audio_opt = vc.pipeline(
                 f0_up_key,
                 f0_method,
                 file_index,
+                # file_big_npy,
                 index_rate,
                 if_f0,
                 filter_radius,
                 f0_file=None,
             )
             info = f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
+            print(f"{model_title} | {info}")
+            return info, (tgt_sr, audio_opt)
+        except:
             info = traceback.format_exc()
             print(info)
+            return info, None
+    return vc_fn
 def load_model():
     categories = []