Spaces: fffiloni/YuE (Paused)

Optimized for speed

by KingNish - opened Feb 7

←

Files changed (3)

app.py CHANGED Viewed

@@ -124,7 +124,7 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
         "--genre_txt", f"{genre_txt_path}",
         "--lyrics_txt", f"{lyrics_txt_path}",
         "--run_n_segments", str(num_segments),
-        "--stage2_batch_size", "4",
         "--output_dir", f"{output_dir}",
         "--cuda_idx", "0",
         "--max_new_tokens", str(max_new_tokens)

         "--genre_txt", f"{genre_txt_path}",
         "--lyrics_txt", f"{lyrics_txt_path}",
         "--run_n_segments", str(num_segments),
+        "--stage2_batch_size", "16",
         "--output_dir", f"{output_dir}",
         "--cuda_idx", "0",
         "--max_new_tokens", str(max_new_tokens)

inference/infer.py CHANGED Viewed

@@ -76,7 +76,7 @@ print(f"Using device: {device}")
 mmtokenizer = _MMSentencePieceTokenizer("./mm_tokenizer_v0.2_hf/tokenizer.model")
 model = AutoModelForCausalLM.from_pretrained(
     stage1_model,
-    torch_dtype=torch.bfloat16,
     attn_implementation="flash_attention_2", # To enable flashattn, you have to install flash-attn
     )
 model.to(device)

 mmtokenizer = _MMSentencePieceTokenizer("./mm_tokenizer_v0.2_hf/tokenizer.model")
 model = AutoModelForCausalLM.from_pretrained(
     stage1_model,
+    torch_dtype=torch.float16,
     attn_implementation="flash_attention_2", # To enable flashattn, you have to install flash-attn
     )
 model.to(device)

requirements.txt CHANGED Viewed

@@ -3,7 +3,7 @@ torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cu118
 omegaconf
 einops
 numpy<2
-transformers
 sentencepiece
 tqdm
 tensorboard

 omegaconf
 einops
 numpy<2
+git+https://github.com/KingNish24/transformers.git@yue-patch
 sentencepiece
 tqdm
 tensorboard