Update audiocraft/models/lm.py
Browse files — audiocraft/models/lm.py: +94 −0
audiocraft/models/lm.py
CHANGED
|
@@ -531,3 +531,97 @@ class LMModel(StreamingModule):
|
|
| 531 |
# ensure the returned codes are all valid
|
| 532 |
assert (out_codes >= 0).all() and (out_codes <= self.card).all()
|
| 533 |
return out_codes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 531 |
# ensure the returned codes are all valid
|
| 532 |
assert (out_codes >= 0).all() and (out_codes <= self.card).all()
|
| 533 |
return out_codes
|
| 534 |
+
@torch.no_grad()
|
| 535 |
+
def generate_segment(self,
|
| 536 |
+
segment: int,
|
| 537 |
+
prompt_text: str,
|
| 538 |
+
max_segment_len: int,
|
| 539 |
+
seed: tp.Optional[int] = None,
|
| 540 |
+
# Pass other generation params like temp, top_k, etc.
|
| 541 |
+
**kwargs
|
| 542 |
+
) -> tp.Tuple[torch.Tensor, int]:
|
| 543 |
+
"""
|
| 544 |
+
Generates audio segment by segment, saving state to the filesystem.
|
| 545 |
+
This mirrors the logic from the RealViz script for robust, persistent state.
|
| 546 |
+
|
| 547 |
+
Args:
|
| 548 |
+
segment (int): The segment number to generate (starts at 1).
|
| 549 |
+
prompt_text (str): The text description for the music.
|
| 550 |
+
max_segment_len (int): The number of tokens to generate in this segment.
|
| 551 |
+
seed (int, optional): The seed for generation. If None and segment is 1,
|
| 552 |
+
a random seed is created.
|
| 553 |
+
**kwargs: Additional generation parameters (temp, top_k, cfg_coef).
|
| 554 |
+
|
| 555 |
+
Returns:
|
| 556 |
+
A tuple containing:
|
| 557 |
+
- full_codes (torch.Tensor): The generated tokens for the ENTIRE song so far.
|
| 558 |
+
- seed (int): The seed used for the generation process.
|
| 559 |
+
"""
|
| 560 |
+
# Ensure a consistent seed across all segments of a song
|
| 561 |
+
if segment == 1:
|
| 562 |
+
if seed is None:
|
| 563 |
+
seed = random.randint(0, np.iinfo(np.int32).max)
|
| 564 |
+
print(f"Starting new generation with Seed: {seed}")
|
| 565 |
+
|
| 566 |
+
# --- This block runs only for the very first segment ---
|
| 567 |
+
conditions = [ConditioningAttributes(text={'description': prompt_text})]
|
| 568 |
+
# Start with an empty prompt tensor
|
| 569 |
+
prompt_codes = torch.zeros((1, self.num_codebooks, 0), dtype=torch.long, device=self.device)
|
| 570 |
+
self.clear_streaming_state() # Ensure model state is fresh
|
| 571 |
+
else:
|
| 572 |
+
# --- This block runs for all subsequent segments ---
|
| 573 |
+
state_file = f"musicgen_state_{segment-1}_{seed}.pt"
|
| 574 |
+
if not os.path.exists(state_file):
|
| 575 |
+
raise FileNotFoundError(f"State file not found! Cannot resume from segment {segment}. Please run segment {segment-1} first.")
|
| 576 |
+
|
| 577 |
+
print(f"Resuming from state file: {state_file}")
|
| 578 |
+
state = torch.load(state_file, map_location=self.device)
|
| 579 |
+
|
| 580 |
+
# Restore all necessary components from the saved state
|
| 581 |
+
seed = state['seed']
|
| 582 |
+
conditions = state['conditions']
|
| 583 |
+
# The prompt for the next segment is the full output from the previous one
|
| 584 |
+
prompt_codes = state['generated_tokens']
|
| 585 |
+
# CRITICAL: Restore the model's internal KV cache
|
| 586 |
+
self.set_streaming_state(state['model_state'])
|
| 587 |
+
|
| 588 |
+
# --- This part runs for EVERY segment ---
|
| 589 |
+
# The 'generate' function here refers to the original, non-chunking one.
|
| 590 |
+
# We are using it to generate just one segment's worth of audio.
|
| 591 |
+
# `remove_prompts=True` is vital to avoid re-generating the input prompt.
|
| 592 |
+
newly_generated_codes = self.generate(
|
| 593 |
+
prompt=prompt_codes,
|
| 594 |
+
conditions=conditions,
|
| 595 |
+
max_gen_len=prompt_codes.shape[-1] + max_segment_len, # Generate N more tokens
|
| 596 |
+
remove_prompts=True,
|
| 597 |
+
**kwargs
|
| 598 |
+
)
|
| 599 |
+
|
| 600 |
+
# Combine the previous audio with the new segment
|
| 601 |
+
full_codes = torch.cat([prompt_codes, newly_generated_codes], dim=-1)
|
| 602 |
+
|
| 603 |
+
# --- Save the new state for the NEXT segment to use ---
|
| 604 |
+
print(f"Segment {segment} finished. Saving state...")
|
| 605 |
+
new_model_state = self.get_streaming_state()
|
| 606 |
+
|
| 607 |
+
# Move tensors to CPU before saving for portability
|
| 608 |
+
new_model_state.to('cpu')
|
| 609 |
+
|
| 610 |
+
new_state_to_save = {
|
| 611 |
+
'seed': seed,
|
| 612 |
+
'conditions': conditions,
|
| 613 |
+
'generated_tokens': full_codes.to('cpu'),
|
| 614 |
+
'model_state': new_model_state,
|
| 615 |
+
}
|
| 616 |
+
|
| 617 |
+
# Save the state dictionary to a file
|
| 618 |
+
new_state_file = f"musicgen_state_{segment}_{seed}.pt"
|
| 619 |
+
torch.save(new_state_to_save, new_state_file)
|
| 620 |
+
print(f"State for resuming at segment {segment + 1} saved to {new_state_file}")
|
| 621 |
+
|
| 622 |
+
return full_codes, seed
|
| 623 |
+
|
| 624 |
+
# You should also add the device property to your LMModel class if it's not there
|
| 625 |
+
@property
|
| 626 |
+
def device(self):
|
| 627 |
+
return next(self.parameters()).device
|