x2XcarleX2x committed on
Commit
91c93ea
·
verified ·
1 Parent(s): f84d55e

Update aduc_framework/managers/wan_manager.py

Browse files
Files changed (1) hide show
  1. aduc_framework/managers/wan_manager.py +205 -145
aduc_framework/managers/wan_manager.py CHANGED
@@ -1,7 +1,10 @@
1
  # aduc_framework/managers/wan_manager.py
2
- # WanManager v0.0.1 (beta)
3
 
4
  import os
 
 
 
5
  import tempfile
6
  import random
7
  from typing import List, Any, Optional, Tuple
@@ -10,137 +13,187 @@ import numpy as np
10
  import torch
11
  from PIL import Image
12
 
 
 
 
 
 
 
 
 
 
 
 
13
  from diffusers import FlowMatchEulerDiscreteScheduler
14
  from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
15
  from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
16
  from diffusers.utils.export_utils import export_to_video
17
 
 
 
18
 
19
  class WanManager:
20
  """
21
- WanManager v0.0.1 (beta)
22
- - image: primeiro item (peso fixo 1.0) -> latente 0
23
- - handle: segundo item (se presente) -> latente 4, com handle_weight da lista
24
- - last: último item -> último latente, com anchor_weight_last da lista
25
- - Mantém LoRA Lightning fundida, FlowMatch Euler, device_map='auto' e contrato i2v.
26
- - Fallback: se a pipeline não suportar os novos args, chama a API original sem handle/pesos.
 
 
 
 
 
 
 
 
 
27
  """
28
 
29
  MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
 
30
 
31
- # Dimensões
32
  MAX_DIMENSION = 832
33
  MIN_DIMENSION = 480
34
  DIMENSION_MULTIPLE = 16
35
  SQUARE_SIZE = 480
36
-
37
- # Vídeo
38
  FIXED_FPS = 16
39
  MIN_FRAMES_MODEL = 8
40
  MAX_FRAMES_MODEL = 81
41
 
 
42
  default_negative_prompt = (
43
- "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,"
44
- "JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,"
45
- "手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走,过曝,"
 
 
46
  )
47
 
48
  def __init__(self) -> None:
 
49
  print("Loading models into memory. This may take a few minutes...")
50
 
51
- # Pipeline i2v com dois transformadores (alto/baixo ruído)
52
- self.pipe = WanImageToVideoPipeline.from_pretrained(
53
- self.MODEL_ID,
54
- transformer=WanTransformer3DModel.from_pretrained(
55
- "cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers",
56
- subfolder="transformer",
57
- torch_dtype=torch.bfloat16,
58
- device_map="auto",
59
- ),
60
- transformer_2=WanTransformer3DModel.from_pretrained(
61
- "cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers",
62
- subfolder="transformer_2",
63
- torch_dtype=torch.bfloat16,
64
- device_map="auto",
65
- ),
66
- torch_dtype=torch.bfloat16,
67
- )
68
 
69
- # Scheduler
70
- self.pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(
71
- self.pipe.scheduler.config, shift=32.0
 
 
 
 
 
 
 
72
  )
 
73
 
74
- # LoRA Lightning (fused)
75
  print("Applying 8-step Lightning LoRA...")
76
  try:
77
- self.pipe.load_lora_weights(
78
- "Kijai/WanVideo_comfy",
79
- weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
80
- adapter_name="lightx2v",
81
- )
82
- kwargs_lora = {"load_into_transformer_2": True}
83
- self.pipe.load_lora_weights(
84
- "Kijai/WanVideo_comfy",
85
- weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
86
- adapter_name="lightx2v_2",
87
- **kwargs_lora,
88
- )
89
  self.pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1.0, 1.0])
90
-
91
  print("Fusing LoRA weights into the main model...")
92
  self.pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3.0, components=["transformer"])
93
  self.pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1.0, components=["transformer_2"])
94
-
95
  self.pipe.unload_lora_weights()
96
- print("Lightning LoRA successfully fused. Model is ready for fast 8-step generation.")
97
  except Exception as e:
98
- print(f"AVISO: Falha ao carregar/fundir LoRA. A geração pode ser mais lenta. Erro: {e}")
99
 
100
  print("All models loaded. Service is ready.")
101
 
102
- # ===== Utils =====
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
  def process_image_for_video(self, image: Image.Image) -> Image.Image:
105
- width, height = image.size
106
- if width == height:
107
- return image.resize((self.SQUARE_SIZE, self.SQUARE_SIZE), Image.Resampling.LANCZOS)
108
-
109
- aspect_ratio = width / height
110
- new_width, new_height = width, height
111
-
112
- if new_width > self.MAX_DIMENSION or new_height > self.MAX_DIMENSION:
113
- scale = (self.MAX_DIMENSION / new_width) if aspect_ratio > 1 else (self.MAX_DIMENSION / new_height)
114
- new_width *= scale
115
- new_height *= scale
116
-
117
- if new_width < self.MIN_DIMENSION or new_height < self.MIN_DIMENSION:
118
- scale = (self.MIN_DIMENSION / new_height) if aspect_ratio > 1 else (self.MIN_DIMENSION / new_width)
119
- new_width *= scale
120
- new_height *= scale
121
-
122
- final_width = int(round(new_width / self.DIMENSION_MULTIPLE) * self.DIMENSION_MULTIPLE)
123
- final_height = int(round(new_height / self.DIMENSION_MULTIPLE) * self.DIMENSION_MULTIPLE)
124
-
125
- final_width = max(final_width, self.MIN_DIMENSION if aspect_ratio < 1 else self.SQUARE_SIZE)
126
- final_height = max(final_height, self.MIN_DIMENSION if aspect_ratio > 1 else self.SQUARE_SIZE)
127
-
128
- return image.resize((final_width, final_height), Image.Resampling.LANCZOS)
129
-
130
- def resize_and_crop_to_match(self, target_image: Image.Image, reference_image: Image.Image) -> Image.Image:
131
- ref_width, ref_height = reference_image.size
132
- target_width, target_height = target_image.size
133
- scale = max(ref_width / target_width, ref_height / target_height)
134
- new_width, new_height = int(target_width * scale), int(target_height * scale)
135
- resized = target_image.resize((new_width, new_height), Image.Resampling.LANCZOS)
136
- left, top = (new_width - ref_width) // 2, (new_height - ref_height) // 2
137
- return resized.crop((left, top, left + ref_width, top + ref_height))
138
-
139
- # ===== API =====
140
 
141
  def generate_video_from_conditions(
142
  self,
143
- images_condition_items: List[List[Any]], # [[patch(Image), frame(int|str), peso(float)], ...]
144
  prompt: str,
145
  negative_prompt: Optional[str],
146
  duration_seconds: float,
@@ -150,84 +203,91 @@ class WanManager:
150
  seed: int,
151
  randomize_seed: bool,
152
  output_type: str = "np",
153
- ) -> Tuple[str, int]:
154
- """
155
- - Primeiro item: image (peso fixo 1.0) no latente 0.
156
- - Segundo item (opcional): handle em latente 4 com peso da lista.
157
- - Último item: last no último latente com peso da lista.
158
- """
159
  if not images_condition_items or len(images_condition_items) < 2:
160
  raise ValueError("Forneça ao menos dois itens (início e fim).")
161
 
162
  items = images_condition_items
163
-
164
- # image (peso fixo 1.0)
165
  start_image = items[0][0]
 
 
 
166
 
167
- # handle (segundo item se houver)
168
  handle_image = items[1][0] if len(items) >= 3 else None
169
  handle_weight = float(items[1][2]) if len(items) >= 3 and items[1][2] is not None else 1.0
170
-
171
- # last (sempre o último item)
172
- end_image = items[-1][0]
173
  end_weight = float(items[-1][2]) if len(items[-1]) >= 3 and items[-1][2] is not None else 1.0
174
 
175
- if start_image is None or end_image is None:
176
- raise ValueError("As imagens inicial e final não podem ser vazias.")
177
- if not isinstance(start_image, Image.Image) or not isinstance(end_image, Image.Image):
178
- raise TypeError("Os 'patches' devem ser PIL.Image.")
179
- if handle_image is not None and not isinstance(handle_image, Image.Image):
180
- raise TypeError("O 'patch' do handle deve ser PIL.Image.")
181
-
182
  processed_start = self.process_image_for_video(start_image)
183
  processed_end = self.resize_and_crop_to_match(end_image, processed_start)
184
- processed_handle = self.resize_and_crop_to_match(handle_image, processed_start) if handle_image is not None else None
185
-
186
- target_height, target_width = processed_start.height, processed_start.width
187
-
188
- num_frames = int(round(duration_seconds * self.FIXED_FPS))
189
- num_frames = int(np.clip(num_frames, self.MIN_FRAMES_MODEL, self.MAX_FRAMES_MODEL))
 
 
 
 
 
 
190
 
191
  current_seed = random.randint(0, np.iinfo(np.int32).max) if randomize_seed else int(seed)
192
  generator = torch.Generator().manual_seed(current_seed)
193
 
 
 
 
194
  call_kwargs = dict(
195
- image=processed_start, # latente 0 (peso 1.0 implícito)
196
- last_image=processed_end, # último latente (peso ajustável)
197
- prompt=prompt,
198
- negative_prompt=negative_prompt if negative_prompt is not None else self.default_negative_prompt,
199
- height=target_height,
200
- width=target_width,
201
- num_frames=num_frames,
202
- guidance_scale=float(guidance_scale),
203
- guidance_scale_2=float(guidance_scale_2),
204
- num_inference_steps=int(steps),
205
- generator=generator,
206
- output_type=output_type,
207
  )
208
 
209
- try:
210
- if processed_handle is not None:
211
- # handle no latente 4 com peso da lista; last no último com end_weight
212
- result = self.pipe(
213
- **call_kwargs,
214
- handle_image=processed_handle,
215
- handle_weight=float(handle_weight),
216
- handle_latent_index=25,
217
- anchor_weight_last=float(end_weight),
218
- )
219
- else:
220
- # sem handle; apenas peso do last
221
- result = self.pipe(
222
- **call_kwargs,
223
- anchor_weight_last=float(end_weight),
224
- )
225
- except TypeError:
226
- print("[WanManager] handle/anchor args não suportados; usando chamada padrão.")
227
- result = self.pipe(**call_kwargs)
228
-
 
 
 
 
 
 
 
229
  frames = result.frames[0]
 
 
230
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
231
  video_path = tmp.name
232
  export_to_video(frames, video_path, fps=self.FIXED_FPS)
233
- return video_path, current_seed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # aduc_framework/managers/wan_manager.py
2
+ # WanManager v1.0.0 (production-ready)
3
 
4
  import os
5
+ import platform
6
+ import shutil
7
+ import subprocess
8
  import tempfile
9
  import random
10
  from typing import List, Any, Optional, Tuple
 
13
  import torch
14
  from PIL import Image
15
 
16
# Enable TF32 matmul for better throughput on Ampere-or-newer GPUs.
torch.backends.cuda.matmul.allow_tf32 = True

# SDPA / FlashAttention context manager selection.
# Prefer the torch.nn.attention API; otherwise try the legacy
# torch.backends.cuda.sdp_kernel. When neither can be imported the names are
# bound to None, so importing this module never fails just because an optional
# attention API is missing.
try:
    from torch.nn.attention import sdpa_kernel, SDPBackend
    _SDPA_NEW = True
except Exception:
    # Broad on purpose: any failure importing the optional API selects the fallback.
    sdpa_kernel = None
    SDPBackend = None
    _SDPA_NEW = False

_legacy_sdp = None
if not _SDPA_NEW:
    try:
        from torch.backends.cuda import sdp_kernel as _legacy_sdp
    except Exception:
        _legacy_sdp = None
26
+
27
  from diffusers import FlowMatchEulerDiscreteScheduler
28
  from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
29
  from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
30
  from diffusers.utils.export_utils import export_to_video
31
 
32
+ from aduc_framework.utils.callbacks import DenoiseStepLogger
33
+
34
 
35
  class WanManager:
36
  """
37
+ Gerenciador de produção para a pipeline Wan 2.2 Image-to-Video.
38
+
39
+ Funcionalidades Principais:
40
+ - **Diagnóstico de Ambiente:** Exibe um banner detalhado no início com informações sobre
41
+ PyTorch, CUDA, GPUs, e suporte a otimizações (SDPA, xFormers).
42
+ - **Gerenciamento de Memória:** Distribui o modelo de forma otimizada por múltiplas
43
+ GPUs, definindo limites de VRAM para evitar sobrecargas.
44
+ - **Performance Otimizada:** Utiliza LoRA Lightning fundida para geração rápida e
45
+ aproveita o SDPA (Scaled Dot Product Attention) com uma cadeia de fallback
46
+ inteligente (Flash -> Efficient -> Math) para máxima velocidade.
47
+ - **Validação de Parâmetros Robusta:** Implementa regras de negócio para validar e
48
+ corrigir o número total de frames (`4n+1`) e a posição do frame de controle
49
+ (`8n+1` com buffers de segurança), garantindo estabilidade e resultados previsíveis.
50
+ - **Depuração Visual:** Integra um sistema de callbacks para capturar o processo de
51
+ denoising, gerando um vídeo de depuração e uma grade de imagens com cada passo.
52
  """
53
 
54
  MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
55
+ TRANSFORMER_ID = "cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers"
56
 
 
57
  MAX_DIMENSION = 832
58
  MIN_DIMENSION = 480
59
  DIMENSION_MULTIPLE = 16
60
  SQUARE_SIZE = 480
 
 
61
  FIXED_FPS = 16
62
  MIN_FRAMES_MODEL = 8
63
  MAX_FRAMES_MODEL = 81
64
 
65
+ # Prompt negativo padrão em inglês
66
  default_negative_prompt = (
67
+ "bright, overexposed, static, blurry details, text, subtitles, watermark, style, "
68
+ "artwork, painting, still image, gray scale, worst quality, low quality, jpeg artifacts, "
69
+ "ugly, deformed, disfigured, missing fingers, extra fingers, poorly drawn hands, "
70
+ "poorly drawn face, malformed limbs, fused fingers, messy background, three legs, "
71
+ "too many people, walking backwards."
72
  )
73
 
74
  def __init__(self) -> None:
75
+ self._print_env_banner()
76
  print("Loading models into memory. This may take a few minutes...")
77
 
78
+ n_gpus = torch.cuda.device_count()
79
+ max_memory = {i: "43GiB" for i in range(n_gpus)}
80
+ max_memory["cpu"] = "120GiB"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
+ transformer = WanTransformer3DModel.from_pretrained(
83
+ self.TRANSFORMER_ID, subfolder="transformer", torch_dtype=torch.bfloat16,
84
+ device_map="auto", max_memory=max_memory
85
+ )
86
+ transformer_2 = WanTransformer3DModel.from_pretrained(
87
+ self.TRANSFORMER_ID, subfolder="transformer_2", torch_dtype=torch.bfloat16,
88
+ device_map="auto", max_memory=max_memory
89
+ )
90
+ self.pipe = WanImageToVideoPipeline.from_pretrained(
91
+ self.MODEL_ID, transformer=transformer, transformer_2=transformer_2, torch_dtype=torch.bfloat16
92
  )
93
+ self.pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(self.pipe.scheduler.config, shift=32.0)
94
 
 
95
  print("Applying 8-step Lightning LoRA...")
96
  try:
97
+ self.pipe.load_lora_weights("Kijai/WanVideo_comfy", weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors", adapter_name="lightx2v")
98
+ self.pipe.load_lora_weights("Kijai/WanVideo_comfy", weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors", adapter_name="lightx2v_2", load_into_transformer_2=True)
 
 
 
 
 
 
 
 
 
 
99
  self.pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1.0, 1.0])
 
100
  print("Fusing LoRA weights into the main model...")
101
  self.pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3.0, components=["transformer"])
102
  self.pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1.0, components=["transformer_2"])
 
103
  self.pipe.unload_lora_weights()
104
+ print("Lightning LoRA successfully fused.")
105
  except Exception as e:
106
+ print(f"[WanManager] AVISO: Falha ao fundir LoRA Lightning: {e}")
107
 
108
  print("All models loaded. Service is ready.")
109
 
110
def _print_env_banner(self) -> None:
    """Print a diagnostic banner describing the runtime environment.

    Reports the Python and PyTorch versions, the CUDA/cuDNN versions seen by
    torch, the visible GPUs (name, total memory, compute capability), BF16 and
    TF32 flags, which SDPA API was importable, whether xFormers imports, the
    CUDA-related environment variables and the last line of ``nvcc --version``.

    The banner is purely informational, so the optional probes (cuDNN, BF16,
    xFormers, nvcc) degrade to a placeholder instead of raising.
    """
    def _safe_get(fn, default="n/a"):
        # Run an optional probe; any ordinary failure yields the placeholder.
        try:
            return fn()
        except Exception:
            return default

    torch_ver = getattr(torch, "__version__", "unknown")
    cuda_rt = getattr(torch.version, "cuda", "unknown")
    cudnn_ver = _safe_get(lambda: torch.backends.cudnn.version())
    cuda_ok = torch.cuda.is_available()
    n_gpu = torch.cuda.device_count() if cuda_ok else 0

    devs, total_vram, caps = [], [], []
    for i in range(n_gpu):  # n_gpu is 0 when CUDA is unavailable
        props = torch.cuda.get_device_properties(i)
        devs.append(f"cuda:{i} {props.name}")
        total_vram.append(f"{props.total_memory/1024**3:.1f}GiB")
        caps.append(f"{props.major}.{props.minor}")

    # `except Exception` (via _safe_get) rather than a bare `except`, so
    # KeyboardInterrupt / SystemExit still propagate.
    bf16_supported = _safe_get(torch.cuda.is_bf16_supported, default=False)

    tf32_allowed = torch.backends.cuda.matmul.allow_tf32
    if _SDPA_NEW:
        sdpa_api = "torch.nn.attention (2.1+)"
    elif hasattr(torch.backends.cuda, 'sdp_kernel'):
        sdpa_api = "torch.backends.cuda (2.0)"
    else:
        sdpa_api = "unavailable"

    try:
        import xformers  # noqa: F401 -- imported only to probe availability
        xformers_ok = True
    except Exception:
        # A broken install can fail with errors other than ImportError;
        # a diagnostic probe must not abort manager construction.
        xformers_ok = False

    alloc_conf = os.environ.get("PYTORCH_CUDA_ALLOC_CONF", "unset")
    visible = os.environ.get("CUDA_VISIBLE_DEVICES", "unset")
    python_ver = platform.python_version()

    nvcc = shutil.which("nvcc")
    nvcc_ver = "n/a"
    if nvcc:
        nvcc_ver = _safe_get(
            lambda: subprocess.check_output([nvcc, "--version"], text=True).strip().splitlines()[-1]
        )

    banner_lines = [
        "================== WAN MANAGER • ENV ==================",
        f"Python : {python_ver}", f"PyTorch : {torch_ver}",
        f"CUDA (torch) : {cuda_rt}", f"cuDNN : {cudnn_ver}",
        f"CUDA available : {cuda_ok}", f"GPU count : {n_gpu}",
        f"GPUs : {', '.join(devs) if devs else 'n/a'}",
        f"GPU VRAM : {', '.join(total_vram) if total_vram else 'n/a'}",
        f"Compute Capability : {', '.join(caps) if caps else 'n/a'}",
        f"BF16 supported : {bf16_supported}", f"TF32 allowed : {tf32_allowed}",
        f"SDPA API : {sdpa_api}", f"xFormers available : {xformers_ok}",
        f"CUDA_VISIBLE_DEVICES: {visible}", f"PYTORCH_CUDA_ALLOC_CONF: {alloc_conf}",
        f"nvcc : {nvcc_ver}",
        "=======================================================",
    ]
    print("\n".join(banner_lines))
164
+
165
+ def _round_multiple(self, x: int, multiple: int) -> int:
166
+ return int(round(x / multiple) * multiple)
167
 
168
  def process_image_for_video(self, image: Image.Image) -> Image.Image:
169
+ w, h = image.size
170
+ if w == h: return image.resize((self.SQUARE_SIZE, self.SQUARE_SIZE), Image.Resampling.LANCZOS)
171
+ ar = w / h
172
+ nw, nh = w, h
173
+ if nw > self.MAX_DIMENSION or nh > self.MAX_DIMENSION:
174
+ s = (self.MAX_DIMENSION / nw) if ar > 1 else (self.MAX_DIMENSION / nh)
175
+ nw, nh = nw * s, nh * s
176
+ if nw < self.MIN_DIMENSION or nh < self.MIN_DIMENSION:
177
+ s = (self.MIN_DIMENSION / nh) if ar > 1 else (self.MIN_DIMENSION / nw)
178
+ nw, nh = nw * s, nh * s
179
+ fw = self._round_multiple(int(nw), self.DIMENSION_MULTIPLE)
180
+ fh = self._round_multiple(int(nh), self.DIMENSION_MULTIPLE)
181
+ fw = max(fw, self.MIN_DIMENSION if ar < 1 else self.SQUARE_SIZE)
182
+ fh = max(fh, self.MIN_DIMENSION if ar > 1 else self.SQUARE_SIZE)
183
+ return image.resize((fw, fh), Image.Resampling.LANCZOS)
184
+
185
def resize_and_crop_to_match(self, target: Image.Image, ref: Image.Image) -> Image.Image:
    """Scale ``target`` to cover ``ref``'s size, then center-crop to that size.

    The scale factor is the larger of the width and height ratios, so the
    resized image is at least as large as ``ref`` in both dimensions before
    the centered crop is taken. Resampling is LANCZOS.

    Args:
        target: Image to be resized and cropped.
        ref: Image whose ``size`` defines the output dimensions.

    Returns:
        A new image with exactly ``ref.size`` dimensions.
    """
    ref_w, ref_h = ref.size
    src_w, src_h = target.size
    scale = max(ref_w / src_w, ref_h / src_h)

    # Mathematically src * scale >= ref on both axes, but (ref / src) * src can
    # land a hair below ref in floating point and int() would then truncate to
    # ref - 1, pushing the crop box outside the resized image. Clamp to ref.
    new_w = max(ref_w, int(src_w * scale))
    new_h = max(ref_h, int(src_h * scale))

    resized = target.resize((new_w, new_h), Image.Resampling.LANCZOS)
    left = (new_w - ref_w) // 2
    top = (new_h - ref_h) // 2
    return resized.crop((left, top, left + ref_w, top + ref_h))
 
 
 
 
 
 
 
 
 
 
 
193
 
194
  def generate_video_from_conditions(
195
  self,
196
+ images_condition_items: List[List[Any]],
197
  prompt: str,
198
  negative_prompt: Optional[str],
199
  duration_seconds: float,
 
203
  seed: int,
204
  randomize_seed: bool,
205
  output_type: str = "np",
206
+ ) -> Tuple[str, int, Optional[str], Optional[str]]:
 
 
 
 
 
207
  if not images_condition_items or len(images_condition_items) < 2:
208
  raise ValueError("Forneça ao menos dois itens (início e fim).")
209
 
210
  items = images_condition_items
 
 
211
  start_image = items[0][0]
212
+ end_image = items[-1][0]
213
+ if start_image is None or end_image is None:
214
+ raise ValueError("As imagens inicial e final não podem ser vazias.")
215
 
 
216
  handle_image = items[1][0] if len(items) >= 3 else None
217
  handle_weight = float(items[1][2]) if len(items) >= 3 and items[1][2] is not None else 1.0
 
 
 
218
  end_weight = float(items[-1][2]) if len(items[-1]) >= 3 and items[-1][2] is not None else 1.0
219
 
 
 
 
 
 
 
 
220
  processed_start = self.process_image_for_video(start_image)
221
  processed_end = self.resize_and_crop_to_match(end_image, processed_start)
222
+ processed_handle = self.resize_and_crop_to_match(handle_image, processed_start) if handle_image else None
223
+
224
+ H, W = processed_start.height, processed_start.width
225
+
226
+ # 1. Calcula e valida o número total de frames
227
+ initial_frames = int(round(duration_seconds * self.FIXED_FPS))
228
+ clamped_frames = int(np.clip(initial_frames, self.MIN_FRAMES_MODEL, self.MAX_FRAMES_MODEL))
229
+ sf_t = getattr(self.pipe, "vae_scale_factor_temporal", 4)
230
+ num_frames = ((clamped_frames - 1) // sf_t * sf_t) + 1 # Garante o formato 4n+1
231
+
232
+ print(f"[WanManager] INFO: Duração {duration_seconds}s => {initial_frames} frames. "
233
+ f"Após clamp e alinhamento 4n+1, o total de frames final é {num_frames}.")
234
 
235
  current_seed = random.randint(0, np.iinfo(np.int32).max) if randomize_seed else int(seed)
236
  generator = torch.Generator().manual_seed(current_seed)
237
 
238
+ denoise_callback = DenoiseStepLogger(self.pipe)
239
+ callback_kwargs = {"callback_on_step_end": denoise_callback, "callback_on_step_end_tensor_inputs": ["latents"]}
240
+
241
  call_kwargs = dict(
242
+ image=processed_start, last_image=processed_end, prompt=prompt, negative_prompt=negative_prompt or self.default_negative_prompt,
243
+ height=H, width=W, num_frames=num_frames, guidance_scale=float(guidance_scale), guidance_scale_2=float(guidance_scale_2),
244
+ num_inference_steps=int(steps), generator=generator, output_type=output_type,
 
 
 
 
 
 
 
 
 
245
  )
246
 
247
+ # 2. Calcula e valida o frame de controle (handle)
248
+ corrected_handle_index = None
249
+ if processed_handle is not None:
250
+ handle_frame_ui = int(items[1][1]) if len(items) >= 3 and items[1][1] is not None else 17
251
+
252
+ block_index = round(handle_frame_ui / 8)
253
+ aligned_frame = (block_index * 8 )+ 1
254
+
255
+ min_safe_frame = 9 # Buffer de 8 frames no início (1*8 + 1)
256
+ max_safe_frame = num_frames - 9 # Buffer de 8 frames no fim
257
+
258
+ corrected_handle_index = max(min_safe_frame, min(aligned_frame, max_safe_frame))
259
+
260
+ print(f"[WanManager] INFO: Handle Frame UI {handle_frame_ui} alinhado para {aligned_frame} e validado para {corrected_handle_index} (limites seguros: {min_safe_frame}-{max_safe_frame}).")
261
+
262
+ base_kwargs = {**call_kwargs, "anchor_weight_last": float(end_weight)}
263
+ if processed_handle is not None:
264
+ base_kwargs.update({
265
+ "handle_image": processed_handle,
266
+ "handle_weight": float(handle_weight),
267
+ "handle_frame_index": corrected_handle_index,
268
+ })
269
+
270
+ final_kwargs = {**base_kwargs, **callback_kwargs}
271
+ result = None
272
+
273
+ result = self.pipe(**base_kwargs)
274
  frames = result.frames[0]
275
+
276
+
277
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
278
  video_path = tmp.name
279
  export_to_video(frames, video_path, fps=self.FIXED_FPS)
280
+
281
+ debug_video_path, grid_image_path = None, None
282
+ if denoise_callback.intermediate_frames:
283
+ with tempfile.NamedTemporaryFile(suffix="_denoise_process.mp4", delete=False) as tmp:
284
+ debug_video_path = tmp.name
285
+ denoise_callback.save_as_video(debug_video_path, fps=max(1, steps // 2))
286
+
287
+ grid_pil = denoise_callback.create_steps_grid()
288
+ if grid_pil:
289
+ with tempfile.NamedTemporaryFile(suffix="_steps_grid.png", delete=False) as tmp:
290
+ grid_image_path = tmp.name
291
+ grid_pil.save(grid_image_path)
292
+
293
+ return video_path, current_seed, debug_video_path, grid_image_path