Aduc-sdr-2_5s

Paused

Aduc-sdr-2_5s / aduc_framework /engineers /deformes3D.py

Carlexxx

feat: ✨ aBINC 2.2

fb56537 2 months ago

7.06 kB

	# aduc_framework/engineers/deformes3D.py
	#
	# Versão 12.4.0 (Correção de Integração com VaeManager v2)
	# - Remove a função auxiliar obsoleta `_pil_to_latent` que ainda tentava
	# chamar o antigo método `vae_manager_singleton.encode()`.
	# - Consolida o uso exclusivo do método `encode_batch` para toda a codificação
	# de imagens, alinhando-se com a arquitetura de VAE persistente e otimizada.

	import os
	import logging
	import torch
	import numpy as np
	from PIL import Image
	from typing import List, Dict, Any

	from ..types import LatentConditioningItem, KeyframeGenerationJob
	from ..managers.ltx_manager import ltx_manager_singleton
	from ..managers.vae_manager import vae_manager_singleton

	# Module-level logger, named after this module's import path.
	logger = logging.getLogger(__name__)

	class Deformes3DEngine:
	    """Keyframe "sequence painter" built on the shared LTX and VAE managers.

	    The engine must be bound to a workspace directory with ``initialize``
	    before ``generate_keyframes_from_job`` is called; generated images and
	    latents are written into that directory.
	    """

	    def __init__(self):
	        # Output directory for generated files; None until initialize() runs.
	        self.workspace_dir: str | None = None
	        logger.info("Deformes3DEngine (Pintor de Sequência) instanciado.")

	    def initialize(self, workspace_dir: str):
	        """Bind the engine to *workspace_dir*.

	        Calling this again with the directory already in use is a no-op.
	        """
	        if self.workspace_dir is not None and self.workspace_dir == workspace_dir:
	            return
	        self.workspace_dir = workspace_dir
	        logger.info(f"Pintor 3D inicializado com workspace: {self.workspace_dir}.")

	    def generate_keyframes_from_job(
	        self,
	        job: "KeyframeGenerationJob"
	    ) -> List[Dict[str, Any]]:
	        """Generate one keyframe per storyboard entry, chaining each on the last.

	        For every entry in ``job.storyboard`` the composer is asked for a
	        composition plan, a short latent fragment is generated with the LTX
	        manager, and the fragment's last latent frame is decoded and saved.
	        The saved image becomes the base image for the next entry.

	        Args:
	            job: Work order. The attributes read here are ``storyboard``,
	                ``available_ref_ids``, ``keyframe_prefix`` and
	                ``ref_image_paths`` (only its first element is used).

	        Returns:
	            One dict per generated keyframe with the keys ``"id"`` (1-based),
	            ``"caminho_pixel"``, ``"caminho_latent"`` and ``"prompt_keyframe"``.

	        Raises:
	            RuntimeError: If ``initialize`` has not been called yet.
	        """
	        # Validate state first so an uninitialized engine fails fast, before
	        # the lazy composer import below is even attempted.
	        if not self.workspace_dir:
	            raise RuntimeError("Deformes3DEngine não foi inicializado.")

	        # Imported at call time rather than at module import time
	        # (presumably to avoid a circular import — TODO confirm).
	        from .composer import composer_singleton as Composer

	        storyboard = job.storyboard
	        num_keyframes_to_generate = len(storyboard)
	        available_ref_ids = job.available_ref_ids
	        keyframe_prefix = job.keyframe_prefix

	        target_resolution_tuple = (512, 512)
	        current_base_image_path = job.ref_image_paths[0]
	        previous_prompt = ""
	        all_keyframes_data: List[Dict[str, Any]] = []

	        # Generation settings are identical for every keyframe, so build once.
	        ltx_base_params = {"guidance_scale": 2.0, "stg_scale": 0.015, "num_inference_steps": 25}

	        logger.info(f"Pintor 3D: Recebida ordem para gerar {num_keyframes_to_generate} keyframes para a cena '{keyframe_prefix}'.")

	        # Encode the starting image once, up front. The context manager closes
	        # the file handle; convert() returns an independent in-memory image.
	        with Image.open(current_base_image_path) as base_image:
	            images_to_encode = [base_image.convert("RGB")]
	        ref_weights = [0.4]  # conditioning weight of the starting image
	        encoded_latents = vae_manager_singleton.encode_batch(images_to_encode, target_resolution_tuple)

	        for i, current_scene_narrative in enumerate(storyboard):
	            scene_index = i + 1
	            # The last entry has no successor, so a fixed placeholder is used.
	            future_scene_narrative = (
	                storyboard[scene_index]
	                if scene_index < num_keyframes_to_generate
	                else "A cena final."
	            )
	            logger.info(f"--> Planejando Keyframe {scene_index}/{num_keyframes_to_generate}...")

	            # Keep the image open only for the duration of the planning call so
	            # a file handle is not leaked on every iteration.
	            with Image.open(current_base_image_path) as planning_image:
	                composition_plan = Composer.execute_cognitive_task(
	                    task_id="COGNITIVE_01_PLAN_KEYFRAME",
	                    template_data={
	                        "historico_prompt": previous_prompt,
	                        "cena_atual": current_scene_narrative,
	                        "cena_futura": future_scene_narrative,
	                        "available_ref_images": available_ref_ids,
	                    },
	                    images=[planning_image]
	                )

	            # Fall back to the raw narrative when the plan carries no prompt.
	            img_prompt = composition_plan.get("composition_prompt", current_scene_narrative)
	            selected_references = composition_plan.get("reference_images", [])
	            logger.info(f"Plano do Diretor de Arte recebido. Prompt: '{img_prompt[:80]}...'. Referências: {len(selected_references)}")

	            # NOTE: the references chosen by the plan are only counted in the
	            # log line above; they are not encoded or used as conditioning.

	            # Pair each available latent with its weight, all anchored at frame 0.
	            ltx_conditioning_items = [
	                LatentConditioningItem(latent_tensor, 0, weight)
	                for latent_tensor, weight in zip(encoded_latents, ref_weights)
	            ]

	            generated_latents, _ = ltx_manager_singleton.generate_latent_fragment(
	                height=target_resolution_tuple[0], width=target_resolution_tuple[1],
	                conditioning_items_data=ltx_conditioning_items,
	                motion_prompt=img_prompt,
	                video_total_frames=36, video_fps=24,
	                **ltx_base_params
	            )

	            # Keep only the last slice along dim 2 (presumably the temporal
	            # axis — TODO confirm). clone() detaches it from the fragment's
	            # storage so neither the conditioning list nor torch.save below
	            # holds on to (or serializes) the whole generated fragment.
	            final_latent = generated_latents[:, :, -1:, :, :].clone()
	            enriched_pixel_tensor = vae_manager_singleton.decode(final_latent)

	            # The next keyframe is conditioned on the starting-image latent
	            # (weight 0.05) plus the keyframe just generated (weight 0.5).
	            encoded_latents = [encoded_latents[0], final_latent]
	            ref_weights = [0.05, 0.5]

	            pixel_path = os.path.join(self.workspace_dir, f"{keyframe_prefix}_kf{scene_index:03d}_pixel.png")
	            latent_path = os.path.join(self.workspace_dir, f"{keyframe_prefix}_kf{scene_index:03d}_latent.pt")
	            self.save_image_from_tensor(enriched_pixel_tensor, pixel_path)
	            torch.save(final_latent.cpu(), latent_path)

	            all_keyframes_data.append({
	                "id": scene_index,
	                "caminho_pixel": pixel_path,
	                "caminho_latent": latent_path,
	                "prompt_keyframe": img_prompt
	            })

	            # Chain: the image just written is the base for the next plan.
	            current_base_image_path = pixel_path
	            previous_prompt = img_prompt

	        logger.info(f"Pintor 3D: Ordem de serviço para a cena '{keyframe_prefix}' concluída.")
	        return all_keyframes_data

	    # NOTE: the former `_pil_to_latent` helper was removed; image encoding is
	    # done through `vae_manager_singleton.encode_batch` in the method above.

	    def save_image_from_tensor(self, pixel_tensor: torch.Tensor, path: str):
	        """Save a pixel tensor to *path* as an image file.

	        Args:
	            pixel_tensor: Tensor whose values are clamped to [-1, 1] before
	                conversion. The squeeze/permute sequence below expects a
	                (1, C, 1, H, W) layout — TODO confirm against the VAE decoder.
	            path: Destination file path.
	        """
	        # detach() so .numpy() also works on tensors that still require grad;
	        # cpu() because numpy/PIL cannot read device memory.
	        pixel_tensor_cpu = pixel_tensor.detach().cpu()
	        tensor_chw = pixel_tensor_cpu.squeeze(0).squeeze(1)
	        tensor_hwc = tensor_chw.permute(1, 2, 0)
	        # Map [-1, 1] to [0, 1].
	        tensor_hwc = (tensor_hwc.clamp(-1, 1) + 1) / 2.0
	        # Scale to [0, 255]; float() first so any input dtype converts to numpy.
	        image_np = (tensor_hwc.float().numpy() * 255).astype(np.uint8)
	        Image.fromarray(image_np).save(path)

	# --- Singleton instance ---
	# Shared engine created when this module is imported. Its workspace_dir is
	# None until initialize() is called on it.
	deformes3d_engine_singleton = Deformes3DEngine()