Aduc-sdr-2_5s

Paused

App Files Files Community

euIaxs22 committed on Sep 30

Commit

9342209

verified ·

1 Parent(s): 3c4ad98

Update services/vincie.py

Browse files

Files changed (1) hide show

services/vincie.py +254 -116

services/vincie.py CHANGED Viewed

@@ -1,122 +1,260 @@
-# services/vincie.py (VERSÃO FINAL - DELEGAÇÃO TOTAL)
-# Versão 4.5.0
-# - REMOÇÃO: Remove a lógica manual de resolução de herança. A correção do CWD
-#   já permite que o VINCIEGenerator/OmegaConf resolva os caminhos `__inherit__` corretamente.
-# - Esta é a abordagem mais limpa, confiando totalmente no código original.
 import os
-import sys
 from pathlib import Path
-import random
-import torch
-import numpy as np
-from PIL import Image
-from omegaconf import OmegaConf
-from einops import rearrange
-from typing import List, Generator, Tuple
 from huggingface_hub import snapshot_download
-# --- Configuração de Ambiente ---
-VINCIE_ROOT = Path("/app/VINCIE")
-if os.getcwd() != str(VINCIE_ROOT):
-    os.chdir(VINCIE_ROOT)
-if str(VINCIE_ROOT) not in sys.path:
-    sys.path.insert(0, str(VINCIE_ROOT))
-    print(f"✅ CWD alterado para '{VINCIE_ROOT}' e adicionado ao sys.path.")
-# -----------------------------
-from generate import VINCIEGenerator
-from common.seed import shift_seed
-class VinciePipelineService:
-    def __init__(self, config_path: str = "configs/generate.yaml"):
-        print(">>> [VinciePipelineService] Inicializando serviço com delegação total...")
-        if not torch.cuda.is_available():
-            raise RuntimeError("CUDA não disponível.")
-        self.device = torch.device("cuda")
-        self.dtype = torch.bfloat16
-        # 1. Garantir que os modelos estejam baixados e linkados
-        self._ensure_models_are_downloaded_and_linked()
-        # 2. Carregar a configuração. NÃO PRECISAMOS MAIS MANIPULÁ-LA.
-        print(f">>> Carregando configuração de '{config_path}'...")
-        self.config = OmegaConf.load(config_path)
-        # 3. Instanciar a classe VINCIEGenerator.
-        #    Ela agora resolverá todos os caminhos e heranças internamente.
-        print(">>> Instanciando o motor VINCIEGenerator...")
-        self.runner = VINCIEGenerator(self.config)
-        # 4. Delegar o carregamento.
-        print(">>> Delegando o carregamento dos modelos...")
-        self.runner.configure_persistence()
-        self.runner.configure_models() # Esta chamada agora deve ter sucesso completo.
-        self.runner.configure_diffusion()
-        self.temp_dir = Path("./temp_io") # Relativo ao novo CWD
-        self.temp_dir.mkdir(exist_ok=True)
-        print(">>> [VinciePipelineService] Serviço pronto.")
-    def _ensure_models_are_downloaded_and_linked(self):
-        repo_id = "ByteDance-Seed/VINCIE-3B"
-        cache_dir = os.environ.get("HF_HOME", "/data/.cache/huggingface")
-        print(f"📥 Verificando/Baixando {repo_id}...")
-        snapshot_path = Path(snapshot_download(repo_id=repo_id, cache_dir=cache_dir, resume_download=True))
-        # O código VINCIE espera os checkpoints em ./ckpt/VINCIE-3B
-        link_target_dir = VINCIE_ROOT / "ckpt"
-        link_path = link_target_dir / "VINCIE-3B"
-        link_target_dir.mkdir(exist_ok=True)
-        if not link_path.exists():
-             os.symlink(snapshot_path, link_path, target_is_directory=True)
-             print(f"🔗 Link simbólico criado: {link_path} -> {snapshot_path}")
-        else:
-             print(f"🔗 Link simbólico já existe.")
-    # --- O restante da classe (lógica de inferência) permanece o mesmo ---
-    @torch.no_grad()
-    def run_multi_turn_session(self, initial_image, prompts, negative_prompt, steps, cfg_scale, seed):
-        for f in self.temp_dir.glob('*.png'): f.unlink()
-        initial_image_path = self.temp_dir / "turn_0_output.png"
-        initial_image.save(initial_image_path)
-        context_image_paths = [str(initial_image_path)]
-        context_prompts = []
-        for i, prompt in enumerate(prompts):
-            turn_index = i + 1
-            print(f">>> [Turno {turn_index}/{len(prompts)}] Gerando com prompt: '{prompt}'")
-            context_prompts.append(prompt)
-            prompt_dict = OmegaConf.create({'index': turn_index, 'img_paths': context_image_paths, 'context': context_prompts})
-            _, conditions, noises, _, _ = self.runner.prepare_input(prompt=prompt_dict, repeat_idx=0)
-            texts_neg = [negative_prompt]
-            self.runner.config.diffusion.timesteps.sampling.steps = steps
-            self.runner.configure_diffusion()
-            final_latents = self.runner.inference(noises=[noises], conditions=[conditions], texts_pos=[context_prompts], texts_neg=texts_neg, cfg_scale=cfg_scale)
-            output_tensor = self.runner.vae_decode(final_latents)[0][:, -1, :, :]
-            output_tensor = output_tensor.clip(-1, 1).add(1).mul(0.5).mul(255)
-            image_np = rearrange(output_tensor.to("cpu", torch.uint8), "c h w -> h w c").numpy()
-            generated_image = Image.fromarray(image_np)
-            turn_image_path = self.temp_dir / f"turn_{turn_index}_output.png"
-            generated_image.save(turn_image_path)
-            context_image_paths.append(str(turn_image_path))
-            yield generated_image, i
-# --- Bloco de Tratamento de Erro ---
 try:
-    vincie_pipeline_service = VinciePipelineService()
-except Exception as e:
-    import traceback
-    print(f"ERRO CRÍTICO AO INICIALIZAR O VINCIEPIPELINESERVICE: {e}")
-    traceback.print_exc()
-    class VinciePlaceholder:
-        def __init__(self, error): self.error = error
-        def run_multi_turn_session(self, *args, **kwargs):
-            raise RuntimeError(f"O serviço VINCIE falhou: {self.error}")
-            yield None, 0
-    vincie_pipeline_service = VinciePlaceholder(e)

+#!/usr/bin/env python3
+"""
+VincieService (singleton-friendly)
+- Prepara o repositório VINCIE e o checkpoint completo via snapshot_download, honrando HF_HUB_CACHE.
+- Cria symlink de compatibilidade /app/VINCIE/ckpt/VINCIE-3B -> <snapshot no cache>.
+- Permite fixar GPUs dedicadas ao processo via CUDA_VISIBLE_DEVICES.
+- Opcionalmente ativa o NVIDIA Persistence Mode para reduzir latência de inicialização.
+- Executa geração chamando o main.py do VINCIE com overrides (cfg_scale, resolution_input, aspect_ratio_input, steps).
+- Realiza limpeza leve de GPU após cada job, mantendo o processo vivo e pronto.
+Observação:
+- Este serviço usa subprocess para chamar o main.py oficial, priorizando compatibilidade.
+- Para reter pesos do modelo em VRAM entre jobs, integrar diretamente generate.py em um servidor Python persistente.
+"""
 import os
+import json
+import subprocess
 from pathlib import Path
+from typing import List, Optional
 from huggingface_hub import snapshot_download
+class VincieService:
+    def __init__(
+        self,
+        repo_dir: str = "/app/VINCIE",
+        ckpt_symlink_dir: str = "/app/VINCIE/ckpt/VINCIE-3B",
+        python_bin: str = "python",
+        repo_id: str = "ByteDance-Seed/VINCIE-3B",
+        output_root: str = "/app/outputs",
+    ):
+        """Record the service paths and create the working directories.
+
+        Args:
+            repo_dir: Directory of the VINCIE checkout; ``main.py`` is run from here.
+            ckpt_symlink_dir: Location of the compatibility symlink that
+                ``ensure_model`` points at the downloaded snapshot.
+            python_bin: Interpreter used for the Python subprocesses this service starts.
+            repo_id: Hugging Face Hub repository passed to ``snapshot_download``.
+            output_root: Directory under which per-job output folders are created.
+        """
+        self.repo_dir = Path(repo_dir)
+        self.ckpt_symlink = Path(ckpt_symlink_dir)
+        self.python = python_bin
+        self.repo_id = repo_id
+        self.generate_yaml = self.repo_dir / "configs" / "generate.yaml"
+        # NOTE(review): parents=True also creates repo_dir itself when it is missing,
+        # so the `repo_dir.exists()` test in ensure_repo is true afterwards and the
+        # clone is skipped — confirm this is intended.
+        (self.repo_dir / "ckpt").mkdir(parents=True, exist_ok=True)
+        self.output_root = Path(output_root)
+        self.output_root.mkdir(parents=True, exist_ok=True)
+        # Real path of the snapshot in the cache (set by ensure_model)
+        self.ckpt_dir: Optional[Path] = None
+        # Mutable copy of the environment, passed as env= to the subprocesses
+        # (this is what pin_gpus edits to restrict the visible GPUs)
+        self._env = os.environ.copy()
+    # ---------- Repositório e modelo ----------
+    def ensure_repo(self, git_url: str = "https://github.com/ByteDance-Seed/VINCIE") -> None:
+        """Clone the VINCIE repository into ``repo_dir`` when that directory is missing.
+
+        Args:
+            git_url: URL passed to ``git clone``.
+
+        Raises:
+            subprocess.CalledProcessError: If ``git clone`` exits non-zero (``check=True``).
+        """
+        # NOTE(review): __init__ creates repo_dir/ckpt with parents=True, so for an
+        # instance built through __init__ this directory already exists and the
+        # clone never runs — confirm whether a stronger check is wanted here.
+        if not self.repo_dir.exists():
+            subprocess.run(["git", "clone", git_url, str(self.repo_dir)], check=True)
+    def ensure_model(self, hf_token: Optional[str] = None, revision: Optional[str] = None) -> None:
+        """
+        Download the full snapshot of the model repository into the local cache and create the symlink the repo expects.
+        - Uses HF_HUB_CACHE as cache_dir when it is set.
+
+        Args:
+            hf_token: Hub token; when falsy, falls back to the HF_TOKEN and then the
+                HUGGINGFACE_TOKEN environment variable.
+            revision: Forwarded to snapshot_download as ``revision``.
+
+        The snapshot path is stored in ``self.ckpt_dir``. Errors while creating the
+        symlink are printed as a warning and not re-raised.
+        """
+        token = hf_token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
+        cache_dir = os.environ.get("HF_HUB_CACHE")
+        snapshot_path = snapshot_download(
+            repo_id=self.repo_id,
+            revision=revision,
+            cache_dir=cache_dir,
+            token=token,
+            local_files_only=False,
+        )
+        self.ckpt_dir = Path(snapshot_path)
+        # Compatibility symlink inside the repo
+        try:
+            # An existing symlink is removed first so it can be re-pointed at the
+            # snapshot path obtained above.
+            if self.ckpt_symlink.is_symlink():
+                self.ckpt_symlink.unlink()
+            elif self.ckpt_symlink.exists():
+                # Optional: do not remove a real directory automatically
+                pass
+            # Skipped when a real file/directory occupies the path (branch above).
+            if not self.ckpt_symlink.exists():
+                self.ckpt_symlink.symlink_to(self.ckpt_dir, target_is_directory=True)
+        except Exception as e:
+            print("Warning: failed to create checkpoint symlink:", e)
+    def ready(self) -> bool:
+        """Return True when the repo config and both checkpoint files are present.
+
+        Checks that ``repo_dir`` and ``configs/generate.yaml`` exist, and that
+        ``ckpt_dir`` has been set (by ``ensure_model``) and contains ``dit.pth``
+        and ``vae.pth``.
+        """
+        have_repo = self.repo_dir.exists() and self.generate_yaml.exists()
+        dit_ok = self.ckpt_dir is not None and (self.ckpt_dir / "dit.pth").exists()
+        vae_ok = self.ckpt_dir is not None and (self.ckpt_dir / "vae.pth").exists()
+        return bool(have_repo and dit_ok and vae_ok)
+    # ---------- GPUs dedicadas e persistência ----------
+    def pin_gpus(self, device_indices: List[int]) -> None:
+        """
+        Restrict which GPUs are visible to the subprocesses of this service, e.g. [0,1,2,3].
+        The indices are joined with commas and stored as CUDA_VISIBLE_DEVICES in the
+        service's private environment copy (``self._env``); ``os.environ`` of the
+        current process is not modified.
+        Must be called before the jobs that should honour the restriction are started.
+        """
+        # An empty list produces an empty string for the variable.
+        visible = ",".join(str(i) for i in device_indices)
+        self._env["CUDA_VISIBLE_DEVICES"] = visible
+    def enable_persistence_mode(self) -> None:
+        """
+        Turn on the NVIDIA driver's persistence mode to reduce CUDA initialization latency.
+        Requires adequate permissions.
+        Any failure (missing binary, non-zero exit) is printed as a warning and not raised.
+        """
+        try:
+            subprocess.run(["nvidia-smi", "-pm", "1"], check=True)
+        except Exception as e:
+            print("Warning: failed to enable persistence mode:", e)
+    # ---------- Execução do VINCIE ----------
+    def _build_overrides(
+        self,
+        extra_overrides: Optional[List[str]] = None,
+        cfg_scale: Optional[float] = None,
+        resolution_input: Optional[int] = None,
+        aspect_ratio_input: Optional[str] = None,
+        steps: Optional[int] = None,
+    ) -> List[str]:
+        """Assemble the ``key=value`` override strings for a ``main.py`` run.
+
+        Args:
+            extra_overrides: Overrides placed first in the result; the list is
+                copied, so the caller's list is not modified.
+            cfg_scale: Emitted as ``generation.cfg_scale`` when not None.
+            resolution_input: Emitted as ``generation.resolution_input`` when not None.
+            aspect_ratio_input: Emitted as ``generation.aspect_ratio_input`` when not None.
+            steps: Emitted as ``generation.steps`` when not None.
+
+        Returns:
+            A new list of override strings.
+        """
+        overrides = list(extra_overrides or [])
+        # ckpt.path is only emitted once ensure_model has set ckpt_dir.
+        if self.ckpt_dir is not None:
+            overrides.append(f"ckpt.path={str(self.ckpt_dir)}")
+        if cfg_scale is not None:
+            overrides.append(f"generation.cfg_scale={cfg_scale}")
+        if resolution_input is not None:
+            overrides.append(f"generation.resolution_input={resolution_input}")
+        if aspect_ratio_input is not None:
+            overrides.append(f"generation.aspect_ratio_input={aspect_ratio_input}")
+        if steps is not None:
+            overrides.append(f"generation.steps={steps}")
+        return overrides
+    def _run_vincie_once(self, overrides: List[str], work_output: Path) -> None:
+        """
+        Invoke the official main.py with overrides; a single execution of the job.
+
+        Args:
+            overrides: ``key=value`` strings placed after the generate.yaml path.
+            work_output: Output directory; created if missing and appended last
+                as ``generation.output.dir``.
+
+        Raises:
+            subprocess.CalledProcessError: If the subprocess exits non-zero (``check=True``).
+        """
+        work_output.mkdir(parents=True, exist_ok=True)
+        cmd = [
+            self.python,
+            "main.py",
+            str(self.generate_yaml),
+            *overrides,
+            f"generation.output.dir={str(work_output)}",
+        ]
+        # Runs with repo_dir as working directory and with the service's private
+        # environment copy (which carries any CUDA_VISIBLE_DEVICES set by pin_gpus).
+        subprocess.run(cmd, cwd=self.repo_dir, check=True, env=self._env)
+    def _clean_gpu_memory(self) -> None:
+        """
+        Run a short Python snippet (via ``self.python -c``) that synchronizes CUDA,
+        runs gc.collect(), empties the CUDA allocator cache and resets the peak
+        memory statistics; each CUDA step in the snippet ignores its own errors.
+        Since this service invokes a subprocess for each job, the snippet is likewise
+        executed in a separate interpreter, using the service's environment copy.
+        Any failure to run the snippet is printed as a warning and not raised.
+
+        NOTE(review): because the snippet runs in a fresh process, it presumably
+        cannot release memory held by this service process or by the finished job
+        subprocess — confirm the cleanup has the intended effect.
+        """
+        try:
+            # Run a quick Python snippet with the same set of visible GPUs
+            code = r"""
+import torch, gc
 try:
+    torch.cuda.synchronize()
+except Exception:
+    pass
+gc.collect()
+try:
+    torch.cuda.empty_cache()
+    torch.cuda.memory.reset_peak_memory_stats()
+except Exception:
+    pass
+"""
+            subprocess.run([self.python, "-c", code], check=True, env=self._env)
+        except Exception as e:
+            print("Warning: GPU cleanup failed:", e)
+    # ---------- APIs de alto nível ----------
+    def multi_turn_edit(
+        self,
+        input_image: str,
+        turns: List[str],
+        out_dir_name: Optional[str] = None,
+        *,
+        cfg_scale: Optional[float] = None,
+        resolution_input: Optional[int] = None,
+        aspect_ratio_input: Optional[str] = None,
+        steps: Optional[int] = None,
+        pad_img_placeholder: Optional[bool] = None,
+    ) -> Path:
+        """
+        Run the multi-turn pipeline with optional overrides.
+
+        Args:
+            input_image: Image path; sent as a one-element JSON list in
+                ``generation.positive_prompt.image_path``.
+            turns: Prompts, JSON-encoded into ``generation.positive_prompt.prompts``.
+            out_dir_name: Folder name under ``output_root``; defaults to
+                ``multi_turn_<slug of input_image>``.
+            cfg_scale, resolution_input, aspect_ratio_input, steps: Optional
+                generation overrides; omitted from the command when None.
+            pad_img_placeholder: When not None, emitted as a lowercase
+                ``true``/``false`` override.
+
+        Returns:
+            The output directory handed to the job.
+
+        Raises:
+            subprocess.CalledProcessError: If the ``main.py`` run fails.
+        """
+        out_dir = self.output_root / (out_dir_name or f"multi_turn_{self._slug(input_image)}")
+        image_json = json.dumps([str(input_image)])
+        prompts_json = json.dumps(turns)
+        base_overrides = [
+            f"generation.positive_prompt.image_path={image_json}",
+            f"generation.positive_prompt.prompts={prompts_json}",
+        ]
+        if pad_img_placeholder is not None:
+            # NOTE(review): the key is spelled "placehoder"; presumably this matches
+            # the key name in the VINCIE config — verify before "fixing" it.
+            base_overrides.append(f"generation.pad_img_placehoder={str(bool(pad_img_placeholder)).lower()}")
+        overrides = self._build_overrides(
+            extra_overrides=base_overrides,
+            cfg_scale=cfg_scale,
+            resolution_input=resolution_input,
+            aspect_ratio_input=aspect_ratio_input,
+            steps=steps,
+        )
+        self._run_vincie_once(overrides, out_dir)
+        self._clean_gpu_memory()
+        return out_dir
+    def multi_concept_compose(
+        self,
+        concept_images: List[str],
+        concept_prompts: List[str],
+        final_prompt: str,
+        out_dir_name: Optional[str] = None,
+        *,
+        cfg_scale: Optional[float] = None,
+        resolution_input: Optional[int] = None,
+        aspect_ratio_input: Optional[str] = None,
+        steps: Optional[int] = None,
+    ) -> Path:
+        """
+        Run the multi-concept pipeline with optional overrides.
+
+        Args:
+            concept_images: Image paths, JSON-encoded into
+                ``generation.positive_prompt.image_path``.
+            concept_prompts: Prompts for the concepts; ``final_prompt`` is appended
+                after them before JSON-encoding into ``generation.positive_prompt.prompts``.
+            final_prompt: Last prompt of the sequence.
+            out_dir_name: Folder name under ``output_root``; defaults to the fixed
+                name ``multi_concept``, so default-named runs share one folder.
+            cfg_scale, resolution_input, aspect_ratio_input, steps: Optional
+                generation overrides; omitted from the command when None.
+
+        Returns:
+            The output directory handed to the job.
+
+        Raises:
+            subprocess.CalledProcessError: If the ``main.py`` run fails.
+        """
+        out_dir = self.output_root / (out_dir_name or "multi_concept")
+        imgs_json = json.dumps([str(p) for p in concept_images])
+        prompts_all = concept_prompts + [final_prompt]
+        prompts_json = json.dumps(prompts_all)
+        base_overrides = [
+            f"generation.positive_prompt.image_path={imgs_json}",
+            f"generation.positive_prompt.prompts={prompts_json}",
+            # Always sent as False for this pipeline (key spelling kept as in the original).
+            "generation.pad_img_placehoder=False",
+        ]
+        overrides = self._build_overrides(
+            extra_overrides=base_overrides,
+            cfg_scale=cfg_scale,
+            resolution_input=resolution_input,
+            aspect_ratio_input=aspect_ratio_input,
+            steps=steps,
+        )
+        self._run_vincie_once(overrides, out_dir)
+        self._clean_gpu_memory()
+        return out_dir
+    # ---------- Util ----------
+    @staticmethod
+    def _slug(path_or_text: str) -> str:
+        """Build a label of at most 64 characters from a path or free text.
+
+        When the argument names an existing filesystem entry, its stem is used;
+        otherwise the text itself. Every character that is not alphanumeric or
+        one of ``-``, ``_``, ``.`` is replaced by ``_``.
+        """
+        p = Path(path_or_text)
+        # NOTE(review): exists() touches the filesystem; for very long free text it
+        # may raise OSError on some platforms — confirm callers only pass paths.
+        base = p.stem if p.exists() else str(path_or_text)
+        keep = "".join(c if c.isalnum() or c in "-_." else "_" for c in str(base))
+        return keep[:64]