# Source: Aduc-sdr-2_5s / services/vince_server.py
# Last change: "Update services/vince_server.py" by euIaxs22 (commit 10fdfb8, verified)
# Page header residue from the Hugging Face file view (raw / history / blame); size 12.1 kB
#!/usr/bin/env python3
import inspect
import os, sys, gc, subprocess
from pathlib import Path
from typing import List, Optional
from omegaconf import OmegaConf, open_dict
# Copiado/adaptado do vincie.py: uso de HF Hub
from huggingface_hub import hf_hub_download, list_repo_files, HfApi
# Locations and identifiers; each one can be overridden through the
# environment variable of the same name.
VINCIE_DIR = Path(os.getenv("VINCIE_DIR", "/app/VINCIE"))
VINCE_GIT_URL = os.getenv("VINCE_GIT_URL", "https://github.com/ByteDance-Seed/VINCIE")
VINCE_REPO_ID = os.getenv("VINCE_REPO_ID", "ByteDance-Seed/VINCIE-3B")
VINCE_CKPT = Path(os.getenv("VINCE_CKPT", "/app/ckpt/VINCIE-3B"))

# Put the repository first on sys.path so its packages can be imported.
if str(VINCIE_DIR) not in sys.path:
    sys.path.insert(0, str(VINCIE_DIR))

# Expose the repository's models/ directory as /app/models (kept from the
# original server). Best effort: a failure only prints a warning.
# NOTE: this runs at import time, so the link is skipped when the repository
# directory does not exist yet.
try:
    app_models = Path("/app/models")
    vincie_models = VINCIE_DIR / "models"
    # exists() is False for a dangling symlink, so also test is_symlink();
    # otherwise symlink_to() would fail with FileExistsError on every import.
    already_there = app_models.exists() or app_models.is_symlink()
    if not already_there and vincie_models.exists():
        app_models.symlink_to(vincie_models, target_is_directory=True)
except Exception as e:
    print("[vince_server] warn: link /app/models failed:", e)
class VinceServer:
    """Long-lived wrapper around the VINCIE generator object.

    Construction makes sure the repository and the model assets are present,
    loads the generation config, builds the generator and runs its bootstrap
    steps once. The public ``generate_*`` methods then reuse that generator
    for every request.
    """

    def __init__(self, config_path: str = "/app/VINCIE/configs/generate.yaml",
                 *, base_overrides: Optional[List[str]] = None,
                 output_root: str = "/app/outputs", chdir_repo: bool = True):
        """Prepare repository, checkpoint and generator.

        Args:
            config_path: YAML config handed to the repo's ``load_config``.
            base_overrides: Extra overrides passed to ``load_config``.
            output_root: Directory under which per-request output folders are
                placed; created if missing.
            chdir_repo: When true, change the working directory to the
                repository so its relative paths resolve.

        Raises:
            subprocess.CalledProcessError: If cloning the repository fails.
            RuntimeError: If the generator lacks a required bootstrap step.
        """
        import copy

        self.config_path = config_path
        self.output_root = Path(output_root)
        self.output_root.mkdir(parents=True, exist_ok=True)
        overrides = list(base_overrides or [])
        self.HF_HOME = Path(os.getenv("HF_HOME", "/data/.cache/huggingface"))
        self.HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN") or None

        # 1) Clone the repository if it is not there yet.
        self.ensure_repo()
        # 2) Download every model file into the checkpoint target.
        self.ensure_model_all()
        # 3) Compatibility symlink <repo>/ckpt/VINCIE-3B -> checkpoint dir.
        self._ensure_ckpt_symlink()
        # 4) chdir so that relative paths used by the repo keep working.
        if chdir_repo:
            try:
                os.chdir(str(VINCIE_DIR))
            except Exception as e:
                print("[vince_server] warn: chdir repo failed:", e)

        # Imported late: the module lives inside the repo ensured above.
        from common.config import load_config, create_object  # type: ignore

        # 5) Load the config and bootstrap the generator once.
        self.config = load_config(self.config_path, overrides)
        # Pristine snapshot used as the starting point of every request, so
        # that base_overrides survive and requests do not leak into each other.
        if OmegaConf.is_config(self.config):
            self._base_config = copy.deepcopy(self.config)
        else:
            self._base_config = None
        self.gen = create_object(self.config)
        self._bootstrap_models()

    # ==== Bootstrap helpers copied/adapted from vincie.py ====
    def ensure_repo(self) -> None:
        """Clone the official repository when VINCIE_DIR does not exist."""
        if not VINCIE_DIR.exists():
            subprocess.run(["git", "clone", VINCE_GIT_URL, str(VINCIE_DIR)], check=True)

    def ensure_model_all(self, repo_revision: Optional[str] = None) -> None:
        """Download all files of VINCE_REPO_ID from the Hub into VINCE_CKPT.

        Sub-directories are preserved. A file is skipped only when a local
        copy larger than 1 MB already exists. Listing and per-file download
        failures are reported as warnings and do not raise.

        Args:
            repo_revision: Hub revision used for both listing and download;
                ``None`` means the Hub default.
        """
        VINCE_CKPT.mkdir(parents=True, exist_ok=True)
        token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")

        try:
            all_files: List[str] = list_repo_files(
                repo_id=VINCE_REPO_ID,
                repo_type="model",
                revision=repo_revision,
                token=token,
            )
        except Exception as e:
            print(f"[vince_server] warn: list_repo_files failed for '{VINCE_REPO_ID}': {e}")
            all_files = []

        def _need(path: Path) -> bool:
            """Return True unless *path* exists and is larger than 1 MB."""
            try:
                return path.stat().st_size <= 1_000_000
            except OSError:
                return True

        downloaded = 0
        for rel_path in all_files:
            if rel_path.endswith("/"):
                continue
            dst = VINCE_CKPT / rel_path
            dst.parent.mkdir(parents=True, exist_ok=True)
            if not _need(dst):
                continue
            try:
                hf_hub_download(
                    repo_id=VINCE_REPO_ID,
                    filename=rel_path,
                    cache_dir=str(self.HF_HOME),
                    local_dir=str(VINCE_CKPT),
                    token=token,
                    # Same revision as the listing above; otherwise a file
                    # listed at one revision is fetched from another.
                    revision=repo_revision,
                )
                downloaded += 1
            except Exception as de:
                print(f"[vince_server] warn: download failed '{rel_path}': {de}")
        print(f"[vince_server] model assets checked={len(all_files)} downloaded={downloaded}")

    def _ensure_ckpt_symlink(self) -> None:
        """Create the symlink <VINCIE_DIR>/ckpt/VINCIE-3B -> VINCE_CKPT.

        Real directories are never removed; only a symlink that points
        somewhere else is replaced. Failures are printed as warnings.
        """
        repo_ckpt_dir = VINCIE_DIR / "ckpt"
        repo_ckpt_dir.mkdir(parents=True, exist_ok=True)
        link = repo_ckpt_dir / "VINCIE-3B"
        try:
            if link.is_symlink():
                try:
                    # Compare resolved paths on both sides so a checkpoint
                    # path that itself contains symlinks is not seen as stale.
                    stale = link.resolve() != VINCE_CKPT.resolve()
                except Exception:
                    # Unresolvable link: drop it and recreate.
                    stale = True
                if stale:
                    link.unlink(missing_ok=True)
            # exists() is False for a dangling link; without the is_symlink()
            # test symlink_to() would raise FileExistsError.
            if not link.exists() and not link.is_symlink():
                link.symlink_to(VINCE_CKPT, target_is_directory=True)
        except Exception as e:
            print("[vince_server] warn: ensure_ckpt_symlink failed:", e)

    # ==== Remainder: kept from the original server ====
    def _assert_ckpt_ready(self):
        """Raise RuntimeError when dit.pth, vae.pth or llm14b is missing in VINCE_CKPT."""
        required = (("dit.pth", "dit.pth"), ("vae.pth", "vae.pth"), ("llm14b", "llm14b/"))
        missing = [label for rel, label in required if not (VINCE_CKPT / rel).exists()]
        if missing:
            raise RuntimeError(f"[vince_server] missing checkpoint assets: {', '.join(missing)}")

    def _bootstrap_models(self):
        """Run the generator's mandatory configure steps in order.

        Raises:
            RuntimeError: As soon as a step is missing or not callable
                (fail-fast; later steps are not attempted).
        """
        for name in (
            "configure_persistence",
            "configure_models",
            "configure_diffusion",
            # "configure_sampler" is intentionally not run here.
        ):
            fn = getattr(self.gen, name, None)
            if not callable(fn):
                raise RuntimeError(f"[vince_server] missing bootstrap step: {name}")
            fn()

    def _fresh_config(self):
        """Return a writable config to be customised for one request.

        Uses a deep copy of the config loaded at construction when available;
        otherwise falls back to loading ``self.config_path`` directly.
        """
        import copy

        base = getattr(self, "_base_config", None)
        if base is None:
            return OmegaConf.load(self.config_path)
        cfg = copy.deepcopy(base)
        # Precaution in case the loader marked the config read-only:
        # open_dict() lifts struct mode only, not the read-only flag.
        OmegaConf.set_readonly(cfg, False)
        return cfg

    def _activate_config(self, cfg) -> None:
        """Make *cfg* the active config of the server and of the generator.

        The inference method is called without arguments, so the generator
        only sees settings that live on its own ``config`` attribute.
        """
        self.config = cfg
        gen = getattr(self, "gen", None)
        if gen is not None and hasattr(gen, "config"):
            gen.config = cfg

    def _run_request(self, cfg) -> None:
        """Activate *cfg*, run one inference and always release GPU memory."""
        self._activate_config(cfg)
        try:
            self._infer_once()
        finally:
            self._cleanup()

    def _apply_generation_overrides(self, cfg, *, output_dir: Path,
                                    image_path: Optional[str] = None,
                                    prompts: Optional[List[str]] = None,
                                    cfg_scale: Optional[float] = None,
                                    aspect_ratio: Optional[str] = None,
                                    resolution: Optional[int] = None,
                                    steps: Optional[int] = None):
        """Write per-request settings into *cfg* (in place).

        Only arguments that are not ``None`` are applied; ``output_dir`` is
        always written. ``steps`` is not stored in *cfg*: it is forwarded to
        ``_set_steps``.
        """
        with open_dict(cfg):
            generation = cfg["generation"]
            generation["output"]["dir"] = str(output_dir)
            if image_path is not None:
                generation["positive_prompt"]["image_path"] = [str(image_path)]
            if prompts is not None:
                generation["positive_prompt"]["prompts"] = list(prompts)
            if cfg_scale is not None:
                try:
                    cfg["diffusion"]["cfg"]["scale"] = float(cfg_scale)
                except Exception as e:
                    # Best effort, but no longer silent.
                    print("[vince_server] warn: cfg_scale override failed:", e)
            if aspect_ratio is not None:
                generation["aspect_ratio_input"] = str(aspect_ratio)
            if resolution is not None:
                generation["resolution_input"] = int(resolution)
                generation["resolution"] = int(resolution)
        if steps is not None:
            self._set_steps(steps)

    def _set_steps(self, steps: int):
        """Subsample the sampler's timestep list to *steps* entries.

        Acts only when the generator has a sampler whose ``timesteps`` is a
        list or tuple. The full schedule seen first for a sampler is cached,
        so a later request can ask for more steps again instead of
        subsampling an already reduced list. Errors are printed as warnings.
        """
        try:
            sampler = getattr(self.gen, "sampler", None)
            if sampler is None:
                return
            current = getattr(sampler, "timesteps", None)
            if not isinstance(current, (list, tuple)):
                return
            cached = getattr(self, "_full_timesteps", None)
            if cached is None or cached[0] is not sampler:
                # New sampler object: its current schedule is the full one.
                cached = (sampler, list(current))
                self._full_timesteps = cached
            full = cached[1]
            total = len(full)
            if 0 < steps < total:
                import torch
                idx = torch.linspace(0, total - 1, steps).round().long().tolist()
                sampler.timesteps = [full[i] for i in idx]
            elif steps >= total and len(current) != total:
                # Undo a previous reduction.
                sampler.timesteps = list(full)
        except Exception as e:
            print("[vince_server] warn: set_steps failed:", e)

    def _infer_once(self):
        """Call the first available inference method of the generator.

        Tries ``inference_loop``, ``entrypoint`` and ``run`` in that order.
        The method is called without arguments because it reads its settings
        from the generator object itself.

        Raises:
            RuntimeError: If none of the candidate methods is callable.
        """
        for name in ("inference_loop", "entrypoint", "run"):
            fn = getattr(self.gen, name, None)
            if callable(fn):
                print(f"[vince_server] using inference method: {name}")
                return fn()
        raise RuntimeError("[vince_server] no valid inference method found")

    def _cleanup(self):
        """Release cached CUDA memory (best effort) and run the garbage collector."""
        try:
            import torch
            torch.cuda.synchronize()
            torch.cuda.empty_cache()
            if hasattr(torch.cuda, "reset_peak_memory_stats"):
                torch.cuda.reset_peak_memory_stats()
        except Exception:
            # Deliberately ignored: torch or CUDA may be unavailable.
            pass
        gc.collect()

    # ===== Public APIs (kept) =====
    def generate_multi_turn(self, input_image: str, turns: List[str],
                            out_dir_name: Optional[str] = None,
                            *, cfg_scale: Optional[float] = None,
                            aspect_ratio: Optional[str] = None,
                            resolution: Optional[int] = None,
                            steps: Optional[int] = None) -> Path:
        """Run inference with one input image and a list of turn prompts.

        Args:
            input_image: Path of the input image.
            turns: Prompts, written to the config as the positive prompts.
            out_dir_name: Folder name under ``output_root``; defaults to
                ``multi_turn_<image stem>``.
            cfg_scale, aspect_ratio, resolution, steps: Optional overrides,
                applied only when not ``None``.

        Returns:
            The output directory configured for this request.
        """
        out_dir = self.output_root / (out_dir_name or f"multi_turn_{Path(input_image).stem}")
        cfg = self._fresh_config()
        self._apply_generation_overrides(cfg, output_dir=out_dir, image_path=input_image,
                                         prompts=turns, cfg_scale=cfg_scale,
                                         aspect_ratio=aspect_ratio, resolution=resolution,
                                         steps=steps)
        self._run_request(cfg)
        return out_dir

    def generate_multi_concept(self, concept_images: List[str], concept_prompts: List[str],
                               final_prompt: str, out_dir_name: Optional[str] = None,
                               *, cfg_scale: Optional[float] = None,
                               aspect_ratio: Optional[str] = None,
                               resolution: Optional[int] = None,
                               steps: Optional[int] = None,
                               pad_placeholder: bool = False) -> Path:
        """Run inference with several concept images plus a final prompt.

        Args:
            concept_images: Paths of the concept images.
            concept_prompts: Prompts for the concepts; ``final_prompt`` is
                appended after them.
            final_prompt: Last prompt of the sequence.
            out_dir_name: Folder name under ``output_root``; defaults to
                ``multi_concept``.
            cfg_scale, aspect_ratio, resolution, steps: Optional overrides,
                applied only when not ``None``.
            pad_placeholder: Value written to ``generation.pad_img_placehoder``
                (the key spelling is kept exactly as in the original code).

        Returns:
            The output directory configured for this request.
        """
        out_dir = self.output_root / (out_dir_name or "multi_concept")
        prompts_all = [*concept_prompts, final_prompt]
        cfg = self._fresh_config()
        with open_dict(cfg):
            positive = cfg["generation"]["positive_prompt"]
            positive["image_path"] = [str(p) for p in concept_images]
            positive["prompts"] = list(prompts_all)
            cfg["generation"]["pad_img_placehoder"] = bool(pad_placeholder)
        self._apply_generation_overrides(cfg, output_dir=out_dir, cfg_scale=cfg_scale,
                                         aspect_ratio=aspect_ratio, resolution=resolution,
                                         steps=steps)
        self._run_request(cfg)
        return out_dir