import os
import sys
import subprocess
import time
import yaml
from pathlib import Path
from typing import Optional, Tuple
import torch
from PIL import Image

# LTX-Video modules are imported lazily inside LTXServer, after sys.path has been extended with the cloned repository.
from huggingface_hub import hf_hub_download, snapshot_download

# Application root directory; overridable through the APP_HOME environment variable.
APP_HOME = Path(os.getenv("APP_HOME", "/app"))

class LTXServer:
    """Process-wide singleton that prepares and runs the LTX-Video pipelines.

    The first construction clones/installs the LTX-Video repository when it
    is missing, downloads the model files referenced by the YAML config,
    and builds the main video pipeline and the latent upsampler. Later
    ``LTXServer()`` calls return the same, already initialized object.
    """

    _instance = None
    _pipeline = None
    _latent_upsampler = None

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on the first call."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        """Set up paths, fetch dependencies and build both pipelines.

        Raises:
            Exception: any error raised while preparing dependencies or
                building the pipelines is propagated (pipeline errors are
                printed first and then re-raised).
        """
        # __init__ runs on every LTXServer() call because __new__ always
        # returns the same object; skip the work once setup has succeeded.
        if getattr(self, "_initialized", False):
            return

        print("🚀 LTXServer (Download Cirúrgico) inicializando...")

        # Location of the LTX-Video checkout (cloned on demand if missing).
        self.LTX_REPO_DIR = Path("/data/LTX-Video")
        # Single directory that holds every downloaded model file.
        self.MODELS_DIR = Path("/data/ltx_models")
        self.CONFIG_PATH = APP_HOME / "configs" / "ltxv-13b-0.9.8-distilled-fp8.yaml"
        self.OUTPUT_ROOT = APP_HOME / "outputs" / "ltx"
        self.HF_HOME_CACHE = Path(os.getenv("HF_HOME", "/data/.cache/huggingface"))

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        if self.device == "cuda" and torch.cuda.is_bf16_supported():
            self.dtype = torch.bfloat16
        else:
            self.dtype = torch.float16

        for directory in (self.MODELS_DIR, self.OUTPUT_ROOT):
            directory.mkdir(parents=True, exist_ok=True)

        self.setup_dependencies()

        # Imported here because the repository only becomes importable after
        # setup_dependencies() has put it on sys.path.
        from ltx_video.inference import create_ltx_video_pipeline, create_latent_upsampler

        try:
            print("[LTXServer] Montando pipelines a partir dos arquivos locais...")

            # Override the config in memory so the text encoder is loaded
            # from the local models directory.
            self.config_yaml["text_encoder_model_name_or_path"] = str(self.MODELS_DIR)

            # Main pipeline: checkpoint weights plus the text encoder directory.
            self._pipeline = create_ltx_video_pipeline(
                ckpt_path=str(self.MODELS_DIR / self.config_yaml["checkpoint_path"]),
                precision=self.config_yaml["precision"],
                text_encoder_model_name_or_path=self.config_yaml["text_encoder_model_name_or_path"],
                sampler=self.config_yaml["sampler"],
                device=self.device,
            )

            # Latent upsampler used by the multi-scale pipeline.
            self._latent_upsampler = create_latent_upsampler(
                latent_upsampler_model_path=str(
                    self.MODELS_DIR / self.config_yaml["spatial_upscaler_model_path"]
                ),
                device=self.device,
            )
            print("✅ LTXServer (Download Cirúrgico) pronto.")
        except Exception as e:
            print(f"ERRO CRÍTICO ao montar as pipelines LTX: {e}")
            raise

        # Only set after a fully successful setup, so a failed attempt is
        # retried by the next LTXServer() call.
        self._initialized = True

    def setup_dependencies(self):
        """Ensure the LTX-Video code is available and the required models are downloaded."""
        self._ensure_repo_and_install()
        self._ensure_specific_models()

    def _ensure_repo_and_install(self) -> None:
        """Clone and pip-install LTX-Video if missing, then put it on sys.path.

        Raises:
            subprocess.CalledProcessError: if ``git clone`` or ``pip install`` fails.
        """
        # The presence of inference.py is the only marker checked; an existing
        # checkout is assumed to have been installed already.
        if (self.LTX_REPO_DIR / "inference.py").exists():
            print("[LTXServer] Repositório LTX-Video já existe e está instalado.")
        else:
            print("[LTXServer] Clonando repositório LTX-Video...")
            subprocess.run(
                [
                    "git", "clone", "--depth", "1",
                    "https://github.com/Lightricks/LTX-Video.git",
                    str(self.LTX_REPO_DIR),
                ],
                check=True,
            )
            print("[LTXServer] Instalando LTX-Video em modo editável...")
            subprocess.run(
                [sys.executable, "-m", "pip", "install", "-e", f"{self.LTX_REPO_DIR}[inference-script]"],
                check=True,
            )

        repo_dir = str(self.LTX_REPO_DIR)
        if repo_dir not in sys.path:
            sys.path.insert(0, repo_dir)

    def _ensure_specific_models(self) -> None:
        """Load the YAML config and download only the model files it needs.

        Stores the parsed config on ``self.config_yaml``.

        Raises:
            FileNotFoundError: if the config file does not exist.
            ValueError: if the config file does not contain a YAML mapping.
        """
        print(f"[LTXServer] Lendo config para identificar modelos necessários: {self.CONFIG_PATH}")
        if not self.CONFIG_PATH.exists():
            raise FileNotFoundError(f"Arquivo de configuração não encontrado: {self.CONFIG_PATH}")

        with open(self.CONFIG_PATH, "r", encoding="utf-8") as file:
            config = yaml.safe_load(file)
        # An empty file yields None and a list/scalar document yields a
        # non-dict; fail here with a clear message instead of later on .get().
        if not isinstance(config, dict):
            raise ValueError(
                f"Arquivo de configuração inválido (esperado um mapeamento YAML): {self.CONFIG_PATH}"
            )
        self.config_yaml = config

        hf_token = os.getenv("HF_TOKEN")

        # Individual weight files named by the config; missing keys are skipped.
        files_to_download = [
            self.config_yaml.get("checkpoint_path"),
            self.config_yaml.get("spatial_upscaler_model_path"),
        ]

        print(f"[LTXServer] Verificando arquivos de modelo principais em {self.MODELS_DIR}...")
        for filename in files_to_download:
            if not filename or (self.MODELS_DIR / filename).exists():
                continue
            print(f"Baixando {filename}...")
            hf_hub_download(
                repo_id="Lightricks/LTX-Video",
                filename=filename,
                local_dir=str(self.MODELS_DIR),
                cache_dir=str(self.HF_HOME_CACHE),
                token=hf_token,
            )

        # Supporting components go into the same directory as the weights.
        print("[LTXServer] Verificando componentes de apoio (VAE, Text Encoder)...")
        # NOTE(review): resume_download is reported as deprecated by recent
        # huggingface_hub releases — confirm against the pinned version.
        snapshot_download(
            repo_id="Lightricks/LTX-Video",
            local_dir=str(self.MODELS_DIR),
            cache_dir=str(self.HF_HOME_CACHE),
            token=hf_token,
            allow_patterns=["text_encoder/*", "tokenizer/*", "vae/*", "scheduler/*"],
            resume_download=True,
        )

        print("[LTXServer] Todos os modelos necessários estão prontos.")

    def round_to_nearest_resolution(self, height, width) -> Tuple[int, int]:
        """Round ``height`` and ``width`` down to multiples of the VAE compression ratio.

        Despite the name, values are always rounded down, never up.

        Returns:
            The adjusted ``(height, width)`` as integers.
        """
        ratio = self._pipeline.vae.spatial_compression_ratio
        rounded_height = height - (height % ratio)
        rounded_width = width - (width % ratio)
        return int(rounded_height), int(rounded_width)

    def run_inference(self, **kwargs) -> str:
        """Generate a video with the multi-scale pipeline and write it as an MP4.

        Keyword Args:
            prompt: text prompt passed to the pipeline.
            image_path: optional image used to build a conditioning item.
            target_height: output height passed to the pipeline.
            target_width: output width passed to the pipeline.
            num_frames: number of frames passed to the pipeline.
            seed: RNG seed; when omitted or ``None`` a random seed is drawn.
            denoise_strength: defaults to ``0.5``.

        Returns:
            Path of the written ``.mp4`` file, as a string.
        """
        from ltx_video.pipelines.pipeline_ltx_video import LTXMultiScalePipeline, ConditioningItem
        from diffusers.utils import export_to_video, load_image, load_video

        prompt = kwargs.get("prompt")
        image_path = kwargs.get("image_path")
        target_height = kwargs.get("target_height")
        target_width = kwargs.get("target_width")
        num_frames = kwargs.get("num_frames")
        seed = kwargs.get("seed")
        denoise_strength = kwargs.get("denoise_strength", 0.5)

        # Nanosecond timestamp so two runs within the same second do not
        # overwrite each other's output file.
        output_file_path = self.OUTPUT_ROOT / f"run_{time.time_ns()}.mp4"

        generator = torch.Generator(device=self.device)
        if seed is None:
            # manual_seed(None) raises TypeError; draw a random seed instead.
            generator.seed()
        else:
            # int() also accepts whole-number floats, which manual_seed rejects.
            generator.manual_seed(int(seed))

        multi_scale_pipeline = LTXMultiScalePipeline(self._pipeline, self._latent_upsampler)

        conditions = None
        if image_path:
            image = load_image(image_path)
            # NOTE(review): the image is round-tripped through a one-frame
            # video file before conditioning — confirm that ConditioningItem
            # accepts what load_video returns.
            video_condition_input = load_video(export_to_video([image]))
            conditions = [ConditioningItem(video_condition_input, 0, 1.0)]

        # Call arguments combine the YAML config with the caller's inputs.
        call_kwargs = {
            "prompt": prompt,
            "frame_rate": 24,
            "negative_prompt": "worst quality, inconsistent motion, blurry, jittery, distorted",
            "height": target_height,
            "width": target_width,
            "num_frames": num_frames,
            "generator": generator,
            "output_type": "pt",
            "conditioning_items": conditions,
            "decode_timestep": self.config_yaml["decode_timestep"],
            "decode_noise_scale": self.config_yaml["decode_noise_scale"],
            "first_pass": self.config_yaml["first_pass"],
            "second_pass": self.config_yaml["second_pass"],
            "downscale_factor": self.config_yaml["downscale_factor"],
            "denoise_strength": denoise_strength,
        }

        print("[LTXServer] Executando pipeline multi-escala...")
        result_tensor = multi_scale_pipeline(**call_kwargs).images

        # Move the first axis of the first sample to the end before export.
        # Presumably (channels, frames, H, W) -> (frames, H, W, channels) —
        # TODO confirm against the pipeline's output layout.
        frames = result_tensor[0].permute(1, 2, 3, 0).cpu().float()
        # Clamp so out-of-range values saturate instead of wrapping around
        # when cast to uint8.
        video_np = (frames.clamp(0.0, 1.0) * 255).numpy().astype("uint8")
        # NOTE(review): some diffusers releases rescale ndarray frames by 255
        # inside export_to_video — confirm the pinned version expects uint8.
        export_to_video(video_np, str(output_file_path), fps=24)

        return str(output_file_path)

# Singleton instance, created at import time: importing this module runs the
# full LTXServer setup (dependency checks, model downloads, pipeline build).
ltx_server_singleton = LTXServer()