# syntax=docker/dockerfile:1
# The directive above must stay on the first line: the RUN here-documents used
# below require the BuildKit Dockerfile frontend.
# =============================================================================
# DOCKERFILE - Complete AI Video Suite v2.0.0
# Optimized for 8x NVIDIA L40S GPUs (384GB Total VRAM)
# Production-Ready Multi-GPU Video Generation Suite
# =============================================================================
FROM nvidia/cuda:12.8.0-devel-ubuntu22.04

# =============================================================================
# METADATA AND LABELS
# =============================================================================
# cuda_version and python_version are kept in sync with the base image tag and
# the interpreter that /usr/bin/python3 is linked to further down.
LABEL maintainer="Complete AI Video Suite Team" \
      description="Multi-GPU AI Video Generation Suite with LTX FP8, Q8 Kernels, SeedVR, Wan2.2, VINCIE, MMAudio" \
      version="2.0.0" \
      build_date="2025-09-18" \
      cuda_version="12.8.0" \
      python_version="3.10" \
      pytorch_version="2.8.0+cu128" \
      architecture="amd64" \
      gpu_optimized="8x_L40S" \
      total_vram="384GB" \
      license="MIT"

# =============================================================================
# ENVIRONMENT VARIABLES - PRODUCTION OPTIMIZED
# =============================================================================
ENV DEBIAN_FRONTEND=noninteractive \
    TZ=UTC \
    LC_ALL=C.UTF-8 \
    LANG=C.UTF-8

# Python optimization
# pip is pinned below, so the "new version available" check is disabled (=1).
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONIOENCODING=utf-8 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# CUDA optimizations for 8x L40S GPUs
ENV NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics \
    NVIDIA_REQUIRE_CUDA="cuda>=12.8" \
    CUDA_LAUNCH_BLOCKING=0 \
    TORCH_CUDA_ARCH_LIST="8.9" \
    CUDA_CACHE_MAXSIZE=2147483648

# Multi-GPU distributed training
# NCCL_DEBUG accepts VERSION, WARN, INFO or TRACE; WARN keeps production logs
# quiet while still surfacing errors. Override at runtime when debugging.
ENV NCCL_DEBUG=WARN \
    NCCL_TREE_THRESHOLD=1 \
    NCCL_P2P_DISABLE=0 \
    NCCL_IB_DISABLE=0 \
    NCCL_NVLS_ENABLE=1 \
    NCCL_CROSS_NIC=1

# PyTorch optimizations
# NOTE(review): the TORCH_BACKENDS_* names below do not look like variables
# PyTorch itself reads; presumably the application consumes them - confirm.
ENV PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,roundup_power2_divisions:16 \
    TORCH_BACKENDS_CUDNN_BENCHMARK=1 \
    TORCH_BACKENDS_CUDA_MATMUL_ALLOW_TF32=1 \
    TORCH_BACKENDS_CUDNN_ALLOW_TF32=1

# Application paths
ENV APP_HOME=/app \
    HF_HOME=/app/model_cache \
    HF_HUB_CACHE=/app/model_cache/hub \
    TRANSFORMERS_CACHE=/app/model_cache/transformers \
    TORCH_HOME=/app/model_cache/torch \
    TMPDIR=/app/tmp \
    OUTPUT_DIR=/app/outputs

# CPU optimizations
ENV OMP_NUM_THREADS=8 \
    MKL_NUM_THREADS=8 \
    NUMEXPR_NUM_THREADS=8 \
    OPENBLAS_NUM_THREADS=8

# =============================================================================
# SYSTEM PACKAGE INSTALLATION
# =============================================================================
# The Python packages target 3.10 so that headers, venv support and pip all
# belong to the interpreter that python3/python are linked to below.
RUN apt-get update && apt-get install -y \
        build-essential \
        cmake \
        ninja-build \
        pkg-config \
        python3.10 \
        python3.10-dev \
        python3.10-venv \
        python3-distutils \
        python3-pip \
        git \
        git-lfs \
        curl \
        wget \
        rsync \
        unzip \
        zip \
        ffmpeg \
        libavcodec-dev \
        libavformat-dev \
        libavutil-dev \
        libswscale-dev \
        libgl1-mesa-glx \
        libgl1-mesa-dev \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender-dev \
        libgomp1 \
        libglu1-mesa \
        libglu1-mesa-dev \
        htop \
        nvtop \
        tree \
        vim \
        nano \
        tmux \
        screen \
        net-tools \
        iproute2 \
        iotop \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# =============================================================================
# PYTHON SETUP AND OPTIMIZATION
# =============================================================================
RUN ln -sf /usr/bin/python3.10 /usr/bin/python3 && \
    ln -sf /usr/bin/python3.10 /usr/bin/python && \
    python3 -m pip install --upgrade pip==24.2 setuptools==70.0.0 wheel==0.43.0

# Version specifiers are quoted: an unquoted ">" is a shell redirection, which
# silently drops the constraint and writes pip's output to a stray file.
RUN pip install \
        packaging \
        ninja \
        cmake \
        pybind11 \
        scikit-build \
        cython \
        "numpy>=1.24.3"

# =============================================================================
# PYTORCH AND CUDA LIBRARIES
# =============================================================================
# The CUDA 12.8 build is selected by the index URL; a "+cu128" local version
# label is not permitted together with the ">=" operator.
RUN pip install \
        "torch>=2.8.0" \
        torchvision \
        torchaudio \
        --index-url https://download.pytorch.org/whl/cu128

RUN pip install torchao

# Build-time sanity check. No GPU is attached during a normal build, so
# "CUDA available" is expected to print False here.
RUN python3 -c "import torch; print(f'PyTorch: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}'); print(f'CUDA version: {torch.version.cuda}'); print(f'Device count: {torch.cuda.device_count()}')"

# =============================================================================
# APPLICATION STRUCTURE SETUP
# =============================================================================
WORKDIR $APP_HOME

# Create the runtime directory tree before the sources are copied in, so the
# recursive chmod only touches these directories and not the project files.
RUN mkdir -p \
        $APP_HOME/installer \
        $APP_HOME/monitoring \
        $APP_HOME/tools \
        $APP_HOME/configs \
        $APP_HOME/build_cache \
        $APP_HOME/model_cache/hub \
        $APP_HOME/model_cache/transformers \
        $APP_HOME/model_cache/torch \
        $APP_HOME/model_cache/ltx_models \
        $APP_HOME/tmp \
        $APP_HOME/outputs \
        $APP_HOME/logs \
    && chmod -R 755 $APP_HOME

# =============================================================================
# APPLICATION SOURCES AND AI/ML LIBRARIES
# =============================================================================
# A single copy of the build context (configs/ included) replaces the three
# overlapping COPY instructions that previously duplicated the same files.
COPY . .

RUN pip install -r requirements.txt

RUN chmod +x start.sh && \
    find . \( -name "*.sh" -o -name "*.py" \) -exec chmod +x {} +

# =============================================================================
# CREATE OPTIMIZATION PATCHES AND TOOLS (FIXED SYNTAX)
# =============================================================================
# Nothing is generated here. If the build context ships
# tools/optimization_patch.py, the entrypoint below runs it at start-up.

# =============================================================================
# CONFIGURATION FILES
# =============================================================================
# NOTE(review): the nesting of the YAML documents below was reconstructed from
# whitespace-flattened text; verify the key hierarchy against whatever loads it.

# Create default LTX FP8 configuration
RUN cat <<'YAML_CONFIG' > $APP_HOME/configs/ltxv-13b-0.9.8-distilled-fp8.yaml
# LTX Video FP8 Distilled Configuration
# Optimized for 8x L40S GPUs (384GB VRAM)
model:
  target: "ltx_video.models.transformer_temporal.TransformerTemporalModel"
  params:
    transformer_additional_kwargs:
      attention_mode: "sdpa"
      enable_flash_attention: true
      memory_efficient_attention: true
    network_config:
      model_name: "ltxv-13b-0.9.8-distilled-fp8"
      fp8_optimization: true
      quantization: "fp8"
      ada_optimized: true
      multi_gpu_support: true

scheduler:
  target: "diffusers.LTXVideoScheduler"
  params:
    num_train_timesteps: 1000
    beta_start: 0.0001
    beta_end: 0.02
    beta_schedule: "scaled_linear"

vae:
  target: "diffusers.AutoencoderKLLTXVideo"
  params:
    force_upcast: false
    enable_slicing: true
    enable_tiling: true

text_encoder:
  target: "transformers.T5EncoderModel"
  params:
    torch_dtype: "bfloat16"

pipeline:
  target: "diffusers.LTXVideoPipeline"
  params:
    scheduler_type: "LTXVideoScheduler"
    num_inference_steps: 4
    guidance_scale: 1.0
    height: 704
    width: 1216
    num_frames: 121
    fps: 30
    enable_memory_efficient_attention: true
    enable_cpu_offload: false
    enable_model_cpu_offload: false
    max_batch_size: 4

multi_gpu:
  enabled: true
  num_gpus: 8
  distribution_strategy: "data_parallel"
  load_balancing: "memory_aware"
  synchronize_gpus: true
YAML_CONFIG

# Create multi-GPU optimization config
RUN cat <<'GPU_CONFIG' > $APP_HOME/configs/multi_gpu_config.yaml
# Multi-GPU Configuration for 8x L40S Setup
system:
  gpu_count: 8
  total_vram: "384GB"
  compute_capability: "8.9"
  architecture: "ADA_LOVELACE"

distributed_training:
  backend: "nccl"
  init_method: "env://"
  world_size: 8
  rank: 0

memory_optimization:
  gradient_checkpointing: true
  mixed_precision: "bf16"
  max_batch_size_per_gpu: 8
  gradient_accumulation_steps: 4
  memory_fraction: 0.95

performance:
  torch_compile: true
  cuda_graphs: true
  tensor_cores: true
  flash_attention: true
  memory_efficient_attention: true

load_balancing:
  strategy: "memory_aware"
  rebalance_interval: 30
  utilization_threshold: 0.8

thermal_management:
  max_temperature: 83
  fan_curve: "aggressive"
  throttle_threshold: 80
  monitoring_interval: 10

power_management:
  max_power_limit: 300
  efficiency_mode: false
  power_monitoring: true
GPU_CONFIG

# =============================================================================
# HEALTH CHECK SCRIPT
# =============================================================================
# The probe uses urllib from the standard library, so it keeps working even if
# requirements.txt does not install an HTTP client.
RUN cat <<'HEALTHCHECK_SCRIPT' > $APP_HOME/healthcheck.py
#!/usr/bin/env python3
"""
Health check script for Complete AI Video Suite
"""
import logging
import sys
import urllib.error
import urllib.request

import torch

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def check_cuda():
    """Check CUDA availability and GPU status"""
    if not torch.cuda.is_available():
        logger.error("CUDA not available")
        return False

    gpu_count = torch.cuda.device_count()
    logger.info(f"CUDA available with {gpu_count} GPUs")

    for i in range(gpu_count):
        try:
            torch.cuda.set_device(i)
            props = torch.cuda.get_device_properties(i)
            memory_allocated = torch.cuda.memory_allocated() / 1024**3
            memory_total = props.total_memory / 1024**3
            logger.info(f"GPU {i}: {props.name} ({memory_allocated:.2f}GB/{memory_total:.1f}GB)")

            # Small matmul to prove the device can actually execute kernels.
            x = torch.randn(100, 100, device=f'cuda:{i}')
            y = torch.matmul(x, x)
            torch.cuda.synchronize()
        except Exception as e:
            logger.error(f"GPU {i} test failed: {e}")
            return False

    return True


def check_web_service():
    """Check if web service is responding"""
    try:
        with urllib.request.urlopen("http://localhost:7860/", timeout=10) as response:
            status_code = response.status
    except urllib.error.HTTPError as e:
        # urlopen raises for error statuses; report them like any non-200.
        logger.error(f"Web service returned status code: {e.code}")
        return False
    except OSError as e:
        # Covers URLError, connection failures and socket timeouts.
        logger.error(f"Web service check failed: {e}")
        return False

    if status_code == 200:
        logger.info("Web service is responding")
        return True

    logger.error(f"Web service returned status code: {status_code}")
    return False


def main():
    """Main health check routine"""
    logger.info("Starting health check...")

    if not check_cuda():
        sys.exit(1)

    if not check_web_service():
        sys.exit(1)

    logger.info("All health checks passed")
    sys.exit(0)


if __name__ == "__main__":
    main()
HEALTHCHECK_SCRIPT

RUN chmod +x $APP_HOME/healthcheck.py

# =============================================================================
# USER SETUP AND SECURITY
# =============================================================================
# appuser is only granted passwordless nvidia-smi. It is deliberately not added
# to the sudo group, which would contradict that restriction. Files under
# /etc/sudoers.d must be mode 0440 to be accepted by sudo.
# NOTE(review): this file does not install the sudo package itself, so the rule
# only takes effect if sudo is present in the image - confirm it is needed.
RUN useradd -m -u 1000 -s /bin/bash appuser && \
    chown -R appuser:appuser $APP_HOME && \
    mkdir -p /etc/sudoers.d && \
    echo "appuser ALL=(ALL) NOPASSWD: /usr/bin/nvidia-smi" > /etc/sudoers.d/appuser && \
    chmod 0440 /etc/sudoers.d/appuser

USER appuser
WORKDIR $APP_HOME

# =============================================================================
# RUNTIME CONFIGURATION
# =============================================================================
EXPOSE 7860 8001 8002 6006

VOLUME ["/app/model_cache", "/app/outputs", "/app/logs", "/app/build_cache"]

HEALTHCHECK --interval=60s --timeout=30s --start-period=300s --retries=3 \
    CMD python3 /app/healthcheck.py

# =============================================================================
# FINAL SETUP AND ENTRY POINT
# =============================================================================
RUN cat <<'ENTRYPOINT_SCRIPT' > $APP_HOME/docker-entrypoint.sh
#!/bin/bash
set -euo pipefail

echo "🚀 Complete AI Suite - Docker Container Starting..."
echo "🐳 Container: $(hostname)"
echo "👤 User: $(whoami)"

# Count GPUs only when nvidia-smi exists; a failing query counts as zero
# instead of aborting start-up or printing the fallback twice.
gpu_count=0
if command -v nvidia-smi >/dev/null 2>&1; then
  gpu_count="$(nvidia-smi --list-gpus 2>/dev/null | wc -l)" || gpu_count=0
fi
echo "🎮 GPUs: ${gpu_count}"

if command -v nvidia-smi >/dev/null 2>&1; then
  echo "💾 CUDA Memory:"
  nvidia-smi --query-gpu=memory.total,memory.used --format=csv,noheader,nounits | nl
fi

# The patch script is optional: it is only present when the build context
# ships it, and a missing file must not prevent the container from starting.
patch_script=/app/tools/optimization_patch.py
if [[ -f "${patch_script}" ]]; then
  echo "🔧 Applying optimization patches..."
  python3 "${patch_script}"
else
  echo "⚠️ ${patch_script} not found - skipping optimization patches" >&2
fi

echo "📁 Setting up permissions..."
# Mounted volumes may belong to another user; a failed chmod is reported but
# does not stop the container.
chmod -R 755 /app/installer /app/monitoring \
  || echo "⚠️ could not update permissions on /app/installer or /app/monitoring" >&2
chmod +x /app/start.sh

mkdir -p /app/logs /app/outputs /app/tmp
chmod 777 /app/logs /app/outputs /app/tmp \
  || echo "⚠️ could not update permissions on /app/logs, /app/outputs or /app/tmp" >&2

echo "✅ Docker container initialization complete"
echo "🚀 Starting Complete AI Video Suite..."

exec /app/start.sh "$@"
ENTRYPOINT_SCRIPT

RUN chmod +x $APP_HOME/docker-entrypoint.sh

ENTRYPOINT ["/app/docker-entrypoint.sh"]
CMD ["--listen", "--multi-gpu", "--optimize"]

# =============================================================================
# FINAL METADATA
# =============================================================================
# The build date is taken from the build clock (UTC) and the CUDA version
# matches the base image tag in FROM.
RUN echo "Complete AI Video Suite v2.0.0" > /app/VERSION && \
    echo "Build Date: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /app/VERSION && \
    echo "CUDA: 12.8.0" >> /app/VERSION && \
    echo "PyTorch: $(python3 -c 'import torch; print(torch.__version__)')" >> /app/VERSION && \
    echo "Optimized for: 8x NVIDIA L40S GPUs" >> /app/VERSION

LABEL org.opencontainers.image.title="Complete AI Video Suite" \
      org.opencontainers.image.description="Production-ready multi-GPU video generation with LTX FP8, Q8 Kernels, and more" \
      org.opencontainers.image.version="2.0.0" \
      org.opencontainers.image.created="2025-09-18T17:42:00Z" \
      org.opencontainers.image.revision="main" \
      org.opencontainers.image.licenses="MIT"