Spaces:
Paused
Paused
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail

# Startup banner (three lines on stdout).
banner_rule="======================================================="
printf '%s\n' \
  "$banner_rule" \
  " ADUC-SDR — Start (VINCIE/SeedVR, 8× L40S)" \
  "$banner_rule"
# ---------------------- Base environment ----------------------
# Variables written as "${VAR:-default}" keep a value supplied by the caller
# and only fall back to the default when unset or empty; the others are forced.
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0,1,2,3,4,5,6,7}"
export TORCH_DTYPE="${TORCH_DTYPE:-bfloat16}"

# SDPA / FlashAttention toggles
export ENABLE_FLASH_SDP="${ENABLE_FLASH_SDP:-1}"
export ENABLE_MEMORY_EFFICIENT_SDP="${ENABLE_MEMORY_EFFICIENT_SDP:-1}"
export ENABLE_MATH_SDP="${ENABLE_MATH_SDP:-0}"
export FLASH_ATTENTION_DISABLE="${FLASH_ATTENTION_DISABLE:-0}"
export XFORMERS_FORCE_DISABLE="${XFORMERS_FORCE_DISABLE:-1}"

# CUDA baseline
export CUDA_MODULE_LOADING="LAZY"
export CUDA_DEVICE_MAX_CONNECTIONS="${CUDA_DEVICE_MAX_CONNECTIONS:-32}"
export CUDA_DEVICE_ORDER="PCI_BUS_ID"
export PYTORCH_CUDA_ALLOC_CONF="max_split_size_mb:512,garbage_collection_threshold:0.8"

# CPU thread pools
export OMP_NUM_THREADS="${OMP_NUM_THREADS:-8}"
export MKL_NUM_THREADS="${MKL_NUM_THREADS:-8}"

# NCCL baseline: InfiniBand transport off, socket traffic on the loopback
# interface, peer-to-peer left enabled.
# NOTE(review): NCCL_ASYNC_ERROR_HANDLING / NCCL_BLOCKING_WAIT look like the
# legacy spellings of the TORCH_NCCL_* variables — confirm which names the
# installed PyTorch actually reads.
export NCCL_DEBUG="INFO"
export NCCL_ASYNC_ERROR_HANDLING=1
export NCCL_P2P_DISABLE=0
export NCCL_IB_DISABLE=1
export NCCL_SOCKET_IFNAME="lo"
export NCCL_BLOCKING_WAIT=1
export TORCH_NCCL_BLOCKING_WAIT=1
export NCCL_TIMEOUT=600
# ---------------------- HF/torch cache persistence ----------------------
#######################################
# Expose the Hugging Face cache at a fixed compatibility path via a symlink.
# Arguments:
#   $1 - directory that really holds the cache
#   $2 - path at which the symlink should live
# Outputs:
#   A warning on stderr when $2 is a real directory and is left untouched.
# Returns:
#   0 when the link is in place or was deliberately skipped; otherwise the
#   exit status of ln.
#######################################
link_hf_cache() {
  local target=$1
  local link=$2

  # When the cache already lives at the link path there is nothing to link;
  # `ln -sf` would instead drop a self-referencing link inside the directory.
  if [[ "$target" == "$link" ]]; then
    return 0
  fi

  # ln cannot replace a real directory: it would create the link inside it.
  if [[ -d "$link" && ! -L "$link" ]]; then
    printf '⚠️ %s já é um diretório real; symlink não criado.\n' "$link" >&2
    return 0
  fi

  # -n replaces an existing symlink-to-directory instead of descending into it.
  ln -sfn -- "$target" "$link"
}

# Keep caches under /data when that directory exists; otherwise use /app.
if [[ -d /data ]]; then
  cache_root="/data/.cache"
else
  cache_root="/app/.cache"
fi
export HF_HOME="${cache_root}/huggingface"
export TORCH_HOME="${cache_root}/torch"
# A caller-provided HF_HUB_CACHE wins over the derived default.
export HF_HUB_CACHE="${HF_HUB_CACHE:-$HF_HOME/hub}"
mkdir -p "$HF_HUB_CACHE" "$TORCH_HOME"
mkdir -p /app/.cache
link_hf_cache "$HF_HOME" /app/.cache/huggingface

unset TRANSFORMERS_CACHE
export HF_HUB_ENABLE_HF_TRANSFER=1
export HF_HUB_DOWNLOAD_TIMEOUT=60

# MODEL_REPO is not referenced in the visible part of this script; kept as-is.
MODEL_REPO="ByteDance-Seed/VINCIE-3B"
CKPT_DIR="/app/ckpt/VINCIE-3B"
# Make sure the checkpoint path exists.
mkdir -p "$CKPT_DIR"

echo "📦 Cache HF: $HF_HUB_CACHE"
# Download through the structured HF cache (files are not duplicated), then
# expose the snapshot at the legacy checkpoint path through a symlink.
# The heredoc delimiter is quoted, so the shell expands nothing inside it.
python3 - <<'PY'
import os
import traceback

from huggingface_hub import snapshot_download

REPO_ID = 'ByteDance-Seed/VINCIE-3B'
CKPT_LINK = '/app/ckpt/VINCIE-3B'

cache_dir = os.environ.get('HF_HUB_CACHE')
print(f'📥 Baixando VINCIE-3B para cache: {cache_dir}')

try:
    # No local_dir: everything stays inside the HF cache layout.
    # NOTE(review): recent huggingface_hub releases deprecate resume_download
    # (downloads always resume) — confirm against the installed version
    # before dropping the argument.
    model_path = snapshot_download(
        repo_id=REPO_ID,
        cache_dir=cache_dir,
        resume_download=True,
        max_workers=8,
    )
    print(f'✅ Modelo em cache: {model_path}')

    # Compatibility symlink for code that still reads the legacy path.
    os.makedirs(os.path.dirname(CKPT_LINK), exist_ok=True)
    if os.path.islink(CKPT_LINK):
        # Stale link from a previous run: recreate it below.
        os.unlink(CKPT_LINK)
    elif os.path.isdir(CKPT_LINK) and not os.listdir(CKPT_LINK):
        # An empty real directory (the shell pre-creates this path with
        # mkdir -p) would otherwise block the symlink forever.
        os.rmdir(CKPT_LINK)

    if not os.path.lexists(CKPT_LINK):
        os.symlink(model_path, CKPT_LINK)
        print(f'🔗 Symlink: {CKPT_LINK} -> {model_path}')
    else:
        # Something real and non-empty lives there; do not touch it.
        print(f'ℹ️ {CKPT_LINK} já existe e não está vazio; symlink não criado.')
except Exception as e:
    # Best effort: report the failure and let the startup script continue.
    print(f'⚠️ Download falhou: {e}')
    traceback.print_exc()
PY
# ---------------------- Apex/Q8 builder ----------------------
#######################################
# Report whether the builder step is enabled.
# Globals:
#   DISABLE_BUILDER (read; unset or empty counts as 0)
# Returns:
#   0 when DISABLE_BUILDER is zero; 1 for any other value, including
#   non-numeric ones such as "true" (no test(1) error is emitted for those).
#######################################
builder_enabled() {
  [[ "${DISABLE_BUILDER:-0}" =~ ^0+$ ]]
}

# The builder only runs when nvidia-smi succeeds.
if nvidia-smi >/dev/null 2>&1; then
  if builder_enabled; then
    echo "Executando builder Apex/Q8..."
    chmod +x /app/builder.sh || true
    # A builder failure or timeout is reported and startup continues.
    # NOTE(review): the default timeout is 7,200,000 s (about 83 days) —
    # confirm this is intended rather than 7200.
    timeout "${BUILDER_TIMEOUT_SEC:-7200000}" bash -lc /app/builder.sh \
      || echo "Builder excedeu tempo/erro, prosseguindo."
  else
    echo "Builder desabilitado por DISABLE_BUILDER=1"
  fi
else
  echo "GPU não visível, pulando builder Apex/Q8."
fi

# Import check for flash_attn's rms_norm. It is not guarded, so with
# `set -e` active a failed import stops the script here.
python3 -c "from flash_attn.ops.rms_norm import rms_norm; print(rms_norm)"
# ---------------------- Diagnostics ----------------------
# Best effort: a failing info script does not stop startup.
/app/info.sh || true
#ls -la /app || true
#ls -R /app | head -n 2000 || true

# ---------------------- Starting the service ----------------------
echo "🚀 Subindo serviços..."
# Tip: VINCIE_DIRECT_TO_CKPT=1 can be exported to use the internal fallback.
# NOTE(review): this uses `python` while the rest of the script calls
# `python3` — confirm both resolve to the same interpreter.
python /app/app_vince.py