euIaxs22 committed on
Commit
22d0870
·
verified ·
1 Parent(s): 5d4198c

Update start.sh

Browse files
Files changed (1) hide show
  1. start.sh +182 -153
start.sh CHANGED
@@ -1,179 +1,208 @@
1
  #!/usr/bin/env bash
2
  set -euo pipefail
3
 
4
- echo "======================================================================"
5
- echo " ADUC-SDR Video Suite — Inicialização (SeedVR2-3B + FastAPI/Gradio UI)"
6
- echo "======================================================================"
7
-
8
- # --- 1) Ambiente e GPUs -------------------------------------------------------
9
- export PYTHONFAULTHANDLER=1
10
- export TORCH_NCCL_ASYNC_ERROR_HANDLING=1
11
- export NCCL_DEBUG="${NCCL_DEBUG:-INFO}"
12
- export TORCH_DISTRIBUTED_DEBUG="${TORCH_DISTRIBUTED_DEBUG:-DETAIL}"
13
-
14
- # Autodetecta GPUs somente se a variável não estiver definida
15
- if [ -z "${CUDA_VISIBLE_DEVICES:-}" ]; then
16
- if command -v nvidia-smi >/dev/null 2>&1; then
17
- export CUDA_VISIBLE_DEVICES="$(nvidia-smi --query-gpu=index --format=csv,noheader | tr '\n' ',' | sed 's/,$//')"
18
- fi
19
- fi
20
- NUM_GPUS=$(python3 - <<'PY'
21
- import os, torch
22
- print(torch.cuda.device_count() if torch.cuda.is_available() else 0)
23
- PY
24
- )
25
- echo "[INFO] GPUs visíveis: ${NUM_GPUS} (${CUDA_VISIBLE_DEVICES:-unset})"
26
 
27
- # --- 2) Builder CUDA (opcional) -----------------------------------------------
28
- echo "🛠️ [ETAPA 1/6] Executando builder.sh (se existir)..."
29
  if [ -f "/app/builder.sh" ]; then
30
- bash /app/builder.sh
31
  echo "✅ Builder finalizado."
32
  else
33
- echo "⚠️ Aviso: builder.sh não encontrado. Pulando etapa."
34
  fi
35
 
36
- # --- 3) Caches persistentes (HF/Torch) ----------------------------------------
37
- if [ -d /data ]; then
38
- echo "[INFO] Usando /data como raiz de cache persistente."
39
- export HF_HOME="${HF_HOME:-/data/.cache/huggingface}"
40
- export TORCH_HOME="${TORCH_HOME:-/data/.cache/torch}"
41
- else
42
- echo "[INFO] Usando /app/.cache como fallback local."
43
- export HF_HOME="${HF_HOME:-/app/.cache/huggingface}"
44
- export TORCH_HOME="${TORCH_HOME:-/app/.cache/torch}"
45
- fi
46
- export HF_HUB_CACHE="${HF_HUB_CACHE:-$HF_HOME/hub}"
47
- export HF_HUB_ENABLE_HF_TRANSFER="${HF_HUB_ENABLE_HF_TRANSFER:-1}"
48
- mkdir -p "$HF_HUB_CACHE" "$TORCH_HOME"
49
- echo "[INFO] HF_HOME=$HF_HOME"
50
- echo "[INFO] TORCH_HOME=$TORCH_HOME"
51
-
52
- # --- 4) SeedVR: código-fonte e checkpoints ------------------------------------
53
- export MODEL_REPO="${MODEL_REPO:-ByteDance-Seed/SeedVR2-3B}"
54
- export CKPT_DIR="${CKPT_DIR:-/app/ckpts}"
55
- export SRC_REPO_DIR="${SRC_REPO_DIR:-/app/SeedVR_source}"
56
-
57
- echo "📦 [ETAPA 2/6] Código-fonte SeedVR..."
58
- if [ ! -d "$SRC_REPO_DIR/.git" ] && [ ! -d "$SRC_REPO_DIR/projects" ]; then
59
- git clone https://github.com/ByteDance-Seed/SeedVR.git "$SRC_REPO_DIR" || true
60
- else
61
- echo "[INFO] SeedVR já presente em $SRC_REPO_DIR."
62
- fi
63
 
64
- #echo "📁 [ETAPA 3/6] Sincronizando módulos em /app..."
65
- #mkdir -p /app/common /app/projects /app/data /app/models /app/configs_3b "$CKPT_DIR"
66
- # Copia se existirem; cria symlink se faltarem
67
- #for d in common projects data models configs_3b; do
68
- # if [ -d "$SRC_REPO_DIR/$d" ]; then
69
- # cp -rvu "$SRC_REPO_DIR/$d/." "/app/$d/." || true
70
- # fi
71
- # if [ ! -d "/app/$d" ] && [ -d "$SRC_REPO_DIR/$d" ]; then
72
- # #ln -s "$SRC_REPO_DIR/$d" "/app/$d"
73
- # echo "[LINK] /app/$d -> $SRC_REPO_DIR/$d"
74
- # fi
75
- #done
76
-
77
- # Pré-download dos pesos e embeddings
78
- echo "📥 [ETAPA 4/6] Verificando/baixando pesos do modelo..."
79
  python3 - <<'PY'
80
- import os, sys, traceback
81
- from huggingface_hub import snapshot_download
82
- try:
83
- repo_id = os.environ.get("MODEL_REPO", "ByteDance-Seed/SeedVR2-3B")
84
- cache_dir = os.environ.get("HF_HUB_CACHE")
85
- ckpt_dir = os.environ.get("CKPT_DIR", "/app/ckpts")
86
- os.makedirs(ckpt_dir, exist_ok=True)
87
- print(f"[HF] snapshot {repo_id} -> {ckpt_dir}")
88
- snapshot_download(
89
- repo_id=repo_id,
90
- cache_dir=cache_dir,
91
- local_dir=ckpt_dir,
92
- #local_dir_use_symlinks=False,
93
- #force_download=True,
94
- )
95
- from torch.hub import download_url_to_file
96
- for name in ("pos_emb.pt","neg_emb.pt"):
97
- dst = f"/app/{name}"
98
- if not os.path.exists(dst):
99
- url = f"https://huggingface.co/{repo_id}/resolve/main/{name}"
100
- print(f"[DL] {url} -> {dst}")
101
- download_url_to_file(url, dst)
102
- print("✅ Pesos e embeddings prontos.")
103
- except Exception as e:
104
- print(f"🛑 Falha no prefetch: {e}")
105
- traceback.print_exc()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  PY
107
 
108
- # Garante que os imports absolutos funcionem
109
- export PYTHONPATH="/app:/app/projects:${SRC_REPO_DIR}:${PYTHONPATH:-}"
110
-
111
- # --- 5) VINCIE persistente em /data/VINCIE (ou /app/VINCIE) -------------------
112
- if [ -d /data ]; then PERSIST_ROOT="/data"; else PERSIST_ROOT="/app"; fi
113
- export VINCIE_DIR="${VINCIE_DIR:-${PERSIST_ROOT}/VINCIE}"
114
- mkdir -p "$VINCIE_DIR"
115
- echo "[VINCIE] destino: $VINCIE_DIR"
116
-
117
- # Defina um dos modos abaixo via env:
118
- # export VINCIE_REMOTE="https://github.com/<ORG>/<REPO>.git" # Git
119
- # export VINCIE_REF="main"
120
- # export VINCIE_HF_REPO="org/VINCIE" # Hugging Face
121
-
122
- # Git mode
123
- if [ -n "${VINCIE_REMOTE:-}" ]; then
124
- if [ -d "$VINCIE_DIR/.git" ]; then
125
- echo "[VINCIE] Atualizando repo Git..."
126
- git -C "$VINCIE_DIR" fetch --all -p --tags
127
- git -C "$VINCIE_DIR" checkout "${VINCIE_REF:-main}"
128
- git -C "$VINCIE_DIR" pull --ff-only
129
- else
130
- echo "[VINCIE] Clonando ${VINCIE_REMOTE} -> ${VINCIE_DIR}"
131
- git clone --depth 1 --branch "${VINCIE_REF:-main}" "$VINCIE_REMOTE" "$VINCIE_DIR"
132
- fi
133
  fi
134
 
135
- # HF mode
136
- if [ -n "${VINCIE_HF_REPO:-}" ]; then
137
- python3 - <<'PY'
138
- import os
139
- from huggingface_hub import snapshot_download
140
- repo_id = os.environ["VINCIE_HF_REPO"]
141
- local_dir = os.environ["VINCIE_DIR"]
142
- cache_dir = os.environ.get("HF_HUB_CACHE")
143
- os.makedirs(local_dir, exist_ok=True)
144
- snapshot_download(
145
- repo_id=repo_id,
146
- cache_dir=cache_dir,
147
- local_dir=local_dir,
148
- )
149
- print(f"[VINCIE] snapshot pronto em {local_dir}")
150
- PY
151
- fi
152
 
153
- # Torna VINCIE importável, se contiver pacotes Python
154
- export PYTHONPATH="${VINCIE_DIR}:${PYTHONPATH:-}"
155
 
156
- # --- Pretty tree (somente dirs, limpo) ---
157
- echo "🌳 Estrutura"
158
 
 
 
 
159
 
 
 
 
 
 
 
 
 
160
 
 
 
 
 
161
 
162
- tree -L 7 /app
163
- echo " ##############
164
- "
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
- tree -L 6 /data
167
- echo " ##############
168
- "
 
 
 
 
169
 
170
 
 
 
171
 
172
 
173
- # --- 7) Lançamento da API (Uvicorn) -------------------------------------------
174
- echo "🚀 Iniciando app.py via Uvicorn..."
175
- APP_HOST="${APP_HOST:-0.0.0.0}"
176
- APP_PORT="${APP_PORT:-8000}"
177
- APP_WORKERS="${APP_WORKERS:-1}"
178
- # app.py deve exportar 'app = FastAPI(...)'
179
- exec python -m uvicorn app:app --host "${APP_HOST}" --port "${APP_PORT}" --workers "${APP_WORKERS}" --proxy-headers
 
#!/usr/bin/env bash
# start.sh — container entrypoint for the VINCIE web UI.
#
# Steps: run the optional CUDA builder, export the Gradio settings, run a
# Python preflight (repo + checkpoint snapshot + symlinks), put the VINCIE
# checkout on PYTHONPATH, print diagnostics, then launch app_vince.py.
#
# Environment:
#   APP_DIR     working directory for the UI process (default: /app)
#   PORT        value exported as GRADIO_SERVER_PORT (default: 7860)
#   VINCIE_DIR  VINCIE checkout prepended to PYTHONPATH (default: /app/VINCIE)
#
# NOTE: the previous revision contained this script twice, fused on the launch
# line ("app_vince.py#!/usr/bin/env bash"), which made python3 receive a bogus
# path and abort under `set -e`. Only a single copy is kept here.
set -euo pipefail

: "${APP_DIR:=/app}"

export PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True,max_split_size_mb:128"
export CUDA_MODULE_LOADING="LAZY"

#######################################
# Print a depth-limited directory tree as a best-effort diagnostic.
# Skips silently when the directory or the `tree` binary is missing so that,
# under `set -e`, a purely informational listing can never abort startup.
# Arguments: $1 - directory to list
# Outputs:   tree listing on stdout (nothing when skipped)
#######################################
show_tree() {
  local dir=$1
  if [[ -d "$dir" ]] && command -v tree >/dev/null 2>&1; then
    tree -L 4 "$dir" || true
  fi
}

echo "======================================================="
echo " VINCIE - Start (VINCIE-3B, 8x L40S)"
echo "======================================================="

# 1) Builder: optional step that compiles/installs CUDA dependencies.
#    A failure inside builder.sh still aborts startup (set -e).
echo "🛠️ Iniciando o builder.sh para compilar/instalar dependências CUDA..."
if [[ -f "/app/builder.sh" ]]; then
  /bin/bash /app/builder.sh
  echo "✅ Builder finalizado."
else
  echo "⚠️ Aviso: builder.sh não encontrado. Pulando etapa de compilação de dependências."
fi

# 2) UI environment
export GRADIO_SERVER_NAME="0.0.0.0"
export GRADIO_SERVER_PORT="${PORT:-7860}"
export GRADIO_ENABLE_QUEUE="True"

# 3) Preflight: repo + snapshot + idempotent symlinks.
#    Any exception or SystemExit in the heredoc fails the script (set -e).
python3 - <<'PY'
import shutil
from pathlib import Path
from services.vincie import VincieService

repo_dir = Path("/app/VINCIE")
ckpt_repo = repo_dir / "ckpt" / "VINCIE-3B"
ckpt_app = Path("/app/ckpt") / "VINCIE-3B"


def ensure_symlink(link: Path, target: Path) -> None:
    """Make `link` a directory symlink to `target`; safe to call repeatedly.

    Nothing is touched when `link` already resolves to `target`. That also
    covers the case where `link` *is* the target directory, which must not be
    deleted and replaced by a self-referential symlink.
    """
    if link.exists() and link.resolve() == target.resolve():
        return
    if link.is_symlink():
        # Stale or broken symlink: drop it and relink below.
        link.unlink()
    elif link.exists():
        # A real directory/file occupies the path: remove it so the location
        # is standardized as a symlink.
        if link.is_dir():
            shutil.rmtree(link)
        else:
            link.unlink()
    else:
        link.parent.mkdir(parents=True, exist_ok=True)
    link.symlink_to(target, target_is_directory=True)


# 3.1 fetch repo + model snapshot
svc = VincieService()
svc.ensure_repo()
svc.ensure_model()
snapshot = Path(str(svc.ckpt_dir))

# 3.2 expose the snapshot inside the repo and under /app/ckpt
ensure_symlink(ckpt_repo, snapshot)
ensure_symlink(ckpt_app, snapshot)

# 3.3 validate the essential snapshot entries
need = [snapshot / "dit.pth", snapshot / "vae.pth", snapshot / "llm14b"]
missing = [str(p) for p in need if not p.exists()]
if missing:
    raise SystemExit(f"[preflight] faltam itens no snapshot: {missing}")

# 3.4 optional /app/models symlink (original note: YAML inheritance)
models_link = Path("/app/models")
models_src = repo_dir / "models"
if models_src.exists() and (not models_link.exists()):
    try:
        models_link.symlink_to(models_src, target_is_directory=True)
        print(f"[preflight] linked {models_link} -> {models_src}")
    except Exception as e:
        # Best effort only: a failed optional link must not block startup.
        print("[preflight] warn: link models failed:", e)

print(f"[preflight] OK: repo={repo_dir}, ckpt(link)={ckpt_repo} -> {snapshot}")
PY

# 4) PYTHONPATH (upstream imports): prepend VINCIE_DIR, adding the ':'
#    separator only when PYTHONPATH already has content.
export VINCIE_DIR="${VINCIE_DIR:-/app/VINCIE}"
export PYTHONPATH="${VINCIE_DIR}${PYTHONPATH:+:${PYTHONPATH}}"

# 5) Minimal diagnostics (best effort; never fatal)
echo "🔎 ckpt (repo):"
ls -la /app/VINCIE/ckpt || true
ls -la /app/VINCIE/ckpt/VINCIE-3B | head -n 20 || true
echo "🔎 ckpt (/app):"
ls -la /app/ckpt || true
ls -la /app/ckpt/VINCIE-3B | head -n 20 || true

show_tree /app
show_tree /data

# 6) Launch the UI. `exec` replaces this shell with the Python process so it
#    receives signals directly and its exit status becomes the script's.
echo "🚀 Iniciando a interface web VINCIE (app_vince.py)..."
cd "$APP_DIR"
exec python3 /app/app_vince.py