carlex3321 committed on
Commit
1c5dcff
·
verified ·
1 Parent(s): 4c17046

Update info.sh

Browse files
Files changed (1) hide show
  1. info.sh +123 -79
info.sh CHANGED
@@ -2,112 +2,156 @@
2
  set -euo pipefail
3
 
4
  echo "================= RUNTIME CAPABILITIES ================="
5
- nvidia-smi || true
6
- echo "CUDA_HOME: ${CUDA_HOME:-/usr/local/cuda}"
7
- echo "NVCC: $(nvcc --version 2>/dev/null | tail -n1 || echo 'N/A')"
 
 
 
 
8
  echo
 
 
 
 
 
 
9
 
 
10
  echo "[PyTorch / CUDA backend]"
11
- python3 - <<'PY'
12
- import json
13
- try:
14
- import torch
15
- info = {
16
- "torch": torch.__version__,
17
- "cuda_available": torch.cuda.is_available(),
18
- "cuda_device_count": torch.cuda.device_count(),
19
- "cuda_runtime_version": getattr(torch.version, "cuda", None),
20
- "cudnn_version": (torch.backends.cudnn.version() if torch.cuda.is_available() else None),
21
- "tf32": (torch.backends.cuda.matmul.allow_tf32 if torch.cuda.is_available() else None),
22
- "flash_sdp": (torch.backends.cuda.flash_sdp_enabled() if hasattr(torch.backends.cuda,"flash_sdp_enabled") else None),
23
- "mem_efficient_sdp": (torch.backends.cuda.mem_efficient_sdp_enabled() if hasattr(torch.backends.cuda,"mem_efficient_sdp_enabled") else None),
24
- "math_sdp": (torch.backends.cuda.math_sdp_enabled() if hasattr(torch.backends.cuda,"math_sdp_enabled") else None),
25
- }
26
- print(json.dumps(info, indent=2))
27
- if torch.cuda.is_available():
28
- for i in range(torch.cuda.device_count()):
29
- print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
30
- except Exception as e:
31
- print(f"[ERR torch] {type(e).__name__}: {e}")
32
  PY
33
- echo
34
 
 
35
  echo "[Apex]"
36
- python3 - <<'PY'
37
  try:
38
- import importlib
39
- importlib.import_module("apex.normalization")
40
- print("apex.normalization: OK")
41
  except Exception as e:
42
- print(f"Apex: ERR {type(e).__name__}: {e}")
43
  PY
44
- echo
45
 
 
46
  echo "[FlashAttention]"
47
- python3 - <<'PY'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  try:
49
  import flash_attn
50
- print(f"flash_attn: OK (version={getattr(flash_attn,'__version__', 'unknown')})")
51
- try:
52
- import flash_attn_2_cuda
53
- print("flash_attn_2_cuda: OK")
54
- except Exception as e:
55
- print(f"flash_attn_2_cuda: ERR {type(e).__name__}: {e}")
56
- except Exception as e:
57
- print(f"flash_attn: ERR {type(e).__name__}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  PY
59
- echo
60
 
 
61
  echo "[Triton]"
62
- python3 - <<'PY'
63
  try:
64
- import triton
65
- print(f"triton: OK (version={getattr(triton,'__version__','unknown')})")
66
- try:
67
- import triton.ops
68
- print("triton.ops: legacy module present")
69
- except ModuleNotFoundError:
70
- print("triton.ops: not present (ok on Triton>=3.x)")
71
- except Exception as e:
72
- print(f"triton.ops: WARN {type(e).__name__}: {e}")
73
  except Exception as e:
74
- print(f"triton: ERR {type(e).__name__}: {e}")
75
  PY
76
- echo
77
 
 
78
  echo "[BitsAndBytes (Q8/Q4)]"
79
- python3 - <<'PY'
80
  try:
81
- import bitsandbytes as bnb
82
- v = getattr(bnb, "__version__", "unknown")
83
- print(f"bitsandbytes: OK (version={v})")
84
- try:
85
- import bitsandbytes.triton.int8_matmul_mixed_dequantize as q8
86
- print("bnb.triton.int8_matmul_mixed_dequantize: OK")
87
- except ModuleNotFoundError:
88
- print("bnb.q8.triton: not present (disabled or no GPU build)")
89
- except Exception as e:
90
- print(f"bnb.q8.triton: WARN {type(e).__name__}: {e}")
91
  except Exception as e:
92
- print(f"bitsandbytes: ERR {type(e).__name__}: {e}")
93
  PY
94
- echo
95
 
 
96
  echo "[Transformers / Diffusers / XFormers]"
97
- python3 - <<'PY'
98
- import importlib
99
- def ver(name):
100
- try:
101
- m = importlib.import_module(name)
102
- return getattr(m, "__version__", "unknown")
103
- except Exception as e:
104
- return f"ERR:{type(e).__name__}"
105
- print("transformers:", ver("transformers"))
106
- print("diffusers:", ver("diffusers"))
107
- print("xformers:", ver("xformers"))
108
  PY
109
- echo
110
 
 
111
  echo "[Distribuído / NCCL Env]"
112
- env | egrep 'MASTER_|NCCL|CUDA_VISIBLE_DEVICES|TORCH_|ENABLE_' | sort
 
 
 
 
 
 
 
 
113
  echo "================= END CAPABILITIES ================="
 
2
  set -euo pipefail
3
 
4
  echo "================= RUNTIME CAPABILITIES ================="
5
+ date
6
+ if command -v nvidia-smi >/dev/null 2>&1; then
7
+ nvidia-smi
8
+ else
9
+ echo "nvidia-smi: not available"
10
+ fi
11
+
12
  echo
13
+ echo "CUDA_HOME: ${CUDA_HOME:-/usr/local/cuda}"
14
+ if command -v nvcc >/dev/null 2>&1; then
15
+ nvcc --version || true
16
+ else
17
+ echo "nvcc: not available"
18
+ fi
19
 
20
+ echo
21
  echo "[PyTorch / CUDA backend]"
22
+ python - <<'PY'
23
+ import json, os, torch
24
+ info = {
25
+ "torch": getattr(torch, "__version__", None),
26
+ "cuda_available": torch.cuda.is_available(),
27
+ "cuda_device_count": torch.cuda.device_count(),
28
+ "cuda_runtime_version": getattr(torch.version, "cuda", None),
29
+ "cudnn_version": torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else None,
30
+ "tf32": torch.backends.cuda.matmul.allow_tf32 if torch.cuda.is_available() else None,
31
+ "flash_sdp": torch.backends.cuda.enable_flash_sdp if torch.cuda.is_available() else None,
32
+ "mem_efficient_sdp": torch.backends.cuda.enable_mem_efficient_sdp if torch.cuda.is_available() else None,
33
+ "math_sdp": torch.backends.cuda.enable_math_sdp if torch.cuda.is_available() else None,
34
+ }
35
+ print(json.dumps(info, indent=2))
36
+ for i in range(min(torch.cuda.device_count(), 8)):
37
+ print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
 
 
 
 
 
38
  PY
 
39
 
40
+ echo
41
  echo "[Apex]"
42
+ python - <<'PY'
43
  try:
44
+ from apex.normalization import FusedLayerNorm, FusedRMSNorm
45
+ import importlib; importlib.import_module("fused_layer_norm_cuda")
46
+ print("apex.normalization: OK")
47
  except Exception as e:
48
+ print("apex.normalization: FAIL ->", e)
49
  PY
 
50
 
51
+ echo
52
  echo "[FlashAttention]"
53
+ python - <<'PY'
54
+ import importlib, sys
55
+ mods = ["flash_attn", "flash_attn_2_cuda"]
56
+ for m in mods:
57
+ try:
58
+ importlib.import_module(m); print(f"{m}: OK")
59
+ except Exception as e:
60
+ print(f"{m}: FAIL -> {e}")
61
+ PY
62
+
63
+ echo
64
+ echo "[FlashAttention LN test]"
65
+ python - <<'PY'
66
+ import os, warnings, importlib
67
+ warnings.filterwarnings("ignore", category=FutureWarning)
68
+ def ok_import(names):
69
+ for n in names:
70
+ try:
71
+ importlib.import_module(n)
72
+ print(f" [+] import '{n}' OK")
73
+ return True
74
+ except Exception as e:
75
+ print(f" [-] import '{n}' fail: {e}")
76
+ return False
77
+ fa_ver = None
78
  try:
79
  import flash_attn
80
+ fa_ver = getattr(flash_attn, "__version__", None)
81
+ except Exception:
82
+ pass
83
+ try:
84
+ import torch
85
+ tv = torch.__version__
86
+ cu = getattr(torch.version, "cuda", None)
87
+ except Exception:
88
+ tv, cu = "unknown", "unknown"
89
+ print(f" flash_attn version: {fa_ver}")
90
+ print(f" torch: {tv} | cuda: {cu} | TORCH_CUDA_ARCH_LIST={os.getenv('TORCH_CUDA_ARCH_LIST')}")
91
+ names_to_try = [
92
+ "flash_attn_2_cuda",
93
+ "flash_attn.ops.layer_norm",
94
+ "flash_attn.layers.layer_norm",
95
+ ]
96
+ ok = ok_import(names_to_try)
97
+ if not ok:
98
+ print(" Hint: faltam kernels de layer_norm/RMSNorm do FlashAttention.")
99
+ print(" Aceleração ficará reduzida; para instalar:")
100
+ print(" - Rodar builder para compilar e instalar flash_attn e salvar wheel para reuso;")
101
+ print(" - Ou instalar manualmente a tag compatível: Dao-AILab/flash-attention (csrc/layer_norm).")
102
+ print(" Doc: https://github.com/Dao-AILab/flash-attention/tree/main/csrc/layer_norm")
103
  PY
 
104
 
105
+ echo
106
  echo "[Triton]"
107
+ python - <<'PY'
108
  try:
109
+ import triton
110
+ print("triton:", triton.__version__)
111
+ try:
112
+ import triton.ops as _; print("triton.ops: OK")
113
+ except Exception as e:
114
+ print("triton.ops: not present (ok on Triton>=3.x)")
 
 
 
115
  except Exception as e:
116
+ print("triton: FAIL ->", e)
117
  PY
 
118
 
119
+ echo
120
  echo "[BitsAndBytes (Q8/Q4)]"
121
+ python - <<'PY'
122
  try:
123
+ import bitsandbytes as bnb
124
+ print("bitsandbytes:", bnb.__version__)
125
+ try:
126
+ from bitsandbytes.triton import _custom_ops as _; print("bnb.triton.int8_matmul_mixed_dequantize: OK")
127
+ except Exception as e:
128
+ print("bnb.triton: partial ->", e)
 
 
 
 
129
  except Exception as e:
130
+ print("bitsandbytes: FAIL ->", e)
131
  PY
 
132
 
133
+ echo
134
  echo "[Transformers / Diffusers / XFormers]"
135
+ python - <<'PY'
136
+ def _v(m):
137
+ try:
138
+ mod = __import__(m)
139
+ print(f"{m}:", getattr(mod, "__version__", "unknown"))
140
+ except Exception as e:
141
+ print(f"{m}: FAIL -> {e}")
142
+ for m in ("transformers","diffusers","xformers"):
143
+ _v(m)
 
 
144
  PY
 
145
 
146
+ echo
147
  echo "[Distribuído / NCCL Env]"
148
+ env | grep -E '^(CUDA_VISIBLE_DEVICES|NCCL_|TORCH_|ENABLE_.*SDP|HF_HUB_.*|CUDA_|NV_.*NCCL.*|PYTORCH_CUDA_ALLOC_CONF)=' | sort
149
+
150
+ echo
151
+ echo "[Caminhos e permissões de saída]"
152
+ OUT="/app/output"
153
+ echo "OUT dir: $OUT"
154
+ mkdir -p "$OUT"
155
+ ls -la "$OUT" || true
156
+
157
  echo "================= END CAPABILITIES ================="