carlex3321 committed · verified
Commit 45255d6 · 1 Parent(s): 564ca3b

Update info.sh

Files changed (1): info.sh (+77, -90)
info.sh CHANGED
@@ -1,26 +1,30 @@
 #!/usr/bin/env bash
+
 set -euo pipefail
 
 echo "================= RUNTIME CAPABILITIES ================="
 date
+
+echo
 if command -v nvidia-smi >/dev/null 2>&1; then
   nvidia-smi
 else
   echo "nvidia-smi: not available"
 fi
-
 echo
+
 echo "CUDA_HOME: ${CUDA_HOME:-/usr/local/cuda}"
 if command -v nvcc >/dev/null 2>&1; then
   nvcc --version || true
 else
   echo "nvcc: not available"
 fi
-
 echo
+
 echo "[PyTorch / CUDA backend]"
-python - <<'PY'
+python3 - <<'PY'
 import json, os, torch, inspect
+
 def to_bool(x):
     try:
         if callable(x):
@@ -36,129 +40,112 @@ def to_bool(x):
     return None
 
 info = {
-    "torch": getattr(torch, "__version__", None),
-    "cuda_available": torch.cuda.is_available(),
-    "cuda_device_count": torch.cuda.device_count(),
-    "cuda_runtime_version": getattr(torch.version, "cuda", None),
-    "cudnn_version": torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else None,
-    "tf32": (torch.backends.cuda.matmul.allow_tf32 if torch.cuda.is_available() else None),
-    "flash_sdp": (to_bool(getattr(torch.backends.cuda, "enable_flash_sdp", None)) if torch.cuda.is_available() else None),
-    "mem_efficient_sdp": (to_bool(getattr(torch.backends.cuda, "enable_mem_efficient_sdp", None)) if torch.cuda.is_available() else None),
-    "math_sdp": (to_bool(getattr(torch.backends.cuda, "enable_math_sdp", None)) if torch.cuda.is_available() else None),
+    "torch": getattr(torch, "__version__", None),
+    "cuda_available": torch.cuda.is_available(),
+    "cuda_device_count": torch.cuda.device_count(),
+    "cuda_runtime_version": getattr(torch.version, "cuda", None),
+    "cudnn_version": torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else None,
+    "tf32": (torch.backends.cuda.matmul.allow_tf32 if torch.cuda.is_available() else None),
+    "flash_sdp": (to_bool(getattr(torch.backends.cuda, "enable_flash_sdp", None)) if torch.cuda.is_available() else None),
+    "mem_efficient_sdp": (to_bool(getattr(torch.backends.cuda, "enable_mem_efficient_sdp", None)) if torch.cuda.is_available() else None),
+    "math_sdp": (to_bool(getattr(torch.backends.cuda, "enable_math_sdp", None)) if torch.cuda.is_available() else None),
 }
 print(json.dumps(info, indent=2))
-for i in range(min(torch.cuda.device_count(), 8)):
-    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
+for i in range(min(torch.cuda.device_count(), 16)):
+    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
 PY
-
 echo
-echo "[Apex]"
-python - <<'PY'
+
+echo "[Apex (FusedLayerNorm/RMSNorm)]"
+python3 - <<'PY'
 try:
-    from apex.normalization import FusedLayerNorm, FusedRMSNorm
-    import importlib; importlib.import_module("fused_layer_norm_cuda")
-    print("apex.normalization: OK")
+    from apex.normalization import FusedLayerNorm, FusedRMSNorm
+    import importlib; importlib.import_module("fused_layer_norm_cuda")
+    print("apex.normalization: OK")
 except Exception as e:
-    print("apex.normalization: FAIL ->", e)
+    print("apex.normalization: FAIL ->", e)
 PY
-
 echo
-echo "[FlashAttention]"
-python - <<'PY'
+
+echo "[FlashAttention (CUDA/Triton/RMSNorm)]"
+python3 - <<'PY'
 import importlib
-for m in ("flash_attn","flash_attn_2_cuda"):
-    try:
-        importlib.import_module(m); print(f"{m}: OK")
-    except Exception as e:
-        print(f"{m}: FAIL -> {e}")
+mods = [
+    'flash_attn', 'flash_attn_2_cuda',
+    'flash_attn.ops.rms_norm', 'flash_attn.ops.layer_norm',
+    'flash_attn.layers.layer_norm'
+]
+for m in mods:
+    try:
+        importlib.import_module(m)
+        print(f"{m}: OK")
+    except Exception as e:
+        print(f"{m}: FAIL -> {e}")
 PY
-
 echo
-echo "[FlashAttention LN test]"
-python - <<'PY'
-import os, warnings, importlib
-warnings.filterwarnings("ignore", category=FutureWarning)
-def ok_import(names):
-    for n in names:
-        try:
-            importlib.import_module(n)
-            print(f" [+] import '{n}' OK")
-            return True
-        except Exception as e:
-            print(f" [-] import '{n}' fail: {e}")
-    return False
-fa_ver = None
+
+echo "[FlashAttention versão/details]"
+python3 - <<'PY'
 try:
     import flash_attn
     fa_ver = getattr(flash_attn, "__version__", None)
+    print(f"flash_attn: {fa_ver}")
 except Exception:
-    pass
+    print("flash_attn: not importable.")
 try:
     import torch
-    tv = torch.__version__
-    cu = getattr(torch.version, "cuda", None)
+    print(f"torch: {torch.__version__} | cuda: {getattr(torch.version, 'cuda', None)}")
 except Exception:
-    tv, cu = "unknown", "unknown"
-print(f" flash_attn version: {fa_ver}")
-print(f" torch: {tv} | cuda: {cu} | TORCH_CUDA_ARCH_LIST={os.getenv('TORCH_CUDA_ARCH_LIST')}")
-names_to_try = [
-    "flash_attn_2_cuda",
-    "flash_attn.ops.layer_norm",
-    "flash_attn.layers.layer_norm",
-]
-ok = ok_import(names_to_try)
-if not ok:
-    print(" Hint: faltam kernels LN/RMSNorm do FlashAttention (performance reduzida).")
-    print(" Use builder.sh para compilar flash_attn e reutilizar a wheel.")
+    pass
 PY
-
 echo
+
 echo "[Triton]"
-python - <<'PY'
+python3 - <<'PY'
 try:
-    import triton
-    print("triton:", triton.__version__)
-    try:
-        import triton.ops as _; print("triton.ops: OK")
-    except Exception:
-        print("triton.ops: not present (ok on Triton>=3.x)")
+    import triton
+    print("triton:", triton.__version__)
+    try:
+        import triton.ops as _; print("triton.ops: OK")
+    except Exception:
+        print("triton.ops: not present (ok on Triton>=3.x)")
 except Exception as e:
-    print("triton: FAIL ->", e)
+    print("triton: FAIL ->", e)
 PY
-
 echo
+
 echo "[BitsAndBytes (Q8/Q4)]"
-python - <<'PY'
+python3 - <<'PY'
 try:
-    import bitsandbytes as bnb
-    print("bitsandbytes:", bnb.__version__)
-    try:
-        from bitsandbytes.triton import _custom_ops as _; print("bnb.triton.int8_matmul_mixed_dequantize: OK")
-    except Exception as e:
-        print("bnb.triton: partial ->", e)
+    import bitsandbytes as bnb
+    print("bitsandbytes:", bnb.__version__)
+    try:
+        from bitsandbytes.triton import _custom_ops as _; print("bnb.triton._custom_ops: OK")
+    except Exception as e:
+        print("bnb.triton: partial ->", e)
 except Exception as e:
-    print("bitsandbytes: FAIL ->", e)
+    print("bitsandbytes: FAIL ->", e)
 PY
-
 echo
-echo "[Transformers / Diffusers / XFormers]"
-python - <<'PY'
+
+echo "[Transformers / Diffusers / XFormers / EcoML]"
+python3 - <<'PY'
 def _v(m):
-    try:
-        mod = __import__(m)
-        print(f"{m}:", getattr(mod, "__version__", "unknown"))
-    except Exception as e:
-        print(f"{m}: FAIL -> {e}")
-for m in ("transformers","diffusers","xformers"):
-    _v(m)
+    try:
+        mod = __import__(m)
+        print(f"{m}: {getattr(mod, '__version__', 'unknown')}")
+    except Exception as e:
+        print(f"{m}: FAIL -> {e}")
+for m in ("transformers", "diffusers", "xformers", "ecuml", "mlx", "ecobase"):
+    _v(m)
 PY
-
 echo
+
 echo "[Distribuído / NCCL Env]"
 env | grep -E '^(CUDA_VISIBLE_DEVICES|NCCL_|TORCH_|ENABLE_.*SDP|HF_HUB_.*|CUDA_|NV_.*NCCL.*|PYTORCH_CUDA_ALLOC_CONF)=' | sort
-
 echo
-echo "[Caminhos e permissões de saída]"
+
+echo "[Output dir/perms]"
 OUT="/app/outputs"
 echo "OUT dir: $OUT"
 mkdir -p "$OUT"
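
Every probe in info.sh follows the same shape: an echo "[Section]" banner, a python3 - <<'PY' ... PY heredoc that attempts an import and prints either a version or "FAIL -> <error>", and an echo separator, so the script reports a missing package without aborting. As a minimal sketch of how one more probe could be written in that same style, the snippet below queries PyTorch's SDPA backend state directly instead of inspecting the enable_* setters as the script does. The section title is invented here, and the torch.backends.cuda.*_sdp_enabled() query functions are assumed to exist (they do in PyTorch 2.x); the call is wrapped in the same try/except pattern in case they do not.

echo
echo "[SDPA backends (query API)]"   # hypothetical extra section, not part of this commit
python3 - <<'PY'
# Sketch: ask PyTorch 2.x which scaled-dot-product-attention backends are enabled.
import torch
try:
    print("flash_sdp_enabled:", torch.backends.cuda.flash_sdp_enabled())
    print("mem_efficient_sdp_enabled:", torch.backends.cuda.mem_efficient_sdp_enabled())
    print("math_sdp_enabled:", torch.backends.cuda.math_sdp_enabled())
except Exception as e:
    print("SDPA query API: not available ->", e)
PY

One way to keep the full report is mkdir -p /app/outputs && bash info.sh 2>&1 | tee /app/outputs/capabilities.txt; the file name is arbitrary, and /app/outputs is the same directory the script itself creates at the end.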