#!/usr/bin/env bash

set -euo pipefail

echo "================= RUNTIME CAPABILITIES ================="
date

echo
if command -v nvidia-smi >/dev/null 2>&1; then
  nvidia-smi
else
  echo "nvidia-smi: not available"
fi
echo

echo "CUDA_HOME: ${CUDA_HOME:-/usr/local/cuda}"
if command -v nvcc >/dev/null 2>&1; then
  nvcc --version || true
else
  echo "nvcc: not available"
fi
echo

echo "[PyTorch / CUDA backend]"
python3 - <<'PY'
import json, torch

def sdp_enabled(name):
    # torch>=2.0 exposes query functions such as flash_sdp_enabled();
    # older builds only have the enable_* setters, so fall back to
    # reporting whether the setter exists at all.
    fn = getattr(torch.backends.cuda, f"{name}_enabled", None)
    if callable(fn):
        try:
            return bool(fn())
        except Exception:
            pass
    return callable(getattr(torch.backends.cuda, f"enable_{name}", None))

info = {
    "torch": getattr(torch, "__version__", None),
    "cuda_available": torch.cuda.is_available(),
    "cuda_device_count": torch.cuda.device_count(),
    "cuda_runtime_version": getattr(torch.version, "cuda", None),
    "cudnn_version": torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else None,
    "tf32": (torch.backends.cuda.matmul.allow_tf32 if torch.cuda.is_available() else None),
    "flash_sdp": (sdp_enabled("flash_sdp") if torch.cuda.is_available() else None),
    "mem_efficient_sdp": (sdp_enabled("mem_efficient_sdp") if torch.cuda.is_available() else None),
    "math_sdp": (sdp_enabled("math_sdp") if torch.cuda.is_available() else None),
}
print(json.dumps(info, indent=2))
for i in range(min(torch.cuda.device_count(), 16)):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
PY
echo
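echo "[SDPA smoke test]"
# Hedged sketch, not part of the original checks: run one tiny
# torch.nn.functional.scaled_dot_product_attention call (torch>=2.0 API)
# so the SDP backends reported above are exercised, not just introspected.
python3 - <<'PY'
import torch
if torch.cuda.is_available():
    # (batch, heads, seqlen, headdim) layout expected by SDPA.
    q = torch.randn(1, 2, 8, 16, device="cuda", dtype=torch.float16)
    try:
        out = torch.nn.functional.scaled_dot_product_attention(q, q, q)
        print("scaled_dot_product_attention: OK", tuple(out.shape))
    except Exception as e:
        print("scaled_dot_product_attention: FAIL ->", e)
else:
    print("scaled_dot_product_attention: skipped (no CUDA)")
PY
echo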

echo "[Apex (FusedLayerNorm/RMSNorm)]"
python3 - <<'PY'
try:
    # Importing the Python wrappers plus the compiled extension module is
    # itself the test; the imported names are otherwise unused.
    from apex.normalization import FusedLayerNorm, FusedRMSNorm
    import importlib
    importlib.import_module("fused_layer_norm_cuda")
    print("apex.normalization: OK")
except Exception as e:
    print("apex.normalization: FAIL ->", e)
PY
echo
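echo "[Apex smoke test]"
# Hedged sketch, not part of the original checks: a minimal FusedRMSNorm
# forward pass (assumes apex's FusedRMSNorm(normalized_shape) signature)
# to exercise the fused kernel beyond mere importability.
python3 - <<'PY'
import torch
try:
    from apex.normalization import FusedRMSNorm
    if torch.cuda.is_available():
        norm = FusedRMSNorm(16).cuda()
        y = norm(torch.randn(2, 16, device="cuda"))
        print("FusedRMSNorm forward: OK", tuple(y.shape))
    else:
        print("FusedRMSNorm forward: skipped (no CUDA)")
except Exception as e:
    print("FusedRMSNorm forward: FAIL ->", e)
PY
echo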

echo "[FlashAttention (CUDA/Triton/RMSNorm)]"
python3 - <<'PY'
import importlib
mods = [
    'flash_attn', 'flash_attn_2_cuda',
    'flash_attn.ops.rms_norm', 'flash_attn.ops.layer_norm',
    'flash_attn.layers.layer_norm'
]
for m in mods:
    try:
        importlib.import_module(m)
        print(f"{m}: OK")
    except Exception as e:
        print(f"{m}: FAIL -> {e}")
PY
echo
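echo "[FlashAttention smoke test]"
# Hedged sketch, not part of the original checks: call flash_attn_func
# (the public flash_attn API; fp16 tensors in [batch, seqlen, nheads,
# headdim] layout) to confirm the CUDA kernels execute, not just import.
python3 - <<'PY'
import torch
try:
    from flash_attn import flash_attn_func
    if torch.cuda.is_available():
        q = torch.randn(1, 8, 2, 64, device="cuda", dtype=torch.float16)
        out = flash_attn_func(q, q, q, causal=True)
        print("flash_attn_func: OK", tuple(out.shape))
    else:
        print("flash_attn_func: skipped (no CUDA)")
except Exception as e:
    print("flash_attn_func: FAIL ->", e)
PY
echo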

echo "[FlashAttention versão/details]"
python3 - <<'PY'
try:
    import flash_attn
    fa_ver = getattr(flash_attn, "__version__", None)
    print(f"flash_attn: {fa_ver}")
except Exception:
    print("flash_attn: not importable.")
try:
    import torch
    print(f"torch: {torch.__version__} | cuda: {getattr(torch.version, 'cuda', None)}")
except Exception:
    pass
PY
echo

echo "[Triton]"
python3 - <<'PY'
try:
    import triton
    print("triton:", triton.__version__)
    try:
        import triton.ops as _; print("triton.ops: OK")
    except Exception:
        print("triton.ops: not present (ok on Triton>=3.x)")
except Exception as e:
    print("triton: FAIL ->", e)
PY
echo
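echo "[Triton smoke test]"
# Hedged sketch, not part of the original checks: compile and launch a
# trivial vector-add kernel so Triton's JIT and the CUDA driver path are
# exercised end to end; skipped without CUDA.
python3 - <<'PY'
import torch
try:
    import triton
    import triton.language as tl

    @triton.jit
    def add_kernel(x_ptr, y_ptr, out_ptr, n, BLOCK: tl.constexpr):
        # Each program instance handles one BLOCK-sized slice of the input.
        offs = tl.program_id(0) * BLOCK + tl.arange(0, BLOCK)
        mask = offs < n
        x = tl.load(x_ptr + offs, mask=mask)
        y = tl.load(y_ptr + offs, mask=mask)
        tl.store(out_ptr + offs, x + y, mask=mask)

    if torch.cuda.is_available():
        n = 1024
        x = torch.randn(n, device="cuda")
        y = torch.randn(n, device="cuda")
        out = torch.empty_like(x)
        add_kernel[(triton.cdiv(n, 256),)](x, y, out, n, BLOCK=256)
        print("triton add_kernel:", "OK" if torch.allclose(out, x + y) else "MISMATCH")
    else:
        print("triton add_kernel: skipped (no CUDA)")
except Exception as e:
    print("triton add_kernel: FAIL ->", e)
PY
echo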

echo "[BitsAndBytes (Q8/Q4)]"
python3 - <<'PY'
try:
    import bitsandbytes as bnb
    print("bitsandbytes:", bnb.__version__)
    try:
        from bitsandbytes.triton import _custom_ops as _; print("bnb.triton._custom_ops: OK")
    except Exception as e:
        print("bnb.triton: partial ->", e)
except Exception as e:
    print("bitsandbytes: FAIL ->", e)
PY
echo
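echo "[BitsAndBytes smoke test]"
# Hedged sketch, not part of the original checks: one forward pass through
# bnb.nn.Linear8bitLt (real bitsandbytes API; has_fp16_weights=False
# triggers int8 quantization on .cuda()) to confirm the kernels load.
python3 - <<'PY'
import torch
try:
    import bitsandbytes as bnb
    if torch.cuda.is_available():
        lin = bnb.nn.Linear8bitLt(16, 16, has_fp16_weights=False).cuda()
        y = lin(torch.randn(2, 16, device="cuda", dtype=torch.float16))
        print("Linear8bitLt forward: OK", tuple(y.shape))
    else:
        print("Linear8bitLt forward: skipped (no CUDA)")
except Exception as e:
    print("Linear8bitLt forward: FAIL ->", e)
PY
echo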

echo "[Transformers / Diffusers / XFormers / EcoML]"
python3 - <<'PY'
def _v(m):
    try:
        mod = __import__(m)
        print(f"{m}: {getattr(mod, '__version__', 'unknown')}")
    except Exception as e:
        print(f"{m}: FAIL -> {e}")
for m in ("transformers", "diffusers", "xformers", "ecuml", "mlx", "ecobase"):
    _v(m)
PY
echo
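echo "[XFormers smoke test]"
# Hedged sketch, not part of the original checks: one call to
# xformers.ops.memory_efficient_attention (expects [B, M, H, K] tensors)
# to verify the attention kernels run; skipped without CUDA.
python3 - <<'PY'
import torch
try:
    import xformers.ops as xops
    if torch.cuda.is_available():
        q = torch.randn(1, 8, 2, 64, device="cuda", dtype=torch.float16)
        out = xops.memory_efficient_attention(q, q, q)
        print("memory_efficient_attention: OK", tuple(out.shape))
    else:
        print("memory_efficient_attention: skipped (no CUDA)")
except Exception as e:
    print("memory_efficient_attention: FAIL ->", e)
PY
echo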

echo "[Distribuído / NCCL Env]"
# grep exits non-zero when nothing matches, which would kill the script
# under `set -euo pipefail`; `|| true` keeps an empty result non-fatal.
env | grep -E '^(CUDA_VISIBLE_DEVICES|NCCL_|TORCH_|ENABLE_.*SDP|HF_HUB_.*|CUDA_|NV_.*NCCL.*|PYTORCH_CUDA_ALLOC_CONF)=' | sort || true
echo

echo "[Output dir/perms]"
OUT="/app/outputs"
echo "OUT dir: $OUT"
mkdir -p "$OUT"
ls -la "$OUT" || true

echo "================= END CAPABILITIES ================="