#!/usr/bin/env bash
set -euo pipefail
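# info.sh — prints a capability report for this runtime: GPU driver/CUDA toolkit,
# the PyTorch CUDA backend, optional accelerators (Apex, FlashAttention, Triton,
# bitsandbytes), the Hugging Face stack, and distributed/NCCL environment variables.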
echo "================= RUNTIME CAPABILITIES ================="
nvidia-smi || true
echo "CUDA_HOME: ${CUDA_HOME:-/usr/local/cuda}"
echo "NVCC: $(nvcc --version 2>/dev/null | tail -n1 || echo 'N/A')"
echo
echo "[PyTorch / CUDA backend]"
python3 - <<'PY'
import json
try:
    import torch
    info = {
        "torch": torch.__version__,
        "cuda_available": torch.cuda.is_available(),
        "cuda_device_count": torch.cuda.device_count(),
        "cuda_runtime_version": getattr(torch.version, "cuda", None),
        "cudnn_version": (torch.backends.cudnn.version() if torch.cuda.is_available() else None),
        "tf32": (torch.backends.cuda.matmul.allow_tf32 if torch.cuda.is_available() else None),
        "flash_sdp": (torch.backends.cuda.flash_sdp_enabled() if hasattr(torch.backends.cuda, "flash_sdp_enabled") else None),
        "mem_efficient_sdp": (torch.backends.cuda.mem_efficient_sdp_enabled() if hasattr(torch.backends.cuda, "mem_efficient_sdp_enabled") else None),
        "math_sdp": (torch.backends.cuda.math_sdp_enabled() if hasattr(torch.backends.cuda, "math_sdp_enabled") else None),
    }
    print(json.dumps(info, indent=2))
    if torch.cuda.is_available():
        for i in range(torch.cuda.device_count()):
            print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
except Exception as e:
    print(f"[ERR torch] {type(e).__name__}: {e}")
PY
echo
echo "[Apex]"
python3 - <<'PY'
try:
    import importlib
    importlib.import_module("apex.normalization")
    print("apex.normalization: OK")
except Exception as e:
    print(f"Apex: ERR {type(e).__name__}: {e}")
PY
echo
echo "[FlashAttention]"
python3 - <<'PY'
try:
    import flash_attn
    print(f"flash_attn: OK (version={getattr(flash_attn, '__version__', 'unknown')})")
    try:
        import flash_attn_2_cuda
        print("flash_attn_2_cuda: OK")
    except Exception as e:
        print(f"flash_attn_2_cuda: ERR {type(e).__name__}: {e}")
except Exception as e:
    print(f"flash_attn: ERR {type(e).__name__}: {e}")
PY
echo
echo "[Triton]"
python3 - <<'PY'
try:
    import triton
    print(f"triton: OK (version={getattr(triton, '__version__', 'unknown')})")
    try:
        import triton.ops
        print("triton.ops: legacy module present")
    except ModuleNotFoundError:
        print("triton.ops: not present (ok on Triton>=3.x)")
    except Exception as e:
        print(f"triton.ops: WARN {type(e).__name__}: {e}")
except Exception as e:
    print(f"triton: ERR {type(e).__name__}: {e}")
PY
echo
echo "[BitsAndBytes (Q8/Q4)]"
python3 - <<'PY'
try:
    import bitsandbytes as bnb
    v = getattr(bnb, "__version__", "unknown")
    print(f"bitsandbytes: OK (version={v})")
    try:
        import bitsandbytes.triton.int8_matmul_mixed_dequantize
        print("bnb.triton.int8_matmul_mixed_dequantize: OK")
    except ModuleNotFoundError:
        print("bnb.q8.triton: not present (disabled or no GPU build)")
    except Exception as e:
        print(f"bnb.q8.triton: WARN {type(e).__name__}: {e}")
except Exception as e:
    print(f"bitsandbytes: ERR {type(e).__name__}: {e}")
PY
echo
echo "[Transformers / Diffusers / XFormers]"
python3 - <<'PY'
import importlib

def ver(name):
    try:
        m = importlib.import_module(name)
        return getattr(m, "__version__", "unknown")
    except Exception as e:
        return f"ERR:{type(e).__name__}"

print("transformers:", ver("transformers"))
print("diffusers:", ver("diffusers"))
print("xformers:", ver("xformers"))
PY
echo
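# Dumps environment variables relevant to distributed runs (torchrun MASTER_*,
# NCCL_*, CUDA_VISIBLE_DEVICES, TORCH_*); the trailing '|| true' keeps
# 'set -euo pipefail' from killing the script when no variable matches.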
echo "[Distribuído / NCCL Env]"
env | egrep 'MASTER_|NCCL|CUDA_VISIBLE_DEVICES|TORCH_|ENABLE_' | sort
echo "================= END CAPABILITIES ================="