# fp8_cublas ComfyUI node dependencies
#
# torchao is required for MXFP8 mode (SM100+, which includes the SM120 RTX 5090).
# It is bundled with recent ComfyUI installations; upgrade if needed.
torchao>=0.10.0
#
# gemm-fp8 (IST-DASLab) — OPTIONAL, SM89 (RTX 4090) only.
# Enables the optimized CUTLASS FP8 kernel that matches the 473 TFLOPS
# technique from: https://bit.ly/fp8-sm89-article
# Not available from PyPI (no release published). Install from a source checkout instead:
#
#   git clone --recurse-submodules https://github.com/IST-DASLab/gemm-fp8
#   cd gemm-fp8 && pip install -e .
#
# Without this, SM89 falls back to torch._scaled_mm (still faster than BF16).
# SM120+ (RTX 5090) does not use gemm-fp8 at all.
