Support fp8 dynamic quantization for fmha (#3206)

* Support qscale for dynamic quant, remove static quant
* Support hdim=256
* Remove bias test case for fp8

---------

Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
Co-authored-by: asleepzzz <hanwen.chang@amd.com>
2026-04-19 22:39:03 +00:00 · 2025-11-24 16:28:25 +08:00
parent 096f0a3b23
commit 5948dbffe4
17 changed files with 369 additions and 280 deletions
--- a/include/ck_tile/host/host_tensor.hpp
+++ b/include/ck_tile/host/host_tensor.hpp
@@ -598,6 +598,8 @@ struct HostTensor

    // Number of elements currently held in the underlying storage (mData).
    typename Data::size_type size() const
    {
        const auto element_count = mData.size();
        return element_count;
    }

+    // Returns the largest element stored in the tensor, compared with operator<
+    // (the default ordering used by std::max_element).
+    // An empty tensor yields a value-initialized T: std::max_element returns the
+    // end iterator for an empty range, and dereferencing it is undefined behavior.
+    T max() const
+    {
+        return mData.empty() ? T{} : *std::max_element(mData.begin(), mData.end());
+    }
+
    // return a slice of this tensor
    // for simplicity we just copy the data and return a new tensor
    auto slice(std::vector<size_t> s_begin, std::vector<size_t> s_end) const