mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-04 13:41:24 +00:00
Add conv bwd weight fp16 comp bf8 fp8 op, instances and example (#945)
* Add f8 bf8 gemm example * Add element-wise ops * Add intrinsics * Update reference calculation * Add an additional type option for xdlops gemm * Fix build process * Add bf8 to buffer addressing * Update blockwise op, split typeA and typeB * Update for compatibility * Uppdate naming to f8->fp8 * Update naming * Format * Update naming (#937) * Add a client example * Add computetypes to device and gridwise ops * Add instances, update instance factory * Format * Fix a flag * Add ckProfiler mode * Fix typos * Add an example * Add bf8 generator * add bf8 mfma; fixed type_convert for bf8 * move verfication ahead of timing * Update reference calculation * Fix reference * Narrow down float init range * Fix bf8 bf8 mfma * Add bf8 @ fp8 mfma * Update example * Update instances * Update profiler api * Update for compatibility * Format * Remove extra example * Clean up * workaround convert --------- Co-authored-by: Jing Zhang <jizha@amd.com>
This commit is contained in:
@@ -185,7 +185,8 @@ struct PassThrough
|
||||
template <>
|
||||
__host__ __device__ void operator()<bf8_t, half_t>(bf8_t& y, const half_t& x) const
|
||||
{
|
||||
y = type_convert<bf8_t>(x);
|
||||
// to-do: fix half_t to bf8_t convert
|
||||
y = ck::type_convert<bf8_t>(ck::type_convert<float>(x));
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user