mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 05:01:25 +00:00
Add example of Gemm + AddAddFastGelu (data type: int4) (#369)
* Add custom target to bundle examples together
* Add int4 example conditionally (just copy from int8 example)
* Extract common code into common.hpp
* Move ref gemm type alias into data-type-specific sources
* Add #error directive to prevent compiling with the wrong setting
* Let AddAddFastGelu support int4 parameter type
* Let check_err() support int4 parameter type
* Add wrapper function to hide value conversion while copying memory
* Finish int4 example for GEMM + AddAddFastGelu
* Add new DeviceMem API to copy memory
* Use new DeviceMem API to implement examples
* Fix wrong use of macro 'CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4'
* Revert "Add new DeviceMem API to copy memory"
This reverts commit e26e7af71e.
* Add conversion ctor for Tensor<>
* Add 'const' specifier to Tensor<>::CopyAsType()
* Convert Tensor<> values before/after transfer between host & device
This commit is contained in:
@@ -177,7 +177,11 @@ struct AddAddFastGelu
     template <typename T>
     static inline constexpr bool is_valid_param_type_v =
         std::is_same_v<T, float> || std::is_same_v<T, half_t> || std::is_same_v<T, bhalf_t> ||
-        std::is_same_v<T, int32_t> || std::is_same_v<T, int8_t>;
+        std::is_same_v<T, int32_t> || std::is_same_v<T, int8_t>
+#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
+        || std::is_same_v<T, ck::int4_t>
+#endif
+        ;
 
     template <typename E, typename C, typename D0, typename D1>
     __host__ __device__ constexpr void
Reference in New Issue
Block a user