mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
* add DeviceGemmSplitKXdl
* add file device_gemm_splitk_xdl.hpp
* set c matrix zero
* using atomic
* add all tuning parameter to f32 mkkn
* grid size change to 720
* add tuning parameter for NT
* add tuning parameter for TN
* add tuning parameter for TT
* add m=96 tuning parameter
* add lost config
* add element wise operation
* fixed MPerBlock=96
* remove macro for split-k switch
* add test
* add new line at the end of device_gemm_xdl_instance.hpp
* remove step hack
* separate split-k instance files
* add tuning parameters
* change desired grid size to parameters
* remove slice length
* add desiredgridsize parameter to ckProfiler
* add missing file device_gemm_xdl_splitk_instance.hpp
* change desired grid size to kbatch
* format
* format
* clean up
* add selection of device_instances
* clean code
* fix build issue
Co-authored-by: ltqin <letaoqin@amd.com>
Co-authored-by: Chao Liu <chao.liu2@amd.com>
Co-authored-by: Jing Zhang <jizhan@amd.com>
[ROCm/composable_kernel commit: 4be7f0198e]
43 lines
1.8 KiB
C++
43 lines
1.8 KiB
C++
#ifndef DEVICE_GEMM_HPP
|
|
#define DEVICE_GEMM_HPP
|
|
|
|
#include <iostream>
|
|
#include "device_base.hpp"
|
|
|
|
namespace ck {
|
|
namespace tensor_operation {
|
|
namespace device {
|
|
|
|
template <typename AElementwiseOperation,
|
|
typename BElementwiseOperation,
|
|
typename CElementwiseOperation>
|
|
struct DeviceGemm : public BaseOperator
|
|
{
|
|
virtual std::unique_ptr<BaseArgument> MakeArgumentPointer(const void* p_a,
|
|
const void* p_b,
|
|
void* p_c,
|
|
ck::index_t M,
|
|
ck::index_t N,
|
|
ck::index_t K,
|
|
ck::index_t StrideA,
|
|
ck::index_t StrideB,
|
|
ck::index_t StrideC,
|
|
AElementwiseOperation a_element_op,
|
|
BElementwiseOperation b_element_op,
|
|
CElementwiseOperation c_element_op,
|
|
ck::index_t KBatch = 1) = 0;
|
|
|
|
virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0;
|
|
};
|
|
|
|
template <typename AElementwiseOperation,
|
|
typename BElementwiseOperation,
|
|
typename CElementwiseOperation>
|
|
using DeviceGemmPtr = std::unique_ptr<
|
|
DeviceGemm<AElementwiseOperation, BElementwiseOperation, CElementwiseOperation>>;
|
|
|
|
} // namespace device
|
|
} // namespace tensor_operation
|
|
} // namespace ck
|
|
#endif
|