mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-19 04:19:36 +00:00
Padded Generic Kernel Instance (#730)
* Add NumReduceDim template parameter to DeviceSoftmax and Softmax client API to simplify instances collecting * Move the generic kernel instance to be the first of the instance list for elementwise op of normalization * Add GetGenericInstance() interface for DeviceOperationInstanceFactory class of DeviceSoftmax * Add testing of GetGenericInstance() in client_example of Softmax * Revert "Add testing of GetGenericInstance() in client_example of Softmax" This reverts commitf629cd9a93. * Revert "Add GetGenericInstance() interface for DeviceOperationInstanceFactory class of DeviceSoftmax" This reverts commita9f0d000eb. * Support generic kernel instance to be the first instance returned by GetInstances() for GroupNorm * Move generic kernel instance to separate tuple for elementwise op of normalization * Remove un-used files for softmax instance * Store generic kernel instance to separate tuple for softmax * Add IsSupported checking for generic instance to client example of softmax * Replace the get_device_normalize_from_mean_meansquare_instances() by the DeviceOperationInstanceFactory class for elementwise-normalization * clang-format fix * Remove int8 from softmax instances --------- Co-authored-by: zjing14 <zhangjing14@gmail.com> [ROCm/composable_kernel commit:0d9118226b]
This commit is contained in:
@@ -72,6 +72,30 @@ int main(int argc, char* argv[])
|
||||
|
||||
std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
|
||||
|
||||
const auto& generic_op_ptr = op_ptrs[0];
|
||||
|
||||
auto generic_argument_ptr =
|
||||
generic_op_ptr->MakeArgumentPointer({N, H, W, G, C}, // lengths
|
||||
xy_strides, // xStrides
|
||||
gamma_beta_strides, // gammaStrides
|
||||
gamma_beta_strides, // betaStrides
|
||||
xy_strides, // yStrides
|
||||
{1, 2, 4}, // reduceDims
|
||||
1e-6,
|
||||
x_device_buf.GetDeviceBuffer(),
|
||||
gamma_device_buf.GetDeviceBuffer(),
|
||||
beta_device_buf.GetDeviceBuffer(),
|
||||
y_device_buf.GetDeviceBuffer(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
Swish{});
|
||||
|
||||
if(!generic_op_ptr->IsSupportedArgument(generic_argument_ptr.get()))
|
||||
{
|
||||
throw std::runtime_error(
|
||||
"The generic kernel instance should be able to support any input shapes");
|
||||
};
|
||||
|
||||
std::string best_op_name;
|
||||
bool found = false;
|
||||
int best_op_id = -1;
|
||||
|
||||
Reference in New Issue
Block a user