mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-19 12:30:16 +00:00
Grouped conv_fwd_bias_bnorm_clamp instances and tests (#3525)
* Added bias_bnorm_clamp instances.
* fwd_bias_bnorm_clamp comp instances
* fwd_bias_bnorm_mem_inter and mem_intra instances
* fwd_bias_bnorm_merged_group_instances
* fwd_bias_bnorm_clamp_conv3d_bf16 and f16 instances
* Device level changes for fwd_bias_bnorm_clamp
* Added the test to the regression test list.
* Removed the part 2 and 2x instances
* Removed the irrelevant checks in wmma
* Refactored the instances to adapt to new device implementation
* Updated the reference and include files
* enabling tests
* Added missing profiler
* Added missing instance entry, deleted by mistake
* Reduce bias bnorm clamp instances to only a single generic one.
* Clean up cmakelists file
* clang-format
* Change bias bnorm clamp tests to use monotone initialization values to prevent tiny off-integer gemm results on RDNA3 from blowing up.
* Renaming some instance lists and add functions to be more standardized.
* Commented out non default instances.
---------
Co-authored-by: kiefer <kiefer.van.teutem@streamhpc.com>
[ROCm/composable_kernel commit: 8daf6ea302]
This commit is contained in:
@@ -122,12 +122,12 @@ template <ck::index_t NDimSpatial,
|
||||
typename BComputeType = AComputeType,
|
||||
typename IndexType = ck::index_t,
|
||||
bool ElementwiseGK = false>
|
||||
bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification,
|
||||
int init_method,
|
||||
bool do_log,
|
||||
bool time_kernel,
|
||||
const ck::utils::conv::ConvParam& conv_param,
|
||||
int instance_index = -1)
|
||||
bool profile_grouped_conv_fwd_bias_bnorm_clamp_impl(int do_verification,
|
||||
int init_method,
|
||||
bool do_log,
|
||||
bool time_kernel,
|
||||
const ck::utils::conv::ConvParam& conv_param,
|
||||
int instance_index = -1)
|
||||
{
|
||||
const float floor = 0.f;
|
||||
const float ceil = 2048.f;
|
||||
@@ -198,18 +198,29 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification,
|
||||
std::cout << "scale: " << scale.mDesc << std::endl;
|
||||
std::cout << "shift: " << shift.mDesc << std::endl;
|
||||
|
||||
// Note: For the integer initialization method (which is used for verification in the tests), I
|
||||
// changed the initialization ranges such that the overall operation becomes monotone. This
|
||||
// means that all multiplications are positive, and all additions are positive. Without this,
|
||||
// the outelementop can make small relative errors arbitrarily large by shifting them toward
|
||||
// zero. In this specific case this should not be an issue, since small integer inputs should
|
||||
// lead to exact outputs from the gemm. However, this is not the case on RDNA3, where integer
|
||||
// inputs can lead to slightly off-integer outputs. This is another issue to investigate, but it
|
||||
// remains the case that the outelementop blowing up tiny errors is not reasonable, so changing
|
||||
// the operation to monotone for now. If we want to move away from monotone we would need to
|
||||
// have a proper error propagation analysis, which is much more complicated.
|
||||
switch(init_method)
|
||||
{
|
||||
case 0: break;
|
||||
case 1:
|
||||
input.GenerateTensorValue(GeneratorTensor_2<InDataType>{-5, 5});
|
||||
weight.GenerateTensorValue(GeneratorTensor_2<WeiDataType>{-5, 5});
|
||||
input.GenerateTensorValue(GeneratorTensor_2<InDataType>{0, 5});
|
||||
weight.GenerateTensorValue(GeneratorTensor_2<WeiDataType>{0, 5});
|
||||
|
||||
bias.GenerateTensorValue(GeneratorTensor_2<OutDataType>{-5, 5});
|
||||
mean.GenerateTensorValue(GeneratorTensor_2<OutDataType>{-5, 5});
|
||||
bias.GenerateTensorValue(GeneratorTensor_2<OutDataType>{0, 5});
|
||||
// Mean is negative because this is subtracted.
|
||||
mean.GenerateTensorValue(GeneratorTensor_2<OutDataType>{-5, 0});
|
||||
variance.GenerateTensorValue(GeneratorTensor_2<OutDataType>{0, 5});
|
||||
scale.GenerateTensorValue(GeneratorTensor_2<OutDataType>{-5, 5});
|
||||
shift.GenerateTensorValue(GeneratorTensor_2<OutDataType>{-5, 5});
|
||||
scale.GenerateTensorValue(GeneratorTensor_2<OutDataType>{0, 5});
|
||||
shift.GenerateTensorValue(GeneratorTensor_2<OutDataType>{0, 5});
|
||||
break;
|
||||
default:
|
||||
input.GenerateTensorValue(GeneratorTensor_3<InDataType>{0.0, 1.0});
|
||||
|
||||
Reference in New Issue
Block a user