mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-13 09:45:56 +00:00
refactored deviceBatchedGemm; removed GridwiseBatchedGemm; added fp32 and int8 to profiler (#120)
changed long_index_t to index_t when computing memory offset; uncommented other ops in profiler; added test for batched_gemm
This commit is contained in:
@@ -317,7 +317,7 @@ float bf16_to_f32_(ck::bhalf_t src_val);
|
||||
void bf16_to_f32_(const Tensor<ck::bhalf_t>& src, Tensor<float>& dst);
|
||||
|
||||
template <typename T>
|
||||
void check_error(const Tensor<T>& ref, const Tensor<T>& result)
|
||||
float check_error(const Tensor<T>& ref, const Tensor<T>& result)
|
||||
{
|
||||
float error = 0;
|
||||
float max_diff = -1;
|
||||
@@ -354,6 +354,7 @@ void check_error(const Tensor<T>& ref, const Tensor<T>& result)
|
||||
|
||||
std::cout << "error: " << error << std::endl;
|
||||
std::cout << "max_diff: " << max_diff << ", " << ref_value << ", " << result_value << std::endl;
|
||||
return max_diff;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
||||
@@ -93,8 +93,8 @@ struct GeneratorTensor_2<int8_t>
|
||||
template <typename T>
|
||||
struct GeneratorTensor_3
|
||||
{
|
||||
T min_value = 0;
|
||||
T max_value = 1;
|
||||
float min_value = 0;
|
||||
float max_value = 1;
|
||||
|
||||
template <typename... Is>
|
||||
T operator()(Is...)
|
||||
@@ -122,22 +122,6 @@ struct GeneratorTensor_3<ck::bhalf_t>
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct GeneratorTensor_3<int8_t>
|
||||
{
|
||||
float min_value = 0;
|
||||
float max_value = 1;
|
||||
|
||||
template <typename... Is>
|
||||
int8_t operator()(Is...)
|
||||
{
|
||||
int8_t min_tmp = static_cast<int8_t>(min_value);
|
||||
int8_t max_tmp = static_cast<int8_t>(max_value);
|
||||
|
||||
return (std::rand() % (max_tmp - min_tmp)) + min_tmp;
|
||||
}
|
||||
};
|
||||
|
||||
struct GeneratorTensor_Checkboard
|
||||
{
|
||||
template <typename... Ts>
|
||||
|
||||
Reference in New Issue
Block a user