refactored deviceBatchedGemm; removed GridwiseBatchedGemm; added fp32 and int8 to profiler (#120)

changed long_index_t to index_t when computing memory offset

uncommented other ops in profiler

added test for batched_gemm
This commit is contained in:
Jianfeng Yan
2022-03-21 16:45:14 -05:00
committed by GitHub
parent 485ea46a40
commit cb87b049de
23 changed files with 1309 additions and 896 deletions

View File

@@ -317,7 +317,7 @@ float bf16_to_f32_(ck::bhalf_t src_val);
void bf16_to_f32_(const Tensor<ck::bhalf_t>& src, Tensor<float>& dst);
template <typename T>
void check_error(const Tensor<T>& ref, const Tensor<T>& result)
float check_error(const Tensor<T>& ref, const Tensor<T>& result)
{
float error = 0;
float max_diff = -1;
@@ -354,6 +354,7 @@ void check_error(const Tensor<T>& ref, const Tensor<T>& result)
std::cout << "error: " << error << std::endl;
std::cout << "max_diff: " << max_diff << ", " << ref_value << ", " << result_value << std::endl;
return max_diff;
}
template <typename T>

View File

@@ -93,8 +93,8 @@ struct GeneratorTensor_2<int8_t>
template <typename T>
struct GeneratorTensor_3
{
T min_value = 0;
T max_value = 1;
float min_value = 0;
float max_value = 1;
template <typename... Is>
T operator()(Is...)
@@ -122,22 +122,6 @@ struct GeneratorTensor_3<ck::bhalf_t>
}
};
// Specialization of GeneratorTensor_3 for int8_t.
//
// Produces pseudo-random int8_t values using std::rand(). The bounds are
// stored as float and truncated to int8_t on every call; for
// max_tmp > min_tmp the result lies in the half-open range
// [min_tmp, max_tmp). With the default bounds (0 and 1) the generator
// therefore always yields 0.
template <>
struct GeneratorTensor_3<int8_t>
{
    float min_value = 0; // lower bound (inclusive after truncation to int8_t)
    float max_value = 1; // upper bound (exclusive after truncation to int8_t)

    // The index arguments are accepted but ignored; the returned value does
    // not depend on them.
    template <typename... Is>
    int8_t operator()(Is...)
    {
        const int8_t min_tmp = static_cast<int8_t>(min_value);
        const int8_t max_tmp = static_cast<int8_t>(max_value);

        // int8_t operands are promoted to int, so the subtraction cannot overflow.
        const int range = max_tmp - min_tmp;

        // Both bounds can truncate to the same integer (e.g. 0.2 and 0.9).
        // A modulus by zero is undefined behaviour, so return the only
        // representable value instead.
        if(range == 0)
        {
            return min_tmp;
        }

        return static_cast<int8_t>((std::rand() % range) + min_tmp);
    }
};
struct GeneratorTensor_Checkboard
{
template <typename... Ts>