refactored deviceBatchedGemm; removed GridwiseBatchedGemm; added fp32 and int8 to profiler (#120)

changed long_index_t to index_t when computing memory offset; uncommented other ops in profiler; added test for batched_gemm
2026-05-13 09:45:56 +00:00 · 2022-03-21 16:45:14 -05:00
parent 485ea46a40
commit cb87b049de
23 changed files with 1309 additions and 896 deletions
--- a/library/include/ck/library/host_tensor/host_tensor.hpp
+++ b/library/include/ck/library/host_tensor/host_tensor.hpp
@@ -317,7 +317,7 @@ float bf16_to_f32_(ck::bhalf_t src_val);
 void bf16_to_f32_(const Tensor<ck::bhalf_t>& src, Tensor<float>& dst);

 template <typename T>
-void check_error(const Tensor<T>& ref, const Tensor<T>& result)
+float check_error(const Tensor<T>& ref, const Tensor<T>& result)
 {
    float error     = 0;
    float max_diff  = -1;
@@ -354,6 +354,7 @@ void check_error(const Tensor<T>& ref, const Tensor<T>& result)

    std::cout << "error: " << error << std::endl;
    std::cout << "max_diff: " << max_diff << ", " << ref_value << ", " << result_value << std::endl;
+    return max_diff;
 }

 template <typename T>
--- a/library/include/ck/library/host_tensor/host_tensor_generator.hpp
+++ b/library/include/ck/library/host_tensor/host_tensor_generator.hpp
@@ -93,8 +93,8 @@ struct GeneratorTensor_2<int8_t>
 template <typename T>
 struct GeneratorTensor_3
 {
-    T min_value = 0;
-    T max_value = 1;
+    float min_value = 0;
+    float max_value = 1;

    template <typename... Is>
    T operator()(Is...)
@@ -122,22 +122,6 @@ struct GeneratorTensor_3<ck::bhalf_t>
    }
 };

-template <>
-struct GeneratorTensor_3<int8_t>
-{
-    float min_value = 0;
-    float max_value = 1;
-
-    template <typename... Is>
-    int8_t operator()(Is...)
-    {
-        int8_t min_tmp = static_cast<int8_t>(min_value);
-        int8_t max_tmp = static_cast<int8_t>(max_value);
-
-        return (std::rand() % (max_tmp - min_tmp)) + min_tmp;
-    }
-};
-
 struct GeneratorTensor_Checkboard
 {
    template <typename... Ts>