mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-05 20:55:59 +00:00
Fix large case init bounds
This commit is contained in:
@@ -33,13 +33,16 @@ template <ck::index_t NDimSpatial,
|
||||
typename WeiDataType,
|
||||
typename InDataType,
|
||||
typename ComputeDataType = InDataType>
|
||||
bool profile_grouped_conv_bwd_data_impl(int do_verification,
|
||||
int init_method,
|
||||
bool do_log,
|
||||
bool time_kernel,
|
||||
const ck::utils::conv::ConvParam& conv_param,
|
||||
ck::index_t split_k = 1,
|
||||
index_t instance_index = -1)
|
||||
bool profile_grouped_conv_bwd_data_impl(
|
||||
int do_verification,
|
||||
int init_method,
|
||||
bool do_log,
|
||||
bool time_kernel,
|
||||
const ck::utils::conv::ConvParam& conv_param,
|
||||
ck::index_t split_k = 1,
|
||||
index_t instance_index = -1,
|
||||
std::optional<std::array<float, 2>> init_bounds_out = std::nullopt,
|
||||
std::optional<std::array<float, 2>> init_bounds_wei = std::nullopt)
|
||||
{
|
||||
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
@@ -72,6 +75,16 @@ bool profile_grouped_conv_bwd_data_impl(int do_verification,
|
||||
DeviceMem wei_device_buf(sizeof(WeiDataType) * wei_element_space_size);
|
||||
DeviceMem in_device_buf(sizeof(InDataType) * in_element_space_size);
|
||||
|
||||
// Initialization bounds for output and weight tensors
|
||||
// Output: {-5, -4, ..., 3, 4} for integers, [0, 1) for floats.
|
||||
const auto default_out =
|
||||
(init_method == 1) ? std::array<float, 2>{-5.0f, 5.0f} : std::array<float, 2>{0.0f, 1.0f};
|
||||
const auto [out_min, out_max] = init_bounds_out.value_or(default_out);
|
||||
// Weight: {-5, -4, ..., 3, 4} for integers, [-0.5, 0.5) for floats.
|
||||
const auto default_wei =
|
||||
(init_method == 1) ? std::array<float, 2>{-5.0f, 5.0f} : std::array<float, 2>{-0.5f, 0.5f};
|
||||
const auto [wei_min, wei_max] = init_bounds_wei.value_or(default_wei);
|
||||
|
||||
// Generate data directly on GPU using DeviceMem methods
|
||||
switch(init_method)
|
||||
{
|
||||
@@ -82,13 +95,13 @@ bool profile_grouped_conv_bwd_data_impl(int do_verification,
|
||||
break;
|
||||
case 1:
|
||||
// Discrete integer values; defaults to {-5, -4, ..., 3, 4} unless init bounds are supplied
|
||||
out_device_buf.FillUniformRandInteger<OutDataType>(-5, 5);
|
||||
wei_device_buf.FillUniformRandInteger<WeiDataType>(-5, 5);
|
||||
out_device_buf.FillUniformRandInteger<OutDataType>(out_min, out_max);
|
||||
wei_device_buf.FillUniformRandInteger<WeiDataType>(wei_min, wei_max);
|
||||
break;
|
||||
case 2:
|
||||
// Continuous float values
|
||||
out_device_buf.FillUniformRandFp<OutDataType>(0.0f, 1.0f);
|
||||
wei_device_buf.FillUniformRandFp<WeiDataType>(-0.5f, 0.5f);
|
||||
out_device_buf.FillUniformRandFp<OutDataType>(out_min, out_max);
|
||||
wei_device_buf.FillUniformRandFp<WeiDataType>(wei_min, wei_max);
|
||||
break;
|
||||
default:
|
||||
// Constant value 1
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <optional>
|
||||
#include <typeinfo>
|
||||
|
||||
#include "ck/ck.hpp"
|
||||
@@ -39,13 +40,16 @@ template <ck::index_t NDimSpatial,
|
||||
typename BComputeType = AComputeType,
|
||||
typename IndexType = ck::index_t,
|
||||
typename OutElementOp = ck::tensor_operation::element_wise::PassThrough>
|
||||
bool profile_grouped_conv_fwd_impl(int do_verification,
|
||||
int init_method,
|
||||
bool do_log,
|
||||
bool time_kernel,
|
||||
const ck::utils::conv::ConvParam& conv_param,
|
||||
const OutElementOp out_element_op = OutElementOp{},
|
||||
index_t instance_index = -1)
|
||||
bool profile_grouped_conv_fwd_impl(
|
||||
int do_verification,
|
||||
int init_method,
|
||||
bool do_log,
|
||||
bool time_kernel,
|
||||
const ck::utils::conv::ConvParam& conv_param,
|
||||
const OutElementOp out_element_op = OutElementOp{},
|
||||
index_t instance_index = -1,
|
||||
std::optional<std::array<float, 2>> init_bounds_in = std::nullopt,
|
||||
std::optional<std::array<float, 2>> init_bounds_wei = std::nullopt)
|
||||
{
|
||||
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
@@ -100,6 +104,16 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
|
||||
DeviceMem wei_device_buf(sizeof(WeiDataType) * weight_size);
|
||||
DeviceMem out_device_buf(sizeof(OutDataType) * output_size);
|
||||
|
||||
// Initialization bounds for input and weight tensors
|
||||
// Input: {-5, -4, ..., 3, 4} for integers, [0, 1) for floats.
|
||||
const auto default_in =
|
||||
(init_method == 1) ? std::array<float, 2>{-5.0f, 5.0f} : std::array<float, 2>{0.0f, 1.0f};
|
||||
const auto [in_min, in_max] = init_bounds_in.value_or(default_in);
|
||||
// Weight: {-5, -4, ..., 3, 4} for integers, [-0.5, 0.5) for floats.
|
||||
const auto default_wei =
|
||||
(init_method == 1) ? std::array<float, 2>{-5.0f, 5.0f} : std::array<float, 2>{-0.5f, 0.5f};
|
||||
const auto [wei_min, wei_max] = init_bounds_wei.value_or(default_wei);
|
||||
|
||||
// Generate data directly on GPU using DeviceMem methods
|
||||
switch(init_method)
|
||||
{
|
||||
@@ -109,14 +123,14 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
|
||||
wei_device_buf.SetZero();
|
||||
break;
|
||||
case 1:
|
||||
// Discrete integer generation: {-5, -4, -3, ..., 3, 4}
|
||||
in_device_buf.FillUniformRandInteger<InDataType>(-5, 5);
|
||||
wei_device_buf.FillUniformRandInteger<WeiDataType>(-5, 5);
|
||||
// Discrete integer generation
|
||||
in_device_buf.FillUniformRandInteger<InDataType>(in_min, in_max);
|
||||
wei_device_buf.FillUniformRandInteger<WeiDataType>(wei_min, wei_max);
|
||||
break;
|
||||
default:
|
||||
// Continuous float generation
|
||||
in_device_buf.FillUniformRandFp<InDataType>(0.0f, 1.0f);
|
||||
wei_device_buf.FillUniformRandFp<WeiDataType>(-0.5f, 0.5f);
|
||||
in_device_buf.FillUniformRandFp<InDataType>(in_min, in_max);
|
||||
wei_device_buf.FillUniformRandFp<WeiDataType>(wei_min, wei_max);
|
||||
}
|
||||
|
||||
// Create host tensors (for verification if needed)
|
||||
|
||||
@@ -40,11 +40,14 @@ class TestGroupedConvndBwdData : public ::testing::Test
|
||||
DataType,
|
||||
DataType>(
|
||||
true, // do_verification
|
||||
1, // init_method: integer value
|
||||
1, // init_method: integer value (bounds overridden below)
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param,
|
||||
split_k);
|
||||
split_k,
|
||||
-1,
|
||||
std::optional<std::array<float, 2>>{{0, 5}},
|
||||
std::optional<std::array<float, 2>>{{0, 5}});
|
||||
}
|
||||
}
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <vector>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp"
|
||||
#include "profiler/profile_grouped_conv_fwd_impl.hpp"
|
||||
|
||||
template <typename Tuple>
|
||||
@@ -39,10 +40,14 @@ class TestGroupedConvndFwd : public ::testing::Test
|
||||
DataType,
|
||||
IndexType>(
|
||||
true, // do_verification
|
||||
1, // init_method: integer value
|
||||
1, // init_method: integer value (bounds overridden below)
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
param,
|
||||
ck::tensor_operation::element_wise::PassThrough{},
|
||||
-1,
|
||||
std::optional<std::array<float, 2>>{{0, 5}},
|
||||
std::optional<std::array<float, 2>>{{0, 5}});
|
||||
}
|
||||
EXPECT_TRUE(pass);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user