diff --git a/include/ck/tensor_operation/gpu/device/device_binary_elementwise.hpp b/include/ck/tensor_operation/gpu/device/device_binary_elementwise.hpp index a818009f9a..198bf42ce7 100644 --- a/include/ck/tensor_operation/gpu/device/device_binary_elementwise.hpp +++ b/include/ck/tensor_operation/gpu/device/device_binary_elementwise.hpp @@ -86,7 +86,7 @@ struct DeviceBinaryElementwise : public BaseOperator p_c_(p_c), functor_(functor), threadPerBlock_(threadPerBlock), - gridSize_(128) // FIXME - Calculate the grid size by number of CU in the future + gridSize_(120) // FIXME - Calculate the grid size by number of CU in the future { a_grid_desc_m0_ = MakeDescriptor_M0(shape, stride_a, gridSize_, threadPerBlock_); b_grid_desc_m0_ = MakeDescriptor_M0(shape, stride_b, gridSize_, threadPerBlock_);