mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-24 06:44:36 +00:00
Standalone sweep once softmax kernel w/ ckProfiler (#295)
* use 'sweep once' softmax kernel where applicable
* threadwise copy's dst buffer can specify invalid element value
* add int8 in/out float compute softmax support
give a bit of leeway for the int absolute tolerance, as a single data point across all test cases shows an off-by-1 error
* format
* softmax inherits DeviceNormalization
* softmax profiler stub
* tighten up reference softmax interface
* example prints tensor dimension
* add fp32 to softmax profiler
* rename header
* hook with ckProfiler
* format
* resolve merge conflict
* resolve merge conflicts
* update normalization profiler help string
* resolve conflict
* typo
* remove residual
* softmax profiler: address feedback
* test for mixed precision input/output
* fully qualify ck::math::isnan
* add comment for device normalization interface
* revise wording
* constness for alpha/beta scaler pointer
[ROCm/composable_kernel commit: 93c99f3d87]
This commit is contained in:
@@ -222,6 +222,12 @@ struct Tensor
|
||||
|
||||
Tensor(const Tensor& other) : mDesc(other.mDesc), mData(other.mData) {}
|
||||
|
||||
// Copy assignment: copies mDesc and mData from `other` into this tensor.
// Returns a reference to this object. The function is declared to return
// Tensor&, so it must not fall off the end without a return statement
// (doing so is undefined behavior and breaks chained assignment).
Tensor& operator=(const Tensor& other)
{
    mDesc = other.mDesc;
    mData = other.mData;
    return *this;
}
|
||||
|
||||
template <typename F>
|
||||
void ForEach_impl(F&& f, std::vector<size_t>& idx, size_t rank)
|
||||
{
|
||||
|
||||
@@ -26,12 +26,11 @@ struct ReferenceSoftmax : public device::BaseOperator
|
||||
Tensor<OutDataType>& out,
|
||||
AccDataType alpha,
|
||||
AccDataType beta,
|
||||
const index_t rank,
|
||||
const std::vector<index_t> sm_reduce_dims)
|
||||
: in_(in), out_(out), alpha_(alpha), beta_(beta), sm_reduce_dims_(sm_reduce_dims)
|
||||
{
|
||||
// std::cout << "debug: scalar dims: ";
|
||||
for(int i = 0; i < rank; i++)
|
||||
for(size_t i = 0; i < in.mDesc.GetNumOfDimension(); i++)
|
||||
{
|
||||
if(std::find(sm_reduce_dims.begin(), sm_reduce_dims.end(), i) ==
|
||||
sm_reduce_dims.end())
|
||||
@@ -47,7 +46,6 @@ struct ReferenceSoftmax : public device::BaseOperator
|
||||
Tensor<OutDataType>& out_;
|
||||
AccDataType alpha_;
|
||||
AccDataType beta_;
|
||||
index_t rank_;
|
||||
std::vector<index_t> sm_reduce_dims_;
|
||||
std::vector<index_t> sm_scalar_dims_; // dim after internal max/sum reduction
|
||||
};
|
||||
@@ -136,10 +134,9 @@ struct ReferenceSoftmax : public device::BaseOperator
|
||||
Tensor<OutDataType>& out,
|
||||
AccDataType alpha,
|
||||
AccDataType beta,
|
||||
const index_t rank,
|
||||
const std::vector<index_t> sm_reduce_dims)
|
||||
{
|
||||
return Argument{in, out, alpha, beta, rank, sm_reduce_dims};
|
||||
return Argument{in, out, alpha, beta, sm_reduce_dims};
|
||||
}
|
||||
|
||||
// Returns a new Invoker constructed with an empty brace initializer.
static auto MakeInvoker()
{
    return Invoker{};
}
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "ck/utility/functional2.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
|
||||
@@ -159,7 +159,7 @@ check_err(const std::vector<T>& out,
|
||||
const std::vector<T>& ref,
|
||||
const std::string& msg = "Error: Incorrect results!",
|
||||
double = 0,
|
||||
double = 0)
|
||||
double atol = 0)
|
||||
{
|
||||
if(out.size() != ref.size())
|
||||
{
|
||||
@@ -179,7 +179,7 @@ check_err(const std::vector<T>& out,
|
||||
int64_t r = ref[i];
|
||||
err = std::abs(o - r);
|
||||
|
||||
if(err > 0)
|
||||
if(err > atol)
|
||||
{
|
||||
max_err = err > max_err ? err : max_err;
|
||||
err_count++;
|
||||
|
||||
Reference in New Issue
Block a user