mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 18:17:44 +00:00
[CK] Refactor GPU verification kernel to gather error stats on GPU (#3551)
* Refactor GPU verification kernel to gather erorr stats on GPU
* Check if result is all zero
* non-negative error count doesn't need custom Atomics
* Remove unnecessary AtomicMaxFloat function
* Simpler warp reduction, remove passed flag
* Move verification header to include
* Fix header path in test
* Fix block reduction loop
[ROCm/composable_kernel commit: f173642087]
This commit is contained in:
@@ -13,8 +13,8 @@
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
#include "ck/library/utility/host_tensor_generator.hpp"
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/utility/gpu_verification.hpp"
|
||||
#include "ck/library/reference_tensor_operation/gpu/naive_conv_utils.hpp"
|
||||
#include "profiler/gpu_verification.hpp"
|
||||
|
||||
using namespace ck::profiler;
|
||||
using ck::ref::SimpleDeviceMem;
|
||||
|
||||
Reference in New Issue
Block a user