mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
* Tiny fix in dynamic_buffer.hpp to support vectorized AtomicAdd for double type
* Update to host layer and host reduction
* Merge and remove reduction kernels
* Merge and remove reduction device interfaces and update pooling device interface
* Merge and remove useless reduction device instances
* Update to reduction profiler and reduction ctests
* Update to reduction and pooling examples and add one reduction example
* Change reduction examples to make them testable by ctest
* Add explicit pass checking for reduction and pooling examples
* Explicit assignment of tensor shapes in example reduce_blockwise_two_call
* Use atomic_add to replace atomicAdd and add atomic_add for double type
* Add reduce ctest support for double data type
* Replace to_int_vector() by using c++ std::vector::assign()
* Keep DeviceReduceThreadWise separated from DeviceReduceBlockWise
* Merge DeviceReduceBlockWise and DeviceReduceMultiBlockAtomicAdd into DeviceReduceMultiBlock
* Add GetAtomicOperationZeroValue() support for AtomicMax
* Tiny change to reduce example README.md
* Fix some tiny issues due to branch merging
* Revoke previous change in dynamic_buffer.hpp and add atomic_add for double2_t
* Add reduce multiblock_atomic_add instances for fp64 to verify vectorized atomic_add on fp64
* Renaming
* Clean up the header includes in device_reduce instances header files
[ROCm/composable_kernel commit: 63eee2d999]
64 lines
2.5 KiB
Bash
Executable File
64 lines
2.5 KiB
Bash
Executable File
#!/bin/bash
#
# Runs bin/test_reduce_no_index once for every combination of data type and
# reduce-dimension list below (5 data types x 9 dimension lists = 45 runs).
#
# Usage:  invoke from the directory that contains bin/ -- the binary path is
#         relative.
# Status: 0 when every invocation succeeded, 1 when at least one failed.

## The following will be used for CI

set -x

# Data-type codes handed to the test binary, in the order they are run:
#   0 = float, 6 = float64, 1 = float16, 3 = int8_t, 5 = bfloat16
data_types=(0 6 1 3 5)

# Values for -R; each one is tried for every data type.
reduce_dims=(0,1,2,3 0,1,2 0,1,3 0,2,3 1,2,3 0 1 2 3)

failures=0

for data_type in "${data_types[@]}"; do
  for dims in "${reduce_dims[@]}"; do
    # Keep going after a failure so a single run reports every broken case.
    # NOTE(review): the trailing "2" is passed through unchanged; its meaning
    # is defined by the test binary -- confirm against its usage text.
    bin/test_reduce_no_index -D 64,4,280,82 -R "$dims" "$data_type" 2 \
      || failures=$((failures + 1))
  done
done

set +x

# The final command supplies the script's status. Deliberately not `exit`, so
# the file can also be sourced without terminating the calling shell.
(( failures == 0 ))