This commit is contained in:
Damien Lejeune
2026-01-16 09:33:47 -05:00
parent 244048fc52
commit 727af14aad
2 changed files with 48 additions and 0 deletions

View File

@@ -0,0 +1,31 @@
#pragma once
namespace ck_tile {
template <WarpPerBlock_M, WarpPerBlock_N, ThreadPerWarp_M, ThreadPerWarp_N, ThreadTile_M, ThreadTile_N, Repeat_M, Repeat_N>
struct SinkHornKnoppShape
{
static constexpr index_t WarpPerBlock_M = WarpPerBlock_M;
static constexpr index_t WarpPerBlock_N = WarpPerBlock_N;
static constexpr index_t ThreadPerWarp_M = ThreadPerWarp_M;
static constexpr index_t ThreadPerWarp_N = ThreadPerWarp_N;
static constexpr index_t ThreadTile_M = ThreadTile_M;
static constexpr index_t ThreadTile_N = ThreadTile_N;
static constexpr index_t Repeat_M = Repeat_M;
static constexpr index_t Repeat_N = Repeat_N;
};
/// Type bundle describing one Sinkhorn-Knopp problem instance.
///
/// Each template argument is passed through `remove_cvref_t` and published as a
/// member alias of the corresponding un-suffixed name.
///
/// @tparam XDataType_       published as `XDataType`
/// @tparam YDataType_       published as `YDataType`
/// @tparam BlockShape_      published as `BlockShape`
/// @tparam ComputeDataType_ published as `ComputeDataType`; defaults to `float`
template <typename XDataType_,
          typename YDataType_,
          typename BlockShape_,
          typename ComputeDataType_ = float>
struct SinkhornKnoppProblem
{
    using XDataType       = remove_cvref_t<XDataType_>;
    using YDataType       = remove_cvref_t<YDataType_>;
    using BlockShape      = remove_cvref_t<BlockShape_>;
    using ComputeDataType = remove_cvref_t<ComputeDataType_>;
};
} // namespace ck_tile

View File

@@ -11,4 +11,21 @@ struct SinkhornKnoppArgs
int max_iterations;
};
// Kernel functor for the Sinkhorn-Knopp algorithm.
// Work in progress: the call operator below contains only a commented plan and
// no executable statements yet.
struct SinkhornKnoppKernel
{
// Call operator, marked CK_TILE_DEVICE. `args` is taken by const reference and
// is not read yet.
// NOTE(review): `Problem` does not appear in the parameter list, so it cannot be
// deduced from a call; callers would have to name it explicitly
// (`kernel.template operator()<Problem>(args)`) -- confirm this is intended.
template <typename Problem>
CK_TILE_DEVICE void operator()(const SinkhornKnoppArgs& args) const {
// TODO: planned implementation, in order:
// 1. Create the tensor descriptors, views and windows for the inputs and outputs.
// 2. Create the operators:
//    * an ADD reduce op for the row and column sums;
//    * an EXP elementwise op for the exponentiation.
// 3. Run the first iteration of the Sinkhorn-Knopp algorithm, using EXP as the
//    elementwise operation.
// 4. Run the hot loop over the remaining iterations (presumably iterations
//    1 .. args.max_iterations - 1 -- TODO confirm the bounds).
}
};
} // namespace ck_tile