Files
composable_kernel/include/ck_tile/host/reference/reference_sinkhorn.hpp
2026-02-06 08:38:12 +00:00

74 lines
2.0 KiB
C++

// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#pragma once
#include "ck_tile/core.hpp"
#include "ck_tile/host/host_tensor.hpp"
#include <thread>
namespace ck_tile {
template <typename XDataType, typename ComputeDataType, typename YDataType>
void sinkhorn_knopp_ref(const HostTensor<XDataType>& x_n_n,
HostTensor<YDataType>& y_n_n,
const int n_iter)
{
const index_t input_n = x_n_n.get_length(0);
HostTensor<ComputeDataType> c_n_n({input_n, input_n}, {1, input_n});
HostTensor<ComputeDataType> acc_n({input_n}, {1});
// First apply exp to make input nonnegative
for(index_t i = 0; i < input_n; ++i)
{
for(index_t j = 0; j < input_n; ++j)
{
c_n_n(i, j) = exp(type_convert<ComputeDataType>(x_n_n(i, j)));
// c_n_n(i, j) = type_convert<ComputeDataType>(x_n_n(i, j));
}
}
// Iterate normalization on rows and columns
for(auto it = 0; it < n_iter; ++it)
{
// Sum and scale rowwise
for(index_t i = 0; i < input_n; ++i)
{
acc_n(i) = 0;
for(index_t j = 0; j < input_n; ++j)
{
acc_n(i) += c_n_n(i, j);
}
for(index_t j = 0; j < input_n; ++j)
{
c_n_n(i, j) /= acc_n(i);
}
}
// Repeat columnwise
for(index_t i = 0; i < input_n; ++i)
{
acc_n(i) = 0;
for(index_t j = 0; j < input_n; ++j)
{
acc_n(i) += c_n_n(j, i);
}
for(index_t j = 0; j < input_n; ++j)
{
c_n_n(j, i) /= acc_n(i);
}
}
}
// Copy and cast to output type
for(index_t i = 0; i < input_n; ++i)
{
for(index_t j = 0; j < input_n; ++j)
{
y_n_n(i, j) = type_convert<YDataType>(c_n_n(i, j));
}
}
}
} // namespace ck_tile