Merge commit '1acd8e041c8a7b2c95f6f7bf60ee9417eecb924a' into develop

This commit is contained in:
assistant-librarian[bot]
2025-09-04 12:16:45 +00:00
parent c4c6e0d485
commit 5677205f88
21 changed files with 1245 additions and 782 deletions

View File

@@ -0,0 +1,26 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "ck_tile/core/arch/arch.hpp"
auto string_to_datatype(const std::string& datatype)
{
using PrecVariant = std::variant<ck_tile::half_t, ck_tile::bf16_t, float>;
if(datatype == "fp16")
{
return PrecVariant{ck_tile::half_t{}};
}
else if(datatype == "bf16")
{
return PrecVariant{ck_tile::bf16_t{}};
}
else if(datatype == "fp32")
{
return PrecVariant{float{}};
}
else
{
throw std::runtime_error("Unsupported data type: " + datatype);
}
};

View File

@@ -1,11 +1,11 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "ck_tile/core/arch/arch.hpp"
#include "ck_tile/host.hpp"
#include "ck_tile/ops/elementwise.hpp"
#include "ck_tile/host/reference/reference_elementwise.hpp"
#include "json_dump.hpp"
#include "elementwise_common.hpp"
auto create_args(int argc, char* argv[])
{
@@ -14,7 +14,8 @@ auto create_args(int argc, char* argv[])
.insert("n", "1024", "n dimension")
.insert("stride", "-1", "stride per row, if -1 then equal to n")
.insert("v", "1", "cpu validation or not")
.insert("prec", "fp16", "precision")
.insert("x_prec", "fp16", "input precision, fp16/bf16/fp32")
.insert("y_prec", "fp16", "output precision, fp16/bf16/fp32")
.insert("warmup", "10", "cold iter")
.insert("repeat", "50", "hot iter")
.insert("json", "0", "0: No Json, 1: Dump Results in Json format")
@@ -24,7 +25,10 @@ auto create_args(int argc, char* argv[])
return std::make_tuple(result, arg_parser);
}
template <typename DataType>
// XDataType: Data type of the input tensors.
// ComputeDataType: Data type used for intermediate computations (often float for precision).
// YDataType: Data type of the output tensor.
template <typename XDataType, typename YDataType>
bool run(const ck_tile::ArgParser& arg_parser)
{
ck_tile::index_t M = arg_parser.get_int("m");
@@ -34,25 +38,18 @@ bool run(const ck_tile::ArgParser& arg_parser)
// If stride is negative (default -1), set it to N, assuming a dense row-major layout.
if(stride < 0)
stride = N;
std::string data_type = arg_parser.get_str("prec");
int do_validation = arg_parser.get_int("v");
int warmup = arg_parser.get_int("warmup");
int repeat = arg_parser.get_int("repeat");
int do_validation = arg_parser.get_int("v");
int warmup = arg_parser.get_int("warmup");
int repeat = arg_parser.get_int("repeat");
if(stride < N)
{
throw std::runtime_error("stride must be >= N");
}
// Define type aliases for clarity.
// XDataType: Data type of the input tensors.
// ComputeDataType: Data type used for intermediate computations (often float for precision).
// YDataType: Data type of the output tensor.
// XElementwiseOperation: The specific elementwise operation to perform (e.g., Add, Mul).
using XDataType = DataType;
using ComputeDataType =
float; // Using float for intermediate calculations can improve numerical stability.
using YDataType = DataType;
using XElementwiseOperation = ck_tile::element_wise::Add;
// 1. Initialize the input data on the host (CPU).
@@ -219,11 +216,22 @@ int main(int argc, char* argv[])
if(!result)
return -1;
const std::string data_type = arg_parser.get_str("prec");
if(data_type == "fp16")
try
{
return run<ck_tile::half_t>(arg_parser) ? 0 : -2;
const auto x_prec_variant = string_to_datatype(arg_parser.get_str("x_prec"));
const auto y_prec_variant = string_to_datatype(arg_parser.get_str("y_prec"));
return std::visit(
[&](auto&& x_dt, auto&& y_dt) -> int {
using XDataType = std::decay_t<decltype(x_dt)>;
using YDataType = std::decay_t<decltype(y_dt)>;
return run<XDataType, YDataType>(arg_parser);
},
x_prec_variant,
y_prec_variant);
}
catch(const std::exception& e)
{
std::cerr << "Error: " << e.what() << std::endl;
return -3;
}
return -3;
}

View File

@@ -1,11 +1,11 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "ck_tile/core/arch/arch.hpp"
#include "ck_tile/host.hpp"
#include "ck_tile/ops/elementwise.hpp"
#include "ck_tile/host/reference/reference_elementwise.hpp"
#include "json_dump.hpp"
#include "elementwise_common.hpp"
auto create_args(int argc, char* argv[])
{
@@ -15,7 +15,8 @@ auto create_args(int argc, char* argv[])
.insert("dim2", "32", "dimension 2")
.insert("dim3", "32", "dimension 3")
.insert("v", "1", "cpu validation or not")
.insert("prec", "fp16", "precision")
.insert("x_prec", "fp16", "input precision")
.insert("y_prec", "fp16", "output precision")
.insert("warmup", "10", "cold iter")
.insert("repeat", "50", "hot iter")
.insert("json", "0", "0: No Json, 1: Dump Results in Json format")
@@ -25,7 +26,7 @@ auto create_args(int argc, char* argv[])
return std::make_tuple(result, arg_parser);
}
template <typename DataType>
template <typename XDataType, typename YDataType>
bool run(const ck_tile::ArgParser& arg_parser)
{
ck_tile::index_t D0 = arg_parser.get_int("dim0");
@@ -33,15 +34,12 @@ bool run(const ck_tile::ArgParser& arg_parser)
ck_tile::index_t D2 = arg_parser.get_int("dim2");
ck_tile::index_t D3 = arg_parser.get_int("dim3");
std::string data_type = arg_parser.get_str("prec");
int do_validation = arg_parser.get_int("v");
int warmup = arg_parser.get_int("warmup");
int repeat = arg_parser.get_int("repeat");
int do_validation = arg_parser.get_int("v");
int warmup = arg_parser.get_int("warmup");
int repeat = arg_parser.get_int("repeat");
using XDataType = DataType;
using ComputeDataType =
float; // Using float for intermediate calculations can improve numerical stability.
using YDataType = DataType;
using XElementwiseOperation = ck_tile::element_wise::Add;
// Initialize the input data on the host (CPU).
@@ -164,11 +162,22 @@ int main(int argc, char* argv[])
if(!result)
return -1;
const std::string data_type = arg_parser.get_str("prec");
if(data_type == "fp16")
try
{
return run<ck_tile::half_t>(arg_parser) ? 0 : -2;
const auto x_prec_variant = string_to_datatype(arg_parser.get_str("x_prec"));
const auto y_prec_variant = string_to_datatype(arg_parser.get_str("y_prec"));
return std::visit(
[&](auto&& x_dt, auto&& y_dt) -> int {
using XDataType = std::decay_t<decltype(x_dt)>;
using YDataType = std::decay_t<decltype(y_dt)>;
return run<XDataType, YDataType>(arg_parser);
},
x_prec_variant,
y_prec_variant);
}
catch(const std::exception& e)
{
std::cerr << "Error: " << e.what() << std::endl;
return -3;
}
return -3;
}

View File

@@ -5,6 +5,7 @@
#include "ck_tile/ops/elementwise.hpp"
#include "ck_tile/host/reference/reference_transpose.hpp"
#include "json_dump.hpp"
#include "elementwise_common.hpp"
auto create_args(int argc, char* argv[])
{
@@ -32,10 +33,9 @@ bool run(const ck_tile::ArgParser& arg_parser)
if(stride_in < 0)
stride_in = N; // Dense input: stride for M dim is N
std::string data_type = arg_parser.get_str("prec");
int do_validation = arg_parser.get_int("v");
int warmup = arg_parser.get_int("warmup");
int repeat = arg_parser.get_int("repeat");
int do_validation = arg_parser.get_int("v");
int warmup = arg_parser.get_int("warmup");
int repeat = arg_parser.get_int("repeat");
if(stride_in < N)
{
@@ -161,12 +161,19 @@ int main(int argc, char* argv[])
if(!result)
return -1;
const std::string data_type = arg_parser.get_str("prec");
if(data_type == "fp16")
try
{
return run<ck_tile::half_t>(arg_parser) ? 0 : -2;
const auto prec_variant = string_to_datatype(arg_parser.get_str("prec"));
return std::visit(
[&](auto&& dt) -> int {
using DataType = std::decay_t<decltype(dt)>;
return run<DataType>(arg_parser);
},
prec_variant);
}
catch(const std::exception& e)
{
std::cerr << "Error: " << e.what() << std::endl;
return -3;
}
std::cerr << "Unsupported data type: " << data_type << std::endl;
return -3;
}

View File

@@ -1,11 +1,11 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
#include "ck_tile/core/arch/arch.hpp"
#include "ck_tile/host.hpp"
#include "ck_tile/ops/elementwise.hpp"
#include "ck_tile/host/reference/reference_elementwise.hpp"
#include "json_dump.hpp"
#include "elementwise_common.hpp"
auto create_args(int argc, char* argv[])
{
@@ -14,7 +14,9 @@ auto create_args(int argc, char* argv[])
.insert("n", "1024", "n dimension")
.insert("stride", "-1", "stride per row, if -1 then equal to n")
.insert("v", "1", "cpu validation or not")
.insert("prec", "fp16", "precision")
.insert("op", "1", "unary operation, 1: square, 2: convert")
.insert("x_prec", "fp16", "input precision")
.insert("y_prec", "fp16", "output precision")
.insert("warmup", "10", "cold iter")
.insert("repeat", "50", "hot iter")
.insert("json", "0", "0: No Json, 1: Dump Results in Json format")
@@ -24,7 +26,7 @@ auto create_args(int argc, char* argv[])
return std::make_tuple(result, arg_parser);
}
template <typename DataType>
template <typename XElementwiseOperation, typename XDataType, typename YDataType>
bool run(const ck_tile::ArgParser& arg_parser)
{
ck_tile::index_t M = arg_parser.get_int("m");
@@ -32,17 +34,12 @@ bool run(const ck_tile::ArgParser& arg_parser)
ck_tile::index_t stride = arg_parser.get_int("stride");
if(stride < 0)
stride = N;
std::string data_type = arg_parser.get_str("prec");
int do_validation = arg_parser.get_int("v");
int warmup = arg_parser.get_int("warmup");
int repeat = arg_parser.get_int("repeat");
int do_validation = arg_parser.get_int("v");
int warmup = arg_parser.get_int("warmup");
int repeat = arg_parser.get_int("repeat");
assert(stride >= N);
using XDataType = DataType;
using YDataType = DataType;
using XElementwiseOperation = ck_tile::element_wise::UnarySquare;
// 1. Initialize the input data on the host
ck_tile::HostTensor<XDataType> x_host_a({M, N}, {stride, 1});
ck_tile::HostTensor<YDataType> y_host({M, N}, {stride, 1});
@@ -122,12 +119,17 @@ bool run(const ck_tile::ArgParser& arg_parser)
{
y_buf.FromDevice(y_validation.data());
auto op = [](const auto& v0) { return v0 * v0; };
auto op = [](const XDataType& v0) -> YDataType {
XElementwiseOperation element_op{};
YDataType result;
element_op(result, v0);
return result;
};
ck_tile::reference_unary_elementwise<XDataType, YDataType, YDataType>(x_host_a, y_host, op);
pass = ck_tile::check_err(
y_validation, y_host, "Elementwise Add Error: Incorrect results!", 0.01, 0.01);
y_validation, y_host, "Elementwise unary op: Incorrect results!", 0.01, 0.01);
}
if(arg_parser.get_int("json") == 1)
@@ -145,17 +147,69 @@ bool run(const ck_tile::ArgParser& arg_parser)
return pass;
}
template <typename XElementwiseOperation, typename XDataType, typename YDataType>
bool filter_then_run(const ck_tile::ArgParser& arg_parser)
{
auto throw_unsupported = [&]() {
const auto x_prec = arg_parser.get_str("x_prec");
const auto op = arg_parser.get_str("op");
throw std::runtime_error("Unsupported! x_prec: " + x_prec + ", op: " + op);
};
bool pass = true;
if constexpr(std::is_same_v<XElementwiseOperation, ck_tile::element_wise::UnarySquare> &&
std::is_same_v<XDataType, ck_tile::bf16_t>)
{
throw_unsupported();
}
else
{
pass = run<XElementwiseOperation, XDataType, YDataType>(arg_parser);
}
return pass;
}
auto string_to_op(const std::string& op)
{
using OpVariant =
std::variant<ck_tile::element_wise::UnarySquare, ck_tile::element_wise::UnaryConvert>;
if(op == "1")
return OpVariant{ck_tile::element_wise::UnarySquare{}};
else if(op == "2")
return OpVariant{ck_tile::element_wise::UnaryConvert{}};
else
{
throw std::runtime_error("Unsupported unary operation: " + op);
}
};
int main(int argc, char* argv[])
{
auto [result, arg_parser] = create_args(argc, argv);
if(!result)
return -1;
const std::string data_type = arg_parser.get_str("prec");
if(data_type == "fp16")
try
{
return run<ck_tile::half_t>(arg_parser) ? 0 : -2;
const auto x_prec_variant = string_to_datatype(arg_parser.get_str("x_prec"));
const auto y_prec_variant = string_to_datatype(arg_parser.get_str("y_prec"));
const auto op_variant = string_to_op(arg_parser.get_str("op"));
return std::visit(
[&](auto&& op, auto&& x_dt, auto&& y_dt) -> int {
using XElementwiseOperation = std::decay_t<decltype(op)>;
using XDataType = std::decay_t<decltype(x_dt)>;
using YDataType = std::decay_t<decltype(y_dt)>;
return filter_then_run<XElementwiseOperation, XDataType, YDataType>(arg_parser);
},
op_variant,
x_prec_variant,
y_prec_variant);
}
catch(const std::exception& e)
{
std::cerr << "Error: " << e.what() << std::endl;
return -3;
}
return -3;
}