mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-18 12:00:07 +00:00
Fix universal gemm profiler for pk_i4_t (#1790)
* Fix universal gemm profiler for pk_i4_t
* fix
[ROCm/composable_kernel commit: 888317e698]
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
- // Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
+ // Copyright (c) 2023-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#pragma once
|
||||
|
||||
@@ -177,7 +177,7 @@ bool profile_gemm_universal_impl(int do_verification,
|
||||
}
|
||||
}
|
||||
|
||||
- if(is_same_v<BDataType, pk_i4_t> && is_same_v<ADataType, half_t>)
|
||||
+ if constexpr(is_same_v<BDataType, pk_i4_t> && is_same_v<ADataType, half_t>)
|
||||
{
|
||||
// vector pk_i4x4 permute
|
||||
for(int i = 0; i < N; i++)
|
||||
@@ -188,7 +188,7 @@ bool profile_gemm_universal_impl(int do_verification,
|
||||
|
||||
for(int k = 0; k < 4; k++)
|
||||
{
|
||||
- int i4x2 = b_k_n_permute(j + k * 2, i);
|
||||
+ int i4x2 = b_k_n_permute(j + k * 2, i).data;
|
||||
input[k * 2 + 0] = (i4x2 >> 4) & 0xf;
|
||||
input[k * 2 + 1] = (i4x2 >> 0) & 0xf;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user