mirror of
https://github.com/amd/blis.git
synced 2026-04-20 15:48:50 +00:00
Bug fix in LPGEMV m=1 AVX2 kernel for post-ops
Details: - Fixed loading of matadd and matmul pointers in the GEMV lt16 kernel for the AVX2 M=1 case. - Hard-set the row stride of B to 1 (inside GEMV) when B has already been reordered. AMD-Internal: CPUPL-7197, CPUPL-7221 Co-authored-by: Balasubramanian, Vignesh <Vignesh.Balasubramanian@amd.com>
This commit is contained in:
committed by
GitHub
parent
d29f3f0b5e
commit
5044b69d3d
@@ -502,10 +502,10 @@ POST_OPS_MATRIX_ADD_1x16F:
|
||||
{
|
||||
scl_fctr1 =
|
||||
_mm256_loadu_ps( ( float* )post_ops_list_temp->scale_factor +
|
||||
post_ops_attr.post_op_c_j + ( 0 * 16 ) );
|
||||
post_ops_attr.post_op_c_j + ( 0 * 8 ) );
|
||||
scl_fctr2 =
|
||||
_mm256_loadu_ps( ( float* )post_ops_list_temp->scale_factor +
|
||||
post_ops_attr.post_op_c_j + ( 1 * 16 ) );
|
||||
post_ops_attr.post_op_c_j + ( 1 * 8 ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -576,10 +576,10 @@ POST_OPS_MATRIX_MUL_1x16F:
|
||||
{
|
||||
scl_fctr1 =
|
||||
_mm256_loadu_ps( ( float* )post_ops_list_temp->scale_factor +
|
||||
post_ops_attr.post_op_c_j + ( 0 * 16 ) );
|
||||
post_ops_attr.post_op_c_j + ( 0 * 8 ) );
|
||||
scl_fctr2 =
|
||||
_mm256_loadu_ps( ( float* )post_ops_list_temp->scale_factor +
|
||||
post_ops_attr.post_op_c_j + ( 1 * 16 ) );
|
||||
post_ops_attr.post_op_c_j + ( 1 * 8 ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1109,10 +1109,10 @@ POST_OPS_MATRIX_ADD_1x16F:
|
||||
{
|
||||
scl_fctr1 =
|
||||
_mm256_loadu_ps( ( float* )post_ops_list_temp->scale_factor +
|
||||
post_ops_attr.post_op_c_j + ( 0 * 16 ) );
|
||||
post_ops_attr.post_op_c_j + ( 0 * 8 ) );
|
||||
scl_fctr2 =
|
||||
_mm256_loadu_ps( ( float* )post_ops_list_temp->scale_factor +
|
||||
post_ops_attr.post_op_c_j + ( 1 * 16 ) );
|
||||
post_ops_attr.post_op_c_j + ( 1 * 8 ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1183,10 +1183,10 @@ POST_OPS_MATRIX_MUL_1x16F:
|
||||
{
|
||||
scl_fctr1 =
|
||||
_mm256_loadu_ps( ( float* )post_ops_list_temp->scale_factor +
|
||||
post_ops_attr.post_op_c_j + ( 0 * 16 ) );
|
||||
post_ops_attr.post_op_c_j + ( 0 * 8 ) );
|
||||
scl_fctr2 =
|
||||
_mm256_loadu_ps( ( float* )post_ops_list_temp->scale_factor +
|
||||
post_ops_attr.post_op_c_j + ( 1 * 16 ) );
|
||||
post_ops_attr.post_op_c_j + ( 1 * 8 ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user