mirror of
https://github.com/amd/blis.git
synced 2026-05-04 22:41:11 +00:00
Low precision gemm refactoring and bug fixes.
- The micro-kernel function signatures follow a common pattern. These functions can be represented as instantiations of a macro, as is done in BLIS, which reduces the number of micro-kernel header files. A new single header file containing all the macro definitions together with their instantiations is added, and the existing, now unnecessary, header files are removed.
- The bias addition in the micro-kernel for n remaining < 16 reads the bias array assuming it contains 16 elements. This can result in segmentation faults, since out-of-bounds memory is accessed. It is fixed by copying the required elements to an intermediate buffer and loading from that buffer.
- An input matrix storage type parameter is added to the lpgemm APIs. It can be either row major or column major, denoted by r and c respectively. Currently only row-major input matrices are supported.
- Bug fix in the s16 fringe micro-kernel to use the correct offset while storing output.
AMD-Internal: [CPUPL-2386] Change-Id: Idfa23e69d54ad7e06a67b1e36a5b5558fbff03a3
This commit is contained in:
67
addon/aocl_gemm/frame/lpgemm_5loop_interface_apis.h
Normal file
67
addon/aocl_gemm/frame/lpgemm_5loop_interface_apis.h
Normal file
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef LPGEMM_5LOOP_INTF_H
|
||||
#define LPGEMM_5LOOP_INTF_H
|
||||
|
||||
#include "lpgemm_types.h"
|
||||
#include "lpgemm_post_ops.h"
|
||||
|
||||
/*
 * LPGEMM_5LOOP( A_type, B_type, C_type, LP_SFX )
 *
 * Expands to the prototype of the function lpgemm_rowvar_<LP_SFX>, which
 * returns void. The macro does not emit a terminating semicolon; each
 * instantiation supplies its own.
 *
 * Macro arguments:
 *   A_type  - element type pointed to by `a` (passed as const A_type*).
 *   B_type  - element type pointed to by `b` (passed as const B_type*).
 *   C_type  - element type pointed to by `c`, and the type of the by-value
 *             `alpha` and `beta` arguments.
 *   LP_SFX  - suffix pasted onto `lpgemm_rowvar_` to form the function name.
 *
 * Parameters of the generated prototype:
 *   m, n, k              - presumably the GEMM problem dimensions; confirm
 *                          against the implementation.
 *   a, rs_a, cs_a        - read-only A operand and two dim_t values,
 *                          presumably its row/column strides (TODO confirm).
 *   mtag_a               - AOCL_MEMORY_TAG associated with A.
 *   b, rs_b, cs_b        - read-only B operand and, presumably, its strides.
 *   mtag_b               - AOCL_MEMORY_TAG associated with B.
 *   c, rs_c              - writable C operand; only one stride-like value
 *                          (rs_c) is passed for it.
 *   alpha, beta          - C_type values, presumably the GEMM scaling
 *                          factors (TODO confirm).
 *   rntm                 - pointer to a rntm_t.
 *   thread               - pointer to a lpgemm_thrinfo_t.
 *   post_op_list         - pointer to a lpgemm_post_op.
 *
 * NOTE: the final line of the definition deliberately carries no line
 * continuation, so the macro body ends at the closing parenthesis and
 * cannot absorb whatever line happens to follow the definition.
 */
#define LPGEMM_5LOOP(A_type,B_type,C_type,LP_SFX) \
void lpgemm_rowvar_ ## LP_SFX \
     ( \
       const dim_t           m, \
       const dim_t           n, \
       const dim_t           k, \
       const A_type*         a, \
       const dim_t           rs_a, \
       const dim_t           cs_a, \
       const AOCL_MEMORY_TAG mtag_a, \
       const B_type*         b, \
       const dim_t           rs_b, \
       const dim_t           cs_b, \
       const AOCL_MEMORY_TAG mtag_b, \
       C_type*               c, \
       const dim_t           rs_c, \
       C_type                alpha, \
       C_type                beta, \
       rntm_t*               rntm, \
       lpgemm_thrinfo_t*     thread, \
       lpgemm_post_op*       post_op_list \
     )
|
||||
|
||||
LPGEMM_5LOOP(uint8_t,int8_t,int32_t,u8s8s32o32);
|
||||
LPGEMM_5LOOP(uint8_t,int8_t,int16_t,u8s8s16o16);
|
||||
LPGEMM_5LOOP(float,float,float,f32f32f32of32);
|
||||
#endif // LPGEMM_5LOOP_INTF_H
|
||||
Reference in New Issue
Block a user