Added Memory testing for DTRSM

- Added framework for memory testing.
- Out of bound reads and writes can be
  detected in both C and assembly.
- Added memory tests for DTRSM.
- Test methodology:
  - Use linux's protected pages to set some memory
    before and after the required buffer as protected.
  - Set the first and last page_size bytes as
    read, write and execute protected (red_zones).
  - If any part of the code tries to read or write
    in the redzones, a SIGSEGV signal is
    generated, which can be used to detect an
    out-of-bounds read or write.
  - Page protection can only be set per page.
    If the required buffer size is not a multiple
    of pagesize, we have to allocate more memory
    than required in order to make sure the start and
    end of the redzones align with page boundaries.
  - Replace malloc(size) with an allocation of
    'buffer_size+(2*pagesize)' where buffer_size is the
    minimum size such that buffer_size > 'size' and
    buffer_size is a multiple of pagesize.
  - Use the first and last page_size bytes of the allocated
    buffer as redzones, use the first 'size' bytes of the middle
    buffer as the first greenzone and the last 'size' bytes as
    the second greenzone.
  - Call the test code once with the first greenzone and then
    with the second greenzone. Greenzones are surrounded
    by redzones, so if the test code reads/writes before or after
    the greenzones, it will be detected.

   |_____________________________________________________|
   |  red_zone1 |  green_zone1    greenzone_2 | red_zone2|
   |_____________________________________________________|

AMD-Internal: [CPUPL-4403]
Change-Id: Ic5c22a9adf8f833c77510686eee886485e894354
This commit is contained in:
Shubham Sharma
2024-02-16 15:45:55 +05:30
parent 1bd9f0c856
commit de92fb0680
6 changed files with 673 additions and 89 deletions

View File

@@ -0,0 +1,79 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "common/type_info.h"
namespace testinghelpers {

/**
 * Buffer used by the memory tests to detect out-of-bounds reads and writes.
 *
 * When constructed with is_mem_test == true (Linux only), the allocation is
 * laid out as
 *
 *     | redzone_1 | greenzone_1 ...... greenzone_2 | redzone_2 |
 *
 * Both redzones are page aligned and made inaccessible with mprotect(), so
 * an access just before greenzone_1 or just past the end of greenzone_2
 * raises SIGSEGV. greenzone_1 is the first "size" bytes after redzone_1 and
 * greenzone_2 is the last "size" bytes before redzone_2; the two ranges may
 * overlap, so they are meant to be used one after the other.
 *
 * When is_mem_test == false, a plain buffer of "size" bytes is allocated and
 * both greenzone pointers refer to its start.
 *
 * The object owns the allocation and releases it in its destructor, so it
 * must not be copied.
 */
class ProtectedBuffer
{
    private:
        // Number of pages used for each redzone.
        static const size_t REDZONE_SIZE = 1;

        // Page-aligned start of the protected region before the buffer.
        void* redzone_1 = nullptr;
        // Page-aligned start of the protected region after the buffer.
        void* redzone_2 = nullptr;
        // Pointer returned by the allocator; this is what gets freed.
        void* mem       = nullptr;
        // True when redzones were set up and must be unprotected on destruction.
        bool is_mem_test = false;

        /**
         * ==========================================================================
         * get_mem
         * returns a aligned or unaligned buffer of size "size"
         * ==========================================================================
         * @param[in] size          specifies the size of the buffer to be allocated.
         * @param[in] is_aligned    specifies if the buffer needs to be aligned or not.
         */
        static void* get_mem(dim_t, bool);

    public:
        // First "size" bytes of the usable region (directly after redzone_1).
        void* greenzone_1 = nullptr;
        // Last "size" bytes of the usable region (directly before redzone_2).
        void* greenzone_2 = nullptr;

        /**
         * @param[in] size          number of bytes the caller needs.
         * @param[in] is_aligned    request aligned memory from the allocator.
         * @param[in] is_mem_test   surround the buffer with protected redzones.
         */
        ProtectedBuffer(dim_t size, bool is_aligned = false, bool is_mem_test = false);

        /**
         * Removes the redzone protection (if any) and frees the allocation.
         */
        ~ProtectedBuffer();

        // The class owns "mem" and frees it in the destructor. A copy would
        // free the same pointer twice, so copying is disabled.
        ProtectedBuffer(const ProtectedBuffer&) = delete;
        ProtectedBuffer& operator=(const ProtectedBuffer&) = delete;

        /**
         * SIGSEGV handler used during memory tests; reports the fault by
         * throwing an exception.
         */
        static void handle_mem_test_fail(int signal);

        /**
         * Adds signal handler for segmentation fault.
         */
        static void start_signal_handler();

        /**
         * Removes signal handler for segmentation fault.
         */
        static void stop_signal_handler();
};
}

View File

@@ -40,3 +40,4 @@
#include "data_generators.h"
#include "error_helpers.h"
#include "refCBLAS.h"
#include "protected_buffer.h"

View File

@@ -0,0 +1,180 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(__linux__)
#include <signal.h>
#include <stdexcept>
#include <unistd.h>
#include <sys/mman.h>
#endif
#include "blis.h"
#include "common/protected_buffer.h"
/*
* Returns aligned or unaligned memory of required size.
*
* size        number of bytes to allocate.
* is_aligned  when true, the memory is requested with an alignment of
*             BLIS_HEAP_STRIDE_ALIGN_SIZE; otherwise plain malloc() is used.
*
* Returns the allocator's result unchanged (may be a null pointer on failure).
*/
void* testinghelpers::ProtectedBuffer::get_mem(dim_t size, bool is_aligned)
{
    if (!is_aligned)
    {
        return malloc(size);
    }
#if defined(__linux__)
    // aligned_alloc takes (alignment, size)
    return aligned_alloc(BLIS_HEAP_STRIDE_ALIGN_SIZE, size);
#else
    // _aligned_malloc takes (size, alignment) - the reverse of aligned_alloc.
    // NOTE(review): memory obtained from _aligned_malloc is documented to be
    // released with _aligned_free, while the destructor calls free(); confirm
    // how the non-Linux build is expected to release aligned buffers.
    return _aligned_malloc(size, BLIS_HEAP_STRIDE_ALIGN_SIZE);
#endif
}
/**
 * @brief Allocate memory for greenzones and redzones, and add protection to redzones
 *
 * For a memory test (Linux only) the allocation is laid out as
 *   | padding | redzone_1 | buffer (buffer_size bytes) | redzone_2 |
 * where redzone_1 starts at a page boundary and buffer_size is a multiple of
 * the page size. Otherwise a plain buffer of "size" bytes is allocated and
 * both greenzones point to its start.
 *
 * If the allocation or mprotect() fails during a memory test, an error is
 * printed and the process exits.
 *
 * @param size size of buffer required
 * @param is_aligned should allocated memory be aligned
 * @param is_mem_test is memory allocated for memory test.
 */
testinghelpers::ProtectedBuffer::ProtectedBuffer(dim_t size, bool is_aligned, bool is_mem_test)
{
#if defined(__linux__)
    this->is_mem_test = is_mem_test;
    if (is_mem_test)
    {
        // query page size
        size_t page_size = sysconf(_SC_PAGESIZE);

        // smallest multiple of page_size that is strictly greater than size
        size_t buffer_size = ((size / page_size) + 1) * page_size;

        // allocate memory (buffer_size + 1 page to ensure the 1st redzone can start at a page
        // boundary + 2 * REDZONE_SIZE pages for 1 redzone on each end of the buffer)
        mem = get_mem(buffer_size + ((1 + (REDZONE_SIZE * 2)) * page_size), is_aligned);
        if (mem == nullptr)
        {
            // without this check the code below would compute redzone addresses
            // from a null pointer and hand them to mprotect
            perror("ProtectedBuffer allocation");
            exit(EXIT_FAILURE);
        }

        // set redzone_1 to mem+page_size to make sure that
        // at least one page boundary exists between mem and redzone_1
        redzone_1 = (void*)((char*)mem + page_size);

        // Round redzone_1 down to a page boundary.
        // page_size is a power of two (say 2^N), so (-page_size) in 2's complement
        // has its N least significant bits unset and all other bits set.
        // (redzone_1 & -page_size) therefore clears the N LSBs of redzone_1,
        // which is equivalent to (redzone_1 - (redzone_1 % page_size)).
        redzone_1 = (void*)((uintptr_t)(redzone_1) & -page_size);

        // redzone_2 = redzone_1 + sizeof redzone_1 + sizeof buffer
        redzone_2 = (void*)((char*)redzone_1 + (page_size * REDZONE_SIZE) + buffer_size);

        // make redzones read/write/execute protected
        if (mprotect(redzone_1, page_size * REDZONE_SIZE, PROT_NONE) == -1)
        {
            perror("mprotect");
            exit(EXIT_FAILURE);
        }
        if (mprotect(redzone_2, page_size * REDZONE_SIZE, PROT_NONE) == -1)
        {
            perror("mprotect");
            exit(EXIT_FAILURE);
        }

        // get address to the first "size" bytes of buffer
        greenzone_1 = (void*)((char*)redzone_1 + (page_size * REDZONE_SIZE));

        // get address to the last "size" bytes of buffer
        greenzone_2 = (void*)((char*)redzone_2 - size);
    }
    else
#endif
    {
        // no protection requested (or not supported on this platform):
        // both greenzones refer to the same plain buffer
        mem = get_mem(size, is_aligned);
        greenzone_1 = mem, greenzone_2 = mem;
    }
}
/**
 * @brief Remove Protection from redzones and free allocated memory
 *
 * The redzones lie inside the allocation, so they are made readable and
 * writable again before the allocation is handed back to free().
 * If mprotect() fails, an error is printed and the process exits.
 */
testinghelpers::ProtectedBuffer::~ProtectedBuffer()
{
#if defined(__linux__)
    if (is_mem_test)
    {
        const size_t redzone_bytes = sysconf(_SC_PAGESIZE) * REDZONE_SIZE;
        void* const redzones[] = { redzone_1, redzone_2 };

        // restore access to redzone_1 first, then redzone_2
        for (void* zone : redzones)
        {
            if (mprotect(zone, redzone_bytes, PROT_READ | PROT_WRITE) == -1)
            {
                perror("mprotect");
                exit(EXIT_FAILURE);
            }
        }
    }
#endif
    free(mem);
}
/**
 * SIGSEGV handler for memory tests: turns the segmentation fault into a
 * std::out_of_range exception that the test code can catch.
 *
 * On non-Linux builds this function does nothing.
 */
void testinghelpers::ProtectedBuffer::handle_mem_test_fail(int signal)
{
#if defined(__linux__)
    // Unblock SIGSEGV before leaving the handler via an exception.
    // NOTE(review): presumably the signal stays blocked otherwise, because
    // the handler never returns normally - confirm.
    sigset_t segv_only;
    sigemptyset(&segv_only);
    sigaddset(&segv_only, SIGSEGV);
    sigprocmask(SIG_UNBLOCK, &segv_only, nullptr);

    throw std::out_of_range("err invalid");
#endif
}
/**
 * Installs handle_mem_test_fail as the SIGSEGV handler (Linux only).
 */
void testinghelpers::ProtectedBuffer::start_signal_handler()
{
#if defined(__linux__)
    // from now on a segmentation fault is routed to handle_mem_test_fail
    ::signal(SIGSEGV, &ProtectedBuffer::handle_mem_test_fail);
#endif
}
/**
 * Restores the default SIGSEGV disposition (Linux only).
 */
void testinghelpers::ProtectedBuffer::stop_signal_handler()
{
#if defined(__linux__)
    // back to the default action for segmentation faults
    ::signal(SIGSEGV, SIG_DFL);
#endif
}

View File

@@ -109,7 +109,7 @@ void generate_NAN_INF( T* mat, char uploa, gtint_t m, gtint_t ld, EVT_TYPE type,
}
template<typename T>
void init_mat( T* mat, char uploa, char storage, char trans, gtint_t from, gtint_t to, gtint_t m,
void random_generator_with_INF_NAN( T* mat, char uploa, char storage, char trans, gtint_t from, gtint_t to, gtint_t m,
gtint_t n, gtint_t ld, EVT_TYPE type = NO_EVT, bool is_a = false )
{
switch( type )
@@ -157,8 +157,8 @@ void test_trsm( char storage, char side, char uploa, char transa, char diaga,
std::vector<T> a( testinghelpers::matsize(storage, transa, mn, mn, lda) );
std::vector<T> b( testinghelpers::matsize(storage, 'n', m, n, ldb) );
srand(time(0));
init_mat( a.data(), uploa, storage, transa, lower, upper, mn, mn, lda, NO_EVT, true);
init_mat( b.data(), uploa, storage, 'n', 3, 10, m, n, ldb, b_init, false);
random_generator_with_INF_NAN( a.data(), uploa, storage, transa, lower, upper, mn, mn, lda, NO_EVT, true);
random_generator_with_INF_NAN( b.data(), uploa, storage, 'n', 3, 10, m, n, ldb, b_init, false);
bool nan_inf_check = false;
// Setting the nan_inf_check boolean to true if alpa has

View File

@@ -39,8 +39,8 @@
#include "level3/trsm/test_trsm.h"
class DTrsmUkrTest :
public ::testing::TestWithParam<std::tuple< dgemmtrsm_ukr_ft, // Function pointer type for daxpyv kernels
class DTRSMUkrTest :
public ::testing::TestWithParam<std::tuple< dgemmtrsm_ukr_ft, // Function pointer type for dtrsm kernels
char, // storage
char, // uploa
char, // diaga
@@ -48,10 +48,23 @@ class DTrsmUkrTest :
gtint_t, // n
gtint_t, // k
double, // alpha
gtint_t >> {}; // ldc_inc
gtint_t, // ldc_inc
bool >> {}; // is_memory_test
// Value-parameterized fixture for the DTRSM small kernels.
// Tuple layout (index : meaning):
//   0 : kernel function pointer (dtrsm_small_ker_ft)
//   1 : side
//   2 : uploa
//   3 : diaga
//   4 : transa
//   5 : m
//   6 : n
//   7 : alpha
//   8 : lda_inc
//   9 : ldb_inc
//  10 : is_memory_test
class DTRSMSmallUkrTest :
    public ::testing::TestWithParam<
        std::tuple<
            dtrsm_small_ker_ft,
            char,
            char,
            char,
            char,
            gtint_t,
            gtint_t,
            double,
            gtint_t,
            gtint_t,
            bool
        >
    > {};
TEST_P(DTrsmUkrTest, native)
TEST_P(DTRSMUkrTest, native_kernel)
{
using T = double;
dgemmtrsm_ukr_ft ukr_fp = std::get<0>(GetParam());
@@ -63,33 +76,89 @@ TEST_P(DTrsmUkrTest, native)
gtint_t k = std::get<6>(GetParam());
T alpha = std::get<7>(GetParam());
gtint_t ldc = std::get<8>(GetParam());
bool is_memory_test = std::get<9>(GetParam());
double thresh = 2 * m * testinghelpers::getEpsilon<T>();
test_trsm_ukr<T, dgemmtrsm_ukr_ft>( ukr_fp, storage, uploa, diaga, m, n, k, alpha, ldc, thresh );
test_trsm_ukr<T, dgemmtrsm_ukr_ft>( ukr_fp, storage, uploa, diaga, m, n, k, alpha, ldc, thresh, is_memory_test);
}
class DTrsmUkrTestPrint {
// Runs one parameter combination of DTRSMSmallUkrTest through the
// trsm small-kernel test driver.
TEST_P(DTRSMSmallUkrTest, small_kernel)
{
    using T = double;
    dtrsm_small_ker_ft ukr_fp = std::get<0>(GetParam());
    char side                 = std::get<1>(GetParam());
    char uploa                = std::get<2>(GetParam());
    char diaga                = std::get<3>(GetParam());
    char transa               = std::get<4>(GetParam());
    gtint_t m                 = std::get<5>(GetParam());
    gtint_t n                 = std::get<6>(GetParam());
    T alpha                   = std::get<7>(GetParam());
    // entries 8 and 9 are increments; the driver adds the matrix dimension
    // to them to form the actual leading dimensions
    gtint_t lda               = std::get<8>(GetParam());
    gtint_t ldb               = std::get<9>(GetParam());
    bool is_memory_test       = std::get<10>(GetParam());

    // The threshold scales with the larger of m and n, with a floor of 3.
    // The literal is converted to gtint_t so that std::max deduces a single
    // type even if gtint_t is not int.
    double thresh = 2 * std::max(std::max(m, n), gtint_t(3)) * testinghelpers::getEpsilon<T>();

    test_trsm_small_ukr<T, dtrsm_small_ker_ft>( ukr_fp, side, uploa, diaga, transa, m, n, alpha, lda, ldb, thresh, is_memory_test);
}
class DTRSMUkrTestPrint {
public:
std::string operator()(
testing::TestParamInfo<std::tuple<dgemmtrsm_ukr_ft, char, char, char, gtint_t,
gtint_t, gtint_t, double, gtint_t>> str) const{
gtint_t, gtint_t, double, gtint_t, bool>> str) const{
char storage = std::get<1>(str.param);
char uploa = std::get<2>(str.param);
char diaga = std::get<3>(str.param);
gtint_t k = std::get<6>(str.param);
double alpha = std::get<7>(str.param);
gtint_t ldc = std::get<8>(str.param);
return std::string("dgemmtrsm_ukr") + "_s" + storage + "_d" + diaga + "_u" + uploa +
"_k" + std::to_string(k) + "_a" +
(alpha > 0 ? std::to_string(int(alpha)) : std::string("m") + std::to_string(int(alpha*-1))) +
"_c" + std::to_string(ldc);
bool is_memory_test = std::get<9>(str.param);
std::string res = std::string("dgemmtrsm_ukr")
+ "_stor_" + storage
+ "_diag_" + diaga
+ "_uplo_" + uploa
+ "_k_" + std::to_string(k)
+ "_alpha_" + (alpha > 0 ? std::to_string(int(alpha)) :
std::string("m") + std::to_string(int(alpha*-1)))
+ "_ldc_" + std::to_string(ldc);
return is_memory_test ? res + "_memory_test" : res;
}
};
#ifdef BLIS_KERNELS_ZEN4
// Builds the test-name suffix for DTRSMSmallUkrTest instantiations from the
// parameter tuple. Negative alpha values are printed as "m<abs value>";
// alpha is truncated to an integer for the name.
class DTRSMSmallUkrTestPrint {
public:
    std::string operator()(
        testing::TestParamInfo<std::tuple<dtrsm_small_ker_ft, char, char, char, char, gtint_t,
                                          gtint_t, double, gtint_t, gtint_t, bool>> str) const{
        char side           = std::get<1>(str.param);
        char uploa          = std::get<2>(str.param);
        char diaga          = std::get<3>(str.param);
        char transa         = std::get<4>(str.param);
        gtint_t m           = std::get<5>(str.param);
        gtint_t n           = std::get<6>(str.param);
        double alpha        = std::get<7>(str.param);
        gtint_t lda         = std::get<8>(str.param);
        gtint_t ldb         = std::get<9>(str.param);
        bool is_memory_test = std::get<10>(str.param);

        // the second field is the side parameter, so it is labelled "_side_"
        std::string res = std::string("trsm_small")
                        + "_side_"   + side
                        + "_diag_"   + diaga
                        + "_uplo_"   + uploa
                        + "_transa_" + transa
                        + "_alpha_"  + (alpha > 0 ? std::to_string(int(alpha)) :
                                        std::string("m") + std::to_string(int(alpha*-1)))
                        + "_lda_"    + std::to_string(lda)
                        + "_ldb_"    + std::to_string(ldb)
                        + "_m_"      + std::to_string(m)
                        + "_n_"      + std::to_string(n);
        return is_memory_test ? res + "_memory_test" : res;
    }
};
#if defined(BLIS_KERNELS_ZEN4) && defined(GTEST_AVX512)
INSTANTIATE_TEST_SUITE_P (
bli_dgemmtrsm_l_zen4_asm_8x24,
DTrsmUkrTest,
DTRSMUkrTest,
::testing::Combine(
::testing::Values(bli_dgemmtrsm_l_zen4_asm_8x24), // ker_ptr
::testing::Values('c', 'r', 'g'), // stor
@@ -99,14 +168,15 @@ INSTANTIATE_TEST_SUITE_P (
::testing::Values(24), // n
::testing::Values(0, 1, 2, 8, 9, 10, 500, 1000), // k
::testing::Values(-1, -5.2, 1, 8.9), // alpha
::testing::Values(0, 9, 53) // ldc
::testing::Values(0, 9, 53), // ldc
::testing::Values(false, true) // is_memory_test
),
::DTrsmUkrTestPrint()
::DTRSMUkrTestPrint()
);
INSTANTIATE_TEST_SUITE_P (
bli_dgemmtrsm_u_zen4_asm_8x24,
DTrsmUkrTest,
DTRSMUkrTest,
::testing::Combine(
::testing::Values(bli_dgemmtrsm_u_zen4_asm_8x24), // ker_ptr
::testing::Values('c', 'r', 'g'), // stor
@@ -116,17 +186,37 @@ INSTANTIATE_TEST_SUITE_P (
::testing::Values(24), // n
::testing::Values(0, 1, 2, 8, 9, 10, 500, 1000), // k
::testing::Values(-1, -5.2, 1, 8.9), // alpha
::testing::Values(0, 9, 53) // ldc
::testing::Values(0, 9, 53), // ldc
::testing::Values(false, true) // is_memory_test
),
::DTrsmUkrTestPrint()
::DTRSMUkrTestPrint()
);
// Instantiates DTRSMSmallUkrTest for the bli_trsm_small_AVX512 kernel over
// every combination of the values listed below; each combination is run
// both without and with the memory test.
// m and n use ::testing::Range, whose end value is exclusive, so they
// cover 1 through 8.
INSTANTIATE_TEST_SUITE_P (
bli_trsm_small_AVX512,
DTRSMSmallUkrTest,
::testing::Combine(
::testing::Values(bli_trsm_small_AVX512), // ker_ptr
::testing::Values('l', 'r'), // side
::testing::Values('l', 'u'), // uplo
::testing::Values('n', 'u'), // diaga
::testing::Values('n', 't'), // transa
::testing::Range(gtint_t(1), gtint_t(9), 1), // m
::testing::Range(gtint_t(1), gtint_t(9), 1), // n
::testing::Values(-3, 3), // alpha
::testing::Values(0, 10), // lda_inc
::testing::Values(0, 10), // ldb_inc
::testing::Values(false, true) // is_memory_test
),
::DTRSMSmallUkrTestPrint()
);
#endif
#ifdef BLIS_KERNELS_HASWELL
#if defined(BLIS_KERNELS_ZEN) && defined(GTEST_AVX2FMA3)
INSTANTIATE_TEST_SUITE_P (
bli_dgemmtrsm_l_haswell_asm_6x8,
DTrsmUkrTest,
DTRSMUkrTest,
::testing::Combine(
::testing::Values(bli_dgemmtrsm_l_haswell_asm_6x8), // ker_ptr
::testing::Values('c', 'r', 'g'), // stor
@@ -136,14 +226,15 @@ INSTANTIATE_TEST_SUITE_P (
::testing::Values(8), // n
::testing::Values(0, 1, 2, 8, 9, 10, 500, 1000), // k
::testing::Values(-1, -5.2, 1, 8.9), // alpha
::testing::Values(0, 9, 53) // ldc
::testing::Values(0, 9, 53), // ldc
::testing::Values(false, true) // is_memory_test
),
::DTrsmUkrTestPrint()
::DTRSMUkrTestPrint()
);
INSTANTIATE_TEST_SUITE_P (
bli_dgemmtrsm_u_haswell_asm_6x8,
DTrsmUkrTest,
DTRSMUkrTest,
::testing::Combine(
::testing::Values(bli_dgemmtrsm_u_haswell_asm_6x8), // ker_ptr
::testing::Values('c', 'r', 'g'), // stor
@@ -153,8 +244,28 @@ INSTANTIATE_TEST_SUITE_P (
::testing::Values(8), // n
::testing::Values(0, 1, 2, 8, 9, 10, 500, 1000), // k
::testing::Values(-1, -5.2, 1, 8.9), // alpha
::testing::Values(0, 9, 53) // ldc
::testing::Values(0, 9, 53), // ldc
::testing::Values(false, true) // is_memory_test
),
::DTrsmUkrTestPrint()
::DTRSMUkrTestPrint()
);
// Instantiates DTRSMSmallUkrTest for the bli_trsm_small kernel over every
// combination of the values listed below; each combination is run both
// without and with the memory test.
// m and n use ::testing::Range, whose end value is exclusive, so they
// cover 1 through 8.
INSTANTIATE_TEST_SUITE_P (
bli_trsm_small,
DTRSMSmallUkrTest,
::testing::Combine(
::testing::Values(bli_trsm_small), // ker_ptr
::testing::Values('l', 'r'), // side
::testing::Values('l', 'u'), // uplo
::testing::Values('n', 'u'), // diaga
::testing::Values('n', 't'), // transa
::testing::Range(gtint_t(1), gtint_t(9), 1), // m
::testing::Range(gtint_t(1), gtint_t(9), 1), // n
::testing::Values(-3, 3), // alpha
::testing::Values(0, 10), // lda_inc
::testing::Values(0, 10), // ldb_inc
::testing::Values(false, true) // is_memory_test
),
::DTRSMSmallUkrTestPrint()
);
#endif

View File

@@ -34,34 +34,52 @@
#pragma once
#include "level3/trsm/trsm.h"
#include <stdexcept>
#include "blis.h"
#include "level3/trsm/trsm.h"
#include "level3/ref_trsm.h"
#include "inc/check_error.h"
#include "common/testing_helpers.h"
#include <stdexcept>
#include <algorithm>
#include "level3/trsm/test_trsm.h"
// function pointer for DTRSM small kernels
typedef err_t (*dtrsm_small_ker_ft)
(
side_t side,
obj_t* alpha,
obj_t* a,
obj_t* b,
cntx_t* cntx,
cntl_t* cntl,
bool is_parallel
);
/*
* Function to test gemmtrsm ukr
*/
template<typename T, typename FT>
static void test_trsm_ukr( FT ukr_fp, char storage, char uploa, char diaga,
gtint_t m, gtint_t n, gtint_t k, T alpha,
gtint_t ldc_inc, double thresh)
gtint_t ldc_inc, double thresh, bool is_memory_test)
{
gtint_t lda = m, ldb = n;
gtint_t ldc = ldc_inc;
// Allocate memory for A10(k*lda) and A11(m*lda)
T* a10 = (T*)malloc( (k+m) * lda * sizeof(T) ); //col major
// Allocate memory for A01(k*ldb) and B11(m*ldb)
T* b01 = (T*)aligned_alloc(BLIS_HEAP_STRIDE_ALIGN_SIZE, (k+m) * ldb * sizeof(T)); //row major
//----------------------------------------------------------
// Initialize vectors with random numbers.
//----------------------------------------------------------
init_mat( a10, uploa, 'c', 'n', 3, 10, m, (k+m), lda);
init_mat( b01, uploa, 'r', 'n', 3, 10, n, (k+m), ldb);
testinghelpers::ProtectedBuffer a10_buffer( (k+m) * lda * sizeof(T), false, is_memory_test );
// Allocate aligned memory for B01(k*ldb) and B11(m*ldb)
testinghelpers::ProtectedBuffer b01_buffer( (k+m) * ldb * sizeof(T), true , is_memory_test );
T* a10 = (T*)a10_buffer.greenzone_1; // column major
T* b01 = (T*)b01_buffer.greenzone_1; // row major
// Initialize vectors with random numbers.
random_generator_with_INF_NAN( a10, uploa, 'c', 'n', -0.3, 0.3, m, (k+m), lda);
random_generator_with_INF_NAN( b01, uploa, 'r', 'n', -0.3, 0.3, (k+m), n, ldb);
// Get A11(A10 + sizeof(A01)) and B11(B10 + sizeof(B10))
T* a11 = a10 + (k*lda);
T* b11 = b01 + (k*ldb);
@@ -69,7 +87,7 @@ static void test_trsm_ukr( FT ukr_fp, char storage, char uploa, char diaga,
// make A11 triangular for trsm
testinghelpers::make_triangular<T>( 'c', uploa, m, a11, lda );
T* c, *c_ref;
T* c, *c_ref, *b11_copy;
gtint_t rs_c, cs_c, rs_c_ref, cs_c_ref;
gtint_t size_c, size_c_ref;
@@ -77,31 +95,47 @@ static void test_trsm_ukr( FT ukr_fp, char storage, char uploa, char diaga,
if (storage == 'r' || storage == 'R')
{
ldc += n;
rs_c = ldc, cs_c = 1;
rs_c_ref = rs_c, cs_c_ref = cs_c;
size_c = ldc * m * sizeof(T), size_c_ref = ldc * m * sizeof(T);
c_ref = (T*)malloc( size_c_ref );
c = (T*)malloc( size_c );
rs_c = ldc;
cs_c = 1;
rs_c_ref = rs_c;
cs_c_ref = cs_c;
size_c = ldc * m * sizeof(T);
size_c_ref = size_c;
}
else if (storage == 'c' || storage == 'C')
{
ldc += m;
cs_c = ldc, rs_c = 1;
rs_c_ref = rs_c, cs_c_ref = cs_c;
size_c = ldc * n * sizeof(T), size_c_ref = ldc * n * sizeof(T);
c_ref = (T*)malloc( size_c_ref );
c = (T*)malloc( size_c );
rs_c = 1;
cs_c = ldc;
rs_c_ref = rs_c;
cs_c_ref = cs_c;
size_c = ldc * n * sizeof(T);
size_c_ref = size_c;
}
else
else // general storage
{
ldc += m;
rs_c_ref = 1, cs_c_ref = ldc;
rs_c = ldc, cs_c = ldc*ldc;
size_c = ldc * n * ldc * sizeof(T), size_c_ref = ldc * n * 1 * sizeof(T);
c_ref = (T*)malloc( size_c_ref );
c = (T*)malloc( size_c );
// reference does not support general stride, therefore
// reference is set as column major
rs_c_ref = 1,
cs_c_ref = ldc;
// for general stride, both rs_c and cs_c are non-unit strides;
// ldc is used to derive both rs_c and cs_c
rs_c = ldc;
cs_c = ldc*ldc;
size_c = ldc * n * ldc * sizeof(T);
size_c_ref = ldc * n * 1 * sizeof(T);
}
memset(c, 0, size_c);
// get memory for C and c_ref
testinghelpers::ProtectedBuffer c_buffer(size_c, false, is_memory_test);
c = (T*)c_buffer.greenzone_1;
c_ref = (T*)malloc( size_c_ref );
// set c buffers to zero to ensure the unused region of C matrix (extra ldb) is zero
memset(c, 0, size_c);
memset(c_ref, 0, size_c_ref);
// copy contents of B11 to C and C_ref
@@ -114,33 +148,95 @@ static void test_trsm_ukr( FT ukr_fp, char storage, char uploa, char diaga,
}
}
// make A11 diagonal dominant
// Make A11 diagonally dominant in order to make sure that
// the input matrices are solvable.
// In case BLIS_ENABLE_TRSM_PREINVERSION is enabled,
// diagonal elements of A11 have to be inverted twice,
// once for making it diagonal dominant, and once for packing with
// inversion, inverting it twice is equivalent to not inverting it at all.
// Therefore, in case of BLIS_ENABLE_TRSM_PREINVERSION, diagonal elements
// of A11 are not inverted.
#ifndef BLIS_ENABLE_TRSM_PREINVERSION
for (gtint_t i =0;i< m; i++)
{
a11[i+i*lda] = T{float(m)}*a11[i+i*lda];
a11[i+i*lda] = 1 / a11[i+i*lda];
}
#endif
// If A is unit diagonal, set diagonal elements of A11 to 1
if (diaga == 'u' || diaga == 'U')
{
for (gtint_t i =0;i< m; i++)
{
a11[i+i*lda] = 1;
a11[i+i*lda] = T{1};
}
}
//----------------------------------------------------------
// Call BLIS function.
//----------------------------------------------------------
ukr_fp
(
k,
&alpha,
a10, a11,
b01, b11,
c,
rs_c, cs_c,
nullptr, nullptr
);
// add signal handler for segmentation fault
testinghelpers::ProtectedBuffer::start_signal_handler();
try
{
if( is_memory_test )
{
// calling gemmtrsm ukr will modify b11 buffer
// create a copy of B11 so that it can be restored
// for the second call of gemmtrsm ukr
b11_copy = (T*)malloc( m*ldb*sizeof(T) );
memcpy(b11_copy, b11, m*ldb*sizeof(T));
}
// Call ukr
ukr_fp
(
k,
&alpha,
a10, a11,
b01, b11,
c,
rs_c, cs_c,
nullptr, nullptr
);
if (is_memory_test)
{
// set pointers to second buffer
c = (T*)c_buffer.greenzone_2;
a10 = (T*)a10_buffer.greenzone_2;
b01 = (T*)b01_buffer.greenzone_2;
a11 = a10 + (k*lda);
b11 = b01 + (k*ldb);
// copy data from 1st buffer of A and B to second buffer
memcpy(a10, a10_buffer.greenzone_1, (k+m) * lda * sizeof(T));
memcpy(b01, b01_buffer.greenzone_1, k * ldb * sizeof(T));
memset(c, 0, size_c);
// restore B11 and copy contents of B11 to C
for (gtint_t i = 0; i < m; ++i)
{
for (gtint_t j = 0; j < n; ++j)
{
b11[i*ldb + j] = b11_copy[i*ldb + j];
c[j*cs_c + i*rs_c] = b11_copy[i*ldb + j];
}
}
// free b11_copy
free(b11_copy);
// second call to ukr
ukr_fp( k, &alpha, a10, a11, b01, b11, c, rs_c, cs_c, nullptr, nullptr );
}
}
catch(const std::exception& e)
{
// reset to default signal handler
testinghelpers::ProtectedBuffer::stop_signal_handler();
// show failure in case seg fault was detected
FAIL() << "Memory Test Failed";
}
// reset to default signal handler
testinghelpers::ProtectedBuffer::stop_signal_handler();
#ifdef BLIS_ENABLE_TRSM_PREINVERSION
// compensate for the trsm pre-inversion
@@ -150,9 +246,7 @@ static void test_trsm_ukr( FT ukr_fp, char storage, char uploa, char diaga,
}
#endif
//----------------------------------------------------------
// Call reference implementation to get ref results.
//----------------------------------------------------------
// Call reference implementation to get ref results.
if (storage == 'c' || storage == 'C')
{
testinghelpers::ref_gemm<T>( storage, 'n', 't', m, n, k, -1,
@@ -187,28 +281,147 @@ static void test_trsm_ukr( FT ukr_fp, char storage, char uploa, char diaga,
testinghelpers::ref_trsm<T>( 'c', 'l', uploa, 'n', diaga, m, n, 1, a11,
lda, c_ref, ldc );
T* c_ref_gs = (T*)malloc( ldc * n * 1 * sizeof(T) );
memset(c_ref_gs, 0, ldc * n * 1 * sizeof(T));
// there is no equivalent blas call for gen storage,
// in order to compare the gen stored C and column major stored
// create a column major copy of C
T* c_gs = (T*)malloc( ldc * n * 1 * sizeof(T) );
memset(c_gs, 0, ldc * n * 1 * sizeof(T));
for (gtint_t i = 0; i < m; ++i)
{
for (gtint_t j = 0; j < n; ++j)
{
c_ref_gs[i*rs_c_ref + j*cs_c_ref] = c[i*rs_c + j*cs_c];
c_gs[i*rs_c_ref + j*cs_c_ref] = c[i*rs_c + j*cs_c];
}
}
free(c);
c = c_ref_gs;
c = c_gs;
}
//----------------------------------------------------------
// Compute component-wise error.
//----------------------------------------------------------
// Compute component-wise error.
computediff<T>( storage, m, n, c, c_ref, ldc, thresh );
free(a10);
free(b01);
free(c);
if(storage != 'r' && storage != 'R' && storage != 'c' && storage != 'C')
{
// free c_gs in case of general stride
free(c);
}
// free buffers
free(c_ref);
}
template<typename T, typename FT>
static void test_trsm_small_ukr( FT ukr_fp, char side, char uploa, char diaga,
char transa, gtint_t m, gtint_t n, T alpha, gtint_t lda,
gtint_t ldb, double thresh, bool is_memory_test)
{
// create blis objects
obj_t ao = BLIS_OBJECT_INITIALIZER;
obj_t bo = BLIS_OBJECT_INITIALIZER;
obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1;
inc_t rs_a = 1;
inc_t cs_a = lda;
inc_t rs_b = 1;
inc_t cs_b = ldb;
num_t dt = BLIS_DOUBLE;
side_t blis_side;
uplo_t blis_uploa;
trans_t blis_transa;
diag_t blis_diaga;
dim_t m0, n0;
dim_t mn0_a;
bli_convert_blas_dim1( m, m0 );
bli_convert_blas_dim1( n, n0 );
bli_param_map_netlib_to_blis_side( side, &blis_side );
bli_param_map_netlib_to_blis_uplo( uploa, &blis_uploa );
bli_param_map_netlib_to_blis_trans( transa, &blis_transa );
bli_param_map_netlib_to_blis_diag( diaga, &blis_diaga );
bli_set_dim_with_side( blis_side, m0, n0, &mn0_a );
bli_obj_init_finish_1x1( dt, (T*)&alpha, &alphao );
cs_a += mn0_a;
cs_b += m;
// Allocate memory for A (col major)
testinghelpers::ProtectedBuffer a_buf( mn0_a * cs_a * sizeof(T), false, is_memory_test );
// Allocate memory for B (col major)
testinghelpers::ProtectedBuffer b_buf( n * cs_b * sizeof(T), false, is_memory_test );
T* a = (T*)a_buf.greenzone_1;
T* b = (T*)b_buf.greenzone_1;
T* b_ref = (T*)malloc( n * cs_b * sizeof(T) ); // col major
// Initialize buffers with random numbers.
random_generator_with_INF_NAN( a, uploa, 'c', 'n', -0.3, 0.3, mn0_a, mn0_a, cs_a);
random_generator_with_INF_NAN( b, uploa, 'c', 'n', -0.3, 0.3, m, n, cs_b);
// copy contents of b to b_ref
memcpy(b_ref, b, n * cs_b * sizeof(T));
// make A triangular
testinghelpers::make_triangular<T>( 'c', uploa, mn0_a, a, cs_a );
// Make A11 diagonal dominant in order to make sure that
// input matrics are solvable
for (gtint_t i =0;i< mn0_a; i++)
{
a[i+i*cs_a] = 1 / a[i+i*cs_a];
}
bli_obj_init_finish( dt, mn0_a, mn0_a, (T*)a, rs_a, cs_a, &ao );
bli_obj_init_finish( dt, m0, n0, (T*)b, rs_b, cs_b, &bo );
const struc_t struca = BLIS_TRIANGULAR;
bli_obj_set_uplo( blis_uploa, &ao );
bli_obj_set_diag( blis_diaga, &ao );
bli_obj_set_conjtrans( blis_transa, &ao );
bli_obj_set_struc( struca, &ao );
// add signal handler for segmentation fault
testinghelpers::ProtectedBuffer::start_signal_handler();
try
{
// call trsm small kernel
ukr_fp(blis_side, &alphao, &ao, &bo, NULL, NULL, false);
if(is_memory_test)
{
// set A and B pointers to second buffer
b = (T*)a_buf.greenzone_2;
a = (T*)b_buf.greenzone_2;
// copy data from first buffers of A and B to second buffer
memcpy(b, b_ref, n * cs_b * sizeof(T));
memcpy(a, (T*)a_buf.greenzone_1, mn0_a * cs_a * sizeof(T));
bli_obj_init_finish( dt, m0, n0, (T*)b, rs_b, cs_b, &bo );
bli_obj_init_finish( dt, mn0_a, mn0_a, (T*)a, rs_a, cs_a, &ao );
// call trsm small kernel
ukr_fp(blis_side, &alphao, &ao, &bo, NULL, NULL, false);
}
}
catch(const std::exception& e)
{
// reset to default signal handler
testinghelpers::ProtectedBuffer::stop_signal_handler();
// show failure in case seg fault was detected
FAIL() << "Memory Test Failed";
}
// reset to default signal handler
testinghelpers::ProtectedBuffer::stop_signal_handler();
// call to reference trsm
testinghelpers::ref_trsm<T>( 'c', side, uploa, transa, diaga, m, n, alpha, a,
cs_a, b_ref, cs_b );
computediff<T>( 'c', m, n, b, b_ref, cs_b, thresh );
// free memory
free(b_ref);
}