From de92fb0680e5e681be6ff800014b6bb8efd2f7bf Mon Sep 17 00:00:00 2001 From: Shubham Sharma Date: Fri, 16 Feb 2024 15:45:55 +0530 Subject: [PATCH] Added Memory testing for DTRSM - Added framework for memory testing. - Out-of-bounds reads and writes can be detected in both C and assembly. - Added memory tests for DTRSM. - Test methodology: - Use Linux page protection (mprotect) to mark some memory before and after the required buffer as protected. - Set the first and last page_size bytes as read, write and execute protected (redzones). - If any part of the code tries to read or write in the redzones, a SIGSEGV signal will be generated, which can be used to detect an out-of-bounds read or write. - Page protection can only be set per page. If the required size for the buffer is not a multiple of pagesize, we have to allocate more memory than required in order to make sure the start and end of the redzones align with page boundaries. - Instead of a plain malloc(size), allocate 'buffer_size+(2*pagesize)' where buffer_size = minimum size such that buffer_size > 'size' and buffer_size is a multiple of pagesize, plus one extra page so that the first redzone can be aligned to a page boundary. - Use the first and last page_size bytes of the page-aligned region as redzones, use the first 'size' bytes of the middle buffer as the first greenzone and the last 'size' bytes as the second greenzone. - Call the test code once with the first greenzone and then with the second greenzone. The first greenzone starts right after the first redzone and the second greenzone ends right before the second redzone, so if the test code reads or writes before or after the buffer, it will be detected. 
|_____________________________________________________| | red_zone1 | green_zone1 greenzone_2 | red_zone2| |_____________________________________________________| AMD-Internal: [CPUPL-4403] Change-Id: Ic5c22a9adf8f833c77510686eee886485e894354 --- .../inc/common/protected_buffer.h | 79 +++++ .../inc/common/testing_helpers.h | 1 + .../src/common/protected_buffer.cpp | 180 ++++++++++ gtestsuite/testsuite/level3/trsm/test_trsm.h | 6 +- gtestsuite/testsuite/ukr/trsm/dtrsm_ukr.cpp | 161 +++++++-- gtestsuite/testsuite/ukr/trsm/test_trsm_ukr.h | 335 ++++++++++++++---- 6 files changed, 673 insertions(+), 89 deletions(-) create mode 100644 gtestsuite/testinghelpers/inc/common/protected_buffer.h create mode 100644 gtestsuite/testinghelpers/src/common/protected_buffer.cpp diff --git a/gtestsuite/testinghelpers/inc/common/protected_buffer.h b/gtestsuite/testinghelpers/inc/common/protected_buffer.h new file mode 100644 index 000000000..80736f0c3 --- /dev/null +++ b/gtestsuite/testinghelpers/inc/common/protected_buffer.h @@ -0,0 +1,79 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + + +#pragma once + +#include "common/type_info.h" + +namespace testinghelpers { + class ProtectedBuffer + { + private: + static const size_t REDZONE_SIZE = 1; + void* redzone_1 = nullptr; + void* redzone_2 = nullptr; + void* mem = nullptr; + bool is_mem_test = false; + + /** + * ========================================================================== + * get_mem + * returns a aligned or unaligned buffer of size "size" + * ========================================================================== + * @param[in] size specifies the size of the buffer to be allocated. + * @param[in] is_aligned specifies if the buffer needs to be aligned or not. + */ + static void* get_mem(dim_t, bool); + + public: + void* greenzone_1 = nullptr; + void* greenzone_2 = nullptr; + + ProtectedBuffer(dim_t size, bool is_aligned = false, bool is_mem_test = false); + ~ProtectedBuffer(); + + static void handle_mem_test_fail(int signal); + + /** + * Adds signal handler for segmentation fault. + */ + static void start_signal_handler(); + + /** + * Removes signal handler for segmentation fault. 
+ */ + static void stop_signal_handler(); + }; +} \ No newline at end of file diff --git a/gtestsuite/testinghelpers/inc/common/testing_helpers.h b/gtestsuite/testinghelpers/inc/common/testing_helpers.h index 372010914..32553404b 100644 --- a/gtestsuite/testinghelpers/inc/common/testing_helpers.h +++ b/gtestsuite/testinghelpers/inc/common/testing_helpers.h @@ -40,3 +40,4 @@ #include "data_generators.h" #include "error_helpers.h" #include "refCBLAS.h" +#include "protected_buffer.h" diff --git a/gtestsuite/testinghelpers/src/common/protected_buffer.cpp b/gtestsuite/testinghelpers/src/common/protected_buffer.cpp new file mode 100644 index 000000000..be3ccb3cb --- /dev/null +++ b/gtestsuite/testinghelpers/src/common/protected_buffer.cpp @@ -0,0 +1,180 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#if defined(__linux__) +#include +#include +#include +#include +#endif + +#include "blis.h" +#include "common/protected_buffer.h" + +/* +* Returns aligned or unaligned memory of required size +*/ +void* testinghelpers::ProtectedBuffer::get_mem(dim_t size, bool is_aligned) +{ +#if defined(__linux__) + return is_aligned ? aligned_alloc(BLIS_HEAP_STRIDE_ALIGN_SIZE, size) : malloc(size); +#else + return is_aligned ? _aligned_malloc(BLIS_HEAP_STRIDE_ALIGN_SIZE, size) : malloc(size); +#endif +} + +/** + * @brief Allocate memory for greenzones and redzones, and add protection to redzones + * + * @param size size of buffer required + * @param is_aligned should allocated memory be aligned + * @param is_mem_test is memory allocated for memory test. 
+ */ +testinghelpers::ProtectedBuffer::ProtectedBuffer(dim_t size, bool is_aligned, bool is_mem_test) +{ +#if defined(__linux__) + this->is_mem_test = is_mem_test; + if (is_mem_test) + { + // query page size + size_t page_size = sysconf(_SC_PAGESIZE); + + // calculate minimum number of pages needed for requested size + size_t buffer_size = ((size / page_size)+1) * page_size; + + // allocate memory (buffer_size + 1 page to ensure 1st redzone can be started at page bounday + // + 2 * REDZONE_SIZE pages for 1 redzone on each end of buffer) + mem = (char*)get_mem(buffer_size + ((1 + (REDZONE_SIZE * 2)) * page_size), is_aligned); + + // set redzone_1 to mem+page_size to make sure that + // atleast one page boundary exist between mem and redzone_1 + redzone_1 = (void*)((char*)mem + page_size); + + // find page boundary ( address which is multiple of pagesize and less than redzone_1 ) + // say page_size is Nth power of 2 therefore only (N+1)th LSB is set in page_size + // (-page_size) implies 2's complement therefore in (-page_size) N LSBs are unset, all + // other bits are set. + // (redzone_1 & -page_size) will unset N LSBs of redzone_1, therefore making redzone_1 a + // multiple of page_size. + // this line is equivalent to (redzone_1 - (redzone_1 % page_size)) + // where page_size is power of two. 
+ redzone_1 = (void*)((uintptr_t)(redzone_1) & -page_size); + + // redzone_2 = redzone_1 + sizeof redzone_1 + sizeof buffer + redzone_2 = (void*)((char*)redzone_1 + (page_size * REDZONE_SIZE) + buffer_size); + + // make redzones read/wrtite/execute protected + int res = mprotect(redzone_1, page_size * REDZONE_SIZE, PROT_NONE); + if (res == -1) + { + do { perror("mprotect"); exit(EXIT_FAILURE); } while (0); + } + res = mprotect(redzone_2, page_size * REDZONE_SIZE, PROT_NONE); + if (res == -1) + { + do { perror("mprotect"); exit(EXIT_FAILURE); } while (0); + } + + // get address to the first "size" bytes of buffer + greenzone_1 = (void*)((char*)redzone_1 + (page_size * REDZONE_SIZE)); + + // get address to the last "size" bytes of buffer + greenzone_2 = (void*)((char*)redzone_2 - size); + } + else +#endif + { + mem = get_mem(size, is_aligned); + greenzone_1 = mem, greenzone_2 = mem; + } + +} + +/** + * @brief Remove Protection from redzones and free allocated memory + */ +testinghelpers::ProtectedBuffer::~ProtectedBuffer() +{ +#if defined(__linux__) + if(is_mem_test) + { + size_t page_size = sysconf(_SC_PAGESIZE); + + int res = mprotect(redzone_1, page_size * REDZONE_SIZE, PROT_READ | PROT_WRITE ); + if (res == -1) + { + do { perror("mprotect"); exit(EXIT_FAILURE); } while (0); + } + res = mprotect(redzone_2, page_size * REDZONE_SIZE, PROT_READ | PROT_WRITE ); + if (res == -1) + { + do { perror("mprotect"); exit(EXIT_FAILURE); } while (0); + } + } +#endif + free(mem); +} + +/** + * Function to handle segfault during memory test and convert it to a exception + */ +void testinghelpers::ProtectedBuffer::handle_mem_test_fail(int signal) +{ +#if defined(__linux__) + // unmask the segmentation fault signal + sigset_t signal_set; + sigemptyset(&signal_set); + sigaddset(&signal_set, SIGSEGV); + sigprocmask(SIG_UNBLOCK, &signal_set, NULL); + + throw std::out_of_range("err invalid"); +#endif +} + +void testinghelpers::ProtectedBuffer::start_signal_handler() +{ +#if 
defined(__linux__) + // add signal handler for segmentation fault + signal(SIGSEGV, ProtectedBuffer::handle_mem_test_fail); +#endif +} + + +void testinghelpers::ProtectedBuffer::stop_signal_handler() +{ +#if defined(__linux__) + // reset to default signal handler + signal(SIGSEGV, SIG_DFL); +#endif +} \ No newline at end of file diff --git a/gtestsuite/testsuite/level3/trsm/test_trsm.h b/gtestsuite/testsuite/level3/trsm/test_trsm.h index 833d4bce8..af416d1b1 100644 --- a/gtestsuite/testsuite/level3/trsm/test_trsm.h +++ b/gtestsuite/testsuite/level3/trsm/test_trsm.h @@ -109,7 +109,7 @@ void generate_NAN_INF( T* mat, char uploa, gtint_t m, gtint_t ld, EVT_TYPE type, } template -void init_mat( T* mat, char uploa, char storage, char trans, gtint_t from, gtint_t to, gtint_t m, +void random_generator_with_INF_NAN( T* mat, char uploa, char storage, char trans, gtint_t from, gtint_t to, gtint_t m, gtint_t n, gtint_t ld, EVT_TYPE type = NO_EVT, bool is_a = false ) { switch( type ) @@ -157,8 +157,8 @@ void test_trsm( char storage, char side, char uploa, char transa, char diaga, std::vector a( testinghelpers::matsize(storage, transa, mn, mn, lda) ); std::vector b( testinghelpers::matsize(storage, 'n', m, n, ldb) ); srand(time(0)); - init_mat( a.data(), uploa, storage, transa, lower, upper, mn, mn, lda, NO_EVT, true); - init_mat( b.data(), uploa, storage, 'n', 3, 10, m, n, ldb, b_init, false); + random_generator_with_INF_NAN( a.data(), uploa, storage, transa, lower, upper, mn, mn, lda, NO_EVT, true); + random_generator_with_INF_NAN( b.data(), uploa, storage, 'n', 3, 10, m, n, ldb, b_init, false); bool nan_inf_check = false; // Setting the nan_inf_check boolean to true if alpa has diff --git a/gtestsuite/testsuite/ukr/trsm/dtrsm_ukr.cpp b/gtestsuite/testsuite/ukr/trsm/dtrsm_ukr.cpp index c78af7946..7c9cc89e0 100644 --- a/gtestsuite/testsuite/ukr/trsm/dtrsm_ukr.cpp +++ b/gtestsuite/testsuite/ukr/trsm/dtrsm_ukr.cpp @@ -39,8 +39,8 @@ #include "level3/trsm/test_trsm.h" -class 
DTrsmUkrTest : - public ::testing::TestWithParam> {}; // ldc_inc + gtint_t, // ldc_inc + bool >> {}; // is_memory_test +class DTRSMSmallUkrTest : + public ::testing::TestWithParam> {}; // is_memory_test -TEST_P(DTrsmUkrTest, native) +TEST_P(DTRSMUkrTest, native_kernel) { using T = double; dgemmtrsm_ukr_ft ukr_fp = std::get<0>(GetParam()); @@ -63,33 +76,89 @@ TEST_P(DTrsmUkrTest, native) gtint_t k = std::get<6>(GetParam()); T alpha = std::get<7>(GetParam()); gtint_t ldc = std::get<8>(GetParam()); + bool is_memory_test = std::get<9>(GetParam()); double thresh = 2 * m * testinghelpers::getEpsilon(); - test_trsm_ukr( ukr_fp, storage, uploa, diaga, m, n, k, alpha, ldc, thresh ); + test_trsm_ukr( ukr_fp, storage, uploa, diaga, m, n, k, alpha, ldc, thresh, is_memory_test); } -class DTrsmUkrTestPrint { +TEST_P(DTRSMSmallUkrTest, small_kernel) +{ + using T = double; + dtrsm_small_ker_ft ukr_fp = std::get<0>(GetParam()); + char side = std::get<1>(GetParam()); + char uploa = std::get<2>(GetParam()); + char diaga = std::get<3>(GetParam()); + char transa = std::get<4>(GetParam()); + gtint_t m = std::get<5>(GetParam()); + gtint_t n = std::get<6>(GetParam()); + T alpha = std::get<7>(GetParam()); + gtint_t lda = std::get<8>(GetParam()); + gtint_t ldb = std::get<9>(GetParam()); + bool is_memory_test = std::get<10>(GetParam()); + + double thresh = 2 * std::max(std::max(m, n), 3) * testinghelpers::getEpsilon(); + test_trsm_small_ukr( ukr_fp, side, uploa, diaga, transa, m, n, alpha, lda, ldb, thresh, is_memory_test); +} + +class DTRSMUkrTestPrint { public: std::string operator()( testing::TestParamInfo> str) const{ + gtint_t, gtint_t, double, gtint_t, bool>> str) const{ char storage = std::get<1>(str.param); char uploa = std::get<2>(str.param); char diaga = std::get<3>(str.param); gtint_t k = std::get<6>(str.param); double alpha = std::get<7>(str.param); gtint_t ldc = std::get<8>(str.param); - return std::string("dgemmtrsm_ukr") + "_s" + storage + "_d" + diaga + "_u" + uploa + - "_k" 
+ std::to_string(k) + "_a" + - (alpha > 0 ? std::to_string(int(alpha)) : std::string("m") + std::to_string(int(alpha*-1))) + - "_c" + std::to_string(ldc); + bool is_memory_test = std::get<9>(str.param); + std::string res = std::string("dgemmtrsm_ukr") + + "_stor_" + storage + + "_diag_" + diaga + + "_uplo_" + uploa + + "_k_" + std::to_string(k) + + "_alpha_" + (alpha > 0 ? std::to_string(int(alpha)) : + std::string("m") + std::to_string(int(alpha*-1))) + + "_ldc_" + std::to_string(ldc); + return is_memory_test ? res + "_memory_test" : res; } }; -#ifdef BLIS_KERNELS_ZEN4 +class DTRSMSmallUkrTestPrint { +public: + std::string operator()( + testing::TestParamInfo> str) const{ + char side = std::get<1>(str.param); + char uploa = std::get<2>(str.param); + char diaga = std::get<3>(str.param); + char transa = std::get<4>(str.param); + gtint_t m = std::get<5>(str.param); + gtint_t n = std::get<6>(str.param); + double alpha = std::get<7>(str.param); + gtint_t lda = std::get<8>(str.param); + gtint_t ldb = std::get<9>(str.param); + bool is_memory_test = std::get<10>(str.param); + std::string res = std::string("trsm_small_") + + "_stor_" + side + + "_diag_" + diaga + + "_uplo_" + uploa + + "_trana_" + transa + + "_alpha_" + (alpha > 0 ? std::to_string(int(alpha)) : + std::string("m") + std::to_string(int(alpha*-1))) + + "_lda_" + std::to_string(lda) + + "_ldb_" + std::to_string(ldb) + + "_m_" + std::to_string(m) + + "_n_" + std::to_string(n); + return is_memory_test ? 
res + "_memory_test" : res; + } +}; + +#if defined(BLIS_KERNELS_ZEN4) && defined(GTEST_AVX512) INSTANTIATE_TEST_SUITE_P ( bli_dgemmtrsm_l_zen4_asm_8x24, - DTrsmUkrTest, + DTRSMUkrTest, ::testing::Combine( ::testing::Values(bli_dgemmtrsm_l_zen4_asm_8x24), // ker_ptr ::testing::Values('c', 'r', 'g'), // stor @@ -99,14 +168,15 @@ INSTANTIATE_TEST_SUITE_P ( ::testing::Values(24), // n ::testing::Values(0, 1, 2, 8, 9, 10, 500, 1000), // k ::testing::Values(-1, -5.2, 1, 8.9), // alpha - ::testing::Values(0, 9, 53) // ldc + ::testing::Values(0, 9, 53), // ldc + ::testing::Values(false, true) // is_memory_test ), - ::DTrsmUkrTestPrint() + ::DTRSMUkrTestPrint() ); INSTANTIATE_TEST_SUITE_P ( bli_dgemmtrsm_u_zen4_asm_8x24, - DTrsmUkrTest, + DTRSMUkrTest, ::testing::Combine( ::testing::Values(bli_dgemmtrsm_u_zen4_asm_8x24), // ker_ptr ::testing::Values('c', 'r', 'g'), // stor @@ -116,17 +186,37 @@ INSTANTIATE_TEST_SUITE_P ( ::testing::Values(24), // n ::testing::Values(0, 1, 2, 8, 9, 10, 500, 1000), // k ::testing::Values(-1, -5.2, 1, 8.9), // alpha - ::testing::Values(0, 9, 53) // ldc + ::testing::Values(0, 9, 53), // ldc + ::testing::Values(false, true) // is_memory_test ), - ::DTrsmUkrTestPrint() + ::DTRSMUkrTestPrint() +); + +INSTANTIATE_TEST_SUITE_P ( + bli_trsm_small_AVX512, + DTRSMSmallUkrTest, + ::testing::Combine( + ::testing::Values(bli_trsm_small_AVX512), // ker_ptr + ::testing::Values('l', 'r'), // side + ::testing::Values('l', 'u'), // uplo + ::testing::Values('n', 'u'), // diaga + ::testing::Values('n', 't'), // transa + ::testing::Range(gtint_t(1), gtint_t(9), 1), // m + ::testing::Range(gtint_t(1), gtint_t(9), 1), // n + ::testing::Values(-3, 3), // alpha + ::testing::Values(0, 10), // lda_inc + ::testing::Values(0, 10), // ldb_inc + ::testing::Values(false, true) // is_memory_test + ), + ::DTRSMSmallUkrTestPrint() ); #endif -#ifdef BLIS_KERNELS_HASWELL +#if defined(BLIS_KERNELS_ZEN) && defined(GTEST_AVX2FMA3) INSTANTIATE_TEST_SUITE_P ( 
bli_dgemmtrsm_l_haswell_asm_6x8, - DTrsmUkrTest, + DTRSMUkrTest, ::testing::Combine( ::testing::Values(bli_dgemmtrsm_l_haswell_asm_6x8), // ker_ptr ::testing::Values('c', 'r', 'g'), // stor @@ -136,14 +226,15 @@ INSTANTIATE_TEST_SUITE_P ( ::testing::Values(8), // n ::testing::Values(0, 1, 2, 8, 9, 10, 500, 1000), // k ::testing::Values(-1, -5.2, 1, 8.9), // alpha - ::testing::Values(0, 9, 53) // ldc + ::testing::Values(0, 9, 53), // ldc + ::testing::Values(false, true) // is_memory_test ), - ::DTrsmUkrTestPrint() + ::DTRSMUkrTestPrint() ); INSTANTIATE_TEST_SUITE_P ( bli_dgemmtrsm_u_haswell_asm_6x8, - DTrsmUkrTest, + DTRSMUkrTest, ::testing::Combine( ::testing::Values(bli_dgemmtrsm_u_haswell_asm_6x8), // ker_ptr ::testing::Values('c', 'r', 'g'), // stor @@ -153,8 +244,28 @@ INSTANTIATE_TEST_SUITE_P ( ::testing::Values(8), // n ::testing::Values(0, 1, 2, 8, 9, 10, 500, 1000), // k ::testing::Values(-1, -5.2, 1, 8.9), // alpha - ::testing::Values(0, 9, 53) // ldc + ::testing::Values(0, 9, 53), // ldc + ::testing::Values(false, true) // is_memory_test ), - ::DTrsmUkrTestPrint() + ::DTRSMUkrTestPrint() +); + +INSTANTIATE_TEST_SUITE_P ( + bli_trsm_small, + DTRSMSmallUkrTest, + ::testing::Combine( + ::testing::Values(bli_trsm_small), // ker_ptr + ::testing::Values('l', 'r'), // side + ::testing::Values('l', 'u'), // uplo + ::testing::Values('n', 'u'), // diaga + ::testing::Values('n', 't'), // transa + ::testing::Range(gtint_t(1), gtint_t(9), 1), // m + ::testing::Range(gtint_t(1), gtint_t(9), 1), // n + ::testing::Values(-3, 3), // alpha + ::testing::Values(0, 10), // lda_inc + ::testing::Values(0, 10), // ldb_inc + ::testing::Values(false, true) // is_memory_test + ), + ::DTRSMSmallUkrTestPrint() ); #endif \ No newline at end of file diff --git a/gtestsuite/testsuite/ukr/trsm/test_trsm_ukr.h b/gtestsuite/testsuite/ukr/trsm/test_trsm_ukr.h index d57db8491..9e8edc2f1 100644 --- a/gtestsuite/testsuite/ukr/trsm/test_trsm_ukr.h +++ 
b/gtestsuite/testsuite/ukr/trsm/test_trsm_ukr.h @@ -34,34 +34,52 @@ #pragma once -#include "level3/trsm/trsm.h" +#include #include "blis.h" +#include "level3/trsm/trsm.h" #include "level3/ref_trsm.h" #include "inc/check_error.h" #include "common/testing_helpers.h" -#include -#include #include "level3/trsm/test_trsm.h" +// function pointer for DTRSM small kernels +typedef err_t (*dtrsm_small_ker_ft) +( + side_t side, + obj_t* alpha, + obj_t* a, + obj_t* b, + cntx_t* cntx, + cntl_t* cntl, + bool is_parallel +); +/* +* Function to test gemmtrsm ukr +*/ template static void test_trsm_ukr( FT ukr_fp, char storage, char uploa, char diaga, gtint_t m, gtint_t n, gtint_t k, T alpha, - gtint_t ldc_inc, double thresh) + gtint_t ldc_inc, double thresh, bool is_memory_test) { gtint_t lda = m, ldb = n; gtint_t ldc = ldc_inc; + // Allocate memory for A10(k*lda) and A11(m*lda) - T* a10 = (T*)malloc( (k+m) * lda * sizeof(T) ); //col major - // Allocate memory for A01(k*ldb) and B11(m*ldb) - T* b01 = (T*)aligned_alloc(BLIS_HEAP_STRIDE_ALIGN_SIZE, (k+m) * ldb * sizeof(T)); //row major - //---------------------------------------------------------- - // Initialize vectors with random numbers. - //---------------------------------------------------------- - init_mat( a10, uploa, 'c', 'n', 3, 10, m, (k+m), lda); - init_mat( b01, uploa, 'r', 'n', 3, 10, n, (k+m), ldb); + testinghelpers::ProtectedBuffer a10_buffer( (k+m) * lda * sizeof(T), false, is_memory_test ); + // Allocate aligned memory for B01(k*ldb) and B11(m*ldb) + testinghelpers::ProtectedBuffer b01_buffer( (k+m) * ldb * sizeof(T), true , is_memory_test ); + + + T* a10 = (T*)a10_buffer.greenzone_1; // column major + T* b01 = (T*)b01_buffer.greenzone_1; // row major + + // Initialize vectors with random numbers. 
+ random_generator_with_INF_NAN( a10, uploa, 'c', 'n', -0.3, 0.3, m, (k+m), lda); + random_generator_with_INF_NAN( b01, uploa, 'r', 'n', -0.3, 0.3, (k+m), n, ldb); + // Get A11(A10 + sizeof(A01)) and B11(B10 + sizeof(B10)) T* a11 = a10 + (k*lda); T* b11 = b01 + (k*ldb); @@ -69,7 +87,7 @@ static void test_trsm_ukr( FT ukr_fp, char storage, char uploa, char diaga, // make A11 triangular for trsm testinghelpers::make_triangular( 'c', uploa, m, a11, lda ); - T* c, *c_ref; + T* c, *c_ref, *b11_copy; gtint_t rs_c, cs_c, rs_c_ref, cs_c_ref; gtint_t size_c, size_c_ref; @@ -77,31 +95,47 @@ static void test_trsm_ukr( FT ukr_fp, char storage, char uploa, char diaga, if (storage == 'r' || storage == 'R') { ldc += n; - rs_c = ldc, cs_c = 1; - rs_c_ref = rs_c, cs_c_ref = cs_c; - size_c = ldc * m * sizeof(T), size_c_ref = ldc * m * sizeof(T); - c_ref = (T*)malloc( size_c_ref ); - c = (T*)malloc( size_c ); + rs_c = ldc; + cs_c = 1; + rs_c_ref = rs_c; + cs_c_ref = cs_c; + size_c = ldc * m * sizeof(T); + size_c_ref = size_c; } else if (storage == 'c' || storage == 'C') { ldc += m; - cs_c = ldc, rs_c = 1; - rs_c_ref = rs_c, cs_c_ref = cs_c; - size_c = ldc * n * sizeof(T), size_c_ref = ldc * n * sizeof(T); - c_ref = (T*)malloc( size_c_ref ); - c = (T*)malloc( size_c ); + rs_c = 1; + cs_c = ldc; + rs_c_ref = rs_c; + cs_c_ref = cs_c; + size_c = ldc * n * sizeof(T); + size_c_ref = size_c; } - else + else // general storage { ldc += m; - rs_c_ref = 1, cs_c_ref = ldc; - rs_c = ldc, cs_c = ldc*ldc; - size_c = ldc * n * ldc * sizeof(T), size_c_ref = ldc * n * 1 * sizeof(T); - c_ref = (T*)malloc( size_c_ref ); - c = (T*)malloc( size_c ); + + // reference does not support general stride, therefore + // reference is set as column major + rs_c_ref = 1, + cs_c_ref = ldc; + + // for general stride, rs_c and cs_c both are non unit stride + // ldc is used to derieve both rs_c and cs_c + rs_c = ldc; + cs_c = ldc*ldc; + size_c = ldc * n * ldc * sizeof(T); + size_c_ref = ldc * n * 1 * sizeof(T); } - 
memset(c, 0, size_c); + + // get memory for C and c_ref + testinghelpers::ProtectedBuffer c_buffer(size_c, false, is_memory_test); + c = (T*)c_buffer.greenzone_1; + c_ref = (T*)malloc( size_c_ref ); + + // set c buffers to zero to ensure the unused region of C matrix (extra ldb) is zero + memset(c, 0, size_c); memset(c_ref, 0, size_c_ref); // copy contents of B11 to C and C_ref @@ -114,33 +148,95 @@ static void test_trsm_ukr( FT ukr_fp, char storage, char uploa, char diaga, } } - // make A11 diagonal dominant + // Make A11 diagonal dominant in order to make sure that + // input matrics are solvable + // In case BLIS_ENABLE_TRSM_PREINVERSION is enabled, + // diagonal elements of A11 have to be inverted twice, + // once for making it diagonal dominant, and once for packing with + // inversion, inverting it twice is equivalent to not inverting it at all. + // Therefore, in case of BLIS_ENABLE_TRSM_PREINVERSION, diagonal elements + // of A11 are not inverted. +#ifndef BLIS_ENABLE_TRSM_PREINVERSION for (gtint_t i =0;i< m; i++) { - a11[i+i*lda] = T{float(m)}*a11[i+i*lda]; + a11[i+i*lda] = 1 / a11[i+i*lda]; } +#endif + // If A is unit diagonal, set diagonal elements of A11 to 1 if (diaga == 'u' || diaga == 'U') { for (gtint_t i =0;i< m; i++) { - a11[i+i*lda] = 1; + a11[i+i*lda] = T{1}; } } - //---------------------------------------------------------- - // Call BLIS function. 
- //---------------------------------------------------------- - ukr_fp - ( - k, - &alpha, - a10, a11, - b01, b11, - c, - rs_c, cs_c, - nullptr, nullptr - ); + // add signal handler for segmentation fault + testinghelpers::ProtectedBuffer::start_signal_handler(); + try + { + if( is_memory_test ) + { + // calling gemmtrsm ukr will modify b11 buffer + // create a copy of B11 so that it can be restored + // for the second call of gemmtrsm ukr + b11_copy = (T*)malloc( m*ldb*sizeof(T) ); + memcpy(b11_copy, b11, m*ldb*sizeof(T)); + } + + // Call ukr + ukr_fp + ( + k, + &alpha, + a10, a11, + b01, b11, + c, + rs_c, cs_c, + nullptr, nullptr + ); + if (is_memory_test) + { + // set pointers to second buffer + c = (T*)c_buffer.greenzone_2; + a10 = (T*)a10_buffer.greenzone_2; + b01 = (T*)b01_buffer.greenzone_2; + a11 = a10 + (k*lda); + b11 = b01 + (k*ldb); + + // copy data from 1st buffer of A and B to second buffer + memcpy(a10, a10_buffer.greenzone_1, (k+m) * lda * sizeof(T)); + memcpy(b01, b01_buffer.greenzone_1, k * ldb * sizeof(T)); + + memset(c, 0, size_c); + // restore B11 and copy contents of B11 to C + for (gtint_t i = 0; i < m; ++i) + { + for (gtint_t j = 0; j < n; ++j) + { + b11[i*ldb + j] = b11_copy[i*ldb + j]; + c[j*cs_c + i*rs_c] = b11_copy[i*ldb + j]; + } + } + // free b11_copy + free(b11_copy); + + // second call to ukr + ukr_fp( k, &alpha, a10, a11, b01, b11, c, rs_c, cs_c, nullptr, nullptr ); + } + } + catch(const std::exception& e) + { + // reset to default signal handler + testinghelpers::ProtectedBuffer::stop_signal_handler(); + + // show failure in case seg fault was detected + FAIL() << "Memory Test Failed"; + } + // reset to default signal handler + testinghelpers::ProtectedBuffer::stop_signal_handler(); + #ifdef BLIS_ENABLE_TRSM_PREINVERSION // compensate for the trsm per-inversion @@ -150,9 +246,7 @@ static void test_trsm_ukr( FT ukr_fp, char storage, char uploa, char diaga, } #endif - //---------------------------------------------------------- - // 
Call reference implementation to get ref results. - //---------------------------------------------------------- + // Call reference implementation to get ref results. if (storage == 'c' || storage == 'C') { testinghelpers::ref_gemm( storage, 'n', 't', m, n, k, -1, @@ -187,28 +281,147 @@ static void test_trsm_ukr( FT ukr_fp, char storage, char uploa, char diaga, testinghelpers::ref_trsm( 'c', 'l', uploa, 'n', diaga, m, n, 1, a11, lda, c_ref, ldc ); - T* c_ref_gs = (T*)malloc( ldc * n * 1 * sizeof(T) ); - memset(c_ref_gs, 0, ldc * n * 1 * sizeof(T)); - + // there is no equivalent blas call for gen storage, + // in order to compare the gen stored C and column major stored + // create a column major copy of C + T* c_gs = (T*)malloc( ldc * n * 1 * sizeof(T) ); + memset(c_gs, 0, ldc * n * 1 * sizeof(T)); for (gtint_t i = 0; i < m; ++i) { for (gtint_t j = 0; j < n; ++j) { - c_ref_gs[i*rs_c_ref + j*cs_c_ref] = c[i*rs_c + j*cs_c]; + c_gs[i*rs_c_ref + j*cs_c_ref] = c[i*rs_c + j*cs_c]; } } - free(c); - c = c_ref_gs; + + c = c_gs; } - //---------------------------------------------------------- - // Compute component-wise error. - //---------------------------------------------------------- + // Compute component-wise error. 
computediff( storage, m, n, c, c_ref, ldc, thresh ); - free(a10); - free(b01); - free(c); + if(storage != 'r' && storage != 'R' && storage != 'c' && storage != 'C') + { + // free c_gs in case of general stride + free(c); + } + + // free buffers free(c_ref); +} + +template +static void test_trsm_small_ukr( FT ukr_fp, char side, char uploa, char diaga, + char transa, gtint_t m, gtint_t n, T alpha, gtint_t lda, + gtint_t ldb, double thresh, bool is_memory_test) +{ + // create blis objects + obj_t ao = BLIS_OBJECT_INITIALIZER; + obj_t bo = BLIS_OBJECT_INITIALIZER; + obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; + + inc_t rs_a = 1; + inc_t cs_a = lda; + inc_t rs_b = 1; + inc_t cs_b = ldb; + num_t dt = BLIS_DOUBLE; + + side_t blis_side; + uplo_t blis_uploa; + trans_t blis_transa; + diag_t blis_diaga; + dim_t m0, n0; + dim_t mn0_a; + bli_convert_blas_dim1( m, m0 ); + bli_convert_blas_dim1( n, n0 ); + + bli_param_map_netlib_to_blis_side( side, &blis_side ); + bli_param_map_netlib_to_blis_uplo( uploa, &blis_uploa ); + bli_param_map_netlib_to_blis_trans( transa, &blis_transa ); + bli_param_map_netlib_to_blis_diag( diaga, &blis_diaga ); + + bli_set_dim_with_side( blis_side, m0, n0, &mn0_a ); + bli_obj_init_finish_1x1( dt, (T*)&alpha, &alphao ); + + cs_a += mn0_a; + cs_b += m; + + // Allocate memory for A (col major) + testinghelpers::ProtectedBuffer a_buf( mn0_a * cs_a * sizeof(T), false, is_memory_test ); + // Allocate memory for B (col major) + testinghelpers::ProtectedBuffer b_buf( n * cs_b * sizeof(T), false, is_memory_test ); + + T* a = (T*)a_buf.greenzone_1; + T* b = (T*)b_buf.greenzone_1; + T* b_ref = (T*)malloc( n * cs_b * sizeof(T) ); // col major + + // Initialize buffers with random numbers. 
+ random_generator_with_INF_NAN( a, uploa, 'c', 'n', -0.3, 0.3, mn0_a, mn0_a, cs_a); + random_generator_with_INF_NAN( b, uploa, 'c', 'n', -0.3, 0.3, m, n, cs_b); + + // copy contents of b to b_ref + memcpy(b_ref, b, n * cs_b * sizeof(T)); + + // make A triangular + testinghelpers::make_triangular( 'c', uploa, mn0_a, a, cs_a ); + + // Make A11 diagonal dominant in order to make sure that + // input matrics are solvable + for (gtint_t i =0;i< mn0_a; i++) + { + a[i+i*cs_a] = 1 / a[i+i*cs_a]; + } + + bli_obj_init_finish( dt, mn0_a, mn0_a, (T*)a, rs_a, cs_a, &ao ); + bli_obj_init_finish( dt, m0, n0, (T*)b, rs_b, cs_b, &bo ); + + const struc_t struca = BLIS_TRIANGULAR; + + bli_obj_set_uplo( blis_uploa, &ao ); + bli_obj_set_diag( blis_diaga, &ao ); + bli_obj_set_conjtrans( blis_transa, &ao ); + bli_obj_set_struc( struca, &ao ); + + // add signal handler for segmentation fault + testinghelpers::ProtectedBuffer::start_signal_handler(); + try + { + // call trsm small kernel + ukr_fp(blis_side, &alphao, &ao, &bo, NULL, NULL, false); + if(is_memory_test) + { + // set A and B pointers to second buffer + b = (T*)a_buf.greenzone_2; + a = (T*)b_buf.greenzone_2; + + // copy data from first buffers of A and B to second buffer + memcpy(b, b_ref, n * cs_b * sizeof(T)); + memcpy(a, (T*)a_buf.greenzone_1, mn0_a * cs_a * sizeof(T)); + bli_obj_init_finish( dt, m0, n0, (T*)b, rs_b, cs_b, &bo ); + bli_obj_init_finish( dt, mn0_a, mn0_a, (T*)a, rs_a, cs_a, &ao ); + + // call trsm small kernel + ukr_fp(blis_side, &alphao, &ao, &bo, NULL, NULL, false); + } + } + catch(const std::exception& e) + { + // reset to default signal handler + testinghelpers::ProtectedBuffer::stop_signal_handler(); + + // show failure in case seg fault was detected + FAIL() << "Memory Test Failed"; + } + // reset to default signal handler + testinghelpers::ProtectedBuffer::stop_signal_handler(); + + // call to reference trsm + testinghelpers::ref_trsm( 'c', side, uploa, transa, diaga, m, n, alpha, a, + cs_a, b_ref, cs_b 
); + + computediff( 'c', m, n, b, b_ref, cs_b, thresh ); + + // free memory + free(b_ref); } \ No newline at end of file