Add full QA with verification option, plus a few other changes. (#331)

* add verify flag and update scripts

* replace old check_error function with the new check_err

* fix syntax

* remove blank spaces

* remove empty line

* add check_err for tensors

* fix syntax

* replace tensors with vectors in check_err calls

* fix syntax

* remove blank spaces

* fix syntax

* add new line at end of file

* disable conv2d_bwd_weight test, add GPU check

* set check_gpu using export

* check GPU using runShell

* add definition of runShell

* fix script syntax

* reduce the number of threads, add full qa option

* run processing scripts in bash

* fix the branch and host names in performance scripts, add chronos

* replace parameterizedCron with cron

* archive the perf log files

* try to fix git call

* pass branch and host names as arguments into scripts

* fix script arguments

* fix script arguments

* process results on master

* fix pipeline

* add definition of gpu_arch

* run processing scripts in docker

* fix the brackets

* add agent master for the processing stage

* get rid of show_node_info call on master

* try using mici label instead of master, disable MI100 tests for now

* fix syntax

* simplify container for results processing

* remove node(master) from the process_results stage

* put all stages in original order

* change the agent label from master to mici for gfx908

[ROCm/composable_kernel commit: d8415a96b3]
This commit is contained in:
Illia Silin
2022-07-21 13:25:46 -07:00
committed by GitHub
parent cdb627bf1b
commit 6f6ae03ad8
16 changed files with 464 additions and 330 deletions

View File

@@ -318,13 +318,16 @@ bool profile_batched_gemm_reduce_impl(int do_verification,
reduce0_device_buf.FromDevice(d0_g_m_device_result.mData.data());
reduce1_device_buf.FromDevice(d1_g_m_device_result.mData.data());
float c_error = check_error(c_g_m_n_host_result, c_g_m_n_device_result);
float d0_error = check_error(d0_g_m_host_result, d0_g_m_device_result);
float d1_error = check_error(d1_g_m_host_result, d1_g_m_device_result);
bool c_error =
ck::utils::check_err(c_g_m_n_host_result.mData, c_g_m_n_device_result.mData);
bool d0_error =
ck::utils::check_err(d0_g_m_host_result.mData, d0_g_m_device_result.mData);
bool d1_error =
ck::utils::check_err(d1_g_m_host_result.mData, d1_g_m_device_result.mData);
pass = pass && (c_error < 1E-6);
pass = pass && (d0_error < 1E-6);
pass = pass && (d1_error < 1E-6);
pass = pass && (c_error == true);
pass = pass && (d0_error == true);
pass = pass && (d1_error == true);
if(do_log)
{

View File

@@ -250,11 +250,11 @@ bool profile_conv_bwd_weight_impl(int do_verification,
{
wei_device_buf.FromDevice(wei_k_c_y_x_device_result.mData.data());
float max_error = check_error(wei_k_c_y_x_host_result, wei_k_c_y_x_device_result);
pass = ck::utils::check_err(wei_k_c_y_x_host_result.mData,
wei_k_c_y_x_device_result.mData);
if(max_error > 8)
if(pass == false)
{
pass = false;
std::cout << "Fail info:" << conv_ptr->GetTypeString() << std::endl;
}

View File

@@ -8,6 +8,7 @@
#include "ck/tensor_operation/gpu/device/device_conv_bwd_data.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/conv_util.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
@@ -452,7 +453,7 @@ bool profile_convnd_bwd_data_impl(int do_verification,
std::cout << "Pass Info: " << conv_ptr->GetTypeString() << std::endl;
}
check_error(input_host_result, input_device_result);
success = ck::utils::check_err(input_host_result.mData, input_device_result.mData);
if(do_log)
{

View File

@@ -433,21 +433,17 @@ bool profile_convnd_bwd_weight_impl(int do_verification,
{
wei_device_buf.FromDevice(weights_device_result.mData.data());
float max_error = check_error(weights_host_result, weights_device_result);
success = ck::utils::check_err(weights_host_result.mData, weights_device_result.mData);
if(max_error > 8)
if(success == false)
{
std::cout << "Fail Info: " << conv_ptr->GetTypeString() << std::endl;
success = false;
}
else
{
std::cout << "Pass Info: " << conv_ptr->GetTypeString() << std::endl;
}
check_error(weights_host_result, weights_device_result);
if(do_log)
{
std::cout << "in : ";