Merge "disabled zgemm induced and gemm sqp temporarily." into amd-staging-milan-3.1

2026-05-13 10:35:38 +00:00 · 2021-04-05 05:51:12 -04:00
parent 5562a27823 7112b73d0d
commit 2aad3fbe55
2 changed files with 1090 additions and 414 deletions
--- a/frame/compat/bla_gemm.c
+++ b/frame/compat/bla_gemm.c
@@ -38,7 +38,7 @@
 //
 // Define BLAS-to-BLIS interfaces.
 //
-
+#define ENABLE_INDUCED_METHOD 0
 #ifdef BLIS_BLAS3_CALLS_TAPI

 #undef  GENTFUNC
@@ -639,10 +639,26 @@ void zgemm_
 	  }

    // The code below will be called when number of threads = 1.
+#if ENABLE_INDUCED_METHOD
+	/* 3m_sqp is optimal for certain matrix shapes.
+	   An initial study shows that it works well for square and near-square sizes.

-
-	dim_t m8rem = m0&7;
-	if( ((blis_transa==BLIS_TRANSPOSE) || (blis_transa==BLIS_NO_TRANSPOSE)) && (blis_transb==BLIS_NO_TRANSPOSE) &&(m8rem==0)&&(n0>40))
+	   * Usage of 3m_sqp is restricted to sizes where it has been found more efficient than the native, sup, and other induced methods.
+	   * Further investigation is necessary to make the usage choices more generic.  */
+	bool sqp_on = false;
+	if((m0==n0)&&(n0==k0)&&(m0==128))
+	{
+		sqp_on = true;
+	}
+#if 0
+    // Although this range reaches 60 GFLOPS in standalone runs, integrating it into the application causes performance degradation.
+	// To be enabled after fixing.
+	if((m0>=4200) && (m0<=4600) && (n0==326)&&(k0==1120)) //to be tuned further.
+	{
+		sqp_on = true;
+	}
+#endif
+	if( ((blis_transa==BLIS_TRANSPOSE) || (blis_transa==BLIS_NO_TRANSPOSE)) && (blis_transb==BLIS_NO_TRANSPOSE) && (sqp_on==true))
 	{
 		//sqp algo is found better for n > 40
 		if(bli_gemm_sqp(&alphao, &ao, &bo, &betao, &co, NULL, NULL)==BLIS_SUCCESS)
@@ -660,6 +676,7 @@ void zgemm_
 		return;
 	}
 	else
+#endif//ENABLE_INDUCED_METHOD
 	{
 		err_t status = bli_gemmsup(&alphao, &ao, &bo, &betao, &co, NULL, NULL);
 		if(status==BLIS_SUCCESS)
--- a/kernels/zen/3/bli_gemm_sqp.c
+++ b/kernels/zen/3/bli_gemm_sqp.c