From d988a5bbd7e989ec9a2f34d7ad627fa46ed88e82 Mon Sep 17 00:00:00 2001
From: "Field G. Van Zee" <field@cs.utexas.edu>
Date: Mon, 16 Dec 2019 16:30:26 -0600
Subject: [PATCH] Fixed bugs in cblas_sdsdot(), sdsdot_().
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Details:
- Fixed a bug in sdsdot_sub() that redundantly added the "alpha" scalar,
  named 'sb'. This value was already being added by the underlying
  sdsdot_() function. Thus, we no longer add 'sb' within sdsdot_sub().
  Thanks to Simon Lukas Märtens for reporting this bug via #367.
- Fixed a second bug in the order of typecasting intermediate products
  in sdsdot_(). Previously, the "alpha" scalar was being added after the
  "outer" typecast to float. However, the operation is supposed to first
  add the dot product to the (promoted) scalar and THEN downcast the sum
  to float. Thanks to Devin Matthews for catching this bug.
---
 CREDITS                                  |  5 +++--
 frame/compat/bla_dot.c                   | 14 ++++++++++----
 frame/compat/cblas/f77_sub/f77_dot_sub.c |  2 +-
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/CREDITS b/CREDITS
index 938d76346..9db863714 100644
--- a/CREDITS
+++ b/CREDITS
@@ -51,6 +51,7 @@ but many others have contributed code and feedback, including
   Ye Luo                   @ye-luo             (Argonne National Laboratory)
   Ricardo Magana           @magania            (Hewlett Packard Enterprise)
   Bryan Marker             @bamarker           (The University of Texas at Austin)
+  Simon Lukas Märtens      @ACSimon33          (RWTH Aachen University)
   Devin Matthews           @devinamatthews     (The University of Texas at Austin)
   Stefanos Mavros          @smavros
   Nisanth Padinharepatt                        (AMD)
@@ -60,7 +61,7 @@ but many others have contributed code and feedback, including
   Ilya Polkovnichenko
   Jack Poulson             @poulson            (Stanford)
   Mathieu Poumeyrol        @kali
-  Christos Psarras         @ChrisPsa           (RWTH-Aachen)
+  Christos Psarras         @ChrisPsa           (RWTH Aachen University)
                            @qnerd
   Michael Rader            @mrader1248
   Pradeep Rao              @pradeeptrgit       (AMD)
@@ -74,7 +75,7 @@ but many others have contributed code and feedback, including
   Nathaniel Smith          @njsmith
   Shaden Smith             @ShadenSmith
   Tyler Smith              @tlrmchlsmth        (The University of Texas at Austin)
-  Paul Springer            @springer13         (RWTH-Aachen)
+  Paul Springer            @springer13         (RWTH Aachen University)
   Adam J. Stewart          @adamjstewart       (University of Illinois at Urbana-Champaign)
   Vladimir Sukarev
   Santanu Thangaraj                            (AMD)
diff --git a/frame/compat/bla_dot.c b/frame/compat/bla_dot.c
index eaf2022af..cfdff957d 100644
--- a/frame/compat/bla_dot.c
+++ b/frame/compat/bla_dot.c
@@ -264,10 +264,16 @@ float PASTEF77(sd,sdot)
        const float*   y, const f77_int* incy
      )
 {
-	float r = ( float )PASTEF77(d,sdot)( n,
-	                                     x, incx,
-	                                     y, incy );
-	return r + *sb;
+	return ( float )
+	       (
+	         ( double )(*sb) +
+	         PASTEF77(d,sdot)
+	         (
+	           n,
+	           x, incx,
+	           y, incy
+	         )
+	       );
 }
 
 // Input vectors stored in single precision, computed in double precision,
diff --git a/frame/compat/cblas/f77_sub/f77_dot_sub.c b/frame/compat/cblas/f77_sub/f77_dot_sub.c
index 6c06133f1..8667791fb 100644
--- a/frame/compat/cblas/f77_sub/f77_dot_sub.c
+++ b/frame/compat/cblas/f77_sub/f77_dot_sub.c
@@ -75,7 +75,7 @@ void PASTEF772(sds,dot,sub)
              float*   rval
      )
 {
-	*rval = *sb + PASTEF77(sds,dot)
+	*rval = PASTEF77(sds,dot)
 	(
 	  n,
 	  sb,