Added gtestsuite folder into blis repo

Moved blis gtestsuite from lib-confscript to blis repo (branch: amd-main) Change-Id: If7ad391eef66bac6d26cf5223e6043d52b746072
2026-05-11 17:50:00 +00:00 · 2022-12-08 09:17:25 +05:30
parent 345aacf806
commit cff29bde76
195 changed files with 64114 additions and 0 deletions
--- a/gtestsuite/CMakeLists.txt
+++ b/gtestsuite/CMakeLists.txt
@@ -0,0 +1,249 @@
+##Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved.##
+
+# FetchContent_MakeAvailable() (used below) was introduced in CMake 3.14, so
+# that is the real minimum version this file can be configured with.
+cmake_minimum_required(VERSION 3.14.0)
+
+# Optional override: -DCXX_COMPILER=<compiler> selects the C++ compiler.
+# It has to be applied before project(), which is where the compiler is probed.
+if(DEFINED CXX_COMPILER)
+    set(CMAKE_CXX_COMPILER ${CXX_COMPILER})
+endif()
+
+project(Blis_GtestSuite)
+
+# Download and build GoogleTest (pinned to release-1.12.1) as part of this build.
+include(FetchContent)
+FetchContent_Declare(
+  googletest
+  GIT_REPOSITORY https://github.com/google/googletest.git
+  GIT_TAG        release-1.12.1
+)
+#set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+# GoogleMock is switched off for the fetched project; BUILD_GTEST is forced ON.
+set(BUILD_GMOCK OFF CACHE BOOL "" FORCE)
+set(BUILD_GTEST ON CACHE BOOL "" FORCE)
+FetchContent_MakeAvailable(googletest)
+include(GoogleTest)
+
+# Required so that the add_test() call further down is registered with CTest.
+enable_testing()
+
+# The suite is built against an installed BLIS: BLIS_PATH must be supplied on
+# the command line, and the headers are taken from <BLIS_PATH>/include/blis.
+if(BLIS_PATH)
+    set(BLIS_INCLUDE ${BLIS_PATH}/include/blis)
+else()
+    message(FATAL_ERROR "Need to provide a BLIS installation path during CMake invocation. Please use
+    $ cmake .. -DBLIS_PATH=/home/username/blis_installation")
+endif()
+
+# Threading model of the BLIS library to link against; OpenMP is the default.
+set(ENABLE_THREADING "openmp" CACHE STRING "Setting OpenMP as the threading library")
+# Set the possible values of threading libraries for cmake-gui
+set_property(CACHE ENABLE_THREADING PROPERTY STRINGS "openmp" "pthreads" "no")
+
+# Set static BLIS as the default library we build against.
+set(BLIS_LINKING_TYPE "static" CACHE STRING "Linking to a static BLIS library")
+# Set the possible values of BLIS linking type for cmake-gui
+set_property(CACHE BLIS_LINKING_TYPE PROPERTY STRINGS "static" "shared")
+
+# "shared" is offered in the GUI list but rejected for now.
+if(BLIS_LINKING_TYPE STREQUAL "shared")
+    message(FATAL_ERROR "Using shared BLIS library is currently disabled.")
+endif()
+
+# Build the tests for a run under valgrind.
+option(ENABLE_VALGRIND "Run tests using valgrind" OFF)
+
+# Build the tests with AddressSanitizer instrumentation.
+option(ENABLE_ASAN "Run tests using Address Sanitizer" OFF)
+
+# Apple platforms are not supported: stop before doing anything else.
+if(APPLE)
+    message(FATAL_ERROR "Build system does not support Apple platform.")
+endif()
+
+# Past this point APPLE is known to be false, so every UNIX platform is
+# treated as Linux by the rest of this file.
+if(UNIX)
+    set(LINUX TRUE)
+endif()
+
+# Directory that holds the test suite's own headers.
+set(INC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/inc)
+
+# Set compiler options and BLIS library for Linux.
+if(LINUX)
+    # Flags applied to every target created after this point.
+    add_compile_options(-g -Wall -Wno-unused-function -Wfatal-errors -fPIC)
+
+    # The README asks for __GTEST_VALGRIND_TEST__ to be defined for valgrind
+    # runs as well as for ASAN runs, so both options enable it.
+    if(ENABLE_VALGRIND OR ENABLE_ASAN)
+        add_definitions(-D__GTEST_VALGRIND_TEST__)
+    endif()
+
+    if(ENABLE_ASAN)
+        add_compile_options(-fsanitize=address -static-libsan)
+        # The sanitizer runtime must be requested at link time too; otherwise
+        # the instrumented objects fail to link (unresolved __asan_* symbols).
+        add_link_options(-fsanitize=address -static-libsan)
+    endif()
+
+    # Set GNU OpenMP library as the default option
+    set(OpenMP_LIBRARY "GNU" CACHE STRING "Using GNU OpenMP library")
+    # Set the possible values of OpenMP runtimes
+    set_property(CACHE OpenMP_LIBRARY PROPERTY STRINGS "GNU" "Intel")
+
+    # Pick the BLIS archive: libblis for a non-threaded build, libblis-mt
+    # otherwise; .a or .so according to BLIS_LINKING_TYPE.
+    if(ENABLE_THREADING STREQUAL "no")
+        if(BLIS_LINKING_TYPE STREQUAL "static")
+            set(Blis_LIBRARY "${BLIS_PATH}/lib/libblis.a" CACHE STRING "blis library path")
+        else()
+            set(Blis_LIBRARY "${BLIS_PATH}/lib/libblis.so" CACHE STRING "blis library path")
+        endif()
+    else()
+        if(BLIS_LINKING_TYPE STREQUAL "static")
+            set(Blis_LIBRARY "${BLIS_PATH}/lib/libblis-mt.a" CACHE STRING "blis library path")
+        else()
+            set(Blis_LIBRARY "${BLIS_PATH}/lib/libblis-mt.so" CACHE STRING "blis library path")
+        endif()
+    endif()
+endif()
+
+# Select the BLIS library file on Windows. The name is assembled from two
+# parts: an optional "-MT" tag for threaded builds and a suffix that depends
+# on the linking type (".a" for static, "-dll.lib" otherwise).
+if(WIN32)
+    if(ENABLE_THREADING STREQUAL "no")
+        set(blis_win_mt_tag "")
+    else()
+        set(blis_win_mt_tag "-MT")
+    endif()
+
+    if(BLIS_LINKING_TYPE STREQUAL "static")
+        set(blis_win_suffix ".a")
+    else()
+        set(blis_win_suffix "-dll.lib")
+    endif()
+
+    set(Blis_LIBRARY "${BLIS_PATH}/bin/AOCL-LibBlis-Win${blis_win_mt_tag}${blis_win_suffix}" CACHE STRING "blis library path")
+endif()
+
+# The build directory differs from the source directory, so the input files
+# are copied next to the build output at configure time.
+set(gtest_input_files
+    ${CMAKE_CURRENT_SOURCE_DIR}/input.general
+    ${CMAKE_CURRENT_SOURCE_DIR}/alphabeta.dat
+    ${CMAKE_CURRENT_SOURCE_DIR}/input.operations)
+file(COPY ${gtest_input_files} DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
+
+# The test executable. Source paths are given relative to this directory
+# (CMake resolves them against the current source directory).
+add_executable(gtest_libblis
+    # Harness / infrastructure sources
+    src/gtest_suite.cpp
+    src/gtest_pthread.cpp
+    src/blis_api.cpp
+    src/blis_inpfile.cpp
+    src/blis_utils_int.cpp
+    src/blis_utils.cpp
+    src/main.cpp
+    src/test_process.cpp
+    # ref_* sources
+    src/ref_addv.cpp
+    src/ref_amaxv.cpp
+    src/ref_axpbyv.cpp
+    src/ref_axpyv.cpp
+    src/ref_copyv.cpp
+    src/ref_dotv.cpp
+    src/ref_dotxv.cpp
+    src/ref_normfv.cpp
+    src/ref_scalv.cpp
+    src/ref_scal2v.cpp
+    src/ref_subv.cpp
+    src/ref_xpbyv.cpp
+    src/ref_addm.cpp
+    src/ref_axpym.cpp
+    src/ref_copym.cpp
+    src/ref_normfm.cpp
+    src/ref_scalm.cpp
+    src/ref_scal2m.cpp
+    src/ref_subm.cpp
+    src/ref_xpbym.cpp
+    src/ref_axpy2v.cpp
+    src/ref_dotaxpyv.cpp
+    src/ref_axpyf.cpp
+    src/ref_dotxf.cpp
+    src/ref_dotxaxpyf.cpp
+    src/ref_gemv.cpp
+    src/ref_ger.cpp
+    src/ref_hemv.cpp
+    src/ref_her.cpp
+    src/ref_her2.cpp
+    src/ref_symv.cpp
+    src/ref_syr.cpp
+    src/ref_syr2.cpp
+    src/ref_trmv.cpp
+    src/ref_trsv.cpp
+    src/ref_gemm.cpp
+    src/ref_gemmt.cpp
+    src/ref_hemm.cpp
+    src/ref_herk.cpp
+    src/ref_her2k.cpp
+    src/ref_symm.cpp
+    src/ref_syrk.cpp
+    src/ref_syr2k.cpp
+    src/ref_trmm.cpp
+    src/ref_trmm3.cpp
+    src/ref_trsm.cpp
+    # test_* sources
+    src/test_randv.cpp
+    src/test_randm.cpp
+    src/test_addv.cpp
+    src/test_amaxv.cpp
+    src/test_axpbyv.cpp
+    src/test_axpyv.cpp
+    src/test_copyv.cpp
+    src/test_dotv.cpp
+    src/test_dotxv.cpp
+    src/test_normfv.cpp
+    src/test_scalv.cpp
+    src/test_scal2v.cpp
+    src/test_setv.cpp
+    src/test_subv.cpp
+    src/test_xpbyv.cpp
+    src/test_addm.cpp
+    src/test_axpym.cpp
+    src/test_copym.cpp
+    src/test_normfm.cpp
+    src/test_scalm.cpp
+    src/test_scal2m.cpp
+    src/test_setm.cpp
+    src/test_subm.cpp
+    src/test_xpbym.cpp
+    src/test_axpy2v.cpp
+    src/test_dotaxpyv.cpp
+    src/test_axpyf.cpp
+    src/test_dotxf.cpp
+    src/test_dotxaxpyf.cpp
+    src/test_gemv.cpp
+    src/test_ger.cpp
+    src/test_hemv.cpp
+    src/test_her.cpp
+    src/test_her2.cpp
+    src/test_symv.cpp
+    src/test_syr.cpp
+    src/test_syr2.cpp
+    src/test_trmv.cpp
+    src/test_trsv.cpp
+    src/test_gemm.cpp
+    src/test_gemmt.cpp
+    src/test_hemm.cpp
+    src/test_herk.cpp
+    src/test_her2k.cpp
+    src/test_symm.cpp
+    src/test_syrk.cpp
+    src/test_syr2k.cpp
+    src/test_trmm.cpp
+    src/test_trmm3.cpp
+    src/test_trsm.cpp
+    # lpgemm utilities and the mixed-type gemm test sources
+    src/lpgemm_utils.cpp
+    src/test_gemm_u8s8s32os32.cpp
+    src/test_gemm_u8s8s32os8.cpp
+    src/test_gemm_f32f32f32of32.cpp
+    src/test_gemm_u8s8s16os16.cpp
+    src/test_gemm_u8s8s16os8.cpp
+    src/test_gemm_bf16bf16f32of32.cpp
+    src/test_gemm_bf16bf16f32obf16.cpp
+)
+
+# Headers of the test suite itself and of the BLIS installation.
+target_include_directories(gtest_libblis PUBLIC ${INC_PATH} ${BLIS_INCLUDE})
+
+# Let CMake resolve the platform thread library instead of hard-coding
+# "pthread", which does not exist as a library on the WIN32 configuration
+# handled above.
+find_package(Threads REQUIRED)
+target_link_libraries(gtest_libblis
+    PRIVATE gtest gtest_main ${Blis_LIBRARY} Threads::Threads)
+
+# Linking appropriate threading library.
+# Only done on Linux: GNU selects -fopenmp, any other OpenMP_LIBRARY value
+# selects the Intel runtime (iomp5).
+if(ENABLE_THREADING STREQUAL "openmp")
+    if(LINUX)
+        if(OpenMP_LIBRARY STREQUAL "GNU")
+            target_link_libraries(gtest_libblis PRIVATE -fopenmp)
+        else()
+            target_link_libraries(gtest_libblis PRIVATE iomp5)
+        endif()
+    endif()
+endif()
+
+# Register the whole executable as a single CTest test.
+add_test(
+    NAME gtest_libblis
+    COMMAND gtest_libblis
+)
+
+#gtest_discover_tests(gtest_libblis)
--- a/gtestsuite/Makefile
+++ b/gtestsuite/Makefile
@@ -0,0 +1,211 @@
+################################################################################################
+## Compiler options
+################################################################################################
+# Compiler selection: g++ in C++11 mode by default, clang++ when the build is
+# invoked with AOCL=1 (-stdlib=libc++ may be appended to the clang++ command).
+ifneq ($(AOCL),1)
+CXX := g++ -std=c++11
+else
+CXX := clang++
+endif
+
+# Compile-only switch, used by the object rule.
+CFLAGS := -c
+
+
+# Flags passed to the C++ compiler.
+CXXFLAGS += -g -Wall -Wno-unused-function -Wfatal-errors -fPIC -fnon-call-exceptions
+# Remove the '#' on the next line only for builds that are run under valgrind.
+CXXFLAGS += #-D__GTEST_VALGRIND_TEST__
+# Remove the '#' on the next line only for Address Sanitizer builds.
+CXXFLAGS += #-fsanitize=address -static-libsan
+
+# Threading and math libraries (-lpthread is the alternative to -fopenmp).
+LIBM       := -lm
+LIBPTHREAD := -fopenmp
+LDFLAGS    := $(LIBM) $(LIBPTHREAD)
+
+# Locations of the BLIS installation, the GoogleTest archive and the headers.
+BLIS_PATH     := ../blis_gcc
+BLIS_LIB      := $(BLIS_PATH)/lib/libblis-mt.a
+LINUX_PATH    := $(BLIS_PATH)/include/blis
+GTEST_LIB     := ./lib/libgtest.a
+GTEST_HEADERS := ./inc/gtest
+INC_PATH      := ./inc
+
+# Building with mkl=1 adds the MKL-specific compile flags and link libraries.
+ifeq ($(mkl),1)
+CXXFLAGS        +=-D_POSIX_C_SOURCE=200112L
+CXXFLAGS        += -std=c++11 -DBLAS=\"mkl\"
+LDFLAGS         += -lrt
+MKL_LIB_PATH   := ${MKLROOT}/intel64
+# MKL, sequential variant (currently disabled):
+#MKL_LIB        := -L$(MKL_LIB_PATH) \
+#                  -lmkl_intel_lp64 \
+#                  -lmkl_core \
+#                  -lmkl_sequential \
+#                  -lpthread -lm -ldl
+
+# MKL, multi-threaded variant (active). To link the sequential library instead,
+# comment out the lines below and uncomment the block above.
+MKL_LIB        := -L$(MKL_LIB_PATH) \
+                  -lmkl_intel_lp64 \
+                  -lmkl_core \
+                  -lmkl_gnu_thread \
+                  -lpthread -lm -ldl -liomp5
+endif
+# MKL_LIB is empty unless mkl=1, so it drops out of the link line by default.
+LIB_PATH        := $(MKL_LIB) $(BLIS_LIB) $(GTEST_LIB)
+################################################################################################
+## Variables
+################################################################################################
+# Output directories and the source directory.
+BIN_PATH      := bin
+TEST_OBJ_PATH := obj
+TEST_SRC_PATH := src
+
+# Every source file, and the object path derived from each of them.
+# patsubst / substitution references are used instead of
+# "$(subst from, to, text)": the blank after each comma is part of the
+# argument there, so ".cpp" was being replaced by " .o" (giving "obj/foo .o").
+CPPS          := $(wildcard $(TEST_SRC_PATH)/*.cpp)
+TEMP          := $(patsubst $(TEST_SRC_PATH)/%,$(TEST_OBJ_PATH)/%,$(CPPS))
+OBJS          := $(TEMP:.cpp=.o)
+
+# Project headers (used as prerequisites) and the include search path.
+HEADERS       := $(wildcard inc/*.h)
+INCLUDE_PATH  := -I$(INC_PATH) -I$(LINUX_PATH) -I$(GTEST_HEADERS)
+
+
+# File that "make run" redirects the test output to.
+TESTSUITE_OUT_FILE := output.testsuite
+
+# The executable is named differently for MKL builds (mkl=1).
+ifneq ($(mkl),1)
+TEST_EXE  := $(BIN_PATH)/libblis_gtest
+else
+TEST_EXE  := $(BIN_PATH)/gtest_mkl
+endif
+
+
+# Object files that make up the test executable, given by base name in link
+# order; each one is built from the same-named .cpp in $(TEST_SRC_PATH).
+TEST_OBJS := $(addprefix $(TEST_OBJ_PATH)/, \
+    gtest_suite.o gtest_pthread.o blis_api.o blis_inpfile.o \
+    blis_utils_int.o blis_utils.o main.o test_process.o \
+    ref_addv.o ref_amaxv.o ref_axpbyv.o ref_axpyv.o ref_copyv.o ref_dotv.o \
+    ref_dotxv.o ref_normfv.o ref_scalv.o ref_scal2v.o ref_subv.o ref_xpbyv.o \
+    ref_addm.o ref_axpym.o ref_copym.o ref_normfm.o \
+    ref_scalm.o ref_scal2m.o ref_subm.o ref_xpbym.o \
+    ref_axpy2v.o ref_dotaxpyv.o ref_axpyf.o ref_dotxf.o ref_dotxaxpyf.o \
+    ref_gemv.o ref_ger.o ref_hemv.o ref_her.o ref_her2.o \
+    ref_symv.o ref_syr.o ref_syr2.o ref_trmv.o ref_trsv.o \
+    ref_gemm.o ref_gemmt.o ref_hemm.o ref_herk.o ref_her2k.o ref_symm.o \
+    ref_syrk.o ref_syr2k.o ref_trmm.o ref_trmm3.o ref_trsm.o \
+    test_randv.o test_randm.o \
+    test_addv.o test_amaxv.o test_axpbyv.o test_axpyv.o test_copyv.o \
+    test_dotv.o test_dotxv.o test_normfv.o test_scalv.o test_scal2v.o \
+    test_setv.o test_subv.o test_xpbyv.o \
+    test_addm.o test_axpym.o test_copym.o test_normfm.o test_scalm.o \
+    test_scal2m.o test_setm.o test_subm.o test_xpbym.o \
+    test_axpy2v.o test_dotaxpyv.o test_axpyf.o test_dotxf.o test_dotxaxpyf.o \
+    test_gemv.o test_ger.o test_hemv.o test_her.o test_her2.o \
+    test_symv.o test_syr.o test_syr2.o test_trmv.o test_trsv.o \
+    test_gemm.o test_gemmt.o test_hemm.o test_herk.o test_her2k.o test_symm.o \
+    test_syrk.o test_syr2k.o test_trmm.o test_trmm3.o test_trsm.o \
+    lpgemm_utils.o \
+    test_gemm_u8s8s16os8.o test_gemm_u8s8s32os8.o \
+    test_gemm_u8s8s16os16.o test_gemm_u8s8s32os32.o \
+    test_gemm_f32f32f32of32.o \
+    test_gemm_bf16bf16f32of32.o test_gemm_bf16bf16f32obf16.o)
+
+#all: build run
+
+# These targets do not produce a file of the same name; declaring them phony
+# keeps them working even if a file called "build", "run" or "clean" exists.
+.PHONY: build run clean
+
+# Default goal: link the test executable.
+build: $(TEST_EXE)
+
+# Create the output directories while the Makefile is being read, so the
+# rules below can write into them.
+$(shell mkdir -p $(TEST_OBJ_PATH) $(BIN_PATH))
+
+# Link step.
+$(TEST_EXE): $(TEST_OBJS) $(HEADERS)
+	@echo "------------------------------------------"
+	@echo "Creating the executable for the Program"
+	@echo "------------------------------------------"
+	$(CXX) $(CXXFLAGS) $(INCLUDE_PATH) $(TEST_OBJS) $(LIB_PATH) $(LDFLAGS) -o $(TEST_EXE)
+
+# Compile step. The headers are prerequisites of every object so that a header
+# change triggers recompilation (previously it only caused a relink of stale
+# objects). $< is still the .cpp file because it is the first prerequisite.
+$(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.cpp $(HEADERS)
+	@echo "------------------------------------------"
+	@echo "Compiling the file $<"
+	@echo "------------------------------------------"
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDE_PATH) $< $(LIBPTHREAD)  -o $@
+
+
+# -- Test run/check rules --
+# The suite is executed twice: the first run is redirected to
+# $(TESTSUITE_OUT_FILE), the second one prints to the terminal.
+run: $(TEST_EXE)
+#	@echo "Running $(TEST_EXE) with output redirected to '$(TESTSUITE_OUT_FILE)'"
+	@./$(TEST_EXE) > $(TESTSUITE_OUT_FILE)
+	@./$(TEST_EXE)
+
+
+# Removing the two output directories also removes every object file and the
+# executable, which lives in $(BIN_PATH).
+clean:
+	rm -rf $(TEST_OBJ_PATH) $(BIN_PATH)
--- a/gtestsuite/README.md
+++ b/gtestsuite/README.md
@@ -0,0 +1,38 @@
+Steps to build gtestsuite executable on linux
+1. Set BLIS_PATH( blis installation path ) in gtestsuite Makefile.
+2. To build executable
+    GCC compiler  : type "make"
+    AOCC compiler : type "make AOCL=1"
+   executable will be generated in bin folder.
+3. Set input parameters in input.general file and select api's from input.operations file.
+4. Finally run the executable
+      $./bin/libblis_gtest
+
+Steps to build gtestsuite executable and run valgrind tool
+1. In gtestsuite Makefile, define the macro/enable "__GTEST_VALGRIND_TEST__(-D__GTEST_VALGRIND_TEST__)"
+   Note : Undefine the macro "__GTEST_VALGRIND_TEST__" when it is not built for valgrind test.
+2. Generate the executable as mentioned above(Steps to build gtestsuite executable)
+3. Set input parameters and select api's to test in input.general and input.operations respectively
+4. Finally run the executable
+      $ OMP_NUM_THREADS=1 valgrind --tool=memcheck --leak-check=full --show-leak-kinds=all --track-origins=yes -s <executable>
+
+Steps to build blis library and gtestsuite executable and run ASAN
+1. Build blis library with the flag CFLAGS="-g -fsanitize=address"
+   CC=clang ./configure -a aocl_gemm --enable-threading=openmp --enable-cblas CFLAGS="-g -fsanitize=address" auto
+2. And in gtestsuite Makefile, define the macro/enable "__GTEST_VALGRIND_TEST__(-D__GTEST_VALGRIND_TEST__)"
+   and even set/enable "CXXFLAGS += -fsanitize=address -static-libsan".
+3. Generate the executable as mentioned above(Steps to build gtestsuite executable)
+4. Set input parameters in input.general file and select api's from input.operations file.
+5. Finally run the executable
+      $./bin/libblis_gtest
+
+Steps to build gtestsuite executable for mkl library(machine lib-daytonax-04)
+1. Go to the gtestsuite folder and export the following (the paths depend on the machine and on where the Intel package is installed)
+     export LD_LIBRARY_PATH=/home/intel2019/update_05/intel64/:$LD_LIBRARY_PATH
+     export MKLROOT=/home/intel2019/update_05/
+     export MKL_DEBUG_CPU_TYPE=5
+     export MKL_ENABLE_INSTRUCTIONS=AVX2
+2. Type "make mkl=1" or "make AOCL=1 mkl=1(for clang compiler)", executable will be generated in bin folder
+3. Set input parameters in input.general file and select api's from input.operations file.
+4. Finally run the executable
+      $./bin/gtest_mkl
--- a/gtestsuite/alphabeta.dat
+++ b/gtestsuite/alphabeta.dat
@@ -0,0 +1,21 @@
+# --------------------------------------------------------------------------
+#
+#  alphabeta.dat
+#  BLIS test suite
+#
+#  This file contains alpha  and beta values which are used
+#  in BLIS operations
+#
+# --------------------------------------------------------------------------
+
+5              #Number of alpha and/or beta values
+-1.000  0.000  #const double alpha[5] = {-1.000,0.000,1.000,0.999,2.333};
+0.000   0.000
+1.000   0.000
+0.999   0.000
+2.333   0.000
+-1.000  0.000  #const double beta[5]  = {-1.000,0.000,1.000,0.999,2.333};
+0.000   0.000
+1.000   0.000
+0.999   0.000
+2.333   0.000
--- a/gtestsuite/inc/blis_test.h
+++ b/gtestsuite/inc/blis_test.h
@@ -0,0 +1,519 @@
+#ifndef BLIS_TEST_H
+#define BLIS_TEST_H
+
+#include <iostream>
+#include <string>
+#include <tuple>
+#include <vector>
+#include <cstdio>
+
+#include "blis.h"
+#include "gtest/gtest.h"
+
+using namespace std;
+
+// --- System headers ---------------------------------------------------------
+// For va_* functions.
+#include <stdarg.h>
+// For string manipulation functions.
+#include <string.h>
+// For other string manipulation functions (e.g. isspace()).
+#include <ctype.h>
+
+// For POSIX stuff.
+#ifndef _MSC_VER
+#include <unistd.h>
+#endif
+
+// --- Constants and types ----------------------------------------------------
+#define PARAMETERS_FILENAME          "input.general"
+#define OPERATIONS_FILENAME          "input.operations"
+#define ALPHA_BETA_FILENAME          "alphabeta.dat"
+#define INPUT_COMMENT_CHAR           '#'
+#define OUTPUT_COMMENT_CHAR          '%'
+
+#define BLIS_FILE_PREFIX_STR         "libblis_test"
+#define BLIS_FILEDATA_PREFIX_STR     "blis"
+#define BLAS_FILEDATA_PREFIX_STR     "blas"
+#define CBLAS_FILEDATA_PREFIX_STR    "cblas"
+
+#define INPUT_BUFFER_SIZE            256
+#define MAX_FILENAME_LENGTH          1000
+#define MAX_BINARY_NAME_LENGTH       256
+#define MAX_FUNC_STRING_LENGTH       36
+#define FLOPS_PER_UNIT_PERF          1e9
+
+#define MAX_NUM_MSTORAGE             4
+#define MAX_NUM_VSTORAGE             5
+#define MAX_NUM_DATATYPES            4
+#define MAX_NUM_PARAMETERS           7
+#define MAX_NUM_DIMENSIONS           3
+#define MAX_NUM_OPERANDS             5
+
+#define MAX_PASS_STRING_LENGTH       32
+#define BLIS_TEST_FAIL_STRING        "FAILURE"
+#define BLIS_TEST_WARN_STRING        "MARGINAL"
+#define BLIS_TEST_PASS_STRING        "PASS"
+#define BLIS_TEST_OVERFLOW_STRING    "OVERFLOW"
+#define BLIS_TEST_UNDERFLOW_STRING   "UNDERFLOW"
+
+#define ON_FAILURE_IGNORE_CHAR       'i'
+#define ON_FAILURE_SLEEP_CHAR        's'
+#define ON_FAILURE_ABORT_CHAR        'a'
+
+#define SECONDS_TO_SLEEP             3
+
+#define DISABLE                      0
+#define ENABLE                       1
+#define ENABLE_ONLY                  2
+
+#define MAX_PARAM_VALS_PER_TYPE      4
+#define BLIS_TEST_PARAM_SIDE_CHARS   "lr"
+#define BLIS_TEST_PARAM_UPLO_CHARS   "lu"
+#define BLIS_TEST_PARAM_UPLODE_CHARS "dlu"
+#define BLIS_TEST_PARAM_TRANS_CHARS  "ncth"
+#define BLIS_TEST_PARAM_CONJ_CHARS   "nc"
+#define BLIS_TEST_PARAM_DIAG_CHARS   "nu"
+
+#define BLIS_INIT_SUCCESS            0
+#define BLIS_INIT_FAILURE           -1
+#define NUM_PARAM_TYPES              6
+#define MAX_NUM_ABVALUES             5
+
+/*Allocating buffers with malloc in gtestsuite */
+#define __GTESTSUITE_MALLOC_BUFFER__
+
+// Index of each operation-parameter category. The enumerators line up one to
+// one with the BLIS_TEST_PARAM_*_CHARS macros, and their count (6) equals
+// NUM_PARAM_TYPES.
+typedef enum
+{
+    BLIS_TEST_PARAM_SIDE      = 0,
+    BLIS_TEST_PARAM_UPLO      = 1,
+    BLIS_TEST_PARAM_UPLODE    = 2,
+    BLIS_TEST_PARAM_TRANS     = 3,
+    BLIS_TEST_PARAM_CONJ      = 4,
+    BLIS_TEST_PARAM_DIAG      = 5,
+} param_t;
+
+#define MAX_STORE_VALS_PER_TYPE      4
+#define BLIS_TEST_MSTORE_CHARS       "crg"
+#define BLIS_TEST_VSTORE_CHARS       "crji"
+
+#define NUM_OPERAND_TYPES           2
+// Kind of operand (matrix or vector); the number of enumerators equals
+// NUM_OPERAND_TYPES. Presumably used as the first index of
+// test_params_t::storage -- TODO confirm against the users of that array.
+typedef enum
+{
+    BLIS_TEST_MATRIX_OPERAND  = 0,
+    BLIS_TEST_VECTOR_OPERAND  = 1
+} operand_t;
+
+// API flavour identifier (BLIS, CBLAS or BLAS); stored in test_params_t::api.
+typedef enum
+{
+    API_BLIS  = 0,
+    API_CBLAS = 1,
+    API_BLAS  = 2
+} api_t;
+
+// Default / overflow / underflow flag; stored in test_params_t::oruflw.
+typedef enum
+{
+    BLIS_DEFAULT    = 0,
+    BLIS_OVERFLOW   = 1,
+    BLIS_UNDERFLOW  = 2
+} vflg_t;
+
+// Dimension-set identifier of an operation; stored in test_op_t::dimset.
+// NOTE(review): the suffixes (MNK, MN, MK, ...) presumably name the problem
+// dimensions an operation takes -- confirm in the per-operation test sources.
+typedef enum
+{
+    BLIS_TEST_DIMS_MNK        = 0,
+    BLIS_TEST_DIMS_MN         = 1,
+    BLIS_TEST_DIMS_MK         = 2,
+    BLIS_TEST_DIMS_M          = 3,
+    BLIS_TEST_DIMS_MF         = 4,
+    BLIS_TEST_DIMS_K          = 5,
+    BLIS_TEST_NO_DIMS         = 6
+} dimset_t;
+
+// Interface selector carried by each worker thread in thread_data_t::iface.
+typedef enum
+{
+    BLIS_TEST_SEQ_UKERNEL     = 0,
+    BLIS_TEST_SEQ_FRONT_END   = 1,
+    BLIS_TEST_MT_FRONT_END    = 2
+} iface_t;
+
+
+// Named values for the random-data generation mode.
+// NOTE(review): presumably compared against test_params_t::rand_method, which
+// is declared as unsigned int rather than rand_t -- confirm.
+typedef enum
+{
+    BLIS_TEST_RAND_REAL_VALUES = 0,
+    BLIS_TEST_RAND_NARROW_POW2 = 1
+} rand_t;
+
+// Pair of thresholds. NOTE(review): the field names suggest "failwarn" is the
+// boundary between FAILURE and MARGINAL and "warnpass" the boundary between
+// MARGINAL and PASS (cf. the BLIS_TEST_*_STRING macros) -- confirm at the use
+// site.
+typedef struct
+{
+    double failwarn;
+    double warnpass;
+} thresh_t;
+
+// One threshold pair per floating-point datatype; each initializer is given in
+// field order { failwarn, warnpass }.
+const thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 },   // failwarn, warnpass for s
+                                             { 1e-04, 1e-05 },   // failwarn, warnpass for c
+                                             { 1e-13, 1e-14 },   // failwarn, warnpass for d
+                                             { 1e-13, 1e-14 } }; // failwarn, warnpass for z
+
+#define SIZE 4
+typedef dcomplex atom_t;
+
+// Triple of dimensions (m, n, k); test_params_t::dim points to objects of
+// this type.
+typedef struct
+{
+    dim_t m;
+    dim_t n;
+    dim_t k;
+} tensor_t;
+
+// Global settings of a test run. thread_data_t and input_data_t each hold a
+// pointer to one of these.
+// NOTE(review): the first group of fields presumably mirrors the entries of
+// PARAMETERS_FILENAME ("input.general") -- confirm against the file reader.
+typedef struct
+{
+    unsigned int  n_repeats;
+    unsigned int  n_mstorage;
+    unsigned int  n_vstorage;
+    // NOTE(review): both rows are sized with MAX_NUM_MSTORAGE + 1 (= 5) while
+    // MAX_NUM_VSTORAGE is 5; confirm that a vector storage string plus its
+    // terminator always fits.
+    char          storage[ NUM_OPERAND_TYPES ][ MAX_NUM_MSTORAGE + 1 ];
+    unsigned int  mix_all_storage;
+    unsigned int  alignment;
+    unsigned int  rand_method;
+    unsigned int  gs_spacing;
+    unsigned int  n_datatypes;
+    char          datatype_char[ MAX_NUM_DATATYPES + 1 ];
+    num_t         datatype[ MAX_NUM_DATATYPES + 1 ];
+    unsigned int  mixed_domain;
+    unsigned int  mixed_precision;
+    unsigned int  p_first;
+    unsigned int  p_max;
+    unsigned int  p_inc;
+    unsigned int  ind_enable[ BLIS_NUM_IND_METHODS ];
+    unsigned int  n_app_threads;
+    char          reaction_to_failure;
+    unsigned int  output_matlab_format;
+    unsigned int  output_files;
+    unsigned int  error_checking_level;
+    unsigned int  is_mixed_dt;
+    unsigned int  n_param_combos;
+    unsigned int  n_store_combos;
+    unsigned int  n_dt_combos;
+    char**        pc_str;
+    char**        sc_str;
+    char**        dc_str;
+    unsigned int  indim[MAX_NUM_DATATYPES][BLIS_NAT+1];
+    unsigned int  indn[MAX_NUM_DATATYPES];
+    num_t         dt[MAX_NUM_DATATYPES];
+    bool          initparams;
+
+    // API selection plus alpha/beta values, leading dimensions and dimension
+    // list (the *f members look like enable flags -- TODO confirm).
+    api_t         api;
+    unsigned int  abf;
+    atom_t       *alpha;
+    atom_t       *beta;
+    unsigned int  nab;
+    unsigned int  ldf;
+    unsigned int  ld[3];
+    unsigned int  bitextf;
+    unsigned int  dimf;
+    unsigned int  ndim;
+    unsigned int  nanf;
+    tensor_t      *dim;
+
+    unsigned int  passflag;
+    vflg_t        oruflw;
+    unsigned int  bitrp;
+
+    char          op_t;
+
+} test_params_t;
+
+// Fixed-size buffers for the three input file names; each holds up to
+// MAX_FILENAME_LENGTH characters plus one extra byte.
+typedef struct
+{
+    char libblis_test_parameters_filename[ MAX_FILENAME_LENGTH + 1 ];
+    char libblis_test_operations_filename[ MAX_FILENAME_LENGTH + 1 ];
+    char libblis_test_alphabeta_parameter[ MAX_FILENAME_LENGTH + 1 ];
+} blis_string_t;
+
+// Description of one testable operation: its identity, dimension set,
+// dimension specification and parameter characters. test_ops_t contains one
+// member of this type per operation.
+typedef struct
+{
+    // parent test_ops_t struct
+    struct test_ops_s*   ops;
+
+    opid_t        opid;
+    dimset_t      dimset;
+
+    int           op_switch;
+    unsigned int  n_dims;
+
+    int           dim_spec[ MAX_NUM_DIMENSIONS ];
+    int           dim_aux[ MAX_NUM_DIMENSIONS ];
+    unsigned int  n_params;
+    char          params[ MAX_NUM_PARAMETERS ];
+    bool          test_done;
+} test_op_t;
+
+// Container with one test_op_t per operation, grouped by level, preceded by
+// the override switches. Each test_op_t has an `ops` pointer of type
+// struct test_ops_s* that refers back to this container.
+typedef struct test_ops_s
+{
+    // individual override
+    int       indiv_over;
+
+    // section overrides
+    int       util_over;
+    int       l1v_over;
+    int       l1m_over;
+    int       l1f_over;
+    int       l2_over;
+    int       l3ukr_over;
+    int       l3_over;
+
+    // util
+    test_op_t randv;
+    test_op_t randm;
+
+    // level-1v
+    test_op_t addv;
+    test_op_t amaxv;
+    test_op_t axpbyv;
+    test_op_t axpyv;
+    test_op_t copyv;
+    test_op_t dotv;
+    test_op_t dotxv;
+    test_op_t normfv;
+    test_op_t scalv;
+    test_op_t scal2v;
+    test_op_t setv;
+    test_op_t subv;
+    test_op_t xpbyv;
+
+    // level-1m
+    test_op_t addm;
+    test_op_t axpym;
+    test_op_t copym;
+    test_op_t normfm;
+    test_op_t scalm;
+    test_op_t scal2m;
+    test_op_t setm;
+    test_op_t subm;
+    test_op_t xpbym;
+
+    // level-1f
+    test_op_t axpy2v;
+    test_op_t dotaxpyv;
+    test_op_t axpyf;
+    test_op_t dotxf;
+    test_op_t dotxaxpyf;
+
+    // level-2
+    test_op_t gemv;
+    test_op_t ger;
+    test_op_t hemv;
+    test_op_t her;
+    test_op_t her2;
+    test_op_t symv;
+    test_op_t syr;
+    test_op_t syr2;
+    test_op_t trmv;
+    test_op_t trsv;
+
+    // level-3 micro-kernels
+    test_op_t gemm_ukr;
+    test_op_t trsm_ukr;
+    test_op_t gemmtrsm_ukr;
+
+    // level-3
+    test_op_t gemm;
+    test_op_t gemmt;
+    test_op_t hemm;
+    test_op_t herk;
+    test_op_t her2k;
+    test_op_t symm;
+    test_op_t syrk;
+    test_op_t syr2k;
+    test_op_t trmm;
+    test_op_t trmm3;
+    test_op_t trsm;
+
+    // gemm variants named after their operand/accumulator/output types
+    test_op_t gemm_u8s8s32os32;
+    test_op_t gemm_u8s8s32os8;
+    test_op_t gemm_f32f32f32of32;
+    test_op_t gemm_u8s8s16os16;
+    test_op_t gemm_u8s8s16os8;
+    test_op_t gemm_bf16bf16f32of32;
+    test_op_t gemm_bf16bf16f32obf16;
+
+} test_ops_t;
+
+// Pair of 32-bit counters referenced from thread_data_t::pfr.
+// NOTE(review): presumably a total count (tcnt) and a failure count (cntf)
+// -- confirm where they are updated.
+typedef struct
+{
+    uint32_t tcnt;
+    uint32_t cntf;
+} printres_t;
+
+// Bundle of pointers and values handed to a test thread. The
+// libblis_test_*_thread_entry() functions take a void* argument named
+// tdata_void, presumably pointing to one of these -- TODO confirm.
+typedef struct
+{
+    test_params_t*         params;
+    test_op_t*             op;
+    const char*            str;
+    unsigned int           nt;
+    unsigned int           id;
+    iface_t                iface;
+    unsigned int           xc;
+    bli_pthread_barrier_t* barrier;
+    printres_t*            pfr;
+} thread_data_t;
+
+// Name of an input file plus a flag describing how it is used.
+typedef struct
+{
+    // File name buffer; capacity is MAX_FILENAME_LENGTH characters.
+    char inputfile[ MAX_FILENAME_LENGTH ];
+    // AoclBlisTestFixture compares this against 1: when it is NOT 1 the
+    // fixture creates (and later destroys) a thread barrier.
+    // NOTE(review): 1 presumably means "parameters come from the input
+    // file" -- confirm against the code that sets it.
+    int  fileread;
+} input_file_t;
+
+// Value-parameter type of AoclBlisTestFixture (see TestWithParam<input_data_t>).
+// The fixture's SetUp() copies each of these pointers into its own members;
+// the struct itself owns nothing as far as this header shows.
+typedef struct
+{
+    test_params_t *params;   // test parameters
+    test_ops_t    *ops;      // per-operation test settings
+    input_file_t  *pfile;    // input file name and fileread flag
+    bli_pthread_t *pthread;  // thread handle storage (allocated elsewhere)
+    thread_data_t *tdata;    // per-thread argument storage (allocated elsewhere)
+} input_data_t;
+
+// Thread entry points, one per tested operation.  Every function has the
+// same shape: it takes an opaque void* and returns void*.
+// NOTE(review): tdata_void presumably points to a thread_data_t -- confirm
+// against the definitions.  The grouping below follows the section comments
+// of test_ops_t; no entry point is declared in this list for the level-3
+// micro-kernels (gemm_ukr, trsm_ukr, gemmtrsm_ukr).
+
+// util
+void* libblis_test_randv_thread_entry( void* tdata_void );
+void* libblis_test_randm_thread_entry( void* tdata_void );
+
+// level-1v
+void* libblis_test_addv_thread_entry( void* tdata_void );
+void* libblis_test_amaxv_thread_entry( void* tdata_void );
+void* libblis_test_axpbyv_thread_entry( void* tdata_void );
+void* libblis_test_axpyv_thread_entry( void* tdata_void );
+void* libblis_test_copyv_thread_entry( void* tdata_void );
+void* libblis_test_dotv_thread_entry( void* tdata_void );
+void* libblis_test_dotxv_thread_entry( void* tdata_void );
+void* libblis_test_normfv_thread_entry( void* tdata_void );
+void* libblis_test_scal2v_thread_entry( void* tdata_void );
+void* libblis_test_scalv_thread_entry( void* tdata_void );
+void* libblis_test_setv_thread_entry( void* tdata_void );
+void* libblis_test_subv_thread_entry( void* tdata_void );
+
+// xpbyv (listed under level-1v in test_ops_t), followed by level-1f
+void* libblis_test_xpbyv_thread_entry( void* tdata_void );
+void* libblis_test_axpy2v_thread_entry( void* tdata_void );
+void* libblis_test_dotaxpyv_thread_entry( void* tdata_void );
+void* libblis_test_axpyf_thread_entry( void* tdata_void );
+void* libblis_test_dotxf_thread_entry( void* tdata_void );
+void* libblis_test_dotxaxpyf_thread_entry( void* tdata_void );
+
+// level-1m
+void* libblis_test_addm_thread_entry( void* tdata_void );
+void* libblis_test_axpym_thread_entry( void* tdata_void );
+void* libblis_test_copym_thread_entry( void* tdata_void );
+void* libblis_test_normfm_thread_entry( void* tdata_void );
+void* libblis_test_scal2m_thread_entry( void* tdata_void );
+void* libblis_test_scalm_thread_entry( void* tdata_void );
+void* libblis_test_setm_thread_entry( void* tdata_void );
+void* libblis_test_subm_thread_entry( void* tdata_void );
+void* libblis_test_xpbym_thread_entry( void* tdata_void );
+
+// level-2
+void* libblis_test_gemv_thread_entry( void* tdata_void );
+void* libblis_test_ger_thread_entry( void* tdata_void );
+void* libblis_test_hemv_thread_entry( void* tdata_void );
+void* libblis_test_her_thread_entry( void* tdata_void );
+void* libblis_test_her2_thread_entry( void* tdata_void );
+void* libblis_test_symv_thread_entry( void* tdata_void );
+void* libblis_test_syr_thread_entry( void* tdata_void );
+void* libblis_test_syr2_thread_entry( void* tdata_void );
+void* libblis_test_trmv_thread_entry( void* tdata_void );
+void* libblis_test_trsv_thread_entry( void* tdata_void );
+
+// level-3
+void* libblis_test_gemm_thread_entry( void* tdata_void );
+void* libblis_test_gemmt_thread_entry( void* tdata_void );
+void* libblis_test_hemm_thread_entry( void* tdata_void );
+void* libblis_test_herk_thread_entry( void* tdata_void );
+void* libblis_test_her2k_thread_entry( void* tdata_void );
+void* libblis_test_symm_thread_entry( void* tdata_void );
+void* libblis_test_syrk_thread_entry( void* tdata_void );
+void* libblis_test_syr2k_thread_entry( void* tdata_void );
+void* libblis_test_trmm_thread_entry( void* tdata_void );
+void* libblis_test_trmm3_thread_entry( void* tdata_void );
+void* libblis_test_trsm_thread_entry( void* tdata_void );
+
+// typed gemm variants (the suffix names the operand/accumulator/output types)
+void* libblis_test_gemm_u8s8s32os32_thread_entry( void* tdata_void );
+void* libblis_test_gemm_f32f32f32of32_thread_entry( void* tdata_void );
+void* libblis_test_gemm_u8s8s16os8_thread_entry( void* tdata_void );
+void* libblis_test_gemm_u8s8s32os8_thread_entry( void* tdata_void );
+void* libblis_test_gemm_u8s8s16os16_thread_entry( void* tdata_void );
+void* libblis_test_gemm_bf16bf16f32of32_thread_entry( void* tdata_void );
+void* libblis_test_gemm_bf16bf16f32obf16_thread_entry( void* tdata_void );
+
+/*
+ * Value-parameterised GoogleTest fixture for the BLIS test suite.
+ *
+ * Each test instance receives an input_data_t through GetParam(); SetUp()
+ * copies its pointers into the fixture and allocates the per-test
+ * resources (a result counter block and, when the parameters were not
+ * read from an input file, a thread barrier).  TearDown() releases exactly
+ * what SetUp() allocated.
+ *
+ * Every data member has a default initialiser so that members SetUp() does
+ * not assign (inData, dim, and nt/barrier when fileread == 1) never hold
+ * indeterminate values.
+ */
+class AoclBlisTestFixture : public ::testing::TestWithParam<input_data_t>
+{
+    public:
+    // Copies the parameter pointers and allocates per-test resources.
+    // Raises a fatal GoogleTest failure if an allocation returns NULL.
+    void SetUp() override
+    {
+        const input_data_t& in = GetParam();
+
+        params    = in.params;
+        ops       = in.ops;
+        pthread   = in.pthread;
+        tdata     = in.tdata;
+        pfile     = in.pfile;
+
+        // The barrier is only needed when the parameters were not read
+        // from an input file.
+        if(pfile->fileread != 1)
+        {
+            nt      = ( unsigned int )params->n_app_threads;
+            barrier =
+            (bli_pthread_barrier_t*)bli_malloc_user( sizeof( bli_pthread_barrier_t ) );
+            ASSERT_TRUE( barrier != nullptr ) << "barrier allocation failed";
+            bli_pthread_barrier_init( barrier, NULL, nt );
+        }
+
+        pfr     = (printres_t*)bli_malloc_user( sizeof( printres_t ) );
+        ASSERT_TRUE( pfr != nullptr ) << "printres_t allocation failed";
+        memset(pfr, 0, sizeof(printres_t));
+    }
+
+    // Releases whatever SetUp() allocated.  The decision is based on the
+    // pointers themselves rather than on re-reading pfile->fileread, so a
+    // flag that changes during the test can neither leak the barrier nor
+    // make this function destroy a barrier that was never created.
+    void TearDown() override
+    {
+        if( barrier != nullptr )
+        {
+            bli_pthread_barrier_destroy( barrier );
+            bli_free_user( barrier );
+            barrier = nullptr;
+        }
+        if( pfr != nullptr )
+        {
+            bli_free_user( pfr );
+            pfr = nullptr;
+        }
+    }
+
+    // The member functions below are only declared here; their definitions
+    // are outside this header.
+    bool libblis_test_preprocess_params( test_params_t* params, test_op_t* op,
+                      iface_t iface, const char* p_types, const char* o_types);
+
+    bool create_params(test_params_t *params);
+
+    bool destroy_params(test_params_t *params);
+
+    int libblis_test_read_params_inpfile( char* filename, test_params_t* params,
+                                             test_ops_t* ops, printres_t* pfr);
+    protected:
+      unsigned int     nt      = 0;        // barrier thread count (set when fileread != 1)
+      input_data_t*    inData  = nullptr;  // not assigned by SetUp()
+      test_params_t*   params  = nullptr;  // from GetParam(), not owned
+      test_ops_t*      ops     = nullptr;  // from GetParam(), not owned
+      tensor_t*        dim     = nullptr;  // not assigned by SetUp()
+      bli_pthread_t*   pthread = nullptr;  // from GetParam(), not owned
+      thread_data_t*   tdata   = nullptr;  // from GetParam(), not owned
+      printres_t*      pfr     = nullptr;  // allocated in SetUp(), freed in TearDown()
+      input_file_t*    pfile   = nullptr;  // from GetParam(), not owned
+      bli_pthread_barrier_t* barrier = nullptr;  // allocated in SetUp() when fileread != 1
+};
+
+// Holds the suite-wide state (string table, input file descriptor, test
+// parameters and per-operation settings) and declares the routines that
+// read the input files and build the GoogleTest filter strings.  Only the
+// four accessors are defined here; everything else is declared only.
+class BlisTestSuite
+{
+    private:
+        blis_string_t  blis_string;
+        input_file_t   pfile;
+        test_params_t  params;
+        test_ops_t     ops;
+
+    public:
+        ~BlisTestSuite( );
+
+        // Accessors: each returns the address of the matching member.
+        test_params_t* getParamsStr()
+        {
+            return &params;
+        }
+        blis_string_t* getStgStr()
+        {
+            return &blis_string;
+        }
+        test_ops_t* getOpsStr()
+        {
+            return &ops;
+        }
+        input_file_t* getfileStr()
+        {
+            return &pfile;
+        }
+
+        // Input handling and filter construction (defined elsewhere).
+        int libblis_test_init_strings( blis_string_t *test_data );
+        int libblis_test_inpfile( char* input_filename, input_file_t* pfile );
+        int libblis_test_read_params_file( char* input_filename,
+                                           test_params_t* params,
+                                           char *abpf );
+        int libblis_test_read_ops_file( char* input_filename, test_ops_t* ops );
+        void CreateGtestFilters( test_ops_t* ops, string& str );
+        void CreateGtestFilters_api( input_file_t* pfile, string& str );
+};
+#endif  // BLIS_TEST_H
--- a/gtestsuite/inc/gtest/gtest-death-test.h
+++ b/gtestsuite/inc/gtest/gtest-death-test.h
@@ -0,0 +1,346 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+//
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This header file defines the public API for death tests.  It is
+// #included by gtest.h so a user doesn't need to include this
+// directly.
+// GOOGLETEST_CM0001 DO NOT DELETE
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
+
+#include "gtest/internal/gtest-death-test-internal.h"
+
+namespace testing {
+
+// This flag controls the style of death tests.  Valid values are:
+//   "threadsafe" - the death test child process re-executes the test binary
+//                  from the start, running only a single death test;
+//   "fast"       - the child process executes the test logic immediately
+//                  after forking.
+// This line only declares the flag; its definition is not in this header.
+GTEST_DECLARE_string_(death_test_style);
+
+#if GTEST_HAS_DEATH_TEST
+
+namespace internal {
+
+// Returns a Boolean value indicating whether the caller is currently
+// executing in the context of the death test child process.  Tools such as
+// Valgrind heap checkers may need this to modify their behavior in death
+// tests.  IMPORTANT: This is an internal utility.  Using it may break the
+// implementation of death tests.  User code MUST NOT use it.
+// Declaration only; the function is exported through GTEST_API_.
+GTEST_API_ bool InDeathTestChild();
+
+}  // namespace internal
+
+// The following macros are useful for writing death tests.
+
+// Here's what happens when an ASSERT_DEATH* or EXPECT_DEATH* is
+// executed:
+//
+//   1. It generates a warning if there is more than one active
+//   thread.  This is because it's safe to fork() or clone() only
+//   when there is a single thread.
+//
+//   2. The parent process clone()s a sub-process and runs the death
+//   test in it; the sub-process exits with code 0 at the end of the
+//   death test, if it hasn't exited already.
+//
+//   3. The parent process waits for the sub-process to terminate.
+//
+//   4. The parent process checks the exit code and error message of
+//   the sub-process.
+//
+// Examples:
+//
+//   ASSERT_DEATH(server.SendMessage(56, "Hello"), "Invalid port number");
+//   for (int i = 0; i < 5; i++) {
+//     EXPECT_DEATH(server.ProcessRequest(i),
+//                  "Invalid request .* in ProcessRequest()")
+//                  << "Failed to die on request " << i;
+//   }
+//
+//   ASSERT_EXIT(server.ExitNow(), ::testing::ExitedWithCode(0), "Exiting");
+//
+//   bool KilledBySIGHUP(int exit_code) {
+//     return WIFSIGNALED(exit_code) && WTERMSIG(exit_code) == SIGHUP;
+//   }
+//
+//   ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!");
+//
+// The final parameter to each of these macros is a matcher applied to any data
+// the sub-process wrote to stderr.  For compatibility with existing tests, a
+// bare string is interpreted as a regular expression matcher.
+//
+// On the regular expressions used in death tests:
+//
+//   GOOGLETEST_CM0005 DO NOT DELETE
+//   On POSIX-compliant systems (*nix), we use the <regex.h> library,
+//   which uses the POSIX extended regex syntax.
+//
+//   On other platforms (e.g. Windows or Mac), we only support a simple regex
+//   syntax implemented as part of Google Test.  This limited
+//   implementation should be enough most of the time when writing
+//   death tests; though it lacks many features you can find in PCRE
+//   or POSIX extended regex syntax.  For example, we don't support
+//   union ("x|y"), grouping ("(xy)"), brackets ("[xy]"), and
+//   repetition count ("x{5,7}"), among others.
+//
+//   Below is the syntax that we do support.  We chose it to be a
+//   subset of both PCRE and POSIX extended regex, so it's easy to
+//   learn wherever you come from.  In the following: 'A' denotes a
+//   literal character, period (.), or a single \\ escape sequence;
+//   'x' and 'y' denote regular expressions; 'm' and 'n' are for
+//   natural numbers.
+//
+//     c     matches any literal character c
+//     \\d   matches any decimal digit
+//     \\D   matches any character that's not a decimal digit
+//     \\f   matches \f
+//     \\n   matches \n
+//     \\r   matches \r
+//     \\s   matches any ASCII whitespace, including \n
+//     \\S   matches any character that's not a whitespace
+//     \\t   matches \t
+//     \\v   matches \v
+//     \\w   matches any letter, _, or decimal digit
+//     \\W   matches any character that \\w doesn't match
+//     \\c   matches any literal character c, which must be a punctuation
+//     .     matches any single character except \n
+//     A?    matches 0 or 1 occurrences of A
+//     A*    matches 0 or many occurrences of A
+//     A+    matches 1 or many occurrences of A
+//     ^     matches the beginning of a string (not that of each line)
+//     $     matches the end of a string (not that of each line)
+//     xy    matches x followed by y
+//
+//   If you accidentally use PCRE or POSIX extended regex features
+//   not implemented by us, you will get a run-time failure.  In that
+//   case, please try to rewrite your regular expression within the
+//   above syntax.
+//
+//   This implementation is *not* meant to be as highly tuned or robust
+//   as a compiled regex library, but should perform well enough for a
+//   death test, which already incurs significant overhead by launching
+//   a child process.
+//
+// Known caveats:
+//
+//   A "threadsafe" style death test obtains the path to the test
+//   program from argv[0] and re-executes it in the sub-process.  For
+//   simplicity, the current implementation doesn't search the PATH
+//   when launching the sub-process.  This means that the user must
+//   invoke the test program via a path that contains at least one
+//   path separator (e.g. path/to/foo_test and
+//   /absolute/path/to/bar_test are fine, but foo_test is not).  This
+//   is rarely a problem as people usually don't put the test binary
+//   directory in PATH.
+//
+
+// Asserts that a given `statement` causes the program to exit, with an
+// integer exit status that satisfies `predicate`, and emitting error output
+// that matches `matcher`.
+// Expands to GTEST_DEATH_TEST_ with GTEST_FATAL_FAILURE_ as the failure kind.
+# define ASSERT_EXIT(statement, predicate, matcher) \
+    GTEST_DEATH_TEST_(statement, predicate, matcher, GTEST_FATAL_FAILURE_)
+
+// Like `ASSERT_EXIT`, but continues on to successive tests in the
+// test suite, if any:
+// Expands to GTEST_DEATH_TEST_ with GTEST_NONFATAL_FAILURE_ as the failure
+// kind.
+# define EXPECT_EXIT(statement, predicate, matcher) \
+    GTEST_DEATH_TEST_(statement, predicate, matcher, GTEST_NONFATAL_FAILURE_)
+
+// Asserts that a given `statement` causes the program to exit, either by
+// explicitly exiting with a nonzero exit code or being killed by a
+// signal, and emitting error output that matches `matcher`.
+// Implemented as ASSERT_EXIT with the internal ExitedUnsuccessfully
+// predicate.
+# define ASSERT_DEATH(statement, matcher) \
+    ASSERT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, matcher)
+
+// Like `ASSERT_DEATH`, but continues on to successive tests in the
+// test suite, if any:
+// Implemented as EXPECT_EXIT with the internal ExitedUnsuccessfully
+// predicate.
+# define EXPECT_DEATH(statement, matcher) \
+    EXPECT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, matcher)
+
+// Two predicate classes that can be used in {ASSERT,EXPECT}_EXIT*:
+
+// Tests that an exit code describes a normal exit with a given exit code.
+// Usable as the `predicate` argument of ASSERT_EXIT / EXPECT_EXIT.
+class GTEST_API_ ExitedWithCode {
+ public:
+  // Stores the exit code to compare against.
+  explicit ExitedWithCode(int exit_code);
+  // Copyable, but not assignable (exit_code_ is const).
+  ExitedWithCode(const ExitedWithCode&) = default;
+  void operator=(const ExitedWithCode& other) = delete;
+  // Evaluates the predicate on a process exit status.
+  bool operator()(int exit_status) const;
+ private:
+  const int exit_code_;
+};
+
+# if !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA
+// Tests that an exit code describes an exit due to termination by a
+// given signal.
+// Only declared when the target is neither Windows nor Fuchsia.
+// GOOGLETEST_CM0006 DO NOT DELETE
+class GTEST_API_ KilledBySignal {
+ public:
+  // Stores the signal number to compare against.
+  explicit KilledBySignal(int signum);
+  // Evaluates the predicate on a process exit status.
+  bool operator()(int exit_status) const;
+ private:
+  const int signum_;
+};
+# endif  // !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA
+
+// EXPECT_DEBUG_DEATH asserts that the given statements die in debug mode.
+// The death testing framework causes this to have interesting semantics,
+// since the side effects of the call are only visible in opt mode, and not
+// in debug mode.
+//
+// In practice, this can be used to test functions that utilize the
+// LOG(DFATAL) macro using the following style:
+//
+// int DieInDebugOr12(int* sideeffect) {
+//   if (sideeffect) {
+//     *sideeffect = 12;
+//   }
+//   LOG(DFATAL) << "death";
+//   return 12;
+// }
+//
+// TEST(TestSuite, TestDieOr12WorksInDgbAndOpt) {
+//   int sideeffect = 0;
+//   // Only asserts in dbg.
+//   EXPECT_DEBUG_DEATH(DieInDebugOr12(&sideeffect), "death");
+//
+// #ifdef NDEBUG
+//   // opt-mode has sideeffect visible.
+//   EXPECT_EQ(12, sideeffect);
+// #else
+//   // dbg-mode no visible sideeffect.
+//   EXPECT_EQ(0, sideeffect);
+// #endif
+// }
+//
+// This will assert that DieInDebugOr12() crashes in debug
+// mode, usually due to a DCHECK or LOG(DFATAL), but returns the
+// appropriate fallback value (12 in this case) in opt mode. If you
+// need to test that a function has appropriate side-effects in opt
+// mode, include assertions against the side-effects.  A general
+// pattern for this is:
+//
+// EXPECT_DEBUG_DEATH({
+//   // Side-effects here will have an effect after this statement in
+//   // opt mode, but none in debug mode.
+//   EXPECT_EQ(12, DieInDebugOr12(&sideeffect));
+// }, "death");
+//
+# ifdef NDEBUG
+
+// With NDEBUG defined both macros expand to GTEST_EXECUTE_STATEMENT_ and
+// not to a death assertion.
+#  define EXPECT_DEBUG_DEATH(statement, regex) \
+  GTEST_EXECUTE_STATEMENT_(statement, regex)
+
+#  define ASSERT_DEBUG_DEATH(statement, regex) \
+  GTEST_EXECUTE_STATEMENT_(statement, regex)
+
+# else
+
+// Without NDEBUG the macros forward to the ordinary death assertions.
+#  define EXPECT_DEBUG_DEATH(statement, regex) \
+  EXPECT_DEATH(statement, regex)
+
+#  define ASSERT_DEBUG_DEATH(statement, regex) \
+  ASSERT_DEATH(statement, regex)
+
+# endif  // NDEBUG for EXPECT_DEBUG_DEATH
+#endif  // GTEST_HAS_DEATH_TEST
+
+// This macro is used for implementing macros such as
+// EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED on systems where
+// death tests are not supported. Those macros must compile on such systems
+// if and only if EXPECT_DEATH and ASSERT_DEATH compile with the same parameters
+// on systems that support death tests. This allows one to write such a macro on
+// a system that does not support death tests and be sure that it will compile
+// on a death-test supporting system. It is exposed publicly so that systems
+// that have death-tests with stricter requirements than GTEST_HAS_DEATH_TEST
+// can write their own equivalent of EXPECT_DEATH_IF_SUPPORTED and
+// ASSERT_DEATH_IF_SUPPORTED.
+//
+// Parameters:
+//   statement -  A statement that a macro such as EXPECT_DEATH would test
+//                for program termination. This macro has to make sure this
+//                statement is compiled but not executed, to ensure that
+//                EXPECT_DEATH_IF_SUPPORTED compiles with a certain
+//                parameter if and only if EXPECT_DEATH compiles with it.
+//   regex     -  A regex that a macro such as EXPECT_DEATH would use to test
+//                the output of statement.  This parameter has to be
+//                compiled but not evaluated by this macro, to ensure that
+//                this macro only accepts expressions that a macro such as
+//                EXPECT_DEATH would accept.
+//   terminator - Must be an empty statement for EXPECT_DEATH_IF_SUPPORTED
+//                and a return statement for ASSERT_DEATH_IF_SUPPORTED.
+//                This ensures that ASSERT_DEATH_IF_SUPPORTED will not
+//                compile inside functions where ASSERT_DEATH doesn't
+//                compile.
+//
+//  The macro expands to a three-way if/else chain:
+//    1. the AlwaysTrue() branch logs a WARNING that death tests are not
+//       supported and that the statement cannot be verified;
+//    2. the AlwaysFalse() branch is used to ensure that statement and regex
+//       are compiled (and thus syntactically correct) but never executed.
+//       The unreachable code macro protects the terminator statement from
+//       generating an 'unreachable code' warning in case statement
+//       unconditionally returns or throws;
+//    3. the final else yields a ::testing::Message(), which allows the
+//       syntax of streaming additional messages into the macro, for
+//       compile-time compatibility with EXPECT_DEATH/ASSERT_DEATH.
+# define GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, terminator) \
+    GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+    if (::testing::internal::AlwaysTrue()) { \
+      GTEST_LOG_(WARNING) \
+          << "Death tests are not supported on this platform.\n" \
+          << "Statement '" #statement "' cannot be verified."; \
+    } else if (::testing::internal::AlwaysFalse()) { \
+      ::testing::internal::RE::PartialMatch(".*", (regex)); \
+      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+      terminator; \
+    } else \
+      ::testing::Message()
+
+// EXPECT_DEATH_IF_SUPPORTED(statement, regex) and
+// ASSERT_DEATH_IF_SUPPORTED(statement, regex) expand to real death tests if
+// death tests are supported; otherwise they just issue a warning.  This is
+// useful when you are combining death test assertions with normal test
+// assertions in one test.
+//
+// In the unsupported case the two macros differ only in the `terminator`
+// passed to GTEST_UNSUPPORTED_DEATH_TEST: empty for EXPECT, `return` for
+// ASSERT.
+#if GTEST_HAS_DEATH_TEST
+# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
+    EXPECT_DEATH(statement, regex)
+# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
+    ASSERT_DEATH(statement, regex)
+#else
+# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
+    GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, )
+# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
+    GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, return)
+#endif
+
+}  // namespace testing
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
--- a/gtestsuite/inc/gtest/gtest-matchers.h
+++ b/gtestsuite/inc/gtest/gtest-matchers.h
@@ -0,0 +1,930 @@
+// Copyright 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This file implements just enough of the matcher interface to allow
+// EXPECT_DEATH and friends to accept a matcher argument.
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_MATCHERS_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_MATCHERS_H_
+
+#include <atomic>
+#include <memory>
+#include <ostream>
+#include <string>
+#include <type_traits>
+
+#include "gtest/gtest-printers.h"
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-port.h"
+
+// MSVC warning C5046 is new as of VS2017 version 15.8.
+// GTEST_MAYBE_5046_ therefore expands to the warning number only for
+// _MSC_VER >= 1915 and to nothing otherwise, so older compilers are not
+// handed a warning number they do not know.
+#if defined(_MSC_VER) && _MSC_VER >= 1915
+#define GTEST_MAYBE_5046_ 5046
+#else
+#define GTEST_MAYBE_5046_
+#endif
+
+// Suppress the warnings listed below for the remainder of this header.
+// The matching pop is not visible in this part of the file.
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(
+    4251 GTEST_MAYBE_5046_ /* C4251: class A needs to have dll-interface to be
+                              used by clients of class B */
+    /* C5046: Symbol involving type with internal linkage not defined */)
+
+namespace testing {
+
+// To implement a matcher Foo for type T, define:
+//   1. a class FooMatcherMatcher that implements the matcher interface:
+//     using is_gtest_matcher = void;
+//     bool MatchAndExplain(const T&, std::ostream*);
+//       (MatchResultListener* can also be used instead of std::ostream*)
+//     void DescribeTo(std::ostream*);
+//     void DescribeNegationTo(std::ostream*);
+//
+//   2. a factory function that creates a Matcher<T> object from a
+//      FooMatcherMatcher.
+
+// Receives the explanation text a matcher produces while matching.
+class MatchResultListener {
+ public:
+  // Wraps `os`, which may be NULL.  The listener never owns the stream and
+  // touches it neither on construction nor on destruction.
+  explicit MatchResultListener(::std::ostream* os) : stream_(os) {}
+
+  // Pure virtual so that the class cannot be instantiated directly.
+  virtual ~MatchResultListener() = 0;
+
+  // Forwards x to the wrapped stream; a listener without a stream silently
+  // drops it.  Returns *this to allow chaining.
+  template <typename T>
+  MatchResultListener& operator<<(const T& x) {
+    if (IsInterested()) {
+      (*stream_) << x;
+    }
+    return *this;
+  }
+
+  // Gives access to the wrapped stream (possibly NULL).
+  ::std::ostream* stream() { return stream_; }
+
+  // True exactly when a stream is attached, i.e. when somebody will read
+  // the explanation.  MatchAndExplain() implementations can consult this to
+  // skip building an explanation nobody intends to hear.
+  bool IsInterested() const { return nullptr != stream_; }
+
+ private:
+  ::std::ostream* const stream_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(MatchResultListener);
+};
+
+// Out-of-class body for the pure virtual destructor; derived-class
+// destructors still call it, so a definition must exist.
+inline MatchResultListener::~MatchResultListener() {}
+
+// Interface for objects that can describe themselves as a matcher.
+class GTEST_API_ MatcherDescriberInterface {
+ public:
+  virtual ~MatcherDescriberInterface() {}
+
+  // Writes a description of this matcher to `os`.  The text should be a
+  // verb phrase stating the property a matching value has, with the value
+  // being matched as its subject; e.g. Gt(7) describes itself as
+  // "is greater than 7".
+  virtual void DescribeTo(::std::ostream* os) const = 0;
+
+  // Writes a description of the negated matcher to `os`; for the example
+  // above that could be "is not greater than 7".  Overriding is optional
+  // when implementing MatcherInterface, but strongly recommended for good
+  // error messages.  The default wraps DescribeTo()'s output in "not (...)".
+  virtual void DescribeNegationTo(::std::ostream* os) const {
+    ::std::ostream& out = *os;
+    out << "not (";
+    DescribeTo(os);
+    out << ")";
+  }
+};
+
+// The implementation of a matcher for values of type T.  Adds the matching
+// operation to the description methods inherited from
+// MatcherDescriberInterface.
+template <typename T>
+class MatcherInterface : public MatcherDescriberInterface {
+ public:
+  // Returns true if and only if the matcher matches x; also explains the
+  // match result to 'listener' if necessary (see the next paragraph), in
+  // the form of a non-restrictive relative clause ("which ...",
+  // "whose ...", etc) that describes x.  For example, the
+  // MatchAndExplain() method of the Pointee(...) matcher should
+  // generate an explanation like "which points to ...".
+  //
+  // Implementations of MatchAndExplain() should add an explanation of
+  // the match result *if and only if* they can provide additional
+  // information that's not already present (or not obvious) in the
+  // print-out of x and the matcher's description.  Whether the match
+  // succeeds is not a factor in deciding whether an explanation is
+  // needed, as sometimes the caller needs to print a failure message
+  // when the match succeeds (e.g. when the matcher is used inside
+  // Not()).
+  //
+  // For example, a "has at least 10 elements" matcher should explain
+  // what the actual element count is, regardless of the match result,
+  // as it is useful information to the reader; on the other hand, an
+  // "is empty" matcher probably only needs to explain what the actual
+  // size is when the match fails, as it's redundant to say that the
+  // size is 0 when the value is already known to be empty.
+  //
+  // You should override this method when defining a new matcher.
+  //
+  // It's the responsibility of the caller (Google Test) to guarantee
+  // that 'listener' is not NULL.  This helps to simplify a matcher's
+  // implementation when it doesn't care about the performance, as it
+  // can talk to 'listener' without checking its validity first.
+  // However, in order to implement dummy listeners efficiently,
+  // listener->stream() may be NULL.
+  //
+  // Parameters:
+  //   x        - the value to match; note that it is taken as T, by value
+  //              unless T is itself a reference type.
+  //   listener - never NULL; receives the explanation.
+  virtual bool MatchAndExplain(T x, MatchResultListener* listener) const = 0;
+
+  // Inherits these methods from MatcherDescriberInterface:
+  //   virtual void DescribeTo(::std::ostream* os) const = 0;
+  //   virtual void DescribeNegationTo(::std::ostream* os) const;
+};
+
+namespace internal {
+
+// Heterogeneous comparison functors: each applies the named operator to two
+// operands of possibly different types and returns the result as bool.
+
+// lhs == rhs
+struct AnyEq {
+  template <typename Lhs, typename Rhs>
+  bool operator()(const Lhs& lhs, const Rhs& rhs) const {
+    return lhs == rhs;
+  }
+};
+
+// lhs != rhs
+struct AnyNe {
+  template <typename Lhs, typename Rhs>
+  bool operator()(const Lhs& lhs, const Rhs& rhs) const {
+    return lhs != rhs;
+  }
+};
+
+// lhs < rhs
+struct AnyLt {
+  template <typename Lhs, typename Rhs>
+  bool operator()(const Lhs& lhs, const Rhs& rhs) const {
+    return lhs < rhs;
+  }
+};
+
+// lhs > rhs
+struct AnyGt {
+  template <typename Lhs, typename Rhs>
+  bool operator()(const Lhs& lhs, const Rhs& rhs) const {
+    return lhs > rhs;
+  }
+};
+
+// lhs <= rhs
+struct AnyLe {
+  template <typename Lhs, typename Rhs>
+  bool operator()(const Lhs& lhs, const Rhs& rhs) const {
+    return lhs <= rhs;
+  }
+};
+
+// lhs >= rhs
+struct AnyGe {
+  template <typename Lhs, typename Rhs>
+  bool operator()(const Lhs& lhs, const Rhs& rhs) const {
+    return lhs >= rhs;
+  }
+};
+
+// Listener that discards every explanation: it is constructed without a
+// stream (nullptr is passed to the base class).
+class DummyMatchResultListener : public MatchResultListener {
+ public:
+  DummyMatchResultListener()
+      : MatchResultListener(nullptr) {
+  }
+
+ private:
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(DummyMatchResultListener);
+};
+
+// Concrete listener that passes the explanation on to the given ostream.
+// Unlike MatchResultListener, which is abstract, this class can be
+// instantiated.
+class StreamMatchResultListener : public MatchResultListener {
+ public:
+  explicit StreamMatchResultListener(::std::ostream* os)
+      : MatchResultListener(os) {
+  }
+
+ private:
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(StreamMatchResultListener);
+};
+
+// Atomic reference counter; the count starts at 1.
+struct SharedPayloadBase {
+  std::atomic<int> ref{1};
+
+  // Adds one reference.
+  void Ref() {
+    ref.fetch_add(1, std::memory_order_relaxed);
+  }
+
+  // Drops one reference.  Returns true when the count was 1 before the
+  // decrement, i.e. when the caller just released the last reference.
+  bool Unref() {
+    const int previous = ref.fetch_sub(1, std::memory_order_acq_rel);
+    return previous == 1;
+  }
+};
+
+// Reference-counted holder for a value of type T.
+template <typename T>
+struct SharedPayload : SharedPayloadBase {
+  // Copies v into the payload.
+  explicit SharedPayload(const T& v) : value(v) {}
+  // Moves v into the payload.
+  explicit SharedPayload(T&& v) : value(std::move(v)) {}
+
+  // Deletes `shared` after casting it back to this SharedPayload type.
+  static void Destroy(SharedPayloadBase* shared) {
+    SharedPayload* const self = static_cast<SharedPayload*>(shared);
+    delete self;
+  }
+
+  T value;
+};
+
+// An internal class for implementing Matcher<T>, which will derive
+// from it.  We put functionalities common to all Matcher<T>
+// specializations here to avoid code duplication.
+//
+// The wrapped implementation is reached through a static table of function
+// pointers (vtable_) and is stored in buffer_: either directly inside the
+// buffer (see IsInlined()) or behind a reference-counted SharedPayloadBase
+// pointer.  A null vtable_ means that no implementation is installed.
+template <typename T>
+class MatcherBase : private MatcherDescriberInterface {
+ public:
+  // Returns true if and only if the matcher matches x; also explains the
+  // match result to 'listener'.
+  // Fails a GTEST_CHECK_ if no implementation is installed.
+  bool MatchAndExplain(const T& x, MatchResultListener* listener) const {
+    GTEST_CHECK_(vtable_ != nullptr);
+    return vtable_->match_and_explain(*this, x, listener);
+  }
+
+  // Returns true if and only if this matcher matches x.
+  bool Matches(const T& x) const {
+    DummyMatchResultListener dummy;
+    return MatchAndExplain(x, &dummy);
+  }
+
+  // Describes this matcher to an ostream.
+  void DescribeTo(::std::ostream* os) const final {
+    GTEST_CHECK_(vtable_ != nullptr);
+    vtable_->describe(*this, os, false);
+  }
+
+  // Describes the negation of this matcher to an ostream.
+  void DescribeNegationTo(::std::ostream* os) const final {
+    GTEST_CHECK_(vtable_ != nullptr);
+    vtable_->describe(*this, os, true);
+  }
+
+  // Explains why x matches, or doesn't match, the matcher.
+  // The boolean match result itself is discarded.
+  void ExplainMatchResultTo(const T& x, ::std::ostream* os) const {
+    StreamMatchResultListener listener(os);
+    MatchAndExplain(x, &listener);
+  }
+
+  // Returns the describer for this matcher object; retains ownership
+  // of the describer, which is only guaranteed to be alive when
+  // this matcher object is alive.
+  // Returns nullptr when no implementation is installed.
+  const MatcherDescriberInterface* GetDescriber() const {
+    if (vtable_ == nullptr) return nullptr;
+    return vtable_->get_describer(*this);
+  }
+
+ protected:
+  // Creates a matcher with no implementation installed.
+  // NOTE: buffer_ is left uninitialized here; it is only read when vtable_
+  // is non-null.
+  MatcherBase() : vtable_(nullptr) {}
+
+  // Constructs a matcher from its implementation.
+  // The pointer is wrapped in a std::unique_ptr by the matching ValuePolicy
+  // specialization, so this object takes ownership of `impl`.
+  template <typename U>
+  explicit MatcherBase(const MatcherInterface<U>* impl) {
+    Init(impl);
+  }
+
+  // Constructs a matcher from any object whose type declares a nested
+  // `is_gtest_matcher` type; other types are rejected by SFINAE.
+  template <typename M, typename = typename std::remove_reference<
+                            M>::type::is_gtest_matcher>
+  MatcherBase(M&& m) {  // NOLINT
+    Init(std::forward<M>(m));
+  }
+
+  // Copies share an out-of-line payload (its reference count is bumped);
+  // an inline payload is copied along with buffer_.
+  MatcherBase(const MatcherBase& other)
+      : vtable_(other.vtable_), buffer_(other.buffer_) {
+    if (IsShared()) buffer_.shared->Ref();
+  }
+
+  // Releases the current payload first, then copies as the copy constructor
+  // does.  Self-assignment is a no-op.
+  MatcherBase& operator=(const MatcherBase& other) {
+    if (this == &other) return *this;
+    Destroy();
+    vtable_ = other.vtable_;
+    buffer_ = other.buffer_;
+    if (IsShared()) buffer_.shared->Ref();
+    return *this;
+  }
+
+  // Takes over other's payload without touching the reference count and
+  // leaves `other` with a null vtable_, so other's destructor releases
+  // nothing.
+  MatcherBase(MatcherBase&& other)
+      : vtable_(other.vtable_), buffer_(other.buffer_) {
+    other.vtable_ = nullptr;
+  }
+
+  // Releases the current payload first, then takes over other's payload as
+  // the move constructor does.  Self-assignment is a no-op.
+  MatcherBase& operator=(MatcherBase&& other) {
+    if (this == &other) return *this;
+    Destroy();
+    vtable_ = other.vtable_;
+    buffer_ = other.buffer_;
+    other.vtable_ = nullptr;
+    return *this;
+  }
+
+  ~MatcherBase() override { Destroy(); }
+
+ private:
+  // Table of type-erased operations for one concrete storage policy.
+  struct VTable {
+    bool (*match_and_explain)(const MatcherBase&, const T&,
+                              MatchResultListener*);
+    void (*describe)(const MatcherBase&, std::ostream*, bool negation);
+    // Returns the captured object if it implements the interface, otherwise
+    // returns the MatcherBase itself.
+    const MatcherDescriberInterface* (*get_describer)(const MatcherBase&);
+    // Called on shared instances when the reference count reaches 0.
+    // Null for policies that store the implementation inline.
+    void (*shared_destroy)(SharedPayloadBase*);
+  };
+
+  // True when an implementation is installed and it lives in a
+  // reference-counted payload (as opposed to inline in buffer_).
+  bool IsShared() const {
+    return vtable_ != nullptr && vtable_->shared_destroy != nullptr;
+  }
+
+  // If the implementation uses a listener, call that.
+  // The two overloads below are selected by SFINAE on the trailing return
+  // type: this one applies when the implementation's MatchAndExplain accepts
+  // the listener's stream (an std::ostream*).
+  template <typename P>
+  static auto MatchAndExplainImpl(const MatcherBase& m, const T& value,
+                                  MatchResultListener* listener)
+      -> decltype(P::Get(m).MatchAndExplain(value, listener->stream())) {
+    return P::Get(m).MatchAndExplain(value, listener->stream());
+  }
+
+  // This overload applies when the implementation's MatchAndExplain accepts
+  // the MatchResultListener* itself.
+  template <typename P>
+  static auto MatchAndExplainImpl(const MatcherBase& m, const T& value,
+                                  MatchResultListener* listener)
+      -> decltype(P::Get(m).MatchAndExplain(value, listener)) {
+    return P::Get(m).MatchAndExplain(value, listener);
+  }
+
+  // Forwards to the implementation's DescribeNegationTo() or DescribeTo(),
+  // depending on `negation`.
+  template <typename P>
+  static void DescribeImpl(const MatcherBase& m, std::ostream* os,
+                           bool negation) {
+    if (negation) {
+      P::Get(m).DescribeNegationTo(os);
+    } else {
+      P::Get(m).DescribeTo(os);
+    }
+  }
+
+  template <typename P>
+  static const MatcherDescriberInterface* GetDescriberImpl(
+      const MatcherBase& m) {
+    // If the impl is a MatcherDescriberInterface, then return it.
+    // Otherwise use MatcherBase itself.
+    // This allows us to implement the GetDescriber() function without support
+    // from the impl, but some users really want to get their impl back when
+    // they call GetDescriber().
+    // We use std::get on a tuple as a workaround of not having `if constexpr`.
+    // Index 0 of the tuple is &m, index 1 is the address of the impl.
+    return std::get<(
+        std::is_convertible<decltype(&P::Get(m)),
+                            const MatcherDescriberInterface*>::value
+            ? 1
+            : 0)>(std::make_tuple(&m, &P::Get(m)));
+  }
+
+  // Returns the address of the function-local static VTable for policy P.
+  template <typename P>
+  const VTable* GetVTable() {
+    static constexpr VTable kVTable = {&MatchAndExplainImpl<P>,
+                                       &DescribeImpl<P>, &GetDescriberImpl<P>,
+                                       P::shared_destroy};
+    return &kVTable;
+  }
+
+  // Storage for the implementation: the bytes of a small implementation
+  // object, or a pointer to a reference-counted payload.
+  union Buffer {
+    // Add some types to give Buffer some common alignment/size use cases.
+    void* ptr;
+    double d;
+    int64_t i;
+    // And add one for the out-of-line cases.
+    SharedPayloadBase* shared;
+  };
+
+  // Drops this object's reference to a shared payload and destroys the
+  // payload if that was the last reference.  Does nothing for inline or
+  // absent implementations.  vtable_ and buffer_ are left unchanged.
+  void Destroy() {
+    if (IsShared() && buffer_.shared->Unref()) {
+      vtable_->shared_destroy(buffer_.shared);
+    }
+  }
+
+  // An implementation type M is stored inline only if it fits Buffer's size
+  // and alignment and is trivially copy-constructible and trivially
+  // destructible.
+  template <typename M>
+  static constexpr bool IsInlined() {
+    return sizeof(M) <= sizeof(Buffer) && alignof(M) <= alignof(Buffer) &&
+           std::is_trivially_copy_constructible<M>::value &&
+           std::is_trivially_destructible<M>::value;
+  }
+
+  // Primary template: used when M is stored inline (second argument true).
+  template <typename M, bool = MatcherBase::IsInlined<M>()>
+  struct ValuePolicy {
+    static const M& Get(const MatcherBase& m) {
+      // When inlined along with Init, need to be explicit to avoid violating
+      // strict aliasing rules.
+      const M *ptr = static_cast<const M*>(
+          static_cast<const void*>(&m.buffer_));
+      return *ptr;
+    }
+    // Copy-constructs `impl` directly into the buffer with placement new.
+    static void Init(MatcherBase& m, M impl) {
+      ::new (static_cast<void*>(&m.buffer_)) M(impl);
+    }
+    static constexpr auto shared_destroy = nullptr;
+  };
+
+  // Used when M is not stored inline: the value lives in a heap-allocated
+  // SharedPayload<M> referenced from buffer_.shared.
+  template <typename M>
+  struct ValuePolicy<M, false> {
+    using Shared = SharedPayload<M>;
+    static const M& Get(const MatcherBase& m) {
+      return static_cast<Shared*>(m.buffer_.shared)->value;
+    }
+    template <typename Arg>
+    static void Init(MatcherBase& m, Arg&& arg) {
+      m.buffer_.shared = new Shared(std::forward<Arg>(arg));
+    }
+    static constexpr auto shared_destroy = &Shared::Destroy;
+  };
+
+  // Used for raw MatcherInterface<U> pointers, whatever IsInlined() says: the
+  // pointer is wrapped in a std::unique_ptr inside a shared payload, so the
+  // interface object is deleted together with that payload.
+  template <typename U, bool B>
+  struct ValuePolicy<const MatcherInterface<U>*, B> {
+    using M = const MatcherInterface<U>;
+    using Shared = SharedPayload<std::unique_ptr<M>>;
+    static const M& Get(const MatcherBase& m) {
+      return *static_cast<Shared*>(m.buffer_.shared)->value;
+    }
+    static void Init(MatcherBase& m, M* impl) {
+      m.buffer_.shared = new Shared(std::unique_ptr<M>(impl));
+    }
+
+    static constexpr auto shared_destroy = &Shared::Destroy;
+  };
+
+  // Installs `m` as the implementation: selects the storage policy from the
+  // decayed type of `m`, records its vtable, and stores the value.
+  template <typename M>
+  void Init(M&& m) {
+    using MM = typename std::decay<M>::type;
+    using Policy = ValuePolicy<MM>;
+    vtable_ = GetVTable<Policy>();
+    Policy::Init(*this, std::forward<M>(m));
+  }
+
+  // Operations for the installed implementation; null if there is none.
+  const VTable* vtable_;
+  // Inline implementation bytes or pointer to the shared payload.
+  Buffer buffer_;
+};
+
+}  // namespace internal
+
+// A Matcher<T> is a copyable and IMMUTABLE (except by assignment)
+// object that can check whether a value of type T matches.  The
+// storage and dispatch of the underlying implementation are handled by
+// internal::MatcherBase<T>, which keeps the implementation either inline
+// or in a reference-counted shared payload.  Don't inherit from Matcher!
+template <typename T>
+class Matcher : public internal::MatcherBase<T> {
+ public:
+  // Constructs a null matcher.  Needed for storing Matcher objects in STL
+  // containers.  A default-constructed matcher is not yet initialized.  You
+  // cannot use it until a valid value has been assigned to it.
+  explicit Matcher() {}  // NOLINT
+
+  // Constructs a matcher from its implementation.
+  explicit Matcher(const MatcherInterface<const T&>* impl)
+      : internal::MatcherBase<T>(impl) {}
+
+  // Constructs a matcher from an implementation of MatcherInterface<U>.
+  // The enable_if removes this overload when U is already the same type as
+  // `const U&` (i.e. U is a const reference type).
+  template <typename U>
+  explicit Matcher(
+      const MatcherInterface<U>* impl,
+      typename std::enable_if<!std::is_same<U, const U&>::value>::type* =
+          nullptr)
+      : internal::MatcherBase<T>(impl) {}
+
+  // Implicitly constructs a matcher from any object whose type declares a
+  // nested `is_gtest_matcher` type.
+  template <typename M, typename = typename std::remove_reference<
+                            M>::type::is_gtest_matcher>
+  Matcher(M&& m) : internal::MatcherBase<T>(std::forward<M>(m)) {}  // NOLINT
+
+  // Implicit constructor here allows people to write
+  // EXPECT_CALL(foo, Bar(5)) instead of EXPECT_CALL(foo, Bar(Eq(5))) sometimes
+  Matcher(T value);  // NOLINT
+};
+
+// The following two specializations allow the user to write str
+// instead of Eq(str) and "foo" instead of Eq("foo") when a std::string
+// matcher is expected.
+template <>
+class GTEST_API_ Matcher<const std::string&>
+    : public internal::MatcherBase<const std::string&> {
+ public:
+  // Constructs a null matcher (no implementation installed).
+  Matcher() {}
+
+  // Constructs a matcher from its implementation.
+  explicit Matcher(const MatcherInterface<const std::string&>* impl)
+      : internal::MatcherBase<const std::string&>(impl) {}
+
+  // Implicitly constructs a matcher from any object whose type declares a
+  // nested `is_gtest_matcher` type.
+  template <typename M, typename = typename std::remove_reference<
+                            M>::type::is_gtest_matcher>
+  Matcher(M&& m)  // NOLINT
+      : internal::MatcherBase<const std::string&>(std::forward<M>(m)) {}
+
+  // Allows the user to write str instead of Eq(str) sometimes, where
+  // str is a std::string object.
+  Matcher(const std::string& s);  // NOLINT
+
+  // Allows the user to write "foo" instead of Eq("foo") sometimes.
+  Matcher(const char* s);  // NOLINT
+};
+
+// Specialization for matchers whose argument type is std::string by value.
+template <>
+class GTEST_API_ Matcher<std::string>
+    : public internal::MatcherBase<std::string> {
+ public:
+  // Constructs a null matcher (no implementation installed).
+  Matcher() {}
+
+  // Construct a matcher from an implementation that takes the string either
+  // by const reference or by value.
+  explicit Matcher(const MatcherInterface<const std::string&>* impl)
+      : internal::MatcherBase<std::string>(impl) {}
+  explicit Matcher(const MatcherInterface<std::string>* impl)
+      : internal::MatcherBase<std::string>(impl) {}
+
+  // Implicitly constructs a matcher from any object whose type declares a
+  // nested `is_gtest_matcher` type.
+  template <typename M, typename = typename std::remove_reference<
+                            M>::type::is_gtest_matcher>
+  Matcher(M&& m)  // NOLINT
+      : internal::MatcherBase<std::string>(std::forward<M>(m)) {}
+
+  // Allows the user to write str instead of Eq(str) sometimes, where
+  // str is a string object.
+  Matcher(const std::string& s);  // NOLINT
+
+  // Allows the user to write "foo" instead of Eq("foo") sometimes.
+  Matcher(const char* s);  // NOLINT
+};
+
+#if GTEST_INTERNAL_HAS_STRING_VIEW
+// The following two specializations allow the user to write str
+// instead of Eq(str) and "foo" instead of Eq("foo") when an
+// absl::string_view (or std::string_view) matcher is expected.
+template <>
+class GTEST_API_ Matcher<const internal::StringView&>
+    : public internal::MatcherBase<const internal::StringView&> {
+ public:
+  // Constructs a null matcher (no implementation installed).
+  Matcher() {}
+
+  // Constructs a matcher from its implementation.
+  explicit Matcher(const MatcherInterface<const internal::StringView&>* impl)
+      : internal::MatcherBase<const internal::StringView&>(impl) {}
+
+  // Implicitly constructs a matcher from any object whose type declares a
+  // nested `is_gtest_matcher` type.
+  template <typename M, typename = typename std::remove_reference<
+                            M>::type::is_gtest_matcher>
+  Matcher(M&& m)  // NOLINT
+      : internal::MatcherBase<const internal::StringView&>(std::forward<M>(m)) {
+  }
+
+  // Allows the user to write str instead of Eq(str) sometimes, where
+  // str is a std::string object.
+  Matcher(const std::string& s);  // NOLINT
+
+  // Allows the user to write "foo" instead of Eq("foo") sometimes.
+  Matcher(const char* s);  // NOLINT
+
+  // Allows the user to pass absl::string_views or std::string_views directly.
+  Matcher(internal::StringView s);  // NOLINT
+};
+
+// Specialization for matchers whose argument type is a string view by value.
+template <>
+class GTEST_API_ Matcher<internal::StringView>
+    : public internal::MatcherBase<internal::StringView> {
+ public:
+  // Constructs a null matcher (no implementation installed).
+  Matcher() {}
+
+  // Construct a matcher from an implementation that takes the string view
+  // either by const reference or by value.
+  explicit Matcher(const MatcherInterface<const internal::StringView&>* impl)
+      : internal::MatcherBase<internal::StringView>(impl) {}
+  explicit Matcher(const MatcherInterface<internal::StringView>* impl)
+      : internal::MatcherBase<internal::StringView>(impl) {}
+
+  // Implicitly constructs a matcher from any object whose type declares a
+  // nested `is_gtest_matcher` type.
+  template <typename M, typename = typename std::remove_reference<
+                            M>::type::is_gtest_matcher>
+  Matcher(M&& m)  // NOLINT
+      : internal::MatcherBase<internal::StringView>(std::forward<M>(m)) {}
+
+  // Allows the user to write str instead of Eq(str) sometimes, where
+  // str is a std::string object.
+  Matcher(const std::string& s);  // NOLINT
+
+  // Allows the user to write "foo" instead of Eq("foo") sometimes.
+  Matcher(const char* s);  // NOLINT
+
+  // Allows the user to pass absl::string_views or std::string_views directly.
+  Matcher(internal::StringView s);  // NOLINT
+};
+#endif  // GTEST_INTERNAL_HAS_STRING_VIEW
+
+// Streams the matcher's description, as produced by DescribeTo(), into `os`
+// and returns `os` to allow chaining.
+template <typename T>
+std::ostream& operator<<(std::ostream& os, const Matcher<T>& matcher) {
+  std::ostream* const sink = &os;
+  matcher.DescribeTo(sink);
+  return os;
+}
+
+// The PolymorphicMatcher class template makes it easy to implement a
+// polymorphic matcher (i.e. a matcher that can match values of more
+// than one type, e.g. Eq(n) and NotNull()).
+//
+// To define a polymorphic matcher, a user should provide an Impl
+// class that has a DescribeTo() method and a DescribeNegationTo()
+// method, and define a member function (or member function template)
+//
+//   bool MatchAndExplain(const Value& value,
+//                        MatchResultListener* listener) const;
+//
+// See the definition of NotNull() for a complete example.
+template <class Impl>
+class PolymorphicMatcher {
+ public:
+  // Stores a copy of `an_impl`.
+  explicit PolymorphicMatcher(const Impl& an_impl) : impl_(an_impl) {}
+
+  // Returns a mutable reference to the underlying matcher
+  // implementation object.
+  Impl& mutable_impl() { return impl_; }
+
+  // Returns an immutable reference to the underlying matcher
+  // implementation object.
+  const Impl& impl() const { return impl_; }
+
+  // Converts to a monomorphic Matcher<T>.  Each conversion heap-allocates a
+  // MonomorphicImpl<const T&> holding its own copy of impl_ and hands that
+  // pointer to the Matcher<T> constructor.
+  template <typename T>
+  operator Matcher<T>() const {
+    return Matcher<T>(new MonomorphicImpl<const T&>(impl_));
+  }
+
+ private:
+  // Adapts Impl to the MatcherInterface<T> virtual interface by forwarding
+  // every call to a private copy of the implementation.
+  template <typename T>
+  class MonomorphicImpl : public MatcherInterface<T> {
+   public:
+    explicit MonomorphicImpl(const Impl& impl) : impl_(impl) {}
+
+    void DescribeTo(::std::ostream* os) const override { impl_.DescribeTo(os); }
+
+    void DescribeNegationTo(::std::ostream* os) const override {
+      impl_.DescribeNegationTo(os);
+    }
+
+    bool MatchAndExplain(T x, MatchResultListener* listener) const override {
+      return impl_.MatchAndExplain(x, listener);
+    }
+
+   private:
+    // Copy of the user's implementation; const, so only const members of
+    // Impl are ever called through this adapter.
+    const Impl impl_;
+  };
+
+  // The user-supplied implementation that conversions copy from.
+  Impl impl_;
+};
+
+// Wraps `impl` in a Matcher<T>.
+// DEPRECATED: in generic code especially, prefer spelling it out:
+//   Matcher<T>(new MyMatcherImpl<const T&>(...));
+//
+// The Matcher produced here may take its argument by value, which causes
+// needless copies and rules out non-copyable types.
+template <typename T>
+inline Matcher<T> MakeMatcher(const MatcherInterface<T>* impl) {
+  using Result = Matcher<T>;
+  return Result(impl);
+}
+
+// Builds a PolymorphicMatcher around `impl`, deducing the Impl template
+// argument so the caller does not have to spell it out, e.g.
+//
+//   MakePolymorphicMatcher(foo);
+// rather than
+//   PolymorphicMatcher<TypeOfFoo>(foo);
+template <class Impl>
+inline PolymorphicMatcher<Impl> MakePolymorphicMatcher(const Impl& impl) {
+  using Result = PolymorphicMatcher<Impl>;
+  return Result(impl);
+}
+
+namespace internal {
+// Shared implementation of the comparison matchers (==, !=, <, <=, >, >=).
+// A candidate value is compared against a stored right-hand side using the
+// functor Op; the two operands need not have the same type.
+//
+// The matcher is polymorphic (for example, Eq(5) can match an int, a short,
+// a double, etc.), which is why MatchAndExplain is a member template.
+//
+// D is the derived matcher class and supplies the static Desc() and
+// NegatedDesc() strings.  Rhs is expected to be a "bare" type (i.e. neither
+// 'const T' nor 'T&').
+template <typename D, typename Rhs, typename Op>
+class ComparisonBase {
+ public:
+  using is_gtest_matcher = void;
+
+  explicit ComparisonBase(const Rhs& rhs) : rhs_(rhs) {}
+
+  // Returns Op()(lhs, rhs); the stream argument is not used.
+  template <typename Lhs>
+  bool MatchAndExplain(const Lhs& lhs, std::ostream*) const {
+    const auto& expected = Unwrap(rhs_);
+    return Op()(lhs, expected);
+  }
+
+  void DescribeTo(std::ostream* os) const {
+    DescribeRelation(D::Desc(), os);
+  }
+
+  void DescribeNegationTo(std::ostream* os) const {
+    DescribeRelation(D::NegatedDesc(), os);
+  }
+
+ private:
+  // Writes `relation`, a single space, and the printed right-hand side.
+  template <typename Text>
+  void DescribeRelation(const Text& relation, std::ostream* os) const {
+    *os << relation << " ";
+    UniversalPrint(Unwrap(rhs_), os);
+  }
+
+  // Plain values are used as they are ...
+  template <typename T>
+  static const T& Unwrap(const T& v) {
+    return v;
+  }
+  // ... while a std::reference_wrapper yields the object it refers to.
+  template <typename T>
+  static const T& Unwrap(std::reference_wrapper<T> v) {
+    return v;
+  }
+
+  Rhs rhs_;
+};
+
+// Comparison matcher using AnyEq: matches values equal to the stored rhs.
+template <typename Rhs>
+class EqMatcher : public ComparisonBase<EqMatcher<Rhs>, Rhs, AnyEq> {
+  using Base = ComparisonBase<EqMatcher<Rhs>, Rhs, AnyEq>;
+
+ public:
+  explicit EqMatcher(const Rhs& rhs) : Base(rhs) {}
+
+  static const char* Desc() { return "is equal to"; }
+  static const char* NegatedDesc() { return "isn't equal to"; }
+};
+// Comparison matcher using AnyNe: matches values not equal to the stored rhs.
+template <typename Rhs>
+class NeMatcher : public ComparisonBase<NeMatcher<Rhs>, Rhs, AnyNe> {
+  using Base = ComparisonBase<NeMatcher<Rhs>, Rhs, AnyNe>;
+
+ public:
+  explicit NeMatcher(const Rhs& rhs) : Base(rhs) {}
+
+  static const char* Desc() { return "isn't equal to"; }
+  static const char* NegatedDesc() { return "is equal to"; }
+};
+// Comparison matcher using AnyLt: matches values less than the stored rhs.
+template <typename Rhs>
+class LtMatcher : public ComparisonBase<LtMatcher<Rhs>, Rhs, AnyLt> {
+  using Base = ComparisonBase<LtMatcher<Rhs>, Rhs, AnyLt>;
+
+ public:
+  explicit LtMatcher(const Rhs& rhs) : Base(rhs) {}
+
+  static const char* Desc() { return "is <"; }
+  static const char* NegatedDesc() { return "isn't <"; }
+};
+// Comparison matcher using AnyGt: matches values greater than the stored rhs.
+template <typename Rhs>
+class GtMatcher : public ComparisonBase<GtMatcher<Rhs>, Rhs, AnyGt> {
+  using Base = ComparisonBase<GtMatcher<Rhs>, Rhs, AnyGt>;
+
+ public:
+  explicit GtMatcher(const Rhs& rhs) : Base(rhs) {}
+
+  static const char* Desc() { return "is >"; }
+  static const char* NegatedDesc() { return "isn't >"; }
+};
+// Comparison matcher using AnyLe: matches values less than or equal to the
+// stored rhs.
+template <typename Rhs>
+class LeMatcher : public ComparisonBase<LeMatcher<Rhs>, Rhs, AnyLe> {
+  using Base = ComparisonBase<LeMatcher<Rhs>, Rhs, AnyLe>;
+
+ public:
+  explicit LeMatcher(const Rhs& rhs) : Base(rhs) {}
+
+  static const char* Desc() { return "is <="; }
+  static const char* NegatedDesc() { return "isn't <="; }
+};
+// Comparison matcher using AnyGe: matches values greater than or equal to the
+// stored rhs.
+template <typename Rhs>
+class GeMatcher : public ComparisonBase<GeMatcher<Rhs>, Rhs, AnyGe> {
+  using Base = ComparisonBase<GeMatcher<Rhs>, Rhs, AnyGe>;
+
+ public:
+  explicit GeMatcher(const Rhs& rhs) : Base(rhs) {}
+
+  static const char* Desc() { return "is >="; }
+  static const char* NegatedDesc() { return "isn't >="; }
+};
+
+// Alias for T itself that is only well-formed when a std::string can be
+// constructed from T.  Using it in a parameter type removes the overload
+// from consideration (SFINAE) for arguments that are not string-like.
+template <typename T, typename = typename std::enable_if<
+                          std::is_constructible<std::string, T>::value>::type>
+using StringLike = T;
+
+// Implements polymorphic matchers MatchesRegex(regex) and
+// ContainsRegex(regex), which can be used as a Matcher<T> as long as
+// T can be converted to a string.
+class MatchesRegexMatcher {
+ public:
+  // `regex` is stored in a std::shared_ptr, so this object (together with
+  // its copies) owns it.  `full_match` selects RE::FullMatch over
+  // RE::PartialMatch when matching.
+  MatchesRegexMatcher(const RE* regex, bool full_match)
+      : regex_(regex), full_match_(full_match) {}
+
+#if GTEST_INTERNAL_HAS_STRING_VIEW
+  // String views are converted to std::string before matching.
+  bool MatchAndExplain(const internal::StringView& s,
+                       MatchResultListener* listener) const {
+    return MatchAndExplain(std::string(s), listener);
+  }
+#endif  // GTEST_INTERNAL_HAS_STRING_VIEW
+
+  // Accepts pointer types, particularly:
+  //   const char*
+  //   char*
+  //   const wchar_t*
+  //   wchar_t*
+  // A null pointer never matches.
+  // NOTE(review): the body builds a std::string from `s`, which looks like it
+  // would only compile for narrow character pointers -- confirm whether the
+  // wide pointer types listed above are really supported.
+  template <typename CharType>
+  bool MatchAndExplain(CharType* s, MatchResultListener* listener) const {
+    return s != nullptr && MatchAndExplain(std::string(s), listener);
+  }
+
+  // Matches anything that can convert to std::string.
+  //
+  // This is a template, not just a plain function with const std::string&,
+  // because absl::string_view has some interfering non-explicit constructors.
+  // The listener is unused: no explanation is produced.
+  template <class MatcheeStringType>
+  bool MatchAndExplain(const MatcheeStringType& s,
+                       MatchResultListener* /* listener */) const {
+    const std::string& s2(s);
+    return full_match_ ? RE::FullMatch(s2, *regex_)
+                       : RE::PartialMatch(s2, *regex_);
+  }
+
+  // Writes "matches regular expression <pattern>" or
+  // "contains regular expression <pattern>", depending on full_match_.
+  void DescribeTo(::std::ostream* os) const {
+    *os << (full_match_ ? "matches" : "contains") << " regular expression ";
+    UniversalPrinter<std::string>::Print(regex_->pattern(), os);
+  }
+
+  // Writes "doesn't match regular expression <pattern>" or
+  // "doesn't contain regular expression <pattern>".
+  void DescribeNegationTo(::std::ostream* os) const {
+    *os << "doesn't " << (full_match_ ? "match" : "contain")
+        << " regular expression ";
+    UniversalPrinter<std::string>::Print(regex_->pattern(), os);
+  }
+
+ private:
+  // The compiled regular expression, shared between copies of this matcher.
+  const std::shared_ptr<const RE> regex_;
+  // True for MatchesRegex semantics (full match), false for ContainsRegex.
+  const bool full_match_;
+};
+}  // namespace internal
+
+// Returns a matcher that accepts strings matching 'regex' in their entirety.
+// Ownership of 'regex' passes to the matcher.
+inline PolymorphicMatcher<internal::MatchesRegexMatcher> MatchesRegex(
+    const internal::RE* regex) {
+  const internal::MatchesRegexMatcher full_matcher(regex,
+                                                   /*full_match=*/true);
+  return MakePolymorphicMatcher(full_matcher);
+}
+// Overload for string-like patterns: compiles `regex` into a new internal::RE
+// and forwards to the pointer overload, which takes ownership of it.
+template <typename T = std::string>
+PolymorphicMatcher<internal::MatchesRegexMatcher> MatchesRegex(
+    const internal::StringLike<T>& regex) {
+  const std::string pattern(regex);
+  return MatchesRegex(new internal::RE(pattern));
+}
+
+// Returns a matcher that accepts strings containing a match for 'regex'.
+// Ownership of 'regex' passes to the matcher.
+inline PolymorphicMatcher<internal::MatchesRegexMatcher> ContainsRegex(
+    const internal::RE* regex) {
+  const internal::MatchesRegexMatcher partial_matcher(regex,
+                                                      /*full_match=*/false);
+  return MakePolymorphicMatcher(partial_matcher);
+}
+// Overload for string-like patterns: compiles `regex` into a new internal::RE
+// and forwards to the pointer overload, which takes ownership of it.
+template <typename T = std::string>
+PolymorphicMatcher<internal::MatchesRegexMatcher> ContainsRegex(
+    const internal::StringLike<T>& regex) {
+  const std::string pattern(regex);
+  return ContainsRegex(new internal::RE(pattern));
+}
+
+// Returns a polymorphic matcher accepting any value that compares equal to x.
+// The parameter is taken by value on purpose: with a `const T&` parameter,
+// Eq("foo") would not compile.
+template <typename T>
+inline internal::EqMatcher<T> Eq(T x) {
+  using Result = internal::EqMatcher<T>;
+  return Result(x);
+}
+
+// Constructs a Matcher<T> from a 'value' of type T.  The constructed
+// matcher matches any value that's equal to 'value'.
+// Implemented by assigning the result of Eq(value) to *this, which goes
+// through the implicit conversion from the Eq matcher to Matcher<T>.
+template <typename T>
+Matcher<T>::Matcher(T value) { *this = Eq(value); }
+
+// Creates a monomorphic matcher that matches anything with type Lhs
+// and equal to rhs.  A user may need to use this instead of Eq(...)
+// in order to resolve an overloading ambiguity.
+//
+// TypedEq<T>(x) is just a convenient short-hand for Matcher<T>(Eq(x))
+// or Matcher<T>(x), but more readable than the latter.
+//
+// We could define similar monomorphic matchers for other comparison
+// operations (e.g. TypedLt, TypedGe, etc.), but decided not to do
+// it yet as those are used much less than Eq() in practice.  A user
+// can always write Matcher<T>(Lt(5)) to be explicit about the type,
+// for example.
+template <typename Lhs, typename Rhs>
+inline Matcher<Lhs> TypedEq(const Rhs& rhs) { return Eq(rhs); }
+
+// Returns a polymorphic matcher accepting any value that is >= x.
+template <typename Rhs>
+inline internal::GeMatcher<Rhs> Ge(Rhs x) {
+  using Result = internal::GeMatcher<Rhs>;
+  return Result(x);
+}
+
+// Returns a polymorphic matcher accepting any value that is > x.
+template <typename Rhs>
+inline internal::GtMatcher<Rhs> Gt(Rhs x) {
+  using Result = internal::GtMatcher<Rhs>;
+  return Result(x);
+}
+
+// Returns a polymorphic matcher accepting any value that is <= x.
+template <typename Rhs>
+inline internal::LeMatcher<Rhs> Le(Rhs x) {
+  using Result = internal::LeMatcher<Rhs>;
+  return Result(x);
+}
+
+// Returns a polymorphic matcher accepting any value that is < x.
+template <typename Rhs>
+inline internal::LtMatcher<Rhs> Lt(Rhs x) {
+  using Result = internal::LtMatcher<Rhs>;
+  return Result(x);
+}
+
+// Returns a polymorphic matcher accepting any value that is != x.
+template <typename Rhs>
+inline internal::NeMatcher<Rhs> Ne(Rhs x) {
+  using Result = internal::NeMatcher<Rhs>;
+  return Result(x);
+}
+}  // namespace testing
+
+GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4251 5046
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_GTEST_MATCHERS_H_
--- a/gtestsuite/inc/gtest/gtest-message.h
+++ b/gtestsuite/inc/gtest/gtest-message.h
@@ -0,0 +1,219 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+//
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This header file defines the Message class.
+//
+// IMPORTANT NOTE: Due to limitation of the C++ language, we have to
+// leave some internal implementation details in this header file.
+// They are clearly marked by comments like this:
+//
+//   // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+//
+// Such code is NOT meant to be used by a user directly, and is subject
+// to CHANGE WITHOUT NOTICE.  Therefore DO NOT DEPEND ON IT in a user
+// program!
+
+// GOOGLETEST_CM0001 DO NOT DELETE
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
+
+#include <limits>
+#include <memory>
+#include <sstream>
+
+#include "gtest/internal/gtest-port.h"
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
+// Ensures that there is at least one operator<< in the global namespace.
+// See Message& operator<<(...) below for why.
+void operator<<(const testing::internal::Secret&, int);
+
+namespace testing {
+
+// The Message class works like an ostream repeater.
+//
+// Typical usage:
+//
+//   1. You stream a bunch of values to a Message object.
+//      It will remember the text in a stringstream.
+//   2. Then you stream the Message object to an ostream.
+//      This causes the text in the Message to be streamed
+//      to the ostream.
+//
+// For example:
+//
+//   testing::Message foo;
+//   foo << 1 << " != " << 2;
+//   std::cout << foo;
+//
+// will print "1 != 2".
+//
+// Message is not intended to be inherited from.  In particular, its
+// destructor is not virtual.
+//
+// Note that stringstream behaves differently in gcc and in MSVC.  You
+// can stream a NULL char pointer to it in the former, but not in the
+// latter (it causes an access violation if you do).  The Message
+// class hides this difference by treating a NULL char pointer as
+// "(null)".
+class GTEST_API_ Message {
+ private:
+  // The type of basic IO manipulators (endl, ends, and flush) for
+  // narrow streams.
+  typedef std::ostream& (*BasicNarrowIoManip)(std::ostream&);
+
+ public:
+  // Constructs an empty Message.
+  Message();
+
+  // Copy constructor.
+  // The new object gets its own stringstream, filled with the text that
+  // msg.GetString() returns.
+  Message(const Message& msg) : ss_(new ::std::stringstream) {  // NOLINT
+    *ss_ << msg.GetString();
+  }
+
+  // Constructs a Message from a C-string.
+  // NOTE(review): unlike operator<<(T* const&) below, `str` is streamed
+  // without a null check -- confirm callers never pass NULL here.
+  explicit Message(const char* str) : ss_(new ::std::stringstream) {
+    *ss_ << str;
+  }
+
+  // Streams a non-pointer value to this object.
+  template <typename T>
+  inline Message& operator <<(const T& val) {
+    // Some libraries overload << for STL containers.  These
+    // overloads are defined in the global namespace instead of ::std.
+    //
+    // C++'s symbol lookup rule (i.e. Koenig lookup) says that these
+    // overloads are visible in either the std namespace or the global
+    // namespace, but not other namespaces, including the testing
+    // namespace which Google Test's Message class is in.
+    //
+    // To allow STL containers (and other types that have a << operator
+    // defined in the global namespace) to be used in Google Test
+    // assertions, testing::Message must access the custom << operator
+    // from the global namespace.  With this using declaration,
+    // overloads of << defined in the global namespace and those
+    // visible via Koenig lookup are both exposed in this function.
+    using ::operator <<;
+    *ss_ << val;
+    return *this;
+  }
+
+  // Streams a pointer value to this object.
+  //
+  // This function is an overload of the previous one.  When you
+  // stream a pointer to a Message, this definition will be used as it
+  // is more specialized.  (The C++ Standard, section
+  // [temp.func.order].)  If you stream a non-pointer, then the
+  // previous definition will be used.
+  //
+  // The reason for this overload is that streaming a NULL pointer to
+  // ostream is undefined behavior.  Depending on the compiler, you
+  // may get "0", "(nil)", "(null)", or an access violation.  To
+  // ensure consistent result across compilers, we always treat NULL
+  // as "(null)".
+  template <typename T>
+  inline Message& operator <<(T* const& pointer) {  // NOLINT
+    if (pointer == nullptr) {
+      *ss_ << "(null)";
+    } else {
+      *ss_ << pointer;
+    }
+    return *this;
+  }
+
+  // Since the basic IO manipulators are overloaded for both narrow
+  // and wide streams, we have to provide this specialized definition
+  // of operator <<, even though its body is the same as the
+  // templatized version above.  Without this definition, streaming
+  // endl or other basic IO manipulators to Message will confuse the
+  // compiler.
+  Message& operator <<(BasicNarrowIoManip val) {
+    *ss_ << val;
+    return *this;
+  }
+
+  // Instead of 1/0, we want to see true/false for bool values.
+  Message& operator <<(bool b) {
+    return *this << (b ? "true" : "false");
+  }
+
+  // These two overloads allow streaming a wide C string to a Message
+  // using the UTF-8 encoding.
+  Message& operator <<(const wchar_t* wide_c_str);
+  Message& operator <<(wchar_t* wide_c_str);
+
+#if GTEST_HAS_STD_WSTRING
+  // Converts the given wide string to a narrow string using the UTF-8
+  // encoding, and streams the result to this Message object.
+  Message& operator <<(const ::std::wstring& wstr);
+#endif  // GTEST_HAS_STD_WSTRING
+
+  // Gets the text streamed to this object so far as an std::string.
+  // Each '\0' character in the buffer is replaced with "\\0".
+  //
+  // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+  std::string GetString() const;
+
+ private:
+  // We'll hold the text streamed to this object here.
+  // The pointer itself is const; the stream it points to is still writable.
+  const std::unique_ptr< ::std::stringstream> ss_;
+
+  // We declare (but don't implement) this to prevent the compiler
+  // from implementing the assignment operator.
+  void operator=(const Message&);
+};
+
+// Writes the text accumulated in `sb` (as returned by GetString()) to `os`
+// and returns `os` to allow chaining.
+inline std::ostream& operator <<(std::ostream& os, const Message& sb) {
+  const std::string text = sb.GetString();
+  os << text;
+  return os;
+}
+
+namespace internal {
+
+// Converts a streamable value to an std::string.  A NULL pointer is
+// converted to "(null)".  When the input value is a ::string,
+// ::std::string, ::wstring, or ::std::wstring object, each NUL
+// character in it is replaced with "\\0".
+//
+// The value is streamed into a temporary Message, whose GetString() result
+// is returned before the temporary is destroyed at the end of the full
+// expression.
+template <typename T>
+std::string StreamableToString(const T& streamable) {
+  return (Message() << streamable).GetString();
+}
+
+}  // namespace internal
+}  // namespace testing
+
+GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4251
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
--- a/gtestsuite/inc/gtest/gtest-param-test.h
+++ b/gtestsuite/inc/gtest/gtest-param-test.h
@@ -0,0 +1,507 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Macros and functions for implementing parameterized tests
+// in Google C++ Testing and Mocking Framework (Google Test)
+//
+// GOOGLETEST_CM0001 DO NOT DELETE
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
+
+// Value-parameterized tests allow you to test your code with different
+// parameters without writing multiple copies of the same test.
+//
+// Here is how you use value-parameterized tests:
+
+#if 0
+
+// To write value-parameterized tests, first you should define a fixture
+// class. It is usually derived from testing::TestWithParam<T> (see below for
+// another inheritance scheme that's sometimes useful in more complicated
+// class hierarchies), where T is the type of your parameter values.
+// TestWithParam<T> is itself derived from testing::Test. T can be any
+// copyable type. If it's a raw pointer, you are responsible for managing the
+// lifespan of the pointed values.
+
+class FooTest : public ::testing::TestWithParam<const char*> {
+  // You can implement all the usual class fixture members here.
+};
+
+// Then, use the TEST_P macro to define as many parameterized tests
+// for this fixture as you want. The _P suffix is for "parameterized"
+// or "pattern", whichever you prefer to think.
+
+TEST_P(FooTest, DoesBlah) {
+  // Inside a test, access the test parameter with the GetParam() method
+  // of the TestWithParam<T> class:
+  EXPECT_TRUE(foo.Blah(GetParam()));
+  ...
+}
+
+TEST_P(FooTest, HasBlahBlah) {
+  ...
+}
+
+// Finally, you can use INSTANTIATE_TEST_SUITE_P to instantiate the test
+// case with any set of parameters you want. Google Test defines a number
+// of functions for generating test parameters. They return what we call
+// (surprise!) parameter generators. Here is a summary of them, which
+// are all in the testing namespace:
+//
+//
+//  Range(begin, end [, step]) - Yields values {begin, begin+step,
+//                               begin+step+step, ...}. The values do not
+//                               include end. step defaults to 1.
+//  Values(v1, v2, ..., vN)    - Yields values {v1, v2, ..., vN}.
+//  ValuesIn(container)        - Yields values from a C-style array, an STL
+//  ValuesIn(begin,end)          container, or an iterator range [begin, end).
+//  Bool()                     - Yields sequence {false, true}.
+//  Combine(g1, g2, ..., gN)   - Yields all combinations (the Cartesian product
+//                               for the math savvy) of the values generated
+//                               by the N generators.
+//
+// For more details, see comments at the definitions of these functions below
+// in this file.
+//
+// The following statement will instantiate tests from the FooTest test suite
+// each with parameter values "meeny", "miny", and "moe".
+
+INSTANTIATE_TEST_SUITE_P(InstantiationName,
+                         FooTest,
+                         Values("meeny", "miny", "moe"));
+
+// To distinguish different instances of the pattern, (yes, you
+// can instantiate it more than once) the first argument to the
+// INSTANTIATE_TEST_SUITE_P macro is a prefix that will be added to the
+// actual test suite name. Remember to pick unique prefixes for different
+// instantiations. The tests from the instantiation above will have
+// these names:
+//
+//    * InstantiationName/FooTest.DoesBlah/0 for "meeny"
+//    * InstantiationName/FooTest.DoesBlah/1 for "miny"
+//    * InstantiationName/FooTest.DoesBlah/2 for "moe"
+//    * InstantiationName/FooTest.HasBlahBlah/0 for "meeny"
+//    * InstantiationName/FooTest.HasBlahBlah/1 for "miny"
+//    * InstantiationName/FooTest.HasBlahBlah/2 for "moe"
+//
+// You can use these names in --gtest_filter.
+//
+// This statement will instantiate all tests from FooTest again, each
+// with parameter values "cat" and "dog":
+
+const char* pets[] = {"cat", "dog"};
+INSTANTIATE_TEST_SUITE_P(AnotherInstantiationName, FooTest, ValuesIn(pets));
+
+// The tests from the instantiation above will have these names:
+//
+//    * AnotherInstantiationName/FooTest.DoesBlah/0 for "cat"
+//    * AnotherInstantiationName/FooTest.DoesBlah/1 for "dog"
+//    * AnotherInstantiationName/FooTest.HasBlahBlah/0 for "cat"
+//    * AnotherInstantiationName/FooTest.HasBlahBlah/1 for "dog"
+//
+// Please note that INSTANTIATE_TEST_SUITE_P will instantiate all tests
+// in the given test suite, whether their definitions come before or
+// AFTER the INSTANTIATE_TEST_SUITE_P statement.
+//
+// Please also note that generator expressions (including parameters to the
+// generators) are evaluated in InitGoogleTest(), after main() has started.
+// This allows the user on one hand, to adjust generator parameters in order
+// to dynamically determine a set of tests to run and on the other hand,
+// give the user a chance to inspect the generated tests with Google Test
+// reflection API before RUN_ALL_TESTS() is executed.
+//
+// You can see samples/sample7_unittest.cc and samples/sample8_unittest.cc
+// for more examples.
+//
+// In the future, we plan to publish the API for defining new parameter
+// generators. But for now this interface remains part of the internal
+// implementation and is subject to change.
+//
+//
+// A parameterized test fixture must be derived from testing::Test and from
+// testing::WithParamInterface<T>, where T is the type of the parameter
+// values. Inheriting from TestWithParam<T> satisfies that requirement because
+// TestWithParam<T> inherits from both Test and WithParamInterface. In more
+// complicated hierarchies, however, it is occasionally useful to inherit
+// separately from Test and WithParamInterface. For example:
+
+class BaseTest : public ::testing::Test {
+  // You can inherit all the usual members for a non-parameterized test
+  // fixture here.
+};
+
+class DerivedTest : public BaseTest, public ::testing::WithParamInterface<int> {
+  // The usual test fixture members go here too.
+};
+
+TEST_F(BaseTest, HasFoo) {
+  // This is an ordinary non-parameterized test.
+}
+
+TEST_P(DerivedTest, DoesBlah) {
+  // GetParam works just the same here as if you inherit from TestWithParam.
+  EXPECT_TRUE(foo.Blah(GetParam()));
+}
+
+#endif  // 0
+
+#include <iterator>
+#include <utility>
+
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-param-util.h"
+#include "gtest/internal/gtest-port.h"
+
+namespace testing {
+
+// Functions producing parameter generators.
+//
+// Google Test uses these generators to produce parameters for value-
+// parameterized tests. When a parameterized test suite is instantiated
+// with a particular generator, Google Test creates and runs tests
+// for each element in the sequence produced by the generator.
+//
+// In the following sample, tests from test suite FooTest are instantiated
+// each three times with parameter values 3, 5, and 8:
+//
+// class FooTest : public TestWithParam<int> { ... };
+//
+// TEST_P(FooTest, TestThis) {
+// }
+// TEST_P(FooTest, TestThat) {
+// }
+// INSTANTIATE_TEST_SUITE_P(TestSequence, FooTest, Values(3, 5, 8));
+//
+
+// Range() returns generators providing sequences of values in a range.
+//
+// Synopsis:
+// Range(start, end)
+//   - returns a generator producing a sequence of values {start, start+1,
+//     start+2, ..., }.
+// Range(start, end, step)
+//   - returns a generator producing a sequence of values {start, start+step,
+//     start+step+step, ..., }.
+// Notes:
+//   * The generated sequences never include end. For example, Range(1, 5)
+//     returns a generator producing a sequence {1, 2, 3, 4}. Range(1, 9, 2)
+//     returns a generator producing {1, 3, 5, 7}.
+//   * start and end must have the same type. That type may be any integral or
+//     floating-point type or a user defined type satisfying these conditions:
+//     * It must be assignable (have operator=() defined).
+//     * It must have operator+() (operator+(int-compatible type) for
+//       two-operand version).
+//     * It must have operator<() defined.
+//     Elements in the resulting sequences will also have that type.
+//   * Condition start < end must be satisfied in order for resulting sequences
+//     to contain any elements.
+//
+template <typename T, typename IncrementT>
+internal::ParamGenerator<T> Range(T start, T end, IncrementT step) {
+  // The ParamGenerator is handed a newly allocated RangeGenerator built from
+  // (start, end, step).
+  using GeneratorImpl = internal::RangeGenerator<T, IncrementT>;
+  return internal::ParamGenerator<T>(new GeneratorImpl(start, end, step));
+}
+
+template <typename T>
+internal::ParamGenerator<T> Range(T start, T end) {
+  // Two-argument form: delegate to the stepped overload with an int step of 1.
+  const int default_step = 1;
+  return Range(start, end, default_step);
+}
+
+// ValuesIn() function allows generation of tests with parameters coming from
+// a container.
+//
+// Synopsis:
+// ValuesIn(const T (&array)[N])
+//   - returns a generator producing sequences with elements from
+//     a C-style array.
+// ValuesIn(const Container& container)
+//   - returns a generator producing sequences with elements from
+//     an STL-style container.
+// ValuesIn(Iterator begin, Iterator end)
+//   - returns a generator producing sequences with elements from
+//     a range [begin, end) defined by a pair of STL-style iterators. These
+//     iterators can also be plain C pointers.
+//
+// Please note that ValuesIn copies the values from the containers
+// passed in and keeps them to generate tests in RUN_ALL_TESTS().
+//
+// Examples:
+//
+// This instantiates tests from test suite StringTest
+// each with C-string values of "foo", "bar", and "baz":
+//
+// const char* strings[] = {"foo", "bar", "baz"};
+// INSTANTIATE_TEST_SUITE_P(StringSequence, StringTest, ValuesIn(strings));
+//
+// This instantiates tests from test suite StlStringTest
+// each with STL strings with values "a" and "b":
+//
+// ::std::vector< ::std::string> GetParameterStrings() {
+//   ::std::vector< ::std::string> v;
+//   v.push_back("a");
+//   v.push_back("b");
+//   return v;
+// }
+//
+// INSTANTIATE_TEST_SUITE_P(CharSequence,
+//                          StlStringTest,
+//                          ValuesIn(GetParameterStrings()));
+//
+//
+// This will also instantiate tests from CharTest
+// each with parameter values 'a' and 'b':
+//
+// ::std::list<char> GetParameterChars() {
+//   ::std::list<char> list;
+//   list.push_back('a');
+//   list.push_back('b');
+//   return list;
+// }
+// ::std::list<char> l = GetParameterChars();
+// INSTANTIATE_TEST_SUITE_P(CharSequence2,
+//                          CharTest,
+//                          ValuesIn(l.begin(), l.end()));
+//
+template <typename ForwardIterator>
+internal::ParamGenerator<
+    typename std::iterator_traits<ForwardIterator>::value_type>
+ValuesIn(ForwardIterator begin, ForwardIterator end) {
+  // The parameter type is the iterators' value_type.
+  using ParamType = typename std::iterator_traits<ForwardIterator>::value_type;
+  using RangeImpl = internal::ValuesInIteratorRangeGenerator<ParamType>;
+  return internal::ParamGenerator<ParamType>(new RangeImpl(begin, end));
+}
+
+template <typename T, size_t N>
+internal::ParamGenerator<T> ValuesIn(const T (&array)[N]) {
+  // Treat the array as the pointer range [array, array + N) and forward it to
+  // the iterator overload.
+  const T* const first = array;
+  const T* const last = first + N;
+  return ValuesIn(first, last);
+}
+
+template <class Container>
+internal::ParamGenerator<typename Container::value_type> ValuesIn(
+    const Container& container) {
+  // Forward the container's [begin(), end()) range to the iterator overload.
+  auto first = container.begin();
+  auto last = container.end();
+  return ValuesIn(first, last);
+}
+
+// Values() allows generating tests from an explicitly specified list of
+// parameters.
+//
+// Synopsis:
+// Values(T v1, T v2, ..., T vN)
+//   - returns a generator producing sequences with elements v1, v2, ..., vN.
+//
+// For example, this instantiates tests from test suite BarTest each
+// with values "one", "two", and "three":
+//
+// INSTANTIATE_TEST_SUITE_P(NumSequence,
+//                          BarTest,
+//                          Values("one", "two", "three"));
+//
+// This instantiates tests from test suite BazTest each with values 1, 2, 3.5.
+// The exact type of values will depend on the type of parameter in BazTest.
+//
+// INSTANTIATE_TEST_SUITE_P(FloatingNumbers, BazTest, Values(1, 2, 3.5));
+//
+//
+template <typename... T>
+internal::ValueArray<T...> Values(T... v) {
+  // Each by-value argument is moved into the ValueArray being returned.
+  using ArrayType = internal::ValueArray<T...>;
+  return ArrayType(std::move(v)...);
+}
+
+// Bool() allows generating tests with parameters in a set of (false, true).
+//
+// Synopsis:
+// Bool()
+//   - returns a generator producing sequences with elements {false, true}.
+//
+// It is useful when testing code that depends on Boolean flags. Combinations
+// of multiple flags can be tested when several Bool()'s are combined using
+// Combine() function.
+//
+// In the following example all tests in the test suite FlagDependentTest
+// will be instantiated twice with parameters false and true.
+//
+// class FlagDependentTest : public testing::TestWithParam<bool> {
+//   virtual void SetUp() {
+//     external_flag = GetParam();
+//   }
+// };
+// INSTANTIATE_TEST_SUITE_P(BoolSequence, FlagDependentTest, Bool());
+//
+inline internal::ParamGenerator<bool> Bool() {
+  // Values(false, true) produces an internal::ValueArray<bool, bool>; the
+  // return statement relies on that type converting to ParamGenerator<bool>.
+  // NOTE(review): the conversion is defined outside this header - presumably
+  // in gtest-param-util.h; confirm.
+  return Values(false, true);
+}
+
+// Combine() allows the user to combine two or more sequences to produce
+// values of a Cartesian product of those sequences' elements.
+//
+// Synopsis:
+// Combine(gen1, gen2, ..., genN)
+//   - returns a generator producing sequences with elements coming from
+//     the Cartesian product of elements from the sequences generated by
+//     gen1, gen2, ..., genN. The sequence elements will have a type of
+//     std::tuple<T1, T2, ..., TN> where T1, T2, ..., TN are the types
+//     of elements from sequences produced by gen1, gen2, ..., genN.
+//
+// Example:
+//
+// This will instantiate tests in test suite AnimalTest each one with
+// the parameter values tuple("cat", BLACK), tuple("cat", WHITE),
+// tuple("dog", BLACK), and tuple("dog", WHITE):
+//
+// enum Color { BLACK, GRAY, WHITE };
+// class AnimalTest
+//     : public testing::TestWithParam<std::tuple<const char*, Color> > {...};
+//
+// TEST_P(AnimalTest, AnimalLooksNice) {...}
+//
+// INSTANTIATE_TEST_SUITE_P(AnimalVariations, AnimalTest,
+//                          Combine(Values("cat", "dog"),
+//                                  Values(BLACK, WHITE)));
+//
+// This will instantiate tests in FlagDependentTest with all variations of two
+// Boolean flags:
+//
+// class FlagDependentTest
+//     : public testing::TestWithParam<std::tuple<bool, bool> > {
+//   virtual void SetUp() {
+//     // Assigns external_flag_1 and external_flag_2 values from the tuple.
+//     std::tie(external_flag_1, external_flag_2) = GetParam();
+//   }
+// };
+//
+// TEST_P(FlagDependentTest, TestFeature1) {
+//   // Test your code using external_flag_1 and external_flag_2 here.
+// }
+// INSTANTIATE_TEST_SUITE_P(TwoBoolSequence, FlagDependentTest,
+//                          Combine(Bool(), Bool()));
+//
+template <typename... Generator>
+internal::CartesianProductHolder<Generator...> Combine(const Generator&... g) {
+  // Bundle the given generators into a CartesianProductHolder.
+  using Holder = internal::CartesianProductHolder<Generator...>;
+  return Holder(g...);
+}
+
+// TEST_P(test_suite_name, test_name) defines one value-parameterized test.
+// It expands to a class derived from the fixture test_suite_name that
+// overrides TestBody(), plus the definition of a static int member whose
+// initializer calls AddToRegistry().  AddToRegistry() looks up the pattern
+// holder for the fixture in UnitTest's parameterized_test_registry() and adds
+// a test pattern under the stringified suite and test names.  The macro ends
+// with the signature of TestBody(), so the braces written after the macro
+// invocation become that function's body.
+#define TEST_P(test_suite_name, test_name)                                     \
+  class GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)                     \
+      : public test_suite_name {                                               \
+   public:                                                                     \
+    GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)() {}                    \
+    void TestBody() override;                                                  \
+                                                                               \
+   private:                                                                    \
+    static int AddToRegistry() {                                               \
+      ::testing::UnitTest::GetInstance()                                       \
+          ->parameterized_test_registry()                                      \
+          .GetTestSuitePatternHolder<test_suite_name>(                         \
+              GTEST_STRINGIFY_(test_suite_name),                               \
+              ::testing::internal::CodeLocation(__FILE__, __LINE__))           \
+          ->AddTestPattern(                                                    \
+              GTEST_STRINGIFY_(test_suite_name), GTEST_STRINGIFY_(test_name),  \
+              new ::testing::internal::TestMetaFactory<GTEST_TEST_CLASS_NAME_( \
+                  test_suite_name, test_name)>(),                              \
+              ::testing::internal::CodeLocation(__FILE__, __LINE__));          \
+      return 0;                                                                \
+    }                                                                          \
+    static int gtest_registering_dummy_ GTEST_ATTRIBUTE_UNUSED_;               \
+    GTEST_DISALLOW_COPY_AND_ASSIGN_(GTEST_TEST_CLASS_NAME_(test_suite_name,    \
+                                                           test_name));        \
+  };                                                                           \
+  int GTEST_TEST_CLASS_NAME_(test_suite_name,                                  \
+                             test_name)::gtest_registering_dummy_ =            \
+      GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)::AddToRegistry();     \
+  void GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)::TestBody()
+
+// The last argument to INSTANTIATE_TEST_SUITE_P allows the user to specify a
+// generator and an optional function or functor that generates custom test
+// name suffixes based on the test parameters. Such a function or functor
+// should accept one argument of type testing::TestParamInfo<class ParamType>,
+// and return std::string.
+//
+// testing::PrintToStringParamName is a builtin test suffix generator that
+// returns the value of testing::PrintToString(GetParam()).
+//
+// Note: test names must be non-empty, unique, and may only contain ASCII
+// alphanumeric characters or underscore. Because PrintToString adds quotes
+// to std::string and C strings, it won't work for these types.
+
+// Helper macros used by INSTANTIATE_TEST_SUITE_P to pick apart __VA_ARGS__.
+// GTEST_GET_FIRST_ / GTEST_GET_SECOND_ yield their first / second argument;
+// GTEST_EXPAND_ yields its argument unchanged.
+// NOTE(review): GTEST_EXPAND_ is presumably a workaround for how some
+// preprocessors forward __VA_ARGS__ as a single token - confirm.
+#define GTEST_EXPAND_(arg) arg
+#define GTEST_GET_FIRST_(first, ...) first
+#define GTEST_GET_SECOND_(first, second, ...) second
+
+// INSTANTIATE_TEST_SUITE_P(prefix, test_suite_name, generator[, name_fn])
+// expands to three file-static entities whose names embed prefix and
+// test_suite_name:
+//   * ..._EvalGenerator_()     returns the first variadic argument as a
+//                              ParamGenerator<test_suite_name::ParamType>.
+//   * ..._EvalGenerateName_()  calls the second variadic argument with the
+//                              TestParamInfo, falling back to
+//                              internal::DefaultParamName when none is given.
+//   * ..._dummy_               an int whose initializer registers the two
+//                              functions via AddTestSuiteInstantiation().
+// The block guarded by internal::AlwaysFalse() holds the static_assert that
+// rejects more than two variadic arguments; presumably it is never executed
+// and exists only to be compiled.
+#define INSTANTIATE_TEST_SUITE_P(prefix, test_suite_name, ...)                \
+  static ::testing::internal::ParamGenerator<test_suite_name::ParamType>      \
+      gtest_##prefix##test_suite_name##_EvalGenerator_() {                    \
+    return GTEST_EXPAND_(GTEST_GET_FIRST_(__VA_ARGS__, DUMMY_PARAM_));        \
+  }                                                                           \
+  static ::std::string gtest_##prefix##test_suite_name##_EvalGenerateName_(   \
+      const ::testing::TestParamInfo<test_suite_name::ParamType>& info) {     \
+    if (::testing::internal::AlwaysFalse()) {                                 \
+      ::testing::internal::TestNotEmpty(GTEST_EXPAND_(GTEST_GET_SECOND_(      \
+          __VA_ARGS__,                                                        \
+          ::testing::internal::DefaultParamName<test_suite_name::ParamType>,  \
+          DUMMY_PARAM_)));                                                    \
+      auto t = std::make_tuple(__VA_ARGS__);                                  \
+      static_assert(std::tuple_size<decltype(t)>::value <= 2,                 \
+                    "Too Many Args!");                                        \
+    }                                                                         \
+    return ((GTEST_EXPAND_(GTEST_GET_SECOND_(                                 \
+        __VA_ARGS__,                                                          \
+        ::testing::internal::DefaultParamName<test_suite_name::ParamType>,    \
+        DUMMY_PARAM_))))(info);                                               \
+  }                                                                           \
+  static int gtest_##prefix##test_suite_name##_dummy_                         \
+      GTEST_ATTRIBUTE_UNUSED_ =                                               \
+          ::testing::UnitTest::GetInstance()                                  \
+              ->parameterized_test_registry()                                 \
+              .GetTestSuitePatternHolder<test_suite_name>(                    \
+                  GTEST_STRINGIFY_(test_suite_name),                          \
+                  ::testing::internal::CodeLocation(__FILE__, __LINE__))      \
+              ->AddTestSuiteInstantiation(                                    \
+                  GTEST_STRINGIFY_(prefix),                                   \
+                  &gtest_##prefix##test_suite_name##_EvalGenerator_,          \
+                  &gtest_##prefix##test_suite_name##_EvalGenerateName_,       \
+                  __FILE__, __LINE__)
+
+
+// Allows marking the parameterized test class T as not needing to be
+// instantiated.  Expands to an empty namespace declaration followed by a
+// static internal::MarkAsIgnored object constructed from the stringified
+// name of T.
+// NOTE(review): the namespace declaration presumably exists so the macro
+// only compiles at namespace scope - confirm.
+#define GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(T)                   \
+  namespace gtest_do_not_use_outside_namespace_scope {}                   \
+  static const ::testing::internal::MarkAsIgnored gtest_allow_ignore_##T( \
+      GTEST_STRINGIFY_(T))
+
+// Legacy API is deprecated but still available.  INSTANTIATE_TEST_CASE_P
+// expands to a static_assert on internal::InstantiateTestCase_P_IsDeprecated()
+// followed by INSTANTIATE_TEST_SUITE_P, so it takes the same arguments.
+// NOTE(review): the static_assert is presumably there to surface a
+// deprecation warning - confirm against gtest-internal.h.
+// Defining GTEST_REMOVE_LEGACY_TEST_CASEAPI_ removes the legacy macro.
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+#define INSTANTIATE_TEST_CASE_P                                            \
+  static_assert(::testing::internal::InstantiateTestCase_P_IsDeprecated(), \
+                "");                                                       \
+  INSTANTIATE_TEST_SUITE_P
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+}  // namespace testing
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
--- a/gtestsuite/inc/gtest/gtest-printers.h
+++ b/gtestsuite/inc/gtest/gtest-printers.h
--- a/gtestsuite/inc/gtest/gtest-spi.h
+++ b/gtestsuite/inc/gtest/gtest-spi.h
@@ -0,0 +1,238 @@
+// Copyright 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+//
+// Utilities for testing Google Test itself and code that uses Google Test
+// (e.g. frameworks built on top of Google Test).
+
+// GOOGLETEST_CM0004 DO NOT DELETE
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_SPI_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_SPI_H_
+
+#include "gtest/gtest.h"
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
+namespace testing {
+
+// This helper class can be used to mock out Google Test failure reporting
+// so that we can test Google Test or code that builds on Google Test.
+//
+// An object of this class appends a TestPartResult object to the
+// TestPartResultArray object given in the constructor whenever a Google Test
+// failure is reported. It can either intercept only failures that are
+// generated in the same thread that created this object or it can intercept
+// all generated failures. The scope of this mock object can be controlled with
+// the second argument to the two arguments constructor.
+class GTEST_API_ ScopedFakeTestPartResultReporter
+    : public TestPartResultReporterInterface {
+ public:
+  // The two possible mocking modes of this object.
+  enum InterceptMode {
+    INTERCEPT_ONLY_CURRENT_THREAD,  // Intercepts only thread local failures.
+    INTERCEPT_ALL_THREADS           // Intercepts all failures.
+  };
+
+  // The c'tor sets this object as the test part result reporter used
+  // by Google Test.  The 'result' parameter specifies where to report the
+  // results. This reporter will only catch failures generated in the current
+  // thread. DEPRECATED
+  explicit ScopedFakeTestPartResultReporter(TestPartResultArray* result);
+
+  // Same as above, but you can choose the interception scope of this object.
+  ScopedFakeTestPartResultReporter(InterceptMode intercept_mode,
+                                   TestPartResultArray* result);
+
+  // The d'tor restores the previous test part result reporter.
+  ~ScopedFakeTestPartResultReporter() override;
+
+  // Appends the TestPartResult object to the TestPartResultArray
+  // received in the constructor.
+  //
+  // This method is from the TestPartResultReporterInterface
+  // interface.
+  void ReportTestPartResult(const TestPartResult& result) override;
+
+ private:
+  // Declared here, defined elsewhere.  NOTE(review): presumably the setup
+  // shared by both constructors - confirm against the implementation.
+  void Init();
+
+  // Interception scope; const, so fixed at construction.
+  const InterceptMode intercept_mode_;
+  // NOTE(review): presumably the reporter that was active before this object
+  // was installed, which the destructor restores - confirm.
+  TestPartResultReporterInterface* old_reporter_;
+  // The array received in the constructor, to which results are appended.
+  TestPartResultArray* const result_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedFakeTestPartResultReporter);
+};
+
+namespace internal {
+
+// A helper class for implementing EXPECT_FATAL_FAILURE() and
+// EXPECT_NONFATAL_FAILURE().  Its destructor verifies that the given
+// TestPartResultArray contains exactly one failure that has the given
+// type and contains the given substring.  If that's not the case, a
+// non-fatal failure will be generated.
+class GTEST_API_ SingleFailureChecker {
+ public:
+  // The constructor remembers the arguments: the result array to inspect,
+  // the expected failure type, and the substring the failure message must
+  // contain.
+  SingleFailureChecker(const TestPartResultArray* results,
+                       TestPartResult::Type type, const std::string& substr);
+  // Performs the verification described in the class comment.
+  ~SingleFailureChecker();
+ private:
+  // The three constructor arguments, kept until the destructor runs.
+  const TestPartResultArray* const results_;
+  const TestPartResult::Type type_;
+  const std::string substr_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(SingleFailureChecker);
+};
+
+}  // namespace internal
+
+}  // namespace testing
+
+GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4251
+
+// A set of macros for testing Google Test assertions or code that's expected
+// to generate Google Test fatal failures.  It verifies that the given
+// statement will cause exactly one fatal Google Test failure with 'substr'
+// being part of the failure message.
+//
+// There are two different versions of this macro. EXPECT_FATAL_FAILURE only
+// affects and considers failures generated in the current thread and
+// EXPECT_FATAL_FAILURE_ON_ALL_THREADS does the same but for all threads.
+//
+// The verification of the assertion is done correctly even when the statement
+// throws an exception or aborts the current function.
+//
+// Known restrictions:
+//   - 'statement' cannot reference local non-static variables or
+//     non-static members of the current object.
+//   - 'statement' cannot return a value.
+//   - You cannot stream a failure message to this macro.
+//
+// Note that even though the implementations of the following two
+// macros are much alike, we cannot refactor them to use a common
+// helper macro, due to some peculiarity in how the preprocessor
+// works.  The AcceptsMacroThatExpandsToUnprotectedComma test in
+// gtest_unittest.cc will fail to compile if we do that.
+//
+// EXPECT_FATAL_FAILURE places 'statement' in a static member function of a
+// local helper class and calls it while a ScopedFakeTestPartResultReporter
+// in INTERCEPT_ONLY_CURRENT_THREAD mode collects results into
+// gtest_failures.  gtest_checker (a SingleFailureChecker) is constructed
+// with that array, kFatalFailure and 'substr', and is destroyed after the
+// reporter's scope has ended.
+#define EXPECT_FATAL_FAILURE(statement, substr) \
+  do { \
+    class GTestExpectFatalFailureHelper {\
+     public:\
+      static void Execute() { statement; }\
+    };\
+    ::testing::TestPartResultArray gtest_failures;\
+    ::testing::internal::SingleFailureChecker gtest_checker(\
+        &gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr));\
+    {\
+      ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
+          ::testing::ScopedFakeTestPartResultReporter:: \
+          INTERCEPT_ONLY_CURRENT_THREAD, &gtest_failures);\
+      GTestExpectFatalFailureHelper::Execute();\
+    }\
+  } while (::testing::internal::AlwaysFalse())
+
+// Same expansion as EXPECT_FATAL_FAILURE, except that the
+// ScopedFakeTestPartResultReporter is created in INTERCEPT_ALL_THREADS mode.
+#define EXPECT_FATAL_FAILURE_ON_ALL_THREADS(statement, substr) \
+  do { \
+    class GTestExpectFatalFailureHelper {\
+     public:\
+      static void Execute() { statement; }\
+    };\
+    ::testing::TestPartResultArray gtest_failures;\
+    ::testing::internal::SingleFailureChecker gtest_checker(\
+        &gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr));\
+    {\
+      ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
+          ::testing::ScopedFakeTestPartResultReporter:: \
+          INTERCEPT_ALL_THREADS, &gtest_failures);\
+      GTestExpectFatalFailureHelper::Execute();\
+    }\
+  } while (::testing::internal::AlwaysFalse())
+
+// A macro for testing Google Test assertions or code that's expected to
+// generate Google Test non-fatal failures.  It asserts that the given
+// statement will cause exactly one non-fatal Google Test failure with 'substr'
+// being part of the failure message.
+//
+// There are two different versions of this macro. EXPECT_NONFATAL_FAILURE only
+// affects and considers failures generated in the current thread and
+// EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS does the same but for all threads.
+//
+// 'statement' is allowed to reference local variables and members of
+// the current object.
+//
+// The verification of the assertion is done correctly even when the statement
+// throws an exception or aborts the current function.
+//
+// Known restrictions:
+//   - You cannot stream a failure message to this macro.
+//
+// Note that even though the implementations of the following two
+// macros are much alike, we cannot refactor them to use a common
+// helper macro, due to some peculiarity in how the preprocessor
+// works.  If we do that, the code won't compile when the user gives
+// EXPECT_NONFATAL_FAILURE() a statement that contains a macro that
+// expands to code containing an unprotected comma.  The
+// AcceptsMacroThatExpandsToUnprotectedComma test in gtest_unittest.cc
+// catches that.
+//
+// For the same reason, we have to write
+//   if (::testing::internal::AlwaysTrue()) { statement; }
+// instead of
+//   GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement)
+// to avoid an MSVC warning on unreachable code.
+//
+// EXPECT_NONFATAL_FAILURE executes 'statement' inline (inside the
+// AlwaysTrue() guard) while a ScopedFakeTestPartResultReporter in
+// INTERCEPT_ONLY_CURRENT_THREAD mode collects results into gtest_failures.
+// gtest_checker (a SingleFailureChecker) is constructed with that array,
+// kNonFatalFailure and 'substr', and is destroyed after the reporter's scope
+// has ended.
+#define EXPECT_NONFATAL_FAILURE(statement, substr) \
+  do {\
+    ::testing::TestPartResultArray gtest_failures;\
+    ::testing::internal::SingleFailureChecker gtest_checker(\
+        &gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \
+        (substr));\
+    {\
+      ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
+          ::testing::ScopedFakeTestPartResultReporter:: \
+          INTERCEPT_ONLY_CURRENT_THREAD, &gtest_failures);\
+      if (::testing::internal::AlwaysTrue()) { statement; }\
+    }\
+  } while (::testing::internal::AlwaysFalse())
+
+// Same expansion as EXPECT_NONFATAL_FAILURE, except that the
+// ScopedFakeTestPartResultReporter is created in INTERCEPT_ALL_THREADS mode.
+#define EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS(statement, substr) \
+  do {\
+    ::testing::TestPartResultArray gtest_failures;\
+    ::testing::internal::SingleFailureChecker gtest_checker(\
+        &gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \
+        (substr));\
+    {\
+      ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
+          ::testing::ScopedFakeTestPartResultReporter::INTERCEPT_ALL_THREADS, \
+          &gtest_failures);\
+      if (::testing::internal::AlwaysTrue()) { statement; }\
+    }\
+  } while (::testing::internal::AlwaysFalse())
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_GTEST_SPI_H_
--- a/gtestsuite/inc/gtest/gtest-test-part.h
+++ b/gtestsuite/inc/gtest/gtest-test-part.h
@@ -0,0 +1,184 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// GOOGLETEST_CM0001 DO NOT DELETE
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
+
+#include <iosfwd>
+#include <vector>
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-string.h"
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
+namespace testing {
+
+// A copyable object representing the result of a test part (i.e. an
+// assertion or an explicit FAIL(), ADD_FAILURE(), or SUCCEED()).
+//
+// Don't inherit from TestPartResult as its destructor is not virtual.
+class GTEST_API_ TestPartResult {
+ public:
+  // The possible outcomes of a test part (i.e. an assertion or an
+  // explicit SUCCEED(), FAIL(), or ADD_FAILURE()).
+  enum Type {
+    kSuccess,          // Succeeded.
+    kNonFatalFailure,  // Failed but the test can continue.
+    kFatalFailure,     // Failed and the test should be terminated.
+    kSkip              // Skipped.
+  };
+
+  // C'tor.  TestPartResult does NOT have a default constructor.
+  // Always use this constructor (with parameters) to create a
+  // TestPartResult object.
+  //
+  //   a_type        - outcome of the test part.
+  //   a_file_name   - source file of the test part; nullptr means unknown.
+  //   a_line_number - source line of the test part; -1 means unknown.
+  //   a_message     - message associated with the test part; nullptr is
+  //                   treated as an empty message.
+  //
+  // A null a_message is mapped to "" before it reaches ExtractSummary() and
+  // the std::string constructor, mirroring the handling of a_file_name:
+  // constructing a std::string from a null pointer is undefined behavior.
+  TestPartResult(Type a_type, const char* a_file_name, int a_line_number,
+                 const char* a_message)
+      : type_(a_type),
+        file_name_(a_file_name == nullptr ? "" : a_file_name),
+        line_number_(a_line_number),
+        summary_(ExtractSummary(a_message == nullptr ? "" : a_message)),
+        message_(a_message == nullptr ? "" : a_message) {}
+
+  // Gets the outcome of the test part.
+  Type type() const { return type_; }
+
+  // Gets the name of the source file where the test part took place, or
+  // NULL if it's unknown.  The returned pointer refers to storage owned by
+  // this object.
+  const char* file_name() const {
+    return file_name_.empty() ? nullptr : file_name_.c_str();
+  }
+
+  // Gets the line in the source file where the test part took place,
+  // or -1 if it's unknown.
+  int line_number() const { return line_number_; }
+
+  // Gets the summary of the failure message.
+  const char* summary() const { return summary_.c_str(); }
+
+  // Gets the message associated with the test part.
+  const char* message() const { return message_.c_str(); }
+
+  // Returns true if and only if the test part was skipped.
+  bool skipped() const { return type_ == kSkip; }
+
+  // Returns true if and only if the test part passed.
+  bool passed() const { return type_ == kSuccess; }
+
+  // Returns true if and only if the test part non-fatally failed.
+  bool nonfatally_failed() const { return type_ == kNonFatalFailure; }
+
+  // Returns true if and only if the test part fatally failed.
+  bool fatally_failed() const { return type_ == kFatalFailure; }
+
+  // Returns true if and only if the test part failed (fatally or not).
+  bool failed() const { return fatally_failed() || nonfatally_failed(); }
+
+ private:
+  // NOTE: members are initialized in declaration order, so summary_ is
+  // computed before message_ is constructed; the constructor therefore feeds
+  // ExtractSummary() from the raw argument rather than from message_.
+  Type type_;
+
+  // Gets the summary of the failure message by omitting the stack
+  // trace in it.
+  static std::string ExtractSummary(const char* message);
+
+  // The name of the source file where the test part took place, or
+  // "" if the source file is unknown.
+  std::string file_name_;
+  // The line in the source file where the test part took place, or -1
+  // if the line number is unknown.
+  int line_number_;
+  std::string summary_;  // The test failure summary.
+  std::string message_;  // The test failure message.
+};
+
+// Prints a TestPartResult object to the stream `os` and returns that stream.
+// Only declared here; the output format is determined by the out-of-line
+// definition.
+std::ostream& operator<<(std::ostream& os, const TestPartResult& result);
+
+// An array of TestPartResult objects.
+//
+// Don't inherit from TestPartResultArray as its destructor is not
+// virtual.
+class GTEST_API_ TestPartResultArray {
+ public:
+  // Creates an array with no results in it.
+  TestPartResultArray() {}
+
+  // Appends the given TestPartResult to the array.
+  void Append(const TestPartResult& result);
+
+  // Returns the TestPartResult at the given index (0-based).
+  // NOTE(review): handling of an out-of-range index is decided by the
+  // out-of-line definition, which is not visible here.
+  const TestPartResult& GetTestPartResult(int index) const;
+
+  // Returns the number of TestPartResult objects in the array.
+  int size() const;
+
+ private:
+  // Storage for the results; presumably filled by Append().
+  std::vector<TestPartResult> array_;
+
+  // By its name, this macro disables copy construction and assignment.
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestPartResultArray);
+};
+
+// This interface knows how to report a test part result.
+class GTEST_API_ TestPartResultReporterInterface {
+ public:
+  // Virtual so that implementations can be destroyed through a pointer to
+  // this interface.
+  virtual ~TestPartResultReporterInterface() {}
+
+  // Called with each test part result to be reported.  Pure virtual: every
+  // concrete reporter must supply its own handling.
+  virtual void ReportTestPartResult(const TestPartResult& result) = 0;
+};
+
+namespace internal {
+
+// This helper class is used by {ASSERT|EXPECT}_NO_FATAL_FAILURE to check if a
+// statement generates new fatal failures. To do so it registers itself as the
+// current test part result reporter. Besides checking if fatal failures were
+// reported, it only delegates the reporting to the former result reporter.
+// The original result reporter is restored in the destructor.
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+class GTEST_API_ HasNewFatalFailureHelper
+    : public TestPartResultReporterInterface {
+ public:
+  // Per the class description: registers this object as the current test
+  // part result reporter.
+  HasNewFatalFailureHelper();
+  // Per the class description: restores the original result reporter.
+  ~HasNewFatalFailureHelper() override;
+  // Receives each reported result.  Per the class description it checks for
+  // fatal failures and delegates the reporting to the former reporter.
+  void ReportTestPartResult(const TestPartResult& result) override;
+  // Returns the current value of the fatal-failure flag.
+  bool has_new_fatal_failure() const { return has_new_fatal_failure_; }
+ private:
+  // Flag exposed through has_new_fatal_failure(); maintained by the
+  // out-of-line member functions.
+  bool has_new_fatal_failure_;
+  // Presumably the reporter that was active before this helper registered
+  // itself -- confirm against the definitions.
+  TestPartResultReporterInterface* original_reporter_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(HasNewFatalFailureHelper);
+};
+
+}  // namespace internal
+
+}  // namespace testing
+
+GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4251
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
--- a/gtestsuite/inc/gtest/gtest-typed-test.h
+++ b/gtestsuite/inc/gtest/gtest-typed-test.h
@@ -0,0 +1,329 @@
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// GOOGLETEST_CM0001 DO NOT DELETE
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
+
+// This header implements typed tests and type-parameterized tests.
+
+// Typed (aka type-driven) tests repeat the same test for types in a
+// list.  You must know which types you want to test with when writing
+// typed tests. Here's how you do it:
+
+#if 0
+
+// First, define a fixture class template.  It should be parameterized
+// by a type.  Remember to derive it from testing::Test.
+template <typename T>
+class FooTest : public testing::Test {
+ public:
+  ...
+  typedef std::list<T> List;
+  static T shared_;
+  T value_;
+};
+
+// Next, associate a list of types with the test suite, which will be
+// repeated for each type in the list.  The typedef is necessary for
+// the macro to parse correctly.
+typedef testing::Types<char, int, unsigned int> MyTypes;
+TYPED_TEST_SUITE(FooTest, MyTypes);
+
+// If the type list contains only one type, you can write that type
+// directly without Types<...>:
+//   TYPED_TEST_SUITE(FooTest, int);
+
+// Then, use TYPED_TEST() instead of TEST_F() to define as many typed
+// tests for this test suite as you want.
+TYPED_TEST(FooTest, DoesBlah) {
+  // Inside a test, refer to the special name TypeParam to get the type
+  // parameter.  Since we are inside a derived class template, C++ requires
+  // us to visit the members of FooTest via 'this'.
+  TypeParam n = this->value_;
+
+  // To visit static members of the fixture, add the TestFixture::
+  // prefix.
+  n += TestFixture::shared_;
+
+  // To refer to typedefs in the fixture, add the "typename
+  // TestFixture::" prefix.
+  typename TestFixture::List values;
+  values.push_back(n);
+  ...
+}
+
+TYPED_TEST(FooTest, HasPropertyA) { ... }
+
+// TYPED_TEST_SUITE takes an optional third argument that lets you specify a
+// class that generates custom test name suffixes based on the type. This should
+// be a class which has a static template function GetName(int index) returning
+// a string for each type. The provided integer index equals the index of the
+// type in the provided type list. In many cases the index can be ignored.
+//
+// For example:
+//   class MyTypeNames {
+//    public:
+//     template <typename T>
+//     static std::string GetName(int) {
+//       if (std::is_same<T, char>()) return "char";
+//       if (std::is_same<T, int>()) return "int";
+//       if (std::is_same<T, unsigned int>()) return "unsignedInt";
+//     }
+//   };
+//   TYPED_TEST_SUITE(FooTest, MyTypes, MyTypeNames);
+
+#endif  // 0
+
+// Type-parameterized tests are abstract test patterns parameterized
+// by a type.  Compared with typed tests, type-parameterized tests
+// allow you to define the test pattern without knowing what the type
+// parameters are.  The defined pattern can be instantiated with
+// different types any number of times, in any number of translation
+// units.
+//
+// If you are designing an interface or concept, you can define a
+// suite of type-parameterized tests to verify properties that any
+// valid implementation of the interface/concept should have.  Then,
+// each implementation can easily instantiate the test suite to verify
+// that it conforms to the requirements, without having to write
+// similar tests repeatedly.  Here's an example:
+
+#if 0
+
+// First, define a fixture class template.  It should be parameterized
+// by a type.  Remember to derive it from testing::Test.
+template <typename T>
+class FooTest : public testing::Test {
+  ...
+};
+
+// Next, declare that you will define a type-parameterized test suite
+// (the _P suffix is for "parameterized" or "pattern", whichever you
+// prefer):
+TYPED_TEST_SUITE_P(FooTest);
+
+// Then, use TYPED_TEST_P() to define as many type-parameterized tests
+// for this type-parameterized test suite as you want.
+TYPED_TEST_P(FooTest, DoesBlah) {
+  // Inside a test, refer to TypeParam to get the type parameter.
+  TypeParam n = 0;
+  ...
+}
+
+TYPED_TEST_P(FooTest, HasPropertyA) { ... }
+
+// Now the tricky part: you need to register all test patterns before
+// you can instantiate them.  The first argument of the macro is the
+// test suite name; the rest are the names of the tests in this test
+// suite.
+REGISTER_TYPED_TEST_SUITE_P(FooTest,
+                            DoesBlah, HasPropertyA);
+
+// Finally, you are free to instantiate the pattern with the types you
+// want.  If you put the above code in a header file, you can #include
+// it in multiple C++ source files and instantiate it multiple times.
+//
+// To distinguish different instances of the pattern, the first
+// argument to the INSTANTIATE_* macro is a prefix that will be added
+// to the actual test suite name.  Remember to pick unique prefixes for
+// different instances.
+typedef testing::Types<char, int, unsigned int> MyTypes;
+INSTANTIATE_TYPED_TEST_SUITE_P(My, FooTest, MyTypes);
+
+// If the type list contains only one type, you can write that type
+// directly without Types<...>:
+//   INSTANTIATE_TYPED_TEST_SUITE_P(My, FooTest, int);
+//
+// Similar to the optional argument of TYPED_TEST_SUITE above,
+// INSTANTIATE_TYPED_TEST_SUITE_P takes an optional fourth argument that lets
+// you generate custom names.
+//   INSTANTIATE_TYPED_TEST_SUITE_P(My, FooTest, MyTypes, MyTypeNames);
+
+#endif  // 0
+
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-port.h"
+#include "gtest/internal/gtest-type-util.h"
+
+// Implements typed tests.
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Expands to the name of the typedef for the type parameters of the
+// given test suite.  For a suite named Foo this is the single identifier
+// gtest_type_params_Foo_.  The name is built with ##, so TestSuiteName is
+// pasted as written and is not macro-expanded first.
+#define GTEST_TYPE_PARAMS_(TestSuiteName) gtest_type_params_##TestSuiteName##_
+
+// Expands to the name of the typedef for the NameGenerator, responsible for
+// creating the suffixes of the name.  For a suite named Foo this is the
+// identifier gtest_type_params_Foo_NameGenerator.
+#define GTEST_NAME_GENERATOR_(TestSuiteName) \
+  gtest_type_params_##TestSuiteName##_NameGenerator
+
+// TYPED_TEST_SUITE(CaseName, Types[, NameGenerator]):
+// Declares two typedefs for the suite CaseName:
+//   - GTEST_TYPE_PARAMS_(CaseName), the type list obtained from
+//     ::testing::internal::GenerateTypeList<Types>::type;
+//   - GTEST_NAME_GENERATOR_(CaseName), selected by
+//     ::testing::internal::NameGeneratorSelector from the optional trailing
+//     argument(s).
+// The expansion does not end with a semicolon; the call site supplies it.
+#define TYPED_TEST_SUITE(CaseName, Types, ...)                          \
+  typedef ::testing::internal::GenerateTypeList<Types>::type            \
+      GTEST_TYPE_PARAMS_(CaseName);                                     \
+  typedef ::testing::internal::NameGeneratorSelector<__VA_ARGS__>::type \
+      GTEST_NAME_GENERATOR_(CaseName)
+
+// TYPED_TEST(CaseName, TestName) { ...body... }
+// Expands, in order, to:
+//   1. a static_assert that the stringified TestName is not empty;
+//   2. a class template named GTEST_TEST_CLASS_NAME_(CaseName, TestName)
+//      deriving from CaseName<gtest_TypeParam_>, which introduces the
+//      TestFixture and TypeParam typedefs and overrides TestBody();
+//   3. a static bool whose initializer calls
+//      ::testing::internal::TypeParameterizedTest<...>::Register with an empty
+//      prefix, the code location, the stringified suite and test names,
+//      index 0 and the names produced by GenerateNames<>;
+//   4. the header of the out-of-line TestBody() definition, so the braces
+//      written after the macro become the test body.
+// The typedefs named by GTEST_TYPE_PARAMS_(CaseName) and
+// GTEST_NAME_GENERATOR_(CaseName) must already exist; TYPED_TEST_SUITE
+// declares them.
+#define TYPED_TEST(CaseName, TestName)                                        \
+  static_assert(sizeof(GTEST_STRINGIFY_(TestName)) > 1,                       \
+                "test-name must not be empty");                               \
+  template <typename gtest_TypeParam_>                                        \
+  class GTEST_TEST_CLASS_NAME_(CaseName, TestName)                            \
+      : public CaseName<gtest_TypeParam_> {                                   \
+   private:                                                                   \
+    typedef CaseName<gtest_TypeParam_> TestFixture;                           \
+    typedef gtest_TypeParam_ TypeParam;                                       \
+    void TestBody() override;                                                 \
+  };                                                                          \
+  static bool gtest_##CaseName##_##TestName##_registered_                     \
+      GTEST_ATTRIBUTE_UNUSED_ = ::testing::internal::TypeParameterizedTest<   \
+          CaseName,                                                           \
+          ::testing::internal::TemplateSel<GTEST_TEST_CLASS_NAME_(CaseName,   \
+                                                                  TestName)>, \
+          GTEST_TYPE_PARAMS_(                                                 \
+              CaseName)>::Register("",                                        \
+                                   ::testing::internal::CodeLocation(         \
+                                       __FILE__, __LINE__),                   \
+                                   GTEST_STRINGIFY_(CaseName),                \
+                                   GTEST_STRINGIFY_(TestName), 0,             \
+                                   ::testing::internal::GenerateNames<        \
+                                       GTEST_NAME_GENERATOR_(CaseName),       \
+                                       GTEST_TYPE_PARAMS_(CaseName)>());      \
+  template <typename gtest_TypeParam_>                                        \
+  void GTEST_TEST_CLASS_NAME_(CaseName,                                       \
+                              TestName)<gtest_TypeParam_>::TestBody()
+
+// Legacy API is deprecated but still available.
+// TYPED_TEST_CASE is an object-like macro: it expands to a static_assert on
+// ::testing::internal::TypedTestCaseIsDeprecated() followed by the name
+// TYPED_TEST_SUITE, so the argument list written after TYPED_TEST_CASE is
+// consumed by TYPED_TEST_SUITE.  Defining GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+// before this header is included removes the legacy name.
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+#define TYPED_TEST_CASE                                                \
+  static_assert(::testing::internal::TypedTestCaseIsDeprecated(), ""); \
+  TYPED_TEST_SUITE
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+// Implements type-parameterized tests.
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Expands to the namespace name that the type-parameterized tests for
+// the given type-parameterized test suite are defined in.  The exact
+// name of the namespace is subject to change without notice.
+// Currently a suite named Foo maps to the identifier gtest_suite_Foo_.
+#define GTEST_SUITE_NAMESPACE_(TestSuiteName) gtest_suite_##TestSuiteName##_
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Expands to the name of the variable used to remember the names of
+// the defined tests in the given test suite.  For a suite named Foo this is
+// gtest_typed_test_suite_p_state_Foo_; TYPED_TEST_SUITE_P defines the
+// variable under that name.
+#define GTEST_TYPED_TEST_SUITE_P_STATE_(TestSuiteName) \
+  gtest_typed_test_suite_p_state_##TestSuiteName##_
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE DIRECTLY.
+//
+// Expands to the name of the variable used to remember the names of
+// the registered tests in the given test suite.  For a suite named Foo this
+// is gtest_registered_test_names_Foo_; REGISTER_TYPED_TEST_SUITE_P defines
+// the variable under that name.
+#define GTEST_REGISTERED_TEST_NAMES_(TestSuiteName) \
+  gtest_registered_test_names_##TestSuiteName##_
+
+// TYPED_TEST_SUITE_P(SuiteName):
+// Defines a static ::testing::internal::TypedTestSuitePState object named
+// GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName).  The expansion has no trailing
+// semicolon; the call site supplies it.
+//
+// The variables defined in the type-parameterized test macros are
+// static as typically these macros are used in a .h file that can be
+// #included in multiple translation units linked together.
+#define TYPED_TEST_SUITE_P(SuiteName)              \
+  static ::testing::internal::TypedTestSuitePState \
+      GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName)
+
+// Legacy API is deprecated but still available.
+// TYPED_TEST_CASE_P expands to a static_assert on
+// ::testing::internal::TypedTestCase_P_IsDeprecated() followed by the name
+// TYPED_TEST_SUITE_P, which then consumes the caller's argument list.
+// Defining GTEST_REMOVE_LEGACY_TEST_CASEAPI_ removes the legacy name.
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+#define TYPED_TEST_CASE_P                                                 \
+  static_assert(::testing::internal::TypedTestCase_P_IsDeprecated(), ""); \
+  TYPED_TEST_SUITE_P
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+// TYPED_TEST_P(SuiteName, TestName) { ...body... }
+// Inside namespace GTEST_SUITE_NAMESPACE_(SuiteName) this defines:
+//   - a class template TestName deriving from SuiteName<gtest_TypeParam_>,
+//     with the TestFixture and TypeParam typedefs and a TestBody() override;
+//   - a static bool gtest_<TestName>_defined_ whose initializer calls
+//     AddTestName(__FILE__, __LINE__, "<SuiteName>", "<TestName>") on the
+//     suite's state object, GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName).
+// After the namespace is closed, the macro ends with the header of the
+// out-of-line TestBody() definition, so the braces written after the macro
+// become the test body.
+#define TYPED_TEST_P(SuiteName, TestName)                             \
+  namespace GTEST_SUITE_NAMESPACE_(SuiteName) {                       \
+    template <typename gtest_TypeParam_>                              \
+    class TestName : public SuiteName<gtest_TypeParam_> {             \
+     private:                                                         \
+      typedef SuiteName<gtest_TypeParam_> TestFixture;                \
+      typedef gtest_TypeParam_ TypeParam;                             \
+      void TestBody() override;                                       \
+    };                                                                \
+    static bool gtest_##TestName##_defined_ GTEST_ATTRIBUTE_UNUSED_ = \
+        GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName).AddTestName(       \
+            __FILE__, __LINE__, GTEST_STRINGIFY_(SuiteName),          \
+            GTEST_STRINGIFY_(TestName));                              \
+  }                                                                   \
+  template <typename gtest_TypeParam_>                                \
+  void GTEST_SUITE_NAMESPACE_(                                        \
+      SuiteName)::TestName<gtest_TypeParam_>::TestBody()
+
+// REGISTER_TYPED_TEST_SUITE_P(SuiteName, Test1, Test2, ...):
+//   - inside namespace GTEST_SUITE_NAMESPACE_(SuiteName), defines the typedef
+//     gtest_AllTests_ as ::testing::internal::Templates<Test1, Test2, ...>;
+//   - defines the static variable GTEST_REGISTERED_TEST_NAMES_(SuiteName),
+//     initialized by VerifyRegisteredTestNames() on the suite's state object
+//     with the stringified suite name, __FILE__, __LINE__ and the stringified
+//     list of test names.
+// Note: this won't work correctly if the trailing arguments are macros.
+// (#__VA_ARGS__ stringizes the arguments as written, without expanding them.)
+#define REGISTER_TYPED_TEST_SUITE_P(SuiteName, ...)                         \
+  namespace GTEST_SUITE_NAMESPACE_(SuiteName) {                             \
+    typedef ::testing::internal::Templates<__VA_ARGS__> gtest_AllTests_;    \
+  }                                                                         \
+  static const char* const GTEST_REGISTERED_TEST_NAMES_(                    \
+      SuiteName) GTEST_ATTRIBUTE_UNUSED_ =                                  \
+      GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName).VerifyRegisteredTestNames( \
+          GTEST_STRINGIFY_(SuiteName), __FILE__, __LINE__, #__VA_ARGS__)
+
+// Legacy API is deprecated but still available.
+// REGISTER_TYPED_TEST_CASE_P expands to a static_assert on
+// ::testing::internal::RegisterTypedTestCase_P_IsDeprecated() followed by the
+// name REGISTER_TYPED_TEST_SUITE_P, which then consumes the caller's argument
+// list.  Defining GTEST_REMOVE_LEGACY_TEST_CASEAPI_ removes the legacy name.
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+#define REGISTER_TYPED_TEST_CASE_P                                           \
+  static_assert(::testing::internal::RegisterTypedTestCase_P_IsDeprecated(), \
+                "");                                                         \
+  REGISTER_TYPED_TEST_SUITE_P
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+// INSTANTIATE_TYPED_TEST_SUITE_P(Prefix, SuiteName, Types[, NameGenerator]):
+//   - static_asserts that the stringified Prefix is not empty;
+//   - defines the static bool gtest_<Prefix>_<SuiteName>, whose initializer
+//     calls ::testing::internal::TypeParameterizedTestSuite<...>::Register
+//     with the stringified prefix, the code location, the address of the
+//     suite's state object, the stringified suite name, the registered test
+//     names and the names produced by GenerateNames<>.
+// It relies on gtest_AllTests_ and GTEST_REGISTERED_TEST_NAMES_(SuiteName),
+// both of which REGISTER_TYPED_TEST_SUITE_P defines.
+// The expansion has no trailing semicolon; the call site supplies it.
+#define INSTANTIATE_TYPED_TEST_SUITE_P(Prefix, SuiteName, Types, ...)       \
+  static_assert(sizeof(GTEST_STRINGIFY_(Prefix)) > 1,                       \
+                "test-suite-prefix must not be empty");                     \
+  static bool gtest_##Prefix##_##SuiteName GTEST_ATTRIBUTE_UNUSED_ =        \
+      ::testing::internal::TypeParameterizedTestSuite<                      \
+          SuiteName, GTEST_SUITE_NAMESPACE_(SuiteName)::gtest_AllTests_,    \
+          ::testing::internal::GenerateTypeList<Types>::type>::             \
+          Register(GTEST_STRINGIFY_(Prefix),                                \
+                   ::testing::internal::CodeLocation(__FILE__, __LINE__),   \
+                   &GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName),             \
+                   GTEST_STRINGIFY_(SuiteName),                             \
+                   GTEST_REGISTERED_TEST_NAMES_(SuiteName),                 \
+                   ::testing::internal::GenerateNames<                      \
+                       ::testing::internal::NameGeneratorSelector<          \
+                           __VA_ARGS__>::type,                              \
+                       ::testing::internal::GenerateTypeList<Types>::type>())
+
+// Legacy API is deprecated but still available.
+// INSTANTIATE_TYPED_TEST_CASE_P expands to a static_assert on
+// ::testing::internal::InstantiateTypedTestCase_P_IsDeprecated() followed by
+// the name INSTANTIATE_TYPED_TEST_SUITE_P, which then consumes the caller's
+// argument list.  Defining GTEST_REMOVE_LEGACY_TEST_CASEAPI_ removes the
+// legacy name.
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+#define INSTANTIATE_TYPED_TEST_CASE_P                                      \
+  static_assert(                                                           \
+      ::testing::internal::InstantiateTypedTestCase_P_IsDeprecated(), ""); \
+  INSTANTIATE_TYPED_TEST_SUITE_P
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
--- a/gtestsuite/inc/gtest/gtest.h
+++ b/gtestsuite/inc/gtest/gtest.h
--- a/gtestsuite/inc/gtest/gtest_pred_impl.h
+++ b/gtestsuite/inc/gtest/gtest_pred_impl.h
@@ -0,0 +1,359 @@
+// Copyright 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This file is AUTOMATICALLY GENERATED on 01/02/2019 by command
+// 'gen_gtest_pred_impl.py 5'.  DO NOT EDIT BY HAND!
+//
+// Implements a family of generic predicate assertion macros.
+// GOOGLETEST_CM0001 DO NOT DELETE
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
+
+#include "gtest/gtest.h"
+
+namespace testing {
+
+// This header implements a family of generic predicate assertion
+// macros:
+//
+//   ASSERT_PRED_FORMAT1(pred_format, v1)
+//   ASSERT_PRED_FORMAT2(pred_format, v1, v2)
+//   ...
+//
+// where pred_format is a function or functor that takes n (in the
+// case of ASSERT_PRED_FORMATn) values and their source expression
+// text, and returns a testing::AssertionResult.  See the definition
+// of ASSERT_EQ in gtest.h for an example.
+//
+// If you don't care about formatting, you can use the more
+// restrictive version:
+//
+//   ASSERT_PRED1(pred, v1)
+//   ASSERT_PRED2(pred, v1, v2)
+//   ...
+//
+// where pred is an n-ary function or functor that returns bool,
+// and the values v1, v2, ..., must support the << operator for
+// streaming to std::ostream.
+//
+// We also define the EXPECT_* variations.
+//
+// For now we only support predicates whose arity is at most 5.
+// Please email googletestframework@googlegroups.com if you need
+// support for higher arities.
+
+// GTEST_ASSERT_ is the basic statement to which all of the assertions
+// in this file reduce.  Don't use this in your code.
+//
+// `expression` must be convertible to ::testing::AssertionResult.  The result
+// is bound to gtest_ar in the if-condition: when it converts to true the
+// empty statement runs, otherwise on_failure is invoked with
+// gtest_ar.failure_message().  The macro ends inside the else branch, so
+// whatever on_failure expands to finishes the statement.
+
+#define GTEST_ASSERT_(expression, on_failure) \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+  if (const ::testing::AssertionResult gtest_ar = (expression)) \
+    ; \
+  else \
+    on_failure(gtest_ar.failure_message())
+
+
+// Backs {EXPECT|ASSERT}_PRED1; not intended to be called from user code.
+//
+// Evaluates the unary predicate `pred` on `v1`.  Yields AssertionSuccess()
+// when the predicate holds; otherwise yields an AssertionFailure() whose
+// message shows the call as written (`pred_text`, `e1`) followed by the
+// printed value of the argument.
+template <typename Pred, typename T1>
+AssertionResult AssertPred1Helper(const char* pred_text, const char* e1,
+                                  Pred pred, const T1& v1) {
+  if (pred(v1)) {
+    return AssertionSuccess();
+  }
+
+  AssertionResult failure = AssertionFailure();
+  // First line: the predicate call exactly as it appeared in the source.
+  failure << pred_text << "(" << e1 << ") evaluates to false, where";
+  // Then one line per argument: source text and printed value.
+  failure << "\n" << e1 << " evaluates to " << ::testing::PrintToString(v1);
+  return failure;
+}
+
+// Implementation detail of {EXPECT|ASSERT}_PRED_FORMAT1 -- not for user code.
+// Calls pred_format with the source text of v1 followed by its value and
+// routes the resulting AssertionResult through GTEST_ASSERT_.
+#define GTEST_PRED_FORMAT1_(pred_format, v1, on_failure) \
+  GTEST_ASSERT_(pred_format(#v1, v1), on_failure)
+
+// Implementation detail of {EXPECT|ASSERT}_PRED1 -- not for user code.
+// Forwards the source text of the predicate and of v1, then the predicate
+// and the value, to AssertPred1Helper; the result goes through GTEST_ASSERT_.
+#define GTEST_PRED1_(pred, v1, on_failure) \
+  GTEST_ASSERT_(::testing::AssertPred1Helper(#pred, #v1, pred, v1), \
+                on_failure)
+
+// Unary predicate assertion macros.  The EXPECT_ forms hand
+// GTEST_NONFATAL_FAILURE_ to the internal macro as the failure handler; the
+// ASSERT_ forms hand it GTEST_FATAL_FAILURE_.
+//
+// Plain predicate variants:
+#define EXPECT_PRED1(pred, v1) \
+  GTEST_PRED1_(pred, v1, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED1(pred, v1) \
+  GTEST_PRED1_(pred, v1, GTEST_FATAL_FAILURE_)
+// Predicate-formatter variants:
+#define EXPECT_PRED_FORMAT1(pred_format, v1) \
+  GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT1(pred_format, v1) \
+  GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_FATAL_FAILURE_)
+
+
+
+// Backs {EXPECT|ASSERT}_PRED2; not intended to be called from user code.
+//
+// Evaluates the binary predicate `pred` on (`v1`, `v2`).  Yields
+// AssertionSuccess() when the predicate holds; otherwise yields an
+// AssertionFailure() whose message shows the call as written (`pred_text`,
+// `e1`, `e2`) followed by the printed value of each argument.
+template <typename Pred, typename T1, typename T2>
+AssertionResult AssertPred2Helper(const char* pred_text, const char* e1,
+                                  const char* e2, Pred pred, const T1& v1,
+                                  const T2& v2) {
+  if (pred(v1, v2)) {
+    return AssertionSuccess();
+  }
+
+  AssertionResult failure = AssertionFailure();
+  // First line: the predicate call exactly as it appeared in the source.
+  failure << pred_text << "(" << e1 << ", " << e2
+          << ") evaluates to false, where";
+  // Then one line per argument: source text and printed value.
+  failure << "\n" << e1 << " evaluates to " << ::testing::PrintToString(v1);
+  failure << "\n" << e2 << " evaluates to " << ::testing::PrintToString(v2);
+  return failure;
+}
+
+// Implementation detail of {EXPECT|ASSERT}_PRED_FORMAT2 -- not for user code.
+// Calls pred_format with the source text of v1 and v2 followed by their
+// values and routes the resulting AssertionResult through GTEST_ASSERT_.
+#define GTEST_PRED_FORMAT2_(pred_format, v1, v2, on_failure) \
+  GTEST_ASSERT_(pred_format(#v1, #v2, v1, v2), on_failure)
+
+// Implementation detail of {EXPECT|ASSERT}_PRED2 -- not for user code.
+// Forwards the source text of the predicate and of each argument, then the
+// predicate and the values, to AssertPred2Helper; the result goes through
+// GTEST_ASSERT_.
+#define GTEST_PRED2_(pred, v1, v2, on_failure) \
+  GTEST_ASSERT_(::testing::AssertPred2Helper(#pred, #v1, #v2, pred, v1, v2), \
+                on_failure)
+
+// Binary predicate assertion macros.  The EXPECT_ forms hand
+// GTEST_NONFATAL_FAILURE_ to the internal macro as the failure handler; the
+// ASSERT_ forms hand it GTEST_FATAL_FAILURE_.
+//
+// Plain predicate variants:
+#define EXPECT_PRED2(pred, v1, v2) \
+  GTEST_PRED2_(pred, v1, v2, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED2(pred, v1, v2) \
+  GTEST_PRED2_(pred, v1, v2, GTEST_FATAL_FAILURE_)
+// Predicate-formatter variants:
+#define EXPECT_PRED_FORMAT2(pred_format, v1, v2) \
+  GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT2(pred_format, v1, v2) \
+  GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_FATAL_FAILURE_)
+
+
+
+// Backs {EXPECT|ASSERT}_PRED3; not intended to be called from user code.
+//
+// Evaluates the ternary predicate `pred` on (`v1`, `v2`, `v3`).  Yields
+// AssertionSuccess() when the predicate holds; otherwise yields an
+// AssertionFailure() whose message shows the call as written (`pred_text`,
+// `e1`..`e3`) followed by the printed value of each argument.
+template <typename Pred, typename T1, typename T2, typename T3>
+AssertionResult AssertPred3Helper(const char* pred_text, const char* e1,
+                                  const char* e2, const char* e3, Pred pred,
+                                  const T1& v1, const T2& v2, const T3& v3) {
+  if (pred(v1, v2, v3)) {
+    return AssertionSuccess();
+  }
+
+  AssertionResult failure = AssertionFailure();
+  // First line: the predicate call exactly as it appeared in the source.
+  failure << pred_text << "(" << e1 << ", " << e2 << ", " << e3
+          << ") evaluates to false, where";
+  // Then one line per argument: source text and printed value.
+  failure << "\n" << e1 << " evaluates to " << ::testing::PrintToString(v1);
+  failure << "\n" << e2 << " evaluates to " << ::testing::PrintToString(v2);
+  failure << "\n" << e3 << " evaluates to " << ::testing::PrintToString(v3);
+  return failure;
+}
+
+// Implementation detail of {EXPECT|ASSERT}_PRED_FORMAT3 -- not for user code.
+// Calls pred_format with the source text of v1, v2 and v3 followed by their
+// values and routes the resulting AssertionResult through GTEST_ASSERT_.
+#define GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, on_failure) \
+  GTEST_ASSERT_(pred_format(#v1, #v2, #v3, v1, v2, v3), on_failure)
+
+// Implementation detail of {EXPECT|ASSERT}_PRED3 -- not for user code.
+// Forwards the source text of the predicate and of each argument, then the
+// predicate and the values, to AssertPred3Helper; the result goes through
+// GTEST_ASSERT_.
+#define GTEST_PRED3_(pred, v1, v2, v3, on_failure) \
+  GTEST_ASSERT_( \
+      ::testing::AssertPred3Helper(#pred, #v1, #v2, #v3, pred, v1, v2, v3), \
+      on_failure)
+
+// Ternary predicate assertion macros.
+#define EXPECT_PRED_FORMAT3(pred_format, v1, v2, v3) \
+  GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED3(pred, v1, v2, v3) \
+  GTEST_PRED3_(pred, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT3(pred_format, v1, v2, v3) \
+  GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED3(pred, v1, v2, v3) \
+  GTEST_PRED3_(pred, v1, v2, v3, GTEST_FATAL_FAILURE_)
+
+
+
+// Backs {EXPECT|ASSERT}_PRED4; test code should go through those macros.
+// Succeeds when pred(v1, v2, v3, v4) holds; otherwise the failure message
+// echoes the call text (pred_text, e1..e4) and each argument's printed value.
+template <typename Pred, typename T1, typename T2,
+          typename T3, typename T4>
+AssertionResult AssertPred4Helper(const char* pred_text,
+                                  const char* e1,
+                                  const char* e2,
+                                  const char* e3,
+                                  const char* e4,
+                                  Pred pred,
+                                  const T1& v1,
+                                  const T2& v2,
+                                  const T3& v3,
+                                  const T4& v4) {
+  if (pred(v1, v2, v3, v4)) {
+    return AssertionSuccess();
+  }
+  // The predicate rejected the values: assemble the diagnostic piece by piece.
+  AssertionResult failure = AssertionFailure();
+  failure << pred_text << "(" << e1 << ", " << e2 << ", " << e3 << ", " << e4
+          << ") evaluates to false, where" << "\n";
+  failure << e1 << " evaluates to " << ::testing::PrintToString(v1) << "\n";
+  failure << e2 << " evaluates to " << ::testing::PrintToString(v2) << "\n";
+  failure << e3 << " evaluates to " << ::testing::PrintToString(v3) << "\n";
+  failure << e4 << " evaluates to " << ::testing::PrintToString(v4);
+  return failure;
+}
+
+// Internal macro behind {EXPECT|ASSERT}_PRED_FORMAT4 (don't use it directly):
+// calls pred_format with the stringized arguments followed by their values.
+#define GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, on_failure)\
+  GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, v1, v2, v3, v4), \
+                on_failure)
+
+// Internal macro behind {EXPECT|ASSERT}_PRED4 (don't use it directly): hands
+// the stringized predicate and arguments, then the values, to AssertPred4Helper.
+#define GTEST_PRED4_(pred, v1, v2, v3, v4, on_failure)\
+  GTEST_ASSERT_(::testing::AssertPred4Helper(#pred, \
+                                             #v1, \
+                                             #v2, \
+                                             #v3, \
+                                             #v4, \
+                                             pred, \
+                                             v1, \
+                                             v2, \
+                                             v3, \
+                                             v4), on_failure)
+
+// 4-ary predicate assertion macros (nonfatal EXPECT_*, fatal ASSERT_*).
+#define EXPECT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
+  GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED4(pred, v1, v2, v3, v4) \
+  GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
+  GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED4(pred, v1, v2, v3, v4) \
+  GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)
+
+
+
+// Backs {EXPECT|ASSERT}_PRED5; test code should go through those macros.
+// Succeeds when pred(v1, v2, v3, v4, v5) holds; otherwise the failure message
+// echoes the call text (pred_text, e1..e5) and each argument's printed value.
+template <typename Pred, typename T1, typename T2,
+          typename T3, typename T4, typename T5>
+AssertionResult AssertPred5Helper(const char* pred_text,
+                                  const char* e1,
+                                  const char* e2,
+                                  const char* e3,
+                                  const char* e4,
+                                  const char* e5,
+                                  Pred pred,
+                                  const T1& v1,
+                                  const T2& v2,
+                                  const T3& v3,
+                                  const T4& v4,
+                                  const T5& v5) {
+  if (pred(v1, v2, v3, v4, v5)) {
+    return AssertionSuccess();
+  }
+  // The predicate rejected the values: assemble the diagnostic piece by piece.
+  AssertionResult failure = AssertionFailure();
+  failure << pred_text << "(" << e1 << ", " << e2 << ", " << e3 << ", " << e4
+          << ", " << e5 << ") evaluates to false, where"
+          << "\n";
+  failure << e1 << " evaluates to " << ::testing::PrintToString(v1) << "\n";
+  failure << e2 << " evaluates to " << ::testing::PrintToString(v2) << "\n";
+  failure << e3 << " evaluates to " << ::testing::PrintToString(v3) << "\n";
+  failure << e4 << " evaluates to " << ::testing::PrintToString(v4) << "\n";
+  failure << e5 << " evaluates to " << ::testing::PrintToString(v5);
+  return failure;
+}
+
+// Internal macro behind {EXPECT|ASSERT}_PRED_FORMAT5 (don't use it directly):
+// calls pred_format with the stringized arguments followed by their values.
+#define GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, on_failure)\
+  GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, #v5, v1, v2, v3, v4, v5), \
+                on_failure)
+
+// Internal macro behind {EXPECT|ASSERT}_PRED5 (don't use it directly): hands
+// the stringized predicate and arguments, then the values, to AssertPred5Helper.
+#define GTEST_PRED5_(pred, v1, v2, v3, v4, v5, on_failure)\
+  GTEST_ASSERT_(::testing::AssertPred5Helper(#pred, \
+                                             #v1, \
+                                             #v2, \
+                                             #v3, \
+                                             #v4, \
+                                             #v5, \
+                                             pred, \
+                                             v1, \
+                                             v2, \
+                                             v3, \
+                                             v4, \
+                                             v5), on_failure)
+
+// 5-ary predicate assertion macros (nonfatal EXPECT_*, fatal ASSERT_*).
+#define EXPECT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
+  GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED5(pred, v1, v2, v3, v4, v5) \
+  GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
+  GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED5(pred, v1, v2, v3, v4, v5) \
+  GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
+
+
+
+}  // namespace testing
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
--- a/gtestsuite/inc/gtest/gtest_prod.h
+++ b/gtestsuite/inc/gtest/gtest_prod.h
@@ -0,0 +1,61 @@
+// Copyright 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+//
+// Google C++ Testing and Mocking Framework definitions useful in production code.
+// GOOGLETEST_CM0003 DO NOT DELETE
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_PROD_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_PROD_H_
+
+// When you need to test the private or protected members of a class,
+// use the FRIEND_TEST macro to declare your tests as friends of the
+// class.  For example:
+//
+// class MyClass {
+//  private:
+//   void PrivateMethod();
+//   FRIEND_TEST(MyClassTest, PrivateMethodWorks);
+// };
+//
+// class MyClassTest : public testing::Test {
+//   // ...
+// };
+//
+// TEST_F(MyClassTest, PrivateMethodWorks) {
+//   // Can call MyClass::PrivateMethod() here.
+// }
+//
+// Note: The test class must be in the same namespace as the class being tested.
+// For example, putting MyClassTest in an anonymous namespace will not work.
+// Expands to a friend declaration of class <test_case_name>_<test_name>_Test.
+#define FRIEND_TEST(test_case_name, test_name)\
+friend class test_case_name##_##test_name##_Test
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_GTEST_PROD_H_
--- a/gtestsuite/inc/gtest/internal/custom/README.md
+++ b/gtestsuite/inc/gtest/internal/custom/README.md
@@ -0,0 +1,56 @@
+# Customization Points
+
+The custom directory is an injection point for custom user configurations.
+
+## Header `gtest.h`
+
+The following macros can be defined:
+
+*   `GTEST_OS_STACK_TRACE_GETTER_` - The name of an implementation of
+    `OsStackTraceGetterInterface`.
+*   `GTEST_CUSTOM_TEMPDIR_FUNCTION_` - An override for `testing::TempDir()`. See
+    `testing::TempDir` for semantics and signature.
+
+## Header `gtest-port.h`
+
+The following macros can be defined:
+
+### Flag related macros:
+
+*   `GTEST_FLAG(flag_name)`
+*   `GTEST_USE_OWN_FLAGFILE_FLAG_` - Define to 0 when the system provides its
+    own flagfile flag parsing.
+*   `GTEST_DECLARE_bool_(name)`
+*   `GTEST_DECLARE_int32_(name)`
+*   `GTEST_DECLARE_string_(name)`
+*   `GTEST_DEFINE_bool_(name, default_val, doc)`
+*   `GTEST_DEFINE_int32_(name, default_val, doc)`
+*   `GTEST_DEFINE_string_(name, default_val, doc)`
+
+### Logging:
+
+*   `GTEST_LOG_(severity)`
+*   `GTEST_CHECK_(condition)`
+*   Functions `LogToStderr()` and `FlushInfoLog()` have to be provided too.
+
+### Threading:
+
+*   `GTEST_HAS_NOTIFICATION_` - Enabled if `Notification` is already provided.
+*   `GTEST_HAS_MUTEX_AND_THREAD_LOCAL_` - Enabled if `Mutex` and `ThreadLocal`
+    are already provided. Must also provide `GTEST_DECLARE_STATIC_MUTEX_(mutex)`
+    and `GTEST_DEFINE_STATIC_MUTEX_(mutex)`
+*   `GTEST_EXCLUSIVE_LOCK_REQUIRED_(locks)`
+*   `GTEST_LOCK_EXCLUDED_(locks)`
+
+### Underlying library support features:
+
+*   `GTEST_HAS_CXXABI_H_`
+
+### Exporting API symbols:
+
+*   `GTEST_API_` - Specifier for exported symbols.
+
+## Header `gtest-printers.h`
+
+*   See documentation at `gtest/gtest-printers.h` for details on how to define a
+    custom printer.
--- a/gtestsuite/inc/gtest/internal/custom/gtest-port.h
+++ b/gtestsuite/inc/gtest/internal/custom/gtest-port.h
@@ -0,0 +1,37 @@
+// Copyright 2015, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Injection point for custom user configurations. See README for details
+//
+// ** Custom implementation starts here **
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
--- a/gtestsuite/inc/gtest/internal/custom/gtest-printers.h
+++ b/gtestsuite/inc/gtest/internal/custom/gtest-printers.h
@@ -0,0 +1,42 @@
+// Copyright 2015, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// This file provides an injection point for custom printers in a local
+// installation of gTest.
+// It will be included from gtest-printers.h and the overrides in this file
+// will be visible to everyone.
+//
+// Injection point for custom user configurations. See README for details
+//
+// ** Custom implementation starts here **
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
--- a/gtestsuite/inc/gtest/internal/custom/gtest.h
+++ b/gtestsuite/inc/gtest/internal/custom/gtest.h
@@ -0,0 +1,37 @@
+// Copyright 2015, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Injection point for custom user configurations. See README for details
+//
+// ** Custom implementation starts here **
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_H_
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_H_
--- a/gtestsuite/inc/gtest/internal/gtest-death-test-internal.h
+++ b/gtestsuite/inc/gtest/internal/gtest-death-test-internal.h
@@ -0,0 +1,304 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This header file defines internal utilities needed for implementing
+// death tests.  They are subject to change without notice.
+// GOOGLETEST_CM0001 DO NOT DELETE
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
+
+#include "gtest/gtest-matchers.h"
+#include "gtest/internal/gtest-internal.h"
+
+#include <stdio.h>
+#include <memory>
+
+namespace testing {
+namespace internal {
+
+GTEST_DECLARE_string_(internal_run_death_test);
+
+// Flag names without the "gtest_" prefix (needed to parse Google Test flags).
+const char kDeathTestStyleFlag[] = "death_test_style";
+const char kDeathTestUseFork[] = "death_test_use_fork";
+const char kInternalRunDeathTestFlag[] = "internal_run_death_test";
+
+#if GTEST_HAS_DEATH_TEST
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
+// DeathTest is a class that hides much of the complexity of the
+// GTEST_DEATH_TEST_ macro.  It is abstract; its static Create method
+// returns a concrete class that depends on the prevailing death test
+// style, as defined by the --gtest_death_test_style and/or
+// --gtest_internal_run_death_test flags.
+
+// In describing the results of death tests, these terms are used with
+// the corresponding definitions:
+//
+// exit status:  The integer exit information in the format specified
+//               by wait(2)
+// exit code:    The integer code passed to exit(3), _exit(2), or
+//               returned from main()
+class GTEST_API_ DeathTest {
+ public:
+  // Create returns false if there was an error determining the
+  // appropriate action to take for the current death test; for example,
+  // if the gtest_death_test_style flag is set to an invalid value.
+  // The LastMessage method will return a more detailed message in that
+  // case.  Otherwise, the DeathTest pointer pointed to by the "test"
+  // argument is set.  If the death test should be skipped, the pointer
+  // is set to NULL; otherwise, it is set to the address of a new concrete
+  // DeathTest object that controls the execution of the current test.
+  static bool Create(const char* statement, Matcher<const std::string&> matcher,
+                     const char* file, int line, DeathTest** test);
+  DeathTest();
+  virtual ~DeathTest() { }
+
+  // Helper whose destructor calls Abort(TEST_ENCOUNTERED_RETURN_STATEMENT).
+  class ReturnSentinel {
+   public:
+    explicit ReturnSentinel(DeathTest* test) : test_(test) { }
+    ~ReturnSentinel() { test_->Abort(TEST_ENCOUNTERED_RETURN_STATEMENT); }
+   private:
+    DeathTest* const test_;
+    GTEST_DISALLOW_COPY_AND_ASSIGN_(ReturnSentinel);
+  } GTEST_ATTRIBUTE_UNUSED_;
+
+  // An enumeration of possible roles that may be taken when a death
+  // test is encountered.  EXECUTE_TEST means that the death test logic should
+  // be executed immediately.  OVERSEE_TEST means that the program should
+  // prepare the appropriate environment for a child process to execute the
+  // death test, then wait for it to complete.
+  enum TestRole { OVERSEE_TEST, EXECUTE_TEST };
+
+  // An enumeration of the three reasons that a test might be aborted.
+  enum AbortReason {
+    TEST_ENCOUNTERED_RETURN_STATEMENT,
+    TEST_THREW_EXCEPTION,
+    TEST_DID_NOT_DIE
+  };
+
+  // Assumes one of the above roles.
+  virtual TestRole AssumeRole() = 0;
+
+  // Waits for the death test to finish and returns its status.
+  virtual int Wait() = 0;
+
+  // Returns true if the death test passed; that is, the test process
+  // exited during the test, its exit status matches a user-supplied
+  // predicate, and its stderr output matches a user-supplied regular
+  // expression.
+  // The user-supplied predicate may be a macro expression rather
+  // than a function pointer or functor, or else Wait and Passed could
+  // be combined.
+  virtual bool Passed(bool exit_status_ok) = 0;
+
+  // Signals that the death test did not die as expected.
+  virtual void Abort(AbortReason reason) = 0;
+
+  // Returns a human-readable message describing the outcome of the last
+  // death test.
+  static const char* LastMessage();
+
+  static void set_last_death_test_message(const std::string& message);
+
+ private:
+  // A string containing a description of the outcome of the last death test.
+  static std::string last_death_test_message_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(DeathTest);
+};
+
+GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4251
+
+// Abstract factory for death tests (pure virtual Create); mockable in tests.
+class DeathTestFactory {
+ public:
+  virtual ~DeathTestFactory() { }
+  virtual bool Create(const char* statement,
+                      Matcher<const std::string&> matcher, const char* file,
+                      int line, DeathTest** test) = 0;
+};
+
+// The concrete DeathTestFactory for normal use; it overrides Create.
+class DefaultDeathTestFactory : public DeathTestFactory {
+ public:
+  bool Create(const char* statement, Matcher<const std::string&> matcher,
+              const char* file, int line, DeathTest** test) override;
+};
+
+// Returns true if exit_status describes a process that was terminated
+// by a signal, or exited normally with a nonzero exit code.
+GTEST_API_ bool ExitedUnsuccessfully(int exit_status);
+
+// A string-like argument to EXPECT_DEATH (etc.) -- an RE, a C string, or a
+// std::string -- is caught by one of these overloads and, for legacy
+// compatibility, treated as a regex (ContainsRegex) rather than an Eq matcher.
+inline Matcher<const ::std::string&> MakeDeathTestMatcher(
+    ::testing::internal::RE regex) {
+  return ContainsRegex(regex.pattern());
+}
+inline Matcher<const ::std::string&> MakeDeathTestMatcher(const char* regex) {
+  return ContainsRegex(regex);
+}
+inline Matcher<const ::std::string&> MakeDeathTestMatcher(
+    const ::std::string& regex) {
+  return ContainsRegex(regex);
+}
+
+// A Matcher<const ::std::string&> passed to EXPECT_DEATH (etc.) is returned
+// unchanged by this overload.
+inline Matcher<const ::std::string&> MakeDeathTestMatcher(
+    Matcher<const ::std::string&> matcher) {
+  return matcher;
+}
+
+// Runs statement.  When GTEST_HAS_EXCEPTIONS is set, a C++ exception escaping
+// it aborts the death test with TEST_THREW_EXCEPTION (SEH is not trapped here).
+# if GTEST_HAS_EXCEPTIONS
+#  define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
+  try { \
+    GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+  } catch (const ::std::exception& gtest_exception) { \
+    fprintf(\
+        stderr, \
+        "\n%s: Caught std::exception-derived exception escaping the " \
+        "death test statement. Exception message: %s\n", \
+        ::testing::internal::FormatFileLocation(__FILE__, __LINE__).c_str(), \
+        gtest_exception.what()); \
+    fflush(stderr); \
+    death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \
+  } catch (...) { \
+    death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \
+  }
+
+# else
+#  define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
+  GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement)
+
+# endif
+
+// Implements ASSERT_DEATH*, EXPECT_DEATH*, ASSERT_EXIT*, and EXPECT_EXIT*.  Any
+// failure jumps to a per-line label where fail(DeathTest::LastMessage()) runs.
+#define GTEST_DEATH_TEST_(statement, predicate, regex_or_matcher, fail)        \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_                                                \
+  if (::testing::internal::AlwaysTrue()) {                                     \
+    ::testing::internal::DeathTest* gtest_dt;                                  \
+    if (!::testing::internal::DeathTest::Create(                               \
+            #statement,                                                        \
+            ::testing::internal::MakeDeathTestMatcher(regex_or_matcher),       \
+            __FILE__, __LINE__, &gtest_dt)) {                                  \
+      goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__);                        \
+    }                                                                          \
+    if (gtest_dt != nullptr) {                                                 \
+      std::unique_ptr< ::testing::internal::DeathTest> gtest_dt_ptr(gtest_dt); \
+      switch (gtest_dt->AssumeRole()) {                                        \
+        case ::testing::internal::DeathTest::OVERSEE_TEST:                     \
+          if (!gtest_dt->Passed(predicate(gtest_dt->Wait()))) {                \
+            goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__);                  \
+          }                                                                    \
+          break;                                                               \
+        case ::testing::internal::DeathTest::EXECUTE_TEST: {                   \
+          ::testing::internal::DeathTest::ReturnSentinel gtest_sentinel(       \
+              gtest_dt);                                                       \
+          GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, gtest_dt);            \
+          gtest_dt->Abort(::testing::internal::DeathTest::TEST_DID_NOT_DIE);   \
+          break;                                                               \
+        }                                                                      \
+        default:                                                               \
+          break;                                                               \
+      }                                                                        \
+    }                                                                          \
+  } else                                                                       \
+    GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__)                                \
+        : fail(::testing::internal::DeathTest::LastMessage())
+// The symbol "fail" here expands to something into which a message
+// can be streamed.
+
+// Implements ASSERT/EXPECT_DEBUG_DEATH when compiled in NDEBUG mode: the
+// statement is executed, and the trailing ::testing::Message() accepts a
+// streamed message that is never printed.  regex_or_matcher sits in a
+// !AlwaysTrue() branch: it is not evaluated, but it is used to prevent
+// "unused" warnings and catch expressions that wouldn't compile in debug mode.
+#define GTEST_EXECUTE_STATEMENT_(statement, regex_or_matcher)    \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_                                  \
+  if (::testing::internal::AlwaysTrue()) {                       \
+    GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement);   \
+  } else if (!::testing::internal::AlwaysTrue()) {               \
+    ::testing::internal::MakeDeathTestMatcher(regex_or_matcher); \
+  } else                                                         \
+    ::testing::Message()
+
+// Holds the parsed pieces of the --gtest_internal_run_death_test flag as it
+// existed when RUN_ALL_TESTS was called: a file, a line, an index and a
+// write file descriptor.
+class InternalRunDeathTestFlag {
+ public:
+  InternalRunDeathTestFlag(const std::string& a_file, int a_line, int an_index,
+                           int a_write_fd)
+      : file_(a_file),
+        line_(a_line),
+        index_(an_index),
+        write_fd_(a_write_fd) {}
+
+  // Closes write_fd_ unless it is negative.
+  ~InternalRunDeathTestFlag() {
+    if (!(write_fd_ < 0)) posix::Close(write_fd_);
+  }
+
+  const std::string& file() const { return file_; }
+  int line() const { return line_; }
+  int index() const { return index_; }
+  int write_fd() const { return write_fd_; }
+
+ private:
+  std::string file_;
+  int line_;
+  int index_;
+  int write_fd_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(InternalRunDeathTestFlag);
+};
+
+// Returns a newly created InternalRunDeathTestFlag object with fields
+// initialized from the GTEST_FLAG(internal_run_death_test) flag if
+// the flag is specified; otherwise returns NULL.
+InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag();
+
+#endif  // GTEST_HAS_DEATH_TEST
+
+}  // namespace internal
+}  // namespace testing
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
--- a/gtestsuite/inc/gtest/internal/gtest-filepath.h
+++ b/gtestsuite/inc/gtest/internal/gtest-filepath.h
@@ -0,0 +1,211 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Google Test filepath utilities
+//
+// This header file declares classes and functions used internally by
+// Google Test.  They are subject to change without notice.
+//
+// This file is #included in gtest/internal/gtest-internal.h.
+// Do not include this header file separately!
+
+// GOOGLETEST_CM0001 DO NOT DELETE
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
+
+#include "gtest/internal/gtest-string.h"
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
+namespace testing {
+namespace internal {
+
+// FilePath - a class for file and directory pathname manipulation which
+// handles platform-specific conventions (like the pathname separator).
+// Used for helper functions for naming files in a directory for xml output.
+// Except for Set methods, all methods are const or static, which provides an
+// "immutable value object" -- useful for peace of mind.
+// A FilePath with a value ending in a path separator ("like/this/") represents
+// a directory, otherwise it is assumed to represent a file. In either case,
+// it may or may not represent an actual file or directory in the file system.
+// Names are NOT checked for syntax correctness -- no checking for illegal
+// characters, malformed paths, etc.
+
+class GTEST_API_ FilePath {
+ public:
+  // Creates an empty path.
+  FilePath() : pathname_() {}
+
+  // Copies the text of rhs as-is; Normalize() is not run again.
+  FilePath(const FilePath& rhs) : pathname_(rhs.pathname_) {}
+
+  // Wraps `pathname` and immediately normalizes it (see Normalize()).
+  explicit FilePath(const std::string& pathname) : pathname_(pathname) {
+    Normalize();
+  }
+
+  // Assignment has the same effect as Set(rhs).
+  FilePath& operator=(const FilePath& rhs) {
+    pathname_ = rhs.pathname_;
+    return *this;
+  }
+
+  // Replaces the stored path with the one held by rhs.
+  void Set(const FilePath& rhs) { pathname_ = rhs.pathname_; }
+
+  // Accessors for the stored path text.
+  const std::string& string() const { return pathname_; }
+  const char* c_str() const { return pathname_.c_str(); }
+
+  // Returns the current working directory, or "" on failure.
+  static FilePath GetCurrentDir();
+
+  // Builds a file name from its pieces.  With directory = "dir",
+  // base_name = "test", number = 0 and extension = "xml" the result is
+  // "dir/test.xml"; a number greater than zero (e.g. 12) yields
+  // "dir/test_12.xml".
+  // On Windows the separator is \ instead of /.
+  static FilePath MakeFileName(const FilePath& directory,
+                               const FilePath& base_name,
+                               int number,
+                               const char* extension);
+
+  // Joins two paths: directory = "dir" and relative_path = "test.xml"
+  // give "dir/test.xml".
+  // On Windows the separator is \ instead of /.
+  static FilePath ConcatPaths(const FilePath& directory,
+                              const FilePath& relative_path);
+
+  // Produces a pathname for a file that does not exist yet.  The result is
+  // directory/base_name.extension, or -- when that file is already
+  // present -- directory/base_name_<number>.extension, where the number is
+  // incremented until an unused pathname is found.
+  // Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'.
+  // Beware of a possible race: two or more processes calling this function
+  // at the same time could both settle on the same filename.
+  static FilePath GenerateUniqueFileName(const FilePath& directory,
+                                         const FilePath& base_name,
+                                         const char* extension);
+
+  // True if and only if the path is "".
+  bool IsEmpty() const { return pathname_.size() == 0; }
+
+  // Strips one trailing separator character if the name has one and returns
+  // the result; otherwise returns the name unmodified.
+  // The separator is \ on Windows and / on other platforms.
+  FilePath RemoveTrailingPathSeparator() const;
+
+  // Returns a copy of this FilePath without its directory part.
+  // Example: FilePath("path/to/file").RemoveDirectoryName() returns
+  // FilePath("file").  A path without a directory part ("just_a_file") is
+  // returned unmodified; a path without a file part ("just_a_dir/") gives
+  // an empty FilePath ("").
+  // The path separator is '\' on Windows and '/' elsewhere.
+  FilePath RemoveDirectoryName() const;
+
+  // Returns the directory path with the filename removed.
+  // Example: FilePath("path/to/file").RemoveFileName() returns "path/to/".
+  // For "a_file" or "/a_file" the result is FilePath("./") or, on Windows,
+  // FilePath(".\\").  A path that has no file, like "just/a/dir/", is
+  // returned unmodified.
+  // The path separator is '\' on Windows and '/' elsewhere.
+  FilePath RemoveFileName() const;
+
+  // Returns a copy of this FilePath with `extension` removed; the match is
+  // case-insensitive.
+  // Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns
+  // FilePath("dir/file").  When no case-insensitive match is found, a copy
+  // of the original FilePath is returned.
+  FilePath RemoveExtension(const char* extension) const;
+
+  // Creates every directory needed for the path to exist.  Returns true on
+  // success or if the directories already exist, and false if they cannot
+  // be created for any reason.  Also returns false when this FilePath does
+  // not denote a directory (i.e. does not end with a path separator).
+  bool CreateDirectoriesRecursively() const;
+
+  // Creates a single directory so that the path exists.  Returns true on
+  // success or if the directory already exists; returns false on any
+  // failure, including a missing parent directory.  The name is not
+  // "CreateDirectory" because that is a macro on Windows.
+  bool CreateFolder() const;
+
+  // True if this FilePath names something -- a file, a directory, or
+  // anything else -- that exists in the file system.
+  bool FileOrDirectoryExists() const;
+
+  // True if the pathname names a directory that exists in the file system.
+  bool DirectoryExists() const;
+
+  // True if this FilePath ends with a path separator, which marks it as
+  // intended to represent a directory; false otherwise.
+  // No check is made that a directory (or file) actually exists.
+  bool IsDirectory() const;
+
+  // True if the pathname names a root directory.  (Windows has one root
+  // directory per disk drive.)
+  bool IsRootDirectory() const;
+
+  // True if the pathname is an absolute path.
+  bool IsAbsolutePath() const;
+
+ private:
+  // Collapses runs of consecutive separators into one separator, so that
+  // "bar///foo" becomes "bar/foo".  Other redundancies involving "." or
+  // ".." are left untouched.
+  //
+  // Repeated separators can come from user error, or from scripts or APIs
+  // that emit a pathname with a trailing separator.  On other platforms the
+  // same API or script may NOT emit the trailing "/".  Code elsewhere may
+  // then append another "/" plus further components without checking
+  // whether a separator is already present.
+  // The scripting language and the operating system may accept paths such
+  // as "foo//bar", but some FilePath functions do not handle them
+  // correctly.  In particular RemoveTrailingPathSeparator() strips only one
+  // separator, and CreateDirectoriesRecursively() calls it on the
+  // assumption that this turns directory syntax (trailing separator) into
+  // filename syntax.
+  //
+  // On Windows this method additionally replaces the alternate separator
+  // '/' with the primary separator '\\', so "bar\\/\\foo" becomes
+  // "bar\\foo".
+  void Normalize();
+
+  // Returns a pointer to the last valid path separator in this FilePath, or
+  // NULL if there is none.  On Windows, for example, both '/' and '\' count
+  // as valid separators.
+  const char* FindLastPathSeparator() const;
+
+  std::string pathname_;
+};  // class FilePath
+
+}  // namespace internal
+}  // namespace testing
+
+GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4251
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
--- a/gtestsuite/inc/gtest/internal/gtest-internal.h
+++ b/gtestsuite/inc/gtest/internal/gtest-internal.h
--- a/gtestsuite/inc/gtest/internal/gtest-param-util.h
+++ b/gtestsuite/inc/gtest/internal/gtest-param-util.h
@@ -0,0 +1,947 @@
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+// Type and function utilities for implementing parameterized tests.
+
+// GOOGLETEST_CM0001 DO NOT DELETE
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
+
+#include <ctype.h>
+
+#include <cassert>
+#include <iterator>
+#include <memory>
+#include <set>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-port.h"
+#include "gtest/gtest-printers.h"
+#include "gtest/gtest-test-part.h"
+
+namespace testing {
+// Input to a parameterized test name generator, describing a test parameter.
+// Consists of the parameter value and the integer parameter index.
+template <class ParamType>
+struct TestParamInfo {
+  // Stores a copy of a_param together with an_index.
+  TestParamInfo(const ParamType& a_param, size_t an_index)
+      : param(a_param), index(an_index) {}
+
+  ParamType param;  // The parameter value.
+  size_t index;     // The integer index that accompanies the value.
+};
+
+// A builtin parameterized test name generator which returns the result of
+// testing::PrintToString.
+struct PrintToStringParamName {
+  // Names a test after its parameter: returns PrintToString(info.param).
+  template <class ParamType>
+  std::string operator()(const TestParamInfo<ParamType>& info) const {
+    std::string printed = PrintToString(info.param);
+    return printed;
+  }
+};
+
+namespace internal {
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+// Utility Functions
+
+// Outputs a message explaining invalid registration of different
+// fixture class for the same test suite. This may happen when
+// TEST_P macro is used to define two tests with the same name
+// but in different namespaces.
+GTEST_API_ void ReportInvalidTestSuiteType(const char* test_suite_name,
+                                           CodeLocation code_location);
+
+template <typename> class ParamGeneratorInterface;
+template <typename> class ParamGenerator;
+
+// Interface for iterating over elements provided by an implementation
+// of ParamGeneratorInterface<T>.
+template <typename T>
+class ParamIteratorInterface {
+ public:
+  virtual ~ParamIteratorInterface() = default;
+
+  // The generator this iterator was obtained from.  It exists only so that
+  // iterator comparison can verify both sides belong to the same generator.
+  virtual const ParamGeneratorInterface<T>* BaseGenerator() const = 0;
+
+  // Moves to the next element supplied by the generator.  Callers must not
+  // invoke Advance() on an iterator equal to BaseGenerator()->End().
+  virtual void Advance() = 0;
+
+  // Returns a copy of this iterator object; ParamIterator<T> builds its
+  // copy semantics on top of it.
+  virtual ParamIteratorInterface* Clone() const = 0;
+
+  // Gives read-only access to the element currently pointed at.  Callers
+  // must not invoke Current() on an iterator equal to
+  // BaseGenerator()->End().
+  // Backs ParamGenerator<T>::operator*().
+  virtual const T* Current() const = 0;
+
+  // Tells whether this iterator and `other` point at the same element of
+  // the sequence produced by the generator.
+  // Backs ParamGenerator<T>::operator==().
+  virtual bool Equals(const ParamIteratorInterface& other) const = 0;
+};
+
+// Class iterating over elements provided by an implementation of
+// ParamGeneratorInterface<T>. It wraps ParamIteratorInterface<T>
+// and implements the const forward iterator concept.
+template <typename T>
+class ParamIterator {
+ public:
+  // Member types consumed by std::iterator_traits.  `pointer` and
+  // `iterator_category` are required for the traits to be usable by
+  // standard algorithms and container range constructors.  The category is
+  // input_iterator_tag because the class has no default constructor, which
+  // a forward iterator would need.
+  typedef T value_type;
+  typedef const T& reference;
+  typedef const T* pointer;
+  typedef ptrdiff_t difference_type;
+  typedef std::input_iterator_tag iterator_category;
+
+  // Copying clones the underlying implementation, so each ParamIterator
+  // owns its own impl_ object.
+  ParamIterator(const ParamIterator& other) : impl_(other.impl_->Clone()) {}
+  ParamIterator& operator=(const ParamIterator& other) {
+    if (this != &other)
+      impl_.reset(other.impl_->Clone());
+    return *this;
+  }
+
+  // Element access; both forward to impl_->Current().
+  const T& operator*() const { return *impl_->Current(); }
+  const T* operator->() const { return impl_->Current(); }
+
+  // Prefix version of operator++: advances and returns *this.
+  ParamIterator& operator++() {
+    impl_->Advance();
+    return *this;
+  }
+
+  // Postfix version of operator++: returns an iterator holding the state
+  // from before the advance.
+  ParamIterator operator++(int /*unused*/) {
+    // Keep the clone in a unique_ptr while Advance() runs so that it is
+    // released if Advance() throws.
+    std::unique_ptr<ParamIteratorInterface<T> > previous(impl_->Clone());
+    impl_->Advance();
+    return ParamIterator(previous.release());
+  }
+
+  // Two iterators are equal when they share the same implementation object
+  // or when the implementation reports them as equal.
+  bool operator==(const ParamIterator& other) const {
+    return impl_.get() == other.impl_.get() || impl_->Equals(*other.impl_);
+  }
+  bool operator!=(const ParamIterator& other) const {
+    return !(*this == other);
+  }
+
+ private:
+  friend class ParamGenerator<T>;
+  // Takes ownership of impl.  Reachable only from this class and from
+  // ParamGenerator<T>.
+  explicit ParamIterator(ParamIteratorInterface<T>* impl) : impl_(impl) {}
+  std::unique_ptr<ParamIteratorInterface<T> > impl_;
+};
+
+// ParamGeneratorInterface<T> is the binary interface to access generators
+// defined in other translation units.
+template <typename T>
+class ParamGeneratorInterface {
+ public:
+  using ParamType = T;
+
+  virtual ~ParamGeneratorInterface() = default;
+
+  // Generator interface: iterators for the first element and for the
+  // past-the-end position of the generated sequence.
+  virtual ParamIteratorInterface<T>* Begin() const = 0;
+  virtual ParamIteratorInterface<T>* End() const = 0;
+};
+
+// Wraps ParamGeneratorInterface<T> and provides general generator syntax
+// compatible with the STL Container concept.
+// This class implements copy initialization semantics and the contained
+// ParamGeneratorInterface<T> instance is shared among all copies
+// of the original object. This is possible because that instance is immutable.
+template <typename T>
+class ParamGenerator {
+ public:
+  using iterator = ParamIterator<T>;
+
+  // Places impl under shared ownership; copies of this object share it.
+  explicit ParamGenerator(ParamGeneratorInterface<T>* impl) : impl_(impl) {}
+  ParamGenerator(const ParamGenerator& other) : impl_(other.impl_) {}
+
+  ParamGenerator& operator=(const ParamGenerator& other) {
+    impl_ = other.impl_;
+    return *this;
+  }
+
+  // Wrap the raw iterators produced by the implementation.
+  iterator begin() const {
+    ParamIteratorInterface<T>* const first = impl_->Begin();
+    return iterator(first);
+  }
+  iterator end() const {
+    ParamIteratorInterface<T>* const past_last = impl_->End();
+    return iterator(past_last);
+  }
+
+ private:
+  std::shared_ptr<const ParamGeneratorInterface<T> > impl_;
+};
+
+// Generates values from a range of two comparable values. Can be used to
+// generate sequences of user-defined types that implement operator+() and
+// operator<().
+// This class is used in the Range() function.
+template <typename T, typename IncrementT>
+class RangeGenerator : public ParamGeneratorInterface<T> {
+ public:
+  // Stores the bounds and the step, and precomputes the index of the
+  // past-the-end position.
+  RangeGenerator(T begin, T end, IncrementT step)
+      : begin_(begin),
+        end_(end),
+        step_(step),
+        end_index_(CalculateEndIndex(begin, end, step)) {}
+  ~RangeGenerator() override {}
+
+  // Iterator at index 0 holding the first value.
+  ParamIteratorInterface<T>* Begin() const override {
+    return new Iterator(this, begin_, 0, step_);
+  }
+  // Iterator at the precomputed end index.
+  ParamIteratorInterface<T>* End() const override {
+    return new Iterator(this, end_, end_index_, step_);
+  }
+
+ private:
+  class Iterator : public ParamIteratorInterface<T> {
+   public:
+    Iterator(const ParamGeneratorInterface<T>* base, T value, int index,
+             IncrementT step)
+        : base_(base), value_(value), index_(index), step_(step) {}
+    ~Iterator() override {}
+
+    const ParamGeneratorInterface<T>* BaseGenerator() const override {
+      return base_;
+    }
+    // Adds one step to the value and bumps the index.
+    void Advance() override {
+      value_ = static_cast<T>(value_ + step_);
+      ++index_;
+    }
+    ParamIteratorInterface<T>* Clone() const override {
+      return new Iterator(*this);
+    }
+    const T* Current() const override { return &value_; }
+    // Iterators are compared by index only, never by value.
+    bool Equals(const ParamIteratorInterface<T>& other) const override {
+      // A shared base generator guarantees that `other` has the same
+      // dynamic type, which makes the downcast below valid.
+      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+          << "The program attempted to compare iterators "
+          << "from different generators." << std::endl;
+      const Iterator* const peer =
+          CheckedDowncastToActualType<const Iterator>(&other);
+      return peer->index_ == index_;
+    }
+
+   private:
+    // Copy constructor; used by Clone().
+    Iterator(const Iterator& other)
+        : ParamIteratorInterface<T>(),
+          base_(other.base_),
+          value_(other.value_),
+          index_(other.index_),
+          step_(other.step_) {}
+
+    // No implementation - assignment is unsupported.
+    void operator=(const Iterator& other);
+
+    const ParamGeneratorInterface<T>* const base_;
+    T value_;
+    int index_;
+    const IncrementT step_;
+  };  // class RangeGenerator::Iterator
+
+  // Counts how many times `step` must be added to `begin` before the value
+  // stops comparing less than `end`.
+  static int CalculateEndIndex(const T& begin,
+                               const T& end,
+                               const IncrementT& step) {
+    int count = 0;
+    T cursor = begin;
+    while (cursor < end) {
+      ++count;
+      cursor = static_cast<T>(cursor + step);
+    }
+    return count;
+  }
+
+  // No implementation - assignment is unsupported.
+  void operator=(const RangeGenerator& other);
+
+  const T begin_;
+  const T end_;
+  const IncrementT step_;
+  // Index assigned to the end() iterator.  Every element of the generated
+  // sequence carries a 0-based index so that iterators can be compared.
+  const int end_index_;
+};  // class RangeGenerator
+
+
+// Generates values from a pair of STL-style iterators. Used in the
+// ValuesIn() function. The elements are copied from the source range
+// since the source can be located on the stack, and the generator
+// is likely to persist beyond that stack frame.
+template <typename T>
+class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> {
+ public:
+  // Copies the elements of [begin, end) into the internal container.
+  template <typename ForwardIterator>
+  ValuesInIteratorRangeGenerator(ForwardIterator begin, ForwardIterator end)
+      : container_(begin, end) {}
+  ~ValuesInIteratorRangeGenerator() override {}
+
+  ParamIteratorInterface<T>* Begin() const override {
+    return new Iterator(this, container_.begin());
+  }
+  ParamIteratorInterface<T>* End() const override {
+    return new Iterator(this, container_.end());
+  }
+
+ private:
+  using ContainerType = ::std::vector<T>;
+
+  class Iterator : public ParamIteratorInterface<T> {
+   public:
+    Iterator(const ParamGeneratorInterface<T>* base,
+             typename ContainerType::const_iterator iterator)
+        : base_(base), iterator_(iterator) {}
+    ~Iterator() override {}
+
+    const ParamGeneratorInterface<T>* BaseGenerator() const override {
+      return base_;
+    }
+    // Steps to the next element and discards the cached value.
+    void Advance() override {
+      ++iterator_;
+      value_.reset();
+    }
+    ParamIteratorInterface<T>* Clone() const override {
+      return new Iterator(*this);
+    }
+    // The value referenced by iterator_ is served from a cache because
+    // *iterator_ may yield a temporary object (and of a type other than T),
+    // so a plain "return &*iterator_;" does not work.
+    // The cache is filled here rather than in Advance() because Advance()
+    // may move iterator_ past the end of the range and that cannot be
+    // detected.  Client code, on the other hand, is responsible for not
+    // calling Current() on an out-of-range iterator.
+    const T* Current() const override {
+      if (!value_) value_.reset(new T(*iterator_));
+      return value_.get();
+    }
+    bool Equals(const ParamIteratorInterface<T>& other) const override {
+      // A shared base generator guarantees that `other` has the same
+      // dynamic type, which makes the downcast below valid.
+      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+          << "The program attempted to compare iterators "
+          << "from different generators." << std::endl;
+      const Iterator* const peer =
+          CheckedDowncastToActualType<const Iterator>(&other);
+      return iterator_ == peer->iterator_;
+    }
+
+   private:
+    // Copies the position only; the cached value starts out empty.
+    Iterator(const Iterator& other)
+          // Naming the base constructor explicitly suppresses a false
+          // warning that gcc emits under -Wextra.
+        : ParamIteratorInterface<T>(),
+          base_(other.base_),
+          iterator_(other.iterator_) {}
+
+    const ParamGeneratorInterface<T>* const base_;
+    typename ContainerType::const_iterator iterator_;
+    // Cached copy of *iterator_, kept so that the wrapping iterator's
+    // operator->() can return a pointer.  It is mutable because Current()
+    // is const yet has to fill it.  std::unique_ptr ties the cached value's
+    // lifetime to that of the iterator itself.
+    mutable std::unique_ptr<const T> value_;
+  };  // class ValuesInIteratorRangeGenerator::Iterator
+
+  // No implementation - assignment is unsupported.
+  void operator=(const ValuesInIteratorRangeGenerator& other);
+
+  const ContainerType container_;
+};  // class ValuesInIteratorRangeGenerator
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Default parameterized test name generator, returns a string containing the
+// integer test parameter index.
+template <class ParamType>
+std::string DefaultParamName(const TestParamInfo<ParamType>& info) {
+  // Stream the index into a Message and return the accumulated text.
+  return (Message() << info.index).GetString();
+}
+
+// Zero-argument overload.  The static_assert condition depends on T, so it
+// is evaluated only when this overload is instantiated, i.e. when a caller
+// passes no arguments.
+template <typename T = int>
+void TestNotEmpty() {
+  static_assert(0 == sizeof(T), "Empty arguments are not allowed.");
+}
+
+// One-argument overload: accepts any value and does nothing.
+template <typename T = int>
+void TestNotEmpty(const T& /*unused*/) {}
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Stores a parameter value and later creates tests parameterized with that
+// value.
+template <class TestClass>
+class ParameterizedTestFactory : public TestFactoryBase {
+ public:
+  using ParamType = typename TestClass::ParamType;
+
+  // Keeps a private copy of the parameter for later use by CreateTest().
+  explicit ParameterizedTestFactory(ParamType parameter)
+      : parameter_(parameter) {}
+
+  // Hands the stored parameter to TestClass::SetParam() first and only then
+  // constructs the test object.
+  Test* CreateTest() override {
+    TestClass::SetParam(&parameter_);
+    TestClass* const instance = new TestClass();
+    return instance;
+  }
+
+ private:
+  const ParamType parameter_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestFactory);
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// TestMetaFactoryBase is a base class for meta-factories that create
+// test factories for passing into MakeAndRegisterTestInfo function.
+template <class ParamType>
+class TestMetaFactoryBase {
+ public:
+  virtual ~TestMetaFactoryBase() = default;
+
+  // Creates a test factory for the given parameter value.
+  virtual TestFactoryBase* CreateTestFactory(ParamType parameter) = 0;
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// TestMetaFactory creates test factories for passing into
+// MakeAndRegisterTestInfo function. Since MakeAndRegisterTestInfo receives
+// ownership of test factory pointer, same factory object cannot be passed
+// into that method twice. But ParameterizedTestSuiteInfo is going to call
+// it for each Test/Parameter value combination. Thus it needs meta factory
+// creator class.
+template <class TestSuite>
+class TestMetaFactory
+    : public TestMetaFactoryBase<typename TestSuite::ParamType> {
+ public:
+  using ParamType = typename TestSuite::ParamType;
+
+  TestMetaFactory() {}
+
+  // Allocates a fresh ParameterizedTestFactory bound to `parameter` on
+  // every call.
+  TestFactoryBase* CreateTestFactory(ParamType parameter) override {
+    ParameterizedTestFactory<TestSuite>* const factory =
+        new ParameterizedTestFactory<TestSuite>(parameter);
+    return factory;
+  }
+
+ private:
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestMetaFactory);
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// ParameterizedTestSuiteInfoBase is a generic interface
+// to ParameterizedTestSuiteInfo classes. ParameterizedTestSuiteInfoBase
+// accumulates test information provided by TEST_P macro invocations
+// and generators provided by INSTANTIATE_TEST_SUITE_P macro invocations
+// and uses that information to register all resulting test instances
+// in RegisterTests method. The ParameterizedTestSuiteRegistry class holds
+// a collection of pointers to the ParameterizedTestSuiteInfo objects
+// and calls RegisterTests() on each of them when asked.
+class ParameterizedTestSuiteInfoBase {
+ public:
+  virtual ~ParameterizedTestSuiteInfoBase() = default;
+
+  // Base part of the test suite name, intended for display.
+  virtual const std::string& GetTestSuiteName() const = 0;
+
+  // Id of the test suite, used to verify identity.
+  virtual TypeId GetTestSuiteTypeId() const = 0;
+
+  // Invoked by the UnitTest class to register the tests of this test suite
+  // right before they are run in the RUN_ALL_TESTS macro.  It should not be
+  // called more than once on any single instance of a class derived from
+  // ParameterizedTestSuiteInfoBase.
+  virtual void RegisterTests() = 0;
+
+ protected:
+  // Construction is reserved for derived classes.
+  ParameterizedTestSuiteInfoBase() {}
+
+ private:
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestSuiteInfoBase);
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Reports the name of a test suite as safe to ignore,
+// as a side effect of constructing an object of this type.
+struct GTEST_API_ MarkAsIgnored {
+  explicit MarkAsIgnored(const char* test_suite);
+};
+
+GTEST_API_ void InsertSyntheticTestCase(const std::string& name,
+                                        CodeLocation location, bool has_test_p);
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// ParameterizedTestSuiteInfo accumulates tests obtained from TEST_P
+// macro invocations for a particular test suite and generators
+// obtained from INSTANTIATE_TEST_SUITE_P macro invocations for that
+// test suite. It registers tests with all values generated by all
+// generators when asked.
+template <class TestSuite>
+class ParameterizedTestSuiteInfo : public ParameterizedTestSuiteInfoBase {
+ public:
+  // ParamType and GeneratorCreationFunc are private types but are required
+  // for declarations of public methods AddTestPattern() and
+  // AddTestSuiteInstantiation().
+  using ParamType = typename TestSuite::ParamType;
+  // A function that returns an instance of appropriate generator type.
+  typedef ParamGenerator<ParamType>(GeneratorCreationFunc)();
+  using ParamNameGeneratorFunc = std::string(const TestParamInfo<ParamType>&);
+
+  explicit ParameterizedTestSuiteInfo(const char* name,
+                                      CodeLocation code_location)
+      : test_suite_name_(name), code_location_(code_location) {}
+
+  // Test suite base name for display purposes.
+  const std::string& GetTestSuiteName() const override {
+    return test_suite_name_;
+  }
+  // Test suite id to verify identity.
+  TypeId GetTestSuiteTypeId() const override { return GetTypeId<TestSuite>(); }
+  // TEST_P macro uses AddTestPattern() to record information
+  // about a single test in a LocalTestInfo structure.
+  // test_suite_name is the base name of the test suite (without invocation
+  // prefix). test_base_name is the name of an individual test without
+  // parameter index. For the test SequenceA/FooTest.DoBar/1 FooTest is
+  // test suite base name and DoBar is test base name.
+  void AddTestPattern(const char* test_suite_name, const char* test_base_name,
+                      TestMetaFactoryBase<ParamType>* meta_factory,
+                      CodeLocation code_location) {
+    // Wrap the new record in a shared_ptr first, then append it to tests_.
+    std::shared_ptr<TestInfo> pattern(new TestInfo(
+        test_suite_name, test_base_name, meta_factory, code_location));
+    tests_.push_back(pattern);
+  }
+  // INSTANTIATE_TEST_SUITE_P macro uses AddTestSuiteInstantiation() to record
+  // information about a generator.
+  int AddTestSuiteInstantiation(const std::string& instantiation_name,
+                                GeneratorCreationFunc* func,
+                                ParamNameGeneratorFunc* name_func,
+                                const char* file, int line) {
+    // Build the record for this instantiation, then append it.
+    InstantiationInfo record(instantiation_name, func, name_func, file, line);
+    instantiations_.push_back(record);
+    // The result carries no information; it only lets the call appear in an
+    // initializer at namespace scope.
+    return 0;
+  }
+  // UnitTest class invokes this method to register tests in this test suite
+  // right before running tests in RUN_ALL_TESTS macro.
+  // This method should not be called more than once on any single
+  // instance of a ParameterizedTestSuiteInfoBase derived class.
+  // UnitTest has a guard to prevent from calling this method more than once.
+  void RegisterTests() override {
+    // Becomes true as soon as any generator yields at least one parameter.
+    bool any_test_generated = false;
+
+    // Every TEST_P pattern is combined with every instantiation.
+    for (const std::shared_ptr<TestInfo>& pattern : tests_) {
+      for (const InstantiationInfo& instantiation : instantiations_) {
+        ParamGenerator<ParamType> generator((*instantiation.generator)());
+        ParamNameGeneratorFunc* const make_param_name = instantiation.name_func;
+        const char* const file = instantiation.file;
+        const int line = instantiation.line;
+
+        // "<instantiation>/<suite>", or just "<suite>" when the instantiation
+        // name is empty.
+        std::string full_suite_name;
+        if (!instantiation.name.empty()) {
+          full_suite_name = instantiation.name + "/";
+        }
+        full_suite_name += pattern->test_suite_base_name;
+
+        // Parameter names already used within this (pattern, instantiation).
+        std::set<std::string> used_param_names;
+        size_t param_index = 0;
+        for (const auto& param : generator) {
+          any_test_generated = true;
+
+          const std::string param_name =
+              make_param_name(TestParamInfo<ParamType>(param, param_index));
+
+          GTEST_CHECK_(IsValidParamName(param_name))
+              << "Parameterized test name '" << param_name
+              << "' is invalid, in " << file
+              << " line " << line << std::endl;
+
+          GTEST_CHECK_(used_param_names.count(param_name) == 0)
+              << "Duplicate parameterized test name '" << param_name
+              << "', in " << file << " line " << line << std::endl;
+
+          used_param_names.insert(param_name);
+
+          // "<test>/<param>", or just "<param>" when the base name is empty.
+          Message full_test_name;
+          if (!pattern->test_base_name.empty()) {
+            full_test_name << pattern->test_base_name << "/";
+          }
+          full_test_name << param_name;
+
+          MakeAndRegisterTestInfo(
+              full_suite_name.c_str(), full_test_name.GetString().c_str(),
+              nullptr,  // No type parameter.
+              PrintToString(param).c_str(), pattern->code_location,
+              GetTestSuiteTypeId(),
+              SuiteApiResolver<TestSuite>::GetSetUpCaseOrSuite(file, line),
+              SuiteApiResolver<TestSuite>::GetTearDownCaseOrSuite(file, line),
+              pattern->test_meta_factory->CreateTestFactory(param));
+
+          ++param_index;
+        }  // for param
+      }  // for instantiation
+    }  // for pattern
+
+    if (any_test_generated) return;
+
+    // There are no generators, or they all generate nothing ...
+    InsertSyntheticTestCase(GetTestSuiteName(), code_location_,
+                            !tests_.empty());
+  }    // RegisterTests
+
+ private:
+  // TestInfo structure keeps information about a single test registered
+  // with TEST_P macro.
+  struct TestInfo {
+    // Copies both names into owned strings and stores `factory` in the
+    // unique_ptr member, which then owns it.
+    TestInfo(const char* suite_base_name, const char* base_name,
+             TestMetaFactoryBase<ParamType>* factory,
+             CodeLocation location)
+        : test_suite_base_name(suite_base_name),
+          test_base_name(base_name),
+          test_meta_factory(factory),
+          code_location(location) {}
+
+    // Suite name without the instantiation prefix.
+    const std::string test_suite_base_name;
+    // Test name without the parameter suffix.
+    const std::string test_base_name;
+    // Produces the per-parameter test factories; owned by this record.
+    const std::unique_ptr<TestMetaFactoryBase<ParamType> > test_meta_factory;
+    // Source location that was handed to AddTestPattern().
+    const CodeLocation code_location;
+  };
+  using TestInfoContainer = ::std::vector<std::shared_ptr<TestInfo> >;
+  // Records data received from INSTANTIATE_TEST_SUITE_P macros:
+  //  <Instantiation name, Sequence generator creation function,
+  //     Name generator function, Source file, Source line>
+  struct InstantiationInfo {
+    // Plain record: every argument is stored unchanged in the member of the
+    // corresponding name.
+    InstantiationInfo(const std::string& instantiation_name,
+                      GeneratorCreationFunc* generator_func,
+                      ParamNameGeneratorFunc* param_name_func,
+                      const char* source_file, int source_line)
+        : name(instantiation_name),
+          generator(generator_func),
+          name_func(param_name_func),
+          file(source_file),
+          line(source_line) {}
+
+    std::string name;                   // Instantiation name (may be empty).
+    GeneratorCreationFunc* generator;   // Creates the parameter sequence.
+    ParamNameGeneratorFunc* name_func;  // Names each generated parameter.
+    const char* file;                   // Source file of the instantiation.
+    int line;                           // Source line of the instantiation.
+  };
+  typedef ::std::vector<InstantiationInfo> InstantiationContainer;
+
+  // Returns true if and only if `name` is non-empty and consists solely of
+  // characters accepted by IsAlNum() and underscores.
+  static bool IsValidParamName(const std::string& name) {
+    // An empty name is never valid.
+    if (name.empty()) return false;
+
+    // Reject the name at the first character outside the allowed set.
+    for (const char ch : name) {
+      const bool allowed = IsAlNum(ch) || ch == '_';
+      if (!allowed) return false;
+    }
+
+    return true;
+  }
+
+  const std::string test_suite_name_;
+  CodeLocation code_location_;
+  TestInfoContainer tests_;
+  InstantiationContainer instantiations_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestSuiteInfo);
+};  // class ParameterizedTestSuiteInfo
+
+//  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+template <class TestCase>
+using ParameterizedTestCaseInfo = ParameterizedTestSuiteInfo<TestCase>;
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// ParameterizedTestSuiteRegistry contains a map of
+// ParameterizedTestSuiteInfoBase classes accessed by test suite names. TEST_P
+// and INSTANTIATE_TEST_SUITE_P macros use it to locate their corresponding
+// ParameterizedTestSuiteInfo descriptors.
+class ParameterizedTestSuiteRegistry {
+ public:
+  ParameterizedTestSuiteRegistry() {}
+  // The registry deletes every descriptor it has collected.
+  ~ParameterizedTestSuiteRegistry() {
+    for (ParameterizedTestSuiteInfoBase* owned_info : test_suite_infos_) {
+      delete owned_info;
+    }
+  }
+
+  // Returns the descriptor registered under test_suite_name, creating and
+  // registering a new one when no descriptor with that name exists yet.
+  template <class TestSuite>
+  ParameterizedTestSuiteInfo<TestSuite>* GetTestSuitePatternHolder(
+      const char* test_suite_name, CodeLocation code_location) {
+    using TypedInfo = ParameterizedTestSuiteInfo<TestSuite>;
+
+    for (ParameterizedTestSuiteInfoBase* candidate : test_suite_infos_) {
+      if (candidate->GetTestSuiteName() != test_suite_name) continue;
+
+      // Only the first descriptor with a matching name is ever considered.
+      if (candidate->GetTestSuiteTypeId() != GetTypeId<TestSuite>()) {
+        // Complain about incorrect usage of Google Test facilities
+        // and terminate the program since we cannot guarantee correct
+        // test suite setup and tear-down in this case.
+        ReportInvalidTestSuiteType(test_suite_name, code_location);
+        posix::Abort();
+      } else {
+        // The name and the type id both match, so the object is of the type
+        // we are looking for and can be downcast without further checks.
+        return CheckedDowncastToActualType<TypedInfo>(candidate);
+      }
+      break;
+    }
+
+    // Nothing suitable was found: create a descriptor and keep it.
+    TypedInfo* const created = new TypedInfo(test_suite_name, code_location);
+    test_suite_infos_.push_back(created);
+    return created;
+  }
+
+  // Forwards RegisterTests() to every descriptor, in registration order.
+  void RegisterTests() {
+    for (ParameterizedTestSuiteInfoBase* info : test_suite_infos_) {
+      info->RegisterTests();
+    }
+  }
+//  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  template <class TestCase>
+  ParameterizedTestCaseInfo<TestCase>* GetTestCasePatternHolder(
+      const char* test_case_name, CodeLocation code_location) {
+    return GetTestSuitePatternHolder<TestCase>(test_case_name, code_location);
+  }
+
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ private:
+  using TestSuiteInfoContainer = ::std::vector<ParameterizedTestSuiteInfoBase*>;
+
+  TestSuiteInfoContainer test_suite_infos_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestSuiteRegistry);
+};
+
+// Keeps track of which type-parameterized test suites are defined and
+// where, as well as which are instantiated. This allows subsequently
+// identifying suites that are defined but never used.
+class TypeParameterizedTestSuiteRegistry {
+ public:
+  // Adds a suite definition together with its source location.
+  void RegisterTestSuite(const char* test_suite_name,
+                         CodeLocation code_location);
+
+  // Adds an instantiation of a suite.
+  void RegisterInstantiation(const char* test_suite_name);
+
+  // For each suite reported as defined but not reported as instantiated,
+  // emits a test that reports that fact (configurably, as an error).
+  void CheckForInstantiations();
+
+ private:
+  // Bookkeeping for one suite: where it was defined and whether an
+  // instantiation has been recorded (initially not).
+  struct TypeParameterizedTestSuiteInfo {
+    explicit TypeParameterizedTestSuiteInfo(CodeLocation location)
+        : code_location(location) {}
+
+    CodeLocation code_location;
+    bool instantiated = false;
+  };
+
+  // Suite bookkeeping keyed by a std::string name.
+  std::map<std::string, TypeParameterizedTestSuiteInfo> suites_;
+};
+
+}  // namespace internal
+
+// Forward declarations of ValuesIn(), which is implemented in
+// include/gtest/gtest-param-test.h.
+template <class Container>
+internal::ParamGenerator<typename Container::value_type> ValuesIn(
+    const Container& container);
+
+namespace internal {
+// Used in the Values() function to provide polymorphic capabilities.
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4100)
+#endif
+
+template <typename... Ts>
+class ValueArray {
+ public:
+  // Takes the values by value and moves each one into the backing FlatTuple.
+  explicit ValueArray(Ts... values)
+      : v_(FlatTupleConstructTag{}, std::move(values)...) {}
+
+  // Implicit conversion to a generator over T: the stored values are
+  // converted with static_cast<T>, in order, and passed to ValuesIn().
+  template <typename T>
+  operator ParamGenerator<T>() const {  // NOLINT
+    using AllIndices = MakeIndexSequence<sizeof...(Ts)>;
+    return ValuesIn(ToVector<T>(AllIndices()));
+  }
+
+ private:
+  // Expands the index pack to read every stored element as a T.
+  template <typename T, size_t... Idx>
+  std::vector<T> ToVector(IndexSequence<Idx...>) const {
+    return std::vector<T>{static_cast<T>(v_.template Get<Idx>())...};
+  }
+
+  FlatTuple<Ts...> v_;
+};
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+template <typename... T>
+class CartesianProductGenerator
+    : public ParamGeneratorInterface<::std::tuple<T...>> {
+ public:
+  typedef ::std::tuple<T...> ParamType;  // One element per component generator.
+
+  CartesianProductGenerator(const std::tuple<ParamGenerator<T>...>& g)
+      : generators_(g) {}  // Keeps its own copy of the generator tuple.
+  ~CartesianProductGenerator() override {}
+
+  ParamIteratorInterface<ParamType>* Begin() const override {
+    return new Iterator(this, generators_, false);  // is_end == false
+  }
+  ParamIteratorInterface<ParamType>* End() const override {
+    return new Iterator(this, generators_, true);  // is_end == true
+  }
+
+ private:
+  template <class I>
+  class IteratorImpl;  // Only the IndexSequence specialization is defined.
+  template <size_t... I>
+  class IteratorImpl<IndexSequence<I...>>
+      : public ParamIteratorInterface<ParamType> {
+   public:
+    IteratorImpl(const ParamGeneratorInterface<ParamType>* base,
+             const std::tuple<ParamGenerator<T>...>& generators, bool is_end)
+        : base_(base),
+          begin_(std::get<I>(generators).begin()...),
+          end_(std::get<I>(generators).end()...),
+          current_(is_end ? end_ : begin_) {  // begin_/end_ are declared first.
+      ComputeCurrentValue();
+    }
+    ~IteratorImpl() override {}
+
+    const ParamGeneratorInterface<ParamType>* BaseGenerator() const override {
+      return base_;
+    }
+    // Advance must not be called on an iterator that is already at the end,
+    // so no component iterator may be at the end of its range either.
+    void Advance() override {
+      assert(!AtEnd());
+      // Advance the last iterator.
+      ++std::get<sizeof...(T) - 1>(current_);
+      // If that reaches its end, wrap it and carry into the preceding ones.
+      AdvanceIfEnd<sizeof...(T) - 1>();
+      ComputeCurrentValue();
+    }
+    ParamIteratorInterface<ParamType>* Clone() const override {
+      return new IteratorImpl(*this);
+    }
+
+    const ParamType* Current() const override { return current_value_.get(); }
+
+    bool Equals(const ParamIteratorInterface<ParamType>& other) const override {
+      // Having the same base generator guarantees that the other
+      // iterator is of the same type and we can downcast.
+      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+          << "The program attempted to compare iterators "
+          << "from different generators." << std::endl;
+      const IteratorImpl* typed_other =
+          CheckedDowncastToActualType<const IteratorImpl>(&other);
+
+      // We must report iterators equal if they both point beyond their
+      // respective ranges. That can happen in a variety of fashions,
+      // so we have to consult AtEnd().
+      if (AtEnd() && typed_other->AtEnd()) return true;
+
+      bool same = true;
+      bool dummy[] = {  // Array initializer used only to expand the pack over I.
+          (same = same && std::get<I>(current_) ==
+                              std::get<I>(typed_other->current_))...};
+      (void)dummy;
+      return same;
+    }
+
+   private:
+    template <size_t ThisI>
+    void AdvanceIfEnd() {
+      if (std::get<ThisI>(current_) != std::get<ThisI>(end_)) return;
+      // Position ThisI is exhausted; position 0 has nothing left to carry into.
+      bool last = ThisI == 0;
+      if (last) {
+        // We are done. Nothing else to propagate.
+        return;
+      }
+      // NextI stays 0 when ThisI is 0, so it never wraps around as a size_t.
+      constexpr size_t NextI = ThisI - (ThisI != 0);
+      std::get<ThisI>(current_) = std::get<ThisI>(begin_);
+      ++std::get<NextI>(current_);
+      AdvanceIfEnd<NextI>();
+    }
+
+    void ComputeCurrentValue() {
+      if (!AtEnd())  // At the end, the previously computed value is kept.
+        current_value_ = std::make_shared<ParamType>(*std::get<I>(current_)...);
+    }
+    bool AtEnd() const {
+      bool at_end = false;  // Set when any component iterator is at its end.
+      bool dummy[] = {
+          (at_end = at_end || std::get<I>(current_) == std::get<I>(end_))...};
+      (void)dummy;
+      return at_end;
+    }
+
+    const ParamGeneratorInterface<ParamType>* const base_;
+    std::tuple<typename ParamGenerator<T>::iterator...> begin_;
+    std::tuple<typename ParamGenerator<T>::iterator...> end_;
+    std::tuple<typename ParamGenerator<T>::iterator...> current_;
+    std::shared_ptr<ParamType> current_value_;  // Tuple built from current_.
+  };
+
+  using Iterator = IteratorImpl<typename MakeIndexSequence<sizeof...(T)>::type>;
+
+  std::tuple<ParamGenerator<T>...> generators_;
+};
+
+template <class... Gen>
+class CartesianProductHolder {
+ public:
+  // Keeps a copy of every argument for a later conversion.
+  CartesianProductHolder(const Gen&... gens) : generators_(gens...) {}
+
+  // Converts to a generator of tuples by handing the stored arguments to a
+  // newly allocated CartesianProductGenerator<T...>.
+  template <typename... T>
+  operator ParamGenerator<::std::tuple<T...>>() const {
+    using TupleGenerator = ParamGenerator<::std::tuple<T...>>;
+    return TupleGenerator(new CartesianProductGenerator<T...>(generators_));
+  }
+
+ private:
+  std::tuple<Gen...> generators_;
+};
+
+}  // namespace internal
+}  // namespace testing
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
--- a/gtestsuite/inc/gtest/internal/gtest-port-arch.h
+++ b/gtestsuite/inc/gtest/internal/gtest-port-arch.h
@@ -0,0 +1,114 @@
+// Copyright 2015, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This header file defines the GTEST_OS_* macro.
+// It is separate from gtest-port.h so that custom/gtest-port.h can include it.
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
+
+// Determines the platform on which Google Test is compiled.
+// Exactly one branch of the chain below is taken; the first matching
+// condition wins.
+#if defined(__CYGWIN__)
+# define GTEST_OS_CYGWIN 1
+#elif defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)
+# define GTEST_OS_WINDOWS_MINGW 1
+# define GTEST_OS_WINDOWS 1
+#elif defined(_WIN32)
+# define GTEST_OS_WINDOWS 1
+// Refine the Windows flavour.
+# if defined(_WIN32_WCE)
+#  define GTEST_OS_WINDOWS_MOBILE 1
+# elif defined(WINAPI_FAMILY)
+#  include <winapifamily.h>
+#  if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+#   define GTEST_OS_WINDOWS_DESKTOP 1
+#  elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_PHONE_APP)
+#   define GTEST_OS_WINDOWS_PHONE 1
+#  elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
+#   define GTEST_OS_WINDOWS_RT 1
+#  elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_TV_TITLE)
+#   define GTEST_OS_WINDOWS_PHONE 1
+#   define GTEST_OS_WINDOWS_TV_TITLE 1
+#  else
+// WINAPI_FAMILY defined but no known partition matched.
+// Default to desktop.
+#   define GTEST_OS_WINDOWS_DESKTOP 1
+#  endif
+# else
+#  define GTEST_OS_WINDOWS_DESKTOP 1
+# endif  // _WIN32_WCE
+#elif defined(__OS2__)
+# define GTEST_OS_OS2 1
+#elif defined(__APPLE__)
+# define GTEST_OS_MAC 1
+# include <TargetConditionals.h>
+# if TARGET_OS_IPHONE
+#  define GTEST_OS_IOS 1
+# endif
+#elif defined(__DragonFly__)
+# define GTEST_OS_DRAGONFLY 1
+#elif defined(__FreeBSD__)
+# define GTEST_OS_FREEBSD 1
+#elif defined(__Fuchsia__)
+# define GTEST_OS_FUCHSIA 1
+#elif defined(__GLIBC__) && defined(__FreeBSD_kernel__)
+# define GTEST_OS_GNU_KFREEBSD 1
+#elif defined(__linux__)
+# define GTEST_OS_LINUX 1
+# if defined(__ANDROID__)
+#  define GTEST_OS_LINUX_ANDROID 1
+# endif
+#elif defined(__MVS__)
+# define GTEST_OS_ZOS 1
+#elif defined(__sun) && defined(__SVR4)
+# define GTEST_OS_SOLARIS 1
+#elif defined(_AIX)
+# define GTEST_OS_AIX 1
+#elif defined(__hpux)
+# define GTEST_OS_HPUX 1
+#elif defined(__native_client__)
+# define GTEST_OS_NACL 1
+#elif defined(__NetBSD__)
+# define GTEST_OS_NETBSD 1
+#elif defined(__OpenBSD__)
+# define GTEST_OS_OPENBSD 1
+#elif defined(__QNX__)
+# define GTEST_OS_QNX 1
+#elif defined(__HAIKU__)
+# define GTEST_OS_HAIKU 1
+#elif defined(ESP8266)
+# define GTEST_OS_ESP8266 1
+#elif defined(ESP32)
+# define GTEST_OS_ESP32 1
+#elif defined(__XTENSA__)
+# define GTEST_OS_XTENSA 1
+#endif  // __CYGWIN__
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
--- a/gtestsuite/inc/gtest/internal/gtest-port.h
+++ b/gtestsuite/inc/gtest/internal/gtest-port.h
--- a/gtestsuite/inc/gtest/internal/gtest-string.h
+++ b/gtestsuite/inc/gtest/internal/gtest-string.h
@@ -0,0 +1,175 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This header file declares the String class and functions used internally by
+// Google Test.  They are subject to change without notice. They should not be
+// used by code external to Google Test.
+//
+// This header file is #included by gtest-internal.h.
+// It should not be #included by other files.
+
+// GOOGLETEST_CM0001 DO NOT DELETE
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
+
+#ifdef __BORLANDC__
+// string.h is not guaranteed to provide strcpy on C++ Builder.
+# include <mem.h>
+#endif
+
+#include <string.h>
+#include <cstdint>
+#include <string>
+
+#include "gtest/internal/gtest-port.h"
+
+namespace testing {
+namespace internal {
+
+// String - an abstract class holding static string utilities.
+class GTEST_API_ String {
+ public:
+  // Static utility methods
+
+  // Clones a 0-terminated C string, allocating memory using new.  The
+  // caller is responsible for deleting the return value using
+  // delete[].  Returns the cloned string, or NULL if the input is
+  // NULL.
+  //
+  // This is different from strdup() in string.h, which allocates
+  // memory using malloc().
+  static const char* CloneCString(const char* c_str);
+
+#if GTEST_OS_WINDOWS_MOBILE
+  // Windows CE does not have the 'ANSI' versions of Win32 APIs. To be
+  // able to pass strings to Win32 APIs on CE we need to convert them
+  // to 'Unicode', UTF-16.
+
+  // Creates a UTF-16 wide string from the given ANSI string, allocating
+  // memory using new. The caller is responsible for deleting the return
+  // value using delete[]. Returns the wide string, or NULL if the
+  // input is NULL.
+  //
+  // The wide string is created using the ANSI codepage (CP_ACP) to
+  // match the behaviour of the ANSI versions of Win32 calls and the
+  // C runtime.
+  static LPCWSTR AnsiToUtf16(const char* c_str);
+
+  // Creates an ANSI string from the given wide string, allocating
+  // memory using new. The caller is responsible for deleting the return
+  // value using delete[]. Returns the ANSI string, or NULL if the
+  // input is NULL.
+  //
+  // The returned string is created using the ANSI codepage (CP_ACP) to
+  // match the behaviour of the ANSI versions of Win32 calls and the
+  // C runtime.
+  static const char* Utf16ToAnsi(LPCWSTR utf16_str);
+#endif
+
+  // Compares two C strings.  Returns true if and only if they have the same
+  // content.
+  //
+  // Unlike strcmp(), this function can handle NULL argument(s).  A
+  // NULL C string is considered different to any non-NULL C string,
+  // including the empty string.
+  static bool CStringEquals(const char* lhs, const char* rhs);
+
+  // Converts a wide C string to a std::string using the UTF-8 encoding.
+  // NULL will be converted to "(null)".  If an error occurred during
+  // the conversion, "(failed to convert from wide string)" is
+  // returned.
+  static std::string ShowWideCString(const wchar_t* wide_c_str);
+
+  // Compares two wide C strings.  Returns true if and only if they have the
+  // same content.
+  //
+  // Unlike wcscmp(), this function can handle NULL argument(s).  A
+  // NULL C string is considered different to any non-NULL C string,
+  // including the empty string.
+  static bool WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs);
+
+  // Compares two C strings, ignoring case.  Returns true if and only if
+  // they have the same content.
+  //
+  // Unlike strcasecmp(), this function can handle NULL argument(s).
+  // A NULL C string is considered different to any non-NULL C string,
+  // including the empty string.
+  static bool CaseInsensitiveCStringEquals(const char* lhs,
+                                           const char* rhs);
+
+  // Compares two wide C strings, ignoring case.  Returns true if and only if
+  // they have the same content.
+  //
+  // Unlike wcscasecmp(), this function can handle NULL argument(s).
+  // A NULL C string is considered different to any non-NULL wide C string,
+  // including the empty string.
+  // NB: The implementations on different platforms slightly differ.
+  // On Windows, this method uses _wcsicmp which compares according to LC_CTYPE
+  // environment variable. On GNU platform this method uses wcscasecmp
+  // which compares according to LC_CTYPE category of the current locale.
+  // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the
+  // current locale.
+  static bool CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
+                                               const wchar_t* rhs);
+
+  // Returns true if and only if the given string ends with the given suffix,
+  // ignoring case. Any string is considered to end with an empty suffix.
+  static bool EndsWithCaseInsensitive(
+      const std::string& str, const std::string& suffix);
+
+  // Formats an int value as "%02d".
+  static std::string FormatIntWidth2(int value);  // "%02d" for width == 2
+
+  // Formats an int value to given width with leading zeros.
+  static std::string FormatIntWidthN(int value, int width);
+
+  // Formats an int value as "%X".
+  static std::string FormatHexInt(int value);
+
+  // Formats a uint32_t value as "%X".
+  static std::string FormatHexUInt32(uint32_t value);
+
+  // Formats a byte as "%02X".
+  static std::string FormatByte(unsigned char value);
+
+ private:
+  String();  // Not meant to be instantiated.
+};  // class String
+
+// Gets the content of the stringstream's buffer as an std::string.  Each '\0'
+// character in the buffer is replaced with "\\0".
+GTEST_API_ std::string StringStreamToString(::std::stringstream* stream);
+
+}  // namespace internal
+}  // namespace testing
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
--- a/gtestsuite/inc/gtest/internal/gtest-type-util.h
+++ b/gtestsuite/inc/gtest/internal/gtest-type-util.h
@@ -0,0 +1,183 @@
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Type utilities needed for implementing typed and type-parameterized
+// tests.
+
+// GOOGLETEST_CM0001 DO NOT DELETE
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
+
+#include "gtest/internal/gtest-port.h"
+
+// #ifdef __GNUC__ is too general here.  It is possible to use gcc without using
+// libstdc++ (which is where cxxabi.h comes from).
+# if GTEST_HAS_CXXABI_H_
+#  include <cxxabi.h>
+# elif defined(__HP_aCC)
+#  include <acxx_demangle.h>
+# endif  // GTEST_HAS_CXXABI_H_
+
+namespace testing {
+namespace internal {
+
+// Canonicalizes a given name with respect to the Standard C++ Library.
+// This handles removing the inline namespace within `std` that is
+// used by various standard libraries (e.g., `std::__1`).  Names outside
+// of namespace std are returned unmodified.
+inline std::string CanonicalizeForStdLibVersioning(std::string s) {
+  static const char kVersionedPrefix[] = "std::__";
+  // Lengths without the terminating NUL of each literal.
+  const std::string::size_type prefix_len = sizeof(kVersionedPrefix) - 1;
+  const std::string::size_type std_len = sizeof("std") - 1;
+
+  // Names that do not start with "std::__" are returned untouched.
+  if (s.compare(0, prefix_len, kVersionedPrefix) != 0) return s;
+
+  // Locate the "::" that closes the inline namespace; without one there is
+  // nothing to strip.
+  const std::string::size_type next_scope = s.find("::", prefix_len);
+  if (next_scope == std::string::npos) return s;
+
+  // Erase everything between the initial `std` and that second `::`.
+  s.erase(std_len, next_scope - std_len);
+  return s;
+}
+
+#if GTEST_HAS_RTTI
+// Returns a human-readable name of the type described by `type`.
+inline std::string GetTypeName(const std::type_info& type) {
+  const char* const name = type.name();
+#if GTEST_HAS_CXXABI_H_ || defined(__HP_aCC)
+  int status = 0;
+  // type.name() may be mangled (e.g. by gcc), so demangle it; the raw name
+  // is used instead when the demangler reports a non-zero status.
+#if GTEST_HAS_CXXABI_H_
+  using abi::__cxa_demangle;
+#endif  // GTEST_HAS_CXXABI_H_
+  char* const readable_name = __cxa_demangle(name, nullptr, nullptr, &status);
+  const std::string name_str(status == 0 ? readable_name : name);
+  free(readable_name);  // free() on a null pointer is a harmless no-op.
+  return CanonicalizeForStdLibVersioning(name_str);
+#else
+  return name;  // No demangler available: return the raw name unchanged.
+#endif  // GTEST_HAS_CXXABI_H_ || __HP_aCC
+}
+#endif  // GTEST_HAS_RTTI
+
+// GetTypeName<T>() returns a human-readable name of type T if and only if
+// RTTI is enabled, otherwise it returns a dummy type name.
+// NB: This function is also used in Google Mock, so don't move it inside of
+// the typed-test-only section below.
+template <typename T>
+std::string GetTypeName() {
+#if GTEST_HAS_RTTI
+  return GetTypeName(typeid(T));  // Delegates to the type_info overload.
+#else
+  return "<type>";
+#endif  // GTEST_HAS_RTTI
+}
+
+// A unique type indicating an empty node
+struct None {};
+
+// Spells a template template parameter that accepts one type argument.
+# define GTEST_TEMPLATE_ template <typename T> class
+
+// TemplateSel<Tmpl> represents the class template Tmpl, which must take
+// exactly one type parameter, as an ordinary type.
+// TemplateSel<Tmpl>::Bind<T>::type names Tmpl<T>, which makes it possible
+// to instantiate the template "selected" by TemplateSel<Tmpl>.
+//
+// This indirection simulates a typedef for class templates, something
+// C++ does not offer directly.
+template <GTEST_TEMPLATE_ Tmpl>
+struct TemplateSel {
+  template <typename T>
+  struct Bind {
+    using type = Tmpl<T>;
+  };
+};
+
+// Expands to the type obtained by binding T to the selector TmplSel.
+# define GTEST_BIND_(TmplSel, T) \
+  TmplSel::template Bind<T>::type
+
+// List of template selectors: Head is the selector for the first
+// template, Tail is the list formed by the remaining ones.
+template <GTEST_TEMPLATE_ First, GTEST_TEMPLATE_... Rest>
+struct Templates {
+  typedef TemplateSel<First> Head;
+  typedef Templates<Rest...> Tail;
+};
+
+// A one-element list ends with None as its Tail.
+template <GTEST_TEMPLATE_ First>
+struct Templates<First> {
+  typedef TemplateSel<First> Head;
+  typedef None Tail;
+};
+
+// Tuple-like type lists: Head is the first type, Tail is the list formed
+// by the remaining ones.
+template <typename First, typename... Rest>
+struct Types {
+  typedef First Head;
+  typedef Types<Rest...> Tail;
+};
+
+// A one-element list ends with None as its Tail.
+template <typename First>
+struct Types<First> {
+  typedef First Head;
+  typedef None Tail;
+};
+
+// Helper metafunctions to tell apart a single type from types
+// generated by ::testing::Types
+template <typename... Ts>
+struct ProxyTypeList {
+  typedef Types<Ts...> type;
+};
+
+// Trait that is true only for specializations of ProxyTypeList.
+template <typename>
+struct is_proxy_type_list : std::false_type {};
+
+template <typename... Ts>
+struct is_proxy_type_list<ProxyTypeList<Ts...>> : std::true_type {};
+
+// Produces a type list from T without nesting lists: when T already is a
+// ProxyTypeList its list is used as is, otherwise T is wrapped into a
+// one-element list.
+template <typename T>
+struct GenerateTypeList {
+ private:
+  typedef typename std::conditional<is_proxy_type_list<T>::value, T,
+                                    ProxyTypeList<T>>::type ListOrWrapped;
+
+ public:
+  typedef typename ListOrWrapped::type type;
+};
+
+}  // namespace internal
+
+template <typename... Ts>
+using Types = internal::ProxyTypeList<Ts...>;
+
+}  // namespace testing
+
+#endif  // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
--- a/gtestsuite/input.general
+++ b/gtestsuite/input.general
@@ -0,0 +1,63 @@
+# ----------------------------------------------------------------------
+#
+#  input.general
+#  BLIS GtestSuite
+#
+#  This file contains input values that control how BLIS operations are
+#  tested. Comments explain the purpose of each parameter as well as
+#  accepted values.
+#
+
+1       # Number of repeats per experiment (best result is reported)
+rc      # Matrix storage scheme(s) to test:
+        #   'c' = col-major storage; 'g' = general stride storage;
+        #   'r' = row-major storage
+cj      # Vector storage scheme(s) to test:
+        #   'c' = colvec / unit stride; 'j' = colvec / non-unit stride;
+        #   'r' = rowvec / unit stride; 'i' = rowvec / non-unit stride
+0       # Test all combinations of storage schemes?
+1       # Perform all tests with alignment?
+        #   '0' = do NOT align buffers/ldims; '1' = align buffers/ldims
+0       # Randomize vectors and matrices using:
+        #   '0' = real values on [-1,1];
+        #   '1' = powers of 2 in narrow precision range
+32      # General stride spacing (for cases when testing general stride)
+sdcz    # Datatype(s) to test:
+        #   's' = single real; 'c' = single complex;
+        #   'd' = double real; 'z' = double complex
+0       # Test gemm with mixed-domain operands?
+0       # Test gemm with mixed-precision operands?
+10      # Problem size: first to test
+100     # Problem size: maximum to test
+10      # Problem size: increment between experiments
+        # Complex level-3 implementations to test:
+0       #   3mh  ('1' = enable; '0' = disable)
+0       #   3m1  ('1' = enable; '0' = disable)
+0       #   4mh  ('1' = enable; '0' = disable)
+0       #   4m1b ('1' = enable; '0' = disable)
+0       #   4m1a ('1' = enable; '0' = disable)
+0       #   1m   ('1' = enable; '0' = disable)
+1       #   native ('1' = enable; '0' = disable)
+1       # Simulate application-level threading:
+        #   '1' = disable / use one testsuite thread;
+        #   'n' = enable and use n testsuite threads
+1       # Error-checking level:
+        #   '0' = disable error checking; '1' = full error checking
+i       # Reaction to test failure:
+        #   'i' = ignore; 's' = sleep() and continue; 'a' = abort
+0       # Output results in matlab/octave format? ('1' = yes; '0' = no)
+0       # Output results to stdout AND files? ('1' = yes; '0' = no)
+0       # api        : '0' = BLIS / '1' = CBLAS / '2' = BLAS
+0       # m/n/k      : '0' = Values where m=n=k  / '1' = Multiple combinations of m/n/k
+0       # alpha/beta : '0' = Fixed value   / '1' = Multiple values with combination
+0       # Integer bit-exactness testing
+        # '0' = 's/d/c/z' datatype testing
+        # '1' = Integer bit-exactness testing
+0       # '0' = Print only fail cases
+        # '1' = Print pass and fail cases
+0       # Bit-Reproducibility
+        # '0' = Disable
+        # '1' = Enable
+r       # lpgemm memformat reorder
+        # 'p' = No reorder
+        # 'r' = reorder
--- a/gtestsuite/input.operations
+++ b/gtestsuite/input.operations
@@ -0,0 +1,321 @@
+# --------------------------------------------------------------------------
+#
+#  input.operations
+#  BLIS GtestSuite
+#
+#  This file contains input values that control which BLIS operations are
+#  tested as well as how those test runs are parameterized. We will now
+#  describe how each section or line type may be edited.
+#
+#  ENABLING/DISABLING INDIVIDUAL OPERATION TESTS
+#    Given that an operation's section override switch is set to 1
+#    (enabled), whether or not that operation will get tested is
+#    determined by its local switch. For example, if the level-1v section
+#    override is set to 1, and there is a 1 on the line marked "addv",
+#    then the addv operation will be tested. Similarly, a 0 would cause
+#    addv to not be tested.
+#
+#  CHANGING PROBLEM SIZE/SHAPES TESTED
+#    The problem sizes tested by an operation are determined by the
+#    dimension specifiers on the line marked "dimensions: <spec_labels>".
+#    If, for example, <spec_labels> contains two dimension labels (e.g.
+#    "m n"), then the line should begin with two dimension specifiers.
+#    Dimension specifiers of -1 cause the corresponding dimension to be
+#    bound to the problem size, which is determined by values set in
+#    input.general. Positive values cause the corresponding dimension to
+#    be fixed to that value and held constant.
+#
+#    Examples of dimension specifiers (where the dimensions are m and n):
+#
+#       -1 -1     Dimensions m and n grow with problem size (resulting in
+#                 square matrices).
+#       -1 150    Dimension m grows with problem size and n is fixed at
+#                 150.
+#       -1 -2     Dimension m grows with problem size and n grows
+#                 proportional to half the problem size.
+#
+#  CHANGING PARAMETER COMBINATIONS TESTED
+#    The parameter combinations tested by an operation are determined by
+#    the parameter specifier characters on the line marked "parameters:
+#    <param_labels>". If, for example, <param_labels> contains two
+#    parameter labels (e.g. "transa conjx"), then the line should contain
+#    two parameter specifier characters. The '?' specifier character
+#    serves as a wildcard--it causes all possible values of that parameter
+#    to be tested. A character such as 'n' or 't' causes only that value
+#    to be tested.
+#
+#    Examples of parameter specifiers (where the parameters are transa
+#    and conjx):
+#
+#       ??        All combinations of the transa and conjx parameters are
+#                 tested: nn, nc, tn, tc, cn, cc, hn, hc.
+#       ?n        conjx is fixed to "no conjugate" but transa is allowed
+#                 to vary: nn, tn, cn, hn.
+#       hc        Only the case where transa is "Hermitian-transpose" and
+#                 conjx is "conjugate" is tested.
+#
+#    Here is a full list of the parameter types used by the various BLIS
+#    operations along with their possible character encodings:
+#
+#       side:   l,r      left, right
+#       uplo:   l,u      lower, upper
+#       trans:  n,t,c,h  no transpose, transpose, conjugate, Hermitian-
+#                        transpose (i.e. conjugate-transpose)
+#       conj:   n,c      no conjugate, conjugate
+#       diag:   n,u      non-unit diagonal, unit diagonal
+#
+# --------------------------------------------------------------------------
+
+# --- Utility --------------------------------------------------------------
+
+0        # randv
+-1       #   dimensions: m
+
+0        # randm
+-1 -1    #   dimensions: m n
+
+
+# --- Level-1v -------------------------------------------------------------
+
+0        # addv
+-1       #   dimensions: m
+?        #   parameters: conjx
+
+0        # amaxv
+-1       #   dimensions: m
+
+0        # axpbyv
+-1       #   dimensions: m
+?        #   parameters: conjx
+
+0        # axpyv
+-1       #   dimensions: m
+?        #   parameters: conjx
+
+0        # copyv
+-1       #   dimensions: m
+?        #   parameters: conjx
+
+0        # dotv
+-1       #   dimensions: m
+??       #   parameters: conjx conjy
+
+0        # dotxv
+-1       #   dimensions: m
+??       #   parameters: conjx conjy
+
+0        # normfv
+-1       #   dimensions: m
+
+0        # scalv
+-1       #   dimensions: m
+?        #   parameters: conjbeta
+
+0        # scal2v
+-1       #   dimensions: m
+?        #   parameters: conjx
+
+0        # setv
+-1       #   dimensions: m
+
+0        # subv
+-1       #   dimensions: m
+?        #   parameters: conjx
+
+0        # xpbyv
+-1       #   dimensions: m
+?        #   parameters: conjx
+
+
+# --- Level-1m -------------------------------------------------------------
+
+0        # addm
+-1 -2    #   dimensions: m n
+?        #   parameters: transa
+
+0        # axpym
+-1 -1    #   dimensions: m n
+?        #   parameters: transa
+
+0        # copym
+-1 -2    #   dimensions: m n
+?        #   parameters: transa
+
+0        # normfm
+-1 -2    #   dimensions: m n
+
+0        # scalm
+-1 -2    #   dimensions: m n
+?        #   parameters: conjbeta
+
+0        # scal2m
+-1 -2    #   dimensions: m n
+?        #   parameters: transa
+
+0        # setm
+-1 -2    #   dimensions: m n
+
+0        # subm
+-1 -2    #   dimensions: m n
+?        #   parameters: transa
+
+0        # xpbym
+-1 -1    #   dimensions: m n
+?        #   parameters: transa
+
+
+# --- Level-1f kernels -----------------------------------------------------
+
+0        # axpy2v
+-1       #   dimensions: m
+??       #   parameters: conjx conjy
+
+0        # dotaxpyv
+-1       #   dimensions: m
+???      #   parameters: conjxt conjx conjy
+
+0        # axpyf
+-1       #   dimensions: m
+??       #   parameters: conja conjx
+
+0        # dotxf
+-1       #   dimensions: m
+??       #   parameters: conjat conjx
+
+0        # dotxaxpyf
+-1       #   dimensions: m
+????     #   parameters: conjat conja conjw conjx
+
+
+# --- Level-2 --------------------------------------------------------------
+
+0        # gemv
+-1 -2    #   dimensions: m n
+??       #   parameters: transa conjx
+
+0        # ger
+-1 -2    #   dimensions: m n
+??       #   parameters: conjx conjy
+
+0        # hemv
+-1       #   dimensions: m
+???      #   parameters: uploa conja conjx
+
+0        # her
+-1       #   dimensions: m
+??       #   parameters: uploc conjx
+
+0        # her2
+-1       #   dimensions: m
+???      #   parameters: uploc conjx conjy
+
+0        # symv
+-1       #   dimensions: m
+???      #   parameters: uploa conja conjx
+
+0        # syr
+-1       #   dimensions: m
+??       #   parameters: uploc conjx
+
+0        # syr2
+-1       #   dimensions: m
+???      #   parameters: uploc conjx conjy
+
+0        # trmv
+-1       #   dimensions: m
+???      #   parameters: uploa transa diaga
+
+0        # trsv
+-1       #   dimensions: m
+???      #   parameters: uploa transa diaga
+
+
+# --- Level-3 micro-kernels ------------------------------------------------
+
+0        # gemm
+-1       #   dimensions: k
+
+0        # trsm
+?        #   parameters: uploa
+
+0        # gemmtrsm
+-1       #   dimensions: k
+?        #   parameters: uploa
+
+
+# --- Level-3 --------------------------------------------------------------
+
+1        # gemm
+-1 -1 -1 #   dimensions: m n k
+??       #   parameters: transa transb
+
+0        # gemmt
+-1 -1    #   dimensions: m k
+???      #   parameters: uploc transa transb
+
+0        # hemm
+-1 -1    #   dimensions: m n
+??       #   parameters: side uploa
+
+0        # herk
+-1 -1    #   dimensions: m k
+??       #   parameters: uploc trans
+
+0        # her2k
+-1 -1    #   dimensions: m k
+??       #   parameters: uploc trans
+
+0        # symm
+-1 -1    #   dimensions: m n
+??       #   parameters: side uploa
+
+0        # syrk
+-1 -1    #   dimensions: m k
+??       #   parameters: uploc trans
+
+0        # syr2k
+-1 -1    #   dimensions: m k
+??       #   parameters: uploc trans
+
+0        # trmm
+-1 -1    #   dimensions: m n
+????     #   parameters: side uploa transa diaga
+
+0        # trmm3
+-1 -1    #   dimensions: m n
+?????    #   parameters: side uploa transa diaga transb
+
+0        # trsm
+-1 -1    #   dimensions: m n
+????     #   parameters: side uploa transa diaga
+
+# --- lpgemm --------------------------------------------------------------
+
+0        # gemm_u8s8s32os32
+-1 -1 -1 #   dimensions: m n k
+nn       #   parameters: transa transb
+
+0        # gemm_u8s8s32os8
+-1 -1 -1 #   dimensions: m n k
+nn       #   parameters: transa transb
+
+0        # gemm_f32f32f32of32
+-1 -1 -1 #   dimensions: m n k
+nn       #   parameters: transa transb
+
+0        # gemm_u8s8s16os16
+-1 -1 -1 #   dimensions: m n k
+nn       #   parameters: transa transb
+
+0        # gemm_u8s8s16os8
+-1 -1 -1 #   dimensions: m n k
+nn       #   parameters: transa transb
+
+0        # gemm_bf16bf16f32of32
+-1 -1 -1 #   dimensions: m n k
+nn       #   parameters: transa transb
+
+0        # gemm_bf16bf16f32obf16
+-1 -1 -1 #   dimensions: m n k
+nn       #   parameters: transa transb
+
--- a/gtestsuite/inputfile.txt
+++ b/gtestsuite/inputfile.txt
@@ -0,0 +1,3 @@
+dgemm_ D N N 1000 3000 2000 0.900000 0.000000 4000 5000 -1.100000 0.000000 6000
+dgemv_ D N 1 14 -1.000000 0.000000 10000 1 1.000000 0.000000 1
+sgemm_ S N N 7 1026 1026 0.900000 0.000000 1026 1026 -1.100000 0.000000 1026
--- a/gtestsuite/lib/libgtest.a
+++ b/gtestsuite/lib/libgtest.a
--- a/gtestsuite/src/blis_api.cpp
+++ b/gtestsuite/src/blis_api.cpp
--- a/gtestsuite/src/blis_api.h
+++ b/gtestsuite/src/blis_api.h
@@ -0,0 +1,737 @@
+#ifndef BLIS_API_H
+#define BLIS_API_H
+
+#include "blis_utils.h"
+#include "blis_inpfile.h"
+
+char* libblis_test_get_result
+     (
+       double          resid,
+       const thresh_t* thresh,
+       char*           dc_str,
+       test_params_t*  params
+     );
+
+void fill_string_with_n_spaces( char* str, unsigned int n_spaces );
+
+void libblis_test_build_function_string
+     (
+       char*        prefix_str,
+       opid_t       opid,
+       ind_t        method,
+       char*        ind_str,
+       const char*  op_str,
+       unsigned int is_mixed_dt,
+       char*        dc_str,
+       unsigned int n_param_combos,
+       char*        pc_str,
+       char*        sc_str,
+       char*        funcname_str
+     );
+
+void libblis_test_build_dims_string(test_op_t* op, tensor_t* dim, char* dims_str);
+
+// Forward the residual check to libblis_test_get_result() and return its result.
+static char* libblis_test_result( double resid, const thresh_t* thresh,
+                                char* dc_str, test_params_t* params )  {
+    return libblis_test_get_result ( resid, thresh, dc_str, params );
+}
+
+// Compose the name string of the test case selected by dci/pci/sci and
+// write it to fucnptr through libblis_test_build_function_string().
+static void libblis_build_function_string( test_params_t* params,
+  opid_t opid, const char *op_str, ind_t method, unsigned int dci,
+  unsigned int pci, unsigned int sci, char* fucnptr ) {
+    // The prefix follows params->api; anything but CBLAS/BLAS uses BLIS.
+    char* prefix = (char*)BLIS_FILEDATA_PREFIX_STR;
+    if( params->api == API_CBLAS ) {
+        prefix = (char*)CBLAS_FILEDATA_PREFIX_STR;
+    } else if( params->api == API_BLAS ) {
+        prefix = (char*)BLAS_FILEDATA_PREFIX_STR;
+    }
+
+    // Only non-native methods are asked for an impl string; BLIS_NAT passes NULL.
+    char* impl_str = NULL;
+    if ( method != BLIS_NAT ) impl_str = bli_ind_get_impl_string( method );
+
+    // Build a string unique to the operation, datatype combo,
+    // parameter combo, and storage combo being tested.
+    libblis_test_build_function_string( prefix, opid, method, impl_str,
+         op_str, params->is_mixed_dt, params->dc_str[dci],
+         params->n_param_combos, params->pc_str[pci], params->sc_str[sci],
+         fucnptr );
+}
+
+// Print the result line of one test case and count it if it is a failure.
+static void displayProps( const char* fucnptr, test_params_t* prms, test_op_t* op,
+    tensor_t* dim, double& resid, char *ps, printres_t *ptr)
+{
+    char blank_str[32] = "";
+    char dims_str[64];
+    string sas = ps ;
+    string sfs = BLIS_TEST_FAIL_STRING ;
+    string sos = BLIS_TEST_OVERFLOW_STRING;
+    string sus = BLIS_TEST_UNDERFLOW_STRING;
+    string sps = BLIS_TEST_PASS_STRING;
+    string sws = BLIS_TEST_WARN_STRING;
+
+    // Pad the name to MAX_FUNC_STRING_LENGTH columns. Clamp the pad: a longer
+    // name would wrap the unsigned subtraction; blank_str holds 31 spaces.
+    const size_t name_len = strlen( fucnptr );
+    size_t n_spaces = ( name_len < (size_t)MAX_FUNC_STRING_LENGTH )
+                      ? (size_t)MAX_FUNC_STRING_LENGTH - name_len : 0;
+    if ( n_spaces > sizeof( blank_str ) - 1 )
+        n_spaces = sizeof( blank_str ) - 1;
+    fill_string_with_n_spaces( blank_str, (unsigned int)n_spaces );
+
+    // Print all dimensions to a single string.
+    libblis_test_build_dims_string( op, dim, dims_str );
+
+    // Passing and warning results are shown only when passflag is set.
+    if( prms->passflag && (( sas == sps) || ( sas == sws)) )
+    {
+        fprintf( stdout,
+                 "%s%s      %s  %8.2le   %s\n",
+                 fucnptr, blank_str,
+                 dims_str, resid,
+                 ps );
+    }
+
+    // Failures always print; over/underflow only when oruflw is set.
+    if(( sas == sfs) || ( prms->oruflw && (( sas == sos) || ( sas == sus))))
+    {
+        fprintf( stdout,
+                 "%s%s      %s  %8.2le   %s\n",
+                 fucnptr, blank_str,
+                 dims_str, resid,
+                 ps );
+        ptr->cntf++;
+    }
+}
+
+double libblis_test_op_randv
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          sc_str,
+       tensor_t*      dim
+     );
+
+double libblis_test_op_randm
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          sc_str,
+       tensor_t*      dim
+     );
+
+double libblis_test_op_addv
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim
+     );
+
+double libblis_test_op_amaxv
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim
+     );
+
+double libblis_test_op_axpbyv
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_axpyv
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha
+     );
+
+double libblis_test_op_copyv
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim
+     );
+
+double libblis_test_op_dotv
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim
+     );
+
+double libblis_test_op_dotxv
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_normfv
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim
+     );
+
+double libblis_test_op_scal2v
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha
+     );
+
+double libblis_test_op_scalv
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha
+     );
+
+double libblis_test_op_setv
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim
+     );
+
+double libblis_test_op_xpbyv
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha
+     );
+
+double libblis_test_op_subv
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim
+     );
+
+double libblis_test_op_axpy2v
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_dotaxpyv
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha
+     );
+
+double libblis_test_op_axpyf
+     (
+       test_params_t* params,
+       test_op_t*     op,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha
+     );
+
+double libblis_test_op_dotxf
+     (
+       test_params_t* params,
+       test_op_t*     op,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_dotxaxpyf
+     (
+       test_params_t* params,
+       test_op_t*     op,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_addm
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim
+     );
+
+double libblis_test_op_axpym
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha
+     );
+
+double libblis_test_op_copym
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim
+     );
+
+double libblis_test_op_normfm
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim
+     );
+
+double libblis_test_op_scal2m
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha
+     );
+
+double libblis_test_op_scalm
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha
+     );
+
+double libblis_test_op_setm
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim
+     );
+
+double libblis_test_op_subm
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim
+     );
+
+double libblis_test_op_xpbym
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha
+     );
+
+double libblis_test_op_gemv
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_ger
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha
+     );
+
+double libblis_test_op_hemv
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_her
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha
+     );
+
+double libblis_test_op_her2
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha
+     );
+
+double libblis_test_op_symv
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_syr
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha
+     );
+
+double libblis_test_op_syr2
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha
+     );
+
+double libblis_test_op_trmv
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha
+     );
+
+double libblis_test_op_trsv
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha
+     );
+
+double libblis_test_op_gemm
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_gemmt
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_hemm
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_herk
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_her2k
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_symm
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_syrk
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_syr2k
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_trmm
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha
+     );
+
+double libblis_test_op_trmm3
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_trsm
+     (
+       test_params_t* params,
+       iface_t        iface,
+       char*          dc_str,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha
+     );
+
+double libblis_test_op_gemm_u8s8s32os32
+     (
+       test_params_t* params,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_gemm_u8s8s32os8
+     (
+       test_params_t* params,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_gemm_f32f32f32of32
+     (
+       test_params_t* params,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_gemm_u8s8s16os8
+     (
+       test_params_t* params,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_gemm_u8s8s16os16
+     (
+       test_params_t* params,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_gemm_bf16bf16f32obf16
+     (
+       test_params_t* params,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+double libblis_test_op_gemm_bf16bf16f32of32
+     (
+       test_params_t* params,
+       char*          pc_str,
+       char*          sc_str,
+       tensor_t*      dim,
+       atom_t         alpha,
+       atom_t         beta
+     );
+
+#endif  // BLIS_API_H
+
--- a/gtestsuite/src/blis_inpfile.cpp
+++ b/gtestsuite/src/blis_inpfile.cpp
--- a/gtestsuite/src/blis_inpfile.h
+++ b/gtestsuite/src/blis_inpfile.h
@@ -0,0 +1,490 @@
+#ifndef BLIS_INPFILE_H
+#define BLIS_INPFILE_H
+
+#include "blis_test.h"
+
+// Input readers (declarations only). `string` is used unqualified, so a
+// using-declaration must already be in effect through blis_test.h
+// (assumption -- confirm).
+void libblis_read_inpprms
+     ( string str, test_params_t* params, test_ops_t* ops, printres_t* pfr );
+
+void libblis_read_inpops
+     ( string ss, test_params_t* params, test_ops_t* ops,
+       string api, printres_t* pfr );
+
+void libblis_read_api
+     ( test_ops_t* ops, opid_t opid, dimset_t dimset,
+       unsigned int n_params, test_op_t* op );
+
+// Per-operation parameter readers, first group (randv ... subv).
+// Every reader takes (str, op, params, pfr) and returns int; the meaning of
+// the return value is defined by the implementations, not by this header.
+int libblis_test_read_randv_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_randm_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_addv_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_amaxv_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_axpbyv_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_axpyv_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_copyv_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_dotv_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_dotxv_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_normfv_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_scal2v_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_scalv_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_setv_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_xpbyv_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_subv_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+
+// Per-operation parameter readers, second group (axpyf ... dotxaxpyf).
+// Same (str, op, params, pfr) -> int shape as the other readers.
+int libblis_test_read_axpyf_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_axpy2v_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_dotxf_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_dotaxpyv_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_dotxaxpyf_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+
+// Per-operation parameter readers, third group (addm ... xpbym).
+// Same (str, op, params, pfr) -> int shape as the other readers.
+int libblis_test_read_addm_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_axpym_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_copym_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_normfm_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_scal2m_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_scalm_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_setm_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_subm_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_xpbym_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+
+
+// Per-operation parameter readers, fourth group (gemv ... trsv).
+// Same (str, op, params, pfr) -> int shape as the other readers.
+int libblis_test_read_gemv_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_ger_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_hemv_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_her_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_her2_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_symv_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_syr_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_syr2_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_trmv_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_trsv_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+
+// Per-operation parameter readers, fifth group (gemm ... trsm).
+// Same (str, op, params, pfr) -> int shape as the other readers.
+int libblis_test_read_gemm_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_gemmt_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_hemm_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_herk_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_her2k_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_symm_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_syrk_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_syr2k_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_trmm_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+int libblis_test_read_trsm_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+
+// Parameter readers for the gemm_<types> variants, one declaration per
+// variant. Each takes (str, op, params, pfr) and returns int.
+// The u8s8s32os32 reader used to be declared twice in this list; the
+// redundant second declaration has been removed.
+int libblis_test_read_gemm_u8s8s32os32_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+
+int libblis_test_read_gemm_u8s8s32os8_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+
+int libblis_test_read_gemm_f32f32f32of32_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+
+int libblis_test_read_gemm_u8s8s16os16_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+
+int libblis_test_read_gemm_u8s8s16os8_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+
+int libblis_test_read_gemm_bf16bf16f32of32_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+
+int libblis_test_read_gemm_bf16bf16f32obf16_params
+     ( char* str, test_op_t* op, test_params_t* params, printres_t* pfr );
+
+#endif  // BLIS_INPFILE_H
+
--- a/gtestsuite/src/blis_utils.cpp
+++ b/gtestsuite/src/blis_utils.cpp
--- a/gtestsuite/src/blis_utils.h
+++ b/gtestsuite/src/blis_utils.h
@@ -0,0 +1,503 @@
+#ifndef BLIS_UTILS_H
+#define BLIS_UTILS_H
+
+#include <limits>
+#include <math.h>
+
+#include "blis_test.h"
+
+using namespace std;
+
+// |re| + |im| of a struct exposing .real/.imag. Each macro argument is
+// parenthesized so that expressions such as *p or a[i] expand correctly;
+// note that the argument is still evaluated more than once.
+#define abscomplex(x)  (abs((x).real) + abs((x).imag))
+
+// Real and imaginary parts of the complex product x*y.
+#define mulr(x,y)  (( (x).real * (y).real ) - ( (x).imag * (y).imag ))
+#define muli(x,y)  (( (x).real * (y).imag ) + ( (x).imag * (y).real ))
+
+#define ELD    8   // padding width: test_mmfill/test_mmchk use ld-ELD as the payload leading dimension and scan/fill ELD extra rows and columns
+#define PAT32  0x78abcdef   // not referenced in this header; presumably a 32-bit fill pattern -- TODO confirm
+#define PAT64  0x0123456789abcdef   // not referenced in this header; presumably a 64-bit fill pattern -- TODO confirm
+
+// Returns a value of type T whose real part equals x.real and whose
+// imaginary part is zero. T must be brace-initializable from two numbers
+// and expose .real/.imag members.
+template <typename T>
+T real( T x )
+{
+    T only_real = { 0.0, 0.0 };   // imag stays at its zero initial value
+    only_real.real = x.real;
+    return only_real;
+}
+
+// Complex conjugate: keeps x.real and negates x.imag.
+template <typename T>
+T conjugate( T x )
+{
+    T conj = { 0.0, 0.0 };
+    conj.imag = -(x.imag);
+    conj.real =   x.real;
+    return conj;
+}
+
+// Component-wise complex sum xx + yy.
+template <typename T>
+T addc( T xx, T yy )
+{
+    T sum = { 0.0, 0.0 };
+    sum.imag = xx.imag + yy.imag;
+    sum.real = xx.real + yy.real;
+    return sum;
+}
+
+// Component-wise complex difference xx - yy.
+template <typename T>
+T subc( T xx, T yy )
+{
+    T diff = { 0.0, 0.0 };
+    diff.imag = xx.imag - yy.imag;
+    diff.real = xx.real - yy.real;
+    return diff;
+}
+
+// Complex product xx * yy, written out directly:
+//   real = xx.real*yy.real - xx.imag*yy.imag
+//   imag = xx.real*yy.imag + xx.imag*yy.real
+template <typename T>
+T mulc( T xx, T yy )
+{
+    T prod = { 0.0, 0.0 };
+    prod.real = (( xx.real * yy.real ) - ( xx.imag * yy.imag ));
+    prod.imag = (( xx.real * yy.imag ) + ( xx.imag * yy.real ));
+    return prod;
+}
+
+// Complex quotient yy / xx using a scaled denominator.
+//   T  complex struct type with .real/.imag
+//   U  scalar type used for the intermediates (cannot be deduced; callers
+//      must name it explicitly)
+// The divisor components are divided by s = bli_fmaxabs(xx.real, xx.imag)
+// (presumably max(|re|,|im|) -- confirm) before the denominator is formed.
+// NOTE(review): a zero divisor makes s zero and the scaling divide by zero;
+// confirm callers never pass xx == 0.
+template <typename T, typename U>
+T divc( T yy, T xx )
+{
+    T quot = { 0.0, 0.0 };
+    const U scale     = bli_fmaxabs( (xx.real),(xx.imag) );
+    const U re_scaled = ( (xx.real)/scale );
+    const U im_scaled = ( (xx.imag)/scale );
+    const U denom     = ( (re_scaled * xx.real) + (im_scaled * xx.imag) );
+    quot.real = ( ((yy.real * re_scaled) + (yy.imag * im_scaled))/denom );
+    quot.imag = ( ((yy.imag * re_scaled) - (yy.real * im_scaled))/denom );
+    return quot;
+}
+
+// Complex quotient yy / xx by the textbook formula (no scaling):
+//   denom = xx.real^2 + xx.imag^2
+//   real  = (xx.real*yy.real + xx.imag*yy.imag) / denom
+//   imag  = (yy.imag*xx.real - yy.real*xx.imag) / denom
+// U is the scalar type of the denominator and must be named by the caller.
+template <typename T, typename U>
+T divct( T yy, T xx )
+{
+    const U denom = ( (xx.real * xx.real) + (xx.imag * xx.imag) );
+    T quot = { 0.0, 0.0 };
+    quot.real = ( ((xx.real * yy.real) + (xx.imag * yy.imag))/denom );
+    quot.imag = ( ((yy.imag * xx.real) - (yy.real * xx.imag))/denom );
+    return quot;
+}
+
+// Accumulated element-wise difference between two strided real vectors.
+//
+//   len   number of elements compared
+//   incy  stride, in elements, applied to both act and ref
+//   act   values under test
+//   ref   reference values
+//
+// Returns sum_j |act[j*incy] - ref[j*incy]|. The absolute value is taken
+// per element: summing signed differences first would let errors of
+// opposite sign cancel and report a mismatching vector as identical.
+// The counters are dim_t so they match the types of len and incy.
+template <typename T>
+double computediffrv( dim_t len, dim_t incy, T *act, T *ref )
+{
+    double resid = 0.0;
+    dim_t  jy    = 0;
+    for( dim_t j = 0 ; j < len ; j++ )
+    {
+        auto av = ref[jy];
+        auto xc = act[jy];
+        resid += fabs( xc - av );
+        jy = jy + incy;
+    }
+    return resid;
+}
+
+// Accumulated element-wise difference between two strided complex vectors.
+//
+//   len   number of elements compared
+//   incy  stride, in elements, applied to both act and ref
+//   act   values under test (elements expose .real/.imag)
+//   ref   reference values
+//
+// Returns sum_j ( |d_j.real| + |d_j.imag| ) with d_j = act[j*incy] - ref[j*incy].
+// Absolute values are taken per component so that neither opposite-signed
+// errors nor real-vs-imaginary errors can cancel each other.
+// The counters are dim_t so they match the types of len and incy.
+template <typename T>
+double computediffiv( dim_t len, dim_t incy, T *act, T *ref )
+{
+    double resid = 0.0;
+    dim_t  jy    = 0;
+    for( dim_t j = 0 ; j < len ; j++ )
+    {
+        auto av = ref[jy];
+        auto xc = act[jy];
+        resid += fabs( xc.real - av.real );
+        resid += fabs( xc.imag - av.imag );
+        jy = jy + incy;
+    }
+    return resid;
+}
+
+// Accumulated element-wise difference between two real m x n matrices that
+// share the same row stride (rsc) and column stride (csc).
+//
+// Returns sum_{i,j} |act(i,j) - ref(i,j)|. The absolute value is taken per
+// element so that differences of opposite sign cannot cancel and mask a
+// mismatch. Loop counters are dim_t to match m, n and the strides.
+template <typename T>
+double computediffrm( dim_t m,dim_t n, T *act, T *ref, dim_t rsc, dim_t csc )
+{
+    double resid = 0.0;
+    for( dim_t i = 0 ; i < m ; i++ )
+    {
+        for( dim_t j = 0 ; j < n ; j++ )
+        {
+            auto av = ref[ i*rsc + j*csc ];
+            auto xc = act[ i*rsc + j*csc ];
+            resid += fabs( xc - av );
+        }
+    }
+    return resid;
+}
+
+// Accumulated element-wise difference between two complex m x n matrices
+// that share the same row stride (rsc) and column stride (csc).
+//
+// Returns sum_{i,j} ( |d.real| + |d.imag| ) with d = act(i,j) - ref(i,j).
+// Absolute values are taken per component so that neither opposite-signed
+// errors nor real-vs-imaginary errors can cancel each other.
+// Loop counters are dim_t to match m, n and the strides.
+template <typename T>
+double computediffim( dim_t m,dim_t n, T *act, T *ref, dim_t rsc, dim_t csc )
+{
+    double resid = 0.0;
+    for( dim_t i = 0 ; i < m ; i++ )
+    {
+        for( dim_t j = 0 ; j < n ; j++ )
+        {
+            auto av = ref[ i*rsc + j*csc ];
+            auto xc = act[ i*rsc + j*csc ];
+            resid += fabs( xc.real - av.real );
+            resid += fabs( xc.imag - av.imag );
+        }
+    }
+    return resid;
+}
+
+// Scans a strided real vector for the first element that is NaN or lies
+// outside the range selected by flg.
+//   flg == BLIS_OVERFLOW : element flagged when |x| > numeric_limits<T>::max()
+//   any other flg        : element flagged when |x| < numeric_limits<T>::min()
+// Returns abs() of the first flagged element, or 0.0 when none is flagged.
+// NOTE(review): for floating-point T, numeric_limits<T>::min() is the
+// smallest positive normal value, so an exact zero is flagged in the
+// second mode and yields a 0.0 return -- confirm this is intended.
+template <typename T>
+double libblis_vector_check_real( vflg_t flg, dim_t len, dim_t incy, T *buf )
+{
+    const bool   overflow = ( flg == BLIS_OVERFLOW );
+    const T      limit    = overflow ? (std::numeric_limits<T>::max)()
+                                     : (std::numeric_limits<T>::min)();
+    unsigned int pos      = 0;
+
+    for( unsigned int k = 0 ; k < len ; k++ )
+    {
+        auto elem = buf[pos];
+        if( (isnan(elem)) ||
+            ( overflow ? (fabs(elem) > limit) : (fabs(elem) < limit) ) )
+        {
+            return abs(elem);
+        }
+        pos = pos + incy;
+    }
+    return 0.0;
+}
+
+// Complex counterpart of the real vector check. T is the complex struct
+// type, U the scalar type of its components.
+//   flg == BLIS_OVERFLOW : flagged when either component is NaN or has
+//                          magnitude above numeric_limits<U>::max()
+//   any other flg        : flagged when either component is NaN or has
+//                          magnitude below numeric_limits<U>::min()
+// For the first flagged element the component with the larger magnitude is
+// reported through abs(); when the magnitude comparison is false (tie or
+// NaN) the overflow mode reports imag and the other mode reports real.
+// Returns 0.0 when nothing is flagged.
+template <typename T, typename U>
+double libblis_vector_check_cmplx( vflg_t flg, dim_t len, dim_t incy, T *buf )
+{
+    const bool   overflow = ( flg == BLIS_OVERFLOW );
+    const U      limit    = overflow ? (std::numeric_limits<U>::max)()
+                                     : (std::numeric_limits<U>::min)();
+    unsigned int pos      = 0;
+
+    for( unsigned int k = 0 ; k < len ; k++ )
+    {
+        auto elem = buf[pos];
+        const bool flagged = overflow
+            ? ( (isnan(elem.real) || (isnan(elem.imag))) ||
+                (fabs(elem.real) > limit) || (fabs(elem.imag) > limit) )
+            : ( (isnan(elem.real) || (isnan(elem.imag))) ||
+                (fabs(elem.real) < limit) || (fabs(elem.imag) < limit) );
+        if( flagged )
+        {
+            double picked;
+            if( overflow )
+                picked = (fabs(elem.real) > fabs(elem.imag)) ? elem.real : elem.imag;
+            else
+                picked = (fabs(elem.real) < fabs(elem.imag)) ? elem.imag : elem.real;
+            return abs(picked);
+        }
+        pos = pos + incy;
+    }
+    return 0.0;
+}
+
+// Scans an m x n real matrix (row stride rsc, column stride csc), row by
+// row, for the first element that is NaN or outside the selected range.
+//   flg == BLIS_OVERFLOW : flagged when |x| > numeric_limits<T>::max()
+//   any other flg        : flagged when |x| < numeric_limits<T>::min()
+// Returns abs() of the first flagged element, or 0.0 when none is flagged.
+// NOTE(review): in the second mode an exact zero is flagged too (min() is
+// the smallest positive normal value for floating-point T) -- confirm.
+template <typename T>
+double libblis_matrix_check_real( vflg_t flg, T* buf, dim_t m, dim_t n,
+                                  dim_t rsc, dim_t csc )
+{
+    const bool overflow = ( flg == BLIS_OVERFLOW );
+    const T    limit    = overflow ? (std::numeric_limits<T>::max)()
+                                   : (std::numeric_limits<T>::min)();
+
+    for( unsigned int r = 0 ; r < m ; r++ )
+    {
+        for( unsigned int c = 0 ; c < n ; c++ )
+        {
+            auto elem = buf[ r*rsc + c*csc ];
+            if( (isnan(elem)) ||
+                ( overflow ? (fabs(elem) > limit) : (fabs(elem) < limit) ) )
+            {
+                return abs(elem);
+            }
+        }
+    }
+    return 0.0;
+}
+
+// Complex counterpart of the real matrix check. T is the complex struct
+// type, U the scalar type of its components; the matrix is walked row by
+// row using strides rsc/csc.
+//   flg == BLIS_OVERFLOW : flagged when either component is NaN or has
+//                          magnitude above numeric_limits<U>::max()
+//   any other flg        : flagged when either component is NaN or has
+//                          magnitude below numeric_limits<U>::min()
+// For the first flagged element the component with the larger magnitude is
+// reported through abs(); when the magnitude comparison is false (tie or
+// NaN) the overflow mode reports imag and the other mode reports real.
+// Returns 0.0 when nothing is flagged.
+template <typename T, typename U>
+double libblis_matrix_check_cmplx(vflg_t flg, T* buf, dim_t m, dim_t n,
+                                                     dim_t rsc, dim_t csc)
+{
+    const bool overflow = ( flg == BLIS_OVERFLOW );
+    const U    limit    = overflow ? (std::numeric_limits<U>::max)()
+                                   : (std::numeric_limits<U>::min)();
+
+    for( unsigned int r = 0 ; r < m ; r++ )
+    {
+        for( unsigned int c = 0 ; c < n ; c++ )
+        {
+            auto elem = buf[ r*rsc + c*csc ];
+            const bool flagged = overflow
+                ? ( (isnan(elem.real) || (isnan(elem.imag))) ||
+                    (fabs(elem.real) > limit) || (fabs(elem.imag) > limit) )
+                : ( (isnan(elem.real) || (isnan(elem.imag))) ||
+                    (fabs(elem.real) < limit) || (fabs(elem.imag) < limit) );
+            if( flagged )
+            {
+                double picked;
+                if( overflow )
+                    picked = (fabs(elem.real) > fabs(elem.imag)) ? elem.real : elem.imag;
+                else
+                    picked = (fabs(elem.real) < fabs(elem.imag)) ? elem.imag : elem.real;
+                return abs(picked);
+            }
+        }
+    }
+    return 0.0;
+}
+
+// Replaces every element of the m x n matrix X (row stride rs, column
+// stride cs) with its complex conjugate, in place.
+template <typename T>
+void conjugatematrix(T* X, dim_t m, dim_t n, dim_t rs, dim_t cs)
+{
+    for( dim_t row = 0 ; row < m ; row++ )
+    {
+        for( dim_t col = 0 ; col < n ; col++ )
+        {
+            const dim_t at = row*rs + col*cs;
+            X[at] = conjugate<T>( X[at] );
+        }
+    }
+}
+
+
+// Builds a padded copy of src inside dst.
+//   dst  destination with leading dimension ld
+//   src  source with leading dimension ld-ELD
+//   m,n  payload extents; they are exchanged when n equals ld-ELD so that
+//        m indexes the contiguous (leading) dimension
+//   val  value written to every cell before the payload is copied in
+// First the whole (m+ELD) x (n+ELD) region of dst is set to val, then the
+// m x n payload is copied from src, leaving val in the padding cells.
+template <typename T>
+void test_mmfill( T* dst, T* src, f77_int m, f77_int n, f77_int ld, T val )
+{
+  const f77_int src_ld = ld-ELD;
+
+  if( n == src_ld )
+  {
+     const f77_int old_n = n;
+     n = m;
+     m = old_n;
+  }
+
+  const f77_int padded_rows = m+ELD;
+  const f77_int padded_cols = n+ELD;
+
+  // Flood the padded region with the guard value.
+  for( f77_int col = 0 ; col < padded_cols ; col++ )
+  {
+    for( f77_int row = 0 ; row < padded_rows ; row++ )
+    {
+      dst[ row + col*ld ] = val;
+    }
+  }
+
+  // Overlay the payload.
+  for( f77_int col = 0 ; col < n ; col++ )
+  {
+    for( f77_int row = 0 ; row < m ; row++ )
+    {
+      dst[ row + col*ld ] = src[ row + col*src_ld ];
+    }
+  }
+}
+
+// Verifies that the padding cells around an m x n payload in dst (leading
+// dimension ld) still hold val. m and n are exchanged when n equals ld-ELD,
+// mirroring test_mmfill. Two regions are scanned in order:
+//   1. rows m .. m+ELD-1 of columns 0 .. n-1
+//   2. rows 0 .. m+ELD-1 of columns n .. n+ELD-1
+// On the first mismatch "Invalid Access" is printed and val is returned;
+// otherwise 0 is returned.
+// NOTE(review): with val == 0 the failure and success returns coincide --
+// confirm callers never use 0 as the guard value.
+template <typename T>
+double test_mmchk( T* dst, f77_int m, f77_int n, f77_int ld, T val )
+{
+  const f77_int payload_ld = ld-ELD;
+
+  if( n == payload_ld )
+  {
+     const f77_int old_n = n;
+     n = m;
+     m = old_n;
+  }
+
+  // True when every cell in [row_begin, m+ELD) x [col_begin, col_end) equals val.
+  auto region_intact = [&]( f77_int col_begin, f77_int col_end, f77_int row_begin ) -> bool
+  {
+    for( f77_int col = col_begin ; col < col_end ; col++ )
+    {
+      for( f77_int row = row_begin ; row < (m+ELD) ; row++ )
+      {
+        if( dst[ row + col*ld ] != val ) return false;
+      }
+    }
+    return true;
+  };
+
+  if( !region_intact( 0, n, m ) || !region_intact( n, n+ELD, 0 ) )
+  {
+    cout << "Invalid Access" << endl;
+    return val;
+  }
+  return 0;
+}
+
+// Complex variant of the padding check: a padding cell is intact only when
+// both its .real and .imag components equal val (of scalar type U).
+// m and n are exchanged when n equals ld-ELD. Two regions are scanned in
+// order:
+//   1. rows m .. m+ELD-1 of columns 0 .. n-1
+//   2. rows 0 .. m+ELD-1 of columns n .. n+ELD-1
+// On the first mismatch "Invalid Access" is printed and val is returned;
+// otherwise 0 is returned.
+// NOTE(review): with val == 0 the failure and success returns coincide --
+// confirm callers never use 0 as the guard value.
+template <typename T, typename U>
+double test_mmchkc( T* dst, f77_int m, f77_int n, f77_int ld, U val )
+{
+  const f77_int payload_ld = ld-ELD;
+
+  if( n == payload_ld )
+  {
+     const f77_int old_n = n;
+     n = m;
+     m = old_n;
+  }
+
+  // True when every cell in [row_begin, m+ELD) x [col_begin, col_end) holds val in both parts.
+  auto region_intact = [&]( f77_int col_begin, f77_int col_end, f77_int row_begin ) -> bool
+  {
+    for( f77_int col = col_begin ; col < col_end ; col++ )
+    {
+      for( f77_int row = row_begin ; row < (m+ELD) ; row++ )
+      {
+        T cell = dst[ row + col*ld ];
+        if((cell.real != val) ||(cell.imag != val)) return false;
+      }
+    }
+    return true;
+  };
+
+  if( !region_intact( 0, n, m ) || !region_intact( n, n+ELD, 0 ) )
+  {
+    cout << "Invalid Access" << endl;
+    return val;
+  }
+  return 0;
+}
+
+void conjugate_tensor( obj_t* aa, num_t dt );   // from here on: declarations only, no bodies in this header; section labels below are inferred from the declared names
+void libblis_test_build_col_labels_string( test_params_t* params,
+                                           test_op_t* op, char* l_str );
+unsigned int libblis_test_get_n_dims_from_dimset( dimset_t dimset ) ;
+void bli_param_map_char_to_blas_trans( char trans, trans_t* blas_trans );
+void bli_param_map_char_to_herk_trans( char trans, trans_t* herk_trans );
+void bli_param_map_char_to_syrk_trans( char trans, trans_t* syrk_trans );
+void libblis_test_fprintf( FILE* output_stream, const char* message, ... );
+ind_t ind_enable_get_str( test_params_t*, unsigned int d, unsigned int x,
+                                                          test_op_t* op );
+// Result checks, comparisons and operand fill; the two *_irandomize routines are defined in blis_utils_int.cpp.
+double libblis_test_vector_check( test_params_t* params, obj_t* y );
+double libblis_test_matrix_check( test_params_t* params, obj_t* y );
+double libblis_test_bitrp_vector( obj_t* c, obj_t* r, num_t dt );
+double libblis_test_bitrp_matrix( obj_t* c, obj_t* r, num_t dt );
+void libblis_test_mobj_irandomize( test_params_t* params, obj_t* x );
+void libblis_test_vobj_irandomize( test_params_t* params, obj_t* x );
+void test_fillbuffmem( obj_t* c, num_t dt );
+void test_fillbuffmem_diag( obj_t* c, num_t dt );
+// Datatype-string queries that take an explicit (n, str) pair.
+int libblis_test_dt_str_has_sp_char_str( int n, char* str );
+int libblis_test_dt_str_has_dp_char_str( int n, char* str );
+int libblis_test_dt_str_has_rd_char_str( int n, char* str );
+// Transposition mapping, operation status queries and result-string formatting.
+void bli_map_blis_to_netlib_trans( trans_t trans, char* blas_trans );
+bool libblis_test_op_is_done( test_op_t* op );
+int libblis_test_l3_is_disabled( test_op_t* op );
+bool libblis_test_get_string_for_result( double resid, num_t dt,
+                                         const thresh_t* thresh, char *r_val );
+// FILE*-based input reading and output of overrides/parameters.
+void libblis_test_read_next_line( char* buffer, FILE* input_stream );
+void libblis_test_fopen_check_stream( char* filename_str, FILE* stream );
+void libblis_test_read_section_override( test_ops_t*  ops,
+                                         FILE* input_stream, int* override );
+void libblis_test_read_op_info( test_ops_t*  ops, FILE* input_stream,
+      opid_t opid, dimset_t dimset,  unsigned int n_params, test_op_t* op );
+void libblis_test_output_section_overrides( FILE* os, test_ops_t* ops );
+void libblis_test_output_params_struct( FILE* os, test_params_t* params );
+// Parameter/operand spec-string helpers and datatype-string queries on test_params_t.
+param_t libblis_test_get_param_type_for_char( char p_type );
+unsigned int libblis_test_count_combos ( unsigned int n_operands, char* spec_str, char** char_sets );
+void libblis_test_fill_param_strings( char* p_spec_str, char** chars_for_param,
+         unsigned int  n_params, unsigned int  n_param_combos, char** pc_str );
+operand_t libblis_test_get_operand_type_for_char( char o_type ) ;
+int libblis_test_dt_str_has_rd_char( test_params_t* params );
+int libblis_test_dt_str_has_cd_char( test_params_t* params );
+int libblis_test_dt_str_has_sp_char( test_params_t* params );
+int libblis_test_dt_str_has_dp_char( test_params_t* params );
+char libblis_test_proj_dtchar_to_precchar( char dt_char );
+// Error reporting and creation/randomization of test operands.
+void libblis_test_printf_error( const char* message, ... );
+void libblis_test_check_empty_problem( obj_t* c, double* resid );
+void libblis_test_mobj_create( test_params_t* params, num_t dt, trans_t trans,
+                                     char storage, dim_t m, dim_t n, obj_t* a );
+void libblis_test_vobj_create( test_params_t* params, num_t dt,
+                                                  char storage, dim_t m, obj_t* x );
+void libblis_test_mobj_randomize( test_params_t* params, bool normalize, obj_t* a );
+void libblis_test_mobj_load_diag( test_params_t* params, obj_t* a );
+void libblis_test_vobj_randomize( test_params_t* params, bool normalize, obj_t* x );
+// Buffer allocation and release for obj_t.
+void libblis_test_alloc_buffer( obj_t* a );
+void libblis_test_obj_free( obj_t* a );
+
+#endif
--- a/gtestsuite/src/blis_utils_int.cpp
+++ b/gtestsuite/src/blis_utils_int.cpp
@@ -0,0 +1,625 @@
+#include <time.h>
+#include "blis_utils.h"
+
+//#define DEF_SRAND
+
+// Writes a pseudo-random integer in [0, 3], drawn with rand(), to *alpha
+// as a float. The range is deliberately tiny; the original note reads
+// "24*24*k < 23 bits max value to avoid rounding off errors".
+void bli_isrands( float* alpha )
+{
+    const int32_t draw = static_cast<int32_t>( rand() % 4 );
+    *alpha = static_cast<float>( draw );
+}
+
+// Writes a pseudo-random integer in [0, 3], drawn with rand(), to *alpha
+// as a double.
+void bli_idrands( double* alpha )
+{
+    const int64_t draw = static_cast<int64_t>( rand() % 4 );
+    *alpha = static_cast<double>( draw );
+}
+
+// Fills both components of *alpha with small random integers; the real
+// part is drawn first, then the imaginary part.
+void bli_icrands( scomplex* alpha )
+{
+    float* const re = &(alpha->real);
+    float* const im = &(alpha->imag);
+    bli_isrands( re );
+    bli_isrands( im );
+}
+
+// Fills both components of *alpha with small random integers; the real
+// part is drawn first, then the imaginary part.
+void bli_izrands( dcomplex* alpha )
+{
+    double* const re = &(alpha->real);
+    double* const im = &(alpha->imag);
+    bli_idrands( re );
+    bli_idrands( im );
+}
+
+// Fills n elements of x, spaced incx apart, via bli_isrands.
+// When DEF_SRAND is defined the generator is reseeded from time(0) on
+// every call.
+void bli_israndv( int n, float* x, int incx )
+{
+#ifdef DEF_SRAND
+    srand(time(0));
+#endif
+    for ( int k = 0; k < n; ++k )
+    {
+        bli_isrands( &x[k*incx] );
+    }
+}
+
+// Fills n elements of x, spaced incx apart, via bli_idrands.
+// When DEF_SRAND is defined the generator is reseeded from time(0) on
+// every call.
+void bli_idrandv( int n, double* x, int incx )
+{
+#ifdef DEF_SRAND
+    srand(time(0));
+#endif
+    for ( int k = 0; k < n; ++k )
+    {
+        bli_idrands( &x[k*incx] );
+    }
+}
+
+// Fills n elements of x, spaced incx apart, via bli_icrands.
+// When DEF_SRAND is defined the generator is reseeded from time(0) on
+// every call.
+void bli_icrandv( int n, scomplex* x, int incx )
+{
+#ifdef DEF_SRAND
+    srand(time(0));
+#endif
+    for ( int k = 0; k < n; ++k )
+    {
+        bli_icrands( &x[k*incx] );
+    }
+}
+
+// Fills n elements of x, spaced incx apart, via bli_izrands.
+// When DEF_SRAND is defined the generator is reseeded from time(0) on
+// every call.
+void bli_izrandv( int n, dcomplex* x, int incx )
+{
+#ifdef DEF_SRAND
+    srand(time(0));
+#endif
+    for ( int k = 0; k < n; ++k )
+    {
+        bli_izrands( &x[k*incx] );
+    }
+}
+
+
+// Fills the m x n float matrix a (row stride a_rs, column stride a_cs)
+// with small random integers, one vector at a time through bli_israndv.
+//
+//   params  currently unused. A constant-fill path that consulted
+//           params->oruflw used to sit behind `if(1) ... else` and could
+//           never run; that unreachable branch has been removed.
+//   m, n    matrix extents; the call is a no-op when bli_zero_dim2(m, n)
+//           holds
+//   a       first element of the matrix
+void bli_israndm(test_params_t* params, int m, int n, float* a, int a_rs, int a_cs )
+{
+    (void)params;
+
+    // Return early if possible.
+    if ( bli_zero_dim2( m, n ) ) return;
+
+    // Start from column-major traversal: n vectors of m elements each.
+    inc_t inca   = a_rs;
+    inc_t lda    = a_cs;
+    inc_t n_iter = n;
+    inc_t n_elem = m;
+
+    // For row storage walk by rows instead, for better spatial locality.
+    if ( bli_is_row_stored( a_rs, a_cs ) )
+    {
+        bli_swap_incs( &n_iter, &n_elem );
+        bli_swap_incs( &lda, &inca );
+    }
+
+    for ( int j = 0; j < n_iter; j++ )
+    {
+        float* a_begin = a + j*lda;
+        bli_israndv( n_elem, a_begin, inca );
+    }
+}
+
+// Fills the m x n double matrix a (row stride a_rs, column stride a_cs)
+// with small random integers, one vector at a time through bli_idrandv.
+//
+//   params  currently unused. A constant-fill path that consulted
+//           params->oruflw used to sit behind `if(1) ... else` and could
+//           never run; that unreachable branch has been removed.
+//   m, n    matrix extents; the call is a no-op when bli_zero_dim2(m, n)
+//           holds
+//   a       first element of the matrix
+void bli_idrandm(test_params_t* params, int m, int n, double* a, int a_rs, int a_cs )
+{
+    (void)params;
+
+    // Return early if possible.
+    if ( bli_zero_dim2( m, n ) ) return;
+
+    // Start from column-major traversal: n vectors of m elements each.
+    inc_t inca   = a_rs;
+    inc_t lda    = a_cs;
+    inc_t n_iter = n;
+    inc_t n_elem = m;
+
+    // For row storage walk by rows instead, for better spatial locality.
+    if ( bli_is_row_stored( a_rs, a_cs ) )
+    {
+        bli_swap_incs( &n_iter, &n_elem );
+        bli_swap_incs( &lda, &inca );
+    }
+
+    for ( int j = 0; j < n_iter; j++ )
+    {
+        double* a_begin = a + j*lda;
+        bli_idrandv( n_elem, a_begin, inca );
+    }
+}
+
+// Fills the m x n scomplex matrix a (row stride a_rs, column stride a_cs)
+// with small random integers in both components, one vector at a time
+// through bli_icrandv.
+//
+//   params  currently unused. A constant-fill path that consulted
+//           params->oruflw used to sit behind `if(1) ... else` and could
+//           never run; that unreachable branch has been removed.
+//   m, n    matrix extents; the call is a no-op when bli_zero_dim2(m, n)
+//           holds
+//   a       first element of the matrix
+void bli_icrandm(test_params_t* params, int m, int n, scomplex* a, int a_rs, int a_cs )
+{
+    (void)params;
+
+    // Return early if possible.
+    if ( bli_zero_dim2( m, n ) ) return;
+
+    // Start from column-major traversal: n vectors of m elements each.
+    inc_t inca   = a_rs;
+    inc_t lda    = a_cs;
+    inc_t n_iter = n;
+    inc_t n_elem = m;
+
+    // For row storage walk by rows instead, for better spatial locality.
+    if ( bli_is_row_stored( a_rs, a_cs ) )
+    {
+        bli_swap_incs( &n_iter, &n_elem );
+        bli_swap_incs( &lda, &inca );
+    }
+
+    for ( int j = 0; j < n_iter; j++ )
+    {
+        scomplex* a_begin = a + j*lda;
+        bli_icrandv( n_elem, a_begin, inca );
+    }
+}
+
+// Fills the m x n dcomplex matrix a (row stride a_rs, column stride a_cs)
+// with small random integers in both components, one vector at a time
+// through bli_izrandv.
+//
+//   params  currently unused. A constant-fill path that consulted
+//           params->oruflw used to sit behind `if(1) ... else` and could
+//           never run; that unreachable branch has been removed.
+//   m, n    matrix extents; the call is a no-op when bli_zero_dim2(m, n)
+//           holds
+//   a       first element of the matrix
+void bli_izrandm(test_params_t* params, int m, int n, dcomplex* a, int a_rs, int a_cs )
+{
+    (void)params;
+
+    // Return early if possible.
+    if ( bli_zero_dim2( m, n ) ) return;
+
+    // Start from column-major traversal: n vectors of m elements each.
+    inc_t inca   = a_rs;
+    inc_t lda    = a_cs;
+    inc_t n_iter = n;
+    inc_t n_elem = m;
+
+    // For row storage walk by rows instead, for better spatial locality.
+    if ( bli_is_row_stored( a_rs, a_cs ) )
+    {
+        bli_swap_incs( &n_iter, &n_elem );
+        bli_swap_incs( &lda, &inca );
+    }
+
+    for ( int j = 0; j < n_iter; j++ )
+    {
+        dcomplex* a_begin = a + j*lda;
+        bli_izrandv( n_elem, a_begin, inca );
+    }
+}
+
+// Fills matrix object x with small random integers by dispatching on its
+// datatype to the matching bli_?randm routine. Extents and strides are
+// read from the object; any datatype other than the four handled below is
+// reported through bli_check_error_code( BLIS_INVALID_DATATYPE ).
+void libblis_test_mobj_irandomize(test_params_t* params, obj_t* x )
+{
+    const num_t dt         = bli_obj_dt( x );
+    const dim_t rows       = bli_obj_length( x );
+    const dim_t cols       = bli_obj_width( x );
+    const inc_t row_stride = bli_obj_row_stride( x );
+    const inc_t col_stride = bli_obj_col_stride( x );
+
+    if ( dt == BLIS_FLOAT )
+    {
+        float *data = ( float * ) bli_obj_buffer_at_off( x );
+        bli_israndm(params, rows, cols, data, row_stride, col_stride );
+    }
+    else if ( dt == BLIS_DOUBLE )
+    {
+        double *data = ( double * ) bli_obj_buffer_at_off( x );
+        bli_idrandm(params, rows, cols, data, row_stride, col_stride );
+    }
+    else if ( dt == BLIS_SCOMPLEX )
+    {
+        scomplex *data = ( scomplex * ) bli_obj_buffer_at_off( x );
+        bli_icrandm(params, rows, cols, data, row_stride, col_stride );
+    }
+    else if ( dt == BLIS_DCOMPLEX )
+    {
+        dcomplex *data = ( dcomplex * ) bli_obj_buffer_at_off( x );
+        bli_izrandm(params, rows, cols, data, row_stride, col_stride );
+    }
+    else
+    {
+        bli_check_error_code( BLIS_INVALID_DATATYPE );
+    }
+}
+
+// Randomizes the vector object x.
+// The vector length, increment and datatype are read from x; the typed buffer
+// is then filled by the bli_i?randv() routine matching the datatype.
+// Each case also carries a constant-fill branch (numeric_limits max for
+// BLIS_OVERFLOW, numeric_limits min otherwise) that is currently unreachable
+// because the selecting condition is hard-coded to if(1).
+// Unsupported datatypes are reported through bli_check_error_code().
+void libblis_test_vobj_irandomize(test_params_t* params, obj_t* x )
+{
+    num_t dt  = bli_obj_dt( x );
+    dim_t n   = bli_obj_vector_dim( x );
+    inc_t inx = bli_obj_vector_inc( x );
+    int       i;
+
+    switch( dt )
+    {
+        // Single-precision real.
+        case BLIS_FLOAT :
+        {
+            float *buff = ( float * ) bli_obj_buffer_at_off( x );
+            if(1)       //if(params->oruflw == BLIS_DEFAULT)
+            {
+                bli_israndv( n, buff, inx );
+            }
+            else
+            {
+                // Disabled: fill every element with one extreme value.
+                float val;
+                if(params->oruflw == BLIS_OVERFLOW)
+                {
+                   val = (std::numeric_limits<float>::max)();
+                }
+                else
+                {
+                    val = (std::numeric_limits<float>::min)();
+                }
+                for ( i = 0; i < n; ++i )
+                {
+                    buff[i*inx] = val ;
+                }
+            }
+            break;
+        }
+        // Double-precision real.
+        case BLIS_DOUBLE :
+        {
+            double *buff = ( double * ) bli_obj_buffer_at_off( x );
+            if(1)       //if(params->oruflw == BLIS_DEFAULT)
+            {
+                bli_idrandv( n, buff, inx );
+            }
+            else
+            {
+                // Disabled: fill every element with one extreme value.
+                double val;
+                if(params->oruflw == BLIS_OVERFLOW)
+                {
+                    val = (std::numeric_limits<double>::max)();
+                }
+                else
+                {
+                    val = (std::numeric_limits<double>::min)();
+                }
+                for ( i = 0; i < n; ++i )
+                {
+                    buff[i*inx] = val ;
+                }
+            }
+            break;
+        }
+        // Single-precision complex.
+        case BLIS_SCOMPLEX :
+        {
+            scomplex *buff = ( scomplex * ) bli_obj_buffer_at_off( x );;
+            if(1)       //if(params->oruflw == BLIS_DEFAULT)
+            {
+                bli_icrandv( n, buff, inx );
+            }
+            else
+            {
+                // Disabled: real and imaginary parts get the same extreme value.
+                float val;
+                if(params->oruflw == BLIS_OVERFLOW)
+                {
+                    val = (std::numeric_limits<float>::max)();
+                }
+                else
+                {
+                    val = (std::numeric_limits<float>::min)();
+                }
+                scomplex cval = {val, val};
+                for ( i = 0; i < n; ++i )
+                {
+                    buff[i*inx] = cval ;
+                }
+            }
+            break;
+        }
+        // Double-precision complex.
+        case BLIS_DCOMPLEX :
+        {
+            dcomplex *buff = ( dcomplex * ) bli_obj_buffer_at_off( x );
+            if(1)       //if(params->oruflw == BLIS_DEFAULT)
+            {
+                bli_izrandv( n, buff, inx );
+            }
+            else
+            {
+                // Disabled: real and imaginary parts get the same extreme value.
+                double val;
+                if(params->oruflw == BLIS_OVERFLOW)
+                {
+                    val = (std::numeric_limits<double>::max)();
+                }
+                else
+                {
+                    val = (std::numeric_limits<double>::min)();
+                }
+                dcomplex cval = {val, val};
+                for ( i = 0; i < n; ++i )
+                {
+                    buff[i*inx] = cval ;
+                }
+            }
+            break;
+        }
+        default :
+            bli_check_error_code( BLIS_INVALID_DATATYPE );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+using namespace std;
+// Overwrites every element of the real matrix object c with NaN, addressing
+// element (row, col) as buffer[row*rsc + col*csc].
+template <typename T>
+void fillcbuff( dim_t rsc, dim_t csc, obj_t* c )
+{
+    const dim_t rows      = bli_obj_length( c );
+    const dim_t cols      = bli_obj_width( c );
+    T* const    base      = (T*) bli_obj_buffer( c );
+    const T     nan_value = (T)NAN;
+
+    for( dim_t row = 0 ; row < rows ; ++row )
+    {
+        for( dim_t col = 0 ; col < cols ; ++col )
+        {
+            base[ row*rsc + col*csc ] = nan_value;
+        }
+    }
+}
+
+// Overwrites every element of the complex matrix object c with a value whose
+// real and imaginary parts are both NaN. U is the complex element type and T
+// the type of its real/imag members.
+template <typename U, typename T>
+void fillicbuff ( dim_t rsc, dim_t csc, obj_t* c )
+{
+    const dim_t rows = bli_obj_length( c );
+    const dim_t cols = bli_obj_width( c );
+    U* const    base = (U*) bli_obj_buffer( c );
+
+    // Build the NaN element once; it is copied into every position.
+    const T nan_part = (T)NAN;
+    U nan_elem = {0,0};
+    nan_elem.real = nan_part;
+    nan_elem.imag = nan_part;
+
+    for( dim_t row = 0 ; row < rows ; ++row )
+    {
+        for( dim_t col = 0 ; col < cols ; ++col )
+        {
+            base[ row*rsc + col*csc ] = nan_elem ;
+        }
+    }
+}
+
+// Fills the whole buffer of matrix object c with NaN for datatype dt.
+// Strides: when the row stride is 1 the object's column stride is used;
+// otherwise the row stride is kept and the column stride is taken as 1.
+void test_fillbuffmem(obj_t* c, num_t dt )
+{
+    const bool  unit_row_stride = ( bli_obj_row_stride( c ) == 1 );
+    const dim_t rsc = unit_row_stride ? 1 : bli_obj_row_stride( c );
+    const dim_t csc = unit_row_stride ? bli_obj_col_stride( c ) : 1;
+
+    if ( dt == BLIS_FLOAT )
+    {
+        fillcbuff<float>( rsc, csc, c );
+    }
+    else if ( dt == BLIS_DOUBLE )
+    {
+        fillcbuff<double>( rsc, csc, c );
+    }
+    else if ( dt == BLIS_SCOMPLEX )
+    {
+        fillicbuff<scomplex, float>( rsc, csc, c );
+    }
+    else if ( dt == BLIS_DCOMPLEX )
+    {
+        fillicbuff<dcomplex, double>( rsc, csc, c );
+    }
+    else
+    {
+        bli_check_error_code( BLIS_INVALID_DATATYPE );
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+using namespace std;
+// Sets every main-diagonal element of the real matrix object c to 2.0,
+// addressing element (i, i) as buffer[i*rsc + i*csc]. Off-diagonal elements
+// are left untouched.
+template <typename T>
+void fillcbuff_diag( dim_t rsc, dim_t csc, obj_t* c )
+{
+    const dim_t M = bli_obj_length( c );
+    const dim_t N = bli_obj_width( c );
+    // The diagonal has min(M, N) entries; visit them directly instead of
+    // scanning all M*N positions looking for i == j.
+    const dim_t diag_len = ( M < N ) ? M : N;
+    T* C  = (T*) bli_obj_buffer( c );
+    const T val = (T) 2.0;
+
+    for( dim_t d = 0 ; d < diag_len ; d++ )
+    {
+        C[ d*rsc + d*csc ] = ( val );
+    }
+    return;
+}
+
+// Sets every main-diagonal element of the complex matrix object c to
+// {2.0, 2.0}, addressing element (i, i) as buffer[i*rsc + i*csc].
+// Off-diagonal elements are left untouched.
+template <typename T>
+void fillicbuff_diag ( dim_t rsc, dim_t csc, obj_t* c )
+{
+    const dim_t M = bli_obj_length( c );
+    const dim_t N = bli_obj_width( c );
+    // The diagonal has min(M, N) entries; visit them directly instead of
+    // scanning all M*N positions looking for i == j.
+    const dim_t diag_len = ( M < N ) ? M : N;
+    T* C = (T*) bli_obj_buffer( c );
+
+    T val = {2.0,2.0};
+    for( dim_t d = 0 ; d < diag_len ; d++ )
+    {
+        C[ d*rsc + d*csc ] = ( val );
+    }
+    return;
+}
+
+// Writes 2.0 (or {2.0, 2.0} for complex types) onto the main diagonal of
+// matrix object c for datatype dt.
+// Strides: when the row stride is 1 the object's column stride is used;
+// otherwise the row stride is kept and the column stride is taken as 1.
+void test_fillbuffmem_diag( obj_t* c, num_t dt )
+{
+    const bool  unit_row_stride = ( bli_obj_row_stride( c ) == 1 );
+    const dim_t rsc = unit_row_stride ? 1 : bli_obj_row_stride( c );
+    const dim_t csc = unit_row_stride ? bli_obj_col_stride( c ) : 1;
+
+    if ( dt == BLIS_FLOAT )
+    {
+        fillcbuff_diag<float>( rsc, csc, c );
+    }
+    else if ( dt == BLIS_DOUBLE )
+    {
+        fillcbuff_diag<double>( rsc, csc, c );
+    }
+    else if ( dt == BLIS_SCOMPLEX )
+    {
+        fillicbuff_diag<scomplex>( rsc, csc, c );
+    }
+    else if ( dt == BLIS_DCOMPLEX )
+    {
+        fillicbuff_diag<dcomplex>( rsc, csc, c );
+    }
+    else
+    {
+        bli_check_error_code( BLIS_INVALID_DATATYPE );
+    }
+}
+///////////////////////////////////////////////////////////////////////////////////////////
--- a/gtestsuite/src/gtest_pthread.cpp
+++ b/gtestsuite/src/gtest_pthread.cpp
@@ -0,0 +1,62 @@
+#include "gtest_pthread.h"
+
+#if defined(BLIS_DISABLE_SYSTEM)
+
+#elif defined(_MSC_VER) // !defined(BLIS_DISABLE_SYSTEM)
+
+#include <errno.h>
+
+// Windows (MSVC) implementation of gtest_pthread_create().
+// Starts a native thread with CreateThread() running start_routine(arg) and
+// stores the resulting handle in thread->handle.
+// Returns EINVAL when attr is non-NULL, EAGAIN when CreateThread() returns a
+// null handle, and 0 on success.
+int gtest_pthread_create
+     (
+       bli_pthread_t*            thread,
+       const bli_pthread_attr_t* attr,
+       void*                    (*start_routine)(void*),
+       void*                     arg
+     )
+{
+    // Thread attributes are not handled on this path.
+    if (attr) return EINVAL;
+    // NOTE(review): start_routine is declared as void*(*)(void*) but is cast
+    // straight to LPTHREAD_START_ROUTINE; confirm the signatures and calling
+    // conventions are compatible on every supported Windows target.
+    LPTHREAD_START_ROUTINE func = (LPTHREAD_START_ROUTINE )start_routine;
+    thread->handle = CreateThread(NULL, 0, func, arg, 0, NULL);
+    if ( !thread->handle ) return EAGAIN;
+    return 0;
+}
+
+// Windows (MSVC) implementation of gtest_pthread_join(): forwards to
+// bli_pthread_join() and returns its status unchanged.
+int gtest_pthread_join
+     (
+       bli_pthread_t thread,
+       void**        retval
+     )
+{
+  const int status = bli_pthread_join( thread, retval );
+  return status;
+}
+
+#else // !defined(BLIS_DISABLE_SYSTEM) && !defined(_MSC_VER)
+
+// This branch implements the gtest_pthread_*() wrappers by forwarding directly
+// to the corresponding bli_pthread_*() functions.
+// It is compiled for Linux and other non-Windows environments, where we
+// assume that *some* implementation of pthreads is provided.
+
+// -- pthread_create(), pthread_join() --
+
+// Non-Windows implementation of gtest_pthread_create(): forwards all
+// arguments to bli_pthread_create() and returns its status unchanged.
+int gtest_pthread_create
+     (
+       bli_pthread_t*            thread,
+       const bli_pthread_attr_t* attr,
+       void*                   (*start_routine)(void*),
+       void*                     arg
+     )
+{
+    const int status = bli_pthread_create( thread, attr, start_routine, arg );
+    return status;
+}
+
+// Non-Windows implementation of gtest_pthread_join(): forwards to
+// bli_pthread_join() and returns its status unchanged.
+int gtest_pthread_join
+     (
+       bli_pthread_t thread,
+       void**        retval
+     )
+{
+    const int status = bli_pthread_join( thread , retval );
+    return status;
+}
+#endif // !defined(BLIS_DISABLE_SYSTEM) && !defined(_MSC_VER)
--- a/gtestsuite/src/gtest_pthread.h
+++ b/gtestsuite/src/gtest_pthread.h
@@ -0,0 +1,29 @@
+#ifndef GTEST_PTHREAD_H
+#define GTEST_PTHREAD_H
+
+#include "blis.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Creates a thread that runs start_routine(arg) and records it in *thread.
+// Returns 0 on success and a non-zero error code otherwise.
+// On MSVC builds the implementation rejects a non-NULL attr with EINVAL and
+// reports a failed thread creation as EAGAIN; on other platforms the call is
+// forwarded to bli_pthread_create().
+int gtest_pthread_create
+     (
+       bli_pthread_t*            thread,
+       const bli_pthread_attr_t* attr,
+       void*                     (*start_routine)(void*),
+       void*                     arg
+     );
+
+
+// Joins the given thread by forwarding to bli_pthread_join() and returns that
+// function's status. retval is passed through unchanged.
+int gtest_pthread_join
+     (
+       bli_pthread_t thread,
+       void**        retval
+     );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/gtestsuite/src/gtest_suite.cpp
+++ b/gtestsuite/src/gtest_suite.cpp
--- a/gtestsuite/src/lpgemm_utils.cpp
+++ b/gtestsuite/src/lpgemm_utils.cpp
@@ -0,0 +1,112 @@
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <tuple>
+#include <vector>
+#include "lpgemm_utils.h"
+
+#ifdef BLIS_ENABLE_ADDONS
+
+// Converts the float accumulator to bfloat16 via float_to_bf16() and returns
+// the converted value. post_op and j are accepted but not used here.
+bfloat16 mat_mul_accuracy_check_downscale_bf16( float temp_accum,  bfloat16 out_temp_accum,
+                                   aocl_post_op*  post_op, dim_t j)
+{
+  bfloat16 converted = out_temp_accum;
+  float_to_bf16( &temp_accum, &converted );
+  return converted;
+}
+
+// Widens a bfloat16 value to float by placing its 16 bits in the upper half
+// of a 32-bit pattern (lower 16 bits zero) and reinterpreting that pattern
+// as a float.
+// The bit moves go through memcpy and an unsigned shift, which avoids the
+// pointer-cast type punning and the left shift of a possibly negative signed
+// value used previously, while producing the same bit pattern.
+float bf16_to_float( bfloat16 bf16_val )
+{
+  static_assert( sizeof( bfloat16 ) == sizeof( uint16_t ),
+                 "bfloat16 is expected to occupy 16 bits" );
+
+  uint16_t half_bits = 0;
+  memcpy( &half_bits, &bf16_val, sizeof( half_bits ) );
+
+  const uint32_t float_bits = ( ( uint32_t )half_bits ) << 16;
+
+  float float_value = 0.0f;
+  memcpy( &float_value, &float_bits, sizeof( float_value ) );
+  return float_value;
+}
+
+// Reference value for element (i, j) of a bf16 GEMM with a float C matrix:
+//   beta * c_ref(i, j) + alpha * ( temp_accum + sum_p a(i, p) * b(p, j) )
+// Every a and b element is widened with bf16_to_float() before multiplying.
+float mat_mul_accuracy_check_accum_bf16
+    (
+      bfloat16* a,
+      bfloat16* b,
+      float*    c_ref,
+      float     temp_accum,
+      float     alpha,
+      float     beta,
+      dim_t     rs_a,
+      dim_t     rs_b,
+      dim_t     cs_a,
+      dim_t     cs_b,
+      dim_t     rs_c_ref,
+      dim_t     cs_c_ref,
+      dim_t     i,
+      dim_t     j,
+      dim_t     k
+    )
+{
+  // Accumulate the dot product of row i of a and column j of b.
+  for ( dim_t p = 0; p < k; ++p)
+  {
+    const float lhs = bf16_to_float( a[ i * rs_a + p * cs_a ] );
+    const float rhs = bf16_to_float( b[ p * rs_b + j * cs_b ] );
+    temp_accum += ( lhs * rhs );
+  }
+
+  const float c_val = c_ref[ ( rs_c_ref * i ) + ( cs_c_ref * j ) ];
+  temp_accum = ( beta * c_val ) + ( alpha * temp_accum );
+  return temp_accum;
+}
+
+// Reference value for element (i, j) of a bf16 GEMM with a bfloat16 C matrix:
+//   beta * float(c_ref(i, j)) + alpha * ( temp_accum + sum_p a(i, p) * b(p, j) )
+// Every a, b and c_ref element is widened with bf16_to_float() before use.
+float mat_mul_accuracy_check_accum_bf16
+    (
+      bfloat16* a,
+      bfloat16* b,
+      bfloat16* c_ref,
+      float     temp_accum,
+      float     alpha,
+      float     beta,
+      dim_t     rs_a,
+      dim_t     rs_b,
+      dim_t     cs_a,
+      dim_t     cs_b,
+      dim_t     rs_c_ref,
+      dim_t     cs_c_ref,
+      dim_t     i,
+      dim_t     j,
+      dim_t     k
+    )
+{
+  // Accumulate the dot product of row i of a and column j of b.
+  for ( dim_t p = 0; p < k; ++p)
+  {
+    const float lhs = bf16_to_float( a[ i*rs_a + p*cs_a ] );
+    const float rhs = bf16_to_float( b[ p*rs_b + j*cs_b ] );
+    temp_accum += ( lhs * rhs );
+  }
+
+  const float c_val = bf16_to_float( c_ref[ i*rs_c_ref + j*cs_c_ref ] );
+  temp_accum = ( beta * c_val ) + ( alpha * temp_accum );
+  return temp_accum;
+}
+
+// Releases a post-ops descriptor together with the buffers it owns: the
+// eltwise alpha, the sum scale factor, the bias and the op sequence vector.
+// Passing NULL is a no-op.
+void lpgemm_destroy_post_ops_struct( aocl_post_op* post_ops )
+{
+  if ( post_ops == NULL ) return;
+
+  // free() ignores NULL pointers, so unset buffers need no separate guard.
+  free( post_ops->eltwise.algo.alpha );
+  free( post_ops->sum.scale_factor );
+  free( post_ops->bias.bias );
+  free( post_ops->seq_vector );
+
+  free( post_ops );
+}
+#endif
--- a/gtestsuite/src/lpgemm_utils.h
+++ b/gtestsuite/src/lpgemm_utils.h
@@ -0,0 +1,481 @@
+#include <iostream>
+#include <string>
+#include <tuple>
+#include <vector>
+#include <cstdio>
+
+#include "blis_test.h"
+
+#ifdef BLIS_ENABLE_ADDONS
+
+#define S8_MIN  (-128)
+#define S8_MAX  (+127)
+
+// Returns the larger of two ints (b when they are equal).
+static inline int max (int a, int b)
+{
+	if ( a > b )
+	{
+		return a;
+	}
+	return b;
+}
+
+// Returns the smaller of two ints (b when they are equal).
+static inline int min (int a, int b)
+{
+	if ( a < b )
+	{
+		return a;
+	}
+	return b;
+}
+
+// Fills arr[0..size) with the repeating pattern 0, 1, ..., 9 converted to T.
+template <typename T>
+void fill_array ( T* arr, dim_t size )
+{
+    for ( dim_t idx = 0; idx < size; ++idx )
+    {
+        arr[idx] = ( T )( idx % 10 );
+    }
+}
+
+// Fills arr[0..size) with the repeating pattern 0, 1, ..., 19 converted to T.
+template <typename T>
+void fill_array_post_ops( T* arr, dim_t size )
+{
+    for ( dim_t idx = 0; idx < size; ++idx )
+    {
+        arr[idx] = ( T )( idx % 20 );
+    }
+}
+
+/* Converts a float to bfloat16 by truncation: copies sizeof( bfloat16 ) bytes
+   starting at byte offset 2 of the float into *bf16_val.
+   NOTE(review): offset 2 selects the most significant half of the float only
+   on little-endian targets - confirm no big-endian platform is supported. */
+static void float_to_bf16( float* float_value, bfloat16* bf16_val )
+{
+    const char* upper_half = (char *)( float_value ) + 2;
+    memcpy( bf16_val, upper_half, sizeof ( bfloat16 ) );
+}
+
+/* Converts the first `size` floats of `array` to bfloat16, element by
+   element, storing the results in `array_bf16`. */
+static inline void convert_float_arr_to_bf16( float* array, bfloat16* array_bf16, int size )
+{
+    for ( int idx = 0; idx < size; ++idx )
+    {
+        float_to_bf16( &array[idx], &array_bf16[idx] );
+    }
+}
+
+/* Builds a heap-allocated aocl_post_op descriptor for the accuracy tests.
+ *
+ * post_ops_str : optional list of post-op names separated by ',' and/or ' '.
+ *                Recognised names are "bias", "relu" and "prelu"; any other
+ *                token is skipped. The string is tokenised in place with
+ *                strtok(), so it is modified by this call. May be NULL.
+ * n            : number of elements allocated for the bias buffer (type X)
+ *                and for the scale-factor buffer (type Z).
+ * dscale_out   : when true, a trailing SCALE op is appended and a scale
+ *                buffer filled with 1/1000 is allocated.
+ * m            : currently unused.
+ *
+ * Only bias followed by RELU/PReLU, and vice versa, is supported for now.
+ *
+ * Returns the new descriptor, or NULL if an allocation fails or if more ops
+ * are requested than the sequence vector can hold. On failure everything
+ * allocated so far is released. On success the caller releases the result
+ * with lpgemm_destroy_post_ops_struct().
+ */
+template <typename X, typename Z>
+aocl_post_op* lpgemm_create_post_ops_struct( dim_t m, dim_t n,
+                  char* post_ops_str, bool dscale_out )
+{
+  aocl_post_op* post_ops = ( aocl_post_op* ) malloc( sizeof( aocl_post_op ) );
+
+  /* The descriptor is dereferenced unconditionally below, so an allocation
+     failure must always bail out, whatever dscale_out is. */
+  if ( post_ops == NULL )
+  {
+    return NULL;
+  }
+
+  /* Only supporting 3 post ops at max for now.*/
+  const dim_t max_post_ops_seq_length = 3;
+  post_ops->seq_vector = ( AOCL_POST_OP_TYPE* )
+        malloc( max_post_ops_seq_length * sizeof( AOCL_POST_OP_TYPE ) );
+
+  if ( post_ops->seq_vector == NULL )
+  {
+    free( post_ops );
+    return NULL;
+  }
+
+  /* Ensure the buffers that use NULL check in deinit code (and that are
+     released on the failure path below) are properly set to NULL.*/
+  post_ops->eltwise.algo.alpha = NULL;
+  post_ops->bias.bias = NULL;
+  post_ops->sum.scale_factor = NULL;
+
+  /* Single failure path: release everything owned so far. free( NULL ) is a
+     no-op, so buffers that were never allocated are harmless here. */
+  auto release_all = [&post_ops]() -> aocl_post_op*
+  {
+    free( post_ops->eltwise.algo.alpha );
+    free( post_ops->bias.bias );
+    free( post_ops->sum.scale_factor );
+    free( post_ops->seq_vector );
+    free( post_ops );
+    return NULL;
+  };
+
+  /* Parse post ops list.*/
+  dim_t cur_op_index = 0;
+  if ( post_ops_str != NULL )
+  {
+    bool is_param_relu = FALSE;
+    for ( char* ops_tok = strtok( post_ops_str, ", " ); ops_tok != NULL;
+          ops_tok = strtok( NULL, ", " ) )
+    {
+      AOCL_POST_OP_TYPE op_type;
+      if ( strcmp( ops_tok, "bias") == 0 )
+      {
+        op_type = BIAS;
+      }
+      else if ( strcmp( ops_tok, "relu") == 0 )
+      {
+        op_type = ELTWISE;
+      }
+      else if ( strcmp( ops_tok, "prelu") == 0 )
+      {
+        op_type = ELTWISE;
+        is_param_relu = TRUE;
+      }
+      else
+      {
+        /* Unknown token: skip it instead of leaving an uninitialised
+           entry in the sequence vector. */
+        continue;
+      }
+
+      /* Never write past the end of seq_vector. */
+      if ( cur_op_index >= max_post_ops_seq_length )
+      {
+        return release_all();
+      }
+      post_ops->seq_vector[cur_op_index] = op_type;
+      cur_op_index++;
+    }
+
+    /* Allocate bias buffer, return early if alloc fails.*/
+    post_ops->bias.bias = malloc( n * sizeof( X ) );
+    if ( post_ops->bias.bias == NULL )
+    {
+      return release_all();
+    }
+    fill_array_post_ops<X>((X*)post_ops->bias.bias, n );
+
+    post_ops->eltwise.is_power_of_2 = FALSE;
+    post_ops->eltwise.scale_factor = NULL;
+    post_ops->eltwise.algo.alpha = NULL;
+    post_ops->eltwise.algo.algo_type = RELU;
+    if ( is_param_relu == TRUE )
+    {
+      /* PReLU uses a single slope value, fixed to 6 for the tests. */
+      post_ops->eltwise.algo.alpha = malloc( sizeof( X ) );
+      if ( post_ops->eltwise.algo.alpha == NULL )
+      {
+        return release_all();
+      }
+      *( ( X* ) post_ops->eltwise.algo.alpha ) = ( X )6;
+      post_ops->eltwise.algo.algo_type = PRELU;
+    }
+    post_ops->eltwise.algo.beta = NULL;
+  }
+
+  if ( dscale_out )
+  {
+    /* The SCALE op also needs a slot in seq_vector. */
+    if ( cur_op_index >= max_post_ops_seq_length )
+    {
+      return release_all();
+    }
+    post_ops->seq_vector[cur_op_index] = SCALE;
+    cur_op_index++;
+
+    post_ops->sum.is_power_of_2 = FALSE;
+    post_ops->sum.buff = NULL;
+    post_ops->sum.zero_point = NULL;
+
+    /* Allocate scale buffer, return early if alloc fails.*/
+    post_ops->sum.scale_factor = malloc( n * sizeof( Z ) );
+    if ( post_ops->sum.scale_factor == NULL )
+    {
+      return release_all();
+    }
+    /* Fill scale factor.*/
+    Z* temp_dscale_ptr = ( Z* )post_ops->sum.scale_factor;
+    for ( dim_t i = 0; i < n; ++i )
+    {
+      temp_dscale_ptr[i] = ( ( Z )1 )/ ( ( Z )1000 );
+    }
+  }
+
+  post_ops->seq_length = cur_op_index;
+
+  return post_ops;
+}
+
+void lpgemm_destroy_post_ops_struct( aocl_post_op* post_ops );
+
+/* Downscales an accumulator for comparison: multiplies temp_accum (as ST) by
+   the j-th entry of post_op->sum.scale_factor, rounds with nearbyintf(),
+   clamps the result to [S8_MIN, S8_MAX] and returns it converted to B. */
+template <typename B, typename Z, typename ST>
+B mat_mul_accuracy_check_downscale( Z temp_accum, B out_temp_accum,
+                                   aocl_post_op*  post_op,  dim_t j )
+{
+  const ST  scale        = *( ( ST* )post_op->sum.scale_factor + j );
+  const int lower_bound  = max ( nearbyintf( ( ST )temp_accum * scale ), S8_MIN );
+  out_temp_accum = ( B ) min ( lower_bound, S8_MAX ) ;
+  return out_temp_accum;
+}
+
+/* Reference value for element (i, j) of C:
+     beta * c_ref(i, j) + alpha * ( temp_accum + sum_p a(i, p) * b(p, j) )
+   Element (r, c) of each matrix is addressed as ptr[ r*rs + c*cs ]. */
+template <typename A, typename B, typename X, typename Z>
+Z mat_mul_accuracy_check_accum
+    (
+      A*    a,
+      B*    b,
+      X*    c_ref,
+      Z     temp_accum,
+      Z     alpha,
+      Z     beta,
+      dim_t rs_a,
+      dim_t rs_b,
+      dim_t cs_a,
+      dim_t cs_b,
+      dim_t rs_c_ref,
+      dim_t cs_c_ref,
+      dim_t i,
+      dim_t j,
+      dim_t k
+    )
+{
+  /* Dot product of row i of a with column j of b. */
+  for( dim_t p = 0 ; p < k ; ++p )
+  {
+    temp_accum += ( a[ ( i * rs_a ) + ( cs_a * p ) ] *
+                    b[ ( rs_b * p ) + ( cs_b * j ) ] );
+  }
+
+  temp_accum = ( beta * ( c_ref[ ( rs_c_ref * i ) + ( cs_c_ref * j ) ] ) )
+                 + ( alpha * temp_accum );
+  return temp_accum;
+}
+
+/* Recomputes every element of the m x n result from a, b and c_ref, applies
+   the post-ops described by post_op (if any), optionally downscales, and
+   compares against the corresponding element of c.
+   Returns the sum of abs( c(i, j) - expected(i, j) ) over all mismatching
+   elements; 0.0 means c matched the reference everywhere.
+   c_ref is addressed with the same strides as c (rs_c, cs_c).
+   Only the first two entries of post_op->seq_vector are examined here. */
+template <typename A, typename B, typename X, typename Z, typename ST>
+double mat_mul_accuracy_check_driver
+    (
+      dim_t   m,
+      dim_t   n,
+      dim_t   k,
+      Z       alpha,
+      A*      a,
+      dim_t   rs_a,
+      dim_t   cs_a,
+      B*      b,
+      dim_t   rs_b,
+      dim_t   cs_b,
+      Z       beta,
+      X*      c,
+      dim_t   rs_c,
+      dim_t   cs_c,
+      X*      c_ref,
+      aocl_post_op*  post_op,
+      bool    dscale_out
+    )
+{
+  double resid = 0.0;
+  dim_t rs_c_ref = rs_c;
+  dim_t cs_c_ref = cs_c;
+  dim_t i,j;
+
+  for( i = 0 ; i < m ; ++i )
+  {
+    for( j = 0 ; j < n ; ++j )
+    {
+      Z temp_accum = 0;
+      X out_temp_accum = 0;
+
+      /* beta * c_ref(i, j) + alpha * dot( a(i, :), b(:, j) ) */
+      temp_accum = mat_mul_accuracy_check_accum<A,B,X,Z> (a, b, c_ref,
+                   temp_accum, alpha, beta, rs_a, rs_b, cs_a, cs_b,
+                   rs_c_ref, cs_c_ref, i, j, k);
+
+      if ( post_op != NULL )
+      {
+        /* Apply bias followed by relu. */
+        if ( post_op->seq_vector[0] == BIAS )
+        {
+          if ( post_op->seq_length >= 1 )
+          {
+            /* NOTE(review): the bias buffer is read through a Z* here;
+               confirm it was allocated with the same element type. */
+            temp_accum += ( *( ( Z* )post_op->bias.bias + j ) );
+          }
+          if ( ( post_op->seq_length > 1 ) &&
+            ( post_op->seq_vector[1] == ELTWISE ) )
+          {
+            if ( post_op->eltwise.algo.alpha != NULL ) /* PReLU*/
+            {
+              temp_accum = ( temp_accum > 0 ) ?
+              temp_accum :
+              ( temp_accum *
+              *( ( Z* ) post_op->eltwise.algo.alpha ) );
+            }
+            else
+            {
+              temp_accum = ( temp_accum > 0 ) ? temp_accum : 0 ;
+            }
+          }
+        }
+        /* Apply relu followed by bias. */
+        else if ( post_op->seq_vector[0] == ELTWISE )
+        {
+          if ( post_op->seq_length >= 1 )
+          {
+            if ( post_op->eltwise.algo.alpha != NULL ) /* PReLU*/
+            {
+              temp_accum = ( temp_accum > 0 ) ?
+               temp_accum :
+               ( temp_accum * *( ( Z* ) post_op->eltwise.algo.alpha ) );
+            }
+            else
+            {
+              temp_accum = ( temp_accum > 0 ) ? temp_accum : 0 ;
+            }
+          }
+          if ( ( post_op->seq_length > 1 ) && ( post_op->seq_vector[1] == BIAS ) )
+          {
+            temp_accum += ( *( ( Z* )post_op->bias.bias + j ) );
+          }
+        }
+      }
+      if ( dscale_out )
+      {
+        /* NOTE(review): the downscale helper is instantiated with B as its
+           result type while the value is stored in an X; confirm B and X
+           agree for every downscaled instantiation. */
+        out_temp_accum = mat_mul_accuracy_check_downscale<B, Z, ST>
+                                     ( temp_accum, out_temp_accum, post_op, j);
+      }
+      else
+      {
+        out_temp_accum = ( X )temp_accum;
+      }
+
+      /* Accumulate the absolute error of every mismatching element rather
+         than stopping at the first one. */
+      if( *( c + ( rs_c * i ) + ( cs_c * j ) ) != out_temp_accum )
+      {
+        auto tmp = *( c + ( rs_c * i ) + ( cs_c * j ) );
+        resid += abs( tmp - out_temp_accum );
+        //return resid;
+      }
+    }
+  }
+	 return resid;
+}
+
+bfloat16 mat_mul_accuracy_check_downscale_bf16
+        ( float temp_accum,  bfloat16 out_temp_accum, aocl_post_op*  post_op, dim_t j);
+
+float bf16_to_float( bfloat16 bf16_val );
+
+float mat_mul_accuracy_check_accum_bf16
+    (
+      bfloat16* a,
+      bfloat16* b,
+      float*    c_ref,
+      float     temp_accum,
+      float     alpha,
+      float     beta,
+      dim_t     rs_a,
+      dim_t     rs_b,
+      dim_t     cs_a,
+      dim_t     cs_b,
+      dim_t     rs_c_ref,
+      dim_t     cs_c_ref,
+      dim_t     i,
+      dim_t     j,
+      dim_t     k
+    );
+
+float mat_mul_accuracy_check_accum_bf16
+    (
+      bfloat16* a,
+      bfloat16* b,
+      bfloat16* c_ref,
+      float     temp_accum,
+      float     alpha,
+      float     beta,
+      dim_t     rs_a,
+      dim_t     rs_b,
+      dim_t     cs_a,
+      dim_t     cs_b,
+      dim_t     rs_c_ref,
+      dim_t     cs_c_ref,
+      dim_t     i,
+      dim_t     j,
+      dim_t     k
+    );
+
+/* bfloat16 variant of the accuracy driver.
+   Recomputes every element of the m x n result with
+   mat_mul_accuracy_check_accum_bf16(), applies the post-ops described by
+   post_op (if any), converts the result to the output type, and compares it
+   against the corresponding element of c.
+   Returns the sum of abs( c(i, j) - expected(i, j) ) over all mismatching
+   elements; 0.0 means c matched the reference everywhere.
+   c_ref is addressed with the same strides as c (rs_c, cs_c).
+   Only the first two entries of post_op->seq_vector are examined here. */
+template <typename A, typename B, typename X, typename Z, typename ST>
+double mat_mul_accuracy_check_driver_bf16
+    (
+      dim_t   m,
+      dim_t   n,
+      dim_t   k,
+      Z       alpha,
+      A*      a,
+      dim_t   rs_a,
+      dim_t   cs_a,
+      B*      b,
+      dim_t   rs_b,
+      dim_t   cs_b,
+      Z       beta,
+      X*      c,
+      dim_t   rs_c,
+      dim_t   cs_c,
+      X*      c_ref,
+      aocl_post_op*  post_op,
+      bool    dscale_out
+    )
+{
+  double resid = 0.0;
+  dim_t rs_c_ref = rs_c;
+  dim_t cs_c_ref = cs_c;
+
+  for ( dim_t i = 0; i < m; ++i )
+  {
+    for ( dim_t j = 0; j < n; ++j )
+    {
+      Z temp_accum = 0;
+      X out_temp_accum = 0;
+
+      /* Overload resolution on the type of c_ref selects the float or the
+         bfloat16 reference accumulator. */
+      temp_accum = mat_mul_accuracy_check_accum_bf16(a, b, c_ref,
+                   temp_accum, alpha, beta, rs_a, rs_b, cs_a, cs_b,
+                   rs_c_ref, cs_c_ref, i, j, k);
+
+      if ( post_op != NULL )
+      {
+        /* Apply bias followed by relu. */
+        if ( post_op->seq_vector[0] == BIAS )
+        {
+          if ( post_op->seq_length >= 1 )
+          {
+            /* NOTE(review): the bias buffer is read through a Z* here;
+               confirm it was allocated with the same element type. */
+            temp_accum += ( *( ( Z* )post_op->bias.bias + j ) );
+          }
+          if ( ( post_op->seq_length > 1 ) &&
+            ( post_op->seq_vector[1] == ELTWISE ) )
+          {
+            if ( post_op->eltwise.algo.alpha != NULL ) /* PReLU*/
+            {
+              temp_accum = ( temp_accum > 0 ) ?
+              temp_accum :
+              ( temp_accum *
+              *( ( Z* ) post_op->eltwise.algo.alpha ) );
+            }
+            else
+            {
+              temp_accum = ( temp_accum > 0 ) ? temp_accum : 0 ;
+            }
+          }
+        }
+        /* Apply relu followed by bias. */
+        else if ( post_op->seq_vector[0] == ELTWISE )
+        {
+          if ( post_op->seq_length >= 1 )
+          {
+            if ( post_op->eltwise.algo.alpha != NULL ) /* PReLU*/
+            {
+              temp_accum = ( temp_accum > 0 ) ?
+               temp_accum :
+               ( temp_accum * *( ( Z* ) post_op->eltwise.algo.alpha ) );
+            }
+            else
+            {
+              temp_accum = ( temp_accum > 0 ) ? temp_accum : 0 ;
+            }
+          }
+          if ( ( post_op->seq_length > 1 ) && ( post_op->seq_vector[1] == BIAS ) )
+          {
+            temp_accum += ( *( ( Z* )post_op->bias.bias + j ) );
+          }
+        }
+      }
+      if ( dscale_out )
+      {
+        /* Downscaling here means converting the accumulator to bfloat16. */
+        out_temp_accum = mat_mul_accuracy_check_downscale_bf16
+                                     ( temp_accum, out_temp_accum, post_op, j);
+      }
+      else
+      {
+        out_temp_accum = ( X )temp_accum;
+      }
+
+      /* Accumulate the absolute error of every mismatching element rather
+         than stopping at the first one. */
+      if( *( c + ( rs_c * i ) + ( cs_c * j ) ) != out_temp_accum )
+      {
+        auto tmp = *( c + ( rs_c * i ) + ( cs_c * j ) );
+        resid += abs( tmp - out_temp_accum );
+        //return resid;
+      }
+    }
+  }
+	 return resid;
+}
+#endif
+
+// Prints an mm x nn matrix stored with leading dimension ld (element (i, j)
+// at x[i*ld + j]). Each value is converted to int32_t and printed in a
+// 9-character field, one matrix row per output line, followed by a blank line.
+template <typename T>
+void print_matrics(T *x , dim_t mm, dim_t nn, dim_t ld)
+{
+    for ( dim_t row = 0; row < mm; ++row )
+    {
+        for ( dim_t col = 0; col < nn; ++col )
+        {
+            const int32_t cell = (int32_t)(x[row*ld + col]);
+            printf("%9d", cell);
+        }
+        cout << endl;
+    }
+    cout << endl;
+}
+
--- a/gtestsuite/src/main.cpp
+++ b/gtestsuite/src/main.cpp
@@ -0,0 +1,79 @@
+#include <cstdio>
+#include <thread>
+
+#include <iostream>
+#include <cfenv>
+#include <cmath>
+#include <cerrno>
+
+#include "blis_test.h"
+
+vector<input_data_t>	inputData;
+
+// Test-suite entry point.
+// With no command-line argument the parameter and operation files named in
+// strData are read; otherwise argv[1] is handed to libblis_test_inpfile().
+// Either path builds a GoogleTest filter string, which is installed before
+// the tests are run. The collected configuration is published to the tests
+// through the global inputData vector.
+// Returns the value of RUN_ALL_TESTS().
+int main(int argc, char **argv)
+{
+    BlisTestSuite	bts;
+    test_params_t* params  = NULL;
+    blis_string_t* strData = NULL;
+    test_ops_t*        ops = NULL;
+    input_file_t*    pfile = NULL;
+    input_data_t inData;
+    string filter_data( "" );
+
+    // Borrow the configuration structures owned by the suite object.
+    params  = bts.getParamsStr();
+    strData = bts.getStgStr();
+    ops     = bts.getOpsStr();
+    pfile   = bts.getfileStr();
+
+    unsigned int n = std::thread::hardware_concurrency();
+    std::cout << n << " concurrent threads are supported.\n";
+
+    // Start from all-zero state; in particular pfile->fileread is 0 unless a
+    // later call sets it.
+    memset( &inData, 0, sizeof( input_data_t ) );
+    memset( params, 0, sizeof( test_params_t ) );
+    memset( strData, 0, sizeof( blis_string_t ) );
+    memset( ops, 0, sizeof( test_ops_t ) );
+    memset( pfile, 0, sizeof( input_file_t ) );
+
+    // Initialize some strings.
+    bts.libblis_test_init_strings( strData );
+
+    if(argc <= 1)
+    {
+         // Read the global parameters file.
+        bts.libblis_test_read_params_file( strData->libblis_test_parameters_filename,
+                           params, strData->libblis_test_alphabeta_parameter);
+
+       // Read the operations parameter file.
+        bts.libblis_test_read_ops_file(strData->libblis_test_operations_filename, ops);
+
+        bts.CreateGtestFilters(ops, filter_data);
+    }
+    else
+    {
+        // Process the input file given as the first command-line argument.
+        bts.libblis_test_inpfile(argv[1], pfile);
+
+        bts.CreateGtestFilters_api(pfile, filter_data);
+    }
+
+    inData.params  = params;
+    inData.ops     = ops;
+    inData.pfile   = pfile;
+    // Per-thread bookkeeping is only allocated when pfile->fileread is not 1.
+    // NOTE(review): the malloc results are not checked - confirm the tests
+    // tolerate NULL here (e.g. when n_app_threads is 0).
+    if(pfile->fileread != 1) {
+        inData.pthread = (bli_pthread_t *)malloc( sizeof( bli_pthread_t ) * params->n_app_threads );
+        inData.tdata   = (thread_data_t *)malloc( sizeof( thread_data_t ) * params->n_app_threads );
+    }
+
+    // The vector stores a copy of inData; the pointers inside are shared.
+    inputData.push_back(inData);
+
+    // The filter is set before InitGoogleTest() parses the command line.
+    ::testing::GTEST_FLAG(filter) = filter_data.c_str();
+    testing::InitGoogleTest(&argc, argv);
+    int retval = RUN_ALL_TESTS();
+
+    if(pfile->fileread != 1) {
+        free( inData.pthread );
+        free( inData.tdata );
+    }
+
+    return retval;
+}
--- a/gtestsuite/src/ref_addm.cpp
+++ b/gtestsuite/src/ref_addm.cpp
@@ -0,0 +1,130 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_addm.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> ADDM performs matrix operations
+//*>    B := B + transa(A)
+//*>    where B is an m x n matrix.
+//*  ==========================================================================
+
+// Reference ADDM for real types: Y(i, j) := Y(i, j) + X(i, j) for an M x N
+// matrix, with element (i, j) addressed as ptr[i*rs + j*cs].
+// YY and the template parameter U are not used by this routine.
+template <typename T, typename U>
+void libblis_iaddm_check(dim_t M, dim_t N, T* X, dim_t rsx, dim_t csx,
+                                      T* Y, dim_t rsy, dim_t csy, T* YY) {
+
+  // Empty matrix: nothing to accumulate.
+  if ( M == 0 || N == 0 ) return;
+
+  for( dim_t row = 0 ; row < M ; ++row ) {
+    for( dim_t col = 0 ; col < N ; ++col ) {
+      Y[row*rsy + col*csy] += X[row*rsx + col*csx];
+    }
+  }
+}
+
+// Reference ADDM for complex types: Y(i, j) := Y(i, j) + conjx(X(i, j)) for
+// an M x N matrix, with element (i, j) addressed as ptr[i*rs + j*cs].
+// When conjx is set, X is conjugated IN PLACE in a first pass, so the caller's
+// X buffer is modified. The template parameter U is not used.
+template <typename T, typename U>
+void libblis_icaddm_check(dim_t M, dim_t N, T* X, dim_t rsx, dim_t csx,
+                               conj_t conjx, T* Y, dim_t rsy, dim_t csy) {
+
+  // Empty matrix: nothing to do.
+  if ( M == 0 || N == 0 ) return;
+
+  // Pass 1 (optional): conjugate every element of X.
+  if(conjx) {
+    for( dim_t row = 0 ; row < M ; ++row ) {
+      for( dim_t col = 0 ; col < N ; ++col ) {
+        const dim_t xi = row*rsx + col*csx;
+        X[xi] = conjugate<T>(X[xi]);
+      }
+    }
+  }
+
+  // Pass 2: accumulate X into Y.
+  for( dim_t row = 0 ; row < M ; ++row ) {
+    for( dim_t col = 0 ; col < N ; ++col ) {
+      const dim_t yi = row*rsy + col*csy;
+      Y[yi] = addc<T>(Y[yi], X[row*rsx + col*csx]);
+    }
+  }
+}
+
+// Checks the result of an ADDM operation.
+// The reference sum is computed into the buffer of y_orig (Y) from x, then
+// compared against the buffer of y (YY), which holds the result under test.
+// Both Y and YY are addressed with the strides of y.
+// Returns the residual reported by computediffrm() (real types) or
+// computediffim() (complex types); 0.0 is returned for an unsupported
+// datatype after bli_check_error_code() has been invoked.
+// params is currently unused.
+double libblis_test_iaddm_check(
+  test_params_t* params,
+  obj_t*  x,
+  obj_t*  y,
+  obj_t*  y_orig
+) {
+  num_t  dt    = bli_obj_dt( x );
+  bool  transx = bli_obj_has_trans( x );
+  conj_t conjx = bli_obj_conj_status( x );
+  dim_t  M     = bli_obj_length( y );
+  dim_t  N     = bli_obj_width( y );
+  dim_t  rsy   = bli_obj_row_stride( y ) ;
+  dim_t  csy   = bli_obj_col_stride( y ) ;
+  double resid = 0.0;
+
+  // Effective strides of x: a transposed x is read by swapping its row and
+  // column strides. The storage format of x does not affect this choice (the
+  // former column-stored/row-stored branches were identical).
+  dim_t  rsx = transx ? bli_obj_col_stride( x ) : bli_obj_row_stride( x ) ;
+  dim_t  csx = transx ? bli_obj_row_stride( x ) : bli_obj_col_stride( x ) ;
+
+  switch( dt )  {
+    case BLIS_FLOAT :
+    {
+      float*   X        = (float*) bli_obj_buffer( x );
+      float*   Y        = (float*) bli_obj_buffer( y_orig );
+      float*   YY       = (float*) bli_obj_buffer( y );
+      libblis_iaddm_check<float, int32_t>( M, N, X, rsx, csx,
+                                                 Y, rsy, csy, YY );
+      resid = computediffrm(M, N, YY, Y, rsy, csy);
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double*   X       = (double*) bli_obj_buffer( x );
+      double*   Y       = (double*) bli_obj_buffer( y_orig );
+      double*   YY      = (double*) bli_obj_buffer( y );
+      libblis_iaddm_check<double, int64_t>( M, N, X, rsx, csx,
+                                                  Y, rsy, csy, YY );
+      resid = computediffrm(M, N, YY, Y, rsy, csy);
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex*   X     = (scomplex*) bli_obj_buffer( x );
+      scomplex*   Y     = (scomplex*) bli_obj_buffer( y_orig );
+      scomplex*   YY    = (scomplex*) bli_obj_buffer( y );
+      libblis_icaddm_check<scomplex, int32_t>( M, N, X, rsx, csx,
+                                              conjx, Y, rsy, csy );
+      resid = computediffim(M, N, YY, Y, rsy, csy);
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex*   X     = (dcomplex*) bli_obj_buffer( x );
+      dcomplex*   Y     = (dcomplex*) bli_obj_buffer( y_orig );
+      dcomplex*   YY    = (dcomplex*) bli_obj_buffer( y );
+      libblis_icaddm_check<dcomplex, int64_t>( M, N, X, rsx, csx,
+                                              conjx, Y, rsy, csy );
+      resid = computediffim(M, N, YY, Y, rsy, csy);
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return resid;
+}
+
--- a/gtestsuite/src/ref_addv.cpp
+++ b/gtestsuite/src/ref_addv.cpp
@@ -0,0 +1,117 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_addv.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> ADDV performs vector operations
+//*>    y := y + conjx(x)
+//*>    where x and y are vectors of length n.
+//*  ==========================================================================
+
+// Reference ADDV for real element types.
+//   Y[k*incy] := Y[k*incy] + X[k*incx]   for k = 0 .. len-1
+// Y is updated in place; X is only read. Template parameter U is not used
+// by this routine.
+template <typename T, typename U>
+void libblis_iaddv_check(dim_t len, T* X, dim_t incx, T* Y, dim_t incy) {
+
+  // Nothing to accumulate for an empty vector.
+  if (len == 0) {
+    return;
+  }
+
+  for(dim_t k = 0 ; k < len ; ++k) {
+    T& yk = Y[k * incy];
+    yk = yk + X[k * incx];
+  }
+}
+
+// Reference ADDV for complex element types.
+//   Y[k*incy] := Y[k*incy] + conjx( X[k*incx] )   for k = 0 .. len-1
+// Side effect: when cfx is true every visited element of X is replaced by
+// its conjugate before the accumulation, so X is modified in place.
+// Template parameter U is not used by this routine.
+template <typename T, typename U>
+void libblis_icaddv_check(dim_t len, T* X, dim_t incx, T* Y,
+                                                   dim_t incy, bool cfx) {
+  if (len == 0) {
+    return;
+  }
+
+  // Pass 1 (optional): conjugate X in place.
+  if(cfx) {
+    for(dim_t k = 0 ; k < len ; ++k) {
+      T& xk = X[k * incx];
+      xk = conjugate<T>(xk);
+    }
+  }
+
+  // Pass 2: accumulate the (possibly conjugated) X into Y.
+  for(dim_t k = 0 ; k < len ; ++k) {
+    T& yk = Y[k * incy];
+    yk = addc<T>(yk , X[k * incx]);
+  }
+}
+
+// Computes the residual of an ADDV test.
+// The reference result is accumulated into the buffer of y_orig (which is
+// overwritten) and then compared with the buffer of y through
+// computediffrv (real types) or computediffiv (complex types).
+// For complex types the buffer of x is conjugated in place when x carries
+// the conjugation flag. The params, alpha and beta arguments are not
+// referenced. For a datatype outside the four handled cases
+// bli_check_error_code( BLIS_INVALID_DATATYPE ) is invoked and the initial
+// residual 0.0 is returned.
+double libblis_test_iaddv_check(
+  test_params_t* params,
+  obj_t*  alpha,
+  obj_t*  beta,
+  obj_t*  x,
+  obj_t*  y,
+  obj_t*  y_orig
+) {
+  double residual = 0.0;
+  num_t  dtype    = bli_obj_dt( x );
+  dim_t  n        = bli_obj_vector_dim( x );
+  dim_t  xinc     = bli_obj_vector_inc( x );
+  dim_t  yinc     = bli_obj_vector_inc( y_orig );
+  bool   conj_x   = bli_obj_has_conj( x );
+
+  switch( dtype )  {
+    case BLIS_FLOAT :
+    {
+      float* xbuf   = (float*) bli_obj_buffer( x );
+      float* refbuf = (float*) bli_obj_buffer( y_orig );
+      float* outbuf = (float*) bli_obj_buffer( y );
+      libblis_iaddv_check<float, int32_t>( n, xbuf, xinc, refbuf, yinc );
+      residual = computediffrv(n, yinc, outbuf, refbuf);
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double* xbuf   = (double*) bli_obj_buffer( x );
+      double* refbuf = (double*) bli_obj_buffer( y_orig );
+      double* outbuf = (double*) bli_obj_buffer( y );
+      libblis_iaddv_check<double, int64_t>( n, xbuf, xinc, refbuf, yinc );
+      residual = computediffrv(n, yinc, outbuf, refbuf);
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* xbuf   = (scomplex*) bli_obj_buffer( x );
+      scomplex* refbuf = (scomplex*) bli_obj_buffer( y_orig );
+      scomplex* outbuf = (scomplex*) bli_obj_buffer( y );
+      libblis_icaddv_check<scomplex, int32_t>( n, xbuf, xinc, refbuf, yinc, conj_x );
+      residual = computediffiv(n, yinc, outbuf, refbuf);
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* xbuf   = (dcomplex*) bli_obj_buffer( x );
+      dcomplex* refbuf = (dcomplex*) bli_obj_buffer( y_orig );
+      dcomplex* outbuf = (dcomplex*) bli_obj_buffer( y );
+      libblis_icaddv_check<dcomplex, int64_t>( n, xbuf, xinc, refbuf, yinc, conj_x );
+      residual = computediffiv(n, yinc, outbuf, refbuf);
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return residual;
+}
+
--- a/gtestsuite/src/ref_amaxv.cpp
+++ b/gtestsuite/src/ref_amaxv.cpp
@@ -0,0 +1,105 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_amaxv.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> Given a vector of length n, return the zero-based index of
+//*> the element of vector x that contains the largest absolute value
+//*> (or, in the complex domain, the largest complex modulus).
+//*  ==========================================================================
+
+// Reference AMAXV for real element types.
+// Returns the zero-based element position (not the strided offset) of the
+// entry of X with the largest abs() value. The comparison is strict, so on
+// ties the earliest position is kept. Returns 0 when len is 0.
+template <typename T>
+dim_t libblis_iamaxv_check(dim_t len, T* X, dim_t incx) {
+
+  if (len == 0) {
+      return 0;
+  }
+
+  // Seed the running maximum with the first element.
+  dim_t best     = 0;
+  T     best_abs = abs(X[0]);
+
+  for(dim_t k = 0 ; k < len ; ++k) {
+    const dim_t pos = k * incx;
+    if(abs(X[pos]) > best_abs) {
+      best     = k;
+      best_abs = abs(X[pos]);
+    }
+  }
+
+  return best;
+}
+
+// Reference AMAXV for complex element types.
+// Returns the zero-based element position (not the strided offset) of the
+// entry of X for which abscomplex() is largest; the running maximum is
+// kept in a value of type U. The comparison is strict, so on ties the
+// earliest position is kept. Returns 0 when len is 0.
+template <typename T, typename U>
+dim_t libblis_icamaxv_check(dim_t len, T* X, dim_t incx) {
+
+  if (len == 0) {
+      return 0;
+  }
+
+  // Seed the running maximum with the first element.
+  dim_t best     = 0;
+  U     best_abs = abscomplex(X[0]);
+
+  for(dim_t k = 0 ; k < len ; ++k) {
+    const dim_t pos = k * incx;
+    if(abscomplex(X[pos]) > best_abs) {
+      best     = k;
+      best_abs = abscomplex(X[pos]);
+    }
+  }
+
+  return best;
+}
+
+// Computes the residual of an AMAXV test.
+// A reference index is computed from the buffer of x and subtracted from
+// the value stored in the buffer of index (read as f77_int); the signed
+// difference is returned as a double, so 0.0 means both indices agree.
+// The params argument is not referenced. For a datatype outside the four
+// handled cases bli_check_error_code( BLIS_INVALID_DATATYPE ) is invoked,
+// the index buffer is not read and 0.0 is returned.
+double libblis_test_iamaxv_check(
+  test_params_t* params,
+  obj_t*  x,
+  obj_t*  index
+){
+  num_t   dt    = bli_obj_dt( x );
+  dim_t   M     = bli_obj_vector_dim( x );
+  f77_int incx  = bli_obj_vector_inc( x );
+  double  resid = 0.0;
+  dim_t   ind   = 0;
+  bool    dt_supported = true;
+
+  // Step 1: reference index for the datatype at hand.
+  switch( dt )  {
+    case BLIS_FLOAT :
+      ind = libblis_iamaxv_check<float>( M, (float*) bli_obj_buffer( x ), incx );
+      break;
+    case BLIS_DOUBLE :
+      ind = libblis_iamaxv_check<double>( M, (double*) bli_obj_buffer( x ), incx );
+      break;
+    case BLIS_SCOMPLEX :
+      ind = libblis_icamaxv_check<scomplex, float>( M, (scomplex*) bli_obj_buffer( x ), incx );
+      break;
+    case BLIS_DCOMPLEX :
+      ind = libblis_icamaxv_check<dcomplex, double>( M, (dcomplex*) bli_obj_buffer( x ), incx );
+      break;
+    default :
+      dt_supported = false;
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  // Step 2: compare with the index produced by the library.
+  if( dt_supported ) {
+    f77_int* indx = (f77_int*) bli_obj_buffer( index );
+    resid = (double)(*indx - ind);
+  }
+
+  return resid;
+}
--- a/gtestsuite/src/ref_axpbyv.cpp
+++ b/gtestsuite/src/ref_axpbyv.cpp
@@ -0,0 +1,202 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_axpbyv.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> AXPBYV performs vector operations
+//*>    y := beta * y + alpha * conjx(x)
+//*>    where x and y are vectors of length n, and alpha and beta are scalars
+//*  ==========================================================================
+
+// Reference AXPBYV for real element types.
+//   y := beta * y + alpha * x
+// Carried out in three passes: Y is scaled by beta, X is scaled by alpha,
+// then X is added to Y. Side effect: X is overwritten with alpha * x.
+// A scalar equal to 1 skips its scaling pass; a scalar equal to 0 stores
+// zeros instead of multiplying. Template parameter U is not used by this
+// routine.
+template <typename T, typename U>
+void libblis_iaxpbyv_check(dim_t len, T* alpha, T* X, dim_t incx,
+                                              T* beta, T* Y, dim_t incy) {
+
+  T kOne   = 1.0 ;
+  T kZero  = 0.0 ;
+  T xscale = alpha[0];
+  T yscale = beta[0];
+
+  if (len == 0){
+    return;
+  }
+
+  // Pass 1: y := beta * y
+  if (yscale == kZero) {
+    for(dim_t k = 0 ; k < len ; ++k) {
+      Y[k * incy] = kZero;
+    }
+  }
+  else if (yscale != kOne) {
+    for(dim_t k = 0 ; k < len ; ++k) {
+      Y[k * incy] = yscale*Y[k * incy];
+    }
+  }
+
+  // Pass 2: x := alpha * x (in place)
+  if (xscale == kZero) {
+    for(dim_t k = 0 ; k < len ; ++k) {
+      X[k * incx] = kZero;
+    }
+  }
+  else if (xscale != kOne) {
+    for(dim_t k = 0 ; k < len ; ++k) {
+      X[k * incx] = xscale*X[k * incx];
+    }
+  }
+
+  // Pass 3: y := y + x
+  for(dim_t k = 0 ; k < len ; ++k) {
+    Y[k * incy] = Y[k * incy] + X[k * incx];
+  }
+}
+
+// Reference AXPBYV for complex element types.
+//   y := beta * y + alpha * conjx(x)
+// Side effect: X is modified in place. It is conjugated when cfx is true
+// and then overwritten with alpha * x (or with zeros when alpha is 0+0i).
+// Y is always multiplied by beta unless beta is 0+0i, in which case zeros
+// are stored. Template parameter U is not used by this routine.
+template <typename T, typename U>
+void libblis_icaxpbyv_check(dim_t len, T* alpha, T* X, dim_t incx,
+                                       T* beta, T* Y, dim_t incy, bool cfx) {
+  T ZERO   = {0.0 , 0.0};
+  T xscale = *alpha;
+  T yscale = *beta;
+
+  if (len == 0) {
+    return;
+  }
+
+  // Optional in-place conjugation of x.
+  if(cfx) {
+    for(dim_t k = 0 ; k < len ; ++k) {
+      X[k * incx] = conjugate<T>(X[k * incx]);
+    }
+  }
+
+  // y := beta * y
+  const bool beta_is_zero = (yscale.real == ZERO.real) && (yscale.imag == ZERO.imag);
+  for(dim_t k = 0 ; k < len ; ++k) {
+    if (beta_is_zero) {
+      Y[k * incy] = ZERO;
+    }
+    else {
+      Y[k * incy] = mulc<T>(yscale , Y[k * incy]);
+    }
+  }
+
+  // x := alpha * x
+  const bool alpha_is_zero = (xscale.real == ZERO.real) && (xscale.imag == ZERO.imag);
+  for(dim_t k = 0 ; k < len ; ++k) {
+    if (alpha_is_zero) {
+      X[k * incx] = ZERO;
+    }
+    else {
+      X[k * incx] = mulc<T>(xscale , X[k * incx]);
+    }
+  }
+
+  // y := y + x
+  for(dim_t k = 0 ; k < len ; ++k) {
+    Y[k * incy] = addc<T>(Y[k * incy] , X[k * incx]);
+  }
+}
+
+// Computes the residual of an AXPBYV test.
+// The reference result is built in the buffer of y_orig (which is
+// overwritten; the buffer of x is modified by the reference routine as
+// well) and compared with the buffer of y through computediffrv (real
+// types) or computediffiv (complex types). The params argument is not
+// referenced. For a datatype outside the four handled cases
+// bli_check_error_code( BLIS_INVALID_DATATYPE ) is invoked and the initial
+// residual 0.0 is returned.
+double libblis_test_iaxpbyv_check(
+  test_params_t* params,
+  obj_t*  alpha,
+  obj_t*  x,
+  obj_t*  beta,
+  obj_t*  y,
+  obj_t*  y_orig
+) {
+  double  residual = 0.0;
+  num_t   dtype    = bli_obj_dt( x );
+  dim_t   n        = bli_obj_vector_dim( x );
+  f77_int xinc     = bli_obj_vector_inc( x );
+  f77_int yinc     = bli_obj_vector_inc( y_orig );
+  bool    conj_x   = bli_obj_has_conj( x );
+
+  switch( dtype )  {
+    case BLIS_FLOAT :
+    {
+      float* a      = (float*) bli_obj_buffer( alpha );
+      float* b      = (float*) bli_obj_buffer( beta );
+      float* xbuf   = (float*) bli_obj_buffer( x );
+      float* refbuf = (float*) bli_obj_buffer( y_orig );
+      float* outbuf = (float*) bli_obj_buffer( y );
+      libblis_iaxpbyv_check<float, int32_t>( n, a, xbuf, xinc,
+                                             b, refbuf, yinc );
+      residual = computediffrv(n, yinc, outbuf, refbuf);
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double* a      = (double*) bli_obj_buffer( alpha );
+      double* b      = (double*) bli_obj_buffer( beta );
+      double* xbuf   = (double*) bli_obj_buffer( x );
+      double* refbuf = (double*) bli_obj_buffer( y_orig );
+      double* outbuf = (double*) bli_obj_buffer( y );
+      libblis_iaxpbyv_check<double, int64_t>( n, a, xbuf, xinc,
+                                              b, refbuf, yinc );
+      residual = computediffrv(n, yinc, outbuf, refbuf);
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* a      = (scomplex*) bli_obj_buffer( alpha );
+      scomplex* b      = (scomplex*) bli_obj_buffer( beta );
+      scomplex* xbuf   = (scomplex*) bli_obj_buffer( x );
+      scomplex* refbuf = (scomplex*) bli_obj_buffer( y_orig );
+      scomplex* outbuf = (scomplex*) bli_obj_buffer( y );
+      libblis_icaxpbyv_check<scomplex, int32_t>( n, a, xbuf, xinc,
+                                                 b, refbuf, yinc, conj_x );
+      residual = computediffiv(n, yinc, outbuf, refbuf);
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* a      = (dcomplex*) bli_obj_buffer( alpha );
+      dcomplex* b      = (dcomplex*) bli_obj_buffer( beta );
+      dcomplex* xbuf   = (dcomplex*) bli_obj_buffer( x );
+      dcomplex* refbuf = (dcomplex*) bli_obj_buffer( y_orig );
+      dcomplex* outbuf = (dcomplex*) bli_obj_buffer( y );
+      libblis_icaxpbyv_check<dcomplex, int64_t>( n, a, xbuf, xinc,
+                                                 b, refbuf, yinc, conj_x );
+      residual = computediffiv(n, yinc, outbuf, refbuf);
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return residual;
+}
--- a/gtestsuite/src/ref_axpy2v.cpp
+++ b/gtestsuite/src/ref_axpy2v.cpp
@@ -0,0 +1,231 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_axpy2v.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> AXPY2V performs vector operations
+//*>    z := z + alphax * conjx(x) + alphay * conjy(y)
+//*>    where x, y, and z are vectors of length m. The kernel is implemented
+//*>    as a fused pair of calls to axpyv.
+//*  ==========================================================================
+
+// Reference AXPY2V for real element types.
+//   z := z + alphax * x + alphay * y
+// Side effect: X and Y are overwritten with alphax * x and alphay * y
+// before being added to Z. A scalar equal to 1 skips its scaling pass; a
+// scalar equal to 0 stores zeros instead of multiplying. Template
+// parameter U is not used by this routine.
+template <typename T, typename U>
+void libblis_iaxpy2v_check(dim_t len, T* alphax, T* alphay, T* X, dim_t incx,
+                                        T* Y, dim_t incy, T* Z, dim_t incz) {
+  T kOne   = 1.0 ;
+  T kZero  = 0.0 ;
+  T xscale = alphax[0];
+  T yscale = alphay[0];
+
+  if (len == 0){
+    return;
+  }
+
+  // x := alphax * x (in place)
+  if (xscale == kZero) {
+    for(dim_t k = 0 ; k < len ; ++k) {
+      X[k * incx] = kZero;
+    }
+  }
+  else if (xscale != kOne) {
+    for(dim_t k = 0 ; k < len ; ++k) {
+      X[k * incx] = xscale * X[k * incx];
+    }
+  }
+
+  // y := alphay * y (in place)
+  if (yscale == kZero) {
+    for(dim_t k = 0 ; k < len ; ++k) {
+      Y[k * incy] = kZero;
+    }
+  }
+  else if (yscale != kOne) {
+    for(dim_t k = 0 ; k < len ; ++k) {
+      Y[k * incy] = yscale * Y[k * incy];
+    }
+  }
+
+  // z := z + x + y
+  for(dim_t k = 0 ; k < len ; ++k) {
+    Z[k * incz] = Z[k * incz] + X[k * incx] + Y[k * incy] ;
+  }
+}
+
+// Reference AXPY2V for complex element types.
+//   z := z + alphax * conjx(x) + alphay * conjy(y)
+// Side effect: X and Y are modified in place. Each is conjugated when its
+// flag (cfx / cfy) is true and then scaled by *alphax / *alphay; Z
+// receives the accumulated result. Template parameter U is not used by
+// this routine.
+template <typename T, typename U>
+void libblis_icaxpy2v_check(dim_t len, T* alphax, T* alphay, T* X, dim_t incx,
+                      bool cfx, T* Y, dim_t incy, bool cfy, T* Z, dim_t incz) {
+
+  dim_t i, ix, iy, iz;
+  T ONE, ZERO;
+  ONE  = {1.0 , 0.0};
+  ZERO = {0.0 , 0.0};
+  T Alphax = *alphax;
+  T Alphay = *alphay;
+
+  if (len == 0) {
+    return;
+  }
+
+  if(cfx) {
+    ix = 0;
+    for(i = 0 ; i < len ; i++) {
+      X[ix] = conjugate<T>(X[ix]);
+      ix = ix + incx;
+    }
+  }
+
+  if(cfy) {
+    iy = 0;
+    for(i = 0 ; i < len ; i++) {
+      Y[iy] = conjugate<T>(Y[iy]);
+      iy = iy + incy;
+    }
+  }
+
+  // Scaling is skipped only when the scalar is exactly 1 + 0i, i.e. it is
+  // applied when the real part differs from 1 OR the imaginary part differs
+  // from 0. Requiring both to differ would leave X unscaled for scalars
+  // such as 2 + 0i or 1 + 1i.
+  if ((Alphax.real != ONE.real) || (Alphax.imag != ONE.imag)) {
+    ix = 0;
+    if ((Alphax.real == ZERO.real) && (Alphax.imag == ZERO.imag)) {
+      for(i = 0; i < len ; i++) {
+        X[ix] = ZERO;
+        ix = ix + incx;
+      }
+    }
+    else {
+      for(i = 0 ; i < len ; i++) {
+        X[ix] = mulc<T>(Alphax , X[ix]);
+        ix = ix + incx;
+      }
+    }
+  }
+
+  // Same rule for alphay: skip only for exactly 1 + 0i.
+  if ((Alphay.real != ONE.real) || (Alphay.imag != ONE.imag)) {
+    iy = 0;
+    if ((Alphay.real == ZERO.real) && (Alphay.imag == ZERO.imag)) {
+      for(i = 0; i < len ; i++) {
+        Y[iy] = ZERO;
+        iy = iy + incy;
+      }
+    }
+    else {
+      for(i = 0 ; i < len ; i++) {
+        Y[iy] = mulc<T>(Alphay , Y[iy]);
+        iy = iy + incy;
+      }
+    }
+  }
+
+  // z := z + x + y, component by component.
+  ix = 0;
+  iy = 0;
+  iz = 0;
+  for(i = 0 ; i < len ; i++) {
+    auto xx = X[ix];
+    auto yy = Y[iy];
+    auto zz = Z[iz];
+    zz.real = zz.real + xx.real + yy.real ;
+    zz.imag = zz.imag + xx.imag + yy.imag ;
+    Z[iz] = zz;
+    ix = ix + incx;
+    iy = iy + incy;
+    iz = iz + incz;
+  }
+
+  return;
+}
+
+// Computes the residual of an AXPY2V test.
+// The reference result is accumulated into the buffer of z_orig (which is
+// overwritten; the buffers of x and y are modified by the reference
+// routine as well) and compared with the buffer of z through
+// computediffrv (real types) or computediffiv (complex types). Datatype,
+// length and the z increment are all taken from z. The params argument is
+// not referenced. For a datatype outside the four handled cases
+// bli_check_error_code( BLIS_INVALID_DATATYPE ) is invoked and the initial
+// residual 0.0 is returned.
+double libblis_test_iaxpy2v_check (
+  test_params_t* params,
+  obj_t*         alphax,
+  obj_t*         alphay,
+  obj_t*         x,
+  obj_t*         y,
+  obj_t*         z,
+  obj_t*         z_orig
+) {
+  double  residual = 0.0;
+  num_t   dtype    = bli_obj_dt( z );
+  dim_t   n        = bli_obj_vector_dim( z );
+  f77_int xinc     = bli_obj_vector_inc( x );
+  f77_int yinc     = bli_obj_vector_inc( y );
+  f77_int zinc     = bli_obj_vector_inc( z );
+  bool    conj_x   = bli_obj_has_conj( x );
+  bool    conj_y   = bli_obj_has_conj( y );
+
+  switch( dtype )  {
+    case BLIS_FLOAT :
+    {
+      float* ax     = (float*) bli_obj_buffer( alphax );
+      float* ay     = (float*) bli_obj_buffer( alphay );
+      float* xbuf   = (float*) bli_obj_buffer( x );
+      float* ybuf   = (float*) bli_obj_buffer( y );
+      float* refbuf = (float*) bli_obj_buffer( z_orig );
+      float* outbuf = (float*) bli_obj_buffer( z );
+      libblis_iaxpy2v_check<float, int32_t>( n, ax, ay, xbuf, xinc,
+                                             ybuf, yinc, refbuf, zinc );
+      residual = computediffrv(n, zinc, outbuf, refbuf);
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double* ax     = (double*) bli_obj_buffer( alphax );
+      double* ay     = (double*) bli_obj_buffer( alphay );
+      double* xbuf   = (double*) bli_obj_buffer( x );
+      double* ybuf   = (double*) bli_obj_buffer( y );
+      double* refbuf = (double*) bli_obj_buffer( z_orig );
+      double* outbuf = (double*) bli_obj_buffer( z );
+      libblis_iaxpy2v_check<double, int64_t>( n, ax, ay, xbuf, xinc,
+                                              ybuf, yinc, refbuf, zinc );
+      residual = computediffrv(n, zinc, outbuf, refbuf);
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* ax     = (scomplex*) bli_obj_buffer( alphax );
+      scomplex* ay     = (scomplex*) bli_obj_buffer( alphay );
+      scomplex* xbuf   = (scomplex*) bli_obj_buffer( x );
+      scomplex* ybuf   = (scomplex*) bli_obj_buffer( y );
+      scomplex* refbuf = (scomplex*) bli_obj_buffer( z_orig );
+      scomplex* outbuf = (scomplex*) bli_obj_buffer( z );
+      libblis_icaxpy2v_check<scomplex, int32_t>( n, ax, ay, xbuf, xinc,
+                                   conj_x, ybuf, yinc, conj_y, refbuf, zinc );
+      residual = computediffiv(n, zinc, outbuf, refbuf);
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* ax     = (dcomplex*) bli_obj_buffer( alphax );
+      dcomplex* ay     = (dcomplex*) bli_obj_buffer( alphay );
+      dcomplex* xbuf   = (dcomplex*) bli_obj_buffer( x );
+      dcomplex* ybuf   = (dcomplex*) bli_obj_buffer( y );
+      dcomplex* refbuf = (dcomplex*) bli_obj_buffer( z_orig );
+      dcomplex* outbuf = (dcomplex*) bli_obj_buffer( z );
+      libblis_icaxpy2v_check<dcomplex, int64_t>( n, ax, ay, xbuf, xinc,
+                                   conj_x, ybuf, yinc, conj_y, refbuf, zinc );
+      residual = computediffiv(n, zinc, outbuf, refbuf);
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return residual;
+}
+
--- a/gtestsuite/src/ref_axpyf.cpp
+++ b/gtestsuite/src/ref_axpyf.cpp
@@ -0,0 +1,156 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_axpyv.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> AXPYF performs vector operations
+//*>    y := y + alpha * conja(A) * conjx(x)
+//*>    where A is an m x b matrix, and y and x are vectors.
+//*>    The kernel  is implemented as a fused series of calls to axpyv
+//*>    where b is less than or equal to an implementation-dependent
+//*>    fusing factor specific to axpyf
+//*  ==========================================================================
+
+// Reference AXPYF for real element types.
+//   y := y + alpha * A * x
+// A is addressed as A[i*rsa + j*csa] with i in [0, M) and j in [0, N);
+// X supplies one entry per column j and Y one entry per row i. Y is
+// updated in place; A and X are only read. Template parameter U is not
+// used by this routine.
+template <typename T, typename U>
+void libblis_iaxpyf_check(dim_t M, dim_t N, T* alpha, T* A, dim_t rsa,
+                             dim_t csa, T* X, dim_t incx, T* Y, dim_t incy) {
+  T scale = alpha[0];
+  T col_factor;
+
+  if((M == 0) || (N == 0)) {
+    return;
+  }
+
+  // One axpy per column: y += (alpha * x[col]) * A(:, col)
+  for(dim_t col = 0 ; col < N ; ++col) {
+    col_factor = scale * X[col * incx];
+    for(dim_t row = 0 ; row < M ; ++row) {
+      Y[row * incy] = Y[row * incy] + col_factor * A[row*rsa + col*csa];
+    }
+  }
+}
+
+// Reference AXPYF for complex element types.
+//   y := y + alpha * conja(A) * conjx(x)
+// A is addressed as A[i*rsa + j*csa] with i in [0, M) and j in [0, N).
+// Side effect: when cfx is true the N visited entries of X are conjugated
+// in place, and when cfa is true the M x N entries of A are conjugated in
+// place, before the accumulation into Y. Template parameter U is not used
+// by this routine.
+template <typename T, typename U>
+void libblis_icaxpyf_check(dim_t M, dim_t N, T* alpha, T* A, dim_t rsa,
+        dim_t csa, bool cfa, T* X, dim_t incx, bool cfx, T* Y, dim_t incy ) {
+
+  T scale = *alpha;
+  T col_factor;
+
+  if((M == 0) || (N == 0)) {
+    return;
+  }
+
+  // Optional in-place conjugation of x (one entry per column of A).
+  if(cfx) {
+    for(dim_t col = 0 ; col < N ; ++col) {
+      X[col * incx] = conjugate<T>(X[col * incx]);
+    }
+  }
+
+  // Optional in-place conjugation of A.
+  if(cfa) {
+    for(dim_t col = 0 ; col < N ; ++col) {
+      for(dim_t row = 0 ; row < M ; ++row) {
+        T& a_rc = A[row*rsa + col*csa];
+        a_rc = conjugate<T>(a_rc);
+      }
+    }
+  }
+
+  // One axpy per column: y += (alpha * x[col]) * A(:, col)
+  for(dim_t col = 0 ; col < N ; ++col) {
+    col_factor = mulc<T>(scale , X[col * incx]);
+    for(dim_t row = 0 ; row < M ; ++row) {
+      Y[row * incy] = addc<T>(Y[row * incy] , mulc<T>(col_factor , A[row*rsa + col*csa]));
+    }
+  }
+}
+
+// Computes the residual of an AXPYF test.
+// The reference result is accumulated into the buffer of y_orig (which is
+// overwritten; for complex types the buffers of a and x may be conjugated
+// in place by the reference routine) and compared with the buffer of y
+// through computediffrv (real types) or computediffiv (complex types).
+// Datatype, length and increment of the output are taken from y; the
+// column count is the width of a. The params argument is not referenced.
+// For a datatype outside the four handled cases
+// bli_check_error_code( BLIS_INVALID_DATATYPE ) is invoked and the initial
+// residual 0.0 is returned.
+double libblis_test_iaxpyf_check (
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         a,
+  obj_t*         x,
+  obj_t*         y,
+  obj_t*         y_orig
+) {
+  double  residual = 0.0;
+  num_t   dtype    = bli_obj_dt( y );
+  dim_t   rows     = bli_obj_vector_dim( y );
+  dim_t   cols     = bli_obj_width( a );
+  f77_int a_rs     = bli_obj_row_stride( a ) ;
+  f77_int a_cs     = bli_obj_col_stride( a ) ;
+  f77_int xinc     = bli_obj_vector_inc( x );
+  f77_int yinc     = bli_obj_vector_inc( y );
+  bool    conj_a   = bli_obj_has_conj( a );
+  bool    conj_x   = bli_obj_has_conj( x );
+
+  switch( dtype )  {
+    case BLIS_FLOAT :
+    {
+      float* scal   = (float*) bli_obj_buffer( alpha );
+      float* abuf   = (float*) bli_obj_buffer( a );
+      float* xbuf   = (float*) bli_obj_buffer( x );
+      float* refbuf = (float*) bli_obj_buffer( y_orig );
+      float* outbuf = (float*) bli_obj_buffer( y );
+      libblis_iaxpyf_check<float, int32_t>( rows, cols, scal, abuf, a_rs, a_cs,
+                                            xbuf, xinc, refbuf, yinc );
+      residual = computediffrv(rows, yinc, outbuf, refbuf);
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double* scal   = (double*) bli_obj_buffer( alpha );
+      double* abuf   = (double*) bli_obj_buffer( a );
+      double* xbuf   = (double*) bli_obj_buffer( x );
+      double* refbuf = (double*) bli_obj_buffer( y_orig );
+      double* outbuf = (double*) bli_obj_buffer( y );
+      libblis_iaxpyf_check<double, int64_t>( rows, cols, scal, abuf, a_rs, a_cs,
+                                             xbuf, xinc, refbuf, yinc );
+      residual = computediffrv(rows, yinc, outbuf, refbuf);
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* scal   = (scomplex*) bli_obj_buffer( alpha );
+      scomplex* abuf   = (scomplex*) bli_obj_buffer( a );
+      scomplex* xbuf   = (scomplex*) bli_obj_buffer( x );
+      scomplex* refbuf = (scomplex*) bli_obj_buffer( y_orig );
+      scomplex* outbuf = (scomplex*) bli_obj_buffer( y );
+      libblis_icaxpyf_check<scomplex, int32_t>( rows, cols, scal, abuf, a_rs, a_cs,
+                                     conj_a, xbuf, xinc, conj_x, refbuf, yinc );
+      residual = computediffiv(rows, yinc, outbuf, refbuf);
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* scal   = (dcomplex*) bli_obj_buffer( alpha );
+      dcomplex* abuf   = (dcomplex*) bli_obj_buffer( a );
+      dcomplex* xbuf   = (dcomplex*) bli_obj_buffer( x );
+      dcomplex* refbuf = (dcomplex*) bli_obj_buffer( y_orig );
+      dcomplex* outbuf = (dcomplex*) bli_obj_buffer( y );
+      libblis_icaxpyf_check<dcomplex, int64_t>( rows, cols, scal, abuf, a_rs, a_cs,
+                                     conj_a, xbuf, xinc, conj_x, refbuf, yinc );
+      residual = computediffiv(rows, yinc, outbuf, refbuf);
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return residual;
+}
+
--- a/gtestsuite/src/ref_axpym.cpp
+++ b/gtestsuite/src/ref_axpym.cpp
@@ -0,0 +1,137 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_axpym.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> AXPYM performs matrix operations
+//*>    B := B + alpha * transa(A)
+//*>    where B is an m x n matrix.
+//*  ==========================================================================
+
+// Reference AXPYM for real element types.
+//   Y(i,j) := Y(i,j) + alpha * X(i,j)   for i in [0, M), j in [0, N)
+// Elements are addressed as X[i*rsx + j*csx] and Y[i*rsy + j*csy]. Y is
+// updated in place; X is only read. The YY argument and template
+// parameter U are not used by this routine.
+template <typename T, typename U>
+void libblis_iaxpym_check(dim_t M, dim_t N, T* alpha,
+     T* X, dim_t rsx, dim_t csx, T* Y, dim_t rsy, dim_t csy, T* YY) {
+
+  T scale = alpha[0];
+
+  if ((M == 0) || (N == 0)) {
+      return;
+  }
+
+  for(dim_t row = 0 ; row < M ; ++row) {
+    for(dim_t col = 0 ; col < N ; ++col) {
+      T& y_rc = Y[row*rsy + col*csy];
+      y_rc = y_rc + ( scale * X[row*rsx + col*csx] );
+    }
+  }
+}
+
+// Reference AXPYM for complex element types.
+//   Y(i,j) := Y(i,j) + alpha * conjx( X(i,j) )   for i in [0, M), j in [0, N)
+// Elements are addressed as X[i*rsx + j*csx] and Y[i*rsy + j*csy].
+// Side effect: when conjx is non-zero every visited element of X is
+// conjugated in place before the accumulation. Template parameter U is
+// not used by this routine.
+template <typename T, typename U>
+void libblis_icaxpym_check(dim_t M, dim_t N, T* alpha,
+     T* X, dim_t rsx, dim_t csx, conj_t conjx, T* Y, dim_t rsy, dim_t csy) {
+
+  T scale = *alpha;
+
+  if ((M == 0) || (N == 0)) {
+      return;
+  }
+
+  // Pass 1 (optional): conjugate X in place.
+  if(conjx) {
+    for(dim_t row = 0 ; row < M ; ++row) {
+      for(dim_t col = 0 ; col < N ; ++col) {
+        T& x_rc = X[row*rsx + col*csx];
+        x_rc = conjugate<T>(x_rc);
+      }
+    }
+  }
+
+  // Pass 2: Y += alpha * X
+  for(dim_t row = 0 ; row < M ; ++row) {
+    for(dim_t col = 0 ; col < N ; ++col) {
+      T& y_rc = Y[row*rsy + col*csy];
+      y_rc = addc<T>(y_rc , mulc<T>(scale , X[row*rsx + col*csx]));
+    }
+  }
+}
+
+// Computes the residual of an AXPYM test.
+// The reference result is accumulated into the buffer of y_orig (which is
+// overwritten; for complex types the buffer of x may be conjugated in
+// place by the reference routine) and compared with the buffer of y
+// through computediffrm (real types) or computediffim (complex types).
+// The matrix dimensions and the y strides are taken from y. The params
+// argument is not referenced. For a datatype outside the four handled
+// cases bli_check_error_code( BLIS_INVALID_DATATYPE ) is invoked and the
+// initial residual 0.0 is returned.
+double libblis_test_iaxpym_check(
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         x,
+  obj_t*         y,
+  obj_t*         y_orig
+){
+  num_t  dt    = bli_obj_dt( x );
+  bool  transx = bli_obj_has_trans( x );
+  conj_t conjx = bli_obj_conj_status( x );
+  dim_t  M     = bli_obj_length( y );
+  dim_t  N     = bli_obj_width( y );
+  dim_t  rsy   = bli_obj_row_stride( y ) ;
+  dim_t  csy   = bli_obj_col_stride( y ) ;
+  double resid = 0.0;
+
+  // A transposed x is walked with its row and column strides swapped, so
+  // that X(i,j) in the reference loops refers to element (j,i) of the
+  // stored matrix. The selection does not depend on whether x is row- or
+  // column-stored.
+  dim_t  rsx   = transx ? bli_obj_col_stride( x ) : bli_obj_row_stride( x ) ;
+  dim_t  csx   = transx ? bli_obj_row_stride( x ) : bli_obj_col_stride( x ) ;
+
+  switch( dt )  {
+    case BLIS_FLOAT :
+    {
+      float*   Alpha    = (float*) bli_obj_buffer( alpha );
+      float*   X        = (float*) bli_obj_buffer( x );
+      float*   Y        = (float*) bli_obj_buffer( y_orig );
+      float*   YY       = (float*) bli_obj_buffer( y );
+      libblis_iaxpym_check<float, int32_t>( M, N, Alpha, X, rsx, csx,
+                                                 Y, rsy, csy, YY );
+      resid = computediffrm(M, N, YY, Y, rsy, csy);
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double*   Alpha   = (double*) bli_obj_buffer( alpha );
+      double*   X       = (double*) bli_obj_buffer( x );
+      double*   Y       = (double*) bli_obj_buffer( y_orig );
+      double*   YY      = (double*) bli_obj_buffer( y );
+      libblis_iaxpym_check<double, int64_t>( M, N, Alpha, X, rsx, csx,
+                                                  Y, rsy, csy, YY );
+      resid = computediffrm(M, N, YY, Y, rsy, csy);
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex*   Alpha = (scomplex*) bli_obj_buffer( alpha );
+      scomplex*   X     = (scomplex*) bli_obj_buffer( x );
+      scomplex*   Y     = (scomplex*) bli_obj_buffer( y_orig );
+      scomplex*   YY    = (scomplex*) bli_obj_buffer( y );
+      libblis_icaxpym_check<scomplex, int32_t>( M, N, Alpha, X, rsx, csx,
+                                              conjx, Y, rsy, csy );
+      resid = computediffim(M, N, YY, Y, rsy, csy);
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex*   Alpha = (dcomplex*) bli_obj_buffer( alpha );
+      dcomplex*   X     = (dcomplex*) bli_obj_buffer( x );
+      dcomplex*   Y     = (dcomplex*) bli_obj_buffer( y_orig );
+      dcomplex*   YY    = (dcomplex*) bli_obj_buffer( y );
+      libblis_icaxpym_check<dcomplex, int64_t>( M, N, Alpha, X, rsx, csx,
+                                              conjx, Y, rsy, csy );
+      resid = computediffim(M, N, YY, Y, rsy, csy);
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return resid;
+}
+
--- a/gtestsuite/src/ref_axpyv.cpp
+++ b/gtestsuite/src/ref_axpyv.cpp
@@ -0,0 +1,160 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_axpyv.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> AXPYV performs vector operations
+//*>    y := y + alpha * conjx(x)
+//*>    where x and y are vectors of length n, and alpha is a scalar
+//*  ==========================================================================
+
+// Reference AXPYV for real element types.
+//   y := y + alpha * x
+// Side effect: X is overwritten with alpha * x before being added to Y.
+// An alpha equal to 1 skips the scaling pass; an alpha equal to 0 stores
+// zeros instead of multiplying. Template parameter U is not used by this
+// routine.
+template <typename T, typename U>
+void libblis_iaxpyv_check(dim_t len, T* alpha, T* X, dim_t incx,
+                                               T* Y, dim_t incy) {
+
+  T kOne  = 1.0 ;
+  T kZero = 0.0 ;
+  T scale = alpha[0];
+
+  if (len == 0){
+      return;
+  }
+
+  // x := alpha * x (in place)
+  if (scale == kZero) {
+    for(dim_t k = 0 ; k < len ; ++k) {
+      X[k * incx] = kZero;
+    }
+  }
+  else if (scale != kOne) {
+    for(dim_t k = 0 ; k < len ; ++k) {
+      X[k * incx] = scale*X[k * incx];
+    }
+  }
+
+  // y := y + x
+  for(dim_t k = 0 ; k < len ; ++k) {
+    Y[k * incy] = Y[k * incy] + X[k * incx];
+  }
+}
+
+// Reference AXPYV for complex element types.
+//   y := y + alpha * conjx(x)
+// Side effect: X is modified in place. It is conjugated when cfx is true
+// and then scaled by *alpha; Y receives the accumulated result.
+// Template parameter U is not used by this routine.
+template <typename T, typename U>
+void libblis_icaxpyv_check(dim_t len, T* alpha, T* X, dim_t incx,
+                                               T* Y, dim_t incy, bool cfx) {
+  dim_t i, ix, iy;
+  T ONE;
+  ONE  = {1.0 , 0.0};
+  T Alpha = *alpha;
+
+  if(len == 0){
+      return;
+  }
+
+  if(cfx) {
+    ix = 0;
+    for(i = 0 ; i < len ; i++) {
+      X[ix] = conjugate<T>(X[ix]);
+      ix = ix + incx;
+    }
+  }
+
+  /* Form  x := alpha*x. The scaling is skipped only when alpha is exactly */
+  /* 1 + 0i; checking the real part alone would treat e.g. 1 + 2i as 1.    */
+  if ((Alpha.real != ONE.real) || (Alpha.imag != ONE.imag)) {
+    ix = 0;
+    for(i = 0 ; i < len ; i++) {
+      X[ix] = mulc<T>(Alpha , X[ix]);
+      ix = ix + incx;
+    }
+  }
+
+  /* Then form  y := y + x. */
+  ix = 0;
+  iy = 0;
+  for(i = 0 ; i < len ; i++) {
+    Y[iy] = addc<T>(Y[iy] , X[ix]);
+    ix = ix + incx;
+    iy = iy + incy;
+  }
+
+  return;
+}
+
+// Computes the residual of an AXPYV test.
+// The reference result is accumulated into the buffer of y_orig (which is
+// overwritten; the buffer of x is modified by the reference routine as
+// well) and compared with the buffer of y through computediffrv (real
+// types) or computediffiv (complex types). The params argument is not
+// referenced. For a datatype outside the four handled cases
+// bli_check_error_code( BLIS_INVALID_DATATYPE ) is invoked and the initial
+// residual 0.0 is returned.
+double libblis_test_iaxpyv_check(
+  test_params_t* params,
+  obj_t*  alpha,
+  obj_t*  x,
+  obj_t*  y,
+  obj_t*  y_orig
+) {
+  double  residual = 0.0;
+  num_t   dtype    = bli_obj_dt( x );
+  dim_t   n        = bli_obj_vector_dim( x );
+  f77_int xinc     = bli_obj_vector_inc( x );
+  f77_int yinc     = bli_obj_vector_inc( y_orig );
+  bool    conj_x   = bli_obj_has_conj( x );
+
+  switch( dtype )  {
+    case BLIS_FLOAT :
+    {
+      float* scal   = (float*) bli_obj_buffer( alpha );
+      float* xbuf   = (float*) bli_obj_buffer( x );
+      float* refbuf = (float*) bli_obj_buffer( y_orig );
+      float* outbuf = (float*) bli_obj_buffer( y );
+      libblis_iaxpyv_check<float, int32_t>( n, scal, xbuf, xinc,
+                                            refbuf, yinc );
+      residual = computediffrv(n, yinc, outbuf, refbuf);
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double* scal   = (double*) bli_obj_buffer( alpha );
+      double* xbuf   = (double*) bli_obj_buffer( x );
+      double* refbuf = (double*) bli_obj_buffer( y_orig );
+      double* outbuf = (double*) bli_obj_buffer( y );
+      libblis_iaxpyv_check<double, int64_t>( n, scal, xbuf, xinc,
+                                             refbuf, yinc );
+      residual = computediffrv(n, yinc, outbuf, refbuf);
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* scal   = (scomplex*) bli_obj_buffer( alpha );
+      scomplex* xbuf   = (scomplex*) bli_obj_buffer( x );
+      scomplex* refbuf = (scomplex*) bli_obj_buffer( y_orig );
+      scomplex* outbuf = (scomplex*) bli_obj_buffer( y );
+      libblis_icaxpyv_check<scomplex, int32_t>( n, scal, xbuf, xinc,
+                                                refbuf, yinc, conj_x );
+      residual = computediffiv(n, yinc, outbuf, refbuf);
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* scal   = (dcomplex*) bli_obj_buffer( alpha );
+      dcomplex* xbuf   = (dcomplex*) bli_obj_buffer( x );
+      dcomplex* refbuf = (dcomplex*) bli_obj_buffer( y_orig );
+      dcomplex* outbuf = (dcomplex*) bli_obj_buffer( y );
+      libblis_icaxpyv_check<dcomplex, int64_t>( n, scal, xbuf, xinc,
+                                                refbuf, yinc, conj_x );
+      residual = computediffiv(n, yinc, outbuf, refbuf);
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return residual;
+}
--- a/gtestsuite/src/ref_copym.cpp
+++ b/gtestsuite/src/ref_copym.cpp
@@ -0,0 +1,130 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_copym.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> COPYM performs matrix operations
+//*>    B := transa(A)
+//*>    where B is an m x n matrix.
+//*  ==========================================================================
+
+// Reference copy for real types: stores X(i,j) into Y(i,j) for each element of
+// the M x N index space; (rsx, csx) and (rsy, csy) are the element strides.
+// YY and the template parameter U are accepted but never used by this routine.
+template <typename T, typename U>
+void libblis_icopym_check(dim_t M, dim_t N, T* X, dim_t rsx, dim_t csx,
+                                      T* Y, dim_t rsy, dim_t csy, T* YY) {
+  // An empty dimension means there is nothing to copy.
+  if ((M == 0) || (N == 0)) {
+    return;
+  }
+  for(dim_t row = 0 ; row < M ; row++) {
+    const dim_t xrow = row*rsx;
+    const dim_t yrow = row*rsy;
+    for(dim_t col = 0 ; col < N ; col++) {
+      Y[yrow + col*csy] = X[xrow + col*csx];
+    }
+  }
+}
+
+// Reference copy for complex types. When conjx is non-zero, X is first
+// conjugated IN PLACE (the caller's buffer is modified), then every X(i,j) is
+// stored into Y(i,j). Strides are in elements; U is unused.
+template <typename T, typename U>
+void libblis_iccopym_check(dim_t M, dim_t N, T* X, dim_t rsx, dim_t csx,
+                               conj_t conjx, T* Y, dim_t rsy, dim_t csy) {
+  // An empty dimension means there is nothing to copy.
+  if ((M == 0) || (N == 0)) {
+    return;
+  }
+  // Pass 1: overwrite X with its conjugate when requested.
+  if(conjx) {
+    for(dim_t row = 0 ; row < M ; row++) {
+      for(dim_t col = 0 ; col < N ; col++) {
+        T& elem = X[row*rsx + col*csx];
+        elem = conjugate<T>(elem);
+      }
+    }
+  }
+  // Pass 2: plain element-wise copy of the (possibly conjugated) X into Y.
+  for(dim_t row = 0 ; row < M ; row++) {
+    for(dim_t col = 0 ; col < N ; col++) {
+      Y[row*rsy + col*csy] = X[row*rsx + col*csx];
+    }
+  }
+}
+
+// Reference check for copym: rebuilds B := transa(A) into y_orig's buffer
+// and returns the residual between it and y, the output under test.
+// x's trans/conj flags select how the source is read; params is unused.
+double libblis_test_icopym_check(
+  test_params_t* params,
+  obj_t*  x,
+  obj_t*  y,
+  obj_t*  y_orig
+) {
+  num_t  dt    = bli_obj_dt( x );
+  bool  transx = bli_obj_has_trans( x );
+  conj_t conjx = bli_obj_conj_status( x );
+  dim_t  M     = bli_obj_length( y );
+  dim_t  N     = bli_obj_width( y );
+  dim_t  rsy   = bli_obj_row_stride( y ) ;
+  dim_t  csy   = bli_obj_col_stride( y ) ;
+  double resid = 0.0;
+
+  // A transposed source is walked with its row/column strides swapped. The
+  // choice does not depend on whether x is row- or column-stored.
+  dim_t  rsx   = transx ? bli_obj_col_stride( x ) : bli_obj_row_stride( x ) ;
+  dim_t  csx   = transx ? bli_obj_row_stride( x ) : bli_obj_col_stride( x ) ;
+
+  // Per datatype: build the reference in Y, then diff it against YY.
+  switch( dt )  {
+    case BLIS_FLOAT :
+    {
+      float*   X        = (float*) bli_obj_buffer( x );
+      float*   Y        = (float*) bli_obj_buffer( y_orig );
+      float*   YY       = (float*) bli_obj_buffer( y );
+      libblis_icopym_check<float, int32_t>( M, N, X, rsx, csx,
+                                                 Y, rsy, csy, YY );
+      resid = computediffrm(M, N, YY, Y, rsy, csy);
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double*   X       = (double*) bli_obj_buffer( x );
+      double*   Y       = (double*) bli_obj_buffer( y_orig );
+      double*   YY      = (double*) bli_obj_buffer( y );
+      libblis_icopym_check<double, int64_t>( M, N, X, rsx, csx,
+                                                 Y, rsy, csy, YY );
+      resid = computediffrm(M, N, YY, Y, rsy, csy);
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex*   X     = (scomplex*) bli_obj_buffer( x );
+      scomplex*   Y     = (scomplex*) bli_obj_buffer( y_orig );
+      scomplex*   YY    = (scomplex*) bli_obj_buffer( y );
+      libblis_iccopym_check<scomplex, int32_t>( M, N, X, rsx, csx,
+                                              conjx, Y, rsy, csy );
+      resid = computediffim(M, N, YY, Y, rsy, csy);
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex*   X     = (dcomplex*) bli_obj_buffer( x );
+      dcomplex*   Y     = (dcomplex*) bli_obj_buffer( y_orig );
+      dcomplex*   YY    = (dcomplex*) bli_obj_buffer( y );
+      libblis_iccopym_check<dcomplex, int64_t>( M, N, X, rsx, csx,
+                                              conjx, Y, rsy, csy );
+      resid = computediffim(M, N, YY, Y, rsy, csy);
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return resid;
+}
+
--- a/gtestsuite/src/ref_copyv.cpp
+++ b/gtestsuite/src/ref_copyv.cpp
@@ -0,0 +1,114 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_copyv.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> COPYV performs vector operations
+//*>    y := conjx(x)
+//*>    where x and y are vectors of length n.
+//*  ==========================================================================
+
+// Reference copy for real vectors: element k of X (offset k*incx) is stored
+// into element k of Y (offset k*incy) for k in [0, len). U is unused.
+template <typename T, typename U>
+void libblis_icopyv_check(dim_t len, T* X, dim_t incx, T* Y, dim_t incy) {
+  // Zero-length vectors leave Y untouched.
+  if (len == 0) {
+    return;
+  }
+
+  // Running offsets src/dst advance by the respective increments each step.
+  dim_t src = 0;
+  dim_t dst = 0;
+  for(dim_t k = 0 ; k < len ; k++) {
+    Y[dst] = X[src];
+    src += incx;
+    dst += incy;
+  }
+}
+
+// Reference copy for complex vectors. When cfx is set, X is first conjugated
+// IN PLACE (the caller's buffer is modified); afterwards element k of X is
+// stored into element k of Y for k in [0, len). U is unused.
+template <typename T, typename U>
+void libblis_iccopyv_check(dim_t len, T* X, dim_t incx, T* Y,
+                                                   dim_t incy, bool cfx) {
+  // Zero-length vectors leave both buffers untouched.
+  if (len == 0) {
+    return;
+  }
+
+  // Pass 1: overwrite X with its conjugate when requested.
+  if(cfx) {
+    for(dim_t k = 0, pos = 0 ; k < len ; k++, pos += incx) {
+      X[pos] = conjugate<T>(X[pos]);
+    }
+  }
+
+  // Pass 2: strided copy of the (possibly conjugated) X into Y.
+  dim_t src = 0, dst = 0;
+  for(dim_t k = 0 ; k < len ; k++) {
+    Y[dst] = X[src];
+    src += incx;
+    dst += incy;
+  }
+}
+
+// Reference check for copyv: rebuilds y := conjx(x) into y_orig's buffer and
+// returns the residual between it and y, the output under test.
+// For complex types x's buffer is conjugated in place when x carries the
+// conj flag. params is unused.
+double libblis_test_icopyv_check(
+  test_params_t* params,
+  obj_t*  x,
+  obj_t*  y,
+  obj_t*  y_orig
+) {
+  dim_t   n    = bli_obj_vector_dim( x );
+  bool    cfx  = bli_obj_has_conj( x );
+  f77_int incx = bli_obj_vector_inc( x );
+  f77_int incy = bli_obj_vector_inc( y_orig );
+
+  // Each case fills the reference buffer, then diffs it against the output.
+  switch( bli_obj_dt( x ) )  {
+    case BLIS_FLOAT :
+    {
+      float* src = (float*) bli_obj_buffer( x );
+      float* ref = (float*) bli_obj_buffer( y_orig );
+      float* out = (float*) bli_obj_buffer( y );
+      libblis_icopyv_check<float, int32_t>( n, src, incx, ref, incy );
+      return computediffrv(n, incy, out, ref);
+    }
+    case BLIS_DOUBLE :
+    {
+      double* src = (double*) bli_obj_buffer( x );
+      double* ref = (double*) bli_obj_buffer( y_orig );
+      double* out = (double*) bli_obj_buffer( y );
+      libblis_icopyv_check<double, int64_t>( n, src, incx, ref, incy );
+      return computediffrv(n, incy, out, ref);
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* src = (scomplex*) bli_obj_buffer( x );
+      scomplex* ref = (scomplex*) bli_obj_buffer( y_orig );
+      scomplex* out = (scomplex*) bli_obj_buffer( y );
+      libblis_iccopyv_check<scomplex, int32_t>( n, src, incx, ref, incy, cfx );
+      return computediffiv(n, incy, out, ref);
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* src = (dcomplex*) bli_obj_buffer( x );
+      dcomplex* ref = (dcomplex*) bli_obj_buffer( y_orig );
+      dcomplex* out = (dcomplex*) bli_obj_buffer( y );
+      libblis_iccopyv_check<dcomplex, int64_t>( n, src, incx, ref, incy, cfx );
+      return computediffiv(n, incy, out, ref);
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  // Reached only for an unsupported datatype.
+  return 0.0;
+}
--- a/gtestsuite/src/ref_dotaxpyv.cpp
+++ b/gtestsuite/src/ref_dotaxpyv.cpp
@@ -0,0 +1,228 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_dotaxpyv.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> DOTAXPYV performs fused operations
+//*>    rho := conjxt(x^T) * conjy(y)
+//*>      z   := z + alpha * conjx(x)
+//*>    where x, y, and z are vectors of length m and alpha and rho are scalars.
+//*>    The kernel is implemented as a fusion of calls to dotv and axpyv
+//*  ==========================================================================
+
+// Reference dotaxpyv for real types. In order, it
+//   1. accumulates rho = sum_k XT[k*incxt] * Y[k*incy],
+//   2. scales X IN PLACE by *alpha (stores zeros when *alpha == 0, leaves X
+//      alone when *alpha == 1),
+//   3. adds the scaled X into Z.
+// Returns rho (0 when len == 0, in which case nothing is modified).
+// The template parameter U is not used.
+template <typename T, typename U>
+T libblis_idotaxpyv_check(dim_t len, T* alpha, T* XT, dim_t incxt,
+     T* X, dim_t incx, T* Y, dim_t incy, T* Z, dim_t incz ) {
+  T one   = 1.0 ;
+  T zero  = 0.0 ;
+  T scale = alpha[0];
+  T rho   = 0.0;
+
+  if (len == 0) {
+    return rho;
+  }
+
+  // Step 1: dot product, taken before X is modified below.
+  dim_t pxt = 0;
+  dim_t py  = 0;
+  for(dim_t k = 0 ; k < len ; k++) {
+    rho = rho + XT[pxt] * Y[py];
+    pxt += incxt;
+    py  += incy;
+  }
+
+  // Step 2: x := alpha * x, with the two trivial alphas special-cased.
+  if (scale == zero) {
+    for(dim_t k = 0, px = 0 ; k < len ; k++, px += incx) {
+      X[px] = zero;
+    }
+  }
+  else if (scale != one) {
+    for(dim_t k = 0, px = 0 ; k < len ; k++, px += incx) {
+      X[px] = scale*X[px];
+    }
+  }
+
+  // Step 3: z := z + x, using the already-scaled x.
+  for(dim_t k = 0, px = 0, pz = 0 ; k < len ; k++, px += incx, pz += incz) {
+    Z[pz] = Z[pz] + X[px];
+  }
+
+  return rho;
+}
+
+// Reference dotaxpyv for complex types. In order, it
+//   1. conjugates XT and/or Y IN PLACE when cfxt / cfy are set,
+//   2. accumulates rho = sum_k Y[k*incy] * XT[k*incxt],
+//   3. conjugates X in place when cfx differs from cfxt,
+//   4. scales X in place by *alpha (stores zeros when *alpha == {0,0},
+//      leaves X alone when *alpha == {1,0}),
+//   5. adds the scaled X into Z.
+// Returns rho ({0,0} when len == 0, in which case nothing is modified).
+// NOTE(review): step 3 only yields conjx(x) if X and XT share one buffer, so
+// that step 1 already conjugated X when cfxt was set - confirm with callers.
+// The template parameter U is not used.
+template <typename T, typename U>
+T libblis_icdotaxpyv_check(dim_t len, T* alpha, T* XT, dim_t incxt, bool cfxt,
+     T* X, dim_t incx, bool cfx, T* Y, dim_t incy, bool cfy, T* Z, dim_t incz ) {
+
+  dim_t i, ixt, ix, iy, iz;
+  T ONE  = {1.0 , 0.0};
+  T ZERO = {0.0 , 0.0};
+  T Alpha = *alpha;
+
+  T pr = {0.0, 0.0};
+  if (len == 0) {
+      return pr;
+  }
+
+  // Step 1: apply the requested conjugations to the dot-product operands.
+  if(cfxt) {
+    for(i = 0, ixt = 0 ; i < len ; i++, ixt += incxt) {
+      XT[ixt] = conjugate<T>(XT[ixt]);
+    }
+  }
+
+  if(cfy) {
+    for(i = 0, iy = 0 ; i < len ; i++, iy += incy) {
+      Y[iy] = conjugate<T>(Y[iy]);
+    }
+  }
+
+  // Step 2: rho, taken before X is modified below.
+  for(i = 0, ixt = 0, iy = 0 ; i < len ; i++, ixt += incxt, iy += incy) {
+    pr = addc<T>(pr, mulc<T>(Y[iy] , XT[ixt]));
+  }
+
+  // Step 3: bring X to the conjugation state requested by cfx.
+  if(cfx != cfxt) {
+    for(i = 0, ix = 0 ; i < len ; i++, ix += incx) {
+      X[ix] = conjugate<T>(X[ix]);
+    }
+  }
+
+  // Step 4: x := alpha * x. alpha differs from {1,0} as soon as EITHER
+  // component differs; requiring both to differ would skip the scaling for
+  // alphas such as {2,0}, {1,3} or {0,0}.
+  if ((Alpha.real != ONE.real) || (Alpha.imag != ONE.imag)) {
+    if ((Alpha.real == ZERO.real) && (Alpha.imag == ZERO.imag)) {
+      for(i = 0, ix = 0 ; i < len ; i++, ix += incx) {
+        X[ix] = ZERO;
+      }
+    }
+    else {
+      for(i = 0, ix = 0 ; i < len ; i++, ix += incx) {
+        X[ix] = mulc<T>(Alpha, X[ix]);
+      }
+    }
+  }
+
+  // Step 5: z := z + x, using the already-scaled x.
+  for(i = 0, ix = 0, iz = 0 ; i < len ; i++, ix += incx, iz += incz) {
+    Z[iz] = addc<T>(Z[iz] , X[ix]);
+  }
+
+  return pr;
+}
+
+// Reference check for dotaxpyv. Recomputes rho and z (z_orig's buffer is
+// overwritten; x, xt and y buffers may be modified in place) and returns
+// max(|z residual|, |rho error|) versus z and rho_orig. params is unused.
+double libblis_test_idotaxpyv_check (
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         xt,
+  obj_t*         x,
+  obj_t*         y,
+  obj_t*         rho_orig,
+  obj_t*         z,
+  obj_t*         z_orig
+) {
+  num_t  dt     = bli_obj_dt( y );
+  dim_t  M      = bli_obj_vector_dim( z );
+  f77_int incxt = bli_obj_vector_inc( xt );
+  f77_int incx  = bli_obj_vector_inc( x );
+  f77_int incy  = bli_obj_vector_inc( y );
+  f77_int incz  = bli_obj_vector_inc( z );
+  bool cfxt     = bli_obj_has_conj( xt );
+  bool cfx      = bli_obj_has_conj( x );
+  bool cfy      = bli_obj_has_conj( y );
+  double r1 = 0.0, r2 = 0.0;
+  // Magnitude helper so real/imag errors of opposite sign cannot cancel.
+  auto mag = [](double v) { return v < 0.0 ? -v : v; };
+
+  switch( dt )  {
+    case BLIS_FLOAT :
+    {
+      float*   Alpha    = (float*) bli_obj_buffer( alpha );
+      float*   X        = (float*) bli_obj_buffer( x );
+      float*   XT       = (float*) bli_obj_buffer( xt );
+      float*   Y        = (float*) bli_obj_buffer( y );
+      float*   Z        = (float*) bli_obj_buffer( z_orig );
+      float*   ZZ       = (float*) bli_obj_buffer( z );
+      float*   av       = (float*) bli_obj_buffer( rho_orig );
+      float ref = libblis_idotaxpyv_check<float, int32_t>( M, Alpha,
+                                   XT, incxt, X, incx, Y, incy, Z, incz );
+      r1 = computediffrv(M, incz, ZZ, Z);  // z is strided by incz, not incy
+      r2 = (*av - ref);
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double*   Alpha    = (double*) bli_obj_buffer( alpha );
+      double*   X        = (double*) bli_obj_buffer( x );
+      double*   XT       = (double*) bli_obj_buffer( xt );
+      double*   Y        = (double*) bli_obj_buffer( y );
+      double*   Z        = (double*) bli_obj_buffer( z_orig );
+      double*   ZZ       = (double*) bli_obj_buffer( z );
+      double*   av       = (double*) bli_obj_buffer( rho_orig );
+      double ref = libblis_idotaxpyv_check<double, int64_t>( M, Alpha,
+                                   XT, incxt, X, incx, Y, incy, Z, incz );
+      r1 = computediffrv(M, incz, ZZ, Z);  // z is strided by incz, not incy
+      r2 = (*av - ref);
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex*   Alpha  = (scomplex*) bli_obj_buffer( alpha );
+      scomplex*   X      = (scomplex*) bli_obj_buffer( x );
+      scomplex*   XT     = (scomplex*) bli_obj_buffer( xt );
+      scomplex*   Y      = (scomplex*) bli_obj_buffer( y );
+      scomplex*   Z      = (scomplex*) bli_obj_buffer( z_orig );
+      scomplex*   ZZ     = (scomplex*) bli_obj_buffer( z );
+      scomplex*   av     = (scomplex*) bli_obj_buffer( rho_orig );
+      scomplex ref = libblis_icdotaxpyv_check<scomplex, int32_t>( M, Alpha,
+                   XT, incxt, cfxt, X, incx, cfx, Y, incy, cfy, Z, incz );
+      r1 = computediffiv(M, incz, ZZ, Z);  // z is strided by incz, not incy
+      r2 = mag((*av).real - ref.real) + mag((*av).imag - ref.imag);
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex*   Alpha  = (dcomplex*) bli_obj_buffer( alpha );
+      dcomplex*   X      = (dcomplex*) bli_obj_buffer( x );
+      dcomplex*   XT     = (dcomplex*) bli_obj_buffer( xt );
+      dcomplex*   Y      = (dcomplex*) bli_obj_buffer( y );
+      dcomplex*   Z      = (dcomplex*) bli_obj_buffer( z_orig );
+      dcomplex*   ZZ     = (dcomplex*) bli_obj_buffer( z );
+      dcomplex*   av     = (dcomplex*) bli_obj_buffer( rho_orig );
+      dcomplex ref = libblis_icdotaxpyv_check<dcomplex, int64_t>( M, Alpha,
+                   XT, incxt, cfxt, X, incx, cfx, Y, incy, cfy, Z, incz );
+      r1 = computediffiv(M, incz, ZZ, Z);  // z is strided by incz, not incy
+      r2 = mag((*av).real - ref.real) + mag((*av).imag - ref.imag);
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+  return abs(bli_fmaxabs( r1, r2 ));
+}
--- a/gtestsuite/src/ref_dotv.cpp
+++ b/gtestsuite/src/ref_dotv.cpp
@@ -0,0 +1,129 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_dotv.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> DOTV performs vector operations
+//*>    rho := conjx(x)^T * conjy(y)
+//*>    where x and y are vectors of length n, and rho is a scalar.
+//*  ==========================================================================
+
+// Reference dot product for real types:
+//   returns sum over k in [0, len) of X[k*incx] * Y[k*incy],
+// or 0 when len == 0. Neither input is modified; U is unused.
+template <typename T, typename U>
+T libblis_idotv_check(dim_t len, T* X, dim_t incx, T* Y, dim_t incy) {
+  T acc = 0.0;
+  if (len == 0) {
+    return acc;
+  }
+  // Offsets px/py advance by the vector increments on every step.
+  dim_t px = 0;
+  dim_t py = 0;
+  for(dim_t k = 0 ; k < len ; k++) {
+    acc = acc + X[px] * Y[py];
+    px += incx;
+    py += incy;
+  }
+  return acc;
+}
+
+// Reference dot product for complex types. X and Y are conjugated IN PLACE
+// when cfx / cfy are set (the caller's buffers are modified); the function
+// then returns sum over k in [0, len) of Y[k*incy] * X[k*incx], built with
+// mulc/addc. Returns {0,0} without touching anything when len == 0.
+// The template parameter U is not used.
+template <typename T, typename U>
+T libblis_icdotv_check(dim_t len, T* X, dim_t incx, T* Y,
+                                        dim_t incy, bool cfx, bool cfy) {
+  T acc = {0.0, 0.0};
+  if (len == 0) {
+    return acc;
+  }
+
+  // Optional conjugation of each operand, applied to the stored values.
+  if(cfx) {
+    for(dim_t k = 0, px = 0 ; k < len ; k++, px += incx) {
+      X[px] = conjugate<T>(X[px]);
+    }
+  }
+  if(cfy) {
+    for(dim_t k = 0, py = 0 ; k < len ; k++, py += incy) {
+      Y[py] = conjugate<T>(Y[py]);
+    }
+  }
+
+  // Accumulate the products of corresponding elements.
+  dim_t px = 0;
+  dim_t py = 0;
+  for(dim_t k = 0 ; k < len ; k++) {
+    acc = addc<T>(acc, mulc<T>(Y[py] , X[px]));
+    px += incx;
+    py += incy;
+  }
+  return acc;
+}
+
+// Reference check for dotv: recomputes rho from x and y and returns the
+// magnitude of its difference from the value stored in rho (for complex
+// types, |real error| + |imag error|). For complex types the x and y buffers
+// are conjugated in place when the objects carry the conj flag.
+double libblis_test_idotv_check(
+  test_params_t* params,
+  obj_t*  x,
+  obj_t*  y,
+  obj_t*  rho
+) {
+  num_t  dt    = bli_obj_dt( x );
+  dim_t  M     = bli_obj_vector_dim( x );
+  bool cfx     = bli_obj_has_conj( x );
+  bool cfy     = bli_obj_has_conj( y );
+  f77_int incx = bli_obj_vector_inc( x );
+  f77_int incy = bli_obj_vector_inc( y );
+  // A residual must be non-negative: a signed difference could slip under
+  // any threshold, and real/imag errors of opposite sign could cancel.
+  auto mag = [](double v) { return v < 0.0 ? -v : v; };
+
+  switch( dt )  {
+    case BLIS_FLOAT :
+    {
+      float*   X        = (float*) bli_obj_buffer( x );
+      float*   Y        = (float*) bli_obj_buffer( y );
+      float*   av       = (float*) bli_obj_internal_scalar_buffer( rho );
+      float ref = libblis_idotv_check<float, int32_t>( M, X, incx, Y, incy );
+      return mag(*av - ref);
+    }
+    case BLIS_DOUBLE :
+    {
+      double*   X       = (double*) bli_obj_buffer( x );
+      double*   Y       = (double*) bli_obj_buffer( y );
+      double*   av      = (double*) bli_obj_internal_scalar_buffer( rho );
+      double ref = libblis_idotv_check<double, int64_t>( M, X, incx, Y, incy );
+      return mag(*av - ref);
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex*   X     = (scomplex*) bli_obj_buffer( x );
+      scomplex*   Y     = (scomplex*) bli_obj_buffer( y );
+      scomplex*   av    = (scomplex*) bli_obj_internal_scalar_buffer( rho );
+      scomplex ref = libblis_icdotv_check<scomplex, int32_t>( M, X, incx,
+                                                         Y, incy, cfx, cfy );
+      return mag((*av).real - ref.real) + mag((*av).imag - ref.imag);
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex*   X     = (dcomplex*) bli_obj_buffer( x );
+      dcomplex*   Y     = (dcomplex*) bli_obj_buffer( y );
+      dcomplex*   av    = (dcomplex*) bli_obj_internal_scalar_buffer( rho );
+      dcomplex ref = libblis_icdotv_check<dcomplex, int64_t>( M, X, incx,
+                                                         Y, incy, cfx, cfy );
+      return mag((*av).real - ref.real) + mag((*av).imag - ref.imag);
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return 0.0;
+}
--- a/gtestsuite/src/ref_dotxaxpyf.cpp
+++ b/gtestsuite/src/ref_dotxaxpyf.cpp
@@ -0,0 +1,245 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_dotxaxpyf.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> DOTXAXPYF performs fused operations
+//*>     y := beta * y + alpha * conjat(A^T) * conjw(w)
+//*>     z :=        z + alpha * conja(A)    * conjx(x)
+//*>    where A is an m x b matrix, w and z are vectors of length m,
+//*>    x and y are vectors of length b, and alpha and beta are scalars.
+//*>    The kernel is implemented as a fusion of calls to dotxf and axpyf.
+//*  ==========================================================================
+
+// Reference dotxaxpyf for real types, done as two independent sweeps over the
+// N columns of an M-row matrix addressed as [i*rsa + j*csa]:
+//   y[j] := beta * y[j] + alpha * sum_i w[i] * AT(i,j)
+//   z[i] := z[i] + (alpha * x[j]) * A(i,j)      accumulated over all j
+// Y and Z are updated in place; nothing happens when M or N is 0.
+// The template parameter U is not used.
+template <typename T, typename U>
+void libblis_idotxaxpyf_check(dim_t M, dim_t N, T* alpha, T* AT, T* A, dim_t rsa,
+                 dim_t csa, T* W, dim_t incw, T* X, dim_t incx, T* beta, T* Y,
+                 dim_t incy, T* Z, dim_t incz) {
+  T scale_a = alpha[0];
+  T scale_b = beta[0];
+
+  if((M == 0) || (N == 0)) {
+    return;
+  }
+
+  // Sweep 1: one dot product per column feeds one element of y.
+  dim_t py = 0;
+  for(dim_t col = 0 ; col < N ; col++) {
+    T dot = 0.0;
+    dim_t pw = 0;
+    for(dim_t row = 0 ; row < M ; row++) {
+      dot += W[pw] * AT[row*rsa + col*csa];
+      pw += incw;
+    }
+    dot = scale_a * dot;
+    Y[py] = (Y[py] * scale_b) + dot;
+    py += incy;
+  }
+
+  // Sweep 2: each column, weighted by alpha * x[col], is added into z.
+  dim_t px = 0;
+  for(dim_t col = 0 ; col < N ; col++) {
+    T weight = scale_a * X[px];
+    for(dim_t row = 0, pz = 0 ; row < M ; row++, pz += incz) {
+      Z[pz] = Z[pz] + weight * A[row*rsa + col*csa];
+    }
+    px += incx;
+  }
+}
+
+// Reference dotxaxpyf for complex types. The matrix is addressed as
+// [i*rsa + j*csa] with i in [0, M) and j in [0, N). In order, it
+//   1. conjugates W (conjw) and AT (conjat) IN PLACE,
+//   2. y[j] := beta * y[j] + alpha * sum_i w[i] * AT(i,j),
+//   3. conjugates X (conjx) in place, and A when conja differs from conjat,
+//   4. z[i] := z[i] + (alpha * x[j]) * A(i,j), accumulated over all j.
+// Nothing happens when M or N is 0. The template parameter U is not used.
+// NOTE(review): step 3 only yields conja(A) if A and AT share one buffer, so
+// that step 1 already conjugated A when conjat was set - confirm with callers.
+template <typename T, typename U>
+void libblis_icdotxaxpyf_check(dim_t M, dim_t N, T* alpha, T* AT, bool conjat,
+   T* A, dim_t rsa, dim_t csa, bool conja, T* W, dim_t incw, bool conjw, T* X,
+   dim_t incx, bool conjx, T* beta, T* Y, dim_t incy, T* Z, dim_t incz) {
+  T zero    = {0.0 , 0.0};
+  T scale_a = *alpha;
+  T scale_b = *beta;
+
+  if((M == 0) || (N == 0)) {
+    return;
+  }
+
+  // Step 1: conjugate the operands of the dot products.
+  if(conjw) {
+    for(dim_t row = 0, pw = 0 ; row < M ; row++, pw += incw) {
+      W[pw] = conjugate<T>(W[pw]);
+    }
+  }
+  if(conjat) {
+    for(dim_t col = 0 ; col < N ; col++) {
+      for(dim_t row = 0 ; row < M ; row++) {
+        T& elem = AT[row*rsa + col*csa];
+        elem = conjugate<T>(elem);
+      }
+    }
+  }
+
+  // Step 2: one dot product per column feeds one element of y.
+  dim_t py = 0;
+  for(dim_t col = 0 ; col < N ; col++) {
+    T dot = zero;
+    dim_t pw = 0;
+    for(dim_t row = 0 ; row < M ; row++) {
+      dot = addc<T>(dot , mulc<T>(W[pw] , AT[row*rsa + col*csa]));
+      pw += incw;
+    }
+    dot = mulc<T>(scale_a , dot);
+    Y[py] = addc<T>(dot , mulc<T>(Y[py] , scale_b));
+    py += incy;
+  }
+
+  // Step 3: conjugate the operands of the axpy updates.
+  if(conjx) {
+    for(dim_t col = 0, px = 0 ; col < N ; col++, px += incx) {
+      X[px] = conjugate<T>(X[px]);
+    }
+  }
+  if(conja != conjat) {
+    for(dim_t col = 0 ; col < N ; col++) {
+      for(dim_t row = 0 ; row < M ; row++) {
+        T& elem = A[row*rsa + col*csa];
+        elem = conjugate<T>(elem);
+      }
+    }
+  }
+
+  // Step 4: each column, weighted by alpha * x[col], is added into z.
+  dim_t px = 0;
+  for(dim_t col = 0 ; col < N ; col++) {
+    T weight = mulc<T>(scale_a , X[px]);
+    dim_t pz = 0;
+    for(dim_t row = 0 ; row < M ; row++) {
+      Z[pz] = addc<T>(Z[pz] , mulc<T>(weight , A[row*rsa + col*csa]));
+      pz += incz;
+    }
+    px += incx;
+  }
+}
+
+// Reference check for dotxaxpyf. Recomputes y and z into the y_orig / z_orig
+// buffers (w, x, a and at may be conjugated in place for complex types) and
+// returns |z residual + y residual| against the outputs held in z and y.
+// params is unused.
+double libblis_test_idotxaxpyf_check (
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         at,
+  obj_t*         a,
+  obj_t*         w,
+  obj_t*         x,
+  obj_t*         beta,
+  obj_t*         y,
+  obj_t*         z,
+  obj_t*         y_orig,
+  obj_t*         z_orig
+) {
+  dim_t  M      = bli_obj_vector_dim( z );
+  dim_t  N      = bli_obj_vector_dim( y );
+  f77_int rsa   = bli_obj_row_stride( a ) ;
+  f77_int csa   = bli_obj_col_stride( a ) ;
+  f77_int incw  = bli_obj_vector_inc( w );
+  f77_int incx  = bli_obj_vector_inc( x );
+  f77_int incy  = bli_obj_vector_inc( y );
+  f77_int incz  = bli_obj_vector_inc( z );
+  bool conjat   = bli_obj_has_conj( at );
+  bool conja    = bli_obj_has_conj( a );
+  bool conjw    = bli_obj_has_conj( w );
+  bool conjx    = bli_obj_has_conj( x );
+  double err    = 0.0;
+
+  // Per datatype: build the reference in place, then sum both residuals.
+  switch( bli_obj_dt( a ) )  {
+    case BLIS_FLOAT : {
+      float* p_alpha = (float*) bli_obj_buffer( alpha );
+      float* p_at    = (float*) bli_obj_buffer( at );
+      float* p_a     = (float*) bli_obj_buffer( a );
+      float* p_w     = (float*) bli_obj_buffer( w );
+      float* p_x     = (float*) bli_obj_buffer( x );
+      float* p_beta  = (float*) bli_obj_buffer( beta );
+      float* y_ref   = (float*) bli_obj_buffer( y_orig );
+      float* z_ref   = (float*) bli_obj_buffer( z_orig );
+      float* y_out   = (float*) bli_obj_buffer( y );
+      float* z_out   = (float*) bli_obj_buffer( z );
+      libblis_idotxaxpyf_check<float, int32_t>(M, N, p_alpha, p_at,
+          p_a, rsa, csa, p_w, incw, p_x, incx, p_beta, y_ref, incy, z_ref, incz);
+      err  = computediffrv(M, incz, z_out, z_ref);
+      err += computediffrv(N, incy, y_out, y_ref);
+      break;
+    }
+    case BLIS_DOUBLE : {
+      double* p_alpha = (double*) bli_obj_buffer( alpha );
+      double* p_at    = (double*) bli_obj_buffer( at );
+      double* p_a     = (double*) bli_obj_buffer( a );
+      double* p_w     = (double*) bli_obj_buffer( w );
+      double* p_x     = (double*) bli_obj_buffer( x );
+      double* p_beta  = (double*) bli_obj_buffer( beta );
+      double* y_ref   = (double*) bli_obj_buffer( y_orig );
+      double* z_ref   = (double*) bli_obj_buffer( z_orig );
+      double* y_out   = (double*) bli_obj_buffer( y );
+      double* z_out   = (double*) bli_obj_buffer( z );
+      libblis_idotxaxpyf_check<double, int64_t>(M, N, p_alpha, p_at,
+          p_a, rsa, csa, p_w, incw, p_x, incx, p_beta, y_ref, incy, z_ref, incz);
+      err  = computediffrv(M, incz, z_out, z_ref);
+      err += computediffrv(N, incy, y_out, y_ref);
+      break;
+    }
+    case BLIS_SCOMPLEX : {
+      scomplex* p_alpha = (scomplex*) bli_obj_buffer( alpha );
+      scomplex* p_at    = (scomplex*) bli_obj_buffer( at );
+      scomplex* p_a     = (scomplex*) bli_obj_buffer( a );
+      scomplex* p_w     = (scomplex*) bli_obj_buffer( w );
+      scomplex* p_x     = (scomplex*) bli_obj_buffer( x );
+      scomplex* p_beta  = (scomplex*) bli_obj_buffer( beta );
+      scomplex* y_ref   = (scomplex*) bli_obj_buffer( y_orig );
+      scomplex* z_ref   = (scomplex*) bli_obj_buffer( z_orig );
+      scomplex* y_out   = (scomplex*) bli_obj_buffer( y );
+      scomplex* z_out   = (scomplex*) bli_obj_buffer( z );
+      libblis_icdotxaxpyf_check<scomplex, int32_t>(M, N, p_alpha, p_at, conjat,
+          p_a, rsa, csa, conja, p_w, incw, conjw, p_x, incx, conjx, p_beta,
+          y_ref, incy, z_ref, incz);
+      err  = computediffiv(M, incz, z_out, z_ref);
+      err += computediffiv(N, incy, y_out, y_ref);
+      break;
+    }
+    case BLIS_DCOMPLEX : {
+      dcomplex* p_alpha = (dcomplex*) bli_obj_buffer( alpha );
+      dcomplex* p_at    = (dcomplex*) bli_obj_buffer( at );
+      dcomplex* p_a     = (dcomplex*) bli_obj_buffer( a );
+      dcomplex* p_w     = (dcomplex*) bli_obj_buffer( w );
+      dcomplex* p_x     = (dcomplex*) bli_obj_buffer( x );
+      dcomplex* p_beta  = (dcomplex*) bli_obj_buffer( beta );
+      dcomplex* y_ref   = (dcomplex*) bli_obj_buffer( y_orig );
+      dcomplex* z_ref   = (dcomplex*) bli_obj_buffer( z_orig );
+      dcomplex* y_out   = (dcomplex*) bli_obj_buffer( y );
+      dcomplex* z_out   = (dcomplex*) bli_obj_buffer( z );
+      libblis_icdotxaxpyf_check<dcomplex, int64_t>(M, N, p_alpha, p_at, conjat,
+          p_a, rsa, csa, conja, p_w, incw, conjw, p_x, incx, conjx, p_beta,
+          y_ref, incy, z_ref, incz);
+      err  = computediffiv(M, incz, z_out, z_ref);
+      err += computediffiv(N, incy, y_out, y_ref);
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return abs(err);
+}
+
--- a/gtestsuite/src/ref_dotxf.cpp
+++ b/gtestsuite/src/ref_dotxf.cpp
@@ -0,0 +1,172 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_dotxf.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> DOTXF performs vector operations
+//*>    y := beta * y_orig + alpha * conjat(A^T) * conjx(x)
+//*>    where A is an m x b matrix, and y and x are vectors.
+//*>    The kernel is implemented as a fused series of calls to dotxv
+//*>    where b is less than or equal to an implementation-dependent fusing
+//*>    factor specific to dotxf
+//*  ==========================================================================
+
+// Reference dotxf for real types:
+//   y[j] := beta * y[j] + alpha * sum_i x[i] * A[i*rsa + j*csa]
+// for j in [0, M), with the sum running over i in [0, N). Y is updated in
+// place; nothing happens when M or N is 0. U is unused.
+template <typename T, typename U>
+void libblis_idotxf_check(dim_t M, dim_t N, T* alpha, T* A, dim_t rsa,
+                   dim_t csa, T* X, dim_t incx, T* beta, T* Y, dim_t incy) {
+  T scale_a = alpha[0];
+  T scale_b = beta[0];
+  if((M == 0) || (N == 0)) {
+    return;
+  }
+
+  dim_t py = 0;
+  for(dim_t col = 0 ; col < M ; col++) {
+    // Dot product of x with the col-th slice of A.
+    T dot = 0.0;
+    dim_t px = 0;
+    for(dim_t row = 0 ; row < N ; row++) {
+      dot += X[px] * A[row*rsa + col*csa];
+      px += incx;
+    }
+    dot = scale_a * dot;
+    Y[py] = (Y[py] * scale_b) + dot;
+    py += incy;
+  }
+}
+
+// Reference dotxf for complex types. X (cfx) and A (cfa) are first
+// conjugated IN PLACE, modifying the caller's buffers; then
+//   y[j] := beta * y[j] + alpha * sum_i x[i] * A[i*rsa + j*csa]
+// for j in [0, M), with the sum running over i in [0, N). Nothing happens
+// when M or N is 0. The template parameter U is not used.
+template <typename T, typename U>
+void libblis_icdotxf_check(dim_t M, dim_t N, T* alpha, T* A, dim_t rsa,
+dim_t csa, bool cfa, T* X, dim_t incx, bool cfx, T* beta, T* Y, dim_t incy ) {
+  T zero    = {0.0 , 0.0};
+  T scale_a = *alpha;
+  T scale_b = *beta;
+
+  if((M == 0) || (N == 0)) {
+    return;
+  }
+
+  // Conjugate the inputs when requested.
+  if(cfx) {
+    for(dim_t row = 0, px = 0 ; row < N ; row++, px += incx) {
+      X[px] = conjugate<T>(X[px]);
+    }
+  }
+
+  if(cfa) {
+    for(dim_t col = 0 ; col < M ; col++) {
+      for(dim_t row = 0 ; row < N ; row++) {
+        T& elem = A[row*rsa + col*csa];
+        elem = conjugate<T>(elem);
+      }
+    }
+  }
+
+  dim_t py = 0;
+  for(dim_t col = 0 ; col < M ; col++) {
+    // Dot product of x with the col-th slice of A.
+    T dot = zero;
+    dim_t px = 0;
+    for(dim_t row = 0 ; row < N ; row++) {
+      dot = addc<T>(dot , mulc<T>(X[px] , A[row*rsa + col*csa]));
+      px += incx;
+    }
+    dot = mulc<T>(scale_a , dot);
+    Y[py] = addc<T>(dot , mulc<T>(Y[py] , scale_b));
+    py += incy;
+  }
+}
+
+// Reference check for dotxf. Recomputes
+//   y := beta * y_orig + alpha * conjat(A^T) * conjx(x)
+// into y_orig's buffer (x and a may be conjugated in place for complex types)
+// and returns the residual against the output held in y. params is unused.
+double libblis_test_idotxf_check (
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         a,
+  obj_t*         x,
+  obj_t*         beta,
+  obj_t*         y,
+  obj_t*         y_orig
+) {
+  f77_int incx = bli_obj_vector_inc( x );
+  f77_int incy = bli_obj_vector_inc( y );
+  bool cfx     = bli_obj_has_conj( x );
+  bool cfa     = bli_obj_has_conj( a );
+
+  // The reference walks A with its stored strides: the inner sum runs over
+  // x's length (N) and one output is produced per element of y (M).
+  dim_t  N     = bli_obj_vector_dim( x );
+  dim_t  M     = bli_obj_vector_dim( y );
+  f77_int rsa  = bli_obj_row_stride( a );
+  f77_int csa  = bli_obj_col_stride( a );
+
+  switch( bli_obj_dt( y ) )  {
+    case BLIS_FLOAT :
+    {
+      float* p_alpha = (float*) bli_obj_buffer( alpha );
+      float* p_a     = (float*) bli_obj_buffer( a );
+      float* p_x     = (float*) bli_obj_buffer( x );
+      float* p_beta  = (float*) bli_obj_buffer( beta );
+      float* y_ref   = (float*) bli_obj_buffer( y_orig );
+      float* y_out   = (float*) bli_obj_buffer( y );
+      libblis_idotxf_check<float, int32_t>( M, N, p_alpha, p_a, rsa, csa,
+                                         p_x, incx, p_beta, y_ref, incy );
+      return computediffrv(M, incy, y_out, y_ref);
+    }
+    case BLIS_DOUBLE :
+    {
+      double* p_alpha = (double*) bli_obj_buffer( alpha );
+      double* p_a     = (double*) bli_obj_buffer( a );
+      double* p_x     = (double*) bli_obj_buffer( x );
+      double* p_beta  = (double*) bli_obj_buffer( beta );
+      double* y_ref   = (double*) bli_obj_buffer( y_orig );
+      double* y_out   = (double*) bli_obj_buffer( y );
+      libblis_idotxf_check<double, int64_t>( M, N, p_alpha, p_a, rsa, csa,
+                                         p_x, incx, p_beta, y_ref, incy );
+      return computediffrv(M, incy, y_out, y_ref);
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* p_alpha = (scomplex*) bli_obj_buffer( alpha );
+      scomplex* p_a     = (scomplex*) bli_obj_buffer( a );
+      scomplex* p_x     = (scomplex*) bli_obj_buffer( x );
+      scomplex* p_beta  = (scomplex*) bli_obj_buffer( beta );
+      scomplex* y_ref   = (scomplex*) bli_obj_buffer( y_orig );
+      scomplex* y_out   = (scomplex*) bli_obj_buffer( y );
+      libblis_icdotxf_check<scomplex, int32_t>( M, N, p_alpha, p_a, rsa, csa,
+                                    cfa, p_x, incx, cfx, p_beta, y_ref, incy );
+      return computediffiv(M, incy, y_out, y_ref);
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* p_alpha = (dcomplex*) bli_obj_buffer( alpha );
+      dcomplex* p_a     = (dcomplex*) bli_obj_buffer( a );
+      dcomplex* p_x     = (dcomplex*) bli_obj_buffer( x );
+      dcomplex* p_beta  = (dcomplex*) bli_obj_buffer( beta );
+      dcomplex* y_ref   = (dcomplex*) bli_obj_buffer( y_orig );
+      dcomplex* y_out   = (dcomplex*) bli_obj_buffer( y );
+      libblis_icdotxf_check<dcomplex, int64_t>( M, N, p_alpha, p_a, rsa, csa,
+                                    cfa, p_x, incx, cfx, p_beta, y_ref, incy );
+      return computediffiv(M, incy, y_out, y_ref);
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  // Reached only for an unsupported datatype.
+  return 0.0;
+}
+
--- a/gtestsuite/src/ref_dotxv.cpp
+++ b/gtestsuite/src/ref_dotxv.cpp
@@ -0,0 +1,197 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_dotxv.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> DOTXV performs vector operations
+//*>    rho := beta * rho + alpha * conjx(x)^T * conjy(y)
+//*>    where x and y are vectors of length n, and alpha, beta, and rho are scalars.
+//*  ==========================================================================
+
+// Reference DOTXV for real element types:
+//    rho := beta * rho + sum_i (alpha * X[i*incx]) * Y[i*incy]
+//
+//   len          number of elements read from X and Y
+//   alpha, beta  pointers to the scalars (only element 0 is read)
+//   X, incx      first vector and its stride; NOTE: X is overwritten with
+//                alpha * X (or zeros when alpha == 0) as a side effect
+//   Y, incy      second vector and its stride (read only)
+//   rhorig       in: initial rho, out: updated rho
+//
+// When len == 0 the function returns without touching *rhorig.
+// The template parameter U is not used by this function.
+template <typename T, typename U>
+void libblis_idotxv_check(dim_t len, T* alpha, T* X, dim_t incx,
+                                  T* beta, T* Y, dim_t incy, T* rhorig ) {
+  const T kOne    = 1.0;
+  const T kZero   = 0.0;
+  const T scale_x = alpha[0];
+  const T scale_r = beta[0];
+  T acc           = *rhorig;
+
+  if(len == 0) {
+    return;
+  }
+
+  // rho := beta * rho
+  acc = acc * scale_r;
+
+  // Fold alpha into X. A zero alpha stores exact zeros instead of multiplying.
+  if (scale_x != kOne) {
+    dim_t px = 0;
+    for(dim_t n = 0 ; n < len ; ++n, px += incx) {
+      X[px] = (scale_x == kZero) ? kZero : (scale_x * X[px]);
+    }
+  }
+
+  // Accumulate the dot product of the (scaled) X with Y.
+  dim_t px = 0;
+  dim_t py = 0;
+  for(dim_t n = 0 ; n < len ; ++n, px += incx, py += incy) {
+    acc = acc + X[px] * Y[py];
+  }
+
+  *rhorig = acc;
+}
+
+// Reference DOTXV for complex element types (T exposes .real / .imag):
+//    rho := beta * rho + sum_i (alpha * conjx(X[i])) * conjy(Y[i])
+//
+//   len          number of elements read from X and Y
+//   alpha, beta  pointers to the complex scalars
+//   X, incx      first vector and its stride
+//   Y, incy      second vector and its stride
+//   rhorig       in: initial rho, out: updated rho
+//   cfx, cfy     conjugate X / Y before use
+//
+// Side effects: X is conjugated (if cfx) and scaled by alpha in place, and
+// Y is conjugated in place (if cfy). When len == 0 the function returns
+// without touching *rhorig. The template parameter U is not used.
+template <typename T, typename U>
+void libblis_icdotxv_check(dim_t len, T* alpha, T* X, dim_t incx,  T* beta,
+                         T* Y, dim_t incy, T* rhorig, bool cfx, bool cfy) {
+  dim_t i, ix, iy;
+  T ONE, ZERO;
+  ONE  = {1.0 , 0.0};
+  ZERO = {0.0 , 0.0};
+  T Alpha = *alpha;
+  T Beta  = *beta;
+  T rho   = *rhorig;
+
+  if (len == 0) {
+    return;
+  }
+
+  // rho := beta * rho
+  rho = mulc<T>(rho , Beta);
+
+  // Conjugate first so that alpha multiplies conj(x), not the other way round.
+  if(cfx) {
+    ix = 0;
+    for(i = 0 ; i < len ; i++) {
+      X[ix] = conjugate<T>(X[ix]);
+      ix = ix + incx;
+    }
+  }
+
+  // Alpha differs from (1,0) when EITHER component differs. The previous
+  // '&&' test skipped the scaling for values such as (2,0) or (1,0.5).
+  if((Alpha.real != ONE.real) || (Alpha.imag != ONE.imag)) {
+    ix = 0;
+    if((Alpha.real == ZERO.real) && (Alpha.imag == ZERO.imag)) {
+      // Zero alpha: store exact zeros instead of multiplying.
+      for(i = 0 ; i < len ; i++) {
+        X[ix] = ZERO;
+        ix = ix + incx;
+      }
+    }
+    else {
+      for(i = 0 ; i < len ; i++) {
+        X[ix] = mulc<T>(Alpha , X[ix]);
+        ix = ix + incx;
+      }
+    }
+  }
+
+  if(cfy) {
+    iy = 0;
+    for(i = 0 ; i < len ; i++) {
+      Y[iy] = conjugate<T>(Y[iy]);
+      iy = iy + incy;
+    }
+  }
+
+  // Accumulate the dot product of the prepared vectors.
+  ix = 0;
+  iy = 0;
+  for(i = 0 ; i < len ; i++) {
+    rho = addc<T>(rho, mulc<T>(Y[iy] , X[ix]));
+    ix = ix + incx;
+    iy = iy + incy;
+  }
+
+  *rhorig = rho;
+  return;
+}
+
+// Runs the reference DOTXV on rho_orig and returns the absolute difference
+// between the library result stored in rho and that reference value.
+//
+//   params       not used by this function
+//   alpha, beta  scalar objects
+//   x, y         vector objects; their buffers are modified in place by the
+//                reference kernels (scaling / conjugation)
+//   rho          scalar object holding the result under test
+//   rho_orig     scalar object holding the initial rho; overwritten with the
+//                reference result
+//
+// For complex types the residual is |re diff| + |im diff|. The components
+// are made non-negative individually so that errors of opposite sign cannot
+// cancel each other out.
+double libblis_test_idotxv_check(
+  test_params_t* params,
+  obj_t*  alpha,
+  obj_t*  x,
+  obj_t*  y,
+  obj_t*  beta,
+  obj_t*  rho,
+  obj_t*  rho_orig
+) {
+  num_t  dt    = bli_obj_dt( x );
+  dim_t  M     = bli_obj_vector_dim( x );
+  bool cfx     = bli_obj_has_conj( x );
+  bool cfy     = bli_obj_has_conj( y );
+  f77_int incx = bli_obj_vector_inc( x );
+  f77_int incy = bli_obj_vector_inc( y );
+  double resid = 0.0;
+
+  switch( dt )  {
+    case BLIS_FLOAT :
+    {
+      float*   Alpha    = (float*) bli_obj_buffer( alpha );
+      float*   X        = (float*) bli_obj_buffer( x );
+      float*   Beta     = (float*) bli_obj_buffer( beta );
+      float*   Y        = (float*) bli_obj_buffer( y );
+      float*   rhorig   = (float*) bli_obj_internal_scalar_buffer( rho_orig );
+      float*   rhp      = (float*) bli_obj_internal_scalar_buffer( rho );
+      libblis_idotxv_check<float, int32_t>( M, Alpha, X, incx,
+                                                   Beta, Y, incy, rhorig );
+      resid = (*rhp - *rhorig);
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double*   Alpha   = (double*) bli_obj_buffer( alpha );
+      double*   X       = (double*) bli_obj_buffer( x );
+      double*   Beta    = (double*) bli_obj_buffer( beta );
+      double*   Y       = (double*) bli_obj_buffer( y );
+      double*   rhorig  = (double*) bli_obj_internal_scalar_buffer( rho_orig );
+      double*   rhp     = (double*) bli_obj_internal_scalar_buffer( rho );
+      libblis_idotxv_check<double, int64_t>( M, Alpha, X, incx,
+                                                   Beta, Y, incy, rhorig );
+      resid = (*rhp - *rhorig);
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex*   Alpha  = (scomplex*) bli_obj_buffer( alpha );
+      scomplex*   X      = (scomplex*) bli_obj_buffer( x );
+      scomplex*   Beta   = (scomplex*) bli_obj_buffer( beta );
+      scomplex*   Y      = (scomplex*) bli_obj_buffer( y );
+      scomplex*   rhorig = (scomplex*) bli_obj_internal_scalar_buffer( rho_orig );
+      scomplex*   rhp    = (scomplex*) bli_obj_internal_scalar_buffer( rho );
+      libblis_icdotxv_check<scomplex, int32_t>( M, Alpha, X, incx,
+                                         Beta, Y, incy, rhorig, cfx, cfy );
+      // Widen to double first so the same abs overload as the final
+      // abs(resid) below is selected.
+      const double d_re = (*rhp).real - (*rhorig).real;
+      const double d_im = (*rhp).imag - (*rhorig).imag;
+      resid = abs(d_re) + abs(d_im);
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex*   Alpha  = (dcomplex*) bli_obj_buffer( alpha );
+      dcomplex*   X      = (dcomplex*) bli_obj_buffer( x );
+      dcomplex*   Beta   = (dcomplex*) bli_obj_buffer( beta );
+      dcomplex*   Y      = (dcomplex*) bli_obj_buffer( y );
+      dcomplex*   rhorig = (dcomplex*) bli_obj_internal_scalar_buffer( rho_orig );
+      dcomplex*   rhp    = (dcomplex*) bli_obj_internal_scalar_buffer( rho );
+      libblis_icdotxv_check<dcomplex, int64_t>( M, Alpha, X, incx,
+                                         Beta, Y, incy, rhorig, cfx, cfy );
+      const double d_re = (*rhp).real - (*rhorig).real;
+      const double d_im = (*rhp).imag - (*rhorig).imag;
+      resid = abs(d_re) + abs(d_im);
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return abs(resid);
+}
+
--- a/gtestsuite/src/ref_gemm.cpp
+++ b/gtestsuite/src/ref_gemm.cpp
@@ -0,0 +1,317 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_gemm.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> GEMM  performs one of the matrix-matrix operations
+//*>    C := alpha*op( A )*op( B ) + beta*C,
+//*> where  op( X ) is one of
+//*>    op( X ) = X   or   op( X ) = X**T   or   op( X ) = X**H,
+//*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
+//*> an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
+/*
+Reference GEMM implementation C = C*Beta + Alpha*A*B
+Row major A=mxk , B=kxn and C=mxn , lda=rsa=k, csa=1, ldb=rsb=n,csb=1, ldc=rsc=n,csc=1
+Col major A=mxk , B=kxn and C=mxn , rsa=1, lda=csa=m, rsb=1,ldb=csb=k, rsc=1,ldc=csc=m
+*/
+//*  ==========================================================================
+
+// Reference GEMM for real element types, computed in place on C:
+//    C := alpha * A * B + beta * C
+//
+//   M, N, K      C is M x N, A is M x K, B is K x N (after any transposition,
+//                which the caller expresses through the strides)
+//   alpha, beta  pointers to the scalars (only element 0 is read)
+//   A, rsa, csa  matrix A with its row / column strides
+//   B, rsb, csb  matrix B with its row / column strides
+//   C, rsc, csc  matrix C with its row / column strides; overwritten
+//
+// A zero alpha skips the product and a zero beta never reads C, so the old
+// contents of C cannot leak into the result. When both scalars are zero, C is
+// set to zero (the previous code left C untouched in that case).
+// The template parameter U is not used by this function.
+template <typename T, typename U>
+void libblis_igemm_check(dim_t M, dim_t N, dim_t K, T *alpha, T *A,
+  dim_t rsa, dim_t csa, T *B, dim_t rsb, dim_t csb, T* beta,
+  T *C, dim_t rsc, dim_t csc){
+
+  const T Alpha = alpha[0];
+  const T Beta  = beta[0];
+  const bool use_ab = ( Alpha != 0. );
+  const bool use_c  = ( Beta  != 0. );
+
+  // dim_t indices: the bounds are dim_t, so int counters could overflow.
+  for( dim_t i = 0 ; i < M ; i++ ) {
+    for( dim_t j = 0 ; j < N ; j++ ) {
+      T* cij   = &C[i*rsc + j*csc];
+      T result = 0.0;
+
+      if( use_c ) {
+        result = Beta * (*cij);
+      }
+
+      if( use_ab ) {
+        T sum = 0.0;
+        for( dim_t k = 0 ; k < K ; k++ ) {
+          sum += A[i*rsa + k*csa] * B[k*rsb + j*csb];
+        }
+        // Same expression shapes as before: beta*C + alpha*sum, or alpha*sum.
+        result = use_c ? ( result + (Alpha * sum) ) : (Alpha * sum);
+      }
+
+      *cij = result;
+    }
+  }
+
+  return;
+}
+
+// Reference GEMM for complex element types (T exposes .real / .imag),
+// computed in place on C:
+//    C := alpha * conja(A) * conjb(B) + beta * C
+//
+//   M, N, K        C is M x N, A is M x K, B is K x N (transposition is
+//                  expressed by the caller through the strides)
+//   alpha, beta    pointers to the complex scalars
+//   A, rsa, csa    matrix A with its strides; conjugated IN PLACE if conja
+//   B, rsb, csb    matrix B with its strides; conjugated IN PLACE if conjb
+//   C, rsc, csc    matrix C with its strides; overwritten
+//
+// A scalar counts as zero only when BOTH components are zero. The previous
+// code looked at the real part alone and therefore dropped the contribution
+// of purely imaginary scalars such as (0,1). When both scalars are zero, C is
+// set to zero. The template parameter U is not used by this function.
+template <typename T, typename U>
+void libblis_icgemm_check(dim_t M, dim_t N, dim_t K, T *alpha,
+  T *A, dim_t rsa, dim_t csa, bool conja, T *B, dim_t rsb, dim_t csb,
+  bool conjb, T* beta, T *C, dim_t rsc, dim_t csc){
+
+  const T Alpha = *alpha;
+  const T Beta  = *beta;
+  const bool use_ab = (Alpha.real != 0.) || (Alpha.imag != 0.);
+  const bool use_c  = (Beta.real  != 0.) || (Beta.imag  != 0.);
+
+  if(conja) {
+    for( dim_t i = 0 ; i < M ; i++ ) {
+      for( dim_t k = 0 ; k < K ; k++ ) {
+        A[i*rsa + k*csa] = conjugate<T>(A[i*rsa + k*csa]);
+      }
+    }
+  }
+  if(conjb) {
+    for( dim_t k = 0 ; k < K ; k++ ) {
+      for( dim_t j = 0 ; j < N ; j++ ) {
+        B[k*rsb + j*csb] = conjugate<T>(B[k*rsb + j*csb]);
+      }
+    }
+  }
+
+  for( dim_t i = 0 ; i < M ; i++ ) {
+    for( dim_t j = 0 ; j < N ; j++ ) {
+      T result = {0.0, 0.0};
+
+      // beta == 0 never reads C, so stale contents of C cannot propagate.
+      if( use_c ) {
+        result = mulc<T>(Beta, C[i*rsc + j*csc]);
+      }
+
+      if( use_ab ) {
+        T sum = {0.0, 0.0};
+        for( dim_t k = 0 ; k < K ; k++ ) {
+          T aa = A[i*rsa + k*csa];
+          T bb = B[k*rsb + j*csb];
+          sum = addc<T>(sum , mulc<T>(aa , bb));
+        }
+        sum    = mulc<T>(Alpha, sum);
+        result = use_c ? addc<T>(result , sum) : sum;
+      }
+
+      C[i*rsc + j*csc] = result;
+    }
+  }
+  return ;
+}
+
+// Runs the reference GEMM on c_orig (in place) and returns the residual
+// between that reference result and the library result held in c.
+//
+//   params       not used by this function
+//   alpha, beta  scalar objects
+//   a, b         operand objects; trans / conj flags are read from them
+//   c            object holding the result under test
+//   c_orig       object holding the initial C; overwritten by the reference
+//   dt           datatype selecting the typed reference kernel
+//
+// The strides of A and B are swapped when the operand carries a transpose,
+// so the reference kernels always see the post-transposition shape. C is
+// addressed with a unit stride in one dimension: rows if c has a unit row
+// stride, columns otherwise.
+double libblis_test_igemm_check(
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         a,
+  obj_t*         b,
+  obj_t*         beta,
+  obj_t*         c,
+  obj_t*         c_orig,
+  num_t          dt
+){
+  const dim_t   rows   = bli_obj_length( c_orig );
+  const dim_t   cols   = bli_obj_width( c_orig );
+  const dim_t   inner  = bli_obj_width_after_trans( a );
+  const bool    conja  = bli_obj_has_conj( a );
+  const bool    conjb  = bli_obj_has_conj( b );
+  const trans_t transA = bli_obj_onlytrans_status( a );
+  const trans_t transB = bli_obj_onlytrans_status( b );
+
+  // Effective strides of A and B (identical for either storage of C).
+  dim_t  rsa, csa;
+  if( transA ) {
+    rsa = bli_obj_col_stride( a );
+    csa = bli_obj_row_stride( a );
+  } else {
+    rsa = bli_obj_row_stride( a );
+    csa = bli_obj_col_stride( a );
+  }
+
+  dim_t  rsb, csb;
+  if( transB ) {
+    rsb = bli_obj_col_stride( b );
+    csb = bli_obj_row_stride( b );
+  } else {
+    rsb = bli_obj_row_stride( b );
+    csb = bli_obj_col_stride( b );
+  }
+
+  // Strides of C: the layout test uses c, the stride itself comes from c_orig.
+  dim_t  rsc, csc;
+  if( bli_obj_row_stride( c ) == 1 ) {
+    rsc = 1;
+    csc = bli_obj_col_stride( c_orig );
+  } else {
+    rsc = bli_obj_row_stride( c_orig );
+    csc = 1 ;
+  }
+
+  double resid = 0.0;
+
+  switch( dt )  {
+    case BLIS_FLOAT :
+    {
+      float* pa   = (float*) bli_obj_buffer( a );
+      float* pb   = (float*) bli_obj_buffer( b );
+      float* pref = (float*) bli_obj_buffer( c_orig );
+      float* pact = (float*) bli_obj_buffer( c );
+      libblis_igemm_check<float, int32_t>(rows, cols, inner,
+                          (float*) bli_obj_buffer( alpha ), pa, rsa, csa,
+                          pb, rsb, csb,
+                          (float*) bli_obj_buffer( beta ), pref, rsc, csc);
+      resid = computediffrm<float>(rows, cols, pact, pref, rsc, csc);
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double* pa   = (double*) bli_obj_buffer( a );
+      double* pb   = (double*) bli_obj_buffer( b );
+      double* pref = (double*) bli_obj_buffer( c_orig );
+      double* pact = (double*) bli_obj_buffer( c );
+      libblis_igemm_check<double, int64_t>(rows, cols, inner,
+                          (double*) bli_obj_buffer( alpha ), pa, rsa, csa,
+                          pb, rsb, csb,
+                          (double*) bli_obj_buffer( beta ), pref, rsc, csc);
+      resid = computediffrm<double>(rows, cols, pact, pref, rsc, csc);
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* pa   = (scomplex*) bli_obj_buffer( a );
+      scomplex* pb   = (scomplex*) bli_obj_buffer( b );
+      scomplex* pref = (scomplex*) bli_obj_buffer( c_orig );
+      scomplex* pact = (scomplex*) bli_obj_buffer( c );
+      libblis_icgemm_check<scomplex, int32_t>(rows, cols, inner,
+                          (scomplex*) bli_obj_buffer( alpha ), pa, rsa, csa,
+                          conja, pb, rsb, csb, conjb,
+                          (scomplex*) bli_obj_buffer( beta ), pref, rsc, csc);
+      resid = computediffim<scomplex>(rows, cols, pact, pref, rsc, csc);
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* pa   = (dcomplex*) bli_obj_buffer( a );
+      dcomplex* pb   = (dcomplex*) bli_obj_buffer( b );
+      dcomplex* pref = (dcomplex*) bli_obj_buffer( c_orig );
+      dcomplex* pact = (dcomplex*) bli_obj_buffer( c );
+      libblis_icgemm_check<dcomplex, int64_t>(rows, cols, inner,
+                          (dcomplex*) bli_obj_buffer( alpha ), pa, rsa, csa,
+                          conja, pb, rsb, csb, conjb,
+                          (dcomplex*) bli_obj_buffer( beta ), pref, rsc, csc);
+      resid = computediffim<dcomplex>(rows, cols, pact, pref, rsc, csc);
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return resid;
+}
+
+// Scans the real matrix held by c for NaN entries.
+//
+//   rsc, csc   row / column strides used to address the buffer of c
+//   c          matrix object; its length and width give the scan range
+//
+// Returns the first NaN found (widened to double), or 0.0 when the matrix
+// holds no NaN. The scan stops at the first hit; the previous 'break' only
+// left the inner loop and kept visiting the remaining rows.
+template <typename T>
+double libblis_check_nan_real( dim_t rsc, dim_t csc, obj_t* c ) {
+
+  dim_t  M = bli_obj_length( c );
+  dim_t  N = bli_obj_width( c );
+  T* C = (T*) bli_obj_buffer( c );
+
+  for( dim_t i = 0 ; i < M ; i++ ) {
+    for( dim_t j = 0 ; j < N ; j++ ) {
+      auto tv = C[ i*rsc + j*csc ];
+      if ( bli_isnan( tv )) {
+        return (double) tv;
+      }
+    }
+  }
+  return 0.0;
+}
+
+// Scans the complex matrix held by c for entries whose real or imaginary
+// part is NaN.
+//
+//   rsc, csc   row / column strides used to address the buffer of c
+//   c          matrix object; its length and width give the scan range
+//
+// Returns the first NaN component found (the real part takes precedence),
+// or 0.0 when no component is NaN. The scan stops at the first hit; the
+// previous 'break' only left the inner loop.
+template <typename T>
+double libblis_check_nan_complex( dim_t rsc, dim_t csc, obj_t* c ) {
+
+  dim_t  M = bli_obj_length( c );
+  dim_t  N = bli_obj_width( c );
+  T* C = (T*) bli_obj_buffer( c );
+
+  for( dim_t i = 0 ; i < M ; i++ ) {
+    for( dim_t j = 0 ; j < N ; j++ ) {
+      auto tv = C[ i*rsc + j*csc ];
+      if ( bli_isnan( tv.real ) || bli_isnan( tv.imag )) {
+        return bli_isnan( tv.real ) ? tv.real : tv.imag;
+      }
+    }
+  }
+  return 0.0;
+}
+
+// Dispatches the NaN scan of matrix c to the typed helper selected by dt.
+//
+// The strides handed to the helper assume one unit-stride dimension: rows
+// when c has a unit row stride, columns otherwise. Returns what the helper
+// returns (a NaN if one was found, else 0.0); an unsupported dt is reported
+// through bli_check_error_code and yields 0.0.
+double libblis_check_nan_gemm(obj_t* c, num_t dt ) {
+  const bool unit_row_stride = ( bli_obj_row_stride( c ) == 1 );
+
+  dim_t  rsc, csc;
+  if( unit_row_stride ) {
+    rsc = 1;
+    csc = bli_obj_col_stride( c );
+  } else {
+    rsc = bli_obj_row_stride( c );
+    csc = 1 ;
+  }
+
+  switch( dt )  {
+    case BLIS_FLOAT:
+      return libblis_check_nan_real<float>( rsc, csc, c );
+    case BLIS_DOUBLE:
+      return libblis_check_nan_real<double>( rsc, csc, c );
+    case BLIS_SCOMPLEX:
+      return libblis_check_nan_complex<scomplex>( rsc, csc, c );
+    case BLIS_DCOMPLEX:
+      return libblis_check_nan_complex<dcomplex>( rsc, csc, c );
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return 0.0;
+}
+
--- a/gtestsuite/src/ref_gemmt.cpp
+++ b/gtestsuite/src/ref_gemmt.cpp
@@ -0,0 +1,445 @@
+#include <complex>
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_gemmt.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> GEMMT performs one of the matrix-matrix operations
+//*>    C := beta * C + alpha * transa(A) * transb(B)
+//*  ==========================================================================
+
+// Type-dispatching overloads of the reference GEMV. They give the templated
+// GEMMT kernels below a single call name for all four element types.
+
+// float: forwards to libblis_igemv_check<float, int32_t>.
+// conja and conjx are accepted for signature parity but not forwarded.
+void libblis_gemv_check(trans_t transA , dim_t M, dim_t N, float* Alpha,
+  float* A, dim_t rsa, dim_t csa, bool conja, float* X, dim_t incx, bool conjx,
+  float* Beta, float* Y, dim_t incy) {
+  libblis_igemv_check<float, int32_t>(transA, M,  N, Alpha, A, rsa, csa,
+                                                       X, incx, Beta, Y, incy);
+  return;
+}
+
+// double: forwards to libblis_igemv_check<double, int64_t>.
+// conja and conjx are accepted for signature parity but not forwarded.
+void libblis_gemv_check(trans_t transA , dim_t M, dim_t N, double* Alpha,
+  double* A, dim_t rsa, dim_t csa, bool conja, double* X, dim_t incx,
+  bool conjx, double* Beta, double* Y, dim_t incy) {
+  libblis_igemv_check<double, int64_t>(transA, M,  N, Alpha, A, rsa, csa,
+                                                       X, incx, Beta, Y, incy);
+  return;
+}
+
+// scomplex: forwards to libblis_icgemv_check<scomplex, int32_t>.
+// Note the argument order of the callee: conja follows csa, conjx comes last.
+void libblis_gemv_check(trans_t transA , dim_t M, dim_t N, scomplex* Alpha,
+  scomplex* A, dim_t rsa, dim_t csa, bool conja, scomplex* X, dim_t incx,
+  bool conjx, scomplex* Beta, scomplex* Y, dim_t incy) {
+  libblis_icgemv_check<scomplex, int32_t>(transA, M,  N, Alpha, A, rsa, csa,
+                                          conja, X, incx, Beta, Y, incy, conjx);
+  return;
+}
+// dcomplex: forwards to libblis_icgemv_check<dcomplex, int64_t>.
+// Note the argument order of the callee: conja follows csa, conjx comes last.
+void libblis_gemv_check(trans_t transA , dim_t M, dim_t N, dcomplex* Alpha,
+  dcomplex* A, dim_t rsa, dim_t csa, bool conja, dcomplex* X, dim_t incx,
+  bool conjx, dcomplex* Beta, dcomplex* Y, dim_t incy) {
+  libblis_icgemv_check<dcomplex, int64_t>(transA, M,  N, Alpha, A, rsa, csa,
+                                          conja, X, incx, Beta, Y, incy, conjx);
+  return;
+}
+
+
+// Type-dispatching overloads of the reference GEMM. They give the templated
+// GEMMT kernels below a single call name for all four element types.
+
+// float: forwards to libblis_igemm_check<float, int32_t>.
+// conja and conjb are accepted for signature parity but not forwarded.
+void libblis_gemm_check(dim_t M, dim_t N, dim_t K, float* Alpha, float* A,
+  dim_t rsa, dim_t csa, bool conja, float* B, dim_t rsb, dim_t csb, bool conjb,
+  float* Beta, float* C, dim_t rsc, dim_t csc) {
+  libblis_igemm_check<float, int32_t>(M, N, K, Alpha, A, rsa, csa, B, rsb,
+                                                     csb, Beta, C, rsc, csc);
+  return;
+}
+
+// double: forwards to libblis_igemm_check<double, int64_t>.
+// conja and conjb are accepted for signature parity but not forwarded.
+void libblis_gemm_check(dim_t M, dim_t N, dim_t K, double* Alpha, double* A,
+  dim_t rsa, dim_t csa, bool conja, double* B, dim_t rsb, dim_t csb, bool conjb,
+  double* Beta, double* C, dim_t rsc, dim_t csc) {
+  libblis_igemm_check<double, int64_t>(M, N, K, Alpha, A, rsa, csa, B, rsb,
+                                                     csb, Beta, C, rsc, csc);
+  return;
+}
+
+// scomplex: forwards to libblis_icgemm_check<scomplex, int32_t>, including
+// the conjugation flags.
+void libblis_gemm_check(dim_t M, dim_t N, dim_t K, scomplex* Alpha, scomplex* A,
+  dim_t rsa, dim_t csa, bool conja, scomplex* B, dim_t rsb, dim_t csb, bool conjb,
+  scomplex* Beta, scomplex* C, dim_t rsc, dim_t csc) {
+  libblis_icgemm_check<scomplex, int32_t>(M, N, K, Alpha, A, rsa, csa, conja,
+                                        B, rsb, csb, conjb, Beta, C, rsc, csc);
+  return;
+}
+
+// dcomplex: forwards to libblis_icgemm_check<dcomplex, int64_t>, including
+// the conjugation flags.
+void libblis_gemm_check(dim_t M, dim_t N, dim_t K, dcomplex* Alpha, dcomplex* A,
+  dim_t rsa, dim_t csa, bool conja, dcomplex* B, dim_t rsb, dim_t csb, bool conjb,
+  dcomplex* Beta, dcomplex* C, dim_t rsc, dim_t csc) {
+  libblis_icgemm_check<dcomplex, int64_t>(M, N, K, Alpha, A, rsa, csa, conja,
+                                        B, rsb, csb, conjb, Beta, C, rsc, csc);
+  return;
+}
+
+// Problem size n at or below which gemmt_rec() stops splitting and hands the
+// block to the unblocked kernel gemmt_rec2().
+#define CROSSOVER_GEMMT 24
+
+// Returns the size of the leading part when a dimension of n is split in two
+// for the recursive GEMMT reference.
+//
+// Each datatype has a granularity g (float 16, double and scomplex 8,
+// dcomplex 4). For n >= 2*g the split point is ((n + g) / (2*g)) * g, i.e. a
+// multiple of g; smaller n is simply halved. An unsupported dt is reported
+// through bli_check_error_code and yields 0.
+dim_t rec_split(dim_t n, num_t dt) {
+  dim_t granule = 0;
+
+  switch( dt )  {
+    case BLIS_FLOAT :
+      granule = 16;
+      break;
+    case BLIS_DOUBLE :
+    case BLIS_SCOMPLEX :
+      granule = 8;
+      break;
+    case BLIS_DCOMPLEX :
+      granule = 4;
+      break;
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+      return 0;
+  }
+
+  if( n >= 2 * granule ) {
+    return ((n + granule) / (2 * granule)) * granule;
+  }
+  return n / 2;
+}
+
+/** Unblocked GEMMT reference kernel (all element types).
+ *
+ * Updates one triangle of the n x n matrix C column by column; C is walked
+ * with leading dimension ldC and unit stride inside a column. Column i is
+ * produced by a single reference GEMV call:
+ *   - BLIS_LOWER: the n - i entries starting at the diagonal element, using
+ *     the part of A that starts at its i-th row (column when transposed);
+ *   - otherwise:  the first i + 1 entries of the column, using A from its
+ *     start.
+ * The vector operand is the i-th column of op(B), addressed with stride 1
+ * or ldB depending on transB. The GEMV dimensions are passed as
+ * (rows, k) when A is not transposed and as (k, rows) otherwise.
+ * The template parameter U is not used by this function.
+ */
+template <typename T, typename U>
+static void gemmt_rec2(uplo_t uploc, trans_t transA, trans_t transB,
+  dim_t n, dim_t k, T* alpha,  T* A,  dim_t ldA, bool conja, T* B,
+  dim_t ldB, bool conjb, T* beta, T* C, dim_t ldC ) {
+
+  const bool a_notrans = (transA == BLIS_NO_TRANSPOSE);
+  const bool b_notrans = (transB == BLIS_NO_TRANSPOSE);
+  const bool lower     = (uploc == BLIS_LOWER);
+
+  const dim_t rsa  = a_notrans ? 1 : ldA ;
+  const dim_t csa  = a_notrans ? ldA : 1 ;
+  const dim_t incB = b_notrans ? 1 : ldB ;
+  const dim_t incC = 1;
+
+  for (dim_t col = 0; col < n; col++) {
+    // Vector operand: column 'col' of op(B).
+    T * b_vec = B + (b_notrans ? ldB * col : col);
+
+    // Matrix operand and destination depend on the triangle being updated.
+    T * a_blk;
+    T * c_vec;
+    int rows;
+    if (lower) {
+      a_blk = A + (a_notrans ? col : ldA * col);
+      c_vec = C + ldC * col + col;
+      rows  = n - col;
+    } else {
+      a_blk = A;
+      c_vec = C + ldC * col;
+      rows  = col + 1;
+    }
+
+    if (a_notrans) {
+      libblis_gemv_check(transA, rows, k, alpha, a_blk, rsa, csa, conja, b_vec, incB, conjb, beta, c_vec, incC);
+    } else {
+      libblis_gemv_check(transA, k, rows, alpha, a_blk, rsa, csa, conja, b_vec, incB, conjb, beta, c_vec, incC);
+    }
+  }
+}
+
+/** Recursive GEMMT reference kernel (all element types).
+ *
+ * Problems with n <= CROSSOVER_GEMMT go straight to gemmt_rec2(). Larger
+ * problems are split at n1 = rec_split(n, dt) into a 2 x 2 partition of C:
+ *   1. the top-left n1 x n1 block is handled recursively;
+ *   2. the off-diagonal block of the requested triangle is a plain
+ *      reference GEMM (bottom-left for BLIS_LOWER, top-right otherwise);
+ *   3. the bottom-right n2 x n2 block (n2 = n - n1) is handled recursively.
+ * C is addressed with row stride 1 and column stride ldC; the strides of A
+ * and B follow from transA / transB and the leading dimensions ldA / ldB.
+ * The template parameter U is only passed on to the callees.
+ */
+template <typename T, typename U>
+static void gemmt_rec(uplo_t uploc,  trans_t transA,  trans_t transB,
+  dim_t n, dim_t k, T* alpha,  T* A,  dim_t ldA, bool cfA, T* B,
+  dim_t ldB, bool cfB, T* beta, T* C, dim_t ldC, num_t dt ) {
+  if (n <= max(CROSSOVER_GEMMT, 1)) {
+    // Small enough: use the unblocked kernel.
+    gemmt_rec2<T,U>(uploc, transA, transB, n, k, alpha, A, ldA, cfA,
+                                          B, ldB, cfB, beta, C, ldC);
+    return;
+  }
+
+  const bool a_notrans = (transA == BLIS_NO_TRANSPOSE);
+  const bool b_notrans = (transB == BLIS_NO_TRANSPOSE);
+
+  const dim_t rsa = a_notrans ? 1 : ldA ;
+  const dim_t csa = a_notrans ? ldA : 1 ;
+  const dim_t rsb = b_notrans ? 1 : ldB ;
+  const dim_t csb = b_notrans ? ldB : 1 ;
+  const dim_t rsc = 1  ;
+  const dim_t csc = ldC;
+
+  // Split point and size of the trailing part.
+  const dim_t head = rec_split(n, dt);
+  const dim_t tail = n - head;
+
+  // Trailing parts of op(A) (rows) and op(B) (columns).
+  T * a_tail = A + (a_notrans ? head : ldA * head);
+  T * b_tail = B + (b_notrans ? ldB * head : head);
+
+  // Top-left diagonal block.
+  gemmt_rec<T,U>(uploc, transA, transB, head, k, alpha, A, ldA, cfA, B, ldB,
+                                               cfB, beta, C, ldC, dt);
+
+  if (uploc == BLIS_LOWER) {
+    // Bottom-left block: alpha * A_tail * B_head + beta * C_BL
+    libblis_gemm_check(tail, head, k, alpha, a_tail, rsa, csa, cfA,
+                               B, rsb, csb, cfB, beta, C + head, rsc, csc);
+  } else {
+    // Top-right block: alpha * A_head * B_tail + beta * C_TR
+    libblis_gemm_check(head, tail, k, alpha, A, rsa, csa, cfA,
+                               b_tail, rsb, csb, cfB, beta, C + ldC * head, rsc, csc);
+  }
+
+  // Bottom-right diagonal block.
+  gemmt_rec<T,U>(uploc, transA, transB, tail, k, alpha, a_tail, ldA, cfA,
+                 b_tail, ldB, cfB, beta, C + ldC * head + head, ldC, dt);
+}
+
+// Type-dispatching overloads of the matrix residual helper. Real element
+// types forward to computediffrm, complex ones to computediffim.
+// NOTE(review): the first two parameters are passed through as the two
+// matrix dimensions; despite the name 'k' the second one is a dimension of
+// the compared matrix, not an inner GEMM dimension - confirm against the
+// definitions of computediffrm / computediffim.
+double computediff(dim_t n,dim_t k, float *act, float *ref, dim_t rsc, dim_t csc) {
+  return computediffrm(n, k, act, ref, rsc, csc);
+}
+
+double computediff(dim_t n,dim_t k, double *act, double *ref, dim_t rsc, dim_t csc) {
+  return computediffrm(n, k, act, ref, rsc, csc);
+}
+
+double computediff(dim_t n,dim_t k, scomplex *act, scomplex *ref, dim_t rsc, dim_t csc) {
+  return computediffim(n, k, act, ref, rsc, csc);
+}
+double computediff(dim_t n,dim_t k, dcomplex *act, dcomplex *ref, dim_t rsc, dim_t csc) {
+  return computediffim(n, k, act, ref, rsc, csc);
+}
+
+/** GEMMT computes a matrix-matrix product with general matrices but updates
+ * only the upper or lower triangular part of the result matrix.
+ *
+ * Typed driver of the reference check: runs the recursive reference kernel
+ * in place on c_orig and returns the residual against the library result
+ * held in c.
+ *
+ *   params       not used by this function
+ *   alpha, beta  scalar objects
+ *   a, b         operand objects; if an operand carries a conjugation flag
+ *                it is conjugated up front via conjugate_tensor() and the
+ *                kernels are then called with the flag cleared
+ *   c            object holding the result under test
+ *   c_orig       object holding the initial C; overwritten by the reference
+ *   dt           datatype, forwarded to the kernels
+ *
+ * The kernels address C with a leading dimension and unit stride inside a
+ * column. A row-stored C is therefore handled as its transpose: the
+ * operands are swapped (with their trans flags) and the triangle is flipped.
+ *
+ * The residual covers the full n x n matrix C. It was previously computed
+ * over n x k, which skipped columns for k < n and read past the matrix for
+ * k > n.
+ * */
+template <typename T, typename U>
+double libblis_igemmt_check(
+  test_params_t* params,
+  obj_t*  alpha,
+  obj_t*  a,
+  obj_t*  b,
+  obj_t*  beta,
+  obj_t*  c,
+  obj_t*  c_orig,
+  num_t   dt
+){
+  dim_t k        = bli_obj_width_after_trans( a );
+  dim_t n        = bli_obj_width( c );
+  uplo_t  uploc  = bli_obj_uplo( c );
+  trans_t transA = bli_obj_onlytrans_status( a );
+  trans_t transB = bli_obj_onlytrans_status( b );
+  dim_t lda, ldb, ldc;
+  dim_t  rsc, csc;
+
+  bool crsf = bli_obj_is_row_stored( c );
+
+  // Effective strides of A and B after transposition; these do not depend
+  // on the storage of C.
+  dim_t rsa = transA ? bli_obj_col_stride( a ) : bli_obj_row_stride( a ) ;
+  dim_t csa = transA ? bli_obj_row_stride( a ) : bli_obj_col_stride( a ) ;
+  dim_t rsb = transB ? bli_obj_col_stride( b ) : bli_obj_row_stride( b ) ;
+  dim_t csb = transB ? bli_obj_row_stride( b ) : bli_obj_col_stride( b ) ;
+
+  if ( crsf ) {
+    rsc = bli_obj_row_stride( c_orig ) ;
+    csc = 1 ;
+    lda = transA ? csa : rsa ;
+    ldb = transB ? csb : rsb ;
+    ldc = rsc ;
+  } else {
+    rsc = 1 ;
+    csc = bli_obj_col_stride( c_orig ) ;
+    lda = transA ? rsa : csa ;
+    ldb = transB ? rsb : csb ;
+    ldc = csc ;
+  }
+
+  T* A       = (T*) bli_obj_buffer( a );
+  T* B       = (T*) bli_obj_buffer( b );
+  T* C       = (T*) bli_obj_buffer( c_orig );
+  T* Alpha   = (T*) bli_obj_buffer( alpha );
+  T* Beta    = (T*) bli_obj_buffer( beta );
+  bool conja = bli_obj_has_conj( a );
+  bool conjb = bli_obj_has_conj( b );
+
+  // Apply conjugation up front so the kernels can run without it.
+  if(bli_obj_has_conj(a)) {
+     conjugate_tensor(a, dt);
+     transA = bli_obj_onlytrans_status( a );
+     conja  = false;
+  }
+
+  if(bli_obj_has_conj(b)) {
+     conjugate_tensor(b, dt);
+     transB = bli_obj_onlytrans_status( b );
+     conjb  = false;
+  }
+
+  // Recursive kernel
+  if( !crsf ) {
+    gemmt_rec<T,U>(uploc, transA, transB, n, k, Alpha, A, lda,
+                                  conja, B, ldb, conjb, Beta, C, ldc, dt);
+  }else {
+    // Row-stored C: compute the transposed problem instead.
+    if( uploc == BLIS_UPPER)
+      uploc = BLIS_LOWER;
+    else if(uploc == BLIS_LOWER)
+      uploc = BLIS_UPPER;
+
+    gemmt_rec<T,U>(uploc, transB, transA, n, k, Alpha, B, ldb,
+                                  conjb, A, lda, conja, Beta, C, ldc, dt);
+  }
+
+  T* CC = (T*) bli_obj_buffer( c );
+
+  // C is n x n: compare every column, independent of the inner dimension k.
+  double resid = 0.0;
+  resid = computediff(n, n, C, CC, rsc, csc);
+
+  return resid;
+}
+
+// Entry point of the GEMMT reference check: selects the typed driver from
+// the datatype of c and returns its residual. An unsupported datatype is
+// reported through bli_check_error_code and yields 0.0.
+double libblis_test_igemmt_check(
+  test_params_t *params,
+  obj_t *alpha,
+  obj_t *a,
+  obj_t *b,
+  obj_t *beta,
+  obj_t *c,
+  obj_t *c_orig
+) {
+  const num_t dt = bli_obj_dt(c);
+
+  switch( dt )  {
+    case BLIS_FLOAT :
+      return libblis_igemmt_check<float, int32_t>( params, alpha, a, b, beta,
+                                                   c, c_orig, dt );
+    case BLIS_DOUBLE :
+      return libblis_igemmt_check<double, int64_t>( params, alpha, a, b, beta,
+                                                    c, c_orig, dt );
+    case BLIS_SCOMPLEX :
+      return libblis_igemmt_check<scomplex, int32_t>( params, alpha, a, b, beta,
+                                                      c, c_orig, dt );
+    case BLIS_DCOMPLEX :
+      return libblis_igemmt_check<dcomplex, int64_t>( params, alpha, a, b, beta,
+                                                      c, c_orig, dt );
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+  return 0.0;
+}
+
+
+// Scans the real matrix held by c for NaN entries.
+//
+//   rsc, csc   row / column strides used to address the buffer of c
+//   c          matrix object; its length and width give the scan range
+//
+// Returns the first NaN found (widened to double), or 0.0 when the matrix
+// holds no NaN. The scan stops at the first hit; the previous 'break' only
+// left the inner loop and kept visiting the remaining rows.
+template <typename T>
+double libblis_check_nan_real( dim_t rsc, dim_t csc, obj_t* c ) {
+  dim_t  M = bli_obj_length( c );
+  dim_t  N = bli_obj_width( c );
+  T* C = (T*) bli_obj_buffer( c );
+
+  for( dim_t i = 0 ; i < M ; i++ ) {
+    for( dim_t j = 0 ; j < N ; j++ ) {
+      auto tv = C[ i*rsc + j*csc ];
+      if ( bli_isnan( tv )) {
+        return (double) tv;
+      }
+    }
+  }
+  return 0.0;
+}
+
+// Scans the complex matrix held by c (element type U) for entries whose
+// real or imaginary part is NaN.
+//
+//   rsc, csc   row / column strides used to address the buffer of c
+//   c          matrix object; its length and width give the scan range
+//
+// Returns the first NaN component found (the real part takes precedence),
+// or 0.0 when no component is NaN. The scan stops at the first hit; the
+// previous 'break' only left the inner loop. The template parameter T is
+// not used by this function.
+template <typename U, typename T>
+double libblis_check_nan_complex( dim_t rsc, dim_t csc, obj_t* c ) {
+  dim_t  M = bli_obj_length( c );
+  dim_t  N = bli_obj_width( c );
+  U* C = (U*) bli_obj_buffer( c );
+
+  for( dim_t i = 0 ; i < M ; i++ ) {
+    for( dim_t j = 0 ; j < N ; j++ ) {
+      auto tv = C[ i*rsc + j*csc ];
+      if ( bli_isnan( tv.real ) || bli_isnan( tv.imag )) {
+        return bli_isnan( tv.real ) ? tv.real : tv.imag;
+      }
+    }
+  }
+  return 0.0;
+}
+
+// Dispatches the NaN scan of matrix c to the typed helper selected by the
+// datatype of c.
+//
+// The strides handed to the helper assume one unit-stride dimension: rows
+// when c has a unit row stride, columns otherwise. Returns what the helper
+// returns (a NaN if one was found, else 0.0); an unsupported datatype is
+// reported through bli_check_error_code and yields 0.0.
+double libblis_check_nan_gemmt(obj_t* c) {
+  const num_t dt = bli_obj_dt(c);
+  const bool unit_row_stride = ( bli_obj_row_stride( c ) == 1 );
+
+  dim_t  rsc, csc;
+  if( unit_row_stride ) {
+    rsc = 1;
+    csc = bli_obj_col_stride( c );
+  } else {
+    rsc = bli_obj_row_stride( c );
+    csc = 1 ;
+  }
+
+  switch( dt )  {
+    case BLIS_FLOAT:
+      return libblis_check_nan_real<float>( rsc, csc, c );
+    case BLIS_DOUBLE:
+      return libblis_check_nan_real<double>( rsc, csc, c );
+    case BLIS_SCOMPLEX:
+      return libblis_check_nan_complex<scomplex, float>( rsc, csc, c );
+    case BLIS_DCOMPLEX:
+      return libblis_check_nan_complex<dcomplex, double>( rsc, csc, c );
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return 0.0;
+}
+
+
--- a/gtestsuite/src/ref_gemv.cpp
+++ b/gtestsuite/src/ref_gemv.cpp
@@ -0,0 +1,319 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_gemv.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> GEMV performs one of the matrix-vector operations
+//*>    y := alpha*A*x + beta*y,   or   y := alpha*A**T*x + beta*y,   or
+//*>    y := alpha*A**H*x + beta*y,
+//*  ==========================================================================
+
+/*
+ * Reference real GEMV used to validate the library result:
+ *     y := alpha*op(A)*x + beta*y
+ * Y is overwritten in place.
+ *
+ *   transA      BLIS_NO_TRANSPOSE and BLIS_CONJ_NO_TRANSPOSE select the
+ *               non-transposed path; any other value the transposed path.
+ *   M, N        loop extents. Non-transposed: y has M elements, x has N.
+ *               Transposed: x has M elements, y has N.
+ *   alpha, beta pointers to the scalars (only element 0 is read).
+ *   A, rsa, csa matrix buffer, addressed as A[i*rsa + j*csa]. On the
+ *               transposed path i runs over N and j over M, so the caller
+ *               is expected to pass strides that match.
+ *   X, incx     input vector and its increment.
+ *   Y, incy     input/output vector and its increment.
+ *
+ * U is not used by this routine.
+ */
+template <typename T, typename U>
+void libblis_igemv_check(trans_t transA , dim_t M, dim_t N, T* alpha, T* A,
+         dim_t rsa, dim_t csa, T* X, dim_t incx, T* beta, T* Y, dim_t incy) {
+  const T ONE   = 1.0;
+  const T ZERO  = 0.0;
+  const T Alpha = alpha[0];
+  const T Beta  = beta[0];
+
+  // Quick return: empty problem, or y := 0*op(A)*x + 1*y.
+  if ((M == 0) || (N == 0) || ((Alpha == ZERO) && (Beta == ONE))) {
+    return;
+  }
+
+  const bool NOTRANSA = ((transA == BLIS_NO_TRANSPOSE) ||
+                         (transA == BLIS_CONJ_NO_TRANSPOSE));
+
+  // Lengths of x and y.
+  const dim_t lenx = NOTRANSA ? N : M;
+  const dim_t leny = NOTRANSA ? M : N;
+
+  // Start offsets into X and Y. For a non-positive increment the walk begins
+  // at the far end so that it finishes at element 0. The Fortran reference
+  // uses 1 - (len-1)*inc because it is 1-based; with 0-based indexing the
+  // leading 1 must be dropped, otherwise every access is one element off.
+  // NOTE(review): assumes the BLAS convention that X/Y point at the lowest
+  // address of the vector - confirm against how callers pass negative
+  // increments.
+  const dim_t kx = (incx > 0) ? 0 : -(lenx - 1) * incx;
+  const dim_t ky = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+  // First form  y := beta*y.
+  if (Beta != ONE) {
+    dim_t iy = ky;
+    for(dim_t i = 0 ; i < leny ; i++) {
+      // beta == 0 overwrites instead of multiplying, so NaN/Inf already
+      // present in Y is not propagated.
+      Y[iy] = (Beta == ZERO) ? ZERO : Beta*Y[iy];
+      iy += incy;
+    }
+  }
+
+  if (Alpha == ZERO)
+    return;
+
+  if (NOTRANSA) {
+    // Form  y := alpha*A*x + y, one column of A at a time.
+    dim_t jx = kx;
+    for(dim_t j = 0 ; j < N ; j++) {
+      const T temp = Alpha*X[jx];
+      dim_t iy = ky;
+      for(dim_t i = 0 ; i < M ; i++) {
+        Y[iy] = Y[iy] + temp * A[i*rsa + j*csa];
+        iy += incy;
+      }
+      jx += incx;
+    }
+  }
+  else {
+    // Form  y := alpha*A**T*x + y, one dot product per element of y.
+    dim_t jy = ky;
+    for(dim_t i = 0 ; i < N ; i++) {
+      T temp = ZERO;
+      dim_t ix = kx;
+      for(dim_t j = 0 ; j < M ; j++) {
+        temp = temp + A[i*rsa + j*csa] * X[ix];
+        ix += incx;
+      }
+      Y[jy] = Y[jy] + Alpha*temp;
+      jy += incy;
+    }
+  }
+  return;
+}
+
+/*
+ * Reference complex GEMV used to validate the library result:
+ *     y := alpha*op(A)*x + beta*y
+ * Y is overwritten in place.
+ *
+ *   transA      BLIS_NO_TRANSPOSE and BLIS_CONJ_NO_TRANSPOSE select the
+ *               non-transposed path; any other value the transposed path.
+ *   M, N        loop extents. Non-transposed: y has M elements, x has N.
+ *               Transposed: x has M elements, y has N.
+ *   alpha, beta pointers to the complex scalars.
+ *   A, rsa, csa matrix buffer, addressed as A[i*rsa + j*csa].
+ *   conja       when true, A is conjugated IN PLACE before use.
+ *   X, incx     input vector and its increment.
+ *   conjx       when true, X is conjugated IN PLACE before use.
+ *   Y, incy     input/output vector and its increment.
+ *
+ * Side effects: besides Y, the buffers A and X are modified when conja /
+ * conjx are set (only if alpha is non-zero).
+ * U is not used by this routine.
+ */
+template <typename T, typename U>
+void libblis_icgemv_check(trans_t transA , dim_t M, dim_t N, T* alpha, T* A,
+         dim_t rsa, dim_t csa, bool conja, T* X, dim_t incx, T* beta, T* Y,
+         dim_t incy, bool  conjx) {
+  T ONE  = {1.0 , 0.0};
+  T ZERO = {0.0 , 0.0};
+  T temp;
+  T Alpha = *alpha;
+  T Beta  = *beta;
+  dim_t i, ix, iy, j, jx, jy;
+
+  // A complex scalar equals 0 or 1 only when BOTH components match.
+  // Testing the real part alone treats e.g. alpha = (0, 2) as zero.
+  const bool alpha_is_zero = (Alpha.real == ZERO.real) && (Alpha.imag == ZERO.imag);
+  const bool beta_is_zero  = (Beta.real  == ZERO.real) && (Beta.imag  == ZERO.imag);
+  const bool beta_is_one   = (Beta.real  == ONE.real ) && (Beta.imag  == ONE.imag );
+
+  // Quick return: empty problem, or y := 0*op(A)*x + 1*y.
+  if ((M == 0) || (N == 0) || (alpha_is_zero && beta_is_one)) {
+    return ;
+  }
+
+  const bool NOTRANSA = ((transA == BLIS_NO_TRANSPOSE) ||
+                         (transA == BLIS_CONJ_NO_TRANSPOSE));
+
+  // Lengths of x and y.
+  const dim_t lenx = NOTRANSA ? N : M;
+  const dim_t leny = NOTRANSA ? M : N;
+
+  // Start offsets into X and Y. The Fortran reference uses
+  // 1 - (len-1)*inc because it is 1-based; with 0-based indexing the
+  // leading 1 must be dropped.
+  // NOTE(review): assumes the BLAS convention that X/Y point at the lowest
+  // address of the vector - confirm against callers using negative
+  // increments.
+  const dim_t kx = (incx > 0) ? 0 : -(lenx - 1) * incx;
+  const dim_t ky = (incy > 0) ? 0 : -(leny - 1) * incy;
+
+  // First form  y := beta*y. This must happen before the alpha == 0 exit,
+  // otherwise y is left unscaled when alpha is zero and beta is not one.
+  if (!beta_is_one) {
+    iy = ky;
+    if (beta_is_zero) {
+      for(i = 0; i < leny ; i++) {
+        Y[iy] = ZERO;
+        iy = iy + incy;
+      }
+    }
+    else {
+      for(i = 0 ; i < leny ; i++) {
+        Y[iy] = mulc<T>(Beta , Y[iy]);
+        iy = iy + incy;
+      }
+    }
+  }
+
+  if (alpha_is_zero)
+    return;
+
+  // Apply the requested conjugations in place. The (leny, lenx) extents
+  // mirror the way A is indexed by the product loops below.
+  if( conja ) {
+    for(i = 0; i < leny ; i++) {
+      for(j = 0 ; j < lenx ; j++) {
+        A[i*rsa + j*csa] = conjugate<T>(A[i*rsa + j*csa]);
+      }
+    }
+  }
+
+  if(conjx) {
+    ix = kx;
+    for(i = 0 ; i < lenx ; i++) {
+      X[ix] = conjugate<T>(X[ix]);
+      ix = ix + incx;
+    }
+  }
+
+  if (NOTRANSA) {
+    /* Form  y := alpha*A*x + y. */
+    jx = kx;
+    for(j = 0; j < N ; j++) {
+      temp = mulc<T>(Alpha , X[jx]);
+      iy = ky;
+      for(i = 0; i < M ; i++) {
+        Y[iy] = addc<T>(Y[iy] , mulc<T>(temp , A[i*rsa + j*csa]));
+        iy = iy + incy;
+      }
+      jx = jx + incx;
+    }
+  }
+  else {
+    /* Form  y := alpha*A**T*x + y  or  y := alpha*A**H*x + y. */
+    jy = ky;
+    for(i = 0 ; i < N ; i++) {
+      temp = ZERO;
+      ix = kx;
+      for(j = 0 ; j < M ; j++) {
+        temp = addc<T>(temp , mulc<T>(A[i*rsa + j*csa] , X[ix]));
+        ix = ix + incx;
+      }
+      Y[jy] = addc<T>(Y[jy] , mulc<T>(Alpha , temp));
+      jy = jy + incy;
+    }
+  }
+  return;
+}
+
+/*
+ * Runs the reference GEMV on the buffer of `y_orig` (overwriting it) and
+ * compares the outcome with the buffer of `y`.
+ *
+ *   alpha, beta  scalar objects.
+ *   a            matrix object; its transposition/conjugation flags and
+ *                storage layout drive how the reference is invoked.
+ *   x            input vector object.
+ *   y            vector object compared against the reference result.
+ *   y_orig       vector object the reference result is written into.
+ *   dt           datatype selecting the float/double/scomplex/dcomplex path.
+ *
+ * Returns the value produced by computediffrv()/computediffiv(); 0.0 is
+ * returned for an unsupported datatype, after it has been reported through
+ * bli_check_error_code().
+ */
+double libblis_test_igemv_check(
+  obj_t*  alpha,
+  obj_t*  a,
+  obj_t*  x,
+  obj_t*  beta,
+  obj_t*  y,
+  obj_t*  y_orig,
+  num_t   dt
+){
+  double  result = 0.0;
+  trans_t transA = bli_obj_onlytrans_status( a );
+
+  // Extents handed to the reference kernel.
+  f77_int M, N;
+  if( transA ) {
+    M = bli_obj_vector_dim( x );
+    N = bli_obj_vector_dim( y_orig );
+  } else {
+    M = bli_obj_vector_dim( y_orig );
+    N = bli_obj_vector_dim( x );
+  }
+
+  const f77_int incx  = bli_obj_vector_inc( x );
+  const f77_int incy  = bli_obj_vector_inc( y_orig );
+  const f77_int ylen  = bli_obj_vector_dim( y_orig );
+  const bool    conjx = bli_obj_has_conj( x );
+  const bool    conja = bli_obj_has_conj( a );
+
+  // Strides are exchanged when `a` carries a transposition; this holds for
+  // both storage layouts.
+  const bool    a_has_trans = bli_obj_has_trans( a );
+  const f77_int rsa = a_has_trans ? bli_obj_col_stride( a ) : bli_obj_row_stride( a );
+  const f77_int csa = a_has_trans ? bli_obj_row_stride( a ) : bli_obj_col_stride( a );
+
+  if( !bli_obj_is_col_stored( a ) ) {
+    // Not column stored: toggle the transposition and exchange M and N.
+    switch( transA ) {
+      case BLIS_NO_TRANSPOSE:      transA = BLIS_TRANSPOSE;         break;
+      case BLIS_TRANSPOSE:         transA = BLIS_NO_TRANSPOSE;      break;
+      case BLIS_CONJ_NO_TRANSPOSE: transA = BLIS_CONJ_TRANSPOSE;    break;
+      default:                     transA = BLIS_CONJ_NO_TRANSPOSE; break;
+    }
+    const f77_int held = M;
+    M = N;
+    N = held;
+  }
+
+  switch( dt )  {
+    case BLIS_FLOAT :
+    {
+      float* alpha_p = (float*) bli_obj_buffer( alpha );
+      float* beta_p  = (float*) bli_obj_buffer( beta );
+      float* a_p     = (float*) bli_obj_buffer( a );
+      float* x_p     = (float*) bli_obj_buffer( x );
+      float* y_ref   = (float*) bli_obj_buffer( y_orig );
+      libblis_igemv_check<float, int32_t>(transA, M, N, alpha_p, a_p, rsa, csa,
+                                          x_p, incx, beta_p, y_ref, incy );
+      float* y_lib   = (float*) bli_obj_buffer( y );
+      result = computediffrv(ylen, incy, y_lib, y_ref);
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double* alpha_p = (double*) bli_obj_buffer( alpha );
+      double* beta_p  = (double*) bli_obj_buffer( beta );
+      double* a_p     = (double*) bli_obj_buffer( a );
+      double* x_p     = (double*) bli_obj_buffer( x );
+      double* y_ref   = (double*) bli_obj_buffer( y_orig );
+      libblis_igemv_check<double, int64_t>(transA, M, N, alpha_p, a_p, rsa, csa,
+                                           x_p, incx, beta_p, y_ref, incy );
+      double* y_lib   = (double*) bli_obj_buffer( y );
+      result = computediffrv(ylen, incy, y_lib, y_ref);
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* alpha_p = (scomplex*) bli_obj_buffer( alpha );
+      scomplex* beta_p  = (scomplex*) bli_obj_buffer( beta );
+      scomplex* a_p     = (scomplex*) bli_obj_buffer( a );
+      scomplex* x_p     = (scomplex*) bli_obj_buffer( x );
+      scomplex* y_ref   = (scomplex*) bli_obj_buffer( y_orig );
+      libblis_icgemv_check<scomplex, int32_t>(transA, M, N, alpha_p, a_p, rsa,
+                              csa, conja, x_p, incx, beta_p, y_ref, incy, conjx );
+      scomplex* y_lib   = (scomplex*) bli_obj_buffer( y );
+      result = computediffiv(ylen, incy, y_lib, y_ref);
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* alpha_p = (dcomplex*) bli_obj_buffer( alpha );
+      dcomplex* beta_p  = (dcomplex*) bli_obj_buffer( beta );
+      dcomplex* a_p     = (dcomplex*) bli_obj_buffer( a );
+      dcomplex* x_p     = (dcomplex*) bli_obj_buffer( x );
+      dcomplex* y_ref   = (dcomplex*) bli_obj_buffer( y_orig );
+      libblis_icgemv_check<dcomplex, int64_t>(transA, M, N, alpha_p, a_p, rsa,
+                              csa, conja, x_p, incx, beta_p, y_ref, incy, conjx );
+      dcomplex* y_lib   = (dcomplex*) bli_obj_buffer( y );
+      result = computediffiv(ylen, incy, y_lib, y_ref);
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return result;
+}
+
--- a/gtestsuite/src/ref_ger.cpp
+++ b/gtestsuite/src/ref_ger.cpp
@@ -0,0 +1,184 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_ger.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> GER performs the rank 1 operation
+//*>    A := alpha*x*y**T + A,
+//*> where alpha is a scalar, x is an m element vector, y is an n element
+//*> vector and A is an m by n matrix.
+//*  ==========================================================================
+
+/*
+ * Reference real GER used to validate the library result:
+ *     A := alpha*x*y**T + A
+ * A is overwritten in place.
+ *
+ *   M, N        number of rows / columns of A (x has M elements, y has N).
+ *   alpha       pointer to the scalar (only element 0 is read).
+ *   X, incx     vector x and its increment.
+ *   Y, incy     vector y and its increment.
+ *   A, rsa, csa matrix buffer, addressed as A[i*rsa + j*csa].
+ *
+ * U is not used by this routine.
+ */
+template <typename T, typename U>
+void libblis_iger_check(dim_t M, dim_t N, T *alpha, T *X, dim_t incx,
+                          T* Y, dim_t incy, T* A, dim_t rsa, dim_t csa) {
+  const T Alpha = alpha[0];
+  const T ZERO  = 0.0;
+
+  // Quick return: empty problem or nothing to add.
+  if ((M == 0) || (N == 0) ||
+    (Alpha == ZERO))
+      return;
+
+  // Start offsets into X and Y. The Fortran reference uses
+  // 1 - (len-1)*inc because it is 1-based; with 0-based indexing the
+  // leading 1 must be dropped, otherwise every access is one element off.
+  // NOTE(review): assumes the BLAS convention that X/Y point at the lowest
+  // address of the vector - confirm against callers using negative
+  // increments.
+  const dim_t kx = (incx > 0) ? 0 : -(M - 1)*incx;
+  dim_t       jy = (incy > 0) ? 0 : -(N - 1)*incy;
+
+  for(dim_t j = 0; j < N ; j++) {
+    // Columns whose y element is zero contribute nothing.
+    if (Y[jy] != ZERO) {
+      const T temp = Alpha * Y[jy];
+      dim_t ix = kx;
+      for(dim_t i = 0 ; i <  M ; i++) {
+        A[i*rsa + j *csa] = A[i*rsa + j *csa] + temp * X[ix];
+        ix = ix + incx;
+      }
+    }
+    jy = jy + incy;
+  }
+  return;
+}
+
+/*
+ * Reference complex GER used to validate the library result:
+ *     A := alpha*conj?(x)*conj?(y)**T + A
+ * A is overwritten in place.
+ *
+ *   M, N        number of rows / columns of A (x has M elements, y has N).
+ *   alpha       pointer to the complex scalar (only element 0 is read).
+ *   X, incx     vector x and its increment.
+ *   conjx       when true, X is conjugated IN PLACE before use.
+ *   Y, incy     vector y and its increment.
+ *   conjy       when true, Y is conjugated IN PLACE before use.
+ *   A, rsa, csa matrix buffer, addressed as A[i*rsa + j*csa].
+ *
+ * Side effects: besides A, the buffers X and Y are modified when conjx /
+ * conjy are set.
+ * U is not used by this routine.
+ */
+template <typename T, typename U>
+void libblis_icger_check(dim_t M, dim_t N, T *alpha, T *X, dim_t incx,
+  bool conjx, T* Y, dim_t incy, bool conjy, T* A, dim_t rsa, dim_t csa) {
+
+  T Alpha = alpha[0];
+  T temp;
+  dim_t i, ix, j, jy;
+  T ZERO = {0.0 , 0.0};
+
+  // Quick return: empty problem or alpha == 0 (both components).
+  if ((M == 0) || (N == 0) ||
+    ((Alpha.real == ZERO.real) &&(Alpha.imag == ZERO.imag)))
+      return;
+
+  // Start offsets into X and Y, computed once and shared by the conjugation
+  // passes and the rank-1 update so that all of them touch the same
+  // elements. The Fortran reference uses 1 - (len-1)*inc because it is
+  // 1-based; with 0-based indexing the leading 1 must be dropped.
+  // NOTE(review): assumes the BLAS convention that X/Y point at the lowest
+  // address of the vector - confirm against callers using negative
+  // increments.
+  const dim_t kx = (incx > 0) ? 0 : -(M - 1)*incx;
+  const dim_t ky = (incy > 0) ? 0 : -(N - 1)*incy;
+
+  if(conjx) {
+    ix = kx;
+    for(i = 0 ; i < M ; i++) {
+      X[ix] = conjugate<T>(X[ix]);
+      ix = ix + incx;
+    }
+  }
+
+  if(conjy) {
+    jy = ky;
+    for(j = 0; j < N ; j++) {
+      Y[jy] = conjugate<T>(Y[jy]);
+      jy = jy + incy;
+    }
+  }
+
+  jy = ky;
+  for(j = 0; j < N ; j++) {
+    // Columns whose y element is zero contribute nothing.
+    if ((Y[jy].real != ZERO.real) || (Y[jy].imag != ZERO.imag)) {
+      temp = mulc<T>(Alpha , Y[jy]);
+      ix = kx;
+      for(i = 0 ; i <  M ; i++) {
+        A[i*rsa + j*csa] = addc<T>(A[i*rsa + j*csa] , mulc<T>(temp , X[ix]));
+        ix = ix + incx;
+      }
+    }
+    jy = jy + incy;
+  }
+  return;
+}
+
+/*
+ * Runs the reference GER on the buffer of `a_orig` (overwriting it) and
+ * compares the outcome with the buffer of `a`.
+ *
+ *   params   not used by this routine.
+ *   alpha    scalar object.
+ *   x, y     vector objects; their conjugation flags are honoured for the
+ *            complex datatypes. The datatype is taken from `x`.
+ *   a        matrix object compared against the reference result; it also
+ *            supplies the dimensions and strides.
+ *   a_orig   matrix object the reference result is written into.
+ *
+ * Returns the value produced by computediffrm()/computediffim(); 0.0 is
+ * returned for an unsupported datatype, after it has been reported through
+ * bli_check_error_code().
+ */
+double libblis_test_iger_check(
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         x,
+  obj_t*         y,
+  obj_t*         a,
+  obj_t*         a_orig
+){
+  double result = 0.0;
+
+  // Problem geometry, all read from the objects.
+  const dim_t M     = bli_obj_length( a );
+  const dim_t N     = bli_obj_width( a );
+  const dim_t rsa   = bli_obj_row_stride( a ) ;
+  const dim_t csa   = bli_obj_col_stride( a ) ;
+  const dim_t incx  = bli_obj_vector_inc( x );
+  const dim_t incy  = bli_obj_vector_inc( y );
+  const bool  conjx = bli_obj_has_conj( x );
+  const bool  conjy = bli_obj_has_conj( y );
+
+  switch( bli_obj_dt( x ) )  {
+    case BLIS_FLOAT :
+    {
+      float* alpha_p = (float*) bli_obj_buffer( alpha );
+      float* x_p     = (float*) bli_obj_buffer( x );
+      float* y_p     = (float*) bli_obj_buffer( y );
+      float* a_ref   = (float*) bli_obj_buffer( a_orig );
+      float* a_lib   = (float*) bli_obj_buffer( a );
+      libblis_iger_check<float, int32_t>(M, N, alpha_p, x_p, incx,
+                                         y_p, incy, a_ref, rsa, csa);
+      result = computediffrm(M, N, a_lib, a_ref, rsa, csa);
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double* alpha_p = (double*) bli_obj_buffer( alpha );
+      double* x_p     = (double*) bli_obj_buffer( x );
+      double* y_p     = (double*) bli_obj_buffer( y );
+      double* a_ref   = (double*) bli_obj_buffer( a_orig );
+      double* a_lib   = (double*) bli_obj_buffer( a );
+      libblis_iger_check<double, int64_t>(M, N, alpha_p, x_p, incx,
+                                          y_p, incy, a_ref, rsa, csa);
+      result = computediffrm(M, N, a_lib, a_ref, rsa, csa);
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* alpha_p = (scomplex*) bli_obj_buffer( alpha );
+      scomplex* x_p     = (scomplex*) bli_obj_buffer( x );
+      scomplex* y_p     = (scomplex*) bli_obj_buffer( y );
+      scomplex* a_ref   = (scomplex*) bli_obj_buffer( a_orig );
+      scomplex* a_lib   = (scomplex*) bli_obj_buffer( a );
+      libblis_icger_check<scomplex, int32_t>(M, N, alpha_p, x_p, incx, conjx,
+                                             y_p, incy, conjy, a_ref, rsa, csa);
+      result = computediffim(M, N, a_lib, a_ref, rsa, csa);
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* alpha_p = (dcomplex*) bli_obj_buffer( alpha );
+      dcomplex* x_p     = (dcomplex*) bli_obj_buffer( x );
+      dcomplex* y_p     = (dcomplex*) bli_obj_buffer( y );
+      dcomplex* a_ref   = (dcomplex*) bli_obj_buffer( a_orig );
+      dcomplex* a_lib   = (dcomplex*) bli_obj_buffer( a );
+      libblis_icger_check<dcomplex, int64_t>(M, N, alpha_p, x_p, incx, conjx,
+                                             y_p, incy, conjy, a_ref, rsa, csa);
+      result = computediffim(M, N, a_lib, a_ref, rsa, csa);
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return result;
+}
--- a/gtestsuite/src/ref_hemm.cpp
+++ b/gtestsuite/src/ref_hemm.cpp
@@ -0,0 +1,486 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_hemm.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> HEMM  performs one of the matrix-matrix operations
+//*>    C := alpha*A*B + beta*C,
+//*> or
+//*>    C := alpha*B*A + beta*C,
+//*> where alpha and beta are scalars, A is a Hermitian matrix and B and
+//*> C are m by n matrices.
+//*  ==========================================================================
+
+/*
+ * Reference HEMM for real element types (for real data this is the
+ * symmetric product):
+ *     C := alpha*A*B + beta*C   when side == BLIS_LEFT
+ *     C := alpha*B*A + beta*C   otherwise
+ * C is overwritten in place.
+ *
+ *   side        BLIS_LEFT selects A*B; any other value selects B*A.
+ *   uplo        BLIS_UPPER reads the upper triangle of A; any other value
+ *               reads the lower triangle.
+ *   M, N        dimensions of C (and of B).
+ *   Alpha, Beta scalars.
+ *   A, rsa, csa matrix A, addressed as A[i*rsa + j*csa]; only the triangle
+ *               selected by `uplo` is read.
+ *   B, rsb, csb matrix B.
+ *   C, rsc, csc input/output matrix C.
+ */
+template <typename T>
+void libblis_ihemm_check(side_t side, uplo_t uplo, dim_t M, dim_t N,
+  T Alpha, T* A, dim_t rsa, dim_t csa, T* B, dim_t rsb, dim_t csb, T Beta,
+  T* C, dim_t rsc, dim_t csc )
+{
+    T ONE = 1.0;
+    T ZERO = 0.0;
+    T tmp1, tmp2;
+    bool LSIDE, UPPER;
+    dim_t i, j, k;
+
+    //*     Decode the side / triangle selectors.
+    LSIDE   = (side == BLIS_LEFT);
+    UPPER   = (uplo == BLIS_UPPER);
+
+    //*     Quick return: empty problem, or C := 0*(...) + 1*C.
+    if( (M == 0 || N == 0) || ( Alpha == ZERO && Beta == ONE ) )
+      return;
+
+    //*     When Alpha is zero only the scaling C := Beta*C remains.
+    if( Alpha == ZERO )
+    {
+        if( Beta == ZERO )
+        {
+            for( j = 0 ; j < N ; j++ )
+            {
+                for( i = 0 ; i < M ; i++ )
+                {
+                    C[i*rsc + j*csc] = ZERO;
+                }
+            }
+        }
+        else
+        {
+            for( j = 0 ; j < N ; j++ )
+            {
+                for( i = 0 ; i < M ; i++ )
+                {
+                    C[i*rsc + j*csc] = Beta*C[i*rsc + j*csc];
+                }
+            }
+        }
+        return;
+    }
+
+    //*     Start the operations.
+    if( LSIDE )
+    {
+        //* Form  C := Alpha*A*B + Beta*C.
+        if( UPPER )
+        {
+            for( j = 0 ; j < N ; j++ )
+            {
+                //* Rows are visited top-down: C(k,j) for k < i has already
+                //* been scaled by Beta when the k loop accumulates into it.
+                for( i = 0 ; i < M ; i++ )
+                {
+                    tmp1 = Alpha*B[i*rsb + j*csb];
+                    tmp2 = ZERO;
+                    for( k = 0 ; k < i ; k++ )
+                    {
+                        C[k*rsc + j*csc] = C[k*rsc + j*csc] + tmp1*A[k*rsa + i*csa];
+                        tmp2 = tmp2 + B[k*rsb + j*csb] * A[k*rsa + i*csa];
+                    }
+                    if (Beta == ZERO)
+                    {
+                        C[i*rsc + j*csc] = tmp1*A[i*rsa + i*csa] + Alpha*tmp2;
+                    }
+                    else
+                    {
+                        C[i*rsc + j*csc] = Beta*C[i*rsc + j*csc] + tmp1*A[i*rsa + i*csa] + Alpha*tmp2;
+                    }
+                }
+            }
+        }
+        else
+        {
+            for( j = 0 ; j < N ; j++ )
+            {
+                //* Rows are visited bottom-up for the same reason: C(k,j)
+                //* for k > i has already been scaled by Beta.
+                for( i = (M-1) ; i >= 0 ; i-- )
+                {
+                    tmp1 = Alpha*B[i*rsb + j*csb];
+                    tmp2 = ZERO;
+                    for( k = (i+1) ; k < M ; k++ )
+                    {
+                        C[k*rsc + j*csc] = C[k*rsc + j*csc] + tmp1*A[k*rsa + i*csa];
+                        tmp2 = tmp2 + B[k*rsb + j*csb]*A[k*rsa + i*csa];
+                    }
+                    if (Beta == ZERO)
+                    {
+                        C[i*rsc + j*csc] = tmp1*A[i*rsa + i*csa] + Alpha*tmp2;
+                    }
+                    else
+                    {
+                        C[i*rsc + j*csc] = Beta*C[i*rsc + j*csc] + tmp1*A[i*rsa + i*csa] + Alpha*tmp2;
+                    }
+                }
+            }
+        }
+    }
+    else
+    {
+        //* Form  C := Alpha*B*A + Beta*C.
+        for( j = 0 ; j < N ; j++ )
+        {
+            //* Diagonal contribution, combined with the Beta scaling.
+            tmp1 = Alpha*A[j*rsa + j*csa];
+            if( Beta == ZERO )
+            {
+                for(i = 0 ; i < M ; i++)
+                {
+                    C[i*rsc + j*csc] = tmp1*B[i*rsb + j*csb];
+                }
+            }
+            else
+            {
+                for(i = 0 ; i < M ; i++)
+                {
+                    C[i*rsc + j*csc] = Beta*C[i*rsc + j*csc] + tmp1*B[i*rsb + j*csb];
+                }
+            }
+            //* Off-diagonal contributions with k < j; the element is read
+            //* from whichever triangle of A is stored.
+            for( k = 0 ; k < j ; k++ )
+            {
+                if( UPPER )
+                {
+                    tmp1 = Alpha*A[k*rsa + j*csa];
+                }
+                else
+                {
+                    tmp1 = Alpha*A[j*rsa + k*csa];
+                }
+                for(i = 0 ; i < M ; i++)
+                {
+                    C[i*rsc + j*csc] = C[i*rsc + j*csc] + tmp1*B[i*rsb + k*csb];
+                }
+            }
+            //* Off-diagonal contributions with k > j.
+            for( k = (j+1) ; k < N ; k++ )
+            {
+                if( UPPER )
+                {
+                    tmp1 = Alpha*A[j*rsa + k*csa];
+                }
+                else
+                {
+                    tmp1 = Alpha*A[k*rsa + j*csa];
+                }
+                for(i = 0 ; i < M ; i++)
+                {
+                    C[i*rsc + j*csc] = C[i*rsc + j*csc] + tmp1*B[i*rsb + k*csb];
+                }
+            }
+        }
+    }
+    return;
+}
+
+/*
+ * Reference HEMM for complex element types:
+ *     C := alpha*A*B + beta*C   when side == BLIS_LEFT
+ *     C := alpha*B*A + beta*C   otherwise
+ * where A is Hermitian. C is overwritten in place.
+ *
+ *   side        BLIS_LEFT selects A*B; any other value selects B*A.
+ *   uplo        BLIS_UPPER reads the upper triangle of A; any other value
+ *               reads the lower triangle.
+ *   M, N        dimensions of C (and of B).
+ *   Alpha, Beta complex scalars.
+ *   A, rsa, csa matrix A, addressed as A[i*rsa + j*csa]; only the triangle
+ *               selected by `uplo` is read, and the diagonal is passed
+ *               through real<T>() before use.
+ *   B, rsb, csb matrix B.
+ *   C, rsc, csc input/output matrix C.
+ *
+ * U is not used by this routine.
+ */
+template <typename T, typename U>
+void libblis_ichemm_check(side_t side, uplo_t uplo, dim_t M, dim_t N,
+  T Alpha, T* A, dim_t rsa, dim_t csa, T* B, dim_t rsb, dim_t csb, T Beta,
+  T* C, dim_t rsc, dim_t csc )
+{
+    T ONE  = {1.0 , 0.0};
+    T ZERO = {0.0 , 0.0};
+    T tmp1, tmp2;
+    dim_t i, j, k;
+
+    //*     Decode the side / triangle selectors.
+    const bool LSIDE = (side == BLIS_LEFT);
+    const bool UPPER = (uplo == BLIS_UPPER);
+
+    //*     A complex scalar equals 0 or 1 only when BOTH components match.
+    //*     Testing the real part alone treats e.g. Alpha = (0, 2) as zero.
+    const bool alpha_is_zero = (Alpha.real == ZERO.real) && (Alpha.imag == ZERO.imag);
+    const bool beta_is_zero  = (Beta.real  == ZERO.real) && (Beta.imag  == ZERO.imag);
+    const bool beta_is_one   = (Beta.real  == ONE.real ) && (Beta.imag  == ONE.imag );
+
+    //*     Quick return: empty problem, or C := 0*(...) + 1*C.
+    if( (M == 0 || N == 0) || ( alpha_is_zero && beta_is_one ) )
+      return;
+
+    //*     When Alpha is zero only the scaling C := Beta*C remains.
+    if( alpha_is_zero )
+    {
+        for( j = 0 ; j < N ; j++ )
+        {
+            for( i = 0 ; i < M ; i++ )
+            {
+                if( beta_is_zero )
+                {
+                    C[i*rsc + j*csc] = ZERO;
+                }
+                else
+                {
+                    C[i*rsc + j*csc] = mulc<T>(Beta , C[i*rsc + j*csc]);
+                }
+            }
+        }
+        return;
+    }
+
+    //*     Start the operations.
+    if( LSIDE )
+    {
+        //* Form  C := Alpha*A*B + Beta*C.
+        if( UPPER )
+        {
+            for( j = 0 ; j < N ; j++ )
+            {
+                //* Rows are visited top-down: C(k,j) for k < i has already
+                //* been scaled by Beta when the k loop accumulates into it.
+                for( i = 0 ; i < M ; i++ )
+                {
+                    tmp1 = mulc<T>(Alpha , B[i*rsb + j*csb]);
+                    tmp2 = ZERO;
+                    for( k = 0 ; k < i ; k++ )
+                    {
+                        C[k*rsc + j*csc] = addc<T>(C[k*rsc + j*csc] , mulc<T>(tmp1 , A[k*rsa + i*csa]));
+                        tmp2 = addc<T>(tmp2 , mulc<T>(B[k*rsb + j*csb] , conjugate<T>(A[k*rsa + i*csa])));
+                    }
+                    //* Diagonal term plus the accumulated conjugate part.
+                    tmp2 = addc<T>(mulc<T>(tmp1 , real<T>(A[i*rsa + i*csa])) , mulc<T>(Alpha , tmp2));
+                    if( beta_is_zero )
+                    {
+                        C[i*rsc + j*csc] = tmp2;
+                    }
+                    else
+                    {
+                        C[i*rsc + j*csc] = addc<T>(mulc<T>(Beta , C[i*rsc + j*csc]) , tmp2);
+                    }
+                }
+            }
+        }
+        else
+        {
+            for( j = 0 ; j < N ; j++ )
+            {
+                //* Rows are visited bottom-up for the same reason: C(k,j)
+                //* for k > i has already been scaled by Beta.
+                for( i = (M-1) ; i >= 0 ; i-- )
+                {
+                    tmp1 = mulc<T>(Alpha , B[i*rsb + j*csb]);
+                    tmp2 = ZERO;
+                    for( k = (i+1) ; k < M ; k++ )
+                    {
+                        //* A must be indexed with its own column stride
+                        //* (csa), not with the column stride of C.
+                        C[k*rsc + j*csc] = addc<T>(C[k*rsc + j*csc] , mulc<T>(tmp1 , A[k*rsa + i*csa]));
+                        tmp2 = addc<T>(tmp2 , mulc<T>(B[k*rsb + j*csb] , conjugate<T>(A[k*rsa + i*csa])));
+                    }
+                    //* Diagonal term plus the accumulated conjugate part.
+                    tmp2 = addc<T>(mulc<T>(tmp1 , real<T>(A[i*rsa + i*csa])) , mulc<T>(Alpha , tmp2));
+                    if( beta_is_zero )
+                    {
+                        C[i*rsc + j*csc] = tmp2;
+                    }
+                    else
+                    {
+                        C[i*rsc + j*csc] = addc<T>(mulc<T>(Beta , C[i*rsc + j*csc]) , tmp2);
+                    }
+                }
+            }
+        }
+    }
+    else
+    {
+        //* Form  C := Alpha*B*A + Beta*C.
+        for( j = 0 ; j < N ; j++ )
+        {
+            //* Diagonal contribution, combined with the Beta scaling.
+            tmp1 = mulc<T>(Alpha , real<T>(A[j*rsa + j*csa]));
+            if( beta_is_zero )
+            {
+                for(i = 0 ; i < M ; i++)
+                {
+                    C[i*rsc + j*csc] = mulc<T>(tmp1 , B[i*rsb + j*csb]);
+                }
+            }
+            else
+            {
+                for(i = 0 ; i < M ; i++)
+                {
+                    C[i*rsc + j*csc] = addc<T>(mulc<T>(Beta , C[i*rsc + j*csc]) , mulc<T>(tmp1 , B[i*rsb + j*csb]));
+                }
+            }
+            //* Off-diagonal contributions with k < j; the element comes from
+            //* the stored triangle, conjugated when it has to be mirrored.
+            for( k = 0 ; k < j ; k++ )
+            {
+                if( UPPER )
+                {
+                    tmp1 = mulc<T>(Alpha , A[k*rsa + j*csa]);
+                }
+                else
+                {
+                    tmp1 = mulc<T>(Alpha , conjugate<T>(A[j*rsa + k*csa]));
+                }
+                for(i = 0 ; i < M ; i++)
+                {
+                    C[i*rsc + j*csc] = addc<T>(C[i*rsc + j*csc] , mulc<T>(tmp1 , B[i*rsb + k*csb]));
+                }
+            }
+            //* Off-diagonal contributions with k > j.
+            for( k = (j+1) ; k < N ; k++ )
+            {
+                if( UPPER )
+                {
+                    tmp1 = mulc<T>(Alpha , conjugate<T>(A[j*rsa + k*csa]));
+                }
+                else
+                {
+                    tmp1 = mulc<T>(Alpha , A[k*rsa + j*csa]);
+                }
+                for(i = 0 ; i < M ; i++)
+                {
+                    C[i*rsc + j*csc] = addc<T>(C[i*rsc + j*csc] , mulc<T>(tmp1 , B[i*rsb + k*csb]));
+                }
+            }
+        }
+    }
+    return;
+}
+
+/*
+ * Runs the reference HEMM on the buffer of `c_orig` (overwriting it) and
+ * compares the outcome with the buffer of `c`.
+ *
+ *   params       not used by this routine.
+ *   side         which side the Hermitian matrix is applied from.
+ *   alpha, beta  scalar objects.
+ *   a            Hermitian matrix object; supplies the datatype and the
+ *                stored triangle.
+ *   b            matrix object B.
+ *   c            matrix object compared against the reference result; it
+ *                also supplies the dimensions and the strides of C.
+ *   c_orig       matrix object the reference result is written into.
+ *
+ * Returns the value produced by computediffrm()/computediffim(); 0.0 is
+ * returned for an unsupported datatype, after it has been reported through
+ * bli_check_error_code().
+ */
+double libblis_test_ihemm_check
+    (
+      test_params_t* params,
+      side_t         side,
+      obj_t*         alpha,
+      obj_t*         a,
+      obj_t*         b,
+      obj_t*         beta,
+      obj_t*         c,
+      obj_t*         c_orig
+    )
+{
+    double result = 0.0;
+
+    // Problem geometry, all read from the objects.
+    const uplo_t uploa = bli_obj_uplo( a );
+    const dim_t  M     = bli_obj_length( c );
+    const dim_t  N     = bli_obj_width( c );
+    const dim_t  rsa   = bli_obj_row_stride( a ) ;
+    const dim_t  csa   = bli_obj_col_stride( a ) ;
+    const dim_t  rsb   = bli_obj_row_stride( b ) ;
+    const dim_t  csb   = bli_obj_col_stride( b ) ;
+    const dim_t  rsc   = bli_obj_row_stride( c ) ;
+    const dim_t  csc   = bli_obj_col_stride( c ) ;
+
+    switch( bli_obj_dt( a ) )  {
+        case BLIS_FLOAT :
+        {
+            float* alpha_p = (float*) bli_obj_buffer( alpha );
+            float* beta_p  = (float*) bli_obj_buffer( beta );
+            float* a_p     = (float*) bli_obj_buffer( a );
+            float* b_p     = (float*) bli_obj_buffer( b );
+            float* c_ref   = (float*) bli_obj_buffer( c_orig );
+            float* c_lib   = (float*) bli_obj_buffer( c );
+            libblis_ihemm_check<float>(side, uploa, M, N, *alpha_p, a_p, rsa,
+                            csa, b_p, rsb, csb, *beta_p, c_ref, rsc, csc );
+            result = computediffrm(M, N, c_lib, c_ref, rsc, csc);
+            break;
+        }
+        case BLIS_DOUBLE :
+        {
+            double* alpha_p = (double*) bli_obj_buffer( alpha );
+            double* beta_p  = (double*) bli_obj_buffer( beta );
+            double* a_p     = (double*) bli_obj_buffer( a );
+            double* b_p     = (double*) bli_obj_buffer( b );
+            double* c_ref   = (double*) bli_obj_buffer( c_orig );
+            double* c_lib   = (double*) bli_obj_buffer( c );
+            libblis_ihemm_check<double>(side, uploa, M, N, *alpha_p, a_p, rsa,
+                            csa, b_p, rsb, csb, *beta_p, c_ref, rsc, csc );
+            result = computediffrm(M, N, c_lib, c_ref, rsc, csc);
+            break;
+        }
+        case BLIS_SCOMPLEX :
+        {
+            scomplex* alpha_p = (scomplex*) bli_obj_buffer( alpha );
+            scomplex* beta_p  = (scomplex*) bli_obj_buffer( beta );
+            scomplex* a_p     = (scomplex*) bli_obj_buffer( a );
+            scomplex* b_p     = (scomplex*) bli_obj_buffer( b );
+            scomplex* c_ref   = (scomplex*) bli_obj_buffer( c_orig );
+            scomplex* c_lib   = (scomplex*) bli_obj_buffer( c );
+            libblis_ichemm_check<scomplex, float>(side, uploa, M, N, *alpha_p,
+                   a_p, rsa, csa, b_p, rsb, csb, *beta_p, c_ref, rsc, csc );
+            result = computediffim(M, N, c_lib, c_ref, rsc, csc);
+            break;
+        }
+        case BLIS_DCOMPLEX :
+        {
+            dcomplex* alpha_p = (dcomplex*) bli_obj_buffer( alpha );
+            dcomplex* beta_p  = (dcomplex*) bli_obj_buffer( beta );
+            dcomplex* a_p     = (dcomplex*) bli_obj_buffer( a );
+            dcomplex* b_p     = (dcomplex*) bli_obj_buffer( b );
+            dcomplex* c_ref   = (dcomplex*) bli_obj_buffer( c_orig );
+            dcomplex* c_lib   = (dcomplex*) bli_obj_buffer( c );
+            libblis_ichemm_check<dcomplex, double>(side, uploa, M, N, *alpha_p,
+                   a_p, rsa, csa, b_p, rsb, csb, *beta_p, c_ref, rsc, csc );
+            result = computediffim(M, N, c_lib, c_ref, rsc, csc);
+            break;
+        }
+        default :
+            bli_check_error_code( BLIS_INVALID_DATATYPE );
+    }
+    return result;
+}
+
+/*
+ * Scans the M x N matrix held by `b` for NaN entries.
+ *
+ *   T   element type the buffer of `b` is read as.
+ *   rs  row stride used to index the buffer.
+ *   cs  column stride used to index the buffer.
+ *   b   object supplying the dimensions and the buffer.
+ *
+ * Returns the first NaN encountered (rows outermost, columns innermost),
+ * converted to double, or 0.0 when no element is NaN.
+ */
+template <typename T>
+double libblis_check_nan_real( dim_t rs, dim_t cs, obj_t* b ) {
+  dim_t  M = bli_obj_length( b );
+  dim_t  N = bli_obj_width( b );
+  T* B = (T*) bli_obj_buffer( b );
+
+  for( dim_t i = 0 ; i < M ; i++ ) {
+    for( dim_t j = 0 ; j < N ; j++ ) {
+      auto tv = B[ i*rs + j*cs ];
+      if ( bli_isnan( tv )) {
+        // Return immediately: a `break` here would only leave the inner
+        // loop and the remaining rows would still be scanned.
+        return tv;
+      }
+    }
+  }
+  return 0.0;
+}
+
+/*
+ * Scans the M x N complex matrix held by `b` for NaN components.
+ *
+ *   T   complex element type (must expose `.real` and `.imag`).
+ *   rs  row stride used to index the buffer.
+ *   cs  column stride used to index the buffer.
+ *   b   object supplying the dimensions and the buffer.
+ *
+ * Returns the NaN component of the first offending element (the real part is
+ * preferred when both are NaN), or 0.0 when no component is NaN.
+ */
+template <typename T>
+double libblis_check_nan_complex( dim_t rs, dim_t cs, obj_t* b ) {
+  dim_t  M = bli_obj_length( b );
+  dim_t  N = bli_obj_width( b );
+  T* B = (T*) bli_obj_buffer( b );
+
+  for( dim_t i = 0 ; i < M ; i++ ) {
+    for( dim_t j = 0 ; j < N ; j++ ) {
+      auto tv = B[ i*rs + j*cs ];
+      // Return at the first hit: a `break` would only leave the inner loop
+      // and the remaining rows would still be scanned.
+      if ( bli_isnan( tv.real ) ) {
+        return tv.real;
+      }
+      if ( bli_isnan( tv.imag ) ) {
+        return tv.imag;
+      }
+    }
+  }
+  return 0.0;
+}
+
+/*
+ * Checks the HEMM output object `c` for NaN values, dispatching on `dt`.
+ *
+ *   c   matrix object to scan.
+ *   dt  datatype selecting the float/double/scomplex/dcomplex scan.
+ *
+ * Returns the NaN that was found, or 0.0 when the matrix is NaN-free.
+ * An unsupported datatype is reported through bli_check_error_code().
+ */
+double libblis_check_nan_hemm(obj_t* c, num_t dt ) {
+  double resid = 0.0;
+
+  // Index with the object's own strides. Forcing the column stride to 1
+  // whenever the row stride is not 1 reads the wrong elements for
+  // general-stride storage.
+  dim_t  rsc   = bli_obj_row_stride( c );
+  dim_t  csc   = bli_obj_col_stride( c );
+
+  switch( dt )  {
+    case BLIS_FLOAT:
+    {
+      resid = libblis_check_nan_real<float>( rsc, csc, c );
+      break;
+    }
+    case BLIS_DOUBLE:
+    {
+      resid = libblis_check_nan_real<double>( rsc, csc, c );
+      break;
+    }
+    case BLIS_SCOMPLEX:
+    {
+      resid = libblis_check_nan_complex<scomplex>( rsc, csc, c );
+      break;
+    }
+    case BLIS_DCOMPLEX:
+    {
+      resid = libblis_check_nan_complex<dcomplex>( rsc, csc, c );
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return resid;
+}
--- a/gtestsuite/src/ref_hemv.cpp
+++ b/gtestsuite/src/ref_hemv.cpp
@@ -0,0 +1,305 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_hemv.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> HEMV performs the matrix-vector  operation
+//*>    y := alpha*A*x + beta*y
+//*> where alpha and beta are scalars, x and y are n element vectors and
+//*> A is an n by n hermitian matrix.
+//*  ==========================================================================
+
+// Reference computation of  y := alpha*A*x + beta*y  for real element
+// types, reading only one triangle of the M x M matrix A.
+//   uploa      : BLIS_UPPER reads the upper triangle, anything else the lower.
+//   alpha/beta : pointers to the two scalars.
+//   A,rsa,csa  : matrix buffer; element (i,j) is A[i*rsa + j*csa].
+//   X,incx     : input vector and its increment.
+//   Y,incy     : vector updated in place, and its increment.
+// The template parameter U is not used by this function.
+template <typename T, typename U>
+void libblis_ihemv_check(uplo_t uploa, dim_t M, T* alpha, T* A,
+   dim_t rsa, dim_t csa, T* X, dim_t incx, T* beta, T* Y, dim_t incy) {
+  const T zero  = 0.0;
+  const T one   = 1.0;
+  const T scalA = *alpha;
+  const T scalB = *beta;
+
+  // Quick returns: empty problem, or an operation that leaves y untouched.
+  if (M == 0)
+    return;
+  if ((scalA == zero) && (scalB == one))
+    return;
+
+  // Start offsets into X and Y.
+  // NOTE(review): for a non-positive increment the offset is 1 - M*inc;
+  // this looks like a 1-based formula carried over unchanged - confirm
+  // before relying on negative increments.
+  const dim_t x_start = (incx > 0) ? 0 : 1 - (M * incx);
+  const dim_t y_start = (incy > 0) ? 0 : 1 - (M * incy);
+
+  // y := beta*y (skipped when beta is exactly one).
+  if (scalB != one) {
+    dim_t yi = y_start;
+    for (dim_t k = 0 ; k < M ; ++k, yi += incy) {
+      if (scalB == zero)
+        Y[yi] = zero;
+      else
+        Y[yi] = (scalB * Y[yi]);
+    }
+  }
+
+  if (scalA == zero)
+    return;
+
+  const bool upper = (uploa == BLIS_UPPER);
+  dim_t xj = x_start;
+  dim_t yj = y_start;
+  for (dim_t col = 0 ; col < M ; ++col, xj += incx, yj += incy) {
+    const T ax = (scalA * X[xj]);   // alpha * x(col)
+    T dot = zero;                   // sum over the stored part of column col
+
+    if (upper) {
+      // Rows above the diagonal contribute to y(row) and to the dot product.
+      dim_t xi = x_start;
+      dim_t yi = y_start;
+      for (dim_t row = 0 ; row < col ; ++row, xi += incx, yi += incy) {
+        const T a_rc = A[row*rsa + col*csa];
+        Y[yi] = Y[yi] + (ax * a_rc);
+        dot = dot + (a_rc * X[xi]);
+      }
+      const T a_cc = A[col*rsa + col*csa];
+      Y[yj] = Y[yj] + (ax * a_cc) + (scalA * dot);
+    }
+    else {
+      // Diagonal first, then the rows below it.
+      const T a_cc = A[col*rsa + col*csa];
+      Y[yj] = Y[yj] + (ax * a_cc);
+      dim_t xi = xj;
+      dim_t yi = yj;
+      for (dim_t row = col + 1 ; row < M ; ++row) {
+        xi += incx;
+        yi += incy;
+        const T a_rc = A[row*rsa + col*csa];
+        Y[yi] = Y[yi] + (ax * a_rc);
+        dot = dot + (a_rc * X[xi]);
+      }
+      Y[yj] = Y[yj] + (scalA * dot);
+    }
+  }
+}
+
+// Reference computation of  y := alpha*A*x + beta*y  for complex element
+// types (T exposes .real / .imag), reading only one triangle of the
+// M x M hermitian matrix A.
+//   uploa      : BLIS_UPPER reads the upper triangle, anything else the lower.
+//   alpha/beta : pointers to the two complex scalars.
+//   A,rsa,csa  : matrix buffer; element (i,j) is A[i*rsa + j*csa].
+//   conja      : when true, every element of A is conjugated IN PLACE first.
+//   X,incx     : input vector and its increment.
+//   conjx      : when true, X is conjugated IN PLACE first.
+//   Y,incy     : vector updated in place, and its increment.
+// The template parameter U is not used by this function.
+template <typename T, typename U>
+void libblis_ichemv_check(uplo_t uploa, dim_t M, T* alpha, T* A, dim_t rsa,
+dim_t csa, bool conja, T* X, dim_t incx, bool conjx, T* beta, T* Y, dim_t incy) {
+  T ONE   = { 1.0, 0.0 };
+  T ZERO  = { 0.0, 0.0 };
+  T Alpha = *alpha;
+  T Beta  = *beta;
+  T tmp1, tmp2;
+  dim_t i, ix, iy, j, jx, jy, kx, ky;
+
+  // A complex scalar equals 0 or 1 only when BOTH components match, so
+  // every test below looks at the real and the imaginary part.
+  const bool alpha_is_zero =
+      (Alpha.real == ZERO.real) && (Alpha.imag == ZERO.imag);
+  const bool beta_is_zero  =
+      (Beta.real == ZERO.real) && (Beta.imag == ZERO.imag);
+  const bool beta_is_one   =
+      (Beta.real == ONE.real) && (Beta.imag == ONE.imag);
+
+  // Quick return: empty problem, or alpha == 0 and beta == 1 (y unchanged).
+  if ((M == 0) || (alpha_is_zero && beta_is_one))
+      return ;
+
+  //*     Set up the start points in  X  and  Y.
+  // NOTE(review): for a non-positive increment the offset is 1 - M*inc;
+  // this looks like a 1-based formula carried over unchanged - confirm
+  // before relying on negative increments.
+  if (incx > 0) {
+    kx = 0;
+  }
+  else {
+    kx = 1 - (M * incx);
+  }
+  if (incy > 0) {
+    ky = 0;
+  }
+  else {
+    ky = 1 - (M * incy);
+  }
+
+  //*     First form  y := beta*y.
+  if(!beta_is_one) {
+    iy = ky;
+    if(beta_is_zero) {
+      // beta == 0: y is overwritten, never multiplied.
+      for(i = 0 ; i < M ; i++) {
+        Y[iy] = ZERO;
+        iy = iy + incy;
+      }
+    }
+    else {
+      for(i = 0 ; i < M ; i++) {
+        Y[iy] = mulc<T>(Beta , Y[iy]);
+        iy = iy + incy;
+      }
+    }
+  }
+
+  if(alpha_is_zero)
+    return;
+
+  // Optional conjugation of x; this modifies the caller's buffer.
+  // NOTE(review): this walk starts at index 0 rather than kx - the two only
+  // differ for non-positive increments; confirm which is intended.
+  if(conjx) {
+    ix = 0;
+    for(i = 0 ; i < M ; i++) {
+      X[ix] = conjugate<T>(X[ix]);
+      ix = ix + incx;
+    }
+  }
+
+  // Optional conjugation of the whole matrix; modifies the caller's buffer.
+  if(conja) {
+    for(i = 0 ; i < M ; i++) {
+      for(j = 0 ; j < M ; j++) {
+        A[i*rsa + j*csa] = conjugate<T>(A[i*rsa + j*csa]);
+      }
+    }
+  }
+
+  T tmp = {0.0, 0.0};
+  if(uploa == BLIS_UPPER) {
+    //* Form  y  when A is stored in upper triangle.
+    jx = kx;
+    jy = ky;
+    for(j = 0 ; j < M ; j++) {
+      tmp1 = mulc<T>(Alpha , X[jx]);
+      tmp2 = ZERO;
+      ix = kx;
+      iy = ky;
+      for(i = 0 ; i < j ; i++) {
+        tmp = A[i*rsa + j*csa];
+        Y[iy] = addc<T>(Y[iy] , mulc<T>(tmp1 , tmp));
+        // The mirrored (lower) element is the conjugate of the stored one.
+        tmp2  = addc<T>(tmp2 , mulc<T>(conjugate<T>(tmp) , X[ix]));
+        ix = ix + incx;
+        iy = iy + incy;
+      }
+      // Only the real part of the diagonal element takes part.
+      tmp = A[j*rsa + j*csa];
+      tmp = addc<T>(mulc<T>(tmp1 , real<T>(tmp)) , mulc<T>(Alpha , tmp2));
+      Y[jy] = addc<T>(Y[jy] , tmp );
+      jx = jx + incx;
+      jy = jy + incy;
+    }
+  }
+  else {
+    //* Form  y  when A is stored in lower triangle.
+    jx = kx;
+    jy = ky;
+    for(j = 0 ; j < M ; j++) {
+      tmp1 = mulc<T>(Alpha , X[jx]);
+      tmp  = A[j*rsa + j*csa];
+      tmp2 = ZERO;
+      // Only the real part of the diagonal element takes part.
+      Y[jy] = addc<T>(Y[jy] , mulc<T>(tmp1 , real<T>(tmp)));
+      ix = jx;
+      iy = jy;
+      for(i = (j+1) ; i < M ; i++) {
+        ix = ix + incx;
+        iy = iy + incy;
+        tmp = A[i*rsa + j*csa];
+        Y[iy] = addc<T>(Y[iy] , mulc<T>(tmp1 , tmp));
+        // The mirrored (upper) element is the conjugate of the stored one.
+        tmp2  = addc<T>(tmp2 , mulc<T>(conjugate<T>(tmp) , X[ix]));
+      }
+      Y[jy] = addc<T>(Y[jy] , mulc<T>(Alpha , tmp2));
+      jx = jx + incx;
+      jy = jy + incy;
+    }
+  }
+
+  return;
+}
+
+// Runs the reference HEMV on `y_orig` (updated in place) and compares it
+// with the result already stored in `y`.
+//   params      : not used by this function.
+//   alpha, beta : scalar objects.
+//   a           : matrix object; supplies datatype, uplo, size, strides and
+//                 the conjugation flag.
+//   x           : input vector object (increment and conjugation flag).
+//   y           : vector holding the result under test.
+//   y_orig      : vector the reference result is written to.
+// Returns the value produced by computediffrv / computediffiv, or 0.0 after
+// an unsupported datatype has been reported.
+double libblis_test_ihemv_check(
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         a,
+  obj_t*         x,
+  obj_t*         beta,
+  obj_t*         y,
+  obj_t*         y_orig
+){
+  num_t  dt    = bli_obj_dt( a );
+  uplo_t uploa = bli_obj_uplo( a );
+  dim_t  M     = bli_obj_length( a );
+  dim_t  rsa   = bli_obj_row_stride( a );
+  dim_t  csa   = bli_obj_col_stride( a );
+  dim_t  incx  = bli_obj_vector_inc( x );
+  dim_t  incy  = bli_obj_vector_inc( y );
+  bool   conja = bli_obj_has_conj( a );
+  bool   conjx = bli_obj_has_conj( x );
+
+  switch( dt )  {
+    case BLIS_FLOAT :
+    {
+      float* pAlpha = static_cast<float*>( bli_obj_buffer( alpha ) );
+      float* pBeta  = static_cast<float*>( bli_obj_buffer( beta ) );
+      float* pA     = static_cast<float*>( bli_obj_buffer( a ) );
+      float* pX     = static_cast<float*>( bli_obj_buffer( x ) );
+      float* yRef   = static_cast<float*>( bli_obj_buffer( y_orig ) );
+      float* yLib   = static_cast<float*>( bli_obj_buffer( y ) );
+      libblis_ihemv_check<float, int32_t>(uploa, M, pAlpha, pA, rsa, csa,
+                                          pX, incx, pBeta, yRef, incy);
+      return computediffrv(M, incy, yLib, yRef);
+    }
+    case BLIS_DOUBLE :
+    {
+      double* pAlpha = static_cast<double*>( bli_obj_buffer( alpha ) );
+      double* pBeta  = static_cast<double*>( bli_obj_buffer( beta ) );
+      double* pA     = static_cast<double*>( bli_obj_buffer( a ) );
+      double* pX     = static_cast<double*>( bli_obj_buffer( x ) );
+      double* yRef   = static_cast<double*>( bli_obj_buffer( y_orig ) );
+      double* yLib   = static_cast<double*>( bli_obj_buffer( y ) );
+      libblis_ihemv_check<double, int64_t>(uploa, M, pAlpha, pA, rsa, csa,
+                                           pX, incx, pBeta, yRef, incy);
+      return computediffrv(M, incy, yLib, yRef);
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* pAlpha = static_cast<scomplex*>( bli_obj_buffer( alpha ) );
+      scomplex* pBeta  = static_cast<scomplex*>( bli_obj_buffer( beta ) );
+      scomplex* pA     = static_cast<scomplex*>( bli_obj_buffer( a ) );
+      scomplex* pX     = static_cast<scomplex*>( bli_obj_buffer( x ) );
+      scomplex* yRef   = static_cast<scomplex*>( bli_obj_buffer( y_orig ) );
+      scomplex* yLib   = static_cast<scomplex*>( bli_obj_buffer( y ) );
+      libblis_ichemv_check<scomplex, int32_t>(uploa, M, pAlpha, pA, rsa, csa,
+                                conja, pX, incx, conjx, pBeta, yRef, incy);
+      return computediffiv(M, incy, yLib, yRef);
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* pAlpha = static_cast<dcomplex*>( bli_obj_buffer( alpha ) );
+      dcomplex* pBeta  = static_cast<dcomplex*>( bli_obj_buffer( beta ) );
+      dcomplex* pA     = static_cast<dcomplex*>( bli_obj_buffer( a ) );
+      dcomplex* pX     = static_cast<dcomplex*>( bli_obj_buffer( x ) );
+      dcomplex* yRef   = static_cast<dcomplex*>( bli_obj_buffer( y_orig ) );
+      dcomplex* yLib   = static_cast<dcomplex*>( bli_obj_buffer( y ) );
+      libblis_ichemv_check<dcomplex, int64_t>(uploa, M, pAlpha, pA, rsa, csa,
+                                conja, pX, incx, conjx, pBeta, yRef, incy);
+      return computediffiv(M, incy, yLib, yRef);
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return 0.0;
+}
--- a/gtestsuite/src/ref_her.cpp
+++ b/gtestsuite/src/ref_her.cpp
@@ -0,0 +1,203 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_her.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> HER performs the hermitian rank 1 operation
+//*>    A := alpha*x*x**H + A
+//*>  where alpha is a real scalar, x is an n element vector and A is an
+//*>  n by n hermitian matrix.
+//*  ==========================================================================
+
+// Reference rank-1 update  A := alpha*x*x' + A  for real element types,
+// touching only one triangle of the N x N matrix A.
+//   uploa     : BLIS_UPPER updates the upper triangle, anything else the lower.
+//   alpha     : pointer to the scalar.
+//   X,incx    : input vector and its increment.
+//   A,rsa,csa : matrix updated in place; element (i,j) is A[i*rsa + j*csa].
+// The template parameter U is not used by this function.
+template <typename T, typename U>
+void libblis_iher_check(uplo_t uploa, dim_t N, T* alpha, T* X, dim_t incx,
+                                               T* A, dim_t rsa, dim_t csa) {
+  const T zero  = 0.0;
+  const T scale = *alpha;
+
+  // Nothing to do for an empty matrix or a zero scalar.
+  if (N == 0)
+    return;
+  if (scale == zero)
+    return;
+
+  /* Start offset into X.
+     NOTE(review): for a non-positive increment the offset is 1 - N*incx;
+     this looks like a 1-based formula carried over unchanged - confirm. */
+  const int x_start = (incx > 0) ? 0 : 1 - (N * incx);
+
+  const bool upper = (uploa == BLIS_UPPER);
+  int xj = x_start;
+  for (int col = 0 ; col < N ; ++col, xj += incx) {
+    // A zero x(col) contributes nothing to this column.
+    if (X[xj] == zero)
+      continue;
+
+    const T ax = scale * X[xj];
+
+    // Upper: rows 0..col, walking X from its start.
+    // Lower: rows col..N-1, walking X from x(col).
+    const int   row_first = upper ? 0 : col;
+    const dim_t row_end   = upper ? (dim_t)(col + 1) : N;
+    int xi = upper ? x_start : xj;
+    for (int row = row_first ; row < row_end ; ++row, xi += incx) {
+      A[row*rsa + col*csa] = A[row*rsa + col*csa] + (X[xi] * ax);
+    }
+  }
+}
+
+// Reference hermitian rank-1 update  A := alpha*x*x**H + A  for complex
+// element types (T exposes .real / .imag), touching only one triangle of
+// the N x N matrix A. Diagonal entries are always reduced to their real part.
+//   uploa     : BLIS_UPPER updates the upper triangle, anything else the lower.
+//   alpha     : pointer to the scalar.
+//   X,incx    : input vector and its increment.
+//   conjx     : when true, X is conjugated IN PLACE before the update.
+//   A,rsa,csa : matrix updated in place; element (i,j) is A[i*rsa + j*csa].
+// The template parameter U is not used by this function.
+template <typename T, typename U>
+void libblis_icher_check(uplo_t uploa, dim_t N, T* alpha, T* X, dim_t incx,
+                                    bool conjx, T* A, dim_t rsa, dim_t csa) {
+  T zero  = {0.0 , 0.0};
+  T scale = alpha[0];
+
+  // Nothing to do for an empty matrix or a zero scalar.
+  if (N == 0)
+    return;
+  if ((scale.real == zero.real) && (scale.imag == zero.imag))
+    return;
+
+  /* Start offset into X.
+     NOTE(review): for a non-positive increment the offset is 1 - N*incx;
+     this looks like a 1-based formula carried over unchanged - confirm. */
+  const int x_start = (incx > 0) ? 0 : 1 - (N * incx);
+
+  // Optional conjugation of x; this walk starts at index 0 and modifies
+  // the caller's buffer.
+  if (conjx) {
+    int xi = 0;
+    for (int k = 0 ; k < N ; ++k, xi += incx) {
+      X[xi] = conjugate<T>(X[xi]);
+    }
+  }
+
+  const bool upper = (uploa == BLIS_UPPER);
+  int xj = x_start;
+  for (int col = 0 ; col < N ; ++col, xj += incx) {
+    T& diag = A[col*rsa + col*csa];
+
+    // x(col) == 0: no update, but the diagonal is still made real.
+    const bool x_is_zero =
+        (X[xj].real == zero.real) && (X[xj].imag == zero.imag);
+    if (x_is_zero) {
+      diag = real<T>(diag);
+      continue;
+    }
+
+    T ax = mulc<T>(scale , conjugate<T>(X[xj]));
+
+    if (upper) {
+      /* Rows strictly above the diagonal, then the diagonal itself. */
+      int xi = x_start;
+      for (int row = 0 ; row < col ; ++row, xi += incx) {
+        A[row*rsa + col*csa] = addc<T>(A[row*rsa + col*csa] , mulc<T>(X[xi] , ax));
+      }
+      diag = real<T>(addc<T>(diag , mulc<T>(X[xj] , ax)));
+    }
+    else {
+      /* Diagonal first, then the rows strictly below it. */
+      diag = real<T>(addc<T>(diag , mulc<T>(ax , X[xj])));
+      int xi = xj;
+      for (int row = col + 1 ; row < N ; ++row) {
+        xi += incx;
+        A[row*rsa + col*csa] = addc<T>(A[row*rsa + col*csa] , mulc<T>(X[xi] , ax));
+      }
+    }
+  }
+}
+
+// Runs the reference HER on `a_orig` (updated in place) and compares it
+// with the result already stored in `a`.
+//   params : not used by this function.
+//   alpha  : scalar object.
+//   x      : vector object; supplies datatype, increment, conjugation flag.
+//   a      : matrix holding the result under test (uplo, size, strides).
+//   a_orig : matrix the reference result is written to.
+// Returns the value produced by computediffrm / computediffim, or 0.0 after
+// an unsupported datatype has been reported.
+double libblis_test_iher_check(
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         x,
+  obj_t*         a,
+  obj_t*         a_orig
+){
+  num_t  dt    = bli_obj_dt( x );
+  uplo_t uploa = bli_obj_uplo( a );
+  dim_t  M     = bli_obj_length( a );
+  dim_t  N     = bli_obj_width( a );
+  dim_t  rsa   = bli_obj_row_stride( a ) ;
+  dim_t  csa   = bli_obj_col_stride( a ) ;
+  dim_t  incx  = bli_obj_vector_inc( x );
+  bool   conjx = bli_obj_has_conj( x );
+
+  switch( dt )  {
+    case BLIS_FLOAT :
+    {
+      float* pAlpha = static_cast<float*>( bli_obj_buffer( alpha ) );
+      float* pX     = static_cast<float*>( bli_obj_buffer( x ) );
+      float* aRef   = static_cast<float*>( bli_obj_buffer( a_orig ) );
+      float* aLib   = static_cast<float*>( bli_obj_buffer( a ) );
+      libblis_iher_check<float, int32_t>(uploa, M, pAlpha, pX, incx,
+                                         aRef, rsa, csa);
+      return computediffrm(M, N, aLib, aRef, rsa, csa);
+    }
+    case BLIS_DOUBLE :
+    {
+      double* pAlpha = static_cast<double*>( bli_obj_buffer( alpha ) );
+      double* pX     = static_cast<double*>( bli_obj_buffer( x ) );
+      double* aRef   = static_cast<double*>( bli_obj_buffer( a_orig ) );
+      double* aLib   = static_cast<double*>( bli_obj_buffer( a ) );
+      libblis_iher_check<double, int64_t>(uploa, M, pAlpha, pX, incx,
+                                          aRef, rsa, csa);
+      return computediffrm(M, N, aLib, aRef, rsa, csa);
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* pAlpha = static_cast<scomplex*>( bli_obj_buffer( alpha ) );
+      scomplex* pX     = static_cast<scomplex*>( bli_obj_buffer( x ) );
+      scomplex* aRef   = static_cast<scomplex*>( bli_obj_buffer( a_orig ) );
+      scomplex* aLib   = static_cast<scomplex*>( bli_obj_buffer( a ) );
+      libblis_icher_check<scomplex, float>(uploa, M, pAlpha, pX, incx, conjx,
+                                           aRef, rsa, csa);
+      return computediffim(M, N, aLib, aRef, rsa, csa);
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* pAlpha = static_cast<dcomplex*>( bli_obj_buffer( alpha ) );
+      dcomplex* pX     = static_cast<dcomplex*>( bli_obj_buffer( x ) );
+      dcomplex* aRef   = static_cast<dcomplex*>( bli_obj_buffer( a_orig ) );
+      dcomplex* aLib   = static_cast<dcomplex*>( bli_obj_buffer( a ) );
+      libblis_icher_check<dcomplex, double>(uploa, M, pAlpha, pX, incx, conjx,
+                                            aRef, rsa, csa);
+      return computediffim(M, N, aLib, aRef, rsa, csa);
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+  return 0.0;
+}
+
--- a/gtestsuite/src/ref_her2.cpp
+++ b/gtestsuite/src/ref_her2.cpp
@@ -0,0 +1,259 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_her2.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> HER2  performs the hermitian rank 2 operation
+//*>    A := alpha*x*y**H + conjg( alpha )*y*x**H + A,
+//*> where alpha is a scalar, x and y are n element vectors and A is an n
+//*> by n hermitian matrix.
+//*  ==========================================================================
+
+// Reference rank-2 update  A := alpha*x*y' + alpha*y*x' + A  for real
+// element types, touching only one triangle of the N x N matrix A.
+//   uploa     : BLIS_UPPER updates the upper triangle, anything else the lower.
+//   alpha     : pointer to the scalar.
+//   X,incx    : first vector and its increment.
+//   Y,incy    : second vector and its increment.
+//   A,rsa,csa : matrix updated in place; element (i,j) is A[i*rsa + j*csa].
+// The template parameter U is not used by this function.
+template <typename T, typename U>
+void libblis_iher2_check(uplo_t uploa, dim_t N, T* alpha, T* X, dim_t incx,
+                             T* Y, dim_t incy, T* A, dim_t rsa, dim_t csa) {
+
+  const T zero  = 0.0;
+  const T scale = *alpha;
+
+  // Nothing to do for an empty matrix or a zero scalar.
+  if (N == 0)
+    return;
+  if (scale == zero)
+    return;
+
+  //* Start offsets into X and Y.
+  //* NOTE(review): for a non-positive increment the offset is 1 - N*inc;
+  //* this looks like a 1-based formula carried over unchanged - confirm.
+  const int x_start = (incx > 0) ? 0 : 1 - (N * incx);
+  const int y_start = (incy > 0) ? 0 : 1 - (N * incy);
+
+  const bool upper = (uploa == BLIS_UPPER);
+  int xj = x_start;
+  int yj = y_start;
+  for (int col = 0 ; col < N ; ++col, xj += incx, yj += incy) {
+    // A column is left untouched when both x(col) and y(col) are zero.
+    if ((X[xj] != zero) || (Y[yj] != zero)) {
+      const T wy = scale * Y[yj];
+      const T wx = scale * X[xj];
+
+      //* Upper: rows 0..col, walking both vectors from their start.
+      //* Lower: rows col..N-1, walking both vectors from element col.
+      const int   row_first = upper ? 0 : col;
+      const dim_t row_end   = upper ? (dim_t)(col + 1) : N;
+      int xi = upper ? x_start : xj;
+      int yi = upper ? y_start : yj;
+      for (int row = row_first ; row < row_end ; ++row, xi += incx, yi += incy) {
+        A[row*rsa + col*csa] = A[row*rsa + col*csa] + X[xi]*wy + Y[yi]*wx;
+      }
+    }
+  }
+}
+
+// Reference hermitian rank-2 update
+//    A := alpha*x*y**H + conjg( alpha )*y*x**H + A
+// for complex element types (T exposes .real / .imag), touching only one
+// triangle of the N x N matrix A. Diagonal entries are reduced to their
+// real part.
+//   uploa       : BLIS_UPPER updates the upper triangle, anything else the lower.
+//   alpha       : pointer to the scalar.
+//   X,incx      : first vector and its increment.
+//   Y,incy      : second vector and its increment.
+//   conjx/conjy : when true, the vector is conjugated IN PLACE first.
+//   A,rsa,csa   : matrix updated in place; element (i,j) is A[i*rsa + j*csa].
+// The template parameter U is not used by this function.
+template <typename T, typename U>
+void libblis_icher2_check(uplo_t uploa, dim_t N, T* alpha, T* X, dim_t incx,
+      bool conjx, T* Y, dim_t incy, bool conjy, T* A, dim_t rsa, dim_t csa) {
+
+  T zero  = {0.0, 0.0};
+  T scale = *alpha;
+
+  // Nothing to do for an empty matrix or a zero scalar.
+  if (N == 0)
+    return;
+  if ((scale.real == zero.real) && (scale.imag == zero.imag))
+    return;
+
+  //* Start offsets into X and Y.
+  //* NOTE(review): for a non-positive increment the offset is 1 - N*inc;
+  //* this looks like a 1-based formula carried over unchanged - confirm.
+  const int x_start = (incx > 0) ? 0 : 1 - (N * incx);
+  const int y_start = (incy > 0) ? 0 : 1 - (N * incy);
+
+  // Optional in-place conjugation; both walks start at index 0.
+  if (conjx) {
+    int xi = 0;
+    for (int k = 0 ; k < N ; ++k, xi += incx) {
+      X[xi] = conjugate<T>(X[xi]);
+    }
+  }
+  if (conjy) {
+    int yi = 0;
+    for (int k = 0 ; k < N ; ++k, yi += incy) {
+      Y[yi] = conjugate<T>(Y[yi]);
+    }
+  }
+
+  const bool upper = (uploa == BLIS_UPPER);
+  int xj = x_start;
+  int yj = y_start;
+  for (int col = 0 ; col < N ; ++col, xj += incx, yj += incy) {
+    T wy = mulc<T>(scale , conjugate<T>(Y[yj]));   // alpha * conj(y(col))
+    T wx = conjugate<T>(mulc<T>(scale , X[xj]));   // conj(alpha * x(col))
+
+    //* Upper: rows 0..col, walking both vectors from their start.
+    //* Lower: rows col..N-1, walking both vectors from element col.
+    const int   row_first = upper ? 0 : col;
+    const dim_t row_end   = upper ? (dim_t)(col + 1) : N;
+    int xi = upper ? x_start : xj;
+    int yi = upper ? y_start : yj;
+    for (int row = row_first ; row < row_end ; ++row, xi += incx, yi += incy) {
+      T px    = mulc<T>(X[xi] , wy);
+      T py    = mulc<T>(Y[yi] , wx);
+      T delta = addc<T>(px , py);
+      T total = addc<T>(A[row*rsa + col*csa] , delta);
+      // The diagonal keeps only its real part.
+      if (row == col)
+        A[row*rsa + col*csa] = real<T>(total);
+      else
+        A[row*rsa + col*csa] = total;
+    }
+  }
+}
+
+// Runs the reference HER2 on `a_orig` (updated in place) and compares it
+// with the result already stored in `a`.
+//   params : not used by this function.
+//   alpha  : scalar object.
+//   x, y   : vector objects; x also supplies the datatype.
+//   a      : matrix holding the result under test (uplo, size, strides).
+//   a_orig : matrix the reference result is written to.
+// Returns the value produced by computediffrm / computediffim, or 0.0 after
+// an unsupported datatype has been reported.
+double libblis_test_iher2_check(
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         x,
+  obj_t*         y,
+  obj_t*         a,
+  obj_t*         a_orig
+){
+  num_t  dt    = bli_obj_dt( x );
+  uplo_t uploa = bli_obj_uplo( a );
+  dim_t  M     = bli_obj_length( a );
+  dim_t  N     = bli_obj_width( a );
+  dim_t  rsa   = bli_obj_row_stride( a ) ;
+  dim_t  csa   = bli_obj_col_stride( a ) ;
+  dim_t  incx  = bli_obj_vector_inc( x );
+  dim_t  incy  = bli_obj_vector_inc( y );
+  bool   conjx = bli_obj_has_conj( x );
+  bool   conjy = bli_obj_has_conj( y );
+
+  switch( dt )  {
+    case BLIS_FLOAT :
+    {
+      float* pAlpha = static_cast<float*>( bli_obj_buffer( alpha ) );
+      float* pX     = static_cast<float*>( bli_obj_buffer( x ) );
+      float* pY     = static_cast<float*>( bli_obj_buffer( y ) );
+      float* aRef   = static_cast<float*>( bli_obj_buffer( a_orig ) );
+      float* aLib   = static_cast<float*>( bli_obj_buffer( a ) );
+      libblis_iher2_check<float, int32_t>(uploa, M, pAlpha, pX, incx,
+                                          pY, incy, aRef, rsa, csa);
+      return computediffrm(M, N, aLib, aRef, rsa, csa);
+    }
+    case BLIS_DOUBLE :
+    {
+      double* pAlpha = static_cast<double*>( bli_obj_buffer( alpha ) );
+      double* pX     = static_cast<double*>( bli_obj_buffer( x ) );
+      double* pY     = static_cast<double*>( bli_obj_buffer( y ) );
+      double* aRef   = static_cast<double*>( bli_obj_buffer( a_orig ) );
+      double* aLib   = static_cast<double*>( bli_obj_buffer( a ) );
+      libblis_iher2_check<double, int64_t>(uploa, M, pAlpha, pX, incx,
+                                           pY, incy, aRef, rsa, csa);
+      return computediffrm(M, N, aLib, aRef, rsa, csa);
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* pAlpha = static_cast<scomplex*>( bli_obj_buffer( alpha ) );
+      scomplex* pX     = static_cast<scomplex*>( bli_obj_buffer( x ) );
+      scomplex* pY     = static_cast<scomplex*>( bli_obj_buffer( y ) );
+      scomplex* aRef   = static_cast<scomplex*>( bli_obj_buffer( a_orig ) );
+      scomplex* aLib   = static_cast<scomplex*>( bli_obj_buffer( a ) );
+      libblis_icher2_check<scomplex, int32_t>(uploa, M, pAlpha, pX, incx, conjx,
+                                              pY, incy, conjy, aRef, rsa, csa);
+      return computediffim(M, N, aLib, aRef, rsa, csa);
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* pAlpha = static_cast<dcomplex*>( bli_obj_buffer( alpha ) );
+      dcomplex* pX     = static_cast<dcomplex*>( bli_obj_buffer( x ) );
+      dcomplex* pY     = static_cast<dcomplex*>( bli_obj_buffer( y ) );
+      dcomplex* aRef   = static_cast<dcomplex*>( bli_obj_buffer( a_orig ) );
+      dcomplex* aLib   = static_cast<dcomplex*>( bli_obj_buffer( a ) );
+      libblis_icher2_check<dcomplex, int64_t>(uploa, M, pAlpha, pX, incx, conjx,
+                                              pY, incy, conjy, aRef, rsa, csa);
+      return computediffim(M, N, aLib, aRef, rsa, csa);
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+  return 0.0;
+}
+
+
--- a/gtestsuite/src/ref_her2k.cpp
+++ b/gtestsuite/src/ref_her2k.cpp
@@ -0,0 +1,666 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_her2k.h"
+
+using namespace std;
+
+//*============================================================================
+//*> HER2K  performs one of the hermitian rank 2k operations
+//*>    C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C,
+//*> or
+//*>    C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C,
+//*============================================================================
+
+//* Reference rank-2k update for real element types, touching only one
+//* triangle of the N x N matrix C:
+//*    trans == BLIS_NO_TRANSPOSE : C := alpha*A*B**T + alpha*B*A**T + beta*C
+//*    otherwise                  : C := alpha*A**T*B + alpha*B**T*A + beta*C
+//*   uplo            : BLIS_UPPER updates the upper triangle, else the lower.
+//*   N, K            : order of C and length of the inner dimension.
+//*   Alpha, Beta     : scalars, passed by value.
+//*   A/B/C + strides : element (i,j) of a matrix M is M[i*rsm + j*csm].
+template <typename T>
+void libblis_iher2k_check( uplo_t uplo, trans_t trans, dim_t N, dim_t K,
+  T Alpha, T* A, dim_t rsa, dim_t csa, T* B, dim_t rsb, dim_t csb, T Beta,
+  T* C, dim_t rsc, dim_t csc )
+{
+    const T zero = 0.0 ;
+    const T one  = 1.0 ;
+
+    const bool upper_stored = ( uplo == BLIS_UPPER );
+    const bool no_trans     = ( trans == BLIS_NO_TRANSPOSE );
+
+    //* Quick returns: empty C, or an operation that leaves C untouched.
+    if( N == 0 )
+        return;
+    if( ( Alpha == zero || K == 0 ) && Beta == one )
+        return;
+
+    for( int col = 0 ; col < N ; col++ )
+    {
+        //* Rows of this column that belong to the stored triangle:
+        //* 0..col for the upper triangle, col..N-1 for the lower one.
+        const int   row_lo = upper_stored ? 0 : col;
+        const dim_t row_hi = upper_stored ? (dim_t)( col + 1 ) : N;
+
+        //* alpha == 0: the update degenerates to C := beta*C.
+        if( Alpha == zero )
+        {
+            for( int row = row_lo ; row < row_hi ; row++ )
+            {
+                if( Beta == zero )
+                    C[row*rsc + col*csc] = zero;
+                else
+                    C[row*rsc + col*csc] = Beta*C[row*rsc + col*csc];
+            }
+            continue;
+        }
+
+        if( no_trans )
+        {
+            //* Scale the column by beta first (skipped when beta == 1) ...
+            if( Beta == zero )
+            {
+                for( int row = row_lo ; row < row_hi ; row++ )
+                    C[row*rsc + col*csc] = zero;
+            }
+            else if( Beta != one )
+            {
+                for( int row = row_lo ; row < row_hi ; row++ )
+                    C[row*rsc + col*csc] = Beta*C[row*rsc + col*csc];
+            }
+
+            //* ... then accumulate one rank-2 term per inner index.
+            for( int p = 0 ; p < K ; p++ )
+            {
+                //* Both factors zero: this term contributes nothing.
+                if( (A[col*rsa + p*csa] == zero) && (B[col*rsb + p*csb] == zero) )
+                    continue;
+
+                const T wb = Alpha*B[col*rsb + p*csb];
+                const T wa = Alpha*A[col*rsa + p*csa];
+                for( int row = row_lo ; row < row_hi ; row++ )
+                {
+                    C[row*rsc + col*csc] = C[row*rsc + col*csc] + A[row*rsa + p*csa]*wb + B[row*rsb + p*csb]*wa;
+                }
+            }
+        }
+        else
+        {
+            //* Transposed case: each entry is built from two dot products
+            //* over the inner dimension.
+            for( int row = row_lo ; row < row_hi ; row++ )
+            {
+                T dot_ab = zero;
+                T dot_ba = zero;
+                for( int p = 0 ; p < K ; p++ )
+                {
+                    dot_ab = dot_ab + A[p*rsa + row*csa]*B[p*rsb + col*csb];
+                    dot_ba = dot_ba + B[p*rsb + row*csb]*A[p*rsa + col*csa];
+                }
+
+                if( Beta == zero )
+                    C[row*rsc + col*csc] = Alpha*dot_ab + Alpha*dot_ba;
+                else
+                    C[row*rsc + col*csc] = Beta*C[row*rsc + col*csc] + Alpha*dot_ab + Alpha*dot_ba;
+            }
+        }
+    }
+}
+
+template <typename T, typename U>
+void libblis_icher2k_check( uplo_t uplo, trans_t trans, dim_t N, dim_t K,
+  T Alpha, T* A, dim_t rsa, dim_t csa, T* B, dim_t rsb, dim_t csb, T Beta,
+  T* C, dim_t rsc, dim_t csc )
+{
+    T tmp1, tmp2;
+    T tmpa, tmpb;
+    int i, j, l;
+    bool UPPER, NOTRANS;
+
+    T ONE  = { 1.0 , 0.0 };
+    T ZERO = { 0.0 , 0.0 };
+
+    //*     Test the input parameters.
+    UPPER   = (uplo == BLIS_UPPER);
+    NOTRANS = (trans == BLIS_NO_TRANSPOSE);
+
+    if( N == 0 || (( Alpha.real == ZERO.real || K == 0 ) && Beta.real == ONE.real ))
+      return;
+
+    //*     And when  alpha.eq.zero.
+    if( Alpha.real == ZERO.real )
+    {
+        if( UPPER )
+        {
+            if( Beta.real == ZERO.real )
+            {
+                for( j = 0 ; j < N ; j++ )
+                {
+                    for( i = 0 ; i <= j ; i++ )
+                    {
+                        C[i*rsc + j*csc] = ZERO;
+                    }
+                }
+            }
+            else
+            {
+                for( j = 0 ; j < N ; j++ )
+                {
+                    for( i = 0 ; i < j ; i++)
+                    {
+                        C[i*rsc + j*csc] = mulc<T>(Beta , C[i*rsc + j*csc]);
+                    }
+                    C[j*rsc + j*csc] = mulc<T>(Beta , real<T>(C[j*rsc + j*csc]));
+                }
+            }
+        }
+        else
+        {
+            if( Beta.real == ZERO.real )
+            {
+                for( j = 0 ; j < N ; j++ )
+                {
+                    for( i = j ; i < N ; i++ )
+                    {
+                        C[i*rsc + j*csc] = ZERO;
+                    }
+                }
+            }
+            else
+            {
+                for( j = 0 ; j < N ; j++ )
+                {
+                    C[j*rsc + j*csc] = mulc<T>(Beta , real<T>(C[j*rsc + j*csc]));
+                    for( i = (j+1) ; i < N ; i++ )
+                    {
+                        C[i*rsc + j*csc] = mulc<T>(Beta , C[i*rsc + j*csc]);
+                    }
+                }
+            }
+        }
+        return;
+    }
+
+    //*     Start the operations.
+    if( NOTRANS )
+    {
+        //*        Form  C := alpha*A*B**H + conjg( alpha )*B*A**H + C.
+        if( UPPER )
+        {
+            for( j = 0 ; j < N ; j++ )
+            {
+                if( Beta.real == ZERO.real )
+                {
+                    for( i = 0 ; i <= j ; i++ )
+                    {
+                        C[i*rsc + j*csc] = ZERO;
+                    }
+                }
+                else if( Beta.real != ONE.real )
+                {
+                    for(i = 0 ; i < j ; i++)
+                    {
+                        C[i*rsc + j*csc] = mulc<T>(Beta , C[i*rsc + j*csc]);
+                    }
+                    C[j*rsc + j*csc] = mulc<T>(Beta , real<T>(C[j*rsc + j*csc]));
+                }
+                else
+                {
+                    C[j*rsc + j*csc] = real<T>(C[j*rsc + j*csc]);
+                }
+                for( l = 0 ; l < K ; l++ )
+                {
+                   if( ((A[j*rsa + l*csa].real != ZERO.real) || (A[j*rsa + l*csa].imag != ZERO.imag))
+                     || ((B[j*rsb + l*csb].real != ZERO.real) || (B[j*rsb + l*csb].imag != ZERO.imag)) )
+                    {
+                        tmp1 = mulc<T>(Alpha , conjugate<T>(B[j*rsb + l*csb]));
+                        tmp2 = conjugate<T>(mulc<T>(Alpha , A[j*rsa + l*csa]));
+                        for( i = 0 ; i < j ; i++)
+                        {
+                            tmpa = mulc<T>(A[i*rsa + l*csa] , tmp1);
+                            tmpb = mulc<T>(B[i*rsb + l*csb] , tmp2);
+                            tmpa = addc<T>(tmpa , tmpb);
+                            C[i*rsc + j*csc] = addc<T>(C[i*rsc + j*csc] , tmpa);
+                        }
+                        tmpa = mulc<T>(A[j*rsa + l*csa] , tmp1);
+                        tmpb = mulc<T>(B[j*rsb + l*csb] , tmp2);
+                        tmpa = addc<T>(tmpa , tmpb);
+                        C[j*rsc + j*csc] = addc<T>(real<T>(C[j*rsc + j*csc]) , real<T>(tmpa));
+                    }
+                }
+            }
+        }
+        else
+        {
+            for( j = 0 ; j < N ; j++ )
+            {
+                if( Beta.real == ZERO.real )
+                {
+                    for( i = j ; i < N ; i++ )
+                    {
+                        C[i*rsc + j*csc] = ZERO;
+                    }
+                }
+                else if( Beta.real != ONE.real )
+                {
+                    for( i = (j+1) ; i < N ; i++ )
+                    {
+                        C[i*rsc + j*csc] = mulc<T>(Beta , C[i*rsc + j*csc]);
+                    }
+                    C[j*rsc + j*csc] = mulc<T>(Beta , real<T>(C[j*rsc + j*csc]));
+                }
+                else
+                {
+                    C[j*rsc + j*csc] = real<T>(C[j*rsc + j*csc]);
+                }
+                for( l = 0 ; l < K ; l++ )
+                {
+                    if( ((A[j*rsa + l*csa].real != ZERO.real) || (A[j*rsa + l*csa].imag != ZERO.imag))
+                     || ((B[j*rsb + l*csb].real != ZERO.real) || (B[j*rsb + l*csb].imag != ZERO.imag)) )
+                    {
+                        tmp1 = mulc<T>(Alpha , conjugate<T>(B[j*rsb + l*csb]));
+                        tmp2 = conjugate<T>(mulc<T>(Alpha , A[j*rsa + l*csa]));
+                        for( i = (j+1) ; i < N ; i++ )
+                        {
+                            tmpa = mulc<T>(A[i*rsa + l*csa] , tmp1);
+                            tmpb = mulc<T>(B[i*rsb + l*csb] , tmp2);
+                            tmpa = addc<T>(tmpa, tmpb);
+                            C[i*rsc + j*csc] = addc<T>(C[i*rsc + j*csc] , tmpa);
+                        }
+                        tmpa = mulc<T>(A[j*rsa + l*csa] , tmp1);
+                        tmpb = mulc<T>(B[j*rsb + l*csb] , tmp2);
+                        tmpa = addc<T>(tmpa, tmpb);
+                        C[j*rsc + j*csc] = addc<T>(real<T>(C[j*rsc + j*csc]) , real<T>(tmpa));
+                    }
+                }
+            }
+        }
+    }
+    else
+    {
+        //*        Form  C := alpha*A**H*B + conjg( alpha )*B**H*A + C.
+        if( UPPER )
+        {
+            for( j = 0 ; j < N ; j++ )
+            {
+                for( i = 0 ; i <= j ; i++ )
+                {
+                    tmp1 = ZERO;
+                    tmp2 = ZERO;
+                    for( l = 0 ; l < K ; l++ )
+                    {
+                        tmp1 = addc<T>(tmp1 , mulc<T>(conjugate<T>(A[l*rsa + i*csa]) , B[l*rsb + j*csb]));
+                        tmp2 = addc<T>(tmp2 , mulc<T>(conjugate<T>(B[l*rsb + i*csb]) , A[l*rsa + j*csa]));
+                    }
+                    if( i == j )
+                    {
+                        if( Beta.real == ZERO.real )
+                        {
+                            tmpa = mulc<T>(Alpha , tmp1);
+                            tmpb = mulc<T>(conjugate<T>(Alpha) , tmp2);
+                            C[j*rsc + j*csc] = real<T>(addc<T>(tmpa, tmpb));
+                        }
+                        else
+                        {
+                            tmpa = mulc<T>(Alpha , tmp1);
+                            tmpb = mulc<T>(conjugate<T>(Alpha) , tmp2);
+                            tmpa = addc<T>(tmpa, tmpb);
+                            C[j*rsc + j*csc] = addc<T>(mulc<T>(Beta , real<T>(C[j*rsc + j*csc])) , real<T>(tmpa));
+                        }
+                    }
+                    else
+                    {
+                        if( Beta.real == ZERO.real )
+                        {
+                            C[i*rsc + j*csc] = addc<T>(mulc<T>(Alpha , tmp1) , mulc<T>(conjugate<T>(Alpha) ,tmp2));
+                        }
+                        else
+                        {
+                            tmpa = mulc<T>(Alpha , tmp1);
+                            tmpb = mulc<T>(conjugate<T>(Alpha) , tmp2);
+                            tmpa = addc<T>(tmpa , tmpb);
+                            C[i*rsc + j*csc] = addc<T>(mulc<T>(Beta , C[i*rsc + j*csc]) ,tmpa);
+                        }
+                    }
+                }
+            }
+        }
+        else
+        {
+            for( j = 0 ; j < N ; j++ )
+            {
+                for( i = j ; i < N ; i++ )
+                {
+                    tmp1 = ZERO;
+                    tmp2 = ZERO;
+                    for( l = 0 ; l < K ; l++ )
+                    {
+                        tmp1 = addc<T>(tmp1 , mulc<T>(conjugate<T>(A[l*rsa + i*csa]) , B[l*rsb + j*csb]));
+                        tmp2 = addc<T>(tmp2 , mulc<T>(conjugate<T>(B[l*rsb + i*csb]) , A[l*rsa + j*csa]));
+                    }
+                    if( i == j )
+                    {
+                        if( Beta.real == ZERO.real )
+                        {
+                            C[j*rsc + j*csc] = real<T>(addc<T>(mulc<T>(Alpha , tmp1) , mulc<T>(conjugate<T>(Alpha) , tmp2)));
+                        }
+                        else
+                        {
+                            tmpa = mulc<T>(Alpha , tmp1);
+                            tmpb = mulc<T>(conjugate<T>(Alpha) , tmp2);
+                            tmpa = addc<T>(tmpa, tmpb);
+                            C[j*rsc + j*csc] = addc<T>(mulc<T>(Beta , real<T>(C[j*rsc + j*csc])) , real<T>(tmpa));
+                        }
+                    }
+                    else
+                    {
+                        if( Beta.real == ZERO.real )
+                        {
+                            C[i*rsc + j*csc] = addc<T>(mulc<T>(Alpha , tmp1) , mulc<T>(conjugate<T>(Alpha) , tmp2));
+                        }
+                        else
+                        {
+                            tmpa = mulc<T>(Alpha , tmp1);
+                            tmpb = mulc<T>(conjugate<T>(Alpha) , tmp2);
+                            tmpa = addc<T>(tmpa, tmpb);
+                            C[i*rsc + j*csc] = addc<T>(mulc<T>(Beta , C[i*rsc + j*csc]) , tmpa);
+                        }
+                    }
+                }
+            }
+        }
+    }
+    return;
+}
+
+double libblis_test_iher2k_check
+    (
+      test_params_t* params,
+      obj_t*         alpha,
+      obj_t*         a,
+      obj_t*         b,
+      obj_t*         beta,
+      obj_t*         c,
+      obj_t*         c_orig
+    )
+{
+    // Reference check for her2k: the reference kernel matching the datatype
+    // of c is run on the buffer of c_orig, the outcome is compared against
+    // the buffer of c, and the magnitude of that difference is returned.
+    // `params` is not consulted.
+    num_t   dt    = bli_obj_dt( c );
+    uplo_t  uploc = bli_obj_uplo( c );
+    trans_t trans = bli_obj_onlytrans_status( a );
+    dim_t   M     = bli_obj_length( c );
+    dim_t   K     = bli_obj_width_after_trans( a );
+
+    // Row/column strides of a, b and c.
+    dim_t rsa = bli_obj_row_stride( a ), csa = bli_obj_col_stride( a );
+    dim_t rsb = bli_obj_row_stride( b ), csb = bli_obj_col_stride( b );
+    dim_t rsc = bli_obj_row_stride( c ), csc = bli_obj_col_stride( c );
+
+    double resid = 0.0;
+
+    // Leading dimensions are taken along the storage direction of c.
+    const bool colmajor = bli_obj_is_col_stored( c );
+    f77_int lda = colmajor ? bli_obj_col_stride( a ) : bli_obj_row_stride( a );
+    f77_int ldb = colmajor ? bli_obj_col_stride( b ) : bli_obj_row_stride( b );
+    f77_int ldc = colmajor ? bli_obj_col_stride( c ) : bli_obj_row_stride( c );
+
+    // Smallest leading dimension accepted for a and b.
+    int nrowa = ( trans == BLIS_NO_TRANSPOSE ) ? M : K;
+
+    // If any leading dimension is too small the comparison is skipped and
+    // a zero residual is reported.
+    if( ( lda < max(1, nrowa) ) ||
+        ( ldb < max(1, nrowa) ) ||
+        ( ldc < max(1, (int)M) ) )
+    {
+        return resid;
+    }
+
+    switch( dt )
+    {
+        case BLIS_FLOAT :
+        {
+            float* alphap = (float*) bli_obj_buffer( alpha );
+            float* ap     = (float*) bli_obj_buffer( a );
+            float* bp     = (float*) bli_obj_buffer( b );
+            float* betap  = (float*) bli_obj_buffer( beta );
+            float* refp   = (float*) bli_obj_buffer( c_orig );
+            float* outp   = (float*) bli_obj_buffer( c );
+            // Run the reference kernel on the c_orig buffer, then compare.
+            libblis_iher2k_check<float>( uploc, trans, M, K, *alphap, ap,
+                                 rsa, csa, bp, rsb, csb, *betap, refp, rsc, csc );
+            resid = computediffrm( M, M, outp, refp, rsc, csc );
+            break;
+        }
+        case BLIS_DOUBLE :
+        {
+            double* alphap = (double*) bli_obj_buffer( alpha );
+            double* ap     = (double*) bli_obj_buffer( a );
+            double* bp     = (double*) bli_obj_buffer( b );
+            double* betap  = (double*) bli_obj_buffer( beta );
+            double* refp   = (double*) bli_obj_buffer( c_orig );
+            double* outp   = (double*) bli_obj_buffer( c );
+            // Run the reference kernel on the c_orig buffer, then compare.
+            libblis_iher2k_check<double>( uploc, trans, M, K, *alphap, ap,
+                                 rsa, csa, bp, rsb, csb, *betap, refp, rsc, csc );
+            resid = computediffrm( M, M, outp, refp, rsc, csc );
+            break;
+        }
+        case BLIS_SCOMPLEX :
+        {
+            scomplex* alphap = (scomplex*) bli_obj_buffer( alpha );
+            scomplex* ap     = (scomplex*) bli_obj_buffer( a );
+            scomplex* bp     = (scomplex*) bli_obj_buffer( b );
+            scomplex* betap  = (scomplex*) bli_obj_buffer( beta );
+            scomplex* refp   = (scomplex*) bli_obj_buffer( c_orig );
+            scomplex* outp   = (scomplex*) bli_obj_buffer( c );
+            // The imaginary part of beta is cleared in the caller's buffer
+            // before the reference kernel runs.
+            betap->imag = 0.0 ;
+            libblis_icher2k_check<scomplex, float>( uploc, trans, M, K, *alphap,
+                                ap, rsa, csa, bp, rsb, csb, *betap, refp, rsc, csc );
+            resid = computediffim( M, M, outp, refp, rsc, csc );
+            break;
+        }
+        case BLIS_DCOMPLEX :
+        {
+            dcomplex* alphap = (dcomplex*) bli_obj_buffer( alpha );
+            dcomplex* ap     = (dcomplex*) bli_obj_buffer( a );
+            dcomplex* bp     = (dcomplex*) bli_obj_buffer( b );
+            dcomplex* betap  = (dcomplex*) bli_obj_buffer( beta );
+            dcomplex* refp   = (dcomplex*) bli_obj_buffer( c_orig );
+            dcomplex* outp   = (dcomplex*) bli_obj_buffer( c );
+            // The imaginary part of beta is cleared in the caller's buffer
+            // before the reference kernel runs.
+            betap->imag = 0.0 ;
+            libblis_icher2k_check<dcomplex, double>( uploc, trans, M, K, *alphap,
+                                ap, rsa, csa, bp, rsb, csb, *betap, refp, rsc, csc );
+            resid = computediffim( M, M, outp, refp, rsc, csc );
+            break;
+        }
+        default :
+            bli_check_error_code( BLIS_INVALID_DATATYPE );
+    }
+
+    return abs(resid);
+}
+
+// Scan the M x N matrix held by b, reading element (i,j) from offset
+// i*rs + j*cs of its buffer, and return the first NaN encountered
+// (converted to double).  Returns 0.0 when no element is NaN.
+template <typename T>
+double libblis_check_nan_real( dim_t rs, dim_t cs, obj_t* b ) {
+  const dim_t M = bli_obj_length( b );
+  const dim_t N = bli_obj_width( b );
+  const T*    B = (const T*) bli_obj_buffer( b );
+
+  for( dim_t i = 0 ; i < M ; i++ ) {
+    for( dim_t j = 0 ; j < N ; j++ ) {
+      const T tv = B[ i*rs + j*cs ];
+      // Return on the first NaN instead of scanning the remaining rows.
+      if ( bli_isnan( tv ) ) {
+        return (double)tv;
+      }
+    }
+  }
+  return 0.0;
+}
+
+// Scan the M x N complex matrix held by b, reading element (i,j) from
+// offset i*rs + j*cs of its buffer.  For the first element with a NaN
+// component, return that component (the real part when it is NaN,
+// otherwise the imaginary part).  Returns 0.0 when nothing is NaN.
+template <typename T>
+double libblis_check_nan_complex( dim_t rs, dim_t cs, obj_t* b ) {
+  const dim_t M = bli_obj_length( b );
+  const dim_t N = bli_obj_width( b );
+  const T*    B = (const T*) bli_obj_buffer( b );
+
+  for( dim_t i = 0 ; i < M ; i++ ) {
+    for( dim_t j = 0 ; j < N ; j++ ) {
+      const T tv = B[ i*rs + j*cs ];
+      // Return on the first NaN instead of scanning the remaining rows.
+      if ( bli_isnan( tv.real ) ) {
+        return tv.real;
+      }
+      if ( bli_isnan( tv.imag ) ) {
+        return tv.imag;
+      }
+    }
+  }
+  return 0.0;
+}
+
+// Run the NaN scan that matches datatype dt over matrix c and return its
+// result.  0.0 is returned when dt is not one of the four handled
+// datatypes (after flagging BLIS_INVALID_DATATYPE).
+double libblis_check_nan_her2k(obj_t* c, num_t dt ) {
+  // A unit row stride selects strides (1, col_stride); any other layout
+  // selects (row_stride, 1).
+  const bool  unit_rs = ( bli_obj_row_stride( c ) == 1 );
+  const dim_t rsc     = unit_rs ? 1 : bli_obj_row_stride( c );
+  const dim_t csc     = unit_rs ? bli_obj_col_stride( c ) : 1;
+
+  double nan_found = 0.0;
+
+  switch( dt )  {
+    case BLIS_FLOAT:
+      nan_found = libblis_check_nan_real<float>( rsc, csc, c );
+      break;
+
+    case BLIS_DOUBLE:
+      nan_found = libblis_check_nan_real<double>( rsc, csc, c );
+      break;
+
+    case BLIS_SCOMPLEX:
+      nan_found = libblis_check_nan_complex<scomplex>( rsc, csc, c );
+      break;
+
+    case BLIS_DCOMPLEX:
+      nan_found = libblis_check_nan_complex<dcomplex>( rsc, csc, c );
+      break;
+
+    default :
+      // Not a handled datatype: flag it via bli_check_error_code.
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+      break;
+  }
+
+  return nan_found;
+}
--- a/gtestsuite/src/ref_herk.cpp
+++ b/gtestsuite/src/ref_herk.cpp
@@ -0,0 +1,648 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_herk.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> HERK  performs one of the hermitian rank k operations
+//*>    C := alpha*A*A**H + beta*C,
+//*> or
+//*>    C := alpha*A**H*A + beta*C,
+//*> where  alpha and beta  are  real scalars,  C is an  n by n  hermitian
+//*> matrix and  A  is an  n by k  matrix in the  first case and a  k by n
+//*> matrix in the second case.
+//*  ==========================================================================
+
+template <typename T>
+void libblis_iherk_check( uplo_t uplo, trans_t trans, dim_t N, dim_t K,
+  T Alpha, T* A, dim_t rsa, dim_t csa, T Beta, T* C, dim_t rsc, dim_t csc )
+{
+  // Real-domain reference for the rank-k update
+  //    C := Alpha*A*A**T + Beta*C   (NOTRANS)
+  // or
+  //    C := Alpha*A**T*A + Beta*C   (otherwise),
+  // applied in place.  Only the triangle of the N x N matrix C selected by
+  // uplo is read and written.  Element (r,c) of A is addressed as
+  // A[r*rsa + c*csa] and element (r,c) of C as C[r*rsc + c*csc].
+  //
+  // For real data the diagonal is updated with exactly the same expression
+  // as the off-diagonal entries, so every loop below walks rows
+  // [first,last) of column j with the diagonal included.
+  const T    zero    = 0.0;
+  const T    one     = 1.0;
+  const bool upper   = (uplo == BLIS_UPPER);
+  const bool notrans = (trans == BLIS_NO_TRANSPOSE) || (trans == BLIS_CONJ_NO_TRANSPOSE);
+
+  // Quick return: empty matrix, or nothing to add (Alpha == 0 or K == 0)
+  // while C is left unscaled (Beta == 1).
+  if( (N == 0) || (( Alpha == zero || K == 0) && Beta == one ) )
+  {
+    return;
+  }
+
+  for( dim_t j = 0 ; j < N ; j++ )
+  {
+    // Rows of column j that belong to the selected triangle.
+    const dim_t first = upper ? 0 : j;
+    const dim_t last  = upper ? (j+1) : N;
+
+    if( Alpha == zero )
+    {
+      // No rank-k contribution: C := Beta*C on the selected triangle.
+      for( dim_t i = first ; i < last ; i++ )
+      {
+        if( Beta == zero )
+        {
+          C[i*rsc + j*csc] = zero;
+        }
+        else
+        {
+          C[i*rsc + j*csc] = Beta*C[i*rsc + j*csc];
+        }
+      }
+    }
+    else if( notrans )
+    {
+      // Scale column j by Beta first (explicit zero fill for Beta == 0,
+      // nothing to do for Beta == 1) ...
+      if( Beta == zero )
+      {
+        for( dim_t i = first ; i < last ; i++ )
+        {
+          C[i*rsc + j*csc] = zero;
+        }
+      }
+      else if( Beta != one )
+      {
+        for( dim_t i = first ; i < last ; i++ )
+        {
+          C[i*rsc + j*csc] = Beta*C[i*rsc + j*csc];
+        }
+      }
+
+      // ... then accumulate Alpha*A(j,l)*A(i,l) for each l, skipping the
+      // columns of A whose entry in row j is zero.
+      for( dim_t l = 0 ; l < K ; l++ )
+      {
+        if( A[j*rsa + l*csa] != zero )
+        {
+          const T scaled = Alpha*A[j*rsa + l*csa];
+          for( dim_t i = first ; i < last ; i++ )
+          {
+            C[i*rsc + j*csc] = C[i*rsc + j*csc] + scaled*A[i*rsa + l*csa];
+          }
+        }
+      }
+    }
+    else
+    {
+      // C(i,j) := Alpha * sum_l A(l,i)*A(l,j) + Beta*C(i,j); the old value
+      // of C is not read when Beta == 0.
+      for( dim_t i = first ; i < last ; i++ )
+      {
+        T dot = zero;
+        for( dim_t l = 0 ; l < K ; l++ )
+        {
+          dot = dot + A[l*rsa + i*csa]*A[l*rsa + j*csa];
+        }
+
+        if( Beta == zero )
+        {
+          C[i*rsc + j*csc] = Alpha*dot;
+        }
+        else
+        {
+          C[i*rsc + j*csc] = Alpha*dot + Beta*C[i*rsc + j*csc];
+        }
+      }
+    }
+  }
+  return;
+}
+
+// Complex reference for the Hermitian rank-k update
+//    C := alpha*A*A**H + beta*C   (NOTRANS)
+// or
+//    C := alpha*A**H*A + beta*C   (otherwise),
+// applied in place to the triangle of the N x N matrix C selected by uplo.
+// Element (r,c) of A is read from A[r*rsa + c*csa] and element (r,c) of C
+// from C[r*rsc + c*csc].  Only the real parts of Alpha and Beta take part
+// in the zero/one tests below.  The template parameter U is not referenced
+// in the body.
+//
+// Complex arithmetic goes through the mulc/addc/conjugate/real helpers
+// defined outside this function.
+// NOTE(review): real<T>() presumably keeps the real part and clears the
+// imaginary part -- confirm against its definition.
+template <typename T, typename U>
+void libblis_icherk_check(uplo_t uplo, trans_t trans, dim_t N, dim_t K,
+  T Alpha,T* A, dim_t rsa, dim_t csa, T Beta, T* C, dim_t rsc, dim_t csc)
+{
+  T tmp;
+  T rtmp;
+  dim_t i, j, l;
+  bool UPPER, NOTRANS;
+  T ONE  = {1.0 , 0.0};
+  T ZERO = {0.0 , 0.0};
+
+  UPPER    = (uplo == BLIS_UPPER);
+  NOTRANS  = (trans == BLIS_NO_TRANSPOSE) || (trans == BLIS_CONJ_NO_TRANSPOSE);
+
+  //* Quick return if possible.
+  if( (N == 0) ||
+    (((Alpha.real == ZERO.real) || (K == 0)) && (Beta.real == ONE.real)) )
+  {
+    return;
+  }
+
+  //* And when alpha.eq.zero: C := beta*C on the selected triangle, with the
+  //* diagonal passed through real<T>() before scaling.
+  if( Alpha.real == ZERO.real )
+  {
+    if( UPPER )
+    {
+      if( Beta.real == ZERO.real )
+      {
+        for( j = 0 ; j < N; j++ )
+        {
+          for( i = 0 ; i <= j ; i++ )
+          {
+            C[i*rsc + j*csc] = ZERO;
+          }
+        }
+      }
+      else
+      {
+        for( j = 0 ; j < N ; j++ )
+        {
+          for( i = 0; i < j ; i++ )
+          {
+            C[i*rsc + j*csc] = mulc<T>(Beta , C[i*rsc + j*csc]);
+          }
+          C[j*rsc + j*csc] = mulc<T>(Beta , real<T>(C[j*rsc + j*csc]));
+        }
+      }
+    }
+    else
+    {
+      if( Beta.real == ZERO.real )
+      {
+        for( j = 0 ; j < N ; j++ )
+        {
+          for( i = j ; i < N ; i++ )
+          {
+            C[i*rsc + j*csc] = ZERO;
+          }
+        }
+      }
+      else
+      {
+        for( j = 0 ; j < N ; j++ )
+        {
+          C[j*rsc + j*csc] = mulc<T>(Beta , real<T>(C[j*rsc + j*csc]));
+          for( i = (j+1) ; i < N ; i++ )
+          {
+            C[i*rsc + j*csc] = mulc<T>(Beta , C[i*rsc + j*csc]);
+          }
+        }
+      }
+    }
+    return;
+  }
+
+  //* Start the operations.
+  if( NOTRANS )
+  {
+    //* Form  C := alpha*A*A**H + beta*C.
+    if( UPPER )
+    {
+      for( j = 0 ; j < N ; j++ )
+      {
+        // Apply beta to column j of the upper triangle.
+        if( Beta.real == ZERO.real )
+        {
+          for( i = 0 ; i <= j ; i++ )
+          {
+            C[i*rsc + j*csc] = ZERO;
+          }
+        }
+        else if( Beta.real != ONE.real )
+        {
+          for( i = 0 ; i < j ; i++ )
+          {
+            C[i*rsc + j*csc] = mulc<T>(Beta , C[i*rsc + j*csc]);
+          }
+          C[j*rsc + j*csc] = mulc<T>(Beta , real<T>(C[j*rsc + j*csc]));
+        }
+        else
+        {
+          C[j*rsc + j*csc] = real<T>(C[j*rsc + j*csc]);
+        }
+
+        // Accumulate alpha*A(i,l)*conj(A(j,l)), skipping zero A(j,l).
+        for( l = 0; l < K ; l++ )
+        {
+          if((A[j*rsa + l*csa].real != ZERO.real) || (A[j*rsa + l*csa].imag != ZERO.imag))
+          {
+            tmp = mulc<T>(Alpha , conjugate<T>(A[j*rsa + l*csa]));
+            for( i = 0 ; i < j ; i++ )
+            {
+              C[i*rsc + j*csc] = addc<T>(C[i*rsc + j*csc] , mulc<T>(tmp , A[i*rsa + l*csa]));
+            }
+            // Diagonal entry: index row j explicitly rather than relying on
+            // the value i holds after the loop above.
+            C[j*rsc + j*csc] = addc<T>(real<T>(C[j*rsc + j*csc]) , real<T>(mulc<T>(tmp , A[j*rsa + l*csa])));
+          }
+        }
+      }
+    }
+    else
+    {
+      for( j = 0; j < N ; j++ )
+      {
+        // Apply beta to column j of the lower triangle.
+        if( Beta.real == ZERO.real )
+        {
+          for( i = j ; i < N ; i++ )
+          {
+            // Zero every row i of the column (the row index must be i, not
+            // j, otherwise only the diagonal entry is cleared).
+            C[i*rsc + j*csc] = ZERO;
+          }
+        }
+        else if( Beta.real != ONE.real )
+        {
+          C[j*rsc + j*csc] = mulc<T>(Beta ,real<T>(C[j*rsc + j*csc]));
+          for( i = (j+1) ; i < N ; i++ )
+          {
+            C[i*rsc + j*csc] = mulc<T>(Beta , C[i*rsc + j*csc]);
+          }
+        }
+        else
+        {
+          C[j*rsc + j*csc] = real<T>(C[j*rsc + j*csc]);
+        }
+
+        // Accumulate alpha*A(i,l)*conj(A(j,l)), skipping zero A(j,l).
+        for( l = 0; l < K ; l++ )
+        {
+          if( (A[j*rsa + l*csa].real != ZERO.real)||(A[j*rsa + l*csa].imag != ZERO.imag) )
+          {
+            tmp = mulc<T>(Alpha , conjugate<T>(A[j*rsa + l*csa]));
+            C[j*rsc + j*csc] = addc<T>(real<T>(C[j*rsc + j*csc]) , real<T>(mulc<T>(tmp , A[j*rsa + l*csa])));
+            for( i = (j+1) ; i < N; i++ )
+            {
+              C[i*rsc + j*csc] = addc<T>(C[i*rsc + j*csc] , mulc<T>(tmp , A[i*rsa + l*csa]));
+            }
+          }
+        }
+      }
+    }
+  }
+  else
+  {
+    //* Form  C := alpha*A**H*A + beta*C.
+    if( UPPER )
+    {
+      for( j = 0 ; j < N ; j++ )
+      {
+        // Strictly upper entries of column j.
+        for( i = 0 ; i < j ; i++ )
+        {
+          tmp = ZERO;
+          for( l = 0 ; l < K ; l++ )
+          {
+            tmp = addc<T>(tmp , mulc<T>(conjugate<T>(A[l*rsa + i*csa]) , A[l*rsa + j*csa]));
+          }
+          if( Beta.real == ZERO.real )
+          {
+            C[i*rsc + j*csc] = mulc<T>(Alpha , tmp);
+          }
+          else
+          {
+            C[i*rsc + j*csc] = addc<T>(mulc<T>(Alpha , tmp) , mulc<T>(Beta , C[i*rsc + j*csc]));
+          }
+        }
+        // Diagonal entry of column j.
+        rtmp = ZERO;
+        for( l = 0 ; l < K ; l++ )
+        {
+          rtmp = addc<T>(rtmp , mulc<T>(conjugate<T>(A[l*rsa + j*csa]) , A[l*rsa + j*csa]));
+        }
+        if( Beta.real == ZERO.real )
+        {
+          C[j*rsc + j*csc] = mulc<T>(Alpha , rtmp);
+        }
+        else
+        {
+          C[j*rsc + j*csc] = addc<T>(mulc<T>(Alpha , rtmp) , mulc<T>(Beta , real<T>(C[j*rsc + j*csc])));
+        }
+      }
+    }
+    else
+    {
+      for( j = 0 ; j < N ; j++ )
+      {
+        // Diagonal entry of column j.
+        rtmp = ZERO;
+        for( l = 0 ; l < K ; l++ )
+        {
+          rtmp = addc<T>(rtmp , mulc<T>(conjugate<T>(A[l*rsa + j*csa]) , A[l*rsa + j*csa]));
+        }
+        if( Beta.real == ZERO.real )
+        {
+          C[j*rsc + j*csc] = mulc<T>(Alpha , rtmp);
+        }
+        else
+        {
+          C[j*rsc + j*csc] = addc<T>(mulc<T>(Alpha , rtmp) , mulc<T>(Beta , real<T>(C[j*rsc + j*csc])));
+        }
+        // Strictly lower entries of column j.
+        for( i = (j+1) ; i < N ; i++ )
+        {
+          tmp = ZERO;
+          for( l = 0 ; l < K ; l++ )
+          {
+            tmp = addc<T>(tmp , mulc<T>(conjugate<T>(A[l*rsa + i*csa]) , A[l*rsa + j*csa]));
+          }
+          if( Beta.real == ZERO.real )
+          {
+            C[i*rsc + j*csc] = mulc<T>(Alpha , tmp);
+          }
+          else
+          {
+            C[i*rsc + j*csc] = addc<T>(mulc<T>(Alpha , tmp) , mulc<T>(Beta , C[i*rsc + j*csc]));
+          }
+        }
+      }
+    }
+  }
+  return;
+}
+
+double libblis_test_iherk_check(
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         a,
+  obj_t*         beta,
+  obj_t*         c,
+  obj_t*         c_orig
+){
+  // Reference check for herk: the reference kernel matching the datatype
+  // of a is run on the buffer of c_orig, and the residual between that
+  // result and the buffer of c is returned.  `params` is not consulted.
+  num_t   dt    = bli_obj_dt( a );
+  uplo_t  uplo  = bli_obj_uplo( c );
+  trans_t trans = bli_obj_onlytrans_status( a );
+  dim_t   M     = bli_obj_length( c );
+  dim_t   K     = bli_obj_width_after_trans( a );
+
+  // Row/column strides of a and c.
+  dim_t rsa = bli_obj_row_stride( a ), csa = bli_obj_col_stride( a );
+  dim_t rsc = bli_obj_row_stride( c ), csc = bli_obj_col_stride( c );
+
+  double resid = 0.0;
+
+  // Leading dimension of a, taken along the storage direction of c.
+  f77_int lda = bli_obj_is_col_stored( c ) ? bli_obj_col_stride( a )
+                                           : bli_obj_row_stride( a );
+
+  // Smallest leading dimension accepted for a; when lda falls short the
+  // comparison is skipped and a zero residual is reported.
+  int nrowa = ( trans == BLIS_NO_TRANSPOSE ) ? M : K;
+  if( lda < max(1, nrowa) )
+  {
+    return resid;
+  }
+
+  switch( dt )
+  {
+    case BLIS_FLOAT :
+    {
+      float* alphap = (float*) bli_obj_buffer( alpha );
+      float* ap     = (float*) bli_obj_buffer( a );
+      float* betap  = (float*) bli_obj_buffer( beta );
+      float* refp   = (float*) bli_obj_buffer( c_orig );
+      float* outp   = (float*) bli_obj_buffer( c );
+      libblis_iherk_check<float>( uplo, trans, M, K, *alphap,
+                                       ap, rsa, csa, *betap, refp, rsc, csc );
+      resid = computediffrm( M, M, outp, refp, rsc, csc );
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double* alphap = (double*) bli_obj_buffer( alpha );
+      double* ap     = (double*) bli_obj_buffer( a );
+      double* betap  = (double*) bli_obj_buffer( beta );
+      double* refp   = (double*) bli_obj_buffer( c_orig );
+      double* outp   = (double*) bli_obj_buffer( c );
+      libblis_iherk_check<double>( uplo, trans, M, K, *alphap,
+                                       ap, rsa, csa, *betap, refp, rsc, csc );
+      resid = computediffrm( M, M, outp, refp, rsc, csc );
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* alphap = (scomplex*) bli_obj_buffer( alpha );
+      scomplex* ap     = (scomplex*) bli_obj_buffer( a );
+      scomplex* betap  = (scomplex*) bli_obj_buffer( beta );
+      scomplex* refp   = (scomplex*) bli_obj_buffer( c_orig );
+      scomplex* outp   = (scomplex*) bli_obj_buffer( c );
+      // The imaginary parts of alpha and beta are cleared in the caller's
+      // buffers before the reference kernel runs.
+      alphap->imag = 0.0 ;
+      betap->imag  = 0.0 ;
+      libblis_icherk_check<scomplex, float>( uplo, trans, M, K, *alphap,
+                                        ap, rsa, csa, *betap, refp, rsc, csc );
+      resid = computediffim( M, M, outp, refp, rsc, csc );
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* alphap = (dcomplex*) bli_obj_buffer( alpha );
+      dcomplex* ap     = (dcomplex*) bli_obj_buffer( a );
+      dcomplex* betap  = (dcomplex*) bli_obj_buffer( beta );
+      dcomplex* refp   = (dcomplex*) bli_obj_buffer( c_orig );
+      dcomplex* outp   = (dcomplex*) bli_obj_buffer( c );
+      // The imaginary parts of alpha and beta are cleared in the caller's
+      // buffers before the reference kernel runs.
+      alphap->imag = 0.0 ;
+      betap->imag  = 0.0 ;
+      libblis_icherk_check<dcomplex, double>( uplo, trans, M, K, *alphap,
+                                        ap, rsa, csa, *betap, refp, rsc, csc );
+      resid = computediffim( M, M, outp, refp, rsc, csc );
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return resid;
+}
+
+// Scan the M x N matrix held by c, reading element (i,j) from offset
+// i*rsc + j*csc of its buffer, and return the first NaN encountered
+// (converted to double).  Returns 0.0 when no element is NaN.
+template <typename T>
+double libblis_check_nan_real( dim_t rsc, dim_t csc, obj_t* c ) {
+  const dim_t M = bli_obj_length( c );
+  const dim_t N = bli_obj_width( c );
+  const T*    C = (const T*) bli_obj_buffer( c );
+
+  for( dim_t i = 0 ; i < M ; i++ ) {
+    for( dim_t j = 0 ; j < N ; j++ ) {
+      const T tv = C[ i*rsc + j*csc ];
+      // Return on the first NaN instead of scanning the remaining rows.
+      if ( bli_isnan( tv ) ) {
+        return (double)tv;
+      }
+    }
+  }
+  return 0.0;
+}
+
+// Scan the M x N complex matrix held by c (element type U), reading
+// element (i,j) from offset i*rsc + j*csc of its buffer.  For the first
+// element with a NaN component, return that component (the real part when
+// it is NaN, otherwise the imaginary part).  Returns 0.0 when nothing is
+// NaN.  The template parameter T is not referenced in the body.
+template <typename U, typename T>
+double libblis_check_nan_complex( dim_t rsc, dim_t csc, obj_t* c ) {
+  const dim_t M = bli_obj_length( c );
+  const dim_t N = bli_obj_width( c );
+  const U*    C = (const U*) bli_obj_buffer( c );
+
+  for( dim_t i = 0 ; i < M ; i++ ) {
+    for( dim_t j = 0 ; j < N ; j++ ) {
+      const U tv = C[ i*rsc + j*csc ];
+      // Return on the first NaN instead of scanning the remaining rows.
+      if ( bli_isnan( tv.real ) ) {
+        return tv.real;
+      }
+      if ( bli_isnan( tv.imag ) ) {
+        return tv.imag;
+      }
+    }
+  }
+  return 0.0;
+}
+
+// Run the NaN scan that matches datatype dt over matrix c and return its
+// result.  0.0 is returned when dt is not one of the four handled
+// datatypes (after flagging BLIS_INVALID_DATATYPE).
+double libblis_check_nan_herk(obj_t* c, num_t dt ) {
+  // Column-stored c is scanned with strides (1, col_stride); anything
+  // else with (row_stride, 1).
+  const bool  colmajor = bli_obj_is_col_stored( c );
+  const dim_t rsc      = colmajor ? 1 : bli_obj_row_stride( c );
+  const dim_t csc      = colmajor ? bli_obj_col_stride( c ) : 1;
+
+  double nan_found = 0.0;
+
+  switch( dt )  {
+    case BLIS_FLOAT:
+      nan_found = libblis_check_nan_real<float>( rsc, csc, c );
+      break;
+
+    case BLIS_DOUBLE:
+      nan_found = libblis_check_nan_real<double>( rsc, csc, c );
+      break;
+
+    case BLIS_SCOMPLEX:
+      nan_found = libblis_check_nan_complex<scomplex, float>( rsc, csc, c );
+      break;
+
+    case BLIS_DCOMPLEX:
+      nan_found = libblis_check_nan_complex<dcomplex, double>( rsc, csc, c );
+      break;
+
+    default :
+      // Not a handled datatype: flag it via bli_check_error_code.
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+      break;
+  }
+
+  return nan_found;
+}
+
--- a/gtestsuite/src/ref_normfm.cpp
+++ b/gtestsuite/src/ref_normfm.cpp
@@ -0,0 +1,109 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_normfm.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> NORMFM performs matrix operation
+//*>    Compute the Frobenius norm (bli_?normfm())
+//*>    of the elements in an m x n matrix A. The resulting norm is stored to norm
+//*  ==========================================================================
+
+// Reference Frobenius norm of the real M x N matrix X, whose element (i,j)
+// is read from X[i*rsx + j*csx]: the square root of the sum of squared
+// elements, accumulated in type T.  Returns 0 when either dimension is 0.
+// The template parameter U is not referenced in the body.
+template <typename T, typename U>
+T libblis_inormfm_check(dim_t M, dim_t N, T* X, dim_t rsx, dim_t csx ) {
+
+  T norm = 0.0;
+  if ((M == 0) || (N == 0)) {
+    return norm;
+  }
+
+  // Row-by-row accumulation of the squared elements.
+  T sum = 0.0;
+  for( dim_t row = 0 ; row < M ; row++ ) {
+    for( dim_t col = 0 ; col < N ; col++ ) {
+      sum += X[row*rsx + col*csx] * X[row*rsx + col*csx];
+    }
+  }
+
+  norm = sqrt( abs(sum) );
+  return norm;
+}
+
+// Reference Frobenius norm of the complex M x N matrix X, whose element
+// (i,j) is read from X[i*rsx + j*csx].  Squared real parts and squared
+// imaginary parts are accumulated separately in the two fields of a T,
+// then added and square-rooted as a U.  Returns 0 when either dimension
+// is 0.
+template <typename T, typename U>
+U libblis_icnormfm_check(dim_t M, dim_t N, T* X, dim_t rsx, dim_t csx ) {
+
+  U norm = 0.0;
+  if ((M == 0) || (N == 0)) {
+    return norm;
+  }
+
+  // squares.real holds sum(re^2), squares.imag holds sum(im^2).
+  T squares = { 0.0, 0.0 };
+  for( dim_t row = 0 ; row < M ; row++ ) {
+    for( dim_t col = 0 ; col < N ; col++ ) {
+      auto elem = X[row*rsx + col*csx];
+      squares.real += elem.real * elem.real;
+      squares.imag += elem.imag * elem.imag;
+    }
+  }
+
+  U total = squares.real + squares.imag;
+  norm = sqrt( abs(total) );
+  return norm;
+}
+
+// Compare the norm stored in the `norm` object with a reference Frobenius
+// norm recomputed from x, and return the magnitude of the relative error
+// |ref - actual| / |ref|.  When the reference norm is exactly zero (for
+// example an empty or all-zero matrix) the relative error is undefined, so
+// the absolute error |ref - actual| is returned instead of dividing by
+// zero.  `params` is not consulted.
+double libblis_test_inormfm_check(
+  test_params_t* params,
+  obj_t*         x,
+  obj_t*         norm
+){
+  num_t  dt    = bli_obj_dt( x );
+  dim_t  M     = bli_obj_length( x );
+  dim_t  N     = bli_obj_width( x );
+  dim_t  rsx   = bli_obj_row_stride( x ) ;
+  dim_t  csx   = bli_obj_col_stride( x ) ;
+  double resid = 0.0;
+
+  switch( dt )  {
+    case BLIS_FLOAT :
+    {
+      float* X    = (float*) bli_obj_buffer( x );
+      float* av   = (float*) bli_obj_internal_scalar_buffer( norm );
+      float rv    = libblis_inormfm_check<float, int32_t>(M, N, X, rsx, csx);
+      auto  err   = abs(rv - *av);
+      // Avoid 0/0 (or x/0) when the reference norm is zero.
+      resid = (double)( (rv == 0) ? err : err/abs(rv) );
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double* X   = (double*) bli_obj_buffer( x );
+      double* av  = (double*) bli_obj_internal_scalar_buffer( norm );
+      double rv   = libblis_inormfm_check<double, int64_t>(M, N, X, rsx, csx);
+      auto   err  = abs(rv - *av);
+      // Avoid 0/0 (or x/0) when the reference norm is zero.
+      resid = (double)( (rv == 0) ? err : err/abs(rv) );
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* X  = (scomplex*) bli_obj_buffer( x );
+      float* av    = (float*) bli_obj_internal_scalar_buffer( norm );
+      float  rv = libblis_icnormfm_check<scomplex, float>(M, N, X, rsx, csx);
+      auto   err = abs(rv - *av);
+      // Avoid 0/0 (or x/0) when the reference norm is zero.
+      resid = (double)( (rv == 0) ? err : err/abs(rv) );
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* X  = (dcomplex*) bli_obj_buffer( x );
+      double* av   = (double*) bli_obj_internal_scalar_buffer( norm );
+      double  rv = libblis_icnormfm_check<dcomplex, double>(M, N, X, rsx, csx);
+      auto    err = abs(rv - *av);
+      // Avoid 0/0 (or x/0) when the reference norm is zero.
+      resid = (double)( (rv == 0) ? err : err/abs(rv) );
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+  return abs(resid);
+}
--- a/gtestsuite/src/ref_normfv.cpp
+++ b/gtestsuite/src/ref_normfv.cpp
@@ -0,0 +1,107 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_normfv.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> NORMFV performs vector operations
+//*>    Compute the Frobenius norm (bli_?normfv())
+//*>    of the elements in a vector x of length n. The resulting norm is stored to norm
+//*  ==========================================================================
+
+// Reference Frobenius norm of a real strided vector.
+//
+//   len  : number of elements visited
+//   X    : base pointer; element k is read from X[k * incx]
+//   incx : distance between consecutive elements
+//
+// Returns sqrt(|sum of squares|) in the element type T, or zero when len is
+// zero. The template parameter U is not used by the body.
+template <typename T, typename U>
+T libblis_inormfv_check(dim_t len, T* X, dim_t incx ) {
+  if( len == 0 ) {
+    return T(0.0);
+  }
+
+  T sum_sq = 0.0;
+  for( dim_t k = 0 ; k < len ; ++k ) {
+    const T elem = X[k * incx];
+    sum_sq += elem * elem;
+  }
+
+  return sqrt( abs(sum_sq) );
+}
+
+// Reference Frobenius norm of a complex strided vector.
+//
+//   len  : number of elements visited
+//   X    : base pointer; element k is read from X[k * incx]
+//   incx : distance between consecutive elements
+//
+// T is the complex element type (with .real / .imag members) and U is the
+// type of the returned norm. Returns zero when len is zero.
+template <typename T, typename U>
+U libblis_icnormfv_check(dim_t len, T* X, dim_t incx ) {
+  if( len == 0 ) {
+    return U(0.0);
+  }
+
+  // acc.real gathers the squared real parts, acc.imag the squared imaginary
+  // parts; the two running sums are only combined after the loop.
+  T acc = { 0.0, 0.0 };
+  for( dim_t k = 0 ; k < len ; ++k ) {
+    const T& elem = X[k * incx];
+    acc.real += elem.real * elem.real;
+    acc.imag += elem.imag * elem.imag;
+  }
+
+  U total = acc.real + acc.imag;
+  return sqrt( abs(total) );
+}
+
+// Verifies a vector Frobenius norm. The norm of `x` is recomputed by the
+// reference routines and compared with the value held by `n`; the result is
+// |reference - value| / |reference|.
+//
+//   params : not referenced by this routine
+//   alpha  : not referenced by this routine
+//   x      : vector object supplying datatype, length, increment and buffer
+//   n      : object whose internal scalar buffer holds the value under test
+//
+// The quotient is taken against the reference, so the result is not finite
+// when the reference norm is zero (which includes a zero-length vector).
+// An unsupported datatype is handed to bli_check_error_code() and the
+// residual stays at 0.0.
+double libblis_test_inormfv_check(
+  test_params_t* params,
+  obj_t*  alpha,
+  obj_t*  x,
+  obj_t*  n
+) {
+  num_t   dtype   = bli_obj_dt( x );
+  dim_t   length  = bli_obj_vector_dim( x );
+  f77_int stride  = bli_obj_vector_inc( x );
+  double  rel_err = 0.0;
+
+  if( dtype == BLIS_FLOAT ) {
+    float* elems    = (float*) bli_obj_buffer( x );
+    float* measured = (float*) bli_obj_internal_scalar_buffer( n );
+    float  expected = libblis_inormfv_check<float, int32_t>(length, elems, stride );
+    rel_err = (double)(abs(expected - *measured)/abs(expected));
+  }
+  else if( dtype == BLIS_DOUBLE ) {
+    double* elems    = (double*) bli_obj_buffer( x );
+    double* measured = (double*) bli_obj_internal_scalar_buffer( n );
+    double  expected = libblis_inormfv_check<double, int64_t>(length, elems, stride );
+    rel_err = (double)(abs(expected - *measured)/abs(expected));
+  }
+  else if( dtype == BLIS_SCOMPLEX ) {
+    // Complex input: the norm itself is a real value of the matching precision.
+    scomplex* elems = (scomplex*) bli_obj_buffer( x );
+    float* measured = (float*) bli_obj_internal_scalar_buffer( n );
+    float  expected = libblis_icnormfv_check<scomplex, float>(length, elems, stride );
+    rel_err = (double)(abs(expected - *measured)/abs(expected));
+  }
+  else if( dtype == BLIS_DCOMPLEX ) {
+    dcomplex* elems  = (dcomplex*) bli_obj_buffer( x );
+    double* measured = (double*) bli_obj_internal_scalar_buffer( n );
+    double  expected = libblis_icnormfv_check<dcomplex, double>(length, elems, stride );
+    rel_err = (double)(abs(expected - *measured)/abs(expected));
+  }
+  else {
+    bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return rel_err;
+}
--- a/gtestsuite/src/ref_scal2m.cpp
+++ b/gtestsuite/src/ref_scal2m.cpp
@@ -0,0 +1,163 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_scal2m.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> SCAL2M performs matrix operations
+//*>    B := alpha * transa(A)
+//*>    where A is an m x n matrix, and alpha is a scalar.
+//*  ==========================================================================
+
+// Reference kernel for real-valued SCAL2M: Y := alpha * X.
+//
+//   M, N     : row and column counts of the region that is written
+//   alpha    : pointer to the scalar; only alpha[0] is read
+//   X        : source matrix, element (i,j) read from X[i*rsx + j*csx]
+//   Y        : destination matrix, element (i,j) written to Y[i*rsy + j*csy]
+//
+// Nothing is written when M or N is zero. Every alpha value -- 1.0
+// included -- overwrites Y: the destination is a separate argument and is
+// not assumed to already hold X, so alpha == 1 must still copy X into Y.
+// The template parameter U is not used by the body.
+template <typename T, typename U>
+void libblis_iscal2m_check(dim_t M, dim_t N, T* alpha,
+           T* X, dim_t rsx, dim_t csx, T* Y, dim_t rsy, dim_t csy ) {
+
+  dim_t i, j;
+  T ZERO = 0.0 ;
+  T Alpha = alpha[0];
+
+  if ((M == 0) || (N == 0)) {
+    return;
+  }
+
+  if (Alpha == ZERO) {
+    // Store exact zeros without reading X, so NaN/Inf entries in X cannot
+    // reach the result through 0 * X.
+    for(i = 0 ; i < M ; i++) {
+      for(j = 0 ; j < N ; j++) {
+        Y[i*rsy + j*csy] = ZERO;
+      }
+    }
+  }
+  else {
+    for(i = 0 ; i < M ; i++) {
+      for(j = 0 ; j < N ; j++) {
+        Y[i*rsy + j*csy] = Alpha * X[i*rsx + j*csx];
+      }
+    }
+  }
+
+  return;
+}
+
+// Reference kernel for complex SCAL2M: Y := alpha * conjx(X).
+//
+//   M, N     : row and column counts of the region that is written
+//   alpha    : pointer to the complex scalar
+//   X        : source matrix, element (i,j) at X[i*rsx + j*csx]
+//   conjx    : when true, X is conjugated IN PLACE before it is scaled, so
+//              the caller's X buffer is modified
+//   Y        : destination matrix, element (i,j) written to Y[i*rsy + j*csy]
+//
+// Nothing is touched when M or N is zero. Every alpha value -- 1 + 0i
+// included -- overwrites Y: the destination is a separate argument and is
+// not assumed to already hold X. The template parameter U is not used.
+template <typename T, typename U>
+void libblis_icscal2m_check(dim_t M, dim_t N, T* alpha,
+   T* X, dim_t rsx, dim_t csx, bool conjx, T* Y, dim_t rsy, dim_t csy) {
+  dim_t i, j;
+  T ZERO = {0.0, 0.0} ;
+  T Alpha = *alpha;
+
+  if ((M == 0) || (N == 0)) {
+    return;
+  }
+
+  if(conjx) {
+    for(i = 0 ; i < M ; i++) {
+      for(j = 0 ; j < N ; j++) {
+        X[i*rsx + j*csx] = conjugate<T>(X[i*rsx + j*csx]);
+      }
+    }
+  }
+
+  /* Form  y := Alpha*x. */
+  if ((Alpha.real == ZERO.real) && (Alpha.imag == ZERO.imag)) {
+    // alpha is exactly 0 + 0i (both parts must be zero): store zeros
+    // without reading X.
+    for(i = 0 ; i < M ; i++) {
+      for(j = 0 ; j < N ; j++) {
+        Y[i*rsy + j*csy] = ZERO;
+      }
+    }
+  }
+  else {
+    for(i = 0 ; i < M ; i++) {
+      for(j = 0 ; j < N ; j++) {
+        Y[i*rsy + j*csy] = mulc<T>(Alpha , X[i*rsx + j*csx]);
+      }
+    }
+  }
+
+  return;
+}
+
+// Check driver for SCAL2M. The reference kernel recomputes alpha * x into the
+// buffer of `y_orig`, and the buffer of `y` (presumably the library output --
+// confirm against the caller) is then compared with it.
+//
+//   params : not referenced by this routine
+//   alpha  : scalar object; its buffer is handed to the reference kernel
+//   x      : source matrix; its trans/conj flags and strides are honoured
+//   y      : matrix compared against the reference; supplies datatype,
+//            dimensions and strides
+//   y_orig : matrix whose buffer is overwritten by the reference kernel
+//
+// Returns whatever computediffrm() / computediffim() reports, or 0.0 after
+// bli_check_error_code() has been called for an unsupported datatype.
+double libblis_test_iscal2m_check(
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         x,
+  obj_t*         y,
+  obj_t*         y_orig
+){
+  num_t  dtype   = bli_obj_dt( y );
+  dim_t  rows    = bli_obj_length( y );
+  dim_t  cols    = bli_obj_width( y );
+  bool   x_trans = bli_obj_has_trans( x );
+  bool   x_conj  = bli_obj_has_conj( x );
+  dim_t  y_rs    = bli_obj_row_stride( y ) ;
+  dim_t  y_cs    = bli_obj_col_stride( y ) ;
+  double diff    = 0.0;
+
+  // A transposed x is walked with its row and column strides exchanged; the
+  // choice does not depend on whether x is row- or column-stored.
+  dim_t  x_rs = x_trans ? bli_obj_col_stride( x ) : bli_obj_row_stride( x ) ;
+  dim_t  x_cs = x_trans ? bli_obj_row_stride( x ) : bli_obj_col_stride( x ) ;
+
+  switch( dtype )  {
+    case BLIS_FLOAT :
+    {
+      float* scalar = (float*) bli_obj_buffer( alpha );
+      float* src    = (float*) bli_obj_buffer( x );
+      float* ref    = (float*) bli_obj_buffer( y_orig );
+      float* result = (float*) bli_obj_buffer( y );
+      libblis_iscal2m_check<float, int32_t>(rows, cols, scalar,
+                                  src, x_rs, x_cs, ref, y_rs, y_cs);
+      diff = computediffrm(rows, cols, result, ref, y_rs, y_cs);
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double* scalar = (double*) bli_obj_buffer( alpha );
+      double* src    = (double*) bli_obj_buffer( x );
+      double* ref    = (double*) bli_obj_buffer( y_orig );
+      double* result = (double*) bli_obj_buffer( y );
+      libblis_iscal2m_check<double, int64_t>(rows, cols, scalar,
+                                  src, x_rs, x_cs, ref, y_rs, y_cs);
+      diff = computediffrm(rows, cols, result, ref, y_rs, y_cs);
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* scalar = (scomplex*) bli_obj_buffer( alpha );
+      scomplex* src    = (scomplex*) bli_obj_buffer( x );
+      scomplex* ref    = (scomplex*) bli_obj_buffer( y_orig );
+      scomplex* result = (scomplex*) bli_obj_buffer( y );
+      libblis_icscal2m_check<scomplex, int32_t>(rows, cols, scalar,
+                          src, x_rs, x_cs, x_conj, ref, y_rs, y_cs);
+      diff = computediffim(rows, cols, result, ref, y_rs, y_cs);
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* scalar = (dcomplex*) bli_obj_buffer( alpha );
+      dcomplex* src    = (dcomplex*) bli_obj_buffer( x );
+      dcomplex* ref    = (dcomplex*) bli_obj_buffer( y_orig );
+      dcomplex* result = (dcomplex*) bli_obj_buffer( y );
+      libblis_icscal2m_check<dcomplex, int64_t>(rows, cols, scalar,
+                          src, x_rs, x_cs, x_conj, ref, y_rs, y_cs);
+      diff = computediffim(rows, cols, result, ref, y_rs, y_cs);
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return diff;
+}
+
--- a/gtestsuite/src/ref_scal2v.cpp
+++ b/gtestsuite/src/ref_scal2v.cpp
@@ -0,0 +1,146 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_scal2v.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> SCAL2V performs vector operations
+//*>     y := alpha * conjx(x)
+//*>    where x is a vector of length n, and alpha is a scalar.
+//*  ==========================================================================
+
+// Reference kernel for real-valued SCAL2V: y := alpha * x.
+//
+//   len        : number of elements processed
+//   alpha      : pointer to the scalar; only the first element is read
+//   X, incx    : source vector; element k is X[k * incx]
+//   Y, incy    : destination vector; element k is Y[k * incy]
+//
+// A zero alpha stores exact zeros and never reads X. Nothing is written when
+// len is zero. The template parameter U is not used by the body.
+template <typename T, typename U>
+void libblis_iscal2v_check(dim_t len, T* alpha, T* X, dim_t incx,
+                                                T* Y, dim_t incy) {
+  if( len == 0 ) {
+    return;
+  }
+
+  const T    scale     = *alpha;
+  const bool zero_fill = ( scale == T(0.0) );
+
+  for( dim_t k = 0 ; k < len ; ++k ) {
+    if( zero_fill ) {
+      Y[k * incy] = T(0.0);
+    }
+    else {
+      Y[k * incy] = scale * X[k * incx];
+    }
+  }
+}
+
+// Reference kernel for complex SCAL2V: y := alpha * conjx(x).
+//
+//   len        : number of elements processed
+//   alpha      : pointer to the complex scalar
+//   X, incx    : source vector; element k is X[k * incx]
+//   Y, incy    : destination vector; element k is Y[k * incy]
+//   cfx        : when true, X is conjugated IN PLACE before it is scaled, so
+//                the caller's X buffer is modified
+//
+// An alpha of exactly 0 + 0i stores zeros without reading X for the product.
+// Nothing is touched when len is zero. The template parameter U is not used.
+template <typename T, typename U>
+void libblis_icscal2v_check(dim_t len, T* alpha, T* X, dim_t incx,
+                                               T* Y, dim_t incy, bool cfx) {
+  if( len == 0 ) {
+    return;
+  }
+
+  T scale = *alpha;
+  T zero  = {0.0 , 0.0} ;
+
+  // First pass: conjugate the whole source vector when requested.
+  if( cfx ) {
+    for( dim_t k = 0 ; k < len ; ++k ) {
+      T& elem = X[k * incx];
+      elem = conjugate<T>(elem);
+    }
+  }
+
+  // Second pass: fill the destination.
+  const bool scale_is_zero =
+      !( (scale.real != zero.real) || (scale.imag != zero.imag) );
+
+  for( dim_t k = 0 ; k < len ; ++k ) {
+    if( scale_is_zero ) {
+      Y[k * incy] = zero;
+    }
+    else {
+      Y[k * incy] = mulc<T>(scale , X[k * incx]);
+    }
+  }
+}
+
+// Check driver for SCAL2V. The reference kernel recomputes alpha * conjx(x)
+// into the buffer of `y_orig`, and the buffer of `y` (presumably the library
+// output -- confirm against the caller) is then compared with it.
+//
+//   params : not referenced by this routine
+//   alpha  : scalar object; its buffer is handed to the reference kernel
+//   x      : source vector; supplies datatype, length, increment, conj flag
+//   y      : vector compared against the reference
+//   y_orig : vector whose buffer is overwritten by the reference kernel; its
+//            increment is used for both y buffers
+//
+// Returns whatever computediffrv() / computediffiv() reports, or 0.0 after
+// bli_check_error_code() has been called for an unsupported datatype.
+double libblis_test_iscal2v_check(
+  test_params_t* params,
+  obj_t*  alpha,
+  obj_t*  x,
+  obj_t*  y,
+  obj_t*  y_orig
+) {
+  num_t  dt    = bli_obj_dt( x );
+  dim_t  M     = bli_obj_vector_dim( x );
+  f77_int incx = bli_obj_vector_inc( x );
+  f77_int incy = bli_obj_vector_inc( y_orig );
+  bool cfx     = bli_obj_has_conj( x );
+  double resid = 0.0;
+
+  // The comparison below walks the two y buffers, so it must step by incy;
+  // stepping by incx would visit the wrong elements whenever the two
+  // increments differ.
+  switch( dt )  {
+    case BLIS_FLOAT :
+    {
+      float*   Alpha    = (float*) bli_obj_buffer( alpha );
+      float*   X        = (float*) bli_obj_buffer( x );
+      float*   Y        = (float*) bli_obj_buffer( y_orig );
+      float*   YY       = (float*) bli_obj_buffer( y );
+      libblis_iscal2v_check<float, int32_t>(M, Alpha, X, incx, Y, incy );
+      resid = computediffrv(M, incy, YY, Y);
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double*   Alpha   = (double*) bli_obj_buffer( alpha );
+      double*   X       = (double*) bli_obj_buffer( x );
+      double*   Y       = (double*) bli_obj_buffer( y_orig );
+      double*   YY      = (double*) bli_obj_buffer( y );
+      libblis_iscal2v_check<double, int64_t>(M, Alpha, X, incx, Y, incy );
+      resid = computediffrv(M, incy, YY, Y);
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex*   Alpha = (scomplex*) bli_obj_buffer( alpha );
+      scomplex*   X     = (scomplex*) bli_obj_buffer( x );
+      scomplex*   Y     = (scomplex*) bli_obj_buffer( y_orig );
+      scomplex*   YY    = (scomplex*) bli_obj_buffer( y );
+      libblis_icscal2v_check<scomplex, int32_t>(M, Alpha, X, incx,
+                                                          Y, incy, cfx );
+      resid = computediffiv(M, incy, YY, Y);
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex*   Alpha = (dcomplex*) bli_obj_buffer( alpha );
+      dcomplex*   X     = (dcomplex*) bli_obj_buffer( x );
+      dcomplex*   Y     = (dcomplex*) bli_obj_buffer( y_orig );
+      dcomplex*   YY    = (dcomplex*) bli_obj_buffer( y );
+      libblis_icscal2v_check<dcomplex, int64_t>(M, Alpha, X, incx,
+                                                          Y, incy, cfx );
+      resid = computediffiv(M, incy, YY, Y);
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return resid;
+}
+
--- a/gtestsuite/src/ref_scalm.cpp
+++ b/gtestsuite/src/ref_scalm.cpp
@@ -0,0 +1,139 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_scalm.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> SCALM performs matrix operations
+//*>     A := conjalpha(alpha) * A
+//*>    where A is an m x n matrix, and alpha is a scalar.
+//*  ==========================================================================
+
+// Reference kernel for real-valued SCALM: X := alpha * X, in place.
+//
+//   M, N     : row and column counts of the region that is scaled
+//   alpha    : pointer to the scalar; only the first element is read
+//   X        : matrix updated in place, element (i,j) at X[i*rsx + j*csx]
+//
+// X is left untouched when M or N is zero or when alpha equals 1.0. A zero
+// alpha stores exact zeros instead of multiplying. The template parameter U
+// is not used by the body.
+template <typename T, typename U>
+void libblis_iscalm_check(dim_t M, dim_t N, T* alpha,
+                             T* X, dim_t rsx, dim_t csx ) {
+
+  if( (M == 0) || (N == 0) ) {
+    return;
+  }
+
+  const T scale = *alpha;
+  if( scale == T(1.0) ) {
+    return;
+  }
+
+  const bool wipe = ( scale == T(0.0) );
+
+  for( dim_t r = 0 ; r < M ; ++r ) {
+    for( dim_t c = 0 ; c < N ; ++c ) {
+      T& elem = X[r*rsx + c*csx];
+      if( wipe ) {
+        elem = T(0.0);
+      }
+      else {
+        elem = scale * elem;
+      }
+    }
+  }
+}
+
+// Reference kernel for complex SCALM: X := conjalpha(alpha) * X, in place.
+//
+//   M, N     : row and column counts of the region that is scaled
+//   alpha    : pointer to the complex scalar (not modified; a local copy is
+//              conjugated when requested)
+//   X        : matrix updated in place, element (i,j) at X[i*rsx + j*csx]
+//   cfalpha  : when true, the conjugate of alpha is used as the multiplier
+//
+// X is left untouched when M or N is zero or when the multiplier is exactly
+// 1 + 0i. A multiplier of exactly 0 + 0i stores zeros instead of
+// multiplying; every other value goes through mulc(). The template
+// parameter U is not used by the body.
+template <typename T, typename U>
+void libblis_icscalm_check(dim_t M, dim_t N, T* alpha,
+                             T* X, dim_t rsx, dim_t csx, bool cfalpha) {
+  dim_t i, j;
+  T ONE  = {1.0, 0.0} ;
+  T ZERO = {0.0, 0.0} ;
+  T Alpha = *alpha;
+
+  if ((M == 0) || (N == 0)) {
+    return;
+  }
+
+  if(cfalpha)
+    Alpha = conjugate<T>(Alpha);
+
+  // A complex scalar equals one (or zero) only when BOTH components match,
+  // so the identity and zero tests are conjunctions of equalities.
+  if ((Alpha.real == ONE.real) && (Alpha.imag == ONE.imag)) {
+    return;
+  }
+
+  /* Form  x := Alpha*x. */
+  if ((Alpha.real == ZERO.real) && (Alpha.imag == ZERO.imag)) {
+    for(i = 0 ; i < M ; i++) {
+      for(j = 0 ; j < N ; j++) {
+        X[i*rsx + j*csx] = ZERO;
+      }
+    }
+  }
+  else {
+    for(i = 0 ; i < M ; i++) {
+      for(j = 0 ; j < N ; j++) {
+        X[i*rsx + j*csx] = mulc<T>(Alpha , X[i*rsx + j*csx]);
+      }
+    }
+  }
+
+  return;
+}
+
+// Check driver for SCALM. The reference kernel scales the buffer of `x_orig`
+// in place, and the buffer of `x` (presumably the library output -- confirm
+// against the caller) is then compared with it.
+//
+//   params : not referenced by this routine
+//   alpha  : scalar object; its buffer and conj flag feed the reference
+//   x      : matrix compared against the reference; supplies datatype,
+//            dimensions and strides
+//   x_orig : matrix whose buffer is overwritten by the reference kernel
+//
+// Returns whatever computediffrm() / computediffim() reports, or 0.0 after
+// bli_check_error_code() has been called for an unsupported datatype.
+double libblis_test_iscalm_check(
+  test_params_t* params,
+  obj_t*  alpha,
+  obj_t*  x,
+  obj_t*  x_orig
+) {
+  num_t  dtype      = bli_obj_dt( x );
+  dim_t  rows       = bli_obj_length( x );
+  dim_t  cols       = bli_obj_width( x );
+  dim_t  row_stride = bli_obj_row_stride( x ) ;
+  dim_t  col_stride = bli_obj_col_stride( x ) ;
+  bool   conj_alpha = bli_obj_has_conj( alpha );
+  double diff       = 0.0;
+
+  switch( dtype )  {
+    case BLIS_FLOAT :
+    {
+      float* scalar = (float*) bli_obj_buffer( alpha );
+      float* ref    = (float*) bli_obj_buffer( x_orig );
+      float* result = (float*) bli_obj_buffer( x );
+      libblis_iscalm_check<float, int32_t>(rows, cols, scalar,
+                                           ref, row_stride, col_stride);
+      diff = computediffrm(rows, cols, result, ref, row_stride, col_stride);
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double* scalar = (double*) bli_obj_buffer( alpha );
+      double* ref    = (double*) bli_obj_buffer( x_orig );
+      double* result = (double*) bli_obj_buffer( x );
+      libblis_iscalm_check<double, int64_t>(rows, cols, scalar,
+                                            ref, row_stride, col_stride);
+      diff = computediffrm(rows, cols, result, ref, row_stride, col_stride);
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* scalar = (scomplex*) bli_obj_buffer( alpha );
+      scomplex* ref    = (scomplex*) bli_obj_buffer( x_orig );
+      scomplex* result = (scomplex*) bli_obj_buffer( x );
+      libblis_icscalm_check<scomplex, int32_t>(rows, cols, scalar,
+                               ref, row_stride, col_stride, conj_alpha);
+      diff = computediffim(rows, cols, result, ref, row_stride, col_stride);
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* scalar = (dcomplex*) bli_obj_buffer( alpha );
+      dcomplex* ref    = (dcomplex*) bli_obj_buffer( x_orig );
+      dcomplex* result = (dcomplex*) bli_obj_buffer( x );
+      libblis_icscalm_check<dcomplex, int64_t>(rows, cols, scalar,
+                               ref, row_stride, col_stride, conj_alpha);
+      diff = computediffim(rows, cols, result, ref, row_stride, col_stride);
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return diff;
+}
+
--- a/gtestsuite/src/ref_scalv.cpp
+++ b/gtestsuite/src/ref_scalv.cpp
@@ -0,0 +1,130 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_scalv.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> SCALV performs vector operations
+//*>     x := conjalpha(alpha) * x
+//*>    where x is a vector of length n, and alpha is a scalar.
+//*  ==========================================================================
+
+// Reference kernel for real-valued SCALV: x := beta * x, in place.
+//
+//   len      : number of elements processed
+//   beta     : pointer to the scalar; only the first element is read
+//   X, incx  : vector updated in place; element k is X[k * incx]
+//
+// X is left untouched when len is zero or when beta equals 1.0. A zero beta
+// stores exact zeros instead of multiplying. The template parameter U is
+// not used by the body.
+template <typename T, typename U>
+void libblis_iscalv_check(dim_t len, T* beta, T* X, dim_t incx) {
+
+  if( len == 0 ) {
+    return;
+  }
+
+  const T scale = *beta;
+  if( scale == T(1.0) ) {
+    return;
+  }
+
+  const bool wipe = ( scale == T(0.0) );
+
+  for( dim_t k = 0 ; k < len ; ++k ) {
+    T& elem = X[k * incx];
+    if( wipe ) {
+      elem = T(0.0);
+    }
+    else {
+      elem = scale * elem;
+    }
+  }
+}
+
+// Reference kernel for complex SCALV: x := conjbeta(beta) * x, in place.
+//
+//   len      : number of elements processed
+//   beta     : pointer to the complex scalar (not modified; a local copy is
+//              conjugated when requested)
+//   X, incx  : vector updated in place; element k is X[k * incx]
+//   cfbeta   : when true, the conjugate of beta is used as the multiplier
+//
+// X is left untouched when len is zero or when the multiplier is exactly
+// 1 + 0i. A multiplier of exactly 0 + 0i stores zeros instead of
+// multiplying; every other value goes through mulc(). The template
+// parameter U is not used by the body.
+template <typename T, typename U>
+void libblis_icscalv_check(dim_t len, T* beta, T* X, dim_t incx, bool cfbeta) {
+  dim_t i, ix;
+  T ONE  = {1.0, 0.0} ;
+  T ZERO = {0.0, 0.0} ;
+  T Beta = *beta;
+
+  if( len == 0 ) {
+    return;
+  }
+
+  if( cfbeta )
+    Beta = conjugate<T>(Beta);
+
+  // A complex scalar equals one (or zero) only when BOTH components match;
+  // looking at the real part alone would treat 1 + bi as the identity.
+  if( (Beta.real == ONE.real) && (Beta.imag == ONE.imag) ) {
+    return;
+  }
+
+  /* Form  x := beta*x. */
+  ix = 0;
+  if( (Beta.real == ZERO.real) && (Beta.imag == ZERO.imag) ) {
+    for(i = 0; i < len ; i++) {
+      X[ix] = ZERO;
+      ix = ix + incx;
+    }
+  }
+  else {
+    for( i = 0 ; i < len ; i++ ) {
+      X[ix] = mulc<T>(Beta , X[ix]);
+      ix = ix + incx;
+    }
+  }
+  return;
+}
+
+// Check driver for SCALV. The reference kernel scales the buffer of `x_orig`
+// in place, and the buffer of `x` (presumably the library output -- confirm
+// against the caller) is then compared with it.
+//
+//   params : not referenced by this routine
+//   beta   : scalar object; its buffer and conj flag feed the reference
+//   x      : vector compared against the reference; supplies datatype,
+//            length and increment
+//   x_orig : vector whose buffer is overwritten by the reference kernel
+//
+// Returns whatever computediffrv() / computediffiv() reports, or 0.0 after
+// bli_check_error_code() has been called for an unsupported datatype.
+double libblis_test_iscalv_check(
+  test_params_t* params,
+  obj_t*  beta,
+  obj_t*  x,
+  obj_t*  x_orig
+) {
+  num_t   dtype     = bli_obj_dt( x );
+  dim_t   length    = bli_obj_vector_dim( x );
+  f77_int stride    = bli_obj_vector_inc( x );
+  bool    conj_beta = bli_obj_has_conj( beta );
+  double  diff      = 0.0;
+
+  switch( dtype )  {
+    case BLIS_FLOAT :
+    {
+      float* scalar = (float*) bli_obj_buffer( beta );
+      float* ref    = (float*) bli_obj_buffer( x_orig );
+      float* result = (float*) bli_obj_buffer( x );
+      libblis_iscalv_check<float, int32_t>(length, scalar, ref, stride );
+      diff = computediffrv(length, stride, result, ref);
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double* scalar = (double*) bli_obj_buffer( beta );
+      double* ref    = (double*) bli_obj_buffer( x_orig );
+      double* result = (double*) bli_obj_buffer( x );
+      libblis_iscalv_check<double, int64_t>(length, scalar, ref, stride );
+      diff = computediffrv(length, stride, result, ref);
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* scalar = (scomplex*) bli_obj_buffer( beta );
+      scomplex* ref    = (scomplex*) bli_obj_buffer( x_orig );
+      scomplex* result = (scomplex*) bli_obj_buffer( x );
+      libblis_icscalv_check<scomplex, int32_t>(length, scalar, ref, stride,
+                                               conj_beta );
+      diff = computediffiv(length, stride, result, ref);
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* scalar = (dcomplex*) bli_obj_buffer( beta );
+      dcomplex* ref    = (dcomplex*) bli_obj_buffer( x_orig );
+      dcomplex* result = (dcomplex*) bli_obj_buffer( x );
+      libblis_icscalv_check<dcomplex, int64_t>(length, scalar, ref, stride,
+                                               conj_beta );
+      diff = computediffiv(length, stride, result, ref);
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return diff;
+}
--- a/gtestsuite/src/ref_subm.cpp
+++ b/gtestsuite/src/ref_subm.cpp
@@ -0,0 +1,130 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_subm.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> SUBM performs matrix operations
+//*>    B := B - transa(A)
+//*>    where B is an m x n matrix.
+//*  ==========================================================================
+
+// Reference kernel for real-valued SUBM: Y := Y - X, element by element.
+//
+//   M, N     : row and column counts of the region that is updated
+//   X        : subtrahend matrix, element (i,j) at X[i*rsx + j*csx]
+//   Y        : matrix updated in place, element (i,j) at Y[i*rsy + j*csy]
+//   YY       : accepted for the caller's convenience; not referenced here
+//
+// Nothing is written when M or N is zero. The template parameter U is not
+// used by the body.
+template <typename T, typename U>
+void libblis_isubm_check(dim_t M, dim_t N, T* X, dim_t rsx, dim_t csx,
+                                      T* Y, dim_t rsy, dim_t csy, T* YY) {
+
+  if( (M == 0) || (N == 0) ) {
+    return;
+  }
+
+  for( dim_t r = 0 ; r < M ; ++r ) {
+    for( dim_t c = 0 ; c < N ; ++c ) {
+      T& dst = Y[r*rsy + c*csy];
+      dst = dst - X[r*rsx + c*csx];
+    }
+  }
+}
+
+// Reference kernel for complex SUBM: Y := Y - conjx(X).
+//
+//   M, N     : row and column counts of the region that is updated
+//   X        : subtrahend matrix, element (i,j) at X[i*rsx + j*csx]
+//   conjx    : any non-zero value conjugates X IN PLACE before the
+//              subtraction, so the caller's X buffer is modified
+//   Y        : matrix updated in place, element (i,j) at Y[i*rsy + j*csy]
+//
+// Nothing is touched when M or N is zero. The template parameter U is not
+// used by the body.
+template <typename T, typename U>
+void libblis_icsubm_check(dim_t M, dim_t N, T* X, dim_t rsx, dim_t csx,
+                               conj_t conjx, T* Y, dim_t rsy, dim_t csy) {
+
+  if( (M == 0) || (N == 0) ) {
+    return;
+  }
+
+  // First pass: conjugate the whole of X when requested.
+  if( conjx ) {
+    for( dim_t r = 0 ; r < M ; ++r ) {
+      for( dim_t c = 0 ; c < N ; ++c ) {
+        T& src = X[r*rsx + c*csx];
+        src = conjugate<T>(src);
+      }
+    }
+  }
+
+  // Second pass: subtract element by element.
+  for( dim_t r = 0 ; r < M ; ++r ) {
+    for( dim_t c = 0 ; c < N ; ++c ) {
+      T& dst = Y[r*rsy + c*csy];
+      dst = subc<T>(dst, X[r*rsx + c*csx]);
+    }
+  }
+}
+
+// Check driver for SUBM. The reference kernel subtracts x from the buffer of
+// `y_orig` in place, and the buffer of `y` (presumably the library output --
+// confirm against the caller) is then compared with it.
+//
+//   params : not referenced by this routine
+//   x      : subtrahend matrix; supplies datatype, trans/conj flags, strides
+//   y      : matrix compared against the reference; supplies dimensions and
+//            strides
+//   y_orig : matrix whose buffer is overwritten by the reference kernel
+//
+// Returns whatever computediffrm() / computediffim() reports, or 0.0 after
+// bli_check_error_code() has been called for an unsupported datatype.
+double libblis_test_isubm_check(
+  test_params_t* params,
+  obj_t*  x,
+  obj_t*  y,
+  obj_t*  y_orig
+) {
+  num_t  dtype   = bli_obj_dt( x );
+  bool   x_trans = bli_obj_has_trans( x );
+  conj_t x_conj  = bli_obj_conj_status( x );
+  dim_t  rows    = bli_obj_length( y );
+  dim_t  cols    = bli_obj_width( y );
+  dim_t  y_rs    = bli_obj_row_stride( y ) ;
+  dim_t  y_cs    = bli_obj_col_stride( y ) ;
+  double diff    = 0.0;
+
+  // A transposed x is walked with its row and column strides exchanged; the
+  // choice does not depend on whether x is row- or column-stored.
+  dim_t  x_rs = x_trans ? bli_obj_col_stride( x ) : bli_obj_row_stride( x ) ;
+  dim_t  x_cs = x_trans ? bli_obj_row_stride( x ) : bli_obj_col_stride( x ) ;
+
+  switch( dtype )  {
+    case BLIS_FLOAT :
+    {
+      float* src    = (float*) bli_obj_buffer( x );
+      float* ref    = (float*) bli_obj_buffer( y_orig );
+      float* result = (float*) bli_obj_buffer( y );
+      libblis_isubm_check<float, int32_t>( rows, cols, src, x_rs, x_cs,
+                                           ref, y_rs, y_cs, result );
+      diff = computediffrm(rows, cols, result, ref, y_rs, y_cs);
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double* src    = (double*) bli_obj_buffer( x );
+      double* ref    = (double*) bli_obj_buffer( y_orig );
+      double* result = (double*) bli_obj_buffer( y );
+      libblis_isubm_check<double, int64_t>( rows, cols, src, x_rs, x_cs,
+                                            ref, y_rs, y_cs, result );
+      diff = computediffrm(rows, cols, result, ref, y_rs, y_cs);
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* src    = (scomplex*) bli_obj_buffer( x );
+      scomplex* ref    = (scomplex*) bli_obj_buffer( y_orig );
+      scomplex* result = (scomplex*) bli_obj_buffer( y );
+      libblis_icsubm_check<scomplex, int32_t>( rows, cols, src, x_rs, x_cs,
+                                               x_conj, ref, y_rs, y_cs );
+      diff = computediffim(rows, cols, result, ref, y_rs, y_cs);
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* src    = (dcomplex*) bli_obj_buffer( x );
+      dcomplex* ref    = (dcomplex*) bli_obj_buffer( y_orig );
+      dcomplex* result = (dcomplex*) bli_obj_buffer( y );
+      libblis_icsubm_check<dcomplex, int64_t>( rows, cols, src, x_rs, x_cs,
+                                               x_conj, ref, y_rs, y_cs );
+      diff = computediffim(rows, cols, result, ref, y_rs, y_cs);
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return diff;
+}
+
--- a/gtestsuite/src/ref_subv.cpp
+++ b/gtestsuite/src/ref_subv.cpp
@@ -0,0 +1,117 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_subv.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> SUBV performs vector operations
+//*>    y := y - conjx(x)
+//*>    where x and y are vectors of length n.
+//*  ==========================================================================
+
+// Reference kernel for real-valued SUBV: y := y - x.
+//
+//   len      : number of elements processed
+//   X, incx  : subtrahend vector; element k is X[k * incx]
+//   Y, incy  : vector updated in place; element k is Y[k * incy]
+//
+// Nothing is written when len is zero. The template parameter U is not used
+// by the body.
+template <typename T, typename U>
+void libblis_isubv_check(dim_t len, T* X, dim_t incx, T* Y, dim_t incy) {
+
+  if( len == 0 ) {
+    return;
+  }
+
+  for( dim_t k = 0 ; k < len ; ++k ) {
+    T& dst = Y[k * incy];
+    dst = dst - X[k * incx];
+  }
+}
+
+// Reference kernel for complex SUBV: y := y - conjx(x).
+//
+//   len      : number of elements processed
+//   X, incx  : subtrahend vector; element k is X[k * incx]
+//   Y, incy  : vector updated in place; element k is Y[k * incy]
+//   cfx      : when true, X is conjugated IN PLACE before the subtraction,
+//              so the caller's X buffer is modified
+//
+// Nothing is touched when len is zero. The template parameter U is not used
+// by the body.
+template <typename T, typename U>
+void libblis_icsubv_check(dim_t len, T* X, dim_t incx, T* Y,
+                                                   dim_t incy, bool cfx) {
+  if( len == 0 ) {
+    return;
+  }
+
+  // First pass: conjugate the whole of X when requested.
+  if( cfx ) {
+    for( dim_t k = 0 ; k < len ; ++k ) {
+      T& src = X[k * incx];
+      src = conjugate<T>(src);
+    }
+  }
+
+  // Second pass: subtract element by element.
+  for( dim_t k = 0 ; k < len ; ++k ) {
+    T& dst = Y[k * incy];
+    dst = subc<T>(dst , X[k * incx]);
+  }
+}
+
+// Check driver for SUBV. The reference kernel subtracts x from the buffer of
+// `y_orig` in place, and the buffer of `y` (presumably the library output --
+// confirm against the caller) is then compared with it.
+//
+//   params : not referenced by this routine
+//   alpha  : not referenced by this routine
+//   beta   : not referenced by this routine
+//   x      : subtrahend vector; supplies datatype, length, increment and
+//            conj flag
+//   y      : vector compared against the reference
+//   y_orig : vector whose buffer is overwritten by the reference kernel; its
+//            increment is used for both y buffers
+//
+// Returns whatever computediffrv() / computediffiv() reports, or 0.0 after
+// bli_check_error_code() has been called for an unsupported datatype.
+double libblis_test_isubv_check(
+  test_params_t* params,
+  obj_t*  alpha,
+  obj_t*  beta,
+  obj_t*  x,
+  obj_t*  y,
+  obj_t*  y_orig
+) {
+  num_t   dtype    = bli_obj_dt( x );
+  dim_t   length   = bli_obj_vector_dim( x );
+  bool    x_conj   = bli_obj_has_conj( x );
+  f77_int x_stride = bli_obj_vector_inc( x );
+  f77_int y_stride = bli_obj_vector_inc( y_orig );
+  double  diff     = 0.0;
+
+  switch( dtype )  {
+    case BLIS_FLOAT :
+    {
+      float* src    = (float*) bli_obj_buffer( x );
+      float* ref    = (float*) bli_obj_buffer( y_orig );
+      float* result = (float*) bli_obj_buffer( y );
+      libblis_isubv_check<float, int32_t>( length, src, x_stride,
+                                           ref, y_stride );
+      diff = computediffrv(length, y_stride, result, ref);
+      break;
+    }
+    case BLIS_DOUBLE :
+    {
+      double* src    = (double*) bli_obj_buffer( x );
+      double* ref    = (double*) bli_obj_buffer( y_orig );
+      double* result = (double*) bli_obj_buffer( y );
+      libblis_isubv_check<double, int64_t>( length, src, x_stride,
+                                            ref, y_stride );
+      diff = computediffrv(length, y_stride, result, ref);
+      break;
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* src    = (scomplex*) bli_obj_buffer( x );
+      scomplex* ref    = (scomplex*) bli_obj_buffer( y_orig );
+      scomplex* result = (scomplex*) bli_obj_buffer( y );
+      libblis_icsubv_check<scomplex, int32_t>( length, src, x_stride,
+                                               ref, y_stride, x_conj );
+      diff = computediffiv(length, y_stride, result, ref);
+      break;
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* src    = (dcomplex*) bli_obj_buffer( x );
+      dcomplex* ref    = (dcomplex*) bli_obj_buffer( y_orig );
+      dcomplex* result = (dcomplex*) bli_obj_buffer( y );
+      libblis_icsubv_check<dcomplex, int64_t>( length, src, x_stride,
+                                               ref, y_stride, x_conj );
+      diff = computediffiv(length, y_stride, result, ref);
+      break;
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return diff;
+}
+
--- a/gtestsuite/src/ref_symm.cpp
+++ b/gtestsuite/src/ref_symm.cpp
@@ -0,0 +1,486 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_symm.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> SYMM  performs one of the matrix-matrix operations
+//*>    C := alpha*A*B + beta*C,
+//*> or
+//*>    C := alpha*B*A + beta*C,
+//*> where alpha and beta are scalars,  A is a symmetric matrix and  B and
+//*> C are m by n matrices.
+//*  ==========================================================================
+
+// Reference SYMM for real element types; C is updated in place.
+//   side == BLIS_LEFT : C := Alpha*A*B + Beta*C   (A indexed as M x M)
+//   otherwise         : C := Alpha*B*A + Beta*C   (A indexed as N x N)
+// Only the triangle of A selected by uplo (BLIS_UPPER, otherwise lower) plus
+// the diagonal is read. B and C are M x N. Every matrix is addressed through
+// its (row stride, column stride) pair. Returns immediately when M or N is
+// zero, or when Alpha == 0 and Beta == 1.
+template <typename T>
+void libblis_isymm_check(side_t side, uplo_t uplo, dim_t M, dim_t N,
+  T Alpha, T* A, dim_t rsa, dim_t csa, T* B, dim_t rsb, dim_t csb, T Beta,
+  T* C, dim_t rsc, dim_t csc )
+{
+    T kOne  = 1.0;
+    T kZero = 0.0;
+
+    // Strided element accessors for the three operands.
+    auto a_at = [&]( dim_t r, dim_t q ) -> T& { return A[r*rsa + q*csa]; };
+    auto b_at = [&]( dim_t r, dim_t q ) -> T& { return B[r*rsb + q*csb]; };
+    auto c_at = [&]( dim_t r, dim_t q ) -> T& { return C[r*rsc + q*csc]; };
+
+    bool from_left = ( side == BLIS_LEFT );
+    bool use_upper = ( uplo == BLIS_UPPER );
+
+    // Nothing to do.
+    if ( M == 0 || N == 0 ) return;
+    if ( Alpha == kZero && Beta == kOne ) return;
+
+    // Alpha == 0: the product term vanishes, C is only scaled (or cleared).
+    if ( Alpha == kZero )
+    {
+        for ( dim_t col = 0 ; col < N ; ++col )
+        {
+            for ( dim_t row = 0 ; row < M ; ++row )
+            {
+                if ( Beta == kZero ) c_at( row, col ) = kZero;
+                else                 c_at( row, col ) = Beta*c_at( row, col );
+            }
+        }
+        return;
+    }
+
+    if ( from_left )
+    {
+        // C := Alpha*A*B + Beta*C.
+        // Upper storage walks the rows top-down and touches k in [0, row);
+        // lower storage walks them bottom-up and touches k in (row, M).
+        for ( dim_t col = 0 ; col < N ; ++col )
+        {
+            for ( dim_t step = 0 ; step < M ; ++step )
+            {
+                dim_t row     = use_upper ? step : ( M - 1 - step );
+                dim_t k_first = use_upper ? 0    : ( row + 1 );
+                dim_t k_stop  = use_upper ? row  : M;          // exclusive
+
+                T scaled_b = Alpha*b_at( row, col );
+                T dot      = kZero;
+                for ( dim_t k = k_first ; k < k_stop ; ++k )
+                {
+                    // Off-diagonal element A(k,row) contributes twice: once
+                    // to C(k,col) and once to the running dot for C(row,col).
+                    c_at( k, col ) = c_at( k, col ) + scaled_b*a_at( k, row );
+                    dot = dot + b_at( k, col )*a_at( k, row );
+                }
+
+                if ( Beta == kZero )
+                    c_at( row, col ) = scaled_b*a_at( row, row ) + Alpha*dot;
+                else
+                    c_at( row, col ) = Beta*c_at( row, col ) + scaled_b*a_at( row, row ) + Alpha*dot;
+            }
+        }
+    }
+    else
+    {
+        // C := Alpha*B*A + Beta*C.
+        for ( dim_t col = 0 ; col < N ; ++col )
+        {
+            // Diagonal contribution, combined with the Beta scaling of C.
+            T coeff = Alpha*a_at( col, col );
+            for ( dim_t row = 0 ; row < M ; ++row )
+            {
+                if ( Beta == kZero )
+                    c_at( row, col ) = coeff*b_at( row, col );
+                else
+                    c_at( row, col ) = Beta*c_at( row, col ) + coeff*b_at( row, col );
+            }
+
+            // Off-diagonal contributions, k ascending with k == col skipped.
+            for ( dim_t k = 0 ; k < N ; ++k )
+            {
+                if ( k == col ) continue;
+
+                // (k, col) lies above the diagonal exactly when k < col;
+                // read it directly if that matches the stored triangle,
+                // otherwise read its mirror image (col, k).
+                bool above_diag = ( k < col );
+                if ( above_diag == use_upper )
+                    coeff = Alpha*a_at( k, col );
+                else
+                    coeff = Alpha*a_at( col, k );
+
+                for ( dim_t row = 0 ; row < M ; ++row )
+                {
+                    c_at( row, col ) = c_at( row, col ) + coeff*b_at( row, k );
+                }
+            }
+        }
+    }
+    return;
+}
+
+// Reference SYMM for complex element types; C is updated in place.
+//   side == BLIS_LEFT : C := Alpha*A*B + Beta*C   (A indexed as M x M)
+//   otherwise         : C := Alpha*B*A + Beta*C   (A indexed as N x N)
+// Only the triangle of A selected by uplo (BLIS_UPPER, otherwise lower) plus
+// the diagonal is read. B and C are M x N. Every matrix is addressed through
+// its (row stride, column stride) pair. T must expose .real and .imag
+// members; complex arithmetic goes through the mulc<T> / addc<T> helpers
+// declared elsewhere. The template parameter U is not used by this routine.
+//
+// Alpha and Beta are compared against zero / one on BOTH their real and
+// imaginary parts, so e.g. a purely imaginary Alpha is not mistaken for zero.
+template <typename T, typename U>
+void libblis_icsymm_check(side_t side, uplo_t uplo, dim_t M, dim_t N,
+  T Alpha, T* A, dim_t rsa, dim_t csa, T* B, dim_t rsb, dim_t csb, T Beta,
+  T* C, dim_t rsc, dim_t csc )
+{
+    T ONE  = {1.0 , 0.0};
+    T ZERO = {0.0 , 0.0};
+    T tmp1, tmp2;
+    bool LSIDE, UPPER;
+    dim_t i, j, k;
+
+    //*     Test the input parameters.
+    LSIDE   = (side == BLIS_LEFT);
+    UPPER   = (uplo == BLIS_UPPER);
+
+    // Full complex comparisons (real AND imaginary part).
+    bool alpha_is_zero = (Alpha.real == ZERO.real) && (Alpha.imag == ZERO.imag);
+    bool beta_is_zero  = (Beta.real  == ZERO.real) && (Beta.imag  == ZERO.imag);
+    bool beta_is_one   = (Beta.real  == ONE.real ) && (Beta.imag  == ONE.imag );
+
+    //*     Quick return if possible.
+    if( (M == 0 || N == 0) || ( alpha_is_zero && beta_is_one ) )
+      return;
+
+    //*     And when  Alpha.eq.zero: C is only scaled (or cleared).
+    if( alpha_is_zero )
+    {
+        if( beta_is_zero )
+        {
+            for( j = 0 ; j < N ; j++ )
+            {
+                for( i = 0 ; i < M ; i++ )
+                {
+                    C[i*rsc + j*csc] = ZERO;
+                }
+            }
+        }
+        else
+        {
+            for( j = 0 ; j < N ; j++ )
+            {
+                for( i = 0 ; i < M ; i++ )
+                {
+                    C[i*rsc + j*csc] = mulc<T>(Beta , C[i*rsc + j*csc]);
+                }
+            }
+        }
+        return;
+    }
+
+    //*     Start the operations.
+    if( LSIDE )
+    {
+        //* Form  C := Alpha*A*B + Beta*C.
+        if( UPPER )
+        {
+            // Rows top-down; A(k,i) with k < i comes from the upper triangle.
+            for( j = 0 ; j < N ; j++ )
+            {
+                for( i = 0 ; i < M ; i++ )
+                {
+                    tmp1 = mulc<T>(Alpha , B[i*rsb + j*csb]);
+                    tmp2 = ZERO;
+                    for( k = 0 ; k < i ; k++ )
+                    {
+                        C[k*rsc + j*csc] = addc<T>(C[k*rsc + j*csc] , mulc<T>(tmp1 , A[k*rsa + i*csa]));
+                        tmp2 = addc<T>(tmp2 , mulc<T>(B[k*rsb + j*csb] , A[k*rsa + i*csa]));
+                    }
+                    // tmp2 := tmp1*A(i,i) + Alpha*tmp2
+                    tmp2 = addc<T>(mulc<T>(tmp1 , A[i*rsa + i*csa]) , mulc<T>(Alpha , tmp2));
+                    if( beta_is_zero )
+                    {
+                        C[i*rsc + j*csc] = tmp2;
+                    }
+                    else
+                    {
+                        C[i*rsc + j*csc] = addc<T>(mulc<T>(Beta , C[i*rsc + j*csc]) , tmp2);
+                    }
+                }
+            }
+        }
+        else
+        {
+            // Rows bottom-up; A(k,i) with k > i comes from the lower triangle.
+            for( j = 0 ; j < N ; j++ )
+            {
+                for( i = (M-1) ; i >= 0 ; i-- )
+                {
+                    tmp1 = mulc<T>(Alpha , B[i*rsb + j*csb]);
+                    tmp2 = ZERO;
+                    for( k = (i+1) ; k < M ; k++ )
+                    {
+                        // A must be indexed with its own column stride (csa).
+                        C[k*rsc + j*csc] = addc<T>(C[k*rsc + j*csc] , mulc<T>(tmp1 , A[k*rsa + i*csa]));
+                        tmp2 = addc<T>(tmp2 , mulc<T>(B[k*rsb + j*csb] , A[k*rsa + i*csa]));
+                    }
+                    // tmp2 := tmp1*A(i,i) + Alpha*tmp2
+                    tmp2 = addc<T>(mulc<T>(tmp1 , A[i*rsa + i*csa]) , mulc<T>(Alpha , tmp2));
+                    if( beta_is_zero )
+                    {
+                        C[i*rsc + j*csc] = tmp2;
+                    }
+                    else
+                    {
+                        C[i*rsc + j*csc] = addc<T>(mulc<T>(Beta , C[i*rsc + j*csc]) , tmp2);
+                    }
+                }
+            }
+        }
+    }
+    else
+    {
+        //* Form  C := Alpha*B*A + Beta*C.
+        for( j = 0 ; j < N ; j++ )
+        {
+            // Diagonal contribution, combined with the Beta scaling of C.
+            tmp1 = mulc<T>(Alpha , A[j*rsa + j*csa]);
+            if( beta_is_zero )
+            {
+                for(i = 0 ; i < M ; i++)
+                {
+                    C[i*rsc + j*csc] = mulc<T>(tmp1 , B[i*rsb + j*csb]);
+                }
+            }
+            else
+            {
+                for(i = 0 ; i < M ; i++)
+                {
+                    C[i*rsc + j*csc] = addc<T>(mulc<T>(Beta , C[i*rsc + j*csc]) , mulc<T>(tmp1 , B[i*rsb + j*csb]));
+                }
+            }
+            // k < j: element (k,j) is above the diagonal; use it directly for
+            // upper storage, or its mirror (j,k) for lower storage.
+            for( k = 0 ; k < j ; k++ )
+            {
+                if( UPPER )
+                {
+                    tmp1 = mulc<T>(Alpha , A[k*rsa + j*csa]);
+                }
+                else
+                {
+                    tmp1 = mulc<T>(Alpha , A[j*rsa + k*csa]);
+                }
+                for(i = 0 ; i < M ; i++)
+                {
+                    C[i*rsc + j*csc] = addc<T>(C[i*rsc + j*csc] , mulc<T>(tmp1 , B[i*rsb + k*csb]));
+                }
+            }
+            // k > j: element (k,j) is below the diagonal; mirrored choice.
+            for( k = (j+1) ; k < N ; k++ )
+            {
+                if( UPPER )
+                {
+                    tmp1 = mulc<T>(Alpha , A[j*rsa + k*csa]);
+                }
+                else
+                {
+                    tmp1 = mulc<T>(Alpha , A[k*rsa + j*csa]);
+                }
+                for(i = 0 ; i < M ; i++)
+                {
+                    C[i*rsc + j*csc] = addc<T>(C[i*rsc + j*csc] , mulc<T>(tmp1 , B[i*rsb + k*csb]));
+                }
+            }
+        }
+    }
+    return;
+}
+
+// Reference-side check for SYMM.
+//
+// Runs the typed reference kernel (libblis_isymm_check for real types,
+// libblis_icsymm_check for complex types) on the buffer of c_orig, which the
+// kernel updates in place, and returns what computediffrm / computediffim
+// reports for the buffer of c versus that reference buffer.
+//
+//   params       : part of the common signature; not read here.
+//   side         : forwarded to the reference kernel.
+//   alpha, beta  : scalar objects; the first element of each buffer is used.
+//   a, b         : operand matrices; datatype and uplo are taken from a.
+//   c            : matrix being validated; dimensions and the strides used
+//                  for BOTH c and c_orig are read from it.
+//   c_orig       : matrix whose buffer receives the reference result.
+//
+// For a datatype other than the four handled ones,
+// bli_check_error_code( BLIS_INVALID_DATATYPE ) is invoked and, if that call
+// returns, the result is 0.0.
+double libblis_test_isymm_check
+    (
+      test_params_t* params,
+      side_t         side,
+      obj_t*         alpha,
+      obj_t*         a,
+      obj_t*         b,
+      obj_t*         beta,
+      obj_t*         c,
+      obj_t*         c_orig
+    )
+{
+    dim_t  rows     = bli_obj_length( c );
+    dim_t  cols     = bli_obj_width( c );
+    dim_t  a_rs     = bli_obj_row_stride( a );
+    dim_t  a_cs     = bli_obj_col_stride( a );
+    dim_t  b_rs     = bli_obj_row_stride( b );
+    dim_t  b_cs     = bli_obj_col_stride( b );
+    dim_t  c_rs     = bli_obj_row_stride( c );
+    dim_t  c_cs     = bli_obj_col_stride( c );
+    uplo_t tri      = bli_obj_uplo( a );
+    num_t  datatype = bli_obj_dt( a );
+    double residual = 0.0;
+
+    if ( datatype == BLIS_FLOAT )
+    {
+        float* alpha_p = (float*) bli_obj_buffer( alpha );
+        float* beta_p  = (float*) bli_obj_buffer( beta );
+        float* a_buf   = (float*) bli_obj_buffer( a );
+        float* b_buf   = (float*) bli_obj_buffer( b );
+        float* ref_buf = (float*) bli_obj_buffer( c_orig );
+        float* out_buf = (float*) bli_obj_buffer( c );
+        libblis_isymm_check<float>( side, tri, rows, cols, *alpha_p,
+                                    a_buf, a_rs, a_cs, b_buf, b_rs, b_cs,
+                                    *beta_p, ref_buf, c_rs, c_cs );
+        residual = computediffrm( rows, cols, out_buf, ref_buf, c_rs, c_cs );
+    }
+    else if ( datatype == BLIS_DOUBLE )
+    {
+        double* alpha_p = (double*) bli_obj_buffer( alpha );
+        double* beta_p  = (double*) bli_obj_buffer( beta );
+        double* a_buf   = (double*) bli_obj_buffer( a );
+        double* b_buf   = (double*) bli_obj_buffer( b );
+        double* ref_buf = (double*) bli_obj_buffer( c_orig );
+        double* out_buf = (double*) bli_obj_buffer( c );
+        libblis_isymm_check<double>( side, tri, rows, cols, *alpha_p,
+                                     a_buf, a_rs, a_cs, b_buf, b_rs, b_cs,
+                                     *beta_p, ref_buf, c_rs, c_cs );
+        residual = computediffrm( rows, cols, out_buf, ref_buf, c_rs, c_cs );
+    }
+    else if ( datatype == BLIS_SCOMPLEX )
+    {
+        scomplex* alpha_p = (scomplex*) bli_obj_buffer( alpha );
+        scomplex* beta_p  = (scomplex*) bli_obj_buffer( beta );
+        scomplex* a_buf   = (scomplex*) bli_obj_buffer( a );
+        scomplex* b_buf   = (scomplex*) bli_obj_buffer( b );
+        scomplex* ref_buf = (scomplex*) bli_obj_buffer( c_orig );
+        scomplex* out_buf = (scomplex*) bli_obj_buffer( c );
+        libblis_icsymm_check<scomplex, float>( side, tri, rows, cols, *alpha_p,
+                                     a_buf, a_rs, a_cs, b_buf, b_rs, b_cs,
+                                     *beta_p, ref_buf, c_rs, c_cs );
+        residual = computediffim( rows, cols, out_buf, ref_buf, c_rs, c_cs );
+    }
+    else if ( datatype == BLIS_DCOMPLEX )
+    {
+        dcomplex* alpha_p = (dcomplex*) bli_obj_buffer( alpha );
+        dcomplex* beta_p  = (dcomplex*) bli_obj_buffer( beta );
+        dcomplex* a_buf   = (dcomplex*) bli_obj_buffer( a );
+        dcomplex* b_buf   = (dcomplex*) bli_obj_buffer( b );
+        dcomplex* ref_buf = (dcomplex*) bli_obj_buffer( c_orig );
+        dcomplex* out_buf = (dcomplex*) bli_obj_buffer( c );
+        libblis_icsymm_check<dcomplex, double>( side, tri, rows, cols, *alpha_p,
+                                     a_buf, a_rs, a_cs, b_buf, b_rs, b_cs,
+                                     *beta_p, ref_buf, c_rs, c_cs );
+        residual = computediffim( rows, cols, out_buf, ref_buf, c_rs, c_cs );
+    }
+    else
+    {
+        bli_check_error_code( BLIS_INVALID_DATATYPE );
+    }
+
+    return residual;
+}
+
+// Scans the M x N real matrix held by b for a NaN, addressing element (i,j)
+// as buffer[i*rs + j*cs] with the caller-supplied strides (the strides stored
+// in b are not consulted). Returns the first NaN encountered in row-by-row
+// order, converted to double, or 0.0 when no element is NaN. The scan stops
+// at the first hit.
+template <typename T>
+double libblis_check_nan_real( dim_t rs, dim_t cs, obj_t* b ) {
+  dim_t  M = bli_obj_length( b );
+  dim_t  N = bli_obj_width( b );
+  T* B = (T*) bli_obj_buffer( b );
+
+  for( dim_t i = 0 ; i < M ; i++ ) {
+    for( dim_t j = 0 ; j < N ; j++ ) {
+      auto tv = B[ i*rs + j*cs ];
+      if ( bli_isnan( tv )) {
+        // Return right away: a plain break would only leave the inner loop
+        // and the remaining rows would still be scanned for no benefit.
+        return tv;
+      }
+    }
+  }
+  return 0.0;
+}
+
+// Scans the M x N complex matrix held by b for an element whose real or
+// imaginary part is NaN, addressing element (i,j) as buffer[i*rs + j*cs] with
+// the caller-supplied strides (the strides stored in b are not consulted).
+// Returns the offending NaN component (the real part when it is NaN,
+// otherwise the imaginary part) converted to double, or 0.0 when no element
+// contains a NaN. The scan stops at the first hit.
+template <typename T>
+double libblis_check_nan_complex( dim_t rs, dim_t cs, obj_t* b ) {
+  dim_t  M = bli_obj_length( b );
+  dim_t  N = bli_obj_width( b );
+  T* B = (T*) bli_obj_buffer( b );
+
+  for( dim_t i = 0 ; i < M ; i++ ) {
+    for( dim_t j = 0 ; j < N ; j++ ) {
+      auto tv = B[ i*rs + j*cs ];
+      if ( bli_isnan( tv.real ) || bli_isnan( tv.imag )) {
+        // Return right away: a plain break would only leave the inner loop
+        // and the remaining rows would still be scanned for no benefit.
+        return bli_isnan( tv.real ) ? tv.real : tv.imag;
+      }
+    }
+  }
+  return 0.0;
+}
+
+// NaN probe for a SYMM result matrix.
+//
+// Chooses the strides used for the scan from c: the row stride is always the
+// one stored in c; the column stride is the one stored in c when the row
+// stride is 1, and 1 otherwise. It then dispatches on dt to the real or
+// complex scanner and returns that scanner's result.
+//
+// For a dt other than the four handled ones,
+// bli_check_error_code( BLIS_INVALID_DATATYPE ) is invoked and, if that call
+// returns, the result is 0.0.
+double libblis_check_nan_symm(obj_t* c, num_t dt ) {
+  dim_t row_step = bli_obj_row_stride( c );
+  dim_t col_step = 1;
+  if ( row_step == 1 ) {
+    col_step = bli_obj_col_stride( c );
+  }
+
+  double nan_found = 0.0;
+
+  if ( dt == BLIS_FLOAT ) {
+    nan_found = libblis_check_nan_real<float>( row_step, col_step, c );
+  }
+  else if ( dt == BLIS_DOUBLE ) {
+    nan_found = libblis_check_nan_real<double>( row_step, col_step, c );
+  }
+  else if ( dt == BLIS_SCOMPLEX ) {
+    nan_found = libblis_check_nan_complex<scomplex>( row_step, col_step, c );
+  }
+  else if ( dt == BLIS_DCOMPLEX ) {
+    nan_found = libblis_check_nan_complex<dcomplex>( row_step, col_step, c );
+  }
+  else {
+    bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return nan_found;
+}
--- a/gtestsuite/src/ref_symv.cpp
+++ b/gtestsuite/src/ref_symv.cpp
@@ -0,0 +1,305 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_symv.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> SYMV performs the matrix-vector  operation
+//*>    y := alpha*A*x + beta*y
+//*> where alpha and beta are scalars, x and y are n element vectors and
+//*> A is an n by n symmetric matrix.
+//*  ==========================================================================
+
+// Reference SYMV for real element types; Y is updated in place:
+//     y := alpha*A*x + beta*y
+// A is indexed as an M x M matrix through (rsa, csa); only the triangle
+// selected by uploa (BLIS_UPPER, otherwise lower) plus the diagonal is read.
+// alpha and beta point at the scalars (element 0 is used). X and Y hold M
+// elements spaced incx / incy apart. The template parameter U is not used by
+// this routine. Returns immediately when M == 0, or when alpha == 0 and
+// beta == 1.
+template <typename T, typename U>
+void libblis_isymv_check(uplo_t uploa, dim_t M, T* alpha, T* A,
+   dim_t rsa, dim_t csa, T* X, dim_t incx, T* beta, T* Y, dim_t incy) {
+  T ONE = 1.0;
+  T ZERO = 0.0;
+  T Alpha = alpha[0];
+  T Beta = beta[0];
+  T tmp1, tmp2;
+  dim_t i, ix, iy, j, jx, jy, kx, ky;
+
+  if ((M == 0) ||
+    ((Alpha == ZERO) && (Beta == ONE)))
+      return ;
+
+  //*     Set up the start points in  X  and  Y.
+  // For a non-positive increment the first logical element sits at 0-based
+  // offset (1-M)*inc, the 0-based form of reference BLAS' 1-(N-1)*INC.
+  // NOTE(review): assumes, as reference BLAS does, that X / Y point at the
+  // lowest-addressed element of the vector -- confirm against the harness.
+  kx = (incx > 0) ? 0 : (1 - M) * incx;
+  ky = (incy > 0) ? 0 : (1 - M) * incy;
+
+  //*     First form  y := beta*y.
+  if (Beta != ONE) {
+    iy = ky;
+    if (Beta == ZERO) {
+      for(i = 0 ; i < M ; i++) {
+        Y[iy] = ZERO;
+        iy = iy + incy;
+      }
+    }
+    else {
+      for(i = 0 ; i < M ; i++) {
+        Y[iy] = (Beta * Y[iy]);
+        iy = iy + incy;
+      }
+    }
+  }
+
+  if (Alpha == ZERO)
+    return;
+
+  T tmp = 0.0 ;
+  if(uploa == BLIS_UPPER) {
+    //* Form  y  when A is stored in upper triangle.
+    jx = kx;
+    jy = ky;
+    for(j = 0 ; j < M ; j++) {
+      tmp1 = (Alpha * X[jx]);
+      tmp2 = ZERO;
+      ix = kx;
+      iy = ky;
+      // Each off-diagonal A(i,j), i < j, feeds y(i) directly and y(j) via tmp2.
+      for(i = 0 ; i < j ; i++) {
+        tmp = A[i*rsa + j*csa];
+        Y[iy] = Y[iy] + (tmp1 * tmp);
+        tmp2 = tmp2 + (tmp * X[ix]);
+        ix = ix + incx;
+        iy = iy + incy;
+      }
+      tmp = A[j*rsa + j*csa];
+      Y[jy] = Y[jy] + (tmp1 * tmp) + (Alpha * tmp2);
+      jx = jx + incx;
+      jy = jy + incy;
+    }
+  }
+  else {
+    //* Form  y  when A is stored in lower triangle.
+    jx = kx;
+    jy = ky;
+    for(j = 0 ; j < M ; j++) {
+      tmp1 = (Alpha * X[jx]);
+      tmp = A[j*rsa + j*csa];
+      tmp2 = ZERO;
+      Y[jy] = Y[jy] + (tmp1 * tmp);
+      ix = jx;
+      iy = jy;
+      // Each off-diagonal A(i,j), i > j, feeds y(i) directly and y(j) via tmp2.
+      for(i = (j+1) ; i < M ; i++) {
+        ix = ix + incx;
+        iy = iy + incy;
+        tmp = A[i*rsa + j*csa];
+        Y[iy] = Y[iy] + (tmp1 * tmp);
+        tmp2 = tmp2 + (tmp * X[ix]);
+      }
+      Y[jy] = Y[jy] + (Alpha * tmp2);
+      jx = jx + incx;
+      jy = jy + incy;
+    }
+  }
+
+  return;
+}
+
+// Reference SYMV for complex element types; Y is updated in place:
+//     y := alpha*A*x + beta*y
+// A is indexed as an M x M matrix through (rsa, csa); only the triangle
+// selected by uploa (BLIS_UPPER, otherwise lower) plus the diagonal is read
+// by the product. alpha and beta point at the scalars. X and Y hold M
+// elements spaced incx / incy apart. T must expose .real and .imag members;
+// complex arithmetic goes through the mulc<T> / addc<T> / conjugate<T>
+// helpers declared elsewhere. The template parameter U is not used.
+//
+// Side effects on the inputs: when conjx is set every element of X is
+// replaced by conjugate<T>() of itself, and when conja is set every element
+// of the M x M matrix A is likewise replaced, before the product is formed.
+//
+// alpha and beta are compared against zero / one on BOTH their real and
+// imaginary parts.
+template <typename T, typename U>
+void libblis_icsymv_check(uplo_t uploa, dim_t M, T* alpha, T* A, dim_t rsa,
+dim_t csa, bool conja, T* X, dim_t incx, bool conjx, T* beta, T* Y, dim_t incy) {
+  T ONE   = { 1.0, 0.0 };
+  T ZERO  = { 0.0, 0.0 };
+  T Alpha = *alpha;
+  T Beta  = *beta;
+  T tmp1, tmp2;
+  dim_t i, ix, iy, j, jx, jy, kx, ky;
+
+  // Full complex comparisons (real AND imaginary part).
+  bool alpha_is_zero = (Alpha.real == ZERO.real) && (Alpha.imag == ZERO.imag);
+  bool beta_is_zero  = (Beta.real  == ZERO.real) && (Beta.imag  == ZERO.imag);
+  bool beta_is_one   = (Beta.real  == ONE.real ) && (Beta.imag  == ONE.imag );
+
+  if ((M == 0) || (alpha_is_zero && beta_is_one))
+      return ;
+
+  //*     Set up the start points in  X  and  Y.
+  // For a non-positive increment the first logical element sits at 0-based
+  // offset (1-M)*inc, the 0-based form of reference BLAS' 1-(N-1)*INC.
+  // NOTE(review): assumes, as reference BLAS does, that X / Y point at the
+  // lowest-addressed element of the vector -- confirm against the harness.
+  kx = (incx > 0) ? 0 : (1 - M) * incx;
+  ky = (incy > 0) ? 0 : (1 - M) * incy;
+
+  //*     First form  y := beta*y.
+  if (!beta_is_one) {
+    iy = ky;
+    if (beta_is_zero) {
+      for(i = 0 ; i < M ; i++) {
+        Y[iy] = ZERO;
+        iy = iy + incy;
+      }
+    }
+    else {
+      for(i = 0 ; i < M ; i++) {
+        Y[iy] = mulc<T>(Beta , Y[iy]);
+        iy = iy + incy;
+      }
+    }
+  }
+
+  if (alpha_is_zero)
+    return;
+
+  // Optional in-place conjugation of the inputs. The walk over X starts at
+  // kx so that it visits the same M elements the product below reads.
+  if(conjx) {
+    ix = kx;
+    for(i = 0 ; i < M ; i++) {
+      X[ix] = conjugate<T>(X[ix]);
+      ix = ix + incx;
+    }
+  }
+
+  if(conja) {
+    for(i = 0 ; i < M ; i++) {
+      for(j = 0 ; j < M ; j++) {
+        A[i*rsa + j*csa] = conjugate<T>(A[i*rsa + j*csa]);
+      }
+    }
+  }
+
+  T tmp = {0.0, 0.0};
+  if(uploa == BLIS_UPPER) {
+    //* Form  y  when A is stored in upper triangle.
+    jx = kx;
+    jy = ky;
+    for(j = 0 ; j < M ; j++) {
+      tmp1 = mulc<T>(Alpha , X[jx]);
+      tmp2 = ZERO;
+      ix = kx;
+      iy = ky;
+      // Each off-diagonal A(i,j), i < j, feeds y(i) directly and y(j) via tmp2.
+      for(i = 0 ; i < j ; i++) {
+        tmp = A[i*rsa + j*csa];
+        Y[iy] = addc<T>(Y[iy] , mulc<T>(tmp1 , tmp));
+        tmp2  = addc<T>(tmp2 , mulc<T>(tmp , X[ix]));
+        ix = ix + incx;
+        iy = iy + incy;
+      }
+      tmp = A[j*rsa + j*csa];
+      tmp = addc<T>(mulc<T>(tmp1 , tmp) , mulc<T>(Alpha , tmp2));
+      Y[jy] = addc<T>(Y[jy] , tmp );
+      jx = jx + incx;
+      jy = jy + incy;
+    }
+  }
+  else {
+    //* Form  y  when A is stored in lower triangle.
+    jx = kx;
+    jy = ky;
+    for(j = 0 ; j < M ; j++) {
+      tmp1 = mulc<T>(Alpha , X[jx]);
+      tmp  = A[j*rsa + j*csa];
+      tmp2 = ZERO;
+      Y[jy] = addc<T>(Y[jy] , mulc<T>(tmp1 , tmp));
+      ix = jx;
+      iy = jy;
+      // Each off-diagonal A(i,j), i > j, feeds y(i) directly and y(j) via tmp2.
+      for(i = (j+1) ; i < M ; i++) {
+        ix = ix + incx;
+        iy = iy + incy;
+        tmp = A[i*rsa + j*csa];
+        Y[iy] = addc<T>(Y[iy] , mulc<T>(tmp1 , tmp));
+        tmp2  = addc<T>(tmp2 , mulc<T>(tmp , X[ix]));
+      }
+      Y[jy] = addc<T>(Y[jy] , mulc<T>(Alpha , tmp2));
+      jx = jx + incx;
+      jy = jy + incy;
+    }
+  }
+
+  return;
+}
+
+// Reference-side check for SYMV.
+//
+// Runs the typed reference kernel (libblis_isymv_check for real types,
+// libblis_icsymv_check for complex types) on the buffer of y_orig, which the
+// kernel updates in place, and returns what computediffrv / computediffiv
+// reports for the buffer of y versus that reference buffer.
+//
+//   params       : part of the common signature; not read here.
+//   alpha, beta  : scalar objects; their buffers are passed as pointers.
+//   a            : matrix operand; supplies datatype, uplo, order (its
+//                  length), strides and conj flag.
+//   x            : vector operand; supplies its stride and conj flag.
+//   y            : vector being validated; the y stride is read from it.
+//   y_orig       : vector whose buffer receives the reference result.
+//
+// For a datatype other than the four handled ones,
+// bli_check_error_code( BLIS_INVALID_DATATYPE ) is invoked and, if that call
+// returns, the result is 0.0.
+double libblis_test_isymv_check(
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         a,
+  obj_t*         x,
+  obj_t*         beta,
+  obj_t*         y,
+  obj_t*         y_orig
+){
+  dim_t  order    = bli_obj_length( a );
+  dim_t  a_rs     = bli_obj_row_stride( a );
+  dim_t  a_cs     = bli_obj_col_stride( a );
+  dim_t  x_step   = bli_obj_vector_inc( x );
+  dim_t  y_step   = bli_obj_vector_inc( y );
+  bool   a_conj   = bli_obj_has_conj( a );
+  bool   x_conj   = bli_obj_has_conj( x );
+  uplo_t tri      = bli_obj_uplo( a );
+  num_t  datatype = bli_obj_dt( a );
+  double residual = 0.0;
+
+  if ( datatype == BLIS_FLOAT ) {
+    float* alpha_p = (float*) bli_obj_buffer( alpha );
+    float* beta_p  = (float*) bli_obj_buffer( beta );
+    float* a_buf   = (float*) bli_obj_buffer( a );
+    float* x_buf   = (float*) bli_obj_buffer( x );
+    float* ref_buf = (float*) bli_obj_buffer( y_orig );
+    float* out_buf = (float*) bli_obj_buffer( y );
+    libblis_isymv_check<float, int32_t>( tri, order, alpha_p, a_buf, a_rs, a_cs,
+                                         x_buf, x_step, beta_p, ref_buf, y_step );
+    residual = computediffrv( order, y_step, out_buf, ref_buf );
+  }
+  else if ( datatype == BLIS_DOUBLE ) {
+    double* alpha_p = (double*) bli_obj_buffer( alpha );
+    double* beta_p  = (double*) bli_obj_buffer( beta );
+    double* a_buf   = (double*) bli_obj_buffer( a );
+    double* x_buf   = (double*) bli_obj_buffer( x );
+    double* ref_buf = (double*) bli_obj_buffer( y_orig );
+    double* out_buf = (double*) bli_obj_buffer( y );
+    libblis_isymv_check<double, int64_t>( tri, order, alpha_p, a_buf, a_rs, a_cs,
+                                          x_buf, x_step, beta_p, ref_buf, y_step );
+    residual = computediffrv( order, y_step, out_buf, ref_buf );
+  }
+  else if ( datatype == BLIS_SCOMPLEX ) {
+    scomplex* alpha_p = (scomplex*) bli_obj_buffer( alpha );
+    scomplex* beta_p  = (scomplex*) bli_obj_buffer( beta );
+    scomplex* a_buf   = (scomplex*) bli_obj_buffer( a );
+    scomplex* x_buf   = (scomplex*) bli_obj_buffer( x );
+    scomplex* ref_buf = (scomplex*) bli_obj_buffer( y_orig );
+    scomplex* out_buf = (scomplex*) bli_obj_buffer( y );
+    // Only the complex kernels receive the conjugation flags.
+    libblis_icsymv_check<scomplex, int32_t>( tri, order, alpha_p, a_buf, a_rs, a_cs,
+                              a_conj, x_buf, x_step, x_conj, beta_p, ref_buf, y_step );
+    residual = computediffiv( order, y_step, out_buf, ref_buf );
+  }
+  else if ( datatype == BLIS_DCOMPLEX ) {
+    dcomplex* alpha_p = (dcomplex*) bli_obj_buffer( alpha );
+    dcomplex* beta_p  = (dcomplex*) bli_obj_buffer( beta );
+    dcomplex* a_buf   = (dcomplex*) bli_obj_buffer( a );
+    dcomplex* x_buf   = (dcomplex*) bli_obj_buffer( x );
+    dcomplex* ref_buf = (dcomplex*) bli_obj_buffer( y_orig );
+    dcomplex* out_buf = (dcomplex*) bli_obj_buffer( y );
+    libblis_icsymv_check<dcomplex, int64_t>( tri, order, alpha_p, a_buf, a_rs, a_cs,
+                              a_conj, x_buf, x_step, x_conj, beta_p, ref_buf, y_step );
+    residual = computediffiv( order, y_step, out_buf, ref_buf );
+  }
+  else {
+    bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return residual;
+}
--- a/gtestsuite/src/ref_syr.cpp
+++ b/gtestsuite/src/ref_syr.cpp
@@ -0,0 +1,195 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_syr.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> SYR   performs the symmetric rank 1 operation
+//*>    A := alpha*x*x**T + A,
+//*> where alpha is a real scalar, x is an n element vector and A is an
+//*> n by n symmetric matrix.
+//*  ==========================================================================
+
+// Reference SYR for real element types; A is updated in place:
+//     A := alpha*x*x**T + A
+// A is indexed as an N x N matrix through (rsa, csa); only the triangle
+// selected by uploa (BLIS_UPPER, otherwise lower), diagonal included, is
+// written. alpha points at the scalar (element 0 is used). X holds N elements
+// spaced incx apart. The template parameter U is not used by this routine.
+// Returns immediately when N == 0 or alpha == 0.
+template <typename T, typename U>
+void libblis_isyr_check(uplo_t uploa, dim_t N, T* alpha, T* X, dim_t incx,
+                                               T* A, dim_t rsa, dim_t csa) {
+  T ZERO = 0.0;
+  T Alpha = alpha[0];
+  T temp;
+  // Indices share the dim_t type of N / incx so that nothing is narrowed.
+  dim_t i, ix, j, jx, kx;
+
+  if((N == 0) || (Alpha == ZERO))
+    return;
+
+  /* Set the start point in X if the increment is not unity. */
+  // For a non-positive increment the first logical element sits at 0-based
+  // offset (1-N)*incx, the 0-based form of reference BLAS' 1-(N-1)*INCX.
+  // NOTE(review): assumes, as reference BLAS does, that X points at the
+  // lowest-addressed element of the vector -- confirm against the harness.
+  kx = (incx > 0) ? 0 : (1 - N) * incx;
+
+  if(uploa == BLIS_UPPER) {
+    /* Form  A  when A is stored in upper triangle. */
+    jx = kx;
+    for(j = 0; j < N ; j++) {
+      // Columns whose x(j) is zero receive no update.
+      if (X[jx] != ZERO) {
+        temp = Alpha*X[jx];
+        ix = kx;
+        for(i = 0 ; i <= j ; i++) {
+          A[i*rsa + j*csa] = A[i*rsa + j*csa] + X[ix]*temp;
+          ix = ix + incx;
+        }
+      }
+      jx = jx + incx;
+    }
+  }
+  else
+  {
+    /* Form  A  when A is stored in lower triangle. */
+    jx = kx;
+    for(j = 0; j < N ; j++) {
+      // Columns whose x(j) is zero receive no update.
+      if (X[jx] != ZERO) {
+        temp = Alpha*X[jx];
+        ix = jx;
+        for(i = j ; i < N ; i++) {
+          A[i*rsa + j*csa] = A[i*rsa + j*csa] + X[ix]*temp;
+          ix = ix + incx;
+        }
+      }
+      jx = jx + incx;
+    }
+  }
+  return;
+}
+
+// Reference SYR for complex element types; A is updated in place:
+//     A := alpha*x*x**T + A
+// A is indexed as an N x N matrix through (rsa, csa); only the triangle
+// selected by uploa (BLIS_UPPER, otherwise lower), diagonal included, is
+// written. alpha points at the scalar (element 0 is used). X holds N elements
+// spaced incx apart. T must expose .real and .imag members; complex
+// arithmetic goes through the mulc<T> / addc<T> / conjugate<T> helpers
+// declared elsewhere. The template parameter U is not used by this routine.
+//
+// Side effect on the input: when conjx is set every element of X is replaced
+// by conjugate<T>() of itself before the update is formed.
+// Returns immediately when N == 0 or alpha == 0 (both parts).
+template <typename T, typename U>
+void libblis_icsyr_check(uplo_t uploa, dim_t N, T* alpha, T* X, dim_t incx,
+                                    bool conjx, T* A, dim_t rsa, dim_t csa) {
+  T ZERO  = {0.0 , 0.0};
+  T Alpha = alpha[0];
+  T temp;
+  // Indices share the dim_t type of N / incx so that nothing is narrowed.
+  dim_t i, ix, j, jx, kx;
+
+  if ((N == 0) || ((Alpha.real == ZERO.real) && (Alpha.imag == ZERO.imag)))
+   return;
+
+  // For a non-positive increment the first logical element sits at 0-based
+  // offset (1-N)*incx, the 0-based form of reference BLAS' 1-(N-1)*INCX.
+  // NOTE(review): assumes, as reference BLAS does, that X points at the
+  // lowest-addressed element of the vector -- confirm against the harness.
+  kx = (incx > 0) ? 0 : (1 - N) * incx;
+
+  // Optional in-place conjugation of x. The walk starts at kx so that it
+  // visits the same N elements the update below reads.
+  if(conjx) {
+    ix = kx;
+    for(i = 0 ; i < N ; i++) {
+      X[ix] = conjugate<T>(X[ix]);
+      ix = ix + incx;
+    }
+  }
+
+  if(uploa == BLIS_UPPER) {
+    /* Form  A  when A is stored in upper triangle. */
+    jx = kx;
+    for(j = 0 ; j < N ; j++) {
+      // Columns whose x(j) is zero receive no update.
+      if ((X[jx].real != ZERO.real) || (X[jx].imag != ZERO.imag)) {
+        temp = mulc<T>(Alpha , X[jx]);
+        ix = kx;
+        for(i = 0 ; i <= j ; i++) {
+          A[i*rsa + j*csa] = addc<T>(A[i*rsa + j*csa] , mulc<T>(X[ix] , temp));
+          ix = ix + incx;
+        }
+      }
+      jx = jx + incx;
+    }
+  }
+  else {
+    /* Form  A  when A is stored in lower triangle. */
+    jx = kx;
+    for(j = 0; j < N ; j++) {
+      // Columns whose x(j) is zero receive no update.
+      if ((X[jx].real != ZERO.real) || (X[jx].imag != ZERO.imag)) {
+        temp = mulc<T>(Alpha , X[jx]);
+        ix = jx;
+        for(i = j ; i < N ; i++) {
+          A[i*rsa + j*csa] = addc<T>(A[i*rsa + j*csa] , mulc<T>(X[ix] , temp));
+          ix = ix + incx;
+        }
+      }
+      jx = jx + incx;
+    }
+  }
+
+  return;
+}
+
+// Reference-side check for SYR.
+//
+// Runs the typed reference kernel (libblis_isyr_check for real types,
+// libblis_icsyr_check for complex types) on the buffer of a_orig, which the
+// kernel updates in place, and returns what computediffrm / computediffim
+// reports for the buffer of a versus that reference buffer.
+//
+//   params : part of the common signature; not read here.
+//   alpha  : scalar object; its buffer is passed as a pointer.
+//   x      : vector operand; supplies the datatype, stride and conj flag.
+//   a      : matrix being validated; uplo, dimensions and the strides used
+//            for BOTH a and a_orig are read from it. Its length is what the
+//            kernel receives as the matrix order.
+//   a_orig : matrix whose buffer receives the reference result.
+//
+// For a datatype other than the four handled ones,
+// bli_check_error_code( BLIS_INVALID_DATATYPE ) is invoked and, if that call
+// returns, the result is 0.0.
+double libblis_test_isyr_check(
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         x,
+  obj_t*         a,
+  obj_t*         a_orig
+){
+  dim_t  rows     = bli_obj_length( a );
+  dim_t  cols     = bli_obj_width( a );
+  dim_t  a_rs     = bli_obj_row_stride( a );
+  dim_t  a_cs     = bli_obj_col_stride( a );
+  dim_t  x_step   = bli_obj_vector_inc( x );
+  bool   x_conj   = bli_obj_has_conj( x );
+  uplo_t tri      = bli_obj_uplo( a );
+  num_t  datatype = bli_obj_dt( x );
+  double residual = 0.0;
+
+  if ( datatype == BLIS_FLOAT ) {
+    float* alpha_p = (float*) bli_obj_buffer( alpha );
+    float* x_buf   = (float*) bli_obj_buffer( x );
+    float* ref_buf = (float*) bli_obj_buffer( a_orig );
+    float* out_buf = (float*) bli_obj_buffer( a );
+    libblis_isyr_check<float, int32_t>( tri, rows, alpha_p, x_buf, x_step,
+                                        ref_buf, a_rs, a_cs );
+    residual = computediffrm( rows, cols, out_buf, ref_buf, a_rs, a_cs );
+  }
+  else if ( datatype == BLIS_DOUBLE ) {
+    double* alpha_p = (double*) bli_obj_buffer( alpha );
+    double* x_buf   = (double*) bli_obj_buffer( x );
+    double* ref_buf = (double*) bli_obj_buffer( a_orig );
+    double* out_buf = (double*) bli_obj_buffer( a );
+    libblis_isyr_check<double, int64_t>( tri, rows, alpha_p, x_buf, x_step,
+                                         ref_buf, a_rs, a_cs );
+    residual = computediffrm( rows, cols, out_buf, ref_buf, a_rs, a_cs );
+  }
+  else if ( datatype == BLIS_SCOMPLEX ) {
+    scomplex* alpha_p = (scomplex*) bli_obj_buffer( alpha );
+    scomplex* x_buf   = (scomplex*) bli_obj_buffer( x );
+    scomplex* ref_buf = (scomplex*) bli_obj_buffer( a_orig );
+    scomplex* out_buf = (scomplex*) bli_obj_buffer( a );
+    // Only the complex kernels receive the conjugation flag of x.
+    libblis_icsyr_check<scomplex, int32_t>( tri, rows, alpha_p, x_buf, x_step,
+                                            x_conj, ref_buf, a_rs, a_cs );
+    residual = computediffim( rows, cols, out_buf, ref_buf, a_rs, a_cs );
+  }
+  else if ( datatype == BLIS_DCOMPLEX ) {
+    dcomplex* alpha_p = (dcomplex*) bli_obj_buffer( alpha );
+    dcomplex* x_buf   = (dcomplex*) bli_obj_buffer( x );
+    dcomplex* ref_buf = (dcomplex*) bli_obj_buffer( a_orig );
+    dcomplex* out_buf = (dcomplex*) bli_obj_buffer( a );
+    libblis_icsyr_check<dcomplex, int64_t>( tri, rows, alpha_p, x_buf, x_step,
+                                            x_conj, ref_buf, a_rs, a_cs );
+    residual = computediffim( rows, cols, out_buf, ref_buf, a_rs, a_cs );
+  }
+  else {
+    bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return residual;
+}
+
--- a/gtestsuite/src/ref_syr2.cpp
+++ b/gtestsuite/src/ref_syr2.cpp
@@ -0,0 +1,243 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_syr2.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> SYR2  performs the symmetric rank 2 operation
+//*>    A := alpha*x*y**T + alpha*y*x**T + A,
+//*> where alpha is a scalar, x and y are n element vectors and A is an n
+//*> by n symmetric matrix.
+//*  ==========================================================================
+
+//*  --------------------------------------------------------------------------
+//*> Reference SYR2 for real element types: updates one triangle of A in place
+//*> with  A := alpha*x*y**T + alpha*y*x**T + A.
+//*>
+//*>  uploa     BLIS_UPPER updates the elements (i,j) with i <= j; any other
+//*>            value updates the elements with i >= j.
+//*>  N         order of A and number of elements read from X and from Y.
+//*>  alpha     pointer to the scalar; only alpha[0] is read.
+//*>  X, incx   first vector and the step between its consecutive elements.
+//*>  Y, incy   second vector and the step between its consecutive elements.
+//*>  A         matrix updated in place; element (i,j) is A[i*rsa + j*csa].
+//*>
+//*> Returns without touching A when N is zero or alpha is zero.
+//*> The template parameter U is not used by this routine.
+//*  --------------------------------------------------------------------------
+template <typename T, typename U>
+void libblis_isyr2_check(uplo_t uploa, dim_t N, T* alpha, T* X, dim_t incx,
+                             T* Y, dim_t incy, T* A, dim_t rsa, dim_t csa) {
+
+  const T ZERO  = 0.0;
+  const T Alpha = alpha[0];
+
+  if ((N == 0) || (Alpha == ZERO))
+    return;
+
+  // Offset of the first logical element. For a non-positive increment this is
+  // the 0-based form of the netlib rule KX = 1 - (N-1)*INCX, which keeps every
+  // access inside [0, (N-1)*|inc|]. The previous "1 - N*inc" started past the
+  // end of the vector.
+  const dim_t kx = (incx > 0) ? 0 : (1 - N) * incx;
+  const dim_t ky = (incy > 0) ? 0 : (1 - N) * incy;
+
+  const bool upper = (uploa == BLIS_UPPER);
+
+  dim_t jx = kx;
+  dim_t jy = ky;
+
+  for (dim_t j = 0 ; j < N ; j++) {
+    // A column contributes nothing when both x(j) and y(j) are zero.
+    if ((X[jx] != ZERO) || (Y[jy] != ZERO)) {
+      const T tmp1 = Alpha * Y[jy];
+      const T tmp2 = Alpha * X[jx];
+
+      // Upper triangle: rows 0..j, vectors walked from their first element.
+      // Lower triangle: rows j..N-1, vectors walked from element j.
+      const dim_t ibeg = upper ? 0 : j;
+      const dim_t iend = upper ? j : (N - 1);
+      dim_t ix = upper ? kx : jx;
+      dim_t iy = upper ? ky : jy;
+
+      for (dim_t i = ibeg ; i <= iend ; i++) {
+        A[i*rsa + j*csa] = A[i*rsa + j*csa] + (X[ix] * tmp1) + (Y[iy] * tmp2);
+        ix = ix + incx;
+        iy = iy + incy;
+      }
+    }
+    jx = jx + incx;
+    jy = jy + incy;
+  }
+  return;
+}
+
+//*  --------------------------------------------------------------------------
+//*> Reference SYR2 for complex element types (a struct with .real and .imag
+//*> members): updates one triangle of A in place with
+//*>    A := alpha*x*y**T + alpha*y*x**T + A.
+//*>
+//*>  uploa        BLIS_UPPER updates the elements (i,j) with i <= j; any
+//*>               other value updates the elements with i >= j.
+//*>  N            order of A and number of elements read from X and from Y.
+//*>  alpha        pointer to the scalar; only *alpha is read.
+//*>  X, incx      first vector and the step between consecutive elements.
+//*>  conjx        when true, X is overwritten with conjugate<T>() of each of
+//*>               its N elements before the update and is left that way.
+//*>  Y, incy      second vector and the step between consecutive elements.
+//*>  conjy        same as conjx, applied to Y.
+//*>  A            matrix updated in place; element (i,j) is A[i*rsa + j*csa].
+//*>
+//*> Returns without touching anything when N is zero or alpha is exactly
+//*> zero in both components. Arithmetic goes through the mulc<T>/addc<T>
+//*> helpers declared elsewhere. The template parameter U is not used.
+//*  --------------------------------------------------------------------------
+template <typename T, typename U>
+void libblis_icsyr2_check(uplo_t uploa, dim_t N, T* alpha, T* X, dim_t incx,
+      bool conjx, T* Y, dim_t incy, bool conjy, T* A, dim_t rsa, dim_t csa) {
+
+  T ZERO  = {0.0, 0.0};
+  T Alpha = *alpha;
+  T tmp1, tmp2;
+  T p1, p2, p;
+
+  if((N == 0) || ((Alpha.real == ZERO.real) && (Alpha.imag == ZERO.imag)))
+    return;
+
+  // Offset of the first logical element. For a non-positive increment this is
+  // the 0-based form of the netlib rule KX = 1 - (N-1)*INCX, which keeps every
+  // access inside [0, (N-1)*|inc|]. The previous "1 - N*inc" started past the
+  // end of the vector.
+  const dim_t kx = (incx > 0) ? 0 : (1 - N) * incx;
+  const dim_t ky = (incy > 0) ? 0 : (1 - N) * incy;
+
+  // Conjugate the inputs in place. The walk starts at kx/ky so that exactly
+  // the elements used by the update below are conjugated.
+  if(conjx) {
+    dim_t ix = kx;
+    for(dim_t i = 0 ; i < N ; i++) {
+      X[ix] = conjugate<T>(X[ix]);
+      ix = ix + incx;
+    }
+  }
+
+  if(conjy) {
+    dim_t iy = ky;
+    for(dim_t i = 0 ; i < N ; i++) {
+      Y[iy] = conjugate<T>(Y[iy]);
+      iy = iy + incy;
+    }
+  }
+
+  const bool upper = (uploa == BLIS_UPPER);
+
+  dim_t jx = kx;
+  dim_t jy = ky;
+
+  for(dim_t j = 0 ; j < N ; j++) {
+    tmp1 = mulc<T>(Alpha , Y[jy]);
+    tmp2 = mulc<T>(Alpha , X[jx]);
+
+    // Upper triangle: rows 0..j, vectors walked from their first element.
+    // Lower triangle: rows j..N-1, vectors walked from element j.
+    const dim_t ibeg = upper ? 0 : j;
+    const dim_t iend = upper ? j : (N - 1);
+    dim_t ix = upper ? kx : jx;
+    dim_t iy = upper ? ky : jy;
+
+    for(dim_t i = ibeg ; i <= iend ; i++) {
+      p1 = mulc<T>(X[ix] , tmp1);
+      p2 = mulc<T>(Y[iy] , tmp2);
+      p  = addc<T>(p1 , p2);
+      A[i*rsa + j*csa] = addc<T>(A[i*rsa + j*csa] , p);
+      ix = ix + incx;
+      iy = iy + incy;
+    }
+    jx = jx + incx;
+    jy = jy + incy;
+  }
+  return;
+}
+
+//*  --------------------------------------------------------------------------
+//*> Runs the reference SYR2 on the buffer of a_orig and returns the value
+//*> produced by computediffrm (real types) or computediffim (complex types)
+//*> when given the buffer of a and the updated buffer of a_orig.
+//*>
+//*> The datatype is taken from x; triangle, dimensions and strides are taken
+//*> from a. For an unsupported datatype bli_check_error_code() is invoked and
+//*> 0.0 is returned. The params argument is not used.
+//*  --------------------------------------------------------------------------
+double libblis_test_isyr2_check(
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         x,
+  obj_t*         y,
+  obj_t*         a,
+  obj_t*         a_orig
+){
+
+  double residual   = 0.0;
+
+  // Shape, triangle and strides of the matrix operand.
+  uplo_t triangle   = bli_obj_uplo( a );
+  dim_t  n_rows     = bli_obj_length( a );
+  dim_t  n_cols     = bli_obj_width( a );
+  dim_t  row_stride = bli_obj_row_stride( a ) ;
+  dim_t  col_stride = bli_obj_col_stride( a ) ;
+
+  // Increments and conjugation flags of the two vector operands.
+  dim_t  x_inc      = bli_obj_vector_inc( x );
+  dim_t  y_inc      = bli_obj_vector_inc( y );
+  bool   x_conj     = bli_obj_has_conj( x );
+  bool   y_conj     = bli_obj_has_conj( y );
+
+  num_t  datatype   = bli_obj_dt( x );
+
+  if( datatype == BLIS_FLOAT ) {
+    float*  scal = (float*) bli_obj_buffer( alpha );
+    float*  xbuf = (float*) bli_obj_buffer( x );
+    float*  ybuf = (float*) bli_obj_buffer( y );
+    float*  ref  = (float*) bli_obj_buffer( a_orig );
+    float*  out  = (float*) bli_obj_buffer( a );
+    // The matrix length is passed as the order of the (square) operand.
+    libblis_isyr2_check<float, int32_t>(triangle, n_rows, scal, xbuf, x_inc,
+                                 ybuf, y_inc, ref, row_stride, col_stride);
+    residual = computediffrm(n_rows, n_cols, out, ref, row_stride, col_stride);
+  }
+  else if( datatype == BLIS_DOUBLE ) {
+    double*  scal = (double*) bli_obj_buffer( alpha );
+    double*  xbuf = (double*) bli_obj_buffer( x );
+    double*  ybuf = (double*) bli_obj_buffer( y );
+    double*  ref  = (double*) bli_obj_buffer( a_orig );
+    double*  out  = (double*) bli_obj_buffer( a );
+    libblis_isyr2_check<double, int64_t>(triangle, n_rows, scal, xbuf, x_inc,
+                                 ybuf, y_inc, ref, row_stride, col_stride);
+    residual = computediffrm(n_rows, n_cols, out, ref, row_stride, col_stride);
+  }
+  else if( datatype == BLIS_SCOMPLEX ) {
+    scomplex*  scal = (scomplex*) bli_obj_buffer( alpha );
+    scomplex*  xbuf = (scomplex*) bli_obj_buffer( x );
+    scomplex*  ybuf = (scomplex*) bli_obj_buffer( y );
+    scomplex*  ref  = (scomplex*) bli_obj_buffer( a_orig );
+    scomplex*  out  = (scomplex*) bli_obj_buffer( a );
+    libblis_icsyr2_check<scomplex, int32_t>(triangle, n_rows, scal, xbuf,
+         x_inc, x_conj, ybuf, y_inc, y_conj, ref, row_stride, col_stride);
+    residual = computediffim(n_rows, n_cols, out, ref, row_stride, col_stride);
+  }
+  else if( datatype == BLIS_DCOMPLEX ) {
+    dcomplex*  scal = (dcomplex*) bli_obj_buffer( alpha );
+    dcomplex*  xbuf = (dcomplex*) bli_obj_buffer( x );
+    dcomplex*  ybuf = (dcomplex*) bli_obj_buffer( y );
+    dcomplex*  ref  = (dcomplex*) bli_obj_buffer( a_orig );
+    dcomplex*  out  = (dcomplex*) bli_obj_buffer( a );
+    libblis_icsyr2_check<dcomplex, int64_t>(triangle, n_rows, scal, xbuf,
+         x_inc, x_conj, ybuf, y_inc, y_conj, ref, row_stride, col_stride);
+    residual = computediffim(n_rows, n_cols, out, ref, row_stride, col_stride);
+  }
+  else {
+    bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return residual;
+}
+
+
--- a/gtestsuite/src/ref_syr2k.cpp
+++ b/gtestsuite/src/ref_syr2k.cpp
@@ -0,0 +1,611 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_syr2k.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> SYR2K  performs one of the symmetric rank 2k operations
+//*>    C := alpha*A*B**T + alpha*B*A**T + beta*C,
+//*> or
+//*>    C := alpha*A**T*B + alpha*B**T*A + beta*C,
+//*> where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
+//*> and  A and B  are  n by k  matrices  in the  first  case  and  k by n
+//*> matrices in the second case.
+//*  ==========================================================================
+
+//*  --------------------------------------------------------------------------
+//*> Reference SYR2K for real element types. One triangle of C is overwritten
+//*> with
+//*>    alpha*A*B**T + alpha*B*A**T + beta*C      (trans == BLIS_NO_TRANSPOSE)
+//*> or
+//*>    alpha*A**T*B + alpha*B**T*A + beta*C      (any other trans value).
+//*>
+//*>  uplo      BLIS_UPPER touches the elements (i,j) with i <= j; any other
+//*>            value touches the elements with i >= j.
+//*>  N, K      order of C and length of the inner (summed) dimension.
+//*>  A, B, C   element (i,j) of each matrix is M[i*rs + j*cs] with that
+//*>            matrix's own row/column strides.
+//*>
+//*> Returns immediately when N is zero, or when beta is one and either alpha
+//*> is zero or K is zero. When beta is zero the old contents of C are never
+//*> read.
+//*  --------------------------------------------------------------------------
+template <typename T>
+void libblis_isyr2k_check( uplo_t uplo, trans_t trans, dim_t N, dim_t K,
+  T Alpha, T* A, dim_t rsa, dim_t csa, T* B, dim_t rsb, dim_t csb, T Beta,
+  T* C, dim_t rsc, dim_t csc )
+{
+    const T kOne  = 1.0 ;
+    const T kZero = 0.0 ;
+
+    const bool upper    = ( uplo == BLIS_UPPER );
+    const bool no_trans = ( trans == BLIS_NO_TRANSPOSE );
+
+    int row, col, p;
+
+    // Quick returns: empty problem, or an update that leaves C unchanged.
+    if( N == 0 )
+      return;
+    if( ( Alpha == kZero || K == 0 ) && Beta == kOne )
+      return;
+
+    // alpha == 0: only the beta scaling of the selected triangle remains.
+    if( Alpha == kZero )
+    {
+        for( col = 0 ; col < N ; col++ )
+        {
+            // Row range of the selected triangle inside this column.
+            const int   first = upper ? 0 : col;
+            const dim_t stop  = upper ? (dim_t)col + 1 : N;
+            for( row = first ; row < stop ; row++ )
+            {
+                if( Beta == kZero )
+                    C[row*rsc + col*csc] = kZero;
+                else
+                    C[row*rsc + col*csc] = Beta*C[row*rsc + col*csc];
+            }
+        }
+        return;
+    }
+
+    if( no_trans )
+    {
+        // C := alpha*A*B**T + alpha*B*A**T + beta*C, one column at a time.
+        for( col = 0 ; col < N ; col++ )
+        {
+            const int   first = upper ? 0 : col;
+            const dim_t stop  = upper ? (dim_t)col + 1 : N;
+
+            // Apply beta to the column segment first.
+            if( Beta == kZero )
+            {
+                for( row = first ; row < stop ; row++ )
+                    C[row*rsc + col*csc] = kZero;
+            }
+            else if( Beta != kOne )
+            {
+                for( row = first ; row < stop ; row++ )
+                    C[row*rsc + col*csc] = Beta*C[row*rsc + col*csc];
+            }
+
+            // Then accumulate the K rank-2 contributions.
+            for( p = 0 ; p < K ; p++ )
+            {
+                // Both A(col,p) and B(col,p) zero: this term adds nothing.
+                if( (A[col*rsa + p*csa] == kZero) && (B[col*rsb + p*csb] == kZero) )
+                    continue;
+
+                const T scaled_b = Alpha*B[col*rsb + p*csb];
+                const T scaled_a = Alpha*A[col*rsa + p*csa];
+                for( row = first ; row < stop ; row++ )
+                {
+                    C[row*rsc + col*csc] = C[row*rsc + col*csc] + A[row*rsa + p*csa]*scaled_b + B[row*rsb + p*csb]*scaled_a;
+                }
+            }
+        }
+    }
+    else
+    {
+        // C := alpha*A**T*B + alpha*B**T*A + beta*C, one element at a time.
+        for( col = 0 ; col < N ; col++ )
+        {
+            const int   first = upper ? 0 : col;
+            const dim_t stop  = upper ? (dim_t)col + 1 : N;
+            for( row = first ; row < stop ; row++ )
+            {
+                T dot_ab = kZero;
+                T dot_ba = kZero;
+                for( p = 0 ; p < K ; p++ )
+                {
+                    dot_ab = dot_ab + A[p*rsa + row*csa]*B[p*rsb + col*csb];
+                    dot_ba = dot_ba + B[p*rsb + row*csb]*A[p*rsa + col*csa];
+                }
+
+                if( Beta == kZero )
+                    C[row*rsc + col*csc] = Alpha*dot_ab + Alpha*dot_ba;
+                else
+                    C[row*rsc + col*csc] = Beta*C[row*rsc + col*csc] + Alpha*dot_ab + Alpha*dot_ba;
+            }
+        }
+    }
+    return;
+}
+
+//*  --------------------------------------------------------------------------
+//*> Reference SYR2K for complex element types (a struct with .real and .imag
+//*> members). One triangle of C is overwritten with
+//*>    alpha*A*B**T + alpha*B*A**T + beta*C      (trans == BLIS_NO_TRANSPOSE)
+//*> or
+//*>    alpha*A**T*B + alpha*B**T*A + beta*C      (any other trans value).
+//*> No conjugation is applied anywhere (this is SYR2K, not HER2K).
+//*>
+//*>  uplo      BLIS_UPPER touches the elements (i,j) with i <= j; any other
+//*>            value touches the elements with i >= j.
+//*>  N, K      order of C and length of the inner (summed) dimension.
+//*>  A, B, C   element (i,j) of each matrix is M[i*rs + j*cs] with that
+//*>            matrix's own row/column strides.
+//*>
+//*> A scalar counts as zero (or one) only when BOTH its real and imaginary
+//*> parts match. Earlier revisions looked at the real part alone, or used
+//*> "real == 0 || imag == 0", which dropped beta*C for every purely real beta.
+//*>
+//*> Arithmetic goes through the mulc<T>/addc<T> helpers declared elsewhere.
+//*> The template parameter U is not used by this routine.
+//*  --------------------------------------------------------------------------
+template <typename T, typename U>
+void libblis_icsyr2k_check( uplo_t uplo, trans_t trans, dim_t N, dim_t K,
+  T Alpha, T* A, dim_t rsa, dim_t csa, T* B, dim_t rsb, dim_t csb, T Beta,
+  T* C, dim_t rsc, dim_t csc )
+{
+    T tmp1, tmp2;
+    T tmpa, tmpb;
+    dim_t i, j, l;
+
+    T ONE  = { 1.0 , 0.0 };
+    T ZERO = { 0.0 , 0.0 };
+
+    //*     Test the input parameters.
+    const bool UPPER   = (uplo == BLIS_UPPER);
+    const bool NOTRANS = (trans == BLIS_NO_TRANSPOSE);
+
+    //*     Full complex comparisons of the scalars.
+    const bool alpha_is_zero = (Alpha.real == ZERO.real) && (Alpha.imag == ZERO.imag);
+    const bool beta_is_zero  = (Beta.real  == ZERO.real) && (Beta.imag  == ZERO.imag);
+    const bool beta_is_one   = (Beta.real  == ONE.real ) && (Beta.imag  == ONE.imag );
+
+    //*     Quick return if possible.
+    if( N == 0 || (( alpha_is_zero || K == 0 ) && beta_is_one ))
+      return;
+
+    //*     And when  alpha.eq.zero: only the beta scaling remains.
+    if( alpha_is_zero )
+    {
+        for( j = 0 ; j < N ; j++ )
+        {
+            // Row range of the selected triangle inside column j.
+            const dim_t ibeg = UPPER ? 0 : j;
+            const dim_t iend = UPPER ? j : (N - 1);
+            for( i = ibeg ; i <= iend ; i++ )
+            {
+                if( beta_is_zero )
+                    C[i*rsc + j*csc] = ZERO;
+                else
+                    C[i*rsc + j*csc] = mulc<T>(Beta , C[i*rsc + j*csc]);
+            }
+        }
+        return;
+    }
+
+    //*     Start the operations.
+    if( NOTRANS )
+    {
+        //* Form  C := alpha*A*B**T + alpha*B*A**T + beta*C.
+        for( j = 0 ; j < N ; j++ )
+        {
+            const dim_t ibeg = UPPER ? 0 : j;
+            const dim_t iend = UPPER ? j : (N - 1);
+
+            // Apply beta to the column segment first.
+            if( beta_is_zero )
+            {
+                for( i = ibeg ; i <= iend ; i++ )
+                {
+                    C[i*rsc + j*csc] = ZERO;
+                }
+            }
+            else if( !beta_is_one )
+            {
+                for( i = ibeg ; i <= iend ; i++ )
+                {
+                    C[i*rsc + j*csc] = mulc<T>(Beta , C[i*rsc + j*csc]);
+                }
+            }
+
+            // Then accumulate the K rank-2 contributions.
+            for( l = 0 ; l < K ; l++ )
+            {
+                const bool a_nonzero = (A[j*rsa + l*csa].real != ZERO.real)
+                                    || (A[j*rsa + l*csa].imag != ZERO.imag);
+                const bool b_nonzero = (B[j*rsb + l*csb].real != ZERO.real)
+                                    || (B[j*rsb + l*csb].imag != ZERO.imag);
+
+                if( a_nonzero || b_nonzero )
+                {
+                    tmp1 = mulc<T>(Alpha , B[j*rsb + l*csb]);
+                    tmp2 = mulc<T>(Alpha , A[j*rsa + l*csa]);
+                    for( i = ibeg ; i <= iend ; i++ )
+                    {
+                        tmpa = mulc<T>(A[i*rsa + l*csa] , tmp1);
+                        tmpb = mulc<T>(B[i*rsb + l*csb] , tmp2);
+                        tmpa = addc<T>(tmpa , tmpb);
+                        C[i*rsc + j*csc] = addc<T>(C[i*rsc + j*csc] , tmpa);
+                    }
+                }
+            }
+        }
+    }
+    else
+    {
+        //* Form  C := alpha*A**T*B + alpha*B**T*A + beta*C.
+        for( j = 0 ; j < N ; j++ )
+        {
+            const dim_t ibeg = UPPER ? 0 : j;
+            const dim_t iend = UPPER ? j : (N - 1);
+            for( i = ibeg ; i <= iend ; i++ )
+            {
+                tmp1 = ZERO;
+                tmp2 = ZERO;
+                for( l = 0 ; l < K ; l++ )
+                {
+                    tmp1 = addc<T>(tmp1 , mulc<T>(A[l*rsa + i*csa] , B[l*rsb + j*csb]));
+                    tmp2 = addc<T>(tmp2 , mulc<T>(B[l*rsb + i*csb] , A[l*rsa + j*csa]));
+                }
+
+                tmpa = mulc<T>(Alpha , tmp1);
+                tmpb = mulc<T>(Alpha , tmp2);
+                tmpa = addc<T>(tmpa , tmpb);
+
+                if( beta_is_zero )
+                {
+                    // The old contents of C are not read when beta is zero.
+                    C[i*rsc + j*csc] = tmpa;
+                }
+                else
+                {
+                    C[i*rsc + j*csc] = addc<T>(mulc<T>(Beta , C[i*rsc + j*csc]) , tmpa);
+                }
+            }
+        }
+    }
+    return;
+}
+
+//*  --------------------------------------------------------------------------
+//*> Runs the reference SYR2K on the buffer of c_orig and returns the absolute
+//*> value of what computediffrm (real types) or computediffim (complex types)
+//*> reports for the buffers of c and c_orig.
+//*>
+//*> The datatype, triangle and order come from c; K and the transposition
+//*> come from a. Returns 0.0 without running the reference when any of the
+//*> leading-dimension guards below fails. For an unsupported datatype
+//*> bli_check_error_code() is invoked and 0.0 is returned. The params
+//*> argument is not used.
+//*  --------------------------------------------------------------------------
+double libblis_test_isyr2k_check
+    (
+      test_params_t* params,
+      obj_t*         alpha,
+      obj_t*         a,
+      obj_t*         b,
+      obj_t*         beta,
+      obj_t*         c,
+      obj_t*         c_orig
+    )
+{
+    num_t dt      = bli_obj_dt( c );
+    uplo_t uploc  = bli_obj_uplo( c );
+    dim_t  M      = bli_obj_length( c );
+    dim_t  K      = bli_obj_width_after_trans( a );
+    trans_t trans = bli_obj_onlytrans_status( a );
+    dim_t rsa     = bli_obj_row_stride( a ) ;
+    dim_t csa     = bli_obj_col_stride( a ) ;
+    dim_t rsb     = bli_obj_row_stride( b ) ;
+    dim_t csb     = bli_obj_col_stride( b ) ;
+    dim_t rsc     = bli_obj_row_stride( c ) ;
+    dim_t csc     = bli_obj_col_stride( c ) ;
+    double resid  = 0.0;
+    f77_int lda, ldb, ldc;
+
+    // Leading dimensions: the column stride when c is column stored,
+    // otherwise the row stride.
+    if( bli_obj_is_col_stored( c ) ) {
+      lda  = bli_obj_col_stride( a );
+      ldb  = bli_obj_col_stride( b );
+      ldc  = bli_obj_col_stride( c );
+    } else {
+      lda  = bli_obj_row_stride( a );
+      ldb  = bli_obj_row_stride( b );
+      ldc  = bli_obj_row_stride( c );
+    }
+
+    // NOTE(review): nrowa follows the column-major BLAS rule even when c is
+    // row stored; confirm this is the intended guard for row storage.
+    int nrowa;
+    if (trans == BLIS_NO_TRANSPOSE) {
+      nrowa = M;
+    } else {
+      nrowa = K;
+    }
+
+    // Skip the comparison (residual 0.0) when a leading dimension is too
+    // small for the operand it describes.
+    if( lda < max(1, nrowa) ) {
+      return resid;
+    }
+    if( ldb < max(1, nrowa) ) {
+      return resid;
+    }
+    if( ldc < max(1, (int)M) ) {
+      return resid;
+    }
+
+    switch( dt )  {
+        case BLIS_FLOAT :
+        {
+            float*   Alpha = (float*) bli_obj_buffer( alpha );
+            float*   A     = (float*) bli_obj_buffer( a );
+            float*   B     = (float*) bli_obj_buffer( b );
+            float*   Beta  = (float*) bli_obj_buffer( beta );
+            float*   C     = (float*) bli_obj_buffer( c_orig );
+            float*   CC    = (float*) bli_obj_buffer( c );
+            libblis_isyr2k_check<float>(uploc, trans, M, K, *Alpha, A,
+                                 rsa, csa, B, rsb, csb, *Beta, C, rsc, csc );
+            resid = computediffrm(M, M, CC, C, rsc, csc);
+            break;
+        }
+        case BLIS_DOUBLE :
+        {
+            double*  Alpha = (double*) bli_obj_buffer( alpha );
+            double*  A     = (double*) bli_obj_buffer( a );
+            double*  B     = (double*) bli_obj_buffer( b );
+            double*  Beta  = (double*) bli_obj_buffer( beta );
+            double*  C     = (double*) bli_obj_buffer( c_orig );
+            double*  CC    = (double*) bli_obj_buffer( c );
+            libblis_isyr2k_check<double>(uploc, trans, M, K, *Alpha, A,
+                                 rsa, csa, B, rsb, csb, *Beta, C, rsc, csc );
+            resid = computediffrm(M, M, CC, C, rsc, csc);
+            break;
+        }
+        case BLIS_SCOMPLEX :
+        {
+            scomplex*   Alpha = (scomplex*) bli_obj_buffer( alpha );
+            scomplex*   A     = (scomplex*) bli_obj_buffer( a );
+            scomplex*   B     = (scomplex*) bli_obj_buffer( b );
+            scomplex*   Beta  = (scomplex*) bli_obj_buffer( beta );
+            scomplex*   C     = (scomplex*) bli_obj_buffer( c_orig );
+            scomplex*   CC    = (scomplex*) bli_obj_buffer( c );
+            libblis_icsyr2k_check<scomplex, float>(uploc, trans, M, K, *Alpha,
+                                A, rsa, csa, B, rsb, csb, *Beta, C, rsc, csc );
+            resid = computediffim(M, M, CC, C, rsc, csc);
+            break;
+        }
+        case BLIS_DCOMPLEX :
+        {
+            dcomplex*   Alpha = (dcomplex*) bli_obj_buffer( alpha );
+            dcomplex*   A     = (dcomplex*) bli_obj_buffer( a );
+            dcomplex*   B     = (dcomplex*) bli_obj_buffer( b );
+            dcomplex*   Beta  = (dcomplex*) bli_obj_buffer( beta );
+            dcomplex*   C     = (dcomplex*) bli_obj_buffer( c_orig );
+            dcomplex*   CC    = (dcomplex*) bli_obj_buffer( c );
+            libblis_icsyr2k_check<dcomplex, double>(uploc, trans, M, K, *Alpha,
+                                A, rsa, csa, B, rsb, csb, *Beta, C, rsc, csc );
+            resid = computediffim(M, M, CC, C, rsc, csc);
+            break;
+        }
+        default :
+            bli_check_error_code( BLIS_INVALID_DATATYPE );
+    }
+
+    // Explicit sign flip instead of an unqualified abs(): which abs overload
+    // is visible depends on the headers pulled in, and the integer one would
+    // silently truncate the residual.
+    return ( resid < 0.0 ) ? -resid : resid;
+}
+
+//*  --------------------------------------------------------------------------
+//*> Scans the length-by-width elements of b, addressed as B[i*rs + j*cs]
+//*> with the strides supplied by the caller, for a NaN.
+//*> Returns the first NaN encountered (converted to double), or 0.0 when no
+//*> element is NaN.
+//*  --------------------------------------------------------------------------
+template <typename T>
+double libblis_check_nan_real( dim_t rs, dim_t cs, obj_t* b ) {
+  dim_t  M = bli_obj_length( b );
+  dim_t  N = bli_obj_width( b );
+  dim_t  i,j;
+  T* B = (T*) bli_obj_buffer( b );
+
+  for( i = 0 ; i < M ; i++ ) {
+    for( j = 0 ; j < N ; j++ ) {
+      auto tv = B[ i*rs + j*cs ];
+      if ( bli_isnan( tv )) {
+        // Stop at the first hit; the former "break" only left the inner
+        // loop, so the remaining rows were still scanned.
+        return tv;
+      }
+    }
+  }
+  return 0.0;
+}
+
+//*  --------------------------------------------------------------------------
+//*> Scans the length-by-width complex elements of b, addressed as
+//*> B[i*rs + j*cs] with the strides supplied by the caller, for a NaN in
+//*> either component.
+//*> Returns the NaN component of the first such element (the real part when
+//*> it is NaN, otherwise the imaginary part), or 0.0 when none is found.
+//*  --------------------------------------------------------------------------
+template <typename T>
+double libblis_check_nan_complex( dim_t rs, dim_t cs, obj_t* b ) {
+  dim_t  M = bli_obj_length( b );
+  dim_t  N = bli_obj_width( b );
+  dim_t  i,j;
+  T* B = (T*) bli_obj_buffer( b );
+
+  for( i = 0 ; i < M ; i++ ) {
+    for( j = 0 ; j < N ; j++ ) {
+      auto tv = B[ i*rs + j*cs ];
+      if ( bli_isnan( tv.real ) || bli_isnan( tv.imag )) {
+        // Stop at the first hit; the former "break" only left the inner
+        // loop, so the remaining rows were still scanned.
+        return bli_isnan( tv.real ) ? tv.real : tv.imag;
+      }
+    }
+  }
+  return 0.0;
+}
+
+//*  --------------------------------------------------------------------------
+//*> NaN probe for the SYR2K result matrix c.
+//*> Chooses the strides used for the scan (unit row stride paired with c's
+//*> column stride, otherwise c's row stride paired with a unit column stride)
+//*> and forwards to the real or complex scanner selected by dt.
+//*> Returns that scanner's result; for an unsupported dt
+//*> bli_check_error_code() is invoked and 0.0 is returned.
+//*  --------------------------------------------------------------------------
+double libblis_check_nan_syr2k(obj_t* c, num_t dt ) {
+  // A unit row stride keeps the object's column stride; anything else keeps
+  // the row stride and scans with a unit column stride.
+  const bool unit_row_stride = ( bli_obj_row_stride( c ) == 1 );
+
+  dim_t row_step;
+  dim_t col_step;
+  if( unit_row_stride ) {
+    row_step = 1;
+    col_step = bli_obj_col_stride( c );
+  } else {
+    row_step = bli_obj_row_stride( c );
+    col_step = 1 ;
+  }
+
+  if( dt == BLIS_FLOAT ) {
+    return libblis_check_nan_real<float>( row_step, col_step, c );
+  }
+  if( dt == BLIS_DOUBLE ) {
+    return libblis_check_nan_real<double>( row_step, col_step, c );
+  }
+  if( dt == BLIS_SCOMPLEX ) {
+    return libblis_check_nan_complex<scomplex>( row_step, col_step, c );
+  }
+  if( dt == BLIS_DCOMPLEX ) {
+    return libblis_check_nan_complex<dcomplex>( row_step, col_step, c );
+  }
+
+  bli_check_error_code( BLIS_INVALID_DATATYPE );
+  return 0.0;
+}
--- a/gtestsuite/src/ref_syrk.cpp
+++ b/gtestsuite/src/ref_syrk.cpp
@@ -0,0 +1,503 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_syrk.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> C := alpha*A*A**T + beta*C,
+//*>      or
+//*> C := alpha*A**T*A + beta*C,
+//*  ==========================================================================
+
+// Reference SYRK for real element types T (the U parameter is not used).
+//
+// Updates, in place, the `uplo` triangle of the N x N matrix C:
+//   C := alpha*A*A**T + beta*C   when trans is a no-transpose value
+//                                (A is indexed as N x K), or
+//   C := alpha*A**T*A + beta*C   otherwise (A is indexed as K x N).
+// Element (i,j) of A is A[i*rsa + j*csa]; element (i,j) of C is
+// C[i*rsc + j*csc]. alpha and beta are read through their first element.
+// The other triangle of C is never touched.
+template <typename T, typename U>
+void libblis_isyrk_check(uplo_t uplo, trans_t trans, dim_t N, dim_t K,
+  T* alpha, T* A, dim_t rsa, dim_t csa, T* beta, T* C, dim_t rsc, dim_t csc) {
+
+  const T zero    = 0.0;
+  const T one     = 1.0;
+  const T alpha_v = alpha[0];
+  const T beta_v  = beta[0];
+
+  const bool upper   = (uplo == BLIS_UPPER);
+  const bool notrans = (trans == BLIS_NO_TRANSPOSE) || (trans == BLIS_CONJ_NO_TRANSPOSE);
+
+  //* Nothing to do for an empty C, or when the update is the identity.
+  if (N == 0) {
+    return;
+  }
+  if (((alpha_v == zero) || (K == 0)) && (beta_v == one)) {
+    return;
+  }
+
+  //* Rows [first_row(j), end_row(j)) of column j belong to the stored triangle.
+  auto first_row = [&](dim_t col) -> dim_t { return upper ? (dim_t)0 : col; };
+  auto end_row   = [&](dim_t col) -> dim_t { return upper ? (dim_t)(col + 1) : N; };
+
+  //* C := beta*C on the stored triangle; beta == 0 overwrites without reading C.
+  auto scale_triangle = [&]() {
+    for(dim_t col = 0 ; col < N ; col++) {
+      const dim_t stop = end_row(col);
+      for(dim_t row = first_row(col) ; row < stop ; row++) {
+        T& c_elem = C[row*rsc + col*csc];
+        if (beta_v == zero) {
+          c_elem = zero;
+        }
+        else {
+          c_elem = beta_v*c_elem;
+        }
+      }
+    }
+  };
+
+  //* alpha == 0: only the beta scaling remains.
+  if (alpha_v == zero) {
+    scale_triangle();
+    return;
+  }
+
+  if (notrans) {
+    //* Form  C := alpha*A*A**T + beta*C: scale first, then accumulate
+    //* one rank-1 contribution per column l of A.
+    if (beta_v != one) {
+      scale_triangle();
+    }
+
+    for(dim_t col = 0 ; col < N ; col++) {
+      const dim_t start = first_row(col);
+      const dim_t stop  = end_row(col);
+      for(dim_t l = 0 ; l < K ; l++) {
+        if (A[col*rsa + l*csa] != zero) {
+          const T scaled = alpha_v*A[col*rsa + l*csa];
+          for(dim_t row = start ; row < stop ; row++) {
+            C[row*rsc + col*csc] = C[row*rsc + col*csc] + scaled*A[row*rsa + l*csa];
+          }
+        }
+      }
+    }
+    return;
+  }
+
+  //* Form  C := alpha*A**T*A + beta*C: one dot product per stored element.
+  for(dim_t col = 0 ; col < N ; col++) {
+    const dim_t stop = end_row(col);
+    for(dim_t row = first_row(col) ; row < stop ; row++) {
+      T dot = zero;
+      for(dim_t l = 0 ; l < K ; l++) {
+        dot = dot + A[l*rsa + row*csa]*A[l*rsa + col*csa];
+      }
+      if (beta_v == zero) {
+        C[row*rsc + col*csc] = alpha_v*dot;
+      }
+      else {
+        C[row*rsc + col*csc] = alpha_v*dot + beta_v*C[row*rsc + col*csc];
+      }
+    }
+  }
+}
+
+// Reference SYRK for complex element types T, which must expose `real` and
+// `imag` members (the U parameter is not used).
+//
+// Updates, in place, the `uplo` triangle of the N x N matrix C:
+//   C := alpha*A*A**T + beta*C   when trans is a no-transpose value
+//                                (A is indexed as N x K), or
+//   C := alpha*A**T*A + beta*C   otherwise (A is indexed as K x N).
+// Element (i,j) of A is A[i*rsa + j*csa]; element (i,j) of C is
+// C[i*rsc + j*csc]. No conjugation is applied. The other triangle of C is
+// never touched.
+//
+// A complex scalar counts as zero (or one) only when BOTH its real and
+// imaginary parts match; testing a single component would misclassify
+// purely real or purely imaginary values of alpha/beta.
+template <typename T, typename U>
+void libblis_icsyrk_check(uplo_t uplo, trans_t trans, dim_t N, dim_t K,
+  T* alpha, T* A, dim_t rsa, dim_t csa, T* beta, T* C, dim_t rsc, dim_t csc) {
+
+  //* .. Local Scalars ..
+  T tmp;
+  dim_t i, j, l;
+  bool UPPER, NOTRANS;
+  T Alpha = *alpha;
+  T Beta  = *beta;
+
+  //* .. Parameters ..
+  T ONE, ZERO;
+  ONE  = {1.0 , 0.0};
+  ZERO = {0.0 , 0.0};
+
+  UPPER    = (uplo == BLIS_UPPER);
+  NOTRANS  = (trans == BLIS_NO_TRANSPOSE) || (trans == BLIS_CONJ_NO_TRANSPOSE);
+
+  //* Full complex comparisons against ZERO and ONE.
+  auto is_zero = [&](const T& v) {
+    return (v.real == ZERO.real) && (v.imag == ZERO.imag);
+  };
+  auto is_one = [&](const T& v) {
+    return (v.real == ONE.real) && (v.imag == ONE.imag);
+  };
+
+  const bool alpha_is_zero = is_zero(Alpha);
+  const bool beta_is_zero  = is_zero(Beta);
+  const bool beta_is_one   = is_one(Beta);
+
+  //* Quick return if possible.
+  if((N == 0) || ((alpha_is_zero || (K == 0)) && beta_is_one)) {
+    return;
+  }
+
+  //* C := beta*C on the stored triangle; beta == 0 overwrites without
+  //* reading C. Rows [ibeg, iend) of column j are the stored ones.
+  auto scale_triangle = [&]() {
+    for(j = 0 ; j < N ; j++) {
+      const dim_t ibeg = UPPER ? (dim_t)0 : j;
+      const dim_t iend = UPPER ? (dim_t)(j + 1) : N;
+      for(i = ibeg ; i < iend ; i++) {
+        if(beta_is_zero) {
+          C[i*rsc + j*csc] = ZERO;
+        }
+        else {
+          C[i*rsc + j*csc] = mulc<T>(Beta , C[i*rsc + j*csc]);
+        }
+      }
+    }
+  };
+
+  //*     And when  alpha.eq.zero.
+  if(alpha_is_zero) {
+    scale_triangle();
+    return;
+  }
+
+  //* Start the operations.
+  if(NOTRANS) {
+    //*        Form  C := alpha*A*A**T + beta*C.
+    if(!beta_is_one) {
+      scale_triangle();
+    }
+
+    for(j = 0 ; j < N ; j++) {
+      const dim_t ibeg = UPPER ? (dim_t)0 : j;
+      const dim_t iend = UPPER ? (dim_t)(j + 1) : N;
+      for(l = 0 ; l < K ; l++) {
+        //* Skip columns of A whose contribution is exactly zero.
+        if(!is_zero(A[j*rsa + l*csa])) {
+          tmp = mulc<T>(Alpha , A[j*rsa + l*csa]);
+          for(i = ibeg ; i < iend ; i++) {
+            C[i*rsc + j*csc] = addc<T>(C[i*rsc + j*csc] , mulc<T>(tmp , A[i*rsa + l*csa]));
+          }
+        }
+      }
+    }
+  }
+  else {
+    //*        Form  C := alpha*A**T*A + beta*C.
+    for(j = 0 ; j < N ; j++) {
+      const dim_t ibeg = UPPER ? (dim_t)0 : j;
+      const dim_t iend = UPPER ? (dim_t)(j + 1) : N;
+      for(i = ibeg ; i < iend ; i++) {
+        tmp = ZERO;
+        for(l = 0 ; l < K ; l++) {
+          tmp = addc<T>(tmp , mulc<T>(A[l*rsa + i*csa] , A[l*rsa + j*csa]));
+        }
+        if(beta_is_zero) {
+          C[i*rsc + j*csc] = mulc<T>(Alpha , tmp);
+        }
+        else {
+          C[i*rsc + j*csc] = addc<T>(mulc<T>(Alpha , tmp) , mulc<T>(Beta , C[i*rsc + j*csc]));
+        }
+      }
+    }
+  }
+  return;
+}
+
+// Validates a SYRK result against the reference kernels in this file.
+//
+// The reference update is applied in place to the buffer of `c_orig`, using
+// the strides of `c`, and the M x M result is then compared with the buffer
+// of `c` through computediffrm (real types) or computediffim (complex types).
+// The value returned by that comparison is the return value.
+//
+// Returns 0.0 without computing anything when the leading-dimension guard
+// below rejects the operands. For an unsupported datatype
+// bli_check_error_code( BLIS_INVALID_DATATYPE ) is invoked and 0.0 is
+// returned. `params` is not used.
+double libblis_test_isyrk_check(
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         a,
+  obj_t*         beta,
+  obj_t*         c,
+  obj_t*         c_orig
+){
+  num_t   dt     = bli_obj_dt( a );
+  dim_t   M      = bli_obj_length( c );
+  dim_t   K      = bli_obj_width_after_trans( a );
+  uplo_t  uploc  = bli_obj_uplo( c );
+  trans_t transa = bli_obj_onlytrans_status( a );
+
+  dim_t rsa = bli_obj_row_stride( a );
+  dim_t csa = bli_obj_col_stride( a );
+  dim_t rsc = bli_obj_row_stride( c );
+  dim_t csc = bli_obj_col_stride( c );
+
+  // Leading dimension of A, picked according to the storage of C.
+  f77_int lda = bli_obj_is_col_stored( c ) ? bli_obj_col_stride( a )
+                                           : bli_obj_row_stride( a );
+  // Row count used by the guard below (M when A is not transposed, else K).
+  int nrowa = ( transa == BLIS_NO_TRANSPOSE ) ? M : K;
+
+  // Operands failing this check are skipped and reported as a zero residual.
+  if( lda < max(1, nrowa) ) {
+    return 0.0;
+  }
+
+  switch( dt )  {
+    case BLIS_FLOAT :
+    {
+      float* alpha_ptr = (float*) bli_obj_buffer( alpha );
+      float* a_buf     = (float*) bli_obj_buffer( a );
+      float* beta_ptr  = (float*) bli_obj_buffer( beta );
+      float* ref_buf   = (float*) bli_obj_buffer( c_orig );
+      float* out_buf   = (float*) bli_obj_buffer( c );
+      libblis_isyrk_check<float, int32_t>(uploc, transa, M, K, alpha_ptr,
+                              a_buf, rsa, csa, beta_ptr, ref_buf, rsc, csc);
+      return computediffrm(M, M, out_buf, ref_buf, rsc, csc);
+    }
+    case BLIS_DOUBLE :
+    {
+      double* alpha_ptr = (double*) bli_obj_buffer( alpha );
+      double* a_buf     = (double*) bli_obj_buffer( a );
+      double* beta_ptr  = (double*) bli_obj_buffer( beta );
+      double* ref_buf   = (double*) bli_obj_buffer( c_orig );
+      double* out_buf   = (double*) bli_obj_buffer( c );
+      libblis_isyrk_check<double, int64_t>(uploc, transa, M, K, alpha_ptr,
+                              a_buf, rsa, csa, beta_ptr, ref_buf, rsc, csc);
+      return computediffrm(M, M, out_buf, ref_buf, rsc, csc);
+    }
+    case BLIS_SCOMPLEX :
+    {
+      scomplex* alpha_ptr = (scomplex*) bli_obj_buffer( alpha );
+      scomplex* a_buf     = (scomplex*) bli_obj_buffer( a );
+      scomplex* beta_ptr  = (scomplex*) bli_obj_buffer( beta );
+      scomplex* ref_buf   = (scomplex*) bli_obj_buffer( c_orig );
+      scomplex* out_buf   = (scomplex*) bli_obj_buffer( c );
+      libblis_icsyrk_check<scomplex, int32_t>(uploc, transa, M, K, alpha_ptr,
+                              a_buf, rsa, csa, beta_ptr, ref_buf, rsc, csc);
+      return computediffim(M, M, out_buf, ref_buf, rsc, csc);
+    }
+    case BLIS_DCOMPLEX :
+    {
+      dcomplex* alpha_ptr = (dcomplex*) bli_obj_buffer( alpha );
+      dcomplex* a_buf     = (dcomplex*) bli_obj_buffer( a );
+      dcomplex* beta_ptr  = (dcomplex*) bli_obj_buffer( beta );
+      dcomplex* ref_buf   = (dcomplex*) bli_obj_buffer( c_orig );
+      dcomplex* out_buf   = (dcomplex*) bli_obj_buffer( c );
+      libblis_icsyrk_check<dcomplex, int64_t>(uploc, transa, M, K, alpha_ptr,
+                              a_buf, rsa, csa, beta_ptr, ref_buf, rsc, csc);
+      return computediffim(M, M, out_buf, ref_buf, rsc, csc);
+    }
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return 0.0;
+}
+
+// Scans the M x N matrix held by `c` for NaN values, where M and N are the
+// object's length and width and element (i,j) is read from
+// buffer[ i*rsc + j*csc ].
+//
+// Returns the first NaN encountered (converted to double), or 0.0 when the
+// matrix contains no NaN.
+template <typename T>
+double libblis_check_nan_real( dim_t rsc, dim_t csc, obj_t* c ) {
+  dim_t  M = bli_obj_length( c );
+  dim_t  N = bli_obj_width( c );
+  T* C = (T*) bli_obj_buffer( c );
+
+  for( dim_t i = 0 ; i < M ; i++ ) {
+    for( dim_t j = 0 ; j < N ; j++ ) {
+      auto tv = C[ i*rsc + j*csc ];
+      if ( bli_isnan( tv )) {
+        // Leave both loops at once: a plain `break` would only exit the
+        // inner loop and keep scanning the remaining rows for nothing.
+        return tv;
+      }
+    }
+  }
+  return 0.0;
+}
+
+// Scans the M x N complex matrix held by `c` for NaN components, where M and
+// N are the object's length and width and element (i,j) is read from
+// buffer[ i*rsc + j*csc ]. U is the complex element type and must expose
+// `real` and `imag` members; T is not used by the body.
+//
+// Returns the first NaN component encountered (the real part is checked
+// before the imaginary part), or 0.0 when no component is NaN.
+template <typename U, typename T>
+double libblis_check_nan_complex( dim_t rsc, dim_t csc, obj_t* c ) {
+  dim_t  M = bli_obj_length( c );
+  dim_t  N = bli_obj_width( c );
+  U* C = (U*) bli_obj_buffer( c );
+
+  for( dim_t i = 0 ; i < M ; i++ ) {
+    for( dim_t j = 0 ; j < N ; j++ ) {
+      auto tv = C[ i*rsc + j*csc ];
+      // Return directly instead of `break`, which would only leave the
+      // inner loop and keep scanning the remaining rows.
+      if ( bli_isnan( tv.real ) ) {
+        return tv.real;
+      }
+      if ( bli_isnan( tv.imag ) ) {
+        return tv.imag;
+      }
+    }
+  }
+  return 0.0;
+}
+
+// NaN check for a SYRK output matrix `c` of datatype `dt`.
+//
+// The strides handed to the scanners are normalised: when
+// bli_obj_is_col_stored( c ) holds the pair (1, col_stride) is used,
+// otherwise (row_stride, 1).
+// Returns whatever the type-specific scanner returns; for an unsupported
+// datatype bli_check_error_code( BLIS_INVALID_DATATYPE ) is invoked and 0.0
+// is returned.
+double libblis_check_nan_syrk(obj_t* c, num_t dt ) {
+  dim_t rsc = 1;
+  dim_t csc = 1;
+  if( bli_obj_is_col_stored( c ) ) {
+    csc = bli_obj_col_stride( c );
+  } else {
+    rsc = bli_obj_row_stride( c );
+  }
+
+  if( dt == BLIS_FLOAT ) {
+    return libblis_check_nan_real<float>( rsc, csc, c );
+  }
+  if( dt == BLIS_DOUBLE ) {
+    return libblis_check_nan_real<double>( rsc, csc, c );
+  }
+  if( dt == BLIS_SCOMPLEX ) {
+    return libblis_check_nan_complex<scomplex, float>( rsc, csc, c );
+  }
+  if( dt == BLIS_DCOMPLEX ) {
+    return libblis_check_nan_complex<dcomplex, double>( rsc, csc, c );
+  }
+
+  // Unknown datatype: report it and fall back to "no NaN found".
+  bli_check_error_code( BLIS_INVALID_DATATYPE );
+  return 0.0;
+}
+
--- a/gtestsuite/src/ref_trmm.cpp
+++ b/gtestsuite/src/ref_trmm.cpp
@@ -0,0 +1,651 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_trmm.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> TRMM  performs one of the matrix-matrix operations
+//*>    B := alpha*op( A )*B,   or   B := alpha*B*op( A )
+//*> where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
+//*> non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+//*>    op( A ) = A   or   op( A ) = A**T   or   op( A ) = A**H.
+//*  ==========================================================================
+
+// Reference TRMM for real element types T (the U parameter is not used).
+//
+// Overwrites the M x N matrix B in place with
+//   alpha*op( A )*B   when side == BLIS_LEFT, or
+//   alpha*B*op( A )   otherwise,
+// where op( A ) = A when transa == BLIS_NO_TRANSPOSE and A**T for any other
+// value of transa.
+//
+// Element (i,j) of A is A[i*rsa + j*csa]; element (i,j) of B is
+// B[i*rsb + j*csb]. Only the triangle of A selected by `uplo` is read, and
+// the diagonal of A is read only when diag == BLIS_NONUNIT_DIAG (otherwise it
+// is treated as all ones). A itself is never modified.
+//
+// The loop directions below are chosen so that each entry of B is consumed
+// before it is overwritten; they must not be reordered.
+template <typename T, typename U>
+void libblis_itrmm_check(side_t side, uplo_t uplo, trans_t transa,
+  diag_t diag, dim_t M, dim_t N, T Alpha, T* A, dim_t rsa, dim_t csa,
+  T* B, dim_t rsb, dim_t csb)
+{
+    T tmp;
+    // Signed indices: several loops below count down to 0 with `>= 0`.
+    int i, j, k;
+    bool LSIDE, NOUNIT, UPPER, NOTRANSA;
+
+    T ONE  = 1.0;
+    T ZERO = 0.0;
+
+    LSIDE    = ( side == BLIS_LEFT );
+    NOTRANSA = ( transa == BLIS_NO_TRANSPOSE );
+    NOUNIT   = ( diag == BLIS_NONUNIT_DIAG );
+    UPPER    = ( uplo == BLIS_UPPER );
+
+    //* Quick return for an empty B.
+    if( M == 0 || N == 0 )
+      return;
+
+    //* alpha == 0: the result is the zero matrix, A is not referenced.
+    if( Alpha == ZERO )
+    {
+        for( j = 0 ; j < N ; j++ )
+        {
+            for( i = 0 ; i < M ; i++ )
+            {
+                B[i*rsb + j*csb] = ZERO;
+            }
+        }
+        return;
+    }
+
+
+    if( LSIDE )
+    {
+        if( NOTRANSA )
+        {
+            //* Form  B := alpha*A*B.
+            if( UPPER )
+            {
+                for( j = 0 ; j < N ; j++ )
+                {
+                    //* Ascending k: rows i < k updated here are already final
+                    //* inputs-consumed, B[k,j] is still the original value.
+                    for( k = 0 ; k < M ; k++ )
+                    {
+                        if( B[k*rsb + j*csb] != ZERO )
+                        {
+                            tmp = Alpha*B[k*rsb + j*csb];
+                            for( i = 0 ;  i < k ; i++ )
+                            {
+                                B[i*rsb + j*csb] = B[i*rsb + j*csb] + tmp*A[i*rsa + k*csa];
+                            }
+                            if( NOUNIT )
+                                tmp = tmp*A[k*rsa + k*csa];
+                            B[k*rsb + j*csb] = tmp;
+                        }
+                    }
+                }
+            }
+            else
+            {
+                for( j = 0; j < N ; j++ )
+                {
+                    //* Descending k: rows i > k are updated from the still
+                    //* original B[k,j].
+                    for( k = (M-1) ; k >= 0 ; k-- )
+                    {
+                        if( B[k*rsb + j*csb] != ZERO )
+                        {
+                            tmp = Alpha*B[k*rsb + j*csb];
+                            B[k*rsb + j*csb] = tmp;
+                            if( NOUNIT )
+                                B[k*rsb + j*csb] = B[k*rsb + j*csb]*A[k*rsa + k*csa];
+                            for( i = (k+1) ; i < M ; i++ )
+                            {
+                                B[i*rsb + j*csb] = B[i*rsb + j*csb] + (tmp * A[i*rsa + k*csa]);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        else
+        {
+            //*           Form  B := alpha*A**T*B.
+            if( UPPER )
+            {
+                for( j = 0; j < N ; j++ )
+                {
+                    //* Descending i: B[i,j] depends only on rows k <= i,
+                    //* which have not been overwritten yet.
+                    for( i = (M-1) ; i >= 0 ; i-- )
+                    {
+                        tmp = B[i*rsb + j*csb];
+                        if( NOUNIT )
+                            tmp = tmp*A[i*rsa + i*csa];
+                        for( k = 0 ; k < i ; k++ )
+                        {
+                            tmp = tmp + A[k*rsa + i*csa]*B[k*rsb + j*csb];
+                        }
+                        B[i*rsb + j*csb] = Alpha*tmp;
+                    }
+                }
+            }
+            else
+            {
+                for( j = 0; j < N ; j++ )
+                {
+                    //* Ascending i: B[i,j] depends only on rows k >= i,
+                    //* which have not been overwritten yet.
+                    for( i = 0 ; i < M ; i++ )
+                    {
+                        tmp = B[i*rsb + j*csb];
+                        if( NOUNIT )
+                            tmp = tmp*A[i*rsa + i*csa];
+                        for( k =(i+1) ; k < M ; k++ )
+                        {
+                            tmp = tmp + A[k*rsa + i*csa]*B[k*rsb + j*csb];
+                        }
+                        B[i*rsb + j*csb] = Alpha*tmp;
+                    }
+                }
+            }
+        }
+    }
+    else
+    {
+        if( NOTRANSA )
+        {
+            //*  Form  B := alpha*B*A.
+            if( UPPER )
+            {
+                //* Descending j: column j is built from columns k < j,
+                //* which still hold their original values.
+                for( j = (N-1) ; j >= 0 ; j-- )
+                {
+                    tmp = Alpha;
+                    if( NOUNIT )
+                        tmp = tmp*A[j*rsa + j*csa];
+                    for( i = 0 ; i < M ; i++ )
+                    {
+                        B[i*rsb + j*csb] = tmp*B[i*rsb + j*csb];
+                    }
+                    for( k = 0 ; k < j ; k++ )
+                    {
+                        if( A[k*rsa + j*csa] != ZERO )
+                        {
+                            tmp = Alpha*A[k*rsa + j*csa];
+                            for( i = 0 ; i < M ; i++ )
+                            {
+                                B[i*rsb + j*csb] = B[i*rsb + j*csb] + tmp*B[i*rsb + k*csb];
+                            }
+                        }
+                    }
+                }
+            }
+            else
+            {
+                //* Ascending j: column j is built from columns k > j,
+                //* which still hold their original values.
+                for( j = 0; j < N ; j++ )
+                {
+                    tmp = Alpha;
+                    if( NOUNIT )
+                        tmp = tmp*A[j*rsa + j*csa];
+                    for( i = 0 ; i < M ; i++ )
+                    {
+                        B[i*rsb + j*csb] = tmp*B[i*rsb + j*csb];
+                    }
+                    for( k =(j+1) ; k < N ; k++ )
+                    {
+                        if( A[k*rsa + j*csa] != ZERO )
+                        {
+                            tmp = Alpha*A[k*rsa + j*csa];
+                            for( i = 0 ; i < M ; i++ )
+                            {
+                                B[i*rsb + j*csb] = B[i*rsb + j*csb] + tmp*B[i*rsb + k*csb];
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        else
+        {
+            //* Form  B := alpha*B*A**T.
+            if( UPPER )
+            {
+                //* Column k first contributes to every column j < k, and is
+                //* only then scaled by alpha (times the diagonal if non-unit).
+                for( k = 0 ; k < N ; k++ )
+                {
+                    for( j = 0 ; j < k ; j++ )
+                    {
+                        if( A[j*rsa + k*csa] != ZERO )
+                        {
+                            tmp = Alpha*A[j*rsa + k*csa];
+                            for( i = 0 ; i < M ; i++ )
+                            {
+                                B[i*rsb + j*csb] = B[i*rsb + j*csb] + tmp*B[i*rsb + k*csb];
+                            }
+                        }
+                    }
+                    tmp = Alpha;
+                    if( NOUNIT )
+                        tmp = tmp*A[k*rsa + k*csa];
+                    //* Scaling by exactly one is skipped.
+                    if( tmp != ONE )
+                    {
+                        for( i = 0 ; i < M ; i++ )
+                        {
+                            B[i*rsb + k*csb] = tmp*B[i*rsb + k*csb];
+                        }
+                    }
+                }
+            }
+            else
+            {
+                //* Mirror image of the upper case: column k contributes to
+                //* every column j > k before being scaled itself.
+                for( k = (N-1) ; k >= 0 ; k-- )
+                {
+                    for( j = (k+1) ; j < N ; j++ )
+                    {
+                        if( A[j*rsa + k*csa] != ZERO )
+                        {
+                            tmp = Alpha*A[j*rsa + k*csa];
+                            for( i = 0 ; i < M ; i++ )
+                            {
+                                B[i*rsb + j*csb] = B[i*rsb + j*csb] + tmp*B[i*rsb + k*csb];
+                            }
+                        }
+                    }
+                    tmp = Alpha;
+                    if( NOUNIT )
+                        tmp = tmp*A[k*rsa + k*csa];
+                    //* Scaling by exactly one is skipped.
+                    if( tmp != ONE )
+                    {
+                        for( i = 0 ; i < M ; i++ )
+                        {
+                            B[i*rsb + k*csb] = tmp*B[i*rsb + k*csb];
+                        }
+                    }
+                }
+            }
+        }
+    }
+    return;
+}
+
+// Reference TRMM for complex element types T, which must expose `real` and
+// `imag` members (the U parameter is not used).
+//
+// Overwrites the M x N matrix B in place with
+//   alpha*op( A )*B   when side == BLIS_LEFT, or
+//   alpha*B*op( A )   otherwise,
+// where op( A ) is A when transa == BLIS_NO_TRANSPOSE and the transpose of A
+// for any other value of transa. When `conja` is true the elements of A are
+// conjugated first, which turns the transposed form into A**H.
+//
+// Element (i,j) of A is A[i*rsa + j*csa]; element (i,j) of B is
+// B[i*rsb + j*csb]. The diagonal of A is used only when
+// diag == BLIS_NONUNIT_DIAG.
+//
+// NOTE: with `conja` set, the leading dim x dim square of A (dim = M for the
+// left side, N otherwise) is conjugated IN PLACE and is not restored, so the
+// caller's buffer is modified.
+//
+// The loop directions below are chosen so that each entry of B is consumed
+// before it is overwritten; they must not be reordered.
+template <typename T, typename U>
+void libblis_ictrmm_check(side_t side, uplo_t uplo, trans_t transa,
+  diag_t diag, dim_t M, dim_t N, T Alpha, T* A, dim_t rsa, dim_t csa,
+  bool conja, T* B, dim_t rsb, dim_t csb)
+{
+    T tmp;
+    // Signed indices: several loops below count down to 0 with `>= 0`.
+    int i, j, k;
+    bool LSIDE, NOTRANSA, NOUNIT, UPPER;
+
+    T ONE  = { 1.0 , 0.0 };
+    T ZERO = { 0.0 , 0.0 };
+
+    //*     Test the input parameters.
+    LSIDE    = ( side == BLIS_LEFT );
+    NOTRANSA = ( transa == BLIS_NO_TRANSPOSE );
+    NOUNIT   = ( diag == BLIS_NONUNIT_DIAG );
+    UPPER    = ( uplo == BLIS_UPPER );
+
+    //* Quick return for an empty B.
+    if( M == 0 || N == 0 )
+      return;
+
+    //* alpha == 0 requires BOTH components to be zero. Testing them with
+    //* `||` would zero B for every purely real or purely imaginary alpha.
+    if( (Alpha.real == ZERO.real) && (Alpha.imag == ZERO.imag) )
+    {
+        for( j = 0 ; j < N ; j++ )
+        {
+            for( i = 0 ; i < M ; i++ )
+            {
+                B[i*rsb + j*csb] = ZERO;
+            }
+        }
+        return;
+    }
+
+    //* Conjugate A in place when requested (see NOTE in the header).
+    if( conja )
+    {
+        dim_t dim;
+        if (LSIDE)         dim = M;
+        else               dim = N;
+        for( i = 0 ; i < dim ; i++ )
+        {
+            for( j = 0 ; j < dim ; j++ )
+            {
+                A[i*rsa + j*csa] = conjugate<T>(A[i*rsa + j*csa]);
+            }
+        }
+    }
+
+    if( LSIDE )
+    {
+        if( NOTRANSA )
+        {
+            //* Form  B := alpha*A*B.
+            if( UPPER )
+            {
+                for( j = 0 ; j < N ; j++ )
+                {
+                    //* Ascending k: B[k,j] is still the original value when
+                    //* it is used to update the rows above it.
+                    for( k = 0 ; k < M ; k++ )
+                    {
+                        if( (B[k*rsb + j*csb].real != ZERO.real) || (B[k*rsb + j*csb].imag != ZERO.imag) )
+                        {
+                            tmp = mulc<T>(Alpha , B[k*rsb + j*csb]);
+                            for( i = 0 ;  i < k ; i++ )
+                            {
+                                B[i*rsb + j*csb] = addc<T>(B[i*rsb + j*csb] , mulc<T>(tmp , A[i*rsa + k*csa]));
+                            }
+                            if( NOUNIT )
+                                tmp = mulc<T>(tmp , A[k*rsa + k*csa]);
+                            B[k*rsb + j*csb] = tmp;
+                        }
+                    }
+                }
+            }
+            else
+            {
+                for( j = 0 ; j < N ; j++ )
+                {
+                    //* Descending k: rows below k are updated from the still
+                    //* original B[k,j].
+                    for( k = (M-1) ; k >= 0 ; k-- )
+                    {
+                        if( (B[k*rsb + j*csb].real != ZERO.real) || (B[k*rsb + j*csb].imag != ZERO.imag) )
+                        {
+                            tmp = mulc<T>(Alpha , B[k*rsb + j*csb]);
+                            B[k*rsb + j*csb] = tmp;
+                            if( NOUNIT )
+                                B[k*rsb + j*csb] = mulc<T>(B[k*rsb + j*csb] , A[k*rsa + k*csa]);
+                            for( i = (k+1) ; i < M ; i++ )
+                            {
+                                B[i*rsb + j*csb] = addc<T>(B[i*rsb + j*csb] , mulc<T>(tmp , A[i*rsa + k*csa]));
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        else
+        {
+            //* Form  B := alpha*A**T*B   or   B := alpha*A**H*B.
+            if( UPPER )
+            {
+                for( j = 0; j < N ; j++ )
+                {
+                    //* Descending i: B[i,j] depends only on rows k <= i,
+                    //* which have not been overwritten yet.
+                    for( i = (M-1) ; i >= 0 ; i-- )
+                    {
+                        tmp = B[i*rsb + j*csb];
+                        if( NOUNIT )
+                            tmp = mulc<T>(tmp , A[i*rsa + i*csa]);
+                        for( k = 0 ; k < i ; k++ )
+                        {
+                            tmp = addc<T>(tmp , mulc<T>(A[k*rsa + i*csa] , B[k*rsb + j*csb]));
+                        }
+                        B[i*rsb + j*csb] = mulc<T>(Alpha , tmp);
+                    }
+                }
+            }
+            else
+            {
+                for( j = 0; j < N ; j++ )
+                {
+                    //* Ascending i: B[i,j] depends only on rows k >= i,
+                    //* which have not been overwritten yet.
+                    for( i = 0 ; i < M ; i++ )
+                    {
+                        tmp = B[i*rsb + j*csb];
+                        if( NOUNIT )
+                            tmp = mulc<T>(tmp , A[i*rsa + i*csa]);
+                        for( k =(i+1) ; k < M ; k++ )
+                        {
+                            tmp = addc<T>(tmp , mulc<T>(A[k*rsa + i*csa] , B[k*rsb + j*csb]));
+                        }
+                        B[i*rsb + j*csb] = mulc<T>(Alpha , tmp);
+                    }
+                }
+            }
+        }
+    }
+    else
+    {
+        if( NOTRANSA )
+        {
+            //*  Form  B := alpha*B*A.
+            if( UPPER )
+            {
+                //* Descending j: column j is built from columns k < j,
+                //* which still hold their original values.
+                for( j = (N-1) ; j >= 0 ; j-- )
+                {
+                    tmp = Alpha;
+                    if( NOUNIT )
+                        tmp = mulc<T>(tmp , A[j*rsa + j*csa]);
+                    for( i = 0 ; i < M ; i++ )
+                    {
+                        B[i*rsb + j*csb] = mulc<T>(tmp , B[i*rsb + j*csb]);
+                    }
+                    for( k = 0 ; k < j ; k++ )
+                    {
+                        if( (A[k*rsa + j*csa].real != ZERO.real)||(A[k*rsa + j*csa].imag != ZERO.imag) )
+                        {
+                            tmp = mulc<T>(Alpha , A[k*rsa + j*csa]);
+                            for( i = 0 ; i < M ; i++ )
+                            {
+                                B[i*rsb + j*csb] = addc<T>(B[i*rsb + j*csb] , mulc<T>(tmp , B[i*rsb + k*csb]));
+                            }
+                        }
+                    }
+                }
+            }
+            else
+            {
+                //* Ascending j: column j is built from columns k > j,
+                //* which still hold their original values.
+                for( j = 0; j < N ; j++ )
+                {
+                    tmp = Alpha;
+                    if( NOUNIT )
+                        tmp = mulc<T>(tmp , A[j*rsa + j*csa]);
+                    for( i = 0 ; i < M ; i++ )
+                    {
+                        B[i*rsb + j*csb] = mulc<T>(tmp , B[i*rsb + j*csb]);
+                    }
+                    for( k =(j+1) ; k < N ; k++ )
+                    {
+                        if( (A[k*rsa + j*csa].real != ZERO.real)||(A[k*rsa + j*csa].imag != ZERO.imag) )
+                        {
+                            tmp = mulc<T>(Alpha , A[k*rsa + j*csa]);
+                            for( i = 0 ; i < M ; i++ )
+                            {
+                                B[i*rsb + j*csb] = addc<T>(B[i*rsb + j*csb] , mulc<T>(tmp , B[i*rsb + k*csb]));
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        else
+        {
+            //* Form  B := alpha*B*A**T   or   B := alpha*B*A**H.
+            if( UPPER )
+            {
+                //* Column k first contributes to every column j < k, and is
+                //* only then scaled by alpha (times the diagonal if non-unit).
+                for( k = 0 ; k < N ; k++ )
+                {
+                    for( j = 0 ; j < k ; j++ )
+                    {
+                        if( (A[j*rsa + k*csa].real != ZERO.real)||(A[j*rsa + k*csa].imag != ZERO.imag) )
+                        {
+                            tmp = mulc<T>(Alpha , A[j*rsa + k*csa]);
+                            for( i = 0 ; i < M ; i++ )
+                            {
+                                B[i*rsb + j*csb] = addc<T>(B[i*rsb + j*csb] , mulc<T>(tmp , B[i*rsb + k*csb]));
+                            }
+                        }
+                    }
+                    tmp = Alpha;
+                    if( NOUNIT )
+                        tmp = mulc<T>(tmp , A[k*rsa + k*csa]);
+                    //* ONE.imag is 0, so this is a full "tmp != 1" test;
+                    //* scaling by exactly one is skipped.
+                    if( (tmp.real != ONE.real) || (tmp.imag != ONE.imag) )
+                    {
+                        for( i = 0 ; i < M ; i++ )
+                        {
+                            B[i*rsb + k*csb] = mulc<T>(tmp , B[i*rsb + k*csb]);
+                        }
+                    }
+                }
+            }
+            else
+            {
+                //* Mirror image of the upper case: column k contributes to
+                //* every column j > k before being scaled itself.
+                for( k = (N-1) ; k >= 0 ; k-- )
+                {
+                    for( j = (k+1) ; j < N ; j++ )
+                    {
+                        if( (A[j*rsa + k*csa].real != ZERO.real)||(A[j*rsa + k*csa].imag != ZERO.imag) )
+                        {
+                            tmp = mulc<T>(Alpha , A[j*rsa + k*csa]);
+                            for( i = 0 ; i < M ; i++ )
+                            {
+                                B[i*rsb + j*csb] = addc<T>(B[i*rsb + j*csb] , mulc<T>(tmp , B[i*rsb + k*csb]));
+                            }
+                        }
+                    }
+                    tmp = Alpha;
+                    if( NOUNIT )
+                        tmp = mulc<T>(tmp , A[k*rsa + k*csa]);
+                    //* ONE.imag is 0, so this is a full "tmp != 1" test;
+                    //* scaling by exactly one is skipped.
+                    if( (tmp.real != ONE.real) || (tmp.imag != ONE.imag) )
+                    {
+                        for( i = 0 ; i < M ; i++ )
+                        {
+                            B[i*rsb + k*csb] = mulc<T>(tmp , B[i*rsb + k*csb]);
+                        }
+                    }
+                }
+            }
+        }
+    }
+    return;
+}
+
+// Checks a trmm result against the reference implementation in this file.
+//
+// The reference product is computed in place in the buffer of b_orig (which
+// is therefore overwritten) and then compared with the buffer of b.
+//
+//   params  - test parameters; not referenced by this routine.
+//   side    - side on which the triangular matrix A is applied.
+//   alpha   - scalar object; its buffer is read as one element of type dt.
+//   a       - triangular matrix; uplo, trans, conj and diag are read from it.
+//   b       - matrix being verified (presumably the library output).
+//   b_orig  - original B; overwritten with the reference result.
+//   dt      - datatype used to interpret every buffer.
+//
+// Returns the value produced by computediffrm (real types) or computediffim
+// (complex types). For any other dt, bli_check_error_code() is invoked and
+// the residual is left at 0.0.
+double libblis_test_itrmm_check(
+  test_params_t* params,
+  side_t         side,
+  obj_t*         alpha,
+  obj_t*         a,
+  obj_t*         b,
+  obj_t*         b_orig,
+  num_t          dt
+){
+  // Properties of the triangular operand.
+  uplo_t  uploa  = bli_obj_uplo( a );
+  trans_t transa = bli_obj_onlytrans_status( a );
+  diag_t  diaga  = bli_obj_diag( a );
+  bool    conja  = bli_obj_has_conj( a );
+
+  // The problem size is taken from the original B.
+  dim_t rows = bli_obj_length( b_orig );
+  dim_t cols = bli_obj_width( b_orig );
+
+  // Strides of A come from a; both B buffers are indexed with b's strides.
+  dim_t rsa = bli_obj_row_stride( a );
+  dim_t csa = bli_obj_col_stride( a );
+  dim_t rsb = bli_obj_row_stride( b );
+  dim_t csb = bli_obj_col_stride( b );
+
+  double residual = 0.0;
+
+  if( dt == BLIS_FLOAT )
+  {
+    float* alphaPtr = (float*) bli_obj_buffer( alpha );
+    float* aBuf     = (float*) bli_obj_buffer( a );
+    float* refBuf   = (float*) bli_obj_buffer( b_orig );
+    float* outBuf   = (float*) bli_obj_buffer( b );
+    libblis_itrmm_check<float, int32_t>(side, uploa, transa, diaga,
+                 rows, cols, *alphaPtr, aBuf, rsa, csa, refBuf, rsb, csb );
+    residual = computediffrm(rows, cols, outBuf, refBuf, rsb, csb);
+  }
+  else if( dt == BLIS_DOUBLE )
+  {
+    double* alphaPtr = (double*) bli_obj_buffer( alpha );
+    double* aBuf     = (double*) bli_obj_buffer( a );
+    double* refBuf   = (double*) bli_obj_buffer( b_orig );
+    double* outBuf   = (double*) bli_obj_buffer( b );
+    libblis_itrmm_check<double, int64_t>(side, uploa, transa, diaga,
+                 rows, cols, *alphaPtr, aBuf, rsa, csa, refBuf, rsb, csb );
+    residual = computediffrm(rows, cols, outBuf, refBuf, rsb, csb);
+  }
+  else if( dt == BLIS_SCOMPLEX )
+  {
+    scomplex* alphaPtr = (scomplex*) bli_obj_buffer( alpha );
+    scomplex* aBuf     = (scomplex*) bli_obj_buffer( a );
+    scomplex* refBuf   = (scomplex*) bli_obj_buffer( b_orig );
+    scomplex* outBuf   = (scomplex*) bli_obj_buffer( b );
+    libblis_ictrmm_check<scomplex, float>(side, uploa, transa, diaga,
+          rows, cols, *alphaPtr, aBuf, rsa, csa, conja, refBuf, rsb, csb );
+    residual = computediffim(rows, cols, outBuf, refBuf, rsb, csb);
+  }
+  else if( dt == BLIS_DCOMPLEX )
+  {
+    dcomplex* alphaPtr = (dcomplex*) bli_obj_buffer( alpha );
+    dcomplex* aBuf     = (dcomplex*) bli_obj_buffer( a );
+    dcomplex* refBuf   = (dcomplex*) bli_obj_buffer( b_orig );
+    dcomplex* outBuf   = (dcomplex*) bli_obj_buffer( b );
+    libblis_ictrmm_check<dcomplex, double>(side, uploa, transa, diaga,
+          rows, cols, *alphaPtr, aBuf, rsa, csa, conja, refBuf, rsb, csb );
+    residual = computediffim(rows, cols, outBuf, refBuf, rsb, csb);
+  }
+  else
+  {
+    bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return residual;
+}
+
+// Scans a real matrix for NaN entries.
+//
+//   rs, cs - row and column strides used to index the buffer of b.
+//   b      - matrix object; its length, width and buffer are read.
+//
+// Returns the first NaN element encountered (converted to double), or 0.0
+// when the matrix holds no NaN. The scan stops at the first hit: the
+// previous version only broke out of the inner loop and kept walking the
+// remaining rows even though the answer was already known.
+template <typename T>
+double libblis_check_nan_real( dim_t rs, dim_t cs, obj_t* b ) {
+  dim_t  M = bli_obj_length( b );
+  dim_t  N = bli_obj_width( b );
+  dim_t  i,j;
+  T* B = (T*) bli_obj_buffer( b );
+
+  for( i = 0 ; i < M ; i++ ) {
+    for( j = 0 ; j < N ; j++ ) {
+      auto tv = B[ i*rs + j*cs ];
+      if ( bli_isnan( tv )) {
+        return tv;
+      }
+    }
+  }
+  return 0.0;
+}
+
+// Scans a complex matrix for NaN entries.
+//
+//   rs, cs - row and column strides used to index the buffer of b.
+//   b      - matrix object; its length, width and buffer are read.
+//
+// Returns the NaN component (real part preferred over imaginary) of the
+// first element that contains one, or 0.0 when no component is NaN. The
+// scan stops at the first hit: the previous version only broke out of the
+// inner loop and kept walking the remaining rows.
+template <typename T>
+double libblis_check_nan_complex( dim_t rs, dim_t cs, obj_t* b ) {
+  dim_t  M = bli_obj_length( b );
+  dim_t  N = bli_obj_width( b );
+  dim_t  i,j;
+  T* B = (T*) bli_obj_buffer( b );
+
+  for( i = 0 ; i < M ; i++ ) {
+    for( j = 0 ; j < N ; j++ ) {
+      auto tv = B[ i*rs + j*cs ];
+      if ( bli_isnan( tv.real ) || bli_isnan( tv.imag )) {
+        return bli_isnan( tv.real ) ? tv.real : tv.imag;
+      }
+    }
+  }
+  return 0.0;
+}
+
+// Reports whether the trmm output matrix b contains a NaN.
+//
+//   b  - matrix object to scan.
+//   dt - datatype selecting the real or complex scanner.
+//
+// Returns whatever the selected scanner returns. For an unhandled dt,
+// bli_check_error_code() is invoked and 0.0 is returned.
+double libblis_check_nan_trmm(obj_t* b, num_t dt ) {
+  // A unit row stride is paired with the stored column stride; any other
+  // row stride is paired with a column stride of 1.
+  dim_t rowStride = bli_obj_row_stride( b );
+  dim_t colStride = ( rowStride == 1 ) ? bli_obj_col_stride( b ) : 1;
+
+  if( dt == BLIS_FLOAT )
+    return libblis_check_nan_real<float>( rowStride, colStride, b );
+
+  if( dt == BLIS_DOUBLE )
+    return libblis_check_nan_real<double>( rowStride, colStride, b );
+
+  if( dt == BLIS_SCOMPLEX )
+    return libblis_check_nan_complex<scomplex>( rowStride, colStride, b );
+
+  if( dt == BLIS_DCOMPLEX )
+    return libblis_check_nan_complex<dcomplex>( rowStride, colStride, b );
+
+  bli_check_error_code( BLIS_INVALID_DATATYPE );
+  return 0.0;
+}
+
+
--- a/gtestsuite/src/ref_trmm3.cpp
+++ b/gtestsuite/src/ref_trmm3.cpp
@@ -0,0 +1,551 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_trmm3.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> TRMM3  performs one of the matrix-matrix operations
+//*>    C := beta * C_orig + alpha * transa(A) * transb(B)
+//*> or
+//*>    C := beta * C_orig + alpha * transb(B) * transa(A)
+//*> where alpha and beta are scalars, A is a triangular matrix and  B and
+//*> C are m by n matrices.
+//*  ==========================================================================
+
+// Reference trmm3 for real element types:
+//     C := Beta * C + Alpha * A * B     (side == BLIS_LEFT)
+//     C := Beta * C + Alpha * B * A     (otherwise)
+// where only the triangle of A selected by uplo is read.
+//
+//   side, uplo, diaga - side of A, stored triangle, unit/non-unit diagonal.
+//   M, N              - dimensions of C (and of B as indexed here).
+//   Alpha, Beta       - scalars.
+//   A, rsa, csa       - triangular matrix and its strides. With a unit
+//                       diagonal the diagonal of A is OVERWRITTEN with 1.
+//   B, rsb, csb       - general matrix and its strides (read only).
+//   C, rsc, csc       - in/out matrix and its strides.
+//
+// Nothing is modified when M or N is 0, or when Alpha == 0 and Beta == 1.
+template <typename T>
+void libblis_itrmm3_check(side_t side, uplo_t uplo, diag_t diaga,
+  dim_t M, dim_t N, T Alpha, T* A, dim_t rsa, dim_t csa,
+  T* B, dim_t rsb, dim_t csb, T Beta, T* C, dim_t rsc, dim_t csc )
+{
+    T ONE  = 1.0;
+    T ZERO = 0.0;
+
+    const bool onLeft   = ( side == BLIS_LEFT  );
+    const bool isUpper  = ( uplo == BLIS_UPPER );
+    const bool unitDiag = ( diaga == BLIS_UNIT_DIAG );
+
+    // Quick returns: empty problem, or an update that leaves C untouched.
+    if( M == 0 || N == 0 )
+        return;
+    if( Alpha == ZERO && Beta == ONE )
+        return;
+
+    // Alpha == 0: the product term vanishes, only C is scaled (or cleared).
+    if( Alpha == ZERO )
+    {
+        for( dim_t col = 0 ; col < N ; col++ )
+        {
+            for( dim_t row = 0 ; row < M ; row++ )
+            {
+                T& dst = C[row*rsc + col*csc];
+                if( Beta == ZERO )
+                    dst = ZERO;
+                else
+                    dst = Beta*dst;
+            }
+        }
+        return;
+    }
+
+    // A unit diagonal is made explicit so the loops below need no special
+    // case for it.
+    if( unitDiag )
+    {
+        const dim_t order = onLeft ? M : N;
+        for( dim_t d = 0 ; d < order ; d++ )
+        {
+            A[d*rsa + d*csa] = ONE;
+        }
+    }
+
+    // Writes one finished dot product into C, applying Alpha and Beta.
+    // C is not read when Beta is zero.
+    auto store = [&]( dim_t row, dim_t col, T acc )
+    {
+        T& dst = C[row*rsc + col*csc];
+        if( Beta == ZERO )
+            dst = Alpha*acc;
+        else
+            dst = Beta*dst + Alpha*acc;
+    };
+
+    if( onLeft )
+    {
+        // C := beta * C + alpha * A * B, column by column.
+        for( dim_t col = 0 ; col < N ; col++ )
+        {
+            for( dim_t row = 0 ; row < M ; row++ )
+            {
+                T acc = ZERO;
+                if( isUpper )
+                {
+                    // Row `row` of an upper triangle: columns row .. M-1.
+                    for( dim_t p = row ; p < M ; p++ )
+                        acc += A[row*rsa + p*csa] * B[p*rsb + col*csb];
+                }
+                else
+                {
+                    // Row `row` of a lower triangle: columns 0 .. row.
+                    for( dim_t p = 0 ; p <= row ; p++ )
+                        acc += A[row*rsa + p*csa] * B[p*rsb + col*csb];
+                }
+                store( row, col, acc );
+            }
+        }
+    }
+    else
+    {
+        // C := beta * C + alpha * B * A, row by row.
+        for( dim_t row = 0 ; row < M ; row++ )
+        {
+            for( dim_t col = 0 ; col < N ; col++ )
+            {
+                T acc = ZERO;
+                if( isUpper )
+                {
+                    // Column `col` of an upper triangle: rows 0 .. col.
+                    for( dim_t p = 0 ; p <= col ; p++ )
+                        acc += B[row*rsb + p*csb]* A[p*rsa + col*csa];
+                }
+                else
+                {
+                    // Column `col` of a lower triangle: rows col .. N-1.
+                    for( dim_t p = col ; p < N ; p++ )
+                        acc += B[row*rsb + p*csb]* A[p*rsa + col*csa];
+                }
+                store( row, col, acc );
+            }
+        }
+    }
+    return;
+}
+
+// Reference trmm3 for complex element types:
+//     C := Beta * C + Alpha * A * B     (side == BLIS_LEFT)
+//     C := Beta * C + Alpha * B * A     (otherwise)
+// where only the triangle of A selected by uplo is read.
+//
+//   side, uplo, diaga - side of A, stored triangle, unit/non-unit diagonal.
+//   M, N              - dimensions of C (and of B as indexed here).
+//   Alpha, Beta       - complex scalars.
+//   A, rsa, csa       - triangular matrix and its strides. A is modified
+//                       in place: conjugated when conja is set, and its
+//                       diagonal overwritten with 1 for a unit diagonal.
+//   conja, conjb      - conjugate A / B before multiplying.
+//   B, rsb, csb       - general matrix and its strides. B is conjugated
+//                       in place when conjb is set.
+//   C, rsc, csc       - in/out matrix and its strides.
+//
+// The template parameter U is not used by this routine.
+//
+// Fixes relative to the previous version:
+//   * B is conjugated using its own strides (rsb/csb); the old code
+//     indexed B with C's strides (rsc/csc).
+//   * Alpha and Beta are treated as zero (or one) only when BOTH the real
+//     and imaginary parts match. The old tests looked at the real part
+//     alone, or used `||`, so e.g. a purely real non-zero Beta silently
+//     dropped the Beta*C term.
+template <typename T, typename U>
+void libblis_ictrmm3_check(side_t side, uplo_t uplo, diag_t diaga, dim_t M,
+ dim_t N, T Alpha, T* A, dim_t rsa, dim_t csa, bool conja, T* B, dim_t rsb,
+ dim_t csb, bool conjb, T Beta, T* C, dim_t rsc, dim_t csc )
+{
+    T ONE  = {1.0 , 0.0};
+    T ZERO = {0.0 , 0.0};
+    T tmp;
+    bool LSIDE, UPPER, UNITDA;
+    dim_t i, j, k;
+
+    //*     Test the input parameters.
+    LSIDE   = ( side == BLIS_LEFT  );
+    UPPER   = ( uplo == BLIS_UPPER );
+    UNITDA  = ( diaga == BLIS_UNIT_DIAG );
+
+    // Component-wise comparisons of the complex scalars.
+    const bool alphaIsZero = ( Alpha.real == ZERO.real ) && ( Alpha.imag == ZERO.imag );
+    const bool betaIsZero  = ( Beta.real  == ZERO.real ) && ( Beta.imag  == ZERO.imag );
+    const bool betaIsOne   = ( Beta.real  == ONE.real  ) && ( Beta.imag  == ONE.imag  );
+
+    //*     Quick return if possible.
+    if( (M == 0 || N == 0) || ( alphaIsZero && betaIsOne ) )
+      return;
+
+    if( conja )
+    {
+        dim_t dim;
+        if( LSIDE )        dim = M;
+        else               dim = N;
+        for( i = 0 ; i < dim ; i++ )
+        {
+            for( j = 0 ; j < dim ; j++ )
+            {
+                A[i*rsa + j*csa] = conjugate<T>(A[i*rsa + j*csa]);
+            }
+        }
+    }
+
+    if( conjb )
+    {
+        // B must be walked with its own strides, not those of C.
+        for( j = 0 ; j < N ; j++ )
+        {
+            for( i = 0 ; i < M ; i++ )
+            {
+                B[i*rsb + j*csb] = conjugate<T>(B[i*rsb + j*csb]);
+            }
+        }
+    }
+
+    //*     And when  Alpha.eq.zero.
+    if( alphaIsZero )
+    {
+        for( j = 0 ; j < N ; j++ )
+        {
+            for( i = 0 ; i < M ; i++ )
+            {
+                if( betaIsZero )
+                    C[i*rsc + j*csc] = ZERO;
+                else
+                    C[i*rsc + j*csc] = mulc<T>(Beta , C[i*rsc + j*csc]);
+            }
+        }
+        return;
+    }
+
+    // A unit diagonal is made explicit so the loops below need no special
+    // case for it.
+    if( UNITDA )
+    {
+        dim_t dim;
+        if( LSIDE )        dim = M;
+        else               dim = N;
+        for( i = 0 ; i < dim ; i++ )
+        {
+            A[i*rsa + i*csa] = ONE ;
+        }
+    }
+
+    // Writes one finished dot product into C, applying Alpha and Beta.
+    // C is not read when Beta is zero.
+    auto store = [&]( dim_t row, dim_t col, T acc )
+    {
+        if( betaIsZero )
+        {
+            C[row*rsc + col*csc] = mulc<T>(Alpha , acc);
+        }
+        else
+        {
+            C[row*rsc + col*csc] = addc<T>(mulc<T>(Beta , C[row*rsc + col*csc]) , mulc<T>(Alpha , acc));
+        }
+    };
+
+    //*     Start the operations.
+    if( LSIDE )
+    {
+        //* Form  C := beta * C_orig + alpha * transa(A) * transb(B)
+        for( j = 0 ; j < N ; j++ )
+        {
+            for( i = 0 ; i < M ; i++ )
+            {
+                tmp = ZERO;
+                if( UPPER )
+                {
+                    for( k = i ; k < M ; k++ )
+                    {
+                        tmp = addc<T>(tmp , mulc<T>(A[i*rsa + k*csa] , B[k*rsb + j*csb]));
+                    }
+                }
+                else
+                {
+                    for( k = 0 ; k <= i ; k++ )
+                    {
+                        tmp = addc<T>(tmp , mulc<T>(A[i*rsa + k*csa] , B[k*rsb + j*csb]));
+                    }
+                }
+                store( i, j, tmp );
+            }
+        }
+    }
+    else
+    {
+        //* C := beta * C_orig + alpha * transb(B) * transa(A)
+        for( i = 0 ; i < M ; i++ )
+        {
+            for( j = 0 ; j < N ; j++ )
+            {
+                tmp = ZERO ;
+                if( UPPER )
+                {
+                    for( k = 0 ; k <= j ; k++ )
+                    {
+                        tmp = addc<T>(tmp , mulc<T>(B[i*rsb + k*csb] , A[k*rsa + j*csa]));
+                    }
+                }
+                else
+                {
+                    for( k = j ; k < N ; k++ )
+                    {
+                        tmp = addc<T>(tmp , mulc<T>(B[i*rsb + k*csb] , A[k*rsa + j*csa]));
+                    }
+                }
+                store( i, j, tmp );
+            }
+        }
+    }
+    return;
+}
+
+// Checks a trmm3 result against the reference implementation in this file.
+//
+// The reference result is computed in place in the buffer of c_orig (which
+// is therefore overwritten) and then compared with the buffer of c.
+//
+//   params  - test parameters; not referenced by this routine.
+//   side    - side on which the triangular matrix A is applied.
+//   alpha   - scalar object multiplying the product.
+//   a       - triangular matrix. When a carries a transpose, its uplo field
+//             is toggled IN PLACE (if it is upper or lower).
+//   b       - general matrix.
+//   beta    - scalar object multiplying C.
+//   c       - matrix being verified (presumably the library output).
+//   c_orig  - original C; overwritten with the reference result.
+//
+// The datatype is taken from a. Returns the value produced by computediffrm
+// (real types) or computediffim (complex types). For any other datatype,
+// bli_check_error_code() is invoked and the residual is left at 0.0.
+double libblis_test_itrmm3_check
+    (
+      test_params_t* params,
+      side_t         side,
+      obj_t*         alpha,
+      obj_t*         a,
+      obj_t*         b,
+      obj_t*         beta,
+      obj_t*         c,
+      obj_t*         c_orig
+    )
+{
+    num_t   dt     = bli_obj_dt( a );
+    diag_t  diaga  = bli_obj_diag( a );
+    dim_t   M      = bli_obj_length( c );
+    dim_t   N      = bli_obj_width( c );
+    bool    conja  = bli_obj_has_conj( a );
+    bool    conjb  = bli_obj_has_conj( b );
+    trans_t transa = bli_obj_onlytrans_status( a );
+    trans_t transb = bli_obj_onlytrans_status( b );
+    double  resid  = 0.0;
+
+    // A transposed operand is handled by swapping its two strides, so the
+    // reference kernels can index it as if it were not transposed.
+    dim_t rsa = transa ? bli_obj_col_stride( a ) : bli_obj_row_stride( a );
+    dim_t csa = transa ? bli_obj_row_stride( a ) : bli_obj_col_stride( a );
+    dim_t rsb = transb ? bli_obj_col_stride( b ) : bli_obj_row_stride( b );
+    dim_t csb = transb ? bli_obj_row_stride( b ) : bli_obj_col_stride( b );
+
+    // The layout decision is made on c; the non-unit stride is read from
+    // c_orig.
+    const bool unitRowStride = ( bli_obj_row_stride( c ) == 1 );
+    dim_t rsc = unitRowStride ? 1 : bli_obj_row_stride( c_orig );
+    dim_t csc = unitRowStride ? bli_obj_col_stride( c_orig ) : 1;
+
+    // Transposing a triangular matrix flips which triangle is stored.
+    if( transa && bli_obj_is_upper_or_lower( a ) )
+    {
+        bli_obj_toggle_uplo( a );
+    }
+    uplo_t uploa = bli_obj_uplo( a );
+
+    switch( dt )
+    {
+        case BLIS_FLOAT :
+        {
+            float* alphaPtr = (float*) bli_obj_buffer( alpha );
+            float* betaPtr  = (float*) bli_obj_buffer( beta );
+            float* aBuf     = (float*) bli_obj_buffer( a );
+            float* bBuf     = (float*) bli_obj_buffer( b );
+            float* refBuf   = (float*) bli_obj_buffer( c_orig );
+            float* outBuf   = (float*) bli_obj_buffer( c );
+            libblis_itrmm3_check<float>(side, uploa, diaga, M, N, *alphaPtr,
+                 aBuf, rsa, csa, bBuf, rsb, csb, *betaPtr, refBuf, rsc, csc );
+            resid = computediffrm(M, N, outBuf, refBuf, rsc, csc);
+            break;
+        }
+        case BLIS_DOUBLE :
+        {
+            double* alphaPtr = (double*) bli_obj_buffer( alpha );
+            double* betaPtr  = (double*) bli_obj_buffer( beta );
+            double* aBuf     = (double*) bli_obj_buffer( a );
+            double* bBuf     = (double*) bli_obj_buffer( b );
+            double* refBuf   = (double*) bli_obj_buffer( c_orig );
+            double* outBuf   = (double*) bli_obj_buffer( c );
+            libblis_itrmm3_check<double>(side, uploa, diaga, M, N, *alphaPtr,
+                 aBuf, rsa, csa, bBuf, rsb, csb, *betaPtr, refBuf, rsc, csc );
+            resid = computediffrm(M, N, outBuf, refBuf, rsc, csc);
+            break;
+        }
+        case BLIS_SCOMPLEX :
+        {
+            scomplex* alphaPtr = (scomplex*) bli_obj_buffer( alpha );
+            scomplex* betaPtr  = (scomplex*) bli_obj_buffer( beta );
+            scomplex* aBuf     = (scomplex*) bli_obj_buffer( a );
+            scomplex* bBuf     = (scomplex*) bli_obj_buffer( b );
+            scomplex* refBuf   = (scomplex*) bli_obj_buffer( c_orig );
+            scomplex* outBuf   = (scomplex*) bli_obj_buffer( c );
+            libblis_ictrmm3_check<scomplex, float>(side, uploa, diaga, M, N,
+                 *alphaPtr, aBuf, rsa, csa, conja, bBuf, rsb, csb, conjb,
+                 *betaPtr, refBuf, rsc, csc );
+            resid = computediffim(M, N, outBuf, refBuf, rsc, csc);
+            break;
+        }
+        case BLIS_DCOMPLEX :
+        {
+            dcomplex* alphaPtr = (dcomplex*) bli_obj_buffer( alpha );
+            dcomplex* betaPtr  = (dcomplex*) bli_obj_buffer( beta );
+            dcomplex* aBuf     = (dcomplex*) bli_obj_buffer( a );
+            dcomplex* bBuf     = (dcomplex*) bli_obj_buffer( b );
+            dcomplex* refBuf   = (dcomplex*) bli_obj_buffer( c_orig );
+            dcomplex* outBuf   = (dcomplex*) bli_obj_buffer( c );
+            libblis_ictrmm3_check<dcomplex, double>(side, uploa, diaga, M, N,
+                 *alphaPtr, aBuf, rsa, csa, conja, bBuf, rsb, csb, conjb,
+                 *betaPtr, refBuf, rsc, csc );
+            resid = computediffim(M, N, outBuf, refBuf, rsc, csc);
+            break;
+        }
+        default :
+            bli_check_error_code( BLIS_INVALID_DATATYPE );
+    }
+    return resid;
+}
+
+// Scans a real matrix for NaN entries.
+//
+//   rs, cs - row and column strides used to index the buffer of b.
+//   b      - matrix object; its length, width and buffer are read.
+//
+// Returns the first NaN element encountered (converted to double), or 0.0
+// when the matrix holds no NaN. The scan stops at the first hit: the
+// previous version only broke out of the inner loop and kept walking the
+// remaining rows even though the answer was already known.
+template <typename T>
+double libblis_check_nan_real( dim_t rs, dim_t cs, obj_t* b ) {
+  dim_t  M = bli_obj_length( b );
+  dim_t  N = bli_obj_width( b );
+  dim_t  i,j;
+  T* B = (T*) bli_obj_buffer( b );
+
+  for( i = 0 ; i < M ; i++ ) {
+    for( j = 0 ; j < N ; j++ ) {
+      auto tv = B[ i*rs + j*cs ];
+      if ( bli_isnan( tv )) {
+        return tv;
+      }
+    }
+  }
+  return 0.0;
+}
+
+// Scans a complex matrix for NaN entries.
+//
+//   rs, cs - row and column strides used to index the buffer of b.
+//   b      - matrix object; its length, width and buffer are read.
+//
+// Returns the NaN component (real part preferred over imaginary) of the
+// first element that contains one, or 0.0 when no component is NaN. The
+// scan stops at the first hit: the previous version only broke out of the
+// inner loop and kept walking the remaining rows.
+template <typename T>
+double libblis_check_nan_complex( dim_t rs, dim_t cs, obj_t* b ) {
+  dim_t  M = bli_obj_length( b );
+  dim_t  N = bli_obj_width( b );
+  dim_t  i,j;
+  T* B = (T*) bli_obj_buffer( b );
+
+  for( i = 0 ; i < M ; i++ ) {
+    for( j = 0 ; j < N ; j++ ) {
+      auto tv = B[ i*rs + j*cs ];
+      if ( bli_isnan( tv.real ) || bli_isnan( tv.imag )) {
+        return bli_isnan( tv.real ) ? tv.real : tv.imag;
+      }
+    }
+  }
+  return 0.0;
+}
+
+// Reports whether the trmm3 output matrix c contains a NaN.
+//
+//   c  - matrix object to scan.
+//   dt - datatype selecting the real or complex scanner.
+//
+// Returns whatever the selected scanner returns. For an unhandled dt,
+// bli_check_error_code() is invoked and 0.0 is returned.
+double libblis_check_nan_trmm3(obj_t* c, num_t dt ) {
+  // A unit row stride is paired with the stored column stride; any other
+  // row stride is paired with a column stride of 1.
+  dim_t rowStride = bli_obj_row_stride( c );
+  dim_t colStride = ( rowStride == 1 ) ? bli_obj_col_stride( c ) : 1;
+
+  if( dt == BLIS_FLOAT )
+    return libblis_check_nan_real<float>( rowStride, colStride, c );
+
+  if( dt == BLIS_DOUBLE )
+    return libblis_check_nan_real<double>( rowStride, colStride, c );
+
+  if( dt == BLIS_SCOMPLEX )
+    return libblis_check_nan_complex<scomplex>( rowStride, colStride, c );
+
+  if( dt == BLIS_DCOMPLEX )
+    return libblis_check_nan_complex<dcomplex>( rowStride, colStride, c );
+
+  bli_check_error_code( BLIS_INVALID_DATATYPE );
+  return 0.0;
+}
--- a/gtestsuite/src/ref_trmv.cpp
+++ b/gtestsuite/src/ref_trmv.cpp
@@ -0,0 +1,278 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_trmv.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> TRMV  performs one of the matrix-vector operations
+//*>    x := alpha * transa(A) * x
+//*> where x is an n element vector and  A is an n by n unit, or non-unit,
+//*> upper or lower triangular matrix.
+//*  ==========================================================================
+
+// Reference trmv for real element types, computed in place on X:
+//     x := alpha * A   * x     (transa == BLIS_NO_TRANSPOSE)
+//     x := alpha * A^T * x     (otherwise)
+//
+//   uploa, transa, diaga - stored triangle, transpose flag, unit/non-unit
+//                          diagonal of A.
+//   alpha                - pointer to the scalar (dereferenced once).
+//   N                    - order of A / number of elements of x.
+//   A, rsa, csa          - triangular matrix and its strides (read only).
+//   X, incx              - in/out vector and its increment.
+//
+// The template parameter U is not used by this routine.
+//
+// Fixes relative to the previous version:
+//   * For a non-positive incx the start offset was `1 - N*incx`, a
+//     mis-translation of the 1-based Fortran `KX = 1 - (N-1)*INCX` that
+//     points past the vector. The 0-based equivalent is `(1 - N)*incx`.
+//     NOTE(review): this follows the reference-BLAS convention (X points
+//     at the lowest-addressed element) - confirm that matches the caller.
+//   * Index variables are dim_t rather than int, so they cannot be
+//     narrowed when dim_t is wider than int.
+template <typename T, typename U>
+void libblis_itrmv_check(uplo_t uploa, trans_t transa, diag_t diaga,
+      T* alpha, dim_t N, T* A, dim_t rsa, dim_t csa, T* X, dim_t incx){
+
+  T Alpha = *alpha;
+  T tmp;
+  dim_t i, ix, j, jx, kx;
+  bool NOTRANS, NOUNIT;
+
+  if (N == 0)
+    return;
+
+  NOTRANS = (transa == BLIS_NO_TRANSPOSE);
+  NOUNIT  = (diaga == BLIS_NONUNIT_DIAG);
+
+  // kx is the offset of the first logical element of x.
+  if (incx > 0) {
+    kx = 0;
+  }
+  else {
+    kx = (1 - N) * incx;
+  }
+
+  if(NOTRANS) {
+    //* Form  x := A*x.
+    if(uploa == BLIS_UPPER){
+      // Column sweep, first to last: column j contributes to x[0..j-1]
+      // before x[j] itself is finalized.
+      jx = kx;
+      for(j = 0 ; j < N ; j++){
+        tmp = Alpha*X[jx];
+        ix = kx;
+        for(i = 0 ; i < j ; i++) {
+          X[ix] = X[ix] + tmp*A[i*rsa + j*csa];
+          ix = ix + incx;
+        }
+        if (NOUNIT)
+          tmp = tmp*A[j*rsa + j*csa];
+
+        X[jx] = tmp;
+        jx = jx + incx;
+      }
+    }
+    else{
+      // Column sweep, last to first; kx now addresses the last element.
+      kx = kx + (N - 1)*incx;
+      jx = kx;
+      for(j = (N-1) ; j >= 0 ; j--){
+        tmp = Alpha*X[jx];
+        ix = kx;
+        for(i = (N-1) ; i > j ; i--){
+          X[ix] = X[ix] + tmp*A[i*rsa + j*csa];
+          ix = ix - incx;
+        }
+        if(NOUNIT)
+          tmp = tmp*A[j*rsa + j*csa];
+
+        X[jx] = tmp;
+        jx = jx - incx;
+      }
+    }
+  }
+  else {
+    //* Form  x := A**T*x.
+    if(uploa == BLIS_UPPER){
+      // Last to first so that x[0..j-1] are still the input values when
+      // the dot product for x[j] is formed.
+      jx = kx + (N - 1)*incx;
+      for(j = (N-1) ; j >= 0 ; j--){
+        tmp = X[jx];
+        ix = jx;
+        if(NOUNIT)
+          tmp = tmp*A[j*rsa + j*csa];
+        for(i = (j-1) ; i >= 0 ; i--) {
+          ix = ix - incx;
+          tmp = tmp + A[i*rsa + j*csa]*X[ix];
+        }
+        X[jx] = Alpha*tmp;
+        jx = jx - incx;
+      }
+    }
+    else{
+      // First to last so that x[j+1..N-1] are still the input values.
+      jx = kx;
+      for(j = 0 ; j < N ; j++){
+        tmp = X[jx];
+        ix = jx;
+        if (NOUNIT)
+          tmp = tmp*A[j*rsa + j*csa];
+        for(i = (j+1) ; i < N ; i++){
+          ix = ix + incx;
+          tmp = tmp + X[ix]*A[i*rsa + j*csa];
+        }
+        X[jx] = Alpha*tmp;
+        jx = jx + incx;
+      }
+    }
+  }
+  return;
+}
+
+// Reference trmv for complex element types, computed in place on X:
+//     x := alpha * A   * x     (transa == BLIS_NO_TRANSPOSE)
+//     x := alpha * A^T * x     (otherwise)
+// with A conjugated first when conja is set.
+//
+//   uploa, transa, diaga - stored triangle, transpose flag, unit/non-unit
+//                          diagonal of A.
+//   alpha                - pointer to the scalar (dereferenced once).
+//   N                    - order of A / number of elements of x.
+//   A, rsa, csa          - triangular matrix and its strides. When conja
+//                          is set, all N x N entries of A are conjugated
+//                          IN PLACE.
+//   conja                - conjugate A before multiplying.
+//   X, incx              - in/out vector and its increment.
+//
+// The template parameter U is not used by this routine.
+//
+// Fixes relative to the previous version:
+//   * For a non-positive incx the start offset was `1 - N*incx`, a
+//     mis-translation of the 1-based Fortran `KX = 1 - (N-1)*INCX` that
+//     points past the vector. The 0-based equivalent is `(1 - N)*incx`.
+//     NOTE(review): this follows the reference-BLAS convention (X points
+//     at the lowest-addressed element) - confirm that matches the caller.
+//   * Index variables are dim_t rather than int, so they cannot be
+//     narrowed when dim_t is wider than int.
+template <typename T, typename U>
+void libblis_ictrmv_check(uplo_t uploa, trans_t transa, diag_t diaga,
+T* alpha, dim_t N, T* A, dim_t rsa, dim_t csa, bool conja, T* X, dim_t incx){
+
+  T Alpha = *alpha;
+  T tmp;
+  dim_t i, ix, j, jx, kx;
+  bool NOTRANS, NOUNIT;
+
+  if (N == 0)
+    return;
+
+  NOTRANS = (transa == BLIS_NO_TRANSPOSE);
+  NOUNIT  = (diaga == BLIS_NONUNIT_DIAG);
+
+  // kx is the offset of the first logical element of x.
+  if (incx > 0) {
+    kx = 0;
+  }
+  else {
+    kx = (1 - N) * incx;
+  }
+
+  if(conja) {
+    for(i = 0 ; i < N ; i++) {
+      for(j = 0 ; j < N ; j++) {
+        A[i*rsa + j*csa] = conjugate<T>(A[i*rsa + j*csa]);
+      }
+    }
+  }
+
+  if(NOTRANS){
+    //* Form  x := A*x.
+    if(uploa == BLIS_UPPER){
+      // Column sweep, first to last: column j contributes to x[0..j-1]
+      // before x[j] itself is finalized.
+      jx = kx;
+      for(j = 0 ; j < N ; j++){
+        tmp = mulc<T>(Alpha , X[jx]);
+        ix = kx;
+        for(i = 0 ; i < j ; i++) {
+          X[ix] = addc<T>(X[ix] , mulc<T>(tmp , A[i*rsa + j*csa]));
+          ix = ix + incx;
+        }
+        if (NOUNIT)
+          tmp = mulc<T>(tmp , A[j*rsa + j*csa]);
+
+        X[jx] = tmp;
+        jx = jx + incx;
+      }
+    }
+    else{
+      // Column sweep, last to first; kx now addresses the last element.
+      kx = kx + (N - 1)*incx;
+      jx = kx;
+      for(j = (N-1) ; j >= 0 ; j--){
+        tmp = mulc<T>(Alpha , X[jx]);
+        ix = kx;
+        for(i = (N-1) ; i > j ; i--){
+          X[ix] = addc<T>(X[ix] , mulc<T>(tmp , A[i*rsa + j*csa]));
+          ix = ix - incx;
+        }
+        if(NOUNIT)
+          tmp = mulc<T>(tmp , A[j*rsa + j*csa]);
+
+        X[jx] = tmp;
+        jx = jx - incx;
+      }
+    }
+  }
+  else {
+    //* Form  x := A**T*x.
+    if(uploa == BLIS_UPPER){
+      // Last to first so that x[0..j-1] are still the input values when
+      // the dot product for x[j] is formed.
+      jx = kx + (N - 1)*incx;
+      for(j = (N-1) ; j >= 0 ; j--){
+        tmp = X[jx];
+        ix = jx;
+        if(NOUNIT)
+          tmp = mulc<T>(tmp , A[j*rsa + j*csa]);
+        for(i = (j-1) ; i >= 0 ; i--) {
+          ix = ix - incx;
+          tmp = addc<T>(tmp , mulc<T>(A[i*rsa + j*csa] , X[ix]));
+        }
+        X[jx] = mulc<T>(Alpha , tmp);
+        jx = jx - incx;
+      }
+    }
+    else{
+      // First to last so that x[j+1..N-1] are still the input values.
+      jx = kx;
+      for(j = 0 ; j < N ; j++){
+        tmp = X[jx];
+        ix = jx;
+        if (NOUNIT)
+          tmp = mulc<T>(tmp , A[j*rsa + j*csa]);
+        for(i = (j+1) ; i < N ; i++){
+          ix = ix + incx;
+          tmp = addc<T>(tmp , mulc<T>(X[ix] , A[i*rsa + j*csa]));
+        }
+        X[jx] = mulc<T>(Alpha , tmp);
+        jx = jx + incx;
+      }
+    }
+  }
+  return;
+}
+
+// Checks a trmv result against the reference implementation in this file.
+//
+// The reference result is computed in place in the buffer of x_orig (which
+// is therefore overwritten) and then compared with the buffer of x.
+//
+//   params  - test parameters; not referenced by this routine.
+//   alpha   - scalar object; its buffer is handed to the reference kernel.
+//   a       - triangular matrix; uplo, trans, conj, diag, strides and
+//             length are read from it.
+//   x       - vector being verified (presumably the library output); it
+//             also supplies the datatype and the increment.
+//   x_orig  - original x; overwritten with the reference result.
+//
+// Returns the value produced by computediffrv (real types) or computediffiv
+// (complex types). For any other datatype, bli_check_error_code() is
+// invoked and the residual is left at 0.0.
+double libblis_test_itrmv_check(
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         a,
+  obj_t*         x,
+  obj_t*         x_orig
+){
+  // Properties of the triangular operand.
+  uplo_t  uploa  = bli_obj_uplo( a );
+  trans_t transa = bli_obj_onlytrans_status( a );
+  diag_t  diaga  = bli_obj_diag( a );
+  bool    conja  = bli_obj_has_conj( a );
+  dim_t   rsa    = bli_obj_row_stride( a );
+  dim_t   csa    = bli_obj_col_stride( a );
+
+  // The vector length is taken from the number of rows of A.
+  dim_t   order  = bli_obj_length( a );
+  dim_t   incx   = bli_obj_vector_inc( x );
+  num_t   dt     = bli_obj_dt( x );
+
+  double  residual = 0.0;
+
+  if( dt == BLIS_FLOAT )
+  {
+    float* refVec   = (float*) bli_obj_buffer( x_orig );
+    float* alphaPtr = (float*) bli_obj_buffer( alpha );
+    float* aBuf     = (float*) bli_obj_buffer( a );
+    float* outVec   = (float*) bli_obj_buffer( x );
+    libblis_itrmv_check<float, int32_t>(uploa, transa, diaga, alphaPtr,
+                                   order, aBuf, rsa, csa, refVec, incx);
+    residual = computediffrv(order, incx, outVec, refVec);
+  }
+  else if( dt == BLIS_DOUBLE )
+  {
+    double* refVec   = (double*) bli_obj_buffer( x_orig );
+    double* alphaPtr = (double*) bli_obj_buffer( alpha );
+    double* aBuf     = (double*) bli_obj_buffer( a );
+    double* outVec   = (double*) bli_obj_buffer( x );
+    libblis_itrmv_check<double, int64_t>(uploa, transa, diaga, alphaPtr,
+                                   order, aBuf, rsa, csa, refVec, incx);
+    residual = computediffrv(order, incx, outVec, refVec);
+  }
+  else if( dt == BLIS_SCOMPLEX )
+  {
+    scomplex* refVec   = (scomplex*) bli_obj_buffer( x_orig );
+    scomplex* alphaPtr = (scomplex*) bli_obj_buffer( alpha );
+    scomplex* aBuf     = (scomplex*) bli_obj_buffer( a );
+    scomplex* outVec   = (scomplex*) bli_obj_buffer( x );
+    libblis_ictrmv_check<scomplex, int32_t>(uploa, transa, diaga, alphaPtr,
+                            order, aBuf, rsa, csa, conja, refVec, incx);
+    residual = computediffiv(order, incx, outVec, refVec);
+  }
+  else if( dt == BLIS_DCOMPLEX )
+  {
+    dcomplex* refVec   = (dcomplex*) bli_obj_buffer( x_orig );
+    dcomplex* alphaPtr = (dcomplex*) bli_obj_buffer( alpha );
+    dcomplex* aBuf     = (dcomplex*) bli_obj_buffer( a );
+    dcomplex* outVec   = (dcomplex*) bli_obj_buffer( x );
+    libblis_ictrmv_check<dcomplex, int64_t>(uploa, transa, diaga, alphaPtr,
+                            order, aBuf, rsa, csa, conja, refVec, incx);
+    residual = computediffiv(order, incx, outVec, refVec);
+  }
+  else
+  {
+    bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return residual;
+}
+
+
--- a/gtestsuite/src/ref_trsm.cpp
+++ b/gtestsuite/src/ref_trsm.cpp
@@ -0,0 +1,534 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_trsm.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> TRSM  solves one of the matrix equations
+//*>    op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
+//*> where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+//*> non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+//*>    op( A ) = A   or   op( A ) = A**T.
+//*> The matrix X is overwritten on B.
+//*  ==========================================================================
+
+//*  Reference TRSM for complex element types. T is the complex type and U
+//*  is forwarded as the second template argument of divc<T,U>.
+//*  Solves in place, overwriting B (M x N, strides rsb/csb):
+//*     side == BLIS_LEFT :  op( A )*X = alpha*B
+//*     otherwise         :  X*op( A ) = alpha*B
+//*  uplo selects the triangle of A that is read, diaga whether the diagonal
+//*  is divided out, and any transa other than the two no-transpose values
+//*  selects the transposed solve. rsa/csa are the strides of A.
+//*  NOTE: when conja is set, A is conjugated in place and is left
+//*  conjugated on return.
+template <typename T, typename U>
+void libblis_ictrsm_check(side_t side, uplo_t uplo, trans_t transa,
+  diag_t diaga, dim_t M, dim_t N, T Alpha, T* A, dim_t rsa, dim_t csa,
+  bool conja, T* B, dim_t rsb, dim_t csb){
+
+  T tmp;
+  dim_t i, j, k;
+  bool LSIDE, NOUNIT, UPPER, NOTRANS;
+  T ONE, ZERO;
+  ONE  = {1.0 , 0.0};
+  ZERO = {0.0 , 0.0};
+
+  //*     Test the input parameters.
+  LSIDE   = (side == BLIS_LEFT);
+  NOTRANS = (transa == BLIS_NO_TRANSPOSE) || (transa == BLIS_CONJ_NO_TRANSPOSE);
+  NOUNIT  = (diaga == BLIS_NONUNIT_DIAG);
+  UPPER   = (uplo == BLIS_UPPER);
+
+  //* Quick return if possible.
+  if ((M == 0) || (N == 0) )
+    return;
+
+  //* And when  alpha.eq.zero.
+  if ((Alpha.real == ZERO.real) && (Alpha.imag == ZERO.imag)) {
+    for(i = 0; i < M ; i++) {
+      for(j = 0; j < N ; j++) {
+        B[i*rsb+ j*csb] = ZERO;
+      }
+    }
+    return;
+  }
+
+  //* Conjugate the whole (square) A operand; its order is M for a left-side
+  //* solve and N for a right-side solve.
+  if(conja) {
+    dim_t dim;
+    if (LSIDE)         dim = M;
+    else               dim = N;
+    for( i = 0 ; i < dim ; i++ ) {
+      for( j = 0 ; j < dim ; j++ ) {
+        A[i*rsa + j*csa] = conjugate<T>(A[i*rsa + j*csa]);
+      }
+    }
+  }
+
+  //* Scale B by alpha unless alpha is exactly (1,0). The scaling is needed
+  //* as soon as EITHER component differs from ONE; requiring both to
+  //* differ would skip the scaling for e.g. alpha = (2,0).
+  if((Alpha.real != ONE.real) || (Alpha.imag != ONE.imag)){
+    for(i = 0; i < M; i++) {
+      for(j = 0 ; j < N ; j++) {
+        B[i*rsb + j*csb] = mulc<T>(Alpha , B[i*rsb + j*csb]);
+      }
+    }
+  }
+
+  //* Start the operations.
+  if (LSIDE) {
+    if (NOTRANS) {
+      //* Form  B := alpha*inv( A )*B.
+      if (UPPER) { /* AuXB : LUN */
+        for(j = 0 ; j < N ; j++) {
+          //* k runs M-1 .. 0 (back substitution).
+          for(k = M; k-- ; ) {
+            if((B[k*rsb + j*csb].real != ZERO.real) || (B[k*rsb + j*csb].imag != ZERO.imag)) {
+              if (NOUNIT) B[k*rsb + j*csb] = divc<T,U>(B[k*rsb + j*csb] , A[k*rsa + k*csa]);
+              for(i = 0 ; i < k ; i++) {
+                B[i*rsb + j*csb] = subc<T>(B[i*rsb + j*csb] , mulc<T>(B[k*rsb + j*csb] , A[i*rsa + k*csa]));
+              }
+            }
+          }
+        }
+      }
+      else {
+        for(j = 0 ; j < N ; j++) {  /* AlXB : LLN */
+          for(k = 0 ; k < M ; k++) {
+            if ((B[k*rsb + j*csb].real != ZERO.real) || (B[k*rsb + j*csb].imag != ZERO.imag)) {
+              if (NOUNIT) B[k*rsb + j*csb] = divc<T,U>(B[k*rsb + j*csb] , A[k*rsa + k*csa]);
+              for(i=(k+1) ; i < M ; i++) {
+                B[i*rsb + j*csb] = subc<T>(B[i*rsb + j*csb] , mulc<T>(B[k*rsb + j*csb] , A[i*rsa + k*csa]));
+              }
+            }
+          }
+        }
+      }
+    }
+    else {
+      //* Form  B := alpha*inv( A**T )*B.
+      if (UPPER) {
+        for(j = 0 ; j < N ; j++) {  /* AutXB : LUT */
+          for(i = 0 ; i < M ; i++) {
+            tmp = B[i*rsb + j*csb];
+            for(k = 0 ; k < i ; k++) {
+              tmp = subc<T>(tmp , mulc<T>(A[k*rsa + i*csa] , B[k*rsb + j*csb]));
+            }
+            if (NOUNIT) tmp = divc<T,U>(tmp , A[i*rsa + i*csa]);
+            B[i*rsb + j*csb] = tmp;
+          }
+        }
+      }
+      else {
+        for(j = 0 ; j < N ; j++) {  /* AltXB : LLT */
+          //* i runs M-1 .. 0.
+          for(i = M ; i-- ;) {
+            tmp = B[i*rsb + j*csb];
+            for(k = (i+1) ; k < M ; k++) {
+              tmp = subc<T>(tmp , mulc<T>(A[k*rsa + i*csa] , B[k*rsb + j*csb]));
+            }
+            if (NOUNIT) tmp = divc<T,U>(tmp , A[i*rsa + i*csa]);
+            B[i*rsb + j*csb] = tmp;
+          }
+        }
+      }
+    }
+  }
+  else {
+    if(NOTRANS) {
+      //* Form  B := alpha*B*inv( A ).
+      if (UPPER) {
+        for(j = 0 ; j < N ; j++) {  /* XAuB : RUN */
+          for(k = 0 ; k < j ; k++) {
+            if ((A[k*rsa + j*csa].real != ZERO.real)||(A[k*rsa + j*csa].imag != ZERO.imag)) {
+              for(i = 0 ; i < M ; i++) {
+                B[i*rsb + j*csb] = subc<T>(B[i*rsb + j*csb] , mulc<T>(A[k*rsa + j*csa] , B[i*rsb + k*csb]));
+              }
+            }
+          }
+          if (NOUNIT) {
+            //* Multiply by the reciprocal of the diagonal entry.
+            tmp = divc<T,U>(ONE , A[j*rsa + j*csa]);
+            for(i = 0 ; i < M ; i++) {
+              B[i*rsb + j*csb] = mulc<T>(tmp , B[i*rsb + j*csb]);
+            }
+          }
+        }
+      }
+      else {
+        //* j runs N-1 .. 0.
+        for(j = N; j-- ; ) {  /* XAlB : RLN */
+          for(k = (j+1) ; k < N ; k++) {
+            if((A[k*rsa + j*csa].real != ZERO.real)||(A[k*rsa + j*csa].imag != ZERO.imag)) {
+              for(i = 0 ; i < M ; i++) {
+                B[i*rsb + j*csb] = subc<T>(B[i*rsb + j*csb] , mulc<T>(A[k*rsa + j*csa] , B[i*rsb + k*csb]));
+              }
+            }
+          }
+          if (NOUNIT) {
+            tmp = divc<T,U>(ONE , A[j*rsa + j*csa]);
+            for(i = 0 ; i < M ; i++) {
+              B[i*rsb + j*csb] = mulc<T>(tmp , B[i*rsb + j*csb]);
+            }
+          }
+        }
+      }
+    }
+    else {
+      //* Form  B := alpha*B*inv( A**T ).
+      if (UPPER) {  /* XAutB : RUT */
+        //* k runs N-1 .. 0.
+        for(k = N ; k-- ; ) {
+          if (NOUNIT) {
+            tmp = divc<T,U>(ONE , A[k*rsa + k*csa]);
+            for(i = 0 ; i < M ; i++) {
+              B[i*rsb + k*csb] = mulc<T>(tmp , B[i*rsb + k*csb]);
+            }
+          }
+          for(j = 0 ; j < k; j++) {
+            if((A[j*rsa + k*csa].real != ZERO.real)||(A[j*rsa + k*csa].imag != ZERO.imag)) {
+              tmp = A[j*rsa + k*csa];
+              for(i = 0 ; i < M ; i++) {
+                B[i*rsb + j*csb] = subc<T>(B[i*rsb + j*csb] , mulc<T>(tmp , B[i*rsb + k*csb]));
+              }
+            }
+          }
+        }
+      }
+      else {  /* XAltB : RLT */
+        for(k = 0 ; k < N; k++) {
+          if (NOUNIT) {
+            tmp = divc<T,U>(ONE , A[k*rsa + k*csa]);
+            for(i = 0 ; i < M ; i++) {
+              B[i*rsb + k*csb] = mulc<T>(tmp , B[i*rsb + k*csb]);
+            }
+          }
+          for(j = (k+1) ; j < N ; j++) {
+            if((A[j*rsa + k*csa].real != ZERO.real)||(A[j*rsa + k*csa].imag != ZERO.imag)) {
+              tmp = A[j*rsa + k*csa];
+              for(i = 0 ; i < M; i++) {
+                B[i*rsb + j*csb] = subc<T>(B[i*rsb + j*csb] , mulc<T>(tmp , B[i*rsb + k*csb]));
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return;
+}
+
+//*  Reference TRSM for real element types (T). The template parameter U is
+//*  not used in this body. Solves in place, overwriting B (M x N, strides
+//*  rsb/csb):
+//*     side == BLIS_LEFT :  op( A )*X = alpha*B
+//*     otherwise         :  X*op( A ) = alpha*B
+//*  uploa selects the triangle of A that is read, diaga whether the
+//*  diagonal is divided out, and any transa other than the two no-transpose
+//*  values selects the transposed solve. rsa/csa are the strides of A.
+template <typename T, typename U>
+void libblis_itrsm_check(side_t side, uplo_t uploa, trans_t transa,
+  diag_t diaga, dim_t M, dim_t N, T Alpha, T* A, dim_t rsa, dim_t csa,
+  T* B, dim_t rsb, dim_t csb) {
+
+  T tmp;
+  dim_t i, j, k;
+  bool LSIDE, UPPER;
+  bool NOTRANS, NOUNIT;
+  T ONE = 1.0;
+  T ZERO = 0.0;
+
+  LSIDE   = (side == BLIS_LEFT);
+  NOTRANS = (transa == BLIS_NO_TRANSPOSE) || (transa == BLIS_CONJ_NO_TRANSPOSE);
+  NOUNIT  = (diaga == BLIS_NONUNIT_DIAG);
+  UPPER   = (uploa == BLIS_UPPER);
+
+  //* Quick return for an empty B.
+  if((M == 0) || (N == 0))
+    return;
+
+  //* alpha == 0: the result is the zero matrix, A is never read.
+  if (Alpha == ZERO) {
+    for(i = 0 ; i < M ; i++) {
+      for(j = 0 ; j < N; j++) {
+        B[i*rsb + j*csb] = ZERO;
+      }
+    }
+    return;
+  }
+
+  //* Pre-scale B by alpha so the solves below work on alpha*B.
+  if (Alpha != ONE) {
+    for(i = 0; i < M; i++) {
+      for(j = 0 ; j < N ; j++) {
+        B[i*rsb + j*csb] = Alpha*B[i*rsb + j*csb];
+      }
+    }
+  }
+
+  //* Start the operations.
+  if (LSIDE) {
+    if (NOTRANS) {
+      //* Form  B := alpha*inv( A )*B.
+      if (UPPER) { /* AuXB : LUN */
+        for(j = 0 ; j < N ; j++) {
+          //* k runs M-1 .. 0 (back substitution).
+          for(k = M; k-- ; ) {
+            if (B[k*rsb + j*csb] != ZERO) {
+              if (NOUNIT) B[k*rsb + j*csb] = B[k*rsb + j*csb]/A[k*rsa + k*csa];
+              for(i = 0 ; i < k ; i++) {
+                B[i*rsb + j*csb] = B[i*rsb + j*csb] - B[k*rsb + j*csb]*A[i*rsa + k*csa];
+              }
+            }
+          }
+        }
+      }
+      else {
+        for(j = 0 ; j < N ; j++) {  /* AlXB : LLN */
+          for(k = 0 ; k < M ; k++) {
+            if (B[k*rsb + j*csb] != ZERO) {
+              if (NOUNIT) B[k*rsb + j*csb] = B[k*rsb + j*csb]/A[k*rsa + k*csa];
+              for(i=(k+1) ; i < M ; i++) {
+                B[i*rsb + j*csb] = B[i*rsb + j*csb] - (B[k*rsb + j*csb]*A[i*rsa + k*csa]);
+              }
+            }
+          }
+        }
+      }
+    }
+    else {
+      //* Form  B := alpha*inv( A**T )*B.
+      if (UPPER) {
+        for(j = 0 ; j < N ; j++) {  /* AutXB : LUT */
+          for(i = 0 ; i < M ; i++) {
+            tmp = B[i*rsb + j*csb];
+            for(k = 0 ; k < i ; k++) {
+              tmp = tmp - A[k*rsa + i*csa]*B[k*rsb + j*csb];
+            }
+            if (NOUNIT) tmp = tmp/A[i*rsa + i*csa];
+            B[i*rsb + j*csb] = tmp;
+          }
+        }
+      }
+      else {
+        for(j = 0 ; j < N ; j++) {  /* AltXB : LLT */
+          //* i runs M-1 .. 0.
+          for(i = M ; i-- ;) {
+            tmp = B[i*rsb + j*csb];
+            for(k = (i+1) ; k < M ; k++) {
+              tmp = tmp - A[k*rsa + i*csa]*B[k*rsb + j*csb];
+            }
+            if (NOUNIT) tmp = tmp/A[i*rsa + i*csa];
+            B[i*rsb + j*csb] = tmp;
+          }
+        }
+      }
+    }
+  }
+  else {
+    if(NOTRANS) {
+      //* Form  B := alpha*B*inv( A ).
+      if (UPPER) {
+        for(j = 0 ; j < N ; j++) {  /* XAuB : RUN */
+          for(k = 0 ; k < j ; k++) {
+            if (A[k*rsa + j*csa] != ZERO) {
+              for(i = 0 ; i < M ; i++) {
+                B[i*rsb + j*csb] = B[i*rsb + j*csb] - A[k*rsa + j*csa]*B[i*rsb + k*csb];
+              }
+            }
+          }
+          if (NOUNIT) {
+            //* Multiply column j by the reciprocal of the diagonal entry.
+            tmp = ONE/A[j*rsa + j*csa];
+            for(i = 0 ; i < M ; i++) {
+              B[i*rsb + j*csb] = tmp*B[i*rsb + j*csb];
+            }
+          }
+        }
+      }
+      else {
+        //* j runs N-1 .. 0.
+        for(j = N; j-- ; ) {  /* XAlB : RLN */
+          for(k = (j+1) ; k < N ; k++) {
+            if (A[k*rsa + j*csa] != ZERO) {
+              for(i = 0 ; i < M ; i++) {
+                B[i*rsb + j*csb] = B[i*rsb + j*csb] - A[k*rsa + j*csa]*B[i*rsb + k*csb];
+              }
+            }
+          }
+          if (NOUNIT) {
+            tmp = ONE/A[j*rsa + j*csa];
+            for(i = 0 ; i < M ; i++) {
+              B[i*rsb + j*csb] = tmp*B[i*rsb + j*csb];
+            }
+          }
+        }
+      }
+    }
+    else {
+      //* Form  B := alpha*B*inv( A**T ).
+      if (UPPER) {  /* XAutB : RUT */
+        //* k runs N-1 .. 0.
+        for(k = N ; k-- ; ) {
+          if (NOUNIT) {
+            tmp = ONE/A[k*rsa + k*csa];
+            for(i = 0 ; i < M ; i++) {
+              B[i*rsb + k*csb] = tmp*B[i*rsb + k*csb];
+            }
+          }
+          for(j = 0 ; j < k; j++) {
+            if (A[j*rsa + k*csa] != ZERO) {
+              tmp = A[j*rsa + k*csa];
+              for(i = 0 ; i < M ; i++) {
+                B[i*rsb + j*csb] = B[i*rsb + j*csb] - tmp*B[i*rsb + k*csb];
+              }
+            }
+          }
+        }
+      }
+      else {  /* XAltB : RLT */
+        for(k = 0 ; k < N; k++) {
+          if (NOUNIT) {
+            tmp = ONE/A[k*rsa + k*csa];
+            for(i = 0 ; i < M ; i++) {
+              B[i*rsb + k*csb] = tmp*B[i*rsb + k*csb];
+            }
+          }
+          for(j = (k+1) ; j < N ; j++) {
+            if (A[j*rsa + k*csa] != ZERO) {
+              tmp = A[j*rsa + k*csa];
+              for(i = 0 ; i < M; i++) {
+                B[i*rsb + j*csb] = B[i*rsb + j*csb] - tmp*B[i*rsb + k*csb];
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return;
+}
+
+// Validates a TRSM result. The reference solve is run in place on the
+// buffer of b_orig, and the value returned is whatever computediffrm /
+// computediffim reports when that buffer is compared with the buffer of b.
+// An unsupported dt is reported through bli_check_error_code and yields 0.0.
+double libblis_test_itrsm_check(
+  test_params_t* params,
+  side_t         side,
+  obj_t*         alpha,
+  obj_t*         a,
+  obj_t*         b,
+  obj_t*         b_orig,
+  num_t          dt
+){
+  // Problem shape comes from b_orig; the B strides are read from b.
+  dim_t   M      = bli_obj_length( b_orig );
+  dim_t   N      = bli_obj_width( b_orig );
+  dim_t   rsa    = bli_obj_row_stride( a );
+  dim_t   csa    = bli_obj_col_stride( a );
+  dim_t   rsb    = bli_obj_row_stride( b );
+  dim_t   csb    = bli_obj_col_stride( b );
+  uplo_t  uploa  = bli_obj_uplo( a );
+  trans_t transa = bli_obj_onlytrans_status( a );
+  diag_t  diaga  = bli_obj_diag( a );
+  bool    conja  = bli_obj_has_conj( a );
+
+  if ( dt == BLIS_FLOAT ) {
+    float* scal = (float*) bli_obj_buffer( alpha );
+    float* tri  = (float*) bli_obj_buffer( a );
+    float* ref  = (float*) bli_obj_buffer( b_orig );
+    float* out  = (float*) bli_obj_buffer( b );
+    libblis_itrsm_check<float, int32_t>(side, uploa, transa,
+                     diaga, M, N, *scal, tri, rsa, csa, ref, rsb, csb );
+    return computediffrm(M, N, out, ref, rsb, csb);
+  }
+
+  if ( dt == BLIS_DOUBLE ) {
+    double* scal = (double*) bli_obj_buffer( alpha );
+    double* tri  = (double*) bli_obj_buffer( a );
+    double* ref  = (double*) bli_obj_buffer( b_orig );
+    double* out  = (double*) bli_obj_buffer( b );
+    libblis_itrsm_check<double, int64_t>(side, uploa, transa,
+                     diaga, M, N, *scal, tri, rsa, csa, ref, rsb, csb );
+    return computediffrm(M, N, out, ref, rsb, csb);
+  }
+
+  if ( dt == BLIS_SCOMPLEX ) {
+    scomplex* scal = (scomplex*) bli_obj_buffer( alpha );
+    scomplex* tri  = (scomplex*) bli_obj_buffer( a );
+    scomplex* ref  = (scomplex*) bli_obj_buffer( b_orig );
+    scomplex* out  = (scomplex*) bli_obj_buffer( b );
+    libblis_ictrsm_check<scomplex, float>(side, uploa, transa,
+                  diaga, M, N, *scal, tri, rsa, csa, conja, ref, rsb, csb );
+    return computediffim(M, N, out, ref, rsb, csb);
+  }
+
+  if ( dt == BLIS_DCOMPLEX ) {
+    dcomplex* scal = (dcomplex*) bli_obj_buffer( alpha );
+    dcomplex* tri  = (dcomplex*) bli_obj_buffer( a );
+    dcomplex* ref  = (dcomplex*) bli_obj_buffer( b_orig );
+    dcomplex* out  = (dcomplex*) bli_obj_buffer( b );
+    libblis_ictrsm_check<dcomplex, double>(side, uploa, transa,
+                  diaga, M, N, *scal, tri, rsa, csa, conja, ref, rsb, csb );
+    return computediffim(M, N, out, ref, rsb, csb);
+  }
+
+  // Any other datatype is invalid for this check.
+  bli_check_error_code( BLIS_INVALID_DATATYPE );
+  return 0.0;
+}
+
+// Scans the matrix held by b (dimensions taken from b, element strides
+// rs/cs supplied by the caller, elements of real type T) and returns the
+// first NaN element found, converted to double. Returns 0.0 when no
+// element is NaN.
+template <typename T>
+double libblis_check_nan_real( dim_t rs, dim_t cs, obj_t* b ) {
+  dim_t  M = bli_obj_length( b );
+  dim_t  N = bli_obj_width( b );
+  dim_t  i,j;
+  T* B = (T*) bli_obj_buffer( b );
+
+  for( i = 0 ; i < M ; i++ ) {
+    for( j = 0 ; j < N ; j++ ) {
+      auto tv = B[ i*rs + j*cs ];
+      if ( bli_isnan( tv )) {
+        // Leave both loops at once: a plain break would only exit the
+        // inner loop and keep scanning the remaining rows.
+        return tv;
+      }
+    }
+  }
+  return 0.0;
+}
+
+// Scans the matrix held by b (dimensions taken from b, element strides
+// rs/cs supplied by the caller, elements of complex type T) and returns
+// the first NaN component found: the real part if it is NaN, otherwise the
+// imaginary part. Returns 0.0 when no component is NaN.
+template <typename T>
+double libblis_check_nan_complex( dim_t rs, dim_t cs, obj_t* b ) {
+  dim_t  M = bli_obj_length( b );
+  dim_t  N = bli_obj_width( b );
+  dim_t  i,j;
+  T* B = (T*) bli_obj_buffer( b );
+
+  for( i = 0 ; i < M ; i++ ) {
+    for( j = 0 ; j < N ; j++ ) {
+      auto tv = B[ i*rs + j*cs ];
+      if ( bli_isnan( tv.real ) || bli_isnan( tv.imag )) {
+        // Leave both loops at once: a plain break would only exit the
+        // inner loop and keep scanning the remaining rows.
+        return bli_isnan( tv.real ) ? tv.real : tv.imag;
+      }
+    }
+  }
+  return 0.0;
+}
+
+// Dispatches the NaN scan of b to the real or complex helper matching dt.
+// One of the two strides handed to the helper is always forced to 1: the
+// row stride when b already has unit row stride, the column stride
+// otherwise. An unsupported dt is reported through bli_check_error_code
+// and yields 0.0.
+double libblis_check_nan_trsm(obj_t* b, num_t dt ) {
+  bool  unit_rs = ( bli_obj_row_stride( b ) == 1 );
+  dim_t rsc     = 1;
+  dim_t csc     = 1;
+
+  if( unit_rs )
+    csc = bli_obj_col_stride( b );
+  else
+    rsc = bli_obj_row_stride( b );
+
+  if( dt == BLIS_FLOAT )
+    return libblis_check_nan_real<float>( rsc, csc, b );
+  if( dt == BLIS_DOUBLE )
+    return libblis_check_nan_real<double>( rsc, csc, b );
+  if( dt == BLIS_SCOMPLEX )
+    return libblis_check_nan_complex<scomplex>( rsc, csc, b );
+  if( dt == BLIS_DCOMPLEX )
+    return libblis_check_nan_complex<dcomplex>( rsc, csc, b );
+
+  bli_check_error_code( BLIS_INVALID_DATATYPE );
+  return 0.0;
+}
+
--- a/gtestsuite/src/ref_trsv.cpp
+++ b/gtestsuite/src/ref_trsv.cpp
@@ -0,0 +1,293 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_trsv.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> TRSV Solves a triangular system of equations with a single value for the
+//*>        right side
+//*>    x := alpha * inv(transa(A)) * x_orig
+//*> where b and x are n element vectors and A is an n by n unit, or
+//*> non-unit, upper or lower triangular matrix.
+//*  ==========================================================================
+
+//*  Reference TRSV for real element types (T). The template parameter U is
+//*  not used in this body. Overwrites X in place with
+//*     alpha * inv( op(A) ) * X
+//*  where A is N x N with strides rsa/csa, uploa selects the triangle that
+//*  is read, diaga whether the diagonal is divided out, and op(A) is A when
+//*  transa == BLIS_NO_TRANSPOSE and A**T otherwise. incx is the element
+//*  stride of X.
+template <typename T, typename U>
+void libblis_itrsv_check(uplo_t uploa, trans_t transa, diag_t diaga,
+      T* alpha, dim_t N, T* A, dim_t rsa, dim_t csa, T* X, dim_t incx){
+
+  T Alpha = alpha[0];
+  T tmp;
+  int i, ix, j, jx, kx;
+  bool NOTRANS, NOUNIT;
+
+  if(N == 0)
+    return;
+
+  NOTRANS = (transa == BLIS_NO_TRANSPOSE);
+  NOUNIT  = (diaga == BLIS_NONUNIT_DIAG);
+
+  //*     Set up the start point in X if the increment is not unity. This
+  //*     will be  ( N - 1 )*incx  too small for descending loops.
+  if (incx > 0) {
+    kx = 0;
+  }
+  else {
+    //* NOTE(review): this offset and the scaling loop below (which always
+    //* starts at index 0) do not look consistent for incx <= 0 - confirm
+    //* whether non-positive increments are ever passed in.
+    kx = 1 - (N * incx);
+  }
+
+  //* Scale X by alpha up front.
+  ix = 0;
+  for(i = 0 ; i < N ; i++) {
+    X[ix] = (Alpha * X[ix]);
+    ix = ix + incx;
+  }
+
+  //*     Start the operations. In this version the elements of A are
+  //*     accessed sequentially with one pass through A.
+  if(NOTRANS){
+    //* Form  x := inv( A )*x.
+    if(uploa == BLIS_UPPER){
+      //* Back substitution: start at the last element of X.
+      kx = kx + (N - 1)*incx;
+      ix = kx;
+      for(i = (N-1) ; i >= 0 ; i--){
+        tmp = 0;
+        //* Element i+1 of X lives one stride (incx) past element i, not
+        //* one array slot past it.
+        jx = (ix + incx);
+        for(j = (i+1) ; j < N ; j++){
+          tmp = tmp + X[jx]*A[i*rsa + j*csa];
+          jx = jx + incx;
+        }
+        tmp = (X[ix] - tmp);
+        if(NOUNIT)
+          tmp = (tmp/A[i*rsa + i*csa]);
+        X[ix] = tmp;
+        ix = ix - incx;
+      }
+    }
+    else{
+      //* Forward substitution.
+      ix = kx;
+      for(i = 0 ; i < N ; i++){
+        tmp = 0;
+        jx = kx;
+        for(j = 0 ; j < i ; j++ ){
+          tmp = tmp + (X[jx]*A[i*rsa + j*csa]);
+          jx = jx + incx;
+        }
+        tmp = (X[ix] - tmp);
+        if(NOUNIT)
+          tmp = (tmp/A[i*rsa + i*csa]);
+        X[ix] = tmp;
+        ix = ix + incx;
+      }
+    }
+  }
+  else{
+    //* Form  x := inv( A**T )*x.
+    if(uploa == BLIS_UPPER){
+      ix = kx;
+      for(i = 0 ; i < N ; i++){
+        if(NOUNIT)
+          X[ix] = (X[ix]/A[i*rsa + i*csa]);
+        tmp = X[ix];
+        jx  = ix;
+        //* Eliminate the solved x(i) from the remaining entries.
+        for(j = (i+1) ; j < N ; j++){
+          jx = jx + incx;
+          X[jx] = X[jx] - (tmp * A[i*rsa + j*csa]);
+        }
+        ix = ix + incx;
+      }
+    }
+    else{
+      ix = kx + (N - 1)*incx;
+      for(i = (N-1) ; i >= 0 ; i--){
+        if(NOUNIT)
+          X[ix] = (X[ix]/A[i*rsa + i*csa]);
+        tmp = X[ix];
+        jx = ix;
+        for(j = (i-1) ; j >= 0 ; j--){
+          jx = jx - incx;
+          X[jx] = X[jx] - (tmp * A[i*rsa + j*csa]);
+        }
+        ix = ix - incx;
+      }
+    }
+  }
+  return;
+}
+
+//*  Reference TRSV for complex element types. T is the complex type and U
+//*  is forwarded as the second template argument of divc<T,U>.
+//*  Overwrites X in place with
+//*     alpha * inv( op(A) ) * X
+//*  where A is N x N with strides rsa/csa, uploa selects the triangle that
+//*  is read, diaga whether the diagonal is divided out, and op(A) is A when
+//*  transa == BLIS_NO_TRANSPOSE and A**T otherwise. incx is the element
+//*  stride of X.
+//*  NOTE: when conja is set, A is conjugated in place and is left
+//*  conjugated on return.
+template <typename T, typename U>
+void libblis_ictrsv_check(uplo_t uploa, trans_t transa, diag_t diaga,
+T* alpha, dim_t N, T* A, dim_t rsa, dim_t csa, bool conja, T* X, dim_t incx){
+
+  T Alpha = *alpha;
+  T tmp;
+  int i, ix, j, jx, kx;
+  bool NOTRANS, NOUNIT;
+
+  if (N == 0)
+    return;
+
+  NOTRANS = (transa == BLIS_NO_TRANSPOSE);
+  NOUNIT  = (diaga == BLIS_NONUNIT_DIAG);
+
+  //*     Set up the start point in X if the increment is not unity. This
+  //*     will be  ( N - 1 )*incx  too small for descending loops.
+  if (incx > 0) {
+    kx = 0;
+  }
+  else {
+    //* NOTE(review): this offset and the scaling loop below (which always
+    //* starts at index 0) do not look consistent for incx <= 0 - confirm
+    //* whether non-positive increments are ever passed in.
+    kx = 1 - (N * incx);
+  }
+
+  //* Scale X by alpha up front.
+  ix = 0;
+  for(i = 0 ; i < N ; i++) {
+    X[ix] = mulc<T>(Alpha , X[ix]);
+    ix = ix + incx;
+  }
+
+  //* Conjugate every entry of A when requested.
+  if(conja) {
+    for(i = 0 ; i < N ; i++) {
+      for(j = 0 ; j < N ; j++) {
+        A[i*rsa + j*csa] = conjugate<T>(A[i*rsa + j*csa]);
+      }
+    }
+  }
+
+  if(NOTRANS){
+    //* Form  x := inv( A )*x.
+    if(uploa == BLIS_UPPER){
+      //* Back substitution: start at the last element of X.
+      kx = kx + (N - 1)*incx;
+      ix = kx;
+      for(i = (N-1) ; i >= 0 ; i--){
+        tmp = {0.0,0.0};
+        //* Element i+1 of X lives one stride (incx) past element i, not
+        //* one array slot past it.
+        jx = (ix + incx);
+        for(j = (i+1) ; j < N ; j++){
+          tmp = addc<T>(tmp , mulc<T>(X[jx] , A[i*rsa + j*csa]));
+          jx = jx + incx;
+        }
+        tmp = subc<T>(X[ix] , tmp);
+        if(NOUNIT)
+          tmp = divc<T,U>(tmp , A[i*rsa + i*csa]);
+        X[ix] = tmp;
+        ix = ix - incx;
+      }
+    }
+    else{
+      //* Forward substitution.
+      ix = kx;
+      for(i = 0 ; i < N ; i++){
+        tmp = {0.0,0.0};
+        jx = kx;
+        for(j = 0 ; j < i ; j++ ){
+          tmp = addc<T>(tmp , mulc<T>(X[jx] , A[i*rsa + j*csa]));
+          jx = jx + incx;
+        }
+        tmp = subc<T>(X[ix] , tmp);
+        if(NOUNIT)
+          tmp = divc<T,U>(tmp , A[i*rsa + i*csa]);
+        X[ix] = tmp;
+        ix = ix + incx;
+      }
+    }
+  }
+  else{
+    //* Form  x := inv( A**T )*x.
+    if(uploa == BLIS_UPPER){
+      ix = kx;
+      for(i = 0 ; i < N ; i++){
+        if(NOUNIT)
+          X[ix] = divc<T,U>(X[ix] , A[i*rsa + i*csa]);
+        tmp = X[ix];
+        jx  = ix;
+        //* Eliminate the solved x(i) from the remaining entries.
+        for(j = (i+1) ; j < N ; j++){
+          jx = jx + incx;
+          X[jx] = subc<T>(X[jx] , mulc<T>(tmp , A[i*rsa + j*csa]));
+        }
+        ix = ix + incx;
+      }
+    }
+    else{
+      ix = kx + (N - 1)*incx;
+      for(i = (N-1) ; i >= 0 ; i--){
+        if(NOUNIT)
+          X[ix] = divc<T,U>(X[ix] , A[i*rsa + i*csa]);
+        tmp = X[ix];
+        jx = ix;
+        for(j = (i-1) ; j >= 0 ; j--){
+          jx = jx - incx;
+          X[jx] = subc<T>(X[jx] , mulc<T>(tmp , A[i*rsa + j*csa]));
+        }
+        ix = ix - incx;
+      }
+    }
+  }
+  return;
+}
+
+// Validates a TRSV result. The reference solve is run in place on the
+// buffer of x_orig, and the value returned is whatever computediffrv /
+// computediffiv reports when that buffer is compared with the buffer of x.
+// The datatype is taken from x; an unsupported one is reported through
+// bli_check_error_code and yields 0.0.
+double libblis_test_itrsv_check(
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         a,
+  obj_t*         x,
+  obj_t*         x_orig
+){
+  num_t   dt     = bli_obj_dt( x );
+  // The order of the system is the length of A.
+  dim_t   M      = bli_obj_length( a );
+  dim_t   rsa    = bli_obj_row_stride( a );
+  dim_t   csa    = bli_obj_col_stride( a );
+  dim_t   incx   = bli_obj_vector_inc( x );
+  uplo_t  uploa  = bli_obj_uplo( a );
+  trans_t transa = bli_obj_onlytrans_status( a );
+  diag_t  diaga  = bli_obj_diag( a );
+  bool    conja  = bli_obj_has_conj( a );
+
+  if ( dt == BLIS_FLOAT ) {
+    float* ref  = (float*) bli_obj_buffer( x_orig );
+    float* tri  = (float*) bli_obj_buffer( a );
+    float* scal = (float*) bli_obj_buffer( alpha );
+    float* out  = (float*) bli_obj_buffer( x );
+    libblis_itrsv_check<float, int32_t>(uploa, transa, diaga, scal,
+                                        M, tri, rsa, csa, ref, incx);
+    return computediffrv(M, incx, out, ref);
+  }
+
+  if ( dt == BLIS_DOUBLE ) {
+    double* ref  = (double*) bli_obj_buffer( x_orig );
+    double* tri  = (double*) bli_obj_buffer( a );
+    double* scal = (double*) bli_obj_buffer( alpha );
+    double* out  = (double*) bli_obj_buffer( x );
+    libblis_itrsv_check<double, int64_t>(uploa, transa, diaga, scal,
+                                         M, tri, rsa, csa, ref, incx);
+    return computediffrv(M, incx, out, ref);
+  }
+
+  if ( dt == BLIS_SCOMPLEX ) {
+    scomplex* ref  = (scomplex*) bli_obj_buffer( x_orig );
+    scomplex* tri  = (scomplex*) bli_obj_buffer( a );
+    scomplex* scal = (scomplex*) bli_obj_buffer( alpha );
+    scomplex* out  = (scomplex*) bli_obj_buffer( x );
+    libblis_ictrsv_check<scomplex, float>(uploa, transa, diaga, scal,
+                                          M, tri, rsa, csa, conja, ref, incx);
+    return computediffiv(M, incx, out, ref);
+  }
+
+  if ( dt == BLIS_DCOMPLEX ) {
+    dcomplex* ref  = (dcomplex*) bli_obj_buffer( x_orig );
+    dcomplex* tri  = (dcomplex*) bli_obj_buffer( a );
+    dcomplex* scal = (dcomplex*) bli_obj_buffer( alpha );
+    dcomplex* out  = (dcomplex*) bli_obj_buffer( x );
+    libblis_ictrsv_check<dcomplex, double>(uploa, transa, diaga, scal,
+                                           M, tri, rsa, csa, conja, ref, incx);
+    return computediffiv(M, incx, out, ref);
+  }
+
+  // Any other datatype is invalid for this check.
+  bli_check_error_code( BLIS_INVALID_DATATYPE );
+  return 0.0;
+}
+
--- a/gtestsuite/src/ref_xpbym.cpp
+++ b/gtestsuite/src/ref_xpbym.cpp
@@ -0,0 +1,176 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_xpbym.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> XPBYM performs the matrix operation
+//*>    B := alpha * B + conjx(A)
+//*>    where A and B are m x n matrices and alpha is a scalar.
+//*  ==========================================================================
+
+//*  Reference XPBYM for real element types (T): overwrites Y in place with
+//*     Y := alpha * Y + X
+//*  X and Y are M x N with strides rsx/csx and rsy/csy. The template
+//*  parameter U and the argument YY are not used in this body.
+template <typename T, typename U>
+void libblis_ixpbym_check(dim_t M, dim_t N, T* alpha,
+     T* X, dim_t rsx, dim_t csx, T* Y, dim_t rsy, dim_t csy, T* YY) {
+
+  dim_t r, c;
+  T one   = 1.0 ;
+  T zero  = 0.0 ;
+  T scale = alpha[0];
+
+  //* Nothing to do for an empty matrix.
+  if ((M == 0) || (N == 0)) {
+      return;
+  }
+
+  //* Step 1: Y := alpha*Y. A zero alpha clears Y outright, a unit alpha
+  //* leaves it untouched.
+  if (scale == zero) {
+    for(r = 0 ; r < M ; r++) {
+      for(c = 0 ; c < N ; c++) {
+        Y[r*rsy + c*csy] = zero;
+      }
+    }
+  }
+  else if (scale != one) {
+    for(r = 0 ; r < M ; r++) {
+      for(c = 0 ; c < N ; c++) {
+        T& y = Y[r*rsy + c*csy];
+        y = (scale * y);
+      }
+    }
+  }
+
+  //* Step 2: Y := Y + X.
+  for(r = 0 ; r < M ; r++) {
+    for(c = 0 ; c < N ; c++) {
+      T& y = Y[r*rsy + c*csy];
+      y = y + X[r*rsx + c*csx] ;
+    }
+  }
+
+  return;
+}
+
+//*  Reference XPBYM for complex element types (T): overwrites Y in place
+//*  with
+//*     Y := alpha * Y + conjx(X)
+//*  X and Y are M x N with strides rsx/csx and rsy/csy. The template
+//*  parameter U is not used in this body.
+//*  NOTE: when conjx is set, X is conjugated in place and is left
+//*  conjugated on return.
+template <typename T, typename U>
+void libblis_icxpbym_check(dim_t M, dim_t N, T* alpha,
+     T* X, dim_t rsx, dim_t csx, conj_t conjx, T* Y, dim_t rsy, dim_t csy) {
+
+  dim_t i, j;
+  T ONE  = {1.0 , 0.0};
+  T ZERO = {0.0 , 0.0};
+  T Alpha = *alpha;
+
+  if ((M == 0) || (N == 0)) {
+      return;
+  }
+
+  if(conjx) {
+    for(i = 0 ; i < M ; i++) {
+      for(j = 0 ; j < N ; j++) {
+        X[i*rsx + j*csx] = conjugate<T>(X[i*rsx + j*csx]);
+      }
+    }
+  }
+
+  //* First form  y := alpha*y, unless alpha is exactly (1,0). The step is
+  //* needed as soon as EITHER component differs from ONE; requiring both
+  //* to differ would skip it for alpha = (0,0) and for any real alpha.
+  if ((Alpha.real != ONE.real) || (Alpha.imag != ONE.imag)) {
+    if ((Alpha.real == ZERO.real) && (Alpha.imag == ZERO.imag)) {
+      for(i = 0 ; i < M ; i++) {
+        for(j = 0 ; j < N ; j++) {
+          Y[i*rsy + j*csy] = ZERO;
+        }
+      }
+    }
+    else {
+      for(i = 0 ; i < M ; i++) {
+        for(j = 0 ; j < N ; j++) {
+          Y[i*rsy + j*csy] = mulc<T>(Alpha , Y[i*rsy + j*csy]);
+        }
+      }
+    }
+  }
+
+  //* Then accumulate the (possibly conjugated) X.
+  for(i = 0 ; i < M ; i++) {
+    for(j = 0 ; j < N ; j++) {
+      Y[i*rsy + j*csy] = addc<T>(Y[i*rsy + j*csy] , X[i*rsx + j*csx]);
+    }
+  }
+
+  return;
+}
+
+// Validates an XPBYM result. The reference update is run in place on the
+// buffer of y_orig, and the value returned is whatever computediffrm /
+// computediffim reports when that buffer is compared with the buffer of y.
+// The datatype and shape are taken from y; an unsupported datatype is
+// reported through bli_check_error_code and yields 0.0.
+double libblis_test_ixpbym_check(
+  test_params_t* params,
+  obj_t*         x,
+  obj_t*         alpha,
+  obj_t*         y,
+  obj_t*         y_orig
+){
+  num_t  dt    = bli_obj_dt( y );
+  dim_t  M     = bli_obj_length( y );
+  dim_t  N     = bli_obj_width( y );
+  dim_t  rsy   = bli_obj_row_stride( y );
+  dim_t  csy   = bli_obj_col_stride( y );
+  conj_t conjx = bli_obj_conj_status( x );
+
+  // A transposed x is walked with its strides swapped; the storage scheme
+  // of x makes no difference to that choice.
+  dim_t  rsx   = bli_obj_row_stride( x );
+  dim_t  csx   = bli_obj_col_stride( x );
+  if( bli_obj_has_trans( x ) ) {
+    rsx = bli_obj_col_stride( x );
+    csx = bli_obj_row_stride( x );
+  }
+
+  if( dt == BLIS_FLOAT ) {
+    float* scal = (float*) bli_obj_buffer( alpha );
+    float* src  = (float*) bli_obj_buffer( x );
+    float* ref  = (float*) bli_obj_buffer( y_orig );
+    float* out  = (float*) bli_obj_buffer( y );
+    libblis_ixpbym_check<float, int32_t>( M, N, scal, src, rsx, csx,
+                                          ref, rsy, csy, out );
+    return computediffrm(M, N, out, ref, rsy, csy);
+  }
+
+  if( dt == BLIS_DOUBLE ) {
+    double* scal = (double*) bli_obj_buffer( alpha );
+    double* src  = (double*) bli_obj_buffer( x );
+    double* ref  = (double*) bli_obj_buffer( y_orig );
+    double* out  = (double*) bli_obj_buffer( y );
+    libblis_ixpbym_check<double, int64_t>( M, N, scal, src, rsx, csx,
+                                           ref, rsy, csy, out );
+    return computediffrm(M, N, out, ref, rsy, csy);
+  }
+
+  if( dt == BLIS_SCOMPLEX ) {
+    scomplex* scal = (scomplex*) bli_obj_buffer( alpha );
+    scomplex* src  = (scomplex*) bli_obj_buffer( x );
+    scomplex* ref  = (scomplex*) bli_obj_buffer( y_orig );
+    scomplex* out  = (scomplex*) bli_obj_buffer( y );
+    libblis_icxpbym_check<scomplex, int32_t>( M, N, scal, src, rsx, csx,
+                                              conjx, ref, rsy, csy );
+    return computediffim(M, N, out, ref, rsy, csy);
+  }
+
+  if( dt == BLIS_DCOMPLEX ) {
+    dcomplex* scal = (dcomplex*) bli_obj_buffer( alpha );
+    dcomplex* src  = (dcomplex*) bli_obj_buffer( x );
+    dcomplex* ref  = (dcomplex*) bli_obj_buffer( y_orig );
+    dcomplex* out  = (dcomplex*) bli_obj_buffer( y );
+    libblis_icxpbym_check<dcomplex, int64_t>( M, N, scal, src, rsx, csx,
+                                              conjx, ref, rsy, csy );
+    return computediffim(M, N, out, ref, rsy, csy);
+  }
+
+  // Any other datatype is invalid for this check.
+  bli_check_error_code( BLIS_INVALID_DATATYPE );
+  return 0.0;
+}
+
--- a/gtestsuite/src/ref_xpbyv.cpp
+++ b/gtestsuite/src/ref_xpbyv.cpp
@@ -0,0 +1,165 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_xpbyv.h"
+
+using namespace std;
+
+//*  ==========================================================================
+//*> XPBYV performs vector operations
+//*>    y := beta * y + conjx(x)
+//*>    where x and y are vectors of length n, and beta is scalar
+//*  ==========================================================================
+
+//*  Reference XPBYV for real element types (T): overwrites Y in place with
+//*     y := beta * y + x
+//*  X and Y hold len elements with strides incx and incy. The template
+//*  parameter U is not used in this body.
+template <typename T, typename U>
+void libblis_ixpbyv_check(dim_t len, T* X, dim_t incx,
+                                              T* beta, T* Y, dim_t incy) {
+
+  dim_t i, ix, iy;
+  T ONE, ZERO;
+  ONE = 1.0 ;
+  ZERO = 0.0 ;
+  T Beta  = beta[0];
+
+  //* Only an empty vector allows a quick return. beta == 1 must NOT return
+  //* early: the scaling is a no-op then, but x still has to be added.
+  if (len == 0){
+    return;
+  }
+
+  //*     First form  y := beta*y.
+  if (Beta != ONE) {
+    iy = 0;
+    if (Beta == ZERO) {
+      for(i = 0 ; i < len ; i++) {
+        Y[iy] = ZERO;
+        iy = iy + incy;
+      }
+    }
+    else {
+      for(i = 0 ; i < len ; i++) {
+        Y[iy] = Beta*Y[iy];
+        iy = iy + incy;
+      }
+    }
+  }
+
+  //*     Then form  y := y + x.
+  ix = 0;
+  iy = 0;
+  for(i = 0 ; i < len ; i++) {
+    Y[iy] = Y[iy] + X[ix];
+    ix = ix + incx;
+    iy = iy + incy;
+  }
+
+  return;
+}
+
+//*  Reference XPBYV for complex element types (T): overwrites Y in place
+//*  with
+//*     y := beta * y + conjx(x)
+//*  X and Y hold len elements with strides incx and incy. The template
+//*  parameter U is not used in this body.
+//*  NOTE: when cfx is set, X is conjugated in place and is left conjugated
+//*  on return.
+template <typename T, typename U>
+void libblis_icxpbyv_check(dim_t len, T* X, dim_t incx,
+                                       T* beta, T* Y, dim_t incy, bool cfx) {
+  dim_t n, px, py;
+  T zero;
+  zero = {0.0 , 0.0};
+  T scale = *beta;
+
+  if (len == 0) {
+    return;
+  }
+
+  //* Optional conjugation of the x operand.
+  if(cfx) {
+    for(n = 0, px = 0 ; n < len ; n++, px += incx) {
+      X[px] = conjugate<T>(X[px]);
+    }
+  }
+
+  //* y := beta*y; a zero beta clears y instead of multiplying.
+  bool clear_y = (scale.real == zero.real) && (scale.imag == zero.imag);
+  for(n = 0, py = 0 ; n < len ; n++, py += incy) {
+    if (clear_y) {
+      Y[py] = zero;
+    }
+    else {
+      Y[py] = mulc<T>(scale , Y[py]);
+    }
+  }
+
+  //* y := y + x.
+  for(n = 0, px = 0, py = 0 ; n < len ; n++, px += incx, py += incy) {
+    Y[py] = addc<T>(Y[py] , X[px]);
+  }
+
+  return;
+}
+
+// Validates an XPBYV result. The reference update is run in place on the
+// buffer of y_orig, and the value returned is whatever computediffrv /
+// computediffiv reports when that buffer is compared with the buffer of y.
+// The datatype and length are taken from x; an unsupported datatype is
+// reported through bli_check_error_code and yields 0.0.
+double libblis_test_ixpbyv_check(
+  test_params_t* params,
+  obj_t*         x,
+  obj_t*         beta,
+  obj_t*         y,
+  obj_t*         y_orig
+) {
+  num_t   dt   = bli_obj_dt( x );
+  dim_t   M    = bli_obj_vector_dim( x );
+  f77_int incx = bli_obj_vector_inc( x );
+  f77_int incy = bli_obj_vector_inc( y_orig );
+  bool    cfx  = bli_obj_has_conj( x );
+
+  if( dt == BLIS_FLOAT ) {
+    float* src  = (float*) bli_obj_buffer( x );
+    float* scal = (float*) bli_obj_buffer( beta );
+    float* ref  = (float*) bli_obj_buffer( y_orig );
+    float* out  = (float*) bli_obj_buffer( y );
+    libblis_ixpbyv_check<float, int32_t>( M, src, incx, scal, ref, incy );
+    return computediffrv(M, incy, out, ref);
+  }
+
+  if( dt == BLIS_DOUBLE ) {
+    double* src  = (double*) bli_obj_buffer( x );
+    double* scal = (double*) bli_obj_buffer( beta );
+    double* ref  = (double*) bli_obj_buffer( y_orig );
+    double* out  = (double*) bli_obj_buffer( y );
+    libblis_ixpbyv_check<double, int64_t>( M, src, incx, scal, ref, incy );
+    return computediffrv(M, incy, out, ref);
+  }
+
+  if( dt == BLIS_SCOMPLEX ) {
+    scomplex* src  = (scomplex*) bli_obj_buffer( x );
+    scomplex* scal = (scomplex*) bli_obj_buffer( beta );
+    scomplex* ref  = (scomplex*) bli_obj_buffer( y_orig );
+    scomplex* out  = (scomplex*) bli_obj_buffer( y );
+    libblis_icxpbyv_check<scomplex, int32_t>( M, src, incx, scal,
+                                              ref, incy, cfx );
+    return computediffiv(M, incy, out, ref);
+  }
+
+  if( dt == BLIS_DCOMPLEX ) {
+    dcomplex* src  = (dcomplex*) bli_obj_buffer( x );
+    dcomplex* scal = (dcomplex*) bli_obj_buffer( beta );
+    dcomplex* ref  = (dcomplex*) bli_obj_buffer( y_orig );
+    dcomplex* out  = (dcomplex*) bli_obj_buffer( y );
+    libblis_icxpbyv_check<dcomplex, int64_t>( M, src, incx, scal,
+                                              ref, incy, cfx );
+    return computediffiv(M, incy, out, ref);
+  }
+
+  // Any other datatype is invalid for this check.
+  bli_check_error_code( BLIS_INVALID_DATATYPE );
+  return 0.0;
+}
+
--- a/gtestsuite/src/test_addm.cpp
+++ b/gtestsuite/src/test_addm.cpp
@@ -0,0 +1,235 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_addm.h"
+
+// Local prototypes.
+
+// Declared here only; no definition or caller of this function is visible in
+// this file.
+void libblis_test_addm_deps (
+  thread_data_t* tdata,
+  test_params_t* params,
+  test_op_t*     op
+);
+
+// Dispatches on iface and invokes bli_addm( x, y ); defined later in this
+// file.
+void libblis_test_addm_impl (
+  iface_t   iface,
+  obj_t*    x,
+  obj_t*    y
+);
+
+// Norm-based residual check for addm; defined later in this file. Returns the
+// residual as a double.
+double libblis_test_addm_check (
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         beta,
+  obj_t*         x,
+  obj_t*         y
+);
+
+// Pick the residual check for addm according to the test mode in params:
+//   - params->oruflw != BLIS_DEFAULT : libblis_test_matrix_check() on y
+//   - otherwise, params->bitextf != 0: libblis_test_iaddm_check()
+//   - otherwise                      : libblis_test_addm_check()
+// Returns whatever the selected check returns.
+double libblis_ref_addm(
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         beta,
+  obj_t*         x,
+  obj_t*         y,
+  obj_t*         y_orig
+){
+  // A non-default over/underflow mode wins regardless of bitextf.
+  if ( params->oruflw != BLIS_DEFAULT ) {
+    return libblis_test_matrix_check( params, y );
+  }
+
+  if ( params->bitextf != 0 ) {
+    return libblis_test_iaddm_check( params, x, y, y_orig );
+  }
+
+  return libblis_test_addm_check( params, alpha, beta, x, y );
+}
+
+// Re-run addm params->n_repeats times, each time starting from y_orig copied
+// into the scratch object r, and compare r against the reference result y
+// with libblis_test_bitrp_matrix().
+// Only the value from the last repetition is returned; 0.0 is returned when
+// n_repeats is zero.
+double libblis_test_bitrp_addm(
+  test_params_t* params,
+  iface_t        iface,
+  obj_t*         x,
+  obj_t*         y,
+  obj_t*         y_orig,
+  obj_t*         r,
+  num_t          dt
+) {
+  double resid = 0.0;
+  const unsigned int n_repeats = params->n_repeats;
+
+  unsigned int rep = 0;
+  while ( rep < n_repeats ) {
+    // Restore the original operand, recompute, and compare.
+    bli_copym( y_orig, r );
+    libblis_test_addm_impl( iface, x, r );
+    resid = libblis_test_bitrp_matrix( y, r, dt );
+    ++rep;
+  }
+
+  return resid;
+}
+
+// Run one addm test case and return the magnitude of its residual.
+//
+// params - test configuration; bitextf, oruflw and bitrp are read here.
+// iface  - interface selector forwarded to libblis_test_addm_impl().
+// dc_str - datatype string; only dc_str[0] is used.
+// pc_str - parameter string; pc_str[0] is mapped to the trans value for x.
+// sc_str - storage string; sc_str[0] is used for x, sc_str[1] for y.
+// dim    - problem size; dim->m and dim->n are read.
+//
+// When __GTEST_VALGRIND_TEST__ is defined the operation is executed but no
+// check is performed, so the returned residual is 0.0.
+double libblis_test_op_addm (
+  test_params_t* params,
+  iface_t        iface,
+  char*          dc_str,
+  char*          pc_str,
+  char*          sc_str,
+  tensor_t*      dim
+) {
+  num_t        datatype;
+  dim_t        m, n;
+  trans_t      transx;
+  obj_t        alpha, beta;
+  obj_t        x, y, y_save;
+  double       resid = 0.0;
+
+  // Use the datatype of the first char in the datatype combination string.
+  bli_param_map_char_to_blis_dt( dc_str[0], &datatype );
+
+  // Map the dimension specifier to actual dimensions.
+  m = dim->m;
+  n = dim->n;
+
+  // Map parameter characters to BLIS constants.
+  bli_param_map_char_to_blis_trans( pc_str[0], &transx );
+
+  // Create test scalars.
+  bli_obj_scalar_init_detached( datatype, &alpha );
+  bli_obj_scalar_init_detached( datatype, &beta );
+
+  // Create test operands (vectors and/or matrices).
+  libblis_test_mobj_create( params, datatype, transx,
+                            sc_str[0], m, n, &x );
+  libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
+                            sc_str[1], m, n, &y );
+  libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
+                            sc_str[1], m, n, &y_save );
+
+  if((params->bitextf == 0) && (params->oruflw == BLIS_DEFAULT)) {
+    // Initialize alpha and beta.
+    bli_setsc( -1.0, -1.0, &alpha );
+    bli_setsc(  3.0,  3.0, &beta );
+
+    // Fill x with alpha and y with beta; libblis_test_addm_check() relies
+    // on these constant operands.
+    bli_setm( &alpha, &x );
+    bli_setm( &beta,  &y );
+  }
+  else {
+    libblis_test_mobj_irandomize( params, &x );
+    libblis_test_mobj_irandomize( params, &y );
+  }
+
+  // Apply the parameters.
+  bli_obj_set_conjtrans( transx, &x );
+
+  // Save a copy of y before the operation overwrites it.
+  bli_copym( &y, &y_save );
+
+  libblis_test_addm_impl( iface, &x, &y );
+
+#ifndef __GTEST_VALGRIND_TEST__
+  if(params->bitrp) {
+    obj_t r;
+
+    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
+                                                sc_str[1], m, n, &r );
+    resid = libblis_test_bitrp_addm( params, iface, &x, &y, &y_save,
+                                                            &r, datatype);
+    bli_obj_free( &r );
+  }
+  else {
+    resid = libblis_ref_addm( params, &alpha, &beta, &x, &y, &y_save );
+  }
+#endif
+
+  // Zero out performance and residual if output matrix is empty.
+  libblis_test_check_empty_problem( &y, &resid );
+
+  // Free the test objects.
+  libblis_test_obj_free( &x );
+  libblis_test_obj_free( &y );
+  libblis_test_obj_free( &y_save );
+
+  // Return the magnitude without calling the unqualified abs(): depending on
+  // which headers are in scope that name can resolve to the C int abs(int),
+  // which would truncate a small residual to zero and hide a failure.
+  return ( resid < 0.0 ) ? -resid : resid;
+}
+
+// Invoke the addm operation under test through the requested interface.
+// Only BLIS_TEST_SEQ_FRONT_END is handled (it calls bli_addm( x, y )); any
+// other value is reported through libblis_test_printf_error().
+void libblis_test_addm_impl(
+  iface_t   iface,
+  obj_t*    x,
+  obj_t*    y
+) {
+  if ( iface == BLIS_TEST_SEQ_FRONT_END ) {
+    bli_addm( x, y );
+    return;
+  }
+
+  libblis_test_printf_error( "Invalid interface type.\n" );
+}
+
+// Norm-based check for addm.
+//
+// Pre-conditions:
+// - every element of x equals alpha.
+// - every element of y_orig equals beta.
+// Note:
+// - alpha and beta should have non-zero imaginary components in the complex
+//   cases in order to more fully exercise the implementation.
+//
+// For y := y_orig + conjx(x), the quantity
+//
+//   normfm(y) - sqrt( absqsc( beta + conjx(alpha) ) * m * n )
+//
+// should be negligible. That (signed) difference is returned; params is not
+// used.
+double libblis_test_addm_check (
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         beta,
+  obj_t*         x,
+  obj_t*         y
+) {
+  num_t  dt      = bli_obj_dt( y );
+  num_t  dt_real = bli_obj_dt_proj_to_real( y );
+  dim_t  rows    = bli_obj_length( y );
+  dim_t  cols    = bli_obj_width( y );
+  conj_t conjx   = bli_obj_conj_status( x );
+
+  obj_t  sum;          // beta + conjx(alpha)
+  obj_t  alpha_conj;   // conjx(alpha)
+  obj_t  norm_r;       // normfm(y), later the residual
+  obj_t  rows_r;       // m as a real scalar
+  obj_t  cols_r;       // n as a real scalar
+  obj_t  expected_r;   // expected Frobenius norm of y
+
+  double imag_unused;
+  double resid = 0.0;
+
+  bli_obj_scalar_init_detached( dt,      &sum );
+  bli_obj_scalar_init_detached( dt_real, &expected_r );
+  bli_obj_scalar_init_detached( dt_real, &norm_r );
+  bli_obj_scalar_init_detached( dt_real, &rows_r );
+  bli_obj_scalar_init_detached( dt_real, &cols_r );
+  bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj );
+
+  // sum := beta + conjx(alpha)
+  bli_copysc( beta, &sum );
+  bli_addsc( &alpha_conj, &sum );
+
+  // expected_r := sqrt( |sum|^2 * m * n )
+  bli_setsc( ( double )rows, 0.0, &rows_r );
+  bli_setsc( ( double )cols, 0.0, &cols_r );
+  bli_absqsc( &sum, &expected_r );
+  bli_mulsc( &rows_r, &expected_r );
+  bli_mulsc( &cols_r, &expected_r );
+  bli_sqrtsc( &expected_r, &expected_r );
+
+  // norm_r := normfm(y) - expected_r
+  bli_normfm( y, &norm_r );
+  bli_subsc( &expected_r, &norm_r );
+
+  bli_getsc( &norm_r, &resid, &imag_unused );
+
+  return resid;
+}
--- a/gtestsuite/src/test_addm.h
+++ b/gtestsuite/src/test_addm.h
@@ -0,0 +1,16 @@
+#ifndef TEST_ADDM_H
+#define TEST_ADDM_H
+
+#include "blis_test.h"
+
+// Alternative addm check that also receives the pre-operation copy of y.
+// libblis_ref_addm() in test_addm.cpp calls it when params->bitextf is
+// non-zero and params->oruflw is BLIS_DEFAULT. The definition is not part of
+// this header.
+double libblis_test_iaddm_check
+    (
+      test_params_t* params,
+      obj_t*         x,
+      obj_t*         y,
+      obj_t*         y_orig
+    );
+
+// NOTE(review): definition not visible here; by its name this presumably
+// inspects b for NaN values - confirm against the implementation.
+double libblis_check_nan_addm( char*  sc_str, obj_t* b, num_t dt );
+
+#endif /* TEST_ADDM_H */
--- a/gtestsuite/src/test_addv.cpp
+++ b/gtestsuite/src/test_addv.cpp
@@ -0,0 +1,216 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_addv.h"
+
+// Local prototypes.
+
+// Declared here only; no definition or caller of this function is visible in
+// this file.
+void libblis_test_addv_deps(thread_data_t* tdata,
+                            test_params_t* params, test_op_t* op );
+
+// Dispatches on iface and invokes bli_addv( x, y ); defined later in this
+// file.
+void libblis_test_addv_impl (iface_t iface, obj_t* x, obj_t* y);
+
+// Norm-based residual check for addv; defined later in this file. Returns the
+// residual as a double.
+double libblis_test_addv_check (
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         beta,
+  obj_t*         x,
+  obj_t*         y
+);
+
+// Pick the residual check for addv according to the test mode in params:
+//   - params->oruflw != BLIS_DEFAULT : libblis_test_vector_check() on y
+//   - otherwise, params->bitextf != 0: libblis_test_iaddv_check()
+//   - otherwise                      : libblis_test_addv_check()
+// Returns whatever the selected check returns.
+double libblis_ref_addv(
+  test_params_t* params,
+  obj_t*  alpha,
+  obj_t*  beta,
+  obj_t*  x,
+  obj_t*  y,
+  obj_t*  y_save
+) {
+  // A non-default over/underflow mode wins regardless of bitextf.
+  if ( params->oruflw != BLIS_DEFAULT ) {
+    return libblis_test_vector_check( params, y );
+  }
+
+  if ( params->bitextf != 0 ) {
+    return libblis_test_iaddv_check( params, alpha, beta, x, y, y_save );
+  }
+
+  return libblis_test_addv_check( params, alpha, beta, x, y );
+}
+
+// Re-run addv params->n_repeats times, each time starting from y_orig copied
+// into the scratch object r, and compare r against the reference result y
+// with libblis_test_bitrp_vector().
+// Only the value from the last repetition is returned; 0.0 is returned when
+// n_repeats is zero.
+double libblis_test_bitrp_addv(
+  test_params_t* params,
+  iface_t        iface,
+  obj_t*         x,
+  obj_t*         y,
+  obj_t*         y_orig,
+  obj_t*         r,
+  num_t          dt
+) {
+  double resid = 0.0;
+  const unsigned int n_repeats = params->n_repeats;
+
+  unsigned int rep = 0;
+  while ( rep < n_repeats ) {
+    // Restore the original operand, recompute, and compare.
+    bli_copyv( y_orig, r );
+    libblis_test_addv_impl( iface, x, r );
+    resid = libblis_test_bitrp_vector( y, r, dt );
+    ++rep;
+  }
+
+  return resid;
+}
+
+// Run one addv test case and return the magnitude of its residual.
+//
+// params - test configuration; bitrp is read here, bitextf and oruflw are
+//          read by libblis_ref_addv().
+// iface  - interface selector forwarded to libblis_test_addv_impl().
+// dc_str - datatype string; only dc_str[0] is used.
+// pc_str - parameter string; pc_str[0] is mapped to the conj value for x.
+// sc_str - storage string; sc_str[0] is used for x, sc_str[1] for y.
+// dim    - problem size; only dim->m is read.
+//
+// When __GTEST_VALGRIND_TEST__ is defined the operation is executed but no
+// check is performed, so the returned residual is 0.0.
+double libblis_test_op_addv (
+  test_params_t* params,
+  iface_t        iface,
+  char*          dc_str,
+  char*          pc_str,
+  char*          sc_str,
+  tensor_t*      dim
+){
+  num_t        datatype;
+  dim_t        m;
+  conj_t       conjx;
+  obj_t        alpha, beta;
+  obj_t        x, y, y_save;
+  double       resid = 0.0;
+
+  // Use the datatype of the first char in the datatype combination string.
+  bli_param_map_char_to_blis_dt( dc_str[0], &datatype );
+
+  // Map the dimension specifier to an actual dimension.
+  m = dim->m;
+
+  // Map parameter characters to BLIS constants.
+  bli_param_map_char_to_blis_conj( pc_str[0], &conjx );
+
+  // Create test scalars.
+  bli_obj_scalar_init_detached( datatype, &alpha );
+  bli_obj_scalar_init_detached( datatype, &beta );
+
+  // Create test operands (vectors and/or matrices).
+  libblis_test_vobj_create( params, datatype, sc_str[0], m, &x );
+  libblis_test_vobj_create( params, datatype, sc_str[1], m, &y );
+  libblis_test_vobj_create( params, datatype, sc_str[1], m, &y_save );
+
+  // Initialize alpha and beta.
+  bli_setsc( -1.0, -1.0, &alpha );
+  bli_setsc(  3.0,  3.0, &beta );
+
+  // Set x and y to alpha and beta, respectively.
+  bli_setv( &alpha, &x );
+  bli_setv( &beta,  &y );
+
+  // Apply the parameters.
+  bli_obj_set_conj( conjx, &x );
+
+  // Save a copy of y before the operation overwrites it.
+  bli_copyv( &y, &y_save );
+
+  libblis_test_addv_impl( iface, &x, &y );
+
+#ifndef __GTEST_VALGRIND_TEST__
+  if(params->bitrp) {
+    obj_t r;
+
+    libblis_test_vobj_create( params, datatype, sc_str[1], m, &r );
+
+    resid = libblis_test_bitrp_addv( params, iface, &x, &y, &y_save,
+                                                            &r, datatype);
+    bli_obj_free( &r );
+  }
+  else {
+    resid = libblis_ref_addv( params, &alpha, &beta, &x, &y, &y_save );
+  }
+#endif
+
+  // Zero out performance and residual if output vector is empty.
+  libblis_test_check_empty_problem( &y, &resid );
+
+  // Free the test objects.
+  libblis_test_obj_free( &x );
+  libblis_test_obj_free( &y );
+  libblis_test_obj_free( &y_save );
+
+  // Return the magnitude without calling the unqualified abs(): depending on
+  // which headers are in scope that name can resolve to the C int abs(int),
+  // which would truncate a small residual to zero and hide a failure.
+  return ( resid < 0.0 ) ? -resid : resid;
+}
+
+// Invoke the addv operation under test through the requested interface.
+// Only BLIS_TEST_SEQ_FRONT_END is handled (it calls bli_addv( x, y )); any
+// other value is reported through libblis_test_printf_error().
+void libblis_test_addv_impl (
+  iface_t   iface,
+  obj_t*    x,
+  obj_t*    y
+){
+  if ( iface == BLIS_TEST_SEQ_FRONT_END ) {
+    bli_addv( x, y );
+    return;
+  }
+
+  libblis_test_printf_error( "Invalid interface type.\n" );
+}
+
+// Norm-based check for addv.
+//
+// Pre-conditions:
+// - every element of x equals alpha.
+// - every element of y_orig equals beta.
+// Note:
+// - alpha and beta should have non-zero imaginary components in the complex
+//   cases in order to more fully exercise the implementation.
+//
+// For y := y_orig + conjx(x), the quantity
+//
+//   normfv(y) - sqrt( absqsc( beta + conjx(alpha) ) * m )
+//
+// should be negligible. That (signed) difference is returned; params is not
+// used. The datatype and length are taken from x.
+double libblis_test_addv_check (
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         beta,
+  obj_t*         x,
+  obj_t*         y
+) {
+  num_t  dt      = bli_obj_dt( x );
+  num_t  dt_real = bli_obj_dt_proj_to_real( x );
+  dim_t  len     = bli_obj_vector_dim( x );
+  conj_t conjx   = bli_obj_conj_status( x );
+
+  obj_t  sum;          // beta + conjx(alpha)
+  obj_t  alpha_conj;   // conjx(alpha)
+  obj_t  norm_r;       // normfv(y), later the residual
+  obj_t  len_r;        // m as a real scalar
+  obj_t  expected_r;   // expected norm of y
+
+  double imag_unused;
+  double resid = 0.0;
+
+  bli_obj_scalar_init_detached( dt,      &sum );
+  bli_obj_scalar_init_detached( dt_real, &expected_r );
+  bli_obj_scalar_init_detached( dt_real, &norm_r );
+  bli_obj_scalar_init_detached( dt_real, &len_r );
+  bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj );
+
+  // sum := beta + conjx(alpha)
+  bli_copysc( beta, &sum );
+  bli_addsc( &alpha_conj, &sum );
+
+  // expected_r := sqrt( |sum|^2 * m )
+  bli_setsc( ( double )len, 0.0, &len_r );
+  bli_absqsc( &sum, &expected_r );
+  bli_mulsc( &len_r, &expected_r );
+  bli_sqrtsc( &expected_r, &expected_r );
+
+  // norm_r := normfv(y) - expected_r
+  bli_normfv( y, &norm_r );
+  bli_subsc( &expected_r, &norm_r );
+
+  bli_getsc( &norm_r, &resid, &imag_unused );
+
+  return resid;
+}
--- a/gtestsuite/src/test_addv.h
+++ b/gtestsuite/src/test_addv.h
@@ -0,0 +1,18 @@
+#ifndef TEST_ADDV_H
+#define TEST_ADDV_H
+
+#include "blis_test.h"
+
+// Alternative addv check that also receives the pre-operation copy of y.
+// libblis_ref_addv() in test_addv.cpp calls it when params->bitextf is
+// non-zero and params->oruflw is BLIS_DEFAULT. The definition is not part of
+// this header.
+double libblis_test_iaddv_check
+     (
+       test_params_t* params,
+       obj_t*         alpha,
+       obj_t*         beta,
+       obj_t*         x,
+       obj_t*         y,
+       obj_t*         y_orig
+     );
+
+// NOTE(review): definition not visible here; by its name this presumably
+// inspects b for NaN values - confirm against the implementation.
+double libblis_check_nan_addv( char*  sc_str, obj_t* b, num_t dt );
+
+#endif /* TEST_ADDV_H */
--- a/gtestsuite/src/test_amaxv.cpp
+++ b/gtestsuite/src/test_amaxv.cpp
@@ -0,0 +1,477 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_amaxv.h"
+
+// Local prototypes.
+
+// Declared here only; no definition or caller of this function is visible in
+// this file.
+void libblis_test_amaxv_deps (
+  thread_data_t* tdata,
+  test_params_t* params,
+  test_op_t*     op
+);
+
+// Dispatches on iface and invokes bli_amaxv( x, index ); defined later in
+// this file.
+void libblis_test_amaxv_impl (
+  iface_t   iface,
+  obj_t*    x,
+  obj_t*    index
+);
+
+// Compares the element selected by index with the element selected by the
+// local reference bli_amaxv_test(); defined later in this file.
+double libblis_test_amaxv_check (
+  test_params_t* params,
+  obj_t*         x,
+  obj_t*         index
+);
+
+// Object-based local reference amaxv, generated by the GENFRONT macro later
+// in this file.
+void bli_amaxv_test (
+  obj_t*  x,
+  obj_t*  index
+);
+
+// Call the CBLAS i?amax routine that matches dt on the buffer of x and store
+// its return value in *idx.
+//
+// m    - number of elements passed to the CBLAS routine.
+// x    - object whose buffer holds the vector.
+// incx - stride passed to the CBLAS routine.
+// idx  - output location for the returned index.
+// dt   - datatype selector; any value other than the four handled below is
+//        passed to bli_check_error_code( BLIS_INVALID_DATATYPE ) and *idx is
+//        left untouched.
+// Always returns 0.
+double cblas_amaxv(
+  f77_int    m,
+  obj_t*     x,
+  f77_int    incx,
+  gint_t*    idx,
+  num_t      dt
+){
+  if ( dt == BLIS_FLOAT ) {
+    *idx = cblas_isamax( m, (float*) bli_obj_buffer( x ), incx );
+  }
+  else if ( dt == BLIS_DOUBLE ) {
+    *idx = cblas_idamax( m, (double*) bli_obj_buffer( x ), incx );
+  }
+  else if ( dt == BLIS_SCOMPLEX ) {
+    *idx = cblas_icamax( m, (scomplex*) bli_obj_buffer( x ), incx );
+  }
+  else if ( dt == BLIS_DCOMPLEX ) {
+    *idx = cblas_izamax( m, (dcomplex*) bli_obj_buffer( x ), incx );
+  }
+  else {
+    bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  return 0;
+}
+
+// Call the Fortran-style i?amax_ routine that matches dt on the buffer of x
+// and store (returned index - 1) in *idx.
+//
+// m    - number of elements; passed by address to the BLAS routine.
+// x    - object whose buffer holds the vector.
+// incx - stride; passed by address to the BLAS routine.
+// idx  - output location for the index after subtracting one.
+// dt   - datatype selector; any value other than the four handled below is
+//        passed to bli_check_error_code( BLIS_INVALID_DATATYPE ), and *idx is
+//        then written from the initial value (1 - 1 = 0).
+// Always returns 0.
+double blas_amaxv(
+  f77_int    m,
+  obj_t*     x,
+  f77_int    incx,
+  gint_t*    idx,
+  num_t      dt
+){
+  gint_t blas_index = 1;
+
+  if ( dt == BLIS_FLOAT ) {
+    blas_index = isamax_( &m, (float*) bli_obj_buffer( x ), &incx );
+  }
+  else if ( dt == BLIS_DOUBLE ) {
+    blas_index = idamax_( &m, (double*) bli_obj_buffer( x ), &incx );
+  }
+  else if ( dt == BLIS_SCOMPLEX ) {
+    blas_index = icamax_( &m, (scomplex*) bli_obj_buffer( x ), &incx );
+  }
+  else if ( dt == BLIS_DCOMPLEX ) {
+    blas_index = izamax_( &m, (dcomplex*) bli_obj_buffer( x ), &incx );
+  }
+  else {
+    bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+
+  // Shift the value returned by the BLAS routine down by one.
+  *idx = (blas_index - 1);
+  return 0;
+}
+
+// Run amaxv through the API selected by params->api:
+//   API_BLIS  - libblis_test_amaxv_impl( iface, x, index )
+//   API_CBLAS - cblas_amaxv()
+//   otherwise - blas_amaxv()
+// For the CBLAS/BLAS paths the result is written straight into the buffer of
+// index, which is accessed as a gint_t.
+void libblis_api_amaxv(
+  test_params_t* params,
+  iface_t        iface,
+  obj_t*         x,
+  obj_t*         index,
+  num_t          dt
+){
+  if ( params->api == API_BLIS ) {
+    libblis_test_amaxv_impl( iface, x, index );
+    return;
+  }
+
+  /* CBLAS || BLAS */
+  dim_t   m    = bli_obj_vector_dim( x );
+  f77_int incx = bli_obj_vector_inc( x );
+  gint_t* idx  = (gint_t *)bli_obj_buffer( index );
+
+  if ( params->api == API_CBLAS ) {
+    cblas_amaxv( m, x, incx, idx, dt );
+  }
+  else {
+    blas_amaxv( m, x, incx, idx, dt );
+  }
+}
+
+// Pick the residual check for amaxv according to the test mode in params:
+//   - params->oruflw != BLIS_DEFAULT : libblis_test_vector_check() on x
+//   - otherwise, params->bitextf != 0: libblis_test_iamaxv_check()
+//   - otherwise                      : libblis_test_amaxv_check()
+// Returns whatever the selected check returns.
+double libblis_ref_amaxv(
+  test_params_t* params,
+  obj_t*  x,
+  obj_t*  index
+) {
+  // A non-default over/underflow mode wins regardless of bitextf.
+  if ( params->oruflw != BLIS_DEFAULT ) {
+    return libblis_test_vector_check( params, x );
+  }
+
+  if ( params->bitextf != 0 ) {
+    return libblis_test_iamaxv_check( params, x, index );
+  }
+
+  return libblis_test_amaxv_check( params, x, index );
+}
+
+// Re-run amaxv params->n_repeats times into the scratch scalar r (which is
+// re-initialized as a detached BLIS_INT scalar on every pass) and compare it
+// against the reference result index with libblis_test_bitrp_vector().
+// Only the value from the last repetition is returned; 0.0 is returned, and r
+// is left untouched, when n_repeats is zero.
+double libblis_test_bitrp_amaxv(
+  test_params_t* params,
+  iface_t        iface,
+  obj_t*         x,
+  obj_t*         index,
+  obj_t*         r,
+  num_t          dt
+) {
+  double resid = 0.0;
+  const unsigned int n_repeats = params->n_repeats;
+
+  unsigned int rep = 0;
+  while ( rep < n_repeats ) {
+    bli_obj_scalar_init_detached( BLIS_INT, r );
+    libblis_test_amaxv_impl( iface, x, r );
+    resid = libblis_test_bitrp_vector( index, r, dt );
+    ++rep;
+  }
+
+  return resid;
+}
+
+// Run one amaxv test case and return the magnitude of its residual.
+//
+// params - test configuration; bitextf, oruflw and bitrp are read here, and
+//          params->api is read by libblis_api_amaxv().
+// iface  - interface selector forwarded to the implementation wrapper.
+// dc_str - datatype string; only dc_str[0] is used.
+// pc_str - not used by this operation.
+// sc_str - storage string; sc_str[0] is used for x.
+// dim    - problem size; only dim->m is read.
+//
+// When __GTEST_VALGRIND_TEST__ is defined the operation is executed but no
+// check is performed, so the returned residual is 0.0.
+double libblis_test_op_amaxv (
+  test_params_t* params,
+  iface_t        iface,
+  char*          dc_str,
+  char*          pc_str,
+  char*          sc_str,
+  tensor_t*      dim
+){
+  num_t        datatype;
+  dim_t        m;
+  obj_t        x;
+  obj_t        index;
+  double resid = 0.0;
+
+  // Use the datatype of the first char in the datatype combination string.
+  bli_param_map_char_to_blis_dt( dc_str[0], &datatype );
+
+  // Map the dimension specifier to an actual dimension.
+  m = dim->m;
+
+  // Create test scalars.
+  bli_obj_scalar_init_detached( BLIS_INT, &index );
+
+  // Create test operands (vectors and/or matrices).
+  libblis_test_vobj_create( params, datatype, sc_str[0], m, &x );
+
+  // Randomize x.
+  if((params->bitextf == 0) && (params->oruflw == BLIS_DEFAULT)) {
+    libblis_test_vobj_randomize( params, FALSE, &x );
+  } else {
+    libblis_test_vobj_irandomize( params, &x );
+  }
+
+  libblis_api_amaxv( params, iface, &x, &index, datatype );
+
+#ifndef __GTEST_VALGRIND_TEST__
+  if(params->bitrp) {
+    obj_t r;
+
+    // Put r into a valid detached-scalar state up front so that the
+    // bli_obj_free() below never sees an uninitialized object, even when
+    // params->n_repeats is zero and the repeat loop never initializes it.
+    bli_obj_scalar_init_detached( BLIS_INT, &r );
+
+    resid = libblis_test_bitrp_amaxv( params, iface, &x, &index, &r, datatype );
+
+    bli_obj_free( &r );
+  }
+  else {
+    resid = libblis_ref_amaxv( params, &x, &index );
+  }
+#endif
+
+  // Zero out performance and residual if input vector is empty.
+  libblis_test_check_empty_problem( &x, &resid );
+
+  // Free the test objects.
+  libblis_test_obj_free( &x );
+
+  // Return the magnitude without calling the unqualified abs(): depending on
+  // which headers are in scope that name can resolve to the C int abs(int),
+  // which would truncate a small residual to zero and hide a failure.
+  return ( resid < 0.0 ) ? -resid : resid;
+}
+
+// Invoke the amaxv operation under test through the requested interface.
+// Only BLIS_TEST_SEQ_FRONT_END is handled (it calls bli_amaxv( x, index ));
+// any other value is reported through libblis_test_printf_error().
+void libblis_test_amaxv_impl (
+  iface_t   iface,
+  obj_t*    x,
+  obj_t*    index
+) {
+  if ( iface == BLIS_TEST_SEQ_FRONT_END ) {
+    bli_amaxv( x, index );
+    return;
+  }
+
+  libblis_test_printf_error( "Invalid interface type.\n" );
+}
+
+// Check an amaxv result against the local reference implementation.
+//
+// Pre-conditions:
+// - x is randomized.
+// - index already holds the result of the amaxv implementation under test
+//   (the caller created the index object and ran the operation).
+//
+// The implementation for
+//
+//   index := amaxv( x )
+//
+// is considered correct if
+//
+//   x[ index ] = max( x )
+//
+// where max() is implemented via the bli_?amaxv_test() function.
+//
+// Returns 0.0 when the two referenced elements compare equal (or when x is
+// empty) and 1.0 otherwise. params is not used.
+double libblis_test_amaxv_check (
+  test_params_t* params,
+  obj_t*         x,
+  obj_t*         index
+) {
+  obj_t  ref_index;     // index computed by the local reference
+  obj_t  elem;          // x[ index ]
+  obj_t  elem_ref;      // x[ ref_index ]
+
+  double idx_real, idx_ref_real, imag_unused;
+
+  // Read back the index produced by the implementation under test.
+  bli_getsc( index, &idx_real, &imag_unused );
+  dim_t pos = idx_real;
+
+  // If x is length 0, then we can't access any elements, and so we
+  // return early with a good residual.
+  if ( bli_obj_vector_dim( x ) == 0 ) {
+    return 0.0;
+  }
+
+  bli_acquire_vi( pos, x, &elem );
+
+  // Compute the reference index and fetch the element it refers to.
+  bli_obj_scalar_init_detached( BLIS_INT, &ref_index );
+  bli_amaxv_test( x, &ref_index );
+  bli_getsc( &ref_index, &idx_ref_real, &imag_unused );
+  dim_t pos_ref = idx_ref_real;
+  bli_acquire_vi( pos_ref, x, &elem_ref );
+
+  // The values are compared rather than the indices themselves.
+  return bli_obj_equals( &elem, &elem_ref ) ? 0.0 : 1.0;
+}
+
+// -----------------------------------------------------------------------------
+
+//
+// Prototype BLAS-like interfaces with typed operands for a local amaxv test
+// operation.
+//
+// GENTPROT is redefined so that one expansion declares a function named
+// PASTEMAC(ch,opname) taking the vector length n, the typed vector x with
+// stride incx, and an output location for the index.
+// NOTE(review): INSERT_GENTPROT_BASIC0 is defined elsewhere; presumably it
+// expands GENTPROT once per basic datatype - confirm.
+//
+
+#undef  GENTPROT
+#define GENTPROT( ctype, ch, opname ) \
+\
+void PASTEMAC(ch,opname) \
+     ( \
+       dim_t           n, \
+       ctype* restrict x, inc_t incx, \
+       dim_t* restrict index  \
+     ); \
+
+INSERT_GENTPROT_BASIC0( amaxv_test )
+
+
+//
+// Prototype function pointer query interface.
+//
+// One GENPROT expansion declares PASTEMAC(opname,_qfp), which takes a
+// datatype and returns a function pointer of type PASTECH(tname,_vft).
+//
+
+#undef  GENPROT
+#define GENPROT( tname, opname ) \
+\
+PASTECH(tname,_vft) \
+PASTEMAC(opname,_qfp)( num_t dt );
+
+GENPROT( amaxv, amaxv_test )
+
+
+//
+// Define function pointer query interfaces.
+//
+// The generated query function returns PASTECH(opname,_fpa)[ dt ]; dt is used
+// directly as the array index with no range check.
+// NOTE(review): GENARRAY_FPA is defined elsewhere; presumably it defines the
+// PASTECH(opname,_fpa) array of per-datatype function pointers - confirm.
+//
+
+#undef  GENFRONT
+#define GENFRONT( tname, opname ) \
+\
+GENARRAY_FPA( PASTECH(tname,_vft), \
+              opname ); \
+\
+PASTECH(tname,_vft) \
+PASTEMAC(opname,_qfp)( num_t dt ) \
+{ \
+    return PASTECH(opname,_fpa)[ dt ]; \
+}
+
+GENFRONT( amaxv, amaxv_test )
+
+
+//
+// Define object-based interface for a local amaxv test operation.
+//
+// The generated PASTEMAC0(opname) function reads the datatype, vector length,
+// buffer (at offset) and stride from x, reads the buffer (at offset) of index
+// as a dim_t*, looks up the datatype-specific function through
+// PASTEMAC(opname,_qfp)( dt ), and calls it with those values. No error
+// checking is performed on the operands (see the disabled block below).
+//
+
+#undef  GENFRONT
+#define GENFRONT( tname, opname ) \
+\
+void PASTEMAC0(opname) \
+     ( \
+       obj_t*  x, \
+       obj_t*  index  \
+     ) \
+{ \
+    num_t     dt        = bli_obj_dt( x ); \
+\
+    dim_t     n         = bli_obj_vector_dim( x ); \
+    void*     buf_x     = bli_obj_buffer_at_off( x ); \
+    inc_t     incx      = bli_obj_vector_inc( x ); \
+\
+    dim_t*    buf_index = (dim_t*)bli_obj_buffer_at_off( index ); \
+\
+/*
+	FGVZ: Disabling this code since bli_amaxv_check() is supposed to be a
+	non-public API function, and therefore unavailable unless all symbols
+	are scheduled to be exported at configure-time (which is not currently
+	the default behavior).
+
+    if ( bli_error_checking_is_enabled() ) \
+        bli_amaxv_check( x, index ); \
+*/ \
+\
+	/* Query a type-specific function pointer, except one that uses
+	   void* for function arguments instead of typed pointers. */ \
+	PASTECH(tname,_vft) f = \
+	PASTEMAC(opname,_qfp)( dt ); \
+\
+	f \
+	( \
+       n, \
+       buf_x, incx, \
+       buf_index  \
+    ); \
+}
+
+GENFRONT( amaxv, amaxv_test )
+
+
+//
+// Define BLAS-like interfaces with typed operands for a local amaxv test
+// operation.
+// NOTE: This is based on a simplified version of the bli_?amaxv_ref()
+// reference kernel.
+//
+// Each generated function scans the n elements of x (stride incx), computes
+// |real| + |imag| for every element, and writes the zero-based index of the
+// largest such value to *index. A zero-length vector yields index 0.
+// NOTE(review): as written, the update condition is true for every NaN
+// element, so when x contains NaNs the index of the last NaN is stored; the
+// in-macro comment describes NaN as being treated as "smaller than any
+// previously seen" - confirm which behavior is intended.
+//
+
+#undef  GENTFUNCR
+#define GENTFUNCR( ctype, ctype_r, ch, chr, varname ) \
+\
+void PASTEMAC(ch,varname) \
+     ( \
+       dim_t    n, \
+       ctype*   x, inc_t incx, \
+       dim_t*   index  \
+     ) \
+{ \
+	ctype_r* minus_one = PASTEMAC(chr,m1); \
+	dim_t*   zero_i    = PASTEMAC(i,0); \
+\
+	ctype_r  chi1_r; \
+	ctype_r  chi1_i; \
+	ctype_r  abs_chi1; \
+	ctype_r  abs_chi1_max; \
+	dim_t    index_l; \
+	dim_t    i; \
+\
+	/* If the vector length is zero, return early. This directly emulates
+	   the behavior of netlib BLAS's i?amax() routines. */ \
+	if ( bli_zero_dim1( n ) ) \
+	{ \
+		PASTEMAC(i,copys)( *zero_i, *index ); \
+		return; \
+	} \
+\
+	/* Initialize the index of the maximum absolute value to zero. */ \
+	PASTEMAC(i,copys)( *zero_i, index_l ); \
+\
+	/* Initialize the maximum absolute value search candidate with
+	   -1, which is guaranteed to be less than all values we will
+	   compute. */ \
+	PASTEMAC(chr,copys)( *minus_one, abs_chi1_max ); \
+\
+	{ \
+		for ( i = 0; i < n; ++i ) \
+		{ \
+			ctype* chi1 = x + (i  )*incx; \
+\
+			/* Get the real and imaginary components of chi1. */ \
+			PASTEMAC2(ch,chr,gets)( *chi1, chi1_r, chi1_i ); \
+\
+			/* Replace chi1_r and chi1_i with their absolute values. */ \
+			PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \
+			PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \
+\
+			/* Add the real and imaginary absolute values together. */ \
+			PASTEMAC(chr,set0s)( abs_chi1 ); \
+			PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \
+			PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \
+\
+			/* If the absolute value of the current element exceeds that of
+			   the previous largest, save it and its index. If NaN is
+			   encountered, then treat it the same as if it were a valid
+			   value that was smaller than any previously seen. This
+			   behavior mimics that of LAPACK's ?lange(). */ \
+			if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \
+			{ \
+				abs_chi1_max = abs_chi1; \
+				index_l       = i; \
+			} \
+		} \
+	} \
+\
+	/* Store the final index to the output variable. */ \
+	PASTEMAC(i,copys)( index_l, *index ); \
+}
+INSERT_GENTFUNCR_BASIC0( amaxv_test )
--- a/gtestsuite/src/test_amaxv.h
+++ b/gtestsuite/src/test_amaxv.h
@@ -0,0 +1,15 @@
+#ifndef TEST_AMAXV_H
+#define TEST_AMAXV_H
+
+#include "blis_test.h"
+
+// Alternative amaxv check. libblis_ref_amaxv() in test_amaxv.cpp calls it
+// when params->bitextf is non-zero and params->oruflw is BLIS_DEFAULT. The
+// definition is not part of this header.
+double libblis_test_iamaxv_check
+     (
+       test_params_t* params,
+       obj_t*         x,
+       obj_t*         index
+     );
+
+// NOTE(review): definition not visible here; by its name this presumably
+// inspects b for NaN values - confirm against the implementation.
+double libblis_check_nan_amaxv( char*  sc_str, obj_t* b, num_t dt );
+
+#endif /* TEST_AMAXV_H */
--- a/gtestsuite/src/test_axpbyv.cpp
+++ b/gtestsuite/src/test_axpbyv.cpp
@@ -0,0 +1,380 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_axpbyv.h"
+
+// Local prototypes.
+
+// Declared here only; no definition or caller of this function is visible in
+// this file.
+void libblis_test_axpbyv_deps (
+  thread_data_t* tdata,
+  test_params_t* params,
+  test_op_t*     op
+);
+
+// Dispatches on iface and invokes bli_axpbyv( alpha, x, beta, y ); defined
+// later in this file.
+void libblis_test_axpbyv_impl (
+  iface_t   iface,
+  obj_t*    alpha,
+  obj_t*    x,
+  obj_t*    beta,
+  obj_t*    y
+);
+
+// Norm-based residual check for axpbyv; defined later in this file. Note that
+// the definition overwrites y while computing the residual.
+double libblis_test_axpbyv_check (
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         x,
+  obj_t*         beta,
+  obj_t*         y,
+  obj_t*         y_orig
+);
+
+// Call the CBLAS ?axpby routine that matches dt on the raw buffers of the
+// given objects.
+//
+// m          - number of elements.
+// alpha/beta - scalar objects; passed by value for the real types and by
+//              pointer for the complex types.
+// x, incx    - input vector object and its stride.
+// y, incy    - input/output vector object and its stride.
+// dt         - datatype selector; any value other than the four handled
+//              below is passed to
+//              bli_check_error_code( BLIS_INVALID_DATATYPE ).
+// Always returns 0.
+double cblas_axpbyv(
+  f77_int    m,
+  obj_t*     alpha,
+  obj_t*     x,
+  f77_int    incx,
+  obj_t*     beta,
+  obj_t*     y,
+  f77_int    incy,
+  num_t      dt
+){
+  // Fetch the untyped buffers once; each case casts them to its own type.
+  void* alpha_buf = bli_obj_buffer( alpha );
+  void* beta_buf  = bli_obj_buffer( beta );
+  void* x_buf     = bli_obj_buffer( x );
+  void* y_buf     = bli_obj_buffer( y );
+
+  switch( dt )  {
+    case BLIS_FLOAT :
+      cblas_saxpby( m, *(float*) alpha_buf, (float*) x_buf, incx,
+                       *(float*) beta_buf,  (float*) y_buf, incy );
+      break;
+    case BLIS_DOUBLE :
+      cblas_daxpby( m, *(double*) alpha_buf, (double*) x_buf, incx,
+                       *(double*) beta_buf,  (double*) y_buf, incy );
+      break;
+    case BLIS_SCOMPLEX :
+      cblas_caxpby( m, (scomplex*) alpha_buf, (scomplex*) x_buf, incx,
+                       (scomplex*) beta_buf,  (scomplex*) y_buf, incy );
+      break;
+    case BLIS_DCOMPLEX :
+      cblas_zaxpby( m, (dcomplex*) alpha_buf, (dcomplex*) x_buf, incx,
+                       (dcomplex*) beta_buf,  (dcomplex*) y_buf, incy );
+      break;
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+  return 0;
+}
+
+// Call the Fortran-style ?axpby_ routine that matches dt on the raw buffers
+// of the given objects. All arguments, including m and the strides, are
+// passed by address.
+//
+// m          - number of elements.
+// alpha/beta - scalar objects.
+// x, incx    - input vector object and its stride.
+// y, incy    - input/output vector object and its stride.
+// dt         - datatype selector; any value other than the four handled
+//              below is passed to
+//              bli_check_error_code( BLIS_INVALID_DATATYPE ).
+// Always returns 0.
+double blas_axpbyv(
+  f77_int    m,
+  obj_t*     alpha,
+  obj_t*     x,
+  f77_int    incx,
+  obj_t*     beta,
+  obj_t*     y,
+  f77_int    incy,
+  num_t      dt
+){
+  // Fetch the untyped buffers once; each case casts them to its own type.
+  void* alpha_buf = bli_obj_buffer( alpha );
+  void* beta_buf  = bli_obj_buffer( beta );
+  void* x_buf     = bli_obj_buffer( x );
+  void* y_buf     = bli_obj_buffer( y );
+
+  switch( dt )  {
+    case BLIS_FLOAT :
+      saxpby_( &m, (float*) alpha_buf, (float*) x_buf, &incx,
+                   (float*) beta_buf,  (float*) y_buf, &incy );
+      break;
+    case BLIS_DOUBLE :
+      daxpby_( &m, (double*) alpha_buf, (double*) x_buf, &incx,
+                   (double*) beta_buf,  (double*) y_buf, &incy );
+      break;
+    case BLIS_SCOMPLEX :
+      caxpby_( &m, (scomplex*) alpha_buf, (scomplex*) x_buf, &incx,
+                   (scomplex*) beta_buf,  (scomplex*) y_buf, &incy );
+      break;
+    case BLIS_DCOMPLEX :
+      zaxpby_( &m, (dcomplex*) alpha_buf, (dcomplex*) x_buf, &incx,
+                   (dcomplex*) beta_buf,  (dcomplex*) y_buf, &incy );
+      break;
+    default :
+      bli_check_error_code( BLIS_INVALID_DATATYPE );
+  }
+  return 0;
+}
+
+// Run axpbyv through the API selected by params->api:
+//   API_BLIS  - libblis_test_axpbyv_impl( iface, alpha, x, beta, y )
+//   API_CBLAS - cblas_axpbyv()
+//   otherwise - blas_axpbyv()
+// On the CBLAS/BLAS paths, if x carries a conjugation flag, x is conjugated
+// in place with conjugate_tensor() and the flag is cleared before the call,
+// so x may be modified.
+void libblis_api_axpbyv(
+  test_params_t* params,
+  iface_t        iface,
+  obj_t*         alpha,
+  obj_t*         x,
+  obj_t*         beta,
+  obj_t*         y,
+  num_t          dt
+){
+  if ( params->api == API_BLIS ) {
+    libblis_test_axpbyv_impl( iface, alpha, x, beta, y );
+    return;
+  }
+
+  /* CBLAS || BLAS */
+  dim_t   m    = bli_obj_vector_dim( x );
+  f77_int incx = bli_obj_vector_inc( x );
+  f77_int incy = bli_obj_vector_inc( y );
+
+  // Apply the conjugation explicitly and drop the flag from the object.
+  if ( bli_obj_has_conj( x ) ) {
+    conjugate_tensor( x, dt );
+    bli_obj_set_conj( BLIS_NO_CONJUGATE, x );
+  }
+
+  if ( params->api == API_CBLAS ) {
+    cblas_axpbyv( m, alpha, x, incx, beta, y, incy, dt );
+  }
+  else {
+    blas_axpbyv( m, alpha, x, incx, beta, y, incy, dt );
+  }
+}
+
+// Pick the residual check for axpbyv according to the test mode in params:
+//   - params->oruflw != BLIS_DEFAULT : libblis_test_vector_check() on y
+//   - otherwise, params->bitextf != 0: libblis_test_iaxpbyv_check()
+//   - otherwise                      : libblis_test_axpbyv_check()
+// Returns whatever the selected check returns.
+double libblis_ref_axpbyv(
+  test_params_t* params,
+  obj_t*  alpha,
+  obj_t*  x,
+  obj_t*  beta,
+  obj_t*  y,
+  obj_t*  y_save
+) {
+  // A non-default over/underflow mode wins regardless of bitextf.
+  if ( params->oruflw != BLIS_DEFAULT ) {
+    return libblis_test_vector_check( params, y );
+  }
+
+  if ( params->bitextf != 0 ) {
+    return libblis_test_iaxpbyv_check( params, alpha, x, beta, y, y_save );
+  }
+
+  // Perform checks.
+  return libblis_test_axpbyv_check( params, alpha, x, beta, y, y_save );
+}
+
+// Re-run axpbyv params->n_repeats times, each time starting from y_orig
+// copied into the scratch object r, and compare r against the reference
+// result y with libblis_test_bitrp_vector().
+// Only the value from the last repetition is returned; 0.0 is returned when
+// n_repeats is zero.
+double libblis_test_bitrp_axpbyv(
+  test_params_t* params,
+  iface_t        iface,
+  obj_t*         alpha,
+  obj_t*         x,
+  obj_t*         beta,
+  obj_t*         y,
+  obj_t*         y_orig,
+  obj_t*         r,
+  num_t          dt
+) {
+  double resid = 0.0;
+  const unsigned int n_repeats = params->n_repeats;
+
+  unsigned int rep = 0;
+  while ( rep < n_repeats ) {
+    // Restore the original operand, recompute, and compare.
+    bli_copyv( y_orig, r );
+    libblis_test_axpbyv_impl( iface, alpha, x, beta, r );
+    resid = libblis_test_bitrp_vector( y, r, dt );
+    ++rep;
+  }
+
+  return resid;
+}
+
+// Run one axpbyv test case (y := beta * y + alpha * conjx(x)) and return the
+// magnitude of its residual.
+//
+// params - test configuration; bitextf, oruflw and bitrp are read here, and
+//          params->api is read by libblis_api_axpbyv().
+// iface  - interface selector forwarded to the implementation wrapper.
+// dc_str - datatype string; only dc_str[0] is used.
+// pc_str - parameter string; pc_str[0] is mapped to the conj value for x.
+// sc_str - storage string; sc_str[0] is used for x, sc_str[1] for y.
+// dim    - problem size; only dim->m is read.
+// alpv   - not used by this function.
+// betv   - not used by this function.
+//
+// When __GTEST_VALGRIND_TEST__ is defined the operation is executed but no
+// check is performed, so the returned residual is 0.0.
+double libblis_test_op_axpbyv (
+  test_params_t* params,
+  iface_t        iface,
+  char*          dc_str,
+  char*          pc_str,
+  char*          sc_str,
+  tensor_t*      dim,
+  atom_t         alpv,
+  atom_t         betv
+){
+  num_t        datatype;
+  dim_t        m;
+  conj_t       conjx;
+  obj_t        alpha, beta, x, y;
+  obj_t        y_save;
+  double       resid = 0.0;
+  obj_t        xx;
+
+  // Use the datatype of the first char in the datatype combination string.
+  bli_param_map_char_to_blis_dt( dc_str[0], &datatype );
+
+  // Map the dimension specifier to actual dimensions.
+  m = dim->m;
+
+  // Map parameter characters to BLIS constants.
+  bli_param_map_char_to_blis_conj( pc_str[0], &conjx );
+
+  // Create test scalars.
+  bli_obj_scalar_init_detached( datatype, &alpha );
+  bli_obj_scalar_init_detached( datatype, &beta );
+
+  // Create test operands (vectors and/or matrices).
+  libblis_test_vobj_create( params, datatype, sc_str[0], m, &x );
+  libblis_test_vobj_create( params, datatype, sc_str[0], m, &xx );
+  libblis_test_vobj_create( params, datatype, sc_str[1], m, &y );
+  libblis_test_vobj_create( params, datatype, sc_str[1], m, &y_save );
+
+  // alpha is -2 for real datatypes and -2i for complex ones; beta is -1.
+  if ( bli_obj_is_real( &y ) )
+    bli_setsc( -2.0,  0.0, &alpha );
+  else
+    bli_setsc(  0.0, -2.0, &alpha );
+
+  bli_setsc( -1.0,  0.0, &beta );
+
+  // Randomize x and y, and save y.
+  if((params->bitextf == 0) && (params->oruflw == BLIS_DEFAULT)) {
+    libblis_test_vobj_randomize( params, FALSE, &x );
+    libblis_test_vobj_randomize( params, FALSE, &y );
+  } else {
+    libblis_test_vobj_irandomize( params, &x );
+    libblis_test_vobj_irandomize( params, &y );
+  }
+
+  bli_copyv( &y, &y_save );
+
+  // Apply the parameters.
+  bli_obj_set_conj( conjx, &x );
+
+  // Run the operation on a copy of x: the CBLAS/BLAS path of
+  // libblis_api_axpbyv() may modify its x argument, and x itself is still
+  // needed by the checks below.
+  bli_copyv( &x, &xx );
+
+  libblis_api_axpbyv( params, iface, &alpha, &xx, &beta, &y, datatype );
+
+#ifndef __GTEST_VALGRIND_TEST__
+  if(params->bitrp) {
+    obj_t r;
+
+    libblis_test_vobj_create( params, datatype, sc_str[1], m, &r );
+
+    resid = libblis_test_bitrp_axpbyv( params, iface, &alpha, &x,
+                                 &beta, &y, &y_save, &r, datatype);
+
+    bli_obj_free( &r );
+  }
+  else {
+    resid = libblis_ref_axpbyv( params, &alpha, &x, &beta, &y, &y_save );
+  }
+#endif
+
+  // Zero out performance and residual if output vector is empty.
+  libblis_test_check_empty_problem( &y, &resid );
+
+  // Free the test objects.
+  libblis_test_obj_free( &x );
+  libblis_test_obj_free( &xx );
+  libblis_test_obj_free( &y );
+  libblis_test_obj_free( &y_save );
+
+  // Return the magnitude without calling the unqualified abs(): depending on
+  // which headers are in scope that name can resolve to the C int abs(int),
+  // which would truncate a small residual to zero and hide a failure.
+  return ( resid < 0.0 ) ? -resid : resid;
+}
+
+// Invoke the axpbyv operation under test through the requested interface.
+// Only BLIS_TEST_SEQ_FRONT_END is handled (it calls
+// bli_axpbyv( alpha, x, beta, y )); any other value is reported through
+// libblis_test_printf_error().
+void libblis_test_axpbyv_impl (
+  iface_t   iface,
+  obj_t*    alpha,
+  obj_t*    x,
+  obj_t*    beta,
+  obj_t*    y
+) {
+  if ( iface == BLIS_TEST_SEQ_FRONT_END ) {
+    bli_axpbyv( alpha, x, beta, y );
+    return;
+  }
+
+  libblis_test_printf_error( "Invalid interface type.\n" );
+}
+
+// Norm-based check for axpbyv.
+//
+// Pre-conditions:
+// - x is randomized.
+// - y_orig is randomized.
+// Note:
+// - alpha should have a non-zero imaginary component in the complex
+//   cases in order to more fully exercise the implementation.
+//
+// For y := beta * y_orig + alpha * conjx(x), the quantity
+//
+//   normfv( y - ( beta * y_orig + alpha * conjx(x) ) )
+//
+// should be negligible, and it is returned as the residual.
+//
+// Side effect: y is overwritten with the difference between the computed and
+// the expected result. params is not used.
+double libblis_test_axpbyv_check (
+  test_params_t* params,
+  obj_t*         alpha,
+  obj_t*         x,
+  obj_t*         beta,
+  obj_t*         y,
+  obj_t*         y_orig
+) {
+  num_t  dt      = bli_obj_dt( y );
+  num_t  dt_real = bli_obj_dt_proj_to_real( y );
+  dim_t  len     = bli_obj_vector_dim( y );
+
+  obj_t  ax;         // alpha * conjx(x)
+  obj_t  expected;   // beta * y_orig + alpha * conjx(x)
+  obj_t  norm;
+
+  double imag_unused;
+  double resid = 0.0;
+
+  bli_obj_scalar_init_detached( dt_real, &norm );
+
+  bli_obj_create( dt, len, 1, 0, 0, &ax );
+  bli_obj_create( dt, len, 1, 0, 0, &expected );
+
+  // ax := alpha * conjx(x)
+  bli_copyv( x, &ax );
+  bli_scalv( alpha, &ax );
+
+  // expected := beta * y_orig + ax
+  bli_copyv( y_orig, &expected );
+  bli_scalv( beta, &expected );
+  bli_addv( &ax, &expected );
+
+  // y := y - expected, then take its norm.
+  bli_subv( &expected, y );
+  bli_normfv( y, &norm );
+  bli_getsc( &norm, &resid, &imag_unused );
+
+  bli_obj_free( &ax );
+  bli_obj_free( &expected );
+
+  return resid;
+}
--- a/gtestsuite/src/test_axpbyv.h
+++ b/gtestsuite/src/test_axpbyv.h
@@ -0,0 +1,18 @@
+#ifndef TEST_AXPBYV_H
+#define TEST_AXPBYV_H
+
+// Declarations shared by the axpbyv test; the definitions live outside
+// this header.
+
+#include "blis_test.h"
+
+// Alternate residual check for axpbyv. It takes the same operands as
+// libblis_test_axpbyv_check() and returns a residual as a double.
+// NOTE(review): presumably the integer-data counterpart of
+// libblis_test_axpbyv_check() - confirm against its definition.
+double libblis_test_iaxpbyv_check
+     (
+       test_params_t* params,
+       obj_t*         alpha,
+       obj_t*         x,
+       obj_t*         beta,
+       obj_t*         y,
+       obj_t*         y_orig
+     );
+
+// NOTE(review): the name suggests a NaN check on an axpbyv operand; the
+// meaning of sc_str, b and dt and of the returned double is not visible
+// from this header - confirm against the definition.
+double libblis_check_nan_axpbyv( char*  sc_str, obj_t* b, num_t dt );
+
+#endif /* TEST_AXPBYV_H */
--- a/gtestsuite/src/test_axpy2v.cpp
+++ b/gtestsuite/src/test_axpy2v.cpp
@@ -0,0 +1,274 @@
+#include "blis_test.h"
+#include "blis_utils.h"
+#include "test_axpy2v.h"
+
+// Local prototypes for helpers used in this translation unit.
+
+// NOTE(review): declared here but neither defined nor called anywhere in
+// this file - confirm whether the declaration is still needed.
+void libblis_test_axpy2v_deps (
+  thread_data_t* tdata,
+  test_params_t* params,
+  test_op_t*     op
+);
+
+// Invokes the axpy2v implementation selected by iface on the given
+// scalars (alpha1, alpha2) and vectors (x, y, z) using context cntx.
+void libblis_test_axpy2v_impl (
+  iface_t   iface,
+  obj_t*    alpha1,
+  obj_t*    alpha2,
+  obj_t*    x,
+  obj_t*    y,
+  obj_t*    z,
+  cntx_t*   cntx
+);
+
+// Residual check: compares the computed z against a result rebuilt from
+// z_orig, x and y, and returns the norm of the difference.
+double libblis_test_axpy2v_check (
+  test_params_t* params,
+  obj_t*         alpha1,
+  obj_t*         alpha2,
+  obj_t*         x,
+  obj_t*         y,
+  obj_t*         z,
+  obj_t*         z_orig
+);
+
+// Verifies the result of an axpy2v run and returns its residual.
+//
+// The check is selected from params:
+// - oruflw != BLIS_DEFAULT:
+//     libblis_test_vector_check() on the output vector z.
+// - oruflw == BLIS_DEFAULT and bitextf == 0:
+//     libblis_test_axpy2v_check().
+// - oruflw == BLIS_DEFAULT and bitextf != 0:
+//     libblis_test_iaxpy2v_check().
+//
+// z is the vector computed by the implementation under test and z_orig is
+// the copy of z taken before the operation ran.
+double libblis_ref_axpy2v (
+  test_params_t* params,
+  obj_t*         alpha1,
+  obj_t*         alpha2,
+  obj_t*         x,
+  obj_t*         y,
+  obj_t*         z,
+  obj_t*         z_orig
+){
+  double resid = 0.0;
+
+  if(params->oruflw != BLIS_DEFAULT) {
+    // axpy2v writes only z (x and y are inputs), so z is the vector that
+    // has to be inspected here, not y.
+    resid = libblis_test_vector_check(params, z);
+  }
+  else if(params->bitextf == 0) {
+    resid = libblis_test_axpy2v_check( params, alpha1, alpha2, x, y, z, z_orig );
+  }
+  else {
+    resid = libblis_test_iaxpy2v_check( params, alpha1, alpha2, x, y, z, z_orig );
+  }
+  return resid;
+}
+
+// Reproducibility check for axpy2v.
+//
+// Re-runs the operation up to params->n_repeats times, each time starting
+// from the saved input z_orig (copied into r), and compares the fresh
+// result r against the reference result z with
+// libblis_test_bitrp_vector().
+//
+// Returns the comparison value of the first run for which it is non-zero;
+// otherwise the value of the last run (0.0 when n_repeats is 0).
+double libblis_test_bitrp_axpy2v(
+  test_params_t* params,
+  iface_t        iface,
+  obj_t*         alpha1,
+  obj_t*         alpha2,
+  obj_t*         x,
+  obj_t*         y,
+  obj_t*         z,
+  cntx_t*        cntx,
+  obj_t*         z_orig,
+  obj_t*         r,
+  num_t          dt
+) {
+  double resid = 0.0;
+  unsigned int n_repeats = params->n_repeats;
+  unsigned int i;
+
+  for(i = 0; i < n_repeats; i++) {
+    // Restore the original z into r so every run starts from the same data.
+    bli_copyv( z_orig, r );
+    libblis_test_axpy2v_impl( iface, alpha1, alpha2, x, y, r, cntx);
+    resid = libblis_test_bitrp_vector(z, r, dt);
+
+    // Stop at the first mismatch; otherwise a later matching run would
+    // overwrite the failure and the test would report success.
+    // NOTE(review): assumes a non-zero return means z and r differ -
+    // confirm against libblis_test_bitrp_vector().
+    if(resid != 0.0) {
+      break;
+    }
+  }
+  return resid;
+}
+
+// Runs one axpy2v test case and returns the magnitude of its residual.
+//
+// Parameters:
+//   params - test configuration; bitextf, oruflw and bitrp select the data
+//            generation and verification modes used below.
+//   iface  - interface selector forwarded to libblis_test_axpy2v_impl().
+//   dc_str - datatype string; only its first character is used.
+//   pc_str - parameter string; characters 0 and 1 give the conjugation
+//            applied to x and y.
+//   sc_str - storage string; characters 0, 1 and 2 are passed to the
+//            vector creation of x, y and z (z_save reuses character 2).
+//   dim    - problem dimensions; only dim->m (the vector length) is read.
+//   alpv   - source of alpha1; only alpv.real is read.
+//   betv   - source of alpha2; only betv.real is read.
+//
+// Returns the absolute value of the residual. When
+// __GTEST_VALGRIND_TEST__ is defined the verification step is compiled
+// out and the result is 0.0.
+double libblis_test_op_axpy2v (
+  test_params_t* params,
+  iface_t        iface,
+  char*          dc_str,
+  char*          pc_str,
+  char*          sc_str,
+  tensor_t*      dim,
+  atom_t         alpv,
+  atom_t         betv
+) {
+  num_t        datatype;
+  dim_t        m;
+  conj_t       conjx, conjy;
+  obj_t        alpha1, alpha2, x, y, z;
+  obj_t        z_save;
+  cntx_t*      cntx;
+  double       resid = 0.0;
+
+  // Query a context.
+  cntx = bli_gks_query_cntx();
+
+  // Use the datatype of the first char in the datatype combination string.
+  bli_param_map_char_to_blis_dt( dc_str[0], &datatype );
+
+  // Map the dimension specifier to an actual dimension.
+  m = dim->m;
+
+  // Map parameter characters to BLIS constants.
+  bli_param_map_char_to_blis_conj( pc_str[0], &conjx );
+  bli_param_map_char_to_blis_conj( pc_str[1], &conjy );
+
+  // Create test scalars.
+  bli_obj_scalar_init_detached( datatype, &alpha1 );
+  bli_obj_scalar_init_detached( datatype, &alpha2 );
+
+  // Create test operands (vectors and/or matrices).
+  libblis_test_vobj_create( params, datatype, sc_str[0], m, &x );
+  libblis_test_vobj_create( params, datatype, sc_str[1], m, &y );
+  libblis_test_vobj_create( params, datatype, sc_str[2], m, &z );
+  libblis_test_vobj_create( params, datatype, sc_str[2], m, &z_save );
+
+  // Set alpha1/alpha2 and fill x, y and z.
+  if((params->bitextf == 0) && (params->oruflw == BLIS_DEFAULT)) {
+    // Default path: scalars taken directly from alpv/betv. In the complex
+    // case the imaginary parts are derived from the real parts so that
+    // they are non-zero whenever the real parts are.
+    if ( bli_obj_is_real( &z ) ) {
+      bli_setsc(  alpv.real,  0.0, &alpha1 );
+      bli_setsc(  betv.real,  0.0, &alpha2 );
+    }
+    else {
+      bli_setsc(  alpv.real,  (alpv.real/0.8), &alpha1 );
+      bli_setsc(  betv.real,  (betv.real/1.2), &alpha2 );
+    }
+    libblis_test_vobj_randomize( params, TRUE, &x );
+    libblis_test_vobj_randomize( params, TRUE, &y );
+    libblis_test_vobj_randomize( params, TRUE, &z );
+  } else {
+    // Alternate path: scalars are truncated to integer values and the
+    // vectors are filled by the irandomize helper.
+    int32_t xx = (int32_t)alpv.real;
+    int32_t yy = (int32_t)betv.real;
+    if ( bli_obj_is_real( &z ) ) {
+      bli_setsc( (double)xx,  0.0, &alpha1 );
+      bli_setsc( (double)yy,  0.0, &alpha2 );
+    }
+    else {
+      int32_t ac = (int32_t)(xx/0.8);
+      int32_t bc = (int32_t)(yy/1.0);
+      bli_setsc( (double)xx, (double)ac, &alpha1 );
+      bli_setsc( (double)yy, (double)bc, &alpha2 );
+    }
+    libblis_test_vobj_irandomize( params, &x );
+    libblis_test_vobj_irandomize( params, &y );
+    libblis_test_vobj_irandomize( params, &z );
+  }
+
+  // Save the initial z; the checks below rebuild the expected result
+  // from it.
+  bli_copyv( &z, &z_save );
+
+  // Apply the parameters.
+  bli_obj_set_conj( conjx, &x );
+  bli_obj_set_conj( conjy, &y );
+
+  libblis_test_axpy2v_impl( iface, &alpha1, &alpha2, &x, &y, &z, cntx);
+
+#ifndef __GTEST_VALGRIND_TEST__
+  if(params->bitrp) {
+    // Reproducibility mode: re-run from z_save into a scratch vector r
+    // and compare against z.
+    obj_t r;
+
+    libblis_test_vobj_create( params, datatype, sc_str[2], m, &r );
+
+    resid = libblis_test_bitrp_axpy2v( params, iface, &alpha1,
+                  &alpha2, &x, &y, &z, cntx, &z_save, &r, datatype);
+
+    bli_obj_free( &r );
+  }
+  else {
+    resid = libblis_ref_axpy2v( params, &alpha1, &alpha2, &x, &y, &z, &z_save );
+  }
+#endif
+
+  // Zero out performance and residual if output vector is empty.
+  libblis_test_check_empty_problem( &z, &resid );
+
+  // Free the test objects.
+  libblis_test_obj_free( &x );
+  libblis_test_obj_free( &y );
+  libblis_test_obj_free( &z );
+  libblis_test_obj_free( &z_save );
+
+  // Take the magnitude explicitly. An unqualified abs() on a double can
+  // resolve to the C int abs(int) on some toolchains, which would
+  // truncate a fractional residual to an integer.
+  return ( resid < 0.0 ) ? -resid : resid;
+}
+
+// Dispatches one axpy2v invocation to the BLIS entry point selected by
+// iface. Only BLIS_TEST_SEQ_FRONT_END is handled; any other value is
+// reported through libblis_test_printf_error().
+void libblis_test_axpy2v_impl (
+  iface_t   iface,
+  obj_t*    alpha1,
+  obj_t*    alpha2,
+  obj_t*    x,
+  obj_t*    y,
+  obj_t*    z,
+  cntx_t*   cntx
+){
+  if ( iface == BLIS_TEST_SEQ_FRONT_END )
+  {
+    // Expert object-API call with the caller's context; the trailing
+    // argument is passed as NULL.
+    bli_axpy2v_ex( alpha1, alpha2, x, y, z, cntx, NULL );
+  }
+  else
+  {
+    libblis_test_printf_error( "Invalid interface type.\n" );
+  }
+}
+
+// Reference check for axpy2v.
+//
+// Expected inputs:
+// - x, y and z_orig hold the (randomized) operands used for the run.
+// - z holds the result produced by the implementation under test.
+// - alpha1 and alpha2 should have non-zero imaginary parts in the complex
+//   cases so that the implementation is exercised more fully.
+//
+// The implementation of
+//
+//   z := z_orig + alpha1 * conjx(x) + alpha2 * conjy(y)
+//
+// is considered correct when
+//
+//   normfv( z - v )
+//
+// is negligible, where v is the same expression rebuilt here from
+// separate copy, scale and add steps. That norm is the return value.
+//
+// Side effect: z is overwritten with the difference vector.
+// params is accepted for signature parity but is not read here.
+double libblis_test_axpy2v_check (
+  test_params_t* params,
+  obj_t*         alpha1,
+  obj_t*         alpha2,
+  obj_t*         x,
+  obj_t*         y,
+  obj_t*         z,
+  obj_t*         z_orig
+) {
+  const num_t  dt      = bli_obj_dt( z );
+  const num_t  dt_real = bli_obj_dt_proj_to_real( z );
+  const dim_t  len     = bli_obj_vector_dim( z );
+
+  obj_t  ax;        // becomes alpha1 * x
+  obj_t  ay;        // becomes alpha2 * y
+  obj_t  expected;  // becomes z_orig + ax + ay
+  obj_t  nrm;       // real-valued scalar receiving the norm
+
+  double imag_unused;
+  double resid = 0.0;
+
+  bli_obj_scalar_init_detached( dt_real, &nrm );
+
+  // Allocate the temporaries as length-len column vectors of z's datatype.
+  bli_obj_create( dt, len, 1, 0, 0, &ax );
+  bli_obj_create( dt, len, 1, 0, 0, &ay );
+  bli_obj_create( dt, len, 1, 0, 0, &expected );
+
+  // ax := alpha1 * x
+  bli_copyv( x, &ax );
+  bli_scalv( alpha1, &ax );
+
+  // ay := alpha2 * y
+  bli_copyv( y, &ay );
+  bli_scalv( alpha2, &ay );
+
+  // expected := z_orig + ax + ay (x term added first, then the y term).
+  bli_copyv( z_orig, &expected );
+  bli_addv( &ax, &expected );
+  bli_addv( &ay, &expected );
+
+  // z := z - expected, then take the norm of what is left.
+  bli_subv( &expected, z );
+  bli_normfv( z, &nrm );
+  bli_getsc( &nrm, &resid, &imag_unused );
+
+  bli_obj_free( &ax );
+  bli_obj_free( &ay );
+  bli_obj_free( &expected );
+
+  return resid;
+}
--- a/Show More
+++ b/Show More