Modularize ckProfiler operations (#514)

* Re-structure ckProfiler source files

* Rename profiler.cpp to main.cpp

* Modularize ckProfiler operations

* Add description for profiler operations

* Use longer name to avoid name collision

* Use macro to delay expansion

* Use std::move() to avoid object copying

* Prohibit users from calling dtor

* Use macro to eliminate redundant code

* Make friend function hidden

* Add missing include directive <iostream>

* Fix wrong include directives

* Remove int8 from batchnorm-forward instances since it is not needed for forward training and could fail test

Co-authored-by: Qianfeng Zhang <Qianfeng.Zhang@amd.com>

[ROCm/composable_kernel commit: 8784a72e23]
This commit is contained in:
Po Yen Chen
2022-12-02 05:15:02 +08:00
committed by GitHub
parent 8e868bf880
commit 02db748e74
82 changed files with 346 additions and 273 deletions

View File

@@ -7,7 +7,8 @@
#include <initializer_list>
#include <cstdlib>
#include "profiler/include/profile_batched_gemm_impl.hpp"
#include "profiler/profile_batched_gemm_impl.hpp"
#include "profiler_operation_registry.hpp"
enum struct GemmMatrixLayout
{
@@ -25,12 +26,15 @@ enum struct GemmDataType
INT8_INT8_INT8, // 3
};
#define OP_NAME "batched_gemm"
#define OP_DESC "Batched GEMM"
int profile_batched_gemm(int argc, char* argv[])
{
if(argc != 18)
{
// clang-format off
printf("arg1: tensor operation (batched_gemm: Batched GEMM)\n");
printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n");
printf("arg2: data type (0: fp32; 1: fp16, 2: bf16, 3: int8)\n");
printf("arg3: matrix layout (0: A[g, m, k] * B[g, k, n] = C[g, m, n];\n");
printf(" 1: A[g, m, k] * B[g, n, k] = C[g, m, n];\n");
@@ -195,3 +199,5 @@ int profile_batched_gemm(int argc, char* argv[])
return 1;
}
}
REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_batched_gemm);