refactoring for miopen

This commit is contained in:
Chao Liu
2019-06-17 14:58:44 -05:00
parent b1cb48a04d
commit 33d1e0e2e5
33 changed files with 195 additions and 617 deletions

View File

@@ -3,91 +3,8 @@
#include "vector_type.hpp"
#define NO_VM_WAIT 0
#define NO_LGKM_WAIT 0
#define NO_DS_READ 0
#define NO_DS_WRITE 0
#define NO_GLB_READ 0
namespace ck {
// cast a pointer of LDS to its address
extern "C" __attribute__((address_space(3))) void* __to_local(void* p)[[hc]];
__device__ void vmcnt(index_t cnt)
{
#if !NO_VM_WAIT
if(cnt == 0)
{
asm volatile("\n \
s_waitcnt vmcnt(0) \n \
" ::);
}
else if(cnt == 1)
{
asm volatile("\n \
s_waitcnt vmcnt(1) \n \
" ::);
}
else if(cnt == 2)
{
asm volatile("\n \
s_waitcnt vmcnt(2) \n \
" ::);
}
else if(cnt == 4)
{
asm volatile("\n \
s_waitcnt vmcnt(2) \n \
" ::);
}
else
{
assert(false);
}
#endif
}
__device__ void lgkmcnt(index_t cnt)
{
#if !NO_LGKM_WAIT
if(cnt == 0)
{
asm volatile("\n \
s_waitcnt lgkmcnt(0) \n \
" ::);
}
else if(cnt == 1)
{
asm volatile("\n \
s_waitcnt lgkmcnt(1) \n \
" ::);
}
else if(cnt == 2)
{
asm volatile("\n \
s_waitcnt lgkmcnt(2) \n \
" ::);
}
else if(cnt == 3)
{
asm volatile("\n \
s_waitcnt lgkmcnt(3) \n \
" ::);
}
else if(cnt == 4)
{
asm volatile("\n \
s_waitcnt lgkmcnt(4) \n \
" ::);
}
else
{
assert(false);
}
#endif
}
__device__ void outerProduct1x4(const float* a, const float* b, float* c)
{
asm volatile("\n \
@@ -112,21 +29,7 @@ __device__ void outerProduct1x4(const float& a,
const vector_type<float, 4>::MemoryType& b,
vector_type<float, 4>::MemoryType& c)
{
#if 0
asm volatile(
"\n \
v_mac_f32 %0, %4, %5 \n \
v_mac_f32 %1, %4, %6 \n \
v_mac_f32 %2, %4, %7 \n \
v_mac_f32 %3, %4, %8 \n \
"
:
:"v"(c.x),"v"(c.y),"v"(c.z),"v"(c.w), \
"v"(a.x),"v"(b.x),"v"(b.y),"v"(b.z),"v"(b.w)
);
#else
outerProduct1x4(&a, (float*)&b, (float*)&c);
#endif
}
__device__ void outerProduct4x4(const vector_type<float, 4>::MemoryType& a,
@@ -136,57 +39,10 @@ __device__ void outerProduct4x4(const vector_type<float, 4>::MemoryType& a,
vector_type<float, 4>::MemoryType& c2,
vector_type<float, 4>::MemoryType& c3)
{
#if 0
asm volatile(
"\n \
v_mac_f32 %0, %4, %5 \n \
v_mac_f32 %1, %4, %6 \n \
v_mac_f32 %2, %4, %7 \n \
v_mac_f32 %3, %4, %8 \n \
"
:
:"v"(c0.x),"v"(c0.y),"v"(c0.z),"v"(c0.w), \
"v"(a.x),"v"(b.x),"v"(b.y),"v"(b.z),"v"(b.w)
);
asm volatile(
"\n \
v_mac_f32 %0, %4, %5 \n \
v_mac_f32 %1, %4, %6 \n \
v_mac_f32 %2, %4, %7 \n \
v_mac_f32 %3, %4, %8 \n \
"
:
:"v"(c1.x),"v"(c1.y),"v"(c1.z),"v"(c1.w), \
"v"(a.y),"v"(b.x),"v"(b.y),"v"(b.z),"v"(b.w)
);
asm volatile(
"\n \
v_mac_f32 %0, %4, %5 \n \
v_mac_f32 %1, %4, %6 \n \
v_mac_f32 %2, %4, %7 \n \
v_mac_f32 %3, %4, %8 \n \
"
:
:"v"(c2.x),"v"(c2.y),"v"(c2.z),"v"(c2.w), \
"v"(a.z),"v"(b.x),"v"(b.y),"v"(b.z),"v"(b.w)
);
asm volatile(
"\n \
v_mac_f32 %0, %4, %5 \n \
v_mac_f32 %1, %4, %6 \n \
v_mac_f32 %2, %4, %7 \n \
v_mac_f32 %3, %4, %8 \n \
"
:
:"v"(c3.x),"v"(c3.y),"v"(c3.z),"v"(c3.w), \
"v"(a.w),"v"(b.x),"v"(b.y),"v"(b.z),"v"(b.w)
);
#else
outerProduct1x4(a.x, b, c0);
outerProduct1x4(a.y, b, c1);
outerProduct1x4(a.z, b, c2);
outerProduct1x4(a.w, b, c3);
#endif
}
__device__ void outerProduct8x8(const vector_type<float, 4>::MemoryType* a,
@@ -201,7 +57,6 @@ __device__ void outerProduct8x8(const vector_type<float, 4>::MemoryType* a,
__device__ void ds_read_b128(vector_type<float, 4>::MemoryType& r, void* lds, index_t offset = 0)
{
#if !NO_DS_READ
if(offset == 0)
{
asm volatile("\n \
@@ -722,33 +577,11 @@ __device__ void ds_read_b128(vector_type<float, 4>::MemoryType& r, void* lds, in
: "=v"(r)
: "v"(__to_local(lds)));
}
#endif
}
__device__ void global_load(vector_type<float, 4>::MemoryType& r,
const vector_type<float, 4>::MemoryType* ptr,
index_t offset = 0)
{
#if !NO_GLB_READ
if(offset == 0)
{
asm volatile("\n \
global_load_dwordx4 %0, %1, off \n \
"
: "=v"(r)
: "v"(ptr));
}
else
{
assert(false);
}
#endif
}
__device__ void
ds_write_b128(const vector_type<float, 4>::MemoryType& r, void* lds, index_t offset = 0)
{
#if !NO_DS_WRITE
if(offset == 0)
{
asm volatile("\n \
@@ -761,7 +594,6 @@ ds_write_b128(const vector_type<float, 4>::MemoryType& r, void* lds, index_t off
{
assert(false);
}
#endif
}
} // namespace ck