diff --git a/example/ck_tile/01_fmha/hsaco/bwd_fp16_a32.cpp b/example/ck_tile/01_fmha/hsaco/bwd_fp16_a32.cpp index ab84d83684..caff0fbdd4 100644 --- a/example/ck_tile/01_fmha/hsaco/bwd_fp16_a32.cpp +++ b/example/ck_tile/01_fmha/hsaco/bwd_fp16_a32.cpp @@ -855,7 +855,7 @@ unsigned char bwd_fp16_a32[] = { 0x34, 0x00, 0xCD, 0xD3, 0x9A, 0x35, 0xD2, 0x1C, 0x34, 0x00, 0xCD, 0xD3, 0x9C, 0x39, 0xD2, 0x1C, 0x34, 0x00, 0xCD, 0xD3, 0x9E, 0x3D, 0xD2, 0x1C, 0x00, 0x35, 0xFE, 0xD9, 0x0C, 0x00, 0x00, 0x70, 0x20, 0x04, 0x1A, 0xD8, 0x0B, 0x25, 0x00, 0x00, 0x38, 0x00, 0xCD, 0xD3, 0x90, 0x41, 0x02, 0x1A, - 0x00, 0xE0, 0x7E, 0xE0, 0x08, 0x84, 0x08, 0x80, 0x38, 0x00, 0xCD, 0xD3, 0x92, 0x45, 0xE2, 0x1C, + 0x00, 0x20, 0x7C, 0xE0, 0x08, 0x84, 0x08, 0x80, 0x38, 0x00, 0xCD, 0xD3, 0x92, 0x45, 0xE2, 0x1C, 0x38, 0x00, 0xCD, 0xD3, 0x94, 0x49, 0xE2, 0x1C, 0x80, 0x3B, 0xFE, 0xD9, 0x0C, 0x00, 0x00, 0x74, 0x80, 0x00, 0x1A, 0xD8, 0x0B, 0x26, 0x00, 0x00, 0x38, 0x00, 0xCD, 0xD3, 0x96, 0x4D, 0xE2, 0x1C, 0x38, 0x00, 0xCD, 0xD3, 0x98, 0x51, 0xE2, 0x1C, 0x38, 0x00, 0xCD, 0xD3, 0x9A, 0x55, 0xE2, 0x1C, @@ -925,7 +925,7 @@ unsigned char bwd_fp16_a32[] = { 0x00, 0x00, 0x80, 0xBF, 0x00, 0x00, 0x80, 0xBF, 0x00, 0x00, 0x80, 0xBF, 0xF4, 0x00, 0xCD, 0xD3, 0x7A, 0x29, 0xD3, 0x07, 0x80, 0x11, 0x1A, 0xD8, 0x0D, 0x56, 0x00, 0x00, 0xA0, 0x15, 0x1A, 0xD8, 0x0D, 0x57, 0x00, 0x00, 0x00, 0x00, 0x8A, 0xBF, 0xA0, 0x80, 0xCD, 0xD3, 0x90, 0x91, 0x82, 0x0E, - 0x00, 0xE0, 0x7E, 0xE0, 0x07, 0x80, 0x08, 0x80, 0xA4, 0x80, 0xCD, 0xD3, 0x92, 0x91, 0x92, 0x0E, + 0x00, 0x20, 0x7C, 0xE0, 0x07, 0x80, 0x08, 0x80, 0xA4, 0x80, 0xCD, 0xD3, 0x92, 0x91, 0x92, 0x0E, 0x00, 0xC8, 0x6C, 0xD8, 0x15, 0x00, 0x00, 0x7C, 0x00, 0xC9, 0x6C, 0xD8, 0x15, 0x00, 0x00, 0x96, 0xA8, 0x80, 0xCD, 0xD3, 0x94, 0x91, 0xA2, 0x0E, 0x7F, 0xC6, 0x8C, 0xBF, 0x00, 0x00, 0x8A, 0xBF, 0xAC, 0x80, 0xCD, 0xD3, 0x96, 0x91, 0xB2, 0x0E, 0x00, 0x44, 0xFE, 0xD9, 0x11, 0x00, 0x00, 0x30, @@ -990,7 +990,7 @@ unsigned char bwd_fp16_a32[] = { 0x34, 0x00, 0xCD, 0xD3, 0x9A, 0x35, 0xD2, 0x1C, 0x34, 0x00, 0xCD, 0xD3, 0x9C, 0x39, 0xD2, 0x1C, 0x34, 0x00, 0xCD, 0xD3, 0x9E, 0x3D, 0xD2, 0x1C, 0x00, 0x35, 0xFE, 0xD9, 0x0C, 0x00, 0x00, 0x70, 0x20, 0x04, 0x1A, 0xD8, 0x0B, 0x21, 0x00, 0x00, 0x38, 0x00, 0xCD, 0xD3, 0x90, 0x41, 0x02, 0x1A, - 0x00, 0xE0, 0x7E, 0xE0, 0x08, 0x84, 0x08, 0x80, 0x38, 0x00, 0xCD, 0xD3, 0x92, 0x45, 0xE2, 0x1C, + 0x00, 0x20, 0x7C, 0xE0, 0x08, 0x84, 0x08, 0x80, 0x38, 0x00, 0xCD, 0xD3, 0x92, 0x45, 0xE2, 0x1C, 0x38, 0x00, 0xCD, 0xD3, 0x94, 0x49, 0xE2, 0x1C, 0x80, 0x3B, 0xFE, 0xD9, 0x0C, 0x00, 0x00, 0x74, 0x80, 0x00, 0x1A, 0xD8, 0x0B, 0x22, 0x00, 0x00, 0x38, 0x00, 0xCD, 0xD3, 0x96, 0x4D, 0xE2, 0x1C, 0x38, 0x00, 0xCD, 0xD3, 0x98, 0x51, 0xE2, 0x1C, 0x38, 0x00, 0xCD, 0xD3, 0x9A, 0x55, 0xE2, 0x1C, @@ -1060,7 +1060,7 @@ unsigned char bwd_fp16_a32[] = { 0x00, 0x00, 0x80, 0xBF, 0x00, 0x00, 0x80, 0xBF, 0x00, 0x00, 0x80, 0xBF, 0xF4, 0x00, 0xCD, 0xD3, 0x7A, 0x29, 0xD3, 0x07, 0x80, 0x11, 0x1A, 0xD8, 0x0D, 0x56, 0x00, 0x00, 0xA0, 0x15, 0x1A, 0xD8, 0x0D, 0x57, 0x00, 0x00, 0x00, 0x00, 0x8A, 0xBF, 0xA0, 0x80, 0xCD, 0xD3, 0x90, 0x91, 0x82, 0x0E, - 0x00, 0xE0, 0x7E, 0xE0, 0x07, 0x80, 0x08, 0x80, 0xA4, 0x80, 0xCD, 0xD3, 0x92, 0x91, 0x92, 0x0E, + 0x00, 0x20, 0x7C, 0xE0, 0x07, 0x80, 0x08, 0x80, 0xA4, 0x80, 0xCD, 0xD3, 0x92, 0x91, 0x92, 0x0E, 0x00, 0xC6, 0x6C, 0xD8, 0x15, 0x00, 0x00, 0x7C, 0x00, 0xC7, 0x6C, 0xD8, 0x15, 0x00, 0x00, 0x96, 0xA8, 0x80, 0xCD, 0xD3, 0x94, 0x91, 0xA2, 0x0E, 0x7F, 0xC6, 0x8C, 0xBF, 0x00, 0x00, 0x8A, 0xBF, 0xAC, 0x80, 0xCD, 0xD3, 0x96, 0x91, 0xB2, 0x0E, 0x00, 0x44, 0xFE, 0xD9, 0x11, 0x00, 0x00, 0x30, @@ -1126,7 +1126,7 @@ unsigned char bwd_fp16_a32[] = { 0x00, 0x00, 0x1A, 0xD8, 0x0B, 0x24, 0x00, 0x00, 0x34, 0x00, 0xCD, 0xD3, 0x9C, 0x39, 0xD2, 0x1C, 0x34, 0x00, 0xCD, 0xD3, 0x9E, 0x3D, 0xD2, 0x1C, 0x38, 0x00, 0xCD, 0xD3, 0x90, 0x41, 0x02, 0x1A, 0x00, 0x35, 0xFE, 0xD9, 0x0C, 0x00, 0x00, 0x70, 0x20, 0x04, 0x1A, 0xD8, 0x0B, 0x25, 0x00, 0x00, - 0x38, 0x00, 0xCD, 0xD3, 0x92, 0x45, 0xE2, 0x1C, 0x00, 0xE0, 0x7E, 0xE0, 0x08, 0x84, 0x08, 0x80, + 0x38, 0x00, 0xCD, 0xD3, 0x92, 0x45, 0xE2, 0x1C, 0x00, 0x20, 0x7C, 0xE0, 0x08, 0x84, 0x08, 0x80, 0x38, 0x00, 0xCD, 0xD3, 0x94, 0x49, 0xE2, 0x1C, 0x38, 0x00, 0xCD, 0xD3, 0x96, 0x4D, 0xE2, 0x1C, 0x80, 0x3B, 0xFE, 0xD9, 0x0C, 0x00, 0x00, 0x74, 0x80, 0x00, 0x1A, 0xD8, 0x0B, 0x26, 0x00, 0x00, 0x38, 0x00, 0xCD, 0xD3, 0x98, 0x51, 0xE2, 0x1C, 0x38, 0x00, 0xCD, 0xD3, 0x9A, 0x55, 0xE2, 0x1C, @@ -1197,7 +1197,7 @@ unsigned char bwd_fp16_a32[] = { 0xF4, 0x00, 0xCD, 0xD3, 0x7A, 0x29, 0xD3, 0x07, 0x00, 0x00, 0x80, 0xBF, 0x00, 0x00, 0x80, 0xBF, 0x00, 0x00, 0x80, 0xBF, 0x00, 0x00, 0x8A, 0xBF, 0xA0, 0x80, 0xCD, 0xD3, 0x90, 0x91, 0x82, 0x0E, 0x00, 0xC8, 0x6C, 0xD8, 0x15, 0x00, 0x00, 0x7C, 0x00, 0xC9, 0x6C, 0xD8, 0x15, 0x00, 0x00, 0x96, - 0xA4, 0x80, 0xCD, 0xD3, 0x92, 0x91, 0x92, 0x0E, 0x00, 0xE0, 0x7E, 0xE0, 0x07, 0x80, 0x08, 0x80, + 0xA4, 0x80, 0xCD, 0xD3, 0x92, 0x91, 0x92, 0x0E, 0x00, 0x20, 0x7C, 0xE0, 0x07, 0x80, 0x08, 0x80, 0xA8, 0x80, 0xCD, 0xD3, 0x94, 0x91, 0xA2, 0x0E, 0x7F, 0xC6, 0x8C, 0xBF, 0x00, 0x00, 0x8A, 0xBF, 0xAC, 0x80, 0xCD, 0xD3, 0x96, 0x91, 0xB2, 0x0E, 0xB0, 0x80, 0xCD, 0xD3, 0x98, 0x91, 0xC2, 0x0E, 0x00, 0x44, 0xFE, 0xD9, 0x11, 0x00, 0x00, 0x30, 0xB4, 0x80, 0xCD, 0xD3, 0x9A, 0x91, 0xD2, 0x0E, @@ -1261,7 +1261,7 @@ unsigned char bwd_fp16_a32[] = { 0x00, 0x00, 0x1A, 0xD8, 0x0B, 0x20, 0x00, 0x00, 0x34, 0x00, 0xCD, 0xD3, 0x9C, 0x39, 0xD2, 0x1C, 0x34, 0x00, 0xCD, 0xD3, 0x9E, 0x3D, 0xD2, 0x1C, 0x38, 0x00, 0xCD, 0xD3, 0x90, 0x41, 0x02, 0x1A, 0x00, 0x35, 0xFE, 0xD9, 0x0C, 0x00, 0x00, 0x70, 0x20, 0x04, 0x1A, 0xD8, 0x0B, 0x21, 0x00, 0x00, - 0x38, 0x00, 0xCD, 0xD3, 0x92, 0x45, 0xE2, 0x1C, 0x00, 0xE0, 0x7E, 0xE0, 0x08, 0x84, 0x08, 0x80, + 0x38, 0x00, 0xCD, 0xD3, 0x92, 0x45, 0xE2, 0x1C, 0x00, 0x20, 0x7C, 0xE0, 0x08, 0x84, 0x08, 0x80, 0x38, 0x00, 0xCD, 0xD3, 0x94, 0x49, 0xE2, 0x1C, 0x38, 0x00, 0xCD, 0xD3, 0x96, 0x4D, 0xE2, 0x1C, 0x80, 0x3B, 0xFE, 0xD9, 0x0C, 0x00, 0x00, 0x74, 0x80, 0x00, 0x1A, 0xD8, 0x0B, 0x22, 0x00, 0x00, 0x38, 0x00, 0xCD, 0xD3, 0x98, 0x51, 0xE2, 0x1C, 0x38, 0x00, 0xCD, 0xD3, 0x9A, 0x55, 0xE2, 0x1C, @@ -1332,7 +1332,7 @@ unsigned char bwd_fp16_a32[] = { 0xF4, 0x00, 0xCD, 0xD3, 0x7A, 0x29, 0xD3, 0x07, 0x00, 0x00, 0x80, 0xBF, 0x00, 0x00, 0x80, 0xBF, 0x00, 0x00, 0x80, 0xBF, 0x00, 0x00, 0x8A, 0xBF, 0xA0, 0x80, 0xCD, 0xD3, 0x90, 0x91, 0x82, 0x0E, 0x00, 0xC6, 0x6C, 0xD8, 0x15, 0x00, 0x00, 0x7C, 0x00, 0xC7, 0x6C, 0xD8, 0x15, 0x00, 0x00, 0x96, - 0xA4, 0x80, 0xCD, 0xD3, 0x92, 0x91, 0x92, 0x0E, 0x00, 0xE0, 0x7E, 0xE0, 0x07, 0x80, 0x08, 0x80, + 0xA4, 0x80, 0xCD, 0xD3, 0x92, 0x91, 0x92, 0x0E, 0x00, 0x20, 0x7C, 0xE0, 0x07, 0x80, 0x08, 0x80, 0xA8, 0x80, 0xCD, 0xD3, 0x94, 0x91, 0xA2, 0x0E, 0x7F, 0xC6, 0x8C, 0xBF, 0x00, 0x00, 0x8A, 0xBF, 0xAC, 0x80, 0xCD, 0xD3, 0x96, 0x91, 0xB2, 0x0E, 0xB0, 0x80, 0xCD, 0xD3, 0x98, 0x91, 0xC2, 0x0E, 0x00, 0x44, 0xFE, 0xD9, 0x11, 0x00, 0x00, 0x30, 0xB4, 0x80, 0xCD, 0xD3, 0x9A, 0x91, 0xD2, 0x0E, @@ -1381,13 +1381,13 @@ unsigned char bwd_fp16_a32[] = { 0x84, 0x00, 0xCD, 0xD3, 0x56, 0x85, 0x12, 0x0E, 0x3B, 0x90, 0x09, 0xBF, 0x43, 0x42, 0x42, 0x85, 0x84, 0x00, 0xCD, 0xD3, 0x5A, 0x89, 0x12, 0x0E, 0x10, 0x00, 0x3B, 0xB7, 0x00, 0x00, 0x80, 0xBF, 0x3B, 0x3A, 0x04, 0xBF, 0x84, 0x00, 0xCD, 0xD3, 0x5E, 0x8D, 0x12, 0x0E, 0xC5, 0xFB, 0x84, 0xBF, - 0xC7, 0xFB, 0x82, 0xBF, 0x00, 0xE0, 0x7E, 0xE0, 0x08, 0x84, 0x08, 0x80, 0x42, 0x20, 0x20, 0x80, + 0xC7, 0xFB, 0x82, 0xBF, 0x00, 0x20, 0x7C, 0xE0, 0x08, 0x84, 0x08, 0x80, 0x42, 0x20, 0x20, 0x80, 0x80, 0x21, 0x21, 0x82, 0x85, 0x00, 0x38, 0x20, 0xFF, 0x38, 0x32, 0x0C, 0x42, 0x00, 0x00, 0x00, 0x9F, 0x00, 0x38, 0x26, 0x82, 0x38, 0x3A, 0x0C, 0x1D, 0x33, 0x32, 0x68, 0x2F, 0xFF, 0x3C, 0x92, 0x20, 0x04, 0x00, 0x00, 0x3C, 0x32, 0x32, 0x68, 0x82, 0x32, 0x32, 0x24, 0x30, 0x00, 0x01, 0x0B, 0x30, 0x02, 0x03, 0x0B, 0x30, 0x04, 0x05, 0x0B, 0x30, 0x06, 0x07, 0x0B, 0x30, 0x08, 0x09, 0x0B, - 0x30, 0x0A, 0x0B, 0x0B, 0x30, 0x0C, 0x0D, 0x0B, 0x30, 0x0E, 0x0F, 0x0B, 0x00, 0xE0, 0x7E, 0xE0, - 0x07, 0x80, 0x08, 0x80, 0x00, 0xE0, 0x7E, 0xE0, 0x08, 0x84, 0x08, 0x80, 0x84, 0x00, 0x38, 0x20, + 0x30, 0x0A, 0x0B, 0x0B, 0x30, 0x0C, 0x0D, 0x0B, 0x30, 0x0E, 0x0F, 0x0B, 0x00, 0x20, 0x7C, 0xE0, + 0x07, 0x80, 0x08, 0x80, 0x00, 0x20, 0x7C, 0xE0, 0x08, 0x84, 0x08, 0x80, 0x84, 0x00, 0x38, 0x20, 0x82, 0x38, 0x30, 0x0C, 0x8F, 0x00, 0x38, 0x26, 0xFF, 0x38, 0x3A, 0x0C, 0x42, 0x00, 0x00, 0x00, 0x1D, 0x31, 0x30, 0x68, 0x2F, 0xFF, 0x3C, 0x92, 0x20, 0x04, 0x00, 0x00, 0x3C, 0x30, 0x30, 0x68, 0x82, 0x30, 0x30, 0x24, 0x1C, 0x40, 0xD8, 0xD3, 0xA0, 0x01, 0x00, 0x18, 0x1D, 0x40, 0xD8, 0xD3,