mirror of
https://github.com/NVIDIA/cutlass.git
synced 2026-05-11 17:00:05 +00:00
releaase 2.11 (#703)
This commit is contained in:
@@ -29,7 +29,6 @@
|
||||
|
||||
|
||||
set(TEST_COMMAND_00 RowMajor --extent=16,16)
|
||||
set(TEST_COMMAND_01 \"ColumnMajorInterleaved<4>\" --extent=32,8 --output-shape=16 --vectorize=4)
|
||||
|
||||
cutlass_example_add_executable(
|
||||
03_visualize_layout
|
||||
@@ -37,6 +36,5 @@ cutlass_example_add_executable(
|
||||
register_layout.cu
|
||||
TEST_COMMAND_OPTIONS
|
||||
TEST_COMMAND_00
|
||||
TEST_COMMAND_01
|
||||
)
|
||||
|
||||
|
||||
@@ -64,15 +64,15 @@ void RegisterLayouts(std::map<std::string, std::unique_ptr<VisualizeLayoutBase>
|
||||
// All Ampere/Turing H/Integer matrix multiply tensor core kernels uses the same swizzling
|
||||
// layout implementation with different templates.
|
||||
//
|
||||
// BMMA 88128 Interleaved-256
|
||||
// BMMA 168256 Interleaved-256
|
||||
// mma.sync.aligned.m8n8k128.s32.b1.b1.s32 Interleaved-256
|
||||
// mma.sync.aligned.m16n8k256.s32.b1.b1.s32 Interleaved-256
|
||||
{"TensorOpMultiplicand<1,256>",
|
||||
new VisualizeLayout<cutlass::layout::TensorOpMultiplicand<1, 256>>},
|
||||
// BMMA 88128 TN kblock512
|
||||
// BMMA 168256 TN kblock512
|
||||
// mma.sync.aligned.m8n8k128.s32.b1.b1.s32 TN kblock512
|
||||
// mma.sync.aligned.m16n8k256.s32.b1.b1.s32 TN kblock512
|
||||
{"TensorOpMultiplicand<1,512>",
|
||||
new VisualizeLayout<cutlass::layout::TensorOpMultiplicand<1, 512>>},
|
||||
// BMMA 168256 TN kblock1024
|
||||
// mma.sync.aligned.m16n8k256.s32.b1.b1.s32 TN kblock1024
|
||||
{"TensorOpMultiplicand<1,1024>",
|
||||
new VisualizeLayout<cutlass::layout::TensorOpMultiplicand<1, 1024>>},
|
||||
// Integer matrix multiply.int4 8832 Interleaved-64
|
||||
|
||||
Reference in New Issue
Block a user