mirror of
https://github.com/NVIDIA/cutlass.git
synced 2026-05-12 09:15:56 +00:00
CUTLASS 3.3.0 (#1167)
* Release 3.3.0 Adds support for mixed precision GEMMs On Hopper and Ampere Adds support for < 16B aligned GEMMs on Hopper Enhancements to EVT Enhancements to Python interface Enhancements to Sub-byte type handling in CuTe Several other bug-fixes and performance improvements. * minor doc update
This commit is contained in:
@@ -50,7 +50,7 @@ class Conv2dEquivalence:
|
||||
"""
|
||||
def __init__(self, conv_kind, element_A, element_B, element_C, element_D, element_accumulator,
|
||||
alignment_A, alignment_B, alignment_C):
|
||||
|
||||
|
||||
self.element_A = element_A
|
||||
self.element_B = element_B
|
||||
self.element_C = element_C
|
||||
@@ -59,21 +59,21 @@ class Conv2dEquivalence:
|
||||
self.alignment_A = alignment_A
|
||||
self.alignment_B = alignment_B
|
||||
self.alignment_C = alignment_C
|
||||
|
||||
|
||||
self.conv_kind = conv_kind
|
||||
|
||||
|
||||
self.plan = cutlass.op.Conv2d(
|
||||
kind=self.conv_kind, element_A=element_A, element_B=element_B, element_C=element_C,
|
||||
element_D=element_D, element_accumulator=element_accumulator)
|
||||
|
||||
|
||||
self.op = self.plan.construct(
|
||||
alignment_A=self.alignment_A, alignment_B=self.alignment_B,
|
||||
alignment_A=self.alignment_A, alignment_B=self.alignment_B,
|
||||
alignment_C=self.alignment_C)
|
||||
|
||||
|
||||
def _plans_equal(self, other_plan) -> bool:
|
||||
"""
|
||||
Compares whether two plans are equal
|
||||
|
||||
|
||||
:param other_plan: plan to compare against the default Conv2d
|
||||
:type other_plan: cutlass.op.Conv2d
|
||||
|
||||
@@ -81,9 +81,9 @@ class Conv2dEquivalence:
|
||||
:rtype: bool
|
||||
"""
|
||||
other_op = other_plan.construct(
|
||||
alignment_A=self.alignment_A, alignment_B=self.alignment_B,
|
||||
alignment_A=self.alignment_A, alignment_B=self.alignment_B,
|
||||
alignment_C=self.alignment_C)
|
||||
|
||||
|
||||
return self.op.rt_module.emit() == other_op.rt_module.emit()
|
||||
|
||||
def generic_test(self):
|
||||
@@ -91,16 +91,16 @@ class Conv2dEquivalence:
|
||||
Tests the equivalence of various constructions of the Conv2d interface when using CUTLASS data types
|
||||
and layouts for constructing the Conv2d interface
|
||||
"""
|
||||
if not datatypes.numpy_available:
|
||||
if not datatypes.is_numpy_available():
|
||||
return
|
||||
|
||||
|
||||
# Test when specifying all parameters
|
||||
plan_other = cutlass.op.Conv2d(
|
||||
kind=self.conv_kind,
|
||||
element_A=self.element_A, element_B=self.element_B, element_C=self.element_C,
|
||||
element_D=self.element_D, element_accumulator=self.element_accumulator)
|
||||
assert self._plans_equal(plan_other)
|
||||
|
||||
|
||||
# Test when specifying all parameters but A
|
||||
plan_other = cutlass.op.Conv2d(
|
||||
kind=self.conv_kind,
|
||||
@@ -108,7 +108,7 @@ class Conv2dEquivalence:
|
||||
element_D=self.element_D, element_accumulator=self.element_accumulator,
|
||||
element=self.element_A)
|
||||
assert self._plans_equal(plan_other)
|
||||
|
||||
|
||||
# Test when specifying all parameters but A and B as tensors using generic element and output
|
||||
plan_other = cutlass.op.Conv2d(
|
||||
kind=self.conv_kind,
|
||||
@@ -116,7 +116,7 @@ class Conv2dEquivalence:
|
||||
element_D=self.element_D, element_accumulator=self.element_accumulator,
|
||||
element=self.element_A)
|
||||
assert self._plans_equal(plan_other)
|
||||
|
||||
|
||||
# Test without explicit accumulator. Only run if the type of C and the accumulator are equal
|
||||
if self.element_C == self.element_accumulator:
|
||||
plan_other = cutlass.op.Conv2d(
|
||||
@@ -125,18 +125,18 @@ class Conv2dEquivalence:
|
||||
element_D=self.element_D,
|
||||
element=self.element_A)
|
||||
assert self._plans_equal(plan_other)
|
||||
|
||||
|
||||
# Test with only the generic types. Only rune if the types of A, B, C, and D are the same
|
||||
if (self.element_A == self.element_B and self.element_A == self.element_C and self.element_A == self.element_D
|
||||
and self.element_A == self.element_accumulator):
|
||||
plan_other = cutlass.op.Conv2d(kind=self.conv_kind, element=self.element_A)
|
||||
assert self._plans_equal(plan_other)
|
||||
|
||||
|
||||
def numpy_test(self):
|
||||
"""
|
||||
Tests the equivalence of various constructions of the Conv2d interface when using numpy as a frontend
|
||||
"""
|
||||
if not datatypes.numpy_available:
|
||||
if not datatypes.is_numpy_available():
|
||||
return
|
||||
|
||||
import numpy as np
|
||||
@@ -145,7 +145,7 @@ class Conv2dEquivalence:
|
||||
type_C = datatypes.numpy_type(self.element_C)
|
||||
type_D = datatypes.numpy_type(self.element_D)
|
||||
type_accum = datatypes.numpy_type(self.element_accumulator)
|
||||
|
||||
|
||||
size = (2, 2)
|
||||
A = np.zeros(size, dtype=type_A)
|
||||
B = np.zeros(size, dtype=type_B)
|
||||
@@ -153,49 +153,49 @@ class Conv2dEquivalence:
|
||||
D = np.zeros(size, dtype=type_D)
|
||||
|
||||
return self.tensor_test(type_A, type_B, type_C, type_D, type_accum, A, B, C, D)
|
||||
|
||||
|
||||
def torch_test(self):
|
||||
"""
|
||||
Tests the equivalence of various constructions of the Conv2d interface when using torch as a frontend
|
||||
"""
|
||||
if not datatypes.torch_available:
|
||||
if not datatypes.is_torch_available():
|
||||
return
|
||||
|
||||
|
||||
import torch
|
||||
type_A = datatypes.torch_type(self.element_A)
|
||||
type_B = datatypes.torch_type(self.element_B)
|
||||
type_C = datatypes.torch_type(self.element_C)
|
||||
type_D = datatypes.torch_type(self.element_D)
|
||||
type_accum = datatypes.torch_type(self.element_accumulator)
|
||||
|
||||
|
||||
size = (2, 2)
|
||||
|
||||
|
||||
A = torch.empty(size, dtype=type_A)
|
||||
B = torch.empty(size, dtype=type_B)
|
||||
C = torch.empty(size, dtype=type_C)
|
||||
D = torch.empty(size, dtype=type_D)
|
||||
|
||||
|
||||
return self.tensor_test(type_A, type_B, type_C, type_D, type_accum, A, B, C, D)
|
||||
|
||||
|
||||
def tensor_test(self, type_A, type_B, type_C, type_D, type_accum, A, B, C, D):
|
||||
# Test when specifying all parameters via tensors
|
||||
plan_np = cutlass.op.Conv2d(kind=self.conv_kind, A=A, B=B, C=C, D=D, element_accumulator=type_accum)
|
||||
assert self._plans_equal(plan_np)
|
||||
|
||||
|
||||
# Test when specifying all parameters but A as tensors
|
||||
plan_np = cutlass.op.Conv2d(kind=self.conv_kind, B=B, C=C, D=D, element_accumulator=type_accum, element_A=type_A)
|
||||
assert self._plans_equal(plan_np)
|
||||
|
||||
|
||||
# Test when specifying all parameters but A and B as tensors and using generic element and output
|
||||
if type_A == type_B:
|
||||
plan_np = cutlass.op.Conv2d(kind=self.conv_kind, C=C, D=D, element_accumulator=type_accum, element=type_A)
|
||||
assert self._plans_equal(plan_np)
|
||||
|
||||
|
||||
# Test without explicit accumulator. Only run if the type of C and the accumulator.
|
||||
if type_C == type_accum:
|
||||
plan_np = cutlass.op.Conv2d(kind=self.conv_kind, A=A, B=B, C=C, D=D)
|
||||
assert self._plans_equal(plan_np)
|
||||
|
||||
|
||||
# Test with only the generic types and layouts. Only run if types and layouts of A, B, C, and D are the same.
|
||||
if (type_A == type_B and type_A == type_C and type_A == type_D and type_A == type_accum):
|
||||
plan_np = cutlass.op.Conv2d(kind=self.conv_kind, element=type_A)
|
||||
@@ -223,20 +223,20 @@ type2alignment = {
|
||||
}
|
||||
|
||||
def add_test(conv_kind, element_A, element_B, element_C, element_D, element_accumulator):
|
||||
|
||||
|
||||
test_name = f"test_conv2d_{conv_kind}_{element_A}_{element_B}_{element_C}_{element_D}_{element_accumulator}"
|
||||
|
||||
|
||||
def run(self):
|
||||
conv2d_eq = Conv2dEquivalence(
|
||||
conv_kind=conv_kind,
|
||||
conv_kind=conv_kind,
|
||||
element_A=element_A, element_B=element_B,
|
||||
element_C=element_C, element_D=element_D,
|
||||
element_accumulator=element_accumulator,
|
||||
element_accumulator=element_accumulator,
|
||||
alignment_A=type2alignment[element_A], alignment_B=type2alignment[element_B],
|
||||
alignment_C=type2alignment[element_C]
|
||||
)
|
||||
conv2d_eq.test_all()
|
||||
|
||||
|
||||
setattr(ConvEquivalenceTest, test_name, run)
|
||||
|
||||
for conv_kind in ["fprop", "wgrad", "dgrad"]:
|
||||
@@ -255,25 +255,25 @@ class Conv2dErrorTests(unittest.TestCase):
|
||||
"""
|
||||
Tests various error scenarios that arise with the high-level Gemm interface
|
||||
"""
|
||||
|
||||
|
||||
def test_alignment(self):
|
||||
"""
|
||||
Tests case in which the alignment specified is unsupported
|
||||
"""
|
||||
plan = cutlass.op.Conv2d(kind="fprop", element=cutlass.DataType.f16)
|
||||
|
||||
|
||||
with ExpectException(True, 'Alignment 3 is not supported for F16. The construction should fail.'):
|
||||
op = plan.construct(alignment_A=3, alignment_B=3, alignment_C=3)
|
||||
|
||||
|
||||
def test_invalid_tile_description(self):
|
||||
"""
|
||||
Tests scenarios in which an invalid tile description is provided for a given CC
|
||||
"""
|
||||
plan = cutlass.op.Conv2d(kind="fprop", element=cutlass.DataType.f16)
|
||||
|
||||
|
||||
td = plan.tile_descriptions()[0]
|
||||
td.threadblock_shape=[17, 32, 5]
|
||||
|
||||
|
||||
plan.tile_description = td
|
||||
with ExpectException(True, 'The threadblock shape is invalid. The compilation should fail.'):
|
||||
plan.compile()
|
||||
|
||||
@@ -93,13 +93,16 @@ class EVTErrorTests(unittest.TestCase):
|
||||
"""
|
||||
Test when the epilogue consumes too much shared memory
|
||||
"""
|
||||
def evt_too_much_shared_memory(accum, C1, C2, C3, C4, C5):
|
||||
def evt_too_much_shared_memory(accum, C1, C2, C3, C4, C5, C6, C7, C8):
|
||||
D1 = accum + C1
|
||||
D2 = D1 + C2
|
||||
D3 = D2 + C3
|
||||
D4 = D3 + C4
|
||||
D = D4 + C5
|
||||
return D, D1, D2, D3, D4
|
||||
D5 = D4 + C5
|
||||
D6 = D5 + C6
|
||||
D7 = D6 + C7
|
||||
D = D7 + C8
|
||||
return D, D1, D2, D3, D4, D5, D6, D7
|
||||
|
||||
example_tensors = {
|
||||
"accum": self.fake_tensor(np.float16, (6, 512, 512)),
|
||||
@@ -108,10 +111,16 @@ class EVTErrorTests(unittest.TestCase):
|
||||
"C3": self.fake_tensor(np.float16, (6, 512, 512)),
|
||||
"C4": self.fake_tensor(np.float16, (6, 512, 512)),
|
||||
"C5": self.fake_tensor(np.float16, (6, 512, 512)),
|
||||
"C6": self.fake_tensor(np.float16, (6, 512, 512)),
|
||||
"C7": self.fake_tensor(np.float16, (6, 512, 512)),
|
||||
"C8": self.fake_tensor(np.float16, (6, 512, 512)),
|
||||
"D1": self.fake_tensor(np.float16, (6, 512, 512)),
|
||||
"D2": self.fake_tensor(np.float16, (6, 512, 512)),
|
||||
"D3": self.fake_tensor(np.float16, (6, 512, 512)),
|
||||
"D4": self.fake_tensor(np.float16, (6, 512, 512)),
|
||||
"D5": self.fake_tensor(np.float16, (6, 512, 512)),
|
||||
"D6": self.fake_tensor(np.float16, (6, 512, 512)),
|
||||
"D7": self.fake_tensor(np.float16, (6, 512, 512)),
|
||||
"D": self.fake_tensor(np.float16, (6, 512, 512))
|
||||
}
|
||||
|
||||
|
||||
@@ -85,7 +85,7 @@ class GemmEquivalence:
|
||||
Tests the equivalence of various constructions of the Gemm interface when using CUTLASS data types
|
||||
and layouts for constructing the Gemm interface
|
||||
"""
|
||||
if not datatypes.numpy_available:
|
||||
if not datatypes.is_numpy_available():
|
||||
return
|
||||
|
||||
# Test when specifying all parameters
|
||||
@@ -126,7 +126,7 @@ class GemmEquivalence:
|
||||
"""
|
||||
Tests the equivalence of various constructions of the Gemm interface when using numpy as a frontend
|
||||
"""
|
||||
if not datatypes.numpy_available:
|
||||
if not datatypes.is_numpy_available():
|
||||
return
|
||||
|
||||
import numpy as np
|
||||
|
||||
Reference in New Issue
Block a user