mirror of
https://github.com/NVIDIA/cutlass.git
synced 2026-04-19 22:38:56 +00:00
[CuTeDSL] Add sub_packed_f32x2 operation
Add a subtraction operation for packed f32x2 values, following the same pattern as the existing add_packed_f32x2 and mul_packed_f32x2 operations.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -96,6 +96,7 @@ __all__ = [
|
||||
"fma_packed_f32x2",
|
||||
"mul_packed_f32x2",
|
||||
"add_packed_f32x2",
|
||||
"sub_packed_f32x2",
|
||||
"fmax",
|
||||
"rcp_approx",
|
||||
"exp2",
|
||||
|
||||
@@ -940,6 +940,9 @@ mul_packed_f32x2 = partial(
|
||||
add_packed_f32x2 = partial(
|
||||
calc_packed_f32x2_op, src_c=None, calc_func=nvvm.add_packed_f32x2
|
||||
)
|
||||
sub_packed_f32x2 = partial(
|
||||
calc_packed_f32x2_op, src_c=None, calc_func=nvvm.sub_packed_f32x2
|
||||
)
|
||||
|
||||
|
||||
@dsl_user_op
|
||||
|
||||
Reference in New Issue
Block a user