[CuTeDSL] Add sub_packed_f32x2 operation

Add subtraction operation for packed f32x2 values, following the same
pattern as the existing add_packed_f32x2 and mul_packed_f32x2 operations.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Tri Dao
2026-02-04 21:18:46 +07:00
parent 6b3e607b85
commit 51935551fb
2 changed files with 4 additions and 0 deletions

View File

@@ -96,6 +96,7 @@ __all__ = [
"fma_packed_f32x2",
"mul_packed_f32x2",
"add_packed_f32x2",
"sub_packed_f32x2",
"fmax",
"rcp_approx",
"exp2",

View File

@@ -940,6 +940,9 @@ mul_packed_f32x2 = partial(
# Packed f32x2 addition: calc_packed_f32x2_op pre-bound to the NVVM add op.
# src_c is fixed to None here (presumably because add takes no third
# operand -- confirm against calc_packed_f32x2_op).
add_packed_f32x2 = partial(
    calc_packed_f32x2_op,
    src_c=None,
    calc_func=nvvm.add_packed_f32x2,
)
# Packed f32x2 subtraction: calc_packed_f32x2_op pre-bound to the NVVM sub op.
# src_c is fixed to None here (presumably because sub takes no third
# operand -- confirm against calc_packed_f32x2_op).
sub_packed_f32x2 = partial(
    calc_packed_f32x2_op,
    src_c=None,
    calc_func=nvvm.sub_packed_f32x2,
)
@dsl_user_op