Merge pull request #3004 from tridao/add-sub-packed-f32x2

[CuTeDSL] Add sub_packed_f32x2 operation
This commit is contained in:
drazi
2026-02-13 20:46:26 +08:00
committed by GitHub
2 changed files with 4 additions and 0 deletions

View File

@@ -96,6 +96,7 @@ __all__ = [
"fma_packed_f32x2",
"mul_packed_f32x2",
"add_packed_f32x2",
"sub_packed_f32x2",
"fmax",
"rcp_approx",
"exp2",

View File

@@ -940,6 +940,9 @@ mul_packed_f32x2 = partial(
# Specialization of calc_packed_f32x2_op that dispatches to
# nvvm.add_packed_f32x2. src_c is pre-bound to None.
# NOTE(review): presumably src_c is unused because an add takes only two
# source operands -- confirm against calc_packed_f32x2_op.
add_packed_f32x2 = partial(
    calc_packed_f32x2_op,
    src_c=None,
    calc_func=nvvm.add_packed_f32x2,
)
# Specialization of calc_packed_f32x2_op that dispatches to
# nvvm.sub_packed_f32x2. src_c is pre-bound to None.
# NOTE(review): presumably src_c is unused because a subtract takes only two
# source operands, and the operand order is whatever calc_packed_f32x2_op
# forwards to the NVVM op -- confirm both there.
sub_packed_f32x2 = partial(
    calc_packed_f32x2_op,
    src_c=None,
    calc_func=nvvm.sub_packed_f32x2,
)
@dsl_user_op