mirror of
https://github.com/NVIDIA/cutlass.git
synced 2026-04-20 06:48:59 +00:00
[fix] Boolean.__dsl_and__ emits arith.andi directly for i1 operands (#3087)
Before this fix, combining two Boolean (i1) DSL values with Python `and` triggered a verbose i1→i32→i1 round-trip in `__dsl_and__`: arith.extui (×3), arith.select, arith.cmpi ne (×2) — 6 extra MLIR ops.

Add a fast path: when both operands are Boolean, delegate directly to `__and__`, emitting a single `arith.andi %a, %b : i1` — identical to `&`. Both operators were already semantically equivalent; this fix makes the generated MLIR identical as well.

Includes:
- repro_dsl_and_bool.py — minimal standalone reproducer / bug-report script
- test_dsl_and_fix.py — pytest tests verifying the fixed behaviour
This commit is contained in:
@@ -1021,6 +1021,14 @@ class Numeric(metaclass=NumericMeta, is_abstract=True):
|
||||
0 and 3 -> 0
|
||||
3 and 0 and ... -> 0
|
||||
"""
|
||||
# Fast path: Boolean & Boolean → single arith.andi i1 instruction.
|
||||
# The general path promotes i1 operands to i32 via arith.extui, performs
|
||||
# arith.select, then converts back to i1 via arith.cmpi ne — generating
|
||||
# 6 unnecessary MLIR operations. For Boolean inputs the semantics of
|
||||
# `and` are identical to bitwise AND, so delegate directly to __and__.
|
||||
if isinstance(self, Boolean) and isinstance(other, Boolean):
|
||||
return self.__and__(other, loc=loc, ip=ip)
|
||||
|
||||
is_true = self.__dsl_bool__(loc=loc, ip=ip)
|
||||
|
||||
def and_op(lhs, rhs):
|
||||
|
||||
Reference in New Issue
Block a user