v4.3 tag release update. (#2789)

2026-04-19 22:38:56 +00:00 · 2025-11-21 09:49:44 +08:00
parent 406e078b29
commit 8cd5bef43a
225 changed files with 23229 additions and 2813 deletions
--- a/python/cutlass_cppgen/backend/evt/backend/sm90_emitter.py
+++ b/python/cutlass_cppgen/backend/evt/backend/sm90_emitter.py
@@ -90,7 +90,7 @@ class Sm90Emitter:
            tile_description=operation.tile_description,
            schedule=operation.tile_description.epilogue_schedule,
            element_c=operation.C.element,
-            element_d=operation.C.element,
+            element_d=operation.D.element,
            fusion_callbacks=fusion_callbacks
        )

--- a/python/cutlass_cppgen/backend/evt/frontend/python_ast.py
+++ b/python/cutlass_cppgen/backend/evt/frontend/python_ast.py
@@ -140,7 +140,7 @@ class PythonASTFrontend(EVTFrontendBase, ast.NodeVisitor):
        self.add_edge(rhs, name, weight=1)
        return name

-    def visit_Assign(self, node: ast.BinOp):
+    def visit_Assign(self, node: ast.Assign):
        target = self.visit(node.targets[0])
        value = self.visit(node.value)
        # Create the assign node
--- a/python/cutlass_cppgen/backend/evt/ir/layout_algorithm.py
+++ b/python/cutlass_cppgen/backend/evt/ir/layout_algorithm.py
@@ -119,8 +119,8 @@ def _get_first_rhs_nonzero_stride(stride_list, idx):
    for i in range(idx+1, len(stride_list)):
        if stride_list[i] != 0:
            return i
-        else:
-            return None
+    else:
+        return None

 def reshape(layout, new_shape):
    """
--- a/python/cutlass_cppgen/backend/evt/passes/graph_drawer.py
+++ b/python/cutlass_cppgen/backend/evt/passes/graph_drawer.py
@@ -99,11 +99,10 @@ class EVTGraphDrawer:
            stride = node.tensor.stride
            label += f"|shape={shape}|stride={stride}"

-        if hasattr(node, "store_tensor"):
-            if node.store_tensor is not None:
-                store_shape = node.store_tensor.shape
-                store_stride = node.store_tensor.stride
-                label += f"|store_shape={store_shape}|stride_stride={store_stride}"
+        if hasattr(node, "store_tensor") and node.store_tensor is not None:
+            store_shape = node.store_tensor.shape
+            store_stride = node.store_tensor.stride
+            label += f"|store_shape={store_shape}|store_stride={store_stride}"

        label += "}"
        return label
@@ -114,7 +113,7 @@ class EVTGraphDrawer:
        name: str
    ):
        import pydot
-        dot_graph = pydot.Dot(name, randir="TB")
+        dot_graph = pydot.Dot(name, rankdir="TB")
        for node in graph.nodes_meta:
            style = self._get_node_style(node)
            label = self._get_node_label(node)
@@ -133,11 +132,11 @@ class EVTGraphDrawer:

        return dot_graph

-    def get_dot_graph(self) -> pydot.Dot:
+    def get_dot_graph(self) -> "pydot.Dot":
        return [(key, self.get_dot_graph_by_name(key)) for key in self._dot_graphs.keys()]

-    def get_dot_graph_by_name(self, name) -> pydot.Dot:
+    def get_dot_graph_by_name(self, name) -> "pydot.Dot":
        return self._dot_graphs[name]

-    def get_main_dot_graph(self) -> pydot.Dot:
+    def get_main_dot_graph(self) -> "pydot.Dot":
        return self._dot_graphs[self._name]
--- a/python/cutlass_cppgen/backend/evt/passes/pass_preprocess_red.py
+++ b/python/cutlass_cppgen/backend/evt/passes/pass_preprocess_red.py
@@ -51,15 +51,14 @@ class PassPreprocessRed(EVTPassBase):
        # Step 1: find the compute nodes with op=red
        red_compute_nodes = []
        for node_meta in self.dag_ir.nodes_meta:
-            if isinstance(node_meta, ComputeNode):
-                if type(node_meta.fn) == tuple:
-                    # To keep the frontend simple, the reduction nodes
-                    # are parsed into compute nodes by default
-                    # The simple heuristic to distinguish between compute
-                    # and reduction node is that compute node is a single function,
-                    # while the reduction node is a tuple of functions for
-                    # in-register reduction and atomic global memory reduction
-                    red_compute_nodes.append(node_meta.name)
+            if isinstance(node_meta, ComputeNode) and type(node_meta.fn) == tuple:
+                # To keep the frontend simple, the reduction nodes
+                # are parsed into compute nodes by default
+                # The simple heuristic to distinguish between compute
+                # and reduction node is that compute node is a single function,
+                # while the reduction node is a tuple of functions for
+                # in-register reduction and atomic global memory reduction
+                red_compute_nodes.append(node_meta.name)

        # Step 2: for each compute, merge it with the succeeding store
        for node in red_compute_nodes: