iree-org · IanWood1 · Apr 28, 2025 · Apr 28, 2025
@@ -131,13 +131,8 @@ void BubbleUpExpandShapesPass::runOnOperation() {
           return false;
         }
 
-        // Do not push down collapse shape across consumer if it is a bit-extend
-        // op. The bit-extend ops get cloned into producer dispatches, and the
-        // `collapse_shape` op going past dequant, prevents this clong.
-        if (IREE::LinalgExt::isBitExtendOp(consumer)) {
-          return false;
-        }
-
+        // If the producer generic op is an elementwise op, bubble up the expand
+        // shape past this operation.
         if (auto producerGenericOp = dyn_cast<linalg::GenericOp>(producer)) {
           // If producer generic op is elementwise op, bubble up the expand
           // shape past this operation.

@@ -24,6 +24,7 @@
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
+#include "mlir/Dialect/MemRef/Transforms/Transforms.h"
 #include "mlir/Dialect/Tensor/Transforms/Transforms.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 
@@ -54,17 +55,24 @@ static bool isFusableUsingTileAndFuse(Operation *producer,
                                IREE::Encoding::UnsetEncodingOp>(producer);
 }
 
-/// Control function to check if an `tensor.expand_shape` (which is producer of
-/// `opOperand`) should be pushed past the `genericOp` (which is the consumer of
-/// `opOperand`).
-static bool shouldSinkExpandShapeOp(OpOperand *opOperand) {
-  auto reshapeOp =
-      dyn_cast<tensor::ExpandShapeOp>(opOperand->get().getDefiningOp());
-  if (!reshapeOp) {
+/// Control function for bubbling a `tensor.collapse_shape` (the consumer of
+/// `opOperand`): true if the operand's producer is clonable into a dispatch.
+static bool shouldBubbleCollapseShapeOp(tensor::CollapseShapeOp collapseOp,
+                                        OpOperand *opOperand) {
+  Operation *producer = opOperand->get().getDefiningOp();
+  if (!producer) {
     return false;
   }
+  return IREE::Flow::isClonableIntoDispatchOp(producer);
+}
+
+/// Control function to check if a `tensor.expand_shape` (which is the producer
+/// of `opOperand`) should be pushed past the `genericOp` (which is the consumer
+/// of `opOperand`).
+static bool shouldSinkExpandShapeOp(tensor::ExpandShapeOp expandOp,
+                                    OpOperand *opOperand) {
   Operation *consumer = opOperand->getOwner();
-  if (!IREE::Flow::isNonNullAndOutsideDispatch({reshapeOp, consumer})) {
+  if (!IREE::Flow::isNonNullAndOutsideDispatch({expandOp, consumer})) {
     return false;
   }
   auto consumerGenericOp = dyn_cast<linalg::GenericOp>(consumer);
@@ -84,12 +92,11 @@ static bool shouldSinkExpandShapeOp(OpOperand *opOperand) {
   }
 
   // First check that the expand_shape producer and consumer can be fused.
-  Operation *reshapeProducer = reshapeOp.getSrc().getDefiningOp();
+  Operation *reshapeProducer = expandOp.getSrc().getDefiningOp();
   if (!reshapeProducer) {
     return false;
   }
-  if (!isFusableUsingTileAndFuse(reshapeOp.getSrc().getDefiningOp(),
-                                 consumer)) {
+  if (!isFusableUsingTileAndFuse(expandOp.getSrc().getDefiningOp(), consumer)) {
     return false;
   }
 
@@ -165,10 +172,31 @@ void SinkReshapesPass::runOnOperation() {
   MLIRContext *context = &getContext();
 
   RewritePatternSet sinkReshapePatterns(context);
+
+  // Dispatch on which side of `opOperand` the reshape is: a collapse_shape
+  // owner uses the bubbling check, an expand_shape producer the sinking check.
+  auto collapsingControlFn = [](OpOperand *opOperand) {
+    if (auto collapseOp =
+            dyn_cast<tensor::CollapseShapeOp>(opOperand->getOwner())) {
+      return shouldBubbleCollapseShapeOp(collapseOp, opOperand);
+    }
+    // `getDefiningOp()` is null for block arguments; cast null-safely.
+    if (auto expandOp = llvm::dyn_cast_or_null<tensor::ExpandShapeOp>(
+            opOperand->get().getDefiningOp())) {
+      return shouldSinkExpandShapeOp(expandOp, opOperand);
+    }
+    llvm_unreachable("reshape is neither a collapse or expand op");
+  };
   linalg::populateFoldReshapeOpsByCollapsingPatterns(sinkReshapePatterns,
-                                                     shouldSinkExpandShapeOp);
+                                                     collapsingControlFn);
   // Add patterns to fold `tensor.empty` and reshape ops.
   tensor::populateFoldTensorEmptyPatterns(sinkReshapePatterns);
+  memref::populateResolveRankedShapedTypeResultDimsPatterns(
+      sinkReshapePatterns);
+  tensor::ExpandShapeOp::getCanonicalizationPatterns(sinkReshapePatterns,
+                                                     context);
+  tensor::CollapseShapeOp::getCanonicalizationPatterns(sinkReshapePatterns,
+                                                       context);
   if (failed(applyPatternsGreedily(getOperation(),
                                    std::move(sinkReshapePatterns)))) {
     getOperation()->emitOpError("failed to sink reshape ops");

@@ -236,3 +236,38 @@ func.func @fuse_softmax_with_truncate(%arg0 : tensor<4x64x?xf32>) -> tensor<4x64
 //       CHECK:   %[[TRUNC:.+]] = linalg.generic {{.*}} ins(%[[SOFTMAX]]
 //       CHECK:   %[[EXPAND:.+]] = tensor.expand_shape %[[TRUNC]]
 //       CHECK:   return %[[EXPAND]]
+
+// -----
+
+// A collapse_shape sits between an elementwise `arith.extf` generic and a
+// reduction generic; the test expects it to be moved onto %arg0 instead.
+func.func @bubble_across_bit_extend(%arg0: tensor<2x64x32xf16>, %arg1 : tensor<2xf32>) -> tensor<2xf32> {
+  %init = tensor.empty() : tensor<2x64x32xf32>
+  %extended = linalg.generic {
+      indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
+      iterator_types = ["parallel", "parallel", "parallel"]}
+      ins(%arg0 : tensor<2x64x32xf16>) outs(%init : tensor<2x64x32xf32>) {
+    ^bb0(%in : f16, %out : f32):
+      %ext = arith.extf %in : f16 to f32
+      linalg.yield %ext : f32
+  } -> tensor<2x64x32xf32>
+  %collapsed = tensor.collapse_shape %extended [[0], [1, 2]] : tensor<2x64x32xf32> into tensor<2x2048xf32>
+  %reduced = linalg.generic {
+      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>],
+      iterator_types = ["parallel", "reduction"]}
+      ins(%collapsed : tensor<2x2048xf32>) outs(%arg1 : tensor<2xf32>) {
+    ^bb0(%in : f32, %acc : f32):
+      %sum = arith.addf %in, %acc : f32
+      linalg.yield %sum : f32
+  } -> tensor<2xf32>
+  func.return %reduced : tensor<2xf32>
+}
+// CHECK-LABEL: func @bubble_across_bit_extend
+//  CHECK-SAME:     %[[ARG0:.+]]: tensor<2x64x32xf16>
+//  CHECK-SAME:     %[[ARG1:.+]]: tensor<2xf32>
+//       CHECK:   %[[COLLAPSE:.+]] = tensor.collapse_shape %[[ARG0]]
+//       CHECK:   %[[GEN0:.+]] = linalg.generic
+//  CHECK-SAME:       ins(%[[COLLAPSE]] :
+//       CHECK:   %[[GEN1:.+]] = linalg.generic
+//  CHECK-SAME:       ins(%[[GEN0]] :
+//       CHECK:   return %[[GEN1]]