[Mlir-commits] [mlir] [MLIR][Vector] Hoist uniform scalar loop code after scf.for distribution (PR #71422)

Mon Nov 6 09:14:52 PST 2023

llvmbot wrote:




@llvm/pr-subscribers-mlir-vector

Author: Quinn Dawkins (qedawkins)

<details>
<summary>Changes</summary>

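After `vector.warp_execute_on_lane_0` has been propagated through `scf.for`, uniform scalar operations in the loop body, such as arithmetic on the loop induction variable, can be hoisted out of the inner warp op. This change performs that hoisting at the end of the `WarpOpScfForOp` pattern.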

---
Full diff: https://github.com/llvm/llvm-project/pull/71422.diff


2 Files Affected:

- (modified) mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp (+3) 
- (modified) mlir/test/Dialect/Vector/vector-warp-distribute.mlir (+5-3) 


``````````diff

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp
index 8b4575e96875409..78015e3deeb967e 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp
@@ -1560,6 +1560,9 @@ struct WarpOpScfForOp : public OpRewritePattern<WarpExecuteOnLane0Op> {
         operand.set(innerWarp.getBodyRegion().getArgument(it->second));
       }
     });
+
+    // Finally, hoist out any now uniform code from the inner warp op.
+    mlir::vector::moveScalarUniformCode(innerWarp);
     return success();
   }
 
diff --git a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
index 3bb981c7a623886..5ec02ce002ffbd6 100644
--- a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
+++ b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
@@ -322,10 +322,11 @@ func.func @extract_scalar_vector_broadcast(%laneid: index) {
 // CHECK-PROP:   %[[INI1:.*]] = "some_def"() : () -> vector<128xf32>
 // CHECK-PROP:   vector.yield %[[INI1]] : vector<128xf32>
 // CHECK-PROP: }
-// CHECK-PROP: %[[F:.*]] = scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[FARG:.*]] = %[[INI]]) -> (vector<4xf32>) {
+// CHECK-PROP: %[[F:.*]] = scf.for %[[IT:.+]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[FARG:.*]] = %[[INI]]) -> (vector<4xf32>) {
+// CHECK-PROP:   %[[A:.*]] = arith.addi %[[IT]], %{{.*}} : index
 // CHECK-PROP:   %[[W:.*]] = vector.warp_execute_on_lane_0(%{{.*}})[32] args(%[[FARG]] : vector<4xf32>) -> (vector<4xf32>) {
 // CHECK-PROP:    ^bb0(%[[ARG:.*]]: vector<128xf32>):
-// CHECK-PROP:      %[[ACC:.*]] = "some_def"(%[[ARG]]) : (vector<128xf32>) -> vector<128xf32>
+// CHECK-PROP:      %[[ACC:.*]] = "some_def"(%[[A]], %[[ARG]]) : (index, vector<128xf32>) -> vector<128xf32>
 // CHECK-PROP:      vector.yield %[[ACC]] : vector<128xf32>
 // CHECK-PROP:   }
 // CHECK-PROP:   scf.yield %[[W]] : vector<4xf32>
@@ -338,7 +339,8 @@ func.func @warp_scf_for(%arg0: index) {
   %0 = vector.warp_execute_on_lane_0(%arg0)[32] -> (vector<4xf32>) {
     %ini = "some_def"() : () -> (vector<128xf32>)
     %3 = scf.for %arg3 = %c0 to %c128 step %c1 iter_args(%arg4 = %ini) -> (vector<128xf32>) {
-      %acc = "some_def"(%arg4) : (vector<128xf32>) -> (vector<128xf32>)
+      %add = arith.addi %arg3, %c1 : index
+      %acc = "some_def"(%add, %arg4) : (index, vector<128xf32>) -> (vector<128xf32>)
       scf.yield %acc : vector<128xf32>
     }
     vector.yield %3 : vector<128xf32>

``````````

</details>


https://github.com/llvm/llvm-project/pull/71422