[Mlir-commits] [mlir] a8b7e56 - [mlir] Set insertion point of vector constant to the top of the vectorized loop body

Thu Jul 29 16:03:32 PDT 2021

Author: Amy Zhuang
Date: 2021-07-29T15:42:23-07:00
New Revision: a8b7e56f65c78a49ba0297c4ecabbd643fa40c25

URL: https://github.com/llvm/llvm-project/commit/a8b7e56f65c78a49ba0297c4ecabbd643fa40c25
DIFF: https://github.com/llvm/llvm-project/commit/a8b7e56f65c78a49ba0297c4ecabbd643fa40c25.diff

LOG: [mlir] Set insertion point of vector constant to the top of the vectorized loop body

When we vectorize a scalar constant that is defined outside the loops to be
vectorized, the vector constant is inserted before its first user. As a result,
the vector constant may not dominate all of its users, e.g. when the first user
is in a nested loop and a later user is in the enclosing loop body. To fix the
problem, we find the innermost vectorized loop that encloses that first user
and insert the vector constant at the top of that loop's body.

Reviewed By: nicolasvasilache

Differential Revision: https://reviews.llvm.org/D106609

Added: 
    

Modified: 
    mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
    mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir
    mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
index 570f2922d5809..d529d842caba3 100644

--- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
@@ -948,6 +948,16 @@ static ConstantOp vectorizeConstant(ConstantOp constOp,
 
   auto vecTy = getVectorType(scalarTy, state.strategy);
   auto vecAttr = DenseElementsAttr::get(vecTy, constOp.getValue());
+
+  OpBuilder::InsertionGuard guard(state.builder);
+  Operation *parentOp = state.builder.getInsertionBlock()->getParentOp();
+  // Find the innermost vectorized ancestor loop to insert the vector constant.
+  while (parentOp && !state.vecLoopToVecDim.count(parentOp))
+    parentOp = parentOp->getParentOp();
+  assert(parentOp && state.vecLoopToVecDim.count(parentOp) &&
+         isa<AffineForOp>(parentOp) && "Expected a vectorized for op");
+  auto vecForOp = cast<AffineForOp>(parentOp);
+  state.builder.setInsertionPointToStart(vecForOp.getBody());
   auto newConstOp = state.builder.create<ConstantOp>(constOp.getLoc(), vecAttr);
 
   // Register vector replacement for future uses in the scope.

diff  --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir
index 47e00f1faf4a5..ca2f5ca08dac4 100644
--- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir
+++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir
@@ -113,12 +113,12 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
   }
   affine.for %i4 = 0 to %M {
     affine.for %i5 = 0 to %N {
+      // CHECK: %[[SPLAT2:.*]] = constant dense<2.000000e+00> : vector<128xf32>
+      // CHECK: %[[SPLAT1:.*]] = constant dense<1.000000e+00> : vector<128xf32>
       // CHECK: %[[A5:.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{[a-zA-Z0-9_]*}} : memref<?x?xf32>, vector<128xf32>
       // CHECK: %[[B5:.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{[a-zA-Z0-9_]*}} : memref<?x?xf32>, vector<128xf32>
       // CHECK: %[[S5:.*]] = addf %[[A5]], %[[B5]] : vector<128xf32>
-      // CHECK: %[[SPLAT1:.*]] = constant dense<1.000000e+00> : vector<128xf32>
       // CHECK: %[[S6:.*]] = addf %[[S5]], %[[SPLAT1]] : vector<128xf32>
-      // CHECK: %[[SPLAT2:.*]] = constant dense<2.000000e+00> : vector<128xf32>
       // CHECK: %[[S7:.*]] = addf %[[S5]], %[[SPLAT2]] : vector<128xf32>
       // CHECK: %[[S8:.*]] = addf %[[S7]], %[[S6]] : vector<128xf32>
       // CHECK: vector.transfer_write %[[S8]], {{.*}} : vector<128xf32>, memref<?x?xf32>
@@ -142,6 +142,29 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
 
 // -----
 
+// CHECK-LABEL: func @vec_constant_with_two_users
+func @vec_constant_with_two_users(%M : index, %N : index) -> (f32, f32) {
+  %A = memref.alloc (%M, %N) : memref<?x?xf32, 0>
+  %B = memref.alloc (%M) : memref<?xf32, 0>
+  %f1 = constant 1.0 : f32
+  affine.for %i0 = 0 to %M { // vectorized
+    // CHECK:      %[[C1:.*]] = constant dense<1.000000e+00> : vector<128xf32>
+    // CHECK-NEXT: affine.for
+    // CHECK-NEXT:   vector.transfer_write %[[C1]], {{.*}} : vector<128xf32>, memref<?x?xf32>
+    affine.for %i1 = 0 to %N {
+      affine.store %f1, %A[%i1, %i0] : memref<?x?xf32, 0>
+    }
+    // CHECK: vector.transfer_write %[[C1]], {{.*}} : vector<128xf32>, memref<?xf32>
+    affine.store %f1, %B[%i0] : memref<?xf32, 0>
+  }
+  %c12 = constant 12 : index
+  %res1 = affine.load %A[%c12, %c12] : memref<?x?xf32, 0>
+  %res2 = affine.load %B[%c12] : memref<?xf32, 0>
+  return %res1, %res2 : f32, f32
+}
+
+// -----
+
 // CHECK-LABEL: func @vec_rejected_1
 func @vec_rejected_1(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
 // CHECK-DAG: %[[C0:.*]] = constant 0 : index
@@ -551,8 +574,8 @@ func @vec_non_vecdim_reductions(%in0: memref<128x256xf32>, %in1: memref<128x256x
 
 // CHECK-LABEL: @vec_non_vecdim_reductions
 // CHECK:       affine.for %{{.*}} = 0 to 256 step 128 {
-// CHECK:         %[[vzero:.*]] = constant dense<0.000000e+00> : vector<128xf32>
 // CHECK:         %[[vone:.*]] = constant dense<1> : vector<128xi32>
+// CHECK:         %[[vzero:.*]] = constant dense<0.000000e+00> : vector<128xf32>
 // CHECK:         %[[reds:.*]]:2 = affine.for %{{.*}} = 0 to 128
 // CHECK-SAME:      iter_args(%[[red_iter0:.*]] = %[[vzero]], %[[red_iter1:.*]] = %[[vone]]) -> (vector<128xf32>, vector<128xi32>) {
 // CHECK:           %[[ld0:.*]] = vector.transfer_read %{{.*}} : memref<128x256xf32>, vector<128xf32>

diff  --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir
index 3d467405805f2..27403e35bf611 100644
--- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir
+++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir
@@ -70,12 +70,12 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
   }
   affine.for %i4 = 0 to %M {
     affine.for %i5 = 0 to %N {
+      // CHECK: [[SPLAT2:%.*]] = constant dense<2.000000e+00> : vector<32x256xf32>
+      // CHECK: [[SPLAT1:%.*]] = constant dense<1.000000e+00> : vector<32x256xf32>
       // CHECK: [[A5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} : memref<?x?xf32>, vector<32x256xf32>
       // CHECK: [[B5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} : memref<?x?xf32>, vector<32x256xf32>
       // CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<32x256xf32>
-      // CHECK: [[SPLAT1:%.*]] = constant dense<1.000000e+00> : vector<32x256xf32>
       // CHECK: [[S6:%.*]] = addf [[S5]], [[SPLAT1]] : vector<32x256xf32>
-      // CHECK: [[SPLAT2:%.*]] = constant dense<2.000000e+00> : vector<32x256xf32>
       // CHECK: [[S7:%.*]] = addf [[S5]], [[SPLAT2]] : vector<32x256xf32>
       // CHECK: [[S8:%.*]] = addf [[S7]], [[S6]] : vector<32x256xf32>
       // CHECK: vector.transfer_write [[S8]], {{.*}} : vector<32x256xf32>, memref<?x?xf32>