[Mlir-commits] [mlir] [mlir][vector] Propagate alignment in LowerVectorGather. (PR #155683)
Erick Ochoa Lopez
llvmlistbot at llvm.org
Wed Aug 27 12:38:13 PDT 2025
https://github.com/amd-eochoalo created https://github.com/llvm/llvm-project/pull/155683
The alignment attribute on vector.gather is now propagated by the
lowering patterns in LowerVectorGather.cpp (UnrollGather,
RemoveStrideFromGatherSource, and Gather1DToConditionalLoads) when the
--test-vector-gather-lowering pass is applied; previously the created
ops were built without it.
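Concretely, a condensed before/after sketch modeled on the new
@scalable_gather_with_alignment test below (SSA names %v0, %m0, %p0,
%g0, ... are illustrative, not actual pass output; the
vector.extract/insert plumbing between the ops is elided):

  // Input: a 2-D gather carrying an alignment hint.
  %0 = vector.gather %base[%c0, %c1] [%v], %mask, %pass_thru {alignment = 8}
      : memref<?x?xf32>, vector<2x[3]xindex>, vector<2x[3]xi1>,
        vector<2x[3]xf32> into vector<2x[3]xf32>

  // After unrolling: one 1-D gather per outermost row, each still
  // carrying the hint as {alignment = 8 : i64}.
  %g0 = vector.gather %base[%c0, %c1] [%v0], %m0, %p0 {alignment = 8 : i64}
      : memref<?x?xf32>, vector<[3]xindex>, vector<[3]xi1>,
        vector<[3]xf32> into vector<[3]xf32>
  %g1 = vector.gather %base[%c0, %c1] [%v1], %m1, %p1 {alignment = 8 : i64}
      : memref<?x?xf32>, vector<[3]xindex>, vector<[3]xi1>,
        vector<[3]xf32> into vector<[3]xf32>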
From 23efc1ed535ae3d785bb7c4d8b52197a1648376a Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Wed, 27 Aug 2025 09:26:10 -0700
Subject: [PATCH] [mlir][vector] Propagate alignment in LowerVectorGather.
Alignment is now properly propagated when the
--test-vector-gather-lowering pass is applied.
---
.../Vector/Transforms/LowerVectorGather.cpp | 18 +++++++++++----
.../Vector/vector-gather-lowering.mlir | 22 ++++++++++++++-----
2 files changed, 31 insertions(+), 9 deletions(-)
diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp
index 983018934a85c..72095e6df6742 100644
--- a/mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp
@@ -57,6 +57,10 @@ struct UnrollGather : OpRewritePattern<vector::GatherOp> {
Value indexVec = op.getIndices();
Value maskVec = op.getMask();
Value passThruVec = op.getPassThru();
+ auto alignment = op.getAlignment();
+ llvm::MaybeAlign align = alignment.has_value()
+ ? llvm::MaybeAlign(alignment.value())
+ : llvm::MaybeAlign();
auto unrollGatherFn = [&](PatternRewriter &rewriter, Location loc,
VectorType subTy, int64_t index) {
@@ -70,7 +74,7 @@ struct UnrollGather : OpRewritePattern<vector::GatherOp> {
vector::ExtractOp::create(rewriter, loc, passThruVec, thisIdx);
return vector::GatherOp::create(rewriter, loc, subTy, op.getBase(),
op.getOffsets(), indexSubVec, maskSubVec,
- passThruSubVec);
+ passThruSubVec, align);
};
return unrollVectorOp(op, rewriter, unrollGatherFn);
@@ -152,7 +156,8 @@ struct RemoveStrideFromGatherSource : OpRewritePattern<vector::GatherOp> {
// updated indices.
Value newGather = vector::GatherOp::create(
rewriter, op.getLoc(), op.getResult().getType(), collapsed,
- op.getOffsets(), newIdxs, op.getMask(), op.getPassThru());
+ op.getOffsets(), newIdxs, op.getMask(), op.getPassThru(),
+ op.getAlignmentAttr());
rewriter.replaceOp(op, newGather);
return success();
@@ -200,6 +205,11 @@ struct Gather1DToConditionalLoads : OpRewritePattern<vector::GatherOp> {
Value lastBaseOffset = baseOffsets.back();
Value result = op.getPassThru();
+ bool nontemporal = false;
+ auto alignment = op.getAlignment();
+ llvm::MaybeAlign align = alignment.has_value()
+ ? llvm::MaybeAlign(alignment.value())
+ : llvm::MaybeAlign();
// Emit a conditional access for each vector element.
for (int64_t i = 0, e = resultTy.getNumElements(); i < e; ++i) {
@@ -215,8 +225,8 @@ struct Gather1DToConditionalLoads : OpRewritePattern<vector::GatherOp> {
if (isa<MemRefType>(base.getType())) {
// `vector.load` does not support scalar result; emit a vector load
// and extract the single result instead.
- Value load =
- vector::LoadOp::create(b, loc, elemVecTy, base, baseOffsets);
+ Value load = vector::LoadOp::create(b, loc, elemVecTy, base,
+ baseOffsets, nontemporal, align);
int64_t zeroIdx[1] = {0};
extracted = vector::ExtractOp::create(b, loc, load, zeroIdx);
} else {
diff --git a/mlir/test/Dialect/Vector/vector-gather-lowering.mlir b/mlir/test/Dialect/Vector/vector-gather-lowering.mlir
index 0e1bad62ce763..73280197c5fc3 100644
--- a/mlir/test/Dialect/Vector/vector-gather-lowering.mlir
+++ b/mlir/test/Dialect/Vector/vector-gather-lowering.mlir
@@ -100,6 +100,18 @@ func.func @scalable_gather_memref_2d(%base: memref<?x?xf32>, %v: vector<2x[3]xin
return %0 : vector<2x[3]xf32>
}
+// CHECK-LABEL: @scalable_gather_with_alignment
+// CHECK: vector.gather
+// CHECK-SAME: {alignment = 8 : i64}
+// CHECK: vector.gather
+// CHECK-SAME: {alignment = 8 : i64}
+func.func @scalable_gather_with_alignment(%base: memref<?x?xf32>, %v: vector<2x[3]xindex>, %mask: vector<2x[3]xi1>, %pass_thru: vector<2x[3]xf32>) -> vector<2x[3]xf32> {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %0 = vector.gather %base[%c0, %c1][%v], %mask, %pass_thru {alignment = 8} : memref<?x?xf32>, vector<2x[3]xindex>, vector<2x[3]xi1>, vector<2x[3]xf32> into vector<2x[3]xf32>
+ return %0 : vector<2x[3]xf32>
+}
+
// CHECK-LABEL: @scalable_gather_cant_unroll
// CHECK-NOT: extract
// CHECK: vector.gather
@@ -234,7 +246,7 @@ func.func @strided_gather(%base : memref<100x3xf32>,
%mask = arith.constant dense<true> : vector<4xi1>
%pass_thru = arith.constant dense<0.000000e+00> : vector<4xf32>
// Gather of a strided MemRef
- %res = vector.gather %subview[%c0] [%idxs], %mask, %pass_thru : memref<100xf32, strided<[3]>>, vector<4xindex>, vector<4xi1>, vector<4xf32> into vector<4xf32>
+ %res = vector.gather %subview[%c0] [%idxs], %mask, %pass_thru {alignment = 8} : memref<100xf32, strided<[3]>>, vector<4xindex>, vector<4xi1>, vector<4xf32> into vector<4xf32>
return %res : vector<4xf32>
}
// CHECK-LABEL: func.func @strided_gather(
@@ -250,22 +262,22 @@ func.func @strided_gather(%base : memref<100x3xf32>,
// CHECK: %[[IDX_0:.*]] = vector.extract %[[NEW_IDXS]][0] : index from vector<4xindex>
// CHECK: scf.if %[[TRUE]] -> (vector<4xf32>)
-// CHECK: %[[M_0:.*]] = vector.load %[[COLLAPSED]][%[[IDX_0]]] : memref<300xf32>, vector<1xf32>
+// CHECK: %[[M_0:.*]] = vector.load %[[COLLAPSED]][%[[IDX_0]]] {alignment = 8 : i64} : memref<300xf32>, vector<1xf32>
// CHECK: %[[V_0:.*]] = vector.extract %[[M_0]][0] : f32 from vector<1xf32>
// CHECK: %[[IDX_1:.*]] = vector.extract %[[NEW_IDXS]][1] : index from vector<4xindex>
// CHECK: scf.if %[[TRUE]] -> (vector<4xf32>)
-// CHECK: %[[M_1:.*]] = vector.load %[[COLLAPSED]][%[[IDX_1]]] : memref<300xf32>, vector<1xf32>
+// CHECK: %[[M_1:.*]] = vector.load %[[COLLAPSED]][%[[IDX_1]]] {alignment = 8 : i64} : memref<300xf32>, vector<1xf32>
// CHECK: %[[V_1:.*]] = vector.extract %[[M_1]][0] : f32 from vector<1xf32>
// CHECK: %[[IDX_2:.*]] = vector.extract %[[NEW_IDXS]][2] : index from vector<4xindex>
// CHECK: scf.if %[[TRUE]] -> (vector<4xf32>)
-// CHECK: %[[M_2:.*]] = vector.load %[[COLLAPSED]][%[[IDX_2]]] : memref<300xf32>, vector<1xf32>
+// CHECK: %[[M_2:.*]] = vector.load %[[COLLAPSED]][%[[IDX_2]]] {alignment = 8 : i64} : memref<300xf32>, vector<1xf32>
// CHECK: %[[V_2:.*]] = vector.extract %[[M_2]][0] : f32 from vector<1xf32>
// CHECK: %[[IDX_3:.*]] = vector.extract %[[NEW_IDXS]][3] : index from vector<4xindex>
// CHECK: scf.if %[[TRUE]] -> (vector<4xf32>)
-// CHECK: %[[M_3:.*]] = vector.load %[[COLLAPSED]][%[[IDX_3]]] : memref<300xf32>, vector<1xf32>
+// CHECK: %[[M_3:.*]] = vector.load %[[COLLAPSED]][%[[IDX_3]]] {alignment = 8 : i64} : memref<300xf32>, vector<1xf32>
// CHECK: %[[V_3:.*]] = vector.extract %[[M_3]][0] : f32 from vector<1xf32>
// CHECK-LABEL: @scalable_gather_1d
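
As the updated CHECK lines above show, the 1-D conditional-load path now
forwards the alignment onto every vector.load it emits. To exercise the
change locally, an invocation along these lines should work (a sketch
based on the pass name in the description; see the test file's actual
RUN line for the authoritative command):

  mlir-opt --test-vector-gather-lowering \
      mlir/test/Dialect/Vector/vector-gather-lowering.mlir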