[Mlir-commits] [mlir] [vector][distribution] Bug fix in `moveRegionToNewWarpOpAndAppendReturns` (PR #153656)
Charitha Saumya
llvmlistbot at llvm.org
Mon Aug 18 12:59:23 PDT 2025
https://github.com/charithaintc updated https://github.com/llvm/llvm-project/pull/153656
>From 2ad884cb91dad7ee77ef39cec80edf242a1aba71 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge at intel.com>
Date: Wed, 13 Aug 2025 23:57:56 +0000
Subject: [PATCH 1/2] bug fix
---
.../Dialect/GPU/Utils/DistributionUtils.cpp | 32 +++++++++++--------
1 file changed, 18 insertions(+), 14 deletions(-)
diff --git a/mlir/lib/Dialect/GPU/Utils/DistributionUtils.cpp b/mlir/lib/Dialect/GPU/Utils/DistributionUtils.cpp
index 384d1a0ddccd2..be71bd02fc43b 100644
--- a/mlir/lib/Dialect/GPU/Utils/DistributionUtils.cpp
+++ b/mlir/lib/Dialect/GPU/Utils/DistributionUtils.cpp
@@ -14,6 +14,7 @@
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/IR/Value.h"
+#include "llvm/ADT/DenseMap.h"
#include <numeric>
@@ -57,26 +58,29 @@ WarpDistributionPattern::moveRegionToNewWarpOpAndAppendReturns(
warpOp.getResultTypes().end());
auto yield = cast<gpu::YieldOp>(
warpOp.getBodyRegion().getBlocks().begin()->getTerminator());
- llvm::SmallSetVector<Value, 32> yieldValues(yield.getOperands().begin(),
- yield.getOperands().end());
+ SmallVector<Value> yieldValues(yield.getOperands().begin(),
+ yield.getOperands().end());
+ llvm::SmallDenseMap<Value, unsigned> indexLookup;
+ // Record the value -> first index mapping for faster lookup.
+ for (auto [i, v] : llvm::enumerate(yieldValues)) {
+ if (!indexLookup.count(v))
+ indexLookup[v] = i;
+ }
+
for (auto [value, type] : llvm::zip_equal(newYieldedValues, newReturnTypes)) {
- if (yieldValues.insert(value)) {
+ // If the value already exists in the yield, don't create a new output.
+ if (indexLookup.count(value)) {
+ indices.push_back(indexLookup[value]);
+ } else {
+ // If the value is new, add it to the yield and to the types.
+ yieldValues.push_back(value);
types.push_back(type);
indices.push_back(yieldValues.size() - 1);
- } else {
- // If the value already exit the region don't create a new output.
- for (auto [idx, yieldOperand] :
- llvm::enumerate(yieldValues.getArrayRef())) {
- if (yieldOperand == value) {
- indices.push_back(idx);
- break;
- }
- }
}
}
- yieldValues.insert_range(newYieldedValues);
+
WarpExecuteOnLane0Op newWarpOp = moveRegionToNewWarpOpAndReplaceReturns(
- rewriter, warpOp, yieldValues.getArrayRef(), types);
+ rewriter, warpOp, yieldValues, types);
rewriter.replaceOp(warpOp,
newWarpOp.getResults().take_front(warpOp.getNumResults()));
return newWarpOp;
>From e49a6f18bdd5daf92ecab391051b12dd2edad6c5 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge at intel.com>
Date: Thu, 14 Aug 2025 19:31:48 +0000
Subject: [PATCH 2/2] add test
---
.../Vector/vector-warp-distribute.mlir | 21 +++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
index ae8fce786ee57..c3ce7e9ca7fda 100644
--- a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
+++ b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
@@ -1803,3 +1803,24 @@ func.func @warp_propagate_nd_write(%laneid: index, %dest: memref<4x1024xf32>) {
// CHECK-DIST-AND-PROP: %[[IDS:.+]]:2 = affine.delinearize_index %{{.*}} into (4, 8) : index, index
// CHECK-DIST-AND-PROP: %[[INNER_ID:.+]] = affine.apply #map()[%[[IDS]]#1]
// CHECK-DIST-AND-PROP: vector.transfer_write %[[W]], %{{.*}}[%[[IDS]]#0, %[[INNER_ID]]] {{.*}} : vector<1x128xf32>
+
+// -----
+func.func @warp_propagate_duplicated_operands_in_yield(%laneid: index) {
+ %r:3 = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<1xf32>, vector<1xf32>, vector<1xf32>) {
+ %0 = "some_def"() : () -> (vector<32xf32>)
+ %1 = "some_other_def"() : () -> (vector<32xf32>)
+ %2 = math.exp %1 : vector<32xf32>
+ gpu.yield %2, %0, %0 : vector<32xf32>, vector<32xf32>, vector<32xf32>
+ }
+ "some_use"(%r#0) : (vector<1xf32>) -> ()
+ return
+}
+
+// CHECK-PROP-LABEL: func.func @warp_propagate_duplicated_operands_in_yield(
+// CHECK-PROP: %[[W:.*]] = gpu.warp_execute_on_lane_0(%{{.*}})[32] -> (vector<1xf32>) {
+// CHECK-PROP: %{{.*}} = "some_def"() : () -> vector<32xf32>
+// CHECK-PROP: %[[T3:.*]] = "some_other_def"() : () -> vector<32xf32>
+// CHECK-PROP: gpu.yield %[[T3]] : vector<32xf32>
+// CHECK-PROP: }
+// CHECK-PROP: %[[T1:.*]] = math.exp %[[W]] : vector<1xf32>
+// CHECK-PROP: "some_use"(%[[T1]]) : (vector<1xf32>) -> ()
More information about the Mlir-commits
mailing list