[Mlir-commits] [mlir] 72d5ac9 - [mlir] Use affine dim instead of symbol in SCFToGPU lowering.
Tres Popp
llvmlistbot at llvm.org
Tue Oct 20 02:56:45 PDT 2020
Author: Tres Popp
Date: 2020-10-20T11:56:34+02:00
New Revision: 72d5ac90b9282cd17c8608a756ebb49c19ad4e04
URL: https://github.com/llvm/llvm-project/commit/72d5ac90b9282cd17c8608a756ebb49c19ad4e04
DIFF: https://github.com/llvm/llvm-project/commit/72d5ac90b9282cd17c8608a756ebb49c19ad4e04.diff
LOG: [mlir] Use affine dim instead of symbol in SCFToGPU lowering.
This still satisfies the constraints required by the affine dialect and
gives more flexibility in what iteration bounds can be used when
lowering to the GPU dialect.
Differential Revision: https://reviews.llvm.org/D89782
Added:
Modified:
mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
mlir/test/Conversion/SCFToGPU/parallel_loop.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
index 21a698628737..d494d12d0e4f 100644
--- a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
+++ b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
@@ -466,11 +466,10 @@ static LogicalResult processParallelLoop(
// Compute the number of iterations needed. We compute this as an
// affine expression ceilDiv (upperBound - lowerBound) step. We use
// affine.apply here so that it composes nicely with the provided map.
- AffineMap stepMap =
- AffineMap::get(0, 3,
- ((rewriter.getAffineSymbolExpr(0) -
- rewriter.getAffineSymbolExpr(1))
- .ceilDiv(rewriter.getAffineSymbolExpr(2))));
+ AffineMap stepMap = AffineMap::get(
+ 1, 2,
+ ((rewriter.getAffineDimExpr(0) - rewriter.getAffineSymbolExpr(0))
+ .ceilDiv(rewriter.getAffineSymbolExpr(1))));
Value launchBound = rewriter.create<AffineApplyOp>(
loc, annotation.bound().getValue().compose(stepMap),
ValueRange{
diff --git a/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir b/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir
index 5e3e2dc5aa82..3af50da6165f 100644
--- a/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir
+++ b/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir
@@ -15,7 +15,7 @@ func @parallel_loop_bidy_bidx(%arg0 : index, %arg1 : index, %arg2 : index,
return
}
-// CHECK: #[[$MAP0:.*]] = affine_map<()[s0, s1, s2] -> ((s0 - s1) ceildiv s2)>
+// CHECK: #[[$MAP0:.*]] = affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>
// CHECK: #[[$MAP1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>
// CHECK: module {
@@ -23,8 +23,8 @@ func @parallel_loop_bidy_bidx(%arg0 : index, %arg1 : index, %arg2 : index,
// CHECK-SAME: [[VAL_0:%.*]]: index, [[VAL_1:%.*]]: index, [[VAL_2:%.*]]: index, [[VAL_3:%.*]]: index, [[VAL_4:%.*]]: index, [[VAL_5:%.*]]: memref<?x?xf32>, [[VAL_6:%.*]]: memref<?x?xf32>) {
// CHECK: [[VAL_7:%.*]] = constant 2 : index
// CHECK: [[VAL_8:%.*]] = constant 1 : index
-// CHECK: [[VAL_9:%.*]] = affine.apply #[[$MAP0]](){{\[}}[[VAL_2]], [[VAL_0]], [[VAL_4]]]
-// CHECK: [[VAL_10:%.*]] = affine.apply #[[$MAP0]](){{\[}}[[VAL_3]], [[VAL_1]], [[VAL_7]]]
+// CHECK: [[VAL_9:%.*]] = affine.apply #[[$MAP0]]([[VAL_2]]){{\[}}[[VAL_0]], [[VAL_4]]]
+// CHECK: [[VAL_10:%.*]] = affine.apply #[[$MAP0]]([[VAL_3]]){{\[}}[[VAL_1]], [[VAL_7]]]
// CHECK: gpu.launch blocks([[VAL_11:%.*]], [[VAL_12:%.*]], [[VAL_13:%.*]]) in ([[VAL_14:%.*]] = [[VAL_10]], [[VAL_15:%.*]] = [[VAL_9]], [[VAL_16:%.*]] = [[VAL_8]]) threads([[VAL_17:%.*]], [[VAL_18:%.*]], [[VAL_19:%.*]]) in ([[VAL_20:%.*]] = [[VAL_8]], [[VAL_21:%.*]] = [[VAL_8]], [[VAL_22:%.*]] = [[VAL_8]]) {
// CHECK: [[VAL_23:%.*]] = affine.apply #[[$MAP1]]([[VAL_12]]){{\[}}[[VAL_4]], [[VAL_0]]]
// CHECK: [[VAL_24:%.*]] = affine.apply #[[$MAP1]]([[VAL_11]]){{\[}}[[VAL_7]], [[VAL_1]]]
@@ -66,7 +66,7 @@ func @parallel_loop_tiled(%arg0 : index, %arg1 : index, %arg2 : index,
return
}
-// CHECK: #[[$MAP0:.*]] = affine_map<()[s0, s1, s2] -> ((s0 - s1) ceildiv s2)>
+// CHECK: #[[$MAP0:.*]] = affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>
// CHECK: #[[$MAP1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>
// CHECK: module {
@@ -76,10 +76,10 @@ func @parallel_loop_tiled(%arg0 : index, %arg1 : index, %arg2 : index,
// CHECK: [[VAL_33:%.*]] = constant 1 : index
// CHECK: [[VAL_34:%.*]] = constant 4 : index
// CHECK: [[VAL_35:%.*]] = constant 1 : index
-// CHECK: [[VAL_36:%.*]] = affine.apply #[[$MAP0]](){{\[}}[[VAL_28]], [[VAL_26]], [[VAL_34]]]
-// CHECK: [[VAL_37:%.*]] = affine.apply #[[$MAP0]](){{\[}}[[VAL_29]], [[VAL_27]], [[VAL_34]]]
-// CHECK: [[VAL_38:%.*]] = affine.apply #[[$MAP0]](){{\[}}[[VAL_34]], [[VAL_32]], [[VAL_33]]]
-// CHECK: [[VAL_39:%.*]] = affine.apply #[[$MAP0]](){{\[}}[[VAL_34]], [[VAL_32]], [[VAL_33]]]
+// CHECK: [[VAL_36:%.*]] = affine.apply #[[$MAP0]]([[VAL_28]]){{\[}}[[VAL_26]], [[VAL_34]]]
+// CHECK: [[VAL_37:%.*]] = affine.apply #[[$MAP0]]([[VAL_29]]){{\[}}[[VAL_27]], [[VAL_34]]]
+// CHECK: [[VAL_38:%.*]] = affine.apply #[[$MAP0]]([[VAL_34]]){{\[}}[[VAL_32]], [[VAL_33]]]
+// CHECK: [[VAL_39:%.*]] = affine.apply #[[$MAP0]]([[VAL_34]]){{\[}}[[VAL_32]], [[VAL_33]]]
// CHECK: gpu.launch blocks([[VAL_40:%.*]], [[VAL_41:%.*]], [[VAL_42:%.*]]) in ([[VAL_43:%.*]] = [[VAL_37]], [[VAL_44:%.*]] = [[VAL_36]], [[VAL_45:%.*]] = [[VAL_35]]) threads([[VAL_46:%.*]], [[VAL_47:%.*]], [[VAL_48:%.*]]) in ([[VAL_49:%.*]] = [[VAL_39]], [[VAL_50:%.*]] = [[VAL_38]], [[VAL_51:%.*]] = [[VAL_35]]) {
// CHECK: [[VAL_52:%.*]] = affine.apply #[[$MAP1]]([[VAL_41]]){{\[}}[[VAL_34]], [[VAL_26]]]
// CHECK: [[VAL_53:%.*]] = affine.apply #[[$MAP1]]([[VAL_40]]){{\[}}[[VAL_34]], [[VAL_27]]]
@@ -115,7 +115,7 @@ func @parallel_loop_bidy_seq(%arg0 : index, %arg1 : index, %arg2 : index,
return
}
-// CHECK: #[[$MAP0:.*]] = affine_map<()[s0, s1, s2] -> ((s0 - s1) ceildiv s2)>
+// CHECK: #[[$MAP0:.*]] = affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>
// CHECK: #[[$MAP1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>
// CHECK: module {
@@ -123,7 +123,7 @@ func @parallel_loop_bidy_seq(%arg0 : index, %arg1 : index, %arg2 : index,
// CHECK-SAME: [[VAL_59:%.*]]: index, [[VAL_60:%.*]]: index, [[VAL_61:%.*]]: index, [[VAL_62:%.*]]: index, [[VAL_63:%.*]]: index, [[VAL_64:%.*]]: memref<?x?xf32>, [[VAL_65:%.*]]: memref<?x?xf32>) {
// CHECK: [[VAL_66:%.*]] = constant 2 : index
// CHECK: [[VAL_67:%.*]] = constant 1 : index
-// CHECK: [[VAL_68:%.*]] = affine.apply #[[$MAP0]](){{\[}}[[VAL_61]], [[VAL_59]], [[VAL_63]]]
+// CHECK: [[VAL_68:%.*]] = affine.apply #[[$MAP0]]([[VAL_61]]){{\[}}[[VAL_59]], [[VAL_63]]]
// CHECK: gpu.launch blocks([[VAL_69:%.*]], [[VAL_70:%.*]], [[VAL_71:%.*]]) in ([[VAL_72:%.*]] = [[VAL_67]], [[VAL_73:%.*]] = [[VAL_68]], [[VAL_74:%.*]] = [[VAL_67]]) threads([[VAL_75:%.*]], [[VAL_76:%.*]], [[VAL_77:%.*]]) in ([[VAL_78:%.*]] = [[VAL_67]], [[VAL_79:%.*]] = [[VAL_67]], [[VAL_80:%.*]] = [[VAL_67]]) {
// CHECK: [[VAL_81:%.*]] = affine.apply #[[$MAP1]]([[VAL_70]]){{\[}}[[VAL_63]], [[VAL_59]]]
// CHECK: scf.for [[VAL_82:%.*]] = [[VAL_60]] to [[VAL_62]] step [[VAL_66]] {
@@ -166,7 +166,7 @@ func @parallel_loop_tiled_seq(%arg0 : index, %arg1 : index, %arg2 : index,
return
}
-// CHECK: #[[$MAP0:.*]] = affine_map<()[s0, s1, s2] -> ((s0 - s1) ceildiv s2)>
+// CHECK: #[[$MAP0:.*]] = affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>
// CHECK: #[[$MAP1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>
// CHECK: module {
@@ -176,8 +176,8 @@ func @parallel_loop_tiled_seq(%arg0 : index, %arg1 : index, %arg2 : index,
// CHECK: [[VAL_91:%.*]] = constant 1 : index
// CHECK: [[VAL_92:%.*]] = constant 4 : index
// CHECK: [[VAL_93:%.*]] = constant 1 : index
-// CHECK: [[VAL_94:%.*]] = affine.apply #[[$MAP0]](){{\[}}[[VAL_86]], [[VAL_84]], [[VAL_92]]]
-// CHECK: [[VAL_95:%.*]] = affine.apply #[[$MAP0]](){{\[}}[[VAL_92]], [[VAL_90]], [[VAL_91]]]
+// CHECK: [[VAL_94:%.*]] = affine.apply #[[$MAP0]]([[VAL_86]]){{\[}}[[VAL_84]], [[VAL_92]]]
+// CHECK: [[VAL_95:%.*]] = affine.apply #[[$MAP0]]([[VAL_92]]){{\[}}[[VAL_90]], [[VAL_91]]]
// CHECK: gpu.launch blocks([[VAL_96:%.*]], [[VAL_97:%.*]], [[VAL_98:%.*]]) in ([[VAL_99:%.*]] = [[VAL_93]], [[VAL_100:%.*]] = [[VAL_94]], [[VAL_101:%.*]] = [[VAL_93]]) threads([[VAL_102:%.*]], [[VAL_103:%.*]], [[VAL_104:%.*]]) in ([[VAL_105:%.*]] = [[VAL_93]], [[VAL_106:%.*]] = [[VAL_95]], [[VAL_107:%.*]] = [[VAL_93]]) {
// CHECK: [[VAL_108:%.*]] = affine.apply #[[$MAP1]]([[VAL_97]]){{\[}}[[VAL_92]], [[VAL_84]]]
// CHECK: scf.for [[VAL_109:%.*]] = [[VAL_85]] to [[VAL_87]] step [[VAL_92]] {
@@ -242,7 +242,7 @@ module {
}
// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
-// CHECK: #[[$MAP1:.*]] = affine_map<()[s0, s1, s2] -> ((s0 - s1) ceildiv s2)>
+// CHECK: #[[$MAP1:.*]] = affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>
// CHECK: #[[$MAP2:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>
// CHECK: #[[$MAP3:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
// CHECK: #[[$MAP4:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)>
@@ -258,12 +258,12 @@ module {
// CHECK: [[VAL_7:%.*]] = dim [[VAL_0]], %[[C0]] : memref<?x?xf32, #[[$MAP0]]>
// CHECK: [[VAL_8:%.*]] = dim [[VAL_0]], %[[C1]] : memref<?x?xf32, #[[$MAP0]]>
// CHECK: [[VAL_9:%.*]] = constant 1 : index
-// CHECK: [[VAL_10:%.*]] = affine.apply #[[$MAP1]](){{\[}}[[VAL_7]], %[[C0]], %[[C2]]]
-// CHECK: [[VAL_11:%.*]] = affine.apply #[[$MAP1]](){{\[}}[[VAL_8]], %[[C0]], %[[C3]]]
+// CHECK: [[VAL_10:%.*]] = affine.apply #[[$MAP1]]([[VAL_7]]){{\[}}%[[C0]], %[[C2]]]
+// CHECK: [[VAL_11:%.*]] = affine.apply #[[$MAP1]]([[VAL_8]]){{\[}}%[[C0]], %[[C3]]]
// CHECK: [[VAL_12:%.*]] = constant 4 : index
-// CHECK: [[VAL_13:%.*]] = affine.apply #[[$MAP1]](){{\[}}[[VAL_12]], %[[C0]], %[[C1]]]
+// CHECK: [[VAL_13:%.*]] = affine.apply #[[$MAP1]]([[VAL_12]]){{\[}}%[[C0]], %[[C1]]]
// CHECK: [[VAL_14:%.*]] = constant 3 : index
-// CHECK: [[VAL_15:%.*]] = affine.apply #[[$MAP1]](){{\[}}[[VAL_14]], %[[C0]], %[[C1]]]
+// CHECK: [[VAL_15:%.*]] = affine.apply #[[$MAP1]]([[VAL_14]]){{\[}}%[[C0]], %[[C1]]]
// CHECK: gpu.launch blocks([[VAL_16:%.*]], [[VAL_17:%.*]], [[VAL_18:%.*]]) in ([[VAL_19:%.*]] = [[VAL_10]], [[VAL_20:%.*]] = [[VAL_11]], [[VAL_21:%.*]] = [[VAL_9]]) threads([[VAL_22:%.*]], [[VAL_23:%.*]], [[VAL_24:%.*]]) in ([[VAL_25:%.*]] = [[VAL_13]], [[VAL_26:%.*]] = [[VAL_15]], [[VAL_27:%.*]] = [[VAL_9]]) {
// CHECK: [[VAL_28:%.*]] = affine.apply #[[$MAP2]]([[VAL_16]]){{\[}}%[[C2]], %[[C0]]]
// CHECK: [[VAL_29:%.*]] = affine.apply #[[$MAP2]]([[VAL_17]]){{\[}}%[[C3]], %[[C0]]]
More information about the Mlir-commits
mailing list