[Mlir-commits] [mlir] 14858cf - [mlir][Conversion/GPUCommon] Fix bug in conversion of `math` ops
Christopher Bate
llvmlistbot at llvm.org
Mon Jul 3 12:26:59 PDT 2023
Author: Christopher Bate
Date: 2023-07-03T13:26:51-06:00
New Revision: 14858cf05dc7cbc0f34629d693b0039c3d15c34f
URL: https://github.com/llvm/llvm-project/commit/14858cf05dc7cbc0f34629d693b0039c3d15c34f
DIFF: https://github.com/llvm/llvm-project/commit/14858cf05dc7cbc0f34629d693b0039c3d15c34f.diff
LOG: [mlir][Conversion/GPUCommon] Fix bug in conversion of `math` ops
The common GPU operation transformation that lowers `math` operations
to function calls in the `gpu-to-nvvm` and `gpu-to-rocdl` passes handles
`vector` types by applying the function to each scalar element and
assembling the results into a new vector. However, a typo discarded the
result of each `llvm.insertelement`, so the result vector was never
accumulated and the rewrite returned the initial `llvm.mlir.undef` value
instead of the correct vector. This patch fixes the accumulation and
strengthens the tests to check the insert chain and the returned value.
Reviewed By: ThomasRaoux
Differential Revision: https://reviews.llvm.org/D154269
Added:
Modified:
mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
index 38b7248e397250..2fe1c7c7f7af14 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
@@ -485,8 +485,8 @@ LogicalResult impl::scalarizeVectorOp(Operation *op, ValueRange operands,
auto scalarOperands = llvm::map_to_vector(operands, extractElement);
Operation *scalarOp =
rewriter.create(loc, name, scalarOperands, elementType, op->getAttrs());
- rewriter.create<LLVM::InsertElementOp>(loc, result, scalarOp->getResult(0),
- index);
+ result = rewriter.create<LLVM::InsertElementOp>(
+ loc, result, scalarOp->getResult(0), index);
}
rewriter.replaceOp(op, result);
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index b2d8b8ea3290eb..56c322dd94592b 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -516,10 +516,16 @@ gpu.module @test_module {
// CHECK-LABEL: func @gpu_unroll
func.func @gpu_unroll(%arg0 : vector<4xf32>) -> vector<4xf32> {
%result = math.exp %arg0 : vector<4xf32>
- // CHECK: llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32
- // CHECK: llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32
- // CHECK: llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32
- // CHECK: llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32
+ // CHECK: %[[V0:.+]] = llvm.mlir.undef : vector<4xf32>
+ // CHECK: %[[CL:.+]] = llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32
+ // CHECK: %[[V1:.+]] = llvm.insertelement %[[CL]], %[[V0]]
+ // CHECK: %[[CL:.+]] = llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32
+ // CHECK: %[[V2:.+]] = llvm.insertelement %[[CL]], %[[V1]]
+ // CHECK: %[[CL:.+]] = llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32
+ // CHECK: %[[V3:.+]] = llvm.insertelement %[[CL]], %[[V2]]
+ // CHECK: %[[CL:.+]] = llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32
+ // CHECK: %[[V4:.+]] = llvm.insertelement %[[CL]], %[[V3]]
+ // CHECK: return %[[V4]]
func.return %result : vector<4xf32>
}
}
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
index aec44034451fbe..1ca6b867c79eb3 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
@@ -456,10 +456,16 @@ gpu.module @test_module {
// CHECK-LABEL: func @gpu_unroll
func.func @gpu_unroll(%arg0 : vector<4xf32>) -> vector<4xf32> {
%result = math.exp %arg0 : vector<4xf32>
- // CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32
- // CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32
- // CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32
- // CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32
+ // CHECK: %[[V0:.+]] = llvm.mlir.undef : vector<4xf32>
+ // CHECK: %[[CL:.+]] = llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32
+ // CHECK: %[[V1:.+]] = llvm.insertelement %[[CL]], %[[V0]]
+ // CHECK: %[[CL:.+]] = llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32
+ // CHECK: %[[V2:.+]] = llvm.insertelement %[[CL]], %[[V1]]
+ // CHECK: %[[CL:.+]] = llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32
+ // CHECK: %[[V3:.+]] = llvm.insertelement %[[CL]], %[[V2]]
+ // CHECK: %[[CL:.+]] = llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32
+ // CHECK: %[[V4:.+]] = llvm.insertelement %[[CL]], %[[V3]]
+ // CHECK: return %[[V4]]
func.return %result : vector<4xf32>
}
}
More information about the Mlir-commits mailing list