[Mlir-commits] [mlir] [MLIR][AMDGPU] Support gpu::ShuffleMode::DOWN lowering in ROCDL (PR #106237)
Dragan Mladjenovic
llvmlistbot at llvm.org
Tue Aug 27 08:36:22 PDT 2024
https://github.com/draganmladjenovic created https://github.com/llvm/llvm-project/pull/106237
None
>From f3454263135e855ac809b3069d72254258f4401a Mon Sep 17 00:00:00 2001
From: Dragan Mladjenovic <Dragan.Mladjenovic at amd.com>
Date: Fri, 29 Mar 2024 12:27:36 +0000
Subject: [PATCH] [MLIR][AMDGPU] Support gpu::ShuffleMode::DOWN lowering in
ROCDL
---
.../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 4 ++++
.../Conversion/GPUToROCDL/gpu-to-rocdl.mlir | 18 ++++++++++++++++--
2 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 93e8b080a4f672..e3438036dace80 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -157,6 +157,10 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
// TODO: Use ds_swizzle for XOR when step/offsets are constants for better
// perf.
switch (op.getMode()) {
+ case gpu::ShuffleMode::DOWN:
+ dstLane = rewriter.create<LLVM::AddOp>(loc, int32Type, srcLaneId,
+ adaptor.getOffset());
+ break;
case gpu::ShuffleMode::XOR:
dstLane = rewriter.create<LLVM::XOrOp>(loc, int32Type, srcLaneId,
adaptor.getOffset());
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
index bf49a42a115775..2ad3a07b9fba66 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
@@ -600,7 +600,7 @@ gpu.module @test_module {
gpu.module @test_module {
// CHECK-LABEL: func @gpu_shuffle()
- func.func @gpu_shuffle() -> (f32, f32) {
+ func.func @gpu_shuffle() -> (f32, f32, f32) {
// CHECK: %[[#VALUE:]] = llvm.mlir.constant(1.000000e+00 : f32) : f32
%arg0 = arith.constant 1.0 : f32
// CHECK: %[[#OFFSET:]] = llvm.mlir.constant(4 : i32) : i32
@@ -634,7 +634,21 @@ gpu.module @test_module {
// CHECK: %[[#PERMUTE:]] = rocdl.ds_bpermute %[[#ALIGNED_DST_LANE]], %[[#CAST_VALUE]] : (i32, i32) -> i32
// CHECK: %[[#CAST_SHFL_VALUE:]] = llvm.bitcast %[[#PERMUTE]] : i32 to f32
%shfli, %predi = gpu.shuffle idx %arg0, %arg1, %arg2 : f32
- func.return %shfl, %shfli : f32, f32
+ // CHECK: %[[#LANE_ID:]] = rocdl.mbcnt.hi
+ // CHECK: %[[#ZERO:]] = llvm.mlir.constant(0 : i32) : i32
+ // CHECK: %[[#NEG_WIDTH:]] = llvm.sub %[[#ZERO]], %[[#WIDTH]] : i32
+ // CHECK: %[[#ADD:]] = llvm.add %[[#LANE_ID]], %[[#WIDTH]] : i32
+ // CHECK: %[[#WARP_OR_ZERO:]] = llvm.and %[[#ADD]], %[[#NEG_WIDTH]] : i32
+ // CHECK: %[[#DOWN:]] = llvm.add %[[#LANE_ID]], %{{.*}} : i32
+ // CHECK: %[[#CMP:]] = llvm.icmp "slt" %[[#DOWN]], %[[#WARP_OR_ZERO]] : i32
+ // CHECK: %[[#DST_LANE:]] = llvm.select %[[#CMP]], %[[#DOWN]], %{{.*}} : i1, i32
+ // CHECK: %[[#TWO:]] = llvm.mlir.constant(2 : i32) : i32
+ // CHECK: %[[#ALIGNED_DST_LANE:]] = llvm.shl %[[#DST_LANE]], %[[#TWO]] : i32
+ // CHECK: %[[#CAST_VALUE:]] = llvm.bitcast %[[#VALUE]] : f32 to i32
+ // CHECK: %[[#PERMUTE:]] = rocdl.ds_bpermute %[[#ALIGNED_DST_LANE]], %[[#CAST_VALUE]] : (i32, i32) -> i32
+ // CHECK: %[[#CAST_SHFL_VALUE:]] = llvm.bitcast %[[#PERMUTE]] : i32 to f32
+ %shfld, %predd = gpu.shuffle down %arg0, %arg1, %arg2 : f32
+ func.return %shfl, %shfli, %shfld : f32, f32, f32
}
}
More information about the Mlir-commits
mailing list