[Mlir-commits] [mlir] cdf7ca6 - [MLIR][ROCDL] Add conversion for gpu.lane_id to ROCDL
Krzysztof Drewniak
llvmlistbot at llvm.org
Wed Jul 26 08:12:53 PDT 2023
Author: SJW
Date: 2023-07-26T15:12:48Z
New Revision: cdf7ca6db76b09d58e7c5ac1d8156034dd29c3c7
URL: https://github.com/llvm/llvm-project/commit/cdf7ca6db76b09d58e7c5ac1d8156034dd29c3c7
DIFF: https://github.com/llvm/llvm-project/commit/cdf7ca6db76b09d58e7c5ac1d8156034dd29c3c7.diff
LOG: [MLIR][ROCDL] Add conversion for gpu.lane_id to ROCDL
Creates rocdl.lane_id op with llvm conversion to:
__device__ static unsigned int __lane_id() {
return __builtin_amdgcn_mbcnt_hi(
-1, __builtin_amdgcn_mbcnt_lo(-1, 0));
}
Reviewed By: krzysz00
Differential Revision: https://reviews.llvm.org/D154666
Added:
Modified:
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
mlir/test/Target/LLVMIR/rocdl.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index b173ce55ed350c..e9c349577dfbca 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -54,6 +54,10 @@ class ROCDL_Op<string mnemonic, list<Trait> traits = []> :
LLVM_OpBase<ROCDL_Dialect, mnemonic, traits> {
}
+class ROCDL_IntrPure1Op<string mnemonic> :
+ LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
+ "amdgcn_" # !subst(".", "_", mnemonic), [], [], [Pure], 1>;
+
//===----------------------------------------------------------------------===//
// ROCDL special register op definitions
//===----------------------------------------------------------------------===//
@@ -77,6 +81,20 @@ class ROCDL_DeviceFunctionOp<string mnemonic, string device_function,
let assemblyFormat = "attr-dict `:` type($res)";
}
+//===----------------------------------------------------------------------===//
+// Wave-level primitives
+
+class ROCDL_MbcntOp<string mnemonic> :
+ ROCDL_IntrPure1Op<"mbcnt." # mnemonic>,
+ Arguments<(ins I32:$in0, I32:$in1)> {
+ let assemblyFormat = [{
+ $in0 `,` $in1 attr-dict `:` `(` type($in0) `,` type($in1) `)` `->` type($res)
+ }];
+}
+
+def ROCDL_MbcntLoOp : ROCDL_MbcntOp<"lo">;
+def ROCDL_MbcntHiOp : ROCDL_MbcntOp<"hi">;
+
//===----------------------------------------------------------------------===//
// Thread index and Block index
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index bd820130374837..ed40b773b3c4bf 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -60,6 +60,38 @@ static bool canBeCalledWithBarePointers(gpu::GPUFuncOp func) {
}
namespace {
+struct GPULaneIdOpToROCDL : ConvertOpToLLVMPattern<gpu::LaneIdOp> {
+ using ConvertOpToLLVMPattern<gpu::LaneIdOp>::ConvertOpToLLVMPattern;
+
+ LogicalResult
+ matchAndRewrite(gpu::LaneIdOp op, gpu::LaneIdOp::Adaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ auto loc = op->getLoc();
+ MLIRContext *context = rewriter.getContext();
+ // convert to: %mlo = call @llvm.amdgcn.mbcnt.lo(-1, 0)
+ // followed by: %lid = call @llvm.amdgcn.mbcnt.hi(-1, %mlo)
+
+ Type intTy = IntegerType::get(context, 32);
+ Value zero = rewriter.createOrFold<arith::ConstantIntOp>(loc, 0, 32);
+ Value minus1 = rewriter.createOrFold<arith::ConstantIntOp>(loc, -1, 32);
+ Value mbcntLo =
+ rewriter.create<ROCDL::MbcntLoOp>(loc, intTy, ValueRange{minus1, zero});
+ Value laneId = rewriter.create<ROCDL::MbcntHiOp>(
+ loc, intTy, ValueRange{minus1, mbcntLo});
+ // Truncate or extend the result depending on the index bitwidth specified
+ // by the LLVMTypeConverter options.
+ const unsigned indexBitwidth = getTypeConverter()->getIndexTypeBitwidth();
+ if (indexBitwidth > 32) {
+ laneId = rewriter.create<LLVM::SExtOp>(
+ loc, IntegerType::get(context, indexBitwidth), laneId);
+ } else if (indexBitwidth < 32) {
+ laneId = rewriter.create<LLVM::TruncOp>(
+ loc, IntegerType::get(context, indexBitwidth), laneId);
+ }
+ rewriter.replaceOp(op, {laneId});
+ return success();
+ }
+};
/// Import the GPU Ops to ROCDL Patterns.
#include "GPUToROCDL.cpp.inc"
@@ -240,6 +272,8 @@ void mlir::populateGpuToROCDLConversionPatterns(
patterns.add<GPUPrintfOpToLLVMCallLowering>(converter, /*addressSpace=*/4);
}
+ patterns.add<GPULaneIdOpToROCDL>(converter);
+
populateOpPatterns<math::AbsFOp>(converter, patterns, "__ocml_fabs_f32",
"__ocml_fabs_f64");
populateOpPatterns<math::AtanOp>(converter, patterns, "__ocml_atan_f32",
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
index 1ca6b867c79eb3..6e90fe74614545 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
@@ -6,7 +6,8 @@ gpu.module @test_module {
// CHECK32-LABEL: func @gpu_index_ops()
func.func @gpu_index_ops()
-> (index, index, index, index, index, index,
- index, index, index, index, index, index) {
+ index, index, index, index, index, index,
+ index) {
// CHECK32-NOT: = llvm.sext %{{.*}} : i32 to i64
// CHECK: rocdl.workitem.id.x : i32
@@ -49,10 +50,17 @@ gpu.module @test_module {
// CHECK: = llvm.sext %{{.*}} : i32 to i64
%gDimZ = gpu.grid_dim z
+ // CHECK: = rocdl.mbcnt.lo %{{.*}}, %{{.*}} : (i32, i32) -> i32
+ // CHECK: = rocdl.mbcnt.hi %{{.*}}, %{{.*}} : (i32, i32) -> i32
+ // CHECK: = llvm.sext %{{.*}} : i32 to i64
+ %laneId = gpu.lane_id
+
func.return %tIdX, %tIdY, %tIdZ, %bDimX, %bDimY, %bDimZ,
- %bIdX, %bIdY, %bIdZ, %gDimX, %gDimY, %gDimZ
+ %bIdX, %bIdY, %bIdZ, %gDimX, %gDimY, %gDimZ,
+ %laneId
: index, index, index, index, index, index,
- index, index, index, index, index, index
+ index, index, index, index, index, index,
+ index
}
}
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index c75f5acc854000..dfee70b99d3910 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -56,6 +56,16 @@ llvm.func @known_block_sizes()
llvm.return
}
+llvm.func @rocdl.lane_id() -> i32 {
+ // CHECK: [[mbcntlo:%.+]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+ // CHECK-NEXT: call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[mbcntlo]])
+ %0 = llvm.mlir.constant(-1 : i32) : i32
+ %1 = llvm.mlir.constant(0 : i32) : i32
+ %2 = rocdl.mbcnt.lo %0, %1 : (i32, i32) -> i32
+ %3 = rocdl.mbcnt.hi %0, %2 : (i32, i32) -> i32
+ llvm.return %3 : i32
+}
+
llvm.func @rocdl.barrier() {
// CHECK: fence syncscope("workgroup") release
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
More information about the Mlir-commits
mailing list