[Mlir-commits] [mlir] [MLIR][GPU] Lower subgroup query ops in gpu-to-llvm-spv (PR #108839)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Tue Sep 17 08:27:29 PDT 2024
https://github.com/FMarno updated https://github.com/llvm/llvm-project/pull/108839
>From 2e4df871cbbf148f78c739c3bbf20ce23e889aa1 Mon Sep 17 00:00:00 2001
From: Finlay Marno <finlay.marno at codeplay.com>
Date: Mon, 16 Sep 2024 15:38:58 +0100
Subject: [PATCH 1/5] [MLIR][GPU] lower subgroup query ops in gpu-to-llvm-spv
These ops are:
* gpu.subgroup_id
* gpu.lane_id
* gpu.num_subgroups
* gpu.subgroup_size
Signed-off-by: Finlay Marno <finlay.marno at codeplay.com>
---
.../Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 51 +++++++++++++++++--
.../GPUToLLVMSPV/gpu-to-llvm-spv.mlir | 25 +++++++++
2 files changed, 72 insertions(+), 4 deletions(-)
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 41a3ac76df4b78..f9d92f850df68b 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -316,6 +316,43 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
}
};
+//===----------------------------------------------------------------------===//
+// Subgroup query ops.
+//===----------------------------------------------------------------------===//
+
+template <typename SubgroupOp>
+struct GPUSubgroupOpConversion final : ConvertOpToLLVMPattern<SubgroupOp> {
+ using ConvertOpToLLVMPattern<SubgroupOp>::ConvertOpToLLVMPattern;
+
+ LogicalResult
+ matchAndRewrite(SubgroupOp op, typename SubgroupOp::Adaptor adaptor,
+ ConversionPatternRewriter &rewriter) const final {
+ constexpr StringRef funcName = [] {
+ if constexpr (std::is_same_v<SubgroupOp, gpu::SubgroupIdOp>) {
+ return "_Z16get_sub_group_id";
+ } else if constexpr (std::is_same_v<SubgroupOp, gpu::LaneIdOp>) {
+ return "_Z22get_sub_group_local_id";
+ } else if constexpr (std::is_same_v<SubgroupOp, gpu::NumSubgroupsOp>) {
+ return "_Z18get_num_sub_groups";
+ } else if constexpr (std::is_same_v<SubgroupOp, gpu::SubgroupSizeOp>) {
+ return "_Z18get_sub_group_size";
+ }
+ }();
+
+ Operation *moduleOp =
+ op->template getParentWithTrait<OpTrait::SymbolTable>();
+ Type resultType = rewriter.getI32Type();
+ LLVM::LLVMFuncOp func =
+ lookupOrCreateSPIRVFn(moduleOp, funcName, {}, resultType,
+ /*isMemNone=*/false, /*isConvergent=*/false);
+
+ Location loc = op->getLoc();
+ Value result = createSPIRVBuiltinCall(loc, rewriter, func, {}).getResult();
+ rewriter.replaceOp(op, result);
+ return success();
+ }
+};
+
//===----------------------------------------------------------------------===//
// GPU To LLVM-SPV Pass.
//===----------------------------------------------------------------------===//
@@ -335,9 +372,11 @@ struct GPUToLLVMSPVConversionPass final
LLVMTypeConverter converter(context, options);
LLVMConversionTarget target(*context);
- target.addIllegalOp<gpu::BarrierOp, gpu::BlockDimOp, gpu::BlockIdOp,
- gpu::GPUFuncOp, gpu::GlobalIdOp, gpu::GridDimOp,
- gpu::ReturnOp, gpu::ShuffleOp, gpu::ThreadIdOp>();
+ target.addIllegalOp<gpu::ThreadIdOp, gpu::BlockIdOp, gpu::GlobalIdOp,
+ gpu::BlockDimOp, gpu::GridDimOp, gpu::BarrierOp,
+ gpu::GPUFuncOp, gpu::ReturnOp, gpu::ShuffleOp,
+ gpu::SubgroupIdOp, gpu::LaneIdOp, gpu::NumSubgroupsOp,
+ gpu::SubgroupSizeOp>();
populateGpuToLLVMSPVConversionPatterns(converter, patterns);
populateGpuMemorySpaceAttributeConversions(converter);
@@ -370,7 +409,11 @@ void populateGpuToLLVMSPVConversionPatterns(LLVMTypeConverter &typeConverter,
LaunchConfigOpConversion<gpu::GridDimOp>,
LaunchConfigOpConversion<gpu::BlockDimOp>,
LaunchConfigOpConversion<gpu::ThreadIdOp>,
- LaunchConfigOpConversion<gpu::GlobalIdOp>>(typeConverter);
+ LaunchConfigOpConversion<gpu::GlobalIdOp>,
+ GPUSubgroupOpConversion<gpu::SubgroupIdOp>,
+ GPUSubgroupOpConversion<gpu::LaneIdOp>,
+ GPUSubgroupOpConversion<gpu::NumSubgroupsOp>,
+ GPUSubgroupOpConversion<gpu::SubgroupSizeOp>>(typeConverter);
MLIRContext *context = &typeConverter.getContext();
unsigned privateAddressSpace =
gpuAddressSpaceToOCLAddressSpace(gpu::AddressSpace::Private);
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index 860bb60726352d..c06648f117e8f1 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -563,3 +563,28 @@ gpu.module @kernels {
gpu.return
}
}
+
+// -----
+
+// Lowering of subgroup query operations
+
+// CHECK-DAG: llvm.func spir_funccc @_Z18get_sub_group_size() -> i32 attributes {no_unwind, will_return}
+// CHECK-DAG: llvm.func spir_funccc @_Z18get_num_sub_groups() -> i32 attributes {no_unwind, will_return}
+// CHECK-DAG: llvm.func spir_funccc @_Z22get_sub_group_local_id() -> i32 attributes {no_unwind, will_return}
+// CHECK-DAG: llvm.func spir_funccc @_Z16get_sub_group_id() -> i32 attributes {no_unwind, will_return}
+
+
+gpu.module @subgroup_operations {
+// CHECK-LABEL: @gpu_subgroup
+ func.func @gpu_subgroup() {
+ // CHECK: llvm.call spir_funccc @_Z16get_sub_group_id() {no_unwind, will_return} : () -> i32
+ %0 = gpu.subgroup_id : index
+ // CHECK: llvm.call spir_funccc @_Z22get_sub_group_local_id() {no_unwind, will_return} : () -> i32
+ %1 = gpu.lane_id
+ // CHECK: llvm.call spir_funccc @_Z18get_num_sub_groups() {no_unwind, will_return} : () -> i32
+ %2 = gpu.num_subgroups : index
+ // CHECK: llvm.call spir_funccc @_Z18get_sub_group_size() {no_unwind, will_return} : () -> i32
+ %3 = gpu.subgroup_size : index
+ return
+ }
+}
>From 5ad0f256dc9d4276008cd85169d400ac4a46419c Mon Sep 17 00:00:00 2001
From: Finlay Marno <finlay.marno at codeplay.com>
Date: Tue, 17 Sep 2024 11:12:39 +0100
Subject: [PATCH 2/5] fixup! [MLIR][GPU] lower subgroup query ops in
gpu-to-llvm-spv
---
.../Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 26 ++++++++++++++-----
.../GPUToLLVMSPV/gpu-to-llvm-spv.mlir | 16 +++++++++---
2 files changed, 32 insertions(+), 10 deletions(-)
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index f9d92f850df68b..9466bb194e0384 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -322,7 +322,12 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
template <typename SubgroupOp>
struct GPUSubgroupOpConversion final : ConvertOpToLLVMPattern<SubgroupOp> {
- using ConvertOpToLLVMPattern<SubgroupOp>::ConvertOpToLLVMPattern;
+ Type indexTy;
+ // using ConvertOpToLLVMPattern<SubgroupOp>::ConvertOpToLLVMPattern;
+ GPUSubgroupOpConversion(Type indexTy, const LLVMTypeConverter &typeConverter,
+ PatternBenefit benefit = 1)
+ : ConvertOpToLLVMPattern<SubgroupOp>(typeConverter, benefit),
+ indexTy(indexTy) {}
LogicalResult
matchAndRewrite(SubgroupOp op, typename SubgroupOp::Adaptor adaptor,
@@ -341,13 +346,21 @@ struct GPUSubgroupOpConversion final : ConvertOpToLLVMPattern<SubgroupOp> {
Operation *moduleOp =
op->template getParentWithTrait<OpTrait::SymbolTable>();
- Type resultType = rewriter.getI32Type();
+ Type resultTy = rewriter.getI32Type();
LLVM::LLVMFuncOp func =
- lookupOrCreateSPIRVFn(moduleOp, funcName, {}, resultType,
+ lookupOrCreateSPIRVFn(moduleOp, funcName, {}, resultTy,
/*isMemNone=*/false, /*isConvergent=*/false);
Location loc = op->getLoc();
Value result = createSPIRVBuiltinCall(loc, rewriter, func, {}).getResult();
+
+ if (resultTy != indexTy) {
+ assert(indexTy.getIntOrFloatBitWidth() >
+ resultTy.getIntOrFloatBitWidth() &&
+ "expected that index type would be >= i32");
+ result = rewriter.create<LLVM::ZExtOp>(loc, indexTy, result);
+ }
+
rewriter.replaceOp(op, result);
return success();
}
@@ -409,11 +422,12 @@ void populateGpuToLLVMSPVConversionPatterns(LLVMTypeConverter &typeConverter,
LaunchConfigOpConversion<gpu::GridDimOp>,
LaunchConfigOpConversion<gpu::BlockDimOp>,
LaunchConfigOpConversion<gpu::ThreadIdOp>,
- LaunchConfigOpConversion<gpu::GlobalIdOp>,
- GPUSubgroupOpConversion<gpu::SubgroupIdOp>,
+ LaunchConfigOpConversion<gpu::GlobalIdOp>>(typeConverter);
+ patterns.add<GPUSubgroupOpConversion<gpu::SubgroupIdOp>,
GPUSubgroupOpConversion<gpu::LaneIdOp>,
GPUSubgroupOpConversion<gpu::NumSubgroupsOp>,
- GPUSubgroupOpConversion<gpu::SubgroupSizeOp>>(typeConverter);
+ GPUSubgroupOpConversion<gpu::SubgroupSizeOp>>(
+ typeConverter.getIndexType(), typeConverter);
MLIRContext *context = &typeConverter.getContext();
unsigned privateAddressSpace =
gpuAddressSpaceToOCLAddressSpace(gpu::AddressSpace::Private);
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index c06648f117e8f1..910105ddf69586 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -577,13 +577,21 @@ gpu.module @kernels {
gpu.module @subgroup_operations {
// CHECK-LABEL: @gpu_subgroup
func.func @gpu_subgroup() {
- // CHECK: llvm.call spir_funccc @_Z16get_sub_group_id() {no_unwind, will_return} : () -> i32
+ // CHECK: %[[SG_ID:.*]] = llvm.call spir_funccc @_Z16get_sub_group_id() {no_unwind, will_return} : () -> i32
+ // CHECK-32-NOT: llvm.zext
+ // CHECK-64: %{{.*}} = llvm.zext %[[SG_ID]] : i32 to i64
%0 = gpu.subgroup_id : index
- // CHECK: llvm.call spir_funccc @_Z22get_sub_group_local_id() {no_unwind, will_return} : () -> i32
+ // CHECK: %[[SG_LOCAL_ID:.*]] = llvm.call spir_funccc @_Z22get_sub_group_local_id() {no_unwind, will_return} : () -> i32
+ // CHECK-32-NOT: llvm.zext
+ // CHECK-64: %{{.*}} = llvm.zext %[[SG_LOCAL_ID]] : i32 to i64
%1 = gpu.lane_id
- // CHECK: llvm.call spir_funccc @_Z18get_num_sub_groups() {no_unwind, will_return} : () -> i32
+ // CHECK: %[[NUM_SGS:.*]] = llvm.call spir_funccc @_Z18get_num_sub_groups() {no_unwind, will_return} : () -> i32
+ // CHECK-32-NOT: llvm.zext
+ // CHECK-64: %{{.*}} = llvm.zext %[[NUM_SGS]] : i32 to i64
%2 = gpu.num_subgroups : index
- // CHECK: llvm.call spir_funccc @_Z18get_sub_group_size() {no_unwind, will_return} : () -> i32
+ // CHECK: %[[SG_SIZE:.*]] = llvm.call spir_funccc @_Z18get_sub_group_size() {no_unwind, will_return} : () -> i32
+ // CHECK-32-NOT: llvm.zext
+ // CHECK-64: %{{.*}} = llvm.zext %[[SG_SIZE]] : i32 to i64
%3 = gpu.subgroup_size : index
return
}
>From 92006154c417568d44973a1ce4b44ce3ff924153 Mon Sep 17 00:00:00 2001
From: Finlay Marno <finlay.marno at codeplay.com>
Date: Tue, 17 Sep 2024 14:13:13 +0100
Subject: [PATCH 3/5] fixup! [MLIR][GPU] lower subgroup query ops in
gpu-to-llvm-spv
---
.../lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 9466bb194e0384..1c8545bf357a9b 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -322,12 +322,8 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
template <typename SubgroupOp>
struct GPUSubgroupOpConversion final : ConvertOpToLLVMPattern<SubgroupOp> {
- Type indexTy;
- // using ConvertOpToLLVMPattern<SubgroupOp>::ConvertOpToLLVMPattern;
- GPUSubgroupOpConversion(Type indexTy, const LLVMTypeConverter &typeConverter,
- PatternBenefit benefit = 1)
- : ConvertOpToLLVMPattern<SubgroupOp>(typeConverter, benefit),
- indexTy(indexTy) {}
+ using ConvertOpToLLVMPattern<SubgroupOp>::ConvertOpToLLVMPattern;
+ using ConvertToLLVMPattern::getTypeConverter;
LogicalResult
matchAndRewrite(SubgroupOp op, typename SubgroupOp::Adaptor adaptor,
@@ -354,6 +350,7 @@ struct GPUSubgroupOpConversion final : ConvertOpToLLVMPattern<SubgroupOp> {
Location loc = op->getLoc();
Value result = createSPIRVBuiltinCall(loc, rewriter, func, {}).getResult();
+ Type indexTy = getTypeConverter()->getIndexType();
if (resultTy != indexTy) {
assert(indexTy.getIntOrFloatBitWidth() >
resultTy.getIntOrFloatBitWidth() &&
@@ -422,12 +419,11 @@ void populateGpuToLLVMSPVConversionPatterns(LLVMTypeConverter &typeConverter,
LaunchConfigOpConversion<gpu::GridDimOp>,
LaunchConfigOpConversion<gpu::BlockDimOp>,
LaunchConfigOpConversion<gpu::ThreadIdOp>,
- LaunchConfigOpConversion<gpu::GlobalIdOp>>(typeConverter);
- patterns.add<GPUSubgroupOpConversion<gpu::SubgroupIdOp>,
+ LaunchConfigOpConversion<gpu::GlobalIdOp>,
+ GPUSubgroupOpConversion<gpu::SubgroupIdOp>,
GPUSubgroupOpConversion<gpu::LaneIdOp>,
GPUSubgroupOpConversion<gpu::NumSubgroupsOp>,
- GPUSubgroupOpConversion<gpu::SubgroupSizeOp>>(
- typeConverter.getIndexType(), typeConverter);
+ GPUSubgroupOpConversion<gpu::SubgroupSizeOp>>(typeConverter);
MLIRContext *context = &typeConverter.getContext();
unsigned privateAddressSpace =
gpuAddressSpaceToOCLAddressSpace(gpu::AddressSpace::Private);
>From a88317ef1400a3a0d6365657ac3f3783490cfaf4 Mon Sep 17 00:00:00 2001
From: Finlay Marno <finlay.marno at codeplay.com>
Date: Tue, 17 Sep 2024 14:29:15 +0100
Subject: [PATCH 4/5] fixup! [MLIR][GPU] lower subgroup query ops in
gpu-to-llvm-spv
---
mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 1c8545bf357a9b..c7de4fccafdd65 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -352,9 +352,9 @@ struct GPUSubgroupOpConversion final : ConvertOpToLLVMPattern<SubgroupOp> {
Type indexTy = getTypeConverter()->getIndexType();
if (resultTy != indexTy) {
- assert(indexTy.getIntOrFloatBitWidth() >
- resultTy.getIntOrFloatBitWidth() &&
- "expected that index type would be >= i32");
+ if (indexTy.getIntOrFloatBitWidth() < resultTy.getIntOrFloatBitWidth()) {
+ return failure();
+ }
result = rewriter.create<LLVM::ZExtOp>(loc, indexTy, result);
}
>From c2e099d1a4966a39cf9165cebe6f9920d5035da7 Mon Sep 17 00:00:00 2001
From: Finlay Marno <finlay.marno at codeplay.com>
Date: Tue, 17 Sep 2024 16:27:08 +0100
Subject: [PATCH 5/5] fixup! [MLIR][GPU] lower subgroup query ops in
gpu-to-llvm-spv
---
.../Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 24 +++++++++----------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index c7de4fccafdd65..739a34e0aa610e 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -382,11 +382,11 @@ struct GPUToLLVMSPVConversionPass final
LLVMTypeConverter converter(context, options);
LLVMConversionTarget target(*context);
- target.addIllegalOp<gpu::ThreadIdOp, gpu::BlockIdOp, gpu::GlobalIdOp,
- gpu::BlockDimOp, gpu::GridDimOp, gpu::BarrierOp,
- gpu::GPUFuncOp, gpu::ReturnOp, gpu::ShuffleOp,
- gpu::SubgroupIdOp, gpu::LaneIdOp, gpu::NumSubgroupsOp,
- gpu::SubgroupSizeOp>();
+ target.addIllegalOp<gpu::BarrierOp, gpu::BlockDimOp, gpu::BlockIdOp,
+ gpu::GPUFuncOp, gpu::GlobalIdOp, gpu::GridDimOp,
+ gpu::LaneIdOp, gpu::NumSubgroupsOp, gpu::ReturnOp,
+ gpu::ShuffleOp, gpu::SubgroupIdOp, gpu::SubgroupSizeOp,
+ gpu::ThreadIdOp>();
populateGpuToLLVMSPVConversionPatterns(converter, patterns);
populateGpuMemorySpaceAttributeConversions(converter);
@@ -415,15 +415,15 @@ gpuAddressSpaceToOCLAddressSpace(gpu::AddressSpace addressSpace) {
void populateGpuToLLVMSPVConversionPatterns(LLVMTypeConverter &typeConverter,
RewritePatternSet &patterns) {
patterns.add<GPUBarrierConversion, GPUReturnOpLowering, GPUShuffleConversion,
- LaunchConfigOpConversion<gpu::BlockIdOp>,
- LaunchConfigOpConversion<gpu::GridDimOp>,
- LaunchConfigOpConversion<gpu::BlockDimOp>,
- LaunchConfigOpConversion<gpu::ThreadIdOp>,
- LaunchConfigOpConversion<gpu::GlobalIdOp>,
- GPUSubgroupOpConversion<gpu::SubgroupIdOp>,
GPUSubgroupOpConversion<gpu::LaneIdOp>,
GPUSubgroupOpConversion<gpu::NumSubgroupsOp>,
- GPUSubgroupOpConversion<gpu::SubgroupSizeOp>>(typeConverter);
+ GPUSubgroupOpConversion<gpu::SubgroupIdOp>,
+ GPUSubgroupOpConversion<gpu::SubgroupSizeOp>,
+ LaunchConfigOpConversion<gpu::BlockDimOp>,
+ LaunchConfigOpConversion<gpu::BlockIdOp>,
+ LaunchConfigOpConversion<gpu::GlobalIdOp>,
+ LaunchConfigOpConversion<gpu::GridDimOp>,
+ LaunchConfigOpConversion<gpu::ThreadIdOp>>(typeConverter);
MLIRContext *context = &typeConverter.getContext();
unsigned privateAddressSpace =
gpuAddressSpaceToOCLAddressSpace(gpu::AddressSpace::Private);
More information about the Mlir-commits
mailing list