[Mlir-commits] [mlir] [MLIR] Add optimization attrs for gpu-to-llvmspv function declarations (PR #99301)
llvmlistbot at llvm.org
Wed Jul 17 03:31:05 PDT 2024
https://github.com/FMarno updated https://github.com/llvm/llvm-project/pull/99301
From 4c5887ecaf038d3ff1ff9588725b7b7743b528d3 Mon Sep 17 00:00:00 2001
From: Finlay Marno <finlay.marno at codeplay.com>
Date: Tue, 16 Jul 2024 11:15:04 +0100
Subject: [PATCH] set optimization attrs for gpu-to-llvmspv ops
Adds the nounwind and willreturn attributes to the generated function declarations, and memory(none) to the builtins that have no externally observable memory effects.
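For illustration, with this change a work-item builtin declaration is expected to come out roughly as follows (a sketch pieced together from the FileCheck expectations updated below; the exact attribute spelling is whatever the LLVM dialect prints):

  llvm.func spir_funccc @_Z12get_local_idj(i32) -> i64 attributes {
      memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>,
      no_unwind, will_return}

Convergent builtins such as barrier and the sub_group_shuffle family keep their memory effects and instead carry convergent alongside no_unwind and will_return.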
---
.../Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 35 ++++--
.../GPUToLLVMSPV/gpu-to-llvm-spv.mlir | 104 ++++++++++++++----
2 files changed, 108 insertions(+), 31 deletions(-)
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index ebeb8f803d71d..81682a52c8c4b 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -40,11 +40,10 @@ namespace mlir {
// Helper Functions
//===----------------------------------------------------------------------===//
-static LLVM::LLVMFuncOp lookupOrCreateSPIRVFn(Operation *symbolTable,
- StringRef name,
- ArrayRef<Type> paramTypes,
- Type resultType,
- bool isConvergent = false) {
+static LLVM::LLVMFuncOp
+lookupOrCreateSPIRVFn(Operation *symbolTable, StringRef name,
+ ArrayRef<Type> paramTypes, Type resultType,
+ bool hasMemoryEffects = true, bool isConvergent = false) {
auto func = dyn_cast_or_null<LLVM::LLVMFuncOp>(
SymbolTable::lookupSymbolIn(symbolTable, name));
if (!func) {
@@ -53,6 +52,17 @@ static LLVM::LLVMFuncOp lookupOrCreateSPIRVFn(Operation *symbolTable,
symbolTable->getLoc(), name,
LLVM::LLVMFunctionType::get(resultType, paramTypes));
func.setCConv(LLVM::cconv::CConv::SPIR_FUNC);
+ func.setNoUnwind(true);
+ func.setWillReturn(true);
+ if (!hasMemoryEffects) {
+ // No externally observable effects.
+ constexpr auto noModRef = mlir::LLVM::ModRefInfo::NoModRef;
+ auto memAttr = b.getAttr<LLVM::MemoryEffectsAttr>(
+ /*other=*/noModRef,
+ /*argMem=*/noModRef, /*inaccessibleMem=*/noModRef);
+ func.setMemoryAttr(memAttr);
+ }
+
func.setConvergent(isConvergent);
}
return func;
@@ -91,8 +101,9 @@ struct GPUBarrierConversion final : ConvertOpToLLVMPattern<gpu::BarrierOp> {
assert(moduleOp && "Expecting module");
Type flagTy = rewriter.getI32Type();
Type voidTy = rewriter.getType<LLVM::LLVMVoidType>();
- LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn(
- moduleOp, funcName, flagTy, voidTy, /*isConvergent=*/true);
+ LLVM::LLVMFuncOp func =
+ lookupOrCreateSPIRVFn(moduleOp, funcName, flagTy, voidTy,
+ /*hasMemoryEffects=*/true, /*isConvergent=*/true);
// Value used by SPIR-V backend to represent `CLK_LOCAL_MEM_FENCE`.
// See `llvm/lib/Target/SPIRV/SPIRVBuiltins.td`.
@@ -134,8 +145,8 @@ struct LaunchConfigConversion : ConvertToLLVMPattern {
assert(moduleOp && "Expecting module");
Type dimTy = rewriter.getI32Type();
Type indexTy = getTypeConverter()->getIndexType();
- LLVM::LLVMFuncOp func =
- lookupOrCreateSPIRVFn(moduleOp, funcName, dimTy, indexTy);
+ LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn(
+ moduleOp, funcName, dimTy, indexTy, /*hasMemoryEffects=*/false);
Location loc = op->getLoc();
gpu::Dimension dim = getDimension(op);
@@ -268,9 +279,9 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
Type valueType = adaptor.getValue().getType();
Type offsetType = adaptor.getOffset().getType();
Type resultType = valueType;
- LLVM::LLVMFuncOp func =
- lookupOrCreateSPIRVFn(moduleOp, funcName, {valueType, offsetType},
- resultType, /*isConvergent=*/true);
+ LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn(
+ moduleOp, funcName, {valueType, offsetType}, resultType,
+ /*hasMemoryEffects=*/true, /*isConvergent=*/true);
Location loc = op->getLoc();
std::array<Value, 2> args{adaptor.getValue(), adaptor.getOffset()};
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index 1b0f89a9a573e..d094c786414fc 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -4,16 +4,46 @@
// RUN: | FileCheck --check-prefixes=CHECK-32,CHECK %s
gpu.module @builtins {
- // CHECK-64: llvm.func spir_funccc @_Z14get_num_groupsj(i32) -> i64
- // CHECK-64: llvm.func spir_funccc @_Z12get_local_idj(i32) -> i64
- // CHECK-64: llvm.func spir_funccc @_Z14get_local_sizej(i32) -> i64
- // CHECK-64: llvm.func spir_funccc @_Z13get_global_idj(i32) -> i64
- // CHECK-64: llvm.func spir_funccc @_Z12get_group_idj(i32) -> i64
- // CHECK-32: llvm.func spir_funccc @_Z14get_num_groupsj(i32) -> i32
- // CHECK-32: llvm.func spir_funccc @_Z12get_local_idj(i32) -> i32
- // CHECK-32: llvm.func spir_funccc @_Z14get_local_sizej(i32) -> i32
- // CHECK-32: llvm.func spir_funccc @_Z13get_global_idj(i32) -> i32
- // CHECK-32: llvm.func spir_funccc @_Z12get_group_idj(i32) -> i32
+ // CHECK-64: llvm.func spir_funccc @_Z14get_num_groupsj(i32) -> i64 attributes {
+ // CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: will_return
+ // CHECK-64: llvm.func spir_funccc @_Z12get_local_idj(i32) -> i64 attributes {
+ // CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: will_return
+ // CHECK-64: llvm.func spir_funccc @_Z14get_local_sizej(i32) -> i64 attributes {
+ // CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: will_return
+ // CHECK-64: llvm.func spir_funccc @_Z13get_global_idj(i32) -> i64 attributes {
+ // CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: will_return
+ // CHECK-64: llvm.func spir_funccc @_Z12get_group_idj(i32) -> i64 attributes {
+ // CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: will_return
+ // CHECK-32: llvm.func spir_funccc @_Z14get_num_groupsj(i32) -> i32 attributes {
+ // CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: will_return
+ // CHECK-32: llvm.func spir_funccc @_Z12get_local_idj(i32) -> i32 attributes {
+ // CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: will_return
+ // CHECK-32: llvm.func spir_funccc @_Z14get_local_sizej(i32) -> i32 attributes {
+ // CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: will_return
+ // CHECK-32: llvm.func spir_funccc @_Z13get_global_idj(i32) -> i32 attributes {
+ // CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: will_return
+ // CHECK-32: llvm.func spir_funccc @_Z12get_group_idj(i32) -> i32 attributes {
+ // CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: will_return
// CHECK-LABEL: gpu_block_id
func.func @gpu_block_id() -> (index, index, index) {
@@ -104,7 +134,11 @@ gpu.module @builtins {
// -----
gpu.module @barriers {
- // CHECK: llvm.func spir_funccc @_Z7barrierj(i32) attributes {convergent}
+ // CHECK: llvm.func spir_funccc @_Z7barrierj(i32) attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-NOT: memory = #llvm.memory_effects
+ // CHECK-SAME: }
// CHECK-LABEL: gpu_barrier
func.func @gpu_barrier() {
@@ -120,10 +154,26 @@ gpu.module @barriers {
// Check `gpu.shuffle` conversion with default subgroup size.
gpu.module @shuffles {
- // CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {convergent}
- // CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {convergent}
- // CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {convergent}
- // CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {convergent}
+ // CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-NOT: memory = #llvm.memory_effects
+ // CHECK-SAME: }
+ // CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-NOT: memory = #llvm.memory_effects
+ // CHECK-SAME: }
+ // CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-NOT: memory = #llvm.memory_effects
+ // CHECK-SAME: }
+ // CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-NOT: memory = #llvm.memory_effects
+ // CHECK-SAME: }
// CHECK-LABEL: gpu_shuffles
// CHECK-SAME: (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32)
@@ -155,10 +205,26 @@ gpu.module @shuffles {
gpu.module @shuffles attributes {
spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Kernel, Addresses, GroupNonUniformShuffle, Int64], []>, #spirv.resource_limits<subgroup_size = 16>>
} {
- // CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {convergent}
- // CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {convergent}
- // CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {convergent}
- // CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {convergent}
+ // CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-NOT: memory = #llvm.memory_effects
+ // CHECK-SAME: }
+ // CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-NOT: memory = #llvm.memory_effects
+ // CHECK-SAME: }
+ // CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-NOT: memory = #llvm.memory_effects
+ // CHECK-SAME: }
+ // CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-NOT: memory = #llvm.memory_effects
+ // CHECK-SAME: }
// CHECK-LABEL: gpu_shuffles
// CHECK-SAME: (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32)
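As a quick way to exercise the updated expectations, the modified test can be run through lit from a build directory, e.g. (llvm-lit lives in the build tree's bin directory; the exact path prefix depends on the local checkout):

  llvm-lit -v mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir

The resulting declarations can also be inspected directly by feeding a small gpu.module through mlir-opt with the conversion pass named in the test's RUN lines (not shown in this hunk).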