[Mlir-commits] [mlir] [mlir][gpu] Add address space modifier to Barrier (PR #110527)
llvmlistbot at llvm.org
Mon Sep 30 08:58:52 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-mlir
Author: Finlay (FMarno)
Changes:
Add an address space modifier to the GPU Barrier. All work-items in the workgroup are still required to reach the barrier, but the set of address spaces whose memory accesses must be made visible can be reduced.
I've put up an RFC [here](https://discourse.llvm.org/t/rfc-add-memory-scope-to-gpu-barrier/81021/2?u=fmarno) (with a bit of a bad start).
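For quick reference, the new `memfence` syntax looks as follows (mirroring the examples in the op documentation in the diff below):

```mlir
// All memory accesses must be made visible (existing behaviour).
gpu.barrier
// Only workgroup (local) memory accesses must be made visible.
gpu.barrier memfence [#gpu.address_space<workgroup>]
// No memory accesses need to be made visible; execution-only barrier.
gpu.barrier memfence []
```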
---
Full diff: https://github.com/llvm/llvm-project/pull/110527.diff
8 Files Affected:
- (modified) mlir/include/mlir/Dialect/GPU/IR/GPUBase.td (+2)
- (modified) mlir/include/mlir/Dialect/GPU/IR/GPUOps.td (+17-2)
- (modified) mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp (+24-5)
- (modified) mlir/lib/Conversion/GPUToNVVM/GPUToNVVM.td (+1-1)
- (modified) mlir/lib/Conversion/GPUToROCDL/GPUToROCDL.td (+1-1)
- (modified) mlir/lib/Dialect/GPU/IR/GPUDialect.cpp (+3)
- (modified) mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir (+17-2)
- (modified) mlir/test/Dialect/GPU/ops.mlir (+6)
``````````diff
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
index 860f8933672038..ccb1678aef9192 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
@@ -99,6 +99,8 @@ def GPU_AddressSpaceEnum : GPU_I32Enum<
def GPU_AddressSpaceAttr :
GPU_I32EnumAttr<"address_space", GPU_AddressSpaceEnum>;
+def GPU_AddressSpaceAttrArray : TypedArrayAttrBase<GPU_AddressSpaceAttr, "GPU Address Space array">;
+
//===----------------------------------------------------------------------===//
// GPU Types.
//===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 6098eb34d04d52..9d89068c72969b 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -1355,7 +1355,8 @@ def GPU_ShuffleOp : GPU_Op<
];
}
-def GPU_BarrierOp : GPU_Op<"barrier"> {
+def GPU_BarrierOp : GPU_Op<"barrier">,
+ Arguments<(ins OptionalAttr<GPU_AddressSpaceAttrArray> :$address_spaces)> {
let summary = "Synchronizes all work items of a workgroup.";
let description = [{
The "barrier" op synchronizes all work items of a workgroup. It is used
@@ -1371,11 +1372,25 @@ def GPU_BarrierOp : GPU_Op<"barrier"> {
accessing the same memory can be avoided by synchronizing work items
in-between these accesses.
+ The `memfence` clause narrows which memory accesses must be made visible: only
+ accesses to the listed address spaces are required to be visible. By default,
+ all address spaces are included.
+
+ ```mlir
+ // only workgroup address space accesses are required to be visible
+ gpu.barrier memfence [#gpu.address_space<workgroup>]
+ // no memory accesses required to be visible
+ gpu.barrier memfence []
+ // all memory accesses required to be visible
+ gpu.barrier
+ ```
+
Either none or all work items of a workgroup need to execute this op
in convergence.
}];
- let assemblyFormat = "attr-dict";
+ let assemblyFormat = "(`memfence` $address_spaces^)? attr-dict";
let hasCanonicalizer = 1;
+ let builders = [OpBuilder<(ins)>];
}
def GPU_GPUModuleOp : GPU_Op<"module", [
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 739a34e0aa610e..f9e8e397f93f27 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -116,12 +116,31 @@ struct GPUBarrierConversion final : ConvertOpToLLVMPattern<gpu::BarrierOp> {
lookupOrCreateSPIRVFn(moduleOp, funcName, flagTy, voidTy,
/*isMemNone=*/false, /*isConvergent=*/true);
- // Value used by SPIR-V backend to represent `CLK_LOCAL_MEM_FENCE`.
- // See `llvm/lib/Target/SPIRV/SPIRVBuiltins.td`.
- constexpr int64_t localMemFenceFlag = 1;
+ // Value used by SPIR-V backend to represent `CLK_LOCAL_MEM_FENCE` and
+ // `CLK_GLOBAL_MEM_FENCE`. See `llvm/lib/Target/SPIRV/SPIRVBuiltins.td`.
+ constexpr int32_t localMemFenceFlag = 1;
+ constexpr int32_t globalMemFenceFlag = 2;
+ int32_t memFenceFlag = 0;
+ std::optional<ArrayAttr> addressSpaces = adaptor.getAddressSpaces();
+ if (addressSpaces) {
+ for (Attribute attr : addressSpaces.value()) {
+ auto addressSpace = cast<gpu::AddressSpaceAttr>(attr).getValue();
+ switch (addressSpace) {
+ case gpu::AddressSpace::Global:
+ memFenceFlag = memFenceFlag | globalMemFenceFlag;
+ break;
+ case gpu::AddressSpace::Workgroup:
+ memFenceFlag = memFenceFlag | localMemFenceFlag;
+ break;
+ case gpu::AddressSpace::Private:
+ break;
+ }
+ }
+ } else {
+ memFenceFlag = localMemFenceFlag | globalMemFenceFlag;
+ }
Location loc = op->getLoc();
- Value flag =
- rewriter.create<LLVM::ConstantOp>(loc, flagTy, localMemFenceFlag);
+ Value flag = rewriter.create<LLVM::ConstantOp>(loc, flagTy, memFenceFlag);
rewriter.replaceOp(op, createSPIRVBuiltinCall(loc, rewriter, func, flag));
return success();
}
diff --git a/mlir/lib/Conversion/GPUToNVVM/GPUToNVVM.td b/mlir/lib/Conversion/GPUToNVVM/GPUToNVVM.td
index f513bb1a0a8265..0fcda38631a9b0 100644
--- a/mlir/lib/Conversion/GPUToNVVM/GPUToNVVM.td
+++ b/mlir/lib/Conversion/GPUToNVVM/GPUToNVVM.td
@@ -17,6 +17,6 @@ include "mlir/IR/PatternBase.td"
include "mlir/Dialect/GPU/IR/GPUOps.td"
include "mlir/Dialect/LLVMIR/NVVMOps.td"
-def : Pat<(GPU_BarrierOp), (NVVM_Barrier0Op)>;
+def : Pat<(GPU_BarrierOp : $op $memory_fence), (NVVM_Barrier0Op)>;
#endif // MLIR_CONVERSION_GPUTONVVM_TD
diff --git a/mlir/lib/Conversion/GPUToROCDL/GPUToROCDL.td b/mlir/lib/Conversion/GPUToROCDL/GPUToROCDL.td
index 8d2f30a9a16835..d3bb7748134374 100644
--- a/mlir/lib/Conversion/GPUToROCDL/GPUToROCDL.td
+++ b/mlir/lib/Conversion/GPUToROCDL/GPUToROCDL.td
@@ -17,6 +17,6 @@ include "mlir/IR/PatternBase.td"
include "mlir/Dialect/GPU/IR/GPUOps.td"
include "mlir/Dialect/LLVMIR/ROCDLOps.td"
-def : Pat<(GPU_BarrierOp), (ROCDL_BarrierOp)>;
+def : Pat<(GPU_BarrierOp : $op $memory_fence), (ROCDL_BarrierOp)>;
#endif // MLIR_CONVERSION_GPUTOROCDL_TD
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 956877497d9338..156d6b8fe15951 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -1351,6 +1351,9 @@ void BarrierOp::getCanonicalizationPatterns(RewritePatternSet &results,
results.add(eraseRedundantGpuBarrierOps);
}
+void BarrierOp::build(mlir::OpBuilder &odsBuilder,
+ mlir::OperationState &odsState) {}
+
//===----------------------------------------------------------------------===//
// GPUFuncOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index 910105ddf69586..4767565ea05501 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -213,14 +213,29 @@ gpu.module @barriers {
// CHECK-LABEL: gpu_barrier
func.func @gpu_barrier() {
- // CHECK: [[FLAGS:%.*]] = llvm.mlir.constant(1 : i32) : i32
- // CHECK: llvm.call spir_funccc @_Z7barrierj([[FLAGS]]) {
+ // CHECK: [[GLOBAL_AND_LOCAL_FLAG:%.*]] = llvm.mlir.constant(3 : i32) : i32
+ // CHECK: llvm.call spir_funccc @_Z7barrierj([[GLOBAL_AND_LOCAL_FLAG]]) {
// CHECK-SAME-DAG: no_unwind
// CHECK-SAME-DAG: convergent
// CHECK-SAME-DAG: will_return
// CHECK-NOT: memory_effects = #llvm.memory_effects
// CHECK-SAME: } : (i32) -> ()
gpu.barrier
+ // CHECK: [[GLOBAL_AND_LOCAL_FLAG2:%.*]] = llvm.mlir.constant(3 : i32) : i32
+ // CHECK: llvm.call spir_funccc @_Z7barrierj([[GLOBAL_AND_LOCAL_FLAG2]])
+ gpu.barrier memfence [#gpu.address_space<global>, #gpu.address_space<workgroup>]
+ // CHECK: [[LOCAL_FLAG:%.*]] = llvm.mlir.constant(1 : i32) : i32
+ // CHECK: llvm.call spir_funccc @_Z7barrierj([[LOCAL_FLAG]])
+ gpu.barrier memfence [#gpu.address_space<workgroup>]
+ // CHECK: [[GLOBAL_FLAG:%.*]] = llvm.mlir.constant(2 : i32) : i32
+ // CHECK: llvm.call spir_funccc @_Z7barrierj([[GLOBAL_FLAG]])
+ gpu.barrier memfence [#gpu.address_space<global>]
+ // CHECK: [[NONE_FLAG:%.*]] = llvm.mlir.constant(0 : i32) : i32
+ // CHECK: llvm.call spir_funccc @_Z7barrierj([[NONE_FLAG]])
+ gpu.barrier memfence []
+ // CHECK: [[NONE_FLAG2:%.*]] = llvm.mlir.constant(0 : i32) : i32
+ // CHECK: llvm.call spir_funccc @_Z7barrierj([[NONE_FLAG2]])
+ gpu.barrier memfence [#gpu.address_space<private>]
return
}
}
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index b9c0a0e79e8f2a..2bba66f786f189 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -141,6 +141,12 @@ module attributes {gpu.container_module} {
%shfl3, %pred3 = gpu.shuffle idx %arg0, %offset, %width : f32
"gpu.barrier"() : () -> ()
+ gpu.barrier
+ gpu.barrier memfence [#gpu.address_space<workgroup>]
+ gpu.barrier memfence [#gpu.address_space<global>]
+ gpu.barrier memfence [#gpu.address_space<global>, #gpu.address_space<workgroup>]
+ gpu.barrier memfence [#gpu.address_space<private>]
+ gpu.barrier memfence []
"some_op"(%bIdX, %tIdX) : (index, index) -> ()
%42 = memref.load %arg1[%bIdX] : memref<?xf32, 1>
``````````
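As a rough illustration of the LLVM-SPV lowering exercised by the test above, a barrier restricted to the workgroup address space becomes a call to the OpenCL `barrier` builtin with only `CLK_LOCAL_MEM_FENCE` (value 1) set. This is a minimal sketch; the function attributes (`convergent`, `no_unwind`, `will_return`) checked in the test are omitted here:

```mlir
// Input:
gpu.barrier memfence [#gpu.address_space<workgroup>]

// After GPU-to-LLVM-SPV conversion (sketch; call attributes omitted):
%flags = llvm.mlir.constant(1 : i32) : i32   // CLK_LOCAL_MEM_FENCE
llvm.call spir_funccc @_Z7barrierj(%flags) : (i32) -> ()
```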
https://github.com/llvm/llvm-project/pull/110527