[Mlir-commits] [mlir] [mlir][GPU] Add constant address space to GPU dialect (PR #190211)
Bangtian Liu
llvmlistbot at llvm.org
Thu Apr 2 09:48:46 PDT 2026
https://github.com/bangtianliu updated https://github.com/llvm/llvm-project/pull/190211
>From 432d546fccf002f4fe10a87b323f38a1731cf11a Mon Sep 17 00:00:00 2001
From: Bangtian Liu <liubangtian at gmail.com>
Date: Thu, 2 Apr 2026 09:47:17 -0700
Subject: [PATCH] [mlir][GPU] Add constant address space to GPU dialect
Signed-off-by: Bangtian Liu <liubangtian at gmail.com>
---
mlir/docs/Dialects/GPU.md | 7 +++--
mlir/include/mlir/Dialect/GPU/IR/GPUBase.td | 16 +++++++++++-
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 2 ++
.../AMDGPUToROCDL/AMDGPUToROCDL.cpp | 2 ++
.../GPUCommon/AttrToSPIRVConverter.cpp | 2 ++
.../Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 2 ++
.../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 2 ++
.../Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp | 2 ++
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 13 ++++++++++
.../GPUToLLVMSPV/constant-address-space.mlir | 26 +++++++++++++++++++
.../GPUToLLVMSPV/gpu-to-llvm-spv.mlir | 4 +++
.../GPUToNVVM/constant-address-space.mlir | 22 ++++++++++++++++
.../GPUToROCDL/constant-address-space.mlir | 22 ++++++++++++++++
.../GPUToROCDL/gpu-to-rocdl-barrier.mlir | 12 +++++++++
.../Dialect/GPU/constant-address-space.mlir | 23 ++++++++++++++++
mlir/test/Dialect/GPU/ops.mlir | 1 +
16 files changed, 155 insertions(+), 3 deletions(-)
create mode 100644 mlir/test/Conversion/GPUToLLVMSPV/constant-address-space.mlir
create mode 100644 mlir/test/Conversion/GPUToNVVM/constant-address-space.mlir
create mode 100644 mlir/test/Conversion/GPUToROCDL/constant-address-space.mlir
create mode 100644 mlir/test/Dialect/GPU/constant-address-space.mlir
diff --git a/mlir/docs/Dialects/GPU.md b/mlir/docs/Dialects/GPU.md
index 511e4ef03ed60..ea939c74f77ab 100644
--- a/mlir/docs/Dialects/GPU.md
+++ b/mlir/docs/Dialects/GPU.md
@@ -28,13 +28,16 @@ is being used.
## GPU address spaces
The GPU dialect exposes the `gpu.address_space` attribute, which currently has
-three values: `global`, `workgroup`, and `private`.
+four values: `global`, `workgroup`, `private`, and `constant`.
These address spaces represent the types of buffer commonly seen in GPU compilation.
`global` memory is memory that resides in the GPU's global memory. `workgroup`
memory is a limited, per-workgroup resource: all threads in a workgroup/thread
-block access the same values in `workgroup` memory. Finally, `private` memory is
+block access the same values in `workgroup` memory. `private` memory is
used to represent `alloca`-like buffers that are private to a single thread/workitem.
+`constant` memory is read-only memory residing in the global address space. It is
+guaranteed not to change during kernel execution, which allows backend-specific
+optimizations (e.g., scalar reads on AMD GPUs).
These address spaces may be used as the `memorySpace` attribute on `memref` values.
The `gpu.module`/`gpu.func` compilation pipeline will lower such memory space
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
index 55326f044147b..49ecc7f6c9b95 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
@@ -53,6 +53,10 @@ def GPU_Dialect : Dialect {
/// space.
static AddressSpace getPrivateAddressSpace() { return AddressSpace::Private; }
+ /// Returns the numeric value used to identify the constant memory address
+ /// space.
+ static AddressSpace getConstantAddressSpace() { return AddressSpace::Constant; }
+
/// Return true if the given MemRefType has an address space that matches
/// with the gpu::AddressSpaceAttr attribute with value 'workgroup`.
static bool hasWorkgroupMemoryAddressSpace(MemRefType type);
@@ -60,6 +64,14 @@ def GPU_Dialect : Dialect {
/// Return true if the given Attribute is an gpu::AddressSpaceAttr
/// attribute with value 'workgroup`.
static bool isWorkgroupMemoryAddressSpace(Attribute memorySpace);
+
+ /// Return true if the given MemRefType has an address space that matches
+ /// with the gpu::AddressSpaceAttr attribute with value 'constant'.
+ static bool hasConstantMemoryAddressSpace(MemRefType type);
+
+ /// Return true if the given Attribute is a gpu::AddressSpaceAttr
+ /// attribute with value 'constant'.
+ static bool isConstantMemoryAddressSpace(Attribute memorySpace);
}];
let discardableAttrs = (ins
"::mlir::DenseI32ArrayAttr":$known_block_size,
@@ -89,11 +101,13 @@ class GPU_I32EnumAttr<string mnemonic, GPU_I32Enum enumInfo> :
def GPU_AddressSpaceGlobal : I32EnumAttrCase<"Global", 1, "global">;
def GPU_AddressSpaceWorkgroup : I32EnumAttrCase<"Workgroup", 2, "workgroup">;
def GPU_AddressSpacePrivate : I32EnumAttrCase<"Private", 3, "private">;
+def GPU_AddressSpaceConstant : I32EnumAttrCase<"Constant", 4, "constant">;
def GPU_AddressSpaceEnum : GPU_I32Enum<
"AddressSpace", "GPU address space", [
GPU_AddressSpaceGlobal,
GPU_AddressSpaceWorkgroup,
- GPU_AddressSpacePrivate
+ GPU_AddressSpacePrivate,
+ GPU_AddressSpaceConstant
]>;
def GPU_AddressSpaceAttr :
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index c20c08685f268..7305de2049ee2 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -131,6 +131,8 @@ def ROCDL_Dialect : Dialect {
static constexpr unsigned kGlobalMemoryAddressSpace = 1;
/// The address space value that represents shared memory.
static constexpr unsigned kSharedMemoryAddressSpace = 3;
+ /// The address space value that represents constant memory.
+ static constexpr unsigned kConstantMemoryAddressSpace = 4;
/// The address space value that represents private memory.
static constexpr unsigned kPrivateMemoryAddressSpace = 5;
}];
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index a6c9285976269..423d261bc188a 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -4105,6 +4105,8 @@ void mlir::amdgpu::populateCommonGPUTypeAndAttributeConversions(
return ROCDL::ROCDLDialect::kSharedMemoryAddressSpace;
case gpu::AddressSpace::Private:
return ROCDL::ROCDLDialect::kPrivateMemoryAddressSpace;
+ case gpu::AddressSpace::Constant:
+ return ROCDL::ROCDLDialect::kConstantMemoryAddressSpace;
}
llvm_unreachable("unknown address space enum value");
});
diff --git a/mlir/lib/Conversion/GPUCommon/AttrToSPIRVConverter.cpp b/mlir/lib/Conversion/GPUCommon/AttrToSPIRVConverter.cpp
index fcabad32974ac..bc50879f7a8d3 100644
--- a/mlir/lib/Conversion/GPUCommon/AttrToSPIRVConverter.cpp
+++ b/mlir/lib/Conversion/GPUCommon/AttrToSPIRVConverter.cpp
@@ -17,6 +17,8 @@ spirv::StorageClass addressSpaceToStorageClass(gpu::AddressSpace addressSpace) {
return spirv::StorageClass::Workgroup;
case gpu::AddressSpace::Private:
return spirv::StorageClass::Private;
+ case gpu::AddressSpace::Constant:
+ return spirv::StorageClass::UniformConstant;
}
llvm_unreachable("Unhandled storage class");
}
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 6efd137f513e9..5df9193cf27f1 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -138,6 +138,8 @@ struct GPUBarrierConversion final : ConvertOpToLLVMPattern<gpu::BarrierOp> {
memFenceFlag = memFenceFlag | localMemFenceFlag;
break;
case gpu::AddressSpace::Private:
+ case gpu::AddressSpace::Constant:
+ // Private is thread-local, constant is read-only; no fencing needed.
break;
}
}
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 1b74fa81f66fe..58ab1c799b574 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -545,6 +545,8 @@ struct GPUBarrierOpLowering final : ConvertOpToLLVMPattern<gpu::BarrierOp> {
fenceLDS = true;
break;
case gpu::AddressSpace::Private:
+ case gpu::AddressSpace::Constant:
+ // Private is thread-local, constant is read-only; no fencing needed.
break;
}
}
diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
index 6edc8f5c86dd3..303dc82a67374 100644
--- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
+++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
@@ -1723,6 +1723,8 @@ void mlir::nvgpu::populateCommonGPUTypeAndAttributeConversions(
return static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared);
case gpu::AddressSpace::Private:
return 0;
+ case gpu::AddressSpace::Constant:
+ return static_cast<unsigned>(NVVM::NVVMMemorySpace::Constant);
}
llvm_unreachable("unknown address space enum value");
});
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index aff6bce57b5a0..8039f3952eea6 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -248,6 +248,19 @@ bool GPUDialect::hasWorkgroupMemoryAddressSpace(MemRefType type) {
return isWorkgroupMemoryAddressSpace(memorySpace);
}
+bool GPUDialect::isConstantMemoryAddressSpace(Attribute memorySpace) {
+ if (!memorySpace)
+ return false;
+ if (auto gpuAttr = llvm::dyn_cast<gpu::AddressSpaceAttr>(memorySpace))
+ return gpuAttr.getValue() == getConstantAddressSpace();
+ return false;
+}
+
+bool GPUDialect::hasConstantMemoryAddressSpace(MemRefType type) {
+ Attribute memorySpace = type.getMemorySpace();
+ return isConstantMemoryAddressSpace(memorySpace);
+}
+
bool GPUDialect::isKernel(Operation *op) {
UnitAttr isKernelAttr = op->getAttrOfType<UnitAttr>(getKernelFuncAttrName());
return static_cast<bool>(isKernelAttr);
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/constant-address-space.mlir b/mlir/test/Conversion/GPUToLLVMSPV/constant-address-space.mlir
new file mode 100644
index 0000000000000..4f43963b2208d
--- /dev/null
+++ b/mlir/test/Conversion/GPUToLLVMSPV/constant-address-space.mlir
@@ -0,0 +1,26 @@
+// RUN: mlir-opt %s -convert-gpu-to-llvm-spv | FileCheck %s
+
+gpu.module @kernels {
+ // CHECK-LABEL: llvm.func spir_kernelcc @constant_load
+ // Constant address space maps to SPIRV/OpenCL address space 2 (UniformConstant)
+ // CHECK-SAME: !llvm.ptr<2>
+ gpu.func @constant_load(%arg0: memref<16xf32, #gpu.address_space<constant>>) kernel {
+ %c0 = arith.constant 0 : index
+ %v = memref.load %arg0[%c0] : memref<16xf32, #gpu.address_space<constant>>
+ gpu.return
+ }
+
+ // CHECK-LABEL: llvm.func spir_funccc @all_address_spaces
+ // Global -> 1, Workgroup -> 3, Private -> 0 (default), Constant -> 2
+ // CHECK-SAME: !llvm.ptr<1>
+ // CHECK-SAME: !llvm.ptr<3>
+ // CHECK-SAME: !llvm.ptr,
+ // CHECK-SAME: !llvm.ptr<2>
+ gpu.func @all_address_spaces(
+ %arg0: memref<f32, #gpu.address_space<global>>,
+ %arg1: memref<f32, #gpu.address_space<workgroup>>,
+ %arg2: memref<f32, #gpu.address_space<private>>,
+ %arg3: memref<f32, #gpu.address_space<constant>>) {
+ gpu.return
+ }
+}
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index ef5aa023bce51..913c8b88d8e94 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -238,6 +238,10 @@ gpu.module @barriers {
// CHECK: [[NONE_FLAG2:%.*]] = llvm.mlir.constant(0 : i32) : i32
// CHECK: llvm.call spir_funccc @_Z7barrierj([[NONE_FLAG2]])
gpu.barrier memfence [#gpu.address_space<private>]
+ // Constant memory is read-only, no fencing needed (same as private)
+ // CHECK: [[NONE_FLAG3:%.*]] = llvm.mlir.constant(0 : i32) : i32
+ // CHECK: llvm.call spir_funccc @_Z7barrierj([[NONE_FLAG3]])
+ gpu.barrier memfence [#gpu.address_space<constant>]
return
}
}
diff --git a/mlir/test/Conversion/GPUToNVVM/constant-address-space.mlir b/mlir/test/Conversion/GPUToNVVM/constant-address-space.mlir
new file mode 100644
index 0000000000000..668e3aafab356
--- /dev/null
+++ b/mlir/test/Conversion/GPUToNVVM/constant-address-space.mlir
@@ -0,0 +1,22 @@
+// RUN: mlir-opt -convert-gpu-to-nvvm %s | FileCheck %s
+
+module attributes {gpu.container_module} {
+ gpu.module @kernel_module {
+ // CHECK-LABEL: llvm.func @constant_load
+ // CHECK-SAME: %{{.*}}: !llvm.ptr<4>
+ gpu.func @constant_load(%arg0: memref<16xf32, #gpu.address_space<constant>>) kernel {
+ %c0 = arith.constant 0 : index
+ %v = memref.load %arg0[%c0] : memref<16xf32, #gpu.address_space<constant>>
+ gpu.return
+ }
+
+ // CHECK-LABEL: llvm.func @constant_multidim
+ // CHECK-SAME: %{{.*}}: !llvm.ptr<4>
+ gpu.func @constant_multidim(%arg0: memref<4x8xf32, #gpu.address_space<constant>>) kernel {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %v = memref.load %arg0[%c0, %c1] : memref<4x8xf32, #gpu.address_space<constant>>
+ gpu.return
+ }
+ }
+}
diff --git a/mlir/test/Conversion/GPUToROCDL/constant-address-space.mlir b/mlir/test/Conversion/GPUToROCDL/constant-address-space.mlir
new file mode 100644
index 0000000000000..738aece1769da
--- /dev/null
+++ b/mlir/test/Conversion/GPUToROCDL/constant-address-space.mlir
@@ -0,0 +1,22 @@
+// RUN: mlir-opt -convert-gpu-to-rocdl %s | FileCheck %s
+
+module attributes {gpu.container_module} {
+ gpu.module @kernel_module {
+ // CHECK-LABEL: llvm.func @constant_load
+ // CHECK-SAME: %{{.*}}: !llvm.ptr<4>
+ gpu.func @constant_load(%arg0: memref<16xf32, #gpu.address_space<constant>>) kernel {
+ %c0 = arith.constant 0 : index
+ %v = memref.load %arg0[%c0] : memref<16xf32, #gpu.address_space<constant>>
+ gpu.return
+ }
+
+ // CHECK-LABEL: llvm.func @constant_multidim
+ // CHECK-SAME: %{{.*}}: !llvm.ptr<4>
+ gpu.func @constant_multidim(%arg0: memref<4x8xf32, #gpu.address_space<constant>>) kernel {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %v = memref.load %arg0[%c0, %c1] : memref<4x8xf32, #gpu.address_space<constant>>
+ gpu.return
+ }
+ }
+}
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-barrier.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-barrier.mlir
index c1c7d41f66a32..11a874cfb89e7 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-barrier.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-barrier.mlir
@@ -65,4 +65,16 @@ func.func @barrier_private_only() {
gpu.barrier memfence [#gpu.address_space<private>]
func.return
}
+
+// GFX9-LABEL: func @barrier_constant_only
+// GFX12-LABEL: func @barrier_constant_only
+func.func @barrier_constant_only() {
+ // GFX9-NEXT: rocdl.s.barrier
+ // GFX12-NEXT: rocdl.s.barrier.signal id = -1
+ // GFX12-NEXT: rocdl.s.barrier.wait id = -1
+ // CHECK-NOT: llvm.fence
+ // Constant memory is read-only, no fencing needed
+ gpu.barrier memfence [#gpu.address_space<constant>]
+ func.return
+}
}
diff --git a/mlir/test/Dialect/GPU/constant-address-space.mlir b/mlir/test/Dialect/GPU/constant-address-space.mlir
new file mode 100644
index 0000000000000..9dcef61dead82
--- /dev/null
+++ b/mlir/test/Dialect/GPU/constant-address-space.mlir
@@ -0,0 +1,23 @@
+// RUN: mlir-opt -allow-unregistered-dialect %s | FileCheck %s
+
+gpu.module @test {
+ // CHECK-LABEL: @constant_memref_basic
+ // CHECK-SAME: (%{{.*}}: memref<16xf32, #gpu.address_space<constant>>)
+ gpu.func @constant_memref_basic(%arg0: memref<16xf32, #gpu.address_space<constant>>) kernel {
+ %c0 = arith.constant 0 : index
+ %0 = memref.load %arg0[%c0] : memref<16xf32, #gpu.address_space<constant>>
+ gpu.return
+ }
+
+ // CHECK-LABEL: @constant_memref_multidim
+ // CHECK: memref<4x8xf32, #gpu.address_space<constant>>
+ gpu.func @constant_memref_multidim(%arg0: memref<4x8xf32, #gpu.address_space<constant>>) kernel {
+ gpu.return
+ }
+
+ // CHECK-LABEL: @constant_memref_dynamic
+ // CHECK: memref<?x?xf32, #gpu.address_space<constant>>
+ gpu.func @constant_memref_dynamic(%arg0: memref<?x?xf32, #gpu.address_space<constant>>) kernel {
+ gpu.return
+ }
+}
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index a5dad3f931cc1..cbafc376fb89a 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -186,6 +186,7 @@ module attributes {gpu.container_module} {
gpu.barrier memfence [#gpu.address_space<global>]
gpu.barrier memfence [#gpu.address_space<global>, #gpu.address_space<workgroup>]
gpu.barrier memfence [#gpu.address_space<private>]
+ gpu.barrier memfence [#gpu.address_space<constant>]
gpu.barrier memfence []
"some_op"(%bIdX, %tIdX) : (index, index) -> ()
More information about the Mlir-commits
mailing list