[Mlir-commits] [mlir] [mlir][GPU] Add constant address space to GPU dialect (PR #190211)

Bangtian Liu llvmlistbot at llvm.org
Thu Apr 2 09:48:46 PDT 2026


https://github.com/bangtianliu updated https://github.com/llvm/llvm-project/pull/190211

>From 432d546fccf002f4fe10a87b323f38a1731cf11a Mon Sep 17 00:00:00 2001
From: Bangtian Liu <liubangtian at gmail.com>
Date: Thu, 2 Apr 2026 09:47:17 -0700
Subject: [PATCH] [mlir][GPU] Add constant address space to GPU dialect

Signed-off-by: Bangtian Liu <liubangtian at gmail.com>
---
 mlir/docs/Dialects/GPU.md                     |  7 +++--
 mlir/include/mlir/Dialect/GPU/IR/GPUBase.td   | 16 +++++++++++-
 mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td  |  2 ++
 .../AMDGPUToROCDL/AMDGPUToROCDL.cpp           |  2 ++
 .../GPUCommon/AttrToSPIRVConverter.cpp        |  2 ++
 .../Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp  |  2 ++
 .../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp      |  2 ++
 .../Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp    |  2 ++
 mlir/lib/Dialect/GPU/IR/GPUDialect.cpp        | 13 ++++++++++
 .../GPUToLLVMSPV/constant-address-space.mlir  | 26 +++++++++++++++++++
 .../GPUToLLVMSPV/gpu-to-llvm-spv.mlir         |  4 +++
 .../GPUToNVVM/constant-address-space.mlir     | 22 ++++++++++++++++
 .../GPUToROCDL/constant-address-space.mlir    | 22 ++++++++++++++++
 .../GPUToROCDL/gpu-to-rocdl-barrier.mlir      | 12 +++++++++
 .../Dialect/GPU/constant-address-space.mlir   | 23 ++++++++++++++++
 mlir/test/Dialect/GPU/ops.mlir                |  1 +
 16 files changed, 155 insertions(+), 3 deletions(-)
 create mode 100644 mlir/test/Conversion/GPUToLLVMSPV/constant-address-space.mlir
 create mode 100644 mlir/test/Conversion/GPUToNVVM/constant-address-space.mlir
 create mode 100644 mlir/test/Conversion/GPUToROCDL/constant-address-space.mlir
 create mode 100644 mlir/test/Dialect/GPU/constant-address-space.mlir

diff --git a/mlir/docs/Dialects/GPU.md b/mlir/docs/Dialects/GPU.md
index 511e4ef03ed60..ea939c74f77ab 100644
--- a/mlir/docs/Dialects/GPU.md
+++ b/mlir/docs/Dialects/GPU.md
@@ -28,13 +28,16 @@ is being used.
 ## GPU address spaces
 
 The GPU dialect exposes the `gpu.address_space` attribute, which currently has
-three values: `global`, `workgroup`, and `private`.
+four values: `global`, `workgroup`, `private`, and `constant`.
 
 These address spaces represent the types of buffer commonly seen in GPU compilation.
 `global` memory is memory that resides in the GPU's global memory. `workgroup`
 memory is a limited, per-workgroup resource: all threads in a workgroup/thread
-block access the same values in `workgroup` memory. Finally, `private` memory is
+block access the same values in `workgroup` memory. `private` memory is
 used to represent `alloca`-like buffers that are private to a single thread/workitem.
+`constant` memory is read-only memory residing in the global address space, guaranteed
+not to change during kernel execution, allowing backend-specific optimizations
+(e.g., scalar reads on AMD GPUs).
 
 These address spaces may be used as the `memorySpace` attribute on `memref` values.
 The `gpu.module`/`gpu.func` compilation pipeline will lower such memory space
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
index 55326f044147b..49ecc7f6c9b95 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
@@ -53,6 +53,10 @@ def GPU_Dialect : Dialect {
     /// space.
     static AddressSpace getPrivateAddressSpace() { return AddressSpace::Private; }
 
+    /// Returns the numeric value used to identify the constant memory address
+    /// space.
+    static AddressSpace getConstantAddressSpace() { return AddressSpace::Constant; }
+
     /// Return true if the given MemRefType has an address space that matches
     /// with the gpu::AddressSpaceAttr attribute with value 'workgroup`.
     static bool hasWorkgroupMemoryAddressSpace(MemRefType type);
@@ -60,6 +64,14 @@ def GPU_Dialect : Dialect {
     /// Return true if the given Attribute is an gpu::AddressSpaceAttr
     /// attribute with value 'workgroup`.
     static bool isWorkgroupMemoryAddressSpace(Attribute memorySpace);
+
+    /// Return true if the given MemRefType has an address space that matches
+    /// the gpu::AddressSpaceAttr attribute with value `constant`.
+    static bool hasConstantMemoryAddressSpace(MemRefType type);
+
+    /// Return true if the given Attribute is a gpu::AddressSpaceAttr
+    /// attribute with value `constant`.
+    static bool isConstantMemoryAddressSpace(Attribute memorySpace);
   }];
   let discardableAttrs = (ins
     "::mlir::DenseI32ArrayAttr":$known_block_size,
@@ -89,11 +101,13 @@ class GPU_I32EnumAttr<string mnemonic, GPU_I32Enum enumInfo> :
 def GPU_AddressSpaceGlobal : I32EnumAttrCase<"Global", 1, "global">;
 def GPU_AddressSpaceWorkgroup : I32EnumAttrCase<"Workgroup", 2, "workgroup">;
 def GPU_AddressSpacePrivate : I32EnumAttrCase<"Private", 3, "private">;
+def GPU_AddressSpaceConstant : I32EnumAttrCase<"Constant", 4, "constant">;
 def GPU_AddressSpaceEnum : GPU_I32Enum<
   "AddressSpace", "GPU address space", [
     GPU_AddressSpaceGlobal,
     GPU_AddressSpaceWorkgroup,
-    GPU_AddressSpacePrivate
+    GPU_AddressSpacePrivate,
+    GPU_AddressSpaceConstant
   ]>;
 
 def GPU_AddressSpaceAttr :
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index c20c08685f268..7305de2049ee2 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -131,6 +131,8 @@ def ROCDL_Dialect : Dialect {
     static constexpr unsigned kGlobalMemoryAddressSpace = 1;
     /// The address space value that represents shared memory.
     static constexpr unsigned kSharedMemoryAddressSpace = 3;
+    /// The address space value that represents constant memory.
+    static constexpr unsigned kConstantMemoryAddressSpace = 4;
     /// The address space value that represents private memory.
     static constexpr unsigned kPrivateMemoryAddressSpace = 5;
   }];
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index a6c9285976269..423d261bc188a 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -4105,6 +4105,8 @@ void mlir::amdgpu::populateCommonGPUTypeAndAttributeConversions(
           return ROCDL::ROCDLDialect::kSharedMemoryAddressSpace;
         case gpu::AddressSpace::Private:
           return ROCDL::ROCDLDialect::kPrivateMemoryAddressSpace;
+        case gpu::AddressSpace::Constant:
+          return ROCDL::ROCDLDialect::kConstantMemoryAddressSpace;
         }
         llvm_unreachable("unknown address space enum value");
       });
diff --git a/mlir/lib/Conversion/GPUCommon/AttrToSPIRVConverter.cpp b/mlir/lib/Conversion/GPUCommon/AttrToSPIRVConverter.cpp
index fcabad32974ac..bc50879f7a8d3 100644
--- a/mlir/lib/Conversion/GPUCommon/AttrToSPIRVConverter.cpp
+++ b/mlir/lib/Conversion/GPUCommon/AttrToSPIRVConverter.cpp
@@ -17,6 +17,8 @@ spirv::StorageClass addressSpaceToStorageClass(gpu::AddressSpace addressSpace) {
     return spirv::StorageClass::Workgroup;
   case gpu::AddressSpace::Private:
     return spirv::StorageClass::Private;
+  case gpu::AddressSpace::Constant:
+    return spirv::StorageClass::UniformConstant;
   }
   llvm_unreachable("Unhandled storage class");
 }
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 6efd137f513e9..5df9193cf27f1 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -138,6 +138,8 @@ struct GPUBarrierConversion final : ConvertOpToLLVMPattern<gpu::BarrierOp> {
           memFenceFlag = memFenceFlag | localMemFenceFlag;
           break;
         case gpu::AddressSpace::Private:
+        case gpu::AddressSpace::Constant:
+          // Private is thread-local, constant is read-only; no fencing needed.
           break;
         }
       }
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 1b74fa81f66fe..58ab1c799b574 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -545,6 +545,8 @@ struct GPUBarrierOpLowering final : ConvertOpToLLVMPattern<gpu::BarrierOp> {
           fenceLDS = true;
           break;
         case gpu::AddressSpace::Private:
+        case gpu::AddressSpace::Constant:
+          // Private is thread-local, constant is read-only; no fencing needed.
           break;
         }
       }
diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
index 6edc8f5c86dd3..303dc82a67374 100644
--- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
+++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
@@ -1723,6 +1723,8 @@ void mlir::nvgpu::populateCommonGPUTypeAndAttributeConversions(
           return static_cast<unsigned>(NVVM::NVVMMemorySpace::Shared);
         case gpu::AddressSpace::Private:
           return 0;
+        case gpu::AddressSpace::Constant:
+          return static_cast<unsigned>(NVVM::NVVMMemorySpace::Constant);
         }
         llvm_unreachable("unknown address space enum value");
       });
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index aff6bce57b5a0..8039f3952eea6 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -248,6 +248,19 @@ bool GPUDialect::hasWorkgroupMemoryAddressSpace(MemRefType type) {
   return isWorkgroupMemoryAddressSpace(memorySpace);
 }
 
+bool GPUDialect::isConstantMemoryAddressSpace(Attribute memorySpace) {
+  if (!memorySpace)
+    return false;
+  if (auto gpuAttr = llvm::dyn_cast<gpu::AddressSpaceAttr>(memorySpace))
+    return gpuAttr.getValue() == getConstantAddressSpace();
+  return false;
+}
+
+bool GPUDialect::hasConstantMemoryAddressSpace(MemRefType type) {
+  Attribute memorySpace = type.getMemorySpace();
+  return isConstantMemoryAddressSpace(memorySpace);
+}
+
 bool GPUDialect::isKernel(Operation *op) {
   UnitAttr isKernelAttr = op->getAttrOfType<UnitAttr>(getKernelFuncAttrName());
   return static_cast<bool>(isKernelAttr);
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/constant-address-space.mlir b/mlir/test/Conversion/GPUToLLVMSPV/constant-address-space.mlir
new file mode 100644
index 0000000000000..4f43963b2208d
--- /dev/null
+++ b/mlir/test/Conversion/GPUToLLVMSPV/constant-address-space.mlir
@@ -0,0 +1,26 @@
+// RUN: mlir-opt %s -convert-gpu-to-llvm-spv | FileCheck %s
+
+gpu.module @kernels {
+  // CHECK-LABEL: llvm.func spir_kernelcc @constant_load
+  // Constant address space maps to SPIRV/OpenCL address space 2 (UniformConstant)
+  // CHECK-SAME: !llvm.ptr<2>
+  gpu.func @constant_load(%arg0: memref<16xf32, #gpu.address_space<constant>>) kernel {
+    %c0 = arith.constant 0 : index
+    %v = memref.load %arg0[%c0] : memref<16xf32, #gpu.address_space<constant>>
+    gpu.return
+  }
+
+  // CHECK-LABEL: llvm.func spir_funccc @all_address_spaces
+  // Global -> 1, Workgroup -> 3, Private -> 0 (default), Constant -> 2
+  // CHECK-SAME: !llvm.ptr<1>
+  // CHECK-SAME: !llvm.ptr<3>
+  // CHECK-SAME: !llvm.ptr,
+  // CHECK-SAME: !llvm.ptr<2>
+  gpu.func @all_address_spaces(
+    %arg0: memref<f32, #gpu.address_space<global>>,
+    %arg1: memref<f32, #gpu.address_space<workgroup>>,
+    %arg2: memref<f32, #gpu.address_space<private>>,
+    %arg3: memref<f32, #gpu.address_space<constant>>) {
+    gpu.return
+  }
+}
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index ef5aa023bce51..913c8b88d8e94 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -238,6 +238,10 @@ gpu.module @barriers {
     // CHECK:         [[NONE_FLAG2:%.*]] = llvm.mlir.constant(0 : i32) : i32
     // CHECK:         llvm.call spir_funccc @_Z7barrierj([[NONE_FLAG2]])
     gpu.barrier memfence [#gpu.address_space<private>]
+    // Constant memory is read-only, no fencing needed (same as private)
+    // CHECK:         [[NONE_FLAG3:%.*]] = llvm.mlir.constant(0 : i32) : i32
+    // CHECK:         llvm.call spir_funccc @_Z7barrierj([[NONE_FLAG3]])
+    gpu.barrier memfence [#gpu.address_space<constant>]
     return
   }
 }
diff --git a/mlir/test/Conversion/GPUToNVVM/constant-address-space.mlir b/mlir/test/Conversion/GPUToNVVM/constant-address-space.mlir
new file mode 100644
index 0000000000000..668e3aafab356
--- /dev/null
+++ b/mlir/test/Conversion/GPUToNVVM/constant-address-space.mlir
@@ -0,0 +1,22 @@
+// RUN: mlir-opt -convert-gpu-to-nvvm %s | FileCheck %s
+
+module attributes {gpu.container_module} {
+  gpu.module @kernel_module {
+    // CHECK-LABEL: llvm.func @constant_load
+    // CHECK-SAME: %{{.*}}: !llvm.ptr<4>
+    gpu.func @constant_load(%arg0: memref<16xf32, #gpu.address_space<constant>>) kernel {
+      %c0 = arith.constant 0 : index
+      %v = memref.load %arg0[%c0] : memref<16xf32, #gpu.address_space<constant>>
+      gpu.return
+    }
+
+    // CHECK-LABEL: llvm.func @constant_multidim
+    // CHECK-SAME: %{{.*}}: !llvm.ptr<4>
+    gpu.func @constant_multidim(%arg0: memref<4x8xf32, #gpu.address_space<constant>>) kernel {
+      %c0 = arith.constant 0 : index
+      %c1 = arith.constant 1 : index
+      %v = memref.load %arg0[%c0, %c1] : memref<4x8xf32, #gpu.address_space<constant>>
+      gpu.return
+    }
+  }
+}
diff --git a/mlir/test/Conversion/GPUToROCDL/constant-address-space.mlir b/mlir/test/Conversion/GPUToROCDL/constant-address-space.mlir
new file mode 100644
index 0000000000000..738aece1769da
--- /dev/null
+++ b/mlir/test/Conversion/GPUToROCDL/constant-address-space.mlir
@@ -0,0 +1,22 @@
+// RUN: mlir-opt -convert-gpu-to-rocdl %s | FileCheck %s
+
+module attributes {gpu.container_module} {
+  gpu.module @kernel_module {
+    // CHECK-LABEL: llvm.func @constant_load
+    // CHECK-SAME: %{{.*}}: !llvm.ptr<4>
+    gpu.func @constant_load(%arg0: memref<16xf32, #gpu.address_space<constant>>) kernel {
+      %c0 = arith.constant 0 : index
+      %v = memref.load %arg0[%c0] : memref<16xf32, #gpu.address_space<constant>>
+      gpu.return
+    }
+
+    // CHECK-LABEL: llvm.func @constant_multidim
+    // CHECK-SAME: %{{.*}}: !llvm.ptr<4>
+    gpu.func @constant_multidim(%arg0: memref<4x8xf32, #gpu.address_space<constant>>) kernel {
+      %c0 = arith.constant 0 : index
+      %c1 = arith.constant 1 : index
+      %v = memref.load %arg0[%c0, %c1] : memref<4x8xf32, #gpu.address_space<constant>>
+      gpu.return
+    }
+  }
+}
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-barrier.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-barrier.mlir
index c1c7d41f66a32..11a874cfb89e7 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-barrier.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-barrier.mlir
@@ -65,4 +65,16 @@ func.func @barrier_private_only() {
   gpu.barrier memfence [#gpu.address_space<private>]
   func.return
 }
+
+// GFX9-LABEL: func @barrier_constant_only
+// GFX12-LABEL: func @barrier_constant_only
+func.func @barrier_constant_only() {
+  // GFX9-NEXT: rocdl.s.barrier
+  // GFX12-NEXT: rocdl.s.barrier.signal id = -1
+  // GFX12-NEXT: rocdl.s.barrier.wait id = -1
+  // CHECK-NOT: llvm.fence
+  // Constant memory is read-only, no fencing needed
+  gpu.barrier memfence [#gpu.address_space<constant>]
+  func.return
+}
 }
diff --git a/mlir/test/Dialect/GPU/constant-address-space.mlir b/mlir/test/Dialect/GPU/constant-address-space.mlir
new file mode 100644
index 0000000000000..9dcef61dead82
--- /dev/null
+++ b/mlir/test/Dialect/GPU/constant-address-space.mlir
@@ -0,0 +1,23 @@
+// RUN: mlir-opt -allow-unregistered-dialect %s | FileCheck %s
+
+gpu.module @test {
+  // CHECK-LABEL: @constant_memref_basic
+  // CHECK-SAME: (%{{.*}}: memref<16xf32, #gpu.address_space<constant>>)
+  gpu.func @constant_memref_basic(%arg0: memref<16xf32, #gpu.address_space<constant>>) kernel {
+    %c0 = arith.constant 0 : index
+    %0 = memref.load %arg0[%c0] : memref<16xf32, #gpu.address_space<constant>>
+    gpu.return
+  }
+
+  // CHECK-LABEL: @constant_memref_multidim
+  // CHECK: memref<4x8xf32, #gpu.address_space<constant>>
+  gpu.func @constant_memref_multidim(%arg0: memref<4x8xf32, #gpu.address_space<constant>>) kernel {
+    gpu.return
+  }
+
+  // CHECK-LABEL: @constant_memref_dynamic
+  // CHECK: memref<?x?xf32, #gpu.address_space<constant>>
+  gpu.func @constant_memref_dynamic(%arg0: memref<?x?xf32, #gpu.address_space<constant>>) kernel {
+    gpu.return
+  }
+}
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index a5dad3f931cc1..cbafc376fb89a 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -186,6 +186,7 @@ module attributes {gpu.container_module} {
       gpu.barrier memfence [#gpu.address_space<global>]
       gpu.barrier memfence [#gpu.address_space<global>, #gpu.address_space<workgroup>]
       gpu.barrier memfence [#gpu.address_space<private>]
+      gpu.barrier memfence [#gpu.address_space<constant>]
       gpu.barrier memfence []
 
       "some_op"(%bIdX, %tIdX) : (index, index) -> ()



More information about the Mlir-commits mailing list