[llvm-branch-commits] [llvm] [SPIRV] Lower load/store atomic to OpAtomicLoad/OpAtomicStore (PR #185696)
Juan Manuel Martinez Caamaño via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Mar 10 10:22:56 PDT 2026
https://github.com/jmmartinez created https://github.com/llvm/llvm-project/pull/185696
Still missing:
* [ ] Why is an atomic load of a vector rejected by spirv-val, but not an atomic store of a vector?
* [ ] Decide how to handle the volatile qualifier (currently dropped)
* [ ] Decide how to handle the alignment (currently dropped)
* [ ] Try to consolidate all of this logic into the addMemoryOperands function
closes #185629
>From 5bf3949b165f210e73725e7658849e3e382327bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?=
<jmartinezcaamao at gmail.com>
Date: Tue, 10 Mar 2026 17:45:57 +0100
Subject: [PATCH] [SPIRV] Lower load/store atomic to OpAtomicLoad/OpAtomicStore
---
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 54 +++++++++++++++++--
.../CodeGen/SPIRV/transcoding/load-atomic.ll | 45 +++++++++++-----
.../CodeGen/SPIRV/transcoding/store-atomic.ll | 44 ++++++++++-----
3 files changed, 113 insertions(+), 30 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 65f4856aeee68..3097a70ccb946 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -1635,6 +1635,7 @@ static void addMemoryOperands(uint64_t Flags, MachineInstrBuilder &MIB) {
bool SPIRVInstructionSelector::selectLoad(Register ResVReg,
SPIRVTypeInst ResType,
MachineInstr &I) const {
+ LLVMContext &Context = I.getMF()->getFunction().getContext();
unsigned OpOffset = isa<GIntrinsic>(I) ? 1 : 0;
Register Ptr = I.getOperand(1 + OpOffset).getReg();
@@ -1658,7 +1659,31 @@ bool SPIRVInstructionSelector::selectLoad(Register ResVReg,
}
}
- auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpLoad))
+ MachineIRBuilder MIRBuilder(I);
+
+ if (I.getNumMemOperands()) {
+ const MachineMemOperand *MemOp = *I.memoperands_begin();
+ if (MemOp->isAtomic()) {
+ uint32_t Scope = static_cast<uint32_t>(getMemScope(
+ Context, MemOp->getSyncScopeID()));
+ Register ScopeReg = buildI32Constant(Scope, I);
+
+ AtomicOrdering AO = MemOp->getSuccessOrdering();
+ uint32_t MemSem = static_cast<uint32_t>(getMemSemantics(AO));
+ Register MemSemReg = buildI32Constant(MemSem, I);
+
+ auto Load = MIRBuilder.buildInstr(SPIRV::OpAtomicLoad)
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(Ptr)
+ .addUse(ScopeReg)
+ .addUse(MemSemReg);
+ Load.constrainAllUses(TII, TRI, RBI);
+ return true;
+ }
+ }
+
+ auto MIB = MIRBuilder.buildInstr(SPIRV::OpLoad)
.addDef(ResVReg)
.addUse(GR.getSPIRVTypeID(ResType))
.addUse(Ptr);
@@ -1676,6 +1701,7 @@ bool SPIRVInstructionSelector::selectLoad(Register ResVReg,
}
bool SPIRVInstructionSelector::selectStore(MachineInstr &I) const {
+ LLVMContext &Context = I.getMF()->getFunction().getContext();
unsigned OpOffset = isa<GIntrinsic>(I) ? 1 : 0;
Register StoreVal = I.getOperand(0 + OpOffset).getReg();
Register Ptr = I.getOperand(1 + OpOffset).getReg();
@@ -1710,8 +1736,30 @@ bool SPIRVInstructionSelector::selectStore(MachineInstr &I) const {
}
}
- MachineBasicBlock &BB = *I.getParent();
- auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpStore))
+ MachineIRBuilder MIRBuilder(I);
+
+ if (I.getNumMemOperands()) {
+ const MachineMemOperand *MemOp = *I.memoperands_begin();
+ if (MemOp->isAtomic()) {
+ uint32_t Scope = static_cast<uint32_t>(getMemScope(
+ Context, MemOp->getSyncScopeID()));
+ Register ScopeReg = buildI32Constant(Scope, I);
+
+ AtomicOrdering AO = MemOp->getSuccessOrdering();
+ uint32_t MemSem = static_cast<uint32_t>(getMemSemantics(AO));
+ Register MemSemReg = buildI32Constant(MemSem, I);
+
+ auto Store = MIRBuilder.buildInstr(SPIRV::OpAtomicStore)
+ .addUse(Ptr)
+ .addUse(ScopeReg)
+ .addUse(MemSemReg)
+ .addUse(StoreVal);
+ Store.constrainAllUses(TII, TRI, RBI);
+ return true;
+ }
+ }
+
+ auto MIB = MIRBuilder.buildInstr(SPIRV::OpStore)
.addUse(Ptr)
.addUse(StoreVal);
if (!I.getNumMemOperands()) {
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/load-atomic.ll b/llvm/test/CodeGen/SPIRV/transcoding/load-atomic.ll
index 0ebd3a5ec20ae..1e2568b05b251 100644
--- a/llvm/test/CodeGen/SPIRV/transcoding/load-atomic.ll
+++ b/llvm/test/CodeGen/SPIRV/transcoding/load-atomic.ll
@@ -4,19 +4,25 @@
; RUN: llc -O0 -mtriple=spirv32-- %s -o - | FileCheck %s
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-- %s -o - -filetype=obj | spirv-val %}
-;; Check that 'load atomic' LLVM IR instructions are lowered.
-;; NOTE: The current lowering is incorrect: 'load atomic' should produce
-;; OpAtomicLoad but currently produces OpLoad, silently dropping the atomic
-;; ordering. This test documents the broken behaviour so it can be fixed.
+; Check that 'load atomic' LLVM IR instructions are lowered correctly to
+; OpAtomicLoad with the right Scope and Memory Semantics operands.
+;
+; unordered and monotonic are currently mapped to Memory Semantics `None (Relaxed)` 0x0
; CHECK-DAG: %[[#Int32:]] = OpTypeInt 32 0
; CHECK-DAG: %[[#Float:]] = OpTypeFloat 32
; CHECK-DAG: %[[#Int32Vec:]] = OpTypeVector %[[#Int32]] 2
+; CHECK-DAG: %[[#Const0:]] = OpConstantNull %[[#Int32]]
+; CHECK-DAG: %[[#Const1:]] = OpConstant %[[#Int32]] 1{{$}}
+; CHECK-DAG: %[[#Const2:]] = OpConstant %[[#Int32]] 2{{$}}
+; CHECK-DAG: %[[#Const3:]] = OpConstant %[[#Int32]] 3{{$}}
+; CHECK-DAG: %[[#Const4:]] = OpConstant %[[#Int32]] 4{{$}}
+; CHECK-DAG: %[[#Const16:]] = OpConstant %[[#Int32]] 16{{$}}
define i32 @load_i32_unordered(ptr addrspace(1) %ptr) {
; CHECK-LABEL: OpFunction %[[#]]
; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
-; CHECK: %[[#]] = OpLoad %[[#Int32]] %[[#ptr]] Aligned 4
+; CHECK: %[[#]] = OpAtomicLoad %[[#Int32]] %[[#ptr]] %[[#Const0]] %[[#Const0]]
; CHECK: OpReturnValue
%val = load atomic i32, ptr addrspace(1) %ptr unordered, align 4
ret i32 %val
@@ -25,7 +31,7 @@ define i32 @load_i32_unordered(ptr addrspace(1) %ptr) {
define i32 @load_i32_monotonic(ptr addrspace(1) %ptr) {
; CHECK-LABEL: OpFunction %[[#]]
; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
-; CHECK: %[[#]] = OpLoad %[[#Int32]] %[[#ptr]] Aligned 4
+; CHECK: %[[#]] = OpAtomicLoad %[[#Int32]] %[[#ptr]] %[[#Const0]] %[[#Const0]]
; CHECK: OpReturnValue
%val = load atomic i32, ptr addrspace(1) %ptr monotonic, align 4
ret i32 %val
@@ -34,7 +40,7 @@ define i32 @load_i32_monotonic(ptr addrspace(1) %ptr) {
define i32 @load_i32_acquire(ptr addrspace(1) %ptr) {
; CHECK-LABEL: OpFunction %[[#]]
; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
-; CHECK: %[[#]] = OpLoad %[[#Int32]] %[[#ptr]] Aligned 4
+; CHECK: %[[#]] = OpAtomicLoad %[[#Int32]] %[[#ptr]] %[[#Const0]] %[[#Const2]]
; CHECK: OpReturnValue
%val = load atomic i32, ptr addrspace(1) %ptr acquire, align 4
ret i32 %val
@@ -43,7 +49,7 @@ define i32 @load_i32_acquire(ptr addrspace(1) %ptr) {
define i32 @load_i32_seq_cst(ptr addrspace(1) %ptr) {
; CHECK-LABEL: OpFunction %[[#]]
; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
-; CHECK: %[[#]] = OpLoad %[[#Int32]] %[[#ptr]] Aligned 4
+; CHECK: %[[#]] = OpAtomicLoad %[[#Int32]] %[[#ptr]] %[[#Const0]] %[[#Const16]]
; CHECK: OpReturnValue
%val = load atomic i32, ptr addrspace(1) %ptr seq_cst, align 4
ret i32 %val
@@ -54,7 +60,7 @@ define i32 @load_i32_seq_cst(ptr addrspace(1) %ptr) {
define i32 @load_i32_acquire_singlethread(ptr addrspace(1) %ptr) {
; CHECK-LABEL: OpFunction %[[#]]
; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
-; CHECK: %[[#]] = OpLoad %[[#Int32]] %[[#ptr]] Aligned 4
+; CHECK: %[[#]] = OpAtomicLoad %[[#Int32]] %[[#ptr]] %[[#Const4]] %[[#Const2]]
; CHECK: OpReturnValue
%val = load atomic i32, ptr addrspace(1) %ptr syncscope("singlethread") acquire, align 4
ret i32 %val
@@ -63,7 +69,7 @@ define i32 @load_i32_acquire_singlethread(ptr addrspace(1) %ptr) {
define i32 @load_i32_acquire_subgroup(ptr addrspace(1) %ptr) {
; CHECK-LABEL: OpFunction %[[#]]
; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
-; CHECK: %[[#]] = OpLoad %[[#Int32]] %[[#ptr]] Aligned 4
+; CHECK: %[[#]] = OpAtomicLoad %[[#Int32]] %[[#ptr]] %[[#Const3]] %[[#Const2]]
; CHECK: OpReturnValue
%val = load atomic i32, ptr addrspace(1) %ptr syncscope("subgroup") acquire, align 4
ret i32 %val
@@ -72,7 +78,7 @@ define i32 @load_i32_acquire_subgroup(ptr addrspace(1) %ptr) {
define i32 @load_i32_acquire_workgroup(ptr addrspace(1) %ptr) {
; CHECK-LABEL: OpFunction %[[#]]
; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
-; CHECK: %[[#]] = OpLoad %[[#Int32]] %[[#ptr]] Aligned 4
+; CHECK: %[[#]] = OpAtomicLoad %[[#Int32]] %[[#ptr]] %[[#Const2]] %[[#Const2]]
; CHECK: OpReturnValue
%val = load atomic i32, ptr addrspace(1) %ptr syncscope("workgroup") acquire, align 4
ret i32 %val
@@ -81,7 +87,7 @@ define i32 @load_i32_acquire_workgroup(ptr addrspace(1) %ptr) {
define i32 @load_i32_acquire_device(ptr addrspace(1) %ptr) {
; CHECK-LABEL: OpFunction %[[#]]
; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
-; CHECK: %[[#]] = OpLoad %[[#Int32]] %[[#ptr]] Aligned 4
+; CHECK: %[[#]] = OpAtomicLoad %[[#Int32]] %[[#ptr]] %[[#Const1]] %[[#Const2]]
; CHECK: OpReturnValue
%val = load atomic i32, ptr addrspace(1) %ptr syncscope("device") acquire, align 4
ret i32 %val
@@ -92,7 +98,7 @@ define i32 @load_i32_acquire_device(ptr addrspace(1) %ptr) {
define float @load_float_acquire(ptr addrspace(1) %ptr) {
; CHECK-LABEL: OpFunction %[[#]]
; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
-; CHECK: %[[#load:]] = OpLoad %[[#Int32]] %[[#ptr]] Aligned 8
+; CHECK: %[[#load:]] = OpAtomicLoad %[[#Int32]] %[[#ptr]] %[[#Const0]] %[[#Const2]]
; CHECK: %[[#val:]] = OpBitcast %[[#Float]] %[[#load]]
; CHECK: OpReturnValue %[[#val]]
%val = load atomic float, ptr addrspace(1) %ptr acquire, align 8
@@ -104,8 +110,19 @@ define float @load_float_acquire(ptr addrspace(1) %ptr) {
define <2 x i32> @load_vector_acquire(ptr addrspace(1) %ptr) {
; CHECK-LABEL: OpFunction %[[#]]
; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
-; CHECK: %[[#]] = OpLoad %[[#Int32Vec]] %[[#ptr]] Aligned 8
+; CHECK: %[[#]] = OpAtomicLoad %[[#Int32Vec]] %[[#ptr]] %[[#Const0]] %[[#Const2]]
; CHECK: OpReturnValue
%val = load atomic <2 x i32>, ptr addrspace(1) %ptr acquire, align 8
ret <2 x i32> %val
}
+
+; -- test with volatile
+
+define i32 @load_i32_acquire_device_volatile(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: OpFunction %[[#]]
+; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
+; CHECK: %[[#]] = OpAtomicLoad %[[#Int32]] %[[#ptr]] %[[#Const1]] %[[#Const2]]
+; CHECK: OpReturnValue
+ %val = load atomic volatile i32, ptr addrspace(1) %ptr syncscope("device") acquire, align 4
+ ret i32 %val
+}
diff --git a/llvm/test/CodeGen/SPIRV/transcoding/store-atomic.ll b/llvm/test/CodeGen/SPIRV/transcoding/store-atomic.ll
index b11b26451d086..b0f685d5c9e29 100644
--- a/llvm/test/CodeGen/SPIRV/transcoding/store-atomic.ll
+++ b/llvm/test/CodeGen/SPIRV/transcoding/store-atomic.ll
@@ -4,20 +4,26 @@
; RUN: llc -O0 -mtriple=spirv32-- %s -o - | FileCheck %s
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-- %s -o - -filetype=obj | spirv-val %}
-;; Check that 'store atomic' LLVM IR instructions are lowered.
-;; NOTE: The current lowering is incorrect: 'store atomic' should produce
-;; OpAtomicStore but currently produces OpStore, silently dropping the atomic
-;; ordering. This test documents the broken behaviour so it can be fixed.
+; Check that 'store atomic' LLVM IR instructions are lowered correctly to
+; OpAtomicStore with the right Scope and Memory Semantics operands.
+;
+; unordered and monotonic are currently mapped to Memory Semantics `None (Relaxed)` 0x0
; CHECK-DAG: %[[#Int32:]] = OpTypeInt 32 0
; CHECK-DAG: %[[#Float:]] = OpTypeFloat 32
; CHECK-DAG: %[[#Int32Vec:]] = OpTypeVector %[[#Int32]] 2
+; CHECK-DAG: %[[#Const0:]] = OpConstantNull %[[#Int32]]
+; CHECK-DAG: %[[#Const1:]] = OpConstant %[[#Int32]] 1{{$}}
+; CHECK-DAG: %[[#Const2:]] = OpConstant %[[#Int32]] 2{{$}}
+; CHECK-DAG: %[[#Const3:]] = OpConstant %[[#Int32]] 3{{$}}
+; CHECK-DAG: %[[#Const4:]] = OpConstant %[[#Int32]] 4{{$}}
+; CHECK-DAG: %[[#Const16:]] = OpConstant %[[#Int32]] 16{{$}}
define void @store_i32_unordered(ptr addrspace(1) %ptr, i32 %val) {
; CHECK-LABEL: OpFunction %[[#]]
; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#val:]] = OpFunctionParameter %[[#Int32]]
-; CHECK: OpStore %[[#ptr]] %[[#val]] Aligned 4
+; CHECK: OpAtomicStore %[[#ptr]] %[[#Const0]] %[[#Const0]] %[[#val]]
; CHECK: OpReturn
store atomic i32 %val, ptr addrspace(1) %ptr unordered, align 4
ret void
@@ -27,7 +33,7 @@ define void @store_i32_monotonic(ptr addrspace(1) %ptr, i32 %val) {
; CHECK-LABEL: OpFunction %[[#]]
; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#val:]] = OpFunctionParameter %[[#Int32]]
-; CHECK: OpStore %[[#ptr]] %[[#val]] Aligned 4
+; CHECK: OpAtomicStore %[[#ptr]] %[[#Const0]] %[[#Const0]] %[[#val]]
; CHECK: OpReturn
store atomic i32 %val, ptr addrspace(1) %ptr monotonic, align 4
ret void
@@ -37,7 +43,7 @@ define void @store_i32_release(ptr addrspace(1) %ptr, i32 %val) {
; CHECK-LABEL: OpFunction %[[#]]
; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#val:]] = OpFunctionParameter %[[#Int32]]
-; CHECK: OpStore %[[#ptr]] %[[#val]] Aligned 4
+; CHECK: OpAtomicStore %[[#ptr]] %[[#Const0]] %[[#Const4]] %[[#val]]
; CHECK: OpReturn
store atomic i32 %val, ptr addrspace(1) %ptr release, align 4
ret void
@@ -47,7 +53,7 @@ define void @store_i32_seq_cst(ptr addrspace(1) %ptr, i32 %val) {
; CHECK-LABEL: OpFunction %[[#]]
; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#val:]] = OpFunctionParameter %[[#Int32]]
-; CHECK: OpStore %[[#ptr]] %[[#val]] Aligned 4
+; CHECK: OpAtomicStore %[[#ptr]] %[[#Const0]] %[[#Const16]] %[[#val]]
; CHECK: OpReturn
store atomic i32 %val, ptr addrspace(1) %ptr seq_cst, align 4
ret void
@@ -59,7 +65,7 @@ define void @store_i32_release_singlethread(ptr addrspace(1) %ptr, i32 %val) {
; CHECK-LABEL: OpFunction %[[#]]
; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#val:]] = OpFunctionParameter %[[#Int32]]
-; CHECK: OpStore %[[#ptr]] %[[#val]] Aligned 4
+; CHECK: OpAtomicStore %[[#ptr]] %[[#Const4]] %[[#Const4]] %[[#val]]
; CHECK: OpReturn
store atomic i32 %val, ptr addrspace(1) %ptr syncscope("singlethread") release, align 4
ret void
@@ -69,7 +75,7 @@ define void @store_i32_release_subgroup(ptr addrspace(1) %ptr, i32 %val) {
; CHECK-LABEL: OpFunction %[[#]]
; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#val:]] = OpFunctionParameter %[[#Int32]]
-; CHECK: OpStore %[[#ptr]] %[[#val]] Aligned 4
+; CHECK: OpAtomicStore %[[#ptr]] %[[#Const3]] %[[#Const4]] %[[#val]]
; CHECK: OpReturn
store atomic i32 %val, ptr addrspace(1) %ptr syncscope("subgroup") release, align 4
ret void
@@ -79,7 +85,7 @@ define void @store_i32_release_workgroup(ptr addrspace(1) %ptr, i32 %val) {
; CHECK-LABEL: OpFunction %[[#]]
; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#val:]] = OpFunctionParameter %[[#Int32]]
-; CHECK: OpStore %[[#ptr]] %[[#val]] Aligned 4
+; CHECK: OpAtomicStore %[[#ptr]] %[[#Const2]] %[[#Const4]] %[[#val]]
; CHECK: OpReturn
store atomic i32 %val, ptr addrspace(1) %ptr syncscope("workgroup") release, align 4
ret void
@@ -89,7 +95,7 @@ define void @store_i32_release_device(ptr addrspace(1) %ptr, i32 %val) {
; CHECK-LABEL: OpFunction %[[#]]
; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#val:]] = OpFunctionParameter %[[#Int32]]
-; CHECK: OpStore %[[#ptr]] %[[#val]] Aligned 4
+; CHECK: OpAtomicStore %[[#ptr]] %[[#Const1]] %[[#Const4]] %[[#val]]
; CHECK: OpReturn
store atomic i32 %val, ptr addrspace(1) %ptr syncscope("device") release, align 4
ret void
@@ -102,7 +108,7 @@ define void @store_float_release(ptr addrspace(1) %ptr, float %val) {
; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
; CHECK: %[[#val:]] = OpFunctionParameter %[[#Float]]
; CHECK: %[[#cast:]] = OpBitcast %[[#Int32]] %[[#val]]
-; CHECK: OpStore %[[#ptr]] %[[#cast]] Aligned 8
+; CHECK: OpAtomicStore %[[#ptr]] %[[#Const0]] %[[#Const4]] %[[#cast]]
; CHECK: OpReturn
store atomic float %val, ptr addrspace(1) %ptr release, align 8
ret void
@@ -119,3 +125,15 @@ define void @store_vector_release(ptr addrspace(1) %ptr, <2 x i32> %val) {
store atomic <2 x i32> %val, ptr addrspace(1) %ptr release, align 8
ret void
}
+
+; -- test with volatile
+
+define void @store_i32_release_device_volatile(ptr addrspace(1) %ptr, i32 %val) {
+; CHECK-LABEL: OpFunction %[[#]]
+; CHECK: %[[#ptr:]] = OpFunctionParameter %[[#]]
+; CHECK: %[[#val:]] = OpFunctionParameter %[[#Int32]]
+; CHECK: OpAtomicStore %[[#ptr]] %[[#Const1]] %[[#Const4]] %[[#val]]
+; CHECK: OpReturn
+ store atomic volatile i32 %val, ptr addrspace(1) %ptr syncscope("device") release, align 4
+ ret void
+}
More information about the llvm-branch-commits
mailing list