[clang] [llvm] [RFC][BPF] Do atomic_fetch_*() pattern matching with memory ordering (PR #107343)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 13 12:02:34 PDT 2024
https://github.com/yonghong-song updated https://github.com/llvm/llvm-project/pull/107343
From a29ba5bd76ba0bb2149fe8ab20da0bd78c2b04dd Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song at linux.dev>
Date: Tue, 3 Sep 2024 21:26:17 -0700
Subject: [PATCH 1/3] [BPF] Do atomic_fetch_*() pattern matching with memory
ordering
For atomic_fetch_*() operations, do pattern matching with the memory orderings
seq_cst, acq_rel, release, acquire and monotonic (relaxed). For operations with
seq_cst/acq_rel/release/acquire ordering, atomic_fetch_*() instructions are
generated. For monotonic ordering, locked insns are generated if the return
value is not used; otherwise, atomic_fetch_*() insns are used.
The main motivation is to resolve the kernel issue [1].
The following memory orderings are supported:
seq_cst, acq_rel, release, acquire, relaxed
The current gcc-style __sync_fetch_and_*() operations are all seq_cst.
To use explicit memory ordering, the _Atomic type is needed. The following is
an example:
```
$ cat test.c
#include <stdatomic.h>
void f1(_Atomic int *i) {
  (void)__c11_atomic_fetch_and(i, 10, memory_order_relaxed);
}
void f2(_Atomic int *i) {
  (void)__c11_atomic_fetch_and(i, 10, memory_order_acquire);
}
void f3(_Atomic int *i) {
  (void)__c11_atomic_fetch_and(i, 10, memory_order_seq_cst);
}
$ cat run.sh
clang -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf -O2 --target=bpf -c test.c -o test.o && llvm-objdump -d test.o
$ ./run.sh
test.o: file format elf64-bpf
Disassembly of section .text:
0000000000000000 <f1>:
0: b4 02 00 00 0a 00 00 00 w2 = 0xa
1: c3 21 00 00 50 00 00 00 lock *(u32 *)(r1 + 0x0) &= w2
2: 95 00 00 00 00 00 00 00 exit
0000000000000018 <f2>:
3: b4 02 00 00 0a 00 00 00 w2 = 0xa
4: c3 21 00 00 51 00 00 00 w2 = atomic_fetch_and((u32 *)(r1 + 0x0), w2)
5: 95 00 00 00 00 00 00 00 exit
0000000000000030 <f3>:
6: b4 02 00 00 0a 00 00 00 w2 = 0xa
7: c3 21 00 00 51 00 00 00 w2 = atomic_fetch_and((u32 *)(r1 + 0x0), w2)
8: 95 00 00 00 00 00 00 00 exit
```
The following is another example where the return value is used:
```
$ cat test1.c
#include <stdatomic.h>
int f1(_Atomic int *i) {
  return __c11_atomic_fetch_and(i, 10, memory_order_relaxed);
}
int f2(_Atomic int *i) {
  return __c11_atomic_fetch_and(i, 10, memory_order_acquire);
}
int f3(_Atomic int *i) {
  return __c11_atomic_fetch_and(i, 10, memory_order_seq_cst);
}
$ cat run.sh
clang -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf -O2 --target=bpf -c test1.c -o test1.o && llvm-objdump -d test1.o
$ ./run.sh
test1.o:	file format elf64-bpf
Disassembly of section .text:
0000000000000000 <f1>:
0: b4 00 00 00 0a 00 00 00 w0 = 0xa
1: c3 01 00 00 51 00 00 00 w0 = atomic_fetch_and((u32 *)(r1 + 0x0), w0)
2: 95 00 00 00 00 00 00 00 exit
0000000000000018 <f2>:
3: b4 00 00 00 0a 00 00 00 w0 = 0xa
4: c3 01 00 00 51 00 00 00 w0 = atomic_fetch_and((u32 *)(r1 + 0x0), w0)
5: 95 00 00 00 00 00 00 00 exit
0000000000000030 <f3>:
6: b4 00 00 00 0a 00 00 00 w0 = 0xa
7: c3 01 00 00 51 00 00 00 w0 = atomic_fetch_and((u32 *)(r1 + 0x0), w0)
8: 95 00 00 00 00 00 00 00 exit
```
You can see that for relaxed memory ordering, an atomic_fetch_and() insn is
used if the return value is used, and a locked insn is used if the return
value is not used.
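Note also that this patch defines the __BPF_FEATURE_ATOMIC_MEM_ORDERING macro
in clang (see the clang/lib/Basic change below), so bpf programs can
feature-detect the new support. A minimal, hypothetical sketch:
```
#include <stdatomic.h>
#ifdef __BPF_FEATURE_ATOMIC_MEM_ORDERING
void f(_Atomic int *i) {
  /* explicit memory ordering is available */
  (void)__c11_atomic_fetch_and(i, 10, memory_order_relaxed);
}
#else
void f(int *i) {
  /* fall back to the gcc-style builtin, which is always seq_cst */
  (void)__sync_fetch_and_and(i, 10);
}
#endif
```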
Here is another example with a global _Atomic variable:
```
$ cat test3.c
#include <stdatomic.h>
_Atomic int i;
void f1(void) {
  (void)__c11_atomic_fetch_and(&i, 10, memory_order_relaxed);
}
void f2(void) {
  (void)__c11_atomic_fetch_and(&i, 10, memory_order_seq_cst);
}
$ cat run.sh
clang -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf -O2 --target=bpf -c test3.c -o test3.o && llvm-objdump -d test3.o
$ ./run.sh
test3.o: file format elf64-bpf
Disassembly of section .text:
0000000000000000 <f1>:
0: b4 01 00 00 0a 00 00 00 w1 = 0xa
1: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
3: c3 12 00 00 50 00 00 00 lock *(u32 *)(r2 + 0x0) &= w1
4: 95 00 00 00 00 00 00 00 exit
0000000000000028 <f2>:
5: b4 01 00 00 0a 00 00 00 w1 = 0xa
6: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
8: c3 12 00 00 51 00 00 00 w1 = atomic_fetch_and((u32 *)(r2 + 0x0), w1)
9: 95 00 00 00 00 00 00 00 exit
```
Note that '-g' is not used in the above compilations. The reason is the
following IR related to the _Atomic type:
```
$ clang -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf -O2 --target=bpf -g -S -emit-llvm test3.c
```
The related debug info for test3.c:
```
!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
!1 = distinct !DIGlobalVariable(name: "i", scope: !2, file: !3, line: 3, type: !16, isLocal: false, isDefinition: true)
...
!16 = !DIDerivedType(tag: DW_TAG_atomic_type, baseType: !17)
!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
```
When compiling test.c, the related debug info is:
```
...
!19 = distinct !DISubprogram(name: "f1", scope: !1, file: !1, line: 3, type: !20, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !25)
!20 = !DISubroutineType(types: !21)
!21 = !{null, !22}
!22 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !23, size: 64)
!23 = !DIDerivedType(tag: DW_TAG_atomic_type, baseType: !24)
!24 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!25 = !{!26}
!26 = !DILocalVariable(name: "i", arg: 1, scope: !19, file: !1, line: 3, type: !22)
```
All the above suggests that _Atomic behaves like a modifier (e.g. const,
restrict, volatile). This seems true based on the DWARF doc [2].
Without properly handling DW_TAG_atomic_type, llvm BTF generation will be
incorrect, since the current implementation assumes DW_TAG_atomic_type does
not exist. So we have two choices here:
(1) The llvm bpf backend processes DW_TAG_atomic_type but ignores it in the
BTF encoding.
(2) Add another type, e.g. BTF_KIND_ATOMIC, to BTF. BTF_KIND_ATOMIC behaves
as a modifier like const/volatile/restrict.
For choice (1), the llvm bpf backend should skip dwarf::DW_TAG_atomic_type
during BTF generation whenever necessary, as in the sketch below.
For choice (2), BTF_KIND_ATOMIC would be added to BTF, so the llvm backend
and the kernel need to handle it properly. Its main advantage is that the
atomic property is preserved and also available to the skeleton. But I think
a raw type is good enough for the skeleton unless user space intends to do
some atomic operation with it, which is an unlikely case.
So I chose option (1) in this implementation.
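A minimal sketch of what the skipping means for the dwarf type chains (the
declarations are illustrative, not taken from the patch):
```
_Atomic int i;           /* dwarf: atomic -> int
                          * BTF:   int                    */
volatile _Atomic int *p; /* dwarf: ptr -> volatile -> atomic -> int
                          * BTF:   ptr -> volatile -> int */
```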
[1] https://lore.kernel.org/bpf/7b941f53-2a05-48ec-9032-8f106face3a3@linux.dev/
[2] https://dwarfstd.org/issues/131112.1.html
---
clang/lib/Basic/Targets/BPF.cpp | 1 +
llvm/lib/Target/BPF/BPFInstrInfo.td | 113 +++++++++++++++++++++-----
llvm/lib/Target/BPF/BPFMIChecking.cpp | 95 +++++++++++++++++++---
3 files changed, 177 insertions(+), 32 deletions(-)
diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp
index a94ceee5a6a5e7..77e3a9388b0c46 100644
--- a/clang/lib/Basic/Targets/BPF.cpp
+++ b/clang/lib/Basic/Targets/BPF.cpp
@@ -37,6 +37,7 @@ void BPFTargetInfo::getTargetDefines(const LangOptions &Opts,
}
Builder.defineMacro("__BPF_FEATURE_ADDR_SPACE_CAST");
+ Builder.defineMacro("__BPF_FEATURE_ATOMIC_MEM_ORDERING");
if (CPU.empty())
CPU = "v3";
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index f7e17901c7ed5e..f96811967382c9 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -826,13 +826,12 @@ let Predicates = [BPFNoALU32] in {
}
// Atomic Fetch-and-<add, and, or, xor> operations
-class XFALU64<BPFWidthModifer SizeOp, BPFArithOp Opc, string OpcodeStr,
- string OpcStr, PatFrag OpNode>
+class XFALU64<BPFWidthModifer SizeOp, BPFArithOp Opc, string OpcodeStr, string OpcStr>
: TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
(outs GPR:$dst),
(ins MEMri:$addr, GPR:$val),
"$dst = atomic_fetch_"#OpcStr#"(("#OpcodeStr#" *)($addr), $val)",
- [(set GPR:$dst, (OpNode ADDRri:$addr, GPR:$val))]> {
+ []> {
bits<4> dst;
bits<20> addr;
@@ -844,13 +843,12 @@ class XFALU64<BPFWidthModifer SizeOp, BPFArithOp Opc, string OpcodeStr,
let BPFClass = BPF_STX;
}
-class XFALU32<BPFWidthModifer SizeOp, BPFArithOp Opc, string OpcodeStr,
- string OpcStr, PatFrag OpNode>
+class XFALU32<BPFWidthModifer SizeOp, BPFArithOp Opc, string OpcodeStr, string OpcStr>
: TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
(outs GPR32:$dst),
(ins MEMri:$addr, GPR32:$val),
"$dst = atomic_fetch_"#OpcStr#"(("#OpcodeStr#" *)($addr), $val)",
- [(set GPR32:$dst, (OpNode ADDRri:$addr, GPR32:$val))]> {
+ []> {
bits<4> dst;
bits<20> addr;
@@ -864,26 +862,101 @@ class XFALU32<BPFWidthModifer SizeOp, BPFArithOp Opc, string OpcodeStr,
let Constraints = "$dst = $val" in {
let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
- def XFADDW32 : XFALU32<BPF_W, BPF_ADD, "u32", "add", atomic_load_add_i32>;
- def XFANDW32 : XFALU32<BPF_W, BPF_AND, "u32", "and", atomic_load_and_i32>;
- def XFORW32 : XFALU32<BPF_W, BPF_OR, "u32", "or", atomic_load_or_i32>;
- def XFXORW32 : XFALU32<BPF_W, BPF_XOR, "u32", "xor", atomic_load_xor_i32>;
+ def XFADDW32 : XFALU32<BPF_W, BPF_ADD, "u32", "add">;
+ def XFANDW32 : XFALU32<BPF_W, BPF_AND, "u32", "and">;
+ def XFORW32 : XFALU32<BPF_W, BPF_OR, "u32", "or">;
+ def XFXORW32 : XFALU32<BPF_W, BPF_XOR, "u32", "xor">;
}
let Predicates = [BPFHasALU32] in {
- def XFADDD : XFALU64<BPF_DW, BPF_ADD, "u64", "add", atomic_load_add_i64>;
+ def XFADDD : XFALU64<BPF_DW, BPF_ADD, "u64", "add">;
}
- def XFANDD : XFALU64<BPF_DW, BPF_AND, "u64", "and", atomic_load_and_i64>;
- def XFORD : XFALU64<BPF_DW, BPF_OR, "u64", "or", atomic_load_or_i64>;
- def XFXORD : XFALU64<BPF_DW, BPF_XOR, "u64", "xor", atomic_load_xor_i64>;
+ def XFANDD : XFALU64<BPF_DW, BPF_AND, "u64", "and">;
+ def XFORD : XFALU64<BPF_DW, BPF_OR, "u64", "or">;
+ def XFXORD : XFALU64<BPF_DW, BPF_XOR, "u64", "xor">;
}
-// atomic_load_sub can be represented as a neg followed
-// by an atomic_load_add.
-def : Pat<(atomic_load_sub_i32 ADDRri:$addr, GPR32:$val),
- (XFADDW32 ADDRri:$addr, (NEG_32 GPR32:$val))>;
-def : Pat<(atomic_load_sub_i64 ADDRri:$addr, GPR:$val),
- (XFADDD ADDRri:$addr, (NEG_64 GPR:$val))>;
+let Predicates = [BPFHasALU32] in {
+ foreach P = [// add
+ [atomic_load_add_i32_monotonic, XADDW32],
+ [atomic_load_add_i32_acquire, XFADDW32],
+ [atomic_load_add_i32_release, XFADDW32],
+ [atomic_load_add_i32_acq_rel, XFADDW32],
+ [atomic_load_add_i32_seq_cst, XFADDW32],
+ // and
+ [atomic_load_and_i32_monotonic, XANDW32],
+ [atomic_load_and_i32_acquire, XFANDW32],
+ [atomic_load_and_i32_release, XFANDW32],
+ [atomic_load_and_i32_acq_rel, XFANDW32],
+ [atomic_load_and_i32_seq_cst, XFANDW32],
+ // or
+ [atomic_load_or_i32_monotonic, XORW32],
+ [atomic_load_or_i32_acquire, XFORW32],
+ [atomic_load_or_i32_release, XFORW32],
+ [atomic_load_or_i32_acq_rel, XFORW32],
+ [atomic_load_or_i32_seq_cst, XFORW32],
+ // xor
+ [atomic_load_xor_i32_monotonic, XXORW32],
+ [atomic_load_xor_i32_acquire, XFXORW32],
+ [atomic_load_xor_i32_release, XFXORW32],
+ [atomic_load_xor_i32_acq_rel, XFXORW32],
+ [atomic_load_xor_i32_seq_cst, XFXORW32],
+ ] in {
+ def : Pat<(P[0] ADDRri:$addr, GPR32:$val), (P[1] ADDRri:$addr, GPR32:$val)>;
+ }
+
+ // atomic_load_sub can be represented as a neg followed
+ // by an atomic_load_add.
+ foreach P = [[atomic_load_sub_i32_monotonic, XADDW32],
+ [atomic_load_sub_i32_acquire, XFADDW32],
+ [atomic_load_sub_i32_release, XFADDW32],
+ [atomic_load_sub_i32_acq_rel, XFADDW32],
+ [atomic_load_sub_i32_seq_cst, XFADDW32],
+ ] in {
+ def : Pat<(P[0] ADDRri:$addr, GPR32:$val), (P[1] ADDRri:$addr, (NEG_32 GPR32:$val))>;
+ }
+
+ foreach P = [// add
+ [atomic_load_add_i64_monotonic, XADDD],
+ [atomic_load_add_i64_acquire, XFADDD],
+ [atomic_load_add_i64_release, XFADDD],
+ [atomic_load_add_i64_acq_rel, XFADDD],
+ [atomic_load_add_i64_seq_cst, XFADDD],
+ ] in {
+ def : Pat<(P[0] ADDRri:$addr, GPR:$val), (P[1] ADDRri:$addr, GPR:$val)>;
+ }
+}
+
+foreach P = [[atomic_load_sub_i64_monotonic, XADDD],
+ [atomic_load_sub_i64_acquire, XFADDD],
+ [atomic_load_sub_i64_release, XFADDD],
+ [atomic_load_sub_i64_acq_rel, XFADDD],
+ [atomic_load_sub_i64_seq_cst, XFADDD],
+ ] in {
+ def : Pat<(P[0] ADDRri:$addr, GPR:$val), (P[1] ADDRri:$addr, (NEG_64 GPR:$val))>;
+}
+
+foreach P = [// and
+ [atomic_load_and_i64_monotonic, XANDD],
+ [atomic_load_and_i64_acquire, XFANDD],
+ [atomic_load_and_i64_release, XFANDD],
+ [atomic_load_and_i64_acq_rel, XFANDD],
+ [atomic_load_and_i64_seq_cst, XFANDD],
+ // or
+ [atomic_load_or_i64_monotonic, XORD],
+ [atomic_load_or_i64_acquire, XFORD],
+ [atomic_load_or_i64_release, XFORD],
+ [atomic_load_or_i64_acq_rel, XFORD],
+ [atomic_load_or_i64_seq_cst, XFORD],
+ // xor
+ [atomic_load_xor_i64_monotonic, XXORD],
+ [atomic_load_xor_i64_acquire, XFXORD],
+ [atomic_load_xor_i64_release, XFXORD],
+ [atomic_load_xor_i64_acq_rel, XFXORD],
+ [atomic_load_xor_i64_seq_cst, XFXORD],
+ ] in {
+ def : Pat<(P[0] ADDRri:$addr, GPR:$val), (P[1] ADDRri:$addr, GPR:$val)>;
+}
// Atomic Exchange
class XCHG<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
diff --git a/llvm/lib/Target/BPF/BPFMIChecking.cpp b/llvm/lib/Target/BPF/BPFMIChecking.cpp
index 24224f6c1e9e66..1e1f14decf238d 100644
--- a/llvm/lib/Target/BPF/BPFMIChecking.cpp
+++ b/llvm/lib/Target/BPF/BPFMIChecking.cpp
@@ -43,14 +43,14 @@ struct BPFMIPreEmitChecking : public MachineFunctionPass {
// Initialize class variables.
void initialize(MachineFunction &MFParm);
- void processAtomicInsts();
+ bool processAtomicInsts();
public:
// Main entry point for this pass.
bool runOnMachineFunction(MachineFunction &MF) override {
if (!skipFunction(MF.getFunction())) {
initialize(MF);
- processAtomicInsts();
+ return processAtomicInsts();
}
return false;
}
@@ -118,7 +118,7 @@ static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
RegIsGPR64 = GPR64RegClass->contains(MO.getReg());
if (!MO.isDead()) {
- // It is a GPR64 live Def, we are sure it is live. */
+ // It is a GPR64 live Def, we are sure it is live.
if (RegIsGPR64)
return true;
// It is a GPR32 live Def, we are unsure whether it is really dead due to
@@ -152,22 +152,93 @@ static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
return false;
}
-void BPFMIPreEmitChecking::processAtomicInsts() {
+bool BPFMIPreEmitChecking::processAtomicInsts() {
+ if (!MF->getSubtarget<BPFSubtarget>().getHasJmp32()) {
+ // Only check for cpu version 1 and 2.
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+ if (MI.getOpcode() != BPF::XADDW && MI.getOpcode() != BPF::XADDD)
+ continue;
+
+ LLVM_DEBUG(MI.dump());
+ if (hasLiveDefs(MI, TRI)) {
+ DebugLoc Empty;
+ const DebugLoc &DL = MI.getDebugLoc();
+ const Function &F = MF->getFunction();
+ F.getContext().diagnose(DiagnosticInfoUnsupported{
+ F, "Invalid usage of the XADD return value", DL});
+ }
+ }
+ }
+ }
+
+ // Check return values of atomic_{add,and,or,xor} instructions.
+ // If the return value is used, the atomic_<op> instruction is
+ // replaced with an atomic_fetch_<op> instruction.
+ MachineInstr *ToErase = nullptr;
+ bool Changed = false;
+ const BPFInstrInfo *TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
for (MachineBasicBlock &MBB : *MF) {
for (MachineInstr &MI : MBB) {
- if (MI.getOpcode() != BPF::XADDW && MI.getOpcode() != BPF::XADDD)
+      // Erase the insn from the previous iteration. Erasing the current
+      // insn during this iteration would invalidate the MI iterator.
+ if (ToErase) {
+ ToErase->eraseFromParent();
+ ToErase = nullptr;
+ }
+
+ if (MI.getOpcode() != BPF::XADDW32 && MI.getOpcode() != BPF::XADDD &&
+ MI.getOpcode() != BPF::XANDW32 && MI.getOpcode() != BPF::XANDD &&
+ MI.getOpcode() != BPF::XXORW32 && MI.getOpcode() != BPF::XXORD &&
+ MI.getOpcode() != BPF::XORW32 && MI.getOpcode() != BPF::XORD)
continue;
- LLVM_DEBUG(MI.dump());
- if (hasLiveDefs(MI, TRI)) {
- DebugLoc Empty;
- const DebugLoc &DL = MI.getDebugLoc();
- const Function &F = MF->getFunction();
- F.getContext().diagnose(DiagnosticInfoUnsupported{
- F, "Invalid usage of the XADD return value", DL});
+ if (!hasLiveDefs(MI, TRI))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Transforming "; MI.dump());
+ unsigned newOpcode;
+ switch (MI.getOpcode()) {
+ case BPF::XADDW32:
+ newOpcode = BPF::XFADDW32;
+ break;
+ case BPF::XADDD:
+ newOpcode = BPF::XFADDD;
+ break;
+ case BPF::XANDW32:
+ newOpcode = BPF::XFANDW32;
+ break;
+ case BPF::XANDD:
+ newOpcode = BPF::XFANDD;
+ break;
+ case BPF::XXORW32:
+ newOpcode = BPF::XFXORW32;
+ break;
+ case BPF::XXORD:
+ newOpcode = BPF::XFXORD;
+ break;
+ case BPF::XORW32:
+ newOpcode = BPF::XFORW32;
+ break;
+ case BPF::XORD:
+ newOpcode = BPF::XFORD;
+ break;
+ default:
+ llvm_unreachable("Incorrect Atomic Instruction Opcode");
}
+
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(newOpcode))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+
+ ToErase = &MI;
+ Changed = true;
}
}
+
+ return Changed;
}
} // namespace
From 689cdeb91fa394482069dc2bd781cc3653bfa143 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song at linux.dev>
Date: Mon, 9 Sep 2024 11:04:17 -0700
Subject: [PATCH 2/3] [BPF] Handle DW_TAG_atomic_type properly
Make a change in BTFDebug.cpp to handle DW_TAG_atomic_type properly.
Otherwise, for a type like
  _Atomic int i; // global
the dwarf type chain is atomic->int. Since DW_TAG_atomic_type is not
processed, BTF generation will stop at the atomic modifier and BTF will
encode 'i' as the void type.
Similarly, for a type like
  volatile _Atomic int *p;
the dwarf type chain is ptr->volatile->atomic->int. Since the atomic type is
not processed, BTF generation will stop at the atomic type, and the eventual
BTF type chain will be
  ptr->volatile->void
which is incorrect.
This patch fixes the following cases, including the above two patterns, by
skipping DW_TAG_atomic_type (see the sketch after this list):
- a global variable with _Atomic type,
- a function parameter and return type with _Atomic type,
- a struct member with _Atomic type,
- ptr/const/volatile/restrict pointing to an _Atomic type.
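A hedged sketch of declarations exercising these cases (names are
illustrative, not taken from the patch's tests):
```
#include <stdatomic.h>

_Atomic int g;                 /* global with _Atomic type          */
struct s { _Atomic int m; };   /* struct member with _Atomic type   */
_Atomic int f(_Atomic int a);  /* _Atomic parameter and return type */
volatile _Atomic int *p;       /* qualifier chain reaching _Atomic  */
```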
With the changed llvm, the new bpf code in the kernel selftest
arena_atomics.c ([1]) looks like:
```
_Atomic __u64 __arena_global and64_value = (0x110ull << 32);
_Atomic __u32 __arena_global and32_value = 0x110;
SEC("raw_tp/sys_enter")
int and(const void *ctx)
{
  ...
  __c11_atomic_fetch_and(&and64_value, 0x011ull << 32, memory_order_relaxed);
  __c11_atomic_fetch_and(&and32_value, 0x011, memory_order_relaxed);
  ...
  return 0;
}
```
and compilation is successful. The skel file arena_atomics.skel.h will
contain:
```
struct arena_atomics__arena {
  ...
  __u64 and64_value;
  __u32 and32_value;
  ...
} *arena;
```
[1] https://lore.kernel.org/r/20240909223431.1666305-1-yonghong.song@linux.dev
---
llvm/lib/Target/BPF/BTFDebug.cpp | 27 +++++++++++++++++++++------
1 file changed, 21 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp
index 4d847abea731dc..83df657a18e562 100644
--- a/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -35,6 +35,15 @@ static const char *BTFKindStr[] = {
#include "llvm/DebugInfo/BTF/BTF.def"
};
+static const DIType *tryRemoveAtomicType(const DIType *Ty) {
+ if (!Ty)
+ return Ty;
+ auto DerivedTy = dyn_cast<DIDerivedType>(Ty);
+ if (DerivedTy && DerivedTy->getTag() == dwarf::DW_TAG_atomic_type)
+ return DerivedTy->getBaseType();
+ return Ty;
+}
+
/// Emit a BTF common type.
void BTFTypeBase::emitType(MCStreamer &OS) {
OS.AddComment(std::string(BTFKindStr[Kind]) + "(id = " + std::to_string(Id) +
@@ -90,7 +99,7 @@ void BTFTypeDerived::completeType(BTFDebug &BDebug) {
return;
// The base type for PTR/CONST/VOLATILE could be void.
- const DIType *ResolvedType = DTy->getBaseType();
+ const DIType *ResolvedType = tryRemoveAtomicType(DTy->getBaseType());
if (!ResolvedType) {
assert((Kind == BTF::BTF_KIND_PTR || Kind == BTF::BTF_KIND_CONST ||
Kind == BTF::BTF_KIND_VOLATILE) &&
@@ -305,7 +314,7 @@ void BTFTypeStruct::completeType(BTFDebug &BDebug) {
} else {
BTFMember.Offset = DDTy->getOffsetInBits();
}
- const auto *BaseTy = DDTy->getBaseType();
+ const auto *BaseTy = tryRemoveAtomicType(DDTy->getBaseType());
BTFMember.Type = BDebug.getTypeId(BaseTy);
Members.push_back(BTFMember);
}
@@ -342,7 +351,7 @@ void BTFTypeFuncProto::completeType(BTFDebug &BDebug) {
IsCompleted = true;
DITypeRefArray Elements = STy->getTypeArray();
- auto RetType = Elements[0];
+ auto RetType = tryRemoveAtomicType(Elements[0]);
BTFType.Type = RetType ? BDebug.getTypeId(RetType) : 0;
BTFType.NameOff = 0;
@@ -350,7 +359,7 @@ void BTFTypeFuncProto::completeType(BTFDebug &BDebug) {
// to represent the vararg, encode the NameOff/Type to be 0.
for (unsigned I = 1, N = Elements.size(); I < N; ++I) {
struct BTF::BTFParam Param;
- auto Element = Elements[I];
+ auto Element = tryRemoveAtomicType(Elements[I]);
if (Element) {
Param.NameOff = BDebug.addString(FuncArgNames[I]);
Param.Type = BDebug.getTypeId(Element);
@@ -800,6 +809,10 @@ void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId,
bool CheckPointer, bool SeenPointer) {
unsigned Tag = DTy->getTag();
+ if (Tag == dwarf::DW_TAG_atomic_type)
+ return visitTypeEntry(DTy->getBaseType(), TypeId, CheckPointer,
+ SeenPointer);
+
/// Try to avoid chasing pointees, esp. structure pointees which may
/// unnecessary bring in a lot of types.
if (CheckPointer && !SeenPointer) {
@@ -1444,8 +1457,10 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
DIGlobal = GVE->getVariable();
if (SecName.starts_with(".maps"))
visitMapDefType(DIGlobal->getType(), GVTypeId);
- else
- visitTypeEntry(DIGlobal->getType(), GVTypeId, false, false);
+ else {
+ const DIType *Ty = tryRemoveAtomicType(DIGlobal->getType());
+ visitTypeEntry(Ty, GVTypeId, false, false);
+ }
break;
}
From 5e0909e522dbfe983effbddc92cf6efd7b8f7595 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song at linux.dev>
Date: Fri, 13 Sep 2024 09:51:35 -0700
Subject: [PATCH 3/3] [BPF] Add functionality/btf selftests for memory ordering
cases
The following test cases are added:
- all memory orderings and their asm code with -mcpu=v3
- all memory orderings and their asm code with -mcpu=v1
  Note that 32-bit __c11_atomic_fetch_{sub,and,or,xor} won't work with
  -mcpu=v1. Also, at -mcpu=v1, no return value is allowed for the 64-bit
  __sync_fetch_and_add.
- at -mcpu=v1, for 64-bit __c11_atomic_fetch_sub() with relaxed memory
  ordering, the xaddd insn is used, so the return value is not supported;
  otherwise it works fine as long as the return value is not used. This
  aligns with 64-bit __c11_atomic_fetch_add() with relaxed memory ordering
  at -mcpu=v1 (see the sketch below).
- BTF tests with _Atomic types in different cases.
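The following minimal sketch (function names are illustrative, not from the
tests) shows the -mcpu=v1 behavior for relaxed 64-bit __c11_atomic_fetch_sub():
```
#include <stdatomic.h>

void sub64_noret(long _Atomic *p) {
  /* relaxed 64-bit sub lowers to the locked xaddd insn with a negated
     operand; fine because the return value is unused */
  (void)__c11_atomic_fetch_sub(p, 10, memory_order_relaxed);
}

long sub64_ret(long _Atomic *p) {
  /* needs the xaddd return value, which -mcpu=v1 cannot provide, so
     the backend diagnoses this */
  return __c11_atomic_fetch_sub(p, 10, memory_order_relaxed);
}
```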
---
llvm/test/CodeGen/BPF/BTF/atomics.ll | 186 +++++
llvm/test/CodeGen/BPF/atomics_mem_order_v1.ll | 385 +++++++++
llvm/test/CodeGen/BPF/atomics_mem_order_v3.ll | 781 ++++++++++++++++++
.../CodeGen/BPF/atomics_sub64_relaxed_v1.ll | 27 +
llvm/test/CodeGen/BPF/xaddd_v1.ll | 25 +
5 files changed, 1404 insertions(+)
create mode 100644 llvm/test/CodeGen/BPF/BTF/atomics.ll
create mode 100644 llvm/test/CodeGen/BPF/atomics_mem_order_v1.ll
create mode 100644 llvm/test/CodeGen/BPF/atomics_mem_order_v3.ll
create mode 100644 llvm/test/CodeGen/BPF/atomics_sub64_relaxed_v1.ll
create mode 100644 llvm/test/CodeGen/BPF/xaddd_v1.ll
diff --git a/llvm/test/CodeGen/BPF/BTF/atomics.ll b/llvm/test/CodeGen/BPF/BTF/atomics.ll
new file mode 100644
index 00000000000000..3b470479955fe3
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/BTF/atomics.ll
@@ -0,0 +1,186 @@
+; RUN: llc -march=bpfel -mcpu=v3 -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s
+; RUN: llc -march=bpfeb -mcpu=v3 -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s
+;
+; Source:
+; #include <stdatomic.h>
+; struct gstruct_t {
+; _Atomic int a;
+; } gstruct;
+; extern _Atomic int ext;
+; _Atomic int gbl;
+; _Atomic int *pgbl;
+; volatile _Atomic int vvar;
+; _Atomic int foo(_Atomic int a1, _Atomic int *p1) {
+; (void)__c11_atomic_fetch_add(&gstruct.a, 1, memory_order_relaxed);
+; (void)__c11_atomic_fetch_add(&ext, 1, memory_order_relaxed);
+; (void)__c11_atomic_fetch_add(&gbl, 1, memory_order_relaxed);
+; (void)__c11_atomic_fetch_add(pgbl, 1, memory_order_relaxed);
+; (void)__c11_atomic_fetch_add(&vvar, 1, memory_order_relaxed);
+; (void)__c11_atomic_fetch_add(p1, 1, memory_order_relaxed);
+;
+; return a1;
+; }
+
+target triple = "bpf"
+
+%struct.gstruct_t = type { i32 }
+
+ at gstruct = dso_local global %struct.gstruct_t zeroinitializer, align 4, !dbg !0
+ at ext = external dso_local global i32, align 4, !dbg !26
+ at gbl = dso_local global i32 0, align 4, !dbg !16
+ at pgbl = dso_local local_unnamed_addr global ptr null, align 8, !dbg !20
+ at vvar = dso_local global i32 0, align 4, !dbg !23
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn
+define dso_local i32 @foo(i32 returned %a1, ptr nocapture noundef %p1) local_unnamed_addr #0 !dbg !37 {
+entry:
+ #dbg_value(i32 %a1, !41, !DIExpression(), !43)
+ #dbg_value(ptr %p1, !42, !DIExpression(), !43)
+ %0 = atomicrmw add ptr @gstruct, i32 1 monotonic, align 4, !dbg !44
+ %1 = atomicrmw add ptr @ext, i32 1 monotonic, align 4, !dbg !45
+ %2 = atomicrmw add ptr @gbl, i32 1 monotonic, align 4, !dbg !46
+ %3 = load ptr, ptr @pgbl, align 8, !dbg !47, !tbaa !48
+ %4 = atomicrmw add ptr %3, i32 1 monotonic, align 4, !dbg !52
+ %5 = atomicrmw volatile add ptr @vvar, i32 1 monotonic, align 4, !dbg !53
+ %6 = atomicrmw add ptr %p1, i32 1 monotonic, align 4, !dbg !54
+ ret i32 %a1, !dbg !55
+}
+
+; CHECK: .long 1 # BTF_KIND_INT(id = 1)
+; CHECK-NEXT: .long 16777216 # 0x1000000
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long 16777248 # 0x1000020
+; CHECK-NEXT: .long 0 # BTF_KIND_PTR(id = 2)
+; CHECK-NEXT: .long 33554432 # 0x2000000
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long 0 # BTF_KIND_FUNC_PROTO(id = 3)
+; CHECK-NEXT: .long 218103810 # 0xd000002
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long 5
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long 11 # BTF_KIND_FUNC(id = 4)
+; CHECK-NEXT: .long 201326593 # 0xc000001
+; CHECK-NEXT: .long 3
+; CHECK-NEXT: .long 66 # BTF_KIND_STRUCT(id = 5)
+; CHECK-NEXT: .long 67108865 # 0x4000001
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long 76
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long 0 # 0x0
+; CHECK-NEXT: .long 78 # BTF_KIND_VAR(id = 6)
+; CHECK-NEXT: .long 234881024 # 0xe000000
+; CHECK-NEXT: .long 5
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long 86 # BTF_KIND_VAR(id = 7)
+; CHECK-NEXT: .long 234881024 # 0xe000000
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long 90 # BTF_KIND_VAR(id = 8)
+; CHECK-NEXT: .long 234881024 # 0xe000000
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long 94 # BTF_KIND_VAR(id = 9)
+; CHECK-NEXT: .long 234881024 # 0xe000000
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long 0 # BTF_KIND_VOLATILE(id = 10)
+; CHECK-NEXT: .long 150994944 # 0x9000000
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long 99 # BTF_KIND_VAR(id = 11)
+; CHECK-NEXT: .long 234881024 # 0xe000000
+; CHECK-NEXT: .long 10
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long 104 # BTF_KIND_DATASEC(id = 12)
+; CHECK-NEXT: .long 251658244 # 0xf000004
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 6
+; CHECK-NEXT: .long gstruct
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .long gbl
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long 9
+; CHECK-NEXT: .long pgbl
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .long 11
+; CHECK-NEXT: .long vvar
+; CHECK-NEXT: .long 4
+
+
+; CHECK: .byte 0 # string offset=0
+; CHECK: .ascii "int" # string offset=1
+; CHECK: .ascii "a1" # string offset=5
+; CHECK: .ascii "p1" # string offset=8
+; CHECK: .ascii "foo" # string offset=11
+; CHECK: .ascii "gstruct_t" # string offset=66
+; CHECK: .byte 97 # string offset=76
+; CHECK: .ascii "gstruct" # string offset=78
+; CHECK: .ascii "ext" # string offset=86
+; CHECK: .ascii "gbl" # string offset=90
+; CHECK: .ascii "pgbl" # string offset=94
+; CHECK: .ascii "vvar" # string offset=99
+
+attributes #0 = { mustprogress nofree norecurse nounwind willreturn "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v3" }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!31, !32, !33, !34, !35}
+!llvm.ident = !{!36}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "gstruct", scope: !2, file: !3, line: 4, type: !28, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 20.0.0git (git at github.com:yonghong-song/llvm-project.git a7bdb883df5731338d84603c60210d93c86f0870)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !15, splitDebugInlining: false, nameTableKind: None)
+!3 = !DIFile(filename: "atomics.c", directory: "/tmp/home/yhs/tests/result/atomics", checksumkind: CSK_MD5, checksum: "cabe3f3bfcfa90a93ff6d959be6e563a")
+!4 = !{!5}
+!5 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "memory_order", file: !6, line: 68, baseType: !7, size: 32, elements: !8)
+!6 = !DIFile(filename: "work/yhs/llvm-project/llvm/build/install/lib/clang/20/include/stdatomic.h", directory: "/home/yhs", checksumkind: CSK_MD5, checksum: "f17199a988fe91afffaf0f943ef87096")
+!7 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
+!8 = !{!9, !10, !11, !12, !13, !14}
+!9 = !DIEnumerator(name: "memory_order_relaxed", value: 0)
+!10 = !DIEnumerator(name: "memory_order_consume", value: 1)
+!11 = !DIEnumerator(name: "memory_order_acquire", value: 2)
+!12 = !DIEnumerator(name: "memory_order_release", value: 3)
+!13 = !DIEnumerator(name: "memory_order_acq_rel", value: 4)
+!14 = !DIEnumerator(name: "memory_order_seq_cst", value: 5)
+!15 = !{!0, !16, !20, !23, !26}
+!16 = !DIGlobalVariableExpression(var: !17, expr: !DIExpression())
+!17 = distinct !DIGlobalVariable(name: "gbl", scope: !2, file: !3, line: 6, type: !18, isLocal: false, isDefinition: true)
+!18 = !DIDerivedType(tag: DW_TAG_atomic_type, baseType: !19)
+!19 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!20 = !DIGlobalVariableExpression(var: !21, expr: !DIExpression())
+!21 = distinct !DIGlobalVariable(name: "pgbl", scope: !2, file: !3, line: 7, type: !22, isLocal: false, isDefinition: true)
+!22 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !18, size: 64)
+!23 = !DIGlobalVariableExpression(var: !24, expr: !DIExpression())
+!24 = distinct !DIGlobalVariable(name: "vvar", scope: !2, file: !3, line: 8, type: !25, isLocal: false, isDefinition: true)
+!25 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !18)
+!26 = !DIGlobalVariableExpression(var: !27, expr: !DIExpression())
+!27 = distinct !DIGlobalVariable(name: "ext", scope: !2, file: !3, line: 5, type: !18, isLocal: false, isDefinition: false)
+!28 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "gstruct_t", file: !3, line: 2, size: 32, elements: !29)
+!29 = !{!30}
+!30 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !28, file: !3, line: 3, baseType: !18, size: 32)
+!31 = !{i32 7, !"Dwarf Version", i32 5}
+!32 = !{i32 2, !"Debug Info Version", i32 3}
+!33 = !{i32 1, !"wchar_size", i32 4}
+!34 = !{i32 7, !"frame-pointer", i32 2}
+!35 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
+!36 = !{!"clang version 20.0.0git (git at github.com:yonghong-song/llvm-project.git a7bdb883df5731338d84603c60210d93c86f0870)"}
+!37 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 9, type: !38, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !40)
+!38 = !DISubroutineType(types: !39)
+!39 = !{!18, !18, !22}
+!40 = !{!41, !42}
+!41 = !DILocalVariable(name: "a1", arg: 1, scope: !37, file: !3, line: 9, type: !18)
+!42 = !DILocalVariable(name: "p1", arg: 2, scope: !37, file: !3, line: 9, type: !22)
+!43 = !DILocation(line: 0, scope: !37)
+!44 = !DILocation(line: 10, column: 9, scope: !37)
+!45 = !DILocation(line: 11, column: 9, scope: !37)
+!46 = !DILocation(line: 12, column: 9, scope: !37)
+!47 = !DILocation(line: 13, column: 32, scope: !37)
+!48 = !{!49, !49, i64 0}
+!49 = !{!"any pointer", !50, i64 0}
+!50 = !{!"omnipotent char", !51, i64 0}
+!51 = !{!"Simple C/C++ TBAA"}
+!52 = !DILocation(line: 13, column: 9, scope: !37)
+!53 = !DILocation(line: 14, column: 9, scope: !37)
+!54 = !DILocation(line: 15, column: 9, scope: !37)
+!55 = !DILocation(line: 17, column: 3, scope: !37)
diff --git a/llvm/test/CodeGen/BPF/atomics_mem_order_v1.ll b/llvm/test/CodeGen/BPF/atomics_mem_order_v1.ll
new file mode 100644
index 00000000000000..31081586bf7afc
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/atomics_mem_order_v1.ll
@@ -0,0 +1,385 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -march=bpfel -mcpu=v1 -filetype=asm < %s | FileCheck %s
+;
+; Source:
+; $ cat atomics_mem_order_v1.c
+; #include <stdatomic.h>
+;
+; void test_fetch_add_32_noret(int _Atomic *i) {
+; (void)__c11_atomic_fetch_add(i, 10, memory_order_relaxed);
+; (void)__c11_atomic_fetch_add(i, 10, memory_order_acquire);
+; (void)__c11_atomic_fetch_add(i, 10, memory_order_release);
+; (void)__c11_atomic_fetch_add(i, 10, memory_order_acq_rel);
+; (void)__c11_atomic_fetch_add(i, 10, memory_order_seq_cst);
+; }
+;
+; void test_fetch_add_64_noret(long _Atomic *i) {
+; (void)__c11_atomic_fetch_add(i, 10, memory_order_relaxed);
+; (void)__c11_atomic_fetch_add(i, 10, memory_order_acquire);
+; (void)__c11_atomic_fetch_add(i, 10, memory_order_release);
+; (void)__c11_atomic_fetch_add(i, 10, memory_order_acq_rel);
+; (void)__c11_atomic_fetch_add(i, 10, memory_order_seq_cst);
+; }
+;
+; void test_fetch_sub_64_noret(long _Atomic *i) {
+; (void)__c11_atomic_fetch_sub(i, 10, memory_order_relaxed);
+; (void)__c11_atomic_fetch_sub(i, 10, memory_order_acquire);
+; (void)__c11_atomic_fetch_sub(i, 10, memory_order_release);
+; (void)__c11_atomic_fetch_sub(i, 10, memory_order_acq_rel);
+; (void)__c11_atomic_fetch_sub(i, 10, memory_order_seq_cst);
+; }
+;
+; long test_fetch_sub_64_ret(long _Atomic *i) {
+; return __c11_atomic_fetch_sub(i, 10, memory_order_acquire) +
+; __c11_atomic_fetch_sub(i, 10, memory_order_release) +
+; __c11_atomic_fetch_sub(i, 10, memory_order_acq_rel) +
+; __c11_atomic_fetch_sub(i, 10, memory_order_seq_cst);
+; }
+;
+; void test_fetch_and_64_noret(long _Atomic *i) {
+; (void)__c11_atomic_fetch_and(i, 10, memory_order_relaxed);
+; (void)__c11_atomic_fetch_and(i, 10, memory_order_acquire);
+; (void)__c11_atomic_fetch_and(i, 10, memory_order_release);
+; (void)__c11_atomic_fetch_and(i, 10, memory_order_acq_rel);
+; (void)__c11_atomic_fetch_and(i, 10, memory_order_seq_cst);
+; }
+;
+; long test_fetch_and_64_ret(long _Atomic *i) {
+; return __c11_atomic_fetch_and(i, 10, memory_order_relaxed) +
+; __c11_atomic_fetch_and(i, 10, memory_order_acquire) +
+; __c11_atomic_fetch_and(i, 10, memory_order_release) +
+; __c11_atomic_fetch_and(i, 10, memory_order_acq_rel) +
+; __c11_atomic_fetch_and(i, 10, memory_order_seq_cst);
+; }
+;
+; void test_fetch_or_64_noret(long _Atomic *i) {
+; (void)__c11_atomic_fetch_or(i, 10, memory_order_relaxed);
+; (void)__c11_atomic_fetch_or(i, 10, memory_order_acquire);
+; (void)__c11_atomic_fetch_or(i, 10, memory_order_release);
+; (void)__c11_atomic_fetch_or(i, 10, memory_order_acq_rel);
+; (void)__c11_atomic_fetch_or(i, 10, memory_order_seq_cst);
+; }
+;
+; long test_fetch_or_64_ret(long _Atomic *i) {
+; return __c11_atomic_fetch_or(i, 10, memory_order_relaxed) +
+; __c11_atomic_fetch_or(i, 10, memory_order_acquire) +
+; __c11_atomic_fetch_or(i, 10, memory_order_release) +
+; __c11_atomic_fetch_or(i, 10, memory_order_acq_rel) +
+; __c11_atomic_fetch_or(i, 10, memory_order_seq_cst);
+; }
+;
+; void test_fetch_xor_64_noret(long _Atomic *i) {
+; (void)__c11_atomic_fetch_xor(i, 10, memory_order_relaxed);
+; (void)__c11_atomic_fetch_xor(i, 10, memory_order_acquire);
+; (void)__c11_atomic_fetch_xor(i, 10, memory_order_release);
+; (void)__c11_atomic_fetch_xor(i, 10, memory_order_acq_rel);
+; (void)__c11_atomic_fetch_xor(i, 10, memory_order_seq_cst);
+; }
+;
+; long test_fetch_xor_64_ret(long _Atomic *i) {
+; return __c11_atomic_fetch_xor(i, 10, memory_order_relaxed) +
+; __c11_atomic_fetch_xor(i, 10, memory_order_acquire) +
+; __c11_atomic_fetch_xor(i, 10, memory_order_release) +
+; __c11_atomic_fetch_xor(i, 10, memory_order_acq_rel) +
+; __c11_atomic_fetch_xor(i, 10, memory_order_seq_cst);
+; }
+
+target triple = "bpf"
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local void @test_fetch_add_32_noret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_add_32_noret:
+; CHECK: .Ltest_fetch_add_32_noret$local:
+; CHECK-NEXT: .type .Ltest_fetch_add_32_noret$local, at function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: r2 = 10
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: lock *(u32 *)(r1 + 0) += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: lock *(u32 *)(r1 + 0) += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: lock *(u32 *)(r1 + 0) += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: lock *(u32 *)(r1 + 0) += r3
+; CHECK-NEXT: lock *(u32 *)(r1 + 0) += r2
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw add ptr %i, i32 10 monotonic, align 4
+ %1 = atomicrmw add ptr %i, i32 10 acquire, align 4
+ %2 = atomicrmw add ptr %i, i32 10 release, align 4
+ %3 = atomicrmw add ptr %i, i32 10 acq_rel, align 4
+ %4 = atomicrmw add ptr %i, i32 10 seq_cst, align 4
+ ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local void @test_fetch_add_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_add_64_noret:
+; CHECK: .Ltest_fetch_add_64_noret$local:
+; CHECK-NEXT: .type .Ltest_fetch_add_64_noret$local, at function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: r2 = 10
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3
+; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r2
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw add ptr %i, i64 10 monotonic, align 8
+ %1 = atomicrmw add ptr %i, i64 10 acquire, align 8
+ %2 = atomicrmw add ptr %i, i64 10 release, align 8
+ %3 = atomicrmw add ptr %i, i64 10 acq_rel, align 8
+ %4 = atomicrmw add ptr %i, i64 10 seq_cst, align 8
+ ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local void @test_fetch_sub_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_sub_64_noret:
+; CHECK: .Ltest_fetch_sub_64_noret$local:
+; CHECK-NEXT: .type .Ltest_fetch_sub_64_noret$local, at function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: r2 = 10
+; CHECK-NEXT: r2 = -r2
+; CHECK-NEXT: r3 = r2
+; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3
+; CHECK-NEXT: r3 = r2
+; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r3 = r2
+; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r3 = r2
+; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2)
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw sub ptr %i, i64 10 monotonic, align 8
+ %1 = atomicrmw sub ptr %i, i64 10 acquire, align 8
+ %2 = atomicrmw sub ptr %i, i64 10 release, align 8
+ %3 = atomicrmw sub ptr %i, i64 10 acq_rel, align 8
+ %4 = atomicrmw sub ptr %i, i64 10 seq_cst, align 8
+ ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local i64 @test_fetch_sub_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_sub_64_ret:
+; CHECK: .Ltest_fetch_sub_64_ret$local:
+; CHECK-NEXT: .type .Ltest_fetch_sub_64_ret$local, at function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: r2 = 10
+; CHECK-NEXT: r2 = -r2
+; CHECK-NEXT: r3 = r2
+; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 = r2
+; CHECK-NEXT: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r3 = r2
+; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2)
+; CHECK-NEXT: r0 += r2
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw sub ptr %i, i64 10 acquire, align 8
+ %1 = atomicrmw sub ptr %i, i64 10 release, align 8
+ %add = add nsw i64 %1, %0
+ %2 = atomicrmw sub ptr %i, i64 10 acq_rel, align 8
+ %add5 = add nsw i64 %add, %2
+ %3 = atomicrmw sub ptr %i, i64 10 seq_cst, align 8
+ %add8 = add nsw i64 %add5, %3
+ ret i64 %add8
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local void @test_fetch_and_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_and_64_noret:
+; CHECK: .Ltest_fetch_and_64_noret$local:
+; CHECK-NEXT: .type .Ltest_fetch_and_64_noret$local, at function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: r2 = 10
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: lock *(u64 *)(r1 + 0) &= r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r2 = atomic_fetch_and((u64 *)(r1 + 0), r2)
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw and ptr %i, i64 10 monotonic, align 8
+ %1 = atomicrmw and ptr %i, i64 10 acquire, align 8
+ %2 = atomicrmw and ptr %i, i64 10 release, align 8
+ %3 = atomicrmw and ptr %i, i64 10 acq_rel, align 8
+ %4 = atomicrmw and ptr %i, i64 10 seq_cst, align 8
+ ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local i64 @test_fetch_and_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_and_64_ret:
+; CHECK: .Ltest_fetch_and_64_ret$local:
+; CHECK-NEXT: .type .Ltest_fetch_and_64_ret$local, at function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: r2 = 10
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 = 10
+; CHECK-NEXT: r0 = atomic_fetch_and((u64 *)(r1 + 0), r0)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r2 = atomic_fetch_and((u64 *)(r1 + 0), r2)
+; CHECK-NEXT: r0 += r2
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw and ptr %i, i64 10 monotonic, align 8
+ %1 = atomicrmw and ptr %i, i64 10 acquire, align 8
+ %add = add nsw i64 %1, %0
+ %2 = atomicrmw and ptr %i, i64 10 release, align 8
+ %add5 = add nsw i64 %add, %2
+ %3 = atomicrmw and ptr %i, i64 10 acq_rel, align 8
+ %add8 = add nsw i64 %add5, %3
+ %4 = atomicrmw and ptr %i, i64 10 seq_cst, align 8
+ %add11 = add nsw i64 %add8, %4
+ ret i64 %add11
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local void @test_fetch_or_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_or_64_noret:
+; CHECK: .Ltest_fetch_or_64_noret$local:
+; CHECK-NEXT: .type .Ltest_fetch_or_64_noret$local, at function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: r2 = 10
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: lock *(u64 *)(r1 + 0) |= r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r2 = atomic_fetch_or((u64 *)(r1 + 0), r2)
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw or ptr %i, i64 10 monotonic, align 8
+ %1 = atomicrmw or ptr %i, i64 10 acquire, align 8
+ %2 = atomicrmw or ptr %i, i64 10 release, align 8
+ %3 = atomicrmw or ptr %i, i64 10 acq_rel, align 8
+ %4 = atomicrmw or ptr %i, i64 10 seq_cst, align 8
+ ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local i64 @test_fetch_or_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_or_64_ret:
+; CHECK: .Ltest_fetch_or_64_ret$local:
+; CHECK-NEXT: .type .Ltest_fetch_or_64_ret$local, at function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: r2 = 10
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 = 10
+; CHECK-NEXT: r0 = atomic_fetch_or((u64 *)(r1 + 0), r0)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r2 = atomic_fetch_or((u64 *)(r1 + 0), r2)
+; CHECK-NEXT: r0 += r2
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw or ptr %i, i64 10 monotonic, align 8
+ %1 = atomicrmw or ptr %i, i64 10 acquire, align 8
+ %add = add nsw i64 %1, %0
+ %2 = atomicrmw or ptr %i, i64 10 release, align 8
+ %add5 = add nsw i64 %add, %2
+ %3 = atomicrmw or ptr %i, i64 10 acq_rel, align 8
+ %add8 = add nsw i64 %add5, %3
+ %4 = atomicrmw or ptr %i, i64 10 seq_cst, align 8
+ %add11 = add nsw i64 %add8, %4
+ ret i64 %add11
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local void @test_fetch_xor_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_xor_64_noret:
+; CHECK: .Ltest_fetch_xor_64_noret$local:
+; CHECK-NEXT: .type .Ltest_fetch_xor_64_noret$local, at function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: r2 = 10
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: lock *(u64 *)(r1 + 0) ^= r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r2 = atomic_fetch_xor((u64 *)(r1 + 0), r2)
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw xor ptr %i, i64 10 monotonic, align 8
+ %1 = atomicrmw xor ptr %i, i64 10 acquire, align 8
+ %2 = atomicrmw xor ptr %i, i64 10 release, align 8
+ %3 = atomicrmw xor ptr %i, i64 10 acq_rel, align 8
+ %4 = atomicrmw xor ptr %i, i64 10 seq_cst, align 8
+ ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local i64 @test_fetch_xor_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_xor_64_ret:
+; CHECK: .Ltest_fetch_xor_64_ret$local:
+; CHECK-NEXT: .type .Ltest_fetch_xor_64_ret$local, at function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: r2 = 10
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 = 10
+; CHECK-NEXT: r0 = atomic_fetch_xor((u64 *)(r1 + 0), r0)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r2 = atomic_fetch_xor((u64 *)(r1 + 0), r2)
+; CHECK-NEXT: r0 += r2
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw xor ptr %i, i64 10 monotonic, align 8
+ %1 = atomicrmw xor ptr %i, i64 10 acquire, align 8
+ %add = add nsw i64 %1, %0
+ %2 = atomicrmw xor ptr %i, i64 10 release, align 8
+ %add5 = add nsw i64 %add, %2
+ %3 = atomicrmw xor ptr %i, i64 10 acq_rel, align 8
+ %add8 = add nsw i64 %add5, %3
+ %4 = atomicrmw xor ptr %i, i64 10 seq_cst, align 8
+ %add11 = add nsw i64 %add8, %4
+ ret i64 %add11
+}
+
+attributes #0 = { mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v1" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 20.0.0git (git at github.com:yonghong-song/llvm-project.git 6f71e34e194dab5a52cb2211af575c6067e9e504)"}
diff --git a/llvm/test/CodeGen/BPF/atomics_mem_order_v3.ll b/llvm/test/CodeGen/BPF/atomics_mem_order_v3.ll
new file mode 100644
index 00000000000000..49c26dde8ae602
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/atomics_mem_order_v3.ll
@@ -0,0 +1,781 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -march=bpfel -mcpu=v3 -filetype=asm < %s | FileCheck %s
+;
+; Source:
+; $ cat atomics_mem_order_v3.c
+; #include <stdatomic.h>
+;
+; void test_fetch_add_32_noret(int _Atomic *i) {
+; (void)__c11_atomic_fetch_add(i, 10, memory_order_relaxed);
+; (void)__c11_atomic_fetch_add(i, 10, memory_order_acquire);
+; (void)__c11_atomic_fetch_add(i, 10, memory_order_release);
+; (void)__c11_atomic_fetch_add(i, 10, memory_order_acq_rel);
+; (void)__c11_atomic_fetch_add(i, 10, memory_order_seq_cst);
+; }
+;
+; int test_fetch_add_32_ret(int _Atomic *i) {
+; return __c11_atomic_fetch_add(i, 10, memory_order_relaxed) +
+; __c11_atomic_fetch_add(i, 10, memory_order_acquire) +
+; __c11_atomic_fetch_add(i, 10, memory_order_release) +
+; __c11_atomic_fetch_add(i, 10, memory_order_acq_rel) +
+; __c11_atomic_fetch_add(i, 10, memory_order_seq_cst);
+; }
+;
+; void test_fetch_add_64_noret(long _Atomic *i) {
+; (void)__c11_atomic_fetch_add(i, 10, memory_order_relaxed);
+; (void)__c11_atomic_fetch_add(i, 10, memory_order_acquire);
+; (void)__c11_atomic_fetch_add(i, 10, memory_order_release);
+; (void)__c11_atomic_fetch_add(i, 10, memory_order_acq_rel);
+; (void)__c11_atomic_fetch_add(i, 10, memory_order_seq_cst);
+; }
+;
+; long test_fetch_add_64_ret(long _Atomic *i) {
+; return __c11_atomic_fetch_add(i, 10, memory_order_relaxed) +
+; __c11_atomic_fetch_add(i, 10, memory_order_acquire) +
+; __c11_atomic_fetch_add(i, 10, memory_order_release) +
+; __c11_atomic_fetch_add(i, 10, memory_order_acq_rel) +
+; __c11_atomic_fetch_add(i, 10, memory_order_seq_cst);
+; }
+;
+; void test_fetch_sub_32_noret(int _Atomic *i) {
+; (void)__c11_atomic_fetch_sub(i, 10, memory_order_relaxed);
+; (void)__c11_atomic_fetch_sub(i, 10, memory_order_acquire);
+; (void)__c11_atomic_fetch_sub(i, 10, memory_order_release);
+; (void)__c11_atomic_fetch_sub(i, 10, memory_order_acq_rel);
+; (void)__c11_atomic_fetch_sub(i, 10, memory_order_seq_cst);
+; }
+;
+; int test_fetch_sub_32_ret(int _Atomic *i) {
+; return __c11_atomic_fetch_sub(i, 10, memory_order_relaxed) +
+; __c11_atomic_fetch_sub(i, 10, memory_order_acquire) +
+; __c11_atomic_fetch_sub(i, 10, memory_order_release) +
+; __c11_atomic_fetch_sub(i, 10, memory_order_acq_rel) +
+; __c11_atomic_fetch_sub(i, 10, memory_order_seq_cst);
+; }
+;
+; void test_fetch_sub_64_noret(long _Atomic *i) {
+; (void)__c11_atomic_fetch_sub(i, 10, memory_order_relaxed);
+; (void)__c11_atomic_fetch_sub(i, 10, memory_order_acquire);
+; (void)__c11_atomic_fetch_sub(i, 10, memory_order_release);
+; (void)__c11_atomic_fetch_sub(i, 10, memory_order_acq_rel);
+; (void)__c11_atomic_fetch_sub(i, 10, memory_order_seq_cst);
+; }
+;
+; long test_fetch_sub_64_ret(long _Atomic *i) {
+; return __c11_atomic_fetch_sub(i, 10, memory_order_relaxed) +
+; __c11_atomic_fetch_sub(i, 10, memory_order_acquire) +
+; __c11_atomic_fetch_sub(i, 10, memory_order_release) +
+; __c11_atomic_fetch_sub(i, 10, memory_order_acq_rel) +
+; __c11_atomic_fetch_sub(i, 10, memory_order_seq_cst);
+; }
+;
+; void test_fetch_and_32_noret(int _Atomic *i) {
+; (void)__c11_atomic_fetch_and(i, 10, memory_order_relaxed);
+; (void)__c11_atomic_fetch_and(i, 10, memory_order_acquire);
+; (void)__c11_atomic_fetch_and(i, 10, memory_order_release);
+; (void)__c11_atomic_fetch_and(i, 10, memory_order_acq_rel);
+; (void)__c11_atomic_fetch_and(i, 10, memory_order_seq_cst);
+; }
+;
+; int test_fetch_and_32_ret(int _Atomic *i) {
+; return __c11_atomic_fetch_and(i, 10, memory_order_relaxed) +
+; __c11_atomic_fetch_and(i, 10, memory_order_acquire) +
+; __c11_atomic_fetch_and(i, 10, memory_order_release) +
+; __c11_atomic_fetch_and(i, 10, memory_order_acq_rel) +
+; __c11_atomic_fetch_and(i, 10, memory_order_seq_cst);
+; }
+;
+; void test_fetch_and_64_noret(long _Atomic *i) {
+; (void)__c11_atomic_fetch_and(i, 10, memory_order_relaxed);
+; (void)__c11_atomic_fetch_and(i, 10, memory_order_acquire);
+; (void)__c11_atomic_fetch_and(i, 10, memory_order_release);
+; (void)__c11_atomic_fetch_and(i, 10, memory_order_acq_rel);
+; (void)__c11_atomic_fetch_and(i, 10, memory_order_seq_cst);
+; }
+;
+; long test_fetch_and_64_ret(long _Atomic *i) {
+; return __c11_atomic_fetch_and(i, 10, memory_order_relaxed) +
+; __c11_atomic_fetch_and(i, 10, memory_order_acquire) +
+; __c11_atomic_fetch_and(i, 10, memory_order_release) +
+; __c11_atomic_fetch_and(i, 10, memory_order_acq_rel) +
+; __c11_atomic_fetch_and(i, 10, memory_order_seq_cst);
+; }
+;
+; void test_fetch_or_32_noret(int _Atomic *i) {
+; (void)__c11_atomic_fetch_or(i, 10, memory_order_relaxed);
+; (void)__c11_atomic_fetch_or(i, 10, memory_order_acquire);
+; (void)__c11_atomic_fetch_or(i, 10, memory_order_release);
+; (void)__c11_atomic_fetch_or(i, 10, memory_order_acq_rel);
+; (void)__c11_atomic_fetch_or(i, 10, memory_order_seq_cst);
+; }
+;
+; int test_fetch_or_32_ret(int _Atomic *i) {
+; return __c11_atomic_fetch_or(i, 10, memory_order_relaxed) +
+; __c11_atomic_fetch_or(i, 10, memory_order_acquire) +
+; __c11_atomic_fetch_or(i, 10, memory_order_release) +
+; __c11_atomic_fetch_or(i, 10, memory_order_acq_rel) +
+; __c11_atomic_fetch_or(i, 10, memory_order_seq_cst);
+; }
+;
+; void test_fetch_or_64_noret(long _Atomic *i) {
+; (void)__c11_atomic_fetch_or(i, 10, memory_order_relaxed);
+; (void)__c11_atomic_fetch_or(i, 10, memory_order_acquire);
+; (void)__c11_atomic_fetch_or(i, 10, memory_order_release);
+; (void)__c11_atomic_fetch_or(i, 10, memory_order_acq_rel);
+; (void)__c11_atomic_fetch_or(i, 10, memory_order_seq_cst);
+; }
+;
+; long test_fetch_or_64_ret(long _Atomic *i) {
+; return __c11_atomic_fetch_or(i, 10, memory_order_relaxed) +
+; __c11_atomic_fetch_or(i, 10, memory_order_acquire) +
+; __c11_atomic_fetch_or(i, 10, memory_order_release) +
+; __c11_atomic_fetch_or(i, 10, memory_order_acq_rel) +
+; __c11_atomic_fetch_or(i, 10, memory_order_seq_cst);
+; }
+;
+; void test_fetch_xor_32_noret(int _Atomic *i) {
+; (void)__c11_atomic_fetch_xor(i, 10, memory_order_relaxed);
+; (void)__c11_atomic_fetch_xor(i, 10, memory_order_acquire);
+; (void)__c11_atomic_fetch_xor(i, 10, memory_order_release);
+; (void)__c11_atomic_fetch_xor(i, 10, memory_order_acq_rel);
+; (void)__c11_atomic_fetch_xor(i, 10, memory_order_seq_cst);
+; }
+;
+; int test_fetch_xor_32_ret(int _Atomic *i) {
+; return __c11_atomic_fetch_xor(i, 10, memory_order_relaxed) +
+; __c11_atomic_fetch_xor(i, 10, memory_order_acquire) +
+; __c11_atomic_fetch_xor(i, 10, memory_order_release) +
+; __c11_atomic_fetch_xor(i, 10, memory_order_acq_rel) +
+; __c11_atomic_fetch_xor(i, 10, memory_order_seq_cst);
+; }
+;
+; void test_fetch_xor_64_noret(long _Atomic *i) {
+; (void)__c11_atomic_fetch_xor(i, 10, memory_order_relaxed);
+; (void)__c11_atomic_fetch_xor(i, 10, memory_order_acquire);
+; (void)__c11_atomic_fetch_xor(i, 10, memory_order_release);
+; (void)__c11_atomic_fetch_xor(i, 10, memory_order_acq_rel);
+; (void)__c11_atomic_fetch_xor(i, 10, memory_order_seq_cst);
+; }
+;
+; long test_fetch_xor_64_ret(long _Atomic *i) {
+; return __c11_atomic_fetch_xor(i, 10, memory_order_relaxed) +
+; __c11_atomic_fetch_xor(i, 10, memory_order_acquire) +
+; __c11_atomic_fetch_xor(i, 10, memory_order_release) +
+; __c11_atomic_fetch_xor(i, 10, memory_order_acq_rel) +
+; __c11_atomic_fetch_xor(i, 10, memory_order_seq_cst);
+; }
+
+target triple = "bpf"
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local void @test_fetch_add_32_noret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_add_32_noret:
+; CHECK: .Ltest_fetch_add_32_noret$local:
+; CHECK-NEXT: .type .Ltest_fetch_add_32_noret$local, at function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: w2 = 10
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: lock *(u32 *)(r1 + 0) += w3
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2)
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw add ptr %i, i32 10 monotonic, align 4
+ %1 = atomicrmw add ptr %i, i32 10 acquire, align 4
+ %2 = atomicrmw add ptr %i, i32 10 release, align 4
+ %3 = atomicrmw add ptr %i, i32 10 acq_rel, align 4
+ %4 = atomicrmw add ptr %i, i32 10 seq_cst, align 4
+ ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local i32 @test_fetch_add_32_ret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_add_32_ret:
+; CHECK: .Ltest_fetch_add_32_ret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_add_32_ret$local,@function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: w2 = 10
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w0 = 10
+; CHECK-NEXT: w0 = atomic_fetch_add((u32 *)(r1 + 0), w0)
+; CHECK-NEXT: w0 += w3
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w0 += w3
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w0 += w3
+; CHECK-NEXT: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2)
+; CHECK-NEXT: w0 += w2
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw add ptr %i, i32 10 monotonic, align 4
+ %1 = atomicrmw add ptr %i, i32 10 acquire, align 4
+ %add = add nsw i32 %1, %0
+ %2 = atomicrmw add ptr %i, i32 10 release, align 4
+ %add5 = add nsw i32 %add, %2
+ %3 = atomicrmw add ptr %i, i32 10 acq_rel, align 4
+ %add8 = add nsw i32 %add5, %3
+ %4 = atomicrmw add ptr %i, i32 10 seq_cst, align 4
+ %add11 = add nsw i32 %add8, %4
+ ret i32 %add11
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local void @test_fetch_add_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_add_64_noret:
+; CHECK: .Ltest_fetch_add_64_noret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_add_64_noret$local,@function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: r2 = 10
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2)
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw add ptr %i, i64 10 monotonic, align 8
+ %1 = atomicrmw add ptr %i, i64 10 acquire, align 8
+ %2 = atomicrmw add ptr %i, i64 10 release, align 8
+ %3 = atomicrmw add ptr %i, i64 10 acq_rel, align 8
+ %4 = atomicrmw add ptr %i, i64 10 seq_cst, align 8
+ ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local i64 @test_fetch_add_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_add_64_ret:
+; CHECK: .Ltest_fetch_add_64_ret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_add_64_ret$local,@function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: r2 = 10
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 = 10
+; CHECK-NEXT: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2)
+; CHECK-NEXT: r0 += r2
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw add ptr %i, i64 10 monotonic, align 8
+ %1 = atomicrmw add ptr %i, i64 10 acquire, align 8
+ %add = add nsw i64 %1, %0
+ %2 = atomicrmw add ptr %i, i64 10 release, align 8
+ %add5 = add nsw i64 %add, %2
+ %3 = atomicrmw add ptr %i, i64 10 acq_rel, align 8
+ %add8 = add nsw i64 %add5, %3
+ %4 = atomicrmw add ptr %i, i64 10 seq_cst, align 8
+ %add11 = add nsw i64 %add8, %4
+ ret i64 %add11
+}
+
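+; atomicrmw sub has no direct BPF encoding; the checks below show it lowered
+; to a negate of the operand followed by an atomic add (a locked add for the
+; relaxed no-return case, atomic_fetch_add otherwise).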
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local void @test_fetch_sub_32_noret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_sub_32_noret:
+; CHECK: .Ltest_fetch_sub_32_noret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_sub_32_noret$local,@function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: w2 = 10
+; CHECK-NEXT: w2 = -w2
+; CHECK-NEXT: w3 = w2
+; CHECK-NEXT: lock *(u32 *)(r1 + 0) += w3
+; CHECK-NEXT: w3 = w2
+; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w3 = w2
+; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w3 = w2
+; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2)
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw sub ptr %i, i32 10 monotonic, align 4
+ %1 = atomicrmw sub ptr %i, i32 10 acquire, align 4
+ %2 = atomicrmw sub ptr %i, i32 10 release, align 4
+ %3 = atomicrmw sub ptr %i, i32 10 acq_rel, align 4
+ %4 = atomicrmw sub ptr %i, i32 10 seq_cst, align 4
+ ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local i32 @test_fetch_sub_32_ret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_sub_32_ret:
+; CHECK: .Ltest_fetch_sub_32_ret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_sub_32_ret$local,@function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: w2 = 10
+; CHECK-NEXT: w2 = -w2
+; CHECK-NEXT: w3 = w2
+; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w0 = w2
+; CHECK-NEXT: w0 = atomic_fetch_add((u32 *)(r1 + 0), w0)
+; CHECK-NEXT: w0 += w3
+; CHECK-NEXT: w3 = w2
+; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w0 += w3
+; CHECK-NEXT: w3 = w2
+; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w0 += w3
+; CHECK-NEXT: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2)
+; CHECK-NEXT: w0 += w2
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw sub ptr %i, i32 10 monotonic, align 4
+ %1 = atomicrmw sub ptr %i, i32 10 acquire, align 4
+ %add = add nsw i32 %1, %0
+ %2 = atomicrmw sub ptr %i, i32 10 release, align 4
+ %add5 = add nsw i32 %add, %2
+ %3 = atomicrmw sub ptr %i, i32 10 acq_rel, align 4
+ %add8 = add nsw i32 %add5, %3
+ %4 = atomicrmw sub ptr %i, i32 10 seq_cst, align 4
+ %add11 = add nsw i32 %add8, %4
+ ret i32 %add11
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local void @test_fetch_sub_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_sub_64_noret:
+; CHECK: .Ltest_fetch_sub_64_noret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_sub_64_noret$local,@function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: r2 = 10
+; CHECK-NEXT: r2 = -r2
+; CHECK-NEXT: r3 = r2
+; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3
+; CHECK-NEXT: r3 = r2
+; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r3 = r2
+; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r3 = r2
+; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2)
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw sub ptr %i, i64 10 monotonic, align 8
+ %1 = atomicrmw sub ptr %i, i64 10 acquire, align 8
+ %2 = atomicrmw sub ptr %i, i64 10 release, align 8
+ %3 = atomicrmw sub ptr %i, i64 10 acq_rel, align 8
+ %4 = atomicrmw sub ptr %i, i64 10 seq_cst, align 8
+ ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local i64 @test_fetch_sub_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_sub_64_ret:
+; CHECK: .Ltest_fetch_sub_64_ret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_sub_64_ret$local,@function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: r2 = 10
+; CHECK-NEXT: r2 = -r2
+; CHECK-NEXT: r3 = r2
+; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 = r2
+; CHECK-NEXT: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r3 = r2
+; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r3 = r2
+; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2)
+; CHECK-NEXT: r0 += r2
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw sub ptr %i, i64 10 monotonic, align 8
+ %1 = atomicrmw sub ptr %i, i64 10 acquire, align 8
+ %add = add nsw i64 %1, %0
+ %2 = atomicrmw sub ptr %i, i64 10 release, align 8
+ %add5 = add nsw i64 %add, %2
+ %3 = atomicrmw sub ptr %i, i64 10 acq_rel, align 8
+ %add8 = add nsw i64 %add5, %3
+ %4 = atomicrmw sub ptr %i, i64 10 seq_cst, align 8
+ %add11 = add nsw i64 %add8, %4
+ ret i64 %add11
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local void @test_fetch_and_32_noret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_and_32_noret:
+; CHECK: .Ltest_fetch_and_32_noret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_and_32_noret$local,@function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: w2 = 10
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: lock *(u32 *)(r1 + 0) &= w3
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_and((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_and((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_and((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w2 = atomic_fetch_and((u32 *)(r1 + 0), w2)
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw and ptr %i, i32 10 monotonic, align 4
+ %1 = atomicrmw and ptr %i, i32 10 acquire, align 4
+ %2 = atomicrmw and ptr %i, i32 10 release, align 4
+ %3 = atomicrmw and ptr %i, i32 10 acq_rel, align 4
+ %4 = atomicrmw and ptr %i, i32 10 seq_cst, align 4
+ ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local i32 @test_fetch_and_32_ret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_and_32_ret:
+; CHECK: .Ltest_fetch_and_32_ret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_and_32_ret$local,@function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: w2 = 10
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_and((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w0 = 10
+; CHECK-NEXT: w0 = atomic_fetch_and((u32 *)(r1 + 0), w0)
+; CHECK-NEXT: w0 += w3
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_and((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w0 += w3
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_and((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w0 += w3
+; CHECK-NEXT: w2 = atomic_fetch_and((u32 *)(r1 + 0), w2)
+; CHECK-NEXT: w0 += w2
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw and ptr %i, i32 10 monotonic, align 4
+ %1 = atomicrmw and ptr %i, i32 10 acquire, align 4
+ %add = add nsw i32 %1, %0
+ %2 = atomicrmw and ptr %i, i32 10 release, align 4
+ %add5 = add nsw i32 %add, %2
+ %3 = atomicrmw and ptr %i, i32 10 acq_rel, align 4
+ %add8 = add nsw i32 %add5, %3
+ %4 = atomicrmw and ptr %i, i32 10 seq_cst, align 4
+ %add11 = add nsw i32 %add8, %4
+ ret i32 %add11
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local void @test_fetch_and_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_and_64_noret:
+; CHECK: .Ltest_fetch_and_64_noret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_and_64_noret$local,@function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: r2 = 10
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: lock *(u64 *)(r1 + 0) &= r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r2 = atomic_fetch_and((u64 *)(r1 + 0), r2)
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw and ptr %i, i64 10 monotonic, align 8
+ %1 = atomicrmw and ptr %i, i64 10 acquire, align 8
+ %2 = atomicrmw and ptr %i, i64 10 release, align 8
+ %3 = atomicrmw and ptr %i, i64 10 acq_rel, align 8
+ %4 = atomicrmw and ptr %i, i64 10 seq_cst, align 8
+ ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local i64 @test_fetch_and_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_and_64_ret:
+; CHECK: .Ltest_fetch_and_64_ret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_and_64_ret$local,@function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: r2 = 10
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 = 10
+; CHECK-NEXT: r0 = atomic_fetch_and((u64 *)(r1 + 0), r0)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r2 = atomic_fetch_and((u64 *)(r1 + 0), r2)
+; CHECK-NEXT: r0 += r2
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw and ptr %i, i64 10 monotonic, align 8
+ %1 = atomicrmw and ptr %i, i64 10 acquire, align 8
+ %add = add nsw i64 %1, %0
+ %2 = atomicrmw and ptr %i, i64 10 release, align 8
+ %add5 = add nsw i64 %add, %2
+ %3 = atomicrmw and ptr %i, i64 10 acq_rel, align 8
+ %add8 = add nsw i64 %add5, %3
+ %4 = atomicrmw and ptr %i, i64 10 seq_cst, align 8
+ %add11 = add nsw i64 %add8, %4
+ ret i64 %add11
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local void @test_fetch_or_32_noret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_or_32_noret:
+; CHECK: .Ltest_fetch_or_32_noret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_or_32_noret$local,@function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: w2 = 10
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: lock *(u32 *)(r1 + 0) |= w3
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_or((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_or((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_or((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w2 = atomic_fetch_or((u32 *)(r1 + 0), w2)
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw or ptr %i, i32 10 monotonic, align 4
+ %1 = atomicrmw or ptr %i, i32 10 acquire, align 4
+ %2 = atomicrmw or ptr %i, i32 10 release, align 4
+ %3 = atomicrmw or ptr %i, i32 10 acq_rel, align 4
+ %4 = atomicrmw or ptr %i, i32 10 seq_cst, align 4
+ ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local i32 @test_fetch_or_32_ret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_or_32_ret:
+; CHECK: .Ltest_fetch_or_32_ret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_or_32_ret$local,@function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: w2 = 10
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_or((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w0 = 10
+; CHECK-NEXT: w0 = atomic_fetch_or((u32 *)(r1 + 0), w0)
+; CHECK-NEXT: w0 += w3
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_or((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w0 += w3
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_or((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w0 += w3
+; CHECK-NEXT: w2 = atomic_fetch_or((u32 *)(r1 + 0), w2)
+; CHECK-NEXT: w0 += w2
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw or ptr %i, i32 10 monotonic, align 4
+ %1 = atomicrmw or ptr %i, i32 10 acquire, align 4
+ %add = add nsw i32 %1, %0
+ %2 = atomicrmw or ptr %i, i32 10 release, align 4
+ %add5 = add nsw i32 %add, %2
+ %3 = atomicrmw or ptr %i, i32 10 acq_rel, align 4
+ %add8 = add nsw i32 %add5, %3
+ %4 = atomicrmw or ptr %i, i32 10 seq_cst, align 4
+ %add11 = add nsw i32 %add8, %4
+ ret i32 %add11
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local void @test_fetch_or_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_or_64_noret:
+; CHECK: .Ltest_fetch_or_64_noret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_or_64_noret$local,@function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: r2 = 10
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: lock *(u64 *)(r1 + 0) |= r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r2 = atomic_fetch_or((u64 *)(r1 + 0), r2)
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw or ptr %i, i64 10 monotonic, align 8
+ %1 = atomicrmw or ptr %i, i64 10 acquire, align 8
+ %2 = atomicrmw or ptr %i, i64 10 release, align 8
+ %3 = atomicrmw or ptr %i, i64 10 acq_rel, align 8
+ %4 = atomicrmw or ptr %i, i64 10 seq_cst, align 8
+ ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local i64 @test_fetch_or_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_or_64_ret:
+; CHECK: .Ltest_fetch_or_64_ret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_or_64_ret$local,@function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: r2 = 10
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 = 10
+; CHECK-NEXT: r0 = atomic_fetch_or((u64 *)(r1 + 0), r0)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r2 = atomic_fetch_or((u64 *)(r1 + 0), r2)
+; CHECK-NEXT: r0 += r2
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw or ptr %i, i64 10 monotonic, align 8
+ %1 = atomicrmw or ptr %i, i64 10 acquire, align 8
+ %add = add nsw i64 %1, %0
+ %2 = atomicrmw or ptr %i, i64 10 release, align 8
+ %add5 = add nsw i64 %add, %2
+ %3 = atomicrmw or ptr %i, i64 10 acq_rel, align 8
+ %add8 = add nsw i64 %add5, %3
+ %4 = atomicrmw or ptr %i, i64 10 seq_cst, align 8
+ %add11 = add nsw i64 %add8, %4
+ ret i64 %add11
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local void @test_fetch_xor_32_noret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_xor_32_noret:
+; CHECK: .Ltest_fetch_xor_32_noret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_xor_32_noret$local,@function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: w2 = 10
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: lock *(u32 *)(r1 + 0) ^= w3
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_xor((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_xor((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_xor((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w2 = atomic_fetch_xor((u32 *)(r1 + 0), w2)
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw xor ptr %i, i32 10 monotonic, align 4
+ %1 = atomicrmw xor ptr %i, i32 10 acquire, align 4
+ %2 = atomicrmw xor ptr %i, i32 10 release, align 4
+ %3 = atomicrmw xor ptr %i, i32 10 acq_rel, align 4
+ %4 = atomicrmw xor ptr %i, i32 10 seq_cst, align 4
+ ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local i32 @test_fetch_xor_32_ret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_xor_32_ret:
+; CHECK: .Ltest_fetch_xor_32_ret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_xor_32_ret$local,@function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: w2 = 10
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_xor((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w0 = 10
+; CHECK-NEXT: w0 = atomic_fetch_xor((u32 *)(r1 + 0), w0)
+; CHECK-NEXT: w0 += w3
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_xor((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w0 += w3
+; CHECK-NEXT: w3 = 10
+; CHECK-NEXT: w3 = atomic_fetch_xor((u32 *)(r1 + 0), w3)
+; CHECK-NEXT: w0 += w3
+; CHECK-NEXT: w2 = atomic_fetch_xor((u32 *)(r1 + 0), w2)
+; CHECK-NEXT: w0 += w2
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw xor ptr %i, i32 10 monotonic, align 4
+ %1 = atomicrmw xor ptr %i, i32 10 acquire, align 4
+ %add = add nsw i32 %1, %0
+ %2 = atomicrmw xor ptr %i, i32 10 release, align 4
+ %add5 = add nsw i32 %add, %2
+ %3 = atomicrmw xor ptr %i, i32 10 acq_rel, align 4
+ %add8 = add nsw i32 %add5, %3
+ %4 = atomicrmw xor ptr %i, i32 10 seq_cst, align 4
+ %add11 = add nsw i32 %add8, %4
+ ret i32 %add11
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local void @test_fetch_xor_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_xor_64_noret:
+; CHECK: .Ltest_fetch_xor_64_noret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_xor_64_noret$local,@function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: r2 = 10
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: lock *(u64 *)(r1 + 0) ^= r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r2 = atomic_fetch_xor((u64 *)(r1 + 0), r2)
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw xor ptr %i, i64 10 monotonic, align 8
+ %1 = atomicrmw xor ptr %i, i64 10 acquire, align 8
+ %2 = atomicrmw xor ptr %i, i64 10 release, align 8
+ %3 = atomicrmw xor ptr %i, i64 10 acq_rel, align 8
+ %4 = atomicrmw xor ptr %i, i64 10 seq_cst, align 8
+ ret void
+}
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local i64 @test_fetch_xor_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fetch_xor_64_ret:
+; CHECK: .Ltest_fetch_xor_64_ret$local:
+; CHECK-NEXT:    .type .Ltest_fetch_xor_64_ret$local,@function
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: r2 = 10
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 = 10
+; CHECK-NEXT: r0 = atomic_fetch_xor((u64 *)(r1 + 0), r0)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r3 = 10
+; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3)
+; CHECK-NEXT: r0 += r3
+; CHECK-NEXT: r2 = atomic_fetch_xor((u64 *)(r1 + 0), r2)
+; CHECK-NEXT: r0 += r2
+; CHECK-NEXT: exit
+entry:
+ %0 = atomicrmw xor ptr %i, i64 10 monotonic, align 8
+ %1 = atomicrmw xor ptr %i, i64 10 acquire, align 8
+ %add = add nsw i64 %1, %0
+ %2 = atomicrmw xor ptr %i, i64 10 release, align 8
+ %add5 = add nsw i64 %add, %2
+ %3 = atomicrmw xor ptr %i, i64 10 acq_rel, align 8
+ %add8 = add nsw i64 %add5, %3
+ %4 = atomicrmw xor ptr %i, i64 10 seq_cst, align 8
+ %add11 = add nsw i64 %add8, %4
+ ret i64 %add11
+}
+
+attributes #0 = { mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v3" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 20.0.0git (git at github.com:yonghong-song/llvm-project.git 6f71e34e194dab5a52cb2211af575c6067e9e504)"}
diff --git a/llvm/test/CodeGen/BPF/atomics_sub64_relaxed_v1.ll b/llvm/test/CodeGen/BPF/atomics_sub64_relaxed_v1.ll
new file mode 100644
index 00000000000000..4d630d475b2962
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/atomics_sub64_relaxed_v1.ll
@@ -0,0 +1,29 @@
+; RUN: not llc -march=bpfel -mcpu=v1 -filetype=asm < %s
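+; cpu v1 lacks the atomic_fetch_*() instructions, so this returning
+; fetch_sub cannot be selected and llc is expected to fail.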
+;
+; Source:
+; $ cat atomics_sub64_relaxed_v1.c
+; #include <stdatomic.h>
+;
+; long test_fetch_sub_64_ret(long _Atomic *i) {
+; return __c11_atomic_fetch_sub(i, 10, memory_order_relaxed);
+; }
+
+target triple = "bpf"
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local i64 @test_fetch_sub_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+entry:
+ %0 = atomicrmw sub ptr %i, i64 10 monotonic, align 8
+ ret i64 %0
+}
+
+attributes #0 = { mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v1" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 20.0.0git (git at github.com:yonghong-song/llvm-project.git 6f71e34e194dab5a52cb2211af575c6067e9e504)"}
diff --git a/llvm/test/CodeGen/BPF/xaddd_v1.ll b/llvm/test/CodeGen/BPF/xaddd_v1.ll
new file mode 100644
index 00000000000000..d3bfd8d81b15b5
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/xaddd_v1.ll
@@ -0,0 +1,27 @@
+; RUN: not llc -march=bpfel -mcpu=v1 -filetype=asm < %s
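+; cpu v1 only provides xadd, which discards the old value, so this returning
+; fetch-and-add cannot be selected and llc is expected to fail.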
+;
+; Source:
+; $ cat xaddd_v1.c
+; long test_fetch_add_64_ret(long *i) {
+; return __sync_fetch_and_add(i, 10);
+; }
+
+target triple = "bpf"
+
+; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
+define dso_local i64 @test_fetch_add_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 {
+entry:
+ %0 = atomicrmw add ptr %i, i64 10 seq_cst, align 8
+ ret i64 %0
+}
+
+attributes #0 = { mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v1" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 20.0.0git (git at github.com:yonghong-song/llvm-project.git 6f71e34e194dab5a52cb2211af575c6067e9e504)"}