[clang] [llvm] [BPF] Add load-acquire and store-release instructions under -mcpu=v4 (PR #108636)
Peilin Ye via cfe-commits
cfe-commits at lists.llvm.org
Thu Oct 31 14:07:57 PDT 2024
https://github.com/peilin-ye updated https://github.com/llvm/llvm-project/pull/108636
>From 885d5141f6707a0fdf4be363351083f8fdf8fd54 Mon Sep 17 00:00:00 2001
From: Peilin Ye <yepeilin at google.com>
Date: Sat, 5 Oct 2024 06:44:21 +0000
Subject: [PATCH 1/3] [BPF] Rename isST*() and isLD*() functions in
BPFMISimplifyPatchable.cpp (NFC)
We are planning to add load (specifically, atomic acquiring load, or
"load-acquire") instructions under the STX instruction class. To make
that easier, rename the isST*() and isLD*() helper functions based on
what the instructions actually do, rather than their instruction class.
---
.../lib/Target/BPF/BPFMISimplifyPatchable.cpp | 22 +++++++++----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
index 39390e8c38f8c1..4a1684ccebb793 100644
--- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
+++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
@@ -94,35 +94,35 @@ void BPFMISimplifyPatchable::initialize(MachineFunction &MFParm) {
LLVM_DEBUG(dbgs() << "*** BPF simplify patchable insts pass ***\n\n");
}
-static bool isST(unsigned Opcode) {
+static bool isStoreImm(unsigned Opcode) {
return Opcode == BPF::STB_imm || Opcode == BPF::STH_imm ||
Opcode == BPF::STW_imm || Opcode == BPF::STD_imm;
}
-static bool isSTX32(unsigned Opcode) {
+static bool isStore32(unsigned Opcode) {
return Opcode == BPF::STB32 || Opcode == BPF::STH32 || Opcode == BPF::STW32;
}
-static bool isSTX64(unsigned Opcode) {
+static bool isStore64(unsigned Opcode) {
return Opcode == BPF::STB || Opcode == BPF::STH || Opcode == BPF::STW ||
Opcode == BPF::STD;
}
-static bool isLDX32(unsigned Opcode) {
+static bool isLoad32(unsigned Opcode) {
return Opcode == BPF::LDB32 || Opcode == BPF::LDH32 || Opcode == BPF::LDW32;
}
-static bool isLDX64(unsigned Opcode) {
+static bool isLoad64(unsigned Opcode) {
return Opcode == BPF::LDB || Opcode == BPF::LDH || Opcode == BPF::LDW ||
Opcode == BPF::LDD;
}
-static bool isLDSX(unsigned Opcode) {
+static bool isLoadSext(unsigned Opcode) {
return Opcode == BPF::LDBSX || Opcode == BPF::LDHSX || Opcode == BPF::LDWSX;
}
bool BPFMISimplifyPatchable::isLoadInst(unsigned Opcode) {
- return isLDX32(Opcode) || isLDX64(Opcode) || isLDSX(Opcode);
+ return isLoad32(Opcode) || isLoad64(Opcode) || isLoadSext(Opcode);
}
void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
@@ -143,11 +143,11 @@ void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
MachineInstr *DefInst = MO.getParent();
unsigned Opcode = DefInst->getOpcode();
unsigned COREOp;
- if (isLDX64(Opcode) || isLDSX(Opcode))
+ if (isLoad64(Opcode) || isLoadSext(Opcode))
COREOp = BPF::CORE_LD64;
- else if (isLDX32(Opcode))
+ else if (isLoad32(Opcode))
COREOp = BPF::CORE_LD32;
- else if (isSTX64(Opcode) || isSTX32(Opcode) || isST(Opcode))
+ else if (isStore64(Opcode) || isStore32(Opcode) || isStoreImm(Opcode))
COREOp = BPF::CORE_ST;
else
continue;
@@ -160,7 +160,7 @@ void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
// Reject the form:
// %1 = ADD_rr %2, %3
// *(type *)(%2 + 0) = %1
- if (isSTX64(Opcode) || isSTX32(Opcode)) {
+ if (isStore64(Opcode) || isStore32(Opcode)) {
const MachineOperand &Opnd = DefInst->getOperand(0);
if (Opnd.isReg() && Opnd.getReg() == MO.getReg())
continue;
>From db3cef60b0b8194e8ebd0ce0a0bc792f114c15f3 Mon Sep 17 00:00:00 2001
From: Peilin Ye <yepeilin at google.com>
Date: Sat, 5 Oct 2024 07:31:54 +0000
Subject: [PATCH 2/3] [BPF] Add load-acquire and store-release instructions
under -mcpu=v4
As discussed in [1], introduce BPF instructions with load-acquire and
store-release semantics under -mcpu=v4.
The following new flags are defined:
BPF_ATOMIC_LOAD: 0x10
BPF_ATOMIC_STORE: 0x20
BPF_RELAXED: 0x0
BPF_ACQUIRE: 0x1
BPF_RELEASE: 0x2
BPF_ACQ_REL: 0x3
BPF_SEQ_CST: 0x4
A "load-acquire" is a BPF_STX | BPF_ATOMIC instruction with the 'imm'
field set to BPF_ATOMIC_LOAD | BPF_ACQUIRE (0x11).
Similarly, a "store-release" is a BPF_STX | BPF_ATOMIC instruction with
the 'imm' field set to BPF_ATOMIC_STORE | BPF_RELEASE (0x22).
Unlike existing atomic operations that only support BPF_W (32-bit) and
BPF_DW (64-bit) size modifiers, load-acquires and store-releases also
support BPF_B (8-bit) and BPF_H (16-bit). An 8- or 16-bit load-acquire
zero-extends the value before writing it to a 32-bit register, just like
ARM64 instruction LDAPRH and friends.
As an example, for -march=bpfel (little-endian):
long foo(long *ptr) {
return __atomic_load_n(ptr, __ATOMIC_ACQUIRE);
}
foo() can be compiled to:
db 10 00 00 11 00 00 00 r0 = load_acquire((u64 *)(r1 + 0x0))
95 00 00 00 00 00 00 00 exit
opcode (0xdb): BPF_ATOMIC | BPF_DW | BPF_STX
imm (0x00000011): BPF_ATOMIC_LOAD | BPF_ACQUIRE
Similarly:
void bar(short *ptr, short val) {
__atomic_store_n(ptr, val, __ATOMIC_RELEASE);
}
bar() can be compiled to:
cb 21 00 00 22 00 00 00 store_release((u16 *)(r1 + 0x0), w2)
95 00 00 00 00 00 00 00 exit
opcode (0xcb): BPF_ATOMIC | BPF_H | BPF_STX
imm (0x00000022): BPF_ATOMIC_STORE | BPF_RELEASE
Inline assembly is also supported. For example:
asm volatile("%0 = load_acquire((u64 *)(%1 + 0x0))" :
"=r"(ret) : "r"(ptr) : "memory");
Add two macros, __BPF_FEATURE_LOAD_ACQUIRE and
__BPF_FEATURE_STORE_RELEASE, to let developers detect these new features
in source code. The new instructions can also be disabled using two new
llc options, -disable-load-acquire and -disable-store-release, respectively.
Using __ATOMIC_RELAXED for __atomic_store{,_n}() will generate a "plain"
store (BPF_MEM | BPF_STX) instruction:
void foo(short *ptr, short val) {
__atomic_store_n(ptr, val, __ATOMIC_RELAXED);
}
6b 21 00 00 00 00 00 00 *(u16 *)(r1 + 0x0) = w2
95 00 00 00 00 00 00 00 exit
Similarly, using __ATOMIC_RELAXED for __atomic_load{,_n}() will generate
a zero-extending, "plain" load (BPF_MEM | BPF_LDX) instruction:
int foo(char *ptr) {
return __atomic_load_n(ptr, __ATOMIC_RELAXED);
}
71 11 00 00 00 00 00 00 w1 = *(u8 *)(r1 + 0x0)
bc 10 08 00 00 00 00 00 w0 = (s8)w1
95 00 00 00 00 00 00 00 exit
Currently __ATOMIC_CONSUME is an alias for __ATOMIC_ACQUIRE. Finally,
using __ATOMIC_SEQ_CST ("sequentially consistent") is not supported yet
and will cause an error.
[1] https://lore.kernel.org/all/20240729183246.4110549-1-yepeilin@google.com/
---
clang/lib/Basic/Targets/BPF.cpp | 2 +
.../test/Preprocessor/bpf-predefined-macros.c | 9 ++
.../lib/Target/BPF/AsmParser/BPFAsmParser.cpp | 2 +
llvm/lib/Target/BPF/BPFInstrFormats.td | 17 +++
llvm/lib/Target/BPF/BPFInstrInfo.td | 128 ++++++++++++++++
.../lib/Target/BPF/BPFMISimplifyPatchable.cpp | 12 +-
llvm/lib/Target/BPF/BPFSubtarget.cpp | 10 ++
llvm/lib/Target/BPF/BPFSubtarget.h | 5 +-
.../CodeGen/BPF/assembler-disassembler-v4.s | 20 +++
llvm/test/CodeGen/BPF/atomic-load-store.ll | 142 ++++++++++++++++++
10 files changed, 342 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/CodeGen/BPF/atomic-load-store.ll
diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp
index f4684765b7ffb3..090f6bf38d6b45 100644
--- a/clang/lib/Basic/Targets/BPF.cpp
+++ b/clang/lib/Basic/Targets/BPF.cpp
@@ -67,6 +67,8 @@ void BPFTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__BPF_FEATURE_SDIV_SMOD");
Builder.defineMacro("__BPF_FEATURE_GOTOL");
Builder.defineMacro("__BPF_FEATURE_ST");
+ Builder.defineMacro("__BPF_FEATURE_LOAD_ACQUIRE");
+ Builder.defineMacro("__BPF_FEATURE_STORE_RELEASE");
}
}
diff --git a/clang/test/Preprocessor/bpf-predefined-macros.c b/clang/test/Preprocessor/bpf-predefined-macros.c
index 8c2143f767c40c..8fae8d6ddea090 100644
--- a/clang/test/Preprocessor/bpf-predefined-macros.c
+++ b/clang/test/Preprocessor/bpf-predefined-macros.c
@@ -67,6 +67,12 @@ int t;
#ifdef __BPF_FEATURE_MAY_GOTO
int u;
#endif
+#ifdef __BPF_FEATURE_LOAD_ACQUIRE
+int v;
+#endif
+#ifdef __BPF_FEATURE_STORE_RELEASE
+int w;
+#endif
// CHECK: int b;
// CHECK: int c;
@@ -106,6 +112,9 @@ int u;
// CPU_V3: int u;
// CPU_V4: int u;
+// CPU_V4: int v;
+// CPU_V4: int w;
+
// CPU_GENERIC: int g;
// CPU_PROBE: int f;
diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 32ddf11ec31968..8823435faf37d5 100644
--- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -237,6 +237,7 @@ struct BPFOperand : public MCParsedAsmOperand {
.Case("exit", true)
.Case("lock", true)
.Case("ld_pseudo", true)
+ .Case("store_release", true)
.Default(false);
}
@@ -273,6 +274,7 @@ struct BPFOperand : public MCParsedAsmOperand {
.Case("cmpxchg_64", true)
.Case("cmpxchg32_32", true)
.Case("addr_space_cast", true)
+ .Case("load_acquire", true)
.Default(false);
}
};
diff --git a/llvm/lib/Target/BPF/BPFInstrFormats.td b/llvm/lib/Target/BPF/BPFInstrFormats.td
index feffdbc69465ea..3a1286bf83810f 100644
--- a/llvm/lib/Target/BPF/BPFInstrFormats.td
+++ b/llvm/lib/Target/BPF/BPFInstrFormats.td
@@ -48,6 +48,23 @@ def BPF_END : BPFArithOp<0xd>;
def BPF_XCHG : BPFArithOp<0xe>;
def BPF_CMPXCHG : BPFArithOp<0xf>;
+class BPFAtomicLoadStoreOp<bits<4> val> {
+ bits<4> Value = val;
+}
+
+def BPF_ATOMIC_LOAD : BPFAtomicLoadStoreOp<0x1>;
+def BPF_ATOMIC_STORE : BPFAtomicLoadStoreOp<0x2>;
+
+class BPFAtomicOrdering<bits<4> val> {
+ bits<4> Value = val;
+}
+
+def BPF_RELAXED : BPFAtomicOrdering<0x0>;
+def BPF_ACQUIRE : BPFAtomicOrdering<0x1>;
+def BPF_RELEASE : BPFAtomicOrdering<0x2>;
+def BPF_ACQ_REL : BPFAtomicOrdering<0x3>;
+def BPF_SEQ_CST : BPFAtomicOrdering<0x4>;
+
class BPFEndDir<bits<1> val> {
bits<1> Value = val;
}
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 62d6e25f83b59f..40f108077e8f65 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -60,6 +60,8 @@ def BPFHasSdivSmod : Predicate<"Subtarget->hasSdivSmod()">;
def BPFNoMovsx : Predicate<"!Subtarget->hasMovsx()">;
def BPFNoBswap : Predicate<"!Subtarget->hasBswap()">;
def BPFHasStoreImm : Predicate<"Subtarget->hasStoreImm()">;
+def BPFHasLoadAcquire : Predicate<"Subtarget->hasLoadAcquire()">;
+def BPFHasStoreRelease : Predicate<"Subtarget->hasStoreRelease()">;
class ImmediateAsmOperand<string name> : AsmOperandClass {
let Name = name;
@@ -566,6 +568,48 @@ let Predicates = [BPFHasALU32, BPFHasStoreImm] in {
(STB_imm (imm_to_i64 imm:$src), ADDRri:$dst)>;
}
+class STORE_RELEASE<BPFWidthModifer SizeOp, string OpcodeStr, RegisterClass RegTp>
+ : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
+ (outs),
+ (ins RegTp:$src, MEMri:$addr),
+ "store_release(("#OpcodeStr#" *)($addr), $src)",
+ []> {
+ bits<4> src;
+ bits<20> addr;
+
+ let Inst{51-48} = addr{19-16}; // base reg
+ let Inst{55-52} = src;
+ let Inst{47-32} = addr{15-0}; // offset
+ let Inst{7-4} = BPF_ATOMIC_STORE.Value;
+ let Inst{3-0} = BPF_RELEASE.Value;
+ let BPFClass = BPF_STX;
+}
+
+class STORE_RELEASEi64<BPFWidthModifer Opc, string OpcodeStr>
+ : STORE_RELEASE<Opc, OpcodeStr, GPR>;
+
+class relaxed_store<PatFrag base>
+ : PatFrag<(ops node:$val, node:$ptr), (base node:$val, node:$ptr)> {
+ let IsAtomic = 1;
+ let IsAtomicOrderingReleaseOrStronger = 0;
+}
+
+class releasing_store<PatFrag base>
+ : PatFrag<(ops node:$val, node:$ptr), (base node:$val, node:$ptr)> {
+ let IsAtomic = 1;
+ let IsAtomicOrderingRelease = 1;
+}
+
+let Predicates = [BPFHasStoreRelease] in {
+ def STDREL : STORE_RELEASEi64<BPF_DW, "u64">;
+
+ foreach P = [[relaxed_store<atomic_store_64>, STD],
+ [releasing_store<atomic_store_64>, STDREL],
+ ] in {
+ def : Pat<(P[0] GPR:$val, ADDRri:$addr), (P[1] GPR:$val, ADDRri:$addr)>;
+ }
+}
+
// LOAD instructions
class LOAD<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, list<dag> Pattern>
: TYPE_LD_ST<ModOp.Value, SizeOp.Value,
@@ -622,6 +666,48 @@ let Predicates = [BPFHasLdsx] in {
def LDD : LOADi64<BPF_DW, BPF_MEM, "u64", load>;
+class LOAD_ACQUIRE<BPFWidthModifer SizeOp, string OpcodeStr, RegisterClass RegTp>
+ : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
+ (outs RegTp:$dst),
+ (ins MEMri:$addr),
+ "$dst = load_acquire(("#OpcodeStr#" *)($addr))",
+ []> {
+ bits<4> dst;
+ bits<20> addr;
+
+ let Inst{51-48} = dst;
+ let Inst{55-52} = addr{19-16}; // base reg
+ let Inst{47-32} = addr{15-0}; // offset
+ let Inst{7-4} = BPF_ATOMIC_LOAD.Value;
+ let Inst{3-0} = BPF_ACQUIRE.Value;
+ let BPFClass = BPF_STX;
+}
+
+class LOAD_ACQUIREi64<BPFWidthModifer SizeOp, string OpcodeStr>
+ : LOAD_ACQUIRE<SizeOp, OpcodeStr, GPR>;
+
+class relaxed_load<PatFrags base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr)> {
+ let IsAtomic = 1;
+ let IsAtomicOrderingAcquireOrStronger = 0;
+}
+
+class acquiring_load<PatFrags base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr)> {
+ let IsAtomic = 1;
+ let IsAtomicOrderingAcquire = 1;
+}
+
+let Predicates = [BPFHasLoadAcquire] in {
+ def LDDACQ : LOAD_ACQUIREi64<BPF_DW, "u64">;
+
+ foreach P = [[relaxed_load<atomic_load_64>, LDD],
+ [acquiring_load<atomic_load_64>, LDDACQ],
+ ] in {
+ def : Pat<(P[0] ADDRri:$addr), (P[1] ADDRri:$addr)>;
+ }
+}
+
class BRANCH<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
: TYPE_ALU_JMP<Opc.Value, BPF_K.Value,
(outs),
@@ -1181,10 +1267,19 @@ class STORE32<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
class STOREi32<BPFWidthModifer Opc, string OpcodeStr, PatFrag OpNode>
: STORE32<Opc, OpcodeStr, [(OpNode GPR32:$src, ADDRri:$addr)]>;
+class STORE_RELEASEi32<BPFWidthModifer Opc, string OpcodeStr>
+ : STORE_RELEASE<Opc, OpcodeStr, GPR32>;
+
let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
def STW32 : STOREi32<BPF_W, "u32", store>;
def STH32 : STOREi32<BPF_H, "u16", truncstorei16>;
def STB32 : STOREi32<BPF_B, "u8", truncstorei8>;
+
+ let Predicates = [BPFHasStoreRelease] in {
+ def STWREL32 : STORE_RELEASEi32<BPF_W, "u32">;
+ def STHREL32 : STORE_RELEASEi32<BPF_H, "u16">;
+ def STBREL32 : STORE_RELEASEi32<BPF_B, "u8">;
+ }
}
class LOAD32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, list<dag> Pattern>
@@ -1205,10 +1300,19 @@ class LOAD32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, lis
class LOADi32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, PatFrag OpNode>
: LOAD32<SizeOp, ModOp, OpcodeStr, [(set i32:$dst, (OpNode ADDRri:$addr))]>;
+class LOAD_ACQUIREi32<BPFWidthModifer SizeOp, string OpcodeStr>
+ : LOAD_ACQUIRE<SizeOp, OpcodeStr, GPR32>;
+
let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
def LDW32 : LOADi32<BPF_W, BPF_MEM, "u32", load>;
def LDH32 : LOADi32<BPF_H, BPF_MEM, "u16", zextloadi16>;
def LDB32 : LOADi32<BPF_B, BPF_MEM, "u8", zextloadi8>;
+
+ let Predicates = [BPFHasLoadAcquire] in {
+ def LDWACQ32 : LOAD_ACQUIREi32<BPF_W, "u32">;
+ def LDHACQ32 : LOAD_ACQUIREi32<BPF_H, "u16">;
+ def LDBACQ32 : LOAD_ACQUIREi32<BPF_B, "u8">;
+ }
}
let Predicates = [BPFHasALU32] in {
@@ -1238,6 +1342,30 @@ let Predicates = [BPFHasALU32] in {
(SUBREG_TO_REG (i64 0), (LDH32 ADDRri:$src), sub_32)>;
def : Pat<(i64 (extloadi32 ADDRri:$src)),
(SUBREG_TO_REG (i64 0), (LDW32 ADDRri:$src), sub_32)>;
+
+ let Predicates = [BPFHasLoadAcquire] in {
+ foreach P = [[relaxed_load<atomic_load_32>, LDW32],
+ [relaxed_load<atomic_load_az_16>, LDH32],
+ [relaxed_load<atomic_load_az_8>, LDB32],
+ [acquiring_load<atomic_load_32>, LDWACQ32],
+ [acquiring_load<atomic_load_az_16>, LDHACQ32],
+ [acquiring_load<atomic_load_az_8>, LDBACQ32],
+ ] in {
+ def : Pat<(P[0] ADDRri:$addr), (P[1] ADDRri:$addr)>;
+ }
+ }
+
+ let Predicates = [BPFHasStoreRelease] in {
+ foreach P = [[relaxed_store<atomic_store_32>, STW32],
+ [relaxed_store<atomic_store_16>, STH32],
+ [relaxed_store<atomic_store_8>, STB32],
+ [releasing_store<atomic_store_32>, STWREL32],
+ [releasing_store<atomic_store_16>, STHREL32],
+ [releasing_store<atomic_store_8>, STBREL32],
+ ] in {
+ def : Pat<(P[0] GPR32:$val, ADDRri:$addr), (P[1] GPR32:$val, ADDRri:$addr)>;
+ }
+ }
}
let usesCustomInserter = 1, isCodeGenOnly = 1 in {
diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
index 4a1684ccebb793..f6735adbde6400 100644
--- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
+++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
@@ -100,21 +100,25 @@ static bool isStoreImm(unsigned Opcode) {
}
static bool isStore32(unsigned Opcode) {
- return Opcode == BPF::STB32 || Opcode == BPF::STH32 || Opcode == BPF::STW32;
+ return Opcode == BPF::STB32 || Opcode == BPF::STH32 || Opcode == BPF::STW32 ||
+ Opcode == BPF::STBREL32 || Opcode == BPF::STHREL32 ||
+ Opcode == BPF::STWREL32;
}
static bool isStore64(unsigned Opcode) {
return Opcode == BPF::STB || Opcode == BPF::STH || Opcode == BPF::STW ||
- Opcode == BPF::STD;
+ Opcode == BPF::STD || Opcode == BPF::STDREL;
}
static bool isLoad32(unsigned Opcode) {
- return Opcode == BPF::LDB32 || Opcode == BPF::LDH32 || Opcode == BPF::LDW32;
+ return Opcode == BPF::LDB32 || Opcode == BPF::LDH32 || Opcode == BPF::LDW32 ||
+ Opcode == BPF::LDBACQ32 || Opcode == BPF::LDHACQ32 ||
+ Opcode == BPF::LDWACQ32;
}
static bool isLoad64(unsigned Opcode) {
return Opcode == BPF::LDB || Opcode == BPF::LDH || Opcode == BPF::LDW ||
- Opcode == BPF::LDD;
+ Opcode == BPF::LDD || Opcode == BPF::LDDACQ;
}
static bool isLoadSext(unsigned Opcode) {
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp
index 305e9a2bf2cda3..c3b24659552179 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.cpp
+++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp
@@ -40,6 +40,12 @@ static cl::opt<bool> Disable_gotol("disable-gotol", cl::Hidden, cl::init(false),
static cl::opt<bool>
Disable_StoreImm("disable-storeimm", cl::Hidden, cl::init(false),
cl::desc("Disable BPF_ST (immediate store) insn"));
+static cl::opt<bool>
+ Disable_load_acquire("disable-load-acquire", cl::Hidden, cl::init(false),
+ cl::desc("Disable load-acquire insns"));
+static cl::opt<bool>
+ Disable_store_release("disable-store-release", cl::Hidden, cl::init(false),
+ cl::desc("Disable store-release insns"));
void BPFSubtarget::anchor() {}
@@ -62,6 +68,8 @@ void BPFSubtarget::initializeEnvironment() {
HasSdivSmod = false;
HasGotol = false;
HasStoreImm = false;
+ HasLoadAcquire = false;
+ HasStoreRelease = false;
}
void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
@@ -91,6 +99,8 @@ void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
HasSdivSmod = !Disable_sdiv_smod;
HasGotol = !Disable_gotol;
HasStoreImm = !Disable_StoreImm;
+ HasLoadAcquire = !Disable_load_acquire;
+ HasStoreRelease = !Disable_store_release;
return;
}
}
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.h b/llvm/lib/Target/BPF/BPFSubtarget.h
index 33747546eadc3b..a3e8e1f017085e 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.h
+++ b/llvm/lib/Target/BPF/BPFSubtarget.h
@@ -64,7 +64,8 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
bool UseDwarfRIS;
// whether cpu v4 insns are enabled.
- bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol, HasStoreImm;
+ bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol, HasStoreImm,
+ HasLoadAcquire, HasStoreRelease;
std::unique_ptr<CallLowering> CallLoweringInfo;
std::unique_ptr<InstructionSelector> InstSelector;
@@ -92,6 +93,8 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
bool hasSdivSmod() const { return HasSdivSmod; }
bool hasGotol() const { return HasGotol; }
bool hasStoreImm() const { return HasStoreImm; }
+ bool hasLoadAcquire() const { return HasLoadAcquire; }
+ bool hasStoreRelease() const { return HasStoreRelease; }
bool isLittleEndian() const { return IsLittleEndian; }
diff --git a/llvm/test/CodeGen/BPF/assembler-disassembler-v4.s b/llvm/test/CodeGen/BPF/assembler-disassembler-v4.s
index d52985986bdc36..388bab78a28283 100644
--- a/llvm/test/CodeGen/BPF/assembler-disassembler-v4.s
+++ b/llvm/test/CodeGen/BPF/assembler-disassembler-v4.s
@@ -42,3 +42,23 @@ r2 s%= r4
// CHECK: 9c 42 01 00 00 00 00 00 w2 s%= w4
w1 s/= w3
w2 s%= w4
+
+// CHECK: d3 10 00 00 11 00 00 00 w0 = load_acquire((u8 *)(r1 + 0x0))
+// CHECK: cb 10 00 00 11 00 00 00 w0 = load_acquire((u16 *)(r1 + 0x0))
+// CHECK: c3 10 00 00 11 00 00 00 w0 = load_acquire((u32 *)(r1 + 0x0))
+w0 = load_acquire((u8 *)(r1 + 0))
+w0 = load_acquire((u16 *)(r1 + 0))
+w0 = load_acquire((u32 *)(r1 + 0))
+
+// CHECK: db 10 00 00 11 00 00 00 r0 = load_acquire((u64 *)(r1 + 0x0))
+r0 = load_acquire((u64 *)(r1 + 0))
+
+// CHECK: d3 21 00 00 22 00 00 00 store_release((u8 *)(r1 + 0x0), w2)
+// CHECK: cb 21 00 00 22 00 00 00 store_release((u16 *)(r1 + 0x0), w2)
+// CHECK: c3 21 00 00 22 00 00 00 store_release((u32 *)(r1 + 0x0), w2)
+store_release((u8 *)(r1 + 0), w2)
+store_release((u16 *)(r1 + 0), w2)
+store_release((u32 *)(r1 + 0), w2)
+
+// CHECK: db 21 00 00 22 00 00 00 store_release((u64 *)(r1 + 0x0), r2)
+store_release((u64 *)(r1 + 0), r2)
diff --git a/llvm/test/CodeGen/BPF/atomic-load-store.ll b/llvm/test/CodeGen/BPF/atomic-load-store.ll
new file mode 100644
index 00000000000000..5b2eda60316d60
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/atomic-load-store.ll
@@ -0,0 +1,142 @@
+; RUN: llc < %s -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding \
+; RUN: | FileCheck -check-prefixes=CHECK,CHECK-LE %s
+; RUN: llc < %s -march=bpfeb -mcpu=v4 -verify-machineinstrs -show-mc-encoding \
+; RUN: | FileCheck -check-prefixes=CHECK,CHECK-BE %s
+
+; Source:
+; void atomic_load_i8(char *p) {
+; (void)__atomic_load_n(p, __ATOMIC_RELAXED);
+; (void)__atomic_load_n(p, __ATOMIC_ACQUIRE);
+; }
+; void atomic_load_i16(short *p) {
+; (void)__atomic_load_n(p, __ATOMIC_RELAXED);
+; (void)__atomic_load_n(p, __ATOMIC_ACQUIRE);
+; }
+; void atomic_load_i32(int *p) {
+; (void)__atomic_load_n(p, __ATOMIC_RELAXED);
+; (void)__atomic_load_n(p, __ATOMIC_ACQUIRE);
+; }
+; void atomic_load_i64(long *p) {
+; (void)__atomic_load_n(p, __ATOMIC_RELAXED);
+; (void)__atomic_load_n(p, __ATOMIC_ACQUIRE);
+; }
+; void atomic_store_i8(char *p, char v) {
+; __atomic_store_n(p, v, __ATOMIC_RELAXED);
+; __atomic_store_n(p, v, __ATOMIC_RELEASE);
+; }
+; void atomic_store_i16(short *p, short v) {
+; __atomic_store_n(p, v, __ATOMIC_RELAXED);
+; __atomic_store_n(p, v, __ATOMIC_RELEASE);
+; }
+; void atomic_store_i32(int *p, int v) {
+; __atomic_store_n(p, v, __ATOMIC_RELAXED);
+; __atomic_store_n(p, v, __ATOMIC_RELEASE);
+; }
+; void atomic_store_i64(long *p, long v) {
+; __atomic_store_n(p, v, __ATOMIC_RELAXED);
+; __atomic_store_n(p, v, __ATOMIC_RELEASE);
+; }
+
+define dso_local void @atomic_load_i8(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+; CHECK-LABEL: atomic_load_i8
+; CHECK-LE: w2 = *(u8 *)(r1 + 0) # encoding: [0x71,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-LE-NEXT: w1 = load_acquire((u8 *)(r1 + 0)) # encoding: [0xd3,0x11,0x00,0x00,0x11,0x00,0x00,0x00]
+;
+; CHECK-BE: w2 = *(u8 *)(r1 + 0) # encoding: [0x71,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-BE-NEXT: w1 = load_acquire((u8 *)(r1 + 0)) # encoding: [0xd3,0x11,0x00,0x00,0x00,0x00,0x00,0x11]
+entry:
+ %0 = load atomic i8, ptr %p monotonic, align 1
+ %1 = load atomic i8, ptr %p acquire, align 1
+ ret void
+}
+
+define dso_local void @atomic_load_i16(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+; CHECK-LABEL: atomic_load_i16
+; CHECK-LE: w2 = *(u16 *)(r1 + 0) # encoding: [0x69,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-LE-NEXT: w1 = load_acquire((u16 *)(r1 + 0)) # encoding: [0xcb,0x11,0x00,0x00,0x11,0x00,0x00,0x00]
+;
+; CHECK-BE: w2 = *(u16 *)(r1 + 0) # encoding: [0x69,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-BE-NEXT: w1 = load_acquire((u16 *)(r1 + 0)) # encoding: [0xcb,0x11,0x00,0x00,0x00,0x00,0x00,0x11]
+entry:
+ %0 = load atomic i16, ptr %p monotonic, align 2
+ %1 = load atomic i16, ptr %p acquire, align 2
+ ret void
+}
+
+define dso_local void @atomic_load_i32(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+; CHECK-LABEL: atomic_load_i32
+; CHECK-LE: w2 = *(u32 *)(r1 + 0) # encoding: [0x61,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-LE-NEXT: w1 = load_acquire((u32 *)(r1 + 0)) # encoding: [0xc3,0x11,0x00,0x00,0x11,0x00,0x00,0x00]
+;
+; CHECK-BE: w2 = *(u32 *)(r1 + 0) # encoding: [0x61,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-BE-NEXT: w1 = load_acquire((u32 *)(r1 + 0)) # encoding: [0xc3,0x11,0x00,0x00,0x00,0x00,0x00,0x11]
+entry:
+ %0 = load atomic i32, ptr %p monotonic, align 4
+ %1 = load atomic i32, ptr %p acquire, align 4
+ ret void
+}
+
+define dso_local void @atomic_load_i64(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+; CHECK-LABEL: atomic_load_i64
+; CHECK-LE: r2 = *(u64 *)(r1 + 0) # encoding: [0x79,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-LE-NEXT: r1 = load_acquire((u64 *)(r1 + 0)) # encoding: [0xdb,0x11,0x00,0x00,0x11,0x00,0x00,0x00]
+;
+; CHECK-BE: r2 = *(u64 *)(r1 + 0) # encoding: [0x79,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-BE-NEXT: r1 = load_acquire((u64 *)(r1 + 0)) # encoding: [0xdb,0x11,0x00,0x00,0x00,0x00,0x00,0x11]
+entry:
+ %0 = load atomic i64, ptr %p monotonic, align 8
+ %1 = load atomic i64, ptr %p acquire, align 8
+ ret void
+}
+
+define dso_local void @atomic_store_i8(ptr nocapture noundef writeonly %p, i8 noundef signext %v) local_unnamed_addr #0 {
+; CHECK-LABEL: atomic_store_i8
+; CHECK-LE: *(u8 *)(r1 + 0) = w2 # encoding: [0x73,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-LE-NEXT: store_release((u8 *)(r1 + 0), w2) # encoding: [0xd3,0x21,0x00,0x00,0x22,0x00,0x00,0x00]
+;
+; CHECK-BE: *(u8 *)(r1 + 0) = w2 # encoding: [0x73,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-BE-NEXT: store_release((u8 *)(r1 + 0), w2) # encoding: [0xd3,0x12,0x00,0x00,0x00,0x00,0x00,0x22]
+entry:
+ store atomic i8 %v, ptr %p monotonic, align 1
+ store atomic i8 %v, ptr %p release, align 1
+ ret void
+}
+
+define dso_local void @atomic_store_i16(ptr nocapture noundef writeonly %p, i16 noundef signext %v) local_unnamed_addr #0 {
+; CHECK-LABEL: atomic_store_i16
+; CHECK-LE: *(u16 *)(r1 + 0) = w2 # encoding: [0x6b,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-LE-NEXT: store_release((u16 *)(r1 + 0), w2) # encoding: [0xcb,0x21,0x00,0x00,0x22,0x00,0x00,0x00]
+;
+; CHECK-BE: *(u16 *)(r1 + 0) = w2 # encoding: [0x6b,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-BE-NEXT: store_release((u16 *)(r1 + 0), w2) # encoding: [0xcb,0x12,0x00,0x00,0x00,0x00,0x00,0x22]
+entry:
+ store atomic i16 %v, ptr %p monotonic, align 2
+ store atomic i16 %v, ptr %p release, align 2
+ ret void
+}
+
+define dso_local void @atomic_store_i32(ptr nocapture noundef writeonly %p, i32 noundef %v) local_unnamed_addr #0 {
+; CHECK-LABEL: atomic_store_i32
+; CHECK-LE: *(u32 *)(r1 + 0) = w2 # encoding: [0x63,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-LE-NEXT: store_release((u32 *)(r1 + 0), w2) # encoding: [0xc3,0x21,0x00,0x00,0x22,0x00,0x00,0x00]
+;
+; CHECK-BE: *(u32 *)(r1 + 0) = w2 # encoding: [0x63,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-BE-NEXT: store_release((u32 *)(r1 + 0), w2) # encoding: [0xc3,0x12,0x00,0x00,0x00,0x00,0x00,0x22]
+entry:
+ store atomic i32 %v, ptr %p monotonic, align 4
+ store atomic i32 %v, ptr %p release, align 4
+ ret void
+}
+
+define dso_local void @atomic_store_i64(ptr nocapture noundef writeonly %p, i64 noundef %v) local_unnamed_addr #0 {
+; CHECK-LABEL: atomic_store_i64
+; CHECK-LE: *(u64 *)(r1 + 0) = r2 # encoding: [0x7b,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-LE-NEXT: store_release((u64 *)(r1 + 0), r2) # encoding: [0xdb,0x21,0x00,0x00,0x22,0x00,0x00,0x00]
+;
+; CHECK-BE: *(u64 *)(r1 + 0) = r2 # encoding: [0x7b,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-BE-NEXT: store_release((u64 *)(r1 + 0), r2) # encoding: [0xdb,0x12,0x00,0x00,0x00,0x00,0x00,0x22]
+entry:
+ store atomic i64 %v, ptr %p monotonic, align 8
+ store atomic i64 %v, ptr %p release, align 8
+ ret void
+}
>From ddce643da7b52c9bcb8cd1697fe53fce3a47e811 Mon Sep 17 00:00:00 2001
From: Peilin Ye <yepeilin at google.com>
Date: Sat, 21 Sep 2024 04:00:40 +0000
Subject: [PATCH 3/3] [BPF] Improve error message for seq_cst atomic load and
store
Sequentially consistent (seq_cst) atomic load and store are not
supported yet for BPF. Right now, calling __atomic_{load,store}{,_n}()
with __ATOMIC_SEQ_CST will cause an error:
$ cat bar.c
int foo(int *ptr) { return __atomic_load_n(ptr, __ATOMIC_SEQ_CST); }
$ clang --target=bpf -mcpu=v4 -c bar.c > /dev/null
fatal error: error in backend: Cannot select: t8: i32,ch = AtomicLoad<(load seq_cst (s32) from %ir.0)> t7:1, t7
...
This isn't very useful. Just like commit 379d90884807 ("BPF: provide
better error message for unsupported atomic operations"), make it
generate an error message saying that the requested operation isn't
supported, before triggering that "fatal error":
$ clang --target=bpf -mcpu=v4 -c bar.c > /dev/null
bar.c:1:5: error: sequentially consistent (seq_cst) atomic load/store is not supported
1 | int foo(int *ptr) { return __atomic_load_n(ptr, __ATOMIC_SEQ_CST); }
| ^
...
---
llvm/lib/Target/BPF/BPFISelLowering.cpp | 25 +++++++++++++++++++++++++
llvm/lib/Target/BPF/BPFISelLowering.h | 2 +-
2 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index ff23d3b055d0d5..ee2766955502f6 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -93,6 +93,11 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
}
+ for (auto VT : {MVT::i32, MVT::i64}) {
+ setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
+ setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
+ }
+
for (auto VT : { MVT::i32, MVT::i64 }) {
if (VT == MVT::i32 && !STI.getHasAlu32())
continue;
@@ -291,6 +296,9 @@ void BPFTargetLowering::ReplaceNodeResults(
else
Msg = "unsupported atomic operation, please use 64 bit version";
break;
+ case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_STORE:
+ return;
}
SDLoc DL(N);
@@ -316,6 +324,9 @@ SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerSDIVSREM(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_STORE:
+ return LowerATOMIC_LOAD_STORE(Op, DAG);
}
}
@@ -703,6 +714,20 @@ SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(BPFISD::SELECT_CC, DL, VTs, Ops);
}
+SDValue BPFTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDNode *N = Op.getNode();
+ SDLoc DL(N);
+
+ if (cast<AtomicSDNode>(N)->getMergedOrdering() ==
+ AtomicOrdering::SequentiallyConsistent)
+ fail(DL, DAG,
+ "sequentially consistent (seq_cst) "
+ "atomic load/store is not supported");
+
+ return Op;
+}
+
const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((BPFISD::NodeType)Opcode) {
case BPFISD::FIRST_NUMBER:
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index d59098f9f569ba..ad048ad05e6dd0 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -77,7 +77,7 @@ class BPFTargetLowering : public TargetLowering {
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
-
+ SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
More information about the cfe-commits
mailing list