[clang] [llvm] [BPF] Add load-acquire and store-release instructions under -mcpu=v5 (PR #108636)

Peilin Ye via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 24 13:08:16 PDT 2024


https://github.com/peilin-ye updated https://github.com/llvm/llvm-project/pull/108636

>From 68d003f7156b16656a11a1395b88b6fbed368401 Mon Sep 17 00:00:00 2001
From: Peilin Ye <yepeilin at google.com>
Date: Thu, 15 Aug 2024 21:49:23 +0000
Subject: [PATCH 1/4] [BPF] Refactor BPFSubtarget::initSubtargetFeatures()
 (NFC)

Refactor it to make it match the style of
BPFTargetInfo::getTargetDefines(), so that when we add -mcpu=v5 in the
future, we can simply append e.g.:

  if (CpuVerNum >= 5)
    HasFoo = true;

instead of saying:

  if (CPU == "v5") {
    HasJmpExt = true;
    HasJmp32 = true;
    HasAlu32 = true;
    HasLdsx = !Disable_ldsx;
    HasMovsx = !Disable_movsx;
    HasBswap = !Disable_bswap;
    HasSdivSmod = !Disable_sdiv_smod;
    HasGotol = !Disable_gotol;
    HasStoreImm = !Disable_StoreImm;
    HasFoo = true;
  }

This also makes it clearer that newer "CPU"s always support older
features.  No functional changes intended.
---
 llvm/lib/Target/BPF/BPFSubtarget.cpp | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp
index 305e9a2bf2cda3..d5b20403d1e42d 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.cpp
+++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp
@@ -71,27 +71,23 @@ void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
     CPU = sys::detail::getHostCPUNameForBPF();
   if (CPU == "generic" || CPU == "v1")
     return;
-  if (CPU == "v2") {
-    HasJmpExt = true;
-    return;
-  }
-  if (CPU == "v3") {
+
+  int CpuVerNum = CPU.back() - '0';
+  if (CpuVerNum >= 2)
     HasJmpExt = true;
+
+  if (CpuVerNum >= 3) {
     HasJmp32 = true;
     HasAlu32 = true;
-    return;
   }
-  if (CPU == "v4") {
-    HasJmpExt = true;
-    HasJmp32 = true;
-    HasAlu32 = true;
+
+  if (CpuVerNum >= 4) {
     HasLdsx = !Disable_ldsx;
     HasMovsx = !Disable_movsx;
     HasBswap = !Disable_bswap;
     HasSdivSmod = !Disable_sdiv_smod;
     HasGotol = !Disable_gotol;
     HasStoreImm = !Disable_StoreImm;
-    return;
   }
 }
 

>From a59b60c2e9d0b0f1e2ce3b25b70d46957af4d091 Mon Sep 17 00:00:00 2001
From: Peilin Ye <yepeilin at google.com>
Date: Fri, 23 Aug 2024 19:11:05 +0000
Subject: [PATCH 2/4] [BPF] Refactor {LOAD,STORE}{,32} classes (NFC)

We will need different AsmString formats for load-acquire and
store-release instructions.  To make that easier, refactor
{LOAD,STORE}{,32} classes to take AsmString as an argument directly.
Add a BPFModeModifer parameter to STORE{,32} for similar reasons.

No functional changes intended.
---
 llvm/lib/Target/BPF/BPFInstrInfo.td | 35 ++++++++++++++---------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index f7e17901c7ed5e..aab2291a70efc2 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -497,12 +497,11 @@ def LD_pseudo
 }
 
 // STORE instructions
-class STORE<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
-    : TYPE_LD_ST<BPF_MEM.Value, SizeOp.Value,
+class STORE<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string AsmString, list<dag> Pattern>
+    : TYPE_LD_ST<ModOp.Value, SizeOp.Value,
                  (outs),
                  (ins GPR:$src, MEMri:$addr),
-                 "*("#OpcodeStr#" *)($addr) = $src",
-                 Pattern> {
+                 AsmString, Pattern> {
   bits<4> src;
   bits<20> addr;
 
@@ -513,7 +512,7 @@ class STORE<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
 }
 
 class STOREi64<BPFWidthModifer Opc, string OpcodeStr, PatFrag OpNode>
-    : STORE<Opc, OpcodeStr, [(OpNode GPR:$src, ADDRri:$addr)]>;
+    : STORE<Opc, BPF_MEM, "*("#OpcodeStr#" *)($addr) = $src", [(OpNode GPR:$src, ADDRri:$addr)]>;
 
 let Predicates = [BPFNoALU32] in {
   def STW : STOREi64<BPF_W, "u32", truncstorei32>;
@@ -567,12 +566,11 @@ let Predicates = [BPFHasALU32, BPFHasStoreImm] in {
 }
 
 // LOAD instructions
-class LOAD<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, list<dag> Pattern>
+class LOAD<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string AsmString, list<dag> Pattern>
     : TYPE_LD_ST<ModOp.Value, SizeOp.Value,
                  (outs GPR:$dst),
                  (ins MEMri:$addr),
-                 "$dst = *("#OpcodeStr#" *)($addr)",
-                 Pattern> {
+                 AsmString, Pattern> {
   bits<4> dst;
   bits<20> addr;
 
@@ -583,7 +581,8 @@ class LOAD<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, list<
 }
 
 class LOADi64<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, PatFrag OpNode>
-    : LOAD<SizeOp, ModOp, OpcodeStr, [(set i64:$dst, (OpNode ADDRri:$addr))]>;
+    : LOAD<SizeOp, ModOp, "$dst = *("#OpcodeStr#" *)($addr)",
+           [(set i64:$dst, (OpNode ADDRri:$addr))]>;
 
 let isCodeGenOnly = 1 in {
   class CORE_LD<RegisterClass RegClass, string Sz>
@@ -1069,12 +1068,11 @@ def : Pat<(i32 (trunc GPR:$src)),
 def : Pat<(i64 (anyext GPR32:$src)),
           (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;
 
-class STORE32<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
-    : TYPE_LD_ST<BPF_MEM.Value, SizeOp.Value,
+class STORE32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string AsmString, list<dag> Pattern>
+    : TYPE_LD_ST<ModOp.Value, SizeOp.Value,
                  (outs),
                  (ins GPR32:$src, MEMri:$addr),
-                 "*("#OpcodeStr#" *)($addr) = $src",
-                 Pattern> {
+                 AsmString, Pattern> {
   bits<4> src;
   bits<20> addr;
 
@@ -1085,7 +1083,8 @@ class STORE32<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
 }
 
 class STOREi32<BPFWidthModifer Opc, string OpcodeStr, PatFrag OpNode>
-    : STORE32<Opc, OpcodeStr, [(OpNode GPR32:$src, ADDRri:$addr)]>;
+    : STORE32<Opc, BPF_MEM, "*("#OpcodeStr#" *)($addr) = $src",
+              [(OpNode GPR32:$src, ADDRri:$addr)]>;
 
 let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
   def STW32 : STOREi32<BPF_W, "u32", store>;
@@ -1093,12 +1092,11 @@ let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
   def STB32 : STOREi32<BPF_B, "u8", truncstorei8>;
 }
 
-class LOAD32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, list<dag> Pattern>
+class LOAD32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string AsmString, list<dag> Pattern>
     : TYPE_LD_ST<ModOp.Value, SizeOp.Value,
                 (outs GPR32:$dst),
                 (ins MEMri:$addr),
-                "$dst = *("#OpcodeStr#" *)($addr)",
-                Pattern> {
+                AsmString, Pattern> {
   bits<4> dst;
   bits<20> addr;
 
@@ -1109,7 +1107,8 @@ class LOAD32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, lis
 }
 
 class LOADi32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, PatFrag OpNode>
-    : LOAD32<SizeOp, ModOp, OpcodeStr, [(set i32:$dst, (OpNode ADDRri:$addr))]>;
+    : LOAD32<SizeOp, ModOp, "$dst = *("#OpcodeStr#" *)($addr)",
+             [(set i32:$dst, (OpNode ADDRri:$addr))]>;
 
 let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
   def LDW32 : LOADi32<BPF_W, BPF_MEM, "u32", load>;

>From 4c209da8a1ec4bfda71ffd0b51accae84a39bdd5 Mon Sep 17 00:00:00 2001
From: Peilin Ye <yepeilin at google.com>
Date: Tue, 10 Sep 2024 23:09:28 +0000
Subject: [PATCH 3/4] [BPF] Add load-acquire and store-release instructions
 under -mcpu=v5

As discussed in [1], introduce BPF instructions with load-acquire and
store-release semantics under -mcpu=v5.

A "load_acquire" is a BPF_LDX instruction with a new mode modifier,
BPF_MEMACQ ("acquiring atomic load").  Similarly, a "store_release" is a
BPF_STX instruction with another new mode modifier, BPF_MEMREL
("releasing atomic store").

BPF_MEMACQ and BPF_MEMREL share the same numeric value, 0x7 (or 0b111).
For example:

  long foo(long *ptr) {
      return __atomic_load_n(ptr, __ATOMIC_ACQUIRE);
  }

foo() can be compiled to:

  f9 10 00 00 00 00 00 00 r0 = load_acquire((u64 *)(r1 + 0x0))
  95 00 00 00 00 00 00 00 exit

Opcode 0xf9, or 0b11111001, can be decoded as:

  0b 111        11     001
     BPF_MEMACQ BPF_DW BPF_LDX

Similarly:

  void bar(short *ptr, short val) {
      __atomic_store_n(ptr, val, __ATOMIC_RELEASE);
  }

bar() can be compiled to:

  eb 21 00 00 00 00 00 00 store_release((u16 *)(r1 + 0x0), w2)
  95 00 00 00 00 00 00 00 exit

Opcode 0xeb, or 0b11101011, can be decoded as:

  0b 111        01    011
     BPF_MEMREL BPF_H BPF_STX

Inline assembly is also supported.  For example:

  asm volatile("%0 = load_acquire((u64 *)(%1 + 0x0))" :
               "=r"(ret) : "r"(ptr) : "memory");

Let 'llvm-objdump -d' use -mcpu=v5 by default, just like commit
03958680b23d ("[BPF] Make llvm-objdump disasm default cpu v4
(#102166)") did for -mcpu=v4.

Add two macros, __BPF_FEATURE_LOAD_ACQUIRE and
__BPF_FEATURE_STORE_RELEASE, to let developers detect these new features
in source code.  The instructions themselves can be disabled using two
new llc options, -disable-load-acquire and -disable-store-release,
respectively.

If the user requests a weaker memory order (RELAXED or CONSUME), use
ACQUIRE (for loads) or RELEASE (for stores) instead, until the weaker
orders are actually supported.  Requesting a stronger memory order
(i.e. SEQ_CST) will cause an error.

[1] https://lore.kernel.org/all/20240729183246.4110549-1-yepeilin@google.com/
---
 clang/lib/Basic/Targets/BPF.cpp               |  9 +-
 clang/lib/Basic/Targets/BPF.h                 |  2 +-
 clang/test/Misc/target-invalid-cpu-note/bpf.c |  1 +
 llvm/lib/Object/ELFObjectFile.cpp             |  2 +-
 .../lib/Target/BPF/AsmParser/BPFAsmParser.cpp |  2 +
 llvm/lib/Target/BPF/BPF.td                    |  1 +
 llvm/lib/Target/BPF/BPFInstrFormats.td        |  2 +
 llvm/lib/Target/BPF/BPFInstrInfo.td           | 94 ++++++++++++++++++
 .../lib/Target/BPF/BPFMISimplifyPatchable.cpp | 12 ++-
 llvm/lib/Target/BPF/BPFSubtarget.cpp          | 13 +++
 llvm/lib/Target/BPF/BPFSubtarget.h            |  5 +
 .../BPF/Disassembler/BPFDisassembler.cpp      |  7 +-
 llvm/test/CodeGen/BPF/acquire-release.ll      | 95 +++++++++++++++++++
 .../CodeGen/BPF/assembler-disassembler-v5.s   | 22 +++++
 14 files changed, 257 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/CodeGen/BPF/acquire-release.ll
 create mode 100644 llvm/test/CodeGen/BPF/assembler-disassembler-v5.s

diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp
index 931f407ecb0d7e..3388b3d9b9f849 100644
--- a/clang/lib/Basic/Targets/BPF.cpp
+++ b/clang/lib/Basic/Targets/BPF.cpp
@@ -67,10 +67,15 @@ void BPFTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__BPF_FEATURE_GOTOL");
     Builder.defineMacro("__BPF_FEATURE_ST");
   }
+
+  if (CpuVerNum >= 5) {
+    Builder.defineMacro("__BPF_FEATURE_LOAD_ACQUIRE");
+    Builder.defineMacro("__BPF_FEATURE_STORE_RELEASE");
+  }
 }
 
-static constexpr llvm::StringLiteral ValidCPUNames[] = {"generic", "v1", "v2",
-                                                        "v3", "v4", "probe"};
+static constexpr llvm::StringLiteral ValidCPUNames[] = {
+    "generic", "v1", "v2", "v3", "v4", "v5", "probe"};
 
 bool BPFTargetInfo::isValidCPUName(StringRef Name) const {
   return llvm::is_contained(ValidCPUNames, Name);
diff --git a/clang/lib/Basic/Targets/BPF.h b/clang/lib/Basic/Targets/BPF.h
index d19b37dd4df7a7..3ca9c07f955f9b 100644
--- a/clang/lib/Basic/Targets/BPF.h
+++ b/clang/lib/Basic/Targets/BPF.h
@@ -106,7 +106,7 @@ class LLVM_LIBRARY_VISIBILITY BPFTargetInfo : public TargetInfo {
   void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
 
   bool setCPU(const std::string &Name) override {
-    if (Name == "v3" || Name == "v4") {
+    if (Name == "v3" || Name == "v4" || Name == "v5") {
       HasAlu32 = true;
     }
 
diff --git a/clang/test/Misc/target-invalid-cpu-note/bpf.c b/clang/test/Misc/target-invalid-cpu-note/bpf.c
index fe925f86cdd137..7f90e64ab16f16 100644
--- a/clang/test/Misc/target-invalid-cpu-note/bpf.c
+++ b/clang/test/Misc/target-invalid-cpu-note/bpf.c
@@ -10,6 +10,7 @@
 // CHECK-SAME: {{^}}, v2
 // CHECK-SAME: {{^}}, v3
 // CHECK-SAME: {{^}}, v4
+// CHECK-SAME: {{^}}, v5
 // CHECK-SAME: {{^}}, probe
 // CHECK-SAME: {{$}}
 
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index f79c233d93fe8e..408af398e4ef17 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -442,7 +442,7 @@ std::optional<StringRef> ELFObjectFileBase::tryGetCPUName() const {
   case ELF::EM_PPC64:
     return StringRef("future");
   case ELF::EM_BPF:
-    return StringRef("v4");
+    return StringRef("v5");
   default:
     return std::nullopt;
   }
diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 06b7743e0cd310..a07e3924a77dcf 100644
--- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -237,6 +237,7 @@ struct BPFOperand : public MCParsedAsmOperand {
         .Case("exit", true)
         .Case("lock", true)
         .Case("ld_pseudo", true)
+        .Case("store_release", true)
         .Default(false);
   }
 
@@ -273,6 +274,7 @@ struct BPFOperand : public MCParsedAsmOperand {
         .Case("cmpxchg_64", true)
         .Case("cmpxchg32_32", true)
         .Case("addr_space_cast", true)
+        .Case("load_acquire", true)
         .Default(false);
   }
 };
diff --git a/llvm/lib/Target/BPF/BPF.td b/llvm/lib/Target/BPF/BPF.td
index dff76ca07af511..dd2d4989561bc3 100644
--- a/llvm/lib/Target/BPF/BPF.td
+++ b/llvm/lib/Target/BPF/BPF.td
@@ -32,6 +32,7 @@ def : Proc<"v1", []>;
 def : Proc<"v2", []>;
 def : Proc<"v3", [ALU32]>;
 def : Proc<"v4", [ALU32]>;
+def : Proc<"v5", [ALU32]>;
 def : Proc<"probe", []>;
 
 def BPFInstPrinter : AsmWriter {
diff --git a/llvm/lib/Target/BPF/BPFInstrFormats.td b/llvm/lib/Target/BPF/BPFInstrFormats.td
index feffdbc69465ea..cb68b0d1250e6e 100644
--- a/llvm/lib/Target/BPF/BPFInstrFormats.td
+++ b/llvm/lib/Target/BPF/BPFInstrFormats.td
@@ -94,6 +94,8 @@ def BPF_IND  : BPFModeModifer<0x2>;
 def BPF_MEM  : BPFModeModifer<0x3>;
 def BPF_MEMSX  : BPFModeModifer<0x4>;
 def BPF_ATOMIC : BPFModeModifer<0x6>;
+def BPF_MEMACQ : BPFModeModifer<0x7>;
+def BPF_MEMREL : BPFModeModifer<0x7>;
 
 class BPFAtomicFlag<bits<4> val> {
   bits<4> Value = val;
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index aab2291a70efc2..6b26ec586bdb5e 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -60,6 +60,8 @@ def BPFHasSdivSmod : Predicate<"Subtarget->hasSdivSmod()">;
 def BPFNoMovsx : Predicate<"!Subtarget->hasMovsx()">;
 def BPFNoBswap : Predicate<"!Subtarget->hasBswap()">;
 def BPFHasStoreImm : Predicate<"Subtarget->hasStoreImm()">;
+def BPFHasLoadAcquire : Predicate<"Subtarget->hasLoadAcquire()">;
+def BPFHasStoreRelease : Predicate<"Subtarget->hasStoreRelease()">;
 
 class ImmediateAsmOperand<string name> : AsmOperandClass {
   let Name = name;
@@ -514,6 +516,9 @@ class STORE<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string AsmString, list
 class STOREi64<BPFWidthModifer Opc, string OpcodeStr, PatFrag OpNode>
     : STORE<Opc, BPF_MEM, "*("#OpcodeStr#" *)($addr) = $src", [(OpNode GPR:$src, ADDRri:$addr)]>;
 
+class STORE_RELEASEi64<BPFWidthModifer Opc, string OpcodeStr>
+    : STORE<Opc, BPF_MEMREL, "store_release(("#OpcodeStr#" *)($addr), $src)", []>;
+
 let Predicates = [BPFNoALU32] in {
   def STW : STOREi64<BPF_W, "u32", truncstorei32>;
   def STH : STOREi64<BPF_H, "u16", truncstorei16>;
@@ -521,6 +526,28 @@ let Predicates = [BPFNoALU32] in {
 }
 def STD : STOREi64<BPF_DW, "u64", store>;
 
+class relaxed_store<PatFrag base>
+  : PatFrag<(ops node:$val, node:$ptr), (base node:$val, node:$ptr)> {
+  let IsAtomic = 1;
+  let IsAtomicOrderingReleaseOrStronger = 0;
+}
+
+class releasing_store<PatFrag base>
+  : PatFrag<(ops node:$val, node:$ptr), (base node:$val, node:$ptr)> {
+  let IsAtomic = 1;
+  let IsAtomicOrderingRelease = 1;
+}
+
+let Predicates = [BPFHasStoreRelease] in {
+  def STDREL : STORE_RELEASEi64<BPF_DW, "u64">;
+
+  foreach P = [[relaxed_store<atomic_store_64>, STDREL],
+               [releasing_store<atomic_store_64>, STDREL],
+              ] in {
+    def : Pat<(P[0] GPR:$val, ADDRri:$addr), (P[1] GPR:$val, ADDRri:$addr)>;
+  }
+}
+
 class STORE_imm<BPFWidthModifer SizeOp,
                 string OpcodeStr, dag Pattern>
     : TYPE_LD_ST<BPF_MEM.Value, SizeOp.Value,
@@ -584,6 +611,9 @@ class LOADi64<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, Pa
     : LOAD<SizeOp, ModOp, "$dst = *("#OpcodeStr#" *)($addr)",
            [(set i64:$dst, (OpNode ADDRri:$addr))]>;
 
+class LOAD_ACQUIREi64<BPFWidthModifer SizeOp, string OpcodeStr>
+    : LOAD<SizeOp, BPF_MEMACQ, "$dst = load_acquire(("#OpcodeStr#" *)($addr))", []>;
+
 let isCodeGenOnly = 1 in {
   class CORE_LD<RegisterClass RegClass, string Sz>
                 : TYPE_LD_ST<BPF_MEM.Value, BPF_W.Value,
@@ -621,6 +651,28 @@ let Predicates = [BPFHasLdsx] in {
 
 def LDD : LOADi64<BPF_DW, BPF_MEM, "u64", load>;
 
+class relaxed_load<PatFrags base>
+    : PatFrag<(ops node:$ptr), (base node:$ptr)> {
+  let IsAtomic = 1;
+  let IsAtomicOrderingAcquireOrStronger = 0;
+}
+
+class acquiring_load<PatFrags base>
+    : PatFrag<(ops node:$ptr), (base node:$ptr)> {
+  let IsAtomic = 1;
+  let IsAtomicOrderingAcquire = 1;
+}
+
+let Predicates = [BPFHasLoadAcquire] in {
+  def LDDACQ : LOAD_ACQUIREi64<BPF_DW, "u64">;
+
+  foreach P = [[relaxed_load<atomic_load_64>, LDDACQ],
+               [acquiring_load<atomic_load_64>, LDDACQ],
+              ] in {
+    def : Pat<(P[0] ADDRri:$addr), (P[1] ADDRri:$addr)>;
+  }
+}
+
 class BRANCH<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
     : TYPE_ALU_JMP<Opc.Value, BPF_K.Value,
                    (outs),
@@ -1086,10 +1138,19 @@ class STOREi32<BPFWidthModifer Opc, string OpcodeStr, PatFrag OpNode>
     : STORE32<Opc, BPF_MEM, "*("#OpcodeStr#" *)($addr) = $src",
               [(OpNode GPR32:$src, ADDRri:$addr)]>;
 
+class STORE_RELEASEi32<BPFWidthModifer Opc, string OpcodeStr>
+    : STORE32<Opc, BPF_MEMREL, "store_release(("#OpcodeStr#" *)($addr), $src)", []>;
+
 let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
   def STW32 : STOREi32<BPF_W, "u32", store>;
   def STH32 : STOREi32<BPF_H, "u16", truncstorei16>;
   def STB32 : STOREi32<BPF_B, "u8", truncstorei8>;
+
+  let Predicates = [BPFHasStoreRelease] in {
+    def STWREL32 : STORE_RELEASEi32<BPF_W, "u32">;
+    def STHREL32 : STORE_RELEASEi32<BPF_H, "u16">;
+    def STBREL32 : STORE_RELEASEi32<BPF_B, "u8">;
+  }
 }
 
 class LOAD32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string AsmString, list<dag> Pattern>
@@ -1110,10 +1171,19 @@ class LOADi32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, Pa
     : LOAD32<SizeOp, ModOp, "$dst = *("#OpcodeStr#" *)($addr)",
              [(set i32:$dst, (OpNode ADDRri:$addr))]>;
 
+class LOAD_ACQUIREi32<BPFWidthModifer SizeOp, string OpcodeStr>
+    : LOAD32<SizeOp, BPF_MEMACQ, "$dst = load_acquire(("#OpcodeStr#" *)($addr))", []>;
+
 let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
   def LDW32 : LOADi32<BPF_W, BPF_MEM, "u32", load>;
   def LDH32 : LOADi32<BPF_H, BPF_MEM, "u16", zextloadi16>;
   def LDB32 : LOADi32<BPF_B, BPF_MEM, "u8", zextloadi8>;
+
+  let Predicates = [BPFHasLoadAcquire] in {
+    def LDWACQ32 : LOAD_ACQUIREi32<BPF_W, "u32">;
+    def LDHACQ32 : LOAD_ACQUIREi32<BPF_H, "u16">;
+    def LDBACQ32 : LOAD_ACQUIREi32<BPF_B, "u8">;
+  }
 }
 
 let Predicates = [BPFHasALU32] in {
@@ -1143,6 +1213,30 @@ let Predicates = [BPFHasALU32] in {
             (SUBREG_TO_REG (i64 0), (LDH32 ADDRri:$src), sub_32)>;
   def : Pat<(i64 (extloadi32 ADDRri:$src)),
             (SUBREG_TO_REG (i64 0), (LDW32 ADDRri:$src), sub_32)>;
+
+  let Predicates = [BPFHasLoadAcquire] in {
+    foreach P = [[relaxed_load<atomic_load_32>, LDWACQ32],
+                 [relaxed_load<atomic_load_az_16>, LDHACQ32],
+                 [relaxed_load<atomic_load_az_8>, LDBACQ32],
+                 [acquiring_load<atomic_load_32>, LDWACQ32],
+                 [acquiring_load<atomic_load_az_16>, LDHACQ32],
+                 [acquiring_load<atomic_load_az_8>, LDBACQ32],
+                ] in {
+      def : Pat<(P[0] ADDRri:$addr), (P[1] ADDRri:$addr)>;
+    }
+  }
+
+  let Predicates = [BPFHasStoreRelease] in {
+    foreach P = [[relaxed_store<atomic_store_32>, STWREL32],
+                 [relaxed_store<atomic_store_16>, STHREL32],
+                 [relaxed_store<atomic_store_8>, STBREL32],
+                 [releasing_store<atomic_store_32>, STWREL32],
+                 [releasing_store<atomic_store_16>, STHREL32],
+                 [releasing_store<atomic_store_8>, STBREL32],
+                ] in {
+      def : Pat<(P[0] GPR32:$val, ADDRri:$addr), (P[1] GPR32:$val, ADDRri:$addr)>;
+    }
+  }
 }
 
 let usesCustomInserter = 1, isCodeGenOnly = 1 in {
diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
index 39390e8c38f8c1..0763550cb03419 100644
--- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
+++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
@@ -100,21 +100,25 @@ static bool isST(unsigned Opcode) {
 }
 
 static bool isSTX32(unsigned Opcode) {
-  return Opcode == BPF::STB32 || Opcode == BPF::STH32 || Opcode == BPF::STW32;
+  return Opcode == BPF::STB32 || Opcode == BPF::STH32 || Opcode == BPF::STW32 ||
+         Opcode == BPF::STBREL32 || Opcode == BPF::STHREL32 ||
+         Opcode == BPF::STWREL32;
 }
 
 static bool isSTX64(unsigned Opcode) {
   return Opcode == BPF::STB || Opcode == BPF::STH || Opcode == BPF::STW ||
-         Opcode == BPF::STD;
+         Opcode == BPF::STD || Opcode == BPF::STDREL;
 }
 
 static bool isLDX32(unsigned Opcode) {
-  return Opcode == BPF::LDB32 || Opcode == BPF::LDH32 || Opcode == BPF::LDW32;
+  return Opcode == BPF::LDB32 || Opcode == BPF::LDH32 || Opcode == BPF::LDW32 ||
+         Opcode == BPF::LDBACQ32 || Opcode == BPF::LDHACQ32 ||
+         Opcode == BPF::LDWACQ32;
 }
 
 static bool isLDX64(unsigned Opcode) {
   return Opcode == BPF::LDB || Opcode == BPF::LDH || Opcode == BPF::LDW ||
-         Opcode == BPF::LDD;
+         Opcode == BPF::LDD || Opcode == BPF::LDDACQ;
 }
 
 static bool isLDSX(unsigned Opcode) {
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp
index d5b20403d1e42d..42a21e8ac3a029 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.cpp
+++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp
@@ -40,6 +40,12 @@ static cl::opt<bool> Disable_gotol("disable-gotol", cl::Hidden, cl::init(false),
 static cl::opt<bool>
     Disable_StoreImm("disable-storeimm", cl::Hidden, cl::init(false),
                      cl::desc("Disable BPF_ST (immediate store) insn"));
+static cl::opt<bool>
+    Disable_load_acquire("disable-load-acquire", cl::Hidden, cl::init(false),
+                         cl::desc("Disable load-acquire insns"));
+static cl::opt<bool>
+    Disable_store_release("disable-store-release", cl::Hidden, cl::init(false),
+                          cl::desc("Disable store-release insns"));
 
 void BPFSubtarget::anchor() {}
 
@@ -62,6 +68,8 @@ void BPFSubtarget::initializeEnvironment() {
   HasSdivSmod = false;
   HasGotol = false;
   HasStoreImm = false;
+  HasLoadAcquire = false;
+  HasStoreRelease = false;
 }
 
 void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
@@ -89,6 +97,11 @@ void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
     HasGotol = !Disable_gotol;
     HasStoreImm = !Disable_StoreImm;
   }
+
+  if (CpuVerNum >= 5) {
+    HasLoadAcquire = !Disable_load_acquire;
+    HasStoreRelease = !Disable_store_release;
+  }
 }
 
 BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU,
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.h b/llvm/lib/Target/BPF/BPFSubtarget.h
index 33747546eadc3b..aa7995c3af5ecf 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.h
+++ b/llvm/lib/Target/BPF/BPFSubtarget.h
@@ -66,6 +66,9 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
   // whether cpu v4 insns are enabled.
   bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol, HasStoreImm;
 
+  // whether cpu v5 insns are enabled.
+  bool HasLoadAcquire, HasStoreRelease;
+
   std::unique_ptr<CallLowering> CallLoweringInfo;
   std::unique_ptr<InstructionSelector> InstSelector;
   std::unique_ptr<LegalizerInfo> Legalizer;
@@ -92,6 +95,8 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
   bool hasSdivSmod() const { return HasSdivSmod; }
   bool hasGotol() const { return HasGotol; }
   bool hasStoreImm() const { return HasStoreImm; }
+  bool hasLoadAcquire() const { return HasLoadAcquire; }
+  bool hasStoreRelease() const { return HasStoreRelease; }
 
   bool isLittleEndian() const { return IsLittleEndian; }
 
diff --git a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
index 536bee5393843a..d1b9769d3bed96 100644
--- a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
+++ b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -58,7 +58,9 @@ class BPFDisassembler : public MCDisassembler {
     BPF_IND = 0x2,
     BPF_MEM = 0x3,
     BPF_MEMSX = 0x4,
-    BPF_ATOMIC = 0x6
+    BPF_ATOMIC = 0x6,
+    BPF_MEMACQ = 0x7,
+    BPF_MEMREL = 0x7
   };
 
   BPFDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
@@ -177,7 +179,8 @@ DecodeStatus BPFDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
   uint8_t InstMode = getInstMode(Insn);
   if ((InstClass == BPF_LDX || InstClass == BPF_STX) &&
       getInstSize(Insn) != BPF_DW &&
-      (InstMode == BPF_MEM || InstMode == BPF_ATOMIC) &&
+      (InstMode == BPF_MEM || InstMode == BPF_ATOMIC ||
+       InstMode == BPF_MEMACQ /* or BPF_MEMREL */) &&
       STI.hasFeature(BPF::ALU32))
     Result = decodeInstruction(DecoderTableBPFALU3264, Instr, Insn, Address,
                                this, STI);
diff --git a/llvm/test/CodeGen/BPF/acquire-release.ll b/llvm/test/CodeGen/BPF/acquire-release.ll
new file mode 100644
index 00000000000000..1c7db417b6c43f
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/acquire-release.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -march=bpfel -mcpu=v5 -verify-machineinstrs -show-mc-encoding | FileCheck %s
+;
+; Source:
+;   char load_acquire_i8(char *p) {
+;     return __atomic_load_n(p, __ATOMIC_ACQUIRE);
+;   }
+;   short load_acquire_i16(short *p) {
+;     return __atomic_load_n(p, __ATOMIC_ACQUIRE);
+;   }
+;   int load_acquire_i32(int *p) {
+;     return __atomic_load_n(p, __ATOMIC_ACQUIRE);
+;   }
+;   long load_acquire_i64(long *p) {
+;     return __atomic_load_n(p, __ATOMIC_ACQUIRE);
+;   }
+;   void store_release_i8(char *p, char v) {
+;     __atomic_store_n(p, v, __ATOMIC_RELEASE);
+;   }
+;   void store_release_i16(short *p, short v) {
+;     __atomic_store_n(p, v, __ATOMIC_RELEASE);
+;   }
+;   void store_release_i32(int *p, int v) {
+;     __atomic_store_n(p, v, __ATOMIC_RELEASE);
+;   }
+;   void store_release_i64(long *p, long v) {
+;     __atomic_store_n(p, v, __ATOMIC_RELEASE);
+;   }
+
+; CHECK-LABEL: load_acquire_i8
+; CHECK: w0 = load_acquire((u8 *)(r1 + 0)) # encoding: [0xf1,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+define dso_local i8 @load_acquire_i8(ptr nocapture noundef readonly %p) local_unnamed_addr {
+entry:
+  %0 = load atomic i8, ptr %p acquire, align 1
+  ret i8 %0
+}
+
+; CHECK-LABEL: load_acquire_i16
+; CHECK: w0 = load_acquire((u16 *)(r1 + 0)) # encoding: [0xe9,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+define dso_local i16 @load_acquire_i16(ptr nocapture noundef readonly %p) local_unnamed_addr {
+entry:
+  %0 = load atomic i16, ptr %p acquire, align 2
+  ret i16 %0
+}
+
+; CHECK-LABEL: load_acquire_i32
+; CHECK: w0 = load_acquire((u32 *)(r1 + 0)) # encoding: [0xe1,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+define dso_local i32 @load_acquire_i32(ptr nocapture noundef readonly %p) local_unnamed_addr {
+entry:
+  %0 = load atomic i32, ptr %p acquire, align 4
+  ret i32 %0
+}
+
+; CHECK-LABEL: load_acquire_i64
+; CHECK: r0 = load_acquire((u64 *)(r1 + 0)) # encoding: [0xf9,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+define dso_local i64 @load_acquire_i64(ptr nocapture noundef readonly %p) local_unnamed_addr {
+entry:
+  %0 = load atomic i64, ptr %p acquire, align 8
+  ret i64 %0
+}
+
+; CHECK-LABEL: store_release_i8
+; CHECK: store_release((u8 *)(r1 + 0), w2) # encoding: [0xf3,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+define void @store_release_i8(ptr nocapture noundef writeonly %p,
+                              i8 noundef signext %v) local_unnamed_addr {
+entry:
+  store atomic i8 %v, ptr %p release, align 1
+  ret void
+}
+
+; CHECK-LABEL: store_release_i16
+; CHECK: store_release((u16 *)(r1 + 0), w2) # encoding: [0xeb,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+define void @store_release_i16(ptr nocapture noundef writeonly %p,
+                               i16 noundef signext %v) local_unnamed_addr {
+entry:
+  store atomic i16 %v, ptr %p release, align 2
+  ret void
+}
+
+; CHECK-LABEL: store_release_i32
+; CHECK: store_release((u32 *)(r1 + 0), w2) # encoding: [0xe3,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+define void @store_release_i32(ptr nocapture noundef writeonly %p,
+                               i32 noundef %v) local_unnamed_addr {
+entry:
+  store atomic i32 %v, ptr %p release, align 4
+  ret void
+}
+
+; CHECK-LABEL: store_release_i64
+; CHECK: store_release((u64 *)(r1 + 0), r2) # encoding: [0xfb,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+define void @store_release_i64(ptr nocapture noundef writeonly %p,
+                               i64 noundef %v) local_unnamed_addr {
+entry:
+  store atomic i64 %v, ptr %p release, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/BPF/assembler-disassembler-v5.s b/llvm/test/CodeGen/BPF/assembler-disassembler-v5.s
new file mode 100644
index 00000000000000..da94cf9b09421e
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/assembler-disassembler-v5.s
@@ -0,0 +1,22 @@
+// RUN: llvm-mc -triple bpfel --mcpu=v5 --assemble --filetype=obj %s \
+// RUN:   | llvm-objdump -d - | FileCheck %s
+
+// CHECK: f1 10 00 00 00 00 00 00	w0 = load_acquire((u8 *)(r1 + 0x0))
+// CHECK: e9 10 00 00 00 00 00 00	w0 = load_acquire((u16 *)(r1 + 0x0))
+// CHECK: e1 10 00 00 00 00 00 00	w0 = load_acquire((u32 *)(r1 + 0x0))
+w0 = load_acquire((u8 *)(r1 + 0))
+w0 = load_acquire((u16 *)(r1 + 0))
+w0 = load_acquire((u32 *)(r1 + 0))
+
+// CHECK: f9 10 00 00 00 00 00 00	r0 = load_acquire((u64 *)(r1 + 0x0))
+r0 = load_acquire((u64 *)(r1 + 0))
+
+// CHECK: f3 21 00 00 00 00 00 00	store_release((u8 *)(r1 + 0x0), w2)
+// CHECK: eb 21 00 00 00 00 00 00	store_release((u16 *)(r1 + 0x0), w2)
+// CHECK: e3 21 00 00 00 00 00 00	store_release((u32 *)(r1 + 0x0), w2)
+store_release((u8 *)(r1 + 0), w2)
+store_release((u16 *)(r1 + 0), w2)
+store_release((u32 *)(r1 + 0), w2)
+
+// CHECK: fb 21 00 00 00 00 00 00	store_release((u64 *)(r1 + 0x0), r2)
+store_release((u64 *)(r1 + 0), r2)

>From 2925e051125763e3a9ed1d7a449d1db7c613e9fd Mon Sep 17 00:00:00 2001
From: Peilin Ye <yepeilin at google.com>
Date: Sat, 21 Sep 2024 04:00:40 +0000
Subject: [PATCH 4/4] [BPF] Improve error message for seq_cst atomic load and
 store

Sequentially consistent (seq_cst) atomic loads and stores are not yet
supported for BPF.  Right now, calling __atomic_{load,store}{,_n}()
with __ATOMIC_SEQ_CST will cause an error:

  $ cat bar.c
  int foo(int *ptr) { return __atomic_load_n(ptr, __ATOMIC_SEQ_CST); }
  $ clang --target=bpf -mcpu=v5 -c bar.c > /dev/null
  fatal error: error in backend: Cannot select: t8: i32,ch = AtomicLoad<(load seq_cst (s32) from %ir.0)> t7:1, t7
  ...

This isn't very useful.  Just like commit 379d90884807 ("BPF: provide
better error message for unsupported atomic operations"), generate an
error message saying that the requested operation isn't supported,
before that "fatal error" is triggered:

  $ clang --target=bpf -mcpu=v5 -c bar.c > /dev/null
  bar.c:1:5: error: sequentially consistent (seq_cst) atomic load/store is not supported
    1 | int foo(int *ptr) { return __atomic_load_n(ptr, __ATOMIC_SEQ_CST); }
      |     ^
  ...
---
 llvm/lib/Target/BPF/BPFISelLowering.cpp | 25 +++++++++++++++++++++++++
 llvm/lib/Target/BPF/BPFISelLowering.h   |  2 +-
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index ff23d3b055d0d5..63e7cb7af4def3 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -93,6 +93,11 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
   }
 
+  for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
+    setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
+    setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
+  }
+
   for (auto VT : { MVT::i32, MVT::i64 }) {
     if (VT == MVT::i32 && !STI.getHasAlu32())
       continue;
@@ -291,6 +296,9 @@ void BPFTargetLowering::ReplaceNodeResults(
     else
       Msg = "unsupported atomic operation, please use 64 bit version";
     break;
+  case ISD::ATOMIC_LOAD:
+  case ISD::ATOMIC_STORE:
+    return;
   }
 
   SDLoc DL(N);
@@ -316,6 +324,9 @@ SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return LowerSDIVSREM(Op, DAG);
   case ISD::DYNAMIC_STACKALLOC:
     return LowerDYNAMIC_STACKALLOC(Op, DAG);
+  case ISD::ATOMIC_LOAD:
+  case ISD::ATOMIC_STORE:
+    return LowerAtomicLoadStore(Op, DAG);
   }
 }
 
@@ -703,6 +714,20 @@ SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getNode(BPFISD::SELECT_CC, DL, VTs, Ops);
 }
 
+SDValue BPFTargetLowering::LowerAtomicLoadStore(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  const char *Msg =
+      "sequentially consistent (seq_cst) atomic load/store is not supported";
+  SDNode *N = Op.getNode();
+  SDLoc DL(N);
+
+  if (cast<AtomicSDNode>(N)->getMergedOrdering() ==
+      AtomicOrdering::SequentiallyConsistent)
+    fail(DL, DAG, Msg);
+
+  return Op;
+}
+
 const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((BPFISD::NodeType)Opcode) {
   case BPFISD::FIRST_NUMBER:
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index 42707949e864cd..8b254cc524a887 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -77,7 +77,7 @@ class BPFTargetLowering : public TargetLowering {
   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
-
+  SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
 



More information about the cfe-commits mailing list