[clang] 17bfc00 - [BPF] Add load-acquire and store-release instructions under -mcpu=v4 (#108636)



Author: Peilin Ye
Date: 2025-03-04T09:19:39-08:00
New Revision: 17bfc00f7c4a424d7b5dc6da575865833701fd1a

URL: https://github.com/llvm/llvm-project/commit/17bfc00f7c4a424d7b5dc6da575865833701fd1a
DIFF: https://github.com/llvm/llvm-project/commit/17bfc00f7c4a424d7b5dc6da575865833701fd1a.diff

LOG: [BPF] Add load-acquire and store-release instructions under -mcpu=v4 (#108636)

As discussed in [1], introduce BPF instructions with load-acquire and
store-release semantics under -mcpu=v4.  Define two new flags:

  BPF_LOAD_ACQ    0x100
  BPF_STORE_REL   0x110

A "load-acquire" is a BPF_STX | BPF_ATOMIC instruction with the 'imm'
field set to BPF_LOAD_ACQ (0x100).

Similarly, a "store-release" is a BPF_STX | BPF_ATOMIC instruction with
the 'imm' field set to BPF_STORE_REL (0x110).
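To make the encoding concrete, here is a minimal sketch in C of how a
disassembler might recognize these two instructions (the struct layout
mirrors the standard 64-bit BPF instruction format, shown little-endian;
is_load_acquire()/is_store_release() are hypothetical helpers, not part
of this change):

  #include <stdint.h>

  #define BPF_STX       0x03   /* instruction class (low 3 bits) */
  #define BPF_ATOMIC    0xc0   /* mode modifier (high 3 bits) */
  #define BPF_LOAD_ACQ  0x100
  #define BPF_STORE_REL 0x110

  struct bpf_insn {
      uint8_t opcode;
      uint8_t regs;  /* dst_reg in low nibble, src_reg in high nibble */
      int16_t off;
      int32_t imm;
  };

  /* Match class + mode (ignoring the size bits), then the 'imm' field. */
  static int is_load_acquire(const struct bpf_insn *insn)
  {
      return (insn->opcode & 0xe7) == (BPF_ATOMIC | BPF_STX) &&
             insn->imm == BPF_LOAD_ACQ;
  }

  static int is_store_release(const struct bpf_insn *insn)
  {
      return (insn->opcode & 0xe7) == (BPF_ATOMIC | BPF_STX) &&
             insn->imm == BPF_STORE_REL;
  }

Applied to the examples below, is_load_acquire() matches opcode 0xdb with
imm 0x100, and is_store_release() matches opcode 0xcb with imm 0x110.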

Unlike existing atomic read-modify-write operations, which only support
BPF_W (32-bit) and BPF_DW (64-bit) size modifiers, load-acquires and
store-releases also support BPF_B (8-bit) and BPF_H (16-bit).  An 8- or
16-bit load-acquire zero-extends the value before writing it to a 32-bit
register, just like the ARM64 LDAPRH instruction and friends.
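For instance, the following (illustrative) function compiles to a BPF_H
load-acquire whose 16-bit result is zero-extended:

  unsigned int load16_acquire(unsigned short *ptr)
  {
      /* BPF_H (16-bit) load-acquire; the value is zero-extended
         before being written to the 32-bit destination register. */
      return __atomic_load_n(ptr, __ATOMIC_ACQUIRE);
  }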

As an example (assuming little-endian):

  long foo(long *ptr) {
      return __atomic_load_n(ptr, __ATOMIC_ACQUIRE);
  }

foo() can be compiled to:

  db 10 00 00 00 01 00 00  r0 = load_acquire((u64 *)(r1 + 0x0))
  95 00 00 00 00 00 00 00  exit

  opcode (0xdb): BPF_ATOMIC | BPF_DW | BPF_STX
  imm (0x00000100): BPF_LOAD_ACQ

Similarly:

  void bar(short *ptr, short val) {
      __atomic_store_n(ptr, val, __ATOMIC_RELEASE);
  }

bar() can be compiled to:

  cb 21 00 00 10 01 00 00  store_release((u16 *)(r1 + 0x0), w2)
  95 00 00 00 00 00 00 00  exit

  opcode (0xcb): BPF_ATOMIC | BPF_H | BPF_STX
  imm (0x00000110): BPF_STORE_REL

Inline assembly is also supported.
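For example, a load-acquire can be written directly in inline assembly
(a sketch using the assembler syntax shown above; the constraint choices
are assumptions):

  long load_acquire_u64(long *ptr)
  {
      long ret;
      /* Same instruction codegen emits for __ATOMIC_ACQUIRE on a u64. */
      asm volatile("%0 = load_acquire((u64 *)(%1 + 0))"
                   : "=r"(ret)
                   : "r"(ptr)
                   : "memory");
      return ret;
  }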

Add a pre-defined macro, __BPF_FEATURE_LOAD_ACQ_STORE_REL, to let
developers detect this new feature at compile time.  The feature can also
be disabled using a new llc option, -disable-load-acq-store-rel.
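For example, a program can guard its use of acquire/release orderings at
compile time (a minimal sketch; the #else branch is purely illustrative
and does not provide acquire semantics):

  long read_flag(long *flag)
  {
  #ifdef __BPF_FEATURE_LOAD_ACQ_STORE_REL
      return __atomic_load_n(flag, __ATOMIC_ACQUIRE);
  #else
      return *(volatile long *)flag;  /* illustrative fallback only */
  #endif
  }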

Using __ATOMIC_RELAXED for __atomic_store{,_n}() will generate a "plain"
store (BPF_MEM | BPF_STX) instruction:

  void foo(short *ptr, short val) {
      __atomic_store_n(ptr, val, __ATOMIC_RELAXED);
  }

  6b 21 00 00 00 00 00 00  *(u16 *)(r1 + 0x0) = w2
  95 00 00 00 00 00 00 00  exit

Similarly, using __ATOMIC_RELAXED for __atomic_load{,_n}() will generate
a zero-extending, "plain" load (BPF_MEM | BPF_LDX) instruction:

  int foo(char *ptr) {
      return __atomic_load_n(ptr, __ATOMIC_RELAXED);
  }

  71 11 00 00 00 00 00 00  w1 = *(u8 *)(r1 + 0x0)
  bc 10 08 00 00 00 00 00  w0 = (s8)w1
  95 00 00 00 00 00 00 00  exit

Currently __ATOMIC_CONSUME is an alias for __ATOMIC_ACQUIRE.  Using
__ATOMIC_SEQ_CST ("sequentially consistent") is not supported yet and
will cause an error:

  $ clang --target=bpf -mcpu=v4 -c bar.c > /dev/null
  bar.c:1:5: error: sequentially consistent (seq_cst) atomic load/store is
  not supported
      1 | int foo(int *ptr) { return __atomic_load_n(ptr, __ATOMIC_SEQ_CST); }
        |     ^
  ...

Finally, rename the isST*() and isLD*() helper functions in
BPFMISimplifyPatchable.cpp based on what the instructions actually do,
rather than on their instruction class.

[1]
https://lore.kernel.org/all/20240729183246.4110549-1-yepeilin@google.com/

Added: 
    llvm/test/CodeGen/BPF/atomic-load-store.ll

Modified: 
    clang/lib/Basic/Targets/BPF.cpp
    clang/test/Preprocessor/bpf-predefined-macros.c
    llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
    llvm/lib/Target/BPF/BPFISelLowering.cpp
    llvm/lib/Target/BPF/BPFISelLowering.h
    llvm/lib/Target/BPF/BPFInstrFormats.td
    llvm/lib/Target/BPF/BPFInstrInfo.td
    llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
    llvm/lib/Target/BPF/BPFSubtarget.cpp
    llvm/lib/Target/BPF/BPFSubtarget.h
    llvm/test/CodeGen/BPF/assembler-disassembler-v4.s

Removed: 
    


################################################################################
diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp
index a463de0884020..4b85a3645b17d 100644
--- a/clang/lib/Basic/Targets/BPF.cpp
+++ b/clang/lib/Basic/Targets/BPF.cpp
@@ -75,6 +75,7 @@ void BPFTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__BPF_FEATURE_SDIV_SMOD");
     Builder.defineMacro("__BPF_FEATURE_GOTOL");
     Builder.defineMacro("__BPF_FEATURE_ST");
+    Builder.defineMacro("__BPF_FEATURE_LOAD_ACQ_STORE_REL");
   }
 }
 

diff --git a/clang/test/Preprocessor/bpf-predefined-macros.c b/clang/test/Preprocessor/bpf-predefined-macros.c
index 8c2143f767c40..cd8a2ec031925 100644
--- a/clang/test/Preprocessor/bpf-predefined-macros.c
+++ b/clang/test/Preprocessor/bpf-predefined-macros.c
@@ -67,6 +67,9 @@ int t;
 #ifdef __BPF_FEATURE_MAY_GOTO
 int u;
 #endif
+#ifdef __BPF_FEATURE_LOAD_ACQ_STORE_REL
+int v;
+#endif
 
 // CHECK: int b;
 // CHECK: int c;
@@ -106,6 +109,8 @@ int u;
 // CPU_V3: int u;
 // CPU_V4: int u;
 
+// CPU_V4: int v;
+
 // CPU_GENERIC: int g;
 
 // CPU_PROBE: int f;

diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 119684b8bbeda..494445fa89b5e 100644
--- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -235,6 +235,7 @@ struct BPFOperand : public MCParsedAsmOperand {
         .Case("exit", true)
         .Case("lock", true)
         .Case("ld_pseudo", true)
+        .Case("store_release", true)
         .Default(false);
   }
 
@@ -271,6 +272,7 @@ struct BPFOperand : public MCParsedAsmOperand {
         .Case("cmpxchg_64", true)
         .Case("cmpxchg32_32", true)
         .Case("addr_space_cast", true)
+        .Case("load_acquire", true)
         .Default(false);
   }
 };

diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 1f5cadf37ab58..6c196309d2d1a 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -92,6 +92,11 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
   }
 
+  for (auto VT : {MVT::i32, MVT::i64}) {
+    setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
+    setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
+  }
+
   for (auto VT : { MVT::i32, MVT::i64 }) {
     if (VT == MVT::i32 && !STI.getHasAlu32())
       continue;
@@ -290,6 +295,9 @@ void BPFTargetLowering::ReplaceNodeResults(
     else
       Msg = "unsupported atomic operation, please use 64 bit version";
     break;
+  case ISD::ATOMIC_LOAD:
+  case ISD::ATOMIC_STORE:
+    return;
   }
 
   SDLoc DL(N);
@@ -315,6 +323,9 @@ SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return LowerSDIVSREM(Op, DAG);
   case ISD::DYNAMIC_STACKALLOC:
     return LowerDYNAMIC_STACKALLOC(Op, DAG);
+  case ISD::ATOMIC_LOAD:
+  case ISD::ATOMIC_STORE:
+    return LowerATOMIC_LOAD_STORE(Op, DAG);
   }
 }
 
@@ -701,6 +712,20 @@ SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getNode(BPFISD::SELECT_CC, DL, Op.getValueType(), Ops);
 }
 
+SDValue BPFTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  SDNode *N = Op.getNode();
+  SDLoc DL(N);
+
+  if (cast<AtomicSDNode>(N)->getMergedOrdering() ==
+      AtomicOrdering::SequentiallyConsistent)
+    fail(DL, DAG,
+         "sequentially consistent (seq_cst) "
+         "atomic load/store is not supported");
+
+  return Op;
+}
+
 const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((BPFISD::NodeType)Opcode) {
   case BPFISD::FIRST_NUMBER:

diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index d59098f9f569b..ad048ad05e6dd 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -77,7 +77,7 @@ class BPFTargetLowering : public TargetLowering {
   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
-
+  SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
 

diff --git a/llvm/lib/Target/BPF/BPFInstrFormats.td b/llvm/lib/Target/BPF/BPFInstrFormats.td
index feffdbc69465e..50cca5148de7c 100644
--- a/llvm/lib/Target/BPF/BPFInstrFormats.td
+++ b/llvm/lib/Target/BPF/BPFInstrFormats.td
@@ -48,6 +48,13 @@ def BPF_END  : BPFArithOp<0xd>;
 def BPF_XCHG    : BPFArithOp<0xe>;
 def BPF_CMPXCHG : BPFArithOp<0xf>;
 
+class BPFAtomicOp<bits<5> val> {
+  bits<5> Value = val;
+}
+
+def BPF_LOAD_ACQ : BPFAtomicOp<0x10>;
+def BPF_STORE_REL : BPFAtomicOp<0x11>;
+
 class BPFEndDir<bits<1> val> {
   bits<1> Value = val;
 }

diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 86929a952d1ba..2dcf1eae086be 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -60,6 +60,7 @@ def BPFHasSdivSmod : Predicate<"Subtarget->hasSdivSmod()">;
 def BPFNoMovsx : Predicate<"!Subtarget->hasMovsx()">;
 def BPFNoBswap : Predicate<"!Subtarget->hasBswap()">;
 def BPFHasStoreImm : Predicate<"Subtarget->hasStoreImm()">;
+def BPFHasLoadAcqStoreRel : Predicate<"Subtarget->hasLoadAcqStoreRel()">;
 
 class ImmediateAsmOperand<string name> : AsmOperandClass {
   let Name = name;
@@ -566,6 +567,47 @@ let Predicates = [BPFHasALU32, BPFHasStoreImm] in {
             (STB_imm (imm_to_i64 imm:$src), ADDRri:$dst)>;
 }
 
+class STORE_RELEASE<BPFWidthModifer SizeOp, string OpcodeStr, RegisterClass RegTp>
+    : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
+                 (outs),
+                 (ins RegTp:$src, MEMri:$addr),
+                 "store_release(("#OpcodeStr#" *)($addr), $src)",
+                 []> {
+  bits<4> src;
+  bits<20> addr;
+
+  let Inst{51-48} = addr{19-16}; // base reg
+  let Inst{55-52} = src;
+  let Inst{47-32} = addr{15-0}; // offset
+  let Inst{8-4} = BPF_STORE_REL.Value;
+  let BPFClass = BPF_STX;
+}
+
+class STORE_RELEASEi64<BPFWidthModifer Opc, string OpcodeStr>
+    : STORE_RELEASE<Opc, OpcodeStr, GPR>;
+
+class relaxed_store<PatFrag base>
+  : PatFrag<(ops node:$val, node:$ptr), (base node:$val, node:$ptr)> {
+  let IsAtomic = 1;
+  let IsAtomicOrderingReleaseOrStronger = 0;
+}
+
+class releasing_store<PatFrag base>
+  : PatFrag<(ops node:$val, node:$ptr), (base node:$val, node:$ptr)> {
+  let IsAtomic = 1;
+  let IsAtomicOrderingRelease = 1;
+}
+
+let Predicates = [BPFHasLoadAcqStoreRel] in {
+  def STDREL : STORE_RELEASEi64<BPF_DW, "u64">;
+
+  foreach P = [[relaxed_store<atomic_store_64>, STD],
+               [releasing_store<atomic_store_64>, STDREL],
+              ] in {
+    def : Pat<(P[0] GPR:$val, ADDRri:$addr), (P[1] GPR:$val, ADDRri:$addr)>;
+  }
+}
+
 // LOAD instructions
 class LOAD<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, list<dag> Pattern>
     : TYPE_LD_ST<ModOp.Value, SizeOp.Value,
@@ -622,6 +664,47 @@ let Predicates = [BPFHasLdsx] in {
 
 def LDD : LOADi64<BPF_DW, BPF_MEM, "u64", load>;
 
+class LOAD_ACQUIRE<BPFWidthModifer SizeOp, string OpcodeStr, RegisterClass RegTp>
+    : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
+                 (outs RegTp:$dst),
+                 (ins MEMri:$addr),
+                 "$dst = load_acquire(("#OpcodeStr#" *)($addr))",
+                 []> {
+  bits<4> dst;
+  bits<20> addr;
+
+  let Inst{51-48} = dst;
+  let Inst{55-52} = addr{19-16}; // base reg
+  let Inst{47-32} = addr{15-0}; // offset
+  let Inst{8-4} = BPF_LOAD_ACQ.Value;
+  let BPFClass = BPF_STX;
+}
+
+class LOAD_ACQUIREi64<BPFWidthModifer SizeOp, string OpcodeStr>
+    : LOAD_ACQUIRE<SizeOp, OpcodeStr, GPR>;
+
+class relaxed_load<PatFrags base>
+    : PatFrag<(ops node:$ptr), (base node:$ptr)> {
+  let IsAtomic = 1;
+  let IsAtomicOrderingAcquireOrStronger = 0;
+}
+
+class acquiring_load<PatFrags base>
+    : PatFrag<(ops node:$ptr), (base node:$ptr)> {
+  let IsAtomic = 1;
+  let IsAtomicOrderingAcquire = 1;
+}
+
+let Predicates = [BPFHasLoadAcqStoreRel] in {
+  def LDDACQ : LOAD_ACQUIREi64<BPF_DW, "u64">;
+
+  foreach P = [[relaxed_load<atomic_load_64>, LDD],
+               [acquiring_load<atomic_load_64>, LDDACQ],
+              ] in {
+    def : Pat<(P[0] ADDRri:$addr), (P[1] ADDRri:$addr)>;
+  }
+}
+
 class BRANCH<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
     : TYPE_ALU_JMP<Opc.Value, BPF_K.Value,
                    (outs),
@@ -1181,10 +1264,19 @@ class STORE32<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
 class STOREi32<BPFWidthModifer Opc, string OpcodeStr, PatFrag OpNode>
     : STORE32<Opc, OpcodeStr, [(OpNode GPR32:$src, ADDRri:$addr)]>;
 
+class STORE_RELEASEi32<BPFWidthModifer Opc, string OpcodeStr>
+    : STORE_RELEASE<Opc, OpcodeStr, GPR32>;
+
 let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
   def STW32 : STOREi32<BPF_W, "u32", store>;
   def STH32 : STOREi32<BPF_H, "u16", truncstorei16>;
   def STB32 : STOREi32<BPF_B, "u8", truncstorei8>;
+
+  let Predicates = [BPFHasLoadAcqStoreRel] in {
+    def STWREL32 : STORE_RELEASEi32<BPF_W, "u32">;
+    def STHREL32 : STORE_RELEASEi32<BPF_H, "u16">;
+    def STBREL32 : STORE_RELEASEi32<BPF_B, "u8">;
+  }
 }
 
 class LOAD32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, list<dag> Pattern>
@@ -1205,10 +1297,19 @@ class LOAD32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, lis
 class LOADi32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, PatFrag OpNode>
     : LOAD32<SizeOp, ModOp, OpcodeStr, [(set i32:$dst, (OpNode ADDRri:$addr))]>;
 
+class LOAD_ACQUIREi32<BPFWidthModifer SizeOp, string OpcodeStr>
+    : LOAD_ACQUIRE<SizeOp, OpcodeStr, GPR32>;
+
 let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
   def LDW32 : LOADi32<BPF_W, BPF_MEM, "u32", load>;
   def LDH32 : LOADi32<BPF_H, BPF_MEM, "u16", zextloadi16>;
   def LDB32 : LOADi32<BPF_B, BPF_MEM, "u8", zextloadi8>;
+
+  let Predicates = [BPFHasLoadAcqStoreRel] in {
+    def LDWACQ32 : LOAD_ACQUIREi32<BPF_W, "u32">;
+    def LDHACQ32 : LOAD_ACQUIREi32<BPF_H, "u16">;
+    def LDBACQ32 : LOAD_ACQUIREi32<BPF_B, "u8">;
+  }
 }
 
 let Predicates = [BPFHasALU32] in {
@@ -1238,6 +1339,30 @@ let Predicates = [BPFHasALU32] in {
             (SUBREG_TO_REG (i64 0), (LDH32 ADDRri:$src), sub_32)>;
   def : Pat<(i64 (extloadi32 ADDRri:$src)),
             (SUBREG_TO_REG (i64 0), (LDW32 ADDRri:$src), sub_32)>;
+
+  let Predicates = [BPFHasLoadAcqStoreRel] in {
+    foreach P = [[relaxed_load<atomic_load_32>, LDW32],
+                 [relaxed_load<atomic_load_az_16>, LDH32],
+                 [relaxed_load<atomic_load_az_8>, LDB32],
+                 [acquiring_load<atomic_load_32>, LDWACQ32],
+                 [acquiring_load<atomic_load_az_16>, LDHACQ32],
+                 [acquiring_load<atomic_load_az_8>, LDBACQ32],
+                ] in {
+      def : Pat<(P[0] ADDRri:$addr), (P[1] ADDRri:$addr)>;
+    }
+  }
+
+  let Predicates = [BPFHasLoadAcqStoreRel] in {
+    foreach P = [[relaxed_store<atomic_store_32>, STW32],
+                 [relaxed_store<atomic_store_16>, STH32],
+                 [relaxed_store<atomic_store_8>, STB32],
+                 [releasing_store<atomic_store_32>, STWREL32],
+                 [releasing_store<atomic_store_16>, STHREL32],
+                 [releasing_store<atomic_store_8>, STBREL32],
+                ] in {
+      def : Pat<(P[0] GPR32:$val, ADDRri:$addr), (P[1] GPR32:$val, ADDRri:$addr)>;
+    }
+  }
 }
 
 let usesCustomInserter = 1, isCodeGenOnly = 1 in {

diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
index 39390e8c38f8c..f6735adbde640 100644
--- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
+++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
@@ -94,35 +94,39 @@ void BPFMISimplifyPatchable::initialize(MachineFunction &MFParm) {
   LLVM_DEBUG(dbgs() << "*** BPF simplify patchable insts pass ***\n\n");
 }
 
-static bool isST(unsigned Opcode) {
+static bool isStoreImm(unsigned Opcode) {
   return Opcode == BPF::STB_imm || Opcode == BPF::STH_imm ||
          Opcode == BPF::STW_imm || Opcode == BPF::STD_imm;
 }
 
-static bool isSTX32(unsigned Opcode) {
-  return Opcode == BPF::STB32 || Opcode == BPF::STH32 || Opcode == BPF::STW32;
+static bool isStore32(unsigned Opcode) {
+  return Opcode == BPF::STB32 || Opcode == BPF::STH32 || Opcode == BPF::STW32 ||
+         Opcode == BPF::STBREL32 || Opcode == BPF::STHREL32 ||
+         Opcode == BPF::STWREL32;
 }
 
-static bool isSTX64(unsigned Opcode) {
+static bool isStore64(unsigned Opcode) {
   return Opcode == BPF::STB || Opcode == BPF::STH || Opcode == BPF::STW ||
-         Opcode == BPF::STD;
+         Opcode == BPF::STD || Opcode == BPF::STDREL;
 }
 
-static bool isLDX32(unsigned Opcode) {
-  return Opcode == BPF::LDB32 || Opcode == BPF::LDH32 || Opcode == BPF::LDW32;
+static bool isLoad32(unsigned Opcode) {
+  return Opcode == BPF::LDB32 || Opcode == BPF::LDH32 || Opcode == BPF::LDW32 ||
+         Opcode == BPF::LDBACQ32 || Opcode == BPF::LDHACQ32 ||
+         Opcode == BPF::LDWACQ32;
 }
 
-static bool isLDX64(unsigned Opcode) {
+static bool isLoad64(unsigned Opcode) {
   return Opcode == BPF::LDB || Opcode == BPF::LDH || Opcode == BPF::LDW ||
-         Opcode == BPF::LDD;
+         Opcode == BPF::LDD || Opcode == BPF::LDDACQ;
 }
 
-static bool isLDSX(unsigned Opcode) {
+static bool isLoadSext(unsigned Opcode) {
   return Opcode == BPF::LDBSX || Opcode == BPF::LDHSX || Opcode == BPF::LDWSX;
 }
 
 bool BPFMISimplifyPatchable::isLoadInst(unsigned Opcode) {
-  return isLDX32(Opcode) || isLDX64(Opcode) || isLDSX(Opcode);
+  return isLoad32(Opcode) || isLoad64(Opcode) || isLoadSext(Opcode);
 }
 
 void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
@@ -143,11 +147,11 @@ void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
     MachineInstr *DefInst = MO.getParent();
     unsigned Opcode = DefInst->getOpcode();
     unsigned COREOp;
-    if (isLDX64(Opcode) || isLDSX(Opcode))
+    if (isLoad64(Opcode) || isLoadSext(Opcode))
       COREOp = BPF::CORE_LD64;
-    else if (isLDX32(Opcode))
+    else if (isLoad32(Opcode))
       COREOp = BPF::CORE_LD32;
-    else if (isSTX64(Opcode) || isSTX32(Opcode) || isST(Opcode))
+    else if (isStore64(Opcode) || isStore32(Opcode) || isStoreImm(Opcode))
       COREOp = BPF::CORE_ST;
     else
       continue;
@@ -160,7 +164,7 @@ void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
     // Reject the form:
     //   %1 = ADD_rr %2, %3
     //   *(type *)(%2 + 0) = %1
-    if (isSTX64(Opcode) || isSTX32(Opcode)) {
+    if (isStore64(Opcode) || isStore32(Opcode)) {
       const MachineOperand &Opnd = DefInst->getOperand(0);
       if (Opnd.isReg() && Opnd.getReg() == MO.getReg())
         continue;

diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp
index 305e9a2bf2cda..4167547680b12 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.cpp
+++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp
@@ -40,6 +40,9 @@ static cl::opt<bool> Disable_gotol("disable-gotol", cl::Hidden, cl::init(false),
 static cl::opt<bool>
     Disable_StoreImm("disable-storeimm", cl::Hidden, cl::init(false),
                      cl::desc("Disable BPF_ST (immediate store) insn"));
+static cl::opt<bool> Disable_load_acq_store_rel(
+    "disable-load-acq-store-rel", cl::Hidden, cl::init(false),
+    cl::desc("Disable load-acquire and store-release insns"));
 
 void BPFSubtarget::anchor() {}
 
@@ -62,6 +65,7 @@ void BPFSubtarget::initializeEnvironment() {
   HasSdivSmod = false;
   HasGotol = false;
   HasStoreImm = false;
+  HasLoadAcqStoreRel = false;
 }
 
 void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
@@ -91,6 +95,7 @@ void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
     HasSdivSmod = !Disable_sdiv_smod;
     HasGotol = !Disable_gotol;
     HasStoreImm = !Disable_StoreImm;
+    HasLoadAcqStoreRel = !Disable_load_acq_store_rel;
     return;
   }
 }

diff --git a/llvm/lib/Target/BPF/BPFSubtarget.h b/llvm/lib/Target/BPF/BPFSubtarget.h
index 33747546eadc3..aed2211265e23 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.h
+++ b/llvm/lib/Target/BPF/BPFSubtarget.h
@@ -64,7 +64,8 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
   bool UseDwarfRIS;
 
   // whether cpu v4 insns are enabled.
-  bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol, HasStoreImm;
+  bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol, HasStoreImm,
+      HasLoadAcqStoreRel;
 
   std::unique_ptr<CallLowering> CallLoweringInfo;
   std::unique_ptr<InstructionSelector> InstSelector;
@@ -92,6 +93,7 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
   bool hasSdivSmod() const { return HasSdivSmod; }
   bool hasGotol() const { return HasGotol; }
   bool hasStoreImm() const { return HasStoreImm; }
+  bool hasLoadAcqStoreRel() const { return HasLoadAcqStoreRel; }
 
   bool isLittleEndian() const { return IsLittleEndian; }
 

diff --git a/llvm/test/CodeGen/BPF/assembler-disassembler-v4.s b/llvm/test/CodeGen/BPF/assembler-disassembler-v4.s
index d52985986bdc3..c4949d5d1df5e 100644
--- a/llvm/test/CodeGen/BPF/assembler-disassembler-v4.s
+++ b/llvm/test/CodeGen/BPF/assembler-disassembler-v4.s
@@ -42,3 +42,23 @@ r2 s%= r4
 // CHECK: 9c 42 01 00 00 00 00 00     w2 s%= w4
 w1 s/= w3
 w2 s%= w4
+
+// CHECK: d3 10 00 00 00 01 00 00	w0 = load_acquire((u8 *)(r1 + 0x0))
+// CHECK: cb 10 00 00 00 01 00 00	w0 = load_acquire((u16 *)(r1 + 0x0))
+// CHECK: c3 10 00 00 00 01 00 00	w0 = load_acquire((u32 *)(r1 + 0x0))
+w0 = load_acquire((u8 *)(r1 + 0))
+w0 = load_acquire((u16 *)(r1 + 0))
+w0 = load_acquire((u32 *)(r1 + 0))
+
+// CHECK: db 10 00 00 00 01 00 00	r0 = load_acquire((u64 *)(r1 + 0x0))
+r0 = load_acquire((u64 *)(r1 + 0))
+
+// CHECK: d3 21 00 00 10 01 00 00	store_release((u8 *)(r1 + 0x0), w2)
+// CHECK: cb 21 00 00 10 01 00 00	store_release((u16 *)(r1 + 0x0), w2)
+// CHECK: c3 21 00 00 10 01 00 00	store_release((u32 *)(r1 + 0x0), w2)
+store_release((u8 *)(r1 + 0), w2)
+store_release((u16 *)(r1 + 0), w2)
+store_release((u32 *)(r1 + 0), w2)
+
+// CHECK: db 21 00 00 10 01 00 00	store_release((u64 *)(r1 + 0x0), r2)
+store_release((u64 *)(r1 + 0), r2)

diff --git a/llvm/test/CodeGen/BPF/atomic-load-store.ll b/llvm/test/CodeGen/BPF/atomic-load-store.ll
new file mode 100644
index 0000000000000..b331ae7b1a3c0
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/atomic-load-store.ll
@@ -0,0 +1,142 @@
+; RUN: llc < %s -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding \
+; RUN:   | FileCheck -check-prefixes=CHECK-LE %s
+; RUN: llc < %s -march=bpfeb -mcpu=v4 -verify-machineinstrs -show-mc-encoding \
+; RUN:   | FileCheck -check-prefixes=CHECK-BE %s
+
+; Source:
+;   void atomic_load_i8(char *p) {
+;     (void)__atomic_load_n(p, __ATOMIC_RELAXED);
+;     (void)__atomic_load_n(p, __ATOMIC_ACQUIRE);
+;   }
+;   void atomic_load_i16(short *p) {
+;     (void)__atomic_load_n(p, __ATOMIC_RELAXED);
+;     (void)__atomic_load_n(p, __ATOMIC_ACQUIRE);
+;   }
+;   void atomic_load_i32(int *p) {
+;     (void)__atomic_load_n(p, __ATOMIC_RELAXED);
+;     (void)__atomic_load_n(p, __ATOMIC_ACQUIRE);
+;   }
+;   void atomic_load_i64(long *p) {
+;     (void)__atomic_load_n(p, __ATOMIC_RELAXED);
+;     (void)__atomic_load_n(p, __ATOMIC_ACQUIRE);
+;   }
+;   void atomic_store_i8(char *p, char v) {
+;     __atomic_store_n(p, v, __ATOMIC_RELAXED);
+;     __atomic_store_n(p, v, __ATOMIC_RELEASE);
+;   }
+;   void atomic_store_i16(short *p, short v) {
+;     __atomic_store_n(p, v, __ATOMIC_RELAXED);
+;     __atomic_store_n(p, v, __ATOMIC_RELEASE);
+;   }
+;   void atomic_store_i32(int *p, int v) {
+;     __atomic_store_n(p, v, __ATOMIC_RELAXED);
+;     __atomic_store_n(p, v, __ATOMIC_RELEASE);
+;   }
+;   void atomic_store_i64(long *p, long v) {
+;     __atomic_store_n(p, v, __ATOMIC_RELAXED);
+;     __atomic_store_n(p, v, __ATOMIC_RELEASE);
+;   }
+
+define dso_local void @atomic_load_i8(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+; CHECK-LABEL: atomic_load_i8
+; CHECK-LE:      w2 = *(u8 *)(r1 + 0) # encoding: [0x71,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-LE-NEXT: w1 = load_acquire((u8 *)(r1 + 0)) # encoding: [0xd3,0x11,0x00,0x00,0x00,0x01,0x00,0x00]
+;
+; CHECK-BE:      w2 = *(u8 *)(r1 + 0) # encoding: [0x71,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-BE-NEXT: w1 = load_acquire((u8 *)(r1 + 0)) # encoding: [0xd3,0x11,0x00,0x00,0x00,0x00,0x01,0x00]
+entry:
+  %0 = load atomic i8, ptr %p monotonic, align 1
+  %1 = load atomic i8, ptr %p acquire, align 1
+  ret void
+}
+
+define dso_local void @atomic_load_i16(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+; CHECK-LABEL: atomic_load_i16
+; CHECK-LE:      w2 = *(u16 *)(r1 + 0) # encoding: [0x69,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-LE-NEXT: w1 = load_acquire((u16 *)(r1 + 0)) # encoding: [0xcb,0x11,0x00,0x00,0x00,0x01,0x00,0x00]
+;
+; CHECK-BE:      w2 = *(u16 *)(r1 + 0) # encoding: [0x69,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-BE-NEXT: w1 = load_acquire((u16 *)(r1 + 0)) # encoding: [0xcb,0x11,0x00,0x00,0x00,0x00,0x01,0x00]
+entry:
+  %0 = load atomic i16, ptr %p monotonic, align 2
+  %1 = load atomic i16, ptr %p acquire, align 2
+  ret void
+}
+
+define dso_local void @atomic_load_i32(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+; CHECK-LABEL: atomic_load_i32
+; CHECK-LE:      w2 = *(u32 *)(r1 + 0) # encoding: [0x61,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-LE-NEXT: w1 = load_acquire((u32 *)(r1 + 0)) # encoding: [0xc3,0x11,0x00,0x00,0x00,0x01,0x00,0x00]
+;
+; CHECK-BE:      w2 = *(u32 *)(r1 + 0) # encoding: [0x61,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-BE-NEXT: w1 = load_acquire((u32 *)(r1 + 0)) # encoding: [0xc3,0x11,0x00,0x00,0x00,0x00,0x01,0x00]
+entry:
+  %0 = load atomic i32, ptr %p monotonic, align 4
+  %1 = load atomic i32, ptr %p acquire, align 4
+  ret void
+}
+
+define dso_local void @atomic_load_i64(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+; CHECK-LABEL: atomic_load_i64
+; CHECK-LE:      r2 = *(u64 *)(r1 + 0) # encoding: [0x79,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-LE-NEXT: r1 = load_acquire((u64 *)(r1 + 0)) # encoding: [0xdb,0x11,0x00,0x00,0x00,0x01,0x00,0x00]
+;
+; CHECK-BE:      r2 = *(u64 *)(r1 + 0) # encoding: [0x79,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-BE-NEXT: r1 = load_acquire((u64 *)(r1 + 0)) # encoding: [0xdb,0x11,0x00,0x00,0x00,0x00,0x01,0x00]
+entry:
+  %0 = load atomic i64, ptr %p monotonic, align 8
+  %1 = load atomic i64, ptr %p acquire, align 8
+  ret void
+}
+
+define dso_local void @atomic_store_i8(ptr nocapture noundef writeonly %p, i8 noundef signext %v) local_unnamed_addr #0 {
+; CHECK-LABEL: atomic_store_i8
+; CHECK-LE:      *(u8 *)(r1 + 0) = w2 # encoding: [0x73,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-LE-NEXT: store_release((u8 *)(r1 + 0), w2) # encoding: [0xd3,0x21,0x00,0x00,0x10,0x01,0x00,0x00]
+;
+; CHECK-BE:      *(u8 *)(r1 + 0) = w2 # encoding: [0x73,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-BE-NEXT: store_release((u8 *)(r1 + 0), w2) # encoding: [0xd3,0x12,0x00,0x00,0x00,0x00,0x01,0x10]
+entry:
+  store atomic i8 %v, ptr %p monotonic, align 1
+  store atomic i8 %v, ptr %p release, align 1
+  ret void
+}
+
+define dso_local void @atomic_store_i16(ptr nocapture noundef writeonly %p, i16 noundef signext %v) local_unnamed_addr #0 {
+; CHECK-LABEL: atomic_store_i16
+; CHECK-LE:      *(u16 *)(r1 + 0) = w2 # encoding: [0x6b,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-LE-NEXT: store_release((u16 *)(r1 + 0), w2) # encoding: [0xcb,0x21,0x00,0x00,0x10,0x01,0x00,0x00]
+;
+; CHECK-BE:      *(u16 *)(r1 + 0) = w2 # encoding: [0x6b,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-BE-NEXT: store_release((u16 *)(r1 + 0), w2) # encoding: [0xcb,0x12,0x00,0x00,0x00,0x00,0x01,0x10]
+entry:
+  store atomic i16 %v, ptr %p monotonic, align 2
+  store atomic i16 %v, ptr %p release, align 2
+  ret void
+}
+
+define dso_local void @atomic_store_i32(ptr nocapture noundef writeonly %p, i32 noundef %v) local_unnamed_addr #0 {
+; CHECK-LABEL: atomic_store_i32
+; CHECK-LE:      *(u32 *)(r1 + 0) = w2 # encoding: [0x63,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-LE-NEXT: store_release((u32 *)(r1 + 0), w2) # encoding: [0xc3,0x21,0x00,0x00,0x10,0x01,0x00,0x00]
+;
+; CHECK-BE:      *(u32 *)(r1 + 0) = w2 # encoding: [0x63,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-BE-NEXT: store_release((u32 *)(r1 + 0), w2) # encoding: [0xc3,0x12,0x00,0x00,0x00,0x00,0x01,0x10]
+entry:
+  store atomic i32 %v, ptr %p monotonic, align 4
+  store atomic i32 %v, ptr %p release, align 4
+  ret void
+}
+
+define dso_local void @atomic_store_i64(ptr nocapture noundef writeonly %p, i64 noundef %v) local_unnamed_addr #0 {
+; CHECK-LABEL: atomic_store_i64
+; CHECK-LE:      *(u64 *)(r1 + 0) = r2 # encoding: [0x7b,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-LE-NEXT: store_release((u64 *)(r1 + 0), r2) # encoding: [0xdb,0x21,0x00,0x00,0x10,0x01,0x00,0x00]
+;
+; CHECK-BE:      *(u64 *)(r1 + 0) = r2 # encoding: [0x7b,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-BE-NEXT: store_release((u64 *)(r1 + 0), r2) # encoding: [0xdb,0x12,0x00,0x00,0x00,0x00,0x01,0x10]
+entry:
+  store atomic i64 %v, ptr %p monotonic, align 8
+  store atomic i64 %v, ptr %p release, align 8
+  ret void
+}


        

