[llvm] BPF: Ensure __sync_fetch_and_add() always generate atomic_fetch_add insn (PR #101428)

Fri Aug 2 20:51:59 PDT 2024

https://github.com/yonghong-song updated https://github.com/llvm/llvm-project/pull/101428

>From 225d7265a4f7117d2c587faf99239bb35eb971d3 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song at linux.dev>
Date: Wed, 31 Jul 2024 16:09:05 -0700
Subject: [PATCH] BPF: Ensure __sync_fetch_and_add() always generate
 atomic_fetch_add insn

Peilen Ye reported an issue ([1]) where, for a __sync_fetch_and_add(...)
call whose return value is not used, like
  __sync_fetch_and_add(&foo, 1);
the llvm BPF backend generates a locked insn, e.g.
  lock *(u32 *)(r1 + 0) += r2

If the return value of __sync_fetch_and_add(...) is used, like
  res = __sync_fetch_and_add(&foo, 1);
the llvm BPF backend generates an insn like
  r2 = atomic_fetch_add((u32 *)(r1 + 0), r2)

The above generation of 'lock *(u32 *)(r1 + 0) += r2' causes a problem in
the jit since a proper barrier is not inserted.

The above discrepancy is due to commit [2], which tried to maintain
backward compatibility: before commit [2], __sync_fetch_and_add(...)
always generated the lock insn in the BPF backend.

Based on the discussion in [1], now it is time to fix the above discrepancy
so we can have proper barrier support in the jit. This patch makes sure that
__sync_fetch_and_add(...) always generates atomic_fetch_add(...) insns.
Now 'lock *(u32 *)(r1 + 0) += r2' can only be generated by inline asm.
I also removed the whole BPFMIChecking.cpp file whose original purpose
is to detect and issue errors if XADD{W,D,W32} may return a value used
subsequently. Since insns XADD{W,D,W32} are all inline asm only now,
such error detection is not needed.

  [1] https://lore.kernel.org/bpf/ZqqiQQWRnz7H93Hc@google.com/T/#mb68d67bc8f39e35a0c3db52468b9de59b79f021f
  [2] https://github.com/llvm/llvm-project/commit/286daafd65129228e08a1d07aa4ca74488615744
---
 llvm/lib/Target/BPF/BPF.h                |   2 -
 llvm/lib/Target/BPF/BPFInstrInfo.td      |  17 +-
 llvm/lib/Target/BPF/BPFMIChecking.cpp    | 251 -----------------------
 llvm/lib/Target/BPF/BPFTargetMachine.cpp |   1 -
 llvm/lib/Target/BPF/CMakeLists.txt       |   1 -
 llvm/test/CodeGen/BPF/atomics.ll         |  17 +-
 llvm/test/CodeGen/BPF/atomics_2.ll       |  16 +-
 llvm/test/CodeGen/BPF/objdump_atomics.ll |   4 +-
 llvm/test/CodeGen/BPF/xadd.ll            |  59 ------
 llvm/test/CodeGen/BPF/xadd_legal.ll      |   4 +-
 10 files changed, 28 insertions(+), 344 deletions(-)
 delete mode 100644 llvm/lib/Target/BPF/BPFMIChecking.cpp
 delete mode 100644 llvm/test/CodeGen/BPF/xadd.ll

diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h
index 694d7bacf6421..f7bc6f958470b 100644
--- a/llvm/lib/Target/BPF/BPF.h
+++ b/llvm/lib/Target/BPF/BPF.h
@@ -28,7 +28,6 @@ FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
 FunctionPass *createBPFMISimplifyPatchablePass();
 FunctionPass *createBPFMIPeepholePass();
 FunctionPass *createBPFMIPreEmitPeepholePass();
-FunctionPass *createBPFMIPreEmitCheckingPass();
 
 InstructionSelector *createBPFInstructionSelector(const BPFTargetMachine &,
                                                   const BPFSubtarget &,
@@ -37,7 +36,6 @@ InstructionSelector *createBPFInstructionSelector(const BPFTargetMachine &,
 void initializeBPFCheckAndAdjustIRPass(PassRegistry&);
 void initializeBPFDAGToDAGISelLegacyPass(PassRegistry &);
 void initializeBPFMIPeepholePass(PassRegistry &);
-void initializeBPFMIPreEmitCheckingPass(PassRegistry&);
 void initializeBPFMIPreEmitPeepholePass(PassRegistry &);
 void initializeBPFMISimplifyPatchablePass(PassRegistry &);
 
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 55989f5eb6a3c..2ee630e29790f 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -789,12 +789,12 @@ let Predicates = [BPFNoALU32] in {
 }
 
 // Atomic XADD for BPFNoALU32
-class XADD<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
+class XADD<BPFWidthModifer SizeOp, string OpcodeStr>
     : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
                  (outs GPR:$dst),
                  (ins MEMri:$addr, GPR:$val),
                  "lock *("#OpcodeStr#" *)($addr) += $val",
-                 [(set GPR:$dst, (OpNode ADDRri:$addr, GPR:$val))]> {
+                 []> {
   bits<4> dst;
   bits<20> addr;
 
@@ -805,12 +805,6 @@ class XADD<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
   let BPFClass = BPF_STX;
 }
 
-let Constraints = "$dst = $val" in {
-  let Predicates = [BPFNoALU32] in {
-    def XADDW : XADD<BPF_W, "u32", atomic_load_add_i32>;
-  }
-}
-
 // Atomic add, and, or, xor
 class ATOMIC_NOFETCH<BPFArithOp Opc, string Opstr>
     : TYPE_LD_ST<BPF_ATOMIC.Value, BPF_DW.Value,
@@ -895,6 +889,13 @@ class XFALU32<BPFWidthModifer SizeOp, BPFArithOp Opc, string OpcodeStr,
   let BPFClass = BPF_STX;
 }
 
+let Constraints = "$dst = $val" in {
+  let Predicates = [BPFNoALU32] in {
+    def XADDW : XADD<BPF_W, "u32">;
+    def XFADDW : XFALU64<BPF_W, BPF_ADD, "u32", "add", atomic_load_add_i32>;
+  }
+}
+
 let Constraints = "$dst = $val" in {
   let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
     def XFADDW32 : XFALU32<BPF_W, BPF_ADD, "u32", "add", atomic_load_add_i32>;
diff --git a/llvm/lib/Target/BPF/BPFMIChecking.cpp b/llvm/lib/Target/BPF/BPFMIChecking.cpp
deleted file mode 100644
index a968950f5bfcb..0000000000000
--- a/llvm/lib/Target/BPF/BPFMIChecking.cpp
+++ /dev/null
@@ -1,251 +0,0 @@
-//===-------------- BPFMIChecking.cpp - MI Checking Legality -------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass performs checking to signal errors for certain illegal usages at
-// MachineInstruction layer. Specially, the result of XADD{32,64} insn should
-// not be used. The pass is done at the PreEmit pass right before the
-// machine code is emitted at which point the register liveness information
-// is still available.
-//
-//===----------------------------------------------------------------------===//
-
-#include "BPF.h"
-#include "BPFInstrInfo.h"
-#include "BPFTargetMachine.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/Support/Debug.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "bpf-mi-checking"
-
-namespace {
-
-struct BPFMIPreEmitChecking : public MachineFunctionPass {
-
-  static char ID;
-  MachineFunction *MF;
-  const TargetRegisterInfo *TRI;
-
-  BPFMIPreEmitChecking() : MachineFunctionPass(ID) {
-    initializeBPFMIPreEmitCheckingPass(*PassRegistry::getPassRegistry());
-  }
-
-private:
-  // Initialize class variables.
-  void initialize(MachineFunction &MFParm);
-
-  bool processAtomicInsts();
-
-public:
-
-  // Main entry point for this pass.
-  bool runOnMachineFunction(MachineFunction &MF) override {
-    if (!skipFunction(MF.getFunction())) {
-      initialize(MF);
-      return processAtomicInsts();
-    }
-    return false;
-  }
-};
-
-// Initialize class variables.
-void BPFMIPreEmitChecking::initialize(MachineFunction &MFParm) {
-  MF = &MFParm;
-  TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
-  LLVM_DEBUG(dbgs() << "*** BPF PreEmit checking pass ***\n\n");
-}
-
-// Make sure all Defs of XADD are dead, meaning any result of XADD insn is not
-// used.
-//
-// NOTE: BPF backend hasn't enabled sub-register liveness track, so when the
-// source and destination operands of XADD are GPR32, there is no sub-register
-// dead info. If we rely on the generic MachineInstr::allDefsAreDead, then we
-// will raise false alarm on GPR32 Def.
-//
-// To support GPR32 Def, ideally we could just enable sub-registr liveness track
-// on BPF backend, then allDefsAreDead could work on GPR32 Def. This requires
-// implementing TargetSubtargetInfo::enableSubRegLiveness on BPF.
-//
-// However, sub-register liveness tracking module inside LLVM is actually
-// designed for the situation where one register could be split into more than
-// one sub-registers for which case each sub-register could have their own
-// liveness and kill one of them doesn't kill others. So, tracking liveness for
-// each make sense.
-//
-// For BPF, each 64-bit register could only have one 32-bit sub-register. This
-// is exactly the case which LLVM think brings no benefits for doing
-// sub-register tracking, because the live range of sub-register must always
-// equal to its parent register, therefore liveness tracking is disabled even
-// the back-end has implemented enableSubRegLiveness. The detailed information
-// is at r232695:
-//
-//   Author: Matthias Braun <matze at braunis.de>
-//   Date:   Thu Mar 19 00:21:58 2015 +0000
-//   Do not track subregister liveness when it brings no benefits
-//
-// Hence, for BPF, we enhance MachineInstr::allDefsAreDead. Given the solo
-// sub-register always has the same liveness as its parent register, LLVM is
-// already attaching a implicit 64-bit register Def whenever the there is
-// a sub-register Def. The liveness of the implicit 64-bit Def is available.
-// For example, for "lock *(u32 *)(r0 + 4) += w9", the MachineOperand info could
-// be:
-//
-//   $w9 = XADDW32 killed $r0, 4, $w9(tied-def 0),
-//                        implicit killed $r9, implicit-def dead $r9
-//
-// Even though w9 is not marked as Dead, the parent register r9 is marked as
-// Dead correctly, and it is safe to use such information or our purpose.
-static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
-  const MCRegisterClass *GPR64RegClass =
-    &BPFMCRegisterClasses[BPF::GPRRegClassID];
-  std::vector<unsigned> GPR32LiveDefs;
-  std::vector<unsigned> GPR64DeadDefs;
-
-  for (const MachineOperand &MO : MI.operands()) {
-    bool RegIsGPR64;
-
-    if (!MO.isReg() || MO.isUse())
-      continue;
-
-    RegIsGPR64 = GPR64RegClass->contains(MO.getReg());
-    if (!MO.isDead()) {
-      // It is a GPR64 live Def, we are sure it is live. */
-      if (RegIsGPR64)
-        return true;
-      // It is a GPR32 live Def, we are unsure whether it is really dead due to
-      // no sub-register liveness tracking. Push it to vector for deferred
-      // check.
-      GPR32LiveDefs.push_back(MO.getReg());
-      continue;
-    }
-
-    // Record any GPR64 dead Def as some unmarked GPR32 could be alias of its
-    // low 32-bit.
-    if (RegIsGPR64)
-      GPR64DeadDefs.push_back(MO.getReg());
-  }
-
-  // No GPR32 live Def, safe to return false.
-  if (GPR32LiveDefs.empty())
-    return false;
-
-  // No GPR64 dead Def, so all those GPR32 live Def can't have alias, therefore
-  // must be truely live, safe to return true.
-  if (GPR64DeadDefs.empty())
-    return true;
-
-  // Otherwise, return true if any aliased SuperReg of GPR32 is not dead.
-  for (auto I : GPR32LiveDefs)
-    for (MCPhysReg SR : TRI->superregs(I))
-      if (!llvm::is_contained(GPR64DeadDefs, SR))
-        return true;
-
-  return false;
-}
-
-bool BPFMIPreEmitChecking::processAtomicInsts() {
-  for (MachineBasicBlock &MBB : *MF) {
-    for (MachineInstr &MI : MBB) {
-      if (MI.getOpcode() != BPF::XADDW &&
-          MI.getOpcode() != BPF::XADDD &&
-          MI.getOpcode() != BPF::XADDW32)
-        continue;
-
-      LLVM_DEBUG(MI.dump());
-      if (hasLiveDefs(MI, TRI)) {
-        DebugLoc Empty;
-        const DebugLoc &DL = MI.getDebugLoc();
-        const Function &F = MF->getFunction();
-        F.getContext().diagnose(DiagnosticInfoUnsupported{
-            F, "Invalid usage of the XADD return value", DL});
-      }
-    }
-  }
-
-  // Check return values of atomic_fetch_and_{add,and,or,xor}.
-  // If the return is not used, the atomic_fetch_and_<op> instruction
-  // is replaced with atomic_<op> instruction.
-  MachineInstr *ToErase = nullptr;
-  bool Changed = false;
-  const BPFInstrInfo *TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
-  for (MachineBasicBlock &MBB : *MF) {
-    for (MachineInstr &MI : MBB) {
-      if (ToErase) {
-        ToErase->eraseFromParent();
-        ToErase = nullptr;
-      }
-
-      if (MI.getOpcode() != BPF::XFADDW32 && MI.getOpcode() != BPF::XFADDD &&
-          MI.getOpcode() != BPF::XFANDW32 && MI.getOpcode() != BPF::XFANDD &&
-          MI.getOpcode() != BPF::XFXORW32 && MI.getOpcode() != BPF::XFXORD &&
-          MI.getOpcode() != BPF::XFORW32 && MI.getOpcode() != BPF::XFORD)
-        continue;
-
-      if (hasLiveDefs(MI, TRI))
-        continue;
-
-      LLVM_DEBUG(dbgs() << "Transforming "; MI.dump());
-      unsigned newOpcode;
-      switch (MI.getOpcode()) {
-      case BPF::XFADDW32:
-        newOpcode = BPF::XADDW32;
-        break;
-      case BPF::XFADDD:
-        newOpcode = BPF::XADDD;
-        break;
-      case BPF::XFANDW32:
-        newOpcode = BPF::XANDW32;
-        break;
-      case BPF::XFANDD:
-        newOpcode = BPF::XANDD;
-        break;
-      case BPF::XFXORW32:
-        newOpcode = BPF::XXORW32;
-        break;
-      case BPF::XFXORD:
-        newOpcode = BPF::XXORD;
-        break;
-      case BPF::XFORW32:
-        newOpcode = BPF::XORW32;
-        break;
-      case BPF::XFORD:
-        newOpcode = BPF::XORD;
-        break;
-      default:
-        llvm_unreachable("Incorrect Atomic Instruction Opcode");
-      }
-
-      BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(newOpcode))
-          .add(MI.getOperand(0))
-          .add(MI.getOperand(1))
-          .add(MI.getOperand(2))
-          .add(MI.getOperand(3));
-
-      ToErase = &MI;
-      Changed = true;
-    }
-  }
-
-  return Changed;
-}
-
-} // end default namespace
-
-INITIALIZE_PASS(BPFMIPreEmitChecking, "bpf-mi-pemit-checking",
-                "BPF PreEmit Checking", false, false)
-
-char BPFMIPreEmitChecking::ID = 0;
-FunctionPass* llvm::createBPFMIPreEmitCheckingPass()
-{
-  return new BPFMIPreEmitChecking();
-}
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 7d91fa8bb824c..64b115b8fc8af 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -178,7 +178,6 @@ void BPFPassConfig::addMachineSSAOptimization() {
 }
 
 void BPFPassConfig::addPreEmitPass() {
-  addPass(createBPFMIPreEmitCheckingPass());
   if (getOptLevel() != CodeGenOptLevel::None)
     if (!DisableMIPeephole)
       addPass(createBPFMIPreEmitPeepholePass());
diff --git a/llvm/lib/Target/BPF/CMakeLists.txt b/llvm/lib/Target/BPF/CMakeLists.txt
index eade4cacb7100..253660d4d62e3 100644
--- a/llvm/lib/Target/BPF/CMakeLists.txt
+++ b/llvm/lib/Target/BPF/CMakeLists.txt
@@ -39,7 +39,6 @@ add_llvm_target(BPFCodeGen
   BPFSubtarget.cpp
   BPFTargetMachine.cpp
   BPFMIPeephole.cpp
-  BPFMIChecking.cpp
   BPFMISimplifyPatchable.cpp
   BTFDebug.cpp
 
diff --git a/llvm/test/CodeGen/BPF/atomics.ll b/llvm/test/CodeGen/BPF/atomics.ll
index 096d14c869477..0c16c49f2a873 100644
--- a/llvm/test/CodeGen/BPF/atomics.ll
+++ b/llvm/test/CodeGen/BPF/atomics.ll
@@ -1,11 +1,10 @@
-; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding | FileCheck %s
-; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v3 | FileCheck --check-prefix=CHECK-V3 %s
+; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding | FileCheck --check-prefixes=CHECK,CHECK-V2 %s
+; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v3 | FileCheck --check-prefixes=CHECK,CHECK-V3 %s
 
 ; CHECK-LABEL: test_load_add_32
-; CHECK: lock *(u32 *)(r1 + 0) += r2
-; CHECK: encoding: [0xc3,0x21
-; CHECK-V3: lock *(u32 *)(r1 + 0) += w2
-; CHECK-V3: encoding: [0xc3,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-V2: r2 = atomic_fetch_add((u32 *)(r1 + 0), r2)
+; CHECK-V3: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2)
+; CHECK: encoding: [0xc3,0x21,0x00,0x00,0x01,0x00,0x00,0x00]
 define void @test_load_add_32(ptr %p, i32 zeroext %v) {
 entry:
   atomicrmw add ptr %p, i32 %v seq_cst
@@ -13,10 +12,8 @@ entry:
 }
 
 ; CHECK-LABEL: test_load_add_64
-; CHECK: lock *(u64 *)(r1 + 0) += r2
-; CHECK: encoding: [0xdb,0x21
-; CHECK-V3: lock *(u64 *)(r1 + 0) += r2
-; CHECK-V3: encoding: [0xdb,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2)
+; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x01,0x00,0x00,0x00]
 define void @test_load_add_64(ptr %p, i64 zeroext %v) {
 entry:
   atomicrmw add ptr %p, i64 %v seq_cst
diff --git a/llvm/test/CodeGen/BPF/atomics_2.ll b/llvm/test/CodeGen/BPF/atomics_2.ll
index 37a9d8322938c..c670ddb05b6a7 100644
--- a/llvm/test/CodeGen/BPF/atomics_2.ll
+++ b/llvm/test/CodeGen/BPF/atomics_2.ll
@@ -214,8 +214,8 @@ entry:
 }
 
 ; CHECK-LABEL: test_atomic_xor_32
-; CHECK: lock *(u32 *)(r1 + 0) ^= w2
-; CHECK: encoding: [0xc3,0x21,0x00,0x00,0xa0,0x00,0x00,0x00]
+; CHECK: w2 = atomic_fetch_xor((u32 *)(r1 + 0), w2)
+; CHECK: encoding: [0xc3,0x21,0x00,0x00,0xa1,0x00,0x00,0x00]
 ; CHECK: w0 = 0
 define dso_local i32 @test_atomic_xor_32(ptr nocapture %p, i32 %v) local_unnamed_addr {
 entry:
@@ -224,8 +224,8 @@ entry:
 }
 
 ; CHECK-LABEL: test_atomic_xor_64
-; CHECK: lock *(u64 *)(r1 + 0) ^= r2
-; CHECK: encoding: [0xdb,0x21,0x00,0x00,0xa0,0x00,0x00,0x00]
+; CHECK: r2 = atomic_fetch_xor((u64 *)(r1 + 0), r2)
+; CHECK: encoding: [0xdb,0x21,0x00,0x00,0xa1,0x00,0x00,0x00]
 ; CHECK: w0 = 0
 define dso_local i32 @test_atomic_xor_64(ptr nocapture %p, i64 %v) local_unnamed_addr {
 entry:
@@ -234,8 +234,8 @@ entry:
 }
 
 ; CHECK-LABEL: test_atomic_and_64
-; CHECK: lock *(u64 *)(r1 + 0) &= r2
-; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x50,0x00,0x00,0x00]
+; CHECK: r2 = atomic_fetch_and((u64 *)(r1 + 0), r2)
+; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x51,0x00,0x00,0x00]
 ; CHECK: w0 = 0
 define dso_local i32 @test_atomic_and_64(ptr nocapture %p, i64 %v) local_unnamed_addr {
 entry:
@@ -244,8 +244,8 @@ entry:
 }
 
 ; CHECK-LABEL: test_atomic_or_64
-; CHECK: lock *(u64 *)(r1 + 0) |= r2
-; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x40,0x00,0x00,0x00]
+; CHECK: r2 = atomic_fetch_or((u64 *)(r1 + 0), r2)
+; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x41,0x00,0x00,0x00]
 ; CHECK: w0 = 0
 define dso_local i32 @test_atomic_or_64(ptr nocapture %p, i64 %v) local_unnamed_addr {
 entry:
diff --git a/llvm/test/CodeGen/BPF/objdump_atomics.ll b/llvm/test/CodeGen/BPF/objdump_atomics.ll
index 14a6dfdab11c4..3ec364f7368b5 100644
--- a/llvm/test/CodeGen/BPF/objdump_atomics.ll
+++ b/llvm/test/CodeGen/BPF/objdump_atomics.ll
@@ -2,7 +2,7 @@
 
 ; CHECK-LABEL: test_load_add_32
 ; CHECK: c3 21
-; CHECK: lock *(u32 *)(r1 + 0) += r2
+; CHECK: r2 = atomic_fetch_add((u32 *)(r1 + 0), r2)
 define void @test_load_add_32(ptr %p, i32 zeroext %v) {
 entry:
   atomicrmw add ptr %p, i32 %v seq_cst
@@ -11,7 +11,7 @@ entry:
 
 ; CHECK-LABEL: test_load_add_64
 ; CHECK: db 21
-; CHECK: lock *(u64 *)(r1 + 0) += r2
+; CHECK: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2)
 define void @test_load_add_64(ptr %p, i64 zeroext %v) {
 entry:
   atomicrmw add ptr %p, i64 %v seq_cst
diff --git a/llvm/test/CodeGen/BPF/xadd.ll b/llvm/test/CodeGen/BPF/xadd.ll
deleted file mode 100644
index 5aeeb9baf7b89..0000000000000
--- a/llvm/test/CodeGen/BPF/xadd.ll
+++ /dev/null
@@ -1,59 +0,0 @@
-; RUN: not llc -march=bpfel < %s 2>&1 | FileCheck %s
-; RUN: not llc -march=bpfeb < %s 2>&1 | FileCheck %s
-
-; This file is generated with the source command and source
-; $ clang -target bpf -O2 -g -S -emit-llvm t.c
-; $ cat t.c
-; int test(int *ptr) {
-;    int r;
-;    __sync_fetch_and_add(ptr, 4);
-;    r = __sync_fetch_and_add(ptr, 6);
-;    return r;
-; }
-
-; ModuleID = 't.c'
-source_filename = "t.c"
-target datalayout = "e-m:e-p:64:64-i64:64-n32:64-S128"
-target triple = "bpf"
-
-; Function Attrs: nounwind
-define dso_local i32 @test(ptr nocapture %ptr) local_unnamed_addr #0 !dbg !7 {
-entry:
-  call void @llvm.dbg.value(metadata ptr %ptr, metadata !13, metadata !DIExpression()), !dbg !15
-  %0 = atomicrmw add ptr %ptr, i32 4 seq_cst, !dbg !16
-  %1 = atomicrmw add ptr %ptr, i32 6 seq_cst, !dbg !17
-; CHECK: in function test i32 (ptr): Invalid usage of the XADD return value
-  call void @llvm.dbg.value(metadata i32 %1, metadata !14, metadata !DIExpression()), !dbg !18
-  ret i32 %1, !dbg !19
-}
-
-; Function Attrs: nounwind readnone speculatable
-declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone speculatable }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!3, !4, !5}
-!llvm.ident = !{!6}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.0 (trunk 342605) (llvm/trunk 342612)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None)
-!1 = !DIFile(filename: "t.c", directory: "/home/yhs/work/tests/llvm/sync/test1")
-!2 = !{}
-!3 = !{i32 2, !"Dwarf Version", i32 4}
-!4 = !{i32 2, !"Debug Info Version", i32 3}
-!5 = !{i32 1, !"wchar_size", i32 4}
-!6 = !{!"clang version 8.0.0 (trunk 342605) (llvm/trunk 342612)"}
-!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12)
-!8 = !DISubroutineType(types: !9)
-!9 = !{!10, !11}
-!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
-!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64)
-!12 = !{!13, !14}
-!13 = !DILocalVariable(name: "ptr", arg: 1, scope: !7, file: !1, line: 1, type: !11)
-!14 = !DILocalVariable(name: "r", scope: !7, file: !1, line: 2, type: !10)
-!15 = !DILocation(line: 1, column: 15, scope: !7)
-!16 = !DILocation(line: 3, column: 4, scope: !7)
-!17 = !DILocation(line: 4, column: 8, scope: !7)
-!18 = !DILocation(line: 2, column: 8, scope: !7)
-!19 = !DILocation(line: 5, column: 4, scope: !7)
diff --git a/llvm/test/CodeGen/BPF/xadd_legal.ll b/llvm/test/CodeGen/BPF/xadd_legal.ll
index f4d4f737cc619..88f04d85a779f 100644
--- a/llvm/test/CodeGen/BPF/xadd_legal.ll
+++ b/llvm/test/CodeGen/BPF/xadd_legal.ll
@@ -19,8 +19,8 @@ define dso_local i32 @test(ptr nocapture %ptr, i64 %a) {
 entry:
   %conv = trunc i64 %a to i32
   %0 = atomicrmw add ptr %ptr, i32 %conv seq_cst
-; CHECK-64: lock *(u32 *)(r1 + 0) += r2
-; CHECK-32: lock *(u32 *)(r1 + 0) += w2
+; CHECK-64: r2 = atomic_fetch_add((u32 *)(r1 + 0), r2)
+; CHECK-32: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2)
   %1 = load i32, ptr %ptr, align 4
   ret i32 %1
 }