[llvm] BPF: Generate locked insn for __sync_fetch_and_add() with cpu v1/v2 (PR #106494)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 28 23:16:36 PDT 2024
https://github.com/yonghong-song updated https://github.com/llvm/llvm-project/pull/106494
From 98d3e3fa3c0c31d1e484863d22f6db4693f5d0ac Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song at linux.dev>
Date: Wed, 28 Aug 2024 19:43:37 -0700
Subject: [PATCH 1/2] Revert "BPF: Ensure __sync_fetch_and_add() always
generate atomic_fetch_add insn (#101428)"
This reverts commit c566769d7c097e3956c6b36c2040bd8baa2e9929.
See discussion in [1]. Currently, with -mcpu=v1/v2, an atomic_fetch_add() insn is generated
for (void)__sync_fetch_and_add(...). This breaks backward compatibility since
there is code running on old kernels (< 5.12) that do not support atomic_fetch_add().
Now revert the previous commit ([1]) and add additional logic in the next patch
to ensure that for (void)__sync_fetch_and_add(...):
  v1/v2: generate a locked add insn
  >= v3: generate an atomic_fetch_add insn
[1] https://github.com/llvm/llvm-project/pull/101428
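As a minimal illustration of the two cases (a sketch, not part of the patch;
t.c and foo() are made-up names):

  /* t.c -- e.g. clang -target bpf -mcpu=v1 -O2 -S t.c */
  int foo(int *p)
  {
      /* Result ignored: lowers to the locked insn
       * "lock *(u32 *)(r1 + 0) += r2" on v1/v2, which still loads on
       * kernels older than 5.12; with -mcpu=v3 it becomes
       * atomic_fetch_add. */
      (void)__sync_fetch_and_add(p, 1);
      /* Result used: requires atomic_fetch_add, so on v1/v2 the backend
       * reports "Invalid usage of the XADD return value" rather than
       * silently emitting a wrong insn. */
      return __sync_fetch_and_add(p, 1);
  }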
---
llvm/lib/Target/BPF/BPF.h | 2 +
llvm/lib/Target/BPF/BPFInstrInfo.td | 17 +-
llvm/lib/Target/BPF/BPFMIChecking.cpp | 251 +++++++++++++++++++++++
llvm/lib/Target/BPF/BPFTargetMachine.cpp | 1 +
llvm/lib/Target/BPF/CMakeLists.txt | 1 +
llvm/test/CodeGen/BPF/atomics.ll | 17 +-
llvm/test/CodeGen/BPF/atomics_2.ll | 16 +-
llvm/test/CodeGen/BPF/objdump_atomics.ll | 4 +-
llvm/test/CodeGen/BPF/xadd.ll | 59 ++++++
llvm/test/CodeGen/BPF/xadd_legal.ll | 4 +-
10 files changed, 344 insertions(+), 28 deletions(-)
create mode 100644 llvm/lib/Target/BPF/BPFMIChecking.cpp
create mode 100644 llvm/test/CodeGen/BPF/xadd.ll
diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h
index f7bc6f958470b9..694d7bacf64211 100644
--- a/llvm/lib/Target/BPF/BPF.h
+++ b/llvm/lib/Target/BPF/BPF.h
@@ -28,6 +28,7 @@ FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
FunctionPass *createBPFMISimplifyPatchablePass();
FunctionPass *createBPFMIPeepholePass();
FunctionPass *createBPFMIPreEmitPeepholePass();
+FunctionPass *createBPFMIPreEmitCheckingPass();
InstructionSelector *createBPFInstructionSelector(const BPFTargetMachine &,
const BPFSubtarget &,
@@ -36,6 +37,7 @@ InstructionSelector *createBPFInstructionSelector(const BPFTargetMachine &,
void initializeBPFCheckAndAdjustIRPass(PassRegistry&);
void initializeBPFDAGToDAGISelLegacyPass(PassRegistry &);
void initializeBPFMIPeepholePass(PassRegistry &);
+void initializeBPFMIPreEmitCheckingPass(PassRegistry&);
void initializeBPFMIPreEmitPeepholePass(PassRegistry &);
void initializeBPFMISimplifyPatchablePass(PassRegistry &);
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 4baeeb017699d6..cf9764ca62123d 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -787,12 +787,12 @@ let Predicates = [BPFNoALU32] in {
}
// Atomic XADD for BPFNoALU32
-class XADD<BPFWidthModifer SizeOp, string OpcodeStr>
+class XADD<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
: TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
(outs GPR:$dst),
(ins MEMri:$addr, GPR:$val),
"lock *("#OpcodeStr#" *)($addr) += $val",
- []> {
+ [(set GPR:$dst, (OpNode ADDRri:$addr, GPR:$val))]> {
bits<4> dst;
bits<20> addr;
@@ -803,6 +803,12 @@ class XADD<BPFWidthModifer SizeOp, string OpcodeStr>
let BPFClass = BPF_STX;
}
+let Constraints = "$dst = $val" in {
+ let Predicates = [BPFNoALU32] in {
+ def XADDW : XADD<BPF_W, "u32", atomic_load_add_i32>;
+ }
+}
+
// Atomic add, and, or, xor
class ATOMIC_NOFETCH<BPFArithOp Opc, string Opstr>
: TYPE_LD_ST<BPF_ATOMIC.Value, BPF_DW.Value,
@@ -887,13 +893,6 @@ class XFALU32<BPFWidthModifer SizeOp, BPFArithOp Opc, string OpcodeStr,
let BPFClass = BPF_STX;
}
-let Constraints = "$dst = $val" in {
- let Predicates = [BPFNoALU32] in {
- def XADDW : XADD<BPF_W, "u32">;
- def XFADDW : XFALU64<BPF_W, BPF_ADD, "u32", "add", atomic_load_add_i32>;
- }
-}
-
let Constraints = "$dst = $val" in {
let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
def XFADDW32 : XFALU32<BPF_W, BPF_ADD, "u32", "add", atomic_load_add_i32>;
diff --git a/llvm/lib/Target/BPF/BPFMIChecking.cpp b/llvm/lib/Target/BPF/BPFMIChecking.cpp
new file mode 100644
index 00000000000000..a968950f5bfcb8
--- /dev/null
+++ b/llvm/lib/Target/BPF/BPFMIChecking.cpp
@@ -0,0 +1,251 @@
+//===-------------- BPFMIChecking.cpp - MI Checking Legality -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs checking to signal errors for certain illegal usages at
+// the MachineInstruction layer. Specifically, the result of an XADD{32,64}
+// insn must not be used. The pass runs in the PreEmit stage, right before the
+// machine code is emitted, at which point register liveness information is
+// still available.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "BPFInstrInfo.h"
+#include "BPFTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "bpf-mi-checking"
+
+namespace {
+
+struct BPFMIPreEmitChecking : public MachineFunctionPass {
+
+ static char ID;
+ MachineFunction *MF;
+ const TargetRegisterInfo *TRI;
+
+ BPFMIPreEmitChecking() : MachineFunctionPass(ID) {
+ initializeBPFMIPreEmitCheckingPass(*PassRegistry::getPassRegistry());
+ }
+
+private:
+ // Initialize class variables.
+ void initialize(MachineFunction &MFParm);
+
+ bool processAtomicInsts();
+
+public:
+
+ // Main entry point for this pass.
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (!skipFunction(MF.getFunction())) {
+ initialize(MF);
+ return processAtomicInsts();
+ }
+ return false;
+ }
+};
+
+// Initialize class variables.
+void BPFMIPreEmitChecking::initialize(MachineFunction &MFParm) {
+ MF = &MFParm;
+ TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
+ LLVM_DEBUG(dbgs() << "*** BPF PreEmit checking pass ***\n\n");
+}
+
+// Make sure all Defs of XADD are dead, meaning the result of any XADD insn is
+// not used.
+//
+// NOTE: The BPF backend hasn't enabled sub-register liveness tracking, so when
+// the source and destination operands of XADD are GPR32, there is no
+// sub-register dead info. If we rely on the generic
+// MachineInstr::allDefsAreDead, then we will raise a false alarm on a GPR32
+// Def.
+//
+// To support GPR32 Defs, ideally we could just enable sub-register liveness
+// tracking on the BPF backend, then allDefsAreDead could work on GPR32 Defs.
+// This requires implementing TargetSubtargetInfo::enableSubRegLiveness on BPF.
+//
+// However, the sub-register liveness tracking module inside LLVM is actually
+// designed for the situation where one register could be split into more than
+// one sub-register, in which case each sub-register could have its own
+// liveness and killing one of them doesn't kill the others. So, tracking
+// liveness for each one makes sense.
+//
+// For BPF, each 64-bit register could only have one 32-bit sub-register. This
+// is exactly the case which LLVM thinks brings no benefit to sub-register
+// tracking, because the live range of a sub-register must always equal that of
+// its parent register, therefore liveness tracking is disabled even if the
+// back-end has implemented enableSubRegLiveness. The detailed information is
+// in r232695:
+//
+// Author: Matthias Braun <matze at braunis.de>
+// Date: Thu Mar 19 00:21:58 2015 +0000
+// Do not track subregister liveness when it brings no benefits
+//
+// Hence, for BPF, we enhance MachineInstr::allDefsAreDead. Since the sole
+// sub-register always has the same liveness as its parent register, LLVM
+// already attaches an implicit 64-bit register Def whenever there is a
+// sub-register Def. The liveness of the implicit 64-bit Def is available.
+// For example, for "lock *(u32 *)(r0 + 4) += w9", the MachineOperand info
+// could be:
+//
+// $w9 = XADDW32 killed $r0, 4, $w9(tied-def 0),
+// implicit killed $r9, implicit-def dead $r9
+//
+// Even though w9 is not marked as Dead, the parent register r9 is marked as
+// Dead correctly, and it is safe to use such information for our purpose.
+static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
+ const MCRegisterClass *GPR64RegClass =
+ &BPFMCRegisterClasses[BPF::GPRRegClassID];
+ std::vector<unsigned> GPR32LiveDefs;
+ std::vector<unsigned> GPR64DeadDefs;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ bool RegIsGPR64;
+
+ if (!MO.isReg() || MO.isUse())
+ continue;
+
+ RegIsGPR64 = GPR64RegClass->contains(MO.getReg());
+ if (!MO.isDead()) {
+ // It is a GPR64 live Def, we are sure it is live.
+ if (RegIsGPR64)
+ return true;
+ // It is a GPR32 live Def; we are unsure whether it is really dead due
+ // to no sub-register liveness tracking. Push it to a vector for a
+ // deferred check.
+ GPR32LiveDefs.push_back(MO.getReg());
+ continue;
+ }
+
+ // Record any GPR64 dead Def as some unmarked GPR32 could be an alias of
+ // its low 32 bits.
+ if (RegIsGPR64)
+ GPR64DeadDefs.push_back(MO.getReg());
+ }
+
+ // No GPR32 live Def, safe to return false.
+ if (GPR32LiveDefs.empty())
+ return false;
+
+ // No GPR64 dead Def, so all those GPR32 live Defs can't have aliases and
+ // therefore must be truly live; safe to return true.
+ if (GPR64DeadDefs.empty())
+ return true;
+
+ // Otherwise, return true if any aliased SuperReg of GPR32 is not dead.
+ for (auto I : GPR32LiveDefs)
+ for (MCPhysReg SR : TRI->superregs(I))
+ if (!llvm::is_contained(GPR64DeadDefs, SR))
+ return true;
+
+ return false;
+}
+
+bool BPFMIPreEmitChecking::processAtomicInsts() {
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+ if (MI.getOpcode() != BPF::XADDW &&
+ MI.getOpcode() != BPF::XADDD &&
+ MI.getOpcode() != BPF::XADDW32)
+ continue;
+
+ LLVM_DEBUG(MI.dump());
+ if (hasLiveDefs(MI, TRI)) {
+ DebugLoc Empty;
+ const DebugLoc &DL = MI.getDebugLoc();
+ const Function &F = MF->getFunction();
+ F.getContext().diagnose(DiagnosticInfoUnsupported{
+ F, "Invalid usage of the XADD return value", DL});
+ }
+ }
+ }
+
+ // Check return values of atomic_fetch_and_{add,and,or,xor}.
+ // If the return is not used, the atomic_fetch_and_<op> instruction
+ // is replaced with atomic_<op> instruction.
+ MachineInstr *ToErase = nullptr;
+ bool Changed = false;
+ const BPFInstrInfo *TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+ if (ToErase) {
+ ToErase->eraseFromParent();
+ ToErase = nullptr;
+ }
+
+ if (MI.getOpcode() != BPF::XFADDW32 && MI.getOpcode() != BPF::XFADDD &&
+ MI.getOpcode() != BPF::XFANDW32 && MI.getOpcode() != BPF::XFANDD &&
+ MI.getOpcode() != BPF::XFXORW32 && MI.getOpcode() != BPF::XFXORD &&
+ MI.getOpcode() != BPF::XFORW32 && MI.getOpcode() != BPF::XFORD)
+ continue;
+
+ if (hasLiveDefs(MI, TRI))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Transforming "; MI.dump());
+ unsigned newOpcode;
+ switch (MI.getOpcode()) {
+ case BPF::XFADDW32:
+ newOpcode = BPF::XADDW32;
+ break;
+ case BPF::XFADDD:
+ newOpcode = BPF::XADDD;
+ break;
+ case BPF::XFANDW32:
+ newOpcode = BPF::XANDW32;
+ break;
+ case BPF::XFANDD:
+ newOpcode = BPF::XANDD;
+ break;
+ case BPF::XFXORW32:
+ newOpcode = BPF::XXORW32;
+ break;
+ case BPF::XFXORD:
+ newOpcode = BPF::XXORD;
+ break;
+ case BPF::XFORW32:
+ newOpcode = BPF::XORW32;
+ break;
+ case BPF::XFORD:
+ newOpcode = BPF::XORD;
+ break;
+ default:
+ llvm_unreachable("Incorrect Atomic Instruction Opcode");
+ }
+
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(newOpcode))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+
+ ToErase = &MI;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(BPFMIPreEmitChecking, "bpf-mi-pemit-checking",
+ "BPF PreEmit Checking", false, false)
+
+char BPFMIPreEmitChecking::ID = 0;
+FunctionPass* llvm::createBPFMIPreEmitCheckingPass()
+{
+ return new BPFMIPreEmitChecking();
+}
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 64b115b8fc8afa..7d91fa8bb824cf 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -178,6 +178,7 @@ void BPFPassConfig::addMachineSSAOptimization() {
}
void BPFPassConfig::addPreEmitPass() {
+ addPass(createBPFMIPreEmitCheckingPass());
if (getOptLevel() != CodeGenOptLevel::None)
if (!DisableMIPeephole)
addPass(createBPFMIPreEmitPeepholePass());
diff --git a/llvm/lib/Target/BPF/CMakeLists.txt b/llvm/lib/Target/BPF/CMakeLists.txt
index 253660d4d62e37..eade4cacb7100e 100644
--- a/llvm/lib/Target/BPF/CMakeLists.txt
+++ b/llvm/lib/Target/BPF/CMakeLists.txt
@@ -39,6 +39,7 @@ add_llvm_target(BPFCodeGen
BPFSubtarget.cpp
BPFTargetMachine.cpp
BPFMIPeephole.cpp
+ BPFMIChecking.cpp
BPFMISimplifyPatchable.cpp
BTFDebug.cpp
diff --git a/llvm/test/CodeGen/BPF/atomics.ll b/llvm/test/CodeGen/BPF/atomics.ll
index 0c16c49f2a873b..096d14c8694779 100644
--- a/llvm/test/CodeGen/BPF/atomics.ll
+++ b/llvm/test/CodeGen/BPF/atomics.ll
@@ -1,10 +1,11 @@
-; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding | FileCheck --check-prefixes=CHECK,CHECK-V2 %s
-; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v3 | FileCheck --check-prefixes=CHECK,CHECK-V3 %s
+; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding | FileCheck %s
+; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v3 | FileCheck --check-prefix=CHECK-V3 %s
; CHECK-LABEL: test_load_add_32
-; CHECK-V2: r2 = atomic_fetch_add((u32 *)(r1 + 0), r2)
-; CHECK-V3: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2)
-; CHECK: encoding: [0xc3,0x21,0x00,0x00,0x01,0x00,0x00,0x00]
+; CHECK: lock *(u32 *)(r1 + 0) += r2
+; CHECK: encoding: [0xc3,0x21
+; CHECK-V3: lock *(u32 *)(r1 + 0) += w2
+; CHECK-V3: encoding: [0xc3,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
define void @test_load_add_32(ptr %p, i32 zeroext %v) {
entry:
atomicrmw add ptr %p, i32 %v seq_cst
@@ -12,8 +13,10 @@ entry:
}
; CHECK-LABEL: test_load_add_64
-; CHECK: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2)
-; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x01,0x00,0x00,0x00]
+; CHECK: lock *(u64 *)(r1 + 0) += r2
+; CHECK: encoding: [0xdb,0x21
+; CHECK-V3: lock *(u64 *)(r1 + 0) += r2
+; CHECK-V3: encoding: [0xdb,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
define void @test_load_add_64(ptr %p, i64 zeroext %v) {
entry:
atomicrmw add ptr %p, i64 %v seq_cst
diff --git a/llvm/test/CodeGen/BPF/atomics_2.ll b/llvm/test/CodeGen/BPF/atomics_2.ll
index c670ddb05b6a77..37a9d8322938cc 100644
--- a/llvm/test/CodeGen/BPF/atomics_2.ll
+++ b/llvm/test/CodeGen/BPF/atomics_2.ll
@@ -214,8 +214,8 @@ entry:
}
; CHECK-LABEL: test_atomic_xor_32
-; CHECK: w2 = atomic_fetch_xor((u32 *)(r1 + 0), w2)
-; CHECK: encoding: [0xc3,0x21,0x00,0x00,0xa1,0x00,0x00,0x00]
+; CHECK: lock *(u32 *)(r1 + 0) ^= w2
+; CHECK: encoding: [0xc3,0x21,0x00,0x00,0xa0,0x00,0x00,0x00]
; CHECK: w0 = 0
define dso_local i32 @test_atomic_xor_32(ptr nocapture %p, i32 %v) local_unnamed_addr {
entry:
@@ -224,8 +224,8 @@ entry:
}
; CHECK-LABEL: test_atomic_xor_64
-; CHECK: r2 = atomic_fetch_xor((u64 *)(r1 + 0), r2)
-; CHECK: encoding: [0xdb,0x21,0x00,0x00,0xa1,0x00,0x00,0x00]
+; CHECK: lock *(u64 *)(r1 + 0) ^= r2
+; CHECK: encoding: [0xdb,0x21,0x00,0x00,0xa0,0x00,0x00,0x00]
; CHECK: w0 = 0
define dso_local i32 @test_atomic_xor_64(ptr nocapture %p, i64 %v) local_unnamed_addr {
entry:
@@ -234,8 +234,8 @@ entry:
}
; CHECK-LABEL: test_atomic_and_64
-; CHECK: r2 = atomic_fetch_and((u64 *)(r1 + 0), r2)
-; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x51,0x00,0x00,0x00]
+; CHECK: lock *(u64 *)(r1 + 0) &= r2
+; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x50,0x00,0x00,0x00]
; CHECK: w0 = 0
define dso_local i32 @test_atomic_and_64(ptr nocapture %p, i64 %v) local_unnamed_addr {
entry:
@@ -244,8 +244,8 @@ entry:
}
; CHECK-LABEL: test_atomic_or_64
-; CHECK: r2 = atomic_fetch_or((u64 *)(r1 + 0), r2)
-; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x41,0x00,0x00,0x00]
+; CHECK: lock *(u64 *)(r1 + 0) |= r2
+; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x40,0x00,0x00,0x00]
; CHECK: w0 = 0
define dso_local i32 @test_atomic_or_64(ptr nocapture %p, i64 %v) local_unnamed_addr {
entry:
diff --git a/llvm/test/CodeGen/BPF/objdump_atomics.ll b/llvm/test/CodeGen/BPF/objdump_atomics.ll
index c4cb16b2c36418..fcc889ba300e39 100644
--- a/llvm/test/CodeGen/BPF/objdump_atomics.ll
+++ b/llvm/test/CodeGen/BPF/objdump_atomics.ll
@@ -2,7 +2,7 @@
; CHECK-LABEL: test_load_add_32
; CHECK: c3 21
-; CHECK: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2)
+; CHECK: lock *(u32 *)(r1 + 0) += w2
define void @test_load_add_32(ptr %p, i32 zeroext %v) {
entry:
atomicrmw add ptr %p, i32 %v seq_cst
@@ -11,7 +11,7 @@ entry:
; CHECK-LABEL: test_load_add_64
; CHECK: db 21
-; CHECK: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2)
+; CHECK: lock *(u64 *)(r1 + 0) += r2
define void @test_load_add_64(ptr %p, i64 zeroext %v) {
entry:
atomicrmw add ptr %p, i64 %v seq_cst
diff --git a/llvm/test/CodeGen/BPF/xadd.ll b/llvm/test/CodeGen/BPF/xadd.ll
new file mode 100644
index 00000000000000..5aeeb9baf7b892
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/xadd.ll
@@ -0,0 +1,59 @@
+; RUN: not llc -march=bpfel < %s 2>&1 | FileCheck %s
+; RUN: not llc -march=bpfeb < %s 2>&1 | FileCheck %s
+
+; This file is generated with the following command and source code:
+; $ clang -target bpf -O2 -g -S -emit-llvm t.c
+; $ cat t.c
+; int test(int *ptr) {
+; int r;
+; __sync_fetch_and_add(ptr, 4);
+; r = __sync_fetch_and_add(ptr, 6);
+; return r;
+; }
+
+; ModuleID = 't.c'
+source_filename = "t.c"
+target datalayout = "e-m:e-p:64:64-i64:64-n32:64-S128"
+target triple = "bpf"
+
+; Function Attrs: nounwind
+define dso_local i32 @test(ptr nocapture %ptr) local_unnamed_addr #0 !dbg !7 {
+entry:
+ call void @llvm.dbg.value(metadata ptr %ptr, metadata !13, metadata !DIExpression()), !dbg !15
+ %0 = atomicrmw add ptr %ptr, i32 4 seq_cst, !dbg !16
+ %1 = atomicrmw add ptr %ptr, i32 6 seq_cst, !dbg !17
+; CHECK: in function test i32 (ptr): Invalid usage of the XADD return value
+ call void @llvm.dbg.value(metadata i32 %1, metadata !14, metadata !DIExpression()), !dbg !18
+ ret i32 %1, !dbg !19
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone speculatable }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.0 (trunk 342605) (llvm/trunk 342612)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None)
+!1 = !DIFile(filename: "t.c", directory: "/home/yhs/work/tests/llvm/sync/test1")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!"clang version 8.0.0 (trunk 342605) (llvm/trunk 342612)"}
+!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12)
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10, !11}
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64)
+!12 = !{!13, !14}
+!13 = !DILocalVariable(name: "ptr", arg: 1, scope: !7, file: !1, line: 1, type: !11)
+!14 = !DILocalVariable(name: "r", scope: !7, file: !1, line: 2, type: !10)
+!15 = !DILocation(line: 1, column: 15, scope: !7)
+!16 = !DILocation(line: 3, column: 4, scope: !7)
+!17 = !DILocation(line: 4, column: 8, scope: !7)
+!18 = !DILocation(line: 2, column: 8, scope: !7)
+!19 = !DILocation(line: 5, column: 4, scope: !7)
diff --git a/llvm/test/CodeGen/BPF/xadd_legal.ll b/llvm/test/CodeGen/BPF/xadd_legal.ll
index 88f04d85a779f8..f4d4f737cc6190 100644
--- a/llvm/test/CodeGen/BPF/xadd_legal.ll
+++ b/llvm/test/CodeGen/BPF/xadd_legal.ll
@@ -19,8 +19,8 @@ define dso_local i32 @test(ptr nocapture %ptr, i64 %a) {
entry:
%conv = trunc i64 %a to i32
%0 = atomicrmw add ptr %ptr, i32 %conv seq_cst
-; CHECK-64: r2 = atomic_fetch_add((u32 *)(r1 + 0), r2)
-; CHECK-32: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2)
+; CHECK-64: lock *(u32 *)(r1 + 0) += r2
+; CHECK-32: lock *(u32 *)(r1 + 0) += w2
%1 = load i32, ptr %ptr, align 4
ret i32 %1
}
From 7e0c7e1634ab0b96dc01384029eecb483c40dbbe Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song at linux.dev>
Date: Wed, 28 Aug 2024 21:12:08 -0700
Subject: [PATCH 2/2] [BPF] Do not convert atomic_fetch_and_*() to
atomic_<op>()'s
In the previous commit, atomic_fetch_and_*() operations are converted
to atomic_<op>()'s if the return value is not used. This is not
what we want; we would like to preserve atomic_fetch_and_*() insns
so the bpf jit can add proper barrier insns.
Preserving atomic_fetch_and_*() is okay for most __sync_fetch_and_*()
functions, but not for __sync_fetch_and_add(), since
__sync_fetch_and_add() has been used to generate locked insns
with cpu=v1/v2.
So, while preserving atomic_fetch_and_*() even if the return value
is not used, XFADDD in BPFInstrInfo.td is adjusted to emit a
locked add insn for cpu v1/v2 and an atomic_fetch_add() insn
for cpu >= v3.
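For example (a hypothetical snippet; the lowering shown in the comments is
the intended behavior described above):

  void inc(long *p)
  {
      /* v1/v2: lock *(u64 *)(r1 + 0) += r2
       * >= v3: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2) */
      (void)__sync_fetch_and_add(p, 1);
  }

  void clear_bits(long *p, long mask)
  {
      /* Kept as r2 = atomic_fetch_and((u64 *)(r1 + 0), r2) even though
       * the result is unused, so the bpf jit can add proper barrier
       * insns; running this needs a kernel with atomic_fetch_and
       * support. */
      (void)__sync_fetch_and_and(p, ~mask);
  }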
---
llvm/lib/Target/BPF/BPF.h | 2 +-
llvm/lib/Target/BPF/BPFInstrInfo.td | 7 ++-
llvm/lib/Target/BPF/BPFMIChecking.cpp | 84 +++------------------------
llvm/test/CodeGen/BPF/atomics.ll | 8 +--
llvm/test/CodeGen/BPF/atomics_2.ll | 16 ++---
llvm/test/CodeGen/BPF/xadd_legal.ll | 2 +-
6 files changed, 25 insertions(+), 94 deletions(-)
diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h
index 694d7bacf64211..f07ae4c9baf1c6 100644
--- a/llvm/lib/Target/BPF/BPF.h
+++ b/llvm/lib/Target/BPF/BPF.h
@@ -37,7 +37,7 @@ InstructionSelector *createBPFInstructionSelector(const BPFTargetMachine &,
void initializeBPFCheckAndAdjustIRPass(PassRegistry&);
void initializeBPFDAGToDAGISelLegacyPass(PassRegistry &);
void initializeBPFMIPeepholePass(PassRegistry &);
-void initializeBPFMIPreEmitCheckingPass(PassRegistry&);
+void initializeBPFMIPreEmitCheckingPass(PassRegistry &);
void initializeBPFMIPreEmitPeepholePass(PassRegistry &);
void initializeBPFMISimplifyPatchablePass(PassRegistry &);
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index cf9764ca62123d..f5bd30cceb0d9c 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -806,6 +806,7 @@ class XADD<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
let Constraints = "$dst = $val" in {
let Predicates = [BPFNoALU32] in {
def XADDW : XADD<BPF_W, "u32", atomic_load_add_i32>;
+ def XADDD : XADD<BPF_DW, "u64", atomic_load_add_i64>;
}
}
@@ -849,8 +850,6 @@ let Constraints = "$dst = $val" in {
def XORW32 : ATOMIC32_NOFETCH<BPF_OR, "|">;
def XXORW32 : ATOMIC32_NOFETCH<BPF_XOR, "^">;
}
-
- def XADDD : ATOMIC_NOFETCH<BPF_ADD, "+">;
def XANDD : ATOMIC_NOFETCH<BPF_AND, "&">;
def XORD : ATOMIC_NOFETCH<BPF_OR, "|">;
def XXORD : ATOMIC_NOFETCH<BPF_XOR, "^">;
@@ -901,7 +900,9 @@ let Constraints = "$dst = $val" in {
def XFXORW32 : XFALU32<BPF_W, BPF_XOR, "u32", "xor", atomic_load_xor_i32>;
}
- def XFADDD : XFALU64<BPF_DW, BPF_ADD, "u64", "add", atomic_load_add_i64>;
+ let Predicates = [BPFHasALU32] in {
+ def XFADDD : XFALU64<BPF_DW, BPF_ADD, "u64", "add", atomic_load_add_i64>;
+ }
def XFANDD : XFALU64<BPF_DW, BPF_AND, "u64", "and", atomic_load_and_i64>;
def XFORD : XFALU64<BPF_DW, BPF_OR, "u64", "or", atomic_load_or_i64>;
def XFXORD : XFALU64<BPF_DW, BPF_XOR, "u64", "xor", atomic_load_xor_i64>;
diff --git a/llvm/lib/Target/BPF/BPFMIChecking.cpp b/llvm/lib/Target/BPF/BPFMIChecking.cpp
index a968950f5bfcb8..24224f6c1e9e66 100644
--- a/llvm/lib/Target/BPF/BPFMIChecking.cpp
+++ b/llvm/lib/Target/BPF/BPFMIChecking.cpp
@@ -43,15 +43,14 @@ struct BPFMIPreEmitChecking : public MachineFunctionPass {
// Initialize class variables.
void initialize(MachineFunction &MFParm);
- bool processAtomicInsts();
+ void processAtomicInsts();
public:
-
// Main entry point for this pass.
bool runOnMachineFunction(MachineFunction &MF) override {
if (!skipFunction(MF.getFunction())) {
initialize(MF);
- return processAtomicInsts();
+ processAtomicInsts();
}
return false;
}
@@ -107,7 +106,7 @@ void BPFMIPreEmitChecking::initialize(MachineFunction &MFParm) {
// Dead correctly, and it is safe to use such information for our purpose.
static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
const MCRegisterClass *GPR64RegClass =
- &BPFMCRegisterClasses[BPF::GPRRegClassID];
+ &BPFMCRegisterClasses[BPF::GPRRegClassID];
std::vector<unsigned> GPR32LiveDefs;
std::vector<unsigned> GPR64DeadDefs;
@@ -153,12 +152,10 @@ static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
return false;
}
-bool BPFMIPreEmitChecking::processAtomicInsts() {
+void BPFMIPreEmitChecking::processAtomicInsts() {
for (MachineBasicBlock &MBB : *MF) {
for (MachineInstr &MI : MBB) {
- if (MI.getOpcode() != BPF::XADDW &&
- MI.getOpcode() != BPF::XADDD &&
- MI.getOpcode() != BPF::XADDW32)
+ if (MI.getOpcode() != BPF::XADDW && MI.getOpcode() != BPF::XADDD)
continue;
LLVM_DEBUG(MI.dump());
@@ -171,81 +168,14 @@ bool BPFMIPreEmitChecking::processAtomicInsts() {
}
}
}
-
- // Check return values of atomic_fetch_and_{add,and,or,xor}.
- // If the return is not used, the atomic_fetch_and_<op> instruction
- // is replaced with atomic_<op> instruction.
- MachineInstr *ToErase = nullptr;
- bool Changed = false;
- const BPFInstrInfo *TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
- for (MachineBasicBlock &MBB : *MF) {
- for (MachineInstr &MI : MBB) {
- if (ToErase) {
- ToErase->eraseFromParent();
- ToErase = nullptr;
- }
-
- if (MI.getOpcode() != BPF::XFADDW32 && MI.getOpcode() != BPF::XFADDD &&
- MI.getOpcode() != BPF::XFANDW32 && MI.getOpcode() != BPF::XFANDD &&
- MI.getOpcode() != BPF::XFXORW32 && MI.getOpcode() != BPF::XFXORD &&
- MI.getOpcode() != BPF::XFORW32 && MI.getOpcode() != BPF::XFORD)
- continue;
-
- if (hasLiveDefs(MI, TRI))
- continue;
-
- LLVM_DEBUG(dbgs() << "Transforming "; MI.dump());
- unsigned newOpcode;
- switch (MI.getOpcode()) {
- case BPF::XFADDW32:
- newOpcode = BPF::XADDW32;
- break;
- case BPF::XFADDD:
- newOpcode = BPF::XADDD;
- break;
- case BPF::XFANDW32:
- newOpcode = BPF::XANDW32;
- break;
- case BPF::XFANDD:
- newOpcode = BPF::XANDD;
- break;
- case BPF::XFXORW32:
- newOpcode = BPF::XXORW32;
- break;
- case BPF::XFXORD:
- newOpcode = BPF::XXORD;
- break;
- case BPF::XFORW32:
- newOpcode = BPF::XORW32;
- break;
- case BPF::XFORD:
- newOpcode = BPF::XORD;
- break;
- default:
- llvm_unreachable("Incorrect Atomic Instruction Opcode");
- }
-
- BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(newOpcode))
- .add(MI.getOperand(0))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3));
-
- ToErase = &MI;
- Changed = true;
- }
- }
-
- return Changed;
}
-} // end anonymous namespace
+} // namespace
INITIALIZE_PASS(BPFMIPreEmitChecking, "bpf-mi-pemit-checking",
"BPF PreEmit Checking", false, false)
char BPFMIPreEmitChecking::ID = 0;
-FunctionPass* llvm::createBPFMIPreEmitCheckingPass()
-{
+FunctionPass *llvm::createBPFMIPreEmitCheckingPass() {
return new BPFMIPreEmitChecking();
}
diff --git a/llvm/test/CodeGen/BPF/atomics.ll b/llvm/test/CodeGen/BPF/atomics.ll
index 096d14c8694779..c17b94af5f7bd9 100644
--- a/llvm/test/CodeGen/BPF/atomics.ll
+++ b/llvm/test/CodeGen/BPF/atomics.ll
@@ -4,8 +4,8 @@
; CHECK-LABEL: test_load_add_32
; CHECK: lock *(u32 *)(r1 + 0) += r2
; CHECK: encoding: [0xc3,0x21
-; CHECK-V3: lock *(u32 *)(r1 + 0) += w2
-; CHECK-V3: encoding: [0xc3,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-V3: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2)
+; CHECK-V3: encoding: [0xc3,0x21,0x00,0x00,0x01,0x00,0x00,0x00]
define void @test_load_add_32(ptr %p, i32 zeroext %v) {
entry:
atomicrmw add ptr %p, i32 %v seq_cst
@@ -15,8 +15,8 @@ entry:
; CHECK-LABEL: test_load_add_64
; CHECK: lock *(u64 *)(r1 + 0) += r2
; CHECK: encoding: [0xdb,0x21
-; CHECK-V3: lock *(u64 *)(r1 + 0) += r2
-; CHECK-V3: encoding: [0xdb,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK-V3: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2)
+; CHECK-V3: encoding: [0xdb,0x21,0x00,0x00,0x01,0x00,0x00,0x00]
define void @test_load_add_64(ptr %p, i64 zeroext %v) {
entry:
atomicrmw add ptr %p, i64 %v seq_cst
diff --git a/llvm/test/CodeGen/BPF/atomics_2.ll b/llvm/test/CodeGen/BPF/atomics_2.ll
index 37a9d8322938cc..6371e3b875638e 100644
--- a/llvm/test/CodeGen/BPF/atomics_2.ll
+++ b/llvm/test/CodeGen/BPF/atomics_2.ll
@@ -214,8 +214,8 @@ entry:
}
; CHECK-LABEL: test_atomic_xor_32
-; CHECK: lock *(u32 *)(r1 + 0) ^= w2
-; CHECK: encoding: [0xc3,0x21,0x00,0x00,0xa0,0x00,0x00,0x00]
+; CHECK: w2 = atomic_fetch_xor((u32 *)(r1 + 0), w2)
+; CHECK: encoding: [0xc3,0x21,0x00,0x00,0xa1,0x00,0x00,0x00]
; CHECK: w0 = 0
define dso_local i32 @test_atomic_xor_32(ptr nocapture %p, i32 %v) local_unnamed_addr {
entry:
@@ -224,8 +224,8 @@ entry:
}
; CHECK-LABEL: test_atomic_xor_64
-; CHECK: lock *(u64 *)(r1 + 0) ^= r2
-; CHECK: encoding: [0xdb,0x21,0x00,0x00,0xa0,0x00,0x00,0x00]
+; CHECK: atomic_fetch_xor((u64 *)(r1 + 0), r2)
+; CHECK: encoding: [0xdb,0x21,0x00,0x00,0xa1,0x00,0x00,0x00]
; CHECK: w0 = 0
define dso_local i32 @test_atomic_xor_64(ptr nocapture %p, i64 %v) local_unnamed_addr {
entry:
@@ -234,8 +234,8 @@ entry:
}
; CHECK-LABEL: test_atomic_and_64
-; CHECK: lock *(u64 *)(r1 + 0) &= r2
-; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x50,0x00,0x00,0x00]
+; CHECK: r2 = atomic_fetch_and((u64 *)(r1 + 0), r2)
+; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x51,0x00,0x00,0x00]
; CHECK: w0 = 0
define dso_local i32 @test_atomic_and_64(ptr nocapture %p, i64 %v) local_unnamed_addr {
entry:
@@ -244,8 +244,8 @@ entry:
}
; CHECK-LABEL: test_atomic_or_64
-; CHECK: lock *(u64 *)(r1 + 0) |= r2
-; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x40,0x00,0x00,0x00]
+; CHECK: r2 = atomic_fetch_or((u64 *)(r1 + 0), r2)
+; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x41,0x00,0x00,0x00]
; CHECK: w0 = 0
define dso_local i32 @test_atomic_or_64(ptr nocapture %p, i64 %v) local_unnamed_addr {
entry:
diff --git a/llvm/test/CodeGen/BPF/xadd_legal.ll b/llvm/test/CodeGen/BPF/xadd_legal.ll
index f4d4f737cc6190..9b07afade3fee9 100644
--- a/llvm/test/CodeGen/BPF/xadd_legal.ll
+++ b/llvm/test/CodeGen/BPF/xadd_legal.ll
@@ -20,7 +20,7 @@ entry:
%conv = trunc i64 %a to i32
%0 = atomicrmw add ptr %ptr, i32 %conv seq_cst
; CHECK-64: lock *(u32 *)(r1 + 0) += r2
-; CHECK-32: lock *(u32 *)(r1 + 0) += w2
+; CHECK-32: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2)
%1 = load i32, ptr %ptr, align 4
ret i32 %1
}