[clang] [llvm] [BPF] introduce `__attribute__((bpf_fastcall))` (PR #101228)

via cfe-commits cfe-commits at lists.llvm.org
Thu Aug 15 12:31:13 PDT 2024


https://github.com/eddyz87 updated https://github.com/llvm/llvm-project/pull/101228

>From 6a3b5c79d148863f39b84ce3da4e4794829c1c56 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87 at gmail.com>
Date: Wed, 8 May 2024 15:29:47 -0700
Subject: [PATCH 1/5] [BPF] introduce __attribute__((bpf_fastcall))

This commit introduces attribute bpf_fastcall to declare BPF functions
that do not clobber some of the caller saved registers (R0-R5).

The idea is to generate the code complying with generic BPF ABI,
but allow compatible Linux Kernel to remove unnecessary spills and
fills of non-scratched registers (given some compiler assistance).

For such functions do register allocation as-if caller saved registers
are not clobbered, but later wrap the calls with spill and fill
patterns that are simple to recognize in kernel.

For example for the following C code:

   #define __bpf_fastcall __attribute__((bpf_fastcall))

   void bar(void) __bpf_fastcall;
   void buz(long i, long j, long k);

   void foo(long i, long j, long k) {
     bar();
     buz(i, j, k);
   }

First allocate registers as if:

   foo:
     call bar    # note: no spills for i,j,k (r1,r2,r3)
     call buz
     exit

And later insert spills fills on the peephole phase:

   foo:
     *(u64 *)(r10 - 8) = r1;  # Such call pattern is
     *(u64 *)(r10 - 16) = r2; # correct when used with
     *(u64 *)(r10 - 24) = r3; # old kernels.
     call bar
     r3 = *(u64 *)(r10 - 24); # But also allows new
     r2 = *(u64 *)(r10 - 16); # kernels to recognize the
     r1 = *(u64 *)(r10 - 8);  # pattern and remove spills/fills.
     call buz
     exit

The offsets for generated spills/fills are picked as minimal stack
offsets for the function. Allocated stack slots are not used for any
other purposes, in order to simplify in-kernel analysis.
---
 clang/include/clang/Basic/Attr.td             |   8 ++
 clang/include/clang/Basic/AttrDocs.td         |  19 +++
 clang/lib/CodeGen/CGCall.cpp                  |   2 +
 clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c  |  24 ++++
 ...a-attribute-supported-attributes-list.test |   1 +
 clang/test/Sema/bpf-attr-bpf-fastcall.c       |   7 ++
 llvm/lib/Target/BPF/BPFCallingConv.td         |   1 +
 llvm/lib/Target/BPF/BPFISelLowering.cpp       |  31 +++++
 llvm/lib/Target/BPF/BPFInstrInfo.td           |   4 +-
 llvm/lib/Target/BPF/BPFMIPeephole.cpp         |  88 ++++++++++++++
 llvm/lib/Target/BPF/BPFRegisterInfo.cpp       |  11 ++
 llvm/lib/Target/BPF/BPFRegisterInfo.h         |   3 +
 llvm/test/CodeGen/BPF/bpf-fastcall-1.ll       |  46 ++++++++
 llvm/test/CodeGen/BPF/bpf-fastcall-2.ll       |  68 +++++++++++
 llvm/test/CodeGen/BPF/bpf-fastcall-3.ll       |  62 ++++++++++
 .../CodeGen/BPF/bpf-fastcall-regmask-1.ll     | 110 ++++++++++++++++++
 16 files changed, 482 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c
 create mode 100644 clang/test/Sema/bpf-attr-bpf-fastcall.c
 create mode 100644 llvm/test/CodeGen/BPF/bpf-fastcall-1.ll
 create mode 100644 llvm/test/CodeGen/BPF/bpf-fastcall-2.ll
 create mode 100644 llvm/test/CodeGen/BPF/bpf-fastcall-3.ll
 create mode 100644 llvm/test/CodeGen/BPF/bpf-fastcall-regmask-1.ll

diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 46d0a66d59c375..437cf3999bc365 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -2189,6 +2189,14 @@ def BTFTypeTag : TypeAttr {
   let LangOpts = [COnly];
 }
 
+def BPFFastCall : InheritableAttr,
+                  TargetSpecificAttr<TargetBPF> {
+  let Spellings = [GCC<"bpf_fastcall">];
+  let Subjects = SubjectList<[FunctionLike]>;
+  let Documentation = [BPFFastCallDocs];
+  let SimpleHandler = 1;
+}
+
 def WebAssemblyExportName : InheritableAttr,
                             TargetSpecificAttr<TargetWebAssembly> {
   let Spellings = [Clang<"export_name">];
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 4b8d520d73893d..6f0f659f064d95 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -2317,6 +2317,25 @@ section.
   }];
 }
 
+def BPFFastCallDocs : Documentation {
+  let Category = DocCatType;
+  let Content = [{
+Functions annotated with this attribute are likely to be inlined by BPF JIT.
+It is assumed that inlined implementation uses less caller saved registers,
+than a regular function.
+Specifically, the following registers are likely to be preserved:
+- ``R0`` if function return value is ``void``;
+- ``R2-R5` if function takes 1 argument;
+- ``R3-R5` if function takes 2 arguments;
+- ``R4-R5` if function takes 3 arguments;
+- ``R5`` if function takes 4 arguments;
+
+For such functions Clang generates code pattern that allows BPF JIT
+to recognize and remove unnecessary spills and fills of the preserved
+registers.
+  }];
+}
+
 def MipsInterruptDocs : Documentation {
   let Category = DocCatFunction;
   let Heading = "interrupt (MIPS)";
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 2f3dd5d01fa6c9..dbddf49de964bc 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2447,6 +2447,8 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
       FuncAttrs.addAttribute(llvm::Attribute::NoCfCheck);
     if (TargetDecl->hasAttr<LeafAttr>())
       FuncAttrs.addAttribute(llvm::Attribute::NoCallback);
+    if (TargetDecl->hasAttr<BPFFastCallAttr>())
+      FuncAttrs.addAttribute("bpf_fastcall");
 
     HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>();
     if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) {
diff --git a/clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c b/clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c
new file mode 100644
index 00000000000000..bf49c3c9be0864
--- /dev/null
+++ b/clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c
@@ -0,0 +1,24 @@
+// REQUIRES: bpf-registered-target
+// RUN: %clang_cc1 -triple bpf -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
+
+#define __bpf_fastcall __attribute__((bpf_fastcall))
+
+void test(void) __bpf_fastcall;
+void (*ptr)(void) __bpf_fastcall;
+
+void foo(void) {
+  test();
+  (*ptr)();
+}
+
+// CHECK: @ptr = global ptr null
+// CHECK: define {{.*}} @foo()
+// CHECK: entry:
+// CHECK:   call void @test() #[[test_attr:[0-9]+]]
+// CHECK:   %[[ptr:.*]] = load ptr, ptr @ptr, align 8
+// CHECK:   call void %[[ptr]]() #[[test_attr]]
+// CHECK:   ret void
+
+// CHECK: declare void @test() #[[ptr_attr:[0-9]+]]
+// CHECK: attributes #1 = { {{.*}}"bpf_fastcall"{{.*}} }
+// CHECK: attributes #[[test_attr]] = { {{.*}}"bpf_fastcall"{{.*}} }
diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index e082db698ef0ce..b8f2b02d758d9e 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -22,6 +22,7 @@
 // CHECK-NEXT: AssumeAligned (SubjectMatchRule_objc_method, SubjectMatchRule_function)
 // CHECK-NEXT: Availability ((SubjectMatchRule_record, SubjectMatchRule_enum, SubjectMatchRule_enum_constant, SubjectMatchRule_field, SubjectMatchRule_function, SubjectMatchRule_namespace, SubjectMatchRule_objc_category, SubjectMatchRule_objc_implementation, SubjectMatchRule_objc_interface, SubjectMatchRule_objc_method, SubjectMatchRule_objc_property, SubjectMatchRule_objc_protocol, SubjectMatchRule_record, SubjectMatchRule_type_alias, SubjectMatchRule_variable))
 // CHECK-NEXT: AvailableOnlyInDefaultEvalMethod (SubjectMatchRule_type_alias)
+// CHECK-NEXT: BPFFastCall (SubjectMatchRule_hasType_functionType)
 // CHECK-NEXT: BPFPreserveAccessIndex (SubjectMatchRule_record)
 // CHECK-NEXT: BPFPreserveStaticOffset (SubjectMatchRule_record)
 // CHECK-NEXT: BTFDeclTag (SubjectMatchRule_variable, SubjectMatchRule_function, SubjectMatchRule_record, SubjectMatchRule_field, SubjectMatchRule_type_alias)
diff --git a/clang/test/Sema/bpf-attr-bpf-fastcall.c b/clang/test/Sema/bpf-attr-bpf-fastcall.c
new file mode 100644
index 00000000000000..81a5cf68a8c061
--- /dev/null
+++ b/clang/test/Sema/bpf-attr-bpf-fastcall.c
@@ -0,0 +1,7 @@
+// REQUIRES: bpf-registered-target
+// RUN: %clang_cc1 %s -triple bpf -verify
+
+__attribute__((bpf_fastcall)) int var; // expected-warning {{'bpf_fastcall' attribute only applies to functions and function pointers}}
+
+__attribute__((bpf_fastcall)) void func();
+__attribute__((bpf_fastcall(1))) void func_invalid(); // expected-error {{'bpf_fastcall' attribute takes no arguments}}
diff --git a/llvm/lib/Target/BPF/BPFCallingConv.td b/llvm/lib/Target/BPF/BPFCallingConv.td
index ef4ef1930aa8fb..a557211437e95f 100644
--- a/llvm/lib/Target/BPF/BPFCallingConv.td
+++ b/llvm/lib/Target/BPF/BPFCallingConv.td
@@ -46,3 +46,4 @@ def CC_BPF32 : CallingConv<[
 ]>;
 
 def CSR : CalleeSavedRegs<(add R6, R7, R8, R9, R10)>;
+def CSR_PreserveAll : CalleeSavedRegs<(add R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10)>;
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 071fe004806e3e..ff23d3b055d0d5 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -402,6 +402,21 @@ SDValue BPFTargetLowering::LowerFormalArguments(
 
 const size_t BPFTargetLowering::MaxArgs = 5;
 
+static void resetRegMaskBit(const TargetRegisterInfo *TRI, uint32_t *RegMask,
+                            MCRegister Reg) {
+  for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+    RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
+}
+
+static uint32_t *regMaskFromTemplate(const TargetRegisterInfo *TRI,
+                                     MachineFunction &MF,
+                                     const uint32_t *BaseRegMask) {
+  uint32_t *RegMask = MF.allocateRegMask();
+  unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
+  memcpy(RegMask, BaseRegMask, sizeof(RegMask[0]) * RegMaskSize);
+  return RegMask;
+}
+
 SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                      SmallVectorImpl<SDValue> &InVals) const {
   SelectionDAG &DAG = CLI.DAG;
@@ -513,6 +528,22 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   for (auto &Reg : RegsToPass)
     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
 
+  bool HasFastCall =
+      (CLI.CB && isa<CallInst>(CLI.CB) && CLI.CB->hasFnAttr("bpf_fastcall"));
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  if (HasFastCall) {
+    uint32_t *RegMask = regMaskFromTemplate(
+        TRI, MF, TRI->getCallPreservedMask(MF, CallingConv::PreserveAll));
+    for (auto const &RegPair : RegsToPass)
+      resetRegMaskBit(TRI, RegMask, RegPair.first);
+    if (!CLI.CB->getType()->isVoidTy())
+      resetRegMaskBit(TRI, RegMask, BPF::R0);
+    Ops.push_back(DAG.getRegisterMask(RegMask));
+  } else {
+    Ops.push_back(
+        DAG.getRegisterMask(TRI->getCallPreservedMask(MF, CLI.CallConv)));
+  }
+
   if (InGlue.getNode())
     Ops.push_back(InGlue);
 
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 55989f5eb6a3c0..cf9764ca62123d 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -677,9 +677,7 @@ let isBranch = 1, isTerminator = 1, hasDelaySlot=0, isBarrier = 1 in {
 }
 
 // Jump and link
-let isCall=1, hasDelaySlot=0, Uses = [R11],
-    // Potentially clobbered registers
-    Defs = [R0, R1, R2, R3, R4, R5] in {
+let isCall=1, hasDelaySlot=0, Uses = [R11] in {
   def JAL  : CALL<"call">;
   def JALX  : CALLX<"callx">;
 }
diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
index f0edf706bd8fd7..5ada86a8bf1c29 100644
--- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
@@ -24,6 +24,8 @@
 #include "BPFInstrInfo.h"
 #include "BPFTargetMachine.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -319,6 +321,7 @@ struct BPFMIPreEmitPeephole : public MachineFunctionPass {
   bool in16BitRange(int Num);
   bool eliminateRedundantMov();
   bool adjustBranch();
+  bool insertMissingCallerSavedSpills();
 
 public:
 
@@ -333,6 +336,7 @@ struct BPFMIPreEmitPeephole : public MachineFunctionPass {
     Changed = eliminateRedundantMov();
     if (SupportGotol)
       Changed = adjustBranch() || Changed;
+    Changed |= insertMissingCallerSavedSpills();
     return Changed;
   }
 };
@@ -596,6 +600,90 @@ bool BPFMIPreEmitPeephole::adjustBranch() {
   return Changed;
 }
 
+static const unsigned CallerSavedRegs[] = {BPF::R0, BPF::R1, BPF::R2,
+                                           BPF::R3, BPF::R4, BPF::R5};
+
+struct BPFFastCall {
+  MachineInstr *MI;
+  unsigned LiveCallerSavedRegs;
+};
+
+static void collectBPFFastCalls(const TargetRegisterInfo *TRI,
+                                LivePhysRegs &LiveRegs, MachineBasicBlock &BB,
+                                SmallVectorImpl<BPFFastCall> &Calls) {
+  LiveRegs.init(*TRI);
+  LiveRegs.addLiveOuts(BB);
+  Calls.clear();
+  for (MachineInstr &MI : llvm::reverse(BB)) {
+    unsigned LiveCallerSavedRegs;
+    if (!MI.isCall())
+      goto NextInsn;
+    LiveCallerSavedRegs = 0;
+    for (MCRegister R : CallerSavedRegs) {
+      bool DoSpillFill = !MI.definesRegister(R, TRI) && LiveRegs.contains(R);
+      if (!DoSpillFill)
+        continue;
+      LiveCallerSavedRegs |= 1 << R;
+    }
+    if (LiveCallerSavedRegs)
+      Calls.push_back({&MI, LiveCallerSavedRegs});
+  NextInsn:
+    LiveRegs.stepBackward(MI);
+  }
+}
+
+static int64_t computeMinFixedObjOffset(MachineFrameInfo &MFI,
+                                        unsigned SlotSize) {
+  int64_t MinFixedObjOffset = 0;
+  // Same logic as in X86FrameLowering::adjustFrameForMsvcCxxEh()
+  for (int I = MFI.getObjectIndexBegin(); I < MFI.getObjectIndexEnd(); ++I) {
+    if (MFI.isDeadObjectIndex(I))
+      continue;
+    MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
+  }
+  MinFixedObjOffset -=
+      (SlotSize + MinFixedObjOffset % SlotSize) & (SlotSize - 1);
+  return MinFixedObjOffset;
+}
+
+bool BPFMIPreEmitPeephole::insertMissingCallerSavedSpills() {
+  MachineFrameInfo &MFI = MF->getFrameInfo();
+  SmallVector<BPFFastCall, 8> Calls;
+  LivePhysRegs LiveRegs;
+  const unsigned SlotSize = 8;
+  int64_t MinFixedObjOffset = computeMinFixedObjOffset(MFI, SlotSize);
+  bool Changed = false;
+  for (MachineBasicBlock &BB : *MF) {
+    collectBPFFastCalls(TRI, LiveRegs, BB, Calls);
+    Changed |= !Calls.empty();
+    for (BPFFastCall &Call : Calls) {
+      int64_t CurOffset = MinFixedObjOffset;
+      for (MCRegister Reg : CallerSavedRegs) {
+        if (((1 << Reg) & Call.LiveCallerSavedRegs) == 0)
+          continue;
+        // Allocate stack object
+        CurOffset -= SlotSize;
+        MFI.CreateFixedSpillStackObject(SlotSize, CurOffset);
+        // Generate spill
+        BuildMI(BB, Call.MI->getIterator(), Call.MI->getDebugLoc(),
+                TII->get(BPF::STD))
+            .addReg(Reg)
+            .addReg(BPF::R10)
+            .addImm(CurOffset)
+            .addImm(0);
+        // Generate fill
+        BuildMI(BB, ++Call.MI->getIterator(), Call.MI->getDebugLoc(),
+                TII->get(BPF::LDD))
+            .addReg(Reg)
+            .addReg(BPF::R10)
+            .addImm(CurOffset)
+            .addImm(0);
+      }
+    }
+  }
+  return Changed;
+}
+
 } // end default namespace
 
 INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
diff --git a/llvm/lib/Target/BPF/BPFRegisterInfo.cpp b/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
index 84af6806abb36c..69e1318954a973 100644
--- a/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
+++ b/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
@@ -40,6 +40,17 @@ BPFRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   return CSR_SaveList;
 }
 
+const uint32_t *
+BPFRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+                                      CallingConv::ID CC) const {
+  switch (CC) {
+  default:
+    return CSR_RegMask;
+  case CallingConv::PreserveAll:
+    return CSR_PreserveAll_RegMask;
+  }
+}
+
 BitVector BPFRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
   markSuperRegs(Reserved, BPF::W10); // [W|R]10 is read only frame pointer
diff --git a/llvm/lib/Target/BPF/BPFRegisterInfo.h b/llvm/lib/Target/BPF/BPFRegisterInfo.h
index f7dea75ebea6f9..db868769a1579a 100644
--- a/llvm/lib/Target/BPF/BPFRegisterInfo.h
+++ b/llvm/lib/Target/BPF/BPFRegisterInfo.h
@@ -26,6 +26,9 @@ struct BPFRegisterInfo : public BPFGenRegisterInfo {
 
   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
 
+  const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+                                       CallingConv::ID) const override;
+
   BitVector getReservedRegs(const MachineFunction &MF) const override;
 
   bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
diff --git a/llvm/test/CodeGen/BPF/bpf-fastcall-1.ll b/llvm/test/CodeGen/BPF/bpf-fastcall-1.ll
new file mode 100644
index 00000000000000..fd81314a495ef8
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/bpf-fastcall-1.ll
@@ -0,0 +1,46 @@
+; RUN: llc -O2 --march=bpfel %s -o - | FileCheck %s
+
+; Generated from the following C code:
+;
+;   #define __bpf_fastcall __attribute__((bpf_fastcall))
+;
+;   void bar(void) __bpf_fastcall;
+;   void buz(long i, long j, long k);
+;
+;   void foo(long i, long j, long k) {
+;     bar();
+;     buz(i, j, k);
+;   }
+;
+; Using the following command:
+;
+;   clang --target=bpf -emit-llvm -O2 -S -o - t.c
+;
+; (unnecessary attrs removed maually)
+
+; Check that function marked with bpf_fastcall does not clobber R1-R5.
+
+define dso_local void @foo(i64 noundef %i, i64 noundef %j, i64 noundef %k) {
+entry:
+  tail call void @bar() #1
+  tail call void @buz(i64 noundef %i, i64 noundef %j, i64 noundef %k)
+  ret void
+}
+
+; CHECK:      foo:
+; CHECK:      # %bb.0:
+; CHECK-NEXT:   *(u64 *)(r10 - 8) = r1
+; CHECK-NEXT:   *(u64 *)(r10 - 16) = r2
+; CHECK-NEXT:   *(u64 *)(r10 - 24) = r3
+; CHECK-NEXT:   call bar
+; CHECK-NEXT:   r3 = *(u64 *)(r10 - 24)
+; CHECK-NEXT:   r2 = *(u64 *)(r10 - 16)
+; CHECK-NEXT:   r1 = *(u64 *)(r10 - 8)
+; CHECK-NEXT:   call buz
+; CHECK-NEXT:   exit
+
+declare dso_local void @bar() #0
+declare dso_local void @buz(i64 noundef, i64 noundef, i64 noundef)
+
+attributes #0 = { "bpf_fastcall" }
+attributes #1 = { nounwind "bpf_fastcall" }
diff --git a/llvm/test/CodeGen/BPF/bpf-fastcall-2.ll b/llvm/test/CodeGen/BPF/bpf-fastcall-2.ll
new file mode 100644
index 00000000000000..e3e29cdddca8ea
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/bpf-fastcall-2.ll
@@ -0,0 +1,68 @@
+; RUN: llc -O2 --march=bpfel %s -o - | FileCheck %s
+
+; Generated from the following C code:
+;
+;   #define __bpf_fastcall __attribute__((bpf_fastcall))
+;
+;   void bar(void) __bpf_fastcall;
+;   void buz(long i, long j);
+;
+;   void foo(long i, long j, long k, long l) {
+;     bar();
+;     if (k > 42l)
+;       buz(i, 1);
+;     else
+;       buz(1, j);
+;   }
+;
+; Using the following command:
+;
+;   clang --target=bpf -emit-llvm -O2 -S -o - t.c
+;
+; (unnecessary attrs removed maually)
+
+; Check that function marked with bpf_fastcall does not clobber R1-R5.
+; Use R1 in one branch following call and R2 in another branch following call.
+
+define dso_local void @foo(i64 noundef %i, i64 noundef %j, i64 noundef %k, i64 noundef %l) {
+entry:
+  tail call void @bar() #0
+  %cmp = icmp sgt i64 %k, 42
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @buz(i64 noundef %i, i64 noundef 1)
+  br label %if.end
+
+if.else:
+  tail call void @buz(i64 noundef 1, i64 noundef %j)
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK:      foo:                                    # @foo
+; CHECK:      # %bb.0:                                # %entry
+; CHECK-NEXT:   *(u64 *)(r10 - 8) = r1
+; CHECK-NEXT:   *(u64 *)(r10 - 16) = r2
+; CHECK-NEXT:   *(u64 *)(r10 - 24) = r3
+; CHECK-NEXT:   call bar
+; CHECK-NEXT:   r3 = *(u64 *)(r10 - 24)
+; CHECK-NEXT:   r2 = *(u64 *)(r10 - 16)
+; CHECK-NEXT:   r1 = *(u64 *)(r10 - 8)
+; CHECK-NEXT:   r4 = 43
+; CHECK-NEXT:   if r4 s> r3 goto [[ELSE:.*]]
+; CHECK-NEXT: # %bb.1:                                # %if.then
+; CHECK-NEXT:   r2 = 1
+; CHECK-NEXT:   goto [[END:.*]]
+; CHECK-NEXT: [[ELSE]]:                               # %if.else
+; CHECK-NEXT:   r1 = 1
+; CHECK-NEXT: [[END]]:                                # %if.end
+; CHECK-NEXT:   call buz
+; CHECK-NEXT:   exit
+
+declare dso_local void @bar() #0
+declare dso_local void @buz(i64 noundef, i64 noundef)
+
+attributes #0 = { "bpf_fastcall" }
diff --git a/llvm/test/CodeGen/BPF/bpf-fastcall-3.ll b/llvm/test/CodeGen/BPF/bpf-fastcall-3.ll
new file mode 100644
index 00000000000000..81ca4e1ac57bc7
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/bpf-fastcall-3.ll
@@ -0,0 +1,62 @@
+; RUN: llc -O2 --march=bpfel %s -o - | FileCheck %s
+
+; Generated from the following C code:
+;
+; #define __bpf_fastcall __attribute__((bpf_fastcall))
+;
+; void quux(void *);
+; void bar(long) __bpf_fastcall;
+; void buz(long i, long j);
+;
+; void foo(long i, long j) {
+;   long k;
+;   bar(i);
+;   bar(i);
+;   buz(i, j);
+;   quux(&k);
+; }
+;
+; Using the following command:
+;
+;   clang --target=bpf -emit-llvm -O2 -S -o - t.c
+;
+; (unnecessary attrs removed maually)
+
+; Check that function marked with bpf_fastcall does not clobber R1-R5.
+; Check that spills/fills wrapping the call use and reuse lowest stack offsets.
+
+define dso_local void @foo(i64 noundef %i, i64 noundef %j) {
+entry:
+  %k = alloca i64, align 8
+  tail call void @bar(i64 noundef %i) #0
+  tail call void @bar(i64 noundef %i) #0
+  tail call void @buz(i64 noundef %i, i64 noundef %j)
+  call void @quux(ptr noundef nonnull %k)
+  ret void
+}
+
+; CHECK:      # %bb.0:
+; CHECK-NEXT:   r3 = r1
+; CHECK-NEXT:   *(u64 *)(r10 - 16) = r2
+; CHECK-NEXT:   *(u64 *)(r10 - 24) = r3
+; CHECK-NEXT:   call bar
+; CHECK-NEXT:   r3 = *(u64 *)(r10 - 24)
+; CHECK-NEXT:   r2 = *(u64 *)(r10 - 16)
+; CHECK-NEXT:   r1 = r3
+; CHECK-NEXT:   *(u64 *)(r10 - 16) = r2
+; CHECK-NEXT:   *(u64 *)(r10 - 24) = r3
+; CHECK-NEXT:   call bar
+; CHECK-NEXT:   r3 = *(u64 *)(r10 - 24)
+; CHECK-NEXT:   r2 = *(u64 *)(r10 - 16)
+; CHECK-NEXT:   r1 = r3
+; CHECK-NEXT:   call buz
+; CHECK-NEXT:   r1 = r10
+; CHECK-NEXT:   r1 += -8
+; CHECK-NEXT:   call quux
+; CHECK-NEXT:   exit
+
+declare dso_local void @bar(i64 noundef) #0
+declare dso_local void @buz(i64 noundef, i64 noundef)
+declare dso_local void @quux(ptr noundef)
+
+attributes #0 = { "bpf_fastcall" }
diff --git a/llvm/test/CodeGen/BPF/bpf-fastcall-regmask-1.ll b/llvm/test/CodeGen/BPF/bpf-fastcall-regmask-1.ll
new file mode 100644
index 00000000000000..857d2f000d1d5a
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/bpf-fastcall-regmask-1.ll
@@ -0,0 +1,110 @@
+; RUN: llc -O2 --march=bpfel \
+; RUN:   -print-after=stack-slot-coloring %s \
+; RUN:   -o /dev/null 2>&1 | FileCheck %s
+
+; Generated from the following C code:
+;
+;   #define __bpf_fastcall __attribute__((bpf_fastcall))
+;
+;   void bar1(void) __bpf_fastcall;
+;   void buz1(long i, long j, long k);
+;   void foo1(long i, long j, long k) {
+;     bar1();
+;     buz1(i, j, k);
+;   }
+;
+;   long bar2(void) __bpf_fastcall;
+;   void buz2(long i, long j, long k);
+;   void foo2(long i, long j, long k) {
+;     bar2();
+;     buz2(i, j, k);
+;   }
+;
+;   void bar3(long) __bpf_fastcall;
+;   void buz3(long i, long j, long k);
+;   void foo3(long i, long j, long k) {
+;     bar3(i);
+;     buz3(i, j, k);
+;   }
+;
+;   long bar4(long, long) __bpf_fastcall;
+;   void buz4(long i, long j, long k);
+;   void foo4(long i, long j, long k) {
+;     bar4(i, j);
+;     buz4(i, j, k);
+;   }
+;
+; Using the following command:
+;
+;   clang --target=bpf -emit-llvm -O2 -S -o - t.c
+;
+; (unnecessary attrs removed maually)
+
+; Check regmask for calls to functions marked with bpf_fastcall:
+; - void function w/o parameters
+; - non-void function w/o parameters
+; - void function with parameters
+; - non-void function with parameters
+
+declare dso_local void @bar1() #0
+declare dso_local void @buz1(i64 noundef, i64 noundef, i64 noundef)
+define dso_local void @foo1(i64 noundef %i, i64 noundef %j, i64 noundef %k) {
+entry:
+  tail call void @bar1() #1
+  tail call void @buz1(i64 noundef %i, i64 noundef %j, i64 noundef %k)
+  ret void
+}
+
+; CHECK:      JAL @bar1, <regmask $r0 $r1 $r2 $r3 $r4 $r5 $r6 $r7 $r8 $r9 $r10
+; CHECK-SAME:                     $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w9 $w10>
+; CHECK-SAME:          , implicit $r11, implicit-def $r11
+; CHECK:      JAL @buz1, <regmask $r6 $r7 $r8 $r9 $r10 $w6 $w7 $w8 $w9 $w10>
+; CHECK-SAME:          , implicit $r11, implicit $r1, implicit $r2, implicit $r3, implicit-def $r11
+
+declare dso_local i64 @bar2() #0
+declare dso_local void @buz2(i64 noundef, i64 noundef, i64 noundef)
+define dso_local void @foo2(i64 noundef %i, i64 noundef %j, i64 noundef %k) {
+entry:
+  tail call i64 @bar2() #1
+  tail call void @buz2(i64 noundef %i, i64 noundef %j, i64 noundef %k)
+  ret void
+}
+
+; CHECK:      JAL @bar2, <regmask $r1 $r2 $r3 $r4 $r5 $r6 $r7 $r8 $r9 $r10
+; CHECK-SAME:                     $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w9 $w10>
+; CHECK-SAME:          , implicit $r11, implicit-def $r11, implicit-def dead $r0
+; CHECK:      JAL @buz2, <regmask $r6 $r7 $r8 $r9 $r10 $w6 $w7 $w8 $w9 $w10>
+; CHECK-SAME:          , implicit $r11, implicit $r1, implicit $r2, implicit $r3, implicit-def $r11
+
+declare dso_local void @bar3(i64) #0
+declare dso_local void @buz3(i64 noundef, i64 noundef, i64 noundef)
+define dso_local void @foo3(i64 noundef %i, i64 noundef %j, i64 noundef %k) {
+entry:
+  tail call void @bar3(i64 noundef %i) #1
+  tail call void @buz3(i64 noundef %i, i64 noundef %j, i64 noundef %k)
+  ret void
+}
+
+; CHECK:      JAL @bar3, <regmask $r0 $r2 $r3 $r4 $r5 $r6 $r7 $r8 $r9 $r10
+; CHECK-SAME:                     $w0 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w9 $w10>
+; CHECK-SAME:          , implicit $r11, implicit $r1, implicit-def $r11
+; CHECK:      JAL @buz3, <regmask $r6 $r7 $r8 $r9 $r10 $w6 $w7 $w8 $w9 $w10>
+; CHECK-SAME:          , implicit $r11, implicit $r1, implicit $r2, implicit $r3, implicit-def $r11
+
+declare dso_local i64 @bar4(i64 noundef, i64 noundef) #0
+declare dso_local void @buz4(i64 noundef, i64 noundef, i64 noundef)
+define dso_local void @foo4(i64 noundef %i, i64 noundef %j, i64 noundef %k) {
+entry:
+  tail call i64 @bar4(i64 noundef %i, i64 noundef %j) #1
+  tail call void @buz4(i64 noundef %i, i64 noundef %j, i64 noundef %k)
+  ret void
+}
+
+; CHECK:      JAL @bar4, <regmask $r3 $r4 $r5 $r6 $r7 $r8 $r9 $r10
+; CHECK-SAME:                     $w3 $w4 $w5 $w6 $w7 $w8 $w9 $w10>
+; CHECK-SAME:          , implicit $r11, implicit $r1, implicit $r2, implicit-def $r11, implicit-def dead $r0
+; CHECK:      JAL @buz4, <regmask $r6 $r7 $r8 $r9 $r10 $w6 $w7 $w8 $w9 $w10>
+; CHECK-SAME:          , implicit $r11, implicit $r1, implicit $r2, implicit $r3, implicit-def $r11
+
+attributes #0 = { "bpf_fastcall" }
+attributes #1 = { nounwind "bpf_fastcall" }

>From 94f8b89e7110738ce024fe7035edc2528a2018b7 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87 at gmail.com>
Date: Fri, 2 Aug 2024 12:11:49 -0700
Subject: [PATCH 2/5] styling fix: remove goto in collectBPFFastCalls

---
 llvm/lib/Target/BPF/BPFMIPeephole.cpp | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
index 5ada86a8bf1c29..58dad71b920552 100644
--- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
@@ -615,19 +615,17 @@ static void collectBPFFastCalls(const TargetRegisterInfo *TRI,
   LiveRegs.addLiveOuts(BB);
   Calls.clear();
   for (MachineInstr &MI : llvm::reverse(BB)) {
-    unsigned LiveCallerSavedRegs;
-    if (!MI.isCall())
-      goto NextInsn;
-    LiveCallerSavedRegs = 0;
-    for (MCRegister R : CallerSavedRegs) {
-      bool DoSpillFill = !MI.definesRegister(R, TRI) && LiveRegs.contains(R);
-      if (!DoSpillFill)
-        continue;
-      LiveCallerSavedRegs |= 1 << R;
+    if (MI.isCall()) {
+      unsigned LiveCallerSavedRegs = 0;
+      for (MCRegister R : CallerSavedRegs) {
+        bool DoSpillFill = !MI.definesRegister(R, TRI) && LiveRegs.contains(R);
+        if (!DoSpillFill)
+          continue;
+        LiveCallerSavedRegs |= 1 << R;
+      }
+      if (LiveCallerSavedRegs)
+        Calls.push_back({&MI, LiveCallerSavedRegs});
     }
-    if (LiveCallerSavedRegs)
-      Calls.push_back({&MI, LiveCallerSavedRegs});
-  NextInsn:
     LiveRegs.stepBackward(MI);
   }
 }

>From b89fa14f78e734bc2496821f5425ef7460e4c59c Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87 at gmail.com>
Date: Fri, 2 Aug 2024 12:12:22 -0700
Subject: [PATCH 3/5] test fix: use [[names]] for attributes in
 bpf-attr-bpf-fastcall-1

Also change attribute names to be less confusing.
---
 clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c b/clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c
index bf49c3c9be0864..fa740d8e44ff51 100644
--- a/clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c
+++ b/clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c
@@ -12,13 +12,13 @@ void foo(void) {
 }
 
 // CHECK: @ptr = global ptr null
-// CHECK: define {{.*}} @foo()
+// CHECK: define {{.*}} void @foo()
 // CHECK: entry:
-// CHECK:   call void @test() #[[test_attr:[0-9]+]]
+// CHECK:   call void @test() #[[call_attr:[0-9]+]]
 // CHECK:   %[[ptr:.*]] = load ptr, ptr @ptr, align 8
-// CHECK:   call void %[[ptr]]() #[[test_attr]]
+// CHECK:   call void %[[ptr]]() #[[call_attr]]
 // CHECK:   ret void
 
-// CHECK: declare void @test() #[[ptr_attr:[0-9]+]]
-// CHECK: attributes #1 = { {{.*}}"bpf_fastcall"{{.*}} }
-// CHECK: attributes #[[test_attr]] = { {{.*}}"bpf_fastcall"{{.*}} }
+// CHECK: declare void @test() #[[func_attr:[0-9]+]]
+// CHECK: attributes #[[func_attr]] = { {{.*}}"bpf_fastcall"{{.*}} }
+// CHECK: attributes #[[call_attr]] = { "bpf_fastcall" }

>From 8f06f5bbeb1ba004e687417aa42558a2c95bf6a6 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87 at gmail.com>
Date: Thu, 15 Aug 2024 12:11:39 -0700
Subject: [PATCH 4/5] use `Clang` syntax instead of `GCC` for bpf_fastcall

---
 clang/include/clang/Basic/Attr.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 437cf3999bc365..0106fcf6944a92 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -2191,7 +2191,7 @@ def BTFTypeTag : TypeAttr {
 
 def BPFFastCall : InheritableAttr,
                   TargetSpecificAttr<TargetBPF> {
-  let Spellings = [GCC<"bpf_fastcall">];
+  let Spellings = [Clang<"bpf_fastcall">];
   let Subjects = SubjectList<[FunctionLike]>;
   let Documentation = [BPFFastCallDocs];
   let SimpleHandler = 1;

>From 4bf461678b6eb12da78c0ebcb793b120a2918843 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87 at gmail.com>
Date: Thu, 15 Aug 2024 12:28:05 -0700
Subject: [PATCH 5/5] adjust Sema test case to show that bpf_fastcall is not a
 type attr

As discussed in the pull request thread, it is not an error to assign
pointer with bpf_fastcall attribute to a pointer w/o such attribute
and vice-versa.
(Hence, no need to track bpf_fastcall as a part of the type).
---
 clang/test/Sema/bpf-attr-bpf-fastcall.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/clang/test/Sema/bpf-attr-bpf-fastcall.c b/clang/test/Sema/bpf-attr-bpf-fastcall.c
index 81a5cf68a8c061..178b1f50741e87 100644
--- a/clang/test/Sema/bpf-attr-bpf-fastcall.c
+++ b/clang/test/Sema/bpf-attr-bpf-fastcall.c
@@ -5,3 +5,10 @@ __attribute__((bpf_fastcall)) int var; // expected-warning {{'bpf_fastcall' attr
 
 __attribute__((bpf_fastcall)) void func();
 __attribute__((bpf_fastcall(1))) void func_invalid(); // expected-error {{'bpf_fastcall' attribute takes no arguments}}
+
+void (*ptr1)(void) __attribute__((bpf_fastcall));
+void (*ptr2)(void);
+void foo(void) {
+  ptr2 = ptr1; // not an error
+  ptr1 = ptr2; // not an error
+}



More information about the cfe-commits mailing list