[clang] 64e4643 - [BPF] introduce __attribute__((bpf_fastcall)) (#105417)

via cfe-commits cfe-commits at lists.llvm.org
Wed Aug 21 17:41:00 PDT 2024


Author: eddyz87
Date: 2024-08-22T03:40:56+03:00
New Revision: 64e464349bfca0d90e07f6db2f710d4d53cdacd4

URL: https://github.com/llvm/llvm-project/commit/64e464349bfca0d90e07f6db2f710d4d53cdacd4
DIFF: https://github.com/llvm/llvm-project/commit/64e464349bfca0d90e07f6db2f710d4d53cdacd4.diff

LOG: [BPF] introduce __attribute__((bpf_fastcall)) (#105417)

This commit introduces attribute bpf_fastcall to declare BPF functions
that do not clobber some of the caller saved registers (R0-R5).

The idea is to generate the code complying with generic BPF ABI,
but allow compatible Linux Kernel to remove unnecessary spills and
fills of non-scratched registers (given some compiler assistance).

For such functions do register allocation as-if caller saved registers
are not clobbered, but later wrap the calls with spill and fill
patterns that are simple to recognize in kernel.

For example for the following C code:

    #define __bpf_fastcall __attribute__((bpf_fastcall))

    void bar(void) __bpf_fastcall;
    void buz(long i, long j, long k);

    void foo(long i, long j, long k) {
      bar();
      buz(i, j, k);
    }

First allocate registers as if:

    foo:
      call bar    # note: no spills for i,j,k (r1,r2,r3)
      call buz
      exit

And later insert spills/fills during the peephole phase:

    foo:
      *(u64 *)(r10 - 8) = r1;  # Such call pattern is
      *(u64 *)(r10 - 16) = r2; # correct when used with
      *(u64 *)(r10 - 24) = r3; # old kernels.
      call bar
      r3 = *(u64 *)(r10 - 24); # But also allows new
      r2 = *(u64 *)(r10 - 16); # kernels to recognize the
      r1 = *(u64 *)(r10 - 8);  # pattern and remove spills/fills.
      call buz
      exit

The offsets for generated spills/fills are picked as minimal stack
offsets for the function. Allocated stack slots are not used for any
other purposes, in order to simplify in-kernel analysis.

Added: 
    clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c
    clang/test/Sema/bpf-attr-bpf-fastcall.c
    llvm/test/CodeGen/BPF/bpf-fastcall-1.ll
    llvm/test/CodeGen/BPF/bpf-fastcall-2.ll
    llvm/test/CodeGen/BPF/bpf-fastcall-3.ll
    llvm/test/CodeGen/BPF/bpf-fastcall-regmask-1.ll

Modified: 
    clang/include/clang/Basic/Attr.td
    clang/include/clang/Basic/AttrDocs.td
    clang/lib/CodeGen/CGCall.cpp
    clang/test/Misc/pragma-attribute-supported-attributes-list.test
    llvm/lib/Target/BPF/BPFCallingConv.td
    llvm/lib/Target/BPF/BPFISelLowering.cpp
    llvm/lib/Target/BPF/BPFInstrInfo.td
    llvm/lib/Target/BPF/BPFMIPeephole.cpp
    llvm/lib/Target/BPF/BPFRegisterInfo.cpp
    llvm/lib/Target/BPF/BPFRegisterInfo.h

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 10a9d9e899e007..98bedfe20f5d98 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -2200,6 +2200,15 @@ def BTFTypeTag : TypeAttr {
   let LangOpts = [COnly];
 }
 
+def BPFFastCall : InheritableAttr,
+                  TargetSpecificAttr<TargetBPF> {
+  let Spellings = [Clang<"bpf_fastcall">];
+  let Subjects = SubjectList<[FunctionLike]>;
+  let Documentation = [BPFFastCallDocs];
+  let LangOpts = [COnly];
+  let SimpleHandler = 1;
+}
+
 def WebAssemblyExportName : InheritableAttr,
                             TargetSpecificAttr<TargetWebAssembly> {
   let Spellings = [Clang<"export_name">];

diff  --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 19cbb9a0111a28..df36a2163b9f0b 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -2345,6 +2345,25 @@ section.
   }];
 }
 
+def BPFFastCallDocs : Documentation {
+  let Category = DocCatType;
+  let Content = [{
+Functions annotated with this attribute are likely to be inlined by BPF JIT.
+It is assumed that the inlined implementation uses fewer caller-saved
+registers than a regular function.
+Specifically, the following registers are likely to be preserved:
+- ``R0`` if function return value is ``void``;
+- ``R2-R5`` if function takes 1 argument;
+- ``R3-R5`` if function takes 2 arguments;
+- ``R4-R5`` if function takes 3 arguments;
+- ``R5`` if function takes 4 arguments;
+
+For such functions Clang generates code pattern that allows BPF JIT
+to recognize and remove unnecessary spills and fills of the preserved
+registers.
+  }];
+}
+
 def MipsInterruptDocs : Documentation {
   let Category = DocCatFunction;
   let Heading = "interrupt (MIPS)";

diff  --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 34ca2227608361..ca2c79b51ac96b 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2421,6 +2421,8 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
       FuncAttrs.addAttribute(llvm::Attribute::NoCfCheck);
     if (TargetDecl->hasAttr<LeafAttr>())
       FuncAttrs.addAttribute(llvm::Attribute::NoCallback);
+    if (TargetDecl->hasAttr<BPFFastCallAttr>())
+      FuncAttrs.addAttribute("bpf_fastcall");
 
     HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>();
     if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) {

diff  --git a/clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c b/clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c
new file mode 100644
index 00000000000000..fa740d8e44ff51
--- /dev/null
+++ b/clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c
@@ -0,0 +1,24 @@
+// REQUIRES: bpf-registered-target
+// RUN: %clang_cc1 -triple bpf -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
+
+#define __bpf_fastcall __attribute__((bpf_fastcall))
+
+void test(void) __bpf_fastcall;
+void (*ptr)(void) __bpf_fastcall;
+
+void foo(void) {
+  test();
+  (*ptr)();
+}
+
+// CHECK: @ptr = global ptr null
+// CHECK: define {{.*}} void @foo()
+// CHECK: entry:
+// CHECK:   call void @test() #[[call_attr:[0-9]+]]
+// CHECK:   %[[ptr:.*]] = load ptr, ptr @ptr, align 8
+// CHECK:   call void %[[ptr]]() #[[call_attr]]
+// CHECK:   ret void
+
+// CHECK: declare void @test() #[[func_attr:[0-9]+]]
+// CHECK: attributes #[[func_attr]] = { {{.*}}"bpf_fastcall"{{.*}} }
+// CHECK: attributes #[[call_attr]] = { "bpf_fastcall" }

diff  --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index 1a71556213bb16..a7e425e3d5f431 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -22,6 +22,7 @@
 // CHECK-NEXT: AssumeAligned (SubjectMatchRule_objc_method, SubjectMatchRule_function)
 // CHECK-NEXT: Availability ((SubjectMatchRule_record, SubjectMatchRule_enum, SubjectMatchRule_enum_constant, SubjectMatchRule_field, SubjectMatchRule_function, SubjectMatchRule_namespace, SubjectMatchRule_objc_category, SubjectMatchRule_objc_implementation, SubjectMatchRule_objc_interface, SubjectMatchRule_objc_method, SubjectMatchRule_objc_property, SubjectMatchRule_objc_protocol, SubjectMatchRule_record, SubjectMatchRule_type_alias, SubjectMatchRule_variable))
 // CHECK-NEXT: AvailableOnlyInDefaultEvalMethod (SubjectMatchRule_type_alias)
+// CHECK-NEXT: BPFFastCall (SubjectMatchRule_hasType_functionType)
 // CHECK-NEXT: BPFPreserveAccessIndex (SubjectMatchRule_record)
 // CHECK-NEXT: BPFPreserveStaticOffset (SubjectMatchRule_record)
 // CHECK-NEXT: BTFDeclTag (SubjectMatchRule_variable, SubjectMatchRule_function, SubjectMatchRule_record, SubjectMatchRule_field, SubjectMatchRule_type_alias)

diff  --git a/clang/test/Sema/bpf-attr-bpf-fastcall.c b/clang/test/Sema/bpf-attr-bpf-fastcall.c
new file mode 100644
index 00000000000000..178b1f50741e87
--- /dev/null
+++ b/clang/test/Sema/bpf-attr-bpf-fastcall.c
@@ -0,0 +1,14 @@
+// REQUIRES: bpf-registered-target
+// RUN: %clang_cc1 %s -triple bpf -verify
+
+__attribute__((bpf_fastcall)) int var; // expected-warning {{'bpf_fastcall' attribute only applies to functions and function pointers}}
+
+__attribute__((bpf_fastcall)) void func();
+__attribute__((bpf_fastcall(1))) void func_invalid(); // expected-error {{'bpf_fastcall' attribute takes no arguments}}
+
+void (*ptr1)(void) __attribute__((bpf_fastcall));
+void (*ptr2)(void);
+void foo(void) {
+  ptr2 = ptr1; // not an error
+  ptr1 = ptr2; // not an error
+}

diff  --git a/llvm/lib/Target/BPF/BPFCallingConv.td b/llvm/lib/Target/BPF/BPFCallingConv.td
index ef4ef1930aa8fb..a557211437e95f 100644
--- a/llvm/lib/Target/BPF/BPFCallingConv.td
+++ b/llvm/lib/Target/BPF/BPFCallingConv.td
@@ -46,3 +46,4 @@ def CC_BPF32 : CallingConv<[
 ]>;
 
 def CSR : CalleeSavedRegs<(add R6, R7, R8, R9, R10)>;
+def CSR_PreserveAll : CalleeSavedRegs<(add R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10)>;

diff  --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 071fe004806e3e..ff23d3b055d0d5 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -402,6 +402,21 @@ SDValue BPFTargetLowering::LowerFormalArguments(
 
 const size_t BPFTargetLowering::MaxArgs = 5;
 
+static void resetRegMaskBit(const TargetRegisterInfo *TRI, uint32_t *RegMask,
+                            MCRegister Reg) {
+  for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+    RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
+}
+
+static uint32_t *regMaskFromTemplate(const TargetRegisterInfo *TRI,
+                                     MachineFunction &MF,
+                                     const uint32_t *BaseRegMask) {
+  uint32_t *RegMask = MF.allocateRegMask();
+  unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
+  memcpy(RegMask, BaseRegMask, sizeof(RegMask[0]) * RegMaskSize);
+  return RegMask;
+}
+
 SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                      SmallVectorImpl<SDValue> &InVals) const {
   SelectionDAG &DAG = CLI.DAG;
@@ -513,6 +528,22 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   for (auto &Reg : RegsToPass)
     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
 
+  bool HasFastCall =
+      (CLI.CB && isa<CallInst>(CLI.CB) && CLI.CB->hasFnAttr("bpf_fastcall"));
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  if (HasFastCall) {
+    uint32_t *RegMask = regMaskFromTemplate(
+        TRI, MF, TRI->getCallPreservedMask(MF, CallingConv::PreserveAll));
+    for (auto const &RegPair : RegsToPass)
+      resetRegMaskBit(TRI, RegMask, RegPair.first);
+    if (!CLI.CB->getType()->isVoidTy())
+      resetRegMaskBit(TRI, RegMask, BPF::R0);
+    Ops.push_back(DAG.getRegisterMask(RegMask));
+  } else {
+    Ops.push_back(
+        DAG.getRegisterMask(TRI->getCallPreservedMask(MF, CLI.CallConv)));
+  }
+
   if (InGlue.getNode())
     Ops.push_back(InGlue);
 

diff  --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 2ee630e29790f3..4baeeb017699d6 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -677,9 +677,7 @@ let isBranch = 1, isTerminator = 1, hasDelaySlot=0, isBarrier = 1 in {
 }
 
 // Jump and link
-let isCall=1, hasDelaySlot=0, Uses = [R11],
-    // Potentially clobbered registers
-    Defs = [R0, R1, R2, R3, R4, R5] in {
+let isCall=1, hasDelaySlot=0, Uses = [R11] in {
   def JAL  : CALL<"call">;
   def JALX  : CALLX<"callx">;
 }

diff  --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
index f0edf706bd8fd7..c41eab319dbb9b 100644
--- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
@@ -24,6 +24,8 @@
 #include "BPFInstrInfo.h"
 #include "BPFTargetMachine.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -319,6 +321,7 @@ struct BPFMIPreEmitPeephole : public MachineFunctionPass {
   bool in16BitRange(int Num);
   bool eliminateRedundantMov();
   bool adjustBranch();
+  bool insertMissingCallerSavedSpills();
 
 public:
 
@@ -333,6 +336,7 @@ struct BPFMIPreEmitPeephole : public MachineFunctionPass {
     Changed = eliminateRedundantMov();
     if (SupportGotol)
       Changed = adjustBranch() || Changed;
+    Changed |= insertMissingCallerSavedSpills();
     return Changed;
   }
 };
@@ -596,6 +600,86 @@ bool BPFMIPreEmitPeephole::adjustBranch() {
   return Changed;
 }
 
+static const unsigned CallerSavedRegs[] = {BPF::R0, BPF::R1, BPF::R2,
+                                           BPF::R3, BPF::R4, BPF::R5};
+
+struct BPFFastCall {
+  MachineInstr *MI;
+  unsigned LiveCallerSavedRegs;
+};
+
+static void collectBPFFastCalls(const TargetRegisterInfo *TRI,
+                                LivePhysRegs &LiveRegs, MachineBasicBlock &BB,
+                                SmallVectorImpl<BPFFastCall> &Calls) {
+  LiveRegs.init(*TRI);
+  LiveRegs.addLiveOuts(BB);
+  Calls.clear();
+  for (MachineInstr &MI : llvm::reverse(BB)) {
+    if (MI.isCall()) {
+      unsigned LiveCallerSavedRegs = 0;
+      for (MCRegister R : CallerSavedRegs) {
+        bool DoSpillFill = !MI.definesRegister(R, TRI) && LiveRegs.contains(R);
+        if (!DoSpillFill)
+          continue;
+        LiveCallerSavedRegs |= 1 << R;
+      }
+      if (LiveCallerSavedRegs)
+        Calls.push_back({&MI, LiveCallerSavedRegs});
+    }
+    LiveRegs.stepBackward(MI);
+  }
+}
+
+static int64_t computeMinFixedObjOffset(MachineFrameInfo &MFI,
+                                        unsigned SlotSize) {
+  int64_t MinFixedObjOffset = 0;
+  // Same logic as in X86FrameLowering::adjustFrameForMsvcCxxEh()
+  for (int I = MFI.getObjectIndexBegin(); I < MFI.getObjectIndexEnd(); ++I) {
+    if (MFI.isDeadObjectIndex(I))
+      continue;
+    MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
+  }
+  MinFixedObjOffset -=
+      (SlotSize + MinFixedObjOffset % SlotSize) & (SlotSize - 1);
+  return MinFixedObjOffset;
+}
+
+bool BPFMIPreEmitPeephole::insertMissingCallerSavedSpills() {
+  MachineFrameInfo &MFI = MF->getFrameInfo();
+  SmallVector<BPFFastCall, 8> Calls;
+  LivePhysRegs LiveRegs;
+  const unsigned SlotSize = 8;
+  int64_t MinFixedObjOffset = computeMinFixedObjOffset(MFI, SlotSize);
+  bool Changed = false;
+  for (MachineBasicBlock &BB : *MF) {
+    collectBPFFastCalls(TRI, LiveRegs, BB, Calls);
+    Changed |= !Calls.empty();
+    for (BPFFastCall &Call : Calls) {
+      int64_t CurOffset = MinFixedObjOffset;
+      for (MCRegister Reg : CallerSavedRegs) {
+        if (((1 << Reg) & Call.LiveCallerSavedRegs) == 0)
+          continue;
+        // Allocate stack object
+        CurOffset -= SlotSize;
+        MFI.CreateFixedSpillStackObject(SlotSize, CurOffset);
+        // Generate spill
+        BuildMI(BB, Call.MI->getIterator(), Call.MI->getDebugLoc(),
+                TII->get(BPF::STD))
+            .addReg(Reg, RegState::Kill)
+            .addReg(BPF::R10)
+            .addImm(CurOffset);
+        // Generate fill
+        BuildMI(BB, ++Call.MI->getIterator(), Call.MI->getDebugLoc(),
+                TII->get(BPF::LDD))
+            .addReg(Reg, RegState::Define)
+            .addReg(BPF::R10)
+            .addImm(CurOffset);
+      }
+    }
+  }
+  return Changed;
+}
+
 } // end default namespace
 
 INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",

diff  --git a/llvm/lib/Target/BPF/BPFRegisterInfo.cpp b/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
index 84af6806abb36c..69e1318954a973 100644
--- a/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
+++ b/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
@@ -40,6 +40,17 @@ BPFRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   return CSR_SaveList;
 }
 
+const uint32_t *
+BPFRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+                                      CallingConv::ID CC) const {
+  switch (CC) {
+  default:
+    return CSR_RegMask;
+  case CallingConv::PreserveAll:
+    return CSR_PreserveAll_RegMask;
+  }
+}
+
 BitVector BPFRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
   markSuperRegs(Reserved, BPF::W10); // [W|R]10 is read only frame pointer

diff  --git a/llvm/lib/Target/BPF/BPFRegisterInfo.h b/llvm/lib/Target/BPF/BPFRegisterInfo.h
index f7dea75ebea6f9..db868769a1579a 100644
--- a/llvm/lib/Target/BPF/BPFRegisterInfo.h
+++ b/llvm/lib/Target/BPF/BPFRegisterInfo.h
@@ -26,6 +26,9 @@ struct BPFRegisterInfo : public BPFGenRegisterInfo {
 
   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
 
+  const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+                                       CallingConv::ID) const override;
+
   BitVector getReservedRegs(const MachineFunction &MF) const override;
 
   bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,

diff  --git a/llvm/test/CodeGen/BPF/bpf-fastcall-1.ll b/llvm/test/CodeGen/BPF/bpf-fastcall-1.ll
new file mode 100644
index 00000000000000..fd81314a495ef8
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/bpf-fastcall-1.ll
@@ -0,0 +1,46 @@
+; RUN: llc -O2 --march=bpfel %s -o - | FileCheck %s
+
+; Generated from the following C code:
+;
+;   #define __bpf_fastcall __attribute__((bpf_fastcall))
+;
+;   void bar(void) __bpf_fastcall;
+;   void buz(long i, long j, long k);
+;
+;   void foo(long i, long j, long k) {
+;     bar();
+;     buz(i, j, k);
+;   }
+;
+; Using the following command:
+;
+;   clang --target=bpf -emit-llvm -O2 -S -o - t.c
+;
+; (unnecessary attrs removed manually)
+
+; Check that function marked with bpf_fastcall does not clobber R1-R5.
+
+define dso_local void @foo(i64 noundef %i, i64 noundef %j, i64 noundef %k) {
+entry:
+  tail call void @bar() #1
+  tail call void @buz(i64 noundef %i, i64 noundef %j, i64 noundef %k)
+  ret void
+}
+
+; CHECK:      foo:
+; CHECK:      # %bb.0:
+; CHECK-NEXT:   *(u64 *)(r10 - 8) = r1
+; CHECK-NEXT:   *(u64 *)(r10 - 16) = r2
+; CHECK-NEXT:   *(u64 *)(r10 - 24) = r3
+; CHECK-NEXT:   call bar
+; CHECK-NEXT:   r3 = *(u64 *)(r10 - 24)
+; CHECK-NEXT:   r2 = *(u64 *)(r10 - 16)
+; CHECK-NEXT:   r1 = *(u64 *)(r10 - 8)
+; CHECK-NEXT:   call buz
+; CHECK-NEXT:   exit
+
+declare dso_local void @bar() #0
+declare dso_local void @buz(i64 noundef, i64 noundef, i64 noundef)
+
+attributes #0 = { "bpf_fastcall" }
+attributes #1 = { nounwind "bpf_fastcall" }

diff  --git a/llvm/test/CodeGen/BPF/bpf-fastcall-2.ll b/llvm/test/CodeGen/BPF/bpf-fastcall-2.ll
new file mode 100644
index 00000000000000..e3e29cdddca8ea
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/bpf-fastcall-2.ll
@@ -0,0 +1,68 @@
+; RUN: llc -O2 --march=bpfel %s -o - | FileCheck %s
+
+; Generated from the following C code:
+;
+;   #define __bpf_fastcall __attribute__((bpf_fastcall))
+;
+;   void bar(void) __bpf_fastcall;
+;   void buz(long i, long j);
+;
+;   void foo(long i, long j, long k, long l) {
+;     bar();
+;     if (k > 42l)
+;       buz(i, 1);
+;     else
+;       buz(1, j);
+;   }
+;
+; Using the following command:
+;
+;   clang --target=bpf -emit-llvm -O2 -S -o - t.c
+;
+; (unnecessary attrs removed manually)
+
+; Check that function marked with bpf_fastcall does not clobber R1-R5.
+; Use R1 in one branch following call and R2 in another branch following call.
+
+define dso_local void @foo(i64 noundef %i, i64 noundef %j, i64 noundef %k, i64 noundef %l) {
+entry:
+  tail call void @bar() #0
+  %cmp = icmp sgt i64 %k, 42
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void @buz(i64 noundef %i, i64 noundef 1)
+  br label %if.end
+
+if.else:
+  tail call void @buz(i64 noundef 1, i64 noundef %j)
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK:      foo:                                    # @foo
+; CHECK:      # %bb.0:                                # %entry
+; CHECK-NEXT:   *(u64 *)(r10 - 8) = r1
+; CHECK-NEXT:   *(u64 *)(r10 - 16) = r2
+; CHECK-NEXT:   *(u64 *)(r10 - 24) = r3
+; CHECK-NEXT:   call bar
+; CHECK-NEXT:   r3 = *(u64 *)(r10 - 24)
+; CHECK-NEXT:   r2 = *(u64 *)(r10 - 16)
+; CHECK-NEXT:   r1 = *(u64 *)(r10 - 8)
+; CHECK-NEXT:   r4 = 43
+; CHECK-NEXT:   if r4 s> r3 goto [[ELSE:.*]]
+; CHECK-NEXT: # %bb.1:                                # %if.then
+; CHECK-NEXT:   r2 = 1
+; CHECK-NEXT:   goto [[END:.*]]
+; CHECK-NEXT: [[ELSE]]:                               # %if.else
+; CHECK-NEXT:   r1 = 1
+; CHECK-NEXT: [[END]]:                                # %if.end
+; CHECK-NEXT:   call buz
+; CHECK-NEXT:   exit
+
+declare dso_local void @bar() #0
+declare dso_local void @buz(i64 noundef, i64 noundef)
+
+attributes #0 = { "bpf_fastcall" }

diff  --git a/llvm/test/CodeGen/BPF/bpf-fastcall-3.ll b/llvm/test/CodeGen/BPF/bpf-fastcall-3.ll
new file mode 100644
index 00000000000000..81ca4e1ac57bc7
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/bpf-fastcall-3.ll
@@ -0,0 +1,62 @@
+; RUN: llc -O2 --march=bpfel %s -o - | FileCheck %s
+
+; Generated from the following C code:
+;
+; #define __bpf_fastcall __attribute__((bpf_fastcall))
+;
+; void quux(void *);
+; void bar(long) __bpf_fastcall;
+; void buz(long i, long j);
+;
+; void foo(long i, long j) {
+;   long k;
+;   bar(i);
+;   bar(i);
+;   buz(i, j);
+;   quux(&k);
+; }
+;
+; Using the following command:
+;
+;   clang --target=bpf -emit-llvm -O2 -S -o - t.c
+;
+; (unnecessary attrs removed manually)
+
+; Check that function marked with bpf_fastcall does not clobber R1-R5.
+; Check that spills/fills wrapping the call use and reuse lowest stack offsets.
+
+define dso_local void @foo(i64 noundef %i, i64 noundef %j) {
+entry:
+  %k = alloca i64, align 8
+  tail call void @bar(i64 noundef %i) #0
+  tail call void @bar(i64 noundef %i) #0
+  tail call void @buz(i64 noundef %i, i64 noundef %j)
+  call void @quux(ptr noundef nonnull %k)
+  ret void
+}
+
+; CHECK:      # %bb.0:
+; CHECK-NEXT:   r3 = r1
+; CHECK-NEXT:   *(u64 *)(r10 - 16) = r2
+; CHECK-NEXT:   *(u64 *)(r10 - 24) = r3
+; CHECK-NEXT:   call bar
+; CHECK-NEXT:   r3 = *(u64 *)(r10 - 24)
+; CHECK-NEXT:   r2 = *(u64 *)(r10 - 16)
+; CHECK-NEXT:   r1 = r3
+; CHECK-NEXT:   *(u64 *)(r10 - 16) = r2
+; CHECK-NEXT:   *(u64 *)(r10 - 24) = r3
+; CHECK-NEXT:   call bar
+; CHECK-NEXT:   r3 = *(u64 *)(r10 - 24)
+; CHECK-NEXT:   r2 = *(u64 *)(r10 - 16)
+; CHECK-NEXT:   r1 = r3
+; CHECK-NEXT:   call buz
+; CHECK-NEXT:   r1 = r10
+; CHECK-NEXT:   r1 += -8
+; CHECK-NEXT:   call quux
+; CHECK-NEXT:   exit
+
+declare dso_local void @bar(i64 noundef) #0
+declare dso_local void @buz(i64 noundef, i64 noundef)
+declare dso_local void @quux(ptr noundef)
+
+attributes #0 = { "bpf_fastcall" }

diff  --git a/llvm/test/CodeGen/BPF/bpf-fastcall-regmask-1.ll b/llvm/test/CodeGen/BPF/bpf-fastcall-regmask-1.ll
new file mode 100644
index 00000000000000..857d2f000d1d5a
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/bpf-fastcall-regmask-1.ll
@@ -0,0 +1,110 @@
+; RUN: llc -O2 --march=bpfel \
+; RUN:   -print-after=stack-slot-coloring %s \
+; RUN:   -o /dev/null 2>&1 | FileCheck %s
+
+; Generated from the following C code:
+;
+;   #define __bpf_fastcall __attribute__((bpf_fastcall))
+;
+;   void bar1(void) __bpf_fastcall;
+;   void buz1(long i, long j, long k);
+;   void foo1(long i, long j, long k) {
+;     bar1();
+;     buz1(i, j, k);
+;   }
+;
+;   long bar2(void) __bpf_fastcall;
+;   void buz2(long i, long j, long k);
+;   void foo2(long i, long j, long k) {
+;     bar2();
+;     buz2(i, j, k);
+;   }
+;
+;   void bar3(long) __bpf_fastcall;
+;   void buz3(long i, long j, long k);
+;   void foo3(long i, long j, long k) {
+;     bar3(i);
+;     buz3(i, j, k);
+;   }
+;
+;   long bar4(long, long) __bpf_fastcall;
+;   void buz4(long i, long j, long k);
+;   void foo4(long i, long j, long k) {
+;     bar4(i, j);
+;     buz4(i, j, k);
+;   }
+;
+; Using the following command:
+;
+;   clang --target=bpf -emit-llvm -O2 -S -o - t.c
+;
+; (unnecessary attrs removed manually)
+
+; Check regmask for calls to functions marked with bpf_fastcall:
+; - void function w/o parameters
+; - non-void function w/o parameters
+; - void function with parameters
+; - non-void function with parameters
+
+declare dso_local void @bar1() #0
+declare dso_local void @buz1(i64 noundef, i64 noundef, i64 noundef)
+define dso_local void @foo1(i64 noundef %i, i64 noundef %j, i64 noundef %k) {
+entry:
+  tail call void @bar1() #1
+  tail call void @buz1(i64 noundef %i, i64 noundef %j, i64 noundef %k)
+  ret void
+}
+
+; CHECK:      JAL @bar1, <regmask $r0 $r1 $r2 $r3 $r4 $r5 $r6 $r7 $r8 $r9 $r10
+; CHECK-SAME:                     $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w9 $w10>
+; CHECK-SAME:          , implicit $r11, implicit-def $r11
+; CHECK:      JAL @buz1, <regmask $r6 $r7 $r8 $r9 $r10 $w6 $w7 $w8 $w9 $w10>
+; CHECK-SAME:          , implicit $r11, implicit $r1, implicit $r2, implicit $r3, implicit-def $r11
+
+declare dso_local i64 @bar2() #0
+declare dso_local void @buz2(i64 noundef, i64 noundef, i64 noundef)
+define dso_local void @foo2(i64 noundef %i, i64 noundef %j, i64 noundef %k) {
+entry:
+  tail call i64 @bar2() #1
+  tail call void @buz2(i64 noundef %i, i64 noundef %j, i64 noundef %k)
+  ret void
+}
+
+; CHECK:      JAL @bar2, <regmask $r1 $r2 $r3 $r4 $r5 $r6 $r7 $r8 $r9 $r10
+; CHECK-SAME:                     $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w9 $w10>
+; CHECK-SAME:          , implicit $r11, implicit-def $r11, implicit-def dead $r0
+; CHECK:      JAL @buz2, <regmask $r6 $r7 $r8 $r9 $r10 $w6 $w7 $w8 $w9 $w10>
+; CHECK-SAME:          , implicit $r11, implicit $r1, implicit $r2, implicit $r3, implicit-def $r11
+
+declare dso_local void @bar3(i64) #0
+declare dso_local void @buz3(i64 noundef, i64 noundef, i64 noundef)
+define dso_local void @foo3(i64 noundef %i, i64 noundef %j, i64 noundef %k) {
+entry:
+  tail call void @bar3(i64 noundef %i) #1
+  tail call void @buz3(i64 noundef %i, i64 noundef %j, i64 noundef %k)
+  ret void
+}
+
+; CHECK:      JAL @bar3, <regmask $r0 $r2 $r3 $r4 $r5 $r6 $r7 $r8 $r9 $r10
+; CHECK-SAME:                     $w0 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w9 $w10>
+; CHECK-SAME:          , implicit $r11, implicit $r1, implicit-def $r11
+; CHECK:      JAL @buz3, <regmask $r6 $r7 $r8 $r9 $r10 $w6 $w7 $w8 $w9 $w10>
+; CHECK-SAME:          , implicit $r11, implicit $r1, implicit $r2, implicit $r3, implicit-def $r11
+
+declare dso_local i64 @bar4(i64 noundef, i64 noundef) #0
+declare dso_local void @buz4(i64 noundef, i64 noundef, i64 noundef)
+define dso_local void @foo4(i64 noundef %i, i64 noundef %j, i64 noundef %k) {
+entry:
+  tail call i64 @bar4(i64 noundef %i, i64 noundef %j) #1
+  tail call void @buz4(i64 noundef %i, i64 noundef %j, i64 noundef %k)
+  ret void
+}
+
+; CHECK:      JAL @bar4, <regmask $r3 $r4 $r5 $r6 $r7 $r8 $r9 $r10
+; CHECK-SAME:                     $w3 $w4 $w5 $w6 $w7 $w8 $w9 $w10>
+; CHECK-SAME:          , implicit $r11, implicit $r1, implicit $r2, implicit-def $r11, implicit-def dead $r0
+; CHECK:      JAL @buz4, <regmask $r6 $r7 $r8 $r9 $r10 $w6 $w7 $w8 $w9 $w10>
+; CHECK-SAME:          , implicit $r11, implicit $r1, implicit $r2, implicit $r3, implicit-def $r11
+
+attributes #0 = { "bpf_fastcall" }
+attributes #1 = { nounwind "bpf_fastcall" }


        


More information about the cfe-commits mailing list