[llvm] 02141a1 - [PowerPC][Future] Remove redundant r2 save and restore for indirect call

Victor Huang via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 22 10:07:11 PDT 2020


Author: Victor Huang
Date: 2020-04-22T12:05:51-05:00
New Revision: 02141a17aea0603b89baee14febea6a3b89474d1

URL: https://github.com/llvm/llvm-project/commit/02141a17aea0603b89baee14febea6a3b89474d1
DIFF: https://github.com/llvm/llvm-project/commit/02141a17aea0603b89baee14febea6a3b89474d1.diff

LOG: [PowerPC][Future] Remove redundant r2 save and restore for indirect call

Currently an indirect call produces the following sequence on PCRelative mode:

extern void function( );
extern void (*ptrfunc) ( );

void g() {
    ptrfunc=function;
}

void f() {
    (*ptrfunc) ( );
}

Producing

paddi 3, 0, .LC0 at PCREL, 1
ld 3, 0(3)
std 2, 24(1)
ld 12, 0(3)
mtctr 12
bctrl
ld 2, 24(1)

Though the caller does not use or preserve r2, it is still saved and restored
across a function call. This patch is added to remove these redundant save and
restores for indirect calls.

Differential Revision: https://reviews.llvm.org/D77749

Added: 
    llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
    llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 4f065dfd4f7c..effb1651d045 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5152,6 +5152,12 @@ static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
   return true;
 }
 
+// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
+static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
+  return Subtarget.isAIXABI() ||
+         (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
+}
+
 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
                               const Function &Caller,
                               const SDValue &Callee,
@@ -5168,20 +5174,12 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
     // pointer is modeled by using a pseudo instruction for the call opcode that
     // represents the 2 instruction sequence of an indirect branch and link,
     // immediately followed by a load of the TOC pointer from the the stack save
-    // slot into gpr2.
-    if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
-      return PPCISD::BCTRL_LOAD_TOC;
-
-    // An indirect call that does not need a TOC restore.
-    return PPCISD::BCTRL;
+    // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
+    // as it is not saved or used.
+    return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
+                                               : PPCISD::BCTRL;
   }
 
-  // FIXME: At this moment indirect calls are treated ahead of the
-  // PC Relative condition because binaries can still contain a possible
-  // mix of functions that use a TOC and functions that do not use a TOC.
-  // Once the PC Relative feature is complete this condition should be moved
-  // up ahead of the indirect calls and should return a PPCISD::BCTRL for
-  // that case.
   if (Subtarget.isUsingPCRelativeCalls()) {
     assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
     return PPCISD::CALL_NOTOC;
@@ -5439,7 +5437,9 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops,
     // pointer from the linkage area. The operand for the TOC restore is an add
     // of the TOC save offset to the stack pointer. This must be the second
     // operand: after the chain input but before any other variadic arguments.
-    if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
+    // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
+    // saved or used.
+    if (isTOCSaveRestoreRequired(Subtarget)) {
       const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
 
       SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
@@ -6509,17 +6509,21 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
   // See prepareDescriptorIndirectCall and buildCallOperands for more
   // information about calls through function pointers in the 64-bit SVR4 ABI.
   if (CFlags.IsIndirect) {
-    assert(!CFlags.IsTailCall &&  "Indirect tails calls not supported");
-    // Load r2 into a virtual register and store it to the TOC save area.
-    setUsesTOCBasePtr(DAG);
-    SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
-    // TOC save area offset.
-    unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
-    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
-    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
-    Chain = DAG.getStore(
-        Val.getValue(1), dl, Val, AddPtr,
-        MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
+    // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
+    // caller in the TOC save area.
+    if (isTOCSaveRestoreRequired(Subtarget)) {
+      assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
+      // Load r2 into a virtual register and store it to the TOC save area.
+      setUsesTOCBasePtr(DAG);
+      SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
+      // TOC save area offset.
+      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
+      SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
+      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+      Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
+                           MachinePointerInfo::getStack(
+                               DAG.getMachineFunction(), TOCSaveOffset));
+    }
     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
     // This does not mean the MTCTR instruction must use R12; it's easier
     // to model this as an extra parameter, so do that.

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
index ed96e732b08b..010704f546d0 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
@@ -328,14 +328,10 @@ entry:
 
 define dso_local signext i32 @IndirectCall1(i32 signext %a, i32 signext %b) local_unnamed_addr {
 ; CHECK-ALL-LABEL: IndirectCall1:
-; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep13 at ha
-; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep13 at l
-; CHECK-S:         .localentry     IndirectCall1, .Lfunc_lep13-.Lfunc_gep13
 ; CHECK-S:       # %bb.0: # %entry
 ; CHECK-S-NEXT:    mflr r0
 ; CHECK-S-NEXT:    std r0, 16(r1)
 ; CHECK-S-NEXT:    stdu r1, -32(r1)
-; CHECK-S-NEXT:    std r2, 24(r1)
 ; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-S-NEXT:    .cfi_offset lr, 16
 ; CHECK-S-NEXT:    pld r12, indirectCall at PCREL(0), 1
@@ -343,7 +339,6 @@ define dso_local signext i32 @IndirectCall1(i32 signext %a, i32 signext %b) loca
 ; CHECK-S-NEXT:    extsw r3, r3
 ; CHECK-S-NEXT:    mtctr r12
 ; CHECK-S-NEXT:    bctrl
-; CHECK-S-NEXT:    ld 2, 24(r1)
 ; CHECK-S-NEXT:    plwz r4, globalVar at PCREL(0), 1
 ; CHECK-S-NEXT:    mullw r3, r4, r3
 ; CHECK-S-NEXT:    extsw r3, r3
@@ -362,14 +357,10 @@ entry:
 
 define dso_local signext i32 @IndirectCall2(i32 signext %a, i32 signext %b) local_unnamed_addr {
 ; CHECK-ALL-LABEL: IndirectCall2:
-; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep14 at ha
-; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep14 at l
-; CHECK-S:         .localentry     IndirectCall2, .Lfunc_lep14-.Lfunc_gep14
 ; CHECK-S:       # %bb.0: # %entry
 ; CHECK-S-NEXT:    mflr r0
 ; CHECK-S-NEXT:    std r0, 16(r1)
 ; CHECK-S-NEXT:    stdu r1, -32(r1)
-; CHECK-S-NEXT:    std r2, 24(r1)
 ; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-S-NEXT:    .cfi_offset lr, 16
 ; CHECK-S-NEXT:    pld r12, indirectCall at PCREL(0), 1
@@ -377,7 +368,6 @@ define dso_local signext i32 @IndirectCall2(i32 signext %a, i32 signext %b) loca
 ; CHECK-S-NEXT:    extsw r3, r3
 ; CHECK-S-NEXT:    mtctr r12
 ; CHECK-S-NEXT:    bctrl
-; CHECK-S-NEXT:    ld 2, 24(r1)
 ; CHECK-S-NEXT:    pld r4, externGlobalVar at got@pcrel(0), 1
 ; CHECK-S-NEXT:    lwz r4, 0(r4)
 ; CHECK-S-NEXT:    mullw r3, r4, r3
@@ -397,14 +387,10 @@ entry:
 
 define dso_local signext i32 @IndirectCall3(i32 signext %a, i32 signext %b, i32 (i32)* nocapture %call_param) local_unnamed_addr {
 ; CHECK-ALL-LABEL: IndirectCall3:
-; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep15 at ha
-; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep15 at l
-; CHECK-S:         .localentry     IndirectCall3, .Lfunc_lep15-.Lfunc_gep15
 ; CHECK-S:       # %bb.0: # %entry
 ; CHECK-S-NEXT:    mflr r0
 ; CHECK-S-NEXT:    std r0, 16(r1)
 ; CHECK-S-NEXT:    stdu r1, -32(r1)
-; CHECK-S-NEXT:    std r2, 24(r1)
 ; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-S-NEXT:    .cfi_offset lr, 16
 ; CHECK-S-NEXT:    add r3, r4, r3
@@ -412,7 +398,6 @@ define dso_local signext i32 @IndirectCall3(i32 signext %a, i32 signext %b, i32
 ; CHECK-S-NEXT:    mtctr r5
 ; CHECK-S-NEXT:    mr r12, r5
 ; CHECK-S-NEXT:    bctrl
-; CHECK-S-NEXT:    ld 2, 24(r1)
 ; CHECK-S-NEXT:    plwz r4, globalVar at PCREL(0), 1
 ; CHECK-S-NEXT:    mullw r3, r4, r3
 ; CHECK-S-NEXT:    extsw r3, r3
@@ -430,9 +415,6 @@ entry:
 
 define dso_local signext i32 @IndirectCallNoGlobal(i32 signext %a, i32 signext %b, i32 (i32)* nocapture %call_param) local_unnamed_addr {
 ; CHECK-ALL-LABEL: IndirectCallNoGlobal:
-; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep16 at ha
-; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep16 at l
-; CHECK-S:         .localentry     IndirectCallNoGlobal, .Lfunc_lep16-.Lfunc_gep16
 ; CHECK-S:       # %bb.0: # %entry
 ; CHECK-S-NEXT:    mflr r0
 ; CHECK-S-NEXT:    .cfi_def_cfa_offset 48
@@ -443,10 +425,8 @@ define dso_local signext i32 @IndirectCallNoGlobal(i32 signext %a, i32 signext %
 ; CHECK-S-NEXT:    stdu r1, -48(r1)
 ; CHECK-S-NEXT:    mtctr r5
 ; CHECK-S-NEXT:    mr r12, r5
-; CHECK-S-NEXT:    std r2, 24(r1)
 ; CHECK-S-NEXT:    mr r30, r4
 ; CHECK-S-NEXT:    bctrl
-; CHECK-S-NEXT:    ld 2, 24(r1)
 ; CHECK-S-NEXT:    add r3, r3, r30
 ; CHECK-S-NEXT:    extsw r3, r3
 ; CHECK-S-NEXT:    addi r1, r1, 48
@@ -462,20 +442,15 @@ entry:
 
 define dso_local signext i32 @IndirectCallOnly(i32 signext %a, i32 (i32)* nocapture %call_param) local_unnamed_addr {
 ; CHECK-ALL-LABEL: IndirectCallOnly:
-; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep17 at ha
-; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep17 at l
-; CHECK-S:         .localentry     IndirectCallOnly, .Lfunc_lep17-.Lfunc_gep17
 ; CHECK-S:       # %bb.0: # %entry
 ; CHECK-S-NEXT:    mflr r0
 ; CHECK-S-NEXT:    std r0, 16(r1)
 ; CHECK-S-NEXT:    stdu r1, -32(r1)
-; CHECK-S-NEXT:    std r2, 24(r1)
 ; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-S-NEXT:    .cfi_offset lr, 16
 ; CHECK-S-NEXT:    mtctr r4
 ; CHECK-S-NEXT:    mr r12, r4
 ; CHECK-S-NEXT:    bctrl
-; CHECK-S-NEXT:    ld 2, 24(r1)
 ; CHECK-S-NEXT:    addi r1, r1, 32
 ; CHECK-S-NEXT:    ld r0, 16(r1)
 ; CHECK-S-NEXT:    mtlr r0

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
index e9aeccd4ac16..a3404a8951a1 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
@@ -219,14 +219,12 @@ define dso_local void @ReadFuncPtr() local_unnamed_addr  {
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
-; CHECK-NEXT:    std r2, 24(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    pld r3, ptrfunc at got@pcrel(0), 1
 ; CHECK-NEXT:    ld r12, 0(r3)
 ; CHECK-NEXT:    mtctr r12
 ; CHECK-NEXT:    bctrl
-; CHECK-NEXT:    ld 2, 24(r1)
 ; CHECK-NEXT:    addi r1, r1, 32
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll b/llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll
new file mode 100644
index 000000000000..7806d691c530
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll
@@ -0,0 +1,37 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names < %s | FileCheck %s
+
+; The test checks the behavior of PC Relative indirect calls. When using
+; PC Relative, TOC save and restore are no longer required. Function pointer
+; is passed as a parameter in this test.
+
+; Function Attrs: noinline
+define dso_local void @IndirectCallExternFuncPtr(void ()* nocapture %ptrfunc) {
+; CHECK-LABEL: IndirectCallExternFuncPtr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -32(r1)
+
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    mtctr r3
+; CHECK-NEXT:    mr r12, r3
+; CHECK-NEXT:    bctrl
+
+; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  tail call void %ptrfunc()
+  ret void
+}
+
+define dso_local void @FuncPtrPassAsParam() {
+entry:
+  tail call void @IndirectCallExternFuncPtr(void ()* nonnull @Function)
+  ret void
+}
+
+declare void @Function()


        


More information about the llvm-commits mailing list