[llvm] b294e00 - [PowerPC] Fix for PC Relative call protocol

Kamau Bridgeman via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 1 05:09:15 PDT 2020


Author: Stefan Pintilie
Date: 2020-07-01T07:08:41-05:00
New Revision: b294e00fb079600f337f479eb29fc27058228302

URL: https://github.com/llvm/llvm-project/commit/b294e00fb079600f337f479eb29fc27058228302
DIFF: https://github.com/llvm/llvm-project/commit/b294e00fb079600f337f479eb29fc27058228302.diff

LOG: [PowerPC] Fix for PC Relative call protocol

The situation where the caller uses a TOC and the callee does not
but is marked as clobbers the TOC (st_other=1) was not being compiled
correctly if both functions where in the same object file.

The call site where we had `callee` was missing a nop after the call.
This is because it was assumed that since the two functions where in
the same DSO they would be sharing a TOC. This is not the case if the
callee uses PC Relative because in that case it may clobber the TOC.
This patch makes sure that we add the cnop correctly so that the
linker has a place to restore the TOC.

Reviewers: sfertile, NeHuang, saghir

Differential Revision: https://reviews.llvm.org/D81126

Added: 
    llvm/test/CodeGen/PowerPC/func-alias.ll
    llvm/test/CodeGen/PowerPC/pcrel-local-caller-toc.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/test/CodeGen/PowerPC/ifunc.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index a0f4c5c3a569..2368e3d7725b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -4700,30 +4700,67 @@ static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
 
 static bool isFunctionGlobalAddress(SDValue Callee);
 
-static bool
-callsShareTOCBase(const Function *Caller, SDValue Callee,
-                    const TargetMachine &TM) {
-   // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
-   // don't have enough information to determine if the caller and calle share
-   // the same  TOC base, so we have to pessimistically assume they don't for
-   // correctness.
-   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
-   if (!G)
-     return false;
-
-   const GlobalValue *GV = G->getGlobal();
+static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
+                              const TargetMachine &TM) {
+  // It does not make sense to call callsShareTOCBase() with a caller that
+  // is PC Relative since PC Relative callers do not have a TOC.
+#ifndef NDEBUG
+  const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
+  assert(!STICaller->isUsingPCRelativeCalls() &&
+         "PC Relative callers do not have a TOC and cannot share a TOC Base");
+#endif
+
+  // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
+  // don't have enough information to determine if the caller and callee share
+  // the same  TOC base, so we have to pessimistically assume they don't for
+  // correctness.
+  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+  if (!G)
+    return false;
+
+  const GlobalValue *GV = G->getGlobal();
+
+  // If the callee is preemptable, then the static linker will use a plt-stub
+  // which saves the toc to the stack, and needs a nop after the call
+  // instruction to convert to a toc-restore.
+  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
+    return false;
+
+  // Functions with PC Relative enabled may clobber the TOC in the same DSO.
+  // We may need a TOC restore in the situation where the caller requires a
+  // valid TOC but the callee is PC Relative and does not.
+  const Function *F = dyn_cast<Function>(GV);
+  const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
+
+  // If we have an Alias we can try to get the function from there.
+  if (Alias) {
+    const GlobalObject *GlobalObj = Alias->getBaseObject();
+    F = dyn_cast<Function>(GlobalObj);
+  }
+
+  // If we still have no valid function pointer we do not have enough
+  // information to determine if the callee uses PC Relative calls so we must
+  // assume that it does.
+  if (!F)
+    return false;
+
+  // If the callee uses PC Relative we cannot guarantee that the callee won't
+  // clobber the TOC of the caller and so we must assume that the two
+  // functions do not share a TOC base.
+  const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
+  if (STICallee->isUsingPCRelativeCalls())
+    return false;
+
   // The medium and large code models are expected to provide a sufficiently
   // large TOC to provide all data addressing needs of a module with a
-  // single TOC. Since each module will be addressed with a single TOC then we
-  // only need to check that caller and callee don't cross dso boundaries.
+  // single TOC.
   if (CodeModel::Medium == TM.getCodeModel() ||
       CodeModel::Large == TM.getCodeModel())
-    return TM.shouldAssumeDSOLocal(*Caller->getParent(), GV);
+    return true;
 
   // Otherwise we need to ensure callee and caller are in the same section,
   // since the linker may allocate multiple TOCs, and we don't know which
   // sections will belong to the same TOC base.
-
   if (!GV->isStrongDefinitionForLinker())
     return false;
 
@@ -4738,26 +4775,6 @@ callsShareTOCBase(const Function *Caller, SDValue Callee,
       return false;
   }
 
-  // If the callee might be interposed, then we can't assume the ultimate call
-  // target will be in the same section. Even in cases where we can assume that
-  // interposition won't happen, in any case where the linker might insert a
-  // stub to allow for interposition, we must generate code as though
-  // interposition might occur. To understand why this matters, consider a
-  // situation where: a -> b -> c where the arrows indicate calls. b and c are
-  // in the same section, but a is in a 
diff erent module (i.e. has a 
diff erent
-  // TOC base pointer). If the linker allows for interposition between b and c,
-  // then it will generate a stub for the call edge between b and c which will
-  // save the TOC pointer into the designated stack slot allocated by b. If we
-  // return true here, and therefore allow a tail call between b and c, that
-  // stack slot won't exist and the b -> c stub will end up saving b'c TOC base
-  // pointer into the stack slot allocated by a (where the a -> b stub saved
-  // a's TOC base pointer). If we're not considering a tail call, but rather,
-  // whether a nop is needed after the call instruction in b, because the linker
-  // will insert a stub, it might complain about a missing nop if we omit it
-  // (although many don't complain in this case).
-  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
-    return false;
-
   return true;
 }
 
@@ -5277,8 +5294,8 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
   // will rewrite the nop to be a load of the TOC pointer from the linkage area
   // into gpr2.
   if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
-      return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
-                                                    : PPCISD::CALL_NOP;
+    return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
+                                                  : PPCISD::CALL_NOP;
 
   return PPCISD::CALL;
 }

diff  --git a/llvm/test/CodeGen/PowerPC/func-alias.ll b/llvm/test/CodeGen/PowerPC/func-alias.ll
new file mode 100644
index 000000000000..7382722b97a7
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/func-alias.ll
@@ -0,0 +1,46 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck %s --check-prefix=P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names < %s | FileCheck %s --check-prefix=P10
+
+ at newname = dso_local alias i32 (...), bitcast (i32 ()* @oldname to i32 (...)*)
+
+; Function Attrs: noinline nounwind optnone
+define dso_local signext i32 @oldname() #0 {
+entry:
+  ret i32 55
+}
+
+; Function Attrs: noinline nounwind optnone
+define dso_local signext i32 @caller() #0 {
+; #P9-LABEL: caller
+; #P9:       bl newname
+; #P9-NOT:   nop
+; #P9:       blr
+; #P10-LABEL: caller
+; #P10:       bl newname at notoc
+; #P10-NOT:   nop
+; #P10:       blr
+entry:
+  %call = call signext i32 bitcast (i32 (...)* @newname to i32 ()*)()
+  ret i32 %call
+}
+
+; Function Attrs: noinline nounwind optnone -pcrelative-memops
+; This caller does not use PC Relative memops
+define dso_local signext i32 @caller_nopcrel() #1 {
+; #P9-LABEL: caller_nopcrel
+; #P9:       bl newname
+; #P9-NOT:   nop
+; #P9:       blr
+; #P10-LABEL: caller_nopcrel
+; #P10:       bl newname
+; #P10-NEXT:  nop
+; #P10:       blr
+entry:
+  %call = call signext i32 bitcast (i32 (...)* @newname to i32 ()*)()
+  ret i32 %call
+}
+
+attributes #0 = { noinline nounwind optnone }
+attributes #1 = { noinline nounwind optnone "target-features"="-pcrelative-memops" }

diff  --git a/llvm/test/CodeGen/PowerPC/ifunc.ll b/llvm/test/CodeGen/PowerPC/ifunc.ll
index a964a2bac1c5..a58601f8f32f 100644
--- a/llvm/test/CodeGen/PowerPC/ifunc.ll
+++ b/llvm/test/CodeGen/PowerPC/ifunc.ll
@@ -2,6 +2,10 @@
 ; RUN: llc %s -o - -mtriple=powerpc -relocation-model=pic | FileCheck --check-prefix=PLTREL %s
 ; RUN: llc %s -o - -mtriple=powerpc64 | FileCheck --check-prefix=REL %s
 ; RUN: llc %s -o - -mtriple=powerpc64 -relocation-model=pic | FileCheck --check-prefix=REL %s
+; RUN: llc %s -o - -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 \
+; RUN:   -verify-machineinstrs | FileCheck --check-prefix=LEP8 %s
+; RUN: llc %s -o - -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 \
+; RUN:   -verify-machineinstrs | FileCheck --check-prefix=LEP10 %s
 
 @ifunc1 = dso_local ifunc void(), i8*()* @resolver
 @ifunc2 = ifunc void(), i8*()* @resolver
@@ -9,10 +13,23 @@
 define i8* @resolver() { ret i8* null }
 
 define void @foo() #0 {
-  ; REL: bl ifunc1{{$}}
-  ; REL: bl ifunc2{{$}}
-  ; PLTREL: bl ifunc1 at PLT+32768
-  ; PLTREL: bl ifunc2 at PLT+32768
+  ; REL-LABEL:    foo
+  ; REL:          bl ifunc1{{$}}
+  ; REL:          bl ifunc2{{$}}
+  ; PLTREL-LABEL: foo
+  ; PLTREL:       bl ifunc1 at PLT+32768
+  ; PLTREL:       bl ifunc2 at PLT+32768
+  ; LEP8-LABEL:   foo
+  ; LEP8:         bl ifunc1
+  ; LEP8-NEXT:    nop
+  ; LEP8-NEXT:    bl ifunc2
+  ; LEP8-NEXT:    nop
+  ; LEP8:         blr
+  ; LEP10-LABEL:  foo
+  ; LEP10:        bl ifunc1 at notoc
+  ; LEP10-NEXT:   bl ifunc2 at notoc
+  ; LEP10-NOT:    nop
+  ; LEP10:        blr
   call void @ifunc1()
   call void @ifunc2()
   ret void

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel-local-caller-toc.ll b/llvm/test/CodeGen/PowerPC/pcrel-local-caller-toc.ll
new file mode 100644
index 000000000000..2e248506c7b7
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pcrel-local-caller-toc.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names < %s | FileCheck %s
+
+; The purpose of this test is to check the call protocols for the situation
+; where the caller has PC Relative disabled, the callee has PC Relative
+; enabled and both functions are in the same file.
+; Note that the callee does not know if it clobbers the TOC because it
+; contains an external call to @externalFunc.
+
+ at global = external local_unnamed_addr global i32, align 4
+
+define dso_local signext i32 @callee(i32 signext %a) local_unnamed_addr #0 {
+; CHECK-LABEL: callee:
+; CHECK:         .localentry callee, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    mr r30, r3
+; CHECK-NEXT:    bl externalFunc at notoc
+; CHECK-NEXT:    add r3, r3, r30
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %call = tail call signext i32 @externalFunc(i32 signext %a) #3
+  %add = add nsw i32 %call, %a
+  ret i32 %add
+}
+
+declare signext i32 @externalFunc(i32 signext) local_unnamed_addr #1
+
+define dso_local void @caller(i32 signext %a) local_unnamed_addr #2 {
+; CHECK-LABEL: caller:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld r30, .LC0 at toc@l(r4)
+; CHECK-NEXT:    lwz r4, 0(r30)
+; CHECK-NEXT:    add r3, r4, r3
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    bl callee
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mullw r3, r3, r3
+; CHECK-NEXT:    stw r3, 0(r30)
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32, i32* @global, align 4
+  %add = add nsw i32 %0, %a
+  %call = tail call signext i32 @callee(i32 signext %add)
+  %mul = mul nsw i32 %call, %call
+  store i32 %mul, i32* @global, align 4
+  ret void
+}
+
+define dso_local signext i32 @tail_caller(i32 signext %a) local_unnamed_addr #2 {
+; CHECK-LABEL: tail_caller:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
+; CHECK-NEXT:    lwz r4, 0(r4)
+; CHECK-NEXT:    add r3, r4, r3
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    bl callee
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32, i32* @global, align 4
+  %add = add nsw i32 %0, %a
+  %call = tail call signext i32 @callee(i32 signext %add)
+  ret i32 %call
+}
+
+
+; Left the target features in this test because it is important that caller has
+; -pcrelative-memops while callee has +pcrelative-memops
+attributes #0 = { nounwind "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+pcrelative-memops,+power8-vector,+power9-vector,+vsx,-htm,-qpx,-spe" }
+attributes #1 = { "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+pcrelative-memops,+power8-vector,+power9-vector,+vsx,-htm,-qpx,-spe" }
+attributes #2 = { nounwind "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+power9-vector,+vsx,-htm,-pcrelative-memops,-qpx,-spe" }
+attributes #3 = { nounwind }


        


More information about the llvm-commits mailing list