[llvm] r325049 - [X86] Use EDI for retpoline when no scratch regs are left

Reid Kleckner via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 13 12:47:49 PST 2018


Author: rnk
Date: Tue Feb 13 12:47:49 2018
New Revision: 325049

URL: http://llvm.org/viewvc/llvm-project?rev=325049&view=rev
Log:
[X86] Use EDI for retpoline when no scratch regs are left

Summary:
Instead of solving the hard problem of how to pass the callee to the indirect
jump thunk without a register, just use a CSR. At a call boundary, there's
nothing stopping us from using a CSR to hold the callee as long as we save and
restore it in the prologue.

Also, add tests for this mregparm=3 case. I wrote execution tests for
__llvm_retpoline_push, but they never got committed as lit tests, either
because I never rewrote them or because they got lost in merge conflicts.

Reviewers: chandlerc, dwmw2

Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits

Differential Revision: https://reviews.llvm.org/D43214

Added:
    llvm/trunk/test/CodeGen/X86/retpoline-regparm.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86RetpolineThunks.cpp
    llvm/trunk/test/CodeGen/X86/retpoline.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=325049&r1=325048&r2=325049&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Feb 13 12:47:49 2018
@@ -27081,9 +27081,6 @@ static const char *getRetpolineSymbol(co
     // attempt to help out kernels and other systems where duplicating the
     // thunks is costly.
     switch (Reg) {
-    case 0:
-      assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
-      return "__x86_indirect_thunk";
     case X86::EAX:
       assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
       return "__x86_indirect_thunk_eax";
@@ -27093,6 +27090,9 @@ static const char *getRetpolineSymbol(co
     case X86::EDX:
       assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
       return "__x86_indirect_thunk_edx";
+    case X86::EDI:
+      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+      return "__x86_indirect_thunk_edi";
     case X86::R11:
       assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
       return "__x86_indirect_thunk_r11";
@@ -27102,9 +27102,6 @@ static const char *getRetpolineSymbol(co
 
   // When targeting an internal COMDAT thunk use an LLVM-specific name.
   switch (Reg) {
-  case 0:
-    assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
-    return "__llvm_retpoline_push";
   case X86::EAX:
     assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
     return "__llvm_retpoline_eax";
@@ -27114,6 +27111,9 @@ static const char *getRetpolineSymbol(co
   case X86::EDX:
     assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
     return "__llvm_retpoline_edx";
+  case X86::EDI:
+    assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+    return "__llvm_retpoline_edi";
   case X86::R11:
     assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
     return "__llvm_retpoline_r11";
@@ -27135,15 +27135,13 @@ X86TargetLowering::EmitLoweredRetpoline(
   // just use R11, but we scan for uses anyway to ensure we don't generate
   // incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't
   // already a register use operand to the call to hold the callee. If none
-  // are available, push the callee instead. This is less efficient, but is
-  // necessary for functions using 3 regparms. Such function calls are
-  // (currently) not eligible for tail call optimization, because there is no
-  // scratch register available to hold the address of the callee.
+  // are available, use EDI instead. EDI is chosen because EBX is the PIC base
+  // register and ESI is the base pointer to realigned stack frames with VLAs.
   SmallVector<unsigned, 3> AvailableRegs;
   if (Subtarget.is64Bit())
     AvailableRegs.push_back(X86::R11);
   else
-    AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX});
+    AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI});
 
   // Zero out any registers that are already used.
   for (const auto &MO : MI.operands()) {
@@ -27161,30 +27159,18 @@ X86TargetLowering::EmitLoweredRetpoline(
       break;
     }
   }
+  if (!AvailableReg)
+    report_fatal_error("calling convention incompatible with retpoline, no "
+                       "available registers");
 
   const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
 
-  if (AvailableReg == 0) {
-    // No register available. Use PUSH. This must not be a tailcall, and this
-    // must not be x64.
-    if (Subtarget.is64Bit())
-      report_fatal_error(
-          "Cannot make an indirect call on x86-64 using both retpoline and a "
-          "calling convention that preservers r11");
-    if (Opc != X86::CALLpcrel32)
-      report_fatal_error("Cannot make an indirect tail call on x86 using "
-                         "retpoline without a preserved register");
-    BuildMI(*BB, MI, DL, TII->get(X86::PUSH32r)).addReg(CalleeVReg);
-    MI.getOperand(0).ChangeToES(Symbol);
-    MI.setDesc(TII->get(Opc));
-  } else {
-    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
-        .addReg(CalleeVReg);
-    MI.getOperand(0).ChangeToES(Symbol);
-    MI.setDesc(TII->get(Opc));
-    MachineInstrBuilder(*BB->getParent(), &MI)
-        .addReg(AvailableReg, RegState::Implicit | RegState::Kill);
-  }
+  BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
+      .addReg(CalleeVReg);
+  MI.getOperand(0).ChangeToES(Symbol);
+  MI.setDesc(TII->get(Opc));
+  MachineInstrBuilder(*BB->getParent(), &MI)
+      .addReg(AvailableReg, RegState::Implicit | RegState::Kill);
   return BB;
 }
 

Modified: llvm/trunk/lib/Target/X86/X86RetpolineThunks.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RetpolineThunks.cpp?rev=325049&r1=325048&r2=325049&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RetpolineThunks.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86RetpolineThunks.cpp Tue Feb 13 12:47:49 2018
@@ -43,7 +43,7 @@ static const char R11ThunkName[]    = "_
 static const char EAXThunkName[]    = "__llvm_retpoline_eax";
 static const char ECXThunkName[]    = "__llvm_retpoline_ecx";
 static const char EDXThunkName[]    = "__llvm_retpoline_edx";
-static const char PushThunkName[]   = "__llvm_retpoline_push";
+static const char EDIThunkName[]    = "__llvm_retpoline_edi";
 
 namespace {
 class X86RetpolineThunks : public MachineFunctionPass {
@@ -127,7 +127,7 @@ bool X86RetpolineThunks::runOnMachineFun
       createThunkFunction(M, R11ThunkName);
     else
       for (StringRef Name :
-           {EAXThunkName, ECXThunkName, EDXThunkName, PushThunkName})
+           {EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName})
         createThunkFunction(M, Name);
     InsertedThunks = true;
     return true;
@@ -151,9 +151,8 @@ bool X86RetpolineThunks::runOnMachineFun
     populateThunk(MF, X86::R11);
   } else {
     // For 32-bit targets we need to emit a collection of thunks for various
-    // possible scratch registers as well as a fallback that is used when
-    // there are no scratch registers and assumes the retpoline target has
-    // been pushed.
+    // possible scratch registers as well as a fallback that uses EDI, which is
+    // normally callee saved.
     //   __llvm_retpoline_eax:
     //         calll .Leax_call_target
     //   .Leax_capture_spec:
@@ -174,32 +173,18 @@ bool X86RetpolineThunks::runOnMachineFun
     //         movl %edx, (%esp)
     //         retl
     //
-    // This last one is a bit more special and so needs a little extra
-    // handling.
-    // __llvm_retpoline_push:
-    //         calll .Lpush_call_target
-    // .Lpush_capture_spec:
-    //         pause
-    //         lfence
-    //         jmp .Lpush_capture_spec
-    // .align 16
-    // .Lpush_call_target:
-    //         # Clear pause_loop return address.
-    //         addl $4, %esp
-    //         # Top of stack words are: Callee, RA. Exchange Callee and RA.
-    //         pushl 4(%esp)  # Push callee
-    //         pushl 4(%esp)  # Push RA
-    //         popl 8(%esp)   # Pop RA to final RA
-    //         popl (%esp)    # Pop callee to next top of stack
-    //         retl           # Ret to callee
+    //   __llvm_retpoline_edi:
+    //   ... # Same setup
+    //         movl %edi, (%esp)
+    //         retl
     if (MF.getName() == EAXThunkName)
       populateThunk(MF, X86::EAX);
     else if (MF.getName() == ECXThunkName)
       populateThunk(MF, X86::ECX);
     else if (MF.getName() == EDXThunkName)
       populateThunk(MF, X86::EDX);
-    else if (MF.getName() == PushThunkName)
-      populateThunk(MF);
+    else if (MF.getName() == EDIThunkName)
+      populateThunk(MF, X86::EDI);
     else
       llvm_unreachable("Invalid thunk name on x86-32!");
   }
@@ -301,11 +286,6 @@ void X86RetpolineThunks::populateThunk(M
   CaptureSpec->addSuccessor(CaptureSpec);
 
   CallTarget->setAlignment(4);
-  if (Reg) {
-    insertRegReturnAddrClobber(*CallTarget, *Reg);
-  } else {
-    assert(!Is64Bit && "We only support non-reg thunks on 32-bit x86!");
-    insert32BitPushReturnAddrClobber(*CallTarget);
-  }
+  insertRegReturnAddrClobber(*CallTarget, *Reg);
   BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
 }

Added: llvm/trunk/test/CodeGen/X86/retpoline-regparm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/retpoline-regparm.ll?rev=325049&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/retpoline-regparm.ll (added)
+++ llvm/trunk/test/CodeGen/X86/retpoline-regparm.ll Tue Feb 13 12:47:49 2018
@@ -0,0 +1,42 @@
+; RUN: llc -mtriple=i686-linux < %s | FileCheck --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" %s
+
+; Test 32-bit retpoline when -mregparm=3 is used. This case is interesting
+; because there are no available scratch registers.  The Linux kernel builds
+; with -mregparm=3, so we need to support it.  TCO should fail because we need
+; to restore EDI.
+
+define void @call_edi(void (i32, i32, i32)* %fp) #0 {
+entry:
+  tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0)
+  ret void
+}
+
+; CHECK-LABEL: call_edi:
+;     EDI is used, so it must be saved.
+; CHECK: pushl %edi
+; CHECK-DAG: xorl %eax, %eax
+; CHECK-DAG: xorl %edx, %edx
+; CHECK-DAG: xorl %ecx, %ecx
+; CHECK-DAG: movl {{.*}}, %edi
+; CHECK: calll __llvm_retpoline_edi
+; CHECK: popl %edi
+; CHECK: retl
+
+define void @edi_external(void (i32, i32, i32)* %fp) #1 {
+entry:
+  tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0)
+  ret void
+}
+
+; CHECK-LABEL: edi_external:
+; CHECK: pushl %edi
+; CHECK-DAG: xorl %eax, %eax
+; CHECK-DAG: xorl %edx, %edx
+; CHECK-DAG: xorl %ecx, %ecx
+; CHECK-DAG: movl {{.*}}, %edi
+; CHECK: calll __x86_indirect_thunk_edi
+; CHECK: popl %edi
+; CHECK: retl
+
+attributes #0 = { "target-features"="+retpoline" }
+attributes #1 = { "target-features"="+retpoline-external-thunk" }

Modified: llvm/trunk/test/CodeGen/X86/retpoline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/retpoline.ll?rev=325049&r1=325048&r2=325049&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/retpoline.ll (original)
+++ llvm/trunk/test/CodeGen/X86/retpoline.ll Tue Feb 13 12:47:49 2018
@@ -340,10 +340,10 @@ latch:
 ; X86-NEXT:          movl    %edx, (%esp)
 ; X86-NEXT:          retl
 ;
-; X86-LABEL:         .section        .text.__llvm_retpoline_push,{{.*}},__llvm_retpoline_push,comdat
-; X86-NEXT:          .hidden __llvm_retpoline_push
-; X86-NEXT:          .weak   __llvm_retpoline_push
-; X86:       __llvm_retpoline_push:
+; X86-LABEL:         .section        .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat
+; X86-NEXT:          .hidden __llvm_retpoline_edi
+; X86-NEXT:          .weak   __llvm_retpoline_edi
+; X86:       __llvm_retpoline_edi:
 ; X86-NEXT:  # {{.*}}                                # %entry
 ; X86-NEXT:          calll   [[CALL_TARGET:.*]]
 ; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
@@ -355,11 +355,7 @@ latch:
 ; X86-NEXT:          .p2align        4, 0x90
 ; X86-NEXT:  [[CALL_TARGET]]:                        # Block address taken
 ; X86-NEXT:                                          # %entry
-; X86-NEXT:          addl    $4, %esp
-; X86-NEXT:          pushl   4(%esp)
-; X86-NEXT:          pushl   4(%esp)
-; X86-NEXT:          popl    8(%esp)
-; X86-NEXT:          popl    (%esp)
+; X86-NEXT:          movl    %edi, (%esp)
 ; X86-NEXT:          retl
 
 




More information about the llvm-commits mailing list