[llvm] r354837 - [X86] Fix bug in x86_intrcc with arg copy elision

Reid Kleckner via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 25 18:11:25 PST 2019


Author: rnk
Date: Mon Feb 25 18:11:25 2019
New Revision: 354837

URL: http://llvm.org/viewvc/llvm-project?rev=354837&view=rev
Log:
[X86] Fix bug in x86_intrcc with arg copy elision

Summary:
Use a custom calling convention handler for interrupts instead of fixing
up the locations in LowerMemArgument. This way, the offsets are correct
when constructed and we don't need to account for them in as many
places.

Depends on D56883

Replaces D56275

Reviewers: craig.topper, phil-opp

Subscribers: hiraditya, llvm-commits

Differential Revision: https://reviews.llvm.org/D56944
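
For context, the supported handler prototypes look roughly like this at the
source level. This is only a sketch: the __attribute__((interrupt)) spelling
and the interrupt_frame field names follow the usual GCC/Clang convention for
x86 interrupt handlers and are not part of this patch.

    #include <stdint.h>

    // The CPU pushes this five-slot frame (and, for some vectors, an error
    // code below it) before entering the handler; there is no ordinary
    // return address on the stack.
    struct interrupt_frame {
      uintptr_t ip, cs, flags, sp, ss; // five stack slots of SlotSize each
    };

    // One-argument form: no error code.
    __attribute__((interrupt))
    void isr_no_err(struct interrupt_frame *frame);

    // Two-argument form: the error code is the second parameter in the
    // prototype but sits below the interrupt_frame on the stack.
    __attribute__((interrupt))
    void isr_with_err(struct interrupt_frame *frame, uintptr_t error_code);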

Modified:
    llvm/trunk/lib/Target/X86/X86CallingConv.cpp
    llvm/trunk/lib/Target/X86/X86CallingConv.td
    llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/x86-32-intrcc.ll
    llvm/trunk/test/CodeGen/X86/x86-64-intrcc.ll

Modified: llvm/trunk/lib/Target/X86/X86CallingConv.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86CallingConv.cpp?rev=354837&r1=354836&r2=354837&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86CallingConv.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86CallingConv.cpp Mon Feb 25 18:11:25 2019
@@ -287,5 +287,45 @@ static bool CC_X86_32_MCUInReg(unsigned
   return true;
 }
 
+/// X86 interrupt handlers can only take one or two stack arguments, but if
+/// there are two arguments, they are in the opposite order from the standard
+/// convention. Therefore, we have to look at the argument count up front before
+/// allocating stack for each argument.
+static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                        CCValAssign::LocInfo &LocInfo,
+                        ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  const MachineFunction &MF = State.getMachineFunction();
+  size_t ArgCount = State.getMachineFunction().getFunction().arg_size();
+  bool Is64Bit = static_cast<const X86Subtarget &>(MF.getSubtarget()).is64Bit();
+  unsigned SlotSize = Is64Bit ? 8 : 4;
+  unsigned Offset;
+  if (ArgCount == 1 && ValNo == 0) {
+    // If we have one argument, the argument is five stack slots big, at fixed
+    // offset zero.
+    Offset = State.AllocateStack(5 * SlotSize, 4);
+  } else if (ArgCount == 2 && ValNo == 0) {
+    // If we have two arguments, the stack slot is *after* the error code
+    // argument. Pretend it doesn't consume stack space, and account for it when
+    // we assign the second argument.
+    Offset = SlotSize;
+  } else if (ArgCount == 2 && ValNo == 1) {
+    // If this is the second of two arguments, it must be the error code. It
+    // appears first on the stack, and is then followed by the five slot
+    // interrupt struct.
+    Offset = 0;
+    (void)State.AllocateStack(6 * SlotSize, 4);
+  } else {
+    report_fatal_error("unsupported x86 interrupt prototype");
+  }
+
+  // FIXME: This should be accounted for in
+  // X86FrameLowering::getFrameIndexReference, not here.
+  if (Is64Bit && ArgCount == 2)
+    Offset += SlotSize;
+
+  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+  return true;
+}
+
 // Provides entry points of CC_X86 and RetCC_X86.
 #include "X86GenCallingConv.inc"
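
As a worked example of the arithmetic above, the following standalone sketch
(a model of CC_X86_Intr for illustration, not LLVM code) reproduces the
LocMemOffset recorded for each argument; the frame-lowering change further
down then turns these into the EBP/RBP-relative addresses checked in the
updated tests.

    #include <cassert>
    #include <cstddef>

    // Model of CC_X86_Intr's offset logic: returns the stack offset recorded
    // for argument ValNo of a handler with ArgCount arguments.
    static unsigned IntrArgOffset(size_t ArgCount, unsigned ValNo, bool Is64Bit) {
      unsigned SlotSize = Is64Bit ? 8 : 4;
      unsigned Offset;
      if (ArgCount == 1 && ValNo == 0)
        Offset = 0;             // interrupt_frame: five slots at offset 0
      else if (ArgCount == 2 && ValNo == 0)
        Offset = SlotSize;      // interrupt_frame sits just above the error code
      else if (ArgCount == 2 && ValNo == 1)
        Offset = 0;             // error code is lowest on the stack
      else
        return ~0u;             // unsupported prototype
      if (Is64Bit && ArgCount == 2)
        Offset += SlotSize;     // see the FIXME about 64-bit realignment
      return Offset;
    }

    int main() {
      assert(IntrArgOffset(1, 0, /*Is64Bit=*/false) == 0);  // i386, no error code
      assert(IntrArgOffset(2, 0, /*Is64Bit=*/false) == 4);  // i386 frame argument
      assert(IntrArgOffset(2, 1, /*Is64Bit=*/false) == 0);  // i386 error code
      assert(IntrArgOffset(2, 0, /*Is64Bit=*/true) == 16);  // x86-64 frame argument
      assert(IntrArgOffset(2, 1, /*Is64Bit=*/true) == 8);   // x86-64 error code
      return 0;
    }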

Modified: llvm/trunk/lib/Target/X86/X86CallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86CallingConv.td?rev=354837&r1=354836&r2=354837&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86CallingConv.td (original)
+++ llvm/trunk/lib/Target/X86/X86CallingConv.td Mon Feb 25 18:11:25 2019
@@ -985,14 +985,6 @@ def CC_Intel_OCL_BI : CallingConv<[
   CCDelegateTo<CC_X86_32_C>
 ]>;
 
-def CC_X86_32_Intr : CallingConv<[
-  CCAssignToStack<4, 4>
-]>;
-
-def CC_X86_64_Intr : CallingConv<[
-  CCAssignToStack<8, 8>
-]>;
-
 //===----------------------------------------------------------------------===//
 // X86 Root Argument Calling Conventions
 //===----------------------------------------------------------------------===//
@@ -1001,7 +993,7 @@ def CC_X86_64_Intr : CallingConv<[
 def CC_X86_32 : CallingConv<[
   // X86_INTR calling convention is valid in MCU target and should override the
   // MCU calling convention. Thus, this should be checked before isTargetMCU().
-  CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_32_Intr>>,
+  CCIfCC<"CallingConv::X86_INTR", CCCustom<"CC_X86_Intr">>,
   CCIfSubtarget<"isTargetMCU()", CCDelegateTo<CC_X86_32_MCU>>,
   CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
   CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win32_VectorCall>>,
@@ -1029,7 +1021,7 @@ def CC_X86_64 : CallingConv<[
   CCIfCC<"CallingConv::X86_RegCall",
     CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_RegCall>>>,
   CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_SysV64_RegCall>>,
-  CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_64_Intr>>,
+  CCIfCC<"CallingConv::X86_INTR", CCCustom<"CC_X86_Intr">>,
 
   // Mingw64 and native Win64 use Win64 CC
   CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,

Modified: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FrameLowering.cpp?rev=354837&r1=354836&r2=354837&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp Mon Feb 25 18:11:25 2019
@@ -1773,6 +1773,15 @@ int X86FrameLowering::getFrameIndexRefer
   bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
   int64_t FPDelta = 0;
 
+  // In an x86 interrupt, remove the offset we added to account for the return
+  // address from any stack object allocated in the caller's frame. Interrupts
+  // do not have a standard return address. Fixed objects in the current frame,
+  // such as SSE register spills, should not get this treatment.
+  if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
+      Offset >= 0) {
+    Offset += getOffsetOfLocalArea();
+  }
+
   if (IsWin64Prologue) {
     assert(!MFI.hasCalls() || (StackSize % 16) == 8);
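
As a quick sanity check of this adjustment, here is a simplified model (a
sketch for illustration, not the real getFrameIndexReference) of the
EBP-relative offset computed for a fixed incoming-argument object on x86-32
with a frame pointer; the interrupt values match the CHECK lines added to
x86-32-intrcc.ll below.

    #include <cassert>

    // Simplified model: frame-pointer-relative offset of a fixed
    // incoming-argument object on x86-32.
    static int FPRelativeOffset(int ObjectOffset, bool IsInterrupt) {
      const int SlotSize = 4;
      const int LocalAreaOffset = -SlotSize;       // return-address slot
      int Offset = ObjectOffset - LocalAreaOffset; // skip the return address
      if (IsInterrupt && ObjectOffset >= 0)
        Offset += LocalAreaOffset;                 // no return address was pushed
      return Offset + SlotSize;                    // skip the saved EBP
    }

    int main() {
      // Regular function: the first stack argument is at EBP+8.
      assert(FPRelativeOffset(0, /*IsInterrupt=*/false) == 8);
      // x86_intrcc with an error code: the error code is at EBP+4 and the
      // interrupt_frame at EBP+8, as test_fp_2 in x86-32-intrcc.ll checks.
      assert(FPRelativeOffset(0, /*IsInterrupt=*/true) == 4);
      assert(FPRelativeOffset(4, /*IsInterrupt=*/true) == 8);
      return 0;
    }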
 

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=354837&r1=354836&r2=354837&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Feb 25 18:11:25 2019
@@ -2976,22 +2976,6 @@ X86TargetLowering::LowerMemArgument(SDVa
   else
     ValVT = VA.getValVT();
 
-  // Calculate SP offset of interrupt parameter, re-arrange the slot normally
-  // taken by a return address.
-  int Offset = 0;
-  if (CallConv == CallingConv::X86_INTR) {
-    // X86 interrupts may take one or two arguments.
-    // On the stack there will be no return address as in regular call.
-    // Offset of last argument need to be set to -4/-8 bytes.
-    // Where offset of the first argument out of two, should be set to 0 bytes.
-    Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
-    if (Subtarget.is64Bit() && Ins.size() == 2) {
-      // The stack pointer needs to be realigned for 64 bit handlers with error
-      // code, so the argument offset changes by 8 bytes.
-      Offset += 8;
-    }
-  }
-
   // FIXME: For now, all byval parameter objects are marked mutable. This can be
   // changed with more analysis.
   // In case of tail call optimization mark all arguments mutable. Since they
@@ -3004,10 +2988,6 @@ X86TargetLowering::LowerMemArgument(SDVa
     // can be improved with deeper analysis.
     int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
                                    /*isAliased=*/true);
-    // Adjust SP offset of interrupt parameter.
-    if (CallConv == CallingConv::X86_INTR) {
-      MFI.setObjectOffset(FI, Offset);
-    }
     return DAG.getFrameIndex(FI, PtrVT);
   }
 
@@ -3062,11 +3042,6 @@ X86TargetLowering::LowerMemArgument(SDVa
     MFI.setObjectSExt(FI, true);
   }
 
-  // Adjust SP offset of interrupt parameter.
-  if (CallConv == CallingConv::X86_INTR) {
-    MFI.setObjectOffset(FI, Offset);
-  }
-
   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
   SDValue Val = DAG.getLoad(
       ValVT, dl, Chain, FIN,
@@ -3156,14 +3131,6 @@ SDValue X86TargetLowering::LowerFormalAr
       !(isVarArg && canGuaranteeTCO(CallConv)) &&
       "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
 
-  if (CallConv == CallingConv::X86_INTR) {
-    bool isLegal = Ins.size() == 1 ||
-                   (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
-                                        (!Is64Bit && Ins[1].VT == MVT::i32)));
-    if (!isLegal)
-      report_fatal_error("X86 interrupts may take one or two arguments");
-  }
-
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

Modified: llvm/trunk/test/CodeGen/X86/x86-32-intrcc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-32-intrcc.ll?rev=354837&r1=354836&r2=354837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x86-32-intrcc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x86-32-intrcc.ll Mon Feb 25 18:11:25 2019
@@ -3,7 +3,9 @@
 
 %struct.interrupt_frame = type { i32, i32, i32, i32, i32 }
 
-@llvm.used = appending global [4 x i8*] [i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_no_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i32)* @test_isr_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i32)* @test_isr_clobbers to i8*), i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_x87 to i8*)], section "llvm.metadata"
+@sink_address = global i32* null
+@sink_i32 = global i32 0
+
 
 ; Spills eax, putting original esp at +4.
 ; No stack adjustment if declared with no error code
@@ -93,3 +95,67 @@ entry:
   store x86_fp80 %add, x86_fp80* @f80, align 4
   ret void
 }
+
+; Use a frame pointer to check the offsets. No return address, arguments start
+; at EBP+4.
+define dso_local x86_intrcc void @test_fp_1(%struct.interrupt_frame* %p) #0 {
+  ; CHECK-LABEL: test_fp_1:
+  ; CHECK: # %bb.0: # %entry
+  ; CHECK-NEXT: pushl %ebp
+  ; CHECK-NEXT: movl %esp, %ebp
+  ; CHECK: cld
+  ; CHECK-DAG: leal 4(%ebp), %[[R1:[^ ]*]]
+  ; CHECK-DAG: leal 20(%ebp), %[[R2:[^ ]*]]
+  ; CHECK: movl %[[R1]], sink_address
+  ; CHECK: movl %[[R2]], sink_address
+  ; CHECK: popl %ebp
+  ; CHECK: iretl
+entry:
+  %arrayidx = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i32 0, i32 0
+  %arrayidx2 = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i32 0, i32 4
+  store volatile i32* %arrayidx, i32** @sink_address
+  store volatile i32* %arrayidx2, i32** @sink_address
+  ret void
+}
+
+; The error code is between EBP and the interrupt_frame.
+define dso_local x86_intrcc void @test_fp_2(%struct.interrupt_frame* %p, i32 %err) #0 {
+  ; CHECK-LABEL: test_fp_2:
+  ; CHECK: # %bb.0: # %entry
+  ; CHECK-NEXT: pushl %ebp
+  ; CHECK-NEXT: movl %esp, %ebp
+  ; CHECK: cld
+  ; CHECK-DAG: movl 4(%ebp), %[[R3:[^ ]*]]
+  ; CHECK-DAG: leal 8(%ebp), %[[R1:[^ ]*]]
+  ; CHECK-DAG: leal 24(%ebp), %[[R2:[^ ]*]]
+  ; CHECK: movl %[[R1]], sink_address
+  ; CHECK: movl %[[R2]], sink_address
+  ; CHECK: movl %[[R3]], sink_i32
+  ; CHECK: popl %ebp
+  ; CHECK: iretl
+entry:
+  %arrayidx = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i32 0, i32 0
+  %arrayidx2 = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i32 0, i32 4
+  store volatile i32* %arrayidx, i32** @sink_address
+  store volatile i32* %arrayidx2, i32** @sink_address
+  store volatile i32 %err, i32* @sink_i32
+  ret void
+}
+
+; Test argument copy elision when copied to a local alloca.
+define x86_intrcc void @test_copy_elide(%struct.interrupt_frame* %frame, i32 %err) #0 {
+  ; CHECK-LABEL: test_copy_elide:
+  ; CHECK: # %bb.0: # %entry
+  ; CHECK-NEXT: pushl %ebp
+  ; CHECK-NEXT: movl %esp, %ebp
+  ; CHECK: cld
+  ; CHECK: leal 4(%ebp), %[[R1:[^ ]*]]
+  ; CHECK: movl %[[R1]], sink_address
+entry:
+  %err.addr = alloca i32, align 4
+  store i32 %err, i32* %err.addr, align 4
+  store volatile i32* %err.addr, i32** @sink_address
+  ret void
+}
+
+attributes #0 = { nounwind "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }

Modified: llvm/trunk/test/CodeGen/X86/x86-64-intrcc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-64-intrcc.ll?rev=354837&r1=354836&r2=354837&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x86-64-intrcc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x86-64-intrcc.ll Mon Feb 25 18:11:25 2019
@@ -3,7 +3,8 @@
 
 %struct.interrupt_frame = type { i64, i64, i64, i64, i64 }
 
-@llvm.used = appending global [4 x i8*] [i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_no_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i64)* @test_isr_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i64)* @test_isr_clobbers to i8*), i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_x87 to i8*)], section "llvm.metadata"
+@sink_address = global i64* null
+@sink_i32 = global i64 0
 
 ; Spills rax, putting original esp at +8.
 ; No stack adjustment if declared with no error code
@@ -105,3 +106,75 @@ entry:
   store x86_fp80 %add, x86_fp80* @f80, align 4
   ret void
 }
+
+; Use a frame pointer to check the offsets. No return address, arguments start
+; at RBP+8.
+define dso_local x86_intrcc void @test_fp_1(%struct.interrupt_frame* %p) #0 {
+  ; CHECK-LABEL: test_fp_1:
+  ; CHECK: # %bb.0: # %entry
+  ; CHECK-NEXT: pushq %rbp
+  ; CHECK-NEXT: movq %rsp, %rbp
+  ; CHECK: cld
+  ; CHECK-DAG: leaq 8(%rbp), %[[R1:[^ ]*]]
+  ; CHECK-DAG: leaq 40(%rbp), %[[R2:[^ ]*]]
+  ; CHECK: movq %[[R1]], sink_address
+  ; CHECK: movq %[[R2]], sink_address
+  ; CHECK: popq %rbp
+  ; CHECK: iretq
+entry:
+  %arrayidx = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i64 0, i32 0
+  %arrayidx2 = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i64 0, i32 4
+  store volatile i64* %arrayidx, i64** @sink_address
+  store volatile i64* %arrayidx2, i64** @sink_address
+  ret void
+}
+
+; The error code is between RBP and the interrupt_frame.
+define dso_local x86_intrcc void @test_fp_2(%struct.interrupt_frame* %p, i64 %err) #0 {
+  ; CHECK-LABEL: test_fp_2:
+  ; CHECK: # %bb.0: # %entry
+  ; This RAX push is just to align the stack.
+  ; CHECK-NEXT: pushq %rax
+  ; CHECK-NEXT: pushq %rbp
+  ; CHECK-NEXT: movq %rsp, %rbp
+  ; CHECK: cld
+  ; CHECK-DAG: movq 16(%rbp), %[[R3:[^ ]*]]
+  ; CHECK-DAG: leaq 24(%rbp), %[[R1:[^ ]*]]
+  ; CHECK-DAG: leaq 56(%rbp), %[[R2:[^ ]*]]
+  ; CHECK: movq %[[R1]], sink_address(%rip)
+  ; CHECK: movq %[[R2]], sink_address(%rip)
+  ; CHECK: movq %[[R3]], sink_i32(%rip)
+  ; CHECK: popq %rbp
+  ; Pop off both the error code and the 8 byte alignment adjustment from the
+  ; prologue.
+  ; CHECK: addq $16, %rsp
+  ; CHECK: iretq
+entry:
+  %arrayidx = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i64 0, i32 0
+  %arrayidx2 = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i64 0, i32 4
+  store volatile i64* %arrayidx, i64** @sink_address
+  store volatile i64* %arrayidx2, i64** @sink_address
+  store volatile i64 %err, i64* @sink_i32
+  ret void
+}
+
+; Test argument copy elision when copied to a local alloca.
+define x86_intrcc void @test_copy_elide(%struct.interrupt_frame* %frame, i64 %err) #0 {
+  ; CHECK-LABEL: test_copy_elide:
+  ; CHECK: # %bb.0: # %entry
+  ; This RAX push is just to align the stack.
+  ; CHECK-NEXT: pushq %rax
+  ; CHECK-NEXT: pushq %rbp
+  ; CHECK-NEXT: movq %rsp, %rbp
+  ; CHECK: cld
+  ; CHECK: leaq 16(%rbp), %[[R1:[^ ]*]]
+  ; CHECK: movq %[[R1]], sink_address(%rip)
+entry:
+  %err.addr = alloca i64, align 4
+  store i64 %err, i64* %err.addr, align 4
+  store volatile i64* %err.addr, i64** @sink_address
+  ret void
+}
+
+
+attributes #0 = { nounwind "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }