[llvm] r287485 - [X86] RegCall - Handling long double arguments

Oren Ben Simhon via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 20 03:06:07 PST 2016


Author: orenb
Date: Sun Nov 20 05:06:07 2016
New Revision: 287485

URL: http://llvm.org/viewvc/llvm-project?rev=287485&view=rev
Log:
[X86] RegCall - Handling long double arguments

The change is part of RegCall calling convention support for LLVM.
Long double (f80) requires special treatment as the first f80 parameter is saved in FP0 (floating point stack).
This review presents the change and the corresponding tests.

Differential Revision: https://reviews.llvm.org/D26151

Modified:
    llvm/trunk/lib/Target/X86/X86CallingConv.td
    llvm/trunk/lib/Target/X86/X86FloatingPoint.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll

Modified: llvm/trunk/lib/Target/X86/X86CallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86CallingConv.td?rev=287485&r1=287484&r2=287485&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86CallingConv.td (original)
+++ llvm/trunk/lib/Target/X86/X86CallingConv.td Sun Nov 20 05:06:07 2016
@@ -24,6 +24,8 @@ class RC_X86_RegCall {
   list<Register> GPR_16 = [];
   list<Register> GPR_32 = [];
   list<Register> GPR_64 = [];
+  list<Register> FP_CALL = [FP0];
+  list<Register> FP_RET = [FP0, FP1];
   list<Register> XMM = [];
   list<Register> YMM = [];
   list<Register> ZMM = [];
@@ -90,14 +92,14 @@ def CC_#NAME : CallingConv<[
     // TODO: Handle the case of mask types (v*i1)
     CCIfType<[v8i1, v16i1, v32i1], CCCustom<"CC_X86_RegCall_Error">>,
 
-    // TODO: Handle the case of long double (f80)
-    CCIfType<[f80], CCCustom<"CC_X86_RegCall_Error">>,
-
     // float, double, float128 --> XMM
     // In the case of SSE disabled --> save to stack
     CCIfType<[f32, f64, f128], 
       CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
 
+    // long double --> FP
+    CCIfType<[f80], CCAssignToReg<RC.FP_CALL>>,
+
     // __m128, __m128i, __m128d --> XMM
     // In the case of SSE disabled --> save to stack
     CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 
@@ -129,7 +131,7 @@ def CC_#NAME : CallingConv<[
 
     // float 128 get stack slots whose size and alignment depends 
     // on the subtarget.
-    CCIfType<[f128], CCAssignToStack<0, 0>>,
+    CCIfType<[f80, f128], CCAssignToStack<0, 0>>,
 
     // Vectors get 16-byte stack slots that are 16-byte aligned.
     CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 
@@ -166,7 +168,7 @@ def RetCC_#NAME : CallingConv<[
     CCIfType<[v8i1, v16i1, v32i1], CCCustom<"CC_X86_RegCall_Error">>,
 
     // long double --> FP
-    CCIfType<[f80], CCAssignToReg<[FP0]>>,
+    CCIfType<[f80], CCAssignToReg<RC.FP_RET>>,
 
     // float, double, float128 --> XMM
     CCIfType<[f32, f64, f128], 

Modified: llvm/trunk/lib/Target/X86/X86FloatingPoint.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FloatingPoint.cpp?rev=287485&r1=287484&r2=287485&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FloatingPoint.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FloatingPoint.cpp Sun Nov 20 05:06:07 2016
@@ -206,6 +206,13 @@ namespace {
       RegMap[Reg] = StackTop++;
     }
 
+    // popReg - Pop a register from the stack.
+    void popReg() {
+      if (StackTop == 0)
+        report_fatal_error("Cannot pop empty stack!");
+      RegMap[Stack[--StackTop]] = ~0;     // Update state
+    }
+
     bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
     void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
       DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
@@ -329,6 +336,25 @@ bool FPS::runOnMachineFunction(MachineFu
   df_iterator_default_set<MachineBasicBlock*> Processed;
   MachineBasicBlock *Entry = &MF.front();
 
+  LiveBundle &Bundle =
+    LiveBundles[Bundles->getBundle(Entry->getNumber(), false)];
+  
+  // In regcall convention, some FP registers may not be passed through
+  // the stack, so they will need to be assigned to the stack first
+  if ((Entry->getParent()->getFunction()->getCallingConv() ==
+    CallingConv::X86_RegCall) && (Bundle.Mask && !Bundle.FixCount)) {
+    // In the register calling convention, up to one FP argument could be 
+    // saved in the first FP register.
+    // If bundle.mask is non-zero and Bundle.FixCount is zero, it means
+    // that the FP registers contain arguments.
+    // The actual value is passed in FP0.
+    // Here we fix the stack and mark FP0 as pre-assigned register.
+    assert((Bundle.Mask & 0xFE) == 0 &&
+      "Only FP0 could be passed as an argument");
+    Bundle.FixCount = 1;
+    Bundle.FixStack[0] = 0;
+  }
+
   bool Changed = false;
   for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
     Changed |= processBasicBlock(MF, *BB);
@@ -791,9 +817,8 @@ void FPS::popStackAfter(MachineBasicBloc
   MachineInstr &MI = *I;
   const DebugLoc &dl = MI.getDebugLoc();
   ASSERT_SORTED(PopTable);
-  if (StackTop == 0)
-    report_fatal_error("Cannot pop empty stack!");
-  RegMap[Stack[--StackTop]] = ~0;     // Update state
+
+  popReg();
 
   // Check to see if there is a popping version of this instruction...
   int Opcode = Lookup(PopTable, I->getOpcode());
@@ -929,6 +954,7 @@ void FPS::shuffleStackTop(const unsigned
 
 void FPS::handleCall(MachineBasicBlock::iterator &I) {
   unsigned STReturns = 0;
+  const MachineFunction* MF = I->getParent()->getParent();
 
   for (const auto &MO : I->operands()) {
     if (!MO.isReg())
@@ -937,7 +963,10 @@ void FPS::handleCall(MachineBasicBlock::
     unsigned R = MO.getReg() - X86::FP0;
 
     if (R < 8) {
-      assert(MO.isDef() && MO.isImplicit());
+      if (MF->getFunction()->getCallingConv() != CallingConv::X86_RegCall) {
+        assert(MO.isDef() && MO.isImplicit());
+      }
+
       STReturns |= 1 << R;
     }
   }
@@ -945,9 +974,15 @@ void FPS::handleCall(MachineBasicBlock::
   unsigned N = countTrailingOnes(STReturns);
 
   // FP registers used for function return must be consecutive starting at
-  // FP0.
+  // FP0
   assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2));
 
+  // Reset the FP Stack - It is required because of possible leftovers from
+  // passed arguments. The caller should assume that the FP stack is 
+  // returned empty (unless the callee returns values on FP stack).
+  while (StackTop > 0)
+    popReg();
+
   for (unsigned I = 0; I < N; ++I)
     pushReg(N - I - 1);
 }

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=287485&r1=287484&r2=287485&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Nov 20 05:06:07 2016
@@ -2816,6 +2816,8 @@ SDValue X86TargetLowering::LowerFormalAr
           RC = &X86::FR32RegClass;
         else if (RegVT == MVT::f64)
           RC = &X86::FR64RegClass;
+        else if (RegVT == MVT::f80)
+          RC = &X86::RFP80RegClass;
         else if (RegVT == MVT::f128)
           RC = &X86::FR128RegClass;
         else if (RegVT.is512BitVector())

Modified: llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll?rev=287485&r1=287484&r2=287485&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll Sun Nov 20 05:06:07 2016
@@ -246,6 +246,44 @@ define x86_regcallcc double @test_Callar
   ret double %d
 }
 
+; X32: test_argRetf80
+; X32-NOT: fldt
+; X32: fadd	%st(0), %st(0)
+; X32: retl
+
+; WIN64: test_argRetf80
+; WIN64-NOT: fldt
+; WIN64: fadd	%st(0), %st(0)
+; WIN64: retq
+
+; Test regcall when receiving/returning long double
+define x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %a0) nounwind {
+  %r0 = fadd x86_fp80 %a0, %a0
+  ret x86_fp80 %r0
+}
+
+; X32: test_CallargRetf80
+; X32-NOT: fldt
+; X32: fadd	%st({{[0-7]}}), %st({{[0-7]}})
+; X32: call{{.*}}   {{.*}}test_argRetf80
+; X32: fadd{{.*}}	%st({{[0-7]}})
+; X32: retl
+
+; WIN64: test_CallargRetf80
+; WIN64-NOT: fldt
+; WIN64: fadd	%st({{[0-7]}}), %st({{[0-7]}})
+; WIN64: call{{.*}}   {{.*}}test_argRetf80
+; WIN64: fadd{{.*}}	%st({{[0-7]}})
+; WIN64: retq
+
+; Test regcall when passing/retrieving long double
+define x86_regcallcc x86_fp80 @test_CallargRetf80(x86_fp80 %a)  {
+  %b = fadd x86_fp80 %a, %a
+  %c = call x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %b)
+  %d = fadd x86_fp80 %c, %c
+  ret x86_fp80 %d
+}
+
 ; X32-LABEL:  test_argRetPointer:
 ; X32:        incl %eax
 ; X32:        ret{{.*}}




More information about the llvm-commits mailing list