[llvm] r212476 - [PowerPC] Fix "byval align" arguments

Ulrich Weigand ulrich.weigand at de.ibm.com
Mon Jul 7 12:26:41 PDT 2014


Author: uweigand
Date: Mon Jul  7 14:26:41 2014
New Revision: 212476

URL: http://llvm.org/viewvc/llvm-project?rev=212476&view=rev
Log:
[PowerPC] Fix "byval align" arguments

Arguments passed as "byval align" should get the specified alignment
in the parameter save area.  There was some code in PPCISelLowering.cpp
that attempted to implement this, but this didn't work correctly:
while code did update the ArgOffset value, it neglected to update
the PtrOff value (which was already computed from the old ArgOffset),
and it also neglected to update GPR_idx -- fields skipped due to
alignment in the save area must likewise be skipped in GPRs.

This patch fixes and simplifies this logic by:
- handling argument offset alignment right at the beginning
  of argument processing, using a new helper routine
  CalculateStackSlotAlignment (this avoids having to update
  PtrOff and other derived values later on)
- not tracking GPR_idx separately, but always computing the
  correct GPR_idx for each argument *from* its ArgOffset
- removing some redundant computation in LowerFormalArguments:
  MinReservedArea must equal ArgOffset after argument processing,
  so there's no use in computing it twice.

[This doesn't change the behavior of the current clang front-end,
since that never creates "byval align" arguments at the moment.
This will change with a follow-on patch, however.]

Added:
    llvm/trunk/test/CodeGen/PowerPC/ppc64-byval-align.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=212476&r1=212475&r2=212476&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Mon Jul  7 14:26:41 2014
@@ -2131,6 +2131,34 @@ static unsigned CalculateStackSlotSize(E
 
   return ArgSize;
 }
+
+/// CalculateStackSlotAlignment - Calculates the alignment of this argument
+/// on the stack.
+static unsigned CalculateStackSlotAlignment(EVT ArgVT, ISD::ArgFlagsTy Flags,
+                                            unsigned PtrByteSize) {
+  unsigned Align = PtrByteSize;
+
+  // Altivec parameters are padded to a 16 byte boundary.
+  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+      ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+      ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
+    Align = 16;
+
+  // ByVal parameters are aligned as requested.
+  if (Flags.isByVal()) {
+    unsigned BVAlign = Flags.getByValAlign();
+    if (BVAlign > PtrByteSize) {
+      if (BVAlign % PtrByteSize != 0)
+          llvm_unreachable(
+            "ByVal alignment is not a multiple of the pointer size");
+
+      Align = BVAlign;
+    }
+  }
+
+  return Align;
+}
+
 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
 /// ensure minimum alignment required for target.
 static unsigned EnsureStackAlignment(const TargetMachine &Target,
@@ -2422,8 +2450,6 @@ PPCTargetLowering::LowerFormalArguments_
 
   unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
   unsigned ArgOffset = LinkageSize;
-  // Area that is at least reserved in caller of this function.
-  unsigned MinReservedArea = ArgOffset;
 
   static const MCPhysReg GPR[] = {
     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
@@ -2445,7 +2471,7 @@ PPCTargetLowering::LowerFormalArguments_
   const unsigned Num_FPR_Regs = 13;
   const unsigned Num_VR_Regs  = array_lengthof(VR);
 
-  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+  unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
 
   // Add DAG nodes to load the arguments or copy them out of registers.  On
   // entry to a function on PPC, the arguments start after the linkage area,
@@ -2464,16 +2490,15 @@ PPCTargetLowering::LowerFormalArguments_
     std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
     CurArgIdx = Ins[ArgNo].OrigArgIndex;
 
+    /* Respect alignment of argument on the stack.  */
+    unsigned Align =
+      CalculateStackSlotAlignment(ObjectVT, Flags, PtrByteSize);
+    ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
     unsigned CurArgOffset = ArgOffset;
 
-    // Altivec parameters are padded to a 16 byte boundary.
-    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
-        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8 ||
-        ObjectVT==MVT::v2f64 || ObjectVT==MVT::v2i64)
-      MinReservedArea = ((MinReservedArea+15)/16)*16;
-
-    // Calculate min reserved area.
-    MinReservedArea += CalculateStackSlotSize(ObjectVT, Flags, PtrByteSize);
+    /* Compute GPR index associated with argument offset.  */
+    GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+    GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
 
     // FIXME the codegen can be much improved in some cases.
     // We do not have to keep everything in memory.
@@ -2495,12 +2520,6 @@ PPCTargetLowering::LowerFormalArguments_
         continue;
       }
 
-      unsigned BVAlign = Flags.getByValAlign();
-      if (BVAlign > 8) {
-        ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
-        CurArgOffset = ArgOffset;
-      }
-
       // All aggregates smaller than 8 bytes must be passed right-justified.
       if (ObjSize < PtrByteSize && !isLittleEndian)
         CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
@@ -2536,7 +2555,6 @@ PPCTargetLowering::LowerFormalArguments_
           }
 
           MemOps.push_back(Store);
-          ++GPR_idx;
         }
         // Whether we copied from a register or not, advance the offset
         // into the parameter save area by a full doubleword.
@@ -2581,8 +2599,6 @@ PPCTargetLowering::LowerFormalArguments_
           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
           // value to MVT::i64 and then truncate to the correct register size.
           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
-
-        ++GPR_idx;
       } else {
         needsLoad = true;
         ArgSize = PtrByteSize;
@@ -2592,11 +2608,6 @@ PPCTargetLowering::LowerFormalArguments_
 
     case MVT::f32:
     case MVT::f64:
-      // Every 8 bytes of argument space consumes one of the GPRs available for
-      // argument passing.
-      if (GPR_idx != Num_GPR_Regs) {
-        ++GPR_idx;
-      }
       if (FPR_idx != Num_FPR_Regs) {
         unsigned VReg;
 
@@ -2622,12 +2633,6 @@ PPCTargetLowering::LowerFormalArguments_
     case MVT::v16i8:
     case MVT::v2f64:
     case MVT::v2i64:
-      // Vectors are aligned to a 16-byte boundary in the argument save area.
-      while ((ArgOffset % 16) != 0) {
-        ArgOffset += PtrByteSize;
-        if (GPR_idx != Num_GPR_Regs)
-          GPR_idx++;
-      }
       if (VR_idx != Num_VR_Regs) {
         unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
                         MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
@@ -2635,11 +2640,9 @@ PPCTargetLowering::LowerFormalArguments_
         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
         ++VR_idx;
       } else {
-        CurArgOffset = ArgOffset;
         needsLoad = true;
       }
       ArgOffset += 16;
-      GPR_idx = std::min(GPR_idx + 2, Num_GPR_Regs);
       break;
     }
 
@@ -2658,7 +2661,8 @@ PPCTargetLowering::LowerFormalArguments_
   }
 
   // Area that is at least reserved in the caller of this function.
-  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
+  unsigned MinReservedArea;
+  MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
 
   // Set the size that is at least reserved in caller of this function.  Tail
   // call optimized functions' reserved stack space needs to be aligned so that
@@ -2679,7 +2683,8 @@ PPCTargetLowering::LowerFormalArguments_
     // If this function is vararg, store any remaining integer argument regs
     // to their spots on the stack so that they may be loaded by deferencing the
     // result of va_next.
-    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
       unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -3971,15 +3976,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
     ISD::ArgFlagsTy Flags = Outs[i].Flags;
     EVT ArgVT = Outs[i].VT;
 
-    // Altivec parameters are padded to a 16 byte boundary.
-    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
-        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
-        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
-      NumBytes = ((NumBytes+15)/16)*16;
+    /* Respect alignment of argument on the stack.  */
+    unsigned Align = CalculateStackSlotAlignment(ArgVT, Flags, PtrByteSize);
+    NumBytes = ((NumBytes + Align - 1) / Align) * Align;
 
     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
   }
 
+  unsigned NumBytesActuallyUsed = NumBytes;
+
   // The prolog code of the callee may store up to 8 GPR argument registers to
   // the stack, allowing va_start to index over them in memory if its varargs.
   // Because we cannot tell if this is needed on the caller side, we have to
@@ -4023,7 +4028,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
   // must be stored to our stack, and loaded into integer regs as well, if
   // any integer regs are available for argument passing.
   unsigned ArgOffset = LinkageSize;
-  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+  unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
 
   static const MCPhysReg GPR[] = {
     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
@@ -4052,6 +4057,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
     SDValue Arg = OutVals[i];
     ISD::ArgFlagsTy Flags = Outs[i].Flags;
 
+    /* Respect alignment of argument on the stack.  */
+    unsigned Align =
+      CalculateStackSlotAlignment(Outs[i].VT, Flags, PtrByteSize);
+    ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+
+    /* Compute GPR index associated with argument offset.  */
+    GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+    GPR_idx = std::min(GPR_idx, NumGPRs);
+
     // PtrOff will be used to store the current argument to the stack if a
     // register cannot be found for it.
     SDValue PtrOff;
@@ -4083,15 +4097,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
       if (Size == 0)
         continue;
 
-      unsigned BVAlign = Flags.getByValAlign();
-      if (BVAlign > 8) {
-        if (BVAlign % PtrByteSize != 0)
-          llvm_unreachable(
-            "ByVal alignment is not a multiple of the pointer size");
-
-        ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
-      }
-
       // All aggregates smaller than 8 bytes must be passed right-justified.
       if (Size==1 || Size==2 || Size==4) {
         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
@@ -4100,7 +4105,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
                                         MachinePointerInfo(), VT,
                                         false, false, 0);
           MemOpChains.push_back(Load.getValue(1));
-          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+          RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
 
           ArgOffset += PtrByteSize;
           continue;
@@ -4162,7 +4167,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
                                    MachinePointerInfo(),
                                    false, false, false, 0);
         MemOpChains.push_back(Load.getValue(1));
-        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+        RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
 
         // Done with this argument.
         ArgOffset += PtrByteSize;
@@ -4195,7 +4200,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
     case MVT::i32:
     case MVT::i64:
       if (GPR_idx != NumGPRs) {
-        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+        RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg));
       } else {
         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                          true, isTailCall, false, MemOpChains,
@@ -4230,11 +4235,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
                                        MachinePointerInfo(), false, false,
                                        false, 0);
             MemOpChains.push_back(Load.getValue(1));
-            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+            RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
           }
-        } else if (GPR_idx != NumGPRs)
-          // If we have any FPRs remaining, we may also have GPRs remaining.
-          ++GPR_idx;
+        }
       } else {
         // Single-precision floating-point values are mapped to the
         // second (rightmost) word of the stack doubleword.
@@ -4255,13 +4258,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
     case MVT::v16i8:
     case MVT::v2f64:
     case MVT::v2i64:
-      // Vectors are aligned to a 16-byte boundary in the argument save area.
-      while (ArgOffset % 16 !=0) {
-        ArgOffset += PtrByteSize;
-        if (GPR_idx != NumGPRs)
-          GPR_idx++;
-      }
-
       // For a varargs call, named arguments go into VRs or on the stack as
       // usual; unnamed arguments always go to the stack or the corresponding
       // GPRs when within range.  For now, we always put the value in both
@@ -4269,8 +4265,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
       if (isVarArg) {
         // We could elide this store in the case where the object fits
         // entirely in R registers.  Maybe later.
-        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
-                            DAG.getConstant(ArgOffset, PtrVT));
         SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
                                      MachinePointerInfo(), false, false, 0);
         MemOpChains.push_back(Store);
@@ -4315,11 +4309,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
                          TailCallArguments, dl);
       }
       ArgOffset += 16;
-      GPR_idx = std::min(GPR_idx + 2, NumGPRs);
       break;
     }
   }
 
+  assert(NumBytesActuallyUsed == ArgOffset);
+
   if (!MemOpChains.empty())
     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
 

Added: llvm/trunk/test/CodeGen/PowerPC/ppc64-byval-align.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ppc64-byval-align.ll?rev=212476&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppc64-byval-align.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/ppc64-byval-align.ll Mon Jul  7 14:26:41 2014
@@ -0,0 +1,56 @@
+; RUN: llc -O1 < %s -march=ppc64 | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.test = type { i64, [8 x i8] }
+%struct.pad = type { [8 x i64] }
+
+ at gt = common global %struct.test zeroinitializer, align 16
+ at gp = common global %struct.pad zeroinitializer, align 8
+
+define signext i32 @callee1(i32 signext %x, %struct.test* byval align 16 nocapture readnone %y, i32 signext %z) {
+entry:
+  ret i32 %z
+}
+; CHECK-LABEL: @callee1
+; CHECK: mr 3, 7
+; CHECK: blr
+
+declare signext i32 @test1(i32 signext, %struct.test* byval align 16, i32 signext)
+define void @caller1(i32 signext %z) {
+entry:
+  %call = tail call signext i32 @test1(i32 signext 0, %struct.test* byval align 16 @gt, i32 signext %z)
+  ret void
+}
+; CHECK-LABEL: @caller1
+; CHECK: mr [[REG:[0-9]+]], 3
+; CHECK: mr 7, [[REG]]
+; CHECK: bl test1
+
+define i64 @callee2(%struct.pad* byval nocapture readnone %x, i32 signext %y, %struct.test* byval align 16 nocapture readonly %z) {
+entry:
+  %x1 = getelementptr inbounds %struct.test* %z, i64 0, i32 0
+  %0 = load i64* %x1, align 16
+  ret i64 %0
+}
+; CHECK-LABEL: @callee2
+; CHECK: ld [[REG:[0-9]+]], 128(1)
+; CHECK: mr 3, [[REG]]
+; CHECK: blr
+
+declare i64 @test2(%struct.pad* byval, i32 signext, %struct.test* byval align 16)
+define void @caller2(i64 %z) {
+entry:
+  %tmp = alloca %struct.test, align 16
+  %.compoundliteral.sroa.0.0..sroa_idx = getelementptr inbounds %struct.test* %tmp, i64 0, i32 0
+  store i64 %z, i64* %.compoundliteral.sroa.0.0..sroa_idx, align 16
+  %call = call i64 @test2(%struct.pad* byval @gp, i32 signext 0, %struct.test* byval align 16 %tmp)
+  ret void
+}
+; CHECK-LABEL: @caller2
+; CHECK: std 3, [[OFF:[0-9]+]](1)
+; CHECK: ld [[REG:[0-9]+]], [[OFF]](1)
+; CHECK: std [[REG]], 128(1)
+; CHECK: bl test2
+





More information about the llvm-commits mailing list