[llvm] 8abfd2c - [PowerPC][AIX] Enable passing byval formal arguments in multiple registers.

Sean Fertile via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 8 08:22:35 PDT 2020


Author: Sean Fertile
Date: 2020-04-08T11:16:33-04:00
New Revision: 8abfd2c3bb0d66a123b6a6ae590a3d0200f7a688

URL: https://github.com/llvm/llvm-project/commit/8abfd2c3bb0d66a123b6a6ae590a3d0200f7a688
DIFF: https://github.com/llvm/llvm-project/commit/8abfd2c3bb0d66a123b6a6ae590a3d0200f7a688.diff

LOG: [PowerPC][AIX] Enable passing byval formal arguments in multiple registers.

Any or all of the argument registers can be used to pass a byval formal
argument, with the limitation that the argument must fit in the
available registers (i.e., it is not split between registers and the stack).

Differential Revision: https://reviews.llvm.org/D76902

Added: 
    llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/test/CodeGen/PowerPC/aix-cc-byval.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index db81a6c2cb70..b1a2d4e5fd14 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7059,12 +7059,10 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
 
   SmallVector<SDValue, 8> MemOps;
 
-  for (CCValAssign &VA : ArgLocs) {
-    EVT ValVT = VA.getValVT();
+  for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
+    CCValAssign &VA = ArgLocs[I++];
     MVT LocVT = VA.getLocVT();
     ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
-    assert((VA.isRegLoc() || VA.isMemLoc()) &&
-           "Unexpected location for function call argument.");
 
     // For compatibility with the AIX XL compiler, the float args in the
     // parameter save area are initialized even if the argument is available
@@ -7092,42 +7090,64 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
     if (Flags.isByVal()) {
       assert(VA.isRegLoc() && "MemLocs should already be handled.");
 
-      const unsigned ByValSize = Flags.getByValSize();
-      if (ByValSize > PtrByteSize)
-        report_fatal_error("Formal arguments greater then register size not "
-                           "implemented yet.");
-
       const MCPhysReg ArgReg = VA.getLocReg();
       const PPCFrameLowering *FL = Subtarget.getFrameLowering();
-      const unsigned Offset = mapArgRegToOffsetAIX(ArgReg, FL);
 
-      const unsigned StackSize = alignTo(ByValSize, PtrByteSize);
+      if (Flags.getByValAlign() > PtrByteSize)
+        report_fatal_error("Over aligned byvals not supported yet.");
+
+      const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
       const int FI = MF.getFrameInfo().CreateFixedObject(
-          StackSize, Offset, /* IsImmutable */ false, /* IsAliased */ true);
+          StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
+          /* IsAliased */ true);
       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
-
       InVals.push_back(FIN);
 
-      const unsigned VReg = MF.addLiveIn(ArgReg, IsPPC64 ? &PPC::G8RCRegClass
-                                                         : &PPC::GPRCRegClass);
-
-      // Since the callers side has left justified the aggregate in the
-      // register, we can simply store the entire register into the stack
-      // slot.
-      // The store to the fixedstack object is needed becuase accessing a
-      // field of the ByVal will use a gep and load. Ideally we will optimize
-      // to extracting the value from the register directly, and elide the
-      // stores when the arguments address is not taken, but that will need to
-      // be future work.
-      SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
-      SDValue Store =
-          DAG.getStore(CopyFrom.getValue(1), dl, CopyFrom, FIN,
-                       MachinePointerInfo::getFixedStack(MF, FI, 0));
+      // Add live ins for all the RegLocs for the same ByVal.
+      const TargetRegisterClass *RegClass =
+          IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+
+      auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
+                                               unsigned Offset) {
+        const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
+        // Since the caller's side has left-justified the aggregate in the
+        // register, we can simply store the entire register into the stack
+        // slot.
+        SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
+        // The store to the fixedstack object is needed because accessing a
+        // field of the ByVal will use a gep and load. Ideally we will optimize
+        // to extracting the value from the register directly, and elide the
+        // stores when the arguments address is not taken, but that will need to
+        // be future work.
+        SDValue Store =
+            DAG.getStore(CopyFrom.getValue(1), dl, CopyFrom,
+                         DAG.getObjectPtrOffset(dl, FIN, Offset),
+                         MachinePointerInfo::getFixedStack(MF, FI, Offset));
 
-      MemOps.push_back(Store);
+        MemOps.push_back(Store);
+      };
+
+      unsigned Offset = 0;
+      HandleRegLoc(VA.getLocReg(), Offset);
+      Offset += PtrByteSize;
+      for (; Offset != StackSize; Offset += PtrByteSize) {
+        assert(I != End &&
+               "Expecting enough RegLocs to copy entire ByVal arg.");
+
+        if (!ArgLocs[I].isRegLoc())
+          report_fatal_error("Passing ByVals split between registers and stack "
+                             "not yet implemented.");
+
+        assert(ArgLocs[I].getValNo() == VA.getValNo() &&
+               "Expecting more RegLocs for ByVal argument.");
+
+        const CCValAssign RL = ArgLocs[I++];
+        HandleRegLoc(RL.getLocReg(), Offset);
+      }
       continue;
     }
 
+    EVT ValVT = VA.getValVT();
     if (VA.isRegLoc()) {
       MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
       unsigned VReg =

diff  --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll
new file mode 100644
index 000000000000..9117e3f288f1
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll
@@ -0,0 +1,20 @@
+; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp \
+; RUN:   -mcpu=pwr4 -mattr=-altivec -verify-machineinstrs 2>&1 < %s | FileCheck  %s
+
+; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp \
+; RUN:   -mcpu=pwr4 -mattr=-altivec -verify-machineinstrs 2>&1 < %s | FileCheck  %s
+
+; CHECK: LLVM ERROR: Pass-by-value arguments are only supported in registers.
+
+%struct.Spill = type { [12 x i64 ] }
+@GS = external global %struct.Spill, align 4
+
+define i64 @test(%struct.Spill* byval(%struct.Spill) align 4 %s) {
+entry:
+  %arrayidx_a = getelementptr inbounds %struct.Spill, %struct.Spill* %s, i32 0, i32 0, i32 2
+  %arrayidx_b = getelementptr inbounds %struct.Spill, %struct.Spill* %s, i32 0, i32 0, i32 10
+  %a = load i64, i64* %arrayidx_a
+  %b = load i64, i64* %arrayidx_b
+  %add = add i64 %a, %b
+  ret i64 %add
+}

diff  --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
index 52a4476100c6..a7dcd5a8771c 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
@@ -687,8 +687,6 @@ entry:
   ret void
 }
 
-declare zeroext i8 @test_byval_32Byte(%struct.S32* byval(%struct.S32) align 1 %s)
-
 ; CHECK-LABEL: name: call_test_byval_32Byte{{.*}}
 
 ; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
@@ -740,18 +738,78 @@ declare zeroext i8 @test_byval_32Byte(%struct.S32* byval(%struct.S32) align 1 %s
 ; ASM64-NEXT:  bl .test_byval_32Byte
 ; ASM64-NEXT:  nop
 
+define zeroext i8 @test_byval_32Byte(%struct.S32* byval(%struct.S32) align 1 %s) {
+entry:
+  %arrayidx = getelementptr inbounds %struct.S32, %struct.S32* %s, i32 0, i32 0, i32 21
+  %0 = load i8, i8* %arrayidx, align 1
+  ret i8 %0
+}
+
+; The ByVal handling produces dead stores. See `LowerFormalArguments_AIX` for
+; details on why.
+
+; CHECK-LABEL: name:            test_byval_32Byte
+
+; 32BIT:      fixedStack:
+; 32BIT-NEXT:   - { id: 0, type: default, offset: 24, size: 32, alignment: 8, stack-id: default,
+; 32BIT-NEXT:       isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
+
+; 32BIT:      bb.0.entry:
+; 32BIT-NEXT:   liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10
+; 32BIT-DAG:    STW killed renamable $r3,   0, %fixed-stack.0 :: (store 4 into %fixed-stack.0
+; 32BIT-DAG:    STW killed renamable $r4,   4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4
+; 32BIT-DAG:    STW killed renamable $r5,   8, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 8
+; 32BIT-DAG:    STW killed renamable $r6,  12, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 12
+; 32BIT-DAG:    STW killed renamable $r7,  16, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 16
+; 32BIT-DAG:    STW killed renamable $r8,  20, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 20
+; 32BIT-DAG:    STW killed renamable $r9,  24, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 24
+; 32BIT-DAG:    STW killed renamable $r10, 28, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 28
+; 32BIT:        renamable $r3 = LBZ 21, %fixed-stack.0 :: (dereferenceable load 1
+; 32BIT:        BLR
+
+; 64BIT:      fixedStack:
+; 64BIT-NEXT:   - { id: 0, type: default, offset: 48, size: 32, alignment: 16, stack-id: default,
+; 64BIT-NEXT:       isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
+
+; 64BIT:      bb.0.entry:
+; 64BIT-NEXT:   liveins: $x3, $x4, $x5, $x6
+; 64BIT-DAG:    STD killed renamable $x3, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0
+; 64BIT-DAG:    STD killed renamable $x4, 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8
+; 64BIT-DAG:    STD killed renamable $x5, 16, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 16
+; 64BIT-DAG:    STD killed renamable $x6, 24, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 24
+; 64BIT-NEXT:   renamable $x3 = LBZ8 21, %fixed-stack.0 :: (dereferenceable load 1
+; 64BIT-NEXT:   BLR8
+
+; ASM-LABEL: .test_byval_32Byte:
 
-%struct.S31 = type { [31 x i8] }
+; ASM32:       stw 8, 44(1)
+; ASM32:       stw 3, 24(1)
+; ASM32-DAG:   lbz 3, 45(1)
+; ASM32-DAG:   stw 4, 28(1)
+; ASM32-DAG:   stw 5, 32(1)
+; ASM32-DAG:   stw 6, 36(1)
+; ASM32-DAG:   stw 7, 40(1)
+; ASM32-DAG:   stw 9, 48(1)
+; ASM32-DAG:   stw 10, 52(1)
+; ASM32-NEXT:  blr
+
+; ASM64:       std 5, 64(1)
+; ASM64:       std 3, 48(1)
+; ASM64-DAG:   lbz 3, 69(1)
+; ASM64-DAG:   std 4, 56(1)
+; ASM64-DAG:   std 6, 72(1)
+; ASM64-NEXT:  blr
+
+%struct.S31 = type <{ float, i32, i64, double, i32, i16, i8 }>
 
 @gS31 = external global %struct.S31, align 1
 
 define void @call_test_byval_31Byte() {
 entry:
-  %call = call zeroext i8 @test_byval_31Byte(%struct.S31* byval(%struct.S31) align 1 @gS31)
+  %call = call double @test_byval_31Byte(%struct.S31* byval(%struct.S31) align 1 @gS31)
   ret void
 }
 
-declare zeroext i8 @test_byval_31Byte(%struct.S31* byval(%struct.S31) align 1)
 
 ; CHECK-LABEL: name: call_test_byval_31Byte{{.*}}
 
@@ -821,6 +879,66 @@ declare zeroext i8 @test_byval_31Byte(%struct.S31* byval(%struct.S31) align 1)
 ; ASM64-NEXT:  nop
 
 
+
+define double @test_byval_31Byte(%struct.S31* byval(%struct.S31) align 1 %s) {
+entry:
+  %gep = getelementptr inbounds %struct.S31, %struct.S31* %s, i32 0, i32 3
+  %load = load double, double* %gep, align 1
+  ret double %load
+}
+
+; CHECK-LABEL: name:            test_byval_31Byte
+
+; 32BIT:      fixedStack:
+; 32BIT-NEXT:   - { id: 0, type: default, offset: 24, size: 32, alignment: 8, stack-id: default,
+; 32BIT-NEXT:       isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
+
+; 32BIT:      bb.0.entry:
+; 32BIT-NEXT:   liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10
+; 32BIT-DAG:    STW killed renamable $r3,   0, %fixed-stack.0 :: (store 4 into %fixed-stack.0
+; 32BIT-DAG:    STW killed renamable $r4,   4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4
+; 32BIT-DAG:    STW killed renamable $r5,   8, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 8
+; 32BIT-DAG:    STW killed renamable $r6,  12, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 12
+; 32BIT-DAG:    STW killed renamable $r7,  16, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 16
+; 32BIT-DAG:    STW killed renamable $r8,  20, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 20
+; 32BIT-DAG:    STW killed renamable $r9,  24, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 24
+; 32BIT-DAG:    STW killed renamable $r10, 28, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 28
+; 32BIT-NEXT:   renamable $f1 = LFD 16, %fixed-stack.0 :: (dereferenceable load 8
+; 32BIT-NEXT:   BLR
+
+; 64BIT:      fixedStack:
+; 64BIT-NEXT:   - { id: 0, type: default, offset: 48, size: 32, alignment: 16, stack-id: default,
+; 64BIT-NEXT:       isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
+
+; 64BIT:      bb.0.entry:
+; 64BIT-NEXT:   liveins: $x3, $x4, $x5, $x6
+; 64BIT-DAG:    STD killed renamable $x3,  0, %fixed-stack.0 :: (store 8 into %fixed-stack.0
+; 64BIT-DAG:    STD killed renamable $x4,  8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8
+; 64BIT-DAG:    STD killed renamable $x5, 16, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 16
+; 64BIT-DAG:    STD killed renamable $x6, 24, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 24
+; 64BIT-NEXT:   renamable $f1 = LFD 16, %fixed-stack.0 :: (dereferenceable load 8
+; 64BIT-NEXT:   BLR8
+
+; ASM32-LABEL: .test_byval_31Byte:
+
+; ASM32-DAG:      stw 8, 44(1)
+; ASM32:          stw 7, 40(1)
+; ASM32-DAG:      lfd 1, 40(1)
+; ASM32-DAG:      stw 3, 24(1)
+; ASM32-DAG:      stw 4, 28(1)
+; ASM32-DAG:      stw 5, 32(1)
+; ASM32-DAG:      stw 6, 36(1)
+; ASM32-DAG:      stw 9, 48(1)
+; ASM32-DAG:      stw 10, 52(1)
+; ASM32-NEXT:     blr
+
+; ASM64:          std 5, 64(1)
+; ASM64:          lfd 1, 64(1)
+; ASM64-DAG:      std 3, 48(1)
+; ASM64-DAG:      std 4, 56(1)
+; ASM64-DAG:      std 6, 72(1)
+; ASM64-NEXT:     blr
+
 %struct.F = type { float, float, float }
 
 define i32 @call_test_byval_homogeneous_float_struct() {


        


More information about the llvm-commits mailing list