[llvm] 8abfd2c - [PowerPC][AIX] Enable passing byval formal arguments in multiple registers.
Sean Fertile via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 8 08:22:35 PDT 2020
Author: Sean Fertile
Date: 2020-04-08T11:16:33-04:00
New Revision: 8abfd2c3bb0d66a123b6a6ae590a3d0200f7a688
URL: https://github.com/llvm/llvm-project/commit/8abfd2c3bb0d66a123b6a6ae590a3d0200f7a688
DIFF: https://github.com/llvm/llvm-project/commit/8abfd2c3bb0d66a123b6a6ae590a3d0200f7a688.diff
LOG: [PowerPC][AIX] Enable passing byval formal arguments in multiple registers.
Any or all the argument registers can be used to pass a byval formal
argument, with the limitation that the argument must fit in the
available registers (ie: is not split between registers and stack).
Differential Revision: https://reviews.llvm.org/D76902
Added:
llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll
Modified:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index db81a6c2cb70..b1a2d4e5fd14 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7059,12 +7059,10 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
SmallVector<SDValue, 8> MemOps;
- for (CCValAssign &VA : ArgLocs) {
- EVT ValVT = VA.getValVT();
+ for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
+ CCValAssign &VA = ArgLocs[I++];
MVT LocVT = VA.getLocVT();
ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
- assert((VA.isRegLoc() || VA.isMemLoc()) &&
- "Unexpected location for function call argument.");
// For compatibility with the AIX XL compiler, the float args in the
// parameter save area are initialized even if the argument is available
@@ -7092,42 +7090,64 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
if (Flags.isByVal()) {
assert(VA.isRegLoc() && "MemLocs should already be handled.");
- const unsigned ByValSize = Flags.getByValSize();
- if (ByValSize > PtrByteSize)
- report_fatal_error("Formal arguments greater then register size not "
- "implemented yet.");
-
const MCPhysReg ArgReg = VA.getLocReg();
const PPCFrameLowering *FL = Subtarget.getFrameLowering();
- const unsigned Offset = mapArgRegToOffsetAIX(ArgReg, FL);
- const unsigned StackSize = alignTo(ByValSize, PtrByteSize);
+ if (Flags.getByValAlign() > PtrByteSize)
+ report_fatal_error("Over aligned byvals not supported yet.");
+
+ const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
const int FI = MF.getFrameInfo().CreateFixedObject(
- StackSize, Offset, /* IsImmutable */ false, /* IsAliased */ true);
+ StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
+ /* IsAliased */ true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
-
InVals.push_back(FIN);
- const unsigned VReg = MF.addLiveIn(ArgReg, IsPPC64 ? &PPC::G8RCRegClass
- : &PPC::GPRCRegClass);
-
- // Since the callers side has left justified the aggregate in the
- // register, we can simply store the entire register into the stack
- // slot.
- // The store to the fixedstack object is needed becuase accessing a
- // field of the ByVal will use a gep and load. Ideally we will optimize
- // to extracting the value from the register directly, and elide the
- // stores when the arguments address is not taken, but that will need to
- // be future work.
- SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
- SDValue Store =
- DAG.getStore(CopyFrom.getValue(1), dl, CopyFrom, FIN,
- MachinePointerInfo::getFixedStack(MF, FI, 0));
+ // Add live ins for all the RegLocs for the same ByVal.
+ const TargetRegisterClass *RegClass =
+ IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+
+ auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
+ unsigned Offset) {
+ const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
+ // Since the callers side has left justified the aggregate in the
+ // register, we can simply store the entire register into the stack
+ // slot.
+ SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
+ // The store to the fixedstack object is needed because accessing a
+ // field of the ByVal will use a gep and load. Ideally we will optimize
+ // to extracting the value from the register directly, and elide the
+ // stores when the arguments address is not taken, but that will need to
+ // be future work.
+ SDValue Store =
+ DAG.getStore(CopyFrom.getValue(1), dl, CopyFrom,
+ DAG.getObjectPtrOffset(dl, FIN, Offset),
+ MachinePointerInfo::getFixedStack(MF, FI, Offset));
- MemOps.push_back(Store);
+ MemOps.push_back(Store);
+ };
+
+ unsigned Offset = 0;
+ HandleRegLoc(VA.getLocReg(), Offset);
+ Offset += PtrByteSize;
+ for (; Offset != StackSize; Offset += PtrByteSize) {
+ assert(I != End &&
+ "Expecting enough RegLocs to copy entire ByVal arg.");
+
+ if (!ArgLocs[I].isRegLoc())
+ report_fatal_error("Passing ByVals split between registers and stack "
+ "not yet implemented.");
+
+ assert(ArgLocs[I].getValNo() == VA.getValNo() &&
+ "Expecting more RegLocs for ByVal argument.");
+
+ const CCValAssign RL = ArgLocs[I++];
+ HandleRegLoc(RL.getLocReg(), Offset);
+ }
continue;
}
+ EVT ValVT = VA.getValVT();
if (VA.isRegLoc()) {
MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
unsigned VReg =
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll
new file mode 100644
index 000000000000..9117e3f288f1
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll
@@ -0,0 +1,20 @@
+; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp \
+; RUN: -mcpu=pwr4 -mattr=-altivec -verify-machineinstrs 2>&1 < %s | FileCheck %s
+
+; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp \
+; RUN: -mcpu=pwr4 -mattr=-altivec -verify-machineinstrs 2>&1 < %s | FileCheck %s
+
+; CHECK: LLVM ERROR: Pass-by-value arguments are only supported in registers.
+
+%struct.Spill = type { [12 x i64 ] }
+@GS = external global %struct.Spill, align 4
+
+define i64 @test(%struct.Spill* byval(%struct.Spill) align 4 %s) {
+entry:
+ %arrayidx_a = getelementptr inbounds %struct.Spill, %struct.Spill* %s, i32 0, i32 0, i32 2
+ %arrayidx_b = getelementptr inbounds %struct.Spill, %struct.Spill* %s, i32 0, i32 0, i32 10
+ %a = load i64, i64* %arrayidx_a
+ %b = load i64, i64* %arrayidx_b
+ %add = add i64 %a, %b
+ ret i64 %add
+}
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
index 52a4476100c6..a7dcd5a8771c 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
@@ -687,8 +687,6 @@ entry:
ret void
}
-declare zeroext i8 @test_byval_32Byte(%struct.S32* byval(%struct.S32) align 1 %s)
-
; CHECK-LABEL: name: call_test_byval_32Byte{{.*}}
; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
@@ -740,18 +738,78 @@ declare zeroext i8 @test_byval_32Byte(%struct.S32* byval(%struct.S32) align 1 %s
; ASM64-NEXT: bl .test_byval_32Byte
; ASM64-NEXT: nop
+define zeroext i8 @test_byval_32Byte(%struct.S32* byval(%struct.S32) align 1 %s) {
+entry:
+ %arrayidx = getelementptr inbounds %struct.S32, %struct.S32* %s, i32 0, i32 0, i32 21
+ %0 = load i8, i8* %arrayidx, align 1
+ ret i8 %0
+}
+
+; The ByVal handling produces dead stores. See `LowerFormalArguments_AIX` for
+; details on why.
+
+; CHECK-LABEL: name: test_byval_32Byte
+
+; 32BIT: fixedStack:
+; 32BIT-NEXT: - { id: 0, type: default, offset: 24, size: 32, alignment: 8, stack-id: default,
+; 32BIT-NEXT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
+
+; 32BIT: bb.0.entry:
+; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10
+; 32BIT-DAG: STW killed renamable $r3, 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0
+; 32BIT-DAG: STW killed renamable $r4, 4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4
+; 32BIT-DAG: STW killed renamable $r5, 8, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 8
+; 32BIT-DAG: STW killed renamable $r6, 12, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 12
+; 32BIT-DAG: STW killed renamable $r7, 16, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 16
+; 32BIT-DAG: STW killed renamable $r8, 20, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 20
+; 32BIT-DAG: STW killed renamable $r9, 24, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 24
+; 32BIT-DAG: STW killed renamable $r10, 28, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 28
+; 32BIT: renamable $r3 = LBZ 21, %fixed-stack.0 :: (dereferenceable load 1
+; 32BIT: BLR
+
+; 64BIT: fixedStack:
+; 64BIT-NEXT: - { id: 0, type: default, offset: 48, size: 32, alignment: 16, stack-id: default,
+; 64BIT-NEXT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
+
+; 64BIT: bb.0.entry:
+; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6
+; 64BIT-DAG: STD killed renamable $x3, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0
+; 64BIT-DAG: STD killed renamable $x4, 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8
+; 64BIT-DAG: STD killed renamable $x5, 16, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 16
+; 64BIT-DAG: STD killed renamable $x6, 24, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 24
+; 64BIT-NEXT: renamable $x3 = LBZ8 21, %fixed-stack.0 :: (dereferenceable load 1
+; 64BIT-NEXT: BLR8
+
+; ASM-LABEL: .test_byval_32Byte:
-%struct.S31 = type { [31 x i8] }
+; ASM32: stw 8, 44(1)
+; ASM32: stw 3, 24(1)
+; ASM32-DAG: lbz 3, 45(1)
+; ASM32-DAG: stw 4, 28(1)
+; ASM32-DAG: stw 5, 32(1)
+; ASM32-DAG: stw 6, 36(1)
+; ASM32-DAG: stw 7, 40(1)
+; ASM32-DAG: stw 9, 48(1)
+; ASM32-DAG: stw 10, 52(1)
+; ASM32-NEXT: blr
+
+; ASM64: std 5, 64(1)
+; ASM64: std 3, 48(1)
+; ASM64-DAG: lbz 3, 69(1)
+; ASM64-DAG: std 4, 56(1)
+; ASM64-DAG: std 6, 72(1)
+; ASM64-NEXT: blr
+
+%struct.S31 = type <{ float, i32, i64, double, i32, i16, i8 }>
@gS31 = external global %struct.S31, align 1
define void @call_test_byval_31Byte() {
entry:
- %call = call zeroext i8 @test_byval_31Byte(%struct.S31* byval(%struct.S31) align 1 @gS31)
+ %call = call double @test_byval_31Byte(%struct.S31* byval(%struct.S31) align 1 @gS31)
ret void
}
-declare zeroext i8 @test_byval_31Byte(%struct.S31* byval(%struct.S31) align 1)
; CHECK-LABEL: name: call_test_byval_31Byte{{.*}}
@@ -821,6 +879,66 @@ declare zeroext i8 @test_byval_31Byte(%struct.S31* byval(%struct.S31) align 1)
; ASM64-NEXT: nop
+
+define double @test_byval_31Byte(%struct.S31* byval(%struct.S31) align 1 %s) {
+entry:
+ %gep = getelementptr inbounds %struct.S31, %struct.S31* %s, i32 0, i32 3
+ %load = load double, double* %gep, align 1
+ ret double %load
+}
+
+; CHECK-LABEL: name: test_byval_31Byte
+
+; 32BIT: fixedStack:
+; 32BIT-NEXT: - { id: 0, type: default, offset: 24, size: 32, alignment: 8, stack-id: default,
+; 32BIT-NEXT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
+
+; 32BIT: bb.0.entry:
+; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10
+; 32BIT-DAG: STW killed renamable $r3, 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0
+; 32BIT-DAG: STW killed renamable $r4, 4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4
+; 32BIT-DAG: STW killed renamable $r5, 8, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 8
+; 32BIT-DAG: STW killed renamable $r6, 12, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 12
+; 32BIT-DAG: STW killed renamable $r7, 16, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 16
+; 32BIT-DAG: STW killed renamable $r8, 20, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 20
+; 32BIT-DAG: STW killed renamable $r9, 24, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 24
+; 32BIT-DAG: STW killed renamable $r10, 28, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 28
+; 32BIT-NEXT: renamable $f1 = LFD 16, %fixed-stack.0 :: (dereferenceable load 8
+; 32BIT-NEXT: BLR
+
+; 64BIT: fixedStack:
+; 64BIT-NEXT: - { id: 0, type: default, offset: 48, size: 32, alignment: 16, stack-id: default,
+; 64BIT-NEXT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
+
+; 64BIT: bb.0.entry:
+; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6
+; 64BIT-DAG: STD killed renamable $x3, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0
+; 64BIT-DAG: STD killed renamable $x4, 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8
+; 64BIT-DAG: STD killed renamable $x5, 16, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 16
+; 64BIT-DAG: STD killed renamable $x6, 24, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 24
+; 64BIT-NEXT: renamable $f1 = LFD 16, %fixed-stack.0 :: (dereferenceable load 8
+; 64BIT-NEXT: BLR8
+
+; ASM32-LABEL: .test_byval_31Byte:
+
+; ASM32-DAG: stw 8, 44(1)
+; ASM32: stw 7, 40(1)
+; ASM32-DAG: lfd 1, 40(1)
+; ASM32-DAG: stw 3, 24(1)
+; ASM32-DAG: stw 4, 28(1)
+; ASM32-DAG: stw 5, 32(1)
+; ASM32-DAG: stw 6, 36(1)
+; ASM32-DAG: stw 9, 48(1)
+; ASM32-DAG: stw 10, 52(1)
+; ASM32-NEXT: blr
+
+; ASM64: std 5, 64(1)
+; ASM64: lfd 1, 64(1)
+; ASM64-DAG: std 3, 48(1)
+; ASM64-DAG: std 4, 56(1)
+; ASM64-DAG: std 6, 72(1)
+; ASM64-NEXT: blr
+
%struct.F = type { float, float, float }
define i32 @call_test_byval_homogeneous_float_struct() {
More information about the llvm-commits
mailing list