[PATCH] D120980: [PowerPC] make splat struct like an array for function arguments
ChenZheng via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 4 01:16:04 PST 2022
shchenz created this revision.
shchenz added reviewers: jsji, nemanjai, PowerPC.
Herald added subscribers: kbarton, hiraditya.
Herald added a project: All.
shchenz requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.
A splat struct (a struct whose non-aggregate members all have the same type) should be treated the same as an array. With this change, `CalculateStackSlotAlignment` can compute a more accurate alignment instead of falling back to the conservative pointer-size alignment.
In some cases, the more accurate alignment lets us avoid allocating a parameter save area, as the test case change shows.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D120980
Files:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.h
llvm/test/CodeGen/PowerPC/splat-struct.ll
Index: llvm/test/CodeGen/PowerPC/splat-struct.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/splat-struct.ll
+++ llvm/test/CodeGen/PowerPC/splat-struct.ll
@@ -9,20 +9,19 @@
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
-; CHECK-NEXT: stdu r1, -144(r1)
-; CHECK-NEXT: lfs f8, 140(r1)
-; CHECK-NEXT: lfs f7, 136(r1)
-; CHECK-NEXT: li r3, 10
-; CHECK-NEXT: lfs f6, 132(r1)
-; CHECK-NEXT: lfs f5, 128(r1)
-; CHECK-NEXT: std r3, 96(r1)
-; CHECK-NEXT: lfs f4, 124(r1)
-; CHECK-NEXT: lfs f3, 120(r1)
-; CHECK-NEXT: lfs f2, 116(r1)
-; CHECK-NEXT: lfs f1, 112(r1)
+; CHECK-NEXT: stdu r1, -64(r1)
+; CHECK-NEXT: lfs f8, 60(r1)
+; CHECK-NEXT: lfs f7, 56(r1)
+; CHECK-NEXT: li r7, 10
+; CHECK-NEXT: lfs f6, 52(r1)
+; CHECK-NEXT: lfs f5, 48(r1)
+; CHECK-NEXT: lfs f4, 44(r1)
+; CHECK-NEXT: lfs f3, 40(r1)
+; CHECK-NEXT: lfs f2, 36(r1)
+; CHECK-NEXT: lfs f1, 32(r1)
; CHECK-NEXT: bl bar
; CHECK-NEXT: nop
-; CHECK-NEXT: addi r1, r1, 144
+; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1101,16 +1101,7 @@
/// contiguous block of registers in calling convention CallConv.
bool functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg,
- const DataLayout &DL) const override {
- // We support any array type as "consecutive" block in the parameter
- // save area. The element type defines the alignment requirement and
- // whether the argument should go in GPRs, FPRs, or VRs if available.
- //
- // Note that clang uses this capability both to implement the ELFv2
- // homogeneous float/vector aggregate ABI, and to avoid having to use
- // "byval" when passing aggregates that might fully fit in registers.
- return Ty->isArrayTy();
- }
+ const DataLayout &DL) const override;
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -34,6 +34,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -16769,6 +16770,28 @@
return PPC::createFastISel(FuncInfo, LibInfo);
}
+bool PPCTargetLowering::functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg,
+ const DataLayout &DL) const {
+ // We support any array type as "consecutive" block in the parameter
+ // save area. The element type defines the alignment requirement and
+ // whether the argument should go in GPRs, FPRs, or VRs if available.
+ //
+ // Note that clang uses this capability both to implement the ELFv2
+ // homogeneous float/vector aggregate ABI, and to avoid having to use
+ // "byval" when passing aggregates that might fully fit in registers.
+ if (Ty->isArrayTy())
+ return true;
+
+ // Structs qualify only if all non-aggregate members share one type.
+ if (!Ty->isStructTy())
+ return false;
+
+ SmallVector<EVT> ValueVTs;
+ ComputeValueVTs(*this, DL, Ty, ValueVTs);
+ return is_splat(ValueVTs);
+}
+
// 'Inverted' means the FMA opcode after negating one multiplicand.
// For example, (fma -a b c) = (fnmsub a b c)
static unsigned invertFMAOpcode(unsigned Opc) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D120980.412954.patch
Type: text/x-patch
Size: 3972 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220304/257c8c48/attachment.bin>
More information about the llvm-commits
mailing list