[PATCH] D120980: [PowerPC] make splat struct like an array for function arguments
ChenZheng via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 8 19:11:35 PST 2022
shchenz updated this revision to Diff 413994.
shchenz added a comment.
Address @amyk's comments.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D120980/new/
https://reviews.llvm.org/D120980
Files:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.h
llvm/test/CodeGen/PowerPC/splat-struct.ll
Index: llvm/test/CodeGen/PowerPC/splat-struct.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/splat-struct.ll
+++ llvm/test/CodeGen/PowerPC/splat-struct.ll
@@ -9,20 +9,19 @@
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
-; CHECK-NEXT: stdu r1, -144(r1)
-; CHECK-NEXT: lfs f8, 140(r1)
-; CHECK-NEXT: lfs f7, 136(r1)
-; CHECK-NEXT: li r3, 10
-; CHECK-NEXT: lfs f6, 132(r1)
-; CHECK-NEXT: lfs f5, 128(r1)
-; CHECK-NEXT: std r3, 96(r1)
-; CHECK-NEXT: lfs f4, 124(r1)
-; CHECK-NEXT: lfs f3, 120(r1)
-; CHECK-NEXT: lfs f2, 116(r1)
-; CHECK-NEXT: lfs f1, 112(r1)
+; CHECK-NEXT: stdu r1, -64(r1)
+; CHECK-NEXT: lfs f8, 60(r1)
+; CHECK-NEXT: lfs f7, 56(r1)
+; CHECK-NEXT: li r7, 10
+; CHECK-NEXT: lfs f6, 52(r1)
+; CHECK-NEXT: lfs f5, 48(r1)
+; CHECK-NEXT: lfs f4, 44(r1)
+; CHECK-NEXT: lfs f3, 40(r1)
+; CHECK-NEXT: lfs f2, 36(r1)
+; CHECK-NEXT: lfs f1, 32(r1)
; CHECK-NEXT: bl bar
; CHECK-NEXT: nop
-; CHECK-NEXT: addi r1, r1, 144
+; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1101,16 +1101,7 @@
/// contiguous block of registers in calling convention CallConv.
bool functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg,
- const DataLayout &DL) const override {
- // We support any array type as "consecutive" block in the parameter
- // save area. The element type defines the alignment requirement and
- // whether the argument should go in GPRs, FPRs, or VRs if available.
- //
- // Note that clang uses this capability both to implement the ELFv2
- // homogeneous float/vector aggregate ABI, and to avoid having to use
- // "byval" when passing aggregates that might fully fit in registers.
- return Ty->isArrayTy();
- }
+ const DataLayout &DL) const override;
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -34,6 +34,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -16769,6 +16770,27 @@
return PPC::createFastISel(FuncInfo, LibInfo);
}
+bool PPCTargetLowering::functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg,
+ const DataLayout &DL) const {
+ // We support any array type and splat struct type as "consecutive" block in
+ // the parameter save area. The element type defines the alignment requirement
+ // and whether the argument should go in GPRs, FPRs, or VRs if available.
+ //
+ // Note that clang uses this capability both to implement the ELFv2
+ // homogeneous float/vector aggregate ABI, and to avoid having to use
+ // "byval" when passing aggregates that might fully fit in registers.
+ if (Ty->isArrayTy())
+ return true;
+
+ if (!Ty->isStructTy())
+ return false;
+
+ SmallVector<EVT> ValueVTs;
+ ComputeValueVTs(*this, DL, Ty, ValueVTs);
+ return is_splat(ValueVTs);
+}
+
// 'Inverted' means the FMA opcode after negating one multiplicand.
// For example, (fma -a b c) = (fnmsub a b c)
static unsigned invertFMAOpcode(unsigned Opc) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D120980.413994.patch
Type: text/x-patch
Size: 3924 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220309/afcdd0d5/attachment.bin>
More information about the llvm-commits mailing list