[PATCH] D120980: [PowerPC] make splat struct like an array for function arguments

Fri Mar 4 01:16:04 PST 2022

shchenz created this revision.
shchenz added reviewers: jsji, nemanjai, PowerPC.
Herald added subscribers: kbarton, hiraditya.
Herald added a project: All.
shchenz requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

A splat struct (a struct whose non-aggregate members all have the same type) should be treated the same as an array. With this, we can get a more accurate alignment in `CalculateStackSlotAlignment` instead of the conservative pointer-size alignment.

In some cases, the more accurate alignment lets us save the parameter save area, as the test case change shows.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D120980

Files:
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/test/CodeGen/PowerPC/splat-struct.ll


Index: llvm/test/CodeGen/PowerPC/splat-struct.ll
===================================================================

--- llvm/test/CodeGen/PowerPC/splat-struct.ll
+++ llvm/test/CodeGen/PowerPC/splat-struct.ll
@@ -9,20 +9,19 @@
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    std r0, 16(r1)
-; CHECK-NEXT:    stdu r1, -144(r1)
-; CHECK-NEXT:    lfs f8, 140(r1)
-; CHECK-NEXT:    lfs f7, 136(r1)
-; CHECK-NEXT:    li r3, 10
-; CHECK-NEXT:    lfs f6, 132(r1)
-; CHECK-NEXT:    lfs f5, 128(r1)
-; CHECK-NEXT:    std r3, 96(r1)
-; CHECK-NEXT:    lfs f4, 124(r1)
-; CHECK-NEXT:    lfs f3, 120(r1)
-; CHECK-NEXT:    lfs f2, 116(r1)
-; CHECK-NEXT:    lfs f1, 112(r1)
+; CHECK-NEXT:    stdu r1, -64(r1)
+; CHECK-NEXT:    lfs f8, 60(r1)
+; CHECK-NEXT:    lfs f7, 56(r1)
+; CHECK-NEXT:    li r7, 10
+; CHECK-NEXT:    lfs f6, 52(r1)
+; CHECK-NEXT:    lfs f5, 48(r1)
+; CHECK-NEXT:    lfs f4, 44(r1)
+; CHECK-NEXT:    lfs f3, 40(r1)
+; CHECK-NEXT:    lfs f2, 36(r1)
+; CHECK-NEXT:    lfs f1, 32(r1)
 ; CHECK-NEXT:    bl bar
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    addi r1, r1, 144
+; CHECK-NEXT:    addi r1, r1, 64
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
 ; CHECK-NEXT:    blr
Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1101,16 +1101,7 @@
     /// contiguous block of registers in calling convention CallConv.
     bool functionArgumentNeedsConsecutiveRegisters(
         Type *Ty, CallingConv::ID CallConv, bool isVarArg,
-        const DataLayout &DL) const override {
-      // We support any array type as "consecutive" block in the parameter
-      // save area.  The element type defines the alignment requirement and
-      // whether the argument should go in GPRs, FPRs, or VRs if available.
-      //
-      // Note that clang uses this capability both to implement the ELFv2
-      // homogeneous float/vector aggregate ABI, and to avoid having to use
-      // "byval" when passing aggregates that might fully fit in registers.
-      return Ty->isArrayTy();
-    }
+        const DataLayout &DL) const override;
 
     /// If a physical register, this returns the register that receives the
     /// exception address on entry to an EH pad.
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -34,6 +34,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -16769,6 +16770,28 @@
   return PPC::createFastISel(FuncInfo, LibInfo);
 }
 
+bool PPCTargetLowering::functionArgumentNeedsConsecutiveRegisters(
+    Type *Ty, CallingConv::ID CallConv, bool isVarArg,
+    const DataLayout &DL) const {
+  // We support any array type as "consecutive" block in the parameter
+  // save area.  The element type defines the alignment requirement and
+  // whether the argument should go in GPRs, FPRs, or VRs if available.
+  //
+  // Note that clang uses this capability both to implement the ELFv2
+  // homogeneous float/vector aggregate ABI, and to avoid having to use
+  // "byval" when passing aggregates that might fully fit in registers.
+  if (Ty->isArrayTy())
+    return true;
+
+  // All non aggregate members of the type must have the same type.
+  if (!Ty->isStructTy())
+    return false;
+
+  SmallVector<EVT> ValueVTs;
+  ComputeValueVTs(*this, DL, Ty, ValueVTs);
+  return is_splat(ValueVTs);
+}
+
 // 'Inverted' means the FMA opcode after negating one multiplicand.
 // For example, (fma -a b c) = (fnmsub a b c)
 static unsigned invertFMAOpcode(unsigned Opc) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D120980.412954.patch
Type: text/x-patch
Size: 3972 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220304/257c8c48/attachment.bin>