[llvm] 43ad058 - [RISCV] Fix stack slot for argument types (Bug 49500)

Thu Apr 29 01:18:11 PDT 2021

Author: Fraser Cormack
Date: 2021-04-29T09:10:48+01:00
New Revision: 43ad058a01881962604a941ae95209fad095aa18

URL: https://github.com/llvm/llvm-project/commit/43ad058a01881962604a941ae95209fad095aa18
DIFF: https://github.com/llvm/llvm-project/commit/43ad058a01881962604a941ae95209fad095aa18.diff

LOG: [RISCV] Fix stack slot for argument types (Bug 49500)

This is an complementary/alternative fix for D99068. It takes a slightly
different approach by explicitly summing up all of the required split
part type sizes and ensuring we allocate enough space for them. It also
takes the maximum alignment of each part.

Compared with D99068 there are fewer changes to the stack objects in
existing tests. However, @luismarques has shown in that patch that there
are opportunities to reduce our stack usage in the future.

Reviewed By: luismarques

Differential Revision: https://reviews.llvm.org/D99087

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
    llvm/test/CodeGen/RISCV/stack-slot-size.ll
    llvm/test/CodeGen/RISCV/vector-abi.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index eafe5ad060d3..712b8f39e35c 100644

--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7080,6 +7080,11 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization(
   return true;
 }
 
+static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
+  return DAG.getDataLayout().getPrefTypeAlign(
+      VT.getTypeForEVT(*DAG.getContext()));
+}
+
 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
 // and output parameter nodes.
 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
@@ -7194,11 +7199,10 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
     // For now, only handle fully promoted and indirect arguments.
     if (VA.getLocInfo() == CCValAssign::Indirect) {
       // Store the argument in a stack slot and pass its address.
-      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
-      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
-      MemOpChains.push_back(
-          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
-                       MachinePointerInfo::getFixedStack(MF, FI)));
+      Align StackAlign =
+          std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
+                   getPrefTypeAlign(ArgValue.getValueType(), DAG));
+      TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
       // If the original argument was split (e.g. i128), we need
       // to store the required parts of it here (and pass just one address).
       // Vectors may be partly split to registers and partly to the stack, in
@@ -7207,15 +7211,32 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
       unsigned ArgIndex = Outs[i].OrigArgIndex;
       unsigned ArgPartOffset = Outs[i].PartOffset;
       assert(VA.getValVT().isVector() || ArgPartOffset == 0);
+      // Calculate the total size to store. We don't have access to what we're
+      // actually storing other than performing the loop and collecting the
+      // info.
+      SmallVector<std::pair<SDValue, unsigned>> Parts;
       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
         SDValue PartValue = OutVals[i + 1];
         unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
+        EVT PartVT = PartValue.getValueType();
+        StoredSize += PartVT.getStoreSize();
+        StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
+        Parts.push_back(std::make_pair(PartValue, PartOffset));
+        ++i;
+      }
+      SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
+      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+      MemOpChains.push_back(
+          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
+                       MachinePointerInfo::getFixedStack(MF, FI)));
+      for (const auto &Part : Parts) {
+        SDValue PartValue = Part.first;
+        unsigned PartOffset = Part.second;
         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                       DAG.getIntPtrConstant(PartOffset, DL));
         MemOpChains.push_back(
             DAG.getStore(Chain, DL, PartValue, Address,
                          MachinePointerInfo::getFixedStack(MF, FI)));
-        ++i;
       }
       ArgValue = SpillSlot;
     } else {

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
index 138d3627644d..2b0b67dcb783 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
@@ -1074,13 +1074,13 @@ define <32 x i32> @call_split_vector_args(<2 x i32>* %pa, <32 x i32>* %pb) {
 ;
 ; LMULMAX2-LABEL: call_split_vector_args:
 ; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    addi sp, sp, -256
-; LMULMAX2-NEXT:    .cfi_def_cfa_offset 256
-; LMULMAX2-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX2-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
+; LMULMAX2-NEXT:    addi sp, sp, -128
+; LMULMAX2-NEXT:    .cfi_def_cfa_offset 128
+; LMULMAX2-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; LMULMAX2-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
 ; LMULMAX2-NEXT:    .cfi_offset ra, -8
 ; LMULMAX2-NEXT:    .cfi_offset s0, -16
-; LMULMAX2-NEXT:    addi s0, sp, 256
+; LMULMAX2-NEXT:    addi s0, sp, 128
 ; LMULMAX2-NEXT:    .cfi_def_cfa s0, 0
 ; LMULMAX2-NEXT:    andi sp, sp, -128
 ; LMULMAX2-NEXT:    vsetivli a2, 2, e32,m1,ta,mu
@@ -1105,21 +1105,21 @@ define <32 x i32> @call_split_vector_args(<2 x i32>* %pa, <32 x i32>* %pb) {
 ; LMULMAX2-NEXT:    vmv1r.v v12, v8
 ; LMULMAX2-NEXT:    vmv2r.v v22, v14
 ; LMULMAX2-NEXT:    call split_vector_args at plt
-; LMULMAX2-NEXT:    addi sp, s0, -256
-; LMULMAX2-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX2-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX2-NEXT:    addi sp, sp, 256
+; LMULMAX2-NEXT:    addi sp, s0, -128
+; LMULMAX2-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; LMULMAX2-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; LMULMAX2-NEXT:    addi sp, sp, 128
 ; LMULMAX2-NEXT:    ret
 ;
 ; LMULMAX1-LABEL: call_split_vector_args:
 ; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi sp, sp, -256
-; LMULMAX1-NEXT:    .cfi_def_cfa_offset 256
-; LMULMAX1-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX1-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
+; LMULMAX1-NEXT:    addi sp, sp, -128
+; LMULMAX1-NEXT:    .cfi_def_cfa_offset 128
+; LMULMAX1-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; LMULMAX1-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
 ; LMULMAX1-NEXT:    .cfi_offset ra, -8
 ; LMULMAX1-NEXT:    .cfi_offset s0, -16
-; LMULMAX1-NEXT:    addi s0, sp, 256
+; LMULMAX1-NEXT:    addi s0, sp, 128
 ; LMULMAX1-NEXT:    .cfi_def_cfa s0, 0
 ; LMULMAX1-NEXT:    andi sp, sp, -128
 ; LMULMAX1-NEXT:    vsetivli a2, 2, e32,m1,ta,mu
@@ -1158,10 +1158,10 @@ define <32 x i32> @call_split_vector_args(<2 x i32>* %pa, <32 x i32>* %pb) {
 ; LMULMAX1-NEXT:    vmv1r.v v22, v14
 ; LMULMAX1-NEXT:    vmv1r.v v23, v15
 ; LMULMAX1-NEXT:    call split_vector_args at plt
-; LMULMAX1-NEXT:    addi sp, s0, -256
-; LMULMAX1-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX1-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX1-NEXT:    addi sp, sp, 256
+; LMULMAX1-NEXT:    addi sp, s0, -128
+; LMULMAX1-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; LMULMAX1-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; LMULMAX1-NEXT:    addi sp, sp, 128
 ; LMULMAX1-NEXT:    ret
   %a = load <2 x i32>, <2 x i32>* %pa
   %b = load <32 x i32>, <32 x i32>* %pb

diff  --git a/llvm/test/CodeGen/RISCV/stack-slot-size.ll b/llvm/test/CodeGen/RISCV/stack-slot-size.ll
index 14dd2b8cbcdb..7edbec2da97f 100644
--- a/llvm/test/CodeGen/RISCV/stack-slot-size.ll
+++ b/llvm/test/CodeGen/RISCV/stack-slot-size.ll
@@ -13,7 +13,6 @@ declare void @callee129(i129)
 declare void @callee160(i160)
 declare void @callee161(i161)
 
-; FIXME: Stack write clobbers the spilled value (on RV64).
 define i32 @caller129() nounwind {
 ; RV32I-LABEL: caller129:
 ; RV32I:       # %bb.0:
@@ -35,18 +34,18 @@ define i32 @caller129() nounwind {
 ;
 ; RV64I-LABEL: caller129:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -32
-; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    addi a0, zero, 42
-; RV64I-NEXT:    sw a0, 20(sp)
+; RV64I-NEXT:    sw a0, 36(sp)
 ; RV64I-NEXT:    sd zero, 16(sp)
 ; RV64I-NEXT:    sd zero, 8(sp)
 ; RV64I-NEXT:    mv a0, sp
 ; RV64I-NEXT:    sd zero, 0(sp)
 ; RV64I-NEXT:    call callee129 at plt
-; RV64I-NEXT:    lw a0, 20(sp)
-; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    lw a0, 36(sp)
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %1 = alloca i32
   store i32 42, i32* %1
@@ -55,7 +54,6 @@ define i32 @caller129() nounwind {
   ret i32 %2
 }
 
-; FIXME: Stack write clobbers the spilled value (on RV64).
 define i32 @caller160() nounwind {
 ; RV32I-LABEL: caller160:
 ; RV32I:       # %bb.0:
@@ -77,18 +75,18 @@ define i32 @caller160() nounwind {
 ;
 ; RV64I-LABEL: caller160:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -32
-; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    addi a0, zero, 42
-; RV64I-NEXT:    sw a0, 20(sp)
+; RV64I-NEXT:    sw a0, 36(sp)
 ; RV64I-NEXT:    sd zero, 16(sp)
 ; RV64I-NEXT:    sd zero, 8(sp)
 ; RV64I-NEXT:    mv a0, sp
 ; RV64I-NEXT:    sd zero, 0(sp)
 ; RV64I-NEXT:    call callee160 at plt
-; RV64I-NEXT:    lw a0, 20(sp)
-; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    lw a0, 36(sp)
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %1 = alloca i32
   store i32 42, i32* %1

diff  --git a/llvm/test/CodeGen/RISCV/vector-abi.ll b/llvm/test/CodeGen/RISCV/vector-abi.ll
index 5b89ca0336b1..e97f75394bb0 100644
--- a/llvm/test/CodeGen/RISCV/vector-abi.ll
+++ b/llvm/test/CodeGen/RISCV/vector-abi.ll
@@ -1,15 +1,12 @@
 ; RUN: llc -mtriple=riscv32 -stop-after finalize-isel < %s | FileCheck %s -check-prefix=RV32
 ; RUN: llc -mtriple=riscv64 -stop-after finalize-isel < %s | FileCheck %s -check-prefix=RV64
 
-; FIXME: The stack location used to pass the parameter to the function has the
-; incorrect size and alignment for how we use it, and we clobber the stack.
-
 declare void @callee(<4 x i8> %v)
 
 define void @caller() {
   ; RV32-LABEL: name: caller
   ; RV32: stack:
-  ; RV32:     - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
+  ; RV32:     - { id: 0, name: '', type: default, offset: 0, size: 16, alignment: 4,
   ; RV32-NEXT:    stack-id: default, callee-saved-register: '', callee-saved-restored: true,
   ; RV32-NEXT:    debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
   ; RV32: bb.0 (%ir-block.0):
@@ -29,7 +26,7 @@ define void @caller() {
   ; RV32:   PseudoRET
   ; RV64-LABEL: name: caller
   ; RV64: stack:
-  ; RV64:     - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
+  ; RV64:     - { id: 0, name: '', type: default, offset: 0, size: 32, alignment: 8,
   ; RV64-NEXT:    stack-id: default, callee-saved-register: '', callee-saved-restored: true,
   ; RV64-NEXT:    debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
   ; RV64: bb.0 (%ir-block.0):