[llvm] 2b37c40 - [RISCV] Scale scalably-typed split argument offsets by VSCALE
Fraser Cormack via llvm-commits
llvm-commits at lists.llvm.org
Mon May 31 02:51:27 PDT 2021
Author: Fraser Cormack
Date: 2021-05-31T10:43:13+01:00
New Revision: 2b37c405cc18019ea5056a63fa65f839a4890b50
URL: https://github.com/llvm/llvm-project/commit/2b37c405cc18019ea5056a63fa65f839a4890b50
DIFF: https://github.com/llvm/llvm-project/commit/2b37c405cc18019ea5056a63fa65f839a4890b50.diff
LOG: [RISCV] Scale scalably-typed split argument offsets by VSCALE
This patch fixes a bug in lowering scalable-vector types in RISC-V's
main calling convention. When scalable-vector types are split and passed
indirectly, the target is responsible for scaling the offset --
initially set to the known-minimum store size -- by the scalable factor.
Before this fix, we were issuing overlapping loads or stores to the different
parts, leading to incorrect codegen.
Credit to @HsiangKai for spotting this.
Reviewed By: HsiangKai
Differential Revision: https://reviews.llvm.org/D103262
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ff76626e033a..148e2e519aa8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7199,8 +7199,10 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
CCValAssign &PartVA = ArgLocs[i + 1];
unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
- SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
- DAG.getIntPtrConstant(PartOffset, DL));
+ SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
+ if (PartVA.getValVT().isScalableVector())
+ Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
+ SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
MachinePointerInfo()));
++i;
@@ -7482,14 +7484,17 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Calculate the total size to store. We don't have access to what we're
// actually storing other than performing the loop and collecting the
// info.
- SmallVector<std::pair<SDValue, unsigned>> Parts;
+ SmallVector<std::pair<SDValue, SDValue>> Parts;
while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
SDValue PartValue = OutVals[i + 1];
unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
+ SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
EVT PartVT = PartValue.getValueType();
+ if (PartVT.isScalableVector())
+ Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
StoredSize += PartVT.getStoreSize();
StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
- Parts.push_back(std::make_pair(PartValue, PartOffset));
+ Parts.push_back(std::make_pair(PartValue, Offset));
++i;
}
SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
@@ -7499,9 +7504,9 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
MachinePointerInfo::getFixedStack(MF, FI)));
for (const auto &Part : Parts) {
SDValue PartValue = Part.first;
- unsigned PartOffset = Part.second;
- SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
- DAG.getIntPtrConstant(PartOffset, DL));
+ SDValue PartOffset = Part.second;
+ SDValue Address =
+ DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
MemOpChains.push_back(
DAG.getStore(Chain, DL, PartValue, Address,
MachinePointerInfo::getFixedStack(MF, FI)));
diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
index 7b4235e66f79..5000dcf7c96e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
@@ -3,12 +3,12 @@
; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v < %s | FileCheck %s --check-prefix=RV64
; Check that we correctly scale the split part indirect offsets by VSCALE.
-; FIXME: We don't; we're loading a full 8 vector registers 64 bytes ahead of
-; the first address. This should be scaled by vlenb!
define <vscale x 32 x i32> @callee_scalable_vector_split_indirect(<vscale x 32 x i32> %x, <vscale x 32 x i32> %y) {
; RV32-LABEL: callee_scalable_vector_split_indirect:
; RV32: # %bb.0:
-; RV32-NEXT: addi a1, a0, 64
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: vl8re32.v v24, (a0)
; RV32-NEXT: vl8re32.v v0, (a1)
; RV32-NEXT: vsetvli a0, zero, e32,m8,ta,mu
@@ -18,7 +18,9 @@ define <vscale x 32 x i32> @callee_scalable_vector_split_indirect(<vscale x 32 x
;
; RV64-LABEL: callee_scalable_vector_split_indirect:
; RV64: # %bb.0:
-; RV64-NEXT: addi a1, a0, 64
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: vl8re32.v v24, (a0)
; RV64-NEXT: vl8re32.v v0, (a1)
; RV64-NEXT: vsetvli a0, zero, e32,m8,ta,mu
@@ -30,7 +32,6 @@ define <vscale x 32 x i32> @callee_scalable_vector_split_indirect(<vscale x 32 x
}
; Call the function above. Check that we set the arguments correctly.
-; FIXME: We don't, see above.
define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x i32> %x) {
; RV32-LABEL: caller_scalable_vector_split_indirect:
; RV32: # %bb.0:
@@ -41,7 +42,10 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: sub sp, sp, a0
-; RV32-NEXT: addi a0, sp, 96
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: addi a1, sp, 32
+; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: vs8r.v v16, (a0)
; RV32-NEXT: addi a0, sp, 32
; RV32-NEXT: vs8r.v v8, (a0)
@@ -66,7 +70,10 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: sub sp, sp, a0
-; RV64-NEXT: addi a0, sp, 88
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: addi a1, sp, 24
+; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: vs8r.v v16, (a0)
; RV64-NEXT: addi a0, sp, 24
; RV64-NEXT: vs8r.v v8, (a0)
More information about the llvm-commits
mailing list