[llvm] 2b37c40 - [RISCV] Scale scalably-typed split argument offsets by VSCALE
Fraser Cormack via llvm-commits
llvm-commits at lists.llvm.org
Mon May 31 02:51:27 PDT 2021
Author: Fraser Cormack
Date: 2021-05-31T10:43:13+01:00
New Revision: 2b37c405cc18019ea5056a63fa65f839a4890b50
URL: https://github.com/llvm/llvm-project/commit/2b37c405cc18019ea5056a63fa65f839a4890b50
DIFF: https://github.com/llvm/llvm-project/commit/2b37c405cc18019ea5056a63fa65f839a4890b50.diff
LOG: [RISCV] Scale scalably-typed split argument offsets by VSCALE
This patch fixes a bug in lowering scalable-vector types in RISC-V's
main calling convention. When scalable-vector types are split and passed
indirectly, the target is responsible for scaling the offset --
initially set to the known-minimum store size -- by the scalable factor.
Before this fix, we were issuing overlapping loads or stores to the different
parts, leading to incorrect codegen.
Credit to @HsiangKai for spotting this.
Reviewed By: HsiangKai
Differential Revision: https://reviews.llvm.org/D103262
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ff76626e033a..148e2e519aa8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7199,8 +7199,10 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
CCValAssign &PartVA = ArgLocs[i + 1];
unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
- SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
- DAG.getIntPtrConstant(PartOffset, DL));
+ SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
+ if (PartVA.getValVT().isScalableVector())
+ Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
+ SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
MachinePointerInfo()));
++i;
@@ -7482,14 +7484,17 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Calculate the total size to store. We don't have access to what we're
// actually storing other than performing the loop and collecting the
// info.
- SmallVector<std::pair<SDValue, unsigned>> Parts;
+ SmallVector<std::pair<SDValue, SDValue>> Parts;
while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
SDValue PartValue = OutVals[i + 1];
unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
+ SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
EVT PartVT = PartValue.getValueType();
+ if (PartVT.isScalableVector())
+ Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
StoredSize += PartVT.getStoreSize();
StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
- Parts.push_back(std::make_pair(PartValue, PartOffset));
+ Parts.push_back(std::make_pair(PartValue, Offset));
++i;
}
SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
@@ -7499,9 +7504,9 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
MachinePointerInfo::getFixedStack(MF, FI)));
for (const auto &Part : Parts) {
SDValue PartValue = Part.first;
- unsigned PartOffset = Part.second;
- SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
- DAG.getIntPtrConstant(PartOffset, DL));
+ SDValue PartOffset = Part.second;
+ SDValue Address =
+ DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
MemOpChains.push_back(
DAG.getStore(Chain, DL, PartValue, Address,
MachinePointerInfo::getFixedStack(MF, FI)));
diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
index 7b4235e66f79..5000dcf7c96e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
@@ -3,12 +3,12 @@
; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v < %s | FileCheck %s --check-prefix=RV64
; Check that we correctly scale the split part indirect offsets by VSCALE.
-; FIXME: We don't; we're loading a full 8 vector registers 64 bytes ahead of
-; the first address. This should be scaled by vlenb!
define <vscale x 32 x i32> @callee_scalable_vector_split_indirect(<vscale x 32 x i32> %x, <vscale x 32 x i32> %y) {
; RV32-LABEL: callee_scalable_vector_split_indirect:
; RV32: # %bb.0:
-; RV32-NEXT: addi a1, a0, 64
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: vl8re32.v v24, (a0)
; RV32-NEXT: vl8re32.v v0, (a1)
; RV32-NEXT: vsetvli a0, zero, e32,m8,ta,mu
@@ -18,7 +18,9 @@ define <vscale x 32 x i32> @callee_scalable_vector_split_indirect(<vscale x 32 x
;
; RV64-LABEL: callee_scalable_vector_split_indirect:
; RV64: # %bb.0:
-; RV64-NEXT: addi a1, a0, 64
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: vl8re32.v v24, (a0)
; RV64-NEXT: vl8re32.v v0, (a1)
; RV64-NEXT: vsetvli a0, zero, e32,m8,ta,mu
@@ -30,7 +32,6 @@ define <vscale x 32 x i32> @callee_scalable_vector_split_indirect(<vscale x 32 x
}
; Call the function above. Check that we set the arguments correctly.
-; FIXME: We don't, see above.
define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x i32> %x) {
; RV32-LABEL: caller_scalable_vector_split_indirect:
; RV32: # %bb.0:
@@ -41,7 +42,10 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: sub sp, sp, a0
-; RV32-NEXT: addi a0, sp, 96
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: addi a1, sp, 32
+; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: vs8r.v v16, (a0)
; RV32-NEXT: addi a0, sp, 32
; RV32-NEXT: vs8r.v v8, (a0)
@@ -66,7 +70,10 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: sub sp, sp, a0
-; RV64-NEXT: addi a0, sp, 88
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: addi a1, sp, 24
+; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: vs8r.v v16, (a0)
; RV64-NEXT: addi a0, sp, 24
; RV64-NEXT: vs8r.v v8, (a0)
More information about the llvm-commits
mailing list