[llvm] [GISel][RISCV]Implement indirect parameter passing for large scalars (PR #95429)
Gábor Spaits via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 13 12:07:17 PDT 2024
https://github.com/spaits updated https://github.com/llvm/llvm-project/pull/95429
>From 3db3254691292a7793877d8bcc28351060598ca9 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 15:50:34 +0200
Subject: [PATCH 1/5] [GISel][RISCV]Implement indirect parameter passing for
large scalars
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 61 +++++--
.../Target/RISCV/GISel/RISCVCallLowering.cpp | 15 +-
...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 167 ++++++++++++++++++
.../calling-conv-lp64-lp64f-lp64d-common.ll | 79 +++++++++
4 files changed, 302 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 412cd0a21ad41..daa5e0c07e9b7 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -751,6 +751,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
const LLT NewLLT = Handler.isIncomingArgumentHandler() ? LocTy : ValTy;
const EVT OrigVT = EVT::getEVT(Args[i].Ty);
const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
+ const LLT PointerTy = LLT::pointer(0, DL.getPointerSizeInBits(0));
// Expected to be multiple regs for a single incoming arg.
// There should be Regs.size() ArgLocs per argument.
@@ -764,19 +765,28 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// If we can't directly assign the register, we need one or more
// intermediate values.
Args[i].Regs.resize(NumParts);
-
- // For each split register, create and assign a vreg that will store
- // the incoming component of the larger value. These will later be
- // merged to form the final vreg.
- for (unsigned Part = 0; Part < NumParts; ++Part)
- Args[i].Regs[Part] = MRI.createGenericVirtualRegister(NewLLT);
+
+ // When we have indirect parameter passing we are receiving a pointer,
+ // that points to the actual value. In that case we need a pointer.
+ if (VA.getLocInfo() == CCValAssign::Indirect &&
+ Args[i].Flags[0].isSplit()) {
+ if (Handler.isIncomingArgumentHandler())
+ Args[i].Regs[0] = MRI.createGenericVirtualRegister(PointerTy);
+ } else {
+ // For each split register, create and assign a vreg that will store
+ // the incoming component of the larger value. These will later be
+ // merged to form the final vreg.
+ for (unsigned Part = 0; Part < NumParts; ++Part)
+ Args[i].Regs[Part] = MRI.createGenericVirtualRegister(NewLLT);
+ }
}
assert((j + (NumParts - 1)) < ArgLocs.size() &&
"Too many regs for number of args");
// Coerce into outgoing value types before register assignment.
- if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy) {
+ if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy &&
+ VA.getLocInfo() != CCValAssign::Indirect) {
assert(Args[i].OrigRegs.size() == 1);
buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy,
ValTy, extendOpFromFlags(Args[i].Flags[0]));
@@ -790,6 +800,28 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
CCValAssign &VA = ArgLocs[j + Idx];
const ISD::ArgFlagsTy Flags = Args[i].Flags[Part];
+ // We found an indirect parameter passing and we are at the first part of
+ // the value being passed. In this case copy the incoming pointer into a
+ // virtual register so later we can load it.
+ if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit()) {
+ if (Handler.isIncomingArgumentHandler())
+ Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ else {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
+ Align(8), false);
+
+ auto PointerToStackReg =
+ MIRBuilder.buildFrameIndex(PointerTy, FrameIdx)
+ ->getOperand(0)
+ .getReg();
+ Handler.assignValueToAddress(Args[i].OrigRegs[Part], PointerToStackReg,
+ OrigTy, MachinePointerInfo{}, VA);
+ Handler.assignValueToReg(PointerToStackReg, VA.getLocReg(), VA);
+ }
+ break;
+ }
+
if (VA.isMemLoc() && !Flags.isByVal()) {
// Individual pieces may have been spilled to the stack and others
// passed in registers.
@@ -866,11 +898,16 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
}
}
- // Now that all pieces have been assigned, re-pack the register typed values
- // into the original value typed registers.
- if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
- // Merge the split registers into the expected larger result vregs of
- // the original call.
+ // In case of indirect parameter passing load the value referred to by
+ // the argument.
+ if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT &&
+ VA.getLocInfo() == CCValAssign::Indirect) {
+ Handler.assignValueToAddress(Args[i].OrigRegs[0], Args[i].Regs[0], OrigTy,
+ MachinePointerInfo{}, VA);
+
+ } else if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
+ // Now that all pieces have been assigned, re-pack the register typed values
+ // into the original value typed registers.
buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
LocTy, Args[i].Flags[0]);
}
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index 2bfee45852b20..b1f381f4b30ad 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -89,12 +89,13 @@ struct RISCVOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
const MachinePointerInfo &MPO,
const CCValAssign &VA) override {
MachineFunction &MF = MIRBuilder.getMF();
- uint64_t LocMemOffset = VA.getLocMemOffset();
-
+ uint64_t Offset = 0;
+ if (VA.isMemLoc())
+ Offset = VA.getLocMemOffset();
+
// TODO: Move StackAlignment to subtarget and share with FrameLowering.
- auto MMO =
- MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
- commonAlignment(Align(16), LocMemOffset));
+ auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
+ commonAlignment(Align(16), Offset));
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildStore(ExtReg, Addr, *MMO);
@@ -341,10 +342,8 @@ static bool isLegalElementTypeForRVV(Type *EltTy,
// TODO: Remove IsLowerArgs argument by adding support for vectors in lowerCall.
static bool isSupportedArgumentType(Type *T, const RISCVSubtarget &Subtarget,
bool IsLowerArgs = false) {
- // TODO: Integers larger than 2*XLen are passed indirectly which is not
- // supported yet.
if (T->isIntegerTy())
- return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2;
+ return true;
if (T->isHalfTy() || T->isFloatTy() || T->isDoubleTy())
return true;
if (T->isPointerTy())
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index 1a3489521af19..92f5f6220f096 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -16,6 +16,173 @@
; Check that on RV32, i64 is passed in a pair of registers. Unlike
; the convention for varargs, this need not be an aligned pair.
+define i64 @callee_128i_in_regs(i128 %x, i128 %y ) {
+ ; RV32I-LABEL: name: callee_128i_in_regs
+ ; RV32I: bb.1 (%ir-block.0):
+ ; RV32I-NEXT: liveins: $x10, $x11
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128), align 1)
+ ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128), align 1)
+ ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s128)
+ ; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
+ ; RV32I-NEXT: $x10 = COPY [[UV]](s32)
+ ; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
+ ; RV32I-NEXT: PseudoRET implicit $x10, implicit $x11
+ %2 = trunc i128 %x to i64
+ ret i64 %2
+}
+
+define i32 @caller_128i_in_regs( ) {
+ ; ILP32-LABEL: name: caller_128i_in_regs
+ ; ILP32: bb.1 (%ir-block.0):
+ ; ILP32-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 1
+ ; ILP32-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+ ; ILP32-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128))
+ ; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; ILP32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128))
+ ; ILP32-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+ ; ILP32-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; ILP32-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32-NEXT: PseudoRET implicit $x10
+ ;
+ ; ILP32F-LABEL: name: caller_128i_in_regs
+ ; ILP32F: bb.1 (%ir-block.0):
+ ; ILP32F-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 1
+ ; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+ ; ILP32F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32F-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128))
+ ; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; ILP32F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128))
+ ; ILP32F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+ ; ILP32F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32F-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32F-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; ILP32F-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32F-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32F-NEXT: PseudoRET implicit $x10
+ ;
+ ; ILP32D-LABEL: name: caller_128i_in_regs
+ ; ILP32D: bb.1 (%ir-block.0):
+ ; ILP32D-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 1
+ ; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+ ; ILP32D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32D-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128))
+ ; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; ILP32D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128))
+ ; ILP32D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+ ; ILP32D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32D-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32D-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; ILP32D-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32D-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32D-NEXT: PseudoRET implicit $x10
+ %1 = call i64 @callee_128i_in_regs(i128 1, i128 2)
+ %2 = trunc i64 %1 to i32
+ ret i32 %2
+}
+
+define i64 @callee_256i_in_regs(i256 %x, i256 %y ) {
+
+ ; RV32I-LABEL: name: callee_256i_in_regs
+ ; RV32I: bb.1 (%ir-block.0):
+ ; RV32I-NEXT: liveins: $x10, $x11
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 1)
+ ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 1)
+ ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s256)
+ ; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
+ ; RV32I-NEXT: $x10 = COPY [[UV]](s32)
+ ; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
+ ; RV32I-NEXT: PseudoRET implicit $x10, implicit $x11
+ %2 = trunc i256 %x to i64
+ ret i64 %2
+}
+
+define i32 @caller_256i_in_regs( ) {
+ ; ILP32-LABEL: name: caller_256i_in_regs
+ ; ILP32: bb.1 (%ir-block.0):
+ ; ILP32-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+ ; ILP32-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+ ; ILP32-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; ILP32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; ILP32-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; ILP32-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+ ; ILP32-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; ILP32-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32-NEXT: PseudoRET implicit $x10
+ ;
+ ; ILP32F-LABEL: name: caller_256i_in_regs
+ ; ILP32F: bb.1 (%ir-block.0):
+ ; ILP32F-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+ ; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+ ; ILP32F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32F-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; ILP32F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; ILP32F-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; ILP32F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+ ; ILP32F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32F-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32F-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; ILP32F-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32F-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32F-NEXT: PseudoRET implicit $x10
+ ;
+ ; ILP32D-LABEL: name: caller_256i_in_regs
+ ; ILP32D: bb.1 (%ir-block.0):
+ ; ILP32D-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+ ; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+ ; ILP32D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32D-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; ILP32D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; ILP32D-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; ILP32D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+ ; ILP32D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32D-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32D-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; ILP32D-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32D-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32D-NEXT: PseudoRET implicit $x10
+ %1 = call i64 @callee_256i_in_regs(i256 1, i256 2)
+ %2 = trunc i64 %1 to i32
+ ret i32 %2
+}
+
define i32 @callee_i64_in_regs(i32 %a, i64 %b) nounwind {
; RV32I-LABEL: name: callee_i64_in_regs
; RV32I: bb.1 (%ir-block.0):
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
index b175b8d92e6c9..0467bbe3d41bb 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
@@ -16,6 +16,85 @@
; Check that on RV64, i128 is passed in a pair of registers. Unlike
; the convention for varargs, this need not be an aligned pair.
+define i64 @callee_256i_in_regs(i256 %x, i256 %y ) {
+
+ ; RV64I-LABEL: name: callee_256i_in_regs
+ ; RV64I: bb.1 (%ir-block.0):
+ ; RV64I-NEXT: liveins: $x10, $x11
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV64I-NEXT: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 1)
+ ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; RV64I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 1)
+ ; RV64I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s256)
+ ; RV64I-NEXT: $x10 = COPY [[TRUNC]](s64)
+ ; RV64I-NEXT: PseudoRET implicit $x10
+ %2 = trunc i256 %x to i64
+ ret i64 %2
+}
+
+define i32 @caller_256i_in_regs( ) {
+ ; LP64-LABEL: name: caller_256i_in_regs
+ ; LP64: bb.1 (%ir-block.0):
+ ; LP64-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+ ; LP64-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+ ; LP64-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; LP64-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; LP64-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; LP64-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; LP64-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; LP64-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; LP64-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; LP64-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; LP64-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; LP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; LP64-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; LP64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32)
+ ; LP64-NEXT: $x10 = COPY [[ANYEXT]](s64)
+ ; LP64-NEXT: PseudoRET implicit $x10
+ ;
+ ; LP64F-LABEL: name: caller_256i_in_regs
+ ; LP64F: bb.1 (%ir-block.0):
+ ; LP64F-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+ ; LP64F-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+ ; LP64F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; LP64F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; LP64F-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; LP64F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; LP64F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; LP64F-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; LP64F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; LP64F-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; LP64F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; LP64F-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; LP64F-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; LP64F-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32)
+ ; LP64F-NEXT: $x10 = COPY [[ANYEXT]](s64)
+ ; LP64F-NEXT: PseudoRET implicit $x10
+ ;
+ ; LP64D-LABEL: name: caller_256i_in_regs
+ ; LP64D: bb.1 (%ir-block.0):
+ ; LP64D-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+ ; LP64D-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+ ; LP64D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; LP64D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; LP64D-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; LP64D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; LP64D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; LP64D-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; LP64D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; LP64D-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; LP64D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; LP64D-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; LP64D-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; LP64D-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32)
+ ; LP64D-NEXT: $x10 = COPY [[ANYEXT]](s64)
+ ; LP64D-NEXT: PseudoRET implicit $x10
+ %1 = call i64 @callee_256i_in_regs(i256 1, i256 2)
+ %2 = trunc i64 %1 to i32
+ ret i32 %2
+}
+
define i64 @callee_i128_in_regs(i64 %a, i128 %b) nounwind {
; RV64I-LABEL: name: callee_i128_in_regs
; RV64I: bb.1 (%ir-block.0):
>From 7f0647c34bc342ff3fbf555fa658c973b6c7ffeb Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 19:58:49 +0200
Subject: [PATCH 2/5] Move code responsible for loading one level deeper
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 23 +++++++++-----------
1 file changed, 10 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index daa5e0c07e9b7..dd3f6d29524c9 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -765,7 +765,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// If we can't directly assign the register, we need one or more
// intermediate values.
Args[i].Regs.resize(NumParts);
-
+
// When we have indirect parameter passing we are receiving a pointer,
// that points to the actual value. In that case we need a pointer.
if (VA.getLocInfo() == CCValAssign::Indirect &&
@@ -804,9 +804,12 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// the value being passed. In this case copy the incoming pointer into a
// virtual register so later we can load it.
if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit()) {
- if (Handler.isIncomingArgumentHandler())
+ if (Handler.isIncomingArgumentHandler()) {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
- else {
+ Handler.assignValueToAddress(Args[i].OrigRegs[Part],
+ Args[i].Regs[Part], OrigTy,
+ MachinePointerInfo{}, VA);
+ } else {
MachineFrameInfo &MFI = MF.getFrameInfo();
int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
Align(8), false);
@@ -815,8 +818,9 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
MIRBuilder.buildFrameIndex(PointerTy, FrameIdx)
->getOperand(0)
.getReg();
- Handler.assignValueToAddress(Args[i].OrigRegs[Part], PointerToStackReg,
- OrigTy, MachinePointerInfo{}, VA);
+ Handler.assignValueToAddress(Args[i].OrigRegs[Part],
+ PointerToStackReg, OrigTy,
+ MachinePointerInfo{}, VA);
Handler.assignValueToReg(PointerToStackReg, VA.getLocReg(), VA);
}
break;
@@ -898,14 +902,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
}
}
- // In case of indirect parameter passing load the value referred to by
- // the argument.
- if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT &&
- VA.getLocInfo() == CCValAssign::Indirect) {
- Handler.assignValueToAddress(Args[i].OrigRegs[0], Args[i].Regs[0], OrigTy,
- MachinePointerInfo{}, VA);
-
- } else if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
+ if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
// Now that all pieces have been assigned, re-pack the register typed values
// into the original value typed registers.
buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
>From 0def404cba2915c05edb6d78c01d3e7838daec5f Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 20:49:08 +0200
Subject: [PATCH 3/5] Don't try to merge values when indirect parameter passing
has happened
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index dd3f6d29524c9..b4848383548d6 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -791,7 +791,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy,
ValTy, extendOpFromFlags(Args[i].Flags[0]));
}
-
+ bool IndirectParameterPassingHandled = false;
bool BigEndianPartOrdering = TLI->hasBigEndianPartOrdering(OrigVT, DL);
for (unsigned Part = 0; Part < NumParts; ++Part) {
Register ArgReg = Args[i].Regs[Part];
@@ -804,6 +804,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// the value being passed. In this case copy the incoming pointer into a
// virtual register so later we can load it.
if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit()) {
+ IndirectParameterPassingHandled = true;
if (Handler.isIncomingArgumentHandler()) {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
Handler.assignValueToAddress(Args[i].OrigRegs[Part],
@@ -902,9 +903,10 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
}
}
- if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
- // Now that all pieces have been assigned, re-pack the register typed values
- // into the original value typed registers.
+ if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT &&
+ !IndirectParameterPassingHandled) {
+ // Now that all pieces have been assigned, re-pack the register typed
+ // values into the original value typed registers.
buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
LocTy, Args[i].Flags[0]);
}
>From 28f66143c406956894b8868cef8eb6f47c965aa8 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 21:02:22 +0200
Subject: [PATCH 4/5] Use proper stack alignment
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index b4848383548d6..c1ad0f6f654e8 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -811,9 +811,11 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
Args[i].Regs[Part], OrigTy,
MachinePointerInfo{}, VA);
} else {
+ Align StackAlign =
+ MF.getSubtarget().getFrameLowering()->getStackAlign();
MachineFrameInfo &MFI = MF.getFrameInfo();
int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
- Align(8), false);
+ StackAlign, false);
auto PointerToStackReg =
MIRBuilder.buildFrameIndex(PointerTy, FrameIdx)
>From 6d22315eb70283b01331d53f63da2e3f72a983cf Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 21:07:03 +0200
Subject: [PATCH 5/5] Format
---
llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index b1f381f4b30ad..180e238a0a978 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -92,10 +92,10 @@ struct RISCVOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
uint64_t Offset = 0;
if (VA.isMemLoc())
Offset = VA.getLocMemOffset();
-
+
// TODO: Move StackAlignment to subtarget and share with FrameLowering.
auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
- commonAlignment(Align(16), Offset));
+ commonAlignment(Align(16), Offset));
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildStore(ExtReg, Addr, *MMO);
More information about the llvm-commits
mailing list